xref: /openbsd-src/gnu/usr.bin/perl/pp_pack.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*    pp_pack.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * He still hopefully carried some of his gear in his pack: a small tinder-box,
13  * two small shallow pans, the smaller fitting into the larger; inside them a
14  * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15  * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16  * some salt.
17  */
18 
19 /* This file contains pp ("push/pop") functions that
20  * execute the opcodes that make up a perl program. A typical pp function
21  * expects to find its arguments on the stack, and usually pushes its
22  * results onto the stack, hence the 'pp' terminology. Each OP structure
23  * contains a pointer to the relevant pp_foo() function.
24  *
25  * This particular file just contains pp_pack() and pp_unpack(). See the
26  * other pp*.c files for the rest of the pp_ functions.
27  */
28 
29 #include "EXTERN.h"
30 #define PERL_IN_PP_PACK_C
31 #include "perl.h"
32 
33 /* Types used by pack/unpack */
34 typedef enum {
35   e_no_len,     /* no length  */
36   e_number,     /* number, [] */
37   e_star        /* asterisk   */
38 } howlen_t;
39 
40 typedef struct tempsym {
41   const char*    patptr;   /* current template char */
42   const char*    patend;   /* one after last char   */
43   const char*    grpbeg;   /* 1st char of ()-group  */
44   const char*    grpend;   /* end of ()-group       */
45   I32      code;     /* template code (!<>)   */
46   I32      length;   /* length/repeat count   */
47   howlen_t howlen;   /* how length is given   */
48   int      level;    /* () nesting level      */
49   U32      flags;    /* /=4, comma=2, pack=1  */
50                      /*   and group modifiers */
51   STRLEN   strbeg;   /* offset of group start */
52   struct tempsym *previous; /* previous group */
53 } tempsym_t;
54 
55 #define TEMPSYM_INIT(symptr, p, e, f) \
56     STMT_START {	\
57 	(symptr)->patptr   = (p);	\
58 	(symptr)->patend   = (e);	\
59 	(symptr)->grpbeg   = NULL;	\
60 	(symptr)->grpend   = NULL;	\
61 	(symptr)->grpend   = NULL;	\
62 	(symptr)->code     = 0;		\
63 	(symptr)->length   = 0;		\
64 	(symptr)->howlen   = e_no_len;	\
65 	(symptr)->level    = 0;		\
66 	(symptr)->flags    = (f);	\
67 	(symptr)->strbeg   = 0;		\
68 	(symptr)->previous = NULL;	\
69    } STMT_END
70 
71 #if PERL_VERSION >= 9
72 # define PERL_PACK_CAN_BYTEORDER
73 # define PERL_PACK_CAN_SHRIEKSIGN
74 #endif
75 
76 #ifndef CHAR_BIT
77 # define CHAR_BIT	8
78 #endif
79 /* Maximum number of bytes to which a byte can grow due to upgrade */
80 #define UTF8_EXPAND	2
81 
82 /*
83  * Offset for integer pack/unpack.
84  *
85  * On architectures where I16 and I32 aren't really 16 and 32 bits,
86  * which for now are all Crays, pack and unpack have to play games.
87  */
88 
89 /*
90  * These values are required for portability of pack() output.
91  * If they're not right on your machine, then pack() and unpack()
92  * wouldn't work right anyway; you'll need to apply the Cray hack.
93  * (I'd like to check them with #if, but you can't use sizeof() in
94  * the preprocessor.)  --???
95  */
96 /*
97     The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
98     defines are now in config.h.  --Andy Dougherty  April 1998
99  */
100 #define SIZE16 2
101 #define SIZE32 4
102 
103 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
104    --jhi Feb 1999 */
105 
106 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
107 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
108 #    define OFF16(p)	((char*)(p))
109 #    define OFF32(p)	((char*)(p))
110 #  else
111 #    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
112 #      define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
113 #      define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
114 #    else
115        ++++ bad cray byte order
116 #    endif
117 #  endif
118 #else
119 #  define OFF16(p)     ((char *) (p))
120 #  define OFF32(p)     ((char *) (p))
121 #endif
122 
123 /* Only to be used inside a loop (see the break) */
124 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START {		\
125     if (utf8) {								\
126 	if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break;	\
127     } else {								\
128 	Copy(s, OFF16(p), SIZE16, char);				\
129 	(s) += SIZE16;							\
130     }									\
131 } STMT_END
132 
133 /* Only to be used inside a loop (see the break) */
134 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START {		\
135     if (utf8) {								\
136 	if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break;	\
137     } else {								\
138 	Copy(s, OFF32(p), SIZE32, char);				\
139 	(s) += SIZE32;							\
140     }									\
141 } STMT_END
142 
143 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
144 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
145 
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_VAR(utf8, s, strend, var, datumtype)	\
148 STMT_START {						\
149     if (utf8) {						\
150         if (!uni_to_bytes(aTHX_ &s, strend,		\
151             (char *) &var, sizeof(var), datumtype)) break;\
152     } else {						\
153         Copy(s, (char *) &var, sizeof(var), char);	\
154         s += sizeof(var);				\
155     }							\
156 } STMT_END
157 
158 #define PUSH_VAR(utf8, aptr, var)	\
159 	PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
160 
161 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
162 #define MAX_SUB_TEMPLATE_LEVEL 100
163 
164 /* flags (note that type modifiers can also be used as flags!) */
165 #define FLAG_WAS_UTF8	      0x40
166 #define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
167 #define FLAG_UNPACK_ONLY_ONE  0x10
168 #define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
169 #define FLAG_SLASH            0x04
170 #define FLAG_COMMA            0x02
171 #define FLAG_PACK             0x01
172 
173 STATIC SV *
174 S_mul128(pTHX_ SV *sv, U8 m)
175 {
176   STRLEN          len;
177   char           *s = SvPV(sv, len);
178   char           *t;
179 
180   if (!strnEQ(s, "0000", 4)) {  /* need to grow sv */
181     SV * const tmpNew = newSVpvs("0000000000");
182 
183     sv_catsv(tmpNew, sv);
184     SvREFCNT_dec(sv);		/* free old sv */
185     sv = tmpNew;
186     s = SvPV(sv, len);
187   }
188   t = s + len - 1;
189   while (!*t)                   /* trailing '\0'? */
190     t--;
191   while (t > s) {
192     const U32 i = ((*t - '0') << 7) + m;
193     *(t--) = '0' + (char)(i % 10);
194     m = (char)(i / 10);
195   }
196   return (sv);
197 }
198 
199 /* Explosives and implosives. */
200 
201 #if 'I' == 73 && 'J' == 74
202 /* On an ASCII/ISO kind of system */
203 #define ISUUCHAR(ch)    ((ch) >= ' ' && (ch) < 'a')
204 #else
205 /*
206   Some other sort of character set - use memchr() so we don't match
207   the null byte.
208  */
209 #define ISUUCHAR(ch)    (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
210 #endif
211 
212 /* type modifiers */
213 #define TYPE_IS_SHRIEKING	0x100
214 #define TYPE_IS_BIG_ENDIAN	0x200
215 #define TYPE_IS_LITTLE_ENDIAN	0x400
216 #define TYPE_IS_PACK		0x800
217 #define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
218 #define TYPE_MODIFIERS(t)	((t) & ~0xFF)
219 #define TYPE_NO_MODIFIERS(t)	((t) & 0xFF)
220 
221 #ifdef PERL_PACK_CAN_SHRIEKSIGN
222 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
223 #else
224 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
225 #endif
226 
227 #ifndef PERL_PACK_CAN_BYTEORDER
228 /* Put "can't" first because it is shorter  */
229 # define TYPE_ENDIANNESS(t)	0
230 # define TYPE_NO_ENDIANNESS(t)	(t)
231 
232 # define ENDIANNESS_ALLOWED_TYPES   ""
233 
234 # define DO_BO_UNPACK(var, type)
235 # define DO_BO_PACK(var, type)
236 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
237 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
238 # define DO_BO_UNPACK_N(var, type)
239 # define DO_BO_PACK_N(var, type)
240 # define DO_BO_UNPACK_P(var)
241 # define DO_BO_PACK_P(var)
242 # define DO_BO_UNPACK_PC(var)
243 # define DO_BO_PACK_PC(var)
244 
245 #else /* PERL_PACK_CAN_BYTEORDER */
246 
247 # define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
248 # define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
249 
250 # define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
251 
252 # define DO_BO_UNPACK(var, type)                                              \
253         STMT_START {                                                          \
254           switch (TYPE_ENDIANNESS(datumtype)) {                               \
255             case TYPE_IS_BIG_ENDIAN:    var = my_betoh ## type (var); break;  \
256             case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break;  \
257             default: break;                                                   \
258           }                                                                   \
259         } STMT_END
260 
261 # define DO_BO_PACK(var, type)                                                \
262         STMT_START {                                                          \
263           switch (TYPE_ENDIANNESS(datumtype)) {                               \
264             case TYPE_IS_BIG_ENDIAN:    var = my_htobe ## type (var); break;  \
265             case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break;  \
266             default: break;                                                   \
267           }                                                                   \
268         } STMT_END
269 
270 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)                     \
271         STMT_START {                                                          \
272           switch (TYPE_ENDIANNESS(datumtype)) {                               \
273             case TYPE_IS_BIG_ENDIAN:                                          \
274               var = (post_cast*) my_betoh ## type ((pre_cast) var);           \
275               break;                                                          \
276             case TYPE_IS_LITTLE_ENDIAN:                                       \
277               var = (post_cast *) my_letoh ## type ((pre_cast) var);          \
278               break;                                                          \
279             default:                                                          \
280               break;                                                          \
281           }                                                                   \
282         } STMT_END
283 
284 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)                       \
285         STMT_START {                                                          \
286           switch (TYPE_ENDIANNESS(datumtype)) {                               \
287             case TYPE_IS_BIG_ENDIAN:                                          \
288               var = (post_cast *) my_htobe ## type ((pre_cast) var);          \
289               break;                                                          \
290             case TYPE_IS_LITTLE_ENDIAN:                                       \
291               var = (post_cast *) my_htole ## type ((pre_cast) var);          \
292               break;                                                          \
293             default:                                                          \
294               break;                                                          \
295           }                                                                   \
296         } STMT_END
297 
298 # define BO_CANT_DOIT(action, type)                                           \
299         STMT_START {                                                          \
300           switch (TYPE_ENDIANNESS(datumtype)) {                               \
301              case TYPE_IS_BIG_ENDIAN:                                         \
302                Perl_croak(aTHX_ "Can't %s big-endian %ss on this "            \
303                                 "platform", #action, #type);                  \
304                break;                                                         \
305              case TYPE_IS_LITTLE_ENDIAN:                                      \
306                Perl_croak(aTHX_ "Can't %s little-endian %ss on this "         \
307                                 "platform", #action, #type);                  \
308                break;                                                         \
309              default:                                                         \
310                break;                                                         \
311            }                                                                  \
312          } STMT_END
313 
314 # if PTRSIZE == INTSIZE
315 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, i, int, void)
316 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, i, int, void)
317 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, i, int, char)
318 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, i, int, char)
319 # elif PTRSIZE == LONGSIZE
320 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, l, long, void)
321 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, l, long, void)
322 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, l, long, char)
323 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, l, long, char)
324 # elif PTRSIZE == IVSIZE
325 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, l, IV, void)
326 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, l, IV, void)
327 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, l, IV, char)
328 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, l, IV, char)
329 # else
330 #  define DO_BO_UNPACK_P(var)	BO_CANT_DOIT(unpack, pointer)
331 #  define DO_BO_PACK_P(var)	BO_CANT_DOIT(pack, pointer)
332 #  define DO_BO_UNPACK_PC(var)	BO_CANT_DOIT(unpack, pointer)
333 #  define DO_BO_PACK_PC(var)	BO_CANT_DOIT(pack, pointer)
334 # endif
335 
336 # if defined(my_htolen) && defined(my_letohn) && \
337     defined(my_htoben) && defined(my_betohn)
338 #  define DO_BO_UNPACK_N(var, type)                                           \
339          STMT_START {                                                         \
340            switch (TYPE_ENDIANNESS(datumtype)) {                              \
341              case TYPE_IS_BIG_ENDIAN:    my_betohn(&var, sizeof(type)); break;\
342              case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
343              default: break;                                                  \
344            }                                                                  \
345          } STMT_END
346 
347 #  define DO_BO_PACK_N(var, type)                                             \
348          STMT_START {                                                         \
349            switch (TYPE_ENDIANNESS(datumtype)) {                              \
350              case TYPE_IS_BIG_ENDIAN:    my_htoben(&var, sizeof(type)); break;\
351              case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
352              default: break;                                                  \
353            }                                                                  \
354          } STMT_END
355 # else
356 #  define DO_BO_UNPACK_N(var, type)	BO_CANT_DOIT(unpack, type)
357 #  define DO_BO_PACK_N(var, type)	BO_CANT_DOIT(pack, type)
358 # endif
359 
360 #endif /* PERL_PACK_CAN_BYTEORDER */
361 
362 #define PACK_SIZE_CANNOT_CSUM		0x80
363 #define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
364 #define PACK_SIZE_MASK			0x3F
365 
366 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
367    in).  You're unlikely ever to need to regenerate them.  */
368 
369 #if TYPE_IS_SHRIEKING != 0x100
370    ++++shriek offset should be 256
371 #endif
372 
373 typedef U8 packprops_t;
374 #if 'J'-'I' == 1
375 /* ASCII */
376 STATIC const packprops_t packprops[512] = {
377     /* normal */
378     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
382     0, 0, 0,
383     /* C */ sizeof(unsigned char),
384 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
385     /* D */ LONG_DOUBLESIZE,
386 #else
387     0,
388 #endif
389     0,
390     /* F */ NVSIZE,
391     0, 0,
392     /* I */ sizeof(unsigned int),
393     /* J */ UVSIZE,
394     0,
395     /* L */ SIZE32,
396     0,
397     /* N */ SIZE32,
398     0, 0,
399 #if defined(HAS_QUAD)
400     /* Q */ sizeof(Uquad_t),
401 #else
402     0,
403 #endif
404     0,
405     /* S */ SIZE16,
406     0,
407     /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
408     /* V */ SIZE32,
409     /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
410     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
411     /* c */ sizeof(char),
412     /* d */ sizeof(double),
413     0,
414     /* f */ sizeof(float),
415     0, 0,
416     /* i */ sizeof(int),
417     /* j */ IVSIZE,
418     0,
419     /* l */ SIZE32,
420     0,
421     /* n */ SIZE16,
422     0,
423     /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
424 #if defined(HAS_QUAD)
425     /* q */ sizeof(Quad_t),
426 #else
427     0,
428 #endif
429     0,
430     /* s */ SIZE16,
431     0, 0,
432     /* v */ SIZE16,
433     /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
434     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
438     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442     0, 0, 0, 0, 0, 0, 0, 0,
443     /* shrieking */
444     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448     0, 0, 0, 0, 0, 0, 0, 0, 0,
449     /* I */ sizeof(unsigned int),
450     0, 0,
451     /* L */ sizeof(unsigned long),
452     0,
453 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
454     /* N */ SIZE32,
455 #else
456     0,
457 #endif
458     0, 0, 0, 0,
459     /* S */ sizeof(unsigned short),
460     0, 0,
461 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
462     /* V */ SIZE32,
463 #else
464     0,
465 #endif
466     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467     0, 0,
468     /* i */ sizeof(int),
469     0, 0,
470     /* l */ sizeof(long),
471     0,
472 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
473     /* n */ SIZE16,
474 #else
475     0,
476 #endif
477     0, 0, 0, 0,
478     /* s */ sizeof(short),
479     0, 0,
480 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
481     /* v */ SIZE16,
482 #else
483     0,
484 #endif
485     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493     0, 0, 0, 0, 0, 0, 0, 0, 0
494 };
495 #else
496 /* EBCDIC (or bust) */
497 STATIC const packprops_t packprops[512] = {
498     /* normal */
499     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
503     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507     0, 0, 0,
508     /* c */ sizeof(char),
509     /* d */ sizeof(double),
510     0,
511     /* f */ sizeof(float),
512     0, 0,
513     /* i */ sizeof(int),
514     0, 0, 0, 0, 0, 0, 0,
515     /* j */ IVSIZE,
516     0,
517     /* l */ SIZE32,
518     0,
519     /* n */ SIZE16,
520     0,
521     /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
522 #if defined(HAS_QUAD)
523     /* q */ sizeof(Quad_t),
524 #else
525     0,
526 #endif
527     0, 0, 0, 0, 0, 0, 0, 0, 0,
528     /* s */ SIZE16,
529     0, 0,
530     /* v */ SIZE16,
531     /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
532     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
534     /* C */ sizeof(unsigned char),
535 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
536     /* D */ LONG_DOUBLESIZE,
537 #else
538     0,
539 #endif
540     0,
541     /* F */ NVSIZE,
542     0, 0,
543     /* I */ sizeof(unsigned int),
544     0, 0, 0, 0, 0, 0, 0,
545     /* J */ UVSIZE,
546     0,
547     /* L */ SIZE32,
548     0,
549     /* N */ SIZE32,
550     0, 0,
551 #if defined(HAS_QUAD)
552     /* Q */ sizeof(Uquad_t),
553 #else
554     0,
555 #endif
556     0, 0, 0, 0, 0, 0, 0, 0, 0,
557     /* S */ SIZE16,
558     0,
559     /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
560     /* V */ SIZE32,
561     /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
562     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563     0, 0, 0, 0, 0, 0, 0, 0, 0,
564     /* shrieking */
565     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573     0, 0, 0, 0, 0, 0, 0, 0, 0,
574     /* i */ sizeof(int),
575     0, 0, 0, 0, 0, 0, 0, 0, 0,
576     /* l */ sizeof(long),
577     0,
578 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
579     /* n */ SIZE16,
580 #else
581     0,
582 #endif
583     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584     /* s */ sizeof(short),
585     0, 0,
586 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
587     /* v */ SIZE16,
588 #else
589     0,
590 #endif
591     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593     0, 0, 0,
594     /* I */ sizeof(unsigned int),
595     0, 0, 0, 0, 0, 0, 0, 0, 0,
596     /* L */ sizeof(unsigned long),
597     0,
598 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
599     /* N */ SIZE32,
600 #else
601     0,
602 #endif
603     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
604     /* S */ sizeof(unsigned short),
605     0, 0,
606 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
607     /* V */ SIZE32,
608 #else
609     0,
610 #endif
611     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
612     0, 0, 0, 0, 0, 0, 0, 0, 0, 0
613 };
614 #endif
615 
616 STATIC U8
617 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
618 {
619     STRLEN retlen;
620     UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
621 			 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
622     /* We try to process malformed UTF-8 as much as possible (preferrably with
623        warnings), but these two mean we make no progress in the string and
624        might enter an infinite loop */
625     if (retlen == (STRLEN) -1 || retlen == 0)
626 	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
627 		   (int) TYPE_NO_MODIFIERS(datumtype));
628     if (val >= 0x100) {
629 	if (ckWARN(WARN_UNPACK))
630 	Perl_warner(aTHX_ packWARN(WARN_UNPACK),
631 		    "Character in '%c' format wrapped in unpack",
632 		    (int) TYPE_NO_MODIFIERS(datumtype));
633 	val &= 0xff;
634     }
635     *s += retlen;
636     return (U8)val;
637 }
638 
639 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
640 	uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
641 	*(U8 *)(s)++)
642 
643 STATIC bool
644 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
645 {
646     UV val;
647     STRLEN retlen;
648     const char *from = *s;
649     int bad = 0;
650     const U32 flags = ckWARN(WARN_UTF8) ?
651 	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
652     for (;buf_len > 0; buf_len--) {
653 	if (from >= end) return FALSE;
654 	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
655 	if (retlen == (STRLEN) -1 || retlen == 0) {
656 	    from += UTF8SKIP(from);
657 	    bad |= 1;
658 	} else from += retlen;
659 	if (val >= 0x100) {
660 	    bad |= 2;
661 	    val &= 0xff;
662 	}
663 	*(U8 *)buf++ = (U8)val;
664     }
665     /* We have enough characters for the buffer. Did we have problems ? */
666     if (bad) {
667 	if (bad & 1) {
668 	    /* Rewalk the string fragment while warning */
669 	    const char *ptr;
670 	    const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
671 	    for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
672 		if (ptr >= end) break;
673 		utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
674 	    }
675 	    if (from > end) from = end;
676 	}
677 	if ((bad & 2) && ckWARN(WARN_UNPACK))
678 	    Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
679 				       WARN_PACK : WARN_UNPACK),
680 			"Character(s) in '%c' format wrapped in %s",
681 			(int) TYPE_NO_MODIFIERS(datumtype),
682 			datumtype & TYPE_IS_PACK ? "pack" : "unpack");
683     }
684     *s = from;
685     return TRUE;
686 }
687 
688 STATIC bool
689 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
690 {
691     dVAR;
692     STRLEN retlen;
693     const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
694     if (val >= 0x100 || !ISUUCHAR(val) ||
695 	retlen == (STRLEN) -1 || retlen == 0) {
696 	*out = 0;
697 	return FALSE;
698     }
699     *out = PL_uudmap[val] & 077;
700     *s += retlen;
701     return TRUE;
702 }
703 
704 STATIC char *
705 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
706     const U8 * const end = start + len;
707 
708     while (start < end) {
709 	const UV uv = NATIVE_TO_ASCII(*start);
710 	if (UNI_IS_INVARIANT(uv))
711 	    *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
712 	else {
713 	    *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
714 	    *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
715 	}
716 	start++;
717     }
718     return dest;
719 }
720 
721 #define PUSH_BYTES(utf8, cur, buf, len)				\
722 STMT_START {							\
723     if (utf8)							\
724 	(cur) = bytes_to_uni((U8 *) buf, len, (cur));		\
725     else {							\
726 	Copy(buf, cur, len, char);				\
727 	(cur) += (len);						\
728     }								\
729 } STMT_END
730 
731 #define GROWING(utf8, cat, start, cur, in_len)	\
732 STMT_START {					\
733     STRLEN glen = (in_len);			\
734     if (utf8) glen *= UTF8_EXPAND;		\
735     if ((cur) + glen >= (start) + SvLEN(cat)) {	\
736 	(start) = sv_exp_grow(cat, glen);	\
737 	(cur) = (start) + SvCUR(cat);		\
738     }						\
739 } STMT_END
740 
741 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
742 STMT_START {					\
743     const STRLEN glen = (in_len);		\
744     STRLEN gl = glen;				\
745     if (utf8) gl *= UTF8_EXPAND;		\
746     if ((cur) + gl >= (start) + SvLEN(cat)) {	\
747         *cur = '\0';				\
748         SvCUR_set((cat), (cur) - (start));	\
749 	(start) = sv_exp_grow(cat, gl);		\
750 	(cur) = (start) + SvCUR(cat);		\
751     }						\
752     PUSH_BYTES(utf8, cur, buf, glen);		\
753 } STMT_END
754 
755 #define PUSH_BYTE(utf8, s, byte)		\
756 STMT_START {					\
757     if (utf8) {					\
758 	const U8 au8 = (byte);			\
759 	(s) = bytes_to_uni(&au8, 1, (s));	\
760     } else *(U8 *)(s)++ = (byte);		\
761 } STMT_END
762 
763 /* Only to be used inside a loop (see the break) */
764 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
765 STMT_START {							\
766     STRLEN retlen;						\
767     if (str >= end) break;					\
768     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
769     if (retlen == (STRLEN) -1 || retlen == 0) {			\
770 	*cur = '\0';						\
771 	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
772     }								\
773     str += retlen;						\
774 } STMT_END
775 
776 static const char *_action( const tempsym_t* symptr )
777 {
778     return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
779 }
780 
781 /* Returns the sizeof() struct described by pat */
782 STATIC I32
783 S_measure_struct(pTHX_ tempsym_t* symptr)
784 {
785     I32 total = 0;
786 
787     while (next_symbol(symptr)) {
788 	I32 len;
789 	int size;
790 
791         switch (symptr->howlen) {
792 	  case e_star:
793    	    Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
794                         _action( symptr ) );
795             break;
796 	  default:
797 	    /* e_no_len and e_number */
798 	    len = symptr->length;
799 	    break;
800         }
801 
802 	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
803 	if (!size) {
804             int star;
805 	    /* endianness doesn't influence the size of a type */
806 	    switch(TYPE_NO_ENDIANNESS(symptr->code)) {
807 	    default:
808 		Perl_croak(aTHX_ "Invalid type '%c' in %s",
809 			   (int)TYPE_NO_MODIFIERS(symptr->code),
810                            _action( symptr ) );
811 #ifdef PERL_PACK_CAN_SHRIEKSIGN
812 	    case '.' | TYPE_IS_SHRIEKING:
813 	    case '@' | TYPE_IS_SHRIEKING:
814 #endif
815 	    case '@':
816 	    case '.':
817 	    case '/':
818 	    case 'U':			/* XXXX Is it correct? */
819 	    case 'w':
820 	    case 'u':
821 		Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
822 			   (int) TYPE_NO_MODIFIERS(symptr->code),
823                            _action( symptr ) );
824 	    case '%':
825 		size = 0;
826 		break;
827 	    case '(':
828 	    {
829 		tempsym_t savsym = *symptr;
830 		symptr->patptr = savsym.grpbeg;
831 		symptr->patend = savsym.grpend;
832 		/* XXXX Theoretically, we need to measure many times at
833 		   different positions, since the subexpression may contain
834 		   alignment commands, but be not of aligned length.
835 		   Need to detect this and croak().  */
836 		size = measure_struct(symptr);
837 		*symptr = savsym;
838 		break;
839 	    }
840 	    case 'X' | TYPE_IS_SHRIEKING:
841 		/* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
842 		 */
843 		if (!len)		/* Avoid division by 0 */
844 		    len = 1;
845 		len = total % len;	/* Assumed: the start is aligned. */
846 		/* FALL THROUGH */
847 	    case 'X':
848 		size = -1;
849 		if (total < len)
850                     Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
851 		break;
852 	    case 'x' | TYPE_IS_SHRIEKING:
853 		if (!len)		/* Avoid division by 0 */
854 		    len = 1;
855 		star = total % len;	/* Assumed: the start is aligned. */
856 		if (star)		/* Other portable ways? */
857 		    len = len - star;
858 		else
859 		    len = 0;
860 		/* FALL THROUGH */
861 	    case 'x':
862 	    case 'A':
863 	    case 'Z':
864 	    case 'a':
865 		size = 1;
866 		break;
867 	    case 'B':
868 	    case 'b':
869 		len = (len + 7)/8;
870 		size = 1;
871 		break;
872 	    case 'H':
873 	    case 'h':
874 		len = (len + 1)/2;
875 		size = 1;
876 		break;
877 
878 	    case 'P':
879 		len = 1;
880 		size = sizeof(char*);
881 		break;
882 	    }
883 	}
884 	total += len * size;
885     }
886     return total;
887 }
888 
889 
890 /* locate matching closing parenthesis or bracket
891  * returns char pointer to char after match, or NULL
892  */
893 STATIC const char *
894 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
895 {
896     while (patptr < patend) {
897 	const char c = *patptr++;
898 
899 	if (isSPACE(c))
900 	    continue;
901 	else if (c == ender)
902 	    return patptr-1;
903 	else if (c == '#') {
904 	    while (patptr < patend && *patptr != '\n')
905 		patptr++;
906 	    continue;
907 	} else if (c == '(')
908 	    patptr = group_end(patptr, patend, ')') + 1;
909 	else if (c == '[')
910 	    patptr = group_end(patptr, patend, ']') + 1;
911     }
912     Perl_croak(aTHX_ "No group ending character '%c' found in template",
913                ender);
914     return 0;
915 }
916 
917 
918 /* Convert unsigned decimal number to binary.
919  * Expects a pointer to the first digit and address of length variable
920  * Advances char pointer to 1st non-digit char and returns number
921  */
922 STATIC const char *
923 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
924 {
925   I32 len = *patptr++ - '0';
926   while (isDIGIT(*patptr)) {
927     if (len >= 0x7FFFFFFF/10)
928       Perl_croak(aTHX_ "pack/unpack repeat count overflow");
929     len = (len * 10) + (*patptr++ - '0');
930   }
931   *lenptr = len;
932   return patptr;
933 }
934 
935 /* The marvellous template parsing routine: Using state stored in *symptr,
936  * locates next template code and count
937  */
938 STATIC bool
939 S_next_symbol(pTHX_ tempsym_t* symptr )
940 {
941   const char* patptr = symptr->patptr;
942   const char* const patend = symptr->patend;
943 
944   symptr->flags &= ~FLAG_SLASH;
945 
946   while (patptr < patend) {
947     if (isSPACE(*patptr))
948       patptr++;
949     else if (*patptr == '#') {
950       patptr++;
951       while (patptr < patend && *patptr != '\n')
952 	patptr++;
953       if (patptr < patend)
954 	patptr++;
955     } else {
956       /* We should have found a template code */
957       I32 code = *patptr++ & 0xFF;
958       U32 inherited_modifiers = 0;
959 
960       if (code == ','){ /* grandfather in commas but with a warning */
961 	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
962           symptr->flags |= FLAG_COMMA;
963 	  Perl_warner(aTHX_ packWARN(WARN_UNPACK),
964 	 	      "Invalid type ',' in %s", _action( symptr ) );
965         }
966 	continue;
967       }
968 
969       /* for '(', skip to ')' */
970       if (code == '(') {
971         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
972           Perl_croak(aTHX_ "()-group starts with a count in %s",
973                         _action( symptr ) );
974         symptr->grpbeg = patptr;
975         patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
976         if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
977 	  Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
978                         _action( symptr ) );
979       }
980 
981       /* look for group modifiers to inherit */
982       if (TYPE_ENDIANNESS(symptr->flags)) {
983         if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
984           inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
985       }
986 
987       /* look for modifiers */
988       while (patptr < patend) {
989         const char *allowed;
990         I32 modifier;
991         switch (*patptr) {
992           case '!':
993             modifier = TYPE_IS_SHRIEKING;
994             allowed = SHRIEKING_ALLOWED_TYPES;
995             break;
996 #ifdef PERL_PACK_CAN_BYTEORDER
997           case '>':
998             modifier = TYPE_IS_BIG_ENDIAN;
999             allowed = ENDIANNESS_ALLOWED_TYPES;
1000             break;
1001           case '<':
1002             modifier = TYPE_IS_LITTLE_ENDIAN;
1003             allowed = ENDIANNESS_ALLOWED_TYPES;
1004             break;
1005 #endif /* PERL_PACK_CAN_BYTEORDER */
1006           default:
1007             allowed = "";
1008             modifier = 0;
1009             break;
1010         }
1011 
1012         if (modifier == 0)
1013           break;
1014 
1015         if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1016           Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1017                         allowed, _action( symptr ) );
1018 
1019         if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1020           Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1021                      (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1022         else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1023                  TYPE_ENDIANNESS_MASK)
1024           Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1025                      *patptr, _action( symptr ) );
1026 
1027         if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1028 	    Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1029                         "Duplicate modifier '%c' after '%c' in %s",
1030                         *patptr, (int) TYPE_NO_MODIFIERS(code),
1031                         _action( symptr ) );
1032         }
1033 
1034         code |= modifier;
1035         patptr++;
1036       }
1037 
1038       /* inherit modifiers */
1039       code |= inherited_modifiers;
1040 
1041       /* look for count and/or / */
1042       if (patptr < patend) {
1043 	if (isDIGIT(*patptr)) {
1044  	  patptr = get_num( patptr, &symptr->length );
1045           symptr->howlen = e_number;
1046 
1047         } else if (*patptr == '*') {
1048           patptr++;
1049           symptr->howlen = e_star;
1050 
1051         } else if (*patptr == '[') {
1052           const char* lenptr = ++patptr;
1053           symptr->howlen = e_number;
1054           patptr = group_end( patptr, patend, ']' ) + 1;
1055           /* what kind of [] is it? */
1056           if (isDIGIT(*lenptr)) {
1057             lenptr = get_num( lenptr, &symptr->length );
1058             if( *lenptr != ']' )
1059               Perl_croak(aTHX_ "Malformed integer in [] in %s",
1060                             _action( symptr ) );
1061           } else {
1062             tempsym_t savsym = *symptr;
1063             symptr->patend = patptr-1;
1064             symptr->patptr = lenptr;
1065             savsym.length = measure_struct(symptr);
1066             *symptr = savsym;
1067           }
1068         } else {
1069           symptr->howlen = e_no_len;
1070           symptr->length = 1;
1071         }
1072 
1073         /* try to find / */
1074         while (patptr < patend) {
1075           if (isSPACE(*patptr))
1076             patptr++;
1077           else if (*patptr == '#') {
1078             patptr++;
1079             while (patptr < patend && *patptr != '\n')
1080 	      patptr++;
1081             if (patptr < patend)
1082 	      patptr++;
1083           } else {
1084             if (*patptr == '/') {
1085               symptr->flags |= FLAG_SLASH;
1086               patptr++;
1087               if (patptr < patend &&
1088                   (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1089                 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1090                             _action( symptr ) );
1091             }
1092             break;
1093 	  }
1094 	}
1095       } else {
1096         /* at end - no count, no / */
1097         symptr->howlen = e_no_len;
1098         symptr->length = 1;
1099       }
1100 
1101       symptr->code = code;
1102       symptr->patptr = patptr;
1103       return TRUE;
1104     }
1105   }
1106   symptr->patptr = patptr;
1107   return FALSE;
1108 }
1109 
1110 /*
1111    There is no way to cleanly handle the case where we should process the
1112    string per byte in its upgraded form while it's really in downgraded form
1113    (e.g. estimates like strend-s as an upper bound for the number of
1114    characters left wouldn't work). So if we foresee the need of this
1115    (pattern starts with U or contains U0), we want to work on the encoded
1116    version of the string. Users are advised to upgrade their pack string
1117    themselves if they need to do a lot of unpacks like this on it
1118 */
1119 STATIC bool
1120 need_utf8(const char *pat, const char *patend)
1121 {
1122     bool first = TRUE;
1123     while (pat < patend) {
1124 	if (pat[0] == '#') {
1125 	    pat++;
1126 	    pat = (const char *) memchr(pat, '\n', patend-pat);
1127 	    if (!pat) return FALSE;
1128 	} else if (pat[0] == 'U') {
1129 	    if (first || pat[1] == '0') return TRUE;
1130 	} else first = FALSE;
1131 	pat++;
1132     }
1133     return FALSE;
1134 }
1135 
1136 STATIC char
1137 first_symbol(const char *pat, const char *patend) {
1138     while (pat < patend) {
1139 	if (pat[0] != '#') return pat[0];
1140 	pat++;
1141 	pat = (const char *) memchr(pat, '\n', patend-pat);
1142 	if (!pat) return 0;
1143 	pat++;
1144     }
1145     return 0;
1146 }
1147 
1148 /*
1149 =for apidoc unpackstring
1150 
1151 The engine implementing unpack() Perl function. C<unpackstring> puts the
1152 extracted list items on the stack and returns the number of elements.
1153 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1154 
1155 =cut */
1156 
1157 I32
1158 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1159 {
1160     tempsym_t sym;
1161 
1162     if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1163     else if (need_utf8(pat, patend)) {
1164 	/* We probably should try to avoid this in case a scalar context call
1165 	   wouldn't get to the "U0" */
1166 	STRLEN len = strend - s;
1167 	s = (char *) bytes_to_utf8((U8 *) s, &len);
1168 	SAVEFREEPV(s);
1169 	strend = s + len;
1170 	flags |= FLAG_DO_UTF8;
1171     }
1172 
1173     if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1174 	flags |= FLAG_PARSE_UTF8;
1175 
1176     TEMPSYM_INIT(&sym, pat, patend, flags);
1177 
1178     return unpack_rec(&sym, s, s, strend, NULL );
1179 }
1180 
1181 STATIC I32
1182 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1183 {
1184     dVAR; dSP;
1185     SV *sv;
1186     const I32 start_sp_offset = SP - PL_stack_base;
1187     howlen_t howlen;
1188 
1189     I32 checksum = 0;
1190     UV cuv = 0;
1191     NV cdouble = 0.0;
1192     const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1193     bool beyond = FALSE;
1194     bool explicit_length;
1195     const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1196     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1197     symptr->strbeg = s - strbeg;
1198 
1199     while (next_symbol(symptr)) {
1200 	packprops_t props;
1201 	I32 len;
1202         I32 datumtype = symptr->code;
1203 	/* do first one only unless in list context
1204 	   / is implemented by unpacking the count, then popping it from the
1205 	   stack, so must check that we're not in the middle of a /  */
1206         if ( unpack_only_one
1207 	     && (SP - PL_stack_base == start_sp_offset + 1)
1208 	     && (datumtype != '/') )   /* XXX can this be omitted */
1209             break;
1210 
1211         switch (howlen = symptr->howlen) {
1212 	  case e_star:
1213 	    len = strend - strbeg;	/* long enough */
1214 	    break;
1215 	  default:
1216 	    /* e_no_len and e_number */
1217 	    len = symptr->length;
1218 	    break;
1219         }
1220 
1221         explicit_length = TRUE;
1222       redo_switch:
1223         beyond = s >= strend;
1224 
1225 	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1226 	if (props) {
1227 	    /* props nonzero means we can process this letter. */
1228             const long size = props & PACK_SIZE_MASK;
1229             const long howmany = (strend - s) / size;
1230 	    if (len > howmany)
1231 		len = howmany;
1232 
1233 	    if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1234 		if (len && unpack_only_one) len = 1;
1235 		EXTEND(SP, len);
1236 		EXTEND_MORTAL(len);
1237 	    }
1238 	}
1239 
1240 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
1241 	default:
1242 	    Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1243 
1244 	case '%':
1245 	    if (howlen == e_no_len)
1246 		len = 16;		/* len is not specified */
1247 	    checksum = len;
1248 	    cuv = 0;
1249 	    cdouble = 0;
1250 	    continue;
1251 	    break;
1252 	case '(':
1253 	{
1254             tempsym_t savsym = *symptr;
1255             const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1256 	    symptr->flags |= group_modifiers;
1257             symptr->patend = savsym.grpend;
1258 	    symptr->previous = &savsym;
1259             symptr->level++;
1260 	    PUTBACK;
1261 	    while (len--) {
1262   	        symptr->patptr = savsym.grpbeg;
1263 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
1264 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
1265  	        unpack_rec(symptr, s, strbeg, strend, &s);
1266                 if (s == strend && savsym.howlen == e_star)
1267 		    break; /* No way to continue */
1268 	    }
1269 	    SPAGAIN;
1270             savsym.flags = symptr->flags & ~group_modifiers;
1271             *symptr = savsym;
1272 	    break;
1273 	}
1274 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1275 	case '.' | TYPE_IS_SHRIEKING:
1276 #endif
1277 	case '.': {
1278 	    const char *from;
1279 	    SV *sv;
1280 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1281 	    const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1282 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1283 	    const bool u8 = utf8;
1284 #endif
1285 	    if (howlen == e_star) from = strbeg;
1286 	    else if (len <= 0) from = s;
1287 	    else {
1288 		tempsym_t *group = symptr;
1289 
1290 		while (--len && group) group = group->previous;
1291 		from = group ? strbeg + group->strbeg : strbeg;
1292 	    }
1293 	    sv = from <= s ?
1294 		newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1295 		newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1296 	    XPUSHs(sv_2mortal(sv));
1297 	    break;
1298 	}
1299 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 	case '@' | TYPE_IS_SHRIEKING:
1301 #endif
1302 	case '@':
1303 	    s = strbeg + symptr->strbeg;
1304 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1305 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
1306 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1307 	    if (utf8)
1308 #endif
1309 	    {
1310 		while (len > 0) {
1311 		    if (s >= strend)
1312 			Perl_croak(aTHX_ "'@' outside of string in unpack");
1313 		    s += UTF8SKIP(s);
1314 		    len--;
1315 		}
1316 		if (s > strend)
1317 		    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1318 	    } else {
1319 		if (strend-s < len)
1320 		    Perl_croak(aTHX_ "'@' outside of string in unpack");
1321 		s += len;
1322 	    }
1323 	    break;
1324  	case 'X' | TYPE_IS_SHRIEKING:
1325  	    if (!len)			/* Avoid division by 0 */
1326  		len = 1;
1327 	    if (utf8) {
1328 		const char *hop, *last;
1329 		I32 l = len;
1330 		hop = last = strbeg;
1331 		while (hop < s) {
1332 		    hop += UTF8SKIP(hop);
1333 		    if (--l == 0) {
1334 			last = hop;
1335 			l = len;
1336 		    }
1337 		}
1338 		if (last > s)
1339 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1340 		s = last;
1341 		break;
1342 	    }
1343 	    len = (s - strbeg) % len;
1344  	    /* FALL THROUGH */
1345 	case 'X':
1346 	    if (utf8) {
1347 		while (len > 0) {
1348 		    if (s <= strbeg)
1349 			Perl_croak(aTHX_ "'X' outside of string in unpack");
1350 		    while (--s, UTF8_IS_CONTINUATION(*s)) {
1351 			if (s <= strbeg)
1352 			    Perl_croak(aTHX_ "'X' outside of string in unpack");
1353 		    }
1354 		    len--;
1355 		}
1356 	    } else {
1357 		if (len > s - strbeg)
1358 		    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1359 		s -= len;
1360 	    }
1361 	    break;
1362  	case 'x' | TYPE_IS_SHRIEKING: {
1363             I32 ai32;
1364  	    if (!len)			/* Avoid division by 0 */
1365  		len = 1;
1366 	    if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1367 	    else      ai32 = (s - strbeg)                         % len;
1368 	    if (ai32 == 0) break;
1369 	    len -= ai32;
1370             }
1371  	    /* FALL THROUGH */
1372 	case 'x':
1373 	    if (utf8) {
1374 		while (len>0) {
1375 		    if (s >= strend)
1376 			Perl_croak(aTHX_ "'x' outside of string in unpack");
1377 		    s += UTF8SKIP(s);
1378 		    len--;
1379 		}
1380 	    } else {
1381 		if (len > strend - s)
1382 		    Perl_croak(aTHX_ "'x' outside of string in unpack");
1383 		s += len;
1384 	    }
1385 	    break;
1386 	case '/':
1387 	    Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1388             break;
1389 	case 'A':
1390 	case 'Z':
1391 	case 'a':
1392 	    if (checksum) {
1393 		/* Preliminary length estimate is assumed done in 'W' */
1394 		if (len > strend - s) len = strend - s;
1395 		goto W_checksum;
1396 	    }
1397 	    if (utf8) {
1398 		I32 l;
1399 		const char *hop;
1400 		for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1401 		    if (hop >= strend) {
1402 			if (hop > strend)
1403 			    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1404 			break;
1405 		    }
1406 		}
1407 		if (hop > strend)
1408 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1409 		len = hop - s;
1410 	    } else if (len > strend - s)
1411 		len = strend - s;
1412 
1413 	    if (datumtype == 'Z') {
1414 		/* 'Z' strips stuff after first null */
1415 		const char *ptr, *end;
1416 		end = s + len;
1417 		for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1418 		sv = newSVpvn(s, ptr-s);
1419 		if (howlen == e_star) /* exact for 'Z*' */
1420 		    len = ptr-s + (ptr != strend ? 1 : 0);
1421 	    } else if (datumtype == 'A') {
1422 		/* 'A' strips both nulls and spaces */
1423 		const char *ptr;
1424 		if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1425 		    for (ptr = s+len-1; ptr >= s; ptr--)
1426 			if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1427 			    !is_utf8_space((U8 *) ptr)) break;
1428 		    if (ptr >= s) ptr += UTF8SKIP(ptr);
1429 		    else ptr++;
1430 		    if (ptr > s+len)
1431 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1432 		} else {
1433 		    for (ptr = s+len-1; ptr >= s; ptr--)
1434 			if (*ptr != 0 && !isSPACE(*ptr)) break;
1435 		    ptr++;
1436 		}
1437 		sv = newSVpvn(s, ptr-s);
1438 	    } else sv = newSVpvn(s, len);
1439 
1440 	    if (utf8) {
1441 		SvUTF8_on(sv);
1442 		/* Undo any upgrade done due to need_utf8() */
1443 		if (!(symptr->flags & FLAG_WAS_UTF8))
1444 		    sv_utf8_downgrade(sv, 0);
1445 	    }
1446 	    XPUSHs(sv_2mortal(sv));
1447 	    s += len;
1448 	    break;
1449 	case 'B':
1450 	case 'b': {
1451 	    char *str;
1452 	    if (howlen == e_star || len > (strend - s) * 8)
1453 		len = (strend - s) * 8;
1454 	    if (checksum) {
1455 		if (!PL_bitcount) {
1456 		    int bits;
1457 		    Newxz(PL_bitcount, 256, char);
1458 		    for (bits = 1; bits < 256; bits++) {
1459 			if (bits & 1)	PL_bitcount[bits]++;
1460 			if (bits & 2)	PL_bitcount[bits]++;
1461 			if (bits & 4)	PL_bitcount[bits]++;
1462 			if (bits & 8)	PL_bitcount[bits]++;
1463 			if (bits & 16)	PL_bitcount[bits]++;
1464 			if (bits & 32)	PL_bitcount[bits]++;
1465 			if (bits & 64)	PL_bitcount[bits]++;
1466 			if (bits & 128)	PL_bitcount[bits]++;
1467 		    }
1468 		}
1469 		if (utf8)
1470 		    while (len >= 8 && s < strend) {
1471 			cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1472 			len -= 8;
1473 		    }
1474 		else
1475 		    while (len >= 8) {
1476 			cuv += PL_bitcount[*(U8 *)s++];
1477 			len -= 8;
1478 		    }
1479 		if (len && s < strend) {
1480 		    U8 bits;
1481 		    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1482 		    if (datumtype == 'b')
1483 			while (len-- > 0) {
1484 			    if (bits & 1) cuv++;
1485 			    bits >>= 1;
1486 			}
1487 		    else
1488 			while (len-- > 0) {
1489 			    if (bits & 0x80) cuv++;
1490 			    bits <<= 1;
1491 			}
1492 		}
1493 		break;
1494 	    }
1495 
1496 	    sv = sv_2mortal(newSV(len ? len : 1));
1497 	    SvPOK_on(sv);
1498 	    str = SvPVX(sv);
1499 	    if (datumtype == 'b') {
1500 		U8 bits = 0;
1501 		const I32 ai32 = len;
1502 		for (len = 0; len < ai32; len++) {
1503 		    if (len & 7) bits >>= 1;
1504 		    else if (utf8) {
1505 			if (s >= strend) break;
1506 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1507 		    } else bits = *(U8 *) s++;
1508 		    *str++ = bits & 1 ? '1' : '0';
1509 		}
1510 	    } else {
1511 		U8 bits = 0;
1512 		const I32 ai32 = len;
1513 		for (len = 0; len < ai32; len++) {
1514 		    if (len & 7) bits <<= 1;
1515 		    else if (utf8) {
1516 			if (s >= strend) break;
1517 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1518 		    } else bits = *(U8 *) s++;
1519 		    *str++ = bits & 0x80 ? '1' : '0';
1520 		}
1521 	    }
1522 	    *str = '\0';
1523 	    SvCUR_set(sv, str - SvPVX_const(sv));
1524 	    XPUSHs(sv);
1525 	    break;
1526 	}
1527 	case 'H':
1528 	case 'h': {
1529 	    char *str;
1530 	    /* Preliminary length estimate, acceptable for utf8 too */
1531 	    if (howlen == e_star || len > (strend - s) * 2)
1532 		len = (strend - s) * 2;
1533 	    sv = sv_2mortal(newSV(len ? len : 1));
1534 	    SvPOK_on(sv);
1535 	    str = SvPVX(sv);
1536 	    if (datumtype == 'h') {
1537 		U8 bits = 0;
1538 		I32 ai32 = len;
1539 		for (len = 0; len < ai32; len++) {
1540 		    if (len & 1) bits >>= 4;
1541 		    else if (utf8) {
1542 			if (s >= strend) break;
1543 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1544 		    } else bits = * (U8 *) s++;
1545 		    *str++ = PL_hexdigit[bits & 15];
1546 		}
1547 	    } else {
1548 		U8 bits = 0;
1549 		const I32 ai32 = len;
1550 		for (len = 0; len < ai32; len++) {
1551 		    if (len & 1) bits <<= 4;
1552 		    else if (utf8) {
1553 			if (s >= strend) break;
1554 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1555 		    } else bits = *(U8 *) s++;
1556 		    *str++ = PL_hexdigit[(bits >> 4) & 15];
1557 		}
1558 	    }
1559 	    *str = '\0';
1560 	    SvCUR_set(sv, str - SvPVX_const(sv));
1561 	    XPUSHs(sv);
1562 	    break;
1563 	}
1564 	case 'C':
1565             if (len == 0) {
1566                 if (explicit_length)
1567 		    /* Switch to "character" mode */
1568 		    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1569 		break;
1570 	    }
1571 	    /* FALL THROUGH */
1572 	case 'c':
1573 	    while (len-- > 0 && s < strend) {
1574 		int aint;
1575 		if (utf8)
1576 		  {
1577 		    STRLEN retlen;
1578 		    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1579 				 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1580 		    if (retlen == (STRLEN) -1 || retlen == 0)
1581 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1582 		    s += retlen;
1583 		  }
1584 		else
1585 		  aint = *(U8 *)(s)++;
1586 		if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1587 		    aint -= 256;
1588 		if (!checksum)
1589 		    PUSHs(sv_2mortal(newSViv((IV)aint)));
1590 		else if (checksum > bits_in_uv)
1591 		    cdouble += (NV)aint;
1592 		else
1593 		    cuv += aint;
1594 	    }
1595 	    break;
1596 	case 'W':
1597 	  W_checksum:
1598 	    if (utf8) {
1599 		while (len-- > 0 && s < strend) {
1600 		    STRLEN retlen;
1601 		    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1602 					 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1603 		    if (retlen == (STRLEN) -1 || retlen == 0)
1604 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1605 		    s += retlen;
1606 		    if (!checksum)
1607 			PUSHs(sv_2mortal(newSVuv((UV) val)));
1608 		    else if (checksum > bits_in_uv)
1609 			cdouble += (NV) val;
1610 		    else
1611 			cuv += val;
1612 		}
1613 	    } else if (!checksum)
1614 		while (len-- > 0) {
1615 		    const U8 ch = *(U8 *) s++;
1616 		    PUSHs(sv_2mortal(newSVuv((UV) ch)));
1617 	    }
1618 	    else if (checksum > bits_in_uv)
1619 		while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1620 	    else
1621 		while (len-- > 0) cuv += *(U8 *) s++;
1622 	    break;
1623 	case 'U':
1624 	    if (len == 0) {
1625                 if (explicit_length) {
1626 		    /* Switch to "bytes in UTF-8" mode */
1627 		    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1628 		    else
1629 			/* Should be impossible due to the need_utf8() test */
1630 			Perl_croak(aTHX_ "U0 mode on a byte string");
1631 		}
1632 		break;
1633 	    }
1634 	    if (len > strend - s) len = strend - s;
1635 	    if (!checksum) {
1636 		if (len && unpack_only_one) len = 1;
1637 		EXTEND(SP, len);
1638 		EXTEND_MORTAL(len);
1639 	    }
1640 	    while (len-- > 0 && s < strend) {
1641 		STRLEN retlen;
1642 		UV auv;
1643 		if (utf8) {
1644 		    U8 result[UTF8_MAXLEN];
1645 		    const char *ptr = s;
1646 		    STRLEN len;
1647 		    /* Bug: warns about bad utf8 even if we are short on bytes
1648 		       and will break out of the loop */
1649 		    if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1650 				      'U'))
1651 			break;
1652 		    len = UTF8SKIP(result);
1653 		    if (!uni_to_bytes(aTHX_ &ptr, strend,
1654 				      (char *) &result[1], len-1, 'U')) break;
1655 		    auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1656 		    s = ptr;
1657 		} else {
1658 		    auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1659 		    if (retlen == (STRLEN) -1 || retlen == 0)
1660 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1661 		    s += retlen;
1662 		}
1663 		if (!checksum)
1664 		    PUSHs(sv_2mortal(newSVuv((UV) auv)));
1665 		else if (checksum > bits_in_uv)
1666 		    cdouble += (NV) auv;
1667 		else
1668 		    cuv += auv;
1669 	    }
1670 	    break;
1671 	case 's' | TYPE_IS_SHRIEKING:
1672 #if SHORTSIZE != SIZE16
1673 	    while (len-- > 0) {
1674 		short ashort;
1675 		SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1676 		DO_BO_UNPACK(ashort, s);
1677 		if (!checksum)
1678 		    PUSHs(sv_2mortal(newSViv((IV)ashort)));
1679 		else if (checksum > bits_in_uv)
1680 		    cdouble += (NV)ashort;
1681 		else
1682 		    cuv += ashort;
1683 	    }
1684 	    break;
1685 #else
1686 	    /* Fallthrough! */
1687 #endif
1688 	case 's':
1689 	    while (len-- > 0) {
1690 		I16 ai16;
1691 
1692 #if U16SIZE > SIZE16
1693 		ai16 = 0;
1694 #endif
1695 		SHIFT16(utf8, s, strend, &ai16, datumtype);
1696 		DO_BO_UNPACK(ai16, 16);
1697 #if U16SIZE > SIZE16
1698 		if (ai16 > 32767)
1699 		    ai16 -= 65536;
1700 #endif
1701 		if (!checksum)
1702 		    PUSHs(sv_2mortal(newSViv((IV)ai16)));
1703 		else if (checksum > bits_in_uv)
1704 		    cdouble += (NV)ai16;
1705 		else
1706 		    cuv += ai16;
1707 	    }
1708 	    break;
1709 	case 'S' | TYPE_IS_SHRIEKING:
1710 #if SHORTSIZE != SIZE16
1711 	    while (len-- > 0) {
1712 		unsigned short aushort;
1713 		SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1714 		DO_BO_UNPACK(aushort, s);
1715 		if (!checksum)
1716 		    PUSHs(sv_2mortal(newSVuv((UV) aushort)));
1717 		else if (checksum > bits_in_uv)
1718 		    cdouble += (NV)aushort;
1719 		else
1720 		    cuv += aushort;
1721 	    }
1722 	    break;
1723 #else
1724             /* Fallhrough! */
1725 #endif
1726 	case 'v':
1727 	case 'n':
1728 	case 'S':
1729 	    while (len-- > 0) {
1730 		U16 au16;
1731 #if U16SIZE > SIZE16
1732 		au16 = 0;
1733 #endif
1734 		SHIFT16(utf8, s, strend, &au16, datumtype);
1735 		DO_BO_UNPACK(au16, 16);
1736 #ifdef HAS_NTOHS
1737 		if (datumtype == 'n')
1738 		    au16 = PerlSock_ntohs(au16);
1739 #endif
1740 #ifdef HAS_VTOHS
1741 		if (datumtype == 'v')
1742 		    au16 = vtohs(au16);
1743 #endif
1744 		if (!checksum)
1745 		    PUSHs(sv_2mortal(newSVuv((UV)au16)));
1746 		else if (checksum > bits_in_uv)
1747 		    cdouble += (NV) au16;
1748 		else
1749 		    cuv += au16;
1750 	    }
1751 	    break;
1752 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1753 	case 'v' | TYPE_IS_SHRIEKING:
1754 	case 'n' | TYPE_IS_SHRIEKING:
1755 	    while (len-- > 0) {
1756 		I16 ai16;
1757 # if U16SIZE > SIZE16
1758 		ai16 = 0;
1759 # endif
1760 		SHIFT16(utf8, s, strend, &ai16, datumtype);
1761 # ifdef HAS_NTOHS
1762 		if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1763 		    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1764 # endif /* HAS_NTOHS */
1765 # ifdef HAS_VTOHS
1766 		if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1767 		    ai16 = (I16) vtohs((U16) ai16);
1768 # endif /* HAS_VTOHS */
1769 		if (!checksum)
1770 		    PUSHs(sv_2mortal(newSViv((IV)ai16)));
1771 		else if (checksum > bits_in_uv)
1772 		    cdouble += (NV) ai16;
1773 		else
1774 		    cuv += ai16;
1775 	    }
1776 	    break;
1777 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1778 	case 'i':
1779 	case 'i' | TYPE_IS_SHRIEKING:
1780 	    while (len-- > 0) {
1781 		int aint;
1782 		SHIFT_VAR(utf8, s, strend, aint, datumtype);
1783 		DO_BO_UNPACK(aint, i);
1784 		if (!checksum)
1785 		    PUSHs(sv_2mortal(newSViv((IV)aint)));
1786 		else if (checksum > bits_in_uv)
1787 		    cdouble += (NV)aint;
1788 		else
1789 		    cuv += aint;
1790 	    }
1791 	    break;
1792 	case 'I':
1793 	case 'I' | TYPE_IS_SHRIEKING:
1794 	    while (len-- > 0) {
1795 		unsigned int auint;
1796 		SHIFT_VAR(utf8, s, strend, auint, datumtype);
1797 		DO_BO_UNPACK(auint, i);
1798 		if (!checksum)
1799 		    PUSHs(sv_2mortal(newSVuv((UV)auint)));
1800 		else if (checksum > bits_in_uv)
1801 		    cdouble += (NV)auint;
1802 		else
1803 		    cuv += auint;
1804 	    }
1805 	    break;
1806 	case 'j':
1807 	    while (len-- > 0) {
1808 		IV aiv;
1809 		SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1810 #if IVSIZE == INTSIZE
1811 		DO_BO_UNPACK(aiv, i);
1812 #elif IVSIZE == LONGSIZE
1813 		DO_BO_UNPACK(aiv, l);
1814 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1815 		DO_BO_UNPACK(aiv, 64);
1816 #else
1817 		Perl_croak(aTHX_ "'j' not supported on this platform");
1818 #endif
1819 		if (!checksum)
1820 		    PUSHs(sv_2mortal(newSViv(aiv)));
1821 		else if (checksum > bits_in_uv)
1822 		    cdouble += (NV)aiv;
1823 		else
1824 		    cuv += aiv;
1825 	    }
1826 	    break;
1827 	case 'J':
1828 	    while (len-- > 0) {
1829 		UV auv;
1830 		SHIFT_VAR(utf8, s, strend, auv, datumtype);
1831 #if IVSIZE == INTSIZE
1832 		DO_BO_UNPACK(auv, i);
1833 #elif IVSIZE == LONGSIZE
1834 		DO_BO_UNPACK(auv, l);
1835 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1836 		DO_BO_UNPACK(auv, 64);
1837 #else
1838 		Perl_croak(aTHX_ "'J' not supported on this platform");
1839 #endif
1840 		if (!checksum)
1841 		    PUSHs(sv_2mortal(newSVuv(auv)));
1842 		else if (checksum > bits_in_uv)
1843 		    cdouble += (NV)auv;
1844 		else
1845 		    cuv += auv;
1846 	    }
1847 	    break;
1848 	case 'l' | TYPE_IS_SHRIEKING:
1849 #if LONGSIZE != SIZE32
1850 	    while (len-- > 0) {
1851 		long along;
1852 		SHIFT_VAR(utf8, s, strend, along, datumtype);
1853 		DO_BO_UNPACK(along, l);
1854 		if (!checksum)
1855 		    PUSHs(sv_2mortal(newSViv((IV)along)));
1856 		else if (checksum > bits_in_uv)
1857 		    cdouble += (NV)along;
1858 		else
1859 		    cuv += along;
1860 	    }
1861 	    break;
1862 #else
1863 	    /* Fallthrough! */
1864 #endif
1865 	case 'l':
1866 	    while (len-- > 0) {
1867 		I32 ai32;
1868 #if U32SIZE > SIZE32
1869 		ai32 = 0;
1870 #endif
1871 		SHIFT32(utf8, s, strend, &ai32, datumtype);
1872 		DO_BO_UNPACK(ai32, 32);
1873 #if U32SIZE > SIZE32
1874 		if (ai32 > 2147483647) ai32 -= 4294967296;
1875 #endif
1876 		if (!checksum)
1877 		    PUSHs(sv_2mortal(newSViv((IV)ai32)));
1878 		else if (checksum > bits_in_uv)
1879 		    cdouble += (NV)ai32;
1880 		else
1881 		    cuv += ai32;
1882 	    }
1883 	    break;
1884 	case 'L' | TYPE_IS_SHRIEKING:
1885 #if LONGSIZE != SIZE32
1886 	    while (len-- > 0) {
1887 		unsigned long aulong;
1888 		SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1889 		DO_BO_UNPACK(aulong, l);
1890 		if (!checksum)
1891 		    PUSHs(sv_2mortal(newSVuv((UV)aulong)));
1892 		else if (checksum > bits_in_uv)
1893 		    cdouble += (NV)aulong;
1894 		else
1895 		    cuv += aulong;
1896 	    }
1897 	    break;
1898 #else
1899             /* Fall through! */
1900 #endif
1901 	case 'V':
1902 	case 'N':
1903 	case 'L':
1904 	    while (len-- > 0) {
1905 		U32 au32;
1906 #if U32SIZE > SIZE32
1907 		au32 = 0;
1908 #endif
1909 		SHIFT32(utf8, s, strend, &au32, datumtype);
1910 		DO_BO_UNPACK(au32, 32);
1911 #ifdef HAS_NTOHL
1912 		if (datumtype == 'N')
1913 		    au32 = PerlSock_ntohl(au32);
1914 #endif
1915 #ifdef HAS_VTOHL
1916 		if (datumtype == 'V')
1917 		    au32 = vtohl(au32);
1918 #endif
1919 		if (!checksum)
1920 		    PUSHs(sv_2mortal(newSVuv((UV)au32)));
1921 		else if (checksum > bits_in_uv)
1922 		    cdouble += (NV)au32;
1923 		else
1924 		    cuv += au32;
1925 	    }
1926 	    break;
1927 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1928 	case 'V' | TYPE_IS_SHRIEKING:
1929 	case 'N' | TYPE_IS_SHRIEKING:
1930 	    while (len-- > 0) {
1931 		I32 ai32;
1932 # if U32SIZE > SIZE32
1933 		ai32 = 0;
1934 # endif
1935 		SHIFT32(utf8, s, strend, &ai32, datumtype);
1936 # ifdef HAS_NTOHL
1937 		if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1938 		    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1939 # endif
1940 # ifdef HAS_VTOHL
1941 		if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1942 		    ai32 = (I32)vtohl((U32)ai32);
1943 # endif
1944 		if (!checksum)
1945 		    PUSHs(sv_2mortal(newSViv((IV)ai32)));
1946 		else if (checksum > bits_in_uv)
1947 		    cdouble += (NV)ai32;
1948 		else
1949 		    cuv += ai32;
1950 	    }
1951 	    break;
1952 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1953 	case 'p':
1954 	    while (len-- > 0) {
1955 		const char *aptr;
1956 		SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1957 		DO_BO_UNPACK_PC(aptr);
1958 		/* newSVpv generates undef if aptr is NULL */
1959 		PUSHs(sv_2mortal(newSVpv(aptr, 0)));
1960 	    }
1961 	    break;
1962 	case 'w':
1963 	    {
1964 		UV auv = 0;
1965 		U32 bytes = 0;
1966 
1967 		while (len > 0 && s < strend) {
1968 		    U8 ch;
1969 		    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1970 		    auv = (auv << 7) | (ch & 0x7f);
1971 		    /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1972 		    if (ch < 0x80) {
1973 			bytes = 0;
1974 			PUSHs(sv_2mortal(newSVuv(auv)));
1975 			len--;
1976 			auv = 0;
1977 			continue;
1978 		    }
1979 		    if (++bytes >= sizeof(UV)) {	/* promote to string */
1980 			const char *t;
1981 
1982 			sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1983 			while (s < strend) {
1984 			    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1985 			    sv = mul128(sv, (U8)(ch & 0x7f));
1986 			    if (!(ch & 0x80)) {
1987 				bytes = 0;
1988 				break;
1989 			    }
1990 			}
1991 			t = SvPV_nolen_const(sv);
1992 			while (*t == '0')
1993 			    t++;
1994 			sv_chop(sv, t);
1995 			PUSHs(sv_2mortal(sv));
1996 			len--;
1997 			auv = 0;
1998 		    }
1999 		}
2000 		if ((s >= strend) && bytes)
2001 		    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2002 	    }
2003 	    break;
2004 	case 'P':
2005 	    if (symptr->howlen == e_star)
2006 	        Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2007 	    EXTEND(SP, 1);
2008 	    if (s + sizeof(char*) <= strend) {
2009 		char *aptr;
2010 		SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2011 		DO_BO_UNPACK_PC(aptr);
2012 		/* newSVpvn generates undef if aptr is NULL */
2013 		PUSHs(sv_2mortal(newSVpvn(aptr, len)));
2014 	    }
2015 	    break;
2016 #ifdef HAS_QUAD
2017 	case 'q':
2018 	    while (len-- > 0) {
2019 		Quad_t aquad;
2020 		SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2021 		DO_BO_UNPACK(aquad, 64);
2022 		if (!checksum)
2023                     PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
2024 				     newSViv((IV)aquad) : newSVnv((NV)aquad)));
2025 		else if (checksum > bits_in_uv)
2026 		    cdouble += (NV)aquad;
2027 		else
2028 		    cuv += aquad;
2029 	    }
2030 	    break;
2031 	case 'Q':
2032 	    while (len-- > 0) {
2033 		Uquad_t auquad;
2034 		SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2035 		DO_BO_UNPACK(auquad, 64);
2036 		if (!checksum)
2037 		    PUSHs(sv_2mortal(auquad <= UV_MAX ?
2038 				     newSVuv((UV)auquad):newSVnv((NV)auquad)));
2039 		else if (checksum > bits_in_uv)
2040 		    cdouble += (NV)auquad;
2041 		else
2042 		    cuv += auquad;
2043 	    }
2044 	    break;
2045 #endif /* HAS_QUAD */
2046 	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
2047 	case 'f':
2048 	    while (len-- > 0) {
2049 		float afloat;
2050 		SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2051 		DO_BO_UNPACK_N(afloat, float);
2052 		if (!checksum)
2053 		    PUSHs(sv_2mortal(newSVnv((NV)afloat)));
2054 		else
2055 		    cdouble += afloat;
2056 	    }
2057 	    break;
2058 	case 'd':
2059 	    while (len-- > 0) {
2060 		double adouble;
2061 		SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2062 		DO_BO_UNPACK_N(adouble, double);
2063 		if (!checksum)
2064 		    PUSHs(sv_2mortal(newSVnv((NV)adouble)));
2065 		else
2066 		    cdouble += adouble;
2067 	    }
2068 	    break;
2069 	case 'F':
2070 	    while (len-- > 0) {
2071 		NV anv;
2072 		SHIFT_VAR(utf8, s, strend, anv, datumtype);
2073 		DO_BO_UNPACK_N(anv, NV);
2074 		if (!checksum)
2075 		    PUSHs(sv_2mortal(newSVnv(anv)));
2076 		else
2077 		    cdouble += anv;
2078 	    }
2079 	    break;
2080 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2081 	case 'D':
2082 	    while (len-- > 0) {
2083 		long double aldouble;
2084 		SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2085 		DO_BO_UNPACK_N(aldouble, long double);
2086 		if (!checksum)
2087 		    PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
2088 		else
2089 		    cdouble += aldouble;
2090 	    }
2091 	    break;
2092 #endif
2093 	case 'u':
2094 	    {
2095                 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2096 		sv = sv_2mortal(newSV(l));
2097 		if (l) SvPOK_on(sv);
2098 	    }
2099 	    if (utf8) {
2100 		while (next_uni_uu(aTHX_ &s, strend, &len)) {
2101 		    I32 a, b, c, d;
2102 		    char hunk[3];
2103 
2104 		    while (len > 0) {
2105 			next_uni_uu(aTHX_ &s, strend, &a);
2106 			next_uni_uu(aTHX_ &s, strend, &b);
2107 			next_uni_uu(aTHX_ &s, strend, &c);
2108 			next_uni_uu(aTHX_ &s, strend, &d);
2109 			hunk[0] = (char)((a << 2) | (b >> 4));
2110 			hunk[1] = (char)((b << 4) | (c >> 2));
2111 			hunk[2] = (char)((c << 6) | d);
2112 			sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2113 			len -= 3;
2114 		    }
2115 		    if (s < strend) {
2116 			if (*s == '\n') {
2117                             s++;
2118                         }
2119 			else {
2120 			    /* possible checksum byte */
2121 			    const char *skip = s+UTF8SKIP(s);
2122 			    if (skip < strend && *skip == '\n')
2123                                 s = skip+1;
2124 			}
2125 		    }
2126 		}
2127 	    } else {
2128 		while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2129 		    I32 a, b, c, d;
2130 		    char hunk[3];
2131 
2132 		    len = PL_uudmap[*(U8*)s++] & 077;
2133 		    while (len > 0) {
2134 			if (s < strend && ISUUCHAR(*s))
2135 			    a = PL_uudmap[*(U8*)s++] & 077;
2136 			else
2137 			    a = 0;
2138 			if (s < strend && ISUUCHAR(*s))
2139 			    b = PL_uudmap[*(U8*)s++] & 077;
2140 			else
2141 			    b = 0;
2142 			if (s < strend && ISUUCHAR(*s))
2143 			    c = PL_uudmap[*(U8*)s++] & 077;
2144 			else
2145 			    c = 0;
2146 			if (s < strend && ISUUCHAR(*s))
2147 			    d = PL_uudmap[*(U8*)s++] & 077;
2148 			else
2149 			    d = 0;
2150 			hunk[0] = (char)((a << 2) | (b >> 4));
2151 			hunk[1] = (char)((b << 4) | (c >> 2));
2152 			hunk[2] = (char)((c << 6) | d);
2153 			sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2154 			len -= 3;
2155 		    }
2156 		    if (*s == '\n')
2157 			s++;
2158 		    else	/* possible checksum byte */
2159 			if (s + 1 < strend && s[1] == '\n')
2160 			    s += 2;
2161 		}
2162 	    }
2163 	    XPUSHs(sv);
2164 	    break;
2165 	}
2166 
2167 	if (checksum) {
2168 	    if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2169 	      (checksum > bits_in_uv &&
2170 	       strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2171 		NV trouble, anv;
2172 
2173                 anv = (NV) (1 << (checksum & 15));
2174 		while (checksum >= 16) {
2175 		    checksum -= 16;
2176 		    anv *= 65536.0;
2177 		}
2178 		while (cdouble < 0.0)
2179 		    cdouble += anv;
2180 		cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2181 		sv = newSVnv(cdouble);
2182 	    }
2183 	    else {
2184 		if (checksum < bits_in_uv) {
2185 		    UV mask = ((UV)1 << checksum) - 1;
2186 		    cuv &= mask;
2187 		}
2188 		sv = newSVuv(cuv);
2189 	    }
2190 	    XPUSHs(sv_2mortal(sv));
2191 	    checksum = 0;
2192 	}
2193 
2194         if (symptr->flags & FLAG_SLASH){
2195             if (SP - PL_stack_base - start_sp_offset <= 0)
2196                 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2197             if( next_symbol(symptr) ){
2198               if( symptr->howlen == e_number )
2199 		Perl_croak(aTHX_ "Count after length/code in unpack" );
2200               if( beyond ){
2201          	/* ...end of char buffer then no decent length available */
2202 		Perl_croak(aTHX_ "length/code after end of string in unpack" );
2203               } else {
2204          	/* take top of stack (hope it's numeric) */
2205                 len = POPi;
2206                 if( len < 0 )
2207                     Perl_croak(aTHX_ "Negative '/' count in unpack" );
2208               }
2209             } else {
2210 		Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2211             }
2212             datumtype = symptr->code;
2213             explicit_length = FALSE;
2214 	    goto redo_switch;
2215         }
2216     }
2217 
2218     if (new_s)
2219 	*new_s = s;
2220     PUTBACK;
2221     return SP - PL_stack_base - start_sp_offset;
2222 }
2223 
2224 PP(pp_unpack)
2225 {
2226     dVAR;
2227     dSP;
2228     dPOPPOPssrl;
2229     I32 gimme = GIMME_V;
2230     STRLEN llen;
2231     STRLEN rlen;
2232     const char *pat = SvPV_const(left,  llen);
2233     const char *s   = SvPV_const(right, rlen);
2234     const char *strend = s + rlen;
2235     const char *patend = pat + llen;
2236     I32 cnt;
2237 
2238     PUTBACK;
2239     cnt = unpackstring(pat, patend, s, strend,
2240 		     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2241 		     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2242 
2243     SPAGAIN;
2244     if ( !cnt && gimme == G_SCALAR )
2245        PUSHs(&PL_sv_undef);
2246     RETURN;
2247 }
2248 
2249 STATIC U8 *
2250 doencodes(U8 *h, const char *s, I32 len)
2251 {
2252     *h++ = PL_uuemap[len];
2253     while (len > 2) {
2254 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
2255 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2256 	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2257 	*h++ = PL_uuemap[(077 & (s[2] & 077))];
2258 	s += 3;
2259 	len -= 3;
2260     }
2261     if (len > 0) {
2262         const char r = (len > 1 ? s[1] : '\0');
2263 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
2264 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2265 	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2266 	*h++ = PL_uuemap[0];
2267     }
2268     *h++ = '\n';
2269     return h;
2270 }
2271 
2272 STATIC SV *
2273 S_is_an_int(pTHX_ const char *s, STRLEN l)
2274 {
2275   SV *result = newSVpvn(s, l);
2276   char *const result_c = SvPV_nolen(result);	/* convenience */
2277   char *out = result_c;
2278   bool skip = 1;
2279   bool ignore = 0;
2280 
2281   while (*s) {
2282     switch (*s) {
2283     case ' ':
2284       break;
2285     case '+':
2286       if (!skip) {
2287 	SvREFCNT_dec(result);
2288 	return (NULL);
2289       }
2290       break;
2291     case '0':
2292     case '1':
2293     case '2':
2294     case '3':
2295     case '4':
2296     case '5':
2297     case '6':
2298     case '7':
2299     case '8':
2300     case '9':
2301       skip = 0;
2302       if (!ignore) {
2303 	*(out++) = *s;
2304       }
2305       break;
2306     case '.':
2307       ignore = 1;
2308       break;
2309     default:
2310       SvREFCNT_dec(result);
2311       return (NULL);
2312     }
2313     s++;
2314   }
2315   *(out++) = '\0';
2316   SvCUR_set(result, out - result_c);
2317   return (result);
2318 }
2319 
2320 /* pnum must be '\0' terminated */
2321 STATIC int
2322 S_div128(pTHX_ SV *pnum, bool *done)
2323 {
2324     STRLEN len;
2325     char * const s = SvPV(pnum, len);
2326     char *t = s;
2327     int m = 0;
2328 
2329     *done = 1;
2330     while (*t) {
2331 	const int i = m * 10 + (*t - '0');
2332 	const int r = (i >> 7); /* r < 10 */
2333 	m = i & 0x7F;
2334 	if (r) {
2335 	    *done = 0;
2336 	}
2337 	*(t++) = '0' + r;
2338     }
2339     *(t++) = '\0';
2340     SvCUR_set(pnum, (STRLEN) (t - s));
2341     return (m);
2342 }
2343 
2344 /*
2345 =for apidoc packlist
2346 
2347 The engine implementing pack() Perl function.
2348 
2349 =cut
2350 */
2351 
2352 void
2353 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2354 {
2355     dVAR;
2356     tempsym_t sym;
2357 
2358     TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2359 
2360     /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2361        Also make sure any UTF8 flag is loaded */
2362     SvPV_force_nolen(cat);
2363     if (DO_UTF8(cat))
2364 	sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2365 
2366     (void)pack_rec( cat, &sym, beglist, endlist );
2367 }
2368 
2369 /* like sv_utf8_upgrade, but also repoint the group start markers */
2370 STATIC void
2371 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2372     STRLEN len;
2373     tempsym_t *group;
2374     const char *from_ptr, *from_start, *from_end, **marks, **m;
2375     char *to_start, *to_ptr;
2376 
2377     if (SvUTF8(sv)) return;
2378 
2379     from_start = SvPVX_const(sv);
2380     from_end = from_start + SvCUR(sv);
2381     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2382 	if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2383     if (from_ptr == from_end) {
2384 	/* Simple case: no character needs to be changed */
2385 	SvUTF8_on(sv);
2386 	return;
2387     }
2388 
2389     len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2390     Newx(to_start, len, char);
2391     Copy(from_start, to_start, from_ptr-from_start, char);
2392     to_ptr = to_start + (from_ptr-from_start);
2393 
2394     Newx(marks, sym_ptr->level+2, const char *);
2395     for (group=sym_ptr; group; group = group->previous)
2396 	marks[group->level] = from_start + group->strbeg;
2397     marks[sym_ptr->level+1] = from_end+1;
2398     for (m = marks; *m < from_ptr; m++)
2399 	*m = to_start + (*m-from_start);
2400 
2401     for (;from_ptr < from_end; from_ptr++) {
2402 	while (*m == from_ptr) *m++ = to_ptr;
2403 	to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2404     }
2405     *to_ptr = 0;
2406 
2407     while (*m == from_ptr) *m++ = to_ptr;
2408     if (m != marks + sym_ptr->level+1) {
2409 	Safefree(marks);
2410 	Safefree(to_start);
2411 	Perl_croak(aTHX_ "Assertion: marks beyond string end");
2412     }
2413     for (group=sym_ptr; group; group = group->previous)
2414 	group->strbeg = marks[group->level] - to_start;
2415     Safefree(marks);
2416 
2417     if (SvOOK(sv)) {
2418 	if (SvIVX(sv)) {
2419 	    SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2420 	    from_start -= SvIVX(sv);
2421 	    SvIV_set(sv, 0);
2422 	}
2423 	SvFLAGS(sv) &= ~SVf_OOK;
2424     }
2425     if (SvLEN(sv) != 0)
2426 	Safefree(from_start);
2427     SvPV_set(sv, to_start);
2428     SvCUR_set(sv, to_ptr - to_start);
2429     SvLEN_set(sv, len);
2430     SvUTF8_on(sv);
2431 }
2432 
2433 /* Exponential string grower. Makes string extension effectively O(n)
2434    needed says how many extra bytes we need (not counting the final '\0')
2435    Only grows the string if there is an actual lack of space
2436 */
2437 STATIC char *
2438 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2439     const STRLEN cur = SvCUR(sv);
2440     const STRLEN len = SvLEN(sv);
2441     STRLEN extend;
2442     if (len - cur > needed) return SvPVX(sv);
2443     extend = needed > len ? needed : len;
2444     return SvGROW(sv, len+extend+1);
2445 }
2446 
2447 STATIC
2448 SV **
2449 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2450 {
2451     dVAR;
2452     tempsym_t lookahead;
2453     I32 items  = endlist - beglist;
2454     bool found = next_symbol(symptr);
2455     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2456     bool warn_utf8 = ckWARN(WARN_UTF8);
2457 
2458     if (symptr->level == 0 && found && symptr->code == 'U') {
2459 	marked_upgrade(aTHX_ cat, symptr);
2460 	symptr->flags |= FLAG_DO_UTF8;
2461 	utf8 = 0;
2462     }
2463     symptr->strbeg = SvCUR(cat);
2464 
2465     while (found) {
2466 	SV *fromstr;
2467 	STRLEN fromlen;
2468 	I32 len;
2469 	SV *lengthcode = NULL;
2470         I32 datumtype = symptr->code;
2471         howlen_t howlen = symptr->howlen;
2472 	char *start = SvPVX(cat);
2473 	char *cur   = start + SvCUR(cat);
2474 
2475 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2476 
2477         switch (howlen) {
2478 	  case e_star:
2479 	    len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2480 		0 : items;
2481 	    break;
2482 	  default:
2483 	    /* e_no_len and e_number */
2484 	    len = symptr->length;
2485 	    break;
2486         }
2487 
2488 	if (len) {
2489 	    packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2490 
2491 	    if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2492 		/* We can process this letter. */
2493 		STRLEN size = props & PACK_SIZE_MASK;
2494 		GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2495 	    }
2496         }
2497 
2498         /* Look ahead for next symbol. Do we have code/code? */
2499         lookahead = *symptr;
2500         found = next_symbol(&lookahead);
2501 	if (symptr->flags & FLAG_SLASH) {
2502 	    IV count;
2503 	    if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2504 	    if (strchr("aAZ", lookahead.code)) {
2505 		if (lookahead.howlen == e_number) count = lookahead.length;
2506 		else {
2507 		    if (items > 0) {
2508 			if (SvGAMAGIC(*beglist)) {
2509 			    /* Avoid reading the active data more than once
2510 			       by copying it to a temporary.  */
2511 			    STRLEN len;
2512 			    const char *const pv = SvPV_const(*beglist, len);
2513 			    SV *const temp = sv_2mortal(newSVpvn(pv, len));
2514 			    if (SvUTF8(*beglist))
2515 				SvUTF8_on(temp);
2516 			    *beglist = temp;
2517 			}
2518 			count = DO_UTF8(*beglist) ?
2519 			    sv_len_utf8(*beglist) : sv_len(*beglist);
2520 		    }
2521 		    else count = 0;
2522 		    if (lookahead.code == 'Z') count++;
2523 		}
2524 	    } else {
2525 		if (lookahead.howlen == e_number && lookahead.length < items)
2526 		    count = lookahead.length;
2527 		else count = items;
2528 	    }
2529 	    lookahead.howlen = e_number;
2530 	    lookahead.length = count;
2531 	    lengthcode = sv_2mortal(newSViv(count));
2532 	}
2533 
2534 	/* Code inside the switch must take care to properly update
2535 	   cat (CUR length and '\0' termination) if it updated *cur and
2536 	   doesn't simply leave using break */
2537 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
2538 	default:
2539 	    Perl_croak(aTHX_ "Invalid type '%c' in pack",
2540 		       (int) TYPE_NO_MODIFIERS(datumtype));
2541 	case '%':
2542 	    Perl_croak(aTHX_ "'%%' may not be used in pack");
2543 	{
2544 	    char *from;
2545 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2546 	case '.' | TYPE_IS_SHRIEKING:
2547 #endif
2548 	case '.':
2549 	    if (howlen == e_star) from = start;
2550 	    else if (len == 0) from = cur;
2551 	    else {
2552 		tempsym_t *group = symptr;
2553 
2554 		while (--len && group) group = group->previous;
2555 		from = group ? start + group->strbeg : start;
2556 	    }
2557 	    fromstr = NEXTFROM;
2558 	    len = SvIV(fromstr);
2559 	    goto resize;
2560 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2561 	case '@' | TYPE_IS_SHRIEKING:
2562 #endif
2563 	case '@':
2564 	    from = start + symptr->strbeg;
2565 	  resize:
2566 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2567 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2568 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2569 	    if (utf8)
2570 #endif
2571 		if (len >= 0) {
2572 		    while (len && from < cur) {
2573 			from += UTF8SKIP(from);
2574 			len--;
2575 		    }
2576 		    if (from > cur)
2577 			Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2578 		    if (len) {
2579 			/* Here we know from == cur */
2580 		      grow:
2581 			GROWING(0, cat, start, cur, len);
2582 			Zero(cur, len, char);
2583 			cur += len;
2584 		    } else if (from < cur) {
2585 			len = cur - from;
2586 			goto shrink;
2587 		    } else goto no_change;
2588 		} else {
2589 		    cur = from;
2590 		    len = -len;
2591 		    goto utf8_shrink;
2592 		}
2593 	    else {
2594 		len -= cur - from;
2595 		if (len > 0) goto grow;
2596 		if (len == 0) goto no_change;
2597 		len = -len;
2598 		goto shrink;
2599 	    }
2600 	    break;
2601 	}
2602 	case '(': {
2603             tempsym_t savsym = *symptr;
2604 	    U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2605 	    symptr->flags |= group_modifiers;
2606             symptr->patend = savsym.grpend;
2607             symptr->level++;
2608 	    symptr->previous = &lookahead;
2609 	    while (len--) {
2610 		U32 was_utf8;
2611 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2612 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
2613 		was_utf8 = SvUTF8(cat);
2614   	        symptr->patptr = savsym.grpbeg;
2615 		beglist = pack_rec(cat, symptr, beglist, endlist);
2616 		if (SvUTF8(cat) != was_utf8)
2617 		    /* This had better be an upgrade while in utf8==0 mode */
2618 		    utf8 = 1;
2619 
2620 		if (savsym.howlen == e_star && beglist == endlist)
2621 		    break;		/* No way to continue */
2622 	    }
2623 	    items = endlist - beglist;
2624 	    lookahead.flags  = symptr->flags & ~group_modifiers;
2625 	    goto no_change;
2626 	}
2627 	case 'X' | TYPE_IS_SHRIEKING:
2628 	    if (!len)			/* Avoid division by 0 */
2629 		len = 1;
2630 	    if (utf8) {
2631 		char *hop, *last;
2632 		I32 l = len;
2633 		hop = last = start;
2634 		while (hop < cur) {
2635 		    hop += UTF8SKIP(hop);
2636 		    if (--l == 0) {
2637 			last = hop;
2638 			l = len;
2639 		    }
2640 		}
2641 		if (last > cur)
2642 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2643 		cur = last;
2644 		break;
2645 	    }
2646 	    len = (cur-start) % len;
2647 	    /* FALL THROUGH */
2648 	case 'X':
2649 	    if (utf8) {
2650 		if (len < 1) goto no_change;
2651 	      utf8_shrink:
2652 		while (len > 0) {
2653 		    if (cur <= start)
2654 			Perl_croak(aTHX_ "'%c' outside of string in pack",
2655 				   (int) TYPE_NO_MODIFIERS(datumtype));
2656 		    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2657 			if (cur <= start)
2658 			    Perl_croak(aTHX_ "'%c' outside of string in pack",
2659 				       (int) TYPE_NO_MODIFIERS(datumtype));
2660 		    }
2661 		    len--;
2662 		}
2663 	    } else {
2664 	      shrink:
2665 		if (cur - start < len)
2666 		    Perl_croak(aTHX_ "'%c' outside of string in pack",
2667 			       (int) TYPE_NO_MODIFIERS(datumtype));
2668 		cur -= len;
2669 	    }
2670 	    if (cur < start+symptr->strbeg) {
2671 		/* Make sure group starts don't point into the void */
2672 		tempsym_t *group;
2673 		const STRLEN length = cur-start;
2674 		for (group = symptr;
2675 		     group && length < group->strbeg;
2676 		     group = group->previous) group->strbeg = length;
2677 		lookahead.strbeg = length;
2678 	    }
2679 	    break;
2680 	case 'x' | TYPE_IS_SHRIEKING: {
2681 	    I32 ai32;
2682 	    if (!len)			/* Avoid division by 0 */
2683 		len = 1;
2684 	    if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2685 	    else      ai32 = (cur - start) % len;
2686 	    if (ai32 == 0) goto no_change;
2687 	    len -= ai32;
2688 	}
2689 	/* FALL THROUGH */
2690 	case 'x':
2691 	    goto grow;
2692 	case 'A':
2693 	case 'Z':
2694 	case 'a': {
2695 	    const char *aptr;
2696 
2697 	    fromstr = NEXTFROM;
2698 	    aptr = SvPV_const(fromstr, fromlen);
2699 	    if (DO_UTF8(fromstr)) {
2700                 const char *end, *s;
2701 
2702 		if (!utf8 && !SvUTF8(cat)) {
2703 		    marked_upgrade(aTHX_ cat, symptr);
2704 		    lookahead.flags |= FLAG_DO_UTF8;
2705 		    lookahead.strbeg = symptr->strbeg;
2706 		    utf8 = 1;
2707 		    start = SvPVX(cat);
2708 		    cur = start + SvCUR(cat);
2709 		}
2710 		if (howlen == e_star) {
2711 		    if (utf8) goto string_copy;
2712 		    len = fromlen+1;
2713 		}
2714 		s = aptr;
2715 		end = aptr + fromlen;
2716 		fromlen = datumtype == 'Z' ? len-1 : len;
2717 		while ((I32) fromlen > 0 && s < end) {
2718 		    s += UTF8SKIP(s);
2719 		    fromlen--;
2720 		}
2721 		if (s > end)
2722 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2723 		if (utf8) {
2724 		    len = fromlen;
2725 		    if (datumtype == 'Z') len++;
2726 		    fromlen = s-aptr;
2727 		    len += fromlen;
2728 
2729 		    goto string_copy;
2730 		}
2731 		fromlen = len - fromlen;
2732 		if (datumtype == 'Z') fromlen--;
2733 		if (howlen == e_star) {
2734 		    len = fromlen;
2735 		    if (datumtype == 'Z') len++;
2736 		}
2737 		GROWING(0, cat, start, cur, len);
2738 		if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2739 				  datumtype | TYPE_IS_PACK))
2740 		    Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2741 		cur += fromlen;
2742 		len -= fromlen;
2743 	    } else if (utf8) {
2744 		if (howlen == e_star) {
2745 		    len = fromlen;
2746 		    if (datumtype == 'Z') len++;
2747 		}
2748 		if (len <= (I32) fromlen) {
2749 		    fromlen = len;
2750 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2751 		}
2752 		/* assumes a byte expands to at most UTF8_EXPAND bytes on
2753 		   upgrade, so:
2754 		   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2755 		GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2756 		len -= fromlen;
2757 		while (fromlen > 0) {
2758 		    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2759 		    aptr++;
2760 		    fromlen--;
2761 		}
2762 	    } else {
2763 	      string_copy:
2764 		if (howlen == e_star) {
2765 		    len = fromlen;
2766 		    if (datumtype == 'Z') len++;
2767 		}
2768 		if (len <= (I32) fromlen) {
2769 		    fromlen = len;
2770 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2771 		}
2772 		GROWING(0, cat, start, cur, len);
2773 		Copy(aptr, cur, fromlen, char);
2774 		cur += fromlen;
2775 		len -= fromlen;
2776 	    }
2777 	    memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2778 	    cur += len;
2779 	    break;
2780 	}
2781 	case 'B':
2782 	case 'b': {
2783 	    const char *str, *end;
2784 	    I32 l, field_len;
2785 	    U8 bits;
2786 	    bool utf8_source;
2787 	    U32 utf8_flags;
2788 
2789 	    fromstr = NEXTFROM;
2790 	    str = SvPV_const(fromstr, fromlen);
2791 	    end = str + fromlen;
2792 	    if (DO_UTF8(fromstr)) {
2793 		utf8_source = TRUE;
2794 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2795 	    } else {
2796 		utf8_source = FALSE;
2797 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2798 	    }
2799 	    if (howlen == e_star) len = fromlen;
2800 	    field_len = (len+7)/8;
2801 	    GROWING(utf8, cat, start, cur, field_len);
2802 	    if (len > (I32)fromlen) len = fromlen;
2803 	    bits = 0;
2804 	    l = 0;
2805 	    if (datumtype == 'B')
2806 		while (l++ < len) {
2807 		    if (utf8_source) {
2808 			UV val = 0;
2809 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2810 			bits |= val & 1;
2811 		    } else bits |= *str++ & 1;
2812 		    if (l & 7) bits <<= 1;
2813 		    else {
2814 			PUSH_BYTE(utf8, cur, bits);
2815 			bits = 0;
2816 		    }
2817 		}
2818 	    else
2819 		/* datumtype == 'b' */
2820 		while (l++ < len) {
2821 		    if (utf8_source) {
2822 			UV val = 0;
2823 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2824 			if (val & 1) bits |= 0x80;
2825 		    } else if (*str++ & 1)
2826 			bits |= 0x80;
2827 		    if (l & 7) bits >>= 1;
2828 		    else {
2829 			PUSH_BYTE(utf8, cur, bits);
2830 			bits = 0;
2831 		    }
2832 		}
2833 	    l--;
2834 	    if (l & 7) {
2835 		if (datumtype == 'B')
2836 		    bits <<= 7 - (l & 7);
2837 		else
2838 		    bits >>= 7 - (l & 7);
2839 		PUSH_BYTE(utf8, cur, bits);
2840 		l += 7;
2841 	    }
2842 	    /* Determine how many chars are left in the requested field */
2843 	    l /= 8;
2844 	    if (howlen == e_star) field_len = 0;
2845 	    else field_len -= l;
2846 	    Zero(cur, field_len, char);
2847 	    cur += field_len;
2848 	    break;
2849 	}
2850 	case 'H':
2851 	case 'h': {
2852 	    const char *str, *end;
2853 	    I32 l, field_len;
2854 	    U8 bits;
2855 	    bool utf8_source;
2856 	    U32 utf8_flags;
2857 
2858 	    fromstr = NEXTFROM;
2859 	    str = SvPV_const(fromstr, fromlen);
2860 	    end = str + fromlen;
2861 	    if (DO_UTF8(fromstr)) {
2862 		utf8_source = TRUE;
2863 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2864 	    } else {
2865 		utf8_source = FALSE;
2866 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2867 	    }
2868 	    if (howlen == e_star) len = fromlen;
2869 	    field_len = (len+1)/2;
2870 	    GROWING(utf8, cat, start, cur, field_len);
2871 	    if (!utf8 && len > (I32)fromlen) len = fromlen;
2872 	    bits = 0;
2873 	    l = 0;
2874 	    if (datumtype == 'H')
2875 		while (l++ < len) {
2876 		    if (utf8_source) {
2877 			UV val = 0;
2878 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2879 			if (val < 256 && isALPHA(val))
2880 			    bits |= (val + 9) & 0xf;
2881 			else
2882 			    bits |= val & 0xf;
2883 		    } else if (isALPHA(*str))
2884 			bits |= (*str++ + 9) & 0xf;
2885 		    else
2886 			bits |= *str++ & 0xf;
2887 		    if (l & 1) bits <<= 4;
2888 		    else {
2889 			PUSH_BYTE(utf8, cur, bits);
2890 			bits = 0;
2891 		    }
2892 		}
2893 	    else
2894 		while (l++ < len) {
2895 		    if (utf8_source) {
2896 			UV val = 0;
2897 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2898 			if (val < 256 && isALPHA(val))
2899 			    bits |= ((val + 9) & 0xf) << 4;
2900 			else
2901 			    bits |= (val & 0xf) << 4;
2902 		    } else if (isALPHA(*str))
2903 			bits |= ((*str++ + 9) & 0xf) << 4;
2904 		    else
2905 			bits |= (*str++ & 0xf) << 4;
2906 		    if (l & 1) bits >>= 4;
2907 		    else {
2908 			PUSH_BYTE(utf8, cur, bits);
2909 			bits = 0;
2910 		    }
2911 		}
2912 	    l--;
2913 	    if (l & 1) {
2914 		PUSH_BYTE(utf8, cur, bits);
2915 		l++;
2916 	    }
2917 	    /* Determine how many chars are left in the requested field */
2918 	    l /= 2;
2919 	    if (howlen == e_star) field_len = 0;
2920 	    else field_len -= l;
2921 	    Zero(cur, field_len, char);
2922 	    cur += field_len;
2923 	    break;
2924 	}
2925 	case 'c':
2926 	    while (len-- > 0) {
2927 		IV aiv;
2928 		fromstr = NEXTFROM;
2929 		aiv = SvIV(fromstr);
2930 		if ((-128 > aiv || aiv > 127) &&
2931 		    ckWARN(WARN_PACK))
2932 		    Perl_warner(aTHX_ packWARN(WARN_PACK),
2933 				"Character in 'c' format wrapped in pack");
2934 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2935 	    }
2936 	    break;
2937 	case 'C':
2938 	    if (len == 0) {
2939 		utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2940 		break;
2941 	    }
2942 	    while (len-- > 0) {
2943 		IV aiv;
2944 		fromstr = NEXTFROM;
2945 		aiv = SvIV(fromstr);
2946 		if ((0 > aiv || aiv > 0xff) &&
2947 		    ckWARN(WARN_PACK))
2948 		    Perl_warner(aTHX_ packWARN(WARN_PACK),
2949 				"Character in 'C' format wrapped in pack");
2950 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2951 	    }
2952 	    break;
2953 	case 'W': {
2954 	    char *end;
2955 	    U8 in_bytes = (U8)IN_BYTES;
2956 
2957 	    end = start+SvLEN(cat)-1;
2958 	    if (utf8) end -= UTF8_MAXLEN-1;
2959 	    while (len-- > 0) {
2960 		UV auv;
2961 		fromstr = NEXTFROM;
2962 		auv = SvUV(fromstr);
2963 		if (in_bytes) auv = auv % 0x100;
2964 		if (utf8) {
2965 		  W_utf8:
2966 		    if (cur > end) {
2967 			*cur = '\0';
2968 			SvCUR_set(cat, cur - start);
2969 
2970 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2971 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2972 		    }
2973 		    cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
2974 						       NATIVE_TO_UNI(auv),
2975 						       warn_utf8 ?
2976 						       0 : UNICODE_ALLOW_ANY);
2977 		} else {
2978 		    if (auv >= 0x100) {
2979 			if (!SvUTF8(cat)) {
2980 			    *cur = '\0';
2981 			    SvCUR_set(cat, cur - start);
2982 			    marked_upgrade(aTHX_ cat, symptr);
2983 			    lookahead.flags |= FLAG_DO_UTF8;
2984 			    lookahead.strbeg = symptr->strbeg;
2985 			    utf8 = 1;
2986 			    start = SvPVX(cat);
2987 			    cur = start + SvCUR(cat);
2988 			    end = start+SvLEN(cat)-UTF8_MAXLEN;
2989 			    goto W_utf8;
2990 			}
2991 			if (ckWARN(WARN_PACK))
2992 			    Perl_warner(aTHX_ packWARN(WARN_PACK),
2993 					"Character in 'W' format wrapped in pack");
2994 			auv &= 0xff;
2995 		    }
2996 		    if (cur >= end) {
2997 			*cur = '\0';
2998 			SvCUR_set(cat, cur - start);
2999 			GROWING(0, cat, start, cur, len+1);
3000 			end = start+SvLEN(cat)-1;
3001 		    }
3002 		    *(U8 *) cur++ = (U8)auv;
3003 		}
3004 	    }
3005 	    break;
3006 	}
3007 	case 'U': {
3008 	    char *end;
3009 
3010 	    if (len == 0) {
3011 		if (!(symptr->flags & FLAG_DO_UTF8)) {
3012 		    marked_upgrade(aTHX_ cat, symptr);
3013 		    lookahead.flags |= FLAG_DO_UTF8;
3014 		    lookahead.strbeg = symptr->strbeg;
3015 		}
3016 		utf8 = 0;
3017 		goto no_change;
3018 	    }
3019 
3020 	    end = start+SvLEN(cat);
3021 	    if (!utf8) end -= UTF8_MAXLEN;
3022 	    while (len-- > 0) {
3023 		UV auv;
3024 		fromstr = NEXTFROM;
3025 		auv = SvUV(fromstr);
3026 		if (utf8) {
3027 		    U8 buffer[UTF8_MAXLEN], *endb;
3028 		    endb = uvuni_to_utf8_flags(buffer, auv,
3029 					       warn_utf8 ?
3030 					       0 : UNICODE_ALLOW_ANY);
3031 		    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3032 			*cur = '\0';
3033 			SvCUR_set(cat, cur - start);
3034 			GROWING(0, cat, start, cur,
3035 				len+(endb-buffer)*UTF8_EXPAND);
3036 			end = start+SvLEN(cat);
3037 		    }
3038 		    cur = bytes_to_uni(buffer, endb-buffer, cur);
3039 		} else {
3040 		    if (cur >= end) {
3041 			*cur = '\0';
3042 			SvCUR_set(cat, cur - start);
3043 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3044 			end = start+SvLEN(cat)-UTF8_MAXLEN;
3045 		    }
3046 		    cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3047 						       warn_utf8 ?
3048 						       0 : UNICODE_ALLOW_ANY);
3049 		}
3050 	    }
3051 	    break;
3052 	}
3053 	/* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
3054 	case 'f':
3055 	    while (len-- > 0) {
3056 		float afloat;
3057 		NV anv;
3058 		fromstr = NEXTFROM;
3059 		anv = SvNV(fromstr);
3060 #ifdef __VOS__
3061 		/* VOS does not automatically map a floating-point overflow
3062 		   during conversion from double to float into infinity, so we
3063 		   do it by hand.  This code should either be generalized for
3064 		   any OS that needs it, or removed if and when VOS implements
3065 		   posix-976 (suggestion to support mapping to infinity).
3066 		   Paul.Green@stratus.com 02-04-02.  */
3067 		if (anv > FLT_MAX)
3068 		    afloat = _float_constants[0];   /* single prec. inf. */
3069 		else if (anv < -FLT_MAX)
3070 		    afloat = _float_constants[0];   /* single prec. inf. */
3071 		else afloat = (float) anv;
3072 #else /* __VOS__ */
3073 # if defined(VMS) && !defined(__IEEE_FP)
3074 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
3075 		 * on Alpha; fake it if we don't have them.
3076 		 */
3077 		if (anv > FLT_MAX)
3078 		    afloat = FLT_MAX;
3079 		else if (anv < -FLT_MAX)
3080 		    afloat = -FLT_MAX;
3081 		else afloat = (float)anv;
3082 # else
3083 		afloat = (float)anv;
3084 # endif
3085 #endif /* __VOS__ */
3086 		DO_BO_PACK_N(afloat, float);
3087 		PUSH_VAR(utf8, cur, afloat);
3088 	    }
3089 	    break;
3090 	case 'd':
3091 	    while (len-- > 0) {
3092 		double adouble;
3093 		NV anv;
3094 		fromstr = NEXTFROM;
3095 		anv = SvNV(fromstr);
3096 #ifdef __VOS__
3097 		/* VOS does not automatically map a floating-point overflow
3098 		   during conversion from long double to double into infinity,
3099 		   so we do it by hand.  This code should either be generalized
3100 		   for any OS that needs it, or removed if and when VOS
3101 		   implements posix-976 (suggestion to support mapping to
3102 		   infinity).  Paul.Green@stratus.com 02-04-02.  */
3103 		if (anv > DBL_MAX)
3104 		    adouble = _double_constants[0];   /* double prec. inf. */
3105 		else if (anv < -DBL_MAX)
3106 		    adouble = _double_constants[0];   /* double prec. inf. */
3107 		else adouble = (double) anv;
3108 #else /* __VOS__ */
3109 # if defined(VMS) && !defined(__IEEE_FP)
3110 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
3111 		 * on Alpha; fake it if we don't have them.
3112 		 */
3113 		if (anv > DBL_MAX)
3114 		    adouble = DBL_MAX;
3115 		else if (anv < -DBL_MAX)
3116 		    adouble = -DBL_MAX;
3117 		else adouble = (double)anv;
3118 # else
3119 		adouble = (double)anv;
3120 # endif
3121 #endif /* __VOS__ */
3122 		DO_BO_PACK_N(adouble, double);
3123 		PUSH_VAR(utf8, cur, adouble);
3124 	    }
3125 	    break;
3126 	case 'F': {
3127 	    NV anv;
3128 	    Zero(&anv, 1, NV); /* can be long double with unused bits */
3129 	    while (len-- > 0) {
3130 		fromstr = NEXTFROM;
3131 		anv = SvNV(fromstr);
3132 		DO_BO_PACK_N(anv, NV);
3133 		PUSH_VAR(utf8, cur, anv);
3134 	    }
3135 	    break;
3136 	}
3137 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3138 	case 'D': {
3139 	    long double aldouble;
3140 	    /* long doubles can have unused bits, which may be nonzero */
3141 	    Zero(&aldouble, 1, long double);
3142 	    while (len-- > 0) {
3143 		fromstr = NEXTFROM;
3144 		aldouble = (long double)SvNV(fromstr);
3145 		DO_BO_PACK_N(aldouble, long double);
3146 		PUSH_VAR(utf8, cur, aldouble);
3147 	    }
3148 	    break;
3149 	}
3150 #endif
3151 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3152 	case 'n' | TYPE_IS_SHRIEKING:
3153 #endif
3154 	case 'n':
3155 	    while (len-- > 0) {
3156 		I16 ai16;
3157 		fromstr = NEXTFROM;
3158 		ai16 = (I16)SvIV(fromstr);
3159 #ifdef HAS_HTONS
3160 		ai16 = PerlSock_htons(ai16);
3161 #endif
3162 		PUSH16(utf8, cur, &ai16);
3163 	    }
3164 	    break;
3165 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3166 	case 'v' | TYPE_IS_SHRIEKING:
3167 #endif
3168 	case 'v':
3169 	    while (len-- > 0) {
3170 		I16 ai16;
3171 		fromstr = NEXTFROM;
3172 		ai16 = (I16)SvIV(fromstr);
3173 #ifdef HAS_HTOVS
3174 		ai16 = htovs(ai16);
3175 #endif
3176 		PUSH16(utf8, cur, &ai16);
3177 	    }
3178 	    break;
3179         case 'S' | TYPE_IS_SHRIEKING:
3180 #if SHORTSIZE != SIZE16
3181 	    while (len-- > 0) {
3182 		unsigned short aushort;
3183 		fromstr = NEXTFROM;
3184 		aushort = SvUV(fromstr);
3185 		DO_BO_PACK(aushort, s);
3186 		PUSH_VAR(utf8, cur, aushort);
3187 	    }
3188             break;
3189 #else
3190             /* Fall through! */
3191 #endif
3192 	case 'S':
3193 	    while (len-- > 0) {
3194 		U16 au16;
3195 		fromstr = NEXTFROM;
3196 		au16 = (U16)SvUV(fromstr);
3197 		DO_BO_PACK(au16, 16);
3198 		PUSH16(utf8, cur, &au16);
3199 	    }
3200 	    break;
3201 	case 's' | TYPE_IS_SHRIEKING:
3202 #if SHORTSIZE != SIZE16
3203 	    while (len-- > 0) {
3204 		short ashort;
3205 		fromstr = NEXTFROM;
3206 		ashort = SvIV(fromstr);
3207 		DO_BO_PACK(ashort, s);
3208 		PUSH_VAR(utf8, cur, ashort);
3209 	    }
3210             break;
3211 #else
3212             /* Fall through! */
3213 #endif
3214 	case 's':
3215 	    while (len-- > 0) {
3216 		I16 ai16;
3217 		fromstr = NEXTFROM;
3218 		ai16 = (I16)SvIV(fromstr);
3219 		DO_BO_PACK(ai16, 16);
3220 		PUSH16(utf8, cur, &ai16);
3221 	    }
3222 	    break;
3223 	case 'I':
3224 	case 'I' | TYPE_IS_SHRIEKING:
3225 	    while (len-- > 0) {
3226 		unsigned int auint;
3227 		fromstr = NEXTFROM;
3228 		auint = SvUV(fromstr);
3229 		DO_BO_PACK(auint, i);
3230 		PUSH_VAR(utf8, cur, auint);
3231 	    }
3232 	    break;
3233 	case 'j':
3234 	    while (len-- > 0) {
3235 		IV aiv;
3236 		fromstr = NEXTFROM;
3237 		aiv = SvIV(fromstr);
3238 #if IVSIZE == INTSIZE
3239 		DO_BO_PACK(aiv, i);
3240 #elif IVSIZE == LONGSIZE
3241 		DO_BO_PACK(aiv, l);
3242 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3243 		DO_BO_PACK(aiv, 64);
3244 #else
3245 		Perl_croak(aTHX_ "'j' not supported on this platform");
3246 #endif
3247 		PUSH_VAR(utf8, cur, aiv);
3248 	    }
3249 	    break;
3250 	case 'J':
3251 	    while (len-- > 0) {
3252 		UV auv;
3253 		fromstr = NEXTFROM;
3254 		auv = SvUV(fromstr);
3255 #if UVSIZE == INTSIZE
3256 		DO_BO_PACK(auv, i);
3257 #elif UVSIZE == LONGSIZE
3258 		DO_BO_PACK(auv, l);
3259 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3260 		DO_BO_PACK(auv, 64);
3261 #else
3262 		Perl_croak(aTHX_ "'J' not supported on this platform");
3263 #endif
3264 		PUSH_VAR(utf8, cur, auv);
3265 	    }
3266 	    break;
3267 	case 'w':
3268             while (len-- > 0) {
3269 		NV anv;
3270 		fromstr = NEXTFROM;
3271 		anv = SvNV(fromstr);
3272 
3273 		if (anv < 0) {
3274 		    *cur = '\0';
3275 		    SvCUR_set(cat, cur - start);
3276 		    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3277 		}
3278 
3279                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3280                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3281                    any negative IVs will have already been got by the croak()
3282                    above. IOK is untrue for fractions, so we test them
3283                    against UV_MAX_P1.  */
3284 		if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3285 		    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
3286 		    char  *in = buf + sizeof(buf);
3287 		    UV     auv = SvUV(fromstr);
3288 
3289 		    do {
3290 			*--in = (char)((auv & 0x7f) | 0x80);
3291 			auv >>= 7;
3292 		    } while (auv);
3293 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3294 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3295 				       in, (buf + sizeof(buf)) - in);
3296 		} else if (SvPOKp(fromstr))
3297 		    goto w_string;
3298 		else if (SvNOKp(fromstr)) {
3299 		    /* 10**NV_MAX_10_EXP is the largest power of 10
3300 		       so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3301 		       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3302 		       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3303 		       And with that many bytes only Inf can overflow.
3304 		       Some C compilers are strict about integral constant
3305 		       expressions so we conservatively divide by a slightly
3306 		       smaller integer instead of multiplying by the exact
3307 		       floating-point value.
3308 		    */
3309 #ifdef NV_MAX_10_EXP
3310 		    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3311 		    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3312 #else
3313 		    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3314 		    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3315 #endif
3316 		    char  *in = buf + sizeof(buf);
3317 
3318 		    anv = Perl_floor(anv);
3319 		    do {
3320 			const NV next = Perl_floor(anv / 128);
3321 			if (in <= buf)  /* this cannot happen ;-) */
3322 			    Perl_croak(aTHX_ "Cannot compress integer in pack");
3323 			*--in = (unsigned char)(anv - (next * 128)) | 0x80;
3324 			anv = next;
3325 		    } while (anv > 0);
3326 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3327 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3328 				       in, (buf + sizeof(buf)) - in);
3329 		} else {
3330 		    const char     *from;
3331 		    char           *result, *in;
3332 		    SV             *norm;
3333 		    STRLEN          len;
3334 		    bool            done;
3335 
3336 		  w_string:
3337 		    /* Copy string and check for compliance */
3338 		    from = SvPV_const(fromstr, len);
3339 		    if ((norm = is_an_int(from, len)) == NULL)
3340 			Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3341 
3342 		    Newx(result, len, char);
3343 		    in = result + len;
3344 		    done = FALSE;
3345 		    while (!done) *--in = div128(norm, &done) | 0x80;
3346 		    result[len - 1] &= 0x7F; /* clear continue bit */
3347 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3348 				       in, (result + len) - in);
3349 		    Safefree(result);
3350 		    SvREFCNT_dec(norm);	/* free norm */
3351 		}
3352 	    }
3353             break;
3354 	case 'i':
3355 	case 'i' | TYPE_IS_SHRIEKING:
3356 	    while (len-- > 0) {
3357 		int aint;
3358 		fromstr = NEXTFROM;
3359 		aint = SvIV(fromstr);
3360 		DO_BO_PACK(aint, i);
3361 		PUSH_VAR(utf8, cur, aint);
3362 	    }
3363 	    break;
3364 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3365 	case 'N' | TYPE_IS_SHRIEKING:
3366 #endif
3367 	case 'N':
3368 	    while (len-- > 0) {
3369 		U32 au32;
3370 		fromstr = NEXTFROM;
3371 		au32 = SvUV(fromstr);
3372 #ifdef HAS_HTONL
3373 		au32 = PerlSock_htonl(au32);
3374 #endif
3375 		PUSH32(utf8, cur, &au32);
3376 	    }
3377 	    break;
3378 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3379 	case 'V' | TYPE_IS_SHRIEKING:
3380 #endif
3381 	case 'V':
3382 	    while (len-- > 0) {
3383 		U32 au32;
3384 		fromstr = NEXTFROM;
3385 		au32 = SvUV(fromstr);
3386 #ifdef HAS_HTOVL
3387 		au32 = htovl(au32);
3388 #endif
3389 		PUSH32(utf8, cur, &au32);
3390 	    }
3391 	    break;
3392 	case 'L' | TYPE_IS_SHRIEKING:
3393 #if LONGSIZE != SIZE32
3394 	    while (len-- > 0) {
3395 		unsigned long aulong;
3396 		fromstr = NEXTFROM;
3397 		aulong = SvUV(fromstr);
3398 		DO_BO_PACK(aulong, l);
3399 		PUSH_VAR(utf8, cur, aulong);
3400 	    }
3401 	    break;
3402 #else
3403             /* Fall though! */
3404 #endif
3405 	case 'L':
3406 	    while (len-- > 0) {
3407 		U32 au32;
3408 		fromstr = NEXTFROM;
3409 		au32 = SvUV(fromstr);
3410 		DO_BO_PACK(au32, 32);
3411 		PUSH32(utf8, cur, &au32);
3412 	    }
3413 	    break;
3414 	case 'l' | TYPE_IS_SHRIEKING:
3415 #if LONGSIZE != SIZE32
3416 	    while (len-- > 0) {
3417 		long along;
3418 		fromstr = NEXTFROM;
3419 		along = SvIV(fromstr);
3420 		DO_BO_PACK(along, l);
3421 		PUSH_VAR(utf8, cur, along);
3422 	    }
3423 	    break;
3424 #else
3425             /* Fall though! */
3426 #endif
3427 	case 'l':
3428             while (len-- > 0) {
3429 		I32 ai32;
3430 		fromstr = NEXTFROM;
3431 		ai32 = SvIV(fromstr);
3432 		DO_BO_PACK(ai32, 32);
3433 		PUSH32(utf8, cur, &ai32);
3434 	    }
3435 	    break;
3436 #ifdef HAS_QUAD
3437 	case 'Q':
3438 	    while (len-- > 0) {
3439 		Uquad_t auquad;
3440 		fromstr = NEXTFROM;
3441 		auquad = (Uquad_t) SvUV(fromstr);
3442 		DO_BO_PACK(auquad, 64);
3443 		PUSH_VAR(utf8, cur, auquad);
3444 	    }
3445 	    break;
3446 	case 'q':
3447 	    while (len-- > 0) {
3448 		Quad_t aquad;
3449 		fromstr = NEXTFROM;
3450 		aquad = (Quad_t)SvIV(fromstr);
3451 		DO_BO_PACK(aquad, 64);
3452 		PUSH_VAR(utf8, cur, aquad);
3453 	    }
3454 	    break;
3455 #endif /* HAS_QUAD */
3456 	case 'P':
3457 	    len = 1;		/* assume SV is correct length */
3458 	    GROWING(utf8, cat, start, cur, sizeof(char *));
3459 	    /* Fall through! */
3460 	case 'p':
3461 	    while (len-- > 0) {
3462 		const char *aptr;
3463 
3464 		fromstr = NEXTFROM;
3465 		SvGETMAGIC(fromstr);
3466 		if (!SvOK(fromstr)) aptr = NULL;
3467 		else {
3468 		    /* XXX better yet, could spirit away the string to
3469 		     * a safe spot and hang on to it until the result
3470 		     * of pack() (and all copies of the result) are
3471 		     * gone.
3472 		     */
3473 		    if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3474 			     !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3475 			Perl_warner(aTHX_ packWARN(WARN_PACK),
3476 				    "Attempt to pack pointer to temporary value");
3477 		    }
3478 		    if (SvPOK(fromstr) || SvNIOK(fromstr))
3479 			aptr = SvPV_nomg_const_nolen(fromstr);
3480 		    else
3481 			aptr = SvPV_force_flags_nolen(fromstr, 0);
3482 		}
3483 		DO_BO_PACK_PC(aptr);
3484 		PUSH_VAR(utf8, cur, aptr);
3485 	    }
3486 	    break;
3487 	case 'u': {
3488 	    const char *aptr, *aend;
3489 	    bool from_utf8;
3490 
3491 	    fromstr = NEXTFROM;
3492 	    if (len <= 2) len = 45;
3493 	    else len = len / 3 * 3;
3494 	    if (len >= 64) {
3495 		if (ckWARN(WARN_PACK))
3496 		    Perl_warner(aTHX_ packWARN(WARN_PACK),
3497 			    "Field too wide in 'u' format in pack");
3498 		len = 63;
3499 	    }
3500 	    aptr = SvPV_const(fromstr, fromlen);
3501 	    from_utf8 = DO_UTF8(fromstr);
3502 	    if (from_utf8) {
3503 		aend = aptr + fromlen;
3504 		fromlen = sv_len_utf8(fromstr);
3505 	    } else aend = NULL; /* Unused, but keep compilers happy */
3506 	    GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3507 	    while (fromlen > 0) {
3508 		U8 *end;
3509 		I32 todo;
3510 		U8 hunk[1+63/3*4+1];
3511 
3512 		if ((I32)fromlen > len)
3513 		    todo = len;
3514 		else
3515 		    todo = fromlen;
3516 		if (from_utf8) {
3517 		    char buffer[64];
3518 		    if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3519 				      'u' | TYPE_IS_PACK)) {
3520 			*cur = '\0';
3521 			SvCUR_set(cat, cur - start);
3522 			Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3523 		    }
3524 		    end = doencodes(hunk, buffer, todo);
3525 		} else {
3526 		    end = doencodes(hunk, aptr, todo);
3527 		    aptr += todo;
3528 		}
3529 		PUSH_BYTES(utf8, cur, hunk, end-hunk);
3530 		fromlen -= todo;
3531 	    }
3532 	    break;
3533 	}
3534 	}
3535 	*cur = '\0';
3536 	SvCUR_set(cat, cur - start);
3537       no_change:
3538 	*symptr = lookahead;
3539     }
3540     return beglist;
3541 }
3542 #undef NEXTFROM
3543 
3544 
3545 PP(pp_pack)
3546 {
3547     dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3548     register SV *cat = TARG;
3549     STRLEN fromlen;
3550     SV *pat_sv = *++MARK;
3551     register const char *pat = SvPV_const(pat_sv, fromlen);
3552     register const char *patend = pat + fromlen;
3553 
3554     MARK++;
3555     sv_setpvn(cat, "", 0);
3556     SvUTF8_off(cat);
3557 
3558     packlist(cat, pat, patend, MARK, SP + 1);
3559 
3560     SvSETMAGIC(cat);
3561     SP = ORIGMARK;
3562     PUSHs(cat);
3563     RETURN;
3564 }
3565 
3566 /*
3567  * Local variables:
3568  * c-indentation-style: bsd
3569  * c-basic-offset: 4
3570  * indent-tabs-mode: t
3571  * End:
3572  *
3573  * ex: set ts=8 sts=4 sw=4 noet:
3574  */
3575