xref: /openbsd-src/gnu/usr.bin/perl/pp_pack.c (revision 897fc685943471cf985a0fe38ba076ea6fe74fa5)
1 /*    pp_pack.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * He still hopefully carried some of his gear in his pack: a small tinder-box,
13  * two small shallow pans, the smaller fitting into the larger; inside them a
14  * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15  * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16  * some salt.
17  *
18  *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19  */
20 
21 /* This file contains pp ("push/pop") functions that
22  * execute the opcodes that make up a perl program. A typical pp function
23  * expects to find its arguments on the stack, and usually pushes its
24  * results onto the stack, hence the 'pp' terminology. Each OP structure
25  * contains a pointer to the relevant pp_foo() function.
26  *
27  * This particular file just contains pp_pack() and pp_unpack(). See the
28  * other pp*.c files for the rest of the pp_ functions.
29  */
30 
31 #include "EXTERN.h"
32 #define PERL_IN_PP_PACK_C
33 #include "perl.h"
34 
35 /* Types used by pack/unpack */
36 typedef enum {
37   e_no_len,     /* no length  */
38   e_number,     /* number, [] */
39   e_star        /* asterisk   */
40 } howlen_t;
41 
42 typedef struct tempsym {
43   const char*    patptr;   /* current template char */
44   const char*    patend;   /* one after last char   */
45   const char*    grpbeg;   /* 1st char of ()-group  */
46   const char*    grpend;   /* end of ()-group       */
47   I32      code;     /* template code (!<>)   */
48   I32      length;   /* length/repeat count   */
49   howlen_t howlen;   /* how length is given   */
50   int      level;    /* () nesting level      */
51   U32      flags;    /* /=4, comma=2, pack=1  */
52                      /*   and group modifiers */
53   STRLEN   strbeg;   /* offset of group start */
54   struct tempsym *previous; /* previous group */
55 } tempsym_t;
56 
57 #define TEMPSYM_INIT(symptr, p, e, f) \
58     STMT_START {	\
59 	(symptr)->patptr   = (p);	\
60 	(symptr)->patend   = (e);	\
61 	(symptr)->grpbeg   = NULL;	\
62 	(symptr)->grpend   = NULL;	\
63 	(symptr)->grpend   = NULL;	\
64 	(symptr)->code     = 0;		\
65 	(symptr)->length   = 0;		\
66 	(symptr)->howlen   = e_no_len;	\
67 	(symptr)->level    = 0;		\
68 	(symptr)->flags    = (f);	\
69 	(symptr)->strbeg   = 0;		\
70 	(symptr)->previous = NULL;	\
71    } STMT_END
72 
73 typedef union {
74     NV nv;
75     U8 bytes[sizeof(NV)];
76 } NV_bytes;
77 
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79 typedef union {
80     long double ld;
81     U8 bytes[sizeof(long double)];
82 } ld_bytes;
83 #endif
84 
85 #ifndef CHAR_BIT
86 # define CHAR_BIT	8
87 #endif
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
89 #define UTF8_EXPAND	2
90 
91 /*
92  * Offset for integer pack/unpack.
93  *
94  * On architectures where I16 and I32 aren't really 16 and 32 bits,
95  * which for now are all Crays, pack and unpack have to play games.
96  */
97 
98 /*
99  * These values are required for portability of pack() output.
100  * If they're not right on your machine, then pack() and unpack()
101  * wouldn't work right anyway; you'll need to apply the Cray hack.
102  * (I'd like to check them with #if, but you can't use sizeof() in
103  * the preprocessor.)  --???
104  */
105 /*
106     The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107     defines are now in config.h.  --Andy Dougherty  April 1998
108  */
109 #define SIZE16 2
110 #define SIZE32 4
111 
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113    --jhi Feb 1999 */
114 
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
117 #    define OFF16(p)	((char*)(p))
118 #    define OFF32(p)	((char*)(p))
119 #  else
120 #    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
121 #      define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
122 #      define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
123 #    else
124        ++++ bad cray byte order
125 #    endif
126 #  endif
127 #else
128 #  define OFF16(p)     ((char *) (p))
129 #  define OFF32(p)     ((char *) (p))
130 #endif
131 
132 #define PUSH16(utf8, cur, p, needs_swap)                        \
133        PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap)                        \
135        PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
136 
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
138 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678  /* little-endian */
140 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141 #else
142 #  error "Unsupported byteorder"
143         /* Need to add code here to re-instate mixed endian support.
144            NEEDS_SWAP would need to hold a flag indicating which action to
145            take, and S_reverse_copy and the code in S_utf8_to_bytes would need
146            logic adding to deal with any mixed-endian transformations needed.
147         */
148 #endif
149 
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)	\
152 STMT_START {						\
153     if (UNLIKELY(utf8)) {                               \
154         if (!S_utf8_to_bytes(aTHX_ &s, strend,		\
155 	  (char *) (buf), len, datumtype)) break;	\
156     } else {						\
157         if (UNLIKELY(needs_swap))                       \
158             S_reverse_copy(s, (char *) (buf), len);     \
159         else                                            \
160             Copy(s, (char *) (buf), len, char);		\
161         s += len;					\
162     }							\
163 } STMT_END
164 
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap)              \
166        SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
167 
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap)              \
169        SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
170 
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap)          \
172        SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
173 
174 #define PUSH_VAR(utf8, aptr, var, needs_swap)           \
175        PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
176 
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
179 
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8	      0x40
182 #define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE  0x10
184 #define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
185 #define FLAG_SLASH            0x04
186 #define FLAG_COMMA            0x02
187 #define FLAG_PACK             0x01
188 
189 STATIC SV *
190 S_mul128(pTHX_ SV *sv, U8 m)
191 {
192   STRLEN          len;
193   char           *s = SvPV(sv, len);
194   char           *t;
195 
196   PERL_ARGS_ASSERT_MUL128;
197 
198   if (!strnEQ(s, "0000", 4)) {  /* need to grow sv */
199     SV * const tmpNew = newSVpvs("0000000000");
200 
201     sv_catsv(tmpNew, sv);
202     SvREFCNT_dec(sv);		/* free old sv */
203     sv = tmpNew;
204     s = SvPV(sv, len);
205   }
206   t = s + len - 1;
207   while (!*t)                   /* trailing '\0'? */
208     t--;
209   while (t > s) {
210     const U32 i = ((*t - '0') << 7) + m;
211     *(t--) = '0' + (char)(i % 10);
212     m = (char)(i / 10);
213   }
214   return (sv);
215 }
216 
217 /* Explosives and implosives. */
218 
219 #define ISUUCHAR(ch)    (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ')  \
220                       && NATIVE_TO_LATIN1(ch) <  NATIVE_TO_LATIN1('a'))
221 
222 /* type modifiers */
223 #define TYPE_IS_SHRIEKING	0x100
224 #define TYPE_IS_BIG_ENDIAN	0x200
225 #define TYPE_IS_LITTLE_ENDIAN	0x400
226 #define TYPE_IS_PACK		0x800
227 #define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
228 #define TYPE_MODIFIERS(t)	((t) & ~0xFF)
229 #define TYPE_NO_MODIFIERS(t)	((t) & 0xFF)
230 
231 # define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
232 # define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
233 
234 # define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
235 
236 #define PACK_SIZE_CANNOT_CSUM		0x80
237 #define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
238 #define PACK_SIZE_MASK			0x3F
239 
240 #include "packsizetables.inc"
241 
242 static void
243 S_reverse_copy(const char *src, char *dest, STRLEN len)
244 {
245     dest += len;
246     while (len--)
247         *--dest = *src++;
248 }
249 
250 STATIC U8
251 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
252 {
253     STRLEN retlen;
254     UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
255 			 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
256     /* We try to process malformed UTF-8 as much as possible (preferably with
257        warnings), but these two mean we make no progress in the string and
258        might enter an infinite loop */
259     if (retlen == (STRLEN) -1 || retlen == 0)
260 	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 		   (int) TYPE_NO_MODIFIERS(datumtype));
262     if (val >= 0x100) {
263 	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 		       "Character in '%c' format wrapped in unpack",
265 		       (int) TYPE_NO_MODIFIERS(datumtype));
266 	val &= 0xff;
267     }
268     *s += retlen;
269     return (U8)val;
270 }
271 
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 	utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
274 	*(U8 *)(s)++)
275 
276 STATIC bool
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
278 {
279     UV val;
280     STRLEN retlen;
281     const char *from = *s;
282     int bad = 0;
283     const U32 flags = ckWARN(WARN_UTF8) ?
284 	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285     const bool needs_swap = NEEDS_SWAP(datumtype);
286 
287     if (UNLIKELY(needs_swap))
288         buf += buf_len;
289 
290     for (;buf_len > 0; buf_len--) {
291 	if (from >= end) return FALSE;
292 	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 	if (retlen == (STRLEN) -1 || retlen == 0) {
294 	    from += UTF8SKIP(from);
295 	    bad |= 1;
296 	} else from += retlen;
297 	if (val >= 0x100) {
298 	    bad |= 2;
299 	    val &= 0xff;
300 	}
301         if (UNLIKELY(needs_swap))
302             *(U8 *)--buf = (U8)val;
303         else
304             *(U8 *)buf++ = (U8)val;
305     }
306     /* We have enough characters for the buffer. Did we have problems ? */
307     if (bad) {
308 	if (bad & 1) {
309 	    /* Rewalk the string fragment while warning */
310 	    const char *ptr;
311 	    const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 	    for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 		if (ptr >= end) break;
314 		utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 	    }
316 	    if (from > end) from = end;
317 	}
318 	if ((bad & 2))
319 	    Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 				       WARN_PACK : WARN_UNPACK),
321 			   "Character(s) in '%c' format wrapped in %s",
322 			   (int) TYPE_NO_MODIFIERS(datumtype),
323 			   datumtype & TYPE_IS_PACK ? "pack" : "unpack");
324     }
325     *s = from;
326     return TRUE;
327 }
328 
329 STATIC char *
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331     PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
332 
333     if (UNLIKELY(needs_swap)) {
334         const U8 *p = start + len;
335         while (p-- > start) {
336             append_utf8_from_native_byte(*p, (U8 **) & dest);
337         }
338     } else {
339         const U8 * const end = start + len;
340         while (start < end) {
341             append_utf8_from_native_byte(*start, (U8 **) & dest);
342             start++;
343         }
344     }
345     return dest;
346 }
347 
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
349 STMT_START {							\
350     if (UNLIKELY(utf8))	                                        \
351 	(cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap);       \
352     else {							\
353         if (UNLIKELY(needs_swap))                               \
354             S_reverse_copy((char *)(buf), cur, len);            \
355         else                                                    \
356             Copy(buf, cur, len, char);				\
357 	(cur) += (len);						\
358     }								\
359 } STMT_END
360 
361 #define GROWING(utf8, cat, start, cur, in_len)	\
362 STMT_START {					\
363     STRLEN glen = (in_len);			\
364     if (utf8) glen *= UTF8_EXPAND;		\
365     if ((cur) + glen >= (start) + SvLEN(cat)) {	\
366 	(start) = sv_exp_grow(cat, glen);	\
367 	(cur) = (start) + SvCUR(cat);		\
368     }						\
369 } STMT_END
370 
371 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
372 STMT_START {					\
373     const STRLEN glen = (in_len);		\
374     STRLEN gl = glen;				\
375     if (utf8) gl *= UTF8_EXPAND;		\
376     if ((cur) + gl >= (start) + SvLEN(cat)) {	\
377         *cur = '\0';				\
378         SvCUR_set((cat), (cur) - (start));	\
379 	(start) = sv_exp_grow(cat, gl);		\
380 	(cur) = (start) + SvCUR(cat);		\
381     }						\
382     PUSH_BYTES(utf8, cur, buf, glen, 0);        \
383 } STMT_END
384 
385 #define PUSH_BYTE(utf8, s, byte)		\
386 STMT_START {					\
387     if (utf8) {					\
388 	const U8 au8 = (byte);			\
389 	(s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
390     } else *(U8 *)(s)++ = (byte);		\
391 } STMT_END
392 
393 /* Only to be used inside a loop (see the break) */
394 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
395 STMT_START {							\
396     STRLEN retlen;						\
397     if (str >= end) break;					\
398     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
399     if (retlen == (STRLEN) -1 || retlen == 0) {			\
400 	*cur = '\0';						\
401 	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
402     }								\
403     str += retlen;						\
404 } STMT_END
405 
406 static const char *_action( const tempsym_t* symptr )
407 {
408     return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
409 }
410 
411 /* Returns the sizeof() struct described by pat */
412 STATIC I32
413 S_measure_struct(pTHX_ tempsym_t* symptr)
414 {
415     I32 total = 0;
416 
417     PERL_ARGS_ASSERT_MEASURE_STRUCT;
418 
419     while (next_symbol(symptr)) {
420 	I32 len;
421 	int size;
422 
423         switch (symptr->howlen) {
424 	  case e_star:
425    	    Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
426                         _action( symptr ) );
427 
428 	  default:
429 	    /* e_no_len and e_number */
430 	    len = symptr->length;
431 	    break;
432         }
433 
434 	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
435 	if (!size) {
436             int star;
437 	    /* endianness doesn't influence the size of a type */
438 	    switch(TYPE_NO_ENDIANNESS(symptr->code)) {
439 	    default:
440 		Perl_croak(aTHX_ "Invalid type '%c' in %s",
441 			   (int)TYPE_NO_MODIFIERS(symptr->code),
442                            _action( symptr ) );
443 	    case '.' | TYPE_IS_SHRIEKING:
444 	    case '@' | TYPE_IS_SHRIEKING:
445 	    case '@':
446 	    case '.':
447 	    case '/':
448 	    case 'U':			/* XXXX Is it correct? */
449 	    case 'w':
450 	    case 'u':
451 		Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
452 			   (int) TYPE_NO_MODIFIERS(symptr->code),
453                            _action( symptr ) );
454 	    case '%':
455 		size = 0;
456 		break;
457 	    case '(':
458 	    {
459 		tempsym_t savsym = *symptr;
460 		symptr->patptr = savsym.grpbeg;
461 		symptr->patend = savsym.grpend;
462 		/* XXXX Theoretically, we need to measure many times at
463 		   different positions, since the subexpression may contain
464 		   alignment commands, but be not of aligned length.
465 		   Need to detect this and croak().  */
466 		size = measure_struct(symptr);
467 		*symptr = savsym;
468 		break;
469 	    }
470 	    case 'X' | TYPE_IS_SHRIEKING:
471 		/* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
472 		 */
473 		if (!len)		/* Avoid division by 0 */
474 		    len = 1;
475 		len = total % len;	/* Assumed: the start is aligned. */
476 		/* FALLTHROUGH */
477 	    case 'X':
478 		size = -1;
479 		if (total < len)
480                     Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
481 		break;
482 	    case 'x' | TYPE_IS_SHRIEKING:
483 		if (!len)		/* Avoid division by 0 */
484 		    len = 1;
485 		star = total % len;	/* Assumed: the start is aligned. */
486 		if (star)		/* Other portable ways? */
487 		    len = len - star;
488 		else
489 		    len = 0;
490 		/* FALLTHROUGH */
491 	    case 'x':
492 	    case 'A':
493 	    case 'Z':
494 	    case 'a':
495 		size = 1;
496 		break;
497 	    case 'B':
498 	    case 'b':
499 		len = (len + 7)/8;
500 		size = 1;
501 		break;
502 	    case 'H':
503 	    case 'h':
504 		len = (len + 1)/2;
505 		size = 1;
506 		break;
507 
508 	    case 'P':
509 		len = 1;
510 		size = sizeof(char*);
511 		break;
512 	    }
513 	}
514 	total += len * size;
515     }
516     return total;
517 }
518 
519 
520 /* locate matching closing parenthesis or bracket
521  * returns char pointer to char after match, or NULL
522  */
523 STATIC const char *
524 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
525 {
526     PERL_ARGS_ASSERT_GROUP_END;
527 
528     while (patptr < patend) {
529 	const char c = *patptr++;
530 
531 	if (isSPACE(c))
532 	    continue;
533 	else if (c == ender)
534 	    return patptr-1;
535 	else if (c == '#') {
536 	    while (patptr < patend && *patptr != '\n')
537 		patptr++;
538 	    continue;
539 	} else if (c == '(')
540 	    patptr = group_end(patptr, patend, ')') + 1;
541 	else if (c == '[')
542 	    patptr = group_end(patptr, patend, ']') + 1;
543     }
544     Perl_croak(aTHX_ "No group ending character '%c' found in template",
545                ender);
546     NOT_REACHED; /* NOTREACHED */
547 }
548 
549 
550 /* Convert unsigned decimal number to binary.
551  * Expects a pointer to the first digit and address of length variable
552  * Advances char pointer to 1st non-digit char and returns number
553  */
554 STATIC const char *
555 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
556 {
557   I32 len = *patptr++ - '0';
558 
559   PERL_ARGS_ASSERT_GET_NUM;
560 
561   while (isDIGIT(*patptr)) {
562     if (len >= 0x7FFFFFFF/10)
563       Perl_croak(aTHX_ "pack/unpack repeat count overflow");
564     len = (len * 10) + (*patptr++ - '0');
565   }
566   *lenptr = len;
567   return patptr;
568 }
569 
570 /* The marvellous template parsing routine: Using state stored in *symptr,
571  * locates next template code and count
572  */
573 STATIC bool
574 S_next_symbol(pTHX_ tempsym_t* symptr )
575 {
576   const char* patptr = symptr->patptr;
577   const char* const patend = symptr->patend;
578 
579   PERL_ARGS_ASSERT_NEXT_SYMBOL;
580 
581   symptr->flags &= ~FLAG_SLASH;
582 
583   while (patptr < patend) {
584     if (isSPACE(*patptr))
585       patptr++;
586     else if (*patptr == '#') {
587       patptr++;
588       while (patptr < patend && *patptr != '\n')
589 	patptr++;
590       if (patptr < patend)
591 	patptr++;
592     } else {
593       /* We should have found a template code */
594       I32 code = *patptr++ & 0xFF;
595       U32 inherited_modifiers = 0;
596 
597       if (code == ','){ /* grandfather in commas but with a warning */
598 	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
599           symptr->flags |= FLAG_COMMA;
600 	  Perl_warner(aTHX_ packWARN(WARN_UNPACK),
601 	 	      "Invalid type ',' in %s", _action( symptr ) );
602         }
603 	continue;
604       }
605 
606       /* for '(', skip to ')' */
607       if (code == '(') {
608         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
609           Perl_croak(aTHX_ "()-group starts with a count in %s",
610                         _action( symptr ) );
611         symptr->grpbeg = patptr;
612         patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
613         if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
614 	  Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
615                         _action( symptr ) );
616       }
617 
618       /* look for group modifiers to inherit */
619       if (TYPE_ENDIANNESS(symptr->flags)) {
620         if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
621           inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
622       }
623 
624       /* look for modifiers */
625       while (patptr < patend) {
626         const char *allowed;
627         I32 modifier;
628         switch (*patptr) {
629           case '!':
630             modifier = TYPE_IS_SHRIEKING;
631             allowed = "sSiIlLxXnNvV@.";
632             break;
633           case '>':
634             modifier = TYPE_IS_BIG_ENDIAN;
635             allowed = ENDIANNESS_ALLOWED_TYPES;
636             break;
637           case '<':
638             modifier = TYPE_IS_LITTLE_ENDIAN;
639             allowed = ENDIANNESS_ALLOWED_TYPES;
640             break;
641           default:
642             allowed = "";
643             modifier = 0;
644             break;
645         }
646 
647         if (modifier == 0)
648           break;
649 
650         if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
651           Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
652                         allowed, _action( symptr ) );
653 
654         if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
655           Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
656                      (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
657         else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
658                  TYPE_ENDIANNESS_MASK)
659           Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
660                      *patptr, _action( symptr ) );
661 
662         if ((code & modifier)) {
663 	    Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
664 			   "Duplicate modifier '%c' after '%c' in %s",
665 			   *patptr, (int) TYPE_NO_MODIFIERS(code),
666 			   _action( symptr ) );
667         }
668 
669         code |= modifier;
670         patptr++;
671       }
672 
673       /* inherit modifiers */
674       code |= inherited_modifiers;
675 
676       /* look for count and/or / */
677       if (patptr < patend) {
678 	if (isDIGIT(*patptr)) {
679  	  patptr = get_num( patptr, &symptr->length );
680           symptr->howlen = e_number;
681 
682         } else if (*patptr == '*') {
683           patptr++;
684           symptr->howlen = e_star;
685 
686         } else if (*patptr == '[') {
687           const char* lenptr = ++patptr;
688           symptr->howlen = e_number;
689           patptr = group_end( patptr, patend, ']' ) + 1;
690           /* what kind of [] is it? */
691           if (isDIGIT(*lenptr)) {
692             lenptr = get_num( lenptr, &symptr->length );
693             if( *lenptr != ']' )
694               Perl_croak(aTHX_ "Malformed integer in [] in %s",
695                             _action( symptr ) );
696           } else {
697             tempsym_t savsym = *symptr;
698             symptr->patend = patptr-1;
699             symptr->patptr = lenptr;
700             savsym.length = measure_struct(symptr);
701             *symptr = savsym;
702           }
703         } else {
704           symptr->howlen = e_no_len;
705           symptr->length = 1;
706         }
707 
708         /* try to find / */
709         while (patptr < patend) {
710           if (isSPACE(*patptr))
711             patptr++;
712           else if (*patptr == '#') {
713             patptr++;
714             while (patptr < patend && *patptr != '\n')
715 	      patptr++;
716             if (patptr < patend)
717 	      patptr++;
718           } else {
719             if (*patptr == '/') {
720               symptr->flags |= FLAG_SLASH;
721               patptr++;
722               if (patptr < patend &&
723                   (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
724                 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
725                             _action( symptr ) );
726             }
727             break;
728 	  }
729 	}
730       } else {
731         /* at end - no count, no / */
732         symptr->howlen = e_no_len;
733         symptr->length = 1;
734       }
735 
736       symptr->code = code;
737       symptr->patptr = patptr;
738       return TRUE;
739     }
740   }
741   symptr->patptr = patptr;
742   return FALSE;
743 }
744 
745 /*
746    There is no way to cleanly handle the case where we should process the
747    string per byte in its upgraded form while it's really in downgraded form
748    (e.g. estimates like strend-s as an upper bound for the number of
749    characters left wouldn't work). So if we foresee the need of this
750    (pattern starts with U or contains U0), we want to work on the encoded
751    version of the string. Users are advised to upgrade their pack string
752    themselves if they need to do a lot of unpacks like this on it
753 */
754 STATIC bool
755 need_utf8(const char *pat, const char *patend)
756 {
757     bool first = TRUE;
758 
759     PERL_ARGS_ASSERT_NEED_UTF8;
760 
761     while (pat < patend) {
762 	if (pat[0] == '#') {
763 	    pat++;
764 	    pat = (const char *) memchr(pat, '\n', patend-pat);
765 	    if (!pat) return FALSE;
766 	} else if (pat[0] == 'U') {
767 	    if (first || pat[1] == '0') return TRUE;
768 	} else first = FALSE;
769 	pat++;
770     }
771     return FALSE;
772 }
773 
774 STATIC char
775 first_symbol(const char *pat, const char *patend) {
776     PERL_ARGS_ASSERT_FIRST_SYMBOL;
777 
778     while (pat < patend) {
779 	if (pat[0] != '#') return pat[0];
780 	pat++;
781 	pat = (const char *) memchr(pat, '\n', patend-pat);
782 	if (!pat) return 0;
783 	pat++;
784     }
785     return 0;
786 }
787 
788 /*
789 
790 =head1 Pack and Unpack
791 
792 =for apidoc unpackstring
793 
794 The engine implementing the C<unpack()> Perl function.
795 
796 Using the template C<pat..patend>, this function unpacks the string
797 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
798 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
799 C<SPAGAIN> after the call to this function).  It returns the number of
800 pushed elements.
801 
802 The C<strend> and C<patend> pointers should point to the byte following the
803 last character of each string.
804 
805 Although this function returns its values on the perl argument stack, it
806 doesn't take any parameters from that stack (and thus in particular
807 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
808 example).
809 
810 =cut */
811 
812 I32
813 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
814 {
815     tempsym_t sym;
816 
817     PERL_ARGS_ASSERT_UNPACKSTRING;
818 
819     if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
820     else if (need_utf8(pat, patend)) {
821 	/* We probably should try to avoid this in case a scalar context call
822 	   wouldn't get to the "U0" */
823 	STRLEN len = strend - s;
824 	s = (char *) bytes_to_utf8((U8 *) s, &len);
825 	SAVEFREEPV(s);
826 	strend = s + len;
827 	flags |= FLAG_DO_UTF8;
828     }
829 
830     if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
831 	flags |= FLAG_PARSE_UTF8;
832 
833     TEMPSYM_INIT(&sym, pat, patend, flags);
834 
835     return unpack_rec(&sym, s, s, strend, NULL );
836 }
837 
838 STATIC I32
839 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
840 {
841     dSP;
842     SV *sv = NULL;
843     const I32 start_sp_offset = SP - PL_stack_base;
844     howlen_t howlen;
845     I32 checksum = 0;
846     UV cuv = 0;
847     NV cdouble = 0.0;
848     const int bits_in_uv = CHAR_BIT * sizeof(cuv);
849     bool beyond = FALSE;
850     bool explicit_length;
851     const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
852     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
853 
854     PERL_ARGS_ASSERT_UNPACK_REC;
855 
856     symptr->strbeg = s - strbeg;
857 
858     while (next_symbol(symptr)) {
859 	packprops_t props;
860 	I32 len;
861         I32 datumtype = symptr->code;
862         bool needs_swap;
863 	/* do first one only unless in list context
864 	   / is implemented by unpacking the count, then popping it from the
865 	   stack, so must check that we're not in the middle of a /  */
866         if ( unpack_only_one
867 	     && (SP - PL_stack_base == start_sp_offset + 1)
868 	     && (datumtype != '/') )   /* XXX can this be omitted */
869             break;
870 
871         switch (howlen = symptr->howlen) {
872 	  case e_star:
873 	    len = strend - strbeg;	/* long enough */
874 	    break;
875 	  default:
876 	    /* e_no_len and e_number */
877 	    len = symptr->length;
878 	    break;
879         }
880 
881         explicit_length = TRUE;
882       redo_switch:
883         beyond = s >= strend;
884 
885 	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
886 	if (props) {
887 	    /* props nonzero means we can process this letter. */
888             const long size = props & PACK_SIZE_MASK;
889             const long howmany = (strend - s) / size;
890 	    if (len > howmany)
891 		len = howmany;
892 
893 	    if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
894 		if (len && unpack_only_one) len = 1;
895 		EXTEND(SP, len);
896 		EXTEND_MORTAL(len);
897 	    }
898 	}
899 
900         needs_swap = NEEDS_SWAP(datumtype);
901 
902 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
903 	default:
904 	    Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
905 
906 	case '%':
907 	    if (howlen == e_no_len)
908 		len = 16;		/* len is not specified */
909 	    checksum = len;
910 	    cuv = 0;
911 	    cdouble = 0;
912 	    continue;
913 
914 	case '(':
915 	{
916             tempsym_t savsym = *symptr;
917             const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
918 	    symptr->flags |= group_modifiers;
919             symptr->patend = savsym.grpend;
920 	    symptr->previous = &savsym;
921             symptr->level++;
922 	    PUTBACK;
923 	    if (len && unpack_only_one) len = 1;
924 	    while (len--) {
925   	        symptr->patptr = savsym.grpbeg;
926 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
927 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
928  	        unpack_rec(symptr, s, strbeg, strend, &s);
929                 if (s == strend && savsym.howlen == e_star)
930 		    break; /* No way to continue */
931 	    }
932 	    SPAGAIN;
933             savsym.flags = symptr->flags & ~group_modifiers;
934             *symptr = savsym;
935 	    break;
936 	}
937 	case '.' | TYPE_IS_SHRIEKING:
938 	case '.': {
939 	    const char *from;
940 	    SV *sv;
941 	    const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
942 	    if (howlen == e_star) from = strbeg;
943 	    else if (len <= 0) from = s;
944 	    else {
945 		tempsym_t *group = symptr;
946 
947 		while (--len && group) group = group->previous;
948 		from = group ? strbeg + group->strbeg : strbeg;
949 	    }
950 	    sv = from <= s ?
951 		newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
952 		newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
953 	    mXPUSHs(sv);
954 	    break;
955 	}
956 	case '@' | TYPE_IS_SHRIEKING:
957 	case '@':
958 	    s = strbeg + symptr->strbeg;
959 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
960 	    {
961 		while (len > 0) {
962 		    if (s >= strend)
963 			Perl_croak(aTHX_ "'@' outside of string in unpack");
964 		    s += UTF8SKIP(s);
965 		    len--;
966 		}
967 		if (s > strend)
968 		    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
969 	    } else {
970 		if (strend-s < len)
971 		    Perl_croak(aTHX_ "'@' outside of string in unpack");
972 		s += len;
973 	    }
974 	    break;
975  	case 'X' | TYPE_IS_SHRIEKING:
976  	    if (!len)			/* Avoid division by 0 */
977  		len = 1;
978 	    if (utf8) {
979 		const char *hop, *last;
980 		I32 l = len;
981 		hop = last = strbeg;
982 		while (hop < s) {
983 		    hop += UTF8SKIP(hop);
984 		    if (--l == 0) {
985 			last = hop;
986 			l = len;
987 		    }
988 		}
989 		if (last > s)
990 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
991 		s = last;
992 		break;
993 	    }
994 	    len = (s - strbeg) % len;
995  	    /* FALLTHROUGH */
996 	case 'X':
997 	    if (utf8) {
998 		while (len > 0) {
999 		    if (s <= strbeg)
1000 			Perl_croak(aTHX_ "'X' outside of string in unpack");
1001 		    while (--s, UTF8_IS_CONTINUATION(*s)) {
1002 			if (s <= strbeg)
1003 			    Perl_croak(aTHX_ "'X' outside of string in unpack");
1004 		    }
1005 		    len--;
1006 		}
1007 	    } else {
1008 		if (len > s - strbeg)
1009 		    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1010 		s -= len;
1011 	    }
1012 	    break;
1013  	case 'x' | TYPE_IS_SHRIEKING: {
1014             I32 ai32;
1015  	    if (!len)			/* Avoid division by 0 */
1016  		len = 1;
1017 	    if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1018 	    else      ai32 = (s - strbeg)                         % len;
1019 	    if (ai32 == 0) break;
1020 	    len -= ai32;
1021             }
1022  	    /* FALLTHROUGH */
1023 	case 'x':
1024 	    if (utf8) {
1025 		while (len>0) {
1026 		    if (s >= strend)
1027 			Perl_croak(aTHX_ "'x' outside of string in unpack");
1028 		    s += UTF8SKIP(s);
1029 		    len--;
1030 		}
1031 	    } else {
1032 		if (len > strend - s)
1033 		    Perl_croak(aTHX_ "'x' outside of string in unpack");
1034 		s += len;
1035 	    }
1036 	    break;
1037 	case '/':
1038 	    Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1039 
1040 	case 'A':
1041 	case 'Z':
1042 	case 'a':
1043 	    if (checksum) {
1044 		/* Preliminary length estimate is assumed done in 'W' */
1045 		if (len > strend - s) len = strend - s;
1046 		goto W_checksum;
1047 	    }
1048 	    if (utf8) {
1049 		I32 l;
1050 		const char *hop;
1051 		for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1052 		    if (hop >= strend) {
1053 			if (hop > strend)
1054 			    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1055 			break;
1056 		    }
1057 		}
1058 		if (hop > strend)
1059 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1060 		len = hop - s;
1061 	    } else if (len > strend - s)
1062 		len = strend - s;
1063 
1064 	    if (datumtype == 'Z') {
1065 		/* 'Z' strips stuff after first null */
1066 		const char *ptr, *end;
1067 		end = s + len;
1068 		for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1069 		sv = newSVpvn(s, ptr-s);
1070 		if (howlen == e_star) /* exact for 'Z*' */
1071 		    len = ptr-s + (ptr != strend ? 1 : 0);
1072 	    } else if (datumtype == 'A') {
1073 		/* 'A' strips both nulls and spaces */
1074 		const char *ptr;
1075 		if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1076 		    for (ptr = s+len-1; ptr >= s; ptr--)
1077 			if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1078 			    !isSPACE_utf8(ptr)) break;
1079 		    if (ptr >= s) ptr += UTF8SKIP(ptr);
1080 		    else ptr++;
1081 		    if (ptr > s+len)
1082 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083 		} else {
1084 		    for (ptr = s+len-1; ptr >= s; ptr--)
1085 			if (*ptr != 0 && !isSPACE(*ptr)) break;
1086 		    ptr++;
1087 		}
1088 		sv = newSVpvn(s, ptr-s);
1089 	    } else sv = newSVpvn(s, len);
1090 
1091 	    if (utf8) {
1092 		SvUTF8_on(sv);
1093 		/* Undo any upgrade done due to need_utf8() */
1094 		if (!(symptr->flags & FLAG_WAS_UTF8))
1095 		    sv_utf8_downgrade(sv, 0);
1096 	    }
1097 	    mXPUSHs(sv);
1098 	    s += len;
1099 	    break;
1100 	case 'B':
1101 	case 'b': {
1102 	    char *str;
1103 	    if (howlen == e_star || len > (strend - s) * 8)
1104 		len = (strend - s) * 8;
1105 	    if (checksum) {
1106 		if (utf8)
1107 		    while (len >= 8 && s < strend) {
1108 			cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1109 			len -= 8;
1110 		    }
1111 		else
1112 		    while (len >= 8) {
1113 			cuv += PL_bitcount[*(U8 *)s++];
1114 			len -= 8;
1115 		    }
1116 		if (len && s < strend) {
1117 		    U8 bits;
1118 		    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1119 		    if (datumtype == 'b')
1120 			while (len-- > 0) {
1121 			    if (bits & 1) cuv++;
1122 			    bits >>= 1;
1123 			}
1124 		    else
1125 			while (len-- > 0) {
1126 			    if (bits & 0x80) cuv++;
1127 			    bits <<= 1;
1128 			}
1129 		}
1130 		break;
1131 	    }
1132 
1133 	    sv = sv_2mortal(newSV(len ? len : 1));
1134 	    SvPOK_on(sv);
1135 	    str = SvPVX(sv);
1136 	    if (datumtype == 'b') {
1137 		U8 bits = 0;
1138 		const I32 ai32 = len;
1139 		for (len = 0; len < ai32; len++) {
1140 		    if (len & 7) bits >>= 1;
1141 		    else if (utf8) {
1142 			if (s >= strend) break;
1143 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1144 		    } else bits = *(U8 *) s++;
1145 		    *str++ = bits & 1 ? '1' : '0';
1146 		}
1147 	    } else {
1148 		U8 bits = 0;
1149 		const I32 ai32 = len;
1150 		for (len = 0; len < ai32; len++) {
1151 		    if (len & 7) bits <<= 1;
1152 		    else if (utf8) {
1153 			if (s >= strend) break;
1154 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1155 		    } else bits = *(U8 *) s++;
1156 		    *str++ = bits & 0x80 ? '1' : '0';
1157 		}
1158 	    }
1159 	    *str = '\0';
1160 	    SvCUR_set(sv, str - SvPVX_const(sv));
1161 	    XPUSHs(sv);
1162 	    break;
1163 	}
1164 	case 'H':
1165 	case 'h': {
1166 	    char *str = NULL;
1167 	    /* Preliminary length estimate, acceptable for utf8 too */
1168 	    if (howlen == e_star || len > (strend - s) * 2)
1169 		len = (strend - s) * 2;
1170 	    if (!checksum) {
1171 		sv = sv_2mortal(newSV(len ? len : 1));
1172 		SvPOK_on(sv);
1173 		str = SvPVX(sv);
1174 	    }
1175 	    if (datumtype == 'h') {
1176 		U8 bits = 0;
1177 		I32 ai32 = len;
1178 		for (len = 0; len < ai32; len++) {
1179 		    if (len & 1) bits >>= 4;
1180 		    else if (utf8) {
1181 			if (s >= strend) break;
1182 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1183 		    } else bits = * (U8 *) s++;
1184 		    if (!checksum)
1185 			*str++ = PL_hexdigit[bits & 15];
1186 		}
1187 	    } else {
1188 		U8 bits = 0;
1189 		const I32 ai32 = len;
1190 		for (len = 0; len < ai32; len++) {
1191 		    if (len & 1) bits <<= 4;
1192 		    else if (utf8) {
1193 			if (s >= strend) break;
1194 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1195 		    } else bits = *(U8 *) s++;
1196 		    if (!checksum)
1197 			*str++ = PL_hexdigit[(bits >> 4) & 15];
1198 		}
1199 	    }
1200 	    if (!checksum) {
1201 		*str = '\0';
1202 		SvCUR_set(sv, str - SvPVX_const(sv));
1203 		XPUSHs(sv);
1204 	    }
1205 	    break;
1206 	}
1207 	case 'C':
1208             if (len == 0) {
1209                 if (explicit_length)
1210 		    /* Switch to "character" mode */
1211 		    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1212 		break;
1213 	    }
1214 	    /* FALLTHROUGH */
1215 	case 'c':
1216 	    while (len-- > 0 && s < strend) {
1217 		int aint;
1218 		if (utf8)
1219 		  {
1220 		    STRLEN retlen;
1221 		    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1222 				 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1223 		    if (retlen == (STRLEN) -1 || retlen == 0)
1224 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1225 		    s += retlen;
1226 		  }
1227 		else
1228 		  aint = *(U8 *)(s)++;
1229 		if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1230 		    aint -= 256;
1231 		if (!checksum)
1232 		    mPUSHi(aint);
1233 		else if (checksum > bits_in_uv)
1234 		    cdouble += (NV)aint;
1235 		else
1236 		    cuv += aint;
1237 	    }
1238 	    break;
1239 	case 'W':
1240 	  W_checksum:
1241 	    if (utf8) {
1242 		while (len-- > 0 && s < strend) {
1243 		    STRLEN retlen;
1244 		    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1245 					 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1246 		    if (retlen == (STRLEN) -1 || retlen == 0)
1247 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1248 		    s += retlen;
1249 		    if (!checksum)
1250 			mPUSHu(val);
1251 		    else if (checksum > bits_in_uv)
1252 			cdouble += (NV) val;
1253 		    else
1254 			cuv += val;
1255 		}
1256 	    } else if (!checksum)
1257 		while (len-- > 0) {
1258 		    const U8 ch = *(U8 *) s++;
1259 		    mPUSHu(ch);
1260 	    }
1261 	    else if (checksum > bits_in_uv)
1262 		while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1263 	    else
1264 		while (len-- > 0) cuv += *(U8 *) s++;
1265 	    break;
1266 	case 'U':
1267 	    if (len == 0) {
1268                 if (explicit_length && howlen != e_star) {
1269 		    /* Switch to "bytes in UTF-8" mode */
1270 		    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1271 		    else
1272 			/* Should be impossible due to the need_utf8() test */
1273 			Perl_croak(aTHX_ "U0 mode on a byte string");
1274 		}
1275 		break;
1276 	    }
1277 	    if (len > strend - s) len = strend - s;
1278 	    if (!checksum) {
1279 		if (len && unpack_only_one) len = 1;
1280 		EXTEND(SP, len);
1281 		EXTEND_MORTAL(len);
1282 	    }
1283 	    while (len-- > 0 && s < strend) {
1284 		STRLEN retlen;
1285 		UV auv;
1286 		if (utf8) {
1287 		    U8 result[UTF8_MAXLEN];
1288 		    const char *ptr = s;
1289 		    STRLEN len;
1290 		    /* Bug: warns about bad utf8 even if we are short on bytes
1291 		       and will break out of the loop */
1292 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1293 				      'U'))
1294 			break;
1295 		    len = UTF8SKIP(result);
1296 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1297 				      (char *) &result[1], len-1, 'U')) break;
1298 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1299                                                        len,
1300                                                        &retlen,
1301                                                        UTF8_ALLOW_DEFAULT));
1302 		    s = ptr;
1303 		} else {
1304 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1305                                                        strend - s,
1306                                                        &retlen,
1307                                                        UTF8_ALLOW_DEFAULT));
1308 		    if (retlen == (STRLEN) -1 || retlen == 0)
1309 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1310 		    s += retlen;
1311 		}
1312 		if (!checksum)
1313 		    mPUSHu(auv);
1314 		else if (checksum > bits_in_uv)
1315 		    cdouble += (NV) auv;
1316 		else
1317 		    cuv += auv;
1318 	    }
1319 	    break;
1320 	case 's' | TYPE_IS_SHRIEKING:
1321 #if SHORTSIZE != SIZE16
1322 	    while (len-- > 0) {
1323 		short ashort;
1324                 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1325 		if (!checksum)
1326 		    mPUSHi(ashort);
1327 		else if (checksum > bits_in_uv)
1328 		    cdouble += (NV)ashort;
1329 		else
1330 		    cuv += ashort;
1331 	    }
1332 	    break;
1333 #else
1334 	    /* FALLTHROUGH */
1335 #endif
1336 	case 's':
1337 	    while (len-- > 0) {
1338 		I16 ai16;
1339 
1340 #if U16SIZE > SIZE16
1341 		ai16 = 0;
1342 #endif
1343                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1344 #if U16SIZE > SIZE16
1345 		if (ai16 > 32767)
1346 		    ai16 -= 65536;
1347 #endif
1348 		if (!checksum)
1349 		    mPUSHi(ai16);
1350 		else if (checksum > bits_in_uv)
1351 		    cdouble += (NV)ai16;
1352 		else
1353 		    cuv += ai16;
1354 	    }
1355 	    break;
1356 	case 'S' | TYPE_IS_SHRIEKING:
1357 #if SHORTSIZE != SIZE16
1358 	    while (len-- > 0) {
1359 		unsigned short aushort;
1360                 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1361                           needs_swap);
1362 		if (!checksum)
1363 		    mPUSHu(aushort);
1364 		else if (checksum > bits_in_uv)
1365 		    cdouble += (NV)aushort;
1366 		else
1367 		    cuv += aushort;
1368 	    }
1369 	    break;
1370 #else
1371             /* FALLTHROUGH */
1372 #endif
1373 	case 'v':
1374 	case 'n':
1375 	case 'S':
1376 	    while (len-- > 0) {
1377 		U16 au16;
1378 #if U16SIZE > SIZE16
1379 		au16 = 0;
1380 #endif
1381                 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1382 		if (datumtype == 'n')
1383 		    au16 = PerlSock_ntohs(au16);
1384 		if (datumtype == 'v')
1385 		    au16 = vtohs(au16);
1386 		if (!checksum)
1387 		    mPUSHu(au16);
1388 		else if (checksum > bits_in_uv)
1389 		    cdouble += (NV) au16;
1390 		else
1391 		    cuv += au16;
1392 	    }
1393 	    break;
1394 	case 'v' | TYPE_IS_SHRIEKING:
1395 	case 'n' | TYPE_IS_SHRIEKING:
1396 	    while (len-- > 0) {
1397 		I16 ai16;
1398 # if U16SIZE > SIZE16
1399 		ai16 = 0;
1400 # endif
1401                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1402                 /* There should never be any byte-swapping here.  */
1403                 assert(!TYPE_ENDIANNESS(datumtype));
1404 		if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1405 		    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1406 		if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1407 		    ai16 = (I16) vtohs((U16) ai16);
1408 		if (!checksum)
1409 		    mPUSHi(ai16);
1410 		else if (checksum > bits_in_uv)
1411 		    cdouble += (NV) ai16;
1412 		else
1413 		    cuv += ai16;
1414 	    }
1415 	    break;
1416 	case 'i':
1417 	case 'i' | TYPE_IS_SHRIEKING:
1418 	    while (len-- > 0) {
1419 		int aint;
1420                 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1421 		if (!checksum)
1422 		    mPUSHi(aint);
1423 		else if (checksum > bits_in_uv)
1424 		    cdouble += (NV)aint;
1425 		else
1426 		    cuv += aint;
1427 	    }
1428 	    break;
1429 	case 'I':
1430 	case 'I' | TYPE_IS_SHRIEKING:
1431 	    while (len-- > 0) {
1432 		unsigned int auint;
1433                 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1434 		if (!checksum)
1435 		    mPUSHu(auint);
1436 		else if (checksum > bits_in_uv)
1437 		    cdouble += (NV)auint;
1438 		else
1439 		    cuv += auint;
1440 	    }
1441 	    break;
1442 	case 'j':
1443 	    while (len-- > 0) {
1444 		IV aiv;
1445                 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1446 		if (!checksum)
1447 		    mPUSHi(aiv);
1448 		else if (checksum > bits_in_uv)
1449 		    cdouble += (NV)aiv;
1450 		else
1451 		    cuv += aiv;
1452 	    }
1453 	    break;
1454 	case 'J':
1455 	    while (len-- > 0) {
1456 		UV auv;
1457                 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1458 		if (!checksum)
1459 		    mPUSHu(auv);
1460 		else if (checksum > bits_in_uv)
1461 		    cdouble += (NV)auv;
1462 		else
1463 		    cuv += auv;
1464 	    }
1465 	    break;
1466 	case 'l' | TYPE_IS_SHRIEKING:
1467 #if LONGSIZE != SIZE32
1468 	    while (len-- > 0) {
1469 		long along;
1470                 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1471 		if (!checksum)
1472 		    mPUSHi(along);
1473 		else if (checksum > bits_in_uv)
1474 		    cdouble += (NV)along;
1475 		else
1476 		    cuv += along;
1477 	    }
1478 	    break;
1479 #else
1480 	    /* FALLTHROUGH */
1481 #endif
1482 	case 'l':
1483 	    while (len-- > 0) {
1484 		I32 ai32;
1485 #if U32SIZE > SIZE32
1486 		ai32 = 0;
1487 #endif
1488                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1489 #if U32SIZE > SIZE32
1490 		if (ai32 > 2147483647) ai32 -= 4294967296;
1491 #endif
1492 		if (!checksum)
1493 		    mPUSHi(ai32);
1494 		else if (checksum > bits_in_uv)
1495 		    cdouble += (NV)ai32;
1496 		else
1497 		    cuv += ai32;
1498 	    }
1499 	    break;
1500 	case 'L' | TYPE_IS_SHRIEKING:
1501 #if LONGSIZE != SIZE32
1502 	    while (len-- > 0) {
1503 		unsigned long aulong;
1504                 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1505 		if (!checksum)
1506 		    mPUSHu(aulong);
1507 		else if (checksum > bits_in_uv)
1508 		    cdouble += (NV)aulong;
1509 		else
1510 		    cuv += aulong;
1511 	    }
1512 	    break;
1513 #else
1514             /* FALLTHROUGH */
1515 #endif
1516 	case 'V':
1517 	case 'N':
1518 	case 'L':
1519 	    while (len-- > 0) {
1520 		U32 au32;
1521 #if U32SIZE > SIZE32
1522 		au32 = 0;
1523 #endif
1524                 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1525 		if (datumtype == 'N')
1526 		    au32 = PerlSock_ntohl(au32);
1527 		if (datumtype == 'V')
1528 		    au32 = vtohl(au32);
1529 		if (!checksum)
1530 		    mPUSHu(au32);
1531 		else if (checksum > bits_in_uv)
1532 		    cdouble += (NV)au32;
1533 		else
1534 		    cuv += au32;
1535 	    }
1536 	    break;
1537 	case 'V' | TYPE_IS_SHRIEKING:
1538 	case 'N' | TYPE_IS_SHRIEKING:
1539 	    while (len-- > 0) {
1540 		I32 ai32;
1541 #if U32SIZE > SIZE32
1542 		ai32 = 0;
1543 #endif
1544                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1545                 /* There should never be any byte swapping here.  */
1546                 assert(!TYPE_ENDIANNESS(datumtype));
1547 		if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1548 		    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1549 		if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1550 		    ai32 = (I32)vtohl((U32)ai32);
1551 		if (!checksum)
1552 		    mPUSHi(ai32);
1553 		else if (checksum > bits_in_uv)
1554 		    cdouble += (NV)ai32;
1555 		else
1556 		    cuv += ai32;
1557 	    }
1558 	    break;
1559 	case 'p':
1560 	    while (len-- > 0) {
1561 		const char *aptr;
1562                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1563 		/* newSVpv generates undef if aptr is NULL */
1564 		mPUSHs(newSVpv(aptr, 0));
1565 	    }
1566 	    break;
1567 	case 'w':
1568 	    {
1569 		UV auv = 0;
1570 		U32 bytes = 0;
1571 
1572 		while (len > 0 && s < strend) {
1573 		    U8 ch;
1574 		    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1575 		    auv = (auv << 7) | (ch & 0x7f);
1576                     /* UTF8_IS_XXXXX not right here because this is a BER, not
1577                      * UTF-8 format - using constant 0x80 */
1578 		    if (ch < 0x80) {
1579 			bytes = 0;
1580 			mPUSHu(auv);
1581 			len--;
1582 			auv = 0;
1583 			continue;
1584 		    }
1585 		    if (++bytes >= sizeof(UV)) {	/* promote to string */
1586 			const char *t;
1587 
1588 			sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1589 			while (s < strend) {
1590 			    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1591 			    sv = mul128(sv, (U8)(ch & 0x7f));
1592 			    if (!(ch & 0x80)) {
1593 				bytes = 0;
1594 				break;
1595 			    }
1596 			}
1597 			t = SvPV_nolen_const(sv);
1598 			while (*t == '0')
1599 			    t++;
1600 			sv_chop(sv, t);
1601 			mPUSHs(sv);
1602 			len--;
1603 			auv = 0;
1604 		    }
1605 		}
1606 		if ((s >= strend) && bytes)
1607 		    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1608 	    }
1609 	    break;
1610 	case 'P':
1611 	    if (symptr->howlen == e_star)
1612 	        Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1613 	    EXTEND(SP, 1);
1614 	    if (s + sizeof(char*) <= strend) {
1615 		char *aptr;
1616                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1617 		/* newSVpvn generates undef if aptr is NULL */
1618 		PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1619 	    }
1620 	    break;
1621 #if defined(HAS_QUAD) && IVSIZE >= 8
1622 	case 'q':
1623 	    while (len-- > 0) {
1624 		Quad_t aquad;
1625                 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1626 		if (!checksum)
1627                     mPUSHs(newSViv((IV)aquad));
1628 		else if (checksum > bits_in_uv)
1629 		    cdouble += (NV)aquad;
1630 		else
1631 		    cuv += aquad;
1632 	    }
1633 	    break;
1634 	case 'Q':
1635 	    while (len-- > 0) {
1636 		Uquad_t auquad;
1637                 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1638 		if (!checksum)
1639 		    mPUSHs(newSVuv((UV)auquad));
1640 		else if (checksum > bits_in_uv)
1641 		    cdouble += (NV)auquad;
1642 		else
1643 		    cuv += auquad;
1644 	    }
1645 	    break;
1646 #endif
1647 	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
1648 	case 'f':
1649 	    while (len-- > 0) {
1650 		float afloat;
1651                 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1652 		if (!checksum)
1653 		    mPUSHn(afloat);
1654 		else
1655 		    cdouble += afloat;
1656 	    }
1657 	    break;
1658 	case 'd':
1659 	    while (len-- > 0) {
1660 		double adouble;
1661                 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1662 		if (!checksum)
1663 		    mPUSHn(adouble);
1664 		else
1665 		    cdouble += adouble;
1666 	    }
1667 	    break;
1668 	case 'F':
1669 	    while (len-- > 0) {
1670 		NV_bytes anv;
1671                 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1672                             datumtype, needs_swap);
1673 		if (!checksum)
1674 		    mPUSHn(anv.nv);
1675 		else
1676 		    cdouble += anv.nv;
1677 	    }
1678 	    break;
1679 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1680 	case 'D':
1681 	    while (len-- > 0) {
1682 		ld_bytes aldouble;
1683                 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1684                             sizeof(aldouble.bytes), datumtype, needs_swap);
1685                 /* The most common long double format, the x86 80-bit
1686                  * extended precision, has either 2 or 6 unused bytes,
1687                  * which may contain garbage, which may contain
1688                  * unintentional data.  While we do zero the bytes of
1689                  * the long double data in pack(), here in unpack() we
1690                  * don't, because it's really hard to envision that
1691                  * reading the long double off aldouble would be
1692                  * affected by the unused bytes.
1693                  *
1694                  * Note that trying to unpack 'long doubles' of 'long
1695                  * doubles' packed in another system is in the general
1696                  * case doomed without having more detail. */
1697 		if (!checksum)
1698 		    mPUSHn(aldouble.ld);
1699 		else
1700 		    cdouble += aldouble.ld;
1701 	    }
1702 	    break;
1703 #endif
1704 	case 'u':
1705 	    if (!checksum) {
1706                 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1707 		sv = sv_2mortal(newSV(l));
1708 		if (l) SvPOK_on(sv);
1709 	    }
1710 
1711             /* Note that all legal uuencoded strings are ASCII printables, so
1712              * have the same representation under UTF-8 vs not.  This means we
1713              * can ignore UTF8ness on legal input.  For illegal we stop at the
1714              * first failure, and don't report where/what that is, so again we
1715              * can ignore UTF8ness */
1716 
1717             while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1718                 I32 a, b, c, d;
1719                 char hunk[3];
1720 
1721                 len = PL_uudmap[*(U8*)s++] & 077;
1722                 while (len > 0) {
1723                     if (s < strend && ISUUCHAR(*s))
1724                         a = PL_uudmap[*(U8*)s++] & 077;
1725                     else
1726                         a = 0;
1727                     if (s < strend && ISUUCHAR(*s))
1728                         b = PL_uudmap[*(U8*)s++] & 077;
1729                     else
1730                         b = 0;
1731                     if (s < strend && ISUUCHAR(*s))
1732                         c = PL_uudmap[*(U8*)s++] & 077;
1733                     else
1734                         c = 0;
1735                     if (s < strend && ISUUCHAR(*s))
1736                         d = PL_uudmap[*(U8*)s++] & 077;
1737                     else
1738                         d = 0;
1739                     hunk[0] = (char)((a << 2) | (b >> 4));
1740                     hunk[1] = (char)((b << 4) | (c >> 2));
1741                     hunk[2] = (char)((c << 6) | d);
1742                     if (!checksum)
1743                         sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1744                     len -= 3;
1745                 }
1746                 if (*s == '\n')
1747                     s++;
1748                 else	/* possible checksum byte */
1749                     if (s + 1 < strend && s[1] == '\n')
1750                         s += 2;
1751             }
1752 	    if (!checksum)
1753 		XPUSHs(sv);
1754 	    break;
1755 	} /* End of switch */
1756 
1757 	if (checksum) {
1758 	    if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1759 	      (checksum > bits_in_uv &&
1760 	       strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1761 		NV trouble, anv;
1762 
1763                 anv = (NV) (1 << (checksum & 15));
1764 		while (checksum >= 16) {
1765 		    checksum -= 16;
1766 		    anv *= 65536.0;
1767 		}
1768 		while (cdouble < 0.0)
1769 		    cdouble += anv;
1770 		cdouble = Perl_modf(cdouble / anv, &trouble);
1771 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1772                 /* Workaround for powerpc doubledouble modfl bug:
1773                  * close to 1.0L and -1.0L cdouble is 0, and trouble
1774                  * is cdouble / anv. */
1775                 if (trouble != Perl_ceil(trouble)) {
1776                   cdouble = trouble;
1777                   if (cdouble >  1.0L) cdouble -= 1.0L;
1778                   if (cdouble < -1.0L) cdouble += 1.0L;
1779                 }
1780 #endif
1781                 cdouble *= anv;
1782 		sv = newSVnv(cdouble);
1783 	    }
1784 	    else {
1785 		if (checksum < bits_in_uv) {
1786 		    UV mask = ((UV)1 << checksum) - 1;
1787 		    cuv &= mask;
1788 		}
1789 		sv = newSVuv(cuv);
1790 	    }
1791 	    mXPUSHs(sv);
1792 	    checksum = 0;
1793 	}
1794 
1795         if (symptr->flags & FLAG_SLASH){
1796             if (SP - PL_stack_base - start_sp_offset <= 0)
1797 		break;
1798             if( next_symbol(symptr) ){
1799               if( symptr->howlen == e_number )
1800 		Perl_croak(aTHX_ "Count after length/code in unpack" );
1801               if( beyond ){
1802          	/* ...end of char buffer then no decent length available */
1803 		Perl_croak(aTHX_ "length/code after end of string in unpack" );
1804               } else {
1805          	/* take top of stack (hope it's numeric) */
1806                 len = POPi;
1807                 if( len < 0 )
1808                     Perl_croak(aTHX_ "Negative '/' count in unpack" );
1809               }
1810             } else {
1811 		Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1812             }
1813             datumtype = symptr->code;
1814             explicit_length = FALSE;
1815 	    goto redo_switch;
1816         }
1817     }
1818 
1819     if (new_s)
1820 	*new_s = s;
1821     PUTBACK;
1822     return SP - PL_stack_base - start_sp_offset;
1823 }
1824 
1825 PP(pp_unpack)
1826 {
1827     dSP;
1828     dPOPPOPssrl;
1829     U8 gimme = GIMME_V;
1830     STRLEN llen;
1831     STRLEN rlen;
1832     const char *pat = SvPV_const(left,  llen);
1833     const char *s   = SvPV_const(right, rlen);
1834     const char *strend = s + rlen;
1835     const char *patend = pat + llen;
1836     I32 cnt;
1837 
1838     PUTBACK;
1839     cnt = unpackstring(pat, patend, s, strend,
1840 		     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1841 		     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1842 
1843     SPAGAIN;
1844     if ( !cnt && gimme == G_SCALAR )
1845        PUSHs(&PL_sv_undef);
1846     RETURN;
1847 }
1848 
1849 STATIC U8 *
1850 doencodes(U8 *h, const U8 *s, I32 len)
1851 {
1852     *h++ = PL_uuemap[len];
1853     while (len > 2) {
1854 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1855 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1856 	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1857 	*h++ = PL_uuemap[(077 & (s[2] & 077))];
1858 	s += 3;
1859 	len -= 3;
1860     }
1861     if (len > 0) {
1862         const U8 r = (len > 1 ? s[1] : '\0');
1863 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1864 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1865 	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1866 	*h++ = PL_uuemap[0];
1867     }
1868     *h++ = '\n';
1869     return h;
1870 }
1871 
1872 STATIC SV *
1873 S_is_an_int(pTHX_ const char *s, STRLEN l)
1874 {
1875   SV *result = newSVpvn(s, l);
1876   char *const result_c = SvPV_nolen(result);	/* convenience */
1877   char *out = result_c;
1878   bool skip = 1;
1879   bool ignore = 0;
1880 
1881   PERL_ARGS_ASSERT_IS_AN_INT;
1882 
1883   while (*s) {
1884     switch (*s) {
1885     case ' ':
1886       break;
1887     case '+':
1888       if (!skip) {
1889 	SvREFCNT_dec(result);
1890 	return (NULL);
1891       }
1892       break;
1893     case '0':
1894     case '1':
1895     case '2':
1896     case '3':
1897     case '4':
1898     case '5':
1899     case '6':
1900     case '7':
1901     case '8':
1902     case '9':
1903       skip = 0;
1904       if (!ignore) {
1905 	*(out++) = *s;
1906       }
1907       break;
1908     case '.':
1909       ignore = 1;
1910       break;
1911     default:
1912       SvREFCNT_dec(result);
1913       return (NULL);
1914     }
1915     s++;
1916   }
1917   *(out++) = '\0';
1918   SvCUR_set(result, out - result_c);
1919   return (result);
1920 }
1921 
1922 /* pnum must be '\0' terminated */
1923 STATIC int
1924 S_div128(pTHX_ SV *pnum, bool *done)
1925 {
1926     STRLEN len;
1927     char * const s = SvPV(pnum, len);
1928     char *t = s;
1929     int m = 0;
1930 
1931     PERL_ARGS_ASSERT_DIV128;
1932 
1933     *done = 1;
1934     while (*t) {
1935 	const int i = m * 10 + (*t - '0');
1936 	const int r = (i >> 7); /* r < 10 */
1937 	m = i & 0x7F;
1938 	if (r) {
1939 	    *done = 0;
1940 	}
1941 	*(t++) = '0' + r;
1942     }
1943     *(t++) = '\0';
1944     SvCUR_set(pnum, (STRLEN) (t - s));
1945     return (m);
1946 }
1947 
1948 /*
1949 =for apidoc packlist
1950 
1951 The engine implementing C<pack()> Perl function.
1952 
1953 =cut
1954 */
1955 
1956 void
1957 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1958 {
1959     tempsym_t sym;
1960 
1961     PERL_ARGS_ASSERT_PACKLIST;
1962 
1963     TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1964 
1965     /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1966        Also make sure any UTF8 flag is loaded */
1967     SvPV_force_nolen(cat);
1968     if (DO_UTF8(cat))
1969 	sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1970 
1971     (void)pack_rec( cat, &sym, beglist, endlist );
1972 }
1973 
1974 /* like sv_utf8_upgrade, but also repoint the group start markers */
1975 STATIC void
1976 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1977     STRLEN len;
1978     tempsym_t *group;
1979     const char *from_ptr, *from_start, *from_end, **marks, **m;
1980     char *to_start, *to_ptr;
1981 
1982     if (SvUTF8(sv)) return;
1983 
1984     from_start = SvPVX_const(sv);
1985     from_end = from_start + SvCUR(sv);
1986     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
1987 	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
1988     if (from_ptr == from_end) {
1989 	/* Simple case: no character needs to be changed */
1990 	SvUTF8_on(sv);
1991 	return;
1992     }
1993 
1994     len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
1995     Newx(to_start, len, char);
1996     Copy(from_start, to_start, from_ptr-from_start, char);
1997     to_ptr = to_start + (from_ptr-from_start);
1998 
1999     Newx(marks, sym_ptr->level+2, const char *);
2000     for (group=sym_ptr; group; group = group->previous)
2001 	marks[group->level] = from_start + group->strbeg;
2002     marks[sym_ptr->level+1] = from_end+1;
2003     for (m = marks; *m < from_ptr; m++)
2004 	*m = to_start + (*m-from_start);
2005 
2006     for (;from_ptr < from_end; from_ptr++) {
2007 	while (*m == from_ptr) *m++ = to_ptr;
2008 	to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2009     }
2010     *to_ptr = 0;
2011 
2012     while (*m == from_ptr) *m++ = to_ptr;
2013     if (m != marks + sym_ptr->level+1) {
2014 	Safefree(marks);
2015 	Safefree(to_start);
2016 	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2017 		   "level=%d", m, marks, sym_ptr->level);
2018     }
2019     for (group=sym_ptr; group; group = group->previous)
2020 	group->strbeg = marks[group->level] - to_start;
2021     Safefree(marks);
2022 
2023     if (SvOOK(sv)) {
2024 	if (SvIVX(sv)) {
2025 	    SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2026 	    from_start -= SvIVX(sv);
2027 	    SvIV_set(sv, 0);
2028 	}
2029 	SvFLAGS(sv) &= ~SVf_OOK;
2030     }
2031     if (SvLEN(sv) != 0)
2032 	Safefree(from_start);
2033     SvPV_set(sv, to_start);
2034     SvCUR_set(sv, to_ptr - to_start);
2035     SvLEN_set(sv, len);
2036     SvUTF8_on(sv);
2037 }
2038 
2039 /* Exponential string grower. Makes string extension effectively O(n)
2040    needed says how many extra bytes we need (not counting the final '\0')
2041    Only grows the string if there is an actual lack of space
2042 */
2043 STATIC char *
2044 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2045     const STRLEN cur = SvCUR(sv);
2046     const STRLEN len = SvLEN(sv);
2047     STRLEN extend;
2048 
2049     PERL_ARGS_ASSERT_SV_EXP_GROW;
2050 
2051     if (len - cur > needed) return SvPVX(sv);
2052     extend = needed > len ? needed : len;
2053     return SvGROW(sv, len+extend+1);
2054 }
2055 
2056 static SV *
2057 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2058 {
2059     SvGETMAGIC(sv);
2060     if (UNLIKELY(SvAMAGIC(sv)))
2061 	sv = sv_2num(sv);
2062     if (UNLIKELY(isinfnansv(sv))) {
2063 	const I32 c = TYPE_NO_MODIFIERS(datumtype);
2064 	const NV nv = SvNV_nomg(sv);
2065 	if (c == 'w')
2066 	    Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2067 	else
2068 	    Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2069     }
2070     return sv;
2071 }
2072 
2073 #define SvIV_no_inf(sv,d) \
2074 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2075 #define SvUV_no_inf(sv,d) \
2076 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2077 
2078 STATIC
2079 SV **
2080 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2081 {
2082     tempsym_t lookahead;
2083     I32 items  = endlist - beglist;
2084     bool found = next_symbol(symptr);
2085     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2086     bool warn_utf8 = ckWARN(WARN_UTF8);
2087     char* from;
2088 
2089     PERL_ARGS_ASSERT_PACK_REC;
2090 
2091     if (symptr->level == 0 && found && symptr->code == 'U') {
2092 	marked_upgrade(aTHX_ cat, symptr);
2093 	symptr->flags |= FLAG_DO_UTF8;
2094 	utf8 = 0;
2095     }
2096     symptr->strbeg = SvCUR(cat);
2097 
2098     while (found) {
2099 	SV *fromstr;
2100 	STRLEN fromlen;
2101 	I32 len;
2102 	SV *lengthcode = NULL;
2103         I32 datumtype = symptr->code;
2104         howlen_t howlen = symptr->howlen;
2105 	char *start = SvPVX(cat);
2106 	char *cur   = start + SvCUR(cat);
2107         bool needs_swap;
2108 
2109 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2110 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2111 
2112         switch (howlen) {
2113 	  case e_star:
2114 	    len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2115 		0 : items;
2116 	    break;
2117 	  default:
2118 	    /* e_no_len and e_number */
2119 	    len = symptr->length;
2120 	    break;
2121         }
2122 
2123 	if (len) {
2124 	    packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2125 
2126 	    if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2127 		/* We can process this letter. */
2128 		STRLEN size = props & PACK_SIZE_MASK;
2129 		GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2130 	    }
2131         }
2132 
2133         /* Look ahead for next symbol. Do we have code/code? */
2134         lookahead = *symptr;
2135         found = next_symbol(&lookahead);
2136 	if (symptr->flags & FLAG_SLASH) {
2137 	    IV count;
2138 	    if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2139 	    if (strchr("aAZ", lookahead.code)) {
2140 		if (lookahead.howlen == e_number) count = lookahead.length;
2141 		else {
2142 		    if (items > 0) {
2143 			count = sv_len_utf8(*beglist);
2144 		    }
2145 		    else count = 0;
2146 		    if (lookahead.code == 'Z') count++;
2147 		}
2148 	    } else {
2149 		if (lookahead.howlen == e_number && lookahead.length < items)
2150 		    count = lookahead.length;
2151 		else count = items;
2152 	    }
2153 	    lookahead.howlen = e_number;
2154 	    lookahead.length = count;
2155 	    lengthcode = sv_2mortal(newSViv(count));
2156 	}
2157 
2158         needs_swap = NEEDS_SWAP(datumtype);
2159 
2160 	/* Code inside the switch must take care to properly update
2161 	   cat (CUR length and '\0' termination) if it updated *cur and
2162 	   doesn't simply leave using break */
2163 	switch (TYPE_NO_ENDIANNESS(datumtype)) {
2164 	default:
2165 	    Perl_croak(aTHX_ "Invalid type '%c' in pack",
2166 		       (int) TYPE_NO_MODIFIERS(datumtype));
2167 	case '%':
2168 	    Perl_croak(aTHX_ "'%%' may not be used in pack");
2169 
2170 	case '.' | TYPE_IS_SHRIEKING:
2171 	case '.':
2172 	    if (howlen == e_star) from = start;
2173 	    else if (len == 0) from = cur;
2174 	    else {
2175 		tempsym_t *group = symptr;
2176 
2177 		while (--len && group) group = group->previous;
2178 		from = group ? start + group->strbeg : start;
2179 	    }
2180 	    fromstr = NEXTFROM;
2181 	    len = SvIV_no_inf(fromstr, datumtype);
2182 	    goto resize;
2183 	case '@' | TYPE_IS_SHRIEKING:
2184 	case '@':
2185 	    from = start + symptr->strbeg;
2186 	  resize:
2187 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2188 		if (len >= 0) {
2189 		    while (len && from < cur) {
2190 			from += UTF8SKIP(from);
2191 			len--;
2192 		    }
2193 		    if (from > cur)
2194 			Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2195 		    if (len) {
2196 			/* Here we know from == cur */
2197 		      grow:
2198 			GROWING(0, cat, start, cur, len);
2199 			Zero(cur, len, char);
2200 			cur += len;
2201 		    } else if (from < cur) {
2202 			len = cur - from;
2203 			goto shrink;
2204 		    } else goto no_change;
2205 		} else {
2206 		    cur = from;
2207 		    len = -len;
2208 		    goto utf8_shrink;
2209 		}
2210 	    else {
2211 		len -= cur - from;
2212 		if (len > 0) goto grow;
2213 		if (len == 0) goto no_change;
2214 		len = -len;
2215 		goto shrink;
2216 	    }
2217 	    break;
2218 
2219 	case '(': {
2220             tempsym_t savsym = *symptr;
2221 	    U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2222 	    symptr->flags |= group_modifiers;
2223             symptr->patend = savsym.grpend;
2224             symptr->level++;
2225 	    symptr->previous = &lookahead;
2226 	    while (len--) {
2227 		U32 was_utf8;
2228 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2229 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
2230 		was_utf8 = SvUTF8(cat);
2231   	        symptr->patptr = savsym.grpbeg;
2232 		beglist = pack_rec(cat, symptr, beglist, endlist);
2233 		if (SvUTF8(cat) != was_utf8)
2234 		    /* This had better be an upgrade while in utf8==0 mode */
2235 		    utf8 = 1;
2236 
2237 		if (savsym.howlen == e_star && beglist == endlist)
2238 		    break;		/* No way to continue */
2239 	    }
2240 	    items = endlist - beglist;
2241 	    lookahead.flags  = symptr->flags & ~group_modifiers;
2242 	    goto no_change;
2243 	}
2244 	case 'X' | TYPE_IS_SHRIEKING:
2245 	    if (!len)			/* Avoid division by 0 */
2246 		len = 1;
2247 	    if (utf8) {
2248 		char *hop, *last;
2249 		I32 l = len;
2250 		hop = last = start;
2251 		while (hop < cur) {
2252 		    hop += UTF8SKIP(hop);
2253 		    if (--l == 0) {
2254 			last = hop;
2255 			l = len;
2256 		    }
2257 		}
2258 		if (last > cur)
2259 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2260 		cur = last;
2261 		break;
2262 	    }
2263 	    len = (cur-start) % len;
2264 	    /* FALLTHROUGH */
2265 	case 'X':
2266 	    if (utf8) {
2267 		if (len < 1) goto no_change;
2268 	      utf8_shrink:
2269 		while (len > 0) {
2270 		    if (cur <= start)
2271 			Perl_croak(aTHX_ "'%c' outside of string in pack",
2272 				   (int) TYPE_NO_MODIFIERS(datumtype));
2273 		    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2274 			if (cur <= start)
2275 			    Perl_croak(aTHX_ "'%c' outside of string in pack",
2276 				       (int) TYPE_NO_MODIFIERS(datumtype));
2277 		    }
2278 		    len--;
2279 		}
2280 	    } else {
2281 	      shrink:
2282 		if (cur - start < len)
2283 		    Perl_croak(aTHX_ "'%c' outside of string in pack",
2284 			       (int) TYPE_NO_MODIFIERS(datumtype));
2285 		cur -= len;
2286 	    }
2287 	    if (cur < start+symptr->strbeg) {
2288 		/* Make sure group starts don't point into the void */
2289 		tempsym_t *group;
2290 		const STRLEN length = cur-start;
2291 		for (group = symptr;
2292 		     group && length < group->strbeg;
2293 		     group = group->previous) group->strbeg = length;
2294 		lookahead.strbeg = length;
2295 	    }
2296 	    break;
2297 	case 'x' | TYPE_IS_SHRIEKING: {
2298 	    I32 ai32;
2299 	    if (!len)			/* Avoid division by 0 */
2300 		len = 1;
2301 	    if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2302 	    else      ai32 = (cur - start) % len;
2303 	    if (ai32 == 0) goto no_change;
2304 	    len -= ai32;
2305 	}
2306 	/* FALLTHROUGH */
2307 	case 'x':
2308 	    goto grow;
2309 	case 'A':
2310 	case 'Z':
2311 	case 'a': {
2312 	    const char *aptr;
2313 
2314 	    fromstr = NEXTFROM;
2315 	    aptr = SvPV_const(fromstr, fromlen);
2316 	    if (DO_UTF8(fromstr)) {
2317                 const char *end, *s;
2318 
2319 		if (!utf8 && !SvUTF8(cat)) {
2320 		    marked_upgrade(aTHX_ cat, symptr);
2321 		    lookahead.flags |= FLAG_DO_UTF8;
2322 		    lookahead.strbeg = symptr->strbeg;
2323 		    utf8 = 1;
2324 		    start = SvPVX(cat);
2325 		    cur = start + SvCUR(cat);
2326 		}
2327 		if (howlen == e_star) {
2328 		    if (utf8) goto string_copy;
2329 		    len = fromlen+1;
2330 		}
2331 		s = aptr;
2332 		end = aptr + fromlen;
2333 		fromlen = datumtype == 'Z' ? len-1 : len;
2334 		while ((I32) fromlen > 0 && s < end) {
2335 		    s += UTF8SKIP(s);
2336 		    fromlen--;
2337 		}
2338 		if (s > end)
2339 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2340 		if (utf8) {
2341 		    len = fromlen;
2342 		    if (datumtype == 'Z') len++;
2343 		    fromlen = s-aptr;
2344 		    len += fromlen;
2345 
2346 		    goto string_copy;
2347 		}
2348 		fromlen = len - fromlen;
2349 		if (datumtype == 'Z') fromlen--;
2350 		if (howlen == e_star) {
2351 		    len = fromlen;
2352 		    if (datumtype == 'Z') len++;
2353 		}
2354 		GROWING(0, cat, start, cur, len);
2355 		if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2356 				  datumtype | TYPE_IS_PACK))
2357 		    Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2358 			       "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2359 			       (int)datumtype, aptr, end, cur, (UV)fromlen);
2360 		cur += fromlen;
2361 		len -= fromlen;
2362 	    } else if (utf8) {
2363 		if (howlen == e_star) {
2364 		    len = fromlen;
2365 		    if (datumtype == 'Z') len++;
2366 		}
2367 		if (len <= (I32) fromlen) {
2368 		    fromlen = len;
2369 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2370 		}
2371 		/* assumes a byte expands to at most UTF8_EXPAND bytes on
2372 		   upgrade, so:
2373 		   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2374 		GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2375 		len -= fromlen;
2376 		while (fromlen > 0) {
2377 		    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2378 		    aptr++;
2379 		    fromlen--;
2380 		}
2381 	    } else {
2382 	      string_copy:
2383 		if (howlen == e_star) {
2384 		    len = fromlen;
2385 		    if (datumtype == 'Z') len++;
2386 		}
2387 		if (len <= (I32) fromlen) {
2388 		    fromlen = len;
2389 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2390 		}
2391 		GROWING(0, cat, start, cur, len);
2392 		Copy(aptr, cur, fromlen, char);
2393 		cur += fromlen;
2394 		len -= fromlen;
2395 	    }
2396 	    memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2397 	    cur += len;
2398 	    SvTAINT(cat);
2399 	    break;
2400 	}
2401 	case 'B':
2402 	case 'b': {
2403 	    const char *str, *end;
2404 	    I32 l, field_len;
2405 	    U8 bits;
2406 	    bool utf8_source;
2407 	    U32 utf8_flags;
2408 
2409 	    fromstr = NEXTFROM;
2410 	    str = SvPV_const(fromstr, fromlen);
2411 	    end = str + fromlen;
2412 	    if (DO_UTF8(fromstr)) {
2413 		utf8_source = TRUE;
2414 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2415 	    } else {
2416 		utf8_source = FALSE;
2417 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2418 	    }
2419 	    if (howlen == e_star) len = fromlen;
2420 	    field_len = (len+7)/8;
2421 	    GROWING(utf8, cat, start, cur, field_len);
2422 	    if (len > (I32)fromlen) len = fromlen;
2423 	    bits = 0;
2424 	    l = 0;
2425 	    if (datumtype == 'B')
2426 		while (l++ < len) {
2427 		    if (utf8_source) {
2428 			UV val = 0;
2429 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2430 			bits |= val & 1;
2431 		    } else bits |= *str++ & 1;
2432 		    if (l & 7) bits <<= 1;
2433 		    else {
2434 			PUSH_BYTE(utf8, cur, bits);
2435 			bits = 0;
2436 		    }
2437 		}
2438 	    else
2439 		/* datumtype == 'b' */
2440 		while (l++ < len) {
2441 		    if (utf8_source) {
2442 			UV val = 0;
2443 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2444 			if (val & 1) bits |= 0x80;
2445 		    } else if (*str++ & 1)
2446 			bits |= 0x80;
2447 		    if (l & 7) bits >>= 1;
2448 		    else {
2449 			PUSH_BYTE(utf8, cur, bits);
2450 			bits = 0;
2451 		    }
2452 		}
2453 	    l--;
2454 	    if (l & 7) {
2455 		if (datumtype == 'B')
2456 		    bits <<= 7 - (l & 7);
2457 		else
2458 		    bits >>= 7 - (l & 7);
2459 		PUSH_BYTE(utf8, cur, bits);
2460 		l += 7;
2461 	    }
2462 	    /* Determine how many chars are left in the requested field */
2463 	    l /= 8;
2464 	    if (howlen == e_star) field_len = 0;
2465 	    else field_len -= l;
2466 	    Zero(cur, field_len, char);
2467 	    cur += field_len;
2468 	    break;
2469 	}
2470 	case 'H':
2471 	case 'h': {
2472 	    const char *str, *end;
2473 	    I32 l, field_len;
2474 	    U8 bits;
2475 	    bool utf8_source;
2476 	    U32 utf8_flags;
2477 
2478 	    fromstr = NEXTFROM;
2479 	    str = SvPV_const(fromstr, fromlen);
2480 	    end = str + fromlen;
2481 	    if (DO_UTF8(fromstr)) {
2482 		utf8_source = TRUE;
2483 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2484 	    } else {
2485 		utf8_source = FALSE;
2486 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2487 	    }
2488 	    if (howlen == e_star) len = fromlen;
2489 	    field_len = (len+1)/2;
2490 	    GROWING(utf8, cat, start, cur, field_len);
2491 	    if (!utf8_source && len > (I32)fromlen) len = fromlen;
2492 	    bits = 0;
2493 	    l = 0;
2494 	    if (datumtype == 'H')
2495 		while (l++ < len) {
2496 		    if (utf8_source) {
2497 			UV val = 0;
2498 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2499 			if (val < 256 && isALPHA(val))
2500 			    bits |= (val + 9) & 0xf;
2501 			else
2502 			    bits |= val & 0xf;
2503 		    } else if (isALPHA(*str))
2504 			bits |= (*str++ + 9) & 0xf;
2505 		    else
2506 			bits |= *str++ & 0xf;
2507 		    if (l & 1) bits <<= 4;
2508 		    else {
2509 			PUSH_BYTE(utf8, cur, bits);
2510 			bits = 0;
2511 		    }
2512 		}
2513 	    else
2514 		while (l++ < len) {
2515 		    if (utf8_source) {
2516 			UV val = 0;
2517 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2518 			if (val < 256 && isALPHA(val))
2519 			    bits |= ((val + 9) & 0xf) << 4;
2520 			else
2521 			    bits |= (val & 0xf) << 4;
2522 		    } else if (isALPHA(*str))
2523 			bits |= ((*str++ + 9) & 0xf) << 4;
2524 		    else
2525 			bits |= (*str++ & 0xf) << 4;
2526 		    if (l & 1) bits >>= 4;
2527 		    else {
2528 			PUSH_BYTE(utf8, cur, bits);
2529 			bits = 0;
2530 		    }
2531 		}
2532 	    l--;
2533 	    if (l & 1) {
2534 		PUSH_BYTE(utf8, cur, bits);
2535 		l++;
2536 	    }
2537 	    /* Determine how many chars are left in the requested field */
2538 	    l /= 2;
2539 	    if (howlen == e_star) field_len = 0;
2540 	    else field_len -= l;
2541 	    Zero(cur, field_len, char);
2542 	    cur += field_len;
2543 	    break;
2544 	}
2545 	case 'c':
2546 	    while (len-- > 0) {
2547 		IV aiv;
2548 		fromstr = NEXTFROM;
2549                 aiv = SvIV_no_inf(fromstr, datumtype);
2550 		if ((-128 > aiv || aiv > 127))
2551 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2552 				   "Character in 'c' format wrapped in pack");
2553 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2554 	    }
2555 	    break;
2556 	case 'C':
2557 	    if (len == 0) {
2558 		utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2559 		break;
2560 	    }
2561 	    while (len-- > 0) {
2562 		IV aiv;
2563 		fromstr = NEXTFROM;
2564                 aiv = SvIV_no_inf(fromstr, datumtype);
2565 		if ((0 > aiv || aiv > 0xff))
2566 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2567 				   "Character in 'C' format wrapped in pack");
2568 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2569 	    }
2570 	    break;
2571 	case 'W': {
2572 	    char *end;
2573 	    U8 in_bytes = (U8)IN_BYTES;
2574 
2575 	    end = start+SvLEN(cat)-1;
2576 	    if (utf8) end -= UTF8_MAXLEN-1;
2577 	    while (len-- > 0) {
2578 		UV auv;
2579 		fromstr = NEXTFROM;
2580 		auv = SvUV_no_inf(fromstr, datumtype);
2581 		if (in_bytes) auv = auv % 0x100;
2582 		if (utf8) {
2583 		  W_utf8:
2584 		    if (cur > end) {
2585 			*cur = '\0';
2586 			SvCUR_set(cat, cur - start);
2587 
2588 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2589 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2590 		    }
2591 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2592 						       auv,
2593 						       warn_utf8 ?
2594 						       0 : UNICODE_ALLOW_ANY);
2595 		} else {
2596 		    if (auv >= 0x100) {
2597 			if (!SvUTF8(cat)) {
2598 			    *cur = '\0';
2599 			    SvCUR_set(cat, cur - start);
2600 			    marked_upgrade(aTHX_ cat, symptr);
2601 			    lookahead.flags |= FLAG_DO_UTF8;
2602 			    lookahead.strbeg = symptr->strbeg;
2603 			    utf8 = 1;
2604 			    start = SvPVX(cat);
2605 			    cur = start + SvCUR(cat);
2606 			    end = start+SvLEN(cat)-UTF8_MAXLEN;
2607 			    goto W_utf8;
2608 			}
2609 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2610 				       "Character in 'W' format wrapped in pack");
2611 			auv &= 0xff;
2612 		    }
2613 		    if (cur >= end) {
2614 			*cur = '\0';
2615 			SvCUR_set(cat, cur - start);
2616 			GROWING(0, cat, start, cur, len+1);
2617 			end = start+SvLEN(cat)-1;
2618 		    }
2619 		    *(U8 *) cur++ = (U8)auv;
2620 		}
2621 	    }
2622 	    break;
2623 	}
2624 	case 'U': {
2625 	    char *end;
2626 
2627 	    if (len == 0) {
2628 		if (!(symptr->flags & FLAG_DO_UTF8)) {
2629 		    marked_upgrade(aTHX_ cat, symptr);
2630 		    lookahead.flags |= FLAG_DO_UTF8;
2631 		    lookahead.strbeg = symptr->strbeg;
2632 		}
2633 		utf8 = 0;
2634 		goto no_change;
2635 	    }
2636 
2637 	    end = start+SvLEN(cat);
2638 	    if (!utf8) end -= UTF8_MAXLEN;
2639 	    while (len-- > 0) {
2640 		UV auv;
2641 		fromstr = NEXTFROM;
2642 		auv = SvUV_no_inf(fromstr, datumtype);
2643 		if (utf8) {
2644 		    U8 buffer[UTF8_MAXLEN], *endb;
2645 		    endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
2646 					       warn_utf8 ?
2647 					       0 : UNICODE_ALLOW_ANY);
2648 		    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2649 			*cur = '\0';
2650 			SvCUR_set(cat, cur - start);
2651 			GROWING(0, cat, start, cur,
2652 				len+(endb-buffer)*UTF8_EXPAND);
2653 			end = start+SvLEN(cat);
2654 		    }
2655                     cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2656 		} else {
2657 		    if (cur >= end) {
2658 			*cur = '\0';
2659 			SvCUR_set(cat, cur - start);
2660 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2661 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2662 		    }
2663 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
2664 						       warn_utf8 ?
2665 						       0 : UNICODE_ALLOW_ANY);
2666 		}
2667 	    }
2668 	    break;
2669 	}
2670 	/* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
2671 	case 'f':
2672 	    while (len-- > 0) {
2673 		float afloat;
2674 		NV anv;
2675 		fromstr = NEXTFROM;
2676 		anv = SvNV(fromstr);
2677 # if defined(VMS) && !defined(_IEEE_FP)
2678 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2679 		 * on Alpha; fake it if we don't have them.
2680 		 */
2681 		if (anv > FLT_MAX)
2682 		    afloat = FLT_MAX;
2683 		else if (anv < -FLT_MAX)
2684 		    afloat = -FLT_MAX;
2685 		else afloat = (float)anv;
2686 # else
2687 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2688 		if(Perl_isnan(anv))
2689 		    afloat = (float)NV_NAN;
2690 		else
2691 #endif
2692                 /* a simple cast to float is undefined if outside
2693                  * the range of values that can be represented */
2694 		afloat = (float)(anv >  FLT_MAX ?  NV_INF :
2695                                  anv < -FLT_MAX ? -NV_INF : anv);
2696 # endif
2697                 PUSH_VAR(utf8, cur, afloat, needs_swap);
2698 	    }
2699 	    break;
2700 	case 'd':
2701 	    while (len-- > 0) {
2702 		double adouble;
2703 		NV anv;
2704 		fromstr = NEXTFROM;
2705 		anv = SvNV(fromstr);
2706 # if defined(VMS) && !defined(_IEEE_FP)
2707 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2708 		 * on Alpha; fake it if we don't have them.
2709 		 */
2710 		if (anv > DBL_MAX)
2711 		    adouble = DBL_MAX;
2712 		else if (anv < -DBL_MAX)
2713 		    adouble = -DBL_MAX;
2714 		else adouble = (double)anv;
2715 # else
2716 		adouble = (double)anv;
2717 # endif
2718                 PUSH_VAR(utf8, cur, adouble, needs_swap);
2719 	    }
2720 	    break;
2721 	case 'F': {
2722 	    NV_bytes anv;
2723 	    Zero(&anv, 1, NV); /* can be long double with unused bits */
2724 	    while (len-- > 0) {
2725 		fromstr = NEXTFROM;
2726 #ifdef __GNUC__
2727 		/* to work round a gcc/x86 bug; don't use SvNV */
2728 		anv.nv = sv_2nv(fromstr);
2729 #    if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2730          && LONG_DOUBLESIZE > 10
2731                 /* GCC sometimes overwrites the padding in the
2732                    assignment above */
2733                 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2734 #    endif
2735 #else
2736 		anv.nv = SvNV(fromstr);
2737 #endif
2738                 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2739 	    }
2740 	    break;
2741 	}
2742 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2743 	case 'D': {
2744 	    ld_bytes aldouble;
2745 	    /* long doubles can have unused bits, which may be nonzero */
2746 	    Zero(&aldouble, 1, long double);
2747 	    while (len-- > 0) {
2748 		fromstr = NEXTFROM;
2749 #  ifdef __GNUC__
2750 		/* to work round a gcc/x86 bug; don't use SvNV */
2751 		aldouble.ld = (long double)sv_2nv(fromstr);
2752 #    if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2753                 /* GCC sometimes overwrites the padding in the
2754                    assignment above */
2755                 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2756 #    endif
2757 #  else
2758 		aldouble.ld = (long double)SvNV(fromstr);
2759 #  endif
2760                 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2761                            needs_swap);
2762 	    }
2763 	    break;
2764 	}
2765 #endif
2766 	case 'n' | TYPE_IS_SHRIEKING:
2767 	case 'n':
2768 	    while (len-- > 0) {
2769 		I16 ai16;
2770 		fromstr = NEXTFROM;
2771 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2772 		ai16 = PerlSock_htons(ai16);
2773                 PUSH16(utf8, cur, &ai16, FALSE);
2774 	    }
2775 	    break;
2776 	case 'v' | TYPE_IS_SHRIEKING:
2777 	case 'v':
2778 	    while (len-- > 0) {
2779 		I16 ai16;
2780 		fromstr = NEXTFROM;
2781 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2782 		ai16 = htovs(ai16);
2783                 PUSH16(utf8, cur, &ai16, FALSE);
2784 	    }
2785 	    break;
2786         case 'S' | TYPE_IS_SHRIEKING:
2787 #if SHORTSIZE != SIZE16
2788 	    while (len-- > 0) {
2789 		unsigned short aushort;
2790 		fromstr = NEXTFROM;
2791 		aushort = SvUV_no_inf(fromstr, datumtype);
2792                 PUSH_VAR(utf8, cur, aushort, needs_swap);
2793 	    }
2794             break;
2795 #else
2796             /* FALLTHROUGH */
2797 #endif
2798 	case 'S':
2799 	    while (len-- > 0) {
2800 		U16 au16;
2801 		fromstr = NEXTFROM;
2802 		au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2803                 PUSH16(utf8, cur, &au16, needs_swap);
2804 	    }
2805 	    break;
2806 	case 's' | TYPE_IS_SHRIEKING:
2807 #if SHORTSIZE != SIZE16
2808 	    while (len-- > 0) {
2809 		short ashort;
2810 		fromstr = NEXTFROM;
2811 		ashort = SvIV_no_inf(fromstr, datumtype);
2812                 PUSH_VAR(utf8, cur, ashort, needs_swap);
2813 	    }
2814             break;
2815 #else
2816             /* FALLTHROUGH */
2817 #endif
2818 	case 's':
2819 	    while (len-- > 0) {
2820 		I16 ai16;
2821 		fromstr = NEXTFROM;
2822 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2823                 PUSH16(utf8, cur, &ai16, needs_swap);
2824 	    }
2825 	    break;
2826 	case 'I':
2827 	case 'I' | TYPE_IS_SHRIEKING:
2828 	    while (len-- > 0) {
2829 		unsigned int auint;
2830 		fromstr = NEXTFROM;
2831 		auint = SvUV_no_inf(fromstr, datumtype);
2832                 PUSH_VAR(utf8, cur, auint, needs_swap);
2833 	    }
2834 	    break;
2835 	case 'j':
2836 	    while (len-- > 0) {
2837 		IV aiv;
2838 		fromstr = NEXTFROM;
2839 		aiv = SvIV_no_inf(fromstr, datumtype);
2840                 PUSH_VAR(utf8, cur, aiv, needs_swap);
2841 	    }
2842 	    break;
2843 	case 'J':
2844 	    while (len-- > 0) {
2845 		UV auv;
2846 		fromstr = NEXTFROM;
2847 		auv = SvUV_no_inf(fromstr, datumtype);
2848                 PUSH_VAR(utf8, cur, auv, needs_swap);
2849 	    }
2850 	    break;
2851 	case 'w':
2852             while (len-- > 0) {
2853 		NV anv;
2854 		fromstr = NEXTFROM;
2855 		S_sv_check_infnan(aTHX_ fromstr, datumtype);
2856 		anv = SvNV_nomg(fromstr);
2857 
2858 		if (anv < 0) {
2859 		    *cur = '\0';
2860 		    SvCUR_set(cat, cur - start);
2861 		    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2862 		}
2863 
2864                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2865                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2866                    any negative IVs will have already been got by the croak()
2867                    above. IOK is untrue for fractions, so we test them
2868                    against UV_MAX_P1.  */
2869 		if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2870 		    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
2871 		    char  *in = buf + sizeof(buf);
2872 		    UV     auv = SvUV_nomg(fromstr);
2873 
2874 		    do {
2875 			*--in = (char)((auv & 0x7f) | 0x80);
2876 			auv >>= 7;
2877 		    } while (auv);
2878 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2879 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2880 				       in, (buf + sizeof(buf)) - in);
2881 		} else if (SvPOKp(fromstr))
2882 		    goto w_string;
2883 		else if (SvNOKp(fromstr)) {
2884 		    /* 10**NV_MAX_10_EXP is the largest power of 10
2885 		       so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2886 		       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2887 		       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2888 		       And with that many bytes only Inf can overflow.
2889 		       Some C compilers are strict about integral constant
2890 		       expressions so we conservatively divide by a slightly
2891 		       smaller integer instead of multiplying by the exact
2892 		       floating-point value.
2893 		    */
2894 #ifdef NV_MAX_10_EXP
2895 		    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2896 		    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2897 #else
2898 		    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2899 		    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2900 #endif
2901 		    char  *in = buf + sizeof(buf);
2902 
2903 		    anv = Perl_floor(anv);
2904 		    do {
2905 			const NV next = Perl_floor(anv / 128);
2906 			if (in <= buf)  /* this cannot happen ;-) */
2907 			    Perl_croak(aTHX_ "Cannot compress integer in pack");
2908 			*--in = (unsigned char)(anv - (next * 128)) | 0x80;
2909 			anv = next;
2910 		    } while (anv > 0);
2911 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2912 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2913 				       in, (buf + sizeof(buf)) - in);
2914 		} else {
2915 		    const char     *from;
2916 		    char           *result, *in;
2917 		    SV             *norm;
2918 		    STRLEN          len;
2919 		    bool            done;
2920 
2921 		  w_string:
2922 		    /* Copy string and check for compliance */
2923 		    from = SvPV_nomg_const(fromstr, len);
2924 		    if ((norm = is_an_int(from, len)) == NULL)
2925 			Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2926 
2927 		    Newx(result, len, char);
2928 		    in = result + len;
2929 		    done = FALSE;
2930 		    while (!done) *--in = div128(norm, &done) | 0x80;
2931 		    result[len - 1] &= 0x7F; /* clear continue bit */
2932 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2933 				       in, (result + len) - in);
2934 		    Safefree(result);
2935 		    SvREFCNT_dec(norm);	/* free norm */
2936 		}
2937 	    }
2938             break;
2939 	case 'i':
2940 	case 'i' | TYPE_IS_SHRIEKING:
2941 	    while (len-- > 0) {
2942 		int aint;
2943 		fromstr = NEXTFROM;
2944 		aint = SvIV_no_inf(fromstr, datumtype);
2945                 PUSH_VAR(utf8, cur, aint, needs_swap);
2946 	    }
2947 	    break;
2948 	case 'N' | TYPE_IS_SHRIEKING:
2949 	case 'N':
2950 	    while (len-- > 0) {
2951 		U32 au32;
2952 		fromstr = NEXTFROM;
2953 		au32 = SvUV_no_inf(fromstr, datumtype);
2954 		au32 = PerlSock_htonl(au32);
2955                 PUSH32(utf8, cur, &au32, FALSE);
2956 	    }
2957 	    break;
2958 	case 'V' | TYPE_IS_SHRIEKING:
2959 	case 'V':
2960 	    while (len-- > 0) {
2961 		U32 au32;
2962 		fromstr = NEXTFROM;
2963 		au32 = SvUV_no_inf(fromstr, datumtype);
2964 		au32 = htovl(au32);
2965                 PUSH32(utf8, cur, &au32, FALSE);
2966 	    }
2967 	    break;
2968 	case 'L' | TYPE_IS_SHRIEKING:
2969 #if LONGSIZE != SIZE32
2970 	    while (len-- > 0) {
2971 		unsigned long aulong;
2972 		fromstr = NEXTFROM;
2973 		aulong = SvUV_no_inf(fromstr, datumtype);
2974                 PUSH_VAR(utf8, cur, aulong, needs_swap);
2975 	    }
2976 	    break;
2977 #else
2978             /* Fall though! */
2979 #endif
2980 	case 'L':
2981 	    while (len-- > 0) {
2982 		U32 au32;
2983 		fromstr = NEXTFROM;
2984 		au32 = SvUV_no_inf(fromstr, datumtype);
2985                 PUSH32(utf8, cur, &au32, needs_swap);
2986 	    }
2987 	    break;
2988 	case 'l' | TYPE_IS_SHRIEKING:
2989 #if LONGSIZE != SIZE32
2990 	    while (len-- > 0) {
2991 		long along;
2992 		fromstr = NEXTFROM;
2993 		along = SvIV_no_inf(fromstr, datumtype);
2994                 PUSH_VAR(utf8, cur, along, needs_swap);
2995 	    }
2996 	    break;
2997 #else
2998             /* Fall though! */
2999 #endif
3000 	case 'l':
3001             while (len-- > 0) {
3002 		I32 ai32;
3003 		fromstr = NEXTFROM;
3004 		ai32 = SvIV_no_inf(fromstr, datumtype);
3005                 PUSH32(utf8, cur, &ai32, needs_swap);
3006 	    }
3007 	    break;
3008 #if defined(HAS_QUAD) && IVSIZE >= 8
3009 	case 'Q':
3010 	    while (len-- > 0) {
3011 		Uquad_t auquad;
3012 		fromstr = NEXTFROM;
3013 		auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3014                 PUSH_VAR(utf8, cur, auquad, needs_swap);
3015 	    }
3016 	    break;
3017 	case 'q':
3018 	    while (len-- > 0) {
3019 		Quad_t aquad;
3020 		fromstr = NEXTFROM;
3021 		aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3022                 PUSH_VAR(utf8, cur, aquad, needs_swap);
3023 	    }
3024 	    break;
3025 #endif
3026 	case 'P':
3027 	    len = 1;		/* assume SV is correct length */
3028 	    GROWING(utf8, cat, start, cur, sizeof(char *));
3029 	    /* FALLTHROUGH */
3030 	case 'p':
3031 	    while (len-- > 0) {
3032 		const char *aptr;
3033 
3034 		fromstr = NEXTFROM;
3035 		SvGETMAGIC(fromstr);
3036 		if (!SvOK(fromstr)) aptr = NULL;
3037 		else {
3038 		    /* XXX better yet, could spirit away the string to
3039 		     * a safe spot and hang on to it until the result
3040 		     * of pack() (and all copies of the result) are
3041 		     * gone.
3042 		     */
3043 		    if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3044 			     !SvREADONLY(fromstr)))) {
3045 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3046 				       "Attempt to pack pointer to temporary value");
3047 		    }
3048 		    if (SvPOK(fromstr) || SvNIOK(fromstr))
3049 			aptr = SvPV_nomg_const_nolen(fromstr);
3050 		    else
3051 			aptr = SvPV_force_flags_nolen(fromstr, 0);
3052 		}
3053                 PUSH_VAR(utf8, cur, aptr, needs_swap);
3054 	    }
3055 	    break;
3056 	case 'u': {
3057 	    const char *aptr, *aend;
3058 	    bool from_utf8;
3059 
3060 	    fromstr = NEXTFROM;
3061 	    if (len <= 2) len = 45;
3062 	    else len = len / 3 * 3;
3063 	    if (len >= 64) {
3064 		Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3065 			       "Field too wide in 'u' format in pack");
3066 		len = 63;
3067 	    }
3068 	    aptr = SvPV_const(fromstr, fromlen);
3069 	    from_utf8 = DO_UTF8(fromstr);
3070 	    if (from_utf8) {
3071 		aend = aptr + fromlen;
3072 		fromlen = sv_len_utf8_nomg(fromstr);
3073 	    } else aend = NULL; /* Unused, but keep compilers happy */
3074 	    GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3075 	    while (fromlen > 0) {
3076 		U8 *end;
3077 		I32 todo;
3078 		U8 hunk[1+63/3*4+1];
3079 
3080 		if ((I32)fromlen > len)
3081 		    todo = len;
3082 		else
3083 		    todo = fromlen;
3084 		if (from_utf8) {
3085 		    char buffer[64];
3086 		    if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3087 				      'u' | TYPE_IS_PACK)) {
3088 			*cur = '\0';
3089 			SvCUR_set(cat, cur - start);
3090 			Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3091 				   "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3092 				   aptr, aend, buffer, (long) todo);
3093 		    }
3094 		    end = doencodes(hunk, (const U8 *)buffer, todo);
3095 		} else {
3096 		    end = doencodes(hunk, (const U8 *)aptr, todo);
3097 		    aptr += todo;
3098 		}
3099 		PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3100 		fromlen -= todo;
3101 	    }
3102 	    break;
3103 	}
3104 	}
3105 	*cur = '\0';
3106 	SvCUR_set(cat, cur - start);
3107       no_change:
3108 	*symptr = lookahead;
3109     }
3110     return beglist;
3111 }
3112 #undef NEXTFROM
3113 
3114 
3115 PP(pp_pack)
3116 {
3117     dSP; dMARK; dORIGMARK; dTARGET;
3118     SV *cat = TARG;
3119     STRLEN fromlen;
3120     SV *pat_sv = *++MARK;
3121     const char *pat = SvPV_const(pat_sv, fromlen);
3122     const char *patend = pat + fromlen;
3123 
3124     MARK++;
3125     sv_setpvs(cat, "");
3126     SvUTF8_off(cat);
3127 
3128     packlist(cat, pat, patend, MARK, SP + 1);
3129 
3130     SvSETMAGIC(cat);
3131     SP = ORIGMARK;
3132     PUSHs(cat);
3133     RETURN;
3134 }
3135 
3136 /*
3137  * ex: set ts=8 sts=4 sw=4 et:
3138  */
3139