xref: /openbsd-src/gnu/usr.bin/perl/pp_pack.c (revision d1df930ffab53da22f3324c32bed7ac5709915e6)
1 /*    pp_pack.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * He still hopefully carried some of his gear in his pack: a small tinder-box,
13  * two small shallow pans, the smaller fitting into the larger; inside them a
14  * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15  * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16  * some salt.
17  *
18  *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19  */
20 
21 /* This file contains pp ("push/pop") functions that
22  * execute the opcodes that make up a perl program. A typical pp function
23  * expects to find its arguments on the stack, and usually pushes its
24  * results onto the stack, hence the 'pp' terminology. Each OP structure
25  * contains a pointer to the relevant pp_foo() function.
26  *
27  * This particular file just contains pp_pack() and pp_unpack(). See the
28  * other pp*.c files for the rest of the pp_ functions.
29  */
30 
31 #include "EXTERN.h"
32 #define PERL_IN_PP_PACK_C
33 #include "perl.h"
34 
35 /* Types used by pack/unpack */
36 typedef enum {
37   e_no_len,     /* no length  */
38   e_number,     /* number, [] */
39   e_star        /* asterisk   */
40 } howlen_t;
41 
42 typedef struct tempsym {
43   const char*    patptr;   /* current template char */
44   const char*    patend;   /* one after last char   */
45   const char*    grpbeg;   /* 1st char of ()-group  */
46   const char*    grpend;   /* end of ()-group       */
47   I32      code;     /* template code (!<>)   */
48   I32      length;   /* length/repeat count   */
49   howlen_t howlen;   /* how length is given   */
50   int      level;    /* () nesting level      */
51   U32      flags;    /* /=4, comma=2, pack=1  */
52                      /*   and group modifiers */
53   STRLEN   strbeg;   /* offset of group start */
54   struct tempsym *previous; /* previous group */
55 } tempsym_t;
56 
57 #define TEMPSYM_INIT(symptr, p, e, f) \
58     STMT_START {	\
59 	(symptr)->patptr   = (p);	\
60 	(symptr)->patend   = (e);	\
61 	(symptr)->grpbeg   = NULL;	\
62 	(symptr)->grpend   = NULL;	\
63 	(symptr)->grpend   = NULL;	\
64 	(symptr)->code     = 0;		\
65 	(symptr)->length   = 0;		\
66 	(symptr)->howlen   = e_no_len;	\
67 	(symptr)->level    = 0;		\
68 	(symptr)->flags    = (f);	\
69 	(symptr)->strbeg   = 0;		\
70 	(symptr)->previous = NULL;	\
71    } STMT_END
72 
73 typedef union {
74     NV nv;
75     U8 bytes[sizeof(NV)];
76 } NV_bytes;
77 
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79 typedef union {
80     long double ld;
81     U8 bytes[sizeof(long double)];
82 } ld_bytes;
83 #endif
84 
85 #ifndef CHAR_BIT
86 # define CHAR_BIT	8
87 #endif
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
89 #define UTF8_EXPAND	2
90 
91 /*
92  * Offset for integer pack/unpack.
93  *
94  * On architectures where I16 and I32 aren't really 16 and 32 bits,
95  * which for now are all Crays, pack and unpack have to play games.
96  */
97 
98 /*
99  * These values are required for portability of pack() output.
100  * If they're not right on your machine, then pack() and unpack()
101  * wouldn't work right anyway; you'll need to apply the Cray hack.
102  * (I'd like to check them with #if, but you can't use sizeof() in
103  * the preprocessor.)  --???
104  */
105 /*
106     The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107     defines are now in config.h.  --Andy Dougherty  April 1998
108  */
109 #define SIZE16 2
110 #define SIZE32 4
111 
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113    --jhi Feb 1999 */
114 
115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
116 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
117 #    define OFF16(p)	((char*)(p))
118 #    define OFF32(p)	((char*)(p))
119 #  else
120 #    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
121 #      define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
122 #      define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
123 #    else
124        ++++ bad cray byte order
125 #    endif
126 #  endif
127 #else
128 #  define OFF16(p)     ((char *) (p))
129 #  define OFF32(p)     ((char *) (p))
130 #endif
131 
132 #define PUSH16(utf8, cur, p, needs_swap)                        \
133        PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
134 #define PUSH32(utf8, cur, p, needs_swap)                        \
135        PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
136 
137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
138 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678  /* little-endian */
140 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
141 #else
142 #  error "Unsupported byteorder"
143         /* Need to add code here to re-instate mixed endian support.
144            NEEDS_SWAP would need to hold a flag indicating which action to
145            take, and S_reverse_copy and the code in S_utf8_to_bytes would need
146            logic adding to deal with any mixed-endian transformations needed.
147         */
148 #endif
149 
150 /* Only to be used inside a loop (see the break) */
151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)	\
152 STMT_START {						\
153     if (UNLIKELY(utf8)) {                               \
154         if (!S_utf8_to_bytes(aTHX_ &s, strend,		\
155 	  (char *) (buf), len, datumtype)) break;	\
156     } else {						\
157         if (UNLIKELY(needs_swap))                       \
158             S_reverse_copy(s, (char *) (buf), len);     \
159         else                                            \
160             Copy(s, (char *) (buf), len, char);		\
161         s += len;					\
162     }							\
163 } STMT_END
164 
165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap)              \
166        SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
167 
168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap)              \
169        SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
170 
171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap)          \
172        SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
173 
174 #define PUSH_VAR(utf8, aptr, var, needs_swap)           \
175        PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
176 
177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
178 #define MAX_SUB_TEMPLATE_LEVEL 100
179 
180 /* flags (note that type modifiers can also be used as flags!) */
181 #define FLAG_WAS_UTF8	      0x40
182 #define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
183 #define FLAG_UNPACK_ONLY_ONE  0x10
184 #define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
185 #define FLAG_SLASH            0x04
186 #define FLAG_COMMA            0x02
187 #define FLAG_PACK             0x01
188 
189 STATIC SV *
190 S_mul128(pTHX_ SV *sv, U8 m)
191 {
192   STRLEN          len;
193   char           *s = SvPV(sv, len);
194   char           *t;
195 
196   PERL_ARGS_ASSERT_MUL128;
197 
198   if (!strnEQ(s, "0000", 4)) {  /* need to grow sv */
199     SV * const tmpNew = newSVpvs("0000000000");
200 
201     sv_catsv(tmpNew, sv);
202     SvREFCNT_dec(sv);		/* free old sv */
203     sv = tmpNew;
204     s = SvPV(sv, len);
205   }
206   t = s + len - 1;
207   while (!*t)                   /* trailing '\0'? */
208     t--;
209   while (t > s) {
210     const U32 i = ((*t - '0') << 7) + m;
211     *(t--) = '0' + (char)(i % 10);
212     m = (char)(i / 10);
213   }
214   return (sv);
215 }
216 
217 /* Explosives and implosives. */
218 
219 #define ISUUCHAR(ch)    (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ')  \
220                       && NATIVE_TO_LATIN1(ch) <  NATIVE_TO_LATIN1('a'))
221 
222 /* type modifiers */
223 #define TYPE_IS_SHRIEKING	0x100
224 #define TYPE_IS_BIG_ENDIAN	0x200
225 #define TYPE_IS_LITTLE_ENDIAN	0x400
226 #define TYPE_IS_PACK		0x800
227 #define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
228 #define TYPE_MODIFIERS(t)	((t) & ~0xFF)
229 #define TYPE_NO_MODIFIERS(t)	((t) & 0xFF)
230 
231 # define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
232 # define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
233 
234 # define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
235 
236 #define PACK_SIZE_CANNOT_CSUM		0x80
237 #define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
238 #define PACK_SIZE_MASK			0x3F
239 
240 #include "packsizetables.inc"
241 
242 static void
243 S_reverse_copy(const char *src, char *dest, STRLEN len)
244 {
245     dest += len;
246     while (len--)
247         *--dest = *src++;
248 }
249 
250 STATIC U8
251 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
252 {
253     STRLEN retlen;
254     UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
255 			 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
256     /* We try to process malformed UTF-8 as much as possible (preferably with
257        warnings), but these two mean we make no progress in the string and
258        might enter an infinite loop */
259     if (retlen == (STRLEN) -1 || retlen == 0)
260 	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 		   (int) TYPE_NO_MODIFIERS(datumtype));
262     if (val >= 0x100) {
263 	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 		       "Character in '%c' format wrapped in unpack",
265 		       (int) TYPE_NO_MODIFIERS(datumtype));
266 	val &= 0xff;
267     }
268     *s += retlen;
269     return (U8)val;
270 }
271 
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 	utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
274 	*(U8 *)(s)++)
275 
276 STATIC bool
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
278 {
279     UV val;
280     STRLEN retlen;
281     const char *from = *s;
282     int bad = 0;
283     const U32 flags = ckWARN(WARN_UTF8) ?
284 	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285     const bool needs_swap = NEEDS_SWAP(datumtype);
286 
287     if (UNLIKELY(needs_swap))
288         buf += buf_len;
289 
290     for (;buf_len > 0; buf_len--) {
291 	if (from >= end) return FALSE;
292 	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 	if (retlen == (STRLEN) -1 || retlen == 0) {
294 	    from += UTF8SKIP(from);
295 	    bad |= 1;
296 	} else from += retlen;
297 	if (val >= 0x100) {
298 	    bad |= 2;
299 	    val &= 0xff;
300 	}
301         if (UNLIKELY(needs_swap))
302             *(U8 *)--buf = (U8)val;
303         else
304             *(U8 *)buf++ = (U8)val;
305     }
306     /* We have enough characters for the buffer. Did we have problems ? */
307     if (bad) {
308 	if (bad & 1) {
309 	    /* Rewalk the string fragment while warning */
310 	    const char *ptr;
311 	    const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 	    for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 		if (ptr >= end) break;
314 		utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 	    }
316 	    if (from > end) from = end;
317 	}
318 	if ((bad & 2))
319 	    Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 				       WARN_PACK : WARN_UNPACK),
321 			   "Character(s) in '%c' format wrapped in %s",
322 			   (int) TYPE_NO_MODIFIERS(datumtype),
323 			   datumtype & TYPE_IS_PACK ? "pack" : "unpack");
324     }
325     *s = from;
326     return TRUE;
327 }
328 
329 STATIC char *
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331     PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
332 
333     if (UNLIKELY(needs_swap)) {
334         const U8 *p = start + len;
335         while (p-- > start) {
336             append_utf8_from_native_byte(*p, (U8 **) & dest);
337         }
338     } else {
339         const U8 * const end = start + len;
340         while (start < end) {
341             append_utf8_from_native_byte(*start, (U8 **) & dest);
342             start++;
343         }
344     }
345     return dest;
346 }
347 
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
349 STMT_START {							\
350     if (UNLIKELY(utf8))	                                        \
351 	(cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap);       \
352     else {							\
353         if (UNLIKELY(needs_swap))                               \
354             S_reverse_copy((char *)(buf), cur, len);            \
355         else                                                    \
356             Copy(buf, cur, len, char);				\
357 	(cur) += (len);						\
358     }								\
359 } STMT_END
360 
361 #define SAFE_UTF8_EXPAND(var)	\
362 STMT_START {				\
363     if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364         Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365     (var) = (var) * UTF8_EXPAND; \
366 } STMT_END
367 
368 #define GROWING2(utf8, cat, start, cur, item_size, item_count)	\
369 STMT_START {							\
370     if (SSize_t_MAX / (item_size) < (item_count))		\
371         Perl_croak(aTHX_ "%s", "Out of memory during pack()");	\
372     GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
373 } STMT_END
374 
375 #define GROWING(utf8, cat, start, cur, in_len)	\
376 STMT_START {					\
377     STRLEN glen = (in_len);			\
378     STRLEN catcur = (STRLEN)((cur) - (start));	\
379     if (utf8) SAFE_UTF8_EXPAND(glen);		\
380     if (SSize_t_MAX - glen < catcur)		\
381         Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382     if (catcur + glen >= SvLEN(cat)) {	\
383 	(start) = sv_exp_grow(cat, glen);	\
384 	(cur) = (start) + SvCUR(cat);		\
385     }						\
386 } STMT_END
387 
388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389 STMT_START {					\
390     const STRLEN glen = (in_len);		\
391     STRLEN gl = glen;				\
392     if (utf8) SAFE_UTF8_EXPAND(gl);		\
393     if ((cur) + gl >= (start) + SvLEN(cat)) {	\
394         *cur = '\0';				\
395         SvCUR_set((cat), (cur) - (start));	\
396 	(start) = sv_exp_grow(cat, gl);		\
397 	(cur) = (start) + SvCUR(cat);		\
398     }						\
399     PUSH_BYTES(utf8, cur, buf, glen, 0);        \
400 } STMT_END
401 
402 #define PUSH_BYTE(utf8, s, byte)		\
403 STMT_START {					\
404     if (utf8) {					\
405 	const U8 au8 = (byte);			\
406 	(s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
407     } else *(U8 *)(s)++ = (byte);		\
408 } STMT_END
409 
410 /* Only to be used inside a loop (see the break) */
411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
412 STMT_START {							\
413     STRLEN retlen;						\
414     if (str >= end) break;					\
415     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
416     if (retlen == (STRLEN) -1 || retlen == 0) {			\
417 	*cur = '\0';						\
418 	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
419     }								\
420     str += retlen;						\
421 } STMT_END
422 
423 static const char *_action( const tempsym_t* symptr )
424 {
425     return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
426 }
427 
428 /* Returns the sizeof() struct described by pat */
429 STATIC I32
430 S_measure_struct(pTHX_ tempsym_t* symptr)
431 {
432     I32 total = 0;
433 
434     PERL_ARGS_ASSERT_MEASURE_STRUCT;
435 
436     while (next_symbol(symptr)) {
437 	I32 len;
438 	int size;
439 
440         switch (symptr->howlen) {
441 	  case e_star:
442    	    Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
443                         _action( symptr ) );
444 
445 	  default:
446 	    /* e_no_len and e_number */
447 	    len = symptr->length;
448 	    break;
449         }
450 
451 	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
452 	if (!size) {
453             int star;
454 	    /* endianness doesn't influence the size of a type */
455 	    switch(TYPE_NO_ENDIANNESS(symptr->code)) {
456 	    default:
457 		Perl_croak(aTHX_ "Invalid type '%c' in %s",
458 			   (int)TYPE_NO_MODIFIERS(symptr->code),
459                            _action( symptr ) );
460 	    case '.' | TYPE_IS_SHRIEKING:
461 	    case '@' | TYPE_IS_SHRIEKING:
462 	    case '@':
463 	    case '.':
464 	    case '/':
465 	    case 'U':			/* XXXX Is it correct? */
466 	    case 'w':
467 	    case 'u':
468 		Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
469 			   (int) TYPE_NO_MODIFIERS(symptr->code),
470                            _action( symptr ) );
471 	    case '%':
472 		size = 0;
473 		break;
474 	    case '(':
475 	    {
476 		tempsym_t savsym = *symptr;
477 		symptr->patptr = savsym.grpbeg;
478 		symptr->patend = savsym.grpend;
479 		/* XXXX Theoretically, we need to measure many times at
480 		   different positions, since the subexpression may contain
481 		   alignment commands, but be not of aligned length.
482 		   Need to detect this and croak().  */
483 		size = measure_struct(symptr);
484 		*symptr = savsym;
485 		break;
486 	    }
487 	    case 'X' | TYPE_IS_SHRIEKING:
488 		/* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
489 		 */
490 		if (!len)		/* Avoid division by 0 */
491 		    len = 1;
492 		len = total % len;	/* Assumed: the start is aligned. */
493 		/* FALLTHROUGH */
494 	    case 'X':
495 		size = -1;
496 		if (total < len)
497                     Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
498 		break;
499 	    case 'x' | TYPE_IS_SHRIEKING:
500 		if (!len)		/* Avoid division by 0 */
501 		    len = 1;
502 		star = total % len;	/* Assumed: the start is aligned. */
503 		if (star)		/* Other portable ways? */
504 		    len = len - star;
505 		else
506 		    len = 0;
507 		/* FALLTHROUGH */
508 	    case 'x':
509 	    case 'A':
510 	    case 'Z':
511 	    case 'a':
512 		size = 1;
513 		break;
514 	    case 'B':
515 	    case 'b':
516 		len = (len + 7)/8;
517 		size = 1;
518 		break;
519 	    case 'H':
520 	    case 'h':
521 		len = (len + 1)/2;
522 		size = 1;
523 		break;
524 
525 	    case 'P':
526 		len = 1;
527 		size = sizeof(char*);
528 		break;
529 	    }
530 	}
531 	total += len * size;
532     }
533     return total;
534 }
535 
536 
537 /* locate matching closing parenthesis or bracket
538  * returns char pointer to char after match, or NULL
539  */
540 STATIC const char *
541 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
542 {
543     PERL_ARGS_ASSERT_GROUP_END;
544 
545     while (patptr < patend) {
546 	const char c = *patptr++;
547 
548 	if (isSPACE(c))
549 	    continue;
550 	else if (c == ender)
551 	    return patptr-1;
552 	else if (c == '#') {
553 	    while (patptr < patend && *patptr != '\n')
554 		patptr++;
555 	    continue;
556 	} else if (c == '(')
557 	    patptr = group_end(patptr, patend, ')') + 1;
558 	else if (c == '[')
559 	    patptr = group_end(patptr, patend, ']') + 1;
560     }
561     Perl_croak(aTHX_ "No group ending character '%c' found in template",
562                ender);
563     NOT_REACHED; /* NOTREACHED */
564 }
565 
566 
567 /* Convert unsigned decimal number to binary.
568  * Expects a pointer to the first digit and address of length variable
569  * Advances char pointer to 1st non-digit char and returns number
570  */
571 STATIC const char *
572 S_get_num(pTHX_ const char *patptr, I32 *lenptr )
573 {
574   I32 len = *patptr++ - '0';
575 
576   PERL_ARGS_ASSERT_GET_NUM;
577 
578   while (isDIGIT(*patptr)) {
579     if (len >= 0x7FFFFFFF/10)
580       Perl_croak(aTHX_ "pack/unpack repeat count overflow");
581     len = (len * 10) + (*patptr++ - '0');
582   }
583   *lenptr = len;
584   return patptr;
585 }
586 
587 /* The marvellous template parsing routine: Using state stored in *symptr,
588  * locates next template code and count
589  */
590 STATIC bool
591 S_next_symbol(pTHX_ tempsym_t* symptr )
592 {
593   const char* patptr = symptr->patptr;
594   const char* const patend = symptr->patend;
595 
596   PERL_ARGS_ASSERT_NEXT_SYMBOL;
597 
598   symptr->flags &= ~FLAG_SLASH;
599 
600   while (patptr < patend) {
601     if (isSPACE(*patptr))
602       patptr++;
603     else if (*patptr == '#') {
604       patptr++;
605       while (patptr < patend && *patptr != '\n')
606 	patptr++;
607       if (patptr < patend)
608 	patptr++;
609     } else {
610       /* We should have found a template code */
611       I32 code = *patptr++ & 0xFF;
612       U32 inherited_modifiers = 0;
613 
614       if (code == ','){ /* grandfather in commas but with a warning */
615 	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
616           symptr->flags |= FLAG_COMMA;
617 	  Perl_warner(aTHX_ packWARN(WARN_UNPACK),
618 	 	      "Invalid type ',' in %s", _action( symptr ) );
619         }
620 	continue;
621       }
622 
623       /* for '(', skip to ')' */
624       if (code == '(') {
625         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
626           Perl_croak(aTHX_ "()-group starts with a count in %s",
627                         _action( symptr ) );
628         symptr->grpbeg = patptr;
629         patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
630         if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
631 	  Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
632                         _action( symptr ) );
633       }
634 
635       /* look for group modifiers to inherit */
636       if (TYPE_ENDIANNESS(symptr->flags)) {
637         if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
638           inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
639       }
640 
641       /* look for modifiers */
642       while (patptr < patend) {
643         const char *allowed;
644         I32 modifier;
645         switch (*patptr) {
646           case '!':
647             modifier = TYPE_IS_SHRIEKING;
648             allowed = "sSiIlLxXnNvV@.";
649             break;
650           case '>':
651             modifier = TYPE_IS_BIG_ENDIAN;
652             allowed = ENDIANNESS_ALLOWED_TYPES;
653             break;
654           case '<':
655             modifier = TYPE_IS_LITTLE_ENDIAN;
656             allowed = ENDIANNESS_ALLOWED_TYPES;
657             break;
658           default:
659             allowed = "";
660             modifier = 0;
661             break;
662         }
663 
664         if (modifier == 0)
665           break;
666 
667         if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
668           Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
669                         allowed, _action( symptr ) );
670 
671         if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
672           Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
673                      (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
674         else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
675                  TYPE_ENDIANNESS_MASK)
676           Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
677                      *patptr, _action( symptr ) );
678 
679         if ((code & modifier)) {
680 	    Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
681 			   "Duplicate modifier '%c' after '%c' in %s",
682 			   *patptr, (int) TYPE_NO_MODIFIERS(code),
683 			   _action( symptr ) );
684         }
685 
686         code |= modifier;
687         patptr++;
688       }
689 
690       /* inherit modifiers */
691       code |= inherited_modifiers;
692 
693       /* look for count and/or / */
694       if (patptr < patend) {
695 	if (isDIGIT(*patptr)) {
696  	  patptr = get_num( patptr, &symptr->length );
697           symptr->howlen = e_number;
698 
699         } else if (*patptr == '*') {
700           patptr++;
701           symptr->howlen = e_star;
702 
703         } else if (*patptr == '[') {
704           const char* lenptr = ++patptr;
705           symptr->howlen = e_number;
706           patptr = group_end( patptr, patend, ']' ) + 1;
707           /* what kind of [] is it? */
708           if (isDIGIT(*lenptr)) {
709             lenptr = get_num( lenptr, &symptr->length );
710             if( *lenptr != ']' )
711               Perl_croak(aTHX_ "Malformed integer in [] in %s",
712                             _action( symptr ) );
713           } else {
714             tempsym_t savsym = *symptr;
715             symptr->patend = patptr-1;
716             symptr->patptr = lenptr;
717             savsym.length = measure_struct(symptr);
718             *symptr = savsym;
719           }
720         } else {
721           symptr->howlen = e_no_len;
722           symptr->length = 1;
723         }
724 
725         /* try to find / */
726         while (patptr < patend) {
727           if (isSPACE(*patptr))
728             patptr++;
729           else if (*patptr == '#') {
730             patptr++;
731             while (patptr < patend && *patptr != '\n')
732 	      patptr++;
733             if (patptr < patend)
734 	      patptr++;
735           } else {
736             if (*patptr == '/') {
737               symptr->flags |= FLAG_SLASH;
738               patptr++;
739               if (patptr < patend &&
740                   (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
741                 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
742                             _action( symptr ) );
743             }
744             break;
745 	  }
746 	}
747       } else {
748         /* at end - no count, no / */
749         symptr->howlen = e_no_len;
750         symptr->length = 1;
751       }
752 
753       symptr->code = code;
754       symptr->patptr = patptr;
755       return TRUE;
756     }
757   }
758   symptr->patptr = patptr;
759   return FALSE;
760 }
761 
762 /*
763    There is no way to cleanly handle the case where we should process the
764    string per byte in its upgraded form while it's really in downgraded form
765    (e.g. estimates like strend-s as an upper bound for the number of
766    characters left wouldn't work). So if we foresee the need of this
767    (pattern starts with U or contains U0), we want to work on the encoded
768    version of the string. Users are advised to upgrade their pack string
769    themselves if they need to do a lot of unpacks like this on it
770 */
771 STATIC bool
772 need_utf8(const char *pat, const char *patend)
773 {
774     bool first = TRUE;
775 
776     PERL_ARGS_ASSERT_NEED_UTF8;
777 
778     while (pat < patend) {
779 	if (pat[0] == '#') {
780 	    pat++;
781 	    pat = (const char *) memchr(pat, '\n', patend-pat);
782 	    if (!pat) return FALSE;
783 	} else if (pat[0] == 'U') {
784 	    if (first || pat[1] == '0') return TRUE;
785 	} else first = FALSE;
786 	pat++;
787     }
788     return FALSE;
789 }
790 
791 STATIC char
792 first_symbol(const char *pat, const char *patend) {
793     PERL_ARGS_ASSERT_FIRST_SYMBOL;
794 
795     while (pat < patend) {
796 	if (pat[0] != '#') return pat[0];
797 	pat++;
798 	pat = (const char *) memchr(pat, '\n', patend-pat);
799 	if (!pat) return 0;
800 	pat++;
801     }
802     return 0;
803 }
804 
805 /*
806 
807 =head1 Pack and Unpack
808 
809 =for apidoc unpackstring
810 
811 The engine implementing the C<unpack()> Perl function.
812 
813 Using the template C<pat..patend>, this function unpacks the string
814 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
815 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
816 C<SPAGAIN> after the call to this function).  It returns the number of
817 pushed elements.
818 
819 The C<strend> and C<patend> pointers should point to the byte following the
820 last character of each string.
821 
822 Although this function returns its values on the perl argument stack, it
823 doesn't take any parameters from that stack (and thus in particular
824 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
825 example).
826 
827 =cut */
828 
829 I32
830 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
831 {
832     tempsym_t sym;
833 
834     PERL_ARGS_ASSERT_UNPACKSTRING;
835 
836     if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
837     else if (need_utf8(pat, patend)) {
838 	/* We probably should try to avoid this in case a scalar context call
839 	   wouldn't get to the "U0" */
840 	STRLEN len = strend - s;
841 	s = (char *) bytes_to_utf8((U8 *) s, &len);
842 	SAVEFREEPV(s);
843 	strend = s + len;
844 	flags |= FLAG_DO_UTF8;
845     }
846 
847     if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
848 	flags |= FLAG_PARSE_UTF8;
849 
850     TEMPSYM_INIT(&sym, pat, patend, flags);
851 
852     return unpack_rec(&sym, s, s, strend, NULL );
853 }
854 
855 STATIC I32
856 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
857 {
858     dSP;
859     SV *sv = NULL;
860     const I32 start_sp_offset = SP - PL_stack_base;
861     howlen_t howlen;
862     I32 checksum = 0;
863     UV cuv = 0;
864     NV cdouble = 0.0;
865     const int bits_in_uv = CHAR_BIT * sizeof(cuv);
866     bool beyond = FALSE;
867     bool explicit_length;
868     const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
869     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
870 
871     PERL_ARGS_ASSERT_UNPACK_REC;
872 
873     symptr->strbeg = s - strbeg;
874 
875     while (next_symbol(symptr)) {
876 	packprops_t props;
877 	I32 len;
878         I32 datumtype = symptr->code;
879         bool needs_swap;
880 	/* do first one only unless in list context
881 	   / is implemented by unpacking the count, then popping it from the
882 	   stack, so must check that we're not in the middle of a /  */
883         if ( unpack_only_one
884 	     && (SP - PL_stack_base == start_sp_offset + 1)
885 	     && (datumtype != '/') )   /* XXX can this be omitted */
886             break;
887 
888         switch (howlen = symptr->howlen) {
889 	  case e_star:
890 	    len = strend - strbeg;	/* long enough */
891 	    break;
892 	  default:
893 	    /* e_no_len and e_number */
894 	    len = symptr->length;
895 	    break;
896         }
897 
898         explicit_length = TRUE;
899       redo_switch:
900         beyond = s >= strend;
901 
902 	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
903 	if (props) {
904 	    /* props nonzero means we can process this letter. */
905             const long size = props & PACK_SIZE_MASK;
906             const long howmany = (strend - s) / size;
907 	    if (len > howmany)
908 		len = howmany;
909 
910 	    if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
911 		if (len && unpack_only_one) len = 1;
912 		EXTEND(SP, len);
913 		EXTEND_MORTAL(len);
914 	    }
915 	}
916 
917         needs_swap = NEEDS_SWAP(datumtype);
918 
919 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
920 	default:
921 	    Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
922 
923 	case '%':
924 	    if (howlen == e_no_len)
925 		len = 16;		/* len is not specified */
926 	    checksum = len;
927 	    cuv = 0;
928 	    cdouble = 0;
929 	    continue;
930 
931 	case '(':
932 	{
933             tempsym_t savsym = *symptr;
934             const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
935 	    symptr->flags |= group_modifiers;
936             symptr->patend = savsym.grpend;
937 	    symptr->previous = &savsym;
938             symptr->level++;
939 	    PUTBACK;
940 	    if (len && unpack_only_one) len = 1;
941 	    while (len--) {
942   	        symptr->patptr = savsym.grpbeg;
943 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
944 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
945  	        unpack_rec(symptr, s, strbeg, strend, &s);
946                 if (s == strend && savsym.howlen == e_star)
947 		    break; /* No way to continue */
948 	    }
949 	    SPAGAIN;
950             savsym.flags = symptr->flags & ~group_modifiers;
951             *symptr = savsym;
952 	    break;
953 	}
954 	case '.' | TYPE_IS_SHRIEKING:
955 	case '.': {
956 	    const char *from;
957 	    SV *sv;
958 	    const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
959 	    if (howlen == e_star) from = strbeg;
960 	    else if (len <= 0) from = s;
961 	    else {
962 		tempsym_t *group = symptr;
963 
964 		while (--len && group) group = group->previous;
965 		from = group ? strbeg + group->strbeg : strbeg;
966 	    }
967 	    sv = from <= s ?
968 		newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
969 		newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
970 	    mXPUSHs(sv);
971 	    break;
972 	}
973 	case '@' | TYPE_IS_SHRIEKING:
974 	case '@':
975 	    s = strbeg + symptr->strbeg;
976 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
977 	    {
978 		while (len > 0) {
979 		    if (s >= strend)
980 			Perl_croak(aTHX_ "'@' outside of string in unpack");
981 		    s += UTF8SKIP(s);
982 		    len--;
983 		}
984 		if (s > strend)
985 		    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
986 	    } else {
987 		if (strend-s < len)
988 		    Perl_croak(aTHX_ "'@' outside of string in unpack");
989 		s += len;
990 	    }
991 	    break;
992  	case 'X' | TYPE_IS_SHRIEKING:
993  	    if (!len)			/* Avoid division by 0 */
994  		len = 1;
995 	    if (utf8) {
996 		const char *hop, *last;
997 		I32 l = len;
998 		hop = last = strbeg;
999 		while (hop < s) {
1000 		    hop += UTF8SKIP(hop);
1001 		    if (--l == 0) {
1002 			last = hop;
1003 			l = len;
1004 		    }
1005 		}
1006 		if (last > s)
1007 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1008 		s = last;
1009 		break;
1010 	    }
1011 	    len = (s - strbeg) % len;
1012  	    /* FALLTHROUGH */
1013 	case 'X':
1014 	    if (utf8) {
1015 		while (len > 0) {
1016 		    if (s <= strbeg)
1017 			Perl_croak(aTHX_ "'X' outside of string in unpack");
1018 		    while (--s, UTF8_IS_CONTINUATION(*s)) {
1019 			if (s <= strbeg)
1020 			    Perl_croak(aTHX_ "'X' outside of string in unpack");
1021 		    }
1022 		    len--;
1023 		}
1024 	    } else {
1025 		if (len > s - strbeg)
1026 		    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1027 		s -= len;
1028 	    }
1029 	    break;
1030  	case 'x' | TYPE_IS_SHRIEKING: {
1031             I32 ai32;
1032  	    if (!len)			/* Avoid division by 0 */
1033  		len = 1;
1034 	    if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1035 	    else      ai32 = (s - strbeg)                         % len;
1036 	    if (ai32 == 0) break;
1037 	    len -= ai32;
1038             }
1039  	    /* FALLTHROUGH */
1040 	case 'x':
1041 	    if (utf8) {
1042 		while (len>0) {
1043 		    if (s >= strend)
1044 			Perl_croak(aTHX_ "'x' outside of string in unpack");
1045 		    s += UTF8SKIP(s);
1046 		    len--;
1047 		}
1048 	    } else {
1049 		if (len > strend - s)
1050 		    Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 		s += len;
1052 	    }
1053 	    break;
1054 	case '/':
1055 	    Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1056 
1057 	case 'A':
1058 	case 'Z':
1059 	case 'a':
1060 	    if (checksum) {
1061 		/* Preliminary length estimate is assumed done in 'W' */
1062 		if (len > strend - s) len = strend - s;
1063 		goto W_checksum;
1064 	    }
1065 	    if (utf8) {
1066 		I32 l;
1067 		const char *hop;
1068 		for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1069 		    if (hop >= strend) {
1070 			if (hop > strend)
1071 			    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1072 			break;
1073 		    }
1074 		}
1075 		if (hop > strend)
1076 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1077 		len = hop - s;
1078 	    } else if (len > strend - s)
1079 		len = strend - s;
1080 
1081 	    if (datumtype == 'Z') {
1082 		/* 'Z' strips stuff after first null */
1083 		const char *ptr, *end;
1084 		end = s + len;
1085 		for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1086 		sv = newSVpvn(s, ptr-s);
1087 		if (howlen == e_star) /* exact for 'Z*' */
1088 		    len = ptr-s + (ptr != strend ? 1 : 0);
1089 	    } else if (datumtype == 'A') {
1090 		/* 'A' strips both nulls and spaces */
1091 		const char *ptr;
1092 		if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1093 		    for (ptr = s+len-1; ptr >= s; ptr--)
1094 			if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1095 			    !isSPACE_utf8(ptr)) break;
1096 		    if (ptr >= s) ptr += UTF8SKIP(ptr);
1097 		    else ptr++;
1098 		    if (ptr > s+len)
1099 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1100 		} else {
1101 		    for (ptr = s+len-1; ptr >= s; ptr--)
1102 			if (*ptr != 0 && !isSPACE(*ptr)) break;
1103 		    ptr++;
1104 		}
1105 		sv = newSVpvn(s, ptr-s);
1106 	    } else sv = newSVpvn(s, len);
1107 
1108 	    if (utf8) {
1109 		SvUTF8_on(sv);
1110 		/* Undo any upgrade done due to need_utf8() */
1111 		if (!(symptr->flags & FLAG_WAS_UTF8))
1112 		    sv_utf8_downgrade(sv, 0);
1113 	    }
1114 	    mXPUSHs(sv);
1115 	    s += len;
1116 	    break;
1117 	case 'B':
1118 	case 'b': {
1119 	    char *str;
1120 	    if (howlen == e_star || len > (strend - s) * 8)
1121 		len = (strend - s) * 8;
1122 	    if (checksum) {
1123 		if (utf8)
1124 		    while (len >= 8 && s < strend) {
1125 			cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1126 			len -= 8;
1127 		    }
1128 		else
1129 		    while (len >= 8) {
1130 			cuv += PL_bitcount[*(U8 *)s++];
1131 			len -= 8;
1132 		    }
1133 		if (len && s < strend) {
1134 		    U8 bits;
1135 		    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1136 		    if (datumtype == 'b')
1137 			while (len-- > 0) {
1138 			    if (bits & 1) cuv++;
1139 			    bits >>= 1;
1140 			}
1141 		    else
1142 			while (len-- > 0) {
1143 			    if (bits & 0x80) cuv++;
1144 			    bits <<= 1;
1145 			}
1146 		}
1147 		break;
1148 	    }
1149 
1150 	    sv = sv_2mortal(newSV(len ? len : 1));
1151 	    SvPOK_on(sv);
1152 	    str = SvPVX(sv);
1153 	    if (datumtype == 'b') {
1154 		U8 bits = 0;
1155 		const I32 ai32 = len;
1156 		for (len = 0; len < ai32; len++) {
1157 		    if (len & 7) bits >>= 1;
1158 		    else if (utf8) {
1159 			if (s >= strend) break;
1160 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1161 		    } else bits = *(U8 *) s++;
1162 		    *str++ = bits & 1 ? '1' : '0';
1163 		}
1164 	    } else {
1165 		U8 bits = 0;
1166 		const I32 ai32 = len;
1167 		for (len = 0; len < ai32; len++) {
1168 		    if (len & 7) bits <<= 1;
1169 		    else if (utf8) {
1170 			if (s >= strend) break;
1171 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1172 		    } else bits = *(U8 *) s++;
1173 		    *str++ = bits & 0x80 ? '1' : '0';
1174 		}
1175 	    }
1176 	    *str = '\0';
1177 	    SvCUR_set(sv, str - SvPVX_const(sv));
1178 	    XPUSHs(sv);
1179 	    break;
1180 	}
1181 	case 'H':
1182 	case 'h': {
1183 	    char *str = NULL;
1184 	    /* Preliminary length estimate, acceptable for utf8 too */
1185 	    if (howlen == e_star || len > (strend - s) * 2)
1186 		len = (strend - s) * 2;
1187 	    if (!checksum) {
1188 		sv = sv_2mortal(newSV(len ? len : 1));
1189 		SvPOK_on(sv);
1190 		str = SvPVX(sv);
1191 	    }
1192 	    if (datumtype == 'h') {
1193 		U8 bits = 0;
1194 		I32 ai32 = len;
1195 		for (len = 0; len < ai32; len++) {
1196 		    if (len & 1) bits >>= 4;
1197 		    else if (utf8) {
1198 			if (s >= strend) break;
1199 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1200 		    } else bits = * (U8 *) s++;
1201 		    if (!checksum)
1202 			*str++ = PL_hexdigit[bits & 15];
1203 		}
1204 	    } else {
1205 		U8 bits = 0;
1206 		const I32 ai32 = len;
1207 		for (len = 0; len < ai32; len++) {
1208 		    if (len & 1) bits <<= 4;
1209 		    else if (utf8) {
1210 			if (s >= strend) break;
1211 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1212 		    } else bits = *(U8 *) s++;
1213 		    if (!checksum)
1214 			*str++ = PL_hexdigit[(bits >> 4) & 15];
1215 		}
1216 	    }
1217 	    if (!checksum) {
1218 		*str = '\0';
1219 		SvCUR_set(sv, str - SvPVX_const(sv));
1220 		XPUSHs(sv);
1221 	    }
1222 	    break;
1223 	}
1224 	case 'C':
1225             if (len == 0) {
1226                 if (explicit_length)
1227 		    /* Switch to "character" mode */
1228 		    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1229 		break;
1230 	    }
1231 	    /* FALLTHROUGH */
1232 	case 'c':
1233 	    while (len-- > 0 && s < strend) {
1234 		int aint;
1235 		if (utf8)
1236 		  {
1237 		    STRLEN retlen;
1238 		    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1239 				 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1240 		    if (retlen == (STRLEN) -1 || retlen == 0)
1241 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1242 		    s += retlen;
1243 		  }
1244 		else
1245 		  aint = *(U8 *)(s)++;
1246 		if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1247 		    aint -= 256;
1248 		if (!checksum)
1249 		    mPUSHi(aint);
1250 		else if (checksum > bits_in_uv)
1251 		    cdouble += (NV)aint;
1252 		else
1253 		    cuv += aint;
1254 	    }
1255 	    break;
1256 	case 'W':
1257 	  W_checksum:
1258 	    if (utf8) {
1259 		while (len-- > 0 && s < strend) {
1260 		    STRLEN retlen;
1261 		    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1262 					 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1263 		    if (retlen == (STRLEN) -1 || retlen == 0)
1264 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1265 		    s += retlen;
1266 		    if (!checksum)
1267 			mPUSHu(val);
1268 		    else if (checksum > bits_in_uv)
1269 			cdouble += (NV) val;
1270 		    else
1271 			cuv += val;
1272 		}
1273 	    } else if (!checksum)
1274 		while (len-- > 0) {
1275 		    const U8 ch = *(U8 *) s++;
1276 		    mPUSHu(ch);
1277 	    }
1278 	    else if (checksum > bits_in_uv)
1279 		while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1280 	    else
1281 		while (len-- > 0) cuv += *(U8 *) s++;
1282 	    break;
1283 	case 'U':
1284 	    if (len == 0) {
1285                 if (explicit_length && howlen != e_star) {
1286 		    /* Switch to "bytes in UTF-8" mode */
1287 		    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1288 		    else
1289 			/* Should be impossible due to the need_utf8() test */
1290 			Perl_croak(aTHX_ "U0 mode on a byte string");
1291 		}
1292 		break;
1293 	    }
1294 	    if (len > strend - s) len = strend - s;
1295 	    if (!checksum) {
1296 		if (len && unpack_only_one) len = 1;
1297 		EXTEND(SP, len);
1298 		EXTEND_MORTAL(len);
1299 	    }
1300 	    while (len-- > 0 && s < strend) {
1301 		STRLEN retlen;
1302 		UV auv;
1303 		if (utf8) {
1304 		    U8 result[UTF8_MAXLEN];
1305 		    const char *ptr = s;
1306 		    STRLEN len;
1307 		    /* Bug: warns about bad utf8 even if we are short on bytes
1308 		       and will break out of the loop */
1309 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1310 				      'U'))
1311 			break;
1312 		    len = UTF8SKIP(result);
1313 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1314 				      (char *) &result[1], len-1, 'U')) break;
1315 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1316                                                        len,
1317                                                        &retlen,
1318                                                        UTF8_ALLOW_DEFAULT));
1319 		    s = ptr;
1320 		} else {
1321 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1322                                                        strend - s,
1323                                                        &retlen,
1324                                                        UTF8_ALLOW_DEFAULT));
1325 		    if (retlen == (STRLEN) -1 || retlen == 0)
1326 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1327 		    s += retlen;
1328 		}
1329 		if (!checksum)
1330 		    mPUSHu(auv);
1331 		else if (checksum > bits_in_uv)
1332 		    cdouble += (NV) auv;
1333 		else
1334 		    cuv += auv;
1335 	    }
1336 	    break;
1337 	case 's' | TYPE_IS_SHRIEKING:
1338 #if SHORTSIZE != SIZE16
1339 	    while (len-- > 0) {
1340 		short ashort;
1341                 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1342 		if (!checksum)
1343 		    mPUSHi(ashort);
1344 		else if (checksum > bits_in_uv)
1345 		    cdouble += (NV)ashort;
1346 		else
1347 		    cuv += ashort;
1348 	    }
1349 	    break;
1350 #else
1351 	    /* FALLTHROUGH */
1352 #endif
1353 	case 's':
1354 	    while (len-- > 0) {
1355 		I16 ai16;
1356 
1357 #if U16SIZE > SIZE16
1358 		ai16 = 0;
1359 #endif
1360                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1361 #if U16SIZE > SIZE16
1362 		if (ai16 > 32767)
1363 		    ai16 -= 65536;
1364 #endif
1365 		if (!checksum)
1366 		    mPUSHi(ai16);
1367 		else if (checksum > bits_in_uv)
1368 		    cdouble += (NV)ai16;
1369 		else
1370 		    cuv += ai16;
1371 	    }
1372 	    break;
1373 	case 'S' | TYPE_IS_SHRIEKING:
1374 #if SHORTSIZE != SIZE16
1375 	    while (len-- > 0) {
1376 		unsigned short aushort;
1377                 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap,
1378                           needs_swap);
1379 		if (!checksum)
1380 		    mPUSHu(aushort);
1381 		else if (checksum > bits_in_uv)
1382 		    cdouble += (NV)aushort;
1383 		else
1384 		    cuv += aushort;
1385 	    }
1386 	    break;
1387 #else
1388             /* FALLTHROUGH */
1389 #endif
1390 	case 'v':
1391 	case 'n':
1392 	case 'S':
1393 	    while (len-- > 0) {
1394 		U16 au16;
1395 #if U16SIZE > SIZE16
1396 		au16 = 0;
1397 #endif
1398                 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1399 		if (datumtype == 'n')
1400 		    au16 = PerlSock_ntohs(au16);
1401 		if (datumtype == 'v')
1402 		    au16 = vtohs(au16);
1403 		if (!checksum)
1404 		    mPUSHu(au16);
1405 		else if (checksum > bits_in_uv)
1406 		    cdouble += (NV) au16;
1407 		else
1408 		    cuv += au16;
1409 	    }
1410 	    break;
1411 	case 'v' | TYPE_IS_SHRIEKING:
1412 	case 'n' | TYPE_IS_SHRIEKING:
1413 	    while (len-- > 0) {
1414 		I16 ai16;
1415 # if U16SIZE > SIZE16
1416 		ai16 = 0;
1417 # endif
1418                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1419                 /* There should never be any byte-swapping here.  */
1420                 assert(!TYPE_ENDIANNESS(datumtype));
1421 		if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1422 		    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1423 		if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1424 		    ai16 = (I16) vtohs((U16) ai16);
1425 		if (!checksum)
1426 		    mPUSHi(ai16);
1427 		else if (checksum > bits_in_uv)
1428 		    cdouble += (NV) ai16;
1429 		else
1430 		    cuv += ai16;
1431 	    }
1432 	    break;
1433 	case 'i':
1434 	case 'i' | TYPE_IS_SHRIEKING:
1435 	    while (len-- > 0) {
1436 		int aint;
1437                 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1438 		if (!checksum)
1439 		    mPUSHi(aint);
1440 		else if (checksum > bits_in_uv)
1441 		    cdouble += (NV)aint;
1442 		else
1443 		    cuv += aint;
1444 	    }
1445 	    break;
1446 	case 'I':
1447 	case 'I' | TYPE_IS_SHRIEKING:
1448 	    while (len-- > 0) {
1449 		unsigned int auint;
1450                 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1451 		if (!checksum)
1452 		    mPUSHu(auint);
1453 		else if (checksum > bits_in_uv)
1454 		    cdouble += (NV)auint;
1455 		else
1456 		    cuv += auint;
1457 	    }
1458 	    break;
1459 	case 'j':
1460 	    while (len-- > 0) {
1461 		IV aiv;
1462                 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1463 		if (!checksum)
1464 		    mPUSHi(aiv);
1465 		else if (checksum > bits_in_uv)
1466 		    cdouble += (NV)aiv;
1467 		else
1468 		    cuv += aiv;
1469 	    }
1470 	    break;
1471 	case 'J':
1472 	    while (len-- > 0) {
1473 		UV auv;
1474                 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1475 		if (!checksum)
1476 		    mPUSHu(auv);
1477 		else if (checksum > bits_in_uv)
1478 		    cdouble += (NV)auv;
1479 		else
1480 		    cuv += auv;
1481 	    }
1482 	    break;
1483 	case 'l' | TYPE_IS_SHRIEKING:
1484 #if LONGSIZE != SIZE32
1485 	    while (len-- > 0) {
1486 		long along;
1487                 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1488 		if (!checksum)
1489 		    mPUSHi(along);
1490 		else if (checksum > bits_in_uv)
1491 		    cdouble += (NV)along;
1492 		else
1493 		    cuv += along;
1494 	    }
1495 	    break;
1496 #else
1497 	    /* FALLTHROUGH */
1498 #endif
1499 	case 'l':
1500 	    while (len-- > 0) {
1501 		I32 ai32;
1502 #if U32SIZE > SIZE32
1503 		ai32 = 0;
1504 #endif
1505                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1506 #if U32SIZE > SIZE32
1507 		if (ai32 > 2147483647) ai32 -= 4294967296;
1508 #endif
1509 		if (!checksum)
1510 		    mPUSHi(ai32);
1511 		else if (checksum > bits_in_uv)
1512 		    cdouble += (NV)ai32;
1513 		else
1514 		    cuv += ai32;
1515 	    }
1516 	    break;
1517 	case 'L' | TYPE_IS_SHRIEKING:
1518 #if LONGSIZE != SIZE32
1519 	    while (len-- > 0) {
1520 		unsigned long aulong;
1521                 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1522 		if (!checksum)
1523 		    mPUSHu(aulong);
1524 		else if (checksum > bits_in_uv)
1525 		    cdouble += (NV)aulong;
1526 		else
1527 		    cuv += aulong;
1528 	    }
1529 	    break;
1530 #else
1531             /* FALLTHROUGH */
1532 #endif
1533 	case 'V':
1534 	case 'N':
1535 	case 'L':
1536 	    while (len-- > 0) {
1537 		U32 au32;
1538 #if U32SIZE > SIZE32
1539 		au32 = 0;
1540 #endif
1541                 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1542 		if (datumtype == 'N')
1543 		    au32 = PerlSock_ntohl(au32);
1544 		if (datumtype == 'V')
1545 		    au32 = vtohl(au32);
1546 		if (!checksum)
1547 		    mPUSHu(au32);
1548 		else if (checksum > bits_in_uv)
1549 		    cdouble += (NV)au32;
1550 		else
1551 		    cuv += au32;
1552 	    }
1553 	    break;
1554 	case 'V' | TYPE_IS_SHRIEKING:
1555 	case 'N' | TYPE_IS_SHRIEKING:
1556 	    while (len-- > 0) {
1557 		I32 ai32;
1558 #if U32SIZE > SIZE32
1559 		ai32 = 0;
1560 #endif
1561                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1562                 /* There should never be any byte swapping here.  */
1563                 assert(!TYPE_ENDIANNESS(datumtype));
1564 		if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1565 		    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1566 		if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1567 		    ai32 = (I32)vtohl((U32)ai32);
1568 		if (!checksum)
1569 		    mPUSHi(ai32);
1570 		else if (checksum > bits_in_uv)
1571 		    cdouble += (NV)ai32;
1572 		else
1573 		    cuv += ai32;
1574 	    }
1575 	    break;
1576 	case 'p':
1577 	    while (len-- > 0) {
1578 		const char *aptr;
1579                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1580 		/* newSVpv generates undef if aptr is NULL */
1581 		mPUSHs(newSVpv(aptr, 0));
1582 	    }
1583 	    break;
1584 	case 'w':
1585 	    {
1586 		UV auv = 0;
1587 		U32 bytes = 0;
1588 
1589 		while (len > 0 && s < strend) {
1590 		    U8 ch;
1591 		    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1592 		    auv = (auv << 7) | (ch & 0x7f);
1593                     /* UTF8_IS_XXXXX not right here because this is a BER, not
1594                      * UTF-8 format - using constant 0x80 */
1595 		    if (ch < 0x80) {
1596 			bytes = 0;
1597 			mPUSHu(auv);
1598 			len--;
1599 			auv = 0;
1600 			continue;
1601 		    }
1602 		    if (++bytes >= sizeof(UV)) {	/* promote to string */
1603 			const char *t;
1604 
1605 			sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
1606 			while (s < strend) {
1607 			    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1608 			    sv = mul128(sv, (U8)(ch & 0x7f));
1609 			    if (!(ch & 0x80)) {
1610 				bytes = 0;
1611 				break;
1612 			    }
1613 			}
1614 			t = SvPV_nolen_const(sv);
1615 			while (*t == '0')
1616 			    t++;
1617 			sv_chop(sv, t);
1618 			mPUSHs(sv);
1619 			len--;
1620 			auv = 0;
1621 		    }
1622 		}
1623 		if ((s >= strend) && bytes)
1624 		    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1625 	    }
1626 	    break;
1627 	case 'P':
1628 	    if (symptr->howlen == e_star)
1629 	        Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1630 	    EXTEND(SP, 1);
1631 	    if (s + sizeof(char*) <= strend) {
1632 		char *aptr;
1633                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1634 		/* newSVpvn generates undef if aptr is NULL */
1635 		PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1636 	    }
1637 	    break;
1638 #if defined(HAS_QUAD) && IVSIZE >= 8
1639 	case 'q':
1640 	    while (len-- > 0) {
1641 		Quad_t aquad;
1642                 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1643 		if (!checksum)
1644                     mPUSHs(newSViv((IV)aquad));
1645 		else if (checksum > bits_in_uv)
1646 		    cdouble += (NV)aquad;
1647 		else
1648 		    cuv += aquad;
1649 	    }
1650 	    break;
1651 	case 'Q':
1652 	    while (len-- > 0) {
1653 		Uquad_t auquad;
1654                 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1655 		if (!checksum)
1656 		    mPUSHs(newSVuv((UV)auquad));
1657 		else if (checksum > bits_in_uv)
1658 		    cdouble += (NV)auquad;
1659 		else
1660 		    cuv += auquad;
1661 	    }
1662 	    break;
1663 #endif
1664 	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
1665 	case 'f':
1666 	    while (len-- > 0) {
1667 		float afloat;
1668                 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1669 		if (!checksum)
1670 		    mPUSHn(afloat);
1671 		else
1672 		    cdouble += afloat;
1673 	    }
1674 	    break;
1675 	case 'd':
1676 	    while (len-- > 0) {
1677 		double adouble;
1678                 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1679 		if (!checksum)
1680 		    mPUSHn(adouble);
1681 		else
1682 		    cdouble += adouble;
1683 	    }
1684 	    break;
1685 	case 'F':
1686 	    while (len-- > 0) {
1687 		NV_bytes anv;
1688                 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1689                             datumtype, needs_swap);
1690 		if (!checksum)
1691 		    mPUSHn(anv.nv);
1692 		else
1693 		    cdouble += anv.nv;
1694 	    }
1695 	    break;
1696 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1697 	case 'D':
1698 	    while (len-- > 0) {
1699 		ld_bytes aldouble;
1700                 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1701                             sizeof(aldouble.bytes), datumtype, needs_swap);
1702                 /* The most common long double format, the x86 80-bit
1703                  * extended precision, has either 2 or 6 unused bytes,
1704                  * which may contain garbage, which may contain
1705                  * unintentional data.  While we do zero the bytes of
1706                  * the long double data in pack(), here in unpack() we
1707                  * don't, because it's really hard to envision that
1708                  * reading the long double off aldouble would be
1709                  * affected by the unused bytes.
1710                  *
1711                  * Note that trying to unpack 'long doubles' of 'long
1712                  * doubles' packed in another system is in the general
1713                  * case doomed without having more detail. */
1714 		if (!checksum)
1715 		    mPUSHn(aldouble.ld);
1716 		else
1717 		    cdouble += aldouble.ld;
1718 	    }
1719 	    break;
1720 #endif
1721 	case 'u':
1722 	    if (!checksum) {
1723                 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1724 		sv = sv_2mortal(newSV(l));
1725 		if (l) SvPOK_on(sv);
1726 	    }
1727 
1728             /* Note that all legal uuencoded strings are ASCII printables, so
1729              * have the same representation under UTF-8 vs not.  This means we
1730              * can ignore UTF8ness on legal input.  For illegal we stop at the
1731              * first failure, and don't report where/what that is, so again we
1732              * can ignore UTF8ness */
1733 
1734             while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1735                 I32 a, b, c, d;
1736                 char hunk[3];
1737 
1738                 len = PL_uudmap[*(U8*)s++] & 077;
1739                 while (len > 0) {
1740                     if (s < strend && ISUUCHAR(*s))
1741                         a = PL_uudmap[*(U8*)s++] & 077;
1742                     else
1743                         a = 0;
1744                     if (s < strend && ISUUCHAR(*s))
1745                         b = PL_uudmap[*(U8*)s++] & 077;
1746                     else
1747                         b = 0;
1748                     if (s < strend && ISUUCHAR(*s))
1749                         c = PL_uudmap[*(U8*)s++] & 077;
1750                     else
1751                         c = 0;
1752                     if (s < strend && ISUUCHAR(*s))
1753                         d = PL_uudmap[*(U8*)s++] & 077;
1754                     else
1755                         d = 0;
1756                     hunk[0] = (char)((a << 2) | (b >> 4));
1757                     hunk[1] = (char)((b << 4) | (c >> 2));
1758                     hunk[2] = (char)((c << 6) | d);
1759                     if (!checksum)
1760                         sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1761                     len -= 3;
1762                 }
1763                 if (*s == '\n')
1764                     s++;
1765                 else	/* possible checksum byte */
1766                     if (s + 1 < strend && s[1] == '\n')
1767                         s += 2;
1768             }
1769 	    if (!checksum)
1770 		XPUSHs(sv);
1771 	    break;
1772 	} /* End of switch */
1773 
1774 	if (checksum) {
1775 	    if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1776 	      (checksum > bits_in_uv &&
1777 	       strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1778 		NV trouble, anv;
1779 
1780                 anv = (NV) (1 << (checksum & 15));
1781 		while (checksum >= 16) {
1782 		    checksum -= 16;
1783 		    anv *= 65536.0;
1784 		}
1785 		while (cdouble < 0.0)
1786 		    cdouble += anv;
1787 		cdouble = Perl_modf(cdouble / anv, &trouble);
1788 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1789                 /* Workaround for powerpc doubledouble modfl bug:
1790                  * close to 1.0L and -1.0L cdouble is 0, and trouble
1791                  * is cdouble / anv. */
1792                 if (trouble != Perl_ceil(trouble)) {
1793                   cdouble = trouble;
1794                   if (cdouble >  1.0L) cdouble -= 1.0L;
1795                   if (cdouble < -1.0L) cdouble += 1.0L;
1796                 }
1797 #endif
1798                 cdouble *= anv;
1799 		sv = newSVnv(cdouble);
1800 	    }
1801 	    else {
1802 		if (checksum < bits_in_uv) {
1803 		    UV mask = ((UV)1 << checksum) - 1;
1804 		    cuv &= mask;
1805 		}
1806 		sv = newSVuv(cuv);
1807 	    }
1808 	    mXPUSHs(sv);
1809 	    checksum = 0;
1810 	}
1811 
1812         if (symptr->flags & FLAG_SLASH){
1813             if (SP - PL_stack_base - start_sp_offset <= 0)
1814 		break;
1815             if( next_symbol(symptr) ){
1816               if( symptr->howlen == e_number )
1817 		Perl_croak(aTHX_ "Count after length/code in unpack" );
1818               if( beyond ){
1819          	/* ...end of char buffer then no decent length available */
1820 		Perl_croak(aTHX_ "length/code after end of string in unpack" );
1821               } else {
1822          	/* take top of stack (hope it's numeric) */
1823                 len = POPi;
1824                 if( len < 0 )
1825                     Perl_croak(aTHX_ "Negative '/' count in unpack" );
1826               }
1827             } else {
1828 		Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1829             }
1830             datumtype = symptr->code;
1831             explicit_length = FALSE;
1832 	    goto redo_switch;
1833         }
1834     }
1835 
1836     if (new_s)
1837 	*new_s = s;
1838     PUTBACK;
1839     return SP - PL_stack_base - start_sp_offset;
1840 }
1841 
1842 PP(pp_unpack)
1843 {
1844     dSP;
1845     dPOPPOPssrl;
1846     U8 gimme = GIMME_V;
1847     STRLEN llen;
1848     STRLEN rlen;
1849     const char *pat = SvPV_const(left,  llen);
1850     const char *s   = SvPV_const(right, rlen);
1851     const char *strend = s + rlen;
1852     const char *patend = pat + llen;
1853     I32 cnt;
1854 
1855     PUTBACK;
1856     cnt = unpackstring(pat, patend, s, strend,
1857 		     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1858 		     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1859 
1860     SPAGAIN;
1861     if ( !cnt && gimme == G_SCALAR )
1862        PUSHs(&PL_sv_undef);
1863     RETURN;
1864 }
1865 
1866 STATIC U8 *
1867 doencodes(U8 *h, const U8 *s, I32 len)
1868 {
1869     *h++ = PL_uuemap[len];
1870     while (len > 2) {
1871 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1872 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1873 	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1874 	*h++ = PL_uuemap[(077 & (s[2] & 077))];
1875 	s += 3;
1876 	len -= 3;
1877     }
1878     if (len > 0) {
1879         const U8 r = (len > 1 ? s[1] : '\0');
1880 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1881 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1882 	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1883 	*h++ = PL_uuemap[0];
1884     }
1885     *h++ = '\n';
1886     return h;
1887 }
1888 
1889 STATIC SV *
1890 S_is_an_int(pTHX_ const char *s, STRLEN l)
1891 {
1892   SV *result = newSVpvn(s, l);
1893   char *const result_c = SvPV_nolen(result);	/* convenience */
1894   char *out = result_c;
1895   bool skip = 1;
1896   bool ignore = 0;
1897 
1898   PERL_ARGS_ASSERT_IS_AN_INT;
1899 
1900   while (*s) {
1901     switch (*s) {
1902     case ' ':
1903       break;
1904     case '+':
1905       if (!skip) {
1906 	SvREFCNT_dec(result);
1907 	return (NULL);
1908       }
1909       break;
1910     case '0':
1911     case '1':
1912     case '2':
1913     case '3':
1914     case '4':
1915     case '5':
1916     case '6':
1917     case '7':
1918     case '8':
1919     case '9':
1920       skip = 0;
1921       if (!ignore) {
1922 	*(out++) = *s;
1923       }
1924       break;
1925     case '.':
1926       ignore = 1;
1927       break;
1928     default:
1929       SvREFCNT_dec(result);
1930       return (NULL);
1931     }
1932     s++;
1933   }
1934   *(out++) = '\0';
1935   SvCUR_set(result, out - result_c);
1936   return (result);
1937 }
1938 
1939 /* pnum must be '\0' terminated */
1940 STATIC int
1941 S_div128(pTHX_ SV *pnum, bool *done)
1942 {
1943     STRLEN len;
1944     char * const s = SvPV(pnum, len);
1945     char *t = s;
1946     int m = 0;
1947 
1948     PERL_ARGS_ASSERT_DIV128;
1949 
1950     *done = 1;
1951     while (*t) {
1952 	const int i = m * 10 + (*t - '0');
1953 	const int r = (i >> 7); /* r < 10 */
1954 	m = i & 0x7F;
1955 	if (r) {
1956 	    *done = 0;
1957 	}
1958 	*(t++) = '0' + r;
1959     }
1960     *(t++) = '\0';
1961     SvCUR_set(pnum, (STRLEN) (t - s));
1962     return (m);
1963 }
1964 
1965 /*
1966 =for apidoc packlist
1967 
1968 The engine implementing C<pack()> Perl function.
1969 
1970 =cut
1971 */
1972 
1973 void
1974 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1975 {
1976     tempsym_t sym;
1977 
1978     PERL_ARGS_ASSERT_PACKLIST;
1979 
1980     TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1981 
1982     /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1983        Also make sure any UTF8 flag is loaded */
1984     SvPV_force_nolen(cat);
1985     if (DO_UTF8(cat))
1986 	sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1987 
1988     (void)pack_rec( cat, &sym, beglist, endlist );
1989 }
1990 
1991 /* like sv_utf8_upgrade, but also repoint the group start markers */
1992 STATIC void
1993 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
1994     STRLEN len;
1995     tempsym_t *group;
1996     const char *from_ptr, *from_start, *from_end, **marks, **m;
1997     char *to_start, *to_ptr;
1998 
1999     if (SvUTF8(sv)) return;
2000 
2001     from_start = SvPVX_const(sv);
2002     from_end = from_start + SvCUR(sv);
2003     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2004 	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2005     if (from_ptr == from_end) {
2006 	/* Simple case: no character needs to be changed */
2007 	SvUTF8_on(sv);
2008 	return;
2009     }
2010 
2011     len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2012     Newx(to_start, len, char);
2013     Copy(from_start, to_start, from_ptr-from_start, char);
2014     to_ptr = to_start + (from_ptr-from_start);
2015 
2016     Newx(marks, sym_ptr->level+2, const char *);
2017     for (group=sym_ptr; group; group = group->previous)
2018 	marks[group->level] = from_start + group->strbeg;
2019     marks[sym_ptr->level+1] = from_end+1;
2020     for (m = marks; *m < from_ptr; m++)
2021 	*m = to_start + (*m-from_start);
2022 
2023     for (;from_ptr < from_end; from_ptr++) {
2024 	while (*m == from_ptr) *m++ = to_ptr;
2025 	to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2026     }
2027     *to_ptr = 0;
2028 
2029     while (*m == from_ptr) *m++ = to_ptr;
2030     if (m != marks + sym_ptr->level+1) {
2031 	Safefree(marks);
2032 	Safefree(to_start);
2033 	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2034 		   "level=%d", m, marks, sym_ptr->level);
2035     }
2036     for (group=sym_ptr; group; group = group->previous)
2037 	group->strbeg = marks[group->level] - to_start;
2038     Safefree(marks);
2039 
2040     if (SvOOK(sv)) {
2041 	if (SvIVX(sv)) {
2042 	    SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2043 	    from_start -= SvIVX(sv);
2044 	    SvIV_set(sv, 0);
2045 	}
2046 	SvFLAGS(sv) &= ~SVf_OOK;
2047     }
2048     if (SvLEN(sv) != 0)
2049 	Safefree(from_start);
2050     SvPV_set(sv, to_start);
2051     SvCUR_set(sv, to_ptr - to_start);
2052     SvLEN_set(sv, len);
2053     SvUTF8_on(sv);
2054 }
2055 
2056 /* Exponential string grower. Makes string extension effectively O(n)
2057    needed says how many extra bytes we need (not counting the final '\0')
2058    Only grows the string if there is an actual lack of space
2059 */
2060 STATIC char *
2061 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2062     const STRLEN cur = SvCUR(sv);
2063     const STRLEN len = SvLEN(sv);
2064     STRLEN extend;
2065 
2066     PERL_ARGS_ASSERT_SV_EXP_GROW;
2067 
2068     if (len - cur > needed) return SvPVX(sv);
2069     extend = needed > len ? needed : len;
2070     return SvGROW(sv, len+extend+1);
2071 }
2072 
2073 static SV *
2074 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2075 {
2076     SvGETMAGIC(sv);
2077     if (UNLIKELY(SvAMAGIC(sv)))
2078 	sv = sv_2num(sv);
2079     if (UNLIKELY(isinfnansv(sv))) {
2080 	const I32 c = TYPE_NO_MODIFIERS(datumtype);
2081 	const NV nv = SvNV_nomg(sv);
2082 	if (c == 'w')
2083 	    Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
2084 	else
2085 	    Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
2086     }
2087     return sv;
2088 }
2089 
2090 #define SvIV_no_inf(sv,d) \
2091 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2092 #define SvUV_no_inf(sv,d) \
2093 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2094 
2095 STATIC
2096 SV **
2097 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2098 {
2099     tempsym_t lookahead;
2100     I32 items  = endlist - beglist;
2101     bool found = next_symbol(symptr);
2102     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2103     bool warn_utf8 = ckWARN(WARN_UTF8);
2104     char* from;
2105 
2106     PERL_ARGS_ASSERT_PACK_REC;
2107 
2108     if (symptr->level == 0 && found && symptr->code == 'U') {
2109 	marked_upgrade(aTHX_ cat, symptr);
2110 	symptr->flags |= FLAG_DO_UTF8;
2111 	utf8 = 0;
2112     }
2113     symptr->strbeg = SvCUR(cat);
2114 
2115     while (found) {
2116 	SV *fromstr;
2117 	STRLEN fromlen;
2118 	I32 len;
2119 	SV *lengthcode = NULL;
2120         I32 datumtype = symptr->code;
2121         howlen_t howlen = symptr->howlen;
2122 	char *start = SvPVX(cat);
2123 	char *cur   = start + SvCUR(cat);
2124         bool needs_swap;
2125 
2126 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2127 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2128 
2129         switch (howlen) {
2130 	  case e_star:
2131 	    len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2132 		0 : items;
2133 	    break;
2134 	  default:
2135 	    /* e_no_len and e_number */
2136 	    len = symptr->length;
2137 	    break;
2138         }
2139 
2140 	if (len) {
2141 	    packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2142 
2143 	    if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2144 		/* We can process this letter. */
2145 		STRLEN size = props & PACK_SIZE_MASK;
2146 		GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2147 	    }
2148         }
2149 
2150         /* Look ahead for next symbol. Do we have code/code? */
2151         lookahead = *symptr;
2152         found = next_symbol(&lookahead);
2153 	if (symptr->flags & FLAG_SLASH) {
2154 	    IV count;
2155 	    if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2156 	    if (strchr("aAZ", lookahead.code)) {
2157 		if (lookahead.howlen == e_number) count = lookahead.length;
2158 		else {
2159 		    if (items > 0) {
2160 			count = sv_len_utf8(*beglist);
2161 		    }
2162 		    else count = 0;
2163 		    if (lookahead.code == 'Z') count++;
2164 		}
2165 	    } else {
2166 		if (lookahead.howlen == e_number && lookahead.length < items)
2167 		    count = lookahead.length;
2168 		else count = items;
2169 	    }
2170 	    lookahead.howlen = e_number;
2171 	    lookahead.length = count;
2172 	    lengthcode = sv_2mortal(newSViv(count));
2173 	}
2174 
2175         needs_swap = NEEDS_SWAP(datumtype);
2176 
2177 	/* Code inside the switch must take care to properly update
2178 	   cat (CUR length and '\0' termination) if it updated *cur and
2179 	   doesn't simply leave using break */
2180 	switch (TYPE_NO_ENDIANNESS(datumtype)) {
2181 	default:
2182 	    Perl_croak(aTHX_ "Invalid type '%c' in pack",
2183 		       (int) TYPE_NO_MODIFIERS(datumtype));
2184 	case '%':
2185 	    Perl_croak(aTHX_ "'%%' may not be used in pack");
2186 
2187 	case '.' | TYPE_IS_SHRIEKING:
2188 	case '.':
2189 	    if (howlen == e_star) from = start;
2190 	    else if (len == 0) from = cur;
2191 	    else {
2192 		tempsym_t *group = symptr;
2193 
2194 		while (--len && group) group = group->previous;
2195 		from = group ? start + group->strbeg : start;
2196 	    }
2197 	    fromstr = NEXTFROM;
2198 	    len = SvIV_no_inf(fromstr, datumtype);
2199 	    goto resize;
2200 	case '@' | TYPE_IS_SHRIEKING:
2201 	case '@':
2202 	    from = start + symptr->strbeg;
2203 	  resize:
2204 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2205 		if (len >= 0) {
2206 		    while (len && from < cur) {
2207 			from += UTF8SKIP(from);
2208 			len--;
2209 		    }
2210 		    if (from > cur)
2211 			Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2212 		    if (len) {
2213 			/* Here we know from == cur */
2214 		      grow:
2215 			GROWING(0, cat, start, cur, len);
2216 			Zero(cur, len, char);
2217 			cur += len;
2218 		    } else if (from < cur) {
2219 			len = cur - from;
2220 			goto shrink;
2221 		    } else goto no_change;
2222 		} else {
2223 		    cur = from;
2224 		    len = -len;
2225 		    goto utf8_shrink;
2226 		}
2227 	    else {
2228 		len -= cur - from;
2229 		if (len > 0) goto grow;
2230 		if (len == 0) goto no_change;
2231 		len = -len;
2232 		goto shrink;
2233 	    }
2234 	    break;
2235 
2236 	case '(': {
2237             tempsym_t savsym = *symptr;
2238 	    U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2239 	    symptr->flags |= group_modifiers;
2240             symptr->patend = savsym.grpend;
2241             symptr->level++;
2242 	    symptr->previous = &lookahead;
2243 	    while (len--) {
2244 		U32 was_utf8;
2245 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2246 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
2247 		was_utf8 = SvUTF8(cat);
2248   	        symptr->patptr = savsym.grpbeg;
2249 		beglist = pack_rec(cat, symptr, beglist, endlist);
2250 		if (SvUTF8(cat) != was_utf8)
2251 		    /* This had better be an upgrade while in utf8==0 mode */
2252 		    utf8 = 1;
2253 
2254 		if (savsym.howlen == e_star && beglist == endlist)
2255 		    break;		/* No way to continue */
2256 	    }
2257 	    items = endlist - beglist;
2258 	    lookahead.flags  = symptr->flags & ~group_modifiers;
2259 	    goto no_change;
2260 	}
2261 	case 'X' | TYPE_IS_SHRIEKING:
2262 	    if (!len)			/* Avoid division by 0 */
2263 		len = 1;
2264 	    if (utf8) {
2265 		char *hop, *last;
2266 		I32 l = len;
2267 		hop = last = start;
2268 		while (hop < cur) {
2269 		    hop += UTF8SKIP(hop);
2270 		    if (--l == 0) {
2271 			last = hop;
2272 			l = len;
2273 		    }
2274 		}
2275 		if (last > cur)
2276 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2277 		cur = last;
2278 		break;
2279 	    }
2280 	    len = (cur-start) % len;
2281 	    /* FALLTHROUGH */
2282 	case 'X':
2283 	    if (utf8) {
2284 		if (len < 1) goto no_change;
2285 	      utf8_shrink:
2286 		while (len > 0) {
2287 		    if (cur <= start)
2288 			Perl_croak(aTHX_ "'%c' outside of string in pack",
2289 				   (int) TYPE_NO_MODIFIERS(datumtype));
2290 		    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2291 			if (cur <= start)
2292 			    Perl_croak(aTHX_ "'%c' outside of string in pack",
2293 				       (int) TYPE_NO_MODIFIERS(datumtype));
2294 		    }
2295 		    len--;
2296 		}
2297 	    } else {
2298 	      shrink:
2299 		if (cur - start < len)
2300 		    Perl_croak(aTHX_ "'%c' outside of string in pack",
2301 			       (int) TYPE_NO_MODIFIERS(datumtype));
2302 		cur -= len;
2303 	    }
2304 	    if (cur < start+symptr->strbeg) {
2305 		/* Make sure group starts don't point into the void */
2306 		tempsym_t *group;
2307 		const STRLEN length = cur-start;
2308 		for (group = symptr;
2309 		     group && length < group->strbeg;
2310 		     group = group->previous) group->strbeg = length;
2311 		lookahead.strbeg = length;
2312 	    }
2313 	    break;
2314 	case 'x' | TYPE_IS_SHRIEKING: {
2315 	    I32 ai32;
2316 	    if (!len)			/* Avoid division by 0 */
2317 		len = 1;
2318 	    if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2319 	    else      ai32 = (cur - start) % len;
2320 	    if (ai32 == 0) goto no_change;
2321 	    len -= ai32;
2322 	}
2323 	/* FALLTHROUGH */
2324 	case 'x':
2325 	    goto grow;
2326 	case 'A':
2327 	case 'Z':
2328 	case 'a': {
2329 	    const char *aptr;
2330 
2331 	    fromstr = NEXTFROM;
2332 	    aptr = SvPV_const(fromstr, fromlen);
2333 	    if (DO_UTF8(fromstr)) {
2334                 const char *end, *s;
2335 
2336 		if (!utf8 && !SvUTF8(cat)) {
2337 		    marked_upgrade(aTHX_ cat, symptr);
2338 		    lookahead.flags |= FLAG_DO_UTF8;
2339 		    lookahead.strbeg = symptr->strbeg;
2340 		    utf8 = 1;
2341 		    start = SvPVX(cat);
2342 		    cur = start + SvCUR(cat);
2343 		}
2344 		if (howlen == e_star) {
2345 		    if (utf8) goto string_copy;
2346 		    len = fromlen+1;
2347 		}
2348 		s = aptr;
2349 		end = aptr + fromlen;
2350 		fromlen = datumtype == 'Z' ? len-1 : len;
2351 		while ((I32) fromlen > 0 && s < end) {
2352 		    s += UTF8SKIP(s);
2353 		    fromlen--;
2354 		}
2355 		if (s > end)
2356 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2357 		if (utf8) {
2358 		    len = fromlen;
2359 		    if (datumtype == 'Z') len++;
2360 		    fromlen = s-aptr;
2361 		    len += fromlen;
2362 
2363 		    goto string_copy;
2364 		}
2365 		fromlen = len - fromlen;
2366 		if (datumtype == 'Z') fromlen--;
2367 		if (howlen == e_star) {
2368 		    len = fromlen;
2369 		    if (datumtype == 'Z') len++;
2370 		}
2371 		GROWING(0, cat, start, cur, len);
2372 		if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2373 				  datumtype | TYPE_IS_PACK))
2374 		    Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2375 			       "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
2376 			       (int)datumtype, aptr, end, cur, (UV)fromlen);
2377 		cur += fromlen;
2378 		len -= fromlen;
2379 	    } else if (utf8) {
2380 		if (howlen == e_star) {
2381 		    len = fromlen;
2382 		    if (datumtype == 'Z') len++;
2383 		}
2384 		if (len <= (I32) fromlen) {
2385 		    fromlen = len;
2386 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2387 		}
2388 		/* assumes a byte expands to at most UTF8_EXPAND bytes on
2389 		   upgrade, so:
2390 		   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2391 		GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2392 		len -= fromlen;
2393 		while (fromlen > 0) {
2394 		    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2395 		    aptr++;
2396 		    fromlen--;
2397 		}
2398 	    } else {
2399 	      string_copy:
2400 		if (howlen == e_star) {
2401 		    len = fromlen;
2402 		    if (datumtype == 'Z') len++;
2403 		}
2404 		if (len <= (I32) fromlen) {
2405 		    fromlen = len;
2406 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2407 		}
2408 		GROWING(0, cat, start, cur, len);
2409 		Copy(aptr, cur, fromlen, char);
2410 		cur += fromlen;
2411 		len -= fromlen;
2412 	    }
2413 	    memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2414 	    cur += len;
2415 	    SvTAINT(cat);
2416 	    break;
2417 	}
2418 	case 'B':
2419 	case 'b': {
2420 	    const char *str, *end;
2421 	    I32 l, field_len;
2422 	    U8 bits;
2423 	    bool utf8_source;
2424 	    U32 utf8_flags;
2425 
2426 	    fromstr = NEXTFROM;
2427 	    str = SvPV_const(fromstr, fromlen);
2428 	    end = str + fromlen;
2429 	    if (DO_UTF8(fromstr)) {
2430 		utf8_source = TRUE;
2431 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2432 	    } else {
2433 		utf8_source = FALSE;
2434 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2435 	    }
2436 	    if (howlen == e_star) len = fromlen;
2437 	    field_len = (len+7)/8;
2438 	    GROWING(utf8, cat, start, cur, field_len);
2439 	    if (len > (I32)fromlen) len = fromlen;
2440 	    bits = 0;
2441 	    l = 0;
2442 	    if (datumtype == 'B')
2443 		while (l++ < len) {
2444 		    if (utf8_source) {
2445 			UV val = 0;
2446 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2447 			bits |= val & 1;
2448 		    } else bits |= *str++ & 1;
2449 		    if (l & 7) bits <<= 1;
2450 		    else {
2451 			PUSH_BYTE(utf8, cur, bits);
2452 			bits = 0;
2453 		    }
2454 		}
2455 	    else
2456 		/* datumtype == 'b' */
2457 		while (l++ < len) {
2458 		    if (utf8_source) {
2459 			UV val = 0;
2460 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2461 			if (val & 1) bits |= 0x80;
2462 		    } else if (*str++ & 1)
2463 			bits |= 0x80;
2464 		    if (l & 7) bits >>= 1;
2465 		    else {
2466 			PUSH_BYTE(utf8, cur, bits);
2467 			bits = 0;
2468 		    }
2469 		}
2470 	    l--;
2471 	    if (l & 7) {
2472 		if (datumtype == 'B')
2473 		    bits <<= 7 - (l & 7);
2474 		else
2475 		    bits >>= 7 - (l & 7);
2476 		PUSH_BYTE(utf8, cur, bits);
2477 		l += 7;
2478 	    }
2479 	    /* Determine how many chars are left in the requested field */
2480 	    l /= 8;
2481 	    if (howlen == e_star) field_len = 0;
2482 	    else field_len -= l;
2483 	    Zero(cur, field_len, char);
2484 	    cur += field_len;
2485 	    break;
2486 	}
2487 	case 'H':
2488 	case 'h': {
2489 	    const char *str, *end;
2490 	    I32 l, field_len;
2491 	    U8 bits;
2492 	    bool utf8_source;
2493 	    U32 utf8_flags;
2494 
2495 	    fromstr = NEXTFROM;
2496 	    str = SvPV_const(fromstr, fromlen);
2497 	    end = str + fromlen;
2498 	    if (DO_UTF8(fromstr)) {
2499 		utf8_source = TRUE;
2500 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2501 	    } else {
2502 		utf8_source = FALSE;
2503 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2504 	    }
2505 	    if (howlen == e_star) len = fromlen;
2506 	    field_len = (len+1)/2;
2507 	    GROWING(utf8, cat, start, cur, field_len);
2508 	    if (!utf8_source && len > (I32)fromlen) len = fromlen;
2509 	    bits = 0;
2510 	    l = 0;
2511 	    if (datumtype == 'H')
2512 		while (l++ < len) {
2513 		    if (utf8_source) {
2514 			UV val = 0;
2515 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2516 			if (val < 256 && isALPHA(val))
2517 			    bits |= (val + 9) & 0xf;
2518 			else
2519 			    bits |= val & 0xf;
2520 		    } else if (isALPHA(*str))
2521 			bits |= (*str++ + 9) & 0xf;
2522 		    else
2523 			bits |= *str++ & 0xf;
2524 		    if (l & 1) bits <<= 4;
2525 		    else {
2526 			PUSH_BYTE(utf8, cur, bits);
2527 			bits = 0;
2528 		    }
2529 		}
2530 	    else
2531 		while (l++ < len) {
2532 		    if (utf8_source) {
2533 			UV val = 0;
2534 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2535 			if (val < 256 && isALPHA(val))
2536 			    bits |= ((val + 9) & 0xf) << 4;
2537 			else
2538 			    bits |= (val & 0xf) << 4;
2539 		    } else if (isALPHA(*str))
2540 			bits |= ((*str++ + 9) & 0xf) << 4;
2541 		    else
2542 			bits |= (*str++ & 0xf) << 4;
2543 		    if (l & 1) bits >>= 4;
2544 		    else {
2545 			PUSH_BYTE(utf8, cur, bits);
2546 			bits = 0;
2547 		    }
2548 		}
2549 	    l--;
2550 	    if (l & 1) {
2551 		PUSH_BYTE(utf8, cur, bits);
2552 		l++;
2553 	    }
2554 	    /* Determine how many chars are left in the requested field */
2555 	    l /= 2;
2556 	    if (howlen == e_star) field_len = 0;
2557 	    else field_len -= l;
2558 	    Zero(cur, field_len, char);
2559 	    cur += field_len;
2560 	    break;
2561 	}
2562 	case 'c':
2563 	    while (len-- > 0) {
2564 		IV aiv;
2565 		fromstr = NEXTFROM;
2566                 aiv = SvIV_no_inf(fromstr, datumtype);
2567 		if ((-128 > aiv || aiv > 127))
2568 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2569 				   "Character in 'c' format wrapped in pack");
2570 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2571 	    }
2572 	    break;
2573 	case 'C':
2574 	    if (len == 0) {
2575 		utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2576 		break;
2577 	    }
2578 	    while (len-- > 0) {
2579 		IV aiv;
2580 		fromstr = NEXTFROM;
2581                 aiv = SvIV_no_inf(fromstr, datumtype);
2582 		if ((0 > aiv || aiv > 0xff))
2583 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2584 				   "Character in 'C' format wrapped in pack");
2585 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2586 	    }
2587 	    break;
2588 	case 'W': {
2589 	    char *end;
2590 	    U8 in_bytes = (U8)IN_BYTES;
2591 
2592 	    end = start+SvLEN(cat)-1;
2593 	    if (utf8) end -= UTF8_MAXLEN-1;
2594 	    while (len-- > 0) {
2595 		UV auv;
2596 		fromstr = NEXTFROM;
2597 		auv = SvUV_no_inf(fromstr, datumtype);
2598 		if (in_bytes) auv = auv % 0x100;
2599 		if (utf8) {
2600 		  W_utf8:
2601 		    if (cur > end) {
2602 			*cur = '\0';
2603 			SvCUR_set(cat, cur - start);
2604 
2605 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2606 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2607 		    }
2608 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2609 						       auv,
2610 						       warn_utf8 ?
2611 						       0 : UNICODE_ALLOW_ANY);
2612 		} else {
2613 		    if (auv >= 0x100) {
2614 			if (!SvUTF8(cat)) {
2615 			    *cur = '\0';
2616 			    SvCUR_set(cat, cur - start);
2617 			    marked_upgrade(aTHX_ cat, symptr);
2618 			    lookahead.flags |= FLAG_DO_UTF8;
2619 			    lookahead.strbeg = symptr->strbeg;
2620 			    utf8 = 1;
2621 			    start = SvPVX(cat);
2622 			    cur = start + SvCUR(cat);
2623 			    end = start+SvLEN(cat)-UTF8_MAXLEN;
2624 			    goto W_utf8;
2625 			}
2626 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2627 				       "Character in 'W' format wrapped in pack");
2628 			auv &= 0xff;
2629 		    }
2630 		    if (cur >= end) {
2631 			*cur = '\0';
2632 			SvCUR_set(cat, cur - start);
2633 			GROWING(0, cat, start, cur, len+1);
2634 			end = start+SvLEN(cat)-1;
2635 		    }
2636 		    *(U8 *) cur++ = (U8)auv;
2637 		}
2638 	    }
2639 	    break;
2640 	}
2641 	case 'U': {
2642 	    char *end;
2643 
2644 	    if (len == 0) {
2645 		if (!(symptr->flags & FLAG_DO_UTF8)) {
2646 		    marked_upgrade(aTHX_ cat, symptr);
2647 		    lookahead.flags |= FLAG_DO_UTF8;
2648 		    lookahead.strbeg = symptr->strbeg;
2649 		}
2650 		utf8 = 0;
2651 		goto no_change;
2652 	    }
2653 
2654 	    end = start+SvLEN(cat);
2655 	    if (!utf8) end -= UTF8_MAXLEN;
2656 	    while (len-- > 0) {
2657 		UV auv;
2658 		fromstr = NEXTFROM;
2659 		auv = SvUV_no_inf(fromstr, datumtype);
2660 		if (utf8) {
2661 		    U8 buffer[UTF8_MAXLEN], *endb;
2662 		    endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
2663 					       warn_utf8 ?
2664 					       0 : UNICODE_ALLOW_ANY);
2665 		    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2666 			*cur = '\0';
2667 			SvCUR_set(cat, cur - start);
2668 			GROWING(0, cat, start, cur,
2669 				len+(endb-buffer)*UTF8_EXPAND);
2670 			end = start+SvLEN(cat);
2671 		    }
2672                     cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2673 		} else {
2674 		    if (cur >= end) {
2675 			*cur = '\0';
2676 			SvCUR_set(cat, cur - start);
2677 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2678 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2679 		    }
2680 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
2681 						       warn_utf8 ?
2682 						       0 : UNICODE_ALLOW_ANY);
2683 		}
2684 	    }
2685 	    break;
2686 	}
2687 	/* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
2688 	case 'f':
2689 	    while (len-- > 0) {
2690 		float afloat;
2691 		NV anv;
2692 		fromstr = NEXTFROM;
2693 		anv = SvNV(fromstr);
2694 # if defined(VMS) && !defined(_IEEE_FP)
2695 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2696 		 * on Alpha; fake it if we don't have them.
2697 		 */
2698 		if (anv > FLT_MAX)
2699 		    afloat = FLT_MAX;
2700 		else if (anv < -FLT_MAX)
2701 		    afloat = -FLT_MAX;
2702 		else afloat = (float)anv;
2703 # else
2704 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2705 		if(Perl_isnan(anv))
2706 		    afloat = (float)NV_NAN;
2707 		else
2708 #endif
2709                 /* a simple cast to float is undefined if outside
2710                  * the range of values that can be represented */
2711 		afloat = (float)(anv >  FLT_MAX ?  NV_INF :
2712                                  anv < -FLT_MAX ? -NV_INF : anv);
2713 # endif
2714                 PUSH_VAR(utf8, cur, afloat, needs_swap);
2715 	    }
2716 	    break;
2717 	case 'd':
2718 	    while (len-- > 0) {
2719 		double adouble;
2720 		NV anv;
2721 		fromstr = NEXTFROM;
2722 		anv = SvNV(fromstr);
2723 # if defined(VMS) && !defined(_IEEE_FP)
2724 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2725 		 * on Alpha; fake it if we don't have them.
2726 		 */
2727 		if (anv > DBL_MAX)
2728 		    adouble = DBL_MAX;
2729 		else if (anv < -DBL_MAX)
2730 		    adouble = -DBL_MAX;
2731 		else adouble = (double)anv;
2732 # else
2733 		adouble = (double)anv;
2734 # endif
2735                 PUSH_VAR(utf8, cur, adouble, needs_swap);
2736 	    }
2737 	    break;
2738 	case 'F': {
2739 	    NV_bytes anv;
2740 	    Zero(&anv, 1, NV); /* can be long double with unused bits */
2741 	    while (len-- > 0) {
2742 		fromstr = NEXTFROM;
2743 #ifdef __GNUC__
2744 		/* to work round a gcc/x86 bug; don't use SvNV */
2745 		anv.nv = sv_2nv(fromstr);
2746 #    if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2747          && LONG_DOUBLESIZE > 10
2748                 /* GCC sometimes overwrites the padding in the
2749                    assignment above */
2750                 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2751 #    endif
2752 #else
2753 		anv.nv = SvNV(fromstr);
2754 #endif
2755                 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2756 	    }
2757 	    break;
2758 	}
2759 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2760 	case 'D': {
2761 	    ld_bytes aldouble;
2762 	    /* long doubles can have unused bits, which may be nonzero */
2763 	    Zero(&aldouble, 1, long double);
2764 	    while (len-- > 0) {
2765 		fromstr = NEXTFROM;
2766 #  ifdef __GNUC__
2767 		/* to work round a gcc/x86 bug; don't use SvNV */
2768 		aldouble.ld = (long double)sv_2nv(fromstr);
2769 #    if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2770                 /* GCC sometimes overwrites the padding in the
2771                    assignment above */
2772                 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2773 #    endif
2774 #  else
2775 		aldouble.ld = (long double)SvNV(fromstr);
2776 #  endif
2777                 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2778                            needs_swap);
2779 	    }
2780 	    break;
2781 	}
2782 #endif
2783 	case 'n' | TYPE_IS_SHRIEKING:
2784 	case 'n':
2785 	    while (len-- > 0) {
2786 		I16 ai16;
2787 		fromstr = NEXTFROM;
2788 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2789 		ai16 = PerlSock_htons(ai16);
2790                 PUSH16(utf8, cur, &ai16, FALSE);
2791 	    }
2792 	    break;
2793 	case 'v' | TYPE_IS_SHRIEKING:
2794 	case 'v':
2795 	    while (len-- > 0) {
2796 		I16 ai16;
2797 		fromstr = NEXTFROM;
2798 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2799 		ai16 = htovs(ai16);
2800                 PUSH16(utf8, cur, &ai16, FALSE);
2801 	    }
2802 	    break;
2803         case 'S' | TYPE_IS_SHRIEKING:
2804 #if SHORTSIZE != SIZE16
2805 	    while (len-- > 0) {
2806 		unsigned short aushort;
2807 		fromstr = NEXTFROM;
2808 		aushort = SvUV_no_inf(fromstr, datumtype);
2809                 PUSH_VAR(utf8, cur, aushort, needs_swap);
2810 	    }
2811             break;
2812 #else
2813             /* FALLTHROUGH */
2814 #endif
2815 	case 'S':
2816 	    while (len-- > 0) {
2817 		U16 au16;
2818 		fromstr = NEXTFROM;
2819 		au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2820                 PUSH16(utf8, cur, &au16, needs_swap);
2821 	    }
2822 	    break;
2823 	case 's' | TYPE_IS_SHRIEKING:
2824 #if SHORTSIZE != SIZE16
2825 	    while (len-- > 0) {
2826 		short ashort;
2827 		fromstr = NEXTFROM;
2828 		ashort = SvIV_no_inf(fromstr, datumtype);
2829                 PUSH_VAR(utf8, cur, ashort, needs_swap);
2830 	    }
2831             break;
2832 #else
2833             /* FALLTHROUGH */
2834 #endif
2835 	case 's':
2836 	    while (len-- > 0) {
2837 		I16 ai16;
2838 		fromstr = NEXTFROM;
2839 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2840                 PUSH16(utf8, cur, &ai16, needs_swap);
2841 	    }
2842 	    break;
2843 	case 'I':
2844 	case 'I' | TYPE_IS_SHRIEKING:
2845 	    while (len-- > 0) {
2846 		unsigned int auint;
2847 		fromstr = NEXTFROM;
2848 		auint = SvUV_no_inf(fromstr, datumtype);
2849                 PUSH_VAR(utf8, cur, auint, needs_swap);
2850 	    }
2851 	    break;
2852 	case 'j':
2853 	    while (len-- > 0) {
2854 		IV aiv;
2855 		fromstr = NEXTFROM;
2856 		aiv = SvIV_no_inf(fromstr, datumtype);
2857                 PUSH_VAR(utf8, cur, aiv, needs_swap);
2858 	    }
2859 	    break;
2860 	case 'J':
2861 	    while (len-- > 0) {
2862 		UV auv;
2863 		fromstr = NEXTFROM;
2864 		auv = SvUV_no_inf(fromstr, datumtype);
2865                 PUSH_VAR(utf8, cur, auv, needs_swap);
2866 	    }
2867 	    break;
2868 	case 'w':
2869             while (len-- > 0) {
2870 		NV anv;
2871 		fromstr = NEXTFROM;
2872 		S_sv_check_infnan(aTHX_ fromstr, datumtype);
2873 		anv = SvNV_nomg(fromstr);
2874 
2875 		if (anv < 0) {
2876 		    *cur = '\0';
2877 		    SvCUR_set(cat, cur - start);
2878 		    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2879 		}
2880 
2881                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2882                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2883                    any negative IVs will have already been got by the croak()
2884                    above. IOK is untrue for fractions, so we test them
2885                    against UV_MAX_P1.  */
2886 		if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2887 		    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
2888 		    char  *in = buf + sizeof(buf);
2889 		    UV     auv = SvUV_nomg(fromstr);
2890 
2891 		    do {
2892 			*--in = (char)((auv & 0x7f) | 0x80);
2893 			auv >>= 7;
2894 		    } while (auv);
2895 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2896 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2897 				       in, (buf + sizeof(buf)) - in);
2898 		} else if (SvPOKp(fromstr))
2899 		    goto w_string;
2900 		else if (SvNOKp(fromstr)) {
2901 		    /* 10**NV_MAX_10_EXP is the largest power of 10
2902 		       so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2903 		       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2904 		       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2905 		       And with that many bytes only Inf can overflow.
2906 		       Some C compilers are strict about integral constant
2907 		       expressions so we conservatively divide by a slightly
2908 		       smaller integer instead of multiplying by the exact
2909 		       floating-point value.
2910 		    */
2911 #ifdef NV_MAX_10_EXP
2912 		    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2913 		    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2914 #else
2915 		    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2916 		    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2917 #endif
2918 		    char  *in = buf + sizeof(buf);
2919 
2920 		    anv = Perl_floor(anv);
2921 		    do {
2922 			const NV next = Perl_floor(anv / 128);
2923 			if (in <= buf)  /* this cannot happen ;-) */
2924 			    Perl_croak(aTHX_ "Cannot compress integer in pack");
2925 			*--in = (unsigned char)(anv - (next * 128)) | 0x80;
2926 			anv = next;
2927 		    } while (anv > 0);
2928 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2929 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2930 				       in, (buf + sizeof(buf)) - in);
2931 		} else {
2932 		    const char     *from;
2933 		    char           *result, *in;
2934 		    SV             *norm;
2935 		    STRLEN          len;
2936 		    bool            done;
2937 
2938 		  w_string:
2939 		    /* Copy string and check for compliance */
2940 		    from = SvPV_nomg_const(fromstr, len);
2941 		    if ((norm = is_an_int(from, len)) == NULL)
2942 			Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2943 
2944 		    Newx(result, len, char);
2945 		    in = result + len;
2946 		    done = FALSE;
2947 		    while (!done) *--in = div128(norm, &done) | 0x80;
2948 		    result[len - 1] &= 0x7F; /* clear continue bit */
2949 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2950 				       in, (result + len) - in);
2951 		    Safefree(result);
2952 		    SvREFCNT_dec(norm);	/* free norm */
2953 		}
2954 	    }
2955             break;
2956 	case 'i':
2957 	case 'i' | TYPE_IS_SHRIEKING:
2958 	    while (len-- > 0) {
2959 		int aint;
2960 		fromstr = NEXTFROM;
2961 		aint = SvIV_no_inf(fromstr, datumtype);
2962                 PUSH_VAR(utf8, cur, aint, needs_swap);
2963 	    }
2964 	    break;
2965 	case 'N' | TYPE_IS_SHRIEKING:
2966 	case 'N':
2967 	    while (len-- > 0) {
2968 		U32 au32;
2969 		fromstr = NEXTFROM;
2970 		au32 = SvUV_no_inf(fromstr, datumtype);
2971 		au32 = PerlSock_htonl(au32);
2972                 PUSH32(utf8, cur, &au32, FALSE);
2973 	    }
2974 	    break;
2975 	case 'V' | TYPE_IS_SHRIEKING:
2976 	case 'V':
2977 	    while (len-- > 0) {
2978 		U32 au32;
2979 		fromstr = NEXTFROM;
2980 		au32 = SvUV_no_inf(fromstr, datumtype);
2981 		au32 = htovl(au32);
2982                 PUSH32(utf8, cur, &au32, FALSE);
2983 	    }
2984 	    break;
2985 	case 'L' | TYPE_IS_SHRIEKING:
2986 #if LONGSIZE != SIZE32
2987 	    while (len-- > 0) {
2988 		unsigned long aulong;
2989 		fromstr = NEXTFROM;
2990 		aulong = SvUV_no_inf(fromstr, datumtype);
2991                 PUSH_VAR(utf8, cur, aulong, needs_swap);
2992 	    }
2993 	    break;
2994 #else
2995             /* Fall though! */
2996 #endif
2997 	case 'L':
2998 	    while (len-- > 0) {
2999 		U32 au32;
3000 		fromstr = NEXTFROM;
3001 		au32 = SvUV_no_inf(fromstr, datumtype);
3002                 PUSH32(utf8, cur, &au32, needs_swap);
3003 	    }
3004 	    break;
3005 	case 'l' | TYPE_IS_SHRIEKING:
3006 #if LONGSIZE != SIZE32
3007 	    while (len-- > 0) {
3008 		long along;
3009 		fromstr = NEXTFROM;
3010 		along = SvIV_no_inf(fromstr, datumtype);
3011                 PUSH_VAR(utf8, cur, along, needs_swap);
3012 	    }
3013 	    break;
3014 #else
3015             /* Fall though! */
3016 #endif
3017 	case 'l':
3018             while (len-- > 0) {
3019 		I32 ai32;
3020 		fromstr = NEXTFROM;
3021 		ai32 = SvIV_no_inf(fromstr, datumtype);
3022                 PUSH32(utf8, cur, &ai32, needs_swap);
3023 	    }
3024 	    break;
3025 #if defined(HAS_QUAD) && IVSIZE >= 8
3026 	case 'Q':
3027 	    while (len-- > 0) {
3028 		Uquad_t auquad;
3029 		fromstr = NEXTFROM;
3030 		auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3031                 PUSH_VAR(utf8, cur, auquad, needs_swap);
3032 	    }
3033 	    break;
3034 	case 'q':
3035 	    while (len-- > 0) {
3036 		Quad_t aquad;
3037 		fromstr = NEXTFROM;
3038 		aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3039                 PUSH_VAR(utf8, cur, aquad, needs_swap);
3040 	    }
3041 	    break;
3042 #endif
3043 	case 'P':
3044 	    len = 1;		/* assume SV is correct length */
3045 	    GROWING(utf8, cat, start, cur, sizeof(char *));
3046 	    /* FALLTHROUGH */
3047 	case 'p':
3048 	    while (len-- > 0) {
3049 		const char *aptr;
3050 
3051 		fromstr = NEXTFROM;
3052 		SvGETMAGIC(fromstr);
3053 		if (!SvOK(fromstr)) aptr = NULL;
3054 		else {
3055 		    /* XXX better yet, could spirit away the string to
3056 		     * a safe spot and hang on to it until the result
3057 		     * of pack() (and all copies of the result) are
3058 		     * gone.
3059 		     */
3060 		    if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3061 			     !SvREADONLY(fromstr)))) {
3062 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3063 				       "Attempt to pack pointer to temporary value");
3064 		    }
3065 		    if (SvPOK(fromstr) || SvNIOK(fromstr))
3066 			aptr = SvPV_nomg_const_nolen(fromstr);
3067 		    else
3068 			aptr = SvPV_force_flags_nolen(fromstr, 0);
3069 		}
3070                 PUSH_VAR(utf8, cur, aptr, needs_swap);
3071 	    }
3072 	    break;
3073 	case 'u': {
3074 	    const char *aptr, *aend;
3075 	    bool from_utf8;
3076 
3077 	    fromstr = NEXTFROM;
3078 	    if (len <= 2) len = 45;
3079 	    else len = len / 3 * 3;
3080 	    if (len >= 64) {
3081 		Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3082 			       "Field too wide in 'u' format in pack");
3083 		len = 63;
3084 	    }
3085 	    aptr = SvPV_const(fromstr, fromlen);
3086 	    from_utf8 = DO_UTF8(fromstr);
3087 	    if (from_utf8) {
3088 		aend = aptr + fromlen;
3089 		fromlen = sv_len_utf8_nomg(fromstr);
3090 	    } else aend = NULL; /* Unused, but keep compilers happy */
3091 	    GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3092 	    while (fromlen > 0) {
3093 		U8 *end;
3094 		I32 todo;
3095 		U8 hunk[1+63/3*4+1];
3096 
3097 		if ((I32)fromlen > len)
3098 		    todo = len;
3099 		else
3100 		    todo = fromlen;
3101 		if (from_utf8) {
3102 		    char buffer[64];
3103 		    if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3104 				      'u' | TYPE_IS_PACK)) {
3105 			*cur = '\0';
3106 			SvCUR_set(cat, cur - start);
3107 			Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3108 				   "aptr=%p, aend=%p, buffer=%p, todo=%ld",
3109 				   aptr, aend, buffer, (long) todo);
3110 		    }
3111 		    end = doencodes(hunk, (const U8 *)buffer, todo);
3112 		} else {
3113 		    end = doencodes(hunk, (const U8 *)aptr, todo);
3114 		    aptr += todo;
3115 		}
3116 		PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3117 		fromlen -= todo;
3118 	    }
3119 	    break;
3120 	}
3121 	}
3122 	*cur = '\0';
3123 	SvCUR_set(cat, cur - start);
3124       no_change:
3125 	*symptr = lookahead;
3126     }
3127     return beglist;
3128 }
3129 #undef NEXTFROM
3130 
3131 
3132 PP(pp_pack)
3133 {
3134     dSP; dMARK; dORIGMARK; dTARGET;
3135     SV *cat = TARG;
3136     STRLEN fromlen;
3137     SV *pat_sv = *++MARK;
3138     const char *pat = SvPV_const(pat_sv, fromlen);
3139     const char *patend = pat + fromlen;
3140 
3141     MARK++;
3142     sv_setpvs(cat, "");
3143     SvUTF8_off(cat);
3144 
3145     packlist(cat, pat, patend, MARK, SP + 1);
3146 
3147     SvSETMAGIC(cat);
3148     SP = ORIGMARK;
3149     PUSHs(cat);
3150     RETURN;
3151 }
3152 
3153 /*
3154  * ex: set ts=8 sts=4 sw=4 et:
3155  */
3156