xref: /openbsd-src/gnu/usr.bin/perl/pp_pack.c (revision a0747c9f67a4ae71ccb71e62a28d1ea19e06a63c)
1 /*    pp_pack.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * He still hopefully carried some of his gear in his pack: a small tinder-box,
13  * two small shallow pans, the smaller fitting into the larger; inside them a
14  * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15  * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16  * some salt.
17  *
18  *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19  */
20 
21 /* This file contains pp ("push/pop") functions that
22  * execute the opcodes that make up a perl program. A typical pp function
23  * expects to find its arguments on the stack, and usually pushes its
24  * results onto the stack, hence the 'pp' terminology. Each OP structure
25  * contains a pointer to the relevant pp_foo() function.
26  *
27  * This particular file just contains pp_pack() and pp_unpack(). See the
28  * other pp*.c files for the rest of the pp_ functions.
29  */
30 
31 #include "EXTERN.h"
32 #define PERL_IN_PP_PACK_C
33 #include "perl.h"
34 
35 /* Types used by pack/unpack */
36 typedef enum {
37   e_no_len,     /* no length  */
38   e_number,     /* number, [] */
39   e_star        /* asterisk   */
40 } howlen_t;
41 
42 typedef struct tempsym {
43   const char*    patptr;   /* current template char */
44   const char*    patend;   /* one after last char   */
45   const char*    grpbeg;   /* 1st char of ()-group  */
46   const char*    grpend;   /* end of ()-group       */
47   I32      code;     /* template code (!<>)   */
48   U32      flags;    /* /=4, comma=2, pack=1  */
49                      /*   and group modifiers */
50   SSize_t  length;   /* length/repeat count   */
51   howlen_t howlen;   /* how length is given   */
52   int      level;    /* () nesting level      */
53   STRLEN   strbeg;   /* offset of group start */
54   struct tempsym *previous; /* previous group */
55 } tempsym_t;
56 
57 #define TEMPSYM_INIT(symptr, p, e, f) \
58     STMT_START {	\
59 	(symptr)->patptr   = (p);	\
60 	(symptr)->patend   = (e);	\
61 	(symptr)->grpbeg   = NULL;	\
62 	(symptr)->grpend   = NULL;	\
63 	(symptr)->grpend   = NULL;	\
64 	(symptr)->code     = 0;		\
65 	(symptr)->length   = 0;		\
66 	(symptr)->howlen   = e_no_len;	\
67 	(symptr)->level    = 0;		\
68 	(symptr)->flags    = (f);	\
69 	(symptr)->strbeg   = 0;		\
70 	(symptr)->previous = NULL;	\
71    } STMT_END
72 
73 typedef union {
74     NV nv;
75     U8 bytes[sizeof(NV)];
76 } NV_bytes;
77 
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79 typedef union {
80     long double ld;
81     U8 bytes[sizeof(long double)];
82 } ld_bytes;
83 #endif
84 
85 #ifndef CHAR_BIT
86 # define CHAR_BIT	8
87 #endif
88 /* Maximum number of bytes to which a byte can grow due to upgrade */
89 #define UTF8_EXPAND	2
90 
91 /*
92  * Offset for integer pack/unpack.
93  *
94  * On architectures where I16 and I32 aren't really 16 and 32 bits,
95  * which for now are all Crays, pack and unpack have to play games.
96  */
97 
98 /*
99  * These values are required for portability of pack() output.
100  * If they're not right on your machine, then pack() and unpack()
101  * wouldn't work right anyway; you'll need to apply the Cray hack.
102  * (I'd like to check them with #if, but you can't use sizeof() in
103  * the preprocessor.)  --???
104  */
105 /*
106     The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107     defines are now in config.h.  --Andy Dougherty  April 1998
108  */
109 #define SIZE16 2
110 #define SIZE32 4
111 
112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113    --jhi Feb 1999 */
114 
115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116 #  define OFF16(p)     ((char *) (p))
117 #  define OFF32(p)     ((char *) (p))
118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
119 #  define OFF16(p)	((char*)(p))
120 #  define OFF32(p)	((char*)(p))
121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
122 #  define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
123 #  define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
124 #else
125 #  error "bad cray byte order"
126 #endif
127 
128 #define PUSH16(utf8, cur, p, needs_swap)                        \
129        PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130 #define PUSH32(utf8, cur, p, needs_swap)                        \
131        PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
132 
133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
134 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678  /* little-endian */
136 #  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137 #else
138 #  error "Unsupported byteorder"
139         /* Need to add code here to re-instate mixed endian support.
140            NEEDS_SWAP would need to hold a flag indicating which action to
141            take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142            logic adding to deal with any mixed-endian transformations needed.
143         */
144 #endif
145 
146 /* Only to be used inside a loop (see the break) */
147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)	\
148 STMT_START {						\
149     if (UNLIKELY(utf8)) {                               \
150         if (!S_utf8_to_bytes(aTHX_ &s, strend,		\
151 	  (char *) (buf), len, datumtype)) break;	\
152     } else {						\
153         if (UNLIKELY(needs_swap))                       \
154             S_reverse_copy(s, (char *) (buf), len);     \
155         else                                            \
156             Copy(s, (char *) (buf), len, char);		\
157         s += len;					\
158     }							\
159 } STMT_END
160 
161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap)              \
162        SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
163 
164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap)              \
165        SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
166 
167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap)          \
168        SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
169 
170 #define PUSH_VAR(utf8, aptr, var, needs_swap)           \
171        PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
172 
173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174 #define MAX_SUB_TEMPLATE_LEVEL 100
175 
176 /* flags (note that type modifiers can also be used as flags!) */
177 #define FLAG_WAS_UTF8	      0x40
178 #define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
179 #define FLAG_UNPACK_ONLY_ONE  0x10
180 #define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
181 #define FLAG_SLASH            0x04
182 #define FLAG_COMMA            0x02
183 #define FLAG_PACK             0x01
184 
185 STATIC SV *
186 S_mul128(pTHX_ SV *sv, U8 m)
187 {
188   STRLEN          len;
189   char           *s = SvPV(sv, len);
190   char           *t;
191 
192   PERL_ARGS_ASSERT_MUL128;
193 
194   if (! memBEGINs(s, len, "0000")) {  /* need to grow sv */
195     SV * const tmpNew = newSVpvs("0000000000");
196 
197     sv_catsv(tmpNew, sv);
198     SvREFCNT_dec(sv);		/* free old sv */
199     sv = tmpNew;
200     s = SvPV(sv, len);
201   }
202   t = s + len - 1;
203   while (!*t)                   /* trailing '\0'? */
204     t--;
205   while (t > s) {
206     const U32 i = ((*t - '0') << 7) + m;
207     *(t--) = '0' + (char)(i % 10);
208     m = (char)(i / 10);
209   }
210   return (sv);
211 }
212 
213 /* Explosives and implosives. */
214 
215 #define ISUUCHAR(ch)    inRANGE(NATIVE_TO_LATIN1(ch),               \
216                                 NATIVE_TO_LATIN1(' '),              \
217                                 NATIVE_TO_LATIN1('a') - 1)
218 
219 /* type modifiers */
220 #define TYPE_IS_SHRIEKING	0x100
221 #define TYPE_IS_BIG_ENDIAN	0x200
222 #define TYPE_IS_LITTLE_ENDIAN	0x400
223 #define TYPE_IS_PACK		0x800
224 #define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
225 #define TYPE_MODIFIERS(t)	((t) & ~0xFF)
226 #define TYPE_NO_MODIFIERS(t)	((t) & 0xFF)
227 
228 # define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
229 # define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
230 
231 # define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
232 
233 #define PACK_SIZE_CANNOT_CSUM		0x80
234 #define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
235 #define PACK_SIZE_MASK			0x3F
236 
237 #include "packsizetables.inc"
238 
239 static void
240 S_reverse_copy(const char *src, char *dest, STRLEN len)
241 {
242     dest += len;
243     while (len--)
244         *--dest = *src++;
245 }
246 
247 STATIC U8
248 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
249 {
250     STRLEN retlen;
251     UV val;
252 
253     if (*s >= end) {
254 	goto croak;
255     }
256     val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
257 			 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
258     if (retlen == (STRLEN) -1)
259       croak:
260 	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261 		   (int) TYPE_NO_MODIFIERS(datumtype));
262     if (val >= 0x100) {
263 	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264 		       "Character in '%c' format wrapped in unpack",
265 		       (int) TYPE_NO_MODIFIERS(datumtype));
266 	val &= 0xff;
267     }
268     *s += retlen;
269     return (U8)val;
270 }
271 
272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273 	utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
274 	*(U8 *)(s)++)
275 
276 STATIC bool
277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
278 {
279     UV val;
280     STRLEN retlen;
281     const char *from = *s;
282     int bad = 0;
283     const U32 flags = ckWARN(WARN_UTF8) ?
284 	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285     const bool needs_swap = NEEDS_SWAP(datumtype);
286 
287     if (UNLIKELY(needs_swap))
288         buf += buf_len;
289 
290     for (;buf_len > 0; buf_len--) {
291 	if (from >= end) return FALSE;
292 	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293 	if (retlen == (STRLEN) -1) {
294 	    from += UTF8_SAFE_SKIP(from, end);
295 	    bad |= 1;
296 	} else from += retlen;
297 	if (val >= 0x100) {
298 	    bad |= 2;
299 	    val &= 0xff;
300 	}
301         if (UNLIKELY(needs_swap))
302             *(U8 *)--buf = (U8)val;
303         else
304             *(U8 *)buf++ = (U8)val;
305     }
306     /* We have enough characters for the buffer. Did we have problems ? */
307     if (bad) {
308 	if (bad & 1) {
309 	    /* Rewalk the string fragment while warning */
310 	    const char *ptr;
311 	    const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312 	    for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313 		if (ptr >= end) break;
314 		utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315 	    }
316 	    if (from > end) from = end;
317 	}
318 	if ((bad & 2))
319 	    Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320 				       WARN_PACK : WARN_UNPACK),
321 			   "Character(s) in '%c' format wrapped in %s",
322 			   (int) TYPE_NO_MODIFIERS(datumtype),
323 			   datumtype & TYPE_IS_PACK ? "pack" : "unpack");
324     }
325     *s = from;
326     return TRUE;
327 }
328 
329 STATIC char *
330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331     PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
332 
333     if (UNLIKELY(needs_swap)) {
334         const U8 *p = start + len;
335         while (p-- > start) {
336             append_utf8_from_native_byte(*p, (U8 **) & dest);
337         }
338     } else {
339         const U8 * const end = start + len;
340         while (start < end) {
341             append_utf8_from_native_byte(*start, (U8 **) & dest);
342             start++;
343         }
344     }
345     return dest;
346 }
347 
348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
349 STMT_START {							\
350     if (UNLIKELY(utf8))	                                        \
351 	(cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap);       \
352     else {							\
353         if (UNLIKELY(needs_swap))                               \
354             S_reverse_copy((char *)(buf), cur, len);            \
355         else                                                    \
356             Copy(buf, cur, len, char);				\
357 	(cur) += (len);						\
358     }								\
359 } STMT_END
360 
361 #define SAFE_UTF8_EXPAND(var)	\
362 STMT_START {				\
363     if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364         Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365     (var) = (var) * UTF8_EXPAND; \
366 } STMT_END
367 
368 #define GROWING2(utf8, cat, start, cur, item_size, item_count)	\
369 STMT_START {							\
370     if (SSize_t_MAX / (item_size) < (item_count))		\
371         Perl_croak(aTHX_ "%s", "Out of memory during pack()");	\
372     GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
373 } STMT_END
374 
375 #define GROWING(utf8, cat, start, cur, in_len)	\
376 STMT_START {					\
377     STRLEN glen = (in_len);			\
378     STRLEN catcur = (STRLEN)((cur) - (start));	\
379     if (utf8) SAFE_UTF8_EXPAND(glen);		\
380     if (SSize_t_MAX - glen < catcur)		\
381         Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382     if (catcur + glen >= SvLEN(cat)) {	\
383 	(start) = sv_exp_grow(cat, glen);	\
384 	(cur) = (start) + SvCUR(cat);		\
385     }						\
386 } STMT_END
387 
388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389 STMT_START {					\
390     const STRLEN glen = (in_len);		\
391     STRLEN gl = glen;				\
392     if (utf8) SAFE_UTF8_EXPAND(gl);		\
393     if ((cur) + gl >= (start) + SvLEN(cat)) {	\
394         *cur = '\0';				\
395         SvCUR_set((cat), (cur) - (start));	\
396 	(start) = sv_exp_grow(cat, gl);		\
397 	(cur) = (start) + SvCUR(cat);		\
398     }						\
399     PUSH_BYTES(utf8, cur, buf, glen, 0);        \
400 } STMT_END
401 
402 #define PUSH_BYTE(utf8, s, byte)		\
403 STMT_START {					\
404     if (utf8) {					\
405 	const U8 au8 = (byte);			\
406 	(s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
407     } else *(U8 *)(s)++ = (byte);		\
408 } STMT_END
409 
410 /* Only to be used inside a loop (see the break) */
411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
412 STMT_START {							\
413     STRLEN retlen;						\
414     if (str >= end) break;					\
415     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
416     if (retlen == (STRLEN) -1) {			        \
417 	*cur = '\0';						\
418 	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
419     }								\
420     str += retlen;						\
421 } STMT_END
422 
423 static const char *_action( const tempsym_t* symptr )
424 {
425     return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
426 }
427 
428 /* Returns the sizeof() struct described by pat */
429 STATIC SSize_t
430 S_measure_struct(pTHX_ tempsym_t* symptr)
431 {
432     SSize_t total = 0;
433 
434     PERL_ARGS_ASSERT_MEASURE_STRUCT;
435 
436     while (next_symbol(symptr)) {
437 	SSize_t len, size;
438 
439         switch (symptr->howlen) {
440 	  case e_star:
441    	    Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
442                         _action( symptr ) );
443 
444 	  default:
445 	    /* e_no_len and e_number */
446 	    len = symptr->length;
447 	    break;
448         }
449 
450 	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
451 	if (!size) {
452             SSize_t star;
453 	    /* endianness doesn't influence the size of a type */
454 	    switch(TYPE_NO_ENDIANNESS(symptr->code)) {
455 	    default:
456 		Perl_croak(aTHX_ "Invalid type '%c' in %s",
457 			   (int)TYPE_NO_MODIFIERS(symptr->code),
458                            _action( symptr ) );
459 	    case '.' | TYPE_IS_SHRIEKING:
460 	    case '@' | TYPE_IS_SHRIEKING:
461 	    case '@':
462 	    case '.':
463 	    case '/':
464 	    case 'U':			/* XXXX Is it correct? */
465 	    case 'w':
466 	    case 'u':
467 		Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
468 			   (int) TYPE_NO_MODIFIERS(symptr->code),
469                            _action( symptr ) );
470 	    case '%':
471 		size = 0;
472 		break;
473 	    case '(':
474 	    {
475 		tempsym_t savsym = *symptr;
476 		symptr->patptr = savsym.grpbeg;
477 		symptr->patend = savsym.grpend;
478 		/* XXXX Theoretically, we need to measure many times at
479 		   different positions, since the subexpression may contain
480 		   alignment commands, but be not of aligned length.
481 		   Need to detect this and croak().  */
482 		size = measure_struct(symptr);
483 		*symptr = savsym;
484 		break;
485 	    }
486 	    case 'X' | TYPE_IS_SHRIEKING:
487 		/* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
488 		 */
489 		if (!len)		/* Avoid division by 0 */
490 		    len = 1;
491 		len = total % len;	/* Assumed: the start is aligned. */
492 		/* FALLTHROUGH */
493 	    case 'X':
494 		size = -1;
495 		if (total < len)
496                     Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
497 		break;
498 	    case 'x' | TYPE_IS_SHRIEKING:
499 		if (!len)		/* Avoid division by 0 */
500 		    len = 1;
501 		star = total % len;	/* Assumed: the start is aligned. */
502 		if (star)		/* Other portable ways? */
503 		    len = len - star;
504 		else
505 		    len = 0;
506 		/* FALLTHROUGH */
507 	    case 'x':
508 	    case 'A':
509 	    case 'Z':
510 	    case 'a':
511 		size = 1;
512 		break;
513 	    case 'B':
514 	    case 'b':
515 		len = (len + 7)/8;
516 		size = 1;
517 		break;
518 	    case 'H':
519 	    case 'h':
520 		len = (len + 1)/2;
521 		size = 1;
522 		break;
523 
524 	    case 'P':
525 		len = 1;
526 		size = sizeof(char*);
527 		break;
528 	    }
529 	}
530 	total += len * size;
531     }
532     return total;
533 }
534 
535 
536 /* locate matching closing parenthesis or bracket
537  * returns char pointer to char after match, or NULL
538  */
539 STATIC const char *
540 S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
541 {
542     PERL_ARGS_ASSERT_GROUP_END;
543 
544     while (patptr < patend) {
545 	const char c = *patptr++;
546 
547 	if (isSPACE(c))
548 	    continue;
549 	else if (c == ender)
550 	    return patptr-1;
551 	else if (c == '#') {
552 	    while (patptr < patend && *patptr != '\n')
553 		patptr++;
554 	    continue;
555 	} else if (c == '(')
556 	    patptr = group_end(patptr, patend, ')') + 1;
557 	else if (c == '[')
558 	    patptr = group_end(patptr, patend, ']') + 1;
559     }
560     Perl_croak(aTHX_ "No group ending character '%c' found in template",
561                ender);
562     NOT_REACHED; /* NOTREACHED */
563 }
564 
565 
566 /* Convert unsigned decimal number to binary.
567  * Expects a pointer to the first digit and address of length variable
568  * Advances char pointer to 1st non-digit char and returns number
569  */
570 STATIC const char *
571 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
572 {
573   SSize_t len = *patptr++ - '0';
574 
575   PERL_ARGS_ASSERT_GET_NUM;
576 
577   while (isDIGIT(*patptr)) {
578     SSize_t nlen = (len * 10) + (*patptr++ - '0');
579     if (nlen < 0 || nlen/10 != len)
580       Perl_croak(aTHX_ "pack/unpack repeat count overflow");
581     len = nlen;
582   }
583   *lenptr = len;
584   return patptr;
585 }
586 
587 /* The marvellous template parsing routine: Using state stored in *symptr,
588  * locates next template code and count
589  */
590 STATIC bool
591 S_next_symbol(pTHX_ tempsym_t* symptr )
592 {
593   const char* patptr = symptr->patptr;
594   const char* const patend = symptr->patend;
595 
596   PERL_ARGS_ASSERT_NEXT_SYMBOL;
597 
598   symptr->flags &= ~FLAG_SLASH;
599 
600   while (patptr < patend) {
601     if (isSPACE(*patptr))
602       patptr++;
603     else if (*patptr == '#') {
604       patptr++;
605       while (patptr < patend && *patptr != '\n')
606 	patptr++;
607       if (patptr < patend)
608 	patptr++;
609     } else {
610       /* We should have found a template code */
611       I32 code = *patptr++ & 0xFF;
612       U32 inherited_modifiers = 0;
613 
614       if (code == ','){ /* grandfather in commas but with a warning */
615 	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
616           symptr->flags |= FLAG_COMMA;
617 	  Perl_warner(aTHX_ packWARN(WARN_UNPACK),
618 	 	      "Invalid type ',' in %s", _action( symptr ) );
619         }
620 	continue;
621       }
622 
623       /* for '(', skip to ')' */
624       if (code == '(') {
625         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
626           Perl_croak(aTHX_ "()-group starts with a count in %s",
627                         _action( symptr ) );
628         symptr->grpbeg = patptr;
629         patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
630         if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
631 	  Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
632                         _action( symptr ) );
633       }
634 
635       /* look for group modifiers to inherit */
636       if (TYPE_ENDIANNESS(symptr->flags)) {
637         if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
638           inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
639       }
640 
641       /* look for modifiers */
642       while (patptr < patend) {
643         const char *allowed;
644         I32 modifier;
645         switch (*patptr) {
646           case '!':
647             modifier = TYPE_IS_SHRIEKING;
648             allowed = "sSiIlLxXnNvV@.";
649             break;
650           case '>':
651             modifier = TYPE_IS_BIG_ENDIAN;
652             allowed = ENDIANNESS_ALLOWED_TYPES;
653             break;
654           case '<':
655             modifier = TYPE_IS_LITTLE_ENDIAN;
656             allowed = ENDIANNESS_ALLOWED_TYPES;
657             break;
658           default:
659             allowed = "";
660             modifier = 0;
661             break;
662         }
663 
664         if (modifier == 0)
665           break;
666 
667         if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
668           Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
669                         allowed, _action( symptr ) );
670 
671         if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
672           Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
673                      (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
674         else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
675                  TYPE_ENDIANNESS_MASK)
676           Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
677                      *patptr, _action( symptr ) );
678 
679         if ((code & modifier)) {
680 	    Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
681 			   "Duplicate modifier '%c' after '%c' in %s",
682 			   *patptr, (int) TYPE_NO_MODIFIERS(code),
683 			   _action( symptr ) );
684         }
685 
686         code |= modifier;
687         patptr++;
688       }
689 
690       /* inherit modifiers */
691       code |= inherited_modifiers;
692 
693       /* look for count and/or / */
694       if (patptr < patend) {
695 	if (isDIGIT(*patptr)) {
696  	  patptr = get_num( patptr, &symptr->length );
697           symptr->howlen = e_number;
698 
699         } else if (*patptr == '*') {
700           patptr++;
701           symptr->howlen = e_star;
702 
703         } else if (*patptr == '[') {
704           const char* lenptr = ++patptr;
705           symptr->howlen = e_number;
706           patptr = group_end( patptr, patend, ']' ) + 1;
707           /* what kind of [] is it? */
708           if (isDIGIT(*lenptr)) {
709             lenptr = get_num( lenptr, &symptr->length );
710             if( *lenptr != ']' )
711               Perl_croak(aTHX_ "Malformed integer in [] in %s",
712                             _action( symptr ) );
713           } else {
714             tempsym_t savsym = *symptr;
715             symptr->patend = patptr-1;
716             symptr->patptr = lenptr;
717             savsym.length = measure_struct(symptr);
718             *symptr = savsym;
719           }
720         } else {
721           symptr->howlen = e_no_len;
722           symptr->length = 1;
723         }
724 
725         /* try to find / */
726         while (patptr < patend) {
727           if (isSPACE(*patptr))
728             patptr++;
729           else if (*patptr == '#') {
730             patptr++;
731             while (patptr < patend && *patptr != '\n')
732 	      patptr++;
733             if (patptr < patend)
734 	      patptr++;
735           } else {
736             if (*patptr == '/') {
737               symptr->flags |= FLAG_SLASH;
738               patptr++;
739               if (patptr < patend &&
740                   (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
741                 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
742                             _action( symptr ) );
743             }
744             break;
745 	  }
746 	}
747       } else {
748         /* at end - no count, no / */
749         symptr->howlen = e_no_len;
750         symptr->length = 1;
751       }
752 
753       symptr->code = code;
754       symptr->patptr = patptr;
755       return TRUE;
756     }
757   }
758   symptr->patptr = patptr;
759   return FALSE;
760 }
761 
762 /*
763    There is no way to cleanly handle the case where we should process the
764    string per byte in its upgraded form while it's really in downgraded form
765    (e.g. estimates like strend-s as an upper bound for the number of
766    characters left wouldn't work). So if we foresee the need of this
767    (pattern starts with U or contains U0), we want to work on the encoded
768    version of the string. Users are advised to upgrade their pack string
769    themselves if they need to do a lot of unpacks like this on it
770 */
771 STATIC bool
772 need_utf8(const char *pat, const char *patend)
773 {
774     bool first = TRUE;
775 
776     PERL_ARGS_ASSERT_NEED_UTF8;
777 
778     while (pat < patend) {
779 	if (pat[0] == '#') {
780 	    pat++;
781 	    pat = (const char *) memchr(pat, '\n', patend-pat);
782 	    if (!pat) return FALSE;
783 	} else if (pat[0] == 'U') {
784 	    if (first || pat[1] == '0') return TRUE;
785 	} else first = FALSE;
786 	pat++;
787     }
788     return FALSE;
789 }
790 
791 STATIC char
792 first_symbol(const char *pat, const char *patend) {
793     PERL_ARGS_ASSERT_FIRST_SYMBOL;
794 
795     while (pat < patend) {
796 	if (pat[0] != '#') return pat[0];
797 	pat++;
798 	pat = (const char *) memchr(pat, '\n', patend-pat);
799 	if (!pat) return 0;
800 	pat++;
801     }
802     return 0;
803 }
804 
805 /*
806 
807 =head1 Pack and Unpack
808 
809 =for apidoc unpackstring
810 
811 The engine implementing the C<unpack()> Perl function.
812 
813 Using the template C<pat..patend>, this function unpacks the string
814 C<s..strend> into a number of mortal SVs, which it pushes onto the perl
815 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
816 C<SPAGAIN> after the call to this function).  It returns the number of
817 pushed elements.
818 
819 The C<strend> and C<patend> pointers should point to the byte following the
820 last character of each string.
821 
822 Although this function returns its values on the perl argument stack, it
823 doesn't take any parameters from that stack (and thus in particular
824 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
825 example).
826 
827 =cut */
828 
829 SSize_t
830 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
831 {
832     tempsym_t sym;
833 
834     PERL_ARGS_ASSERT_UNPACKSTRING;
835 
836     if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
837     else if (need_utf8(pat, patend)) {
838 	/* We probably should try to avoid this in case a scalar context call
839 	   wouldn't get to the "U0" */
840 	STRLEN len = strend - s;
841 	s = (char *) bytes_to_utf8((U8 *) s, &len);
842 	SAVEFREEPV(s);
843 	strend = s + len;
844 	flags |= FLAG_DO_UTF8;
845     }
846 
847     if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
848 	flags |= FLAG_PARSE_UTF8;
849 
850     TEMPSYM_INIT(&sym, pat, patend, flags);
851 
852     return unpack_rec(&sym, s, s, strend, NULL );
853 }
854 
855 STATIC SSize_t
856 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
857 {
858     dSP;
859     SV *sv = NULL;
860     const SSize_t start_sp_offset = SP - PL_stack_base;
861     howlen_t howlen;
862     SSize_t checksum = 0;
863     UV cuv = 0;
864     NV cdouble = 0.0;
865     const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
866     bool beyond = FALSE;
867     bool explicit_length;
868     const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
869     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
870 
871     PERL_ARGS_ASSERT_UNPACK_REC;
872 
873     symptr->strbeg = s - strbeg;
874 
875     while (next_symbol(symptr)) {
876 	packprops_t props;
877 	SSize_t len;
878         I32 datumtype = symptr->code;
879         bool needs_swap;
880 	/* do first one only unless in list context
881 	   / is implemented by unpacking the count, then popping it from the
882 	   stack, so must check that we're not in the middle of a /  */
883         if ( unpack_only_one
884 	     && (SP - PL_stack_base == start_sp_offset + 1)
885 	     && (datumtype != '/') )   /* XXX can this be omitted */
886             break;
887 
888         switch (howlen = symptr->howlen) {
889 	  case e_star:
890 	    len = strend - strbeg;	/* long enough */
891 	    break;
892 	  default:
893 	    /* e_no_len and e_number */
894 	    len = symptr->length;
895 	    break;
896         }
897 
898         explicit_length = TRUE;
899       redo_switch:
900         beyond = s >= strend;
901 
902 	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
903 	if (props) {
904 	    /* props nonzero means we can process this letter. */
905             const SSize_t size = props & PACK_SIZE_MASK;
906             const SSize_t howmany = (strend - s) / size;
907 	    if (len > howmany)
908 		len = howmany;
909 
910 	    if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
911 		if (len && unpack_only_one) len = 1;
912 		EXTEND(SP, len);
913 		EXTEND_MORTAL(len);
914 	    }
915 	}
916 
917         needs_swap = NEEDS_SWAP(datumtype);
918 
919 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
920 	default:
921 	    Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
922 
923 	case '%':
924 	    if (howlen == e_no_len)
925 		len = 16;		/* len is not specified */
926 	    checksum = len;
927 	    cuv = 0;
928 	    cdouble = 0;
929 	    continue;
930 
931 	case '(':
932 	{
933             tempsym_t savsym = *symptr;
934             const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
935 	    symptr->flags |= group_modifiers;
936             symptr->patend = savsym.grpend;
937 	    symptr->previous = &savsym;
938             symptr->level++;
939 	    PUTBACK;
940 	    if (len && unpack_only_one) len = 1;
941 	    while (len--) {
942   	        symptr->patptr = savsym.grpbeg;
943 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
944 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
945  	        unpack_rec(symptr, s, strbeg, strend, &s);
946                 if (s == strend && savsym.howlen == e_star)
947 		    break; /* No way to continue */
948 	    }
949 	    SPAGAIN;
950             savsym.flags = symptr->flags & ~group_modifiers;
951             *symptr = savsym;
952 	    break;
953 	}
954 	case '.' | TYPE_IS_SHRIEKING:
955 	case '.': {
956 	    const char *from;
957 	    SV *sv;
958 	    const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
959 	    if (howlen == e_star) from = strbeg;
960 	    else if (len <= 0) from = s;
961 	    else {
962 		tempsym_t *group = symptr;
963 
964 		while (--len && group) group = group->previous;
965 		from = group ? strbeg + group->strbeg : strbeg;
966 	    }
967 	    sv = from <= s ?
968 		newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
969 		newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
970 	    mXPUSHs(sv);
971 	    break;
972 	}
973 	case '@' | TYPE_IS_SHRIEKING:
974 	case '@':
975 	    s = strbeg + symptr->strbeg;
976 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
977 	    {
978 		while (len > 0) {
979 		    if (s >= strend)
980 			Perl_croak(aTHX_ "'@' outside of string in unpack");
981 		    s += UTF8SKIP(s);
982 		    len--;
983 		}
984 		if (s > strend)
985 		    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
986 	    } else {
987 		if (strend-s < len)
988 		    Perl_croak(aTHX_ "'@' outside of string in unpack");
989 		s += len;
990 	    }
991 	    break;
992  	case 'X' | TYPE_IS_SHRIEKING:
993  	    if (!len)			/* Avoid division by 0 */
994  		len = 1;
995 	    if (utf8) {
996 		const char *hop, *last;
997 		SSize_t l = len;
998 		hop = last = strbeg;
999 		while (hop < s) {
1000 		    hop += UTF8SKIP(hop);
1001 		    if (--l == 0) {
1002 			last = hop;
1003 			l = len;
1004 		    }
1005 		}
1006 		if (last > s)
1007 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1008 		s = last;
1009 		break;
1010 	    }
1011 	    len = (s - strbeg) % len;
1012  	    /* FALLTHROUGH */
1013 	case 'X':
1014 	    if (utf8) {
1015 		while (len > 0) {
1016 		    if (s <= strbeg)
1017 			Perl_croak(aTHX_ "'X' outside of string in unpack");
1018 		    while (--s, UTF8_IS_CONTINUATION(*s)) {
1019 			if (s <= strbeg)
1020 			    Perl_croak(aTHX_ "'X' outside of string in unpack");
1021 		    }
1022 		    len--;
1023 		}
1024 	    } else {
1025 		if (len > s - strbeg)
1026 		    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1027 		s -= len;
1028 	    }
1029 	    break;
1030  	case 'x' | TYPE_IS_SHRIEKING: {
1031             SSize_t ai32;
1032  	    if (!len)			/* Avoid division by 0 */
1033  		len = 1;
1034 	    if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1035 	    else      ai32 = (s - strbeg)                         % len;
1036 	    if (ai32 == 0) break;
1037 	    len -= ai32;
1038             }
1039  	    /* FALLTHROUGH */
1040 	case 'x':
1041 	    if (utf8) {
1042 		while (len>0) {
1043 		    if (s >= strend)
1044 			Perl_croak(aTHX_ "'x' outside of string in unpack");
1045 		    s += UTF8SKIP(s);
1046 		    len--;
1047 		}
1048 	    } else {
1049 		if (len > strend - s)
1050 		    Perl_croak(aTHX_ "'x' outside of string in unpack");
1051 		s += len;
1052 	    }
1053 	    break;
1054 	case '/':
1055 	    Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1056 
1057 	case 'A':
1058 	case 'Z':
1059 	case 'a':
1060 	    if (checksum) {
1061 		/* Preliminary length estimate is assumed done in 'W' */
1062 		if (len > strend - s) len = strend - s;
1063 		goto W_checksum;
1064 	    }
1065 	    if (utf8) {
1066 		SSize_t l;
1067 		const char *hop;
1068 		for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1069 		    if (hop >= strend) {
1070 			if (hop > strend)
1071 			    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1072 			break;
1073 		    }
1074 		}
1075 		if (hop > strend)
1076 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1077 		len = hop - s;
1078 	    } else if (len > strend - s)
1079 		len = strend - s;
1080 
1081 	    if (datumtype == 'Z') {
1082 		/* 'Z' strips stuff after first null */
1083 		const char *ptr, *end;
1084 		end = s + len;
1085 		for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1086 		sv = newSVpvn(s, ptr-s);
1087 		if (howlen == e_star) /* exact for 'Z*' */
1088 		    len = ptr-s + (ptr != strend ? 1 : 0);
1089 	    } else if (datumtype == 'A') {
1090 		/* 'A' strips both nulls and spaces */
1091 		const char *ptr;
1092 		if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1093                     for (ptr = s+len-1; ptr >= s; ptr--) {
1094                         if (   *ptr != 0
1095                             && !UTF8_IS_CONTINUATION(*ptr)
1096                             && !isSPACE_utf8_safe(ptr, strend))
1097                         {
1098                             break;
1099                         }
1100                     }
1101 		    if (ptr >= s) ptr += UTF8SKIP(ptr);
1102 		    else ptr++;
1103 		    if (ptr > s+len)
1104 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1105 		} else {
1106 		    for (ptr = s+len-1; ptr >= s; ptr--)
1107 			if (*ptr != 0 && !isSPACE(*ptr)) break;
1108 		    ptr++;
1109 		}
1110 		sv = newSVpvn(s, ptr-s);
1111 	    } else sv = newSVpvn(s, len);
1112 
1113 	    if (utf8) {
1114 		SvUTF8_on(sv);
1115 		/* Undo any upgrade done due to need_utf8() */
1116 		if (!(symptr->flags & FLAG_WAS_UTF8))
1117 		    sv_utf8_downgrade(sv, 0);
1118 	    }
1119 	    mXPUSHs(sv);
1120 	    s += len;
1121 	    break;
1122 	case 'B':
1123 	case 'b': {
1124 	    char *str;
1125 	    if (howlen == e_star || len > (strend - s) * 8)
1126 		len = (strend - s) * 8;
1127 	    if (checksum) {
1128 		if (utf8)
1129 		    while (len >= 8 && s < strend) {
1130 			cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1131 			len -= 8;
1132 		    }
1133 		else
1134 		    while (len >= 8) {
1135 			cuv += PL_bitcount[*(U8 *)s++];
1136 			len -= 8;
1137 		    }
1138 		if (len && s < strend) {
1139 		    U8 bits;
1140 		    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1141 		    if (datumtype == 'b')
1142 			while (len-- > 0) {
1143 			    if (bits & 1) cuv++;
1144 			    bits >>= 1;
1145 			}
1146 		    else
1147 			while (len-- > 0) {
1148 			    if (bits & 0x80) cuv++;
1149 			    bits <<= 1;
1150 			}
1151 		}
1152 		break;
1153 	    }
1154 
1155 	    sv = sv_2mortal(newSV(len ? len : 1));
1156 	    SvPOK_on(sv);
1157 	    str = SvPVX(sv);
1158 	    if (datumtype == 'b') {
1159 		U8 bits = 0;
1160 		const SSize_t ai32 = len;
1161 		for (len = 0; len < ai32; len++) {
1162 		    if (len & 7) bits >>= 1;
1163 		    else if (utf8) {
1164 			if (s >= strend) break;
1165 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1166 		    } else bits = *(U8 *) s++;
1167 		    *str++ = bits & 1 ? '1' : '0';
1168 		}
1169 	    } else {
1170 		U8 bits = 0;
1171 		const SSize_t ai32 = len;
1172 		for (len = 0; len < ai32; len++) {
1173 		    if (len & 7) bits <<= 1;
1174 		    else if (utf8) {
1175 			if (s >= strend) break;
1176 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1177 		    } else bits = *(U8 *) s++;
1178 		    *str++ = bits & 0x80 ? '1' : '0';
1179 		}
1180 	    }
1181 	    *str = '\0';
1182 	    SvCUR_set(sv, str - SvPVX_const(sv));
1183 	    XPUSHs(sv);
1184 	    break;
1185 	}
1186 	case 'H':
1187 	case 'h': {
1188 	    char *str = NULL;
1189 	    /* Preliminary length estimate, acceptable for utf8 too */
1190 	    if (howlen == e_star || len > (strend - s) * 2)
1191 		len = (strend - s) * 2;
1192 	    if (!checksum) {
1193 		sv = sv_2mortal(newSV(len ? len : 1));
1194 		SvPOK_on(sv);
1195 		str = SvPVX(sv);
1196 	    }
1197 	    if (datumtype == 'h') {
1198 		U8 bits = 0;
1199 		SSize_t ai32 = len;
1200 		for (len = 0; len < ai32; len++) {
1201 		    if (len & 1) bits >>= 4;
1202 		    else if (utf8) {
1203 			if (s >= strend) break;
1204 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1205 		    } else bits = * (U8 *) s++;
1206 		    if (!checksum)
1207 			*str++ = PL_hexdigit[bits & 15];
1208 		}
1209 	    } else {
1210 		U8 bits = 0;
1211 		const SSize_t ai32 = len;
1212 		for (len = 0; len < ai32; len++) {
1213 		    if (len & 1) bits <<= 4;
1214 		    else if (utf8) {
1215 			if (s >= strend) break;
1216 			bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1217 		    } else bits = *(U8 *) s++;
1218 		    if (!checksum)
1219 			*str++ = PL_hexdigit[(bits >> 4) & 15];
1220 		}
1221 	    }
1222 	    if (!checksum) {
1223 		*str = '\0';
1224 		SvCUR_set(sv, str - SvPVX_const(sv));
1225 		XPUSHs(sv);
1226 	    }
1227 	    break;
1228 	}
1229 	case 'C':
1230             if (len == 0) {
1231                 if (explicit_length)
1232 		    /* Switch to "character" mode */
1233 		    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1234 		break;
1235 	    }
1236 	    /* FALLTHROUGH */
1237 	case 'c':
1238 	    while (len-- > 0 && s < strend) {
1239 		int aint;
1240 		if (utf8)
1241 		  {
1242 		    STRLEN retlen;
1243 		    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1244 				 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1245 		    if (retlen == (STRLEN) -1)
1246 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1247 		    s += retlen;
1248 		  }
1249 		else
1250 		  aint = *(U8 *)(s)++;
1251 		if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1252 		    aint -= 256;
1253 		if (!checksum)
1254 		    mPUSHi(aint);
1255 		else if (checksum > bits_in_uv)
1256 		    cdouble += (NV)aint;
1257 		else
1258 		    cuv += aint;
1259 	    }
1260 	    break;
1261 	case 'W':
1262 	  W_checksum:
1263 	    if (utf8) {
1264 		while (len-- > 0 && s < strend) {
1265 		    STRLEN retlen;
1266 		    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1267 					 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1268 		    if (retlen == (STRLEN) -1)
1269 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1270 		    s += retlen;
1271 		    if (!checksum)
1272 			mPUSHu(val);
1273 		    else if (checksum > bits_in_uv)
1274 			cdouble += (NV) val;
1275 		    else
1276 			cuv += val;
1277 		}
1278 	    } else if (!checksum)
1279 		while (len-- > 0) {
1280 		    const U8 ch = *(U8 *) s++;
1281 		    mPUSHu(ch);
1282 	    }
1283 	    else if (checksum > bits_in_uv)
1284 		while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1285 	    else
1286 		while (len-- > 0) cuv += *(U8 *) s++;
1287 	    break;
1288 	case 'U':
1289 	    if (len == 0) {
1290                 if (explicit_length && howlen != e_star) {
1291 		    /* Switch to "bytes in UTF-8" mode */
1292 		    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1293 		    else
1294 			/* Should be impossible due to the need_utf8() test */
1295 			Perl_croak(aTHX_ "U0 mode on a byte string");
1296 		}
1297 		break;
1298 	    }
1299 	    if (len > strend - s) len = strend - s;
1300 	    if (!checksum) {
1301 		if (len && unpack_only_one) len = 1;
1302 		EXTEND(SP, len);
1303 		EXTEND_MORTAL(len);
1304 	    }
1305 	    while (len-- > 0 && s < strend) {
1306 		STRLEN retlen;
1307 		UV auv;
1308 		if (utf8) {
1309 		    U8 result[UTF8_MAXLEN+1];
1310 		    const char *ptr = s;
1311 		    STRLEN len;
1312 		    /* Bug: warns about bad utf8 even if we are short on bytes
1313 		       and will break out of the loop */
1314 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1315 				      'U'))
1316 			break;
1317 		    len = UTF8SKIP(result);
1318 		    if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1319 				      (char *) &result[1], len-1, 'U')) break;
1320 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
1321                                                        len,
1322                                                        &retlen,
1323                                                        UTF8_ALLOW_DEFAULT));
1324 		    s = ptr;
1325 		} else {
1326 		    auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
1327                                                        strend - s,
1328                                                        &retlen,
1329                                                        UTF8_ALLOW_DEFAULT));
1330 		    if (retlen == (STRLEN) -1)
1331 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1332 		    s += retlen;
1333 		}
1334 		if (!checksum)
1335 		    mPUSHu(auv);
1336 		else if (checksum > bits_in_uv)
1337 		    cdouble += (NV) auv;
1338 		else
1339 		    cuv += auv;
1340 	    }
1341 	    break;
1342 	case 's' | TYPE_IS_SHRIEKING:
1343 #if SHORTSIZE != SIZE16
1344 	    while (len-- > 0) {
1345 		short ashort;
1346                 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1347 		if (!checksum)
1348 		    mPUSHi(ashort);
1349 		else if (checksum > bits_in_uv)
1350 		    cdouble += (NV)ashort;
1351 		else
1352 		    cuv += ashort;
1353 	    }
1354 	    break;
1355 #else
1356 	    /* FALLTHROUGH */
1357 #endif
1358 	case 's':
1359 	    while (len-- > 0) {
1360 		I16 ai16;
1361 
1362 #if U16SIZE > SIZE16
1363 		ai16 = 0;
1364 #endif
1365                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1366 #if U16SIZE > SIZE16
1367 		if (ai16 > 32767)
1368 		    ai16 -= 65536;
1369 #endif
1370 		if (!checksum)
1371 		    mPUSHi(ai16);
1372 		else if (checksum > bits_in_uv)
1373 		    cdouble += (NV)ai16;
1374 		else
1375 		    cuv += ai16;
1376 	    }
1377 	    break;
1378 	case 'S' | TYPE_IS_SHRIEKING:
1379 #if SHORTSIZE != SIZE16
1380 	    while (len-- > 0) {
1381 		unsigned short aushort;
1382 		SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1383 		if (!checksum)
1384 		    mPUSHu(aushort);
1385 		else if (checksum > bits_in_uv)
1386 		    cdouble += (NV)aushort;
1387 		else
1388 		    cuv += aushort;
1389 	    }
1390 	    break;
1391 #else
1392             /* FALLTHROUGH */
1393 #endif
1394 	case 'v':
1395 	case 'n':
1396 	case 'S':
1397 	    while (len-- > 0) {
1398 		U16 au16;
1399 #if U16SIZE > SIZE16
1400 		au16 = 0;
1401 #endif
1402                 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1403 		if (datumtype == 'n')
1404 		    au16 = PerlSock_ntohs(au16);
1405 		if (datumtype == 'v')
1406 		    au16 = vtohs(au16);
1407 		if (!checksum)
1408 		    mPUSHu(au16);
1409 		else if (checksum > bits_in_uv)
1410 		    cdouble += (NV) au16;
1411 		else
1412 		    cuv += au16;
1413 	    }
1414 	    break;
1415 	case 'v' | TYPE_IS_SHRIEKING:
1416 	case 'n' | TYPE_IS_SHRIEKING:
1417 	    while (len-- > 0) {
1418 		I16 ai16;
1419 # if U16SIZE > SIZE16
1420 		ai16 = 0;
1421 # endif
1422                 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1423                 /* There should never be any byte-swapping here.  */
1424                 assert(!TYPE_ENDIANNESS(datumtype));
1425 		if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1426 		    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1427 		if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1428 		    ai16 = (I16) vtohs((U16) ai16);
1429 		if (!checksum)
1430 		    mPUSHi(ai16);
1431 		else if (checksum > bits_in_uv)
1432 		    cdouble += (NV) ai16;
1433 		else
1434 		    cuv += ai16;
1435 	    }
1436 	    break;
1437 	case 'i':
1438 	case 'i' | TYPE_IS_SHRIEKING:
1439 	    while (len-- > 0) {
1440 		int aint;
1441                 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1442 		if (!checksum)
1443 		    mPUSHi(aint);
1444 		else if (checksum > bits_in_uv)
1445 		    cdouble += (NV)aint;
1446 		else
1447 		    cuv += aint;
1448 	    }
1449 	    break;
1450 	case 'I':
1451 	case 'I' | TYPE_IS_SHRIEKING:
1452 	    while (len-- > 0) {
1453 		unsigned int auint;
1454                 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1455 		if (!checksum)
1456 		    mPUSHu(auint);
1457 		else if (checksum > bits_in_uv)
1458 		    cdouble += (NV)auint;
1459 		else
1460 		    cuv += auint;
1461 	    }
1462 	    break;
1463 	case 'j':
1464 	    while (len-- > 0) {
1465 		IV aiv;
1466                 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1467 		if (!checksum)
1468 		    mPUSHi(aiv);
1469 		else if (checksum > bits_in_uv)
1470 		    cdouble += (NV)aiv;
1471 		else
1472 		    cuv += aiv;
1473 	    }
1474 	    break;
1475 	case 'J':
1476 	    while (len-- > 0) {
1477 		UV auv;
1478                 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1479 		if (!checksum)
1480 		    mPUSHu(auv);
1481 		else if (checksum > bits_in_uv)
1482 		    cdouble += (NV)auv;
1483 		else
1484 		    cuv += auv;
1485 	    }
1486 	    break;
1487 	case 'l' | TYPE_IS_SHRIEKING:
1488 #if LONGSIZE != SIZE32
1489 	    while (len-- > 0) {
1490 		long along;
1491                 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1492 		if (!checksum)
1493 		    mPUSHi(along);
1494 		else if (checksum > bits_in_uv)
1495 		    cdouble += (NV)along;
1496 		else
1497 		    cuv += along;
1498 	    }
1499 	    break;
1500 #else
1501 	    /* FALLTHROUGH */
1502 #endif
1503 	case 'l':
1504 	    while (len-- > 0) {
1505 		I32 ai32;
1506 #if U32SIZE > SIZE32
1507 		ai32 = 0;
1508 #endif
1509                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1510 #if U32SIZE > SIZE32
1511 		if (ai32 > 2147483647) ai32 -= 4294967296;
1512 #endif
1513 		if (!checksum)
1514 		    mPUSHi(ai32);
1515 		else if (checksum > bits_in_uv)
1516 		    cdouble += (NV)ai32;
1517 		else
1518 		    cuv += ai32;
1519 	    }
1520 	    break;
1521 	case 'L' | TYPE_IS_SHRIEKING:
1522 #if LONGSIZE != SIZE32
1523 	    while (len-- > 0) {
1524 		unsigned long aulong;
1525                 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1526 		if (!checksum)
1527 		    mPUSHu(aulong);
1528 		else if (checksum > bits_in_uv)
1529 		    cdouble += (NV)aulong;
1530 		else
1531 		    cuv += aulong;
1532 	    }
1533 	    break;
1534 #else
1535             /* FALLTHROUGH */
1536 #endif
1537 	case 'V':
1538 	case 'N':
1539 	case 'L':
1540 	    while (len-- > 0) {
1541 		U32 au32;
1542 #if U32SIZE > SIZE32
1543 		au32 = 0;
1544 #endif
1545                 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1546 		if (datumtype == 'N')
1547 		    au32 = PerlSock_ntohl(au32);
1548 		if (datumtype == 'V')
1549 		    au32 = vtohl(au32);
1550 		if (!checksum)
1551 		    mPUSHu(au32);
1552 		else if (checksum > bits_in_uv)
1553 		    cdouble += (NV)au32;
1554 		else
1555 		    cuv += au32;
1556 	    }
1557 	    break;
1558 	case 'V' | TYPE_IS_SHRIEKING:
1559 	case 'N' | TYPE_IS_SHRIEKING:
1560 	    while (len-- > 0) {
1561 		I32 ai32;
1562 #if U32SIZE > SIZE32
1563 		ai32 = 0;
1564 #endif
1565                 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1566                 /* There should never be any byte swapping here.  */
1567                 assert(!TYPE_ENDIANNESS(datumtype));
1568 		if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1569 		    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1570 		if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1571 		    ai32 = (I32)vtohl((U32)ai32);
1572 		if (!checksum)
1573 		    mPUSHi(ai32);
1574 		else if (checksum > bits_in_uv)
1575 		    cdouble += (NV)ai32;
1576 		else
1577 		    cuv += ai32;
1578 	    }
1579 	    break;
1580 	case 'p':
1581 	    while (len-- > 0) {
1582 		const char *aptr;
1583                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1584 		/* newSVpv generates undef if aptr is NULL */
1585 		mPUSHs(newSVpv(aptr, 0));
1586 	    }
1587 	    break;
1588 	case 'w':
1589 	    {
1590 		UV auv = 0;
1591 		size_t bytes = 0;
1592 
1593 		while (len > 0 && s < strend) {
1594 		    U8 ch;
1595 		    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1596 		    auv = (auv << 7) | (ch & 0x7f);
1597                     /* UTF8_IS_XXXXX not right here because this is a BER, not
1598                      * UTF-8 format - using constant 0x80 */
1599 		    if (ch < 0x80) {
1600 			bytes = 0;
1601 			mPUSHu(auv);
1602 			len--;
1603 			auv = 0;
1604 			continue;
1605 		    }
1606 		    if (++bytes >= sizeof(UV)) {	/* promote to string */
1607 			const char *t;
1608 
1609 			sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1610                                                  (int)TYPE_DIGITS(UV), auv);
1611 			while (s < strend) {
1612 			    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1613 			    sv = mul128(sv, (U8)(ch & 0x7f));
1614 			    if (!(ch & 0x80)) {
1615 				bytes = 0;
1616 				break;
1617 			    }
1618 			}
1619 			t = SvPV_nolen_const(sv);
1620 			while (*t == '0')
1621 			    t++;
1622 			sv_chop(sv, t);
1623 			mPUSHs(sv);
1624 			len--;
1625 			auv = 0;
1626 		    }
1627 		}
1628 		if ((s >= strend) && bytes)
1629 		    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1630 	    }
1631 	    break;
1632 	case 'P':
1633 	    if (symptr->howlen == e_star)
1634 	        Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1635 	    EXTEND(SP, 1);
1636 	    if (s + sizeof(char*) <= strend) {
1637 		char *aptr;
1638                 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1639 		/* newSVpvn generates undef if aptr is NULL */
1640 		PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1641 	    }
1642 	    break;
1643 #if defined(HAS_QUAD) && IVSIZE >= 8
1644 	case 'q':
1645 	    while (len-- > 0) {
1646 		Quad_t aquad;
1647                 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1648 		if (!checksum)
1649                     mPUSHs(newSViv((IV)aquad));
1650 		else if (checksum > bits_in_uv)
1651 		    cdouble += (NV)aquad;
1652 		else
1653 		    cuv += aquad;
1654 	    }
1655 	    break;
1656 	case 'Q':
1657 	    while (len-- > 0) {
1658 		Uquad_t auquad;
1659                 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1660 		if (!checksum)
1661 		    mPUSHs(newSVuv((UV)auquad));
1662 		else if (checksum > bits_in_uv)
1663 		    cdouble += (NV)auquad;
1664 		else
1665 		    cuv += auquad;
1666 	    }
1667 	    break;
1668 #endif
1669 	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
1670 	case 'f':
1671 	    while (len-- > 0) {
1672 		float afloat;
1673                 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1674 		if (!checksum)
1675 		    mPUSHn(afloat);
1676 		else
1677 		    cdouble += afloat;
1678 	    }
1679 	    break;
1680 	case 'd':
1681 	    while (len-- > 0) {
1682 		double adouble;
1683                 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1684 		if (!checksum)
1685 		    mPUSHn(adouble);
1686 		else
1687 		    cdouble += adouble;
1688 	    }
1689 	    break;
1690 	case 'F':
1691 	    while (len-- > 0) {
1692 		NV_bytes anv;
1693                 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1694                             datumtype, needs_swap);
1695 		if (!checksum)
1696 		    mPUSHn(anv.nv);
1697 		else
1698 		    cdouble += anv.nv;
1699 	    }
1700 	    break;
1701 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
1702 	case 'D':
1703 	    while (len-- > 0) {
1704 		ld_bytes aldouble;
1705                 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1706                             sizeof(aldouble.bytes), datumtype, needs_swap);
1707                 /* The most common long double format, the x86 80-bit
1708                  * extended precision, has either 2 or 6 unused bytes,
1709                  * which may contain garbage, which may contain
1710                  * unintentional data.  While we do zero the bytes of
1711                  * the long double data in pack(), here in unpack() we
1712                  * don't, because it's really hard to envision that
1713                  * reading the long double off aldouble would be
1714                  * affected by the unused bytes.
1715                  *
1716                  * Note that trying to unpack 'long doubles' of 'long
1717                  * doubles' packed in another system is in the general
1718                  * case doomed without having more detail. */
1719 		if (!checksum)
1720 		    mPUSHn(aldouble.ld);
1721 		else
1722 		    cdouble += aldouble.ld;
1723 	    }
1724 	    break;
1725 #endif
1726 	case 'u':
1727 	    if (!checksum) {
1728                 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1729 		sv = sv_2mortal(newSV(l));
1730 		if (l) {
1731                     SvPOK_on(sv);
1732                     *SvEND(sv) = '\0';
1733                 }
1734 	    }
1735 
1736             /* Note that all legal uuencoded strings are ASCII printables, so
1737              * have the same representation under UTF-8 vs not.  This means we
1738              * can ignore UTF8ness on legal input.  For illegal we stop at the
1739              * first failure, and don't report where/what that is, so again we
1740              * can ignore UTF8ness */
1741 
1742             while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1743                 I32 a, b, c, d;
1744                 char hunk[3];
1745 
1746                 len = PL_uudmap[*(U8*)s++] & 077;
1747                 while (len > 0) {
1748                     if (s < strend && ISUUCHAR(*s))
1749                         a = PL_uudmap[*(U8*)s++] & 077;
1750                     else
1751                         a = 0;
1752                     if (s < strend && ISUUCHAR(*s))
1753                         b = PL_uudmap[*(U8*)s++] & 077;
1754                     else
1755                         b = 0;
1756                     if (s < strend && ISUUCHAR(*s))
1757                         c = PL_uudmap[*(U8*)s++] & 077;
1758                     else
1759                         c = 0;
1760                     if (s < strend && ISUUCHAR(*s))
1761                         d = PL_uudmap[*(U8*)s++] & 077;
1762                     else
1763                         d = 0;
1764                     hunk[0] = (char)((a << 2) | (b >> 4));
1765                     hunk[1] = (char)((b << 4) | (c >> 2));
1766                     hunk[2] = (char)((c << 6) | d);
1767                     if (!checksum)
1768                         sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1769                     len -= 3;
1770                 }
1771                 if (*s == '\n')
1772                     s++;
1773                 else	/* possible checksum byte */
1774                     if (s + 1 < strend && s[1] == '\n')
1775                         s += 2;
1776             }
1777 	    if (!checksum)
1778 		XPUSHs(sv);
1779 	    break;
1780 	} /* End of switch */
1781 
1782 	if (checksum) {
1783 	    if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1784 	      (checksum > bits_in_uv &&
1785 	       memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1786 		NV trouble, anv;
1787 
1788                 anv = (NV) (1 << (checksum & 15));
1789 		while (checksum >= 16) {
1790 		    checksum -= 16;
1791 		    anv *= 65536.0;
1792 		}
1793 		while (cdouble < 0.0)
1794 		    cdouble += anv;
1795 		cdouble = Perl_modf(cdouble / anv, &trouble);
1796 #ifdef LONGDOUBLE_DOUBLEDOUBLE
1797                 /* Workaround for powerpc doubledouble modfl bug:
1798                  * close to 1.0L and -1.0L cdouble is 0, and trouble
1799                  * is cdouble / anv. */
1800                 if (trouble != Perl_ceil(trouble)) {
1801                   cdouble = trouble;
1802                   if (cdouble >  1.0L) cdouble -= 1.0L;
1803                   if (cdouble < -1.0L) cdouble += 1.0L;
1804                 }
1805 #endif
1806                 cdouble *= anv;
1807 		sv = newSVnv(cdouble);
1808 	    }
1809 	    else {
1810 		if (checksum < bits_in_uv) {
1811 		    UV mask = ((UV)1 << checksum) - 1;
1812 		    cuv &= mask;
1813 		}
1814 		sv = newSVuv(cuv);
1815 	    }
1816 	    mXPUSHs(sv);
1817 	    checksum = 0;
1818 	}
1819 
1820         if (symptr->flags & FLAG_SLASH){
1821             if (SP - PL_stack_base - start_sp_offset <= 0)
1822 		break;
1823             if( next_symbol(symptr) ){
1824               if( symptr->howlen == e_number )
1825 		Perl_croak(aTHX_ "Count after length/code in unpack" );
1826               if( beyond ){
1827          	/* ...end of char buffer then no decent length available */
1828 		Perl_croak(aTHX_ "length/code after end of string in unpack" );
1829               } else {
1830          	/* take top of stack (hope it's numeric) */
1831                 len = POPi;
1832                 if( len < 0 )
1833                     Perl_croak(aTHX_ "Negative '/' count in unpack" );
1834               }
1835             } else {
1836 		Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1837             }
1838             datumtype = symptr->code;
1839             explicit_length = FALSE;
1840 	    goto redo_switch;
1841         }
1842     }
1843 
1844     if (new_s)
1845 	*new_s = s;
1846     PUTBACK;
1847     return SP - PL_stack_base - start_sp_offset;
1848 }
1849 
1850 PP(pp_unpack)
1851 {
1852     dSP;
1853     dPOPPOPssrl;
1854     U8 gimme = GIMME_V;
1855     STRLEN llen;
1856     STRLEN rlen;
1857     const char *pat = SvPV_const(left,  llen);
1858     const char *s   = SvPV_const(right, rlen);
1859     const char *strend = s + rlen;
1860     const char *patend = pat + llen;
1861     SSize_t cnt;
1862 
1863     PUTBACK;
1864     cnt = unpackstring(pat, patend, s, strend,
1865 		     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1866 		     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1867 
1868     SPAGAIN;
1869     if ( !cnt && gimme == G_SCALAR )
1870        PUSHs(&PL_sv_undef);
1871     RETURN;
1872 }
1873 
1874 STATIC U8 *
1875 doencodes(U8 *h, const U8 *s, SSize_t len)
1876 {
1877     *h++ = PL_uuemap[len];
1878     while (len > 2) {
1879 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1880 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1881 	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1882 	*h++ = PL_uuemap[(077 & (s[2] & 077))];
1883 	s += 3;
1884 	len -= 3;
1885     }
1886     if (len > 0) {
1887         const U8 r = (len > 1 ? s[1] : '\0');
1888 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
1889 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1890 	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1891 	*h++ = PL_uuemap[0];
1892     }
1893     *h++ = '\n';
1894     return h;
1895 }
1896 
1897 STATIC SV *
1898 S_is_an_int(pTHX_ const char *s, STRLEN l)
1899 {
1900   SV *result = newSVpvn(s, l);
1901   char *const result_c = SvPV_nolen(result);	/* convenience */
1902   char *out = result_c;
1903   bool skip = 1;
1904   bool ignore = 0;
1905 
1906   PERL_ARGS_ASSERT_IS_AN_INT;
1907 
1908   while (*s) {
1909     switch (*s) {
1910     case ' ':
1911       break;
1912     case '+':
1913       if (!skip) {
1914 	SvREFCNT_dec(result);
1915 	return (NULL);
1916       }
1917       break;
1918     case '0':
1919     case '1':
1920     case '2':
1921     case '3':
1922     case '4':
1923     case '5':
1924     case '6':
1925     case '7':
1926     case '8':
1927     case '9':
1928       skip = 0;
1929       if (!ignore) {
1930 	*(out++) = *s;
1931       }
1932       break;
1933     case '.':
1934       ignore = 1;
1935       break;
1936     default:
1937       SvREFCNT_dec(result);
1938       return (NULL);
1939     }
1940     s++;
1941   }
1942   *(out++) = '\0';
1943   SvCUR_set(result, out - result_c);
1944   return (result);
1945 }
1946 
1947 /* pnum must be '\0' terminated */
1948 STATIC int
1949 S_div128(pTHX_ SV *pnum, bool *done)
1950 {
1951     STRLEN len;
1952     char * const s = SvPV(pnum, len);
1953     char *t = s;
1954     int m = 0;
1955 
1956     PERL_ARGS_ASSERT_DIV128;
1957 
1958     *done = 1;
1959     while (*t) {
1960 	const int i = m * 10 + (*t - '0');
1961 	const int r = (i >> 7); /* r < 10 */
1962 	m = i & 0x7F;
1963 	if (r) {
1964 	    *done = 0;
1965 	}
1966 	*(t++) = '0' + r;
1967     }
1968     *(t++) = '\0';
1969     SvCUR_set(pnum, (STRLEN) (t - s));
1970     return (m);
1971 }
1972 
1973 /*
1974 =for apidoc packlist
1975 
1976 The engine implementing C<pack()> Perl function.
1977 
1978 =cut
1979 */
1980 
1981 void
1982 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1983 {
1984     tempsym_t sym;
1985 
1986     PERL_ARGS_ASSERT_PACKLIST;
1987 
1988     TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1989 
1990     /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1991        Also make sure any UTF8 flag is loaded */
1992     SvPV_force_nolen(cat);
1993     if (DO_UTF8(cat))
1994 	sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1995 
1996     (void)pack_rec( cat, &sym, beglist, endlist );
1997 }
1998 
1999 /* like sv_utf8_upgrade, but also repoint the group start markers */
2000 STATIC void
2001 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2002     STRLEN len;
2003     tempsym_t *group;
2004     const char *from_ptr, *from_start, *from_end, **marks, **m;
2005     char *to_start, *to_ptr;
2006 
2007     if (SvUTF8(sv)) return;
2008 
2009     from_start = SvPVX_const(sv);
2010     from_end = from_start + SvCUR(sv);
2011     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2012 	if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2013     if (from_ptr == from_end) {
2014 	/* Simple case: no character needs to be changed */
2015 	SvUTF8_on(sv);
2016 	return;
2017     }
2018 
2019     len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2020     Newx(to_start, len, char);
2021     Copy(from_start, to_start, from_ptr-from_start, char);
2022     to_ptr = to_start + (from_ptr-from_start);
2023 
2024     Newx(marks, sym_ptr->level+2, const char *);
2025     for (group=sym_ptr; group; group = group->previous)
2026 	marks[group->level] = from_start + group->strbeg;
2027     marks[sym_ptr->level+1] = from_end+1;
2028     for (m = marks; *m < from_ptr; m++)
2029 	*m = to_start + (*m-from_start);
2030 
2031     for (;from_ptr < from_end; from_ptr++) {
2032 	while (*m == from_ptr) *m++ = to_ptr;
2033 	to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2034     }
2035     *to_ptr = 0;
2036 
2037     while (*m == from_ptr) *m++ = to_ptr;
2038     if (m != marks + sym_ptr->level+1) {
2039 	Safefree(marks);
2040 	Safefree(to_start);
2041 	Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2042 		   "level=%d", m, marks, sym_ptr->level);
2043     }
2044     for (group=sym_ptr; group; group = group->previous)
2045 	group->strbeg = marks[group->level] - to_start;
2046     Safefree(marks);
2047 
2048     if (SvOOK(sv)) {
2049 	if (SvIVX(sv)) {
2050 	    SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2051 	    from_start -= SvIVX(sv);
2052 	    SvIV_set(sv, 0);
2053 	}
2054 	SvFLAGS(sv) &= ~SVf_OOK;
2055     }
2056     if (SvLEN(sv) != 0)
2057 	Safefree(from_start);
2058     SvPV_set(sv, to_start);
2059     SvCUR_set(sv, to_ptr - to_start);
2060     SvLEN_set(sv, len);
2061     SvUTF8_on(sv);
2062 }
2063 
2064 /* Exponential string grower. Makes string extension effectively O(n)
2065    needed says how many extra bytes we need (not counting the final '\0')
2066    Only grows the string if there is an actual lack of space
2067 */
2068 STATIC char *
2069 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2070     const STRLEN cur = SvCUR(sv);
2071     const STRLEN len = SvLEN(sv);
2072     STRLEN extend;
2073 
2074     PERL_ARGS_ASSERT_SV_EXP_GROW;
2075 
2076     if (len - cur > needed) return SvPVX(sv);
2077     extend = needed > len ? needed : len;
2078     return SvGROW(sv, len+extend+1);
2079 }
2080 
2081 static SV *
2082 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2083 {
2084     SvGETMAGIC(sv);
2085     if (UNLIKELY(SvAMAGIC(sv)))
2086 	sv = sv_2num(sv);
2087     if (UNLIKELY(isinfnansv(sv))) {
2088 	const I32 c = TYPE_NO_MODIFIERS(datumtype);
2089 	const NV nv = SvNV_nomg(sv);
2090 	if (c == 'w')
2091 	    Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2092 	else
2093 	    Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2094     }
2095     return sv;
2096 }
2097 
2098 #define SvIV_no_inf(sv,d) \
2099 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2100 #define SvUV_no_inf(sv,d) \
2101 	((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2102 
2103 STATIC
2104 SV **
2105 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2106 {
2107     tempsym_t lookahead;
2108     SSize_t items  = endlist - beglist;
2109     bool found = next_symbol(symptr);
2110     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2111     bool warn_utf8 = ckWARN(WARN_UTF8);
2112     char* from;
2113 
2114     PERL_ARGS_ASSERT_PACK_REC;
2115 
2116     if (symptr->level == 0 && found && symptr->code == 'U') {
2117 	marked_upgrade(aTHX_ cat, symptr);
2118 	symptr->flags |= FLAG_DO_UTF8;
2119 	utf8 = 0;
2120     }
2121     symptr->strbeg = SvCUR(cat);
2122 
2123     while (found) {
2124 	SV *fromstr;
2125 	STRLEN fromlen;
2126 	SSize_t len;
2127 	SV *lengthcode = NULL;
2128         I32 datumtype = symptr->code;
2129         howlen_t howlen = symptr->howlen;
2130 	char *start = SvPVX(cat);
2131 	char *cur   = start + SvCUR(cat);
2132         bool needs_swap;
2133 
2134 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2135 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2136 
2137         switch (howlen) {
2138 	  case e_star:
2139 	    len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2140 		0 : items;
2141 	    break;
2142 	  default:
2143 	    /* e_no_len and e_number */
2144 	    len = symptr->length;
2145 	    break;
2146         }
2147 
2148 	if (len) {
2149 	    packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2150 
2151 	    if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2152 		/* We can process this letter. */
2153 		STRLEN size = props & PACK_SIZE_MASK;
2154 		GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2155 	    }
2156         }
2157 
2158         /* Look ahead for next symbol. Do we have code/code? */
2159         lookahead = *symptr;
2160         found = next_symbol(&lookahead);
2161 	if (symptr->flags & FLAG_SLASH) {
2162 	    IV count;
2163 	    if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2164 	    if (memCHRs("aAZ", lookahead.code)) {
2165 		if (lookahead.howlen == e_number) count = lookahead.length;
2166 		else {
2167 		    if (items > 0) {
2168 			count = sv_len_utf8(*beglist);
2169 		    }
2170 		    else count = 0;
2171 		    if (lookahead.code == 'Z') count++;
2172 		}
2173 	    } else {
2174 		if (lookahead.howlen == e_number && lookahead.length < items)
2175 		    count = lookahead.length;
2176 		else count = items;
2177 	    }
2178 	    lookahead.howlen = e_number;
2179 	    lookahead.length = count;
2180 	    lengthcode = sv_2mortal(newSViv(count));
2181 	}
2182 
2183         needs_swap = NEEDS_SWAP(datumtype);
2184 
2185 	/* Code inside the switch must take care to properly update
2186 	   cat (CUR length and '\0' termination) if it updated *cur and
2187 	   doesn't simply leave using break */
2188 	switch (TYPE_NO_ENDIANNESS(datumtype)) {
2189 	default:
2190 	    Perl_croak(aTHX_ "Invalid type '%c' in pack",
2191 		       (int) TYPE_NO_MODIFIERS(datumtype));
2192 	case '%':
2193 	    Perl_croak(aTHX_ "'%%' may not be used in pack");
2194 
2195 	case '.' | TYPE_IS_SHRIEKING:
2196 	case '.':
2197 	    if (howlen == e_star) from = start;
2198 	    else if (len == 0) from = cur;
2199 	    else {
2200 		tempsym_t *group = symptr;
2201 
2202 		while (--len && group) group = group->previous;
2203 		from = group ? start + group->strbeg : start;
2204 	    }
2205 	    fromstr = NEXTFROM;
2206 	    len = SvIV_no_inf(fromstr, datumtype);
2207 	    goto resize;
2208 	case '@' | TYPE_IS_SHRIEKING:
2209 	case '@':
2210 	    from = start + symptr->strbeg;
2211 	  resize:
2212 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2213 		if (len >= 0) {
2214 		    while (len && from < cur) {
2215 			from += UTF8SKIP(from);
2216 			len--;
2217 		    }
2218 		    if (from > cur)
2219 			Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2220 		    if (len) {
2221 			/* Here we know from == cur */
2222 		      grow:
2223 			GROWING(0, cat, start, cur, len);
2224 			Zero(cur, len, char);
2225 			cur += len;
2226 		    } else if (from < cur) {
2227 			len = cur - from;
2228 			goto shrink;
2229 		    } else goto no_change;
2230 		} else {
2231 		    cur = from;
2232 		    len = -len;
2233 		    goto utf8_shrink;
2234 		}
2235 	    else {
2236 		len -= cur - from;
2237 		if (len > 0) goto grow;
2238 		if (len == 0) goto no_change;
2239 		len = -len;
2240 		goto shrink;
2241 	    }
2242 	    break;
2243 
2244 	case '(': {
2245             tempsym_t savsym = *symptr;
2246 	    U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2247 	    symptr->flags |= group_modifiers;
2248             symptr->patend = savsym.grpend;
2249             symptr->level++;
2250 	    symptr->previous = &lookahead;
2251 	    while (len--) {
2252 		U32 was_utf8;
2253 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2254 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
2255 		was_utf8 = SvUTF8(cat);
2256   	        symptr->patptr = savsym.grpbeg;
2257 		beglist = pack_rec(cat, symptr, beglist, endlist);
2258 		if (SvUTF8(cat) != was_utf8)
2259 		    /* This had better be an upgrade while in utf8==0 mode */
2260 		    utf8 = 1;
2261 
2262 		if (savsym.howlen == e_star && beglist == endlist)
2263 		    break;		/* No way to continue */
2264 	    }
2265 	    items = endlist - beglist;
2266 	    lookahead.flags  = symptr->flags & ~group_modifiers;
2267 	    goto no_change;
2268 	}
2269 	case 'X' | TYPE_IS_SHRIEKING:
2270 	    if (!len)			/* Avoid division by 0 */
2271 		len = 1;
2272 	    if (utf8) {
2273 		char *hop, *last;
2274 		SSize_t l = len;
2275 		hop = last = start;
2276 		while (hop < cur) {
2277 		    hop += UTF8SKIP(hop);
2278 		    if (--l == 0) {
2279 			last = hop;
2280 			l = len;
2281 		    }
2282 		}
2283 		if (last > cur)
2284 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2285 		cur = last;
2286 		break;
2287 	    }
2288 	    len = (cur-start) % len;
2289 	    /* FALLTHROUGH */
2290 	case 'X':
2291 	    if (utf8) {
2292 		if (len < 1) goto no_change;
2293 	      utf8_shrink:
2294 		while (len > 0) {
2295 		    if (cur <= start)
2296 			Perl_croak(aTHX_ "'%c' outside of string in pack",
2297 				   (int) TYPE_NO_MODIFIERS(datumtype));
2298 		    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2299 			if (cur <= start)
2300 			    Perl_croak(aTHX_ "'%c' outside of string in pack",
2301 				       (int) TYPE_NO_MODIFIERS(datumtype));
2302 		    }
2303 		    len--;
2304 		}
2305 	    } else {
2306 	      shrink:
2307 		if (cur - start < len)
2308 		    Perl_croak(aTHX_ "'%c' outside of string in pack",
2309 			       (int) TYPE_NO_MODIFIERS(datumtype));
2310 		cur -= len;
2311 	    }
2312 	    if (cur < start+symptr->strbeg) {
2313 		/* Make sure group starts don't point into the void */
2314 		tempsym_t *group;
2315 		const STRLEN length = cur-start;
2316 		for (group = symptr;
2317 		     group && length < group->strbeg;
2318 		     group = group->previous) group->strbeg = length;
2319 		lookahead.strbeg = length;
2320 	    }
2321 	    break;
2322 	case 'x' | TYPE_IS_SHRIEKING: {
2323 	    SSize_t ai32;
2324 	    if (!len)			/* Avoid division by 0 */
2325 		len = 1;
2326 	    if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2327 	    else      ai32 = (cur - start) % len;
2328 	    if (ai32 == 0) goto no_change;
2329 	    len -= ai32;
2330 	}
2331 	/* FALLTHROUGH */
2332 	case 'x':
2333 	    goto grow;
2334 	case 'A':
2335 	case 'Z':
2336 	case 'a': {
2337 	    const char *aptr;
2338 
2339 	    fromstr = NEXTFROM;
2340 	    aptr = SvPV_const(fromstr, fromlen);
2341 	    if (DO_UTF8(fromstr)) {
2342                 const char *end, *s;
2343 
2344 		if (!utf8 && !SvUTF8(cat)) {
2345 		    marked_upgrade(aTHX_ cat, symptr);
2346 		    lookahead.flags |= FLAG_DO_UTF8;
2347 		    lookahead.strbeg = symptr->strbeg;
2348 		    utf8 = 1;
2349 		    start = SvPVX(cat);
2350 		    cur = start + SvCUR(cat);
2351 		}
2352 		if (howlen == e_star) {
2353 		    if (utf8) goto string_copy;
2354 		    len = fromlen+1;
2355 		}
2356 		s = aptr;
2357 		end = aptr + fromlen;
2358 		fromlen = datumtype == 'Z' ? len-1 : len;
2359 		while ((SSize_t) fromlen > 0 && s < end) {
2360 		    s += UTF8SKIP(s);
2361 		    fromlen--;
2362 		}
2363 		if (s > end)
2364 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2365 		if (utf8) {
2366 		    len = fromlen;
2367 		    if (datumtype == 'Z') len++;
2368 		    fromlen = s-aptr;
2369 		    len += fromlen;
2370 
2371 		    goto string_copy;
2372 		}
2373 		fromlen = len - fromlen;
2374 		if (datumtype == 'Z') fromlen--;
2375 		if (howlen == e_star) {
2376 		    len = fromlen;
2377 		    if (datumtype == 'Z') len++;
2378 		}
2379 		GROWING(0, cat, start, cur, len);
2380 		if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2381 				  datumtype | TYPE_IS_PACK))
2382 		    Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2383 			       "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2384 			       (int)datumtype, aptr, end, cur, fromlen);
2385 		cur += fromlen;
2386 		len -= fromlen;
2387 	    } else if (utf8) {
2388 		if (howlen == e_star) {
2389 		    len = fromlen;
2390 		    if (datumtype == 'Z') len++;
2391 		}
2392 		if (len <= (SSize_t) fromlen) {
2393 		    fromlen = len;
2394 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2395 		}
2396 		/* assumes a byte expands to at most UTF8_EXPAND bytes on
2397 		   upgrade, so:
2398 		   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2399 		GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2400 		len -= fromlen;
2401 		while (fromlen > 0) {
2402 		    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2403 		    aptr++;
2404 		    fromlen--;
2405 		}
2406 	    } else {
2407 	      string_copy:
2408 		if (howlen == e_star) {
2409 		    len = fromlen;
2410 		    if (datumtype == 'Z') len++;
2411 		}
2412 		if (len <= (SSize_t) fromlen) {
2413 		    fromlen = len;
2414 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2415 		}
2416 		GROWING(0, cat, start, cur, len);
2417 		Copy(aptr, cur, fromlen, char);
2418 		cur += fromlen;
2419 		len -= fromlen;
2420 	    }
2421 	    memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2422 	    cur += len;
2423 	    SvTAINT(cat);
2424 	    break;
2425 	}
2426 	case 'B':
2427 	case 'b': {
2428 	    const char *str, *end;
2429 	    SSize_t l, field_len;
2430 	    U8 bits;
2431 	    bool utf8_source;
2432 	    U32 utf8_flags;
2433 
2434 	    fromstr = NEXTFROM;
2435 	    str = SvPV_const(fromstr, fromlen);
2436 	    end = str + fromlen;
2437 	    if (DO_UTF8(fromstr)) {
2438 		utf8_source = TRUE;
2439 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2440 	    } else {
2441 		utf8_source = FALSE;
2442 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2443 	    }
2444 	    if (howlen == e_star) len = fromlen;
2445 	    field_len = (len+7)/8;
2446 	    GROWING(utf8, cat, start, cur, field_len);
2447 	    if (len > (SSize_t)fromlen) len = fromlen;
2448 	    bits = 0;
2449 	    l = 0;
2450 	    if (datumtype == 'B')
2451 		while (l++ < len) {
2452 		    if (utf8_source) {
2453 			UV val = 0;
2454 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2455 			bits |= val & 1;
2456 		    } else bits |= *str++ & 1;
2457 		    if (l & 7) bits <<= 1;
2458 		    else {
2459 			PUSH_BYTE(utf8, cur, bits);
2460 			bits = 0;
2461 		    }
2462 		}
2463 	    else
2464 		/* datumtype == 'b' */
2465 		while (l++ < len) {
2466 		    if (utf8_source) {
2467 			UV val = 0;
2468 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2469 			if (val & 1) bits |= 0x80;
2470 		    } else if (*str++ & 1)
2471 			bits |= 0x80;
2472 		    if (l & 7) bits >>= 1;
2473 		    else {
2474 			PUSH_BYTE(utf8, cur, bits);
2475 			bits = 0;
2476 		    }
2477 		}
2478 	    l--;
2479 	    if (l & 7) {
2480 		if (datumtype == 'B')
2481 		    bits <<= 7 - (l & 7);
2482 		else
2483 		    bits >>= 7 - (l & 7);
2484 		PUSH_BYTE(utf8, cur, bits);
2485 		l += 7;
2486 	    }
2487 	    /* Determine how many chars are left in the requested field */
2488 	    l /= 8;
2489 	    if (howlen == e_star) field_len = 0;
2490 	    else field_len -= l;
2491 	    Zero(cur, field_len, char);
2492 	    cur += field_len;
2493 	    break;
2494 	}
2495 	case 'H':
2496 	case 'h': {
2497 	    const char *str, *end;
2498 	    SSize_t l, field_len;
2499 	    U8 bits;
2500 	    bool utf8_source;
2501 	    U32 utf8_flags;
2502 
2503 	    fromstr = NEXTFROM;
2504 	    str = SvPV_const(fromstr, fromlen);
2505 	    end = str + fromlen;
2506 	    if (DO_UTF8(fromstr)) {
2507 		utf8_source = TRUE;
2508 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2509 	    } else {
2510 		utf8_source = FALSE;
2511 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2512 	    }
2513 	    if (howlen == e_star) len = fromlen;
2514 	    field_len = (len+1)/2;
2515 	    GROWING(utf8, cat, start, cur, field_len);
2516 	    if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2517 	    bits = 0;
2518 	    l = 0;
2519 	    if (datumtype == 'H')
2520 		while (l++ < len) {
2521 		    if (utf8_source) {
2522 			UV val = 0;
2523 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2524 			if (val < 256 && isALPHA(val))
2525 			    bits |= (val + 9) & 0xf;
2526 			else
2527 			    bits |= val & 0xf;
2528 		    } else if (isALPHA(*str))
2529 			bits |= (*str++ + 9) & 0xf;
2530 		    else
2531 			bits |= *str++ & 0xf;
2532 		    if (l & 1) bits <<= 4;
2533 		    else {
2534 			PUSH_BYTE(utf8, cur, bits);
2535 			bits = 0;
2536 		    }
2537 		}
2538 	    else
2539 		while (l++ < len) {
2540 		    if (utf8_source) {
2541 			UV val = 0;
2542 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2543 			if (val < 256 && isALPHA(val))
2544 			    bits |= ((val + 9) & 0xf) << 4;
2545 			else
2546 			    bits |= (val & 0xf) << 4;
2547 		    } else if (isALPHA(*str))
2548 			bits |= ((*str++ + 9) & 0xf) << 4;
2549 		    else
2550 			bits |= (*str++ & 0xf) << 4;
2551 		    if (l & 1) bits >>= 4;
2552 		    else {
2553 			PUSH_BYTE(utf8, cur, bits);
2554 			bits = 0;
2555 		    }
2556 		}
2557 	    l--;
2558 	    if (l & 1) {
2559 		PUSH_BYTE(utf8, cur, bits);
2560 		l++;
2561 	    }
2562 	    /* Determine how many chars are left in the requested field */
2563 	    l /= 2;
2564 	    if (howlen == e_star) field_len = 0;
2565 	    else field_len -= l;
2566 	    Zero(cur, field_len, char);
2567 	    cur += field_len;
2568 	    break;
2569 	}
2570 	case 'c':
2571 	    while (len-- > 0) {
2572 		IV aiv;
2573 		fromstr = NEXTFROM;
2574                 aiv = SvIV_no_inf(fromstr, datumtype);
2575 		if ((-128 > aiv || aiv > 127))
2576 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2577 				   "Character in 'c' format wrapped in pack");
2578 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2579 	    }
2580 	    break;
2581 	case 'C':
2582 	    if (len == 0) {
2583 		utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2584 		break;
2585 	    }
2586 	    while (len-- > 0) {
2587 		IV aiv;
2588 		fromstr = NEXTFROM;
2589                 aiv = SvIV_no_inf(fromstr, datumtype);
2590 		if ((0 > aiv || aiv > 0xff))
2591 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2592 				   "Character in 'C' format wrapped in pack");
2593 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2594 	    }
2595 	    break;
2596 	case 'W': {
2597 	    char *end;
2598 	    U8 in_bytes = (U8)IN_BYTES;
2599 
2600 	    end = start+SvLEN(cat)-1;
2601 	    if (utf8) end -= UTF8_MAXLEN-1;
2602 	    while (len-- > 0) {
2603 		UV auv;
2604 		fromstr = NEXTFROM;
2605 		auv = SvUV_no_inf(fromstr, datumtype);
2606 		if (in_bytes) auv = auv % 0x100;
2607 		if (utf8) {
2608 		  W_utf8:
2609 		    if (cur >= end) {
2610 			*cur = '\0';
2611 			SvCUR_set(cat, cur - start);
2612 
2613 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2614 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2615 		    }
2616 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2617 		} else {
2618 		    if (auv >= 0x100) {
2619 			if (!SvUTF8(cat)) {
2620 			    *cur = '\0';
2621 			    SvCUR_set(cat, cur - start);
2622 			    marked_upgrade(aTHX_ cat, symptr);
2623 			    lookahead.flags |= FLAG_DO_UTF8;
2624 			    lookahead.strbeg = symptr->strbeg;
2625 			    utf8 = 1;
2626 			    start = SvPVX(cat);
2627 			    cur = start + SvCUR(cat);
2628 			    end = start+SvLEN(cat)-UTF8_MAXLEN;
2629 			    goto W_utf8;
2630 			}
2631 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2632 				       "Character in 'W' format wrapped in pack");
2633 			auv &= 0xff;
2634 		    }
2635 		    if (cur >= end) {
2636 			*cur = '\0';
2637 			SvCUR_set(cat, cur - start);
2638 			GROWING(0, cat, start, cur, len+1);
2639 			end = start+SvLEN(cat)-1;
2640 		    }
2641 		    *(U8 *) cur++ = (U8)auv;
2642 		}
2643 	    }
2644 	    break;
2645 	}
2646 	case 'U': {
2647 	    char *end;
2648 
2649 	    if (len == 0) {
2650 		if (!(symptr->flags & FLAG_DO_UTF8)) {
2651 		    marked_upgrade(aTHX_ cat, symptr);
2652 		    lookahead.flags |= FLAG_DO_UTF8;
2653 		    lookahead.strbeg = symptr->strbeg;
2654 		}
2655 		utf8 = 0;
2656 		goto no_change;
2657 	    }
2658 
2659 	    end = start+SvLEN(cat);
2660 	    if (!utf8) end -= UTF8_MAXLEN;
2661 	    while (len-- > 0) {
2662 		UV auv;
2663 		fromstr = NEXTFROM;
2664 		auv = SvUV_no_inf(fromstr, datumtype);
2665 		if (utf8) {
2666 		    U8 buffer[UTF8_MAXLEN+1], *endb;
2667 		    endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
2668 		    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2669 			*cur = '\0';
2670 			SvCUR_set(cat, cur - start);
2671 			GROWING(0, cat, start, cur,
2672 				len+(endb-buffer)*UTF8_EXPAND);
2673 			end = start+SvLEN(cat);
2674 		    }
2675                     cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2676 		} else {
2677 		    if (cur >= end) {
2678 			*cur = '\0';
2679 			SvCUR_set(cat, cur - start);
2680 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2681 			end = start+SvLEN(cat)-UTF8_MAXLEN;
2682 		    }
2683 		    cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
2684                                                        UNI_TO_NATIVE(auv),
2685 						       0);
2686 		}
2687 	    }
2688 	    break;
2689 	}
2690 	/* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
2691 	case 'f':
2692 	    while (len-- > 0) {
2693 		float afloat;
2694 		NV anv;
2695 		fromstr = NEXTFROM;
2696 		anv = SvNV(fromstr);
2697 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2698 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2699 		 * on Alpha; fake it if we don't have them.
2700 		 */
2701 		if (anv > FLT_MAX)
2702 		    afloat = FLT_MAX;
2703 		else if (anv < -FLT_MAX)
2704 		    afloat = -FLT_MAX;
2705 		else afloat = (float)anv;
2706 # else
2707 #  if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2708 		if(Perl_isnan(anv))
2709 		    afloat = (float)NV_NAN;
2710 		else
2711 #  endif
2712 #  ifdef NV_INF
2713                 /* a simple cast to float is undefined if outside
2714                  * the range of values that can be represented */
2715 		afloat = (float)(anv >  FLT_MAX ?  NV_INF :
2716                                  anv < -FLT_MAX ? -NV_INF : anv);
2717 #  endif
2718 # endif
2719                 PUSH_VAR(utf8, cur, afloat, needs_swap);
2720 	    }
2721 	    break;
2722 	case 'd':
2723 	    while (len-- > 0) {
2724 		double adouble;
2725 		NV anv;
2726 		fromstr = NEXTFROM;
2727 		anv = SvNV(fromstr);
2728 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2729 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
2730 		 * on Alpha; fake it if we don't have them.
2731 		 */
2732 		if (anv > DBL_MAX)
2733 		    adouble = DBL_MAX;
2734 		else if (anv < -DBL_MAX)
2735 		    adouble = -DBL_MAX;
2736 		else adouble = (double)anv;
2737 # else
2738 		adouble = (double)anv;
2739 # endif
2740                 PUSH_VAR(utf8, cur, adouble, needs_swap);
2741 	    }
2742 	    break;
2743 	case 'F': {
2744 	    NV_bytes anv;
2745 	    Zero(&anv, 1, NV); /* can be long double with unused bits */
2746 	    while (len-- > 0) {
2747 		fromstr = NEXTFROM;
2748 #ifdef __GNUC__
2749 		/* to work round a gcc/x86 bug; don't use SvNV */
2750 		anv.nv = sv_2nv(fromstr);
2751 #    if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2752          && LONG_DOUBLESIZE > 10
2753                 /* GCC sometimes overwrites the padding in the
2754                    assignment above */
2755                 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2756 #    endif
2757 #else
2758 		anv.nv = SvNV(fromstr);
2759 #endif
2760                 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2761 	    }
2762 	    break;
2763 	}
2764 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2765 	case 'D': {
2766 	    ld_bytes aldouble;
2767 	    /* long doubles can have unused bits, which may be nonzero */
2768 	    Zero(&aldouble, 1, long double);
2769 	    while (len-- > 0) {
2770 		fromstr = NEXTFROM;
2771 #  ifdef __GNUC__
2772 		/* to work round a gcc/x86 bug; don't use SvNV */
2773 		aldouble.ld = (long double)sv_2nv(fromstr);
2774 #    if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2775                 /* GCC sometimes overwrites the padding in the
2776                    assignment above */
2777                 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2778 #    endif
2779 #  else
2780 		aldouble.ld = (long double)SvNV(fromstr);
2781 #  endif
2782                 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2783                            needs_swap);
2784 	    }
2785 	    break;
2786 	}
2787 #endif
2788 	case 'n' | TYPE_IS_SHRIEKING:
2789 	case 'n':
2790 	    while (len-- > 0) {
2791 		I16 ai16;
2792 		fromstr = NEXTFROM;
2793 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2794 		ai16 = PerlSock_htons(ai16);
2795                 PUSH16(utf8, cur, &ai16, FALSE);
2796 	    }
2797 	    break;
2798 	case 'v' | TYPE_IS_SHRIEKING:
2799 	case 'v':
2800 	    while (len-- > 0) {
2801 		I16 ai16;
2802 		fromstr = NEXTFROM;
2803 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2804 		ai16 = htovs(ai16);
2805                 PUSH16(utf8, cur, &ai16, FALSE);
2806 	    }
2807 	    break;
2808         case 'S' | TYPE_IS_SHRIEKING:
2809 #if SHORTSIZE != SIZE16
2810 	    while (len-- > 0) {
2811 		unsigned short aushort;
2812 		fromstr = NEXTFROM;
2813 		aushort = SvUV_no_inf(fromstr, datumtype);
2814                 PUSH_VAR(utf8, cur, aushort, needs_swap);
2815 	    }
2816             break;
2817 #else
2818             /* FALLTHROUGH */
2819 #endif
2820 	case 'S':
2821 	    while (len-- > 0) {
2822 		U16 au16;
2823 		fromstr = NEXTFROM;
2824 		au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2825                 PUSH16(utf8, cur, &au16, needs_swap);
2826 	    }
2827 	    break;
2828 	case 's' | TYPE_IS_SHRIEKING:
2829 #if SHORTSIZE != SIZE16
2830 	    while (len-- > 0) {
2831 		short ashort;
2832 		fromstr = NEXTFROM;
2833 		ashort = SvIV_no_inf(fromstr, datumtype);
2834                 PUSH_VAR(utf8, cur, ashort, needs_swap);
2835 	    }
2836             break;
2837 #else
2838             /* FALLTHROUGH */
2839 #endif
2840 	case 's':
2841 	    while (len-- > 0) {
2842 		I16 ai16;
2843 		fromstr = NEXTFROM;
2844 		ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2845                 PUSH16(utf8, cur, &ai16, needs_swap);
2846 	    }
2847 	    break;
2848 	case 'I':
2849 	case 'I' | TYPE_IS_SHRIEKING:
2850 	    while (len-- > 0) {
2851 		unsigned int auint;
2852 		fromstr = NEXTFROM;
2853 		auint = SvUV_no_inf(fromstr, datumtype);
2854                 PUSH_VAR(utf8, cur, auint, needs_swap);
2855 	    }
2856 	    break;
2857 	case 'j':
2858 	    while (len-- > 0) {
2859 		IV aiv;
2860 		fromstr = NEXTFROM;
2861 		aiv = SvIV_no_inf(fromstr, datumtype);
2862                 PUSH_VAR(utf8, cur, aiv, needs_swap);
2863 	    }
2864 	    break;
2865 	case 'J':
2866 	    while (len-- > 0) {
2867 		UV auv;
2868 		fromstr = NEXTFROM;
2869 		auv = SvUV_no_inf(fromstr, datumtype);
2870                 PUSH_VAR(utf8, cur, auv, needs_swap);
2871 	    }
2872 	    break;
2873 	case 'w':
2874             while (len-- > 0) {
2875 		NV anv;
2876 		fromstr = NEXTFROM;
2877 		S_sv_check_infnan(aTHX_ fromstr, datumtype);
2878 		anv = SvNV_nomg(fromstr);
2879 
2880 		if (anv < 0) {
2881 		    *cur = '\0';
2882 		    SvCUR_set(cat, cur - start);
2883 		    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2884 		}
2885 
2886                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2887                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2888                    any negative IVs will have already been got by the croak()
2889                    above. IOK is untrue for fractions, so we test them
2890                    against UV_MAX_P1.  */
2891 		if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2892 		    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
2893 		    char  *in = buf + sizeof(buf);
2894 		    UV     auv = SvUV_nomg(fromstr);
2895 
2896 		    do {
2897 			*--in = (char)((auv & 0x7f) | 0x80);
2898 			auv >>= 7;
2899 		    } while (auv);
2900 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2901 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2902 				       in, (buf + sizeof(buf)) - in);
2903 		} else if (SvPOKp(fromstr))
2904 		    goto w_string;
2905 		else if (SvNOKp(fromstr)) {
2906 		    /* 10**NV_MAX_10_EXP is the largest power of 10
2907 		       so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2908 		       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2909 		       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2910 		       And with that many bytes only Inf can overflow.
2911 		       Some C compilers are strict about integral constant
2912 		       expressions so we conservatively divide by a slightly
2913 		       smaller integer instead of multiplying by the exact
2914 		       floating-point value.
2915 		    */
2916 #ifdef NV_MAX_10_EXP
2917 		    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2918 		    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2919 #else
2920 		    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2921 		    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2922 #endif
2923 		    char  *in = buf + sizeof(buf);
2924 
2925 		    anv = Perl_floor(anv);
2926 		    do {
2927 			const NV next = Perl_floor(anv / 128);
2928 			if (in <= buf)  /* this cannot happen ;-) */
2929 			    Perl_croak(aTHX_ "Cannot compress integer in pack");
2930 			*--in = (unsigned char)(anv - (next * 128)) | 0x80;
2931 			anv = next;
2932 		    } while (anv > 0);
2933 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2934 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2935 				       in, (buf + sizeof(buf)) - in);
2936 		} else {
2937 		    const char     *from;
2938 		    char           *result, *in;
2939 		    SV             *norm;
2940 		    STRLEN          len;
2941 		    bool            done;
2942 
2943 		  w_string:
2944 		    /* Copy string and check for compliance */
2945 		    from = SvPV_nomg_const(fromstr, len);
2946 		    if ((norm = is_an_int(from, len)) == NULL)
2947 			Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2948 
2949 		    Newx(result, len, char);
2950 		    in = result + len;
2951 		    done = FALSE;
2952 		    while (!done) *--in = div128(norm, &done) | 0x80;
2953 		    result[len - 1] &= 0x7F; /* clear continue bit */
2954 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2955 				       in, (result + len) - in);
2956 		    Safefree(result);
2957 		    SvREFCNT_dec(norm);	/* free norm */
2958 		}
2959 	    }
2960             break;
2961 	case 'i':
2962 	case 'i' | TYPE_IS_SHRIEKING:
2963 	    while (len-- > 0) {
2964 		int aint;
2965 		fromstr = NEXTFROM;
2966 		aint = SvIV_no_inf(fromstr, datumtype);
2967                 PUSH_VAR(utf8, cur, aint, needs_swap);
2968 	    }
2969 	    break;
2970 	case 'N' | TYPE_IS_SHRIEKING:
2971 	case 'N':
2972 	    while (len-- > 0) {
2973 		U32 au32;
2974 		fromstr = NEXTFROM;
2975 		au32 = SvUV_no_inf(fromstr, datumtype);
2976 		au32 = PerlSock_htonl(au32);
2977                 PUSH32(utf8, cur, &au32, FALSE);
2978 	    }
2979 	    break;
2980 	case 'V' | TYPE_IS_SHRIEKING:
2981 	case 'V':
2982 	    while (len-- > 0) {
2983 		U32 au32;
2984 		fromstr = NEXTFROM;
2985 		au32 = SvUV_no_inf(fromstr, datumtype);
2986 		au32 = htovl(au32);
2987                 PUSH32(utf8, cur, &au32, FALSE);
2988 	    }
2989 	    break;
2990 	case 'L' | TYPE_IS_SHRIEKING:
2991 #if LONGSIZE != SIZE32
2992 	    while (len-- > 0) {
2993 		unsigned long aulong;
2994 		fromstr = NEXTFROM;
2995 		aulong = SvUV_no_inf(fromstr, datumtype);
2996                 PUSH_VAR(utf8, cur, aulong, needs_swap);
2997 	    }
2998 	    break;
2999 #else
3000             /* Fall though! */
3001 #endif
3002 	case 'L':
3003 	    while (len-- > 0) {
3004 		U32 au32;
3005 		fromstr = NEXTFROM;
3006 		au32 = SvUV_no_inf(fromstr, datumtype);
3007                 PUSH32(utf8, cur, &au32, needs_swap);
3008 	    }
3009 	    break;
3010 	case 'l' | TYPE_IS_SHRIEKING:
3011 #if LONGSIZE != SIZE32
3012 	    while (len-- > 0) {
3013 		long along;
3014 		fromstr = NEXTFROM;
3015 		along = SvIV_no_inf(fromstr, datumtype);
3016                 PUSH_VAR(utf8, cur, along, needs_swap);
3017 	    }
3018 	    break;
3019 #else
3020             /* Fall though! */
3021 #endif
3022 	case 'l':
3023             while (len-- > 0) {
3024 		I32 ai32;
3025 		fromstr = NEXTFROM;
3026 		ai32 = SvIV_no_inf(fromstr, datumtype);
3027                 PUSH32(utf8, cur, &ai32, needs_swap);
3028 	    }
3029 	    break;
3030 #if defined(HAS_QUAD) && IVSIZE >= 8
3031 	case 'Q':
3032 	    while (len-- > 0) {
3033 		Uquad_t auquad;
3034 		fromstr = NEXTFROM;
3035 		auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3036                 PUSH_VAR(utf8, cur, auquad, needs_swap);
3037 	    }
3038 	    break;
3039 	case 'q':
3040 	    while (len-- > 0) {
3041 		Quad_t aquad;
3042 		fromstr = NEXTFROM;
3043 		aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3044                 PUSH_VAR(utf8, cur, aquad, needs_swap);
3045 	    }
3046 	    break;
3047 #endif
3048 	case 'P':
3049 	    len = 1;		/* assume SV is correct length */
3050 	    GROWING(utf8, cat, start, cur, sizeof(char *));
3051 	    /* FALLTHROUGH */
3052 	case 'p':
3053 	    while (len-- > 0) {
3054 		const char *aptr;
3055 
3056 		fromstr = NEXTFROM;
3057 		SvGETMAGIC(fromstr);
3058 		if (!SvOK(fromstr)) aptr = NULL;
3059 		else {
3060 		    /* XXX better yet, could spirit away the string to
3061 		     * a safe spot and hang on to it until the result
3062 		     * of pack() (and all copies of the result) are
3063 		     * gone.
3064 		     */
3065 		    if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3066 			 || (SvPADTMP(fromstr) &&
3067 			     !SvREADONLY(fromstr)))) {
3068 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3069 				       "Attempt to pack pointer to temporary value");
3070 		    }
3071 		    if (SvPOK(fromstr) || SvNIOK(fromstr))
3072 			aptr = SvPV_nomg_const_nolen(fromstr);
3073 		    else
3074 			aptr = SvPV_force_flags_nolen(fromstr, 0);
3075 		}
3076                 PUSH_VAR(utf8, cur, aptr, needs_swap);
3077 	    }
3078 	    break;
3079 	case 'u': {
3080 	    const char *aptr, *aend;
3081 	    bool from_utf8;
3082 
3083 	    fromstr = NEXTFROM;
3084 	    if (len <= 2) len = 45;
3085 	    else len = len / 3 * 3;
3086 	    if (len >= 64) {
3087 		Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3088 			       "Field too wide in 'u' format in pack");
3089 		len = 63;
3090 	    }
3091 	    aptr = SvPV_const(fromstr, fromlen);
3092 	    from_utf8 = DO_UTF8(fromstr);
3093 	    if (from_utf8) {
3094 		aend = aptr + fromlen;
3095 		fromlen = sv_len_utf8_nomg(fromstr);
3096 	    } else aend = NULL; /* Unused, but keep compilers happy */
3097 	    GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3098 	    while (fromlen > 0) {
3099 		U8 *end;
3100 		SSize_t todo;
3101 		U8 hunk[1+63/3*4+1];
3102 
3103 		if ((SSize_t)fromlen > len)
3104 		    todo = len;
3105 		else
3106 		    todo = fromlen;
3107 		if (from_utf8) {
3108 		    char buffer[64];
3109 		    if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3110 				      'u' | TYPE_IS_PACK)) {
3111 			*cur = '\0';
3112 			SvCUR_set(cat, cur - start);
3113 			Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3114 				   "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3115 				   aptr, aend, buffer, todo);
3116 		    }
3117 		    end = doencodes(hunk, (const U8 *)buffer, todo);
3118 		} else {
3119 		    end = doencodes(hunk, (const U8 *)aptr, todo);
3120 		    aptr += todo;
3121 		}
3122 		PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3123 		fromlen -= todo;
3124 	    }
3125 	    break;
3126 	}
3127 	}
3128 	*cur = '\0';
3129 	SvCUR_set(cat, cur - start);
3130       no_change:
3131 	*symptr = lookahead;
3132     }
3133     return beglist;
3134 }
3135 #undef NEXTFROM
3136 
3137 
3138 PP(pp_pack)
3139 {
3140     dSP; dMARK; dORIGMARK; dTARGET;
3141     SV *cat = TARG;
3142     STRLEN fromlen;
3143     SV *pat_sv = *++MARK;
3144     const char *pat = SvPV_const(pat_sv, fromlen);
3145     const char *patend = pat + fromlen;
3146 
3147     MARK++;
3148     SvPVCLEAR(cat);
3149     SvUTF8_off(cat);
3150 
3151     packlist(cat, pat, patend, MARK, SP + 1);
3152 
3153     if (SvUTF8(cat)) {
3154         STRLEN result_len;
3155         const char * result = SvPV_nomg(cat, result_len);
3156         const U8 * error_pos;
3157 
3158         if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3159             _force_out_malformed_utf8_message(error_pos,
3160                                               (U8 *) result + result_len,
3161                                               0, /* no flags */
3162                                               1 /* Die */
3163                                             );
3164             NOT_REACHED; /* NOTREACHED */
3165         }
3166     }
3167 
3168     SvSETMAGIC(cat);
3169     SP = ORIGMARK;
3170     PUSHs(cat);
3171     RETURN;
3172 }
3173 
3174 /*
3175  * ex: set ts=8 sts=4 sw=4 et:
3176  */
3177