xref: /openbsd-src/gnu/usr.bin/perl/doop.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*    doop.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  *  'So that was the job I felt I had to do when I started,' thought Sam.
13  *
14  *     [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"]
15  */
16 
17 /* This file contains some common functions needed to carry out certain
18  * ops. For example both pp_schomp() and pp_chomp() - scalar and array
19  * chomp operations - call the function do_chomp() found in this file.
20  */
21 
22 #include "EXTERN.h"
23 #define PERL_IN_DOOP_C
24 #include "perl.h"
25 
26 #ifndef PERL_MICRO
27 #include <signal.h>
28 #endif
29 
30 STATIC I32
31 S_do_trans_simple(pTHX_ SV * const sv)
32 {
33     dVAR;
34     I32 matches = 0;
35     STRLEN len;
36     U8 *s = (U8*)SvPV_nomg(sv,len);
37     U8 * const send = s+len;
38     const short * const tbl = (short*)cPVOP->op_pv;
39 
40     PERL_ARGS_ASSERT_DO_TRANS_SIMPLE;
41 
42     if (!tbl)
43 	Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
44 
45     /* First, take care of non-UTF-8 input strings, because they're easy */
46     if (!SvUTF8(sv)) {
47 	while (s < send) {
48 	    const I32 ch = tbl[*s];
49 	    if (ch >= 0) {
50 		matches++;
51 		*s = (U8)ch;
52 	    }
53 	    s++;
54 	}
55 	SvSETMAGIC(sv);
56     }
57     else {
58 	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
59 	U8 *d;
60 	U8 *dstart;
61 
62 	/* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
63 	if (grows)
64 	    Newx(d, len*2+1, U8);
65 	else
66 	    d = s;
67 	dstart = d;
68 	while (s < send) {
69 	    STRLEN ulen;
70 	    I32 ch;
71 
72 	    /* Need to check this, otherwise 128..255 won't match */
73 	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
74 	    if (c < 0x100 && (ch = tbl[c]) >= 0) {
75 		matches++;
76 		d = uvchr_to_utf8(d, ch);
77 		s += ulen;
78 	    }
79 	    else { /* No match -> copy */
80 		Move(s, d, ulen, U8);
81 		d += ulen;
82 		s += ulen;
83 	    }
84 	}
85 	if (grows) {
86 	    sv_setpvn(sv, (char*)dstart, d - dstart);
87 	    Safefree(dstart);
88 	}
89 	else {
90 	    *d = '\0';
91 	    SvCUR_set(sv, d - dstart);
92 	}
93 	SvUTF8_on(sv);
94 	SvSETMAGIC(sv);
95     }
96     return matches;
97 }
98 
99 STATIC I32
100 S_do_trans_count(pTHX_ SV * const sv)
101 {
102     dVAR;
103     STRLEN len;
104     const U8 *s = (const U8*)SvPV_nomg_const(sv, len);
105     const U8 * const send = s + len;
106     I32 matches = 0;
107     const short * const tbl = (short*)cPVOP->op_pv;
108 
109     PERL_ARGS_ASSERT_DO_TRANS_COUNT;
110 
111     if (!tbl)
112 	Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
113 
114     if (!SvUTF8(sv)) {
115 	while (s < send) {
116             if (tbl[*s++] >= 0)
117                 matches++;
118 	}
119     }
120     else {
121 	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
122 	while (s < send) {
123 	    STRLEN ulen;
124 	    const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
125 	    if (c < 0x100) {
126 		if (tbl[c] >= 0)
127 		    matches++;
128 	    } else if (complement)
129 		matches++;
130 	    s += ulen;
131 	}
132     }
133 
134     return matches;
135 }
136 
137 STATIC I32
138 S_do_trans_complex(pTHX_ SV * const sv)
139 {
140     dVAR;
141     STRLEN len;
142     U8 *s = (U8*)SvPV_nomg(sv, len);
143     U8 * const send = s+len;
144     I32 matches = 0;
145     const short * const tbl = (short*)cPVOP->op_pv;
146 
147     PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
148 
149     if (!tbl)
150 	Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
151 
152     if (!SvUTF8(sv)) {
153 	U8 *d = s;
154 	U8 * const dstart = d;
155 
156 	if (PL_op->op_private & OPpTRANS_SQUASH) {
157 	    const U8* p = send;
158 	    while (s < send) {
159 		const I32 ch = tbl[*s];
160 		if (ch >= 0) {
161 		    *d = (U8)ch;
162 		    matches++;
163 		    if (p != d - 1 || *p != *d)
164 			p = d++;
165 		}
166 		else if (ch == -1)	/* -1 is unmapped character */
167 		    *d++ = *s;
168 		else if (ch == -2)	/* -2 is delete character */
169 		    matches++;
170 		s++;
171 	    }
172 	}
173 	else {
174 	    while (s < send) {
175 		const I32 ch = tbl[*s];
176 		if (ch >= 0) {
177 		    matches++;
178 		    *d++ = (U8)ch;
179 		}
180 		else if (ch == -1)	/* -1 is unmapped character */
181 		    *d++ = *s;
182 		else if (ch == -2)      /* -2 is delete character */
183 		    matches++;
184 		s++;
185 	    }
186 	}
187 	*d = '\0';
188 	SvCUR_set(sv, d - dstart);
189     }
190     else { /* is utf8 */
191 	const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
192 	const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
193 	const I32 del = PL_op->op_private & OPpTRANS_DELETE;
194 	U8 *d;
195 	U8 *dstart;
196 	STRLEN rlen = 0;
197 
198 	if (grows)
199 	    Newx(d, len*2+1, U8);
200 	else
201 	    d = s;
202 	dstart = d;
203 	if (complement && !del)
204 	    rlen = tbl[0x100];
205 
206 	if (PL_op->op_private & OPpTRANS_SQUASH) {
207 	    UV pch = 0xfeedface;
208 	    while (s < send) {
209 		STRLEN len;
210 		const UV comp = utf8n_to_uvchr(s, send - s, &len,
211 					       UTF8_ALLOW_DEFAULT);
212 		I32 ch;
213 
214 		if (comp > 0xff) {
215 		    if (!complement) {
216 			Move(s, d, len, U8);
217 			d += len;
218 		    }
219 		    else {
220 			matches++;
221 			if (!del) {
222 			    ch = (rlen == 0) ? (I32)comp :
223 				(comp - 0x100 < rlen) ?
224 				tbl[comp+1] : tbl[0x100+rlen];
225 			    if ((UV)ch != pch) {
226 				d = uvchr_to_utf8(d, ch);
227 				pch = (UV)ch;
228 			    }
229 			    s += len;
230 			    continue;
231 			}
232 		    }
233 		}
234 		else if ((ch = tbl[comp]) >= 0) {
235 		    matches++;
236 		    if ((UV)ch != pch) {
237 		        d = uvchr_to_utf8(d, ch);
238 		        pch = (UV)ch;
239 		    }
240 		    s += len;
241 		    continue;
242 		}
243 		else if (ch == -1) {	/* -1 is unmapped character */
244 		    Move(s, d, len, U8);
245 		    d += len;
246 		}
247 		else if (ch == -2)      /* -2 is delete character */
248 		    matches++;
249 		s += len;
250 		pch = 0xfeedface;
251 	    }
252 	}
253 	else {
254 	    while (s < send) {
255 		STRLEN len;
256 		const UV comp = utf8n_to_uvchr(s, send - s, &len,
257 					       UTF8_ALLOW_DEFAULT);
258 		I32 ch;
259 		if (comp > 0xff) {
260 		    if (!complement) {
261 			Move(s, d, len, U8);
262 			d += len;
263 		    }
264 		    else {
265 			matches++;
266 			if (!del) {
267 			    if (comp - 0x100 < rlen)
268 				d = uvchr_to_utf8(d, tbl[comp+1]);
269 			    else
270 				d = uvchr_to_utf8(d, tbl[0x100+rlen]);
271 			}
272 		    }
273 		}
274 		else if ((ch = tbl[comp]) >= 0) {
275 		    d = uvchr_to_utf8(d, ch);
276 		    matches++;
277 		}
278 		else if (ch == -1) {	/* -1 is unmapped character */
279 		    Move(s, d, len, U8);
280 		    d += len;
281 		}
282 		else if (ch == -2)      /* -2 is delete character */
283 		    matches++;
284 		s += len;
285 	    }
286 	}
287 	if (grows) {
288 	    sv_setpvn(sv, (char*)dstart, d - dstart);
289 	    Safefree(dstart);
290 	}
291 	else {
292 	    *d = '\0';
293 	    SvCUR_set(sv, d - dstart);
294 	}
295 	SvUTF8_on(sv);
296     }
297     SvSETMAGIC(sv);
298     return matches;
299 }
300 
301 STATIC I32
302 S_do_trans_simple_utf8(pTHX_ SV * const sv)
303 {
304     dVAR;
305     U8 *s;
306     U8 *send;
307     U8 *d;
308     U8 *start;
309     U8 *dstart, *dend;
310     I32 matches = 0;
311     const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
312     STRLEN len;
313     SV* const  rv =
314 #ifdef USE_ITHREADS
315 		    PAD_SVl(cPADOP->op_padix);
316 #else
317 		    MUTABLE_SV(cSVOP->op_sv);
318 #endif
319     HV* const  hv = MUTABLE_HV(SvRV(rv));
320     SV* const * svp = hv_fetchs(hv, "NONE", FALSE);
321     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
322     const UV extra = none + 1;
323     UV final = 0;
324     U8 hibit = 0;
325 
326     PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8;
327 
328     s = (U8*)SvPV_nomg(sv, len);
329     if (!SvUTF8(sv)) {
330 	const U8 *t = s;
331 	const U8 * const e = s + len;
332 	while (t < e) {
333 	    const U8 ch = *t++;
334 	    hibit = !NATIVE_IS_INVARIANT(ch);
335 	    if (hibit) {
336 		s = bytes_to_utf8(s, &len);
337 		break;
338 	    }
339 	}
340     }
341     send = s + len;
342     start = s;
343 
344     svp = hv_fetchs(hv, "FINAL", FALSE);
345     if (svp)
346 	final = SvUV(*svp);
347 
348     if (grows) {
349 	/* d needs to be bigger than s, in case e.g. upgrading is required */
350 	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
351 	dend = d + len * 3;
352 	dstart = d;
353     }
354     else {
355 	dstart = d = s;
356 	dend = d + len;
357     }
358 
359     while (s < send) {
360 	const UV uv = swash_fetch(rv, s, TRUE);
361 	if (uv < none) {
362 	    s += UTF8SKIP(s);
363 	    matches++;
364 	    d = uvuni_to_utf8(d, uv);
365 	}
366 	else if (uv == none) {
367 	    const int i = UTF8SKIP(s);
368 	    Move(s, d, i, U8);
369 	    d += i;
370 	    s += i;
371 	}
372 	else if (uv == extra) {
373 	    s += UTF8SKIP(s);
374 	    matches++;
375 	    d = uvuni_to_utf8(d, final);
376 	}
377 	else
378 	    s += UTF8SKIP(s);
379 
380 	if (d > dend) {
381 	    const STRLEN clen = d - dstart;
382 	    const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
383 	    if (!grows)
384 		Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
385 	    Renew(dstart, nlen + UTF8_MAXBYTES, U8);
386 	    d = dstart + clen;
387 	    dend = dstart + nlen;
388 	}
389     }
390     if (grows || hibit) {
391 	sv_setpvn(sv, (char*)dstart, d - dstart);
392 	Safefree(dstart);
393 	if (grows && hibit)
394 	    Safefree(start);
395     }
396     else {
397 	*d = '\0';
398 	SvCUR_set(sv, d - dstart);
399     }
400     SvSETMAGIC(sv);
401     SvUTF8_on(sv);
402 
403     return matches;
404 }
405 
406 STATIC I32
407 S_do_trans_count_utf8(pTHX_ SV * const sv)
408 {
409     dVAR;
410     const U8 *s;
411     const U8 *start = NULL;
412     const U8 *send;
413     I32 matches = 0;
414     STRLEN len;
415     SV* const  rv =
416 #ifdef USE_ITHREADS
417 		    PAD_SVl(cPADOP->op_padix);
418 #else
419 		    MUTABLE_SV(cSVOP->op_sv);
420 #endif
421     HV* const hv = MUTABLE_HV(SvRV(rv));
422     SV* const * const svp = hv_fetchs(hv, "NONE", FALSE);
423     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
424     const UV extra = none + 1;
425     U8 hibit = 0;
426 
427     PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8;
428 
429     s = (const U8*)SvPV_nomg_const(sv, len);
430     if (!SvUTF8(sv)) {
431 	const U8 *t = s;
432 	const U8 * const e = s + len;
433 	while (t < e) {
434 	    const U8 ch = *t++;
435 	    hibit = !NATIVE_IS_INVARIANT(ch);
436 	    if (hibit) {
437 		start = s = bytes_to_utf8(s, &len);
438 		break;
439 	    }
440 	}
441     }
442     send = s + len;
443 
444     while (s < send) {
445 	const UV uv = swash_fetch(rv, s, TRUE);
446 	if (uv < none || uv == extra)
447 	    matches++;
448 	s += UTF8SKIP(s);
449     }
450     if (hibit)
451         Safefree(start);
452 
453     return matches;
454 }
455 
456 STATIC I32
457 S_do_trans_complex_utf8(pTHX_ SV * const sv)
458 {
459     dVAR;
460     U8 *start, *send;
461     U8 *d;
462     I32 matches = 0;
463     const I32 squash   = PL_op->op_private & OPpTRANS_SQUASH;
464     const I32 del      = PL_op->op_private & OPpTRANS_DELETE;
465     const I32 grows    = PL_op->op_private & OPpTRANS_GROWS;
466     SV* const  rv =
467 #ifdef USE_ITHREADS
468 		    PAD_SVl(cPADOP->op_padix);
469 #else
470 		    MUTABLE_SV(cSVOP->op_sv);
471 #endif
472     HV * const hv = MUTABLE_HV(SvRV(rv));
473     SV * const *svp = hv_fetchs(hv, "NONE", FALSE);
474     const UV none = svp ? SvUV(*svp) : 0x7fffffff;
475     const UV extra = none + 1;
476     UV final = 0;
477     bool havefinal = FALSE;
478     STRLEN len;
479     U8 *dstart, *dend;
480     U8 hibit = 0;
481     U8 *s = (U8*)SvPV_nomg(sv, len);
482 
483     PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8;
484 
485     if (!SvUTF8(sv)) {
486 	const U8 *t = s;
487 	const U8 * const e = s + len;
488 	while (t < e) {
489 	    const U8 ch = *t++;
490 	    hibit = !NATIVE_IS_INVARIANT(ch);
491 	    if (hibit) {
492 		s = bytes_to_utf8(s, &len);
493 		break;
494 	    }
495 	}
496     }
497     send = s + len;
498     start = s;
499 
500     svp = hv_fetchs(hv, "FINAL", FALSE);
501     if (svp) {
502 	final = SvUV(*svp);
503 	havefinal = TRUE;
504     }
505 
506     if (grows) {
507 	/* d needs to be bigger than s, in case e.g. upgrading is required */
508 	Newx(d, len * 3 + UTF8_MAXBYTES, U8);
509 	dend = d + len * 3;
510 	dstart = d;
511     }
512     else {
513 	dstart = d = s;
514 	dend = d + len;
515     }
516 
517     if (squash) {
518 	UV puv = 0xfeedface;
519 	while (s < send) {
520 	    UV uv = swash_fetch(rv, s, TRUE);
521 
522 	    if (d > dend) {
523 		const STRLEN clen = d - dstart;
524 		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
525 		if (!grows)
526 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
527 		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
528 		d = dstart + clen;
529 		dend = dstart + nlen;
530 	    }
531 	    if (uv < none) {
532 		matches++;
533 		s += UTF8SKIP(s);
534 		if (uv != puv) {
535 		    d = uvuni_to_utf8(d, uv);
536 		    puv = uv;
537 		}
538 		continue;
539 	    }
540 	    else if (uv == none) {	/* "none" is unmapped character */
541 		const int i = UTF8SKIP(s);
542 		Move(s, d, i, U8);
543 		d += i;
544 		s += i;
545 		puv = 0xfeedface;
546 		continue;
547 	    }
548 	    else if (uv == extra && !del) {
549 		matches++;
550 		if (havefinal) {
551 		    s += UTF8SKIP(s);
552 		    if (puv != final) {
553 			d = uvuni_to_utf8(d, final);
554 			puv = final;
555 		    }
556 		}
557 		else {
558 		    STRLEN len;
559 		    uv = utf8n_to_uvuni(s, send - s, &len, UTF8_ALLOW_DEFAULT);
560 		    if (uv != puv) {
561 			Move(s, d, len, U8);
562 			d += len;
563 			puv = uv;
564 		    }
565 		    s += len;
566 		}
567 		continue;
568 	    }
569 	    matches++;			/* "none+1" is delete character */
570 	    s += UTF8SKIP(s);
571 	}
572     }
573     else {
574 	while (s < send) {
575 	    const UV uv = swash_fetch(rv, s, TRUE);
576 	    if (d > dend) {
577 	        const STRLEN clen = d - dstart;
578 		const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
579 		if (!grows)
580 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
581 		Renew(dstart, nlen + UTF8_MAXBYTES, U8);
582 		d = dstart + clen;
583 		dend = dstart + nlen;
584 	    }
585 	    if (uv < none) {
586 		matches++;
587 		s += UTF8SKIP(s);
588 		d = uvuni_to_utf8(d, uv);
589 		continue;
590 	    }
591 	    else if (uv == none) {	/* "none" is unmapped character */
592 		const int i = UTF8SKIP(s);
593 		Move(s, d, i, U8);
594 		d += i;
595 		s += i;
596 		continue;
597 	    }
598 	    else if (uv == extra && !del) {
599 		matches++;
600 		s += UTF8SKIP(s);
601 		d = uvuni_to_utf8(d, final);
602 		continue;
603 	    }
604 	    matches++;			/* "none+1" is delete character */
605 	    s += UTF8SKIP(s);
606 	}
607     }
608     if (grows || hibit) {
609 	sv_setpvn(sv, (char*)dstart, d - dstart);
610 	Safefree(dstart);
611 	if (grows && hibit)
612 	    Safefree(start);
613     }
614     else {
615 	*d = '\0';
616 	SvCUR_set(sv, d - dstart);
617     }
618     SvUTF8_on(sv);
619     SvSETMAGIC(sv);
620 
621     return matches;
622 }
623 
624 I32
625 Perl_do_trans(pTHX_ SV *sv)
626 {
627     dVAR;
628     STRLEN len;
629     const I32 hasutf = (PL_op->op_private &
630                     (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
631 
632     PERL_ARGS_ASSERT_DO_TRANS;
633 
634     if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) {
635         if (!SvIsCOW(sv))
636             Perl_croak_no_modify();
637     }
638     (void)SvPV_const(sv, len);
639     if (!len)
640 	return 0;
641     if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) {
642 	if (!SvPOKp(sv) || SvTHINKFIRST(sv))
643 	    (void)SvPV_force_nomg(sv, len);
644 	(void)SvPOK_only_UTF8(sv);
645     }
646 
647     DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
648 
649     switch (PL_op->op_private & ~hasutf & (
650 		OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL|
651 		OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
652     case 0:
653 	if (hasutf)
654 	    return do_trans_simple_utf8(sv);
655 	else
656 	    return do_trans_simple(sv);
657 
658     case OPpTRANS_IDENTICAL:
659     case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
660 	if (hasutf)
661 	    return do_trans_count_utf8(sv);
662 	else
663 	    return do_trans_count(sv);
664 
665     default:
666 	if (hasutf)
667 	    return do_trans_complex_utf8(sv);
668 	else
669 	    return do_trans_complex(sv);
670     }
671 }
672 
673 void
674 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
675 {
676     dVAR;
677     SV ** const oldmark = mark;
678     I32 items = sp - mark;
679     STRLEN len;
680     STRLEN delimlen;
681 
682     PERL_ARGS_ASSERT_DO_JOIN;
683 
684     (void) SvPV_const(delim, delimlen); /* stringify and get the delimlen */
685     /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
686 
687     mark++;
688     len = (items > 0 ? (delimlen * (items - 1) ) : 0);
689     SvUPGRADE(sv, SVt_PV);
690     if (SvLEN(sv) < len + items) {	/* current length is way too short */
691 	while (items-- > 0) {
692 	    if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
693 		STRLEN tmplen;
694 		SvPV_const(*mark, tmplen);
695 		len += tmplen;
696 	    }
697 	    mark++;
698 	}
699 	SvGROW(sv, len + 1);		/* so try to pre-extend */
700 
701 	mark = oldmark;
702 	items = sp - mark;
703 	++mark;
704     }
705 
706     sv_setpvs(sv, "");
707     /* sv_setpv retains old UTF8ness [perl #24846] */
708     SvUTF8_off(sv);
709 
710     if (TAINTING_get && SvMAGICAL(sv))
711 	SvTAINTED_off(sv);
712 
713     if (items-- > 0) {
714 	if (*mark)
715 	    sv_catsv(sv, *mark);
716 	mark++;
717     }
718 
719     if (delimlen) {
720 	for (; items > 0; items--,mark++) {
721 	    sv_catsv_nomg(sv,delim);
722 	    sv_catsv(sv,*mark);
723 	}
724     }
725     else {
726 	for (; items > 0; items--,mark++)
727 	    sv_catsv(sv,*mark);
728     }
729     SvSETMAGIC(sv);
730 }
731 
732 void
733 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
734 {
735     dVAR;
736     STRLEN patlen;
737     const char * const pat = SvPV_const(*sarg, patlen);
738     bool do_taint = FALSE;
739 
740     PERL_ARGS_ASSERT_DO_SPRINTF;
741 
742     if (SvTAINTED(*sarg))
743 	TAINT_PROPER(
744 		(PL_op && PL_op->op_type < OP_max)
745 		    ? (PL_op->op_type == OP_PRTF)
746 			? "printf"
747 			: PL_op_name[PL_op->op_type]
748 		    : "(unknown)"
749 	);
750     SvUTF8_off(sv);
751     if (DO_UTF8(*sarg))
752         SvUTF8_on(sv);
753     sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint);
754     SvSETMAGIC(sv);
755     if (do_taint)
756 	SvTAINTED_on(sv);
757 }
758 
759 /* currently converts input to bytes if possible, but doesn't sweat failure */
760 UV
761 Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size)
762 {
763     dVAR;
764     STRLEN srclen, len, uoffset, bitoffs = 0;
765     const unsigned char *s = (const unsigned char *) SvPV_flags_const(sv, srclen,
766                              SV_GMAGIC | ((PL_op->op_flags & OPf_MOD || LVRET)
767                                           ? SV_UNDEF_RETURNS_NULL : 0));
768     UV retnum = 0;
769 
770     if (!s) {
771       s = (const unsigned char *)"";
772     }
773 
774     PERL_ARGS_ASSERT_DO_VECGET;
775 
776     if (offset < 0)
777 	return 0;
778     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
779 	Perl_croak(aTHX_ "Illegal number of bits in vec");
780 
781     if (SvUTF8(sv))
782 	(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
783 
784     if (size < 8) {
785 	bitoffs = ((offset%8)*size)%8;
786 	uoffset = offset/(8/size);
787     }
788     else if (size > 8)
789 	uoffset = offset*(size/8);
790     else
791 	uoffset = offset;
792 
793     len = uoffset + (bitoffs + size + 7)/8;	/* required number of bytes */
794     if (len > srclen) {
795 	if (size <= 8)
796 	    retnum = 0;
797 	else {
798 	    if (size == 16) {
799 		if (uoffset >= srclen)
800 		    retnum = 0;
801 		else
802 		    retnum = (UV) s[uoffset] <<  8;
803 	    }
804 	    else if (size == 32) {
805 		if (uoffset >= srclen)
806 		    retnum = 0;
807 		else if (uoffset + 1 >= srclen)
808 		    retnum =
809 			((UV) s[uoffset    ] << 24);
810 		else if (uoffset + 2 >= srclen)
811 		    retnum =
812 			((UV) s[uoffset    ] << 24) +
813 			((UV) s[uoffset + 1] << 16);
814 		else
815 		    retnum =
816 			((UV) s[uoffset    ] << 24) +
817 			((UV) s[uoffset + 1] << 16) +
818 			(     s[uoffset + 2] <<  8);
819 	    }
820 #ifdef UV_IS_QUAD
821 	    else if (size == 64) {
822 		Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
823 			       "Bit vector size > 32 non-portable");
824 		if (uoffset >= srclen)
825 		    retnum = 0;
826 		else if (uoffset + 1 >= srclen)
827 		    retnum =
828 			(UV) s[uoffset     ] << 56;
829 		else if (uoffset + 2 >= srclen)
830 		    retnum =
831 			((UV) s[uoffset    ] << 56) +
832 			((UV) s[uoffset + 1] << 48);
833 		else if (uoffset + 3 >= srclen)
834 		    retnum =
835 			((UV) s[uoffset    ] << 56) +
836 			((UV) s[uoffset + 1] << 48) +
837 			((UV) s[uoffset + 2] << 40);
838 		else if (uoffset + 4 >= srclen)
839 		    retnum =
840 			((UV) s[uoffset    ] << 56) +
841 			((UV) s[uoffset + 1] << 48) +
842 			((UV) s[uoffset + 2] << 40) +
843 			((UV) s[uoffset + 3] << 32);
844 		else if (uoffset + 5 >= srclen)
845 		    retnum =
846 			((UV) s[uoffset    ] << 56) +
847 			((UV) s[uoffset + 1] << 48) +
848 			((UV) s[uoffset + 2] << 40) +
849 			((UV) s[uoffset + 3] << 32) +
850 			(     s[uoffset + 4] << 24);
851 		else if (uoffset + 6 >= srclen)
852 		    retnum =
853 			((UV) s[uoffset    ] << 56) +
854 			((UV) s[uoffset + 1] << 48) +
855 			((UV) s[uoffset + 2] << 40) +
856 			((UV) s[uoffset + 3] << 32) +
857 			((UV) s[uoffset + 4] << 24) +
858 			((UV) s[uoffset + 5] << 16);
859 		else
860 		    retnum =
861 			((UV) s[uoffset    ] << 56) +
862 			((UV) s[uoffset + 1] << 48) +
863 			((UV) s[uoffset + 2] << 40) +
864 			((UV) s[uoffset + 3] << 32) +
865 			((UV) s[uoffset + 4] << 24) +
866 			((UV) s[uoffset + 5] << 16) +
867 			(     s[uoffset + 6] <<  8);
868 	    }
869 #endif
870 	}
871     }
872     else if (size < 8)
873 	retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1);
874     else {
875 	if (size == 8)
876 	    retnum = s[uoffset];
877 	else if (size == 16)
878 	    retnum =
879 		((UV) s[uoffset] <<      8) +
880 		      s[uoffset + 1];
881 	else if (size == 32)
882 	    retnum =
883 		((UV) s[uoffset    ] << 24) +
884 		((UV) s[uoffset + 1] << 16) +
885 		(     s[uoffset + 2] <<  8) +
886 		      s[uoffset + 3];
887 #ifdef UV_IS_QUAD
888 	else if (size == 64) {
889 	    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
890 			   "Bit vector size > 32 non-portable");
891 	    retnum =
892 		((UV) s[uoffset    ] << 56) +
893 		((UV) s[uoffset + 1] << 48) +
894 		((UV) s[uoffset + 2] << 40) +
895 		((UV) s[uoffset + 3] << 32) +
896 		((UV) s[uoffset + 4] << 24) +
897 		((UV) s[uoffset + 5] << 16) +
898 		(     s[uoffset + 6] <<  8) +
899 		      s[uoffset + 7];
900 	}
901 #endif
902     }
903 
904     return retnum;
905 }
906 
907 /* currently converts input to bytes if possible but doesn't sweat failures,
908  * although it does ensure that the string it clobbers is not marked as
909  * utf8-valid any more
910  */
911 void
912 Perl_do_vecset(pTHX_ SV *sv)
913 {
914     dVAR;
915     SSize_t offset, bitoffs = 0;
916     int size;
917     unsigned char *s;
918     UV lval;
919     I32 mask;
920     STRLEN targlen;
921     STRLEN len;
922     SV * const targ = LvTARG(sv);
923 
924     PERL_ARGS_ASSERT_DO_VECSET;
925 
926     if (!targ)
927 	return;
928     s = (unsigned char*)SvPV_force_flags(targ, targlen,
929                                          SV_GMAGIC | SV_UNDEF_RETURNS_NULL);
930     if (SvUTF8(targ)) {
931 	/* This is handled by the SvPOK_only below...
932 	if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
933 	    SvUTF8_off(targ);
934 	 */
935 	(void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
936     }
937 
938     (void)SvPOK_only(targ);
939     lval = SvUV(sv);
940     offset = LvTARGOFF(sv);
941     if (offset < 0)
942 	Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
943     size = LvTARGLEN(sv);
944     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
945 	Perl_croak(aTHX_ "Illegal number of bits in vec");
946 
947     if (size < 8) {
948 	bitoffs = ((offset%8)*size)%8;
949 	offset /= 8/size;
950     }
951     else if (size > 8)
952 	offset *= size/8;
953 
954     len = offset + (bitoffs + size + 7)/8;	/* required number of bytes */
955     if (len > targlen) {
956 	s = (unsigned char*)SvGROW(targ, len + 1);
957 	(void)memzero((char *)(s + targlen), len - targlen + 1);
958 	SvCUR_set(targ, len);
959     }
960 
961     if (size < 8) {
962 	mask = (1 << size) - 1;
963 	lval &= mask;
964 	s[offset] &= ~(mask << bitoffs);
965 	s[offset] |= lval << bitoffs;
966     }
967     else {
968 	if (size == 8)
969 	    s[offset  ] = (U8)( lval        & 0xff);
970 	else if (size == 16) {
971 	    s[offset  ] = (U8)((lval >>  8) & 0xff);
972 	    s[offset+1] = (U8)( lval        & 0xff);
973 	}
974 	else if (size == 32) {
975 	    s[offset  ] = (U8)((lval >> 24) & 0xff);
976 	    s[offset+1] = (U8)((lval >> 16) & 0xff);
977 	    s[offset+2] = (U8)((lval >>  8) & 0xff);
978 	    s[offset+3] = (U8)( lval        & 0xff);
979 	}
980 #ifdef UV_IS_QUAD
981 	else if (size == 64) {
982 	    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
983 			   "Bit vector size > 32 non-portable");
984 	    s[offset  ] = (U8)((lval >> 56) & 0xff);
985 	    s[offset+1] = (U8)((lval >> 48) & 0xff);
986 	    s[offset+2] = (U8)((lval >> 40) & 0xff);
987 	    s[offset+3] = (U8)((lval >> 32) & 0xff);
988 	    s[offset+4] = (U8)((lval >> 24) & 0xff);
989 	    s[offset+5] = (U8)((lval >> 16) & 0xff);
990 	    s[offset+6] = (U8)((lval >>  8) & 0xff);
991 	    s[offset+7] = (U8)( lval        & 0xff);
992 	}
993 #endif
994     }
995     SvSETMAGIC(targ);
996 }
997 
998 void
999 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1000 {
1001     dVAR;
1002 #ifdef LIBERAL
1003     long *dl;
1004     long *ll;
1005     long *rl;
1006 #endif
1007     char *dc;
1008     STRLEN leftlen;
1009     STRLEN rightlen;
1010     const char *lc;
1011     const char *rc;
1012     STRLEN len;
1013     STRLEN lensave;
1014     const char *lsave;
1015     const char *rsave;
1016     bool left_utf;
1017     bool right_utf;
1018     STRLEN needlen = 0;
1019 
1020     PERL_ARGS_ASSERT_DO_VOP;
1021 
1022     if (sv != left || (optype != OP_BIT_AND && !SvOK(sv)))
1023 	sv_setpvs(sv, "");	/* avoid undef warning on |= and ^= */
1024     if (sv == left) {
1025 	lsave = lc = SvPV_force_nomg(left, leftlen);
1026     }
1027     else {
1028 	lsave = lc = SvPV_nomg_const(left, leftlen);
1029 	SvPV_force_nomg_nolen(sv);
1030     }
1031     rsave = rc = SvPV_nomg_const(right, rightlen);
1032 
1033     /* This need to come after SvPV to ensure that string overloading has
1034        fired off.  */
1035 
1036     left_utf = DO_UTF8(left);
1037     right_utf = DO_UTF8(right);
1038 
1039     if (left_utf && !right_utf) {
1040 	/* Avoid triggering overloading again by using temporaries.
1041 	   Maybe there should be a variant of sv_utf8_upgrade that takes pvn
1042 	*/
1043 	right = newSVpvn_flags(rsave, rightlen, SVs_TEMP);
1044 	sv_utf8_upgrade(right);
1045 	rsave = rc = SvPV_nomg_const(right, rightlen);
1046 	right_utf = TRUE;
1047     }
1048     else if (!left_utf && right_utf) {
1049 	left = newSVpvn_flags(lsave, leftlen, SVs_TEMP);
1050 	sv_utf8_upgrade(left);
1051 	lsave = lc = SvPV_nomg_const(left, leftlen);
1052 	left_utf = TRUE;
1053     }
1054 
1055     len = leftlen < rightlen ? leftlen : rightlen;
1056     lensave = len;
1057     SvCUR_set(sv, len);
1058     (void)SvPOK_only(sv);
1059     if ((left_utf || right_utf) && (sv == left || sv == right)) {
1060 	needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
1061 	Newxz(dc, needlen + 1, char);
1062     }
1063     else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
1064 	dc = SvPV_force_nomg_nolen(sv);
1065 	if (SvLEN(sv) < len + 1) {
1066 	    dc = SvGROW(sv, len + 1);
1067 	    (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1068 	}
1069 	if (optype != OP_BIT_AND && (left_utf || right_utf))
1070 	    dc = SvGROW(sv, leftlen + rightlen + 1);
1071     }
1072     else {
1073 	needlen = optype == OP_BIT_AND
1074 		    ? len : (leftlen > rightlen ? leftlen : rightlen);
1075 	Newxz(dc, needlen + 1, char);
1076 	sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL);
1077 	dc = SvPVX(sv);		/* sv_usepvn() calls Renew() */
1078     }
1079     if (left_utf || right_utf) {
1080 	UV duc, luc, ruc;
1081 	char *dcorig = dc;
1082 	char *dcsave = NULL;
1083 	STRLEN lulen = leftlen;
1084 	STRLEN rulen = rightlen;
1085 	STRLEN ulen;
1086 
1087 	switch (optype) {
1088 	case OP_BIT_AND:
1089 	    while (lulen && rulen) {
1090 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1091 		lc += ulen;
1092 		lulen -= ulen;
1093 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1094 		rc += ulen;
1095 		rulen -= ulen;
1096 		duc = luc & ruc;
1097 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1098 	    }
1099 	    if (sv == left || sv == right)
1100 		(void)sv_usepvn(sv, dcorig, needlen);
1101 	    SvCUR_set(sv, dc - dcorig);
1102 	    break;
1103 	case OP_BIT_XOR:
1104 	    while (lulen && rulen) {
1105 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1106 		lc += ulen;
1107 		lulen -= ulen;
1108 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1109 		rc += ulen;
1110 		rulen -= ulen;
1111 		duc = luc ^ ruc;
1112 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1113 	    }
1114 	    goto mop_up_utf;
1115 	case OP_BIT_OR:
1116 	    while (lulen && rulen) {
1117 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1118 		lc += ulen;
1119 		lulen -= ulen;
1120 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1121 		rc += ulen;
1122 		rulen -= ulen;
1123 		duc = luc | ruc;
1124 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1125 	    }
1126 	  mop_up_utf:
1127 	    if (rulen)
1128 		dcsave = savepvn(rc, rulen);
1129 	    else if (lulen)
1130 		dcsave = savepvn(lc, lulen);
1131 	    if (sv == left || sv == right)
1132 		(void)sv_usepvn(sv, dcorig, needlen); /* uses Renew(); defaults to nomg */
1133 	    SvCUR_set(sv, dc - dcorig);
1134 	    if (rulen)
1135 		sv_catpvn_nomg(sv, dcsave, rulen);
1136 	    else if (lulen)
1137 		sv_catpvn_nomg(sv, dcsave, lulen);
1138 	    else
1139 		*SvEND(sv) = '\0';
1140 	    Safefree(dcsave);
1141 	    break;
1142 	default:
1143 	    if (sv == left || sv == right)
1144 		Safefree(dcorig);
1145 	    Perl_croak(aTHX_ "panic: do_vop called for op %u (%s)",
1146 			(unsigned)optype, PL_op_name[optype]);
1147 	}
1148 	SvUTF8_on(sv);
1149 	goto finish;
1150     }
1151     else
1152 #ifdef LIBERAL
1153     if (len >= sizeof(long)*4 &&
1154 	!((unsigned long)dc % sizeof(long)) &&
1155 	!((unsigned long)lc % sizeof(long)) &&
1156 	!((unsigned long)rc % sizeof(long)))	/* It's almost always aligned... */
1157     {
1158 	const STRLEN remainder = len % (sizeof(long)*4);
1159 	len /= (sizeof(long)*4);
1160 
1161 	dl = (long*)dc;
1162 	ll = (long*)lc;
1163 	rl = (long*)rc;
1164 
1165 	switch (optype) {
1166 	case OP_BIT_AND:
1167 	    while (len--) {
1168 		*dl++ = *ll++ & *rl++;
1169 		*dl++ = *ll++ & *rl++;
1170 		*dl++ = *ll++ & *rl++;
1171 		*dl++ = *ll++ & *rl++;
1172 	    }
1173 	    break;
1174 	case OP_BIT_XOR:
1175 	    while (len--) {
1176 		*dl++ = *ll++ ^ *rl++;
1177 		*dl++ = *ll++ ^ *rl++;
1178 		*dl++ = *ll++ ^ *rl++;
1179 		*dl++ = *ll++ ^ *rl++;
1180 	    }
1181 	    break;
1182 	case OP_BIT_OR:
1183 	    while (len--) {
1184 		*dl++ = *ll++ | *rl++;
1185 		*dl++ = *ll++ | *rl++;
1186 		*dl++ = *ll++ | *rl++;
1187 		*dl++ = *ll++ | *rl++;
1188 	    }
1189 	}
1190 
1191 	dc = (char*)dl;
1192 	lc = (char*)ll;
1193 	rc = (char*)rl;
1194 
1195 	len = remainder;
1196     }
1197 #endif
1198     {
1199 	switch (optype) {
1200 	case OP_BIT_AND:
1201 	    while (len--)
1202 		*dc++ = *lc++ & *rc++;
1203 	    *dc = '\0';
1204 	    break;
1205 	case OP_BIT_XOR:
1206 	    while (len--)
1207 		*dc++ = *lc++ ^ *rc++;
1208 	    goto mop_up;
1209 	case OP_BIT_OR:
1210 	    while (len--)
1211 		*dc++ = *lc++ | *rc++;
1212 	  mop_up:
1213 	    len = lensave;
1214 	    if (rightlen > len)
1215 		sv_catpvn_nomg(sv, rsave + len, rightlen - len);
1216 	    else if (leftlen > (STRLEN)len)
1217 		sv_catpvn_nomg(sv, lsave + len, leftlen - len);
1218 	    else
1219 		*SvEND(sv) = '\0';
1220 	    break;
1221 	}
1222     }
1223 finish:
1224     SvTAINT(sv);
1225 }
1226 
1227 OP *
1228 Perl_do_kv(pTHX)
1229 {
1230     dVAR;
1231     dSP;
1232     HV * const keys = MUTABLE_HV(POPs);
1233     HE *entry;
1234     const I32 gimme = GIMME_V;
1235     const I32 dokv =     (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV);
1236     /* op_type is OP_RKEYS/OP_RVALUES if pp_rkeys delegated to here */
1237     const I32 dokeys =   dokv || (PL_op->op_type == OP_KEYS || PL_op->op_type == OP_RKEYS);
1238     const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES || PL_op->op_type == OP_RVALUES);
1239 
1240     (void)hv_iterinit(keys);	/* always reset iterator regardless */
1241 
1242     if (gimme == G_VOID)
1243 	RETURN;
1244 
1245     if (gimme == G_SCALAR) {
1246 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1247 	    SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));  /* Not TARG RT#67838 */
1248 	    sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0);
1249 	    LvTYPE(ret) = 'k';
1250 	    LvTARG(ret) = SvREFCNT_inc_simple(keys);
1251 	    PUSHs(ret);
1252 	}
1253 	else {
1254 	    IV i;
1255 	    dTARGET;
1256 
1257 	    if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) {
1258 		i = HvUSEDKEYS(keys);
1259 	    }
1260 	    else {
1261 		i = 0;
1262 		while (hv_iternext(keys)) i++;
1263 	    }
1264 	    PUSHi( i );
1265 	}
1266 	RETURN;
1267     }
1268 
1269     EXTEND(SP, HvUSEDKEYS(keys) * (dokeys + dovalues));
1270 
1271     PUTBACK;	/* hv_iternext and hv_iterval might clobber stack_sp */
1272     while ((entry = hv_iternext(keys))) {
1273 	SPAGAIN;
1274 	if (dokeys) {
1275 	    SV* const sv = hv_iterkeysv(entry);
1276 	    XPUSHs(sv);	/* won't clobber stack_sp */
1277 	}
1278 	if (dovalues) {
1279 	    SV *tmpstr;
1280 	    PUTBACK;
1281 	    tmpstr = hv_iterval(keys,entry);
1282 	    DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
1283 			    (unsigned long)HeHASH(entry),
1284 			    (int)HvMAX(keys)+1,
1285 			    (unsigned long)(HeHASH(entry) & HvMAX(keys))));
1286 	    SPAGAIN;
1287 	    XPUSHs(tmpstr);
1288 	}
1289 	PUTBACK;
1290     }
1291     return NORMAL;
1292 }
1293 
1294 /*
1295  * Local variables:
1296  * c-indentation-style: bsd
1297  * c-basic-offset: 4
1298  * indent-tabs-mode: nil
1299  * End:
1300  *
1301  * ex: set ts=8 sts=4 sw=4 et:
1302  */
1303