xref: /openbsd-src/gnu/usr.bin/perl/doop.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*    doop.c
2  *
3  *    Copyright (c) 1991-2001, Larry Wall
4  *
5  *    You may distribute under the terms of either the GNU General Public
6  *    License or the Artistic License, as specified in the README file.
7  *
8  */
9 
10 /*
11  * "'So that was the job I felt I had to do when I started,' thought Sam."
12  */
13 
14 #include "EXTERN.h"
15 #define PERL_IN_DOOP_C
16 #include "perl.h"
17 
18 #if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX)
19 #include <signal.h>
20 #endif
21 
22 STATIC I32
23 S_do_trans_simple(pTHX_ SV *sv)
24 {
25     U8 *s;
26     U8 *d;
27     U8 *send;
28     U8 *dstart;
29     I32 matches = 0;
30     I32 grows = PL_op->op_private & OPpTRANS_GROWS;
31     STRLEN len;
32     short *tbl;
33     I32 ch;
34 
35     tbl = (short*)cPVOP->op_pv;
36     if (!tbl)
37 	Perl_croak(aTHX_ "panic: do_trans_simple");
38 
39     s = (U8*)SvPV(sv, len);
40     send = s + len;
41 
42     /* First, take care of non-UTF8 input strings, because they're easy */
43     if (!SvUTF8(sv)) {
44 	while (s < send) {
45 	    if ((ch = tbl[*s]) >= 0) {
46 		matches++;
47 		*s++ = ch;
48 	    }
49 	    else
50 		s++;
51 	}
52 	SvSETMAGIC(sv);
53         return matches;
54     }
55 
56     /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
57     if (grows)
58 	New(0, d, len*2+1, U8);
59     else
60 	d = s;
61     dstart = d;
62     while (s < send) {
63         STRLEN ulen;
64         UV c;
65 
66         /* Need to check this, otherwise 128..255 won't match */
67 	c = utf8_to_uv(s, send - s, &ulen, 0);
68         if (c < 0x100 && (ch = tbl[c]) >= 0) {
69             matches++;
70             if (UTF8_IS_ASCII(ch))
71                 *d++ = ch;
72             else
73                 d = uv_to_utf8(d,ch);
74             s += ulen;
75         }
76 	else { /* No match -> copy */
77 	    Copy(s, d, ulen, U8);
78 	    d += ulen;
79 	    s += ulen;
80         }
81     }
82     if (grows) {
83 	sv_setpvn(sv, (char*)dstart, d - dstart);
84 	Safefree(dstart);
85     }
86     else {
87 	*d = '\0';
88 	SvCUR_set(sv, d - dstart);
89     }
90     SvUTF8_on(sv);
91     SvSETMAGIC(sv);
92     return matches;
93 }
94 
95 STATIC I32
96 S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
97 {
98     U8 *s;
99     U8 *send;
100     I32 matches = 0;
101     STRLEN len;
102     short *tbl;
103 
104     tbl = (short*)cPVOP->op_pv;
105     if (!tbl)
106 	Perl_croak(aTHX_ "panic: do_trans_count");
107 
108     s = (U8*)SvPV(sv, len);
109     send = s + len;
110 
111     if (!SvUTF8(sv))
112 	while (s < send) {
113             if (tbl[*s++] >= 0)
114                 matches++;
115 	}
116     else
117 	while (s < send) {
118 	    UV c;
119 	    STRLEN ulen;
120 	    c = utf8_to_uv(s, send - s, &ulen, 0);
121 	    if (c < 0x100 && tbl[c] >= 0)
122 		matches++;
123 	    s += ulen;
124 	}
125 
126     return matches;
127 }
128 
129 STATIC I32
130 S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
131 {
132     U8 *s;
133     U8 *send;
134     U8 *d;
135     U8 *dstart;
136     I32 isutf8;
137     I32 matches = 0;
138     I32 grows = PL_op->op_private & OPpTRANS_GROWS;
139     STRLEN len;
140     short *tbl;
141     I32 ch;
142 
143     tbl = (short*)cPVOP->op_pv;
144     if (!tbl)
145 	Perl_croak(aTHX_ "panic: do_trans_complex");
146 
147     s = (U8*)SvPV(sv, len);
148     isutf8 = SvUTF8(sv);
149     send = s + len;
150 
151     if (!isutf8) {
152 	dstart = d = s;
153 	if (PL_op->op_private & OPpTRANS_SQUASH) {
154 	    U8* p = send;
155 	    while (s < send) {
156 		if ((ch = tbl[*s]) >= 0) {
157 		    *d = ch;
158 		    matches++;
159 		    if (p != d - 1 || *p != *d)
160 			p = d++;
161 		}
162 		else if (ch == -1)	/* -1 is unmapped character */
163 		    *d++ = *s;
164 		else if (ch == -2)	/* -2 is delete character */
165 		    matches++;
166 		s++;
167 	    }
168 	}
169 	else {
170 	    while (s < send) {
171 	        if ((ch = tbl[*s]) >= 0) {
172 		    matches++;
173 		    *d++ = ch;
174 		}
175 		else if (ch == -1)	/* -1 is unmapped character */
176 		    *d++ = *s;
177 		else if (ch == -2)      /* -2 is delete character */
178 		    matches++;
179 		s++;
180 	    }
181 	}
182 	*d = '\0';
183 	SvCUR_set(sv, d - dstart);
184     }
185     else { /* isutf8 */
186 	if (grows)
187 	    New(0, d, len*2+1, U8);
188 	else
189 	    d = s;
190 	dstart = d;
191 
192 #ifdef MACOS_TRADITIONAL
193 #define comp CoMP   /* "comp" is a keyword in some compilers ... */
194 #endif
195 
196 	if (PL_op->op_private & OPpTRANS_SQUASH) {
197 	    U8* p = send;
198 	    UV pch = 0xfeedface;
199 	    while (s < send) {
200 		STRLEN len;
201 	        UV comp = utf8_to_uv_simple(s, &len);
202 
203 		if (comp > 0xff) {	/* always unmapped */
204 		    Copy(s, d, len, U8);
205 		    d += len;
206 		}
207 		else if ((ch = tbl[comp]) >= 0) {
208 		    matches++;
209 		    if (ch != pch) {
210 		        d = uv_to_utf8(d, ch);
211 		        pch = ch;
212 		    }
213 		    s += len;
214 		    continue;
215 		}
216 		else if (ch == -1) {	/* -1 is unmapped character */
217 		    Copy(s, d, len, U8);
218 		    d += len;
219 		}
220 		else if (ch == -2)      /* -2 is delete character */
221 		    matches++;
222 		s += len;
223 		pch = 0xfeedface;
224 	    }
225 	}
226 	else {
227 	    while (s < send) {
228 		STRLEN len;
229 	        UV comp = utf8_to_uv_simple(s, &len);
230 		if (comp > 0xff) {	/* always unmapped */
231 		    Copy(s, d, len, U8);
232 		    d += len;
233 		}
234 		else if ((ch = tbl[comp]) >= 0) {
235 		    d = uv_to_utf8(d, ch);
236 		    matches++;
237 		}
238 		else if (ch == -1) {	/* -1 is unmapped character */
239 		    Copy(s, d, len, U8);
240 		    d += len;
241 		}
242 		else if (ch == -2)      /* -2 is delete character */
243 		    matches++;
244 		s += len;
245 	    }
246 	}
247 	if (grows) {
248 	    sv_setpvn(sv, (char*)dstart, d - dstart);
249 	    Safefree(dstart);
250 	}
251 	else {
252 	    *d = '\0';
253 	    SvCUR_set(sv, d - dstart);
254 	}
255 	SvUTF8_on(sv);
256     }
257     SvSETMAGIC(sv);
258     return matches;
259 }
260 
261 STATIC I32
262 S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
263 {
264     U8 *s;
265     U8 *send;
266     U8 *d;
267     U8 *start;
268     U8 *dstart, *dend;
269     I32 matches = 0;
270     I32 grows = PL_op->op_private & OPpTRANS_GROWS;
271     STRLEN len;
272 
273     SV* rv = (SV*)cSVOP->op_sv;
274     HV* hv = (HV*)SvRV(rv);
275     SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
276     UV none = svp ? SvUV(*svp) : 0x7fffffff;
277     UV extra = none + 1;
278     UV final;
279     UV uv;
280     I32 isutf8;
281     U8 hibit = 0;
282 
283     s = (U8*)SvPV(sv, len);
284     isutf8 = SvUTF8(sv);
285     if (!isutf8) {
286 	U8 *t = s, *e = s + len;
287 	while (t < e)
288 	    if ((hibit = UTF8_IS_CONTINUED(*t++)))
289 		break;
290 	if (hibit)
291 	    s = bytes_to_utf8(s, &len);
292     }
293     send = s + len;
294     start = s;
295 
296     svp = hv_fetch(hv, "FINAL", 5, FALSE);
297     if (svp)
298 	final = SvUV(*svp);
299 
300     if (grows) {
301 	/* d needs to be bigger than s, in case e.g. upgrading is required */
302 	New(0, d, len*3+UTF8_MAXLEN, U8);
303 	dend = d + len * 3;
304 	dstart = d;
305     }
306     else {
307 	dstart = d = s;
308 	dend = d + len;
309     }
310 
311     while (s < send) {
312 	if ((uv = swash_fetch(rv, s)) < none) {
313 	    s += UTF8SKIP(s);
314 	    matches++;
315 	    d = uv_to_utf8(d, uv);
316 	}
317 	else if (uv == none) {
318 	    int i = UTF8SKIP(s);
319 	    Copy(s, d, i, U8);
320 	    d += i;
321 	    s += i;
322 	}
323 	else if (uv == extra) {
324 	    int i = UTF8SKIP(s);
325 	    s += i;
326 	    matches++;
327 	    d = uv_to_utf8(d, final);
328 	}
329 	else
330 	    s += UTF8SKIP(s);
331 
332 	if (d > dend) {
333 	    STRLEN clen = d - dstart;
334 	    STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
335 	    if (!grows)
336 		Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
337 	    Renew(dstart, nlen+UTF8_MAXLEN, U8);
338 	    d = dstart + clen;
339 	    dend = dstart + nlen;
340 	}
341     }
342     if (grows || hibit) {
343 	sv_setpvn(sv, (char*)dstart, d - dstart);
344 	Safefree(dstart);
345 	if (grows && hibit)
346 	    Safefree(start);
347     }
348     else {
349 	*d = '\0';
350 	SvCUR_set(sv, d - dstart);
351     }
352     SvSETMAGIC(sv);
353     SvUTF8_on(sv);
354     if (!isutf8 && !(PL_hints & HINT_UTF8))
355 	sv_utf8_downgrade(sv, TRUE);
356 
357     return matches;
358 }
359 
360 STATIC I32
361 S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
362 {
363     U8 *s;
364     U8 *start, *send;
365     I32 matches = 0;
366     STRLEN len;
367 
368     SV* rv = (SV*)cSVOP->op_sv;
369     HV* hv = (HV*)SvRV(rv);
370     SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
371     UV none = svp ? SvUV(*svp) : 0x7fffffff;
372     UV uv;
373     U8 hibit = 0;
374 
375     s = (U8*)SvPV(sv, len);
376     if (!SvUTF8(sv)) {
377 	U8 *t = s, *e = s + len;
378 	while (t < e)
379 	    if ((hibit = !UTF8_IS_ASCII(*t++)))
380 		break;
381 	if (hibit)
382 	    start = s = bytes_to_utf8(s, &len);
383     }
384     send = s + len;
385 
386     while (s < send) {
387 	if ((uv = swash_fetch(rv, s)) < none)
388 	    matches++;
389 	s += UTF8SKIP(s);
390     }
391     if (hibit)
392         Safefree(start);
393 
394     return matches;
395 }
396 
397 STATIC I32
398 S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
399 {
400     U8 *s;
401     U8 *start, *send;
402     U8 *d;
403     I32 matches = 0;
404     I32 squash   = PL_op->op_private & OPpTRANS_SQUASH;
405     I32 del      = PL_op->op_private & OPpTRANS_DELETE;
406     I32 grows    = PL_op->op_private & OPpTRANS_GROWS;
407     SV* rv = (SV*)cSVOP->op_sv;
408     HV* hv = (HV*)SvRV(rv);
409     SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
410     UV none = svp ? SvUV(*svp) : 0x7fffffff;
411     UV extra = none + 1;
412     UV final;
413     UV uv;
414     STRLEN len;
415     U8 *dstart, *dend;
416     I32 isutf8;
417     U8 hibit = 0;
418 
419     s = (U8*)SvPV(sv, len);
420     isutf8 = SvUTF8(sv);
421     if (!isutf8) {
422 	U8 *t = s, *e = s + len;
423 	while (t < e)
424 	    if ((hibit = !UTF8_IS_ASCII(*t++)))
425 		break;
426 	if (hibit)
427 	    s = bytes_to_utf8(s, &len);
428     }
429     send = s + len;
430     start = s;
431 
432     svp = hv_fetch(hv, "FINAL", 5, FALSE);
433     if (svp)
434 	final = SvUV(*svp);
435 
436     if (grows) {
437 	/* d needs to be bigger than s, in case e.g. upgrading is required */
438 	New(0, d, len*3+UTF8_MAXLEN, U8);
439 	dend = d + len * 3;
440 	dstart = d;
441     }
442     else {
443 	dstart = d = s;
444 	dend = d + len;
445     }
446 
447     if (squash) {
448 	UV puv = 0xfeedface;
449 	while (s < send) {
450 	    uv = swash_fetch(rv, s);
451 
452 	    if (d > dend) {
453 	        STRLEN clen = d - dstart;
454 		STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
455 		if (!grows)
456 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
457 		Renew(dstart, nlen+UTF8_MAXLEN, U8);
458 		d = dstart + clen;
459 		dend = dstart + nlen;
460 	    }
461 	    if (uv < none) {
462 		matches++;
463 		if (uv != puv) {
464 		    d = uv_to_utf8(d, uv);
465 		    puv = uv;
466 		}
467 		s += UTF8SKIP(s);
468 		continue;
469 	    }
470 	    else if (uv == none) {	/* "none" is unmapped character */
471 		int i = UTF8SKIP(s);
472 		Copy(s, d, i, U8);
473 		d += i;
474 		s += i;
475 		puv = 0xfeedface;
476 		continue;
477 	    }
478 	    else if (uv == extra && !del) {
479 		matches++;
480 		if (uv != puv) {
481 		    d = uv_to_utf8(d, final);
482 		    puv = final;
483 		}
484 		s += UTF8SKIP(s);
485 		continue;
486 	    }
487 	    matches++;			/* "none+1" is delete character */
488 	    s += UTF8SKIP(s);
489 	}
490     }
491     else {
492 	while (s < send) {
493 	    uv = swash_fetch(rv, s);
494 	    if (d > dend) {
495 	        STRLEN clen = d - dstart;
496 		STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
497 		if (!grows)
498 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8");
499 		Renew(dstart, nlen+UTF8_MAXLEN, U8);
500 		d = dstart + clen;
501 		dend = dstart + nlen;
502 	    }
503 	    if (uv < none) {
504 		matches++;
505 		d = uv_to_utf8(d, uv);
506 		s += UTF8SKIP(s);
507 		continue;
508 	    }
509 	    else if (uv == none) {	/* "none" is unmapped character */
510 		int i = UTF8SKIP(s);
511 		Copy(s, d, i, U8);
512 		d += i;
513 		s += i;
514 		continue;
515 	    }
516 	    else if (uv == extra && !del) {
517 		matches++;
518 		d = uv_to_utf8(d, final);
519 		s += UTF8SKIP(s);
520 		continue;
521 	    }
522 	    matches++;			/* "none+1" is delete character */
523 	    s += UTF8SKIP(s);
524 	}
525     }
526     if (grows || hibit) {
527 	sv_setpvn(sv, (char*)dstart, d - dstart);
528 	Safefree(dstart);
529 	if (grows && hibit)
530 	    Safefree(start);
531     }
532     else {
533 	*d = '\0';
534 	SvCUR_set(sv, d - dstart);
535     }
536     SvUTF8_on(sv);
537     if (!isutf8 && !(PL_hints & HINT_UTF8))
538 	sv_utf8_downgrade(sv, TRUE);
539     SvSETMAGIC(sv);
540 
541     return matches;
542 }
543 
544 I32
545 Perl_do_trans(pTHX_ SV *sv)
546 {
547     STRLEN len;
548     I32 hasutf = (PL_op->op_private &
549                     (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
550 
551     if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
552 	Perl_croak(aTHX_ PL_no_modify);
553 
554     (void)SvPV(sv, len);
555     if (!len)
556 	return 0;
557     if (!SvPOKp(sv))
558 	(void)SvPV_force(sv, len);
559     if (!(PL_op->op_private & OPpTRANS_IDENTICAL))
560 	(void)SvPOK_only_UTF8(sv);
561 
562     DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
563 
564     switch (PL_op->op_private & ~hasutf & 63) {
565     case 0:
566 	if (hasutf)
567 	    return do_trans_simple_utf8(sv);
568 	else
569 	    return do_trans_simple(sv);
570 
571     case OPpTRANS_IDENTICAL:
572 	if (hasutf)
573 	    return do_trans_count_utf8(sv);
574 	else
575 	    return do_trans_count(sv);
576 
577     default:
578 	if (hasutf)
579 	    return do_trans_complex_utf8(sv);
580 	else
581 	    return do_trans_complex(sv);
582     }
583 }
584 
585 void
586 Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
587 {
588     SV **oldmark = mark;
589     register I32 items = sp - mark;
590     register STRLEN len;
591     STRLEN delimlen;
592     register char *delim = SvPV(del, delimlen);
593     STRLEN tmplen;
594 
595     mark++;
596     len = (items > 0 ? (delimlen * (items - 1) ) : 0);
597     (void)SvUPGRADE(sv, SVt_PV);
598     if (SvLEN(sv) < len + items) {	/* current length is way too short */
599 	while (items-- > 0) {
600 	    if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
601 		SvPV(*mark, tmplen);
602 		len += tmplen;
603 	    }
604 	    mark++;
605 	}
606 	SvGROW(sv, len + 1);		/* so try to pre-extend */
607 
608 	mark = oldmark;
609 	items = sp - mark;
610 	++mark;
611     }
612 
613     if (items-- > 0) {
614 	sv_setpv(sv, "");
615 	if (*mark)
616 	    sv_catsv(sv, *mark);
617 	mark++;
618     }
619     else
620 	sv_setpv(sv,"");
621     if (delimlen) {
622 	for (; items > 0; items--,mark++) {
623 	    sv_catsv(sv,del);
624 	    sv_catsv(sv,*mark);
625 	}
626     }
627     else {
628 	for (; items > 0; items--,mark++)
629 	    sv_catsv(sv,*mark);
630     }
631     SvSETMAGIC(sv);
632 }
633 
634 void
635 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
636 {
637     STRLEN patlen;
638     char *pat = SvPV(*sarg, patlen);
639     bool do_taint = FALSE;
640 
641     sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
642     SvSETMAGIC(sv);
643     if (do_taint)
644 	SvTAINTED_on(sv);
645 }
646 
647 /* currently converts input to bytes if possible, but doesn't sweat failure */
648 UV
649 Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
650 {
651     STRLEN srclen, len;
652     unsigned char *s = (unsigned char *) SvPV(sv, srclen);
653     UV retnum = 0;
654 
655     if (offset < 0)
656 	return retnum;
657     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
658 	Perl_croak(aTHX_ "Illegal number of bits in vec");
659 
660     if (SvUTF8(sv))
661 	(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
662 
663     offset *= size;	/* turn into bit offset */
664     len = (offset + size + 7) / 8;	/* required number of bytes */
665     if (len > srclen) {
666 	if (size <= 8)
667 	    retnum = 0;
668 	else {
669 	    offset >>= 3;	/* turn into byte offset */
670 	    if (size == 16) {
671 		if (offset >= srclen)
672 		    retnum = 0;
673 		else
674 		    retnum = (UV) s[offset] <<  8;
675 	    }
676 	    else if (size == 32) {
677 		if (offset >= srclen)
678 		    retnum = 0;
679 		else if (offset + 1 >= srclen)
680 		    retnum =
681 			((UV) s[offset    ] << 24);
682 		else if (offset + 2 >= srclen)
683 		    retnum =
684 			((UV) s[offset    ] << 24) +
685 			((UV) s[offset + 1] << 16);
686 		else
687 		    retnum =
688 			((UV) s[offset    ] << 24) +
689 			((UV) s[offset + 1] << 16) +
690 			(     s[offset + 2] <<  8);
691 	    }
692 #ifdef UV_IS_QUAD
693 	    else if (size == 64) {
694 		if (ckWARN(WARN_PORTABLE))
695 		    Perl_warner(aTHX_ WARN_PORTABLE,
696 				"Bit vector size > 32 non-portable");
697 		if (offset >= srclen)
698 		    retnum = 0;
699 		else if (offset + 1 >= srclen)
700 		    retnum =
701 			(UV) s[offset     ] << 56;
702 		else if (offset + 2 >= srclen)
703 		    retnum =
704 			((UV) s[offset    ] << 56) +
705 			((UV) s[offset + 1] << 48);
706 		else if (offset + 3 >= srclen)
707 		    retnum =
708 			((UV) s[offset    ] << 56) +
709 			((UV) s[offset + 1] << 48) +
710 			((UV) s[offset + 2] << 40);
711 		else if (offset + 4 >= srclen)
712 		    retnum =
713 			((UV) s[offset    ] << 56) +
714 			((UV) s[offset + 1] << 48) +
715 			((UV) s[offset + 2] << 40) +
716 			((UV) s[offset + 3] << 32);
717 		else if (offset + 5 >= srclen)
718 		    retnum =
719 			((UV) s[offset    ] << 56) +
720 			((UV) s[offset + 1] << 48) +
721 			((UV) s[offset + 2] << 40) +
722 			((UV) s[offset + 3] << 32) +
723 			(     s[offset + 4] << 24);
724 		else if (offset + 6 >= srclen)
725 		    retnum =
726 			((UV) s[offset    ] << 56) +
727 			((UV) s[offset + 1] << 48) +
728 			((UV) s[offset + 2] << 40) +
729 			((UV) s[offset + 3] << 32) +
730 			((UV) s[offset + 4] << 24) +
731 			((UV) s[offset + 5] << 16);
732 		else
733 		    retnum =
734 			((UV) s[offset    ] << 56) +
735 			((UV) s[offset + 1] << 48) +
736 			((UV) s[offset + 2] << 40) +
737 			((UV) s[offset + 3] << 32) +
738 			((UV) s[offset + 4] << 24) +
739 			((UV) s[offset + 5] << 16) +
740 			(     s[offset + 6] <<  8);
741 	    }
742 #endif
743 	}
744     }
745     else if (size < 8)
746 	retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
747     else {
748 	offset >>= 3;	/* turn into byte offset */
749 	if (size == 8)
750 	    retnum = s[offset];
751 	else if (size == 16)
752 	    retnum =
753 		((UV) s[offset] <<      8) +
754 		      s[offset + 1];
755 	else if (size == 32)
756 	    retnum =
757 		((UV) s[offset    ] << 24) +
758 		((UV) s[offset + 1] << 16) +
759 		(     s[offset + 2] <<  8) +
760 		      s[offset + 3];
761 #ifdef UV_IS_QUAD
762 	else if (size == 64) {
763 	    if (ckWARN(WARN_PORTABLE))
764 		Perl_warner(aTHX_ WARN_PORTABLE,
765 			    "Bit vector size > 32 non-portable");
766 	    retnum =
767 		((UV) s[offset    ] << 56) +
768 		((UV) s[offset + 1] << 48) +
769 		((UV) s[offset + 2] << 40) +
770 		((UV) s[offset + 3] << 32) +
771 		((UV) s[offset + 4] << 24) +
772 		((UV) s[offset + 5] << 16) +
773 		(     s[offset + 6] <<  8) +
774 		      s[offset + 7];
775 	}
776 #endif
777     }
778 
779     return retnum;
780 }
781 
782 /* currently converts input to bytes if possible but doesn't sweat failures,
783  * although it does ensure that the string it clobbers is not marked as
784  * utf8-valid any more
785  */
786 void
787 Perl_do_vecset(pTHX_ SV *sv)
788 {
789     SV *targ = LvTARG(sv);
790     register I32 offset;
791     register I32 size;
792     register unsigned char *s;
793     register UV lval;
794     I32 mask;
795     STRLEN targlen;
796     STRLEN len;
797 
798     if (!targ)
799 	return;
800     s = (unsigned char*)SvPV_force(targ, targlen);
801     if (SvUTF8(targ)) {
802 	/* This is handled by the SvPOK_only below...
803 	if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
804 	    SvUTF8_off(targ);
805 	 */
806 	(void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
807     }
808 
809     (void)SvPOK_only(targ);
810     lval = SvUV(sv);
811     offset = LvTARGOFF(sv);
812     if (offset < 0)
813 	Perl_croak(aTHX_ "Assigning to negative offset in vec");
814     size = LvTARGLEN(sv);
815     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
816 	Perl_croak(aTHX_ "Illegal number of bits in vec");
817 
818     offset *= size;			/* turn into bit offset */
819     len = (offset + size + 7) / 8;	/* required number of bytes */
820     if (len > targlen) {
821 	s = (unsigned char*)SvGROW(targ, len + 1);
822 	(void)memzero(s + targlen, len - targlen + 1);
823 	SvCUR_set(targ, len);
824     }
825 
826     if (size < 8) {
827 	mask = (1 << size) - 1;
828 	size = offset & 7;
829 	lval &= mask;
830 	offset >>= 3;			/* turn into byte offset */
831 	s[offset] &= ~(mask << size);
832 	s[offset] |= lval << size;
833     }
834     else {
835 	offset >>= 3;			/* turn into byte offset */
836 	if (size == 8)
837 	    s[offset  ] = lval         & 0xff;
838 	else if (size == 16) {
839 	    s[offset  ] = (lval >>  8) & 0xff;
840 	    s[offset+1] = lval         & 0xff;
841 	}
842 	else if (size == 32) {
843 	    s[offset  ] = (lval >> 24) & 0xff;
844 	    s[offset+1] = (lval >> 16) & 0xff;
845 	    s[offset+2] = (lval >>  8) & 0xff;
846 	    s[offset+3] =  lval        & 0xff;
847 	}
848 #ifdef UV_IS_QUAD
849 	else if (size == 64) {
850 	    if (ckWARN(WARN_PORTABLE))
851 		Perl_warner(aTHX_ WARN_PORTABLE,
852 			    "Bit vector size > 32 non-portable");
853 	    s[offset  ] = (lval >> 56) & 0xff;
854 	    s[offset+1] = (lval >> 48) & 0xff;
855 	    s[offset+2] = (lval >> 40) & 0xff;
856 	    s[offset+3] = (lval >> 32) & 0xff;
857 	    s[offset+4] = (lval >> 24) & 0xff;
858 	    s[offset+5] = (lval >> 16) & 0xff;
859 	    s[offset+6] = (lval >>  8) & 0xff;
860 	    s[offset+7] =  lval        & 0xff;
861 	}
862 #endif
863     }
864     SvSETMAGIC(targ);
865 }
866 
867 void
868 Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
869 {
870     STRLEN len;
871     char *s;
872 
873     if (SvTYPE(sv) == SVt_PVAV) {
874 	register I32 i;
875         I32 max;
876 	AV* av = (AV*)sv;
877         max = AvFILL(av);
878         for (i = 0; i <= max; i++) {
879 	    sv = (SV*)av_fetch(av, i, FALSE);
880 	    if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
881 		do_chop(astr, sv);
882 	}
883         return;
884     }
885     else if (SvTYPE(sv) == SVt_PVHV) {
886         HV* hv = (HV*)sv;
887 	HE* entry;
888         (void)hv_iterinit(hv);
889         /*SUPPRESS 560*/
890         while ((entry = hv_iternext(hv)))
891             do_chop(astr,hv_iterval(hv,entry));
892         return;
893     }
894     else if (SvREADONLY(sv))
895 	Perl_croak(aTHX_ PL_no_modify);
896     s = SvPV(sv, len);
897     if (len && !SvPOK(sv))
898 	s = SvPV_force(sv, len);
899     if (DO_UTF8(sv)) {
900 	if (s && len) {
901 	    char *send = s + len;
902 	    char *start = s;
903 	    s = send - 1;
904 	    while (s > start && UTF8_IS_CONTINUATION(*s))
905 		s--;
906 	    if (utf8_to_uv_simple((U8*)s, 0)) {
907 		sv_setpvn(astr, s, send - s);
908 		*s = '\0';
909 		SvCUR_set(sv, s - start);
910 		SvNIOK_off(sv);
911 		SvUTF8_on(astr);
912 	    }
913 	}
914 	else
915 	    sv_setpvn(astr, "", 0);
916     }
917     else if (s && len) {
918 	s += --len;
919 	sv_setpvn(astr, s, 1);
920 	*s = '\0';
921 	SvCUR_set(sv, len);
922 	SvUTF8_off(sv);
923 	SvNIOK_off(sv);
924     }
925     else
926 	sv_setpvn(astr, "", 0);
927     SvSETMAGIC(sv);
928 }
929 
930 I32
931 Perl_do_chomp(pTHX_ register SV *sv)
932 {
933     register I32 count;
934     STRLEN len;
935     char *s;
936 
937     if (RsSNARF(PL_rs))
938 	return 0;
939     if (RsRECORD(PL_rs))
940       return 0;
941     count = 0;
942     if (SvTYPE(sv) == SVt_PVAV) {
943 	register I32 i;
944         I32 max;
945 	AV* av = (AV*)sv;
946         max = AvFILL(av);
947         for (i = 0; i <= max; i++) {
948 	    sv = (SV*)av_fetch(av, i, FALSE);
949 	    if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
950 		count += do_chomp(sv);
951 	}
952         return count;
953     }
954     else if (SvTYPE(sv) == SVt_PVHV) {
955         HV* hv = (HV*)sv;
956 	HE* entry;
957         (void)hv_iterinit(hv);
958         /*SUPPRESS 560*/
959         while ((entry = hv_iternext(hv)))
960             count += do_chomp(hv_iterval(hv,entry));
961         return count;
962     }
963     else if (SvREADONLY(sv))
964 	Perl_croak(aTHX_ PL_no_modify);
965     s = SvPV(sv, len);
966     if (len && !SvPOKp(sv))
967 	s = SvPV_force(sv, len);
968     if (s && len) {
969 	s += --len;
970 	if (RsPARA(PL_rs)) {
971 	    if (*s != '\n')
972 		goto nope;
973 	    ++count;
974 	    while (len && s[-1] == '\n') {
975 		--len;
976 		--s;
977 		++count;
978 	    }
979 	}
980 	else {
981 	    STRLEN rslen;
982 	    char *rsptr = SvPV(PL_rs, rslen);
983 	    if (rslen == 1) {
984 		if (*s != *rsptr)
985 		    goto nope;
986 		++count;
987 	    }
988 	    else {
989 		if (len < rslen - 1)
990 		    goto nope;
991 		len -= rslen - 1;
992 		s -= rslen - 1;
993 		if (memNE(s, rsptr, rslen))
994 		    goto nope;
995 		count += rslen;
996 	    }
997 	}
998 	*s = '\0';
999 	SvCUR_set(sv, len);
1000 	SvNIOK_off(sv);
1001     }
1002   nope:
1003     SvSETMAGIC(sv);
1004     return count;
1005 }
1006 
1007 void
1008 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1009 {
1010 #ifdef LIBERAL
1011     register long *dl;
1012     register long *ll;
1013     register long *rl;
1014 #endif
1015     register char *dc;
1016     STRLEN leftlen;
1017     STRLEN rightlen;
1018     register char *lc;
1019     register char *rc;
1020     register I32 len;
1021     I32 lensave;
1022     char *lsave;
1023     char *rsave;
1024     bool left_utf = DO_UTF8(left);
1025     bool right_utf = DO_UTF8(right);
1026     I32 needlen;
1027 
1028     if (left_utf && !right_utf)
1029 	sv_utf8_upgrade(right);
1030     else if (!left_utf && right_utf)
1031 	sv_utf8_upgrade(left);
1032 
1033     if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
1034 	sv_setpvn(sv, "", 0);	/* avoid undef warning on |= and ^= */
1035     lsave = lc = SvPV(left, leftlen);
1036     rsave = rc = SvPV(right, rightlen);
1037     len = leftlen < rightlen ? leftlen : rightlen;
1038     lensave = len;
1039     if ((left_utf || right_utf) && (sv == left || sv == right)) {
1040 	needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
1041 	Newz(801, dc, needlen + 1, char);
1042     }
1043     else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
1044 	STRLEN n_a;
1045 	dc = SvPV_force(sv, n_a);
1046 	if (SvCUR(sv) < len) {
1047 	    dc = SvGROW(sv, len + 1);
1048 	    (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1049 	}
1050 	if (optype != OP_BIT_AND && (left_utf || right_utf))
1051 	    dc = SvGROW(sv, leftlen + rightlen + 1);
1052     }
1053     else {
1054 	needlen = ((optype == OP_BIT_AND)
1055 		    ? len : (leftlen > rightlen ? leftlen : rightlen));
1056 	Newz(801, dc, needlen + 1, char);
1057 	(void)sv_usepvn(sv, dc, needlen);
1058 	dc = SvPVX(sv);		/* sv_usepvn() calls Renew() */
1059     }
1060     SvCUR_set(sv, len);
1061     (void)SvPOK_only(sv);
1062     if (left_utf || right_utf) {
1063 	UV duc, luc, ruc;
1064 	char *dcsave = dc;
1065 	STRLEN lulen = leftlen;
1066 	STRLEN rulen = rightlen;
1067 	STRLEN ulen;
1068 
1069 	switch (optype) {
1070 	case OP_BIT_AND:
1071 	    while (lulen && rulen) {
1072 		luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1073 		lc += ulen;
1074 		lulen -= ulen;
1075 		ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1076 		rc += ulen;
1077 		rulen -= ulen;
1078 		duc = luc & ruc;
1079 		dc = (char*)uv_to_utf8((U8*)dc, duc);
1080 	    }
1081 	    if (sv == left || sv == right)
1082 		(void)sv_usepvn(sv, dcsave, needlen);
1083 	    SvCUR_set(sv, dc - dcsave);
1084 	    break;
1085 	case OP_BIT_XOR:
1086 	    while (lulen && rulen) {
1087 		luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1088 		lc += ulen;
1089 		lulen -= ulen;
1090 		ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1091 		rc += ulen;
1092 		rulen -= ulen;
1093 		duc = luc ^ ruc;
1094 		dc = (char*)uv_to_utf8((U8*)dc, duc);
1095 	    }
1096 	    goto mop_up_utf;
1097 	case OP_BIT_OR:
1098 	    while (lulen && rulen) {
1099 		luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1100 		lc += ulen;
1101 		lulen -= ulen;
1102 		ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1103 		rc += ulen;
1104 		rulen -= ulen;
1105 		duc = luc | ruc;
1106 		dc = (char*)uv_to_utf8((U8*)dc, duc);
1107 	    }
1108 	  mop_up_utf:
1109 	    if (sv == left || sv == right)
1110 		(void)sv_usepvn(sv, dcsave, needlen);
1111 	    SvCUR_set(sv, dc - dcsave);
1112 	    if (rulen)
1113 		sv_catpvn(sv, rc, rulen);
1114 	    else if (lulen)
1115 		sv_catpvn(sv, lc, lulen);
1116 	    else
1117 		*SvEND(sv) = '\0';
1118 	    break;
1119 	}
1120 	SvUTF8_on(sv);
1121 	goto finish;
1122     }
1123     else
1124 #ifdef LIBERAL
1125     if (len >= sizeof(long)*4 &&
1126 	!((long)dc % sizeof(long)) &&
1127 	!((long)lc % sizeof(long)) &&
1128 	!((long)rc % sizeof(long)))	/* It's almost always aligned... */
1129     {
1130 	I32 remainder = len % (sizeof(long)*4);
1131 	len /= (sizeof(long)*4);
1132 
1133 	dl = (long*)dc;
1134 	ll = (long*)lc;
1135 	rl = (long*)rc;
1136 
1137 	switch (optype) {
1138 	case OP_BIT_AND:
1139 	    while (len--) {
1140 		*dl++ = *ll++ & *rl++;
1141 		*dl++ = *ll++ & *rl++;
1142 		*dl++ = *ll++ & *rl++;
1143 		*dl++ = *ll++ & *rl++;
1144 	    }
1145 	    break;
1146 	case OP_BIT_XOR:
1147 	    while (len--) {
1148 		*dl++ = *ll++ ^ *rl++;
1149 		*dl++ = *ll++ ^ *rl++;
1150 		*dl++ = *ll++ ^ *rl++;
1151 		*dl++ = *ll++ ^ *rl++;
1152 	    }
1153 	    break;
1154 	case OP_BIT_OR:
1155 	    while (len--) {
1156 		*dl++ = *ll++ | *rl++;
1157 		*dl++ = *ll++ | *rl++;
1158 		*dl++ = *ll++ | *rl++;
1159 		*dl++ = *ll++ | *rl++;
1160 	    }
1161 	}
1162 
1163 	dc = (char*)dl;
1164 	lc = (char*)ll;
1165 	rc = (char*)rl;
1166 
1167 	len = remainder;
1168     }
1169 #endif
1170     {
1171 	switch (optype) {
1172 	case OP_BIT_AND:
1173 	    while (len--)
1174 		*dc++ = *lc++ & *rc++;
1175 	    break;
1176 	case OP_BIT_XOR:
1177 	    while (len--)
1178 		*dc++ = *lc++ ^ *rc++;
1179 	    goto mop_up;
1180 	case OP_BIT_OR:
1181 	    while (len--)
1182 		*dc++ = *lc++ | *rc++;
1183 	  mop_up:
1184 	    len = lensave;
1185 	    if (rightlen > len)
1186 		sv_catpvn(sv, rsave + len, rightlen - len);
1187 	    else if (leftlen > len)
1188 		sv_catpvn(sv, lsave + len, leftlen - len);
1189 	    else
1190 		*SvEND(sv) = '\0';
1191 	    break;
1192 	}
1193     }
1194 finish:
1195     SvTAINT(sv);
1196 }
1197 
1198 OP *
1199 Perl_do_kv(pTHX)
1200 {
1201     dSP;
1202     HV *hv = (HV*)POPs;
1203     HV *keys;
1204     register HE *entry;
1205     SV *tmpstr;
1206     I32 gimme = GIMME_V;
1207     I32 dokeys =   (PL_op->op_type == OP_KEYS);
1208     I32 dovalues = (PL_op->op_type == OP_VALUES);
1209     I32 realhv = (SvTYPE(hv) == SVt_PVHV);
1210 
1211     if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
1212 	dokeys = dovalues = TRUE;
1213 
1214     if (!hv) {
1215 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1216 	    dTARGET;		/* make sure to clear its target here */
1217 	    if (SvTYPE(TARG) == SVt_PVLV)
1218 		LvTARG(TARG) = Nullsv;
1219 	    PUSHs(TARG);
1220 	}
1221 	RETURN;
1222     }
1223 
1224     keys = realhv ? hv : avhv_keys((AV*)hv);
1225     (void)hv_iterinit(keys);	/* always reset iterator regardless */
1226 
1227     if (gimme == G_VOID)
1228 	RETURN;
1229 
1230     if (gimme == G_SCALAR) {
1231 	IV i;
1232 	dTARGET;
1233 
1234 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1235 	    if (SvTYPE(TARG) < SVt_PVLV) {
1236 		sv_upgrade(TARG, SVt_PVLV);
1237 		sv_magic(TARG, Nullsv, 'k', Nullch, 0);
1238 	    }
1239 	    LvTYPE(TARG) = 'k';
1240 	    if (LvTARG(TARG) != (SV*)keys) {
1241 		if (LvTARG(TARG))
1242 		    SvREFCNT_dec(LvTARG(TARG));
1243 		LvTARG(TARG) = SvREFCNT_inc(keys);
1244 	    }
1245 	    PUSHs(TARG);
1246 	    RETURN;
1247 	}
1248 
1249 	if (! SvTIED_mg((SV*)keys, 'P'))
1250 	    i = HvKEYS(keys);
1251 	else {
1252 	    i = 0;
1253 	    /*SUPPRESS 560*/
1254 	    while (hv_iternext(keys)) i++;
1255 	}
1256 	PUSHi( i );
1257 	RETURN;
1258     }
1259 
1260     EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
1261 
1262     PUTBACK;	/* hv_iternext and hv_iterval might clobber stack_sp */
1263     while ((entry = hv_iternext(keys))) {
1264 	SPAGAIN;
1265 	if (dokeys)
1266 	    XPUSHs(hv_iterkeysv(entry));	/* won't clobber stack_sp */
1267 	if (dovalues) {
1268 	    PUTBACK;
1269 	    tmpstr = realhv ?
1270 		     hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
1271 	    DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
1272 			    (unsigned long)HeHASH(entry),
1273 			    HvMAX(keys)+1,
1274 			    (unsigned long)(HeHASH(entry) & HvMAX(keys))));
1275 	    SPAGAIN;
1276 	    XPUSHs(tmpstr);
1277 	}
1278 	PUTBACK;
1279     }
1280     return NORMAL;
1281 }
1282 
1283