xref: /openbsd-src/gnu/usr.bin/perl/doop.c (revision 8500990981f885cbe5e6a4958549cacc238b5ae6)
1 /*    doop.c
2  *
3  *    Copyright (c) 1991-2002, Larry Wall
4  *
5  *    You may distribute under the terms of either the GNU General Public
6  *    License or the Artistic License, as specified in the README file.
7  *
8  */
9 
10 /*
11  * "'So that was the job I felt I had to do when I started,' thought Sam."
12  */
13 
14 #include "EXTERN.h"
15 #define PERL_IN_DOOP_C
16 #include "perl.h"
17 
18 #ifndef PERL_MICRO
19 #if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX)
20 #include <signal.h>
21 #endif
22 #endif
23 
24 STATIC I32
25 S_do_trans_simple(pTHX_ SV *sv)
26 {
27     U8 *s;
28     U8 *d;
29     U8 *send;
30     U8 *dstart;
31     I32 matches = 0;
32     I32 grows = PL_op->op_private & OPpTRANS_GROWS;
33     STRLEN len;
34     short *tbl;
35     I32 ch;
36 
37     tbl = (short*)cPVOP->op_pv;
38     if (!tbl)
39 	Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
40 
41     s = (U8*)SvPV(sv, len);
42     send = s + len;
43 
44     /* First, take care of non-UTF8 input strings, because they're easy */
45     if (!SvUTF8(sv)) {
46 	while (s < send) {
47 	    if ((ch = tbl[*s]) >= 0) {
48 		matches++;
49 		*s++ = (U8)ch;
50 	    }
51 	    else
52 		s++;
53 	}
54 	SvSETMAGIC(sv);
55         return matches;
56     }
57 
58     /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
59     if (grows)
60 	New(0, d, len*2+1, U8);
61     else
62 	d = s;
63     dstart = d;
64     while (s < send) {
65         STRLEN ulen;
66         UV c;
67 
68         /* Need to check this, otherwise 128..255 won't match */
69 	c = utf8n_to_uvchr(s, send - s, &ulen, 0);
70         if (c < 0x100 && (ch = tbl[c]) >= 0) {
71             matches++;
72 	    d = uvchr_to_utf8(d, ch);
73             s += ulen;
74         }
75 	else { /* No match -> copy */
76 	    Copy(s, d, ulen, U8);
77 	    d += ulen;
78 	    s += ulen;
79         }
80     }
81     if (grows) {
82 	sv_setpvn(sv, (char*)dstart, d - dstart);
83 	Safefree(dstart);
84     }
85     else {
86 	*d = '\0';
87 	SvCUR_set(sv, d - dstart);
88     }
89     SvUTF8_on(sv);
90     SvSETMAGIC(sv);
91     return matches;
92 }
93 
94 STATIC I32
95 S_do_trans_count(pTHX_ SV *sv)
96 {
97     U8 *s;
98     U8 *send;
99     I32 matches = 0;
100     STRLEN len;
101     short *tbl;
102     I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
103 
104     tbl = (short*)cPVOP->op_pv;
105     if (!tbl)
106 	Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
107 
108     s = (U8*)SvPV(sv, len);
109     send = s + len;
110 
111     if (!SvUTF8(sv))
112 	while (s < send) {
113             if (tbl[*s++] >= 0)
114                 matches++;
115 	}
116     else
117 	while (s < send) {
118 	    UV c;
119 	    STRLEN ulen;
120 	    c = utf8n_to_uvchr(s, send - s, &ulen, 0);
121 	    if (c < 0x100) {
122 		if (tbl[c] >= 0)
123 		    matches++;
124 	    } else if (complement)
125 		matches++;
126 	    s += ulen;
127 	}
128 
129     return matches;
130 }
131 
132 STATIC I32
133 S_do_trans_complex(pTHX_ SV *sv)
134 {
135     U8 *s;
136     U8 *send;
137     U8 *d;
138     U8 *dstart;
139     I32 isutf8;
140     I32 matches = 0;
141     I32 grows = PL_op->op_private & OPpTRANS_GROWS;
142     I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
143     I32 del = PL_op->op_private & OPpTRANS_DELETE;
144     STRLEN len, rlen = 0;
145     short *tbl;
146     I32 ch;
147 
148     tbl = (short*)cPVOP->op_pv;
149     if (!tbl)
150 	Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
151 
152     s = (U8*)SvPV(sv, len);
153     isutf8 = SvUTF8(sv);
154     send = s + len;
155 
156     if (!isutf8) {
157 	dstart = d = s;
158 	if (PL_op->op_private & OPpTRANS_SQUASH) {
159 	    U8* p = send;
160 	    while (s < send) {
161 		if ((ch = tbl[*s]) >= 0) {
162 		    *d = (U8)ch;
163 		    matches++;
164 		    if (p != d - 1 || *p != *d)
165 			p = d++;
166 		}
167 		else if (ch == -1)	/* -1 is unmapped character */
168 		    *d++ = *s;
169 		else if (ch == -2)	/* -2 is delete character */
170 		    matches++;
171 		s++;
172 	    }
173 	}
174 	else {
175 	    while (s < send) {
176 	        if ((ch = tbl[*s]) >= 0) {
177 		    matches++;
178 		    *d++ = (U8)ch;
179 		}
180 		else if (ch == -1)	/* -1 is unmapped character */
181 		    *d++ = *s;
182 		else if (ch == -2)      /* -2 is delete character */
183 		    matches++;
184 		s++;
185 	    }
186 	}
187 	*d = '\0';
188 	SvCUR_set(sv, d - dstart);
189     }
190     else { /* isutf8 */
191 	if (grows)
192 	    New(0, d, len*2+1, U8);
193 	else
194 	    d = s;
195 	dstart = d;
196 	if (complement && !del)
197 	    rlen = tbl[0x100];
198 
199 #ifdef MACOS_TRADITIONAL
200 #define comp CoMP   /* "comp" is a keyword in some compilers ... */
201 #endif
202 
203 	if (PL_op->op_private & OPpTRANS_SQUASH) {
204 	    UV pch = 0xfeedface;
205 	    while (s < send) {
206 		STRLEN len;
207 	        UV comp = utf8_to_uvchr(s, &len);
208 
209 		if (comp > 0xff) {
210 		    if (!complement) {
211 			Copy(s, d, len, U8);
212 			d += len;
213 		    }
214 		    else {
215 			matches++;
216 			if (!del) {
217 			    ch = (rlen == 0) ? comp :
218 				(comp - 0x100 < rlen) ?
219 				tbl[comp+1] : tbl[0x100+rlen];
220 			    if ((UV)ch != pch) {
221 				d = uvchr_to_utf8(d, ch);
222 				pch = (UV)ch;
223 			    }
224 			    s += len;
225 			    continue;
226 			}
227 		    }
228 		}
229 		else if ((ch = tbl[comp]) >= 0) {
230 		    matches++;
231 		    if ((UV)ch != pch) {
232 		        d = uvchr_to_utf8(d, ch);
233 		        pch = (UV)ch;
234 		    }
235 		    s += len;
236 		    continue;
237 		}
238 		else if (ch == -1) {	/* -1 is unmapped character */
239 		    Copy(s, d, len, U8);
240 		    d += len;
241 		}
242 		else if (ch == -2)      /* -2 is delete character */
243 		    matches++;
244 		s += len;
245 		pch = 0xfeedface;
246 	    }
247 	}
248 	else {
249 	    while (s < send) {
250 		STRLEN len;
251 	        UV comp = utf8_to_uvchr(s, &len);
252 		if (comp > 0xff) {
253 		    if (!complement) {
254 			Copy(s, d, len, U8);
255 			d += len;
256 		    }
257 		    else {
258 			matches++;
259 			if (!del) {
260 			    if (comp - 0x100 < rlen)
261 				d = uvchr_to_utf8(d, tbl[comp+1]);
262 			    else
263 				d = uvchr_to_utf8(d, tbl[0x100+rlen]);
264 			}
265 		    }
266 		}
267 		else if ((ch = tbl[comp]) >= 0) {
268 		    d = uvchr_to_utf8(d, ch);
269 		    matches++;
270 		}
271 		else if (ch == -1) {	/* -1 is unmapped character */
272 		    Copy(s, d, len, U8);
273 		    d += len;
274 		}
275 		else if (ch == -2)      /* -2 is delete character */
276 		    matches++;
277 		s += len;
278 	    }
279 	}
280 	if (grows) {
281 	    sv_setpvn(sv, (char*)dstart, d - dstart);
282 	    Safefree(dstart);
283 	}
284 	else {
285 	    *d = '\0';
286 	    SvCUR_set(sv, d - dstart);
287 	}
288 	SvUTF8_on(sv);
289     }
290     SvSETMAGIC(sv);
291     return matches;
292 }
293 
294 STATIC I32
295 S_do_trans_simple_utf8(pTHX_ SV *sv)
296 {
297     U8 *s;
298     U8 *send;
299     U8 *d;
300     U8 *start;
301     U8 *dstart, *dend;
302     I32 matches = 0;
303     I32 grows = PL_op->op_private & OPpTRANS_GROWS;
304     STRLEN len;
305 
306     SV* rv = (SV*)cSVOP->op_sv;
307     HV* hv = (HV*)SvRV(rv);
308     SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
309     UV none = svp ? SvUV(*svp) : 0x7fffffff;
310     UV extra = none + 1;
311     UV final = 0;
312     UV uv;
313     I32 isutf8;
314     U8 hibit = 0;
315 
316     s = (U8*)SvPV(sv, len);
317     isutf8 = SvUTF8(sv);
318     if (!isutf8) {
319 	U8 *t = s, *e = s + len;
320 	while (t < e) {
321 	    U8 ch = *t++;
322 	    if ((hibit = !NATIVE_IS_INVARIANT(ch)))
323 		break;
324 	}
325 	if (hibit)
326 	    s = bytes_to_utf8(s, &len);
327     }
328     send = s + len;
329     start = s;
330 
331     svp = hv_fetch(hv, "FINAL", 5, FALSE);
332     if (svp)
333 	final = SvUV(*svp);
334 
335     if (grows) {
336 	/* d needs to be bigger than s, in case e.g. upgrading is required */
337 	New(0, d, len*3+UTF8_MAXLEN, U8);
338 	dend = d + len * 3;
339 	dstart = d;
340     }
341     else {
342 	dstart = d = s;
343 	dend = d + len;
344     }
345 
346     while (s < send) {
347 	if ((uv = swash_fetch(rv, s, TRUE)) < none) {
348 	    s += UTF8SKIP(s);
349 	    matches++;
350 	    d = uvuni_to_utf8(d, uv);
351 	}
352 	else if (uv == none) {
353 	    int i = UTF8SKIP(s);
354 	    Copy(s, d, i, U8);
355 	    d += i;
356 	    s += i;
357 	}
358 	else if (uv == extra) {
359 	    int i = UTF8SKIP(s);
360 	    s += i;
361 	    matches++;
362 	    d = uvuni_to_utf8(d, final);
363 	}
364 	else
365 	    s += UTF8SKIP(s);
366 
367 	if (d > dend) {
368 	    STRLEN clen = d - dstart;
369 	    STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
370 	    if (!grows)
371 		Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
372 	    Renew(dstart, nlen+UTF8_MAXLEN, U8);
373 	    d = dstart + clen;
374 	    dend = dstart + nlen;
375 	}
376     }
377     if (grows || hibit) {
378 	sv_setpvn(sv, (char*)dstart, d - dstart);
379 	Safefree(dstart);
380 	if (grows && hibit)
381 	    Safefree(start);
382     }
383     else {
384 	*d = '\0';
385 	SvCUR_set(sv, d - dstart);
386     }
387     SvSETMAGIC(sv);
388     SvUTF8_on(sv);
389 
390     return matches;
391 }
392 
393 STATIC I32
394 S_do_trans_count_utf8(pTHX_ SV *sv)
395 {
396     U8 *s;
397     U8 *start = 0, *send;
398     I32 matches = 0;
399     STRLEN len;
400 
401     SV* rv = (SV*)cSVOP->op_sv;
402     HV* hv = (HV*)SvRV(rv);
403     SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
404     UV none = svp ? SvUV(*svp) : 0x7fffffff;
405     UV extra = none + 1;
406     UV uv;
407     U8 hibit = 0;
408 
409     s = (U8*)SvPV(sv, len);
410     if (!SvUTF8(sv)) {
411 	U8 *t = s, *e = s + len;
412 	while (t < e) {
413 	    U8 ch = *t++;
414 	    if ((hibit = !NATIVE_IS_INVARIANT(ch)))
415 		break;
416 	}
417 	if (hibit)
418 	    start = s = bytes_to_utf8(s, &len);
419     }
420     send = s + len;
421 
422     while (s < send) {
423 	if ((uv = swash_fetch(rv, s, TRUE)) < none || uv == extra)
424 	    matches++;
425 	s += UTF8SKIP(s);
426     }
427     if (hibit)
428         Safefree(start);
429 
430     return matches;
431 }
432 
433 STATIC I32
434 S_do_trans_complex_utf8(pTHX_ SV *sv)
435 {
436     U8 *s;
437     U8 *start, *send;
438     U8 *d;
439     I32 matches = 0;
440     I32 squash   = PL_op->op_private & OPpTRANS_SQUASH;
441     I32 del      = PL_op->op_private & OPpTRANS_DELETE;
442     I32 grows    = PL_op->op_private & OPpTRANS_GROWS;
443     SV* rv = (SV*)cSVOP->op_sv;
444     HV* hv = (HV*)SvRV(rv);
445     SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
446     UV none = svp ? SvUV(*svp) : 0x7fffffff;
447     UV extra = none + 1;
448     UV final = 0;
449     bool havefinal = FALSE;
450     UV uv;
451     STRLEN len;
452     U8 *dstart, *dend;
453     I32 isutf8;
454     U8 hibit = 0;
455 
456     s = (U8*)SvPV(sv, len);
457     isutf8 = SvUTF8(sv);
458     if (!isutf8) {
459 	U8 *t = s, *e = s + len;
460 	while (t < e) {
461 	    U8 ch = *t++;
462 	    if ((hibit = !NATIVE_IS_INVARIANT(ch)))
463 		break;
464 	}
465 	if (hibit)
466 	    s = bytes_to_utf8(s, &len);
467     }
468     send = s + len;
469     start = s;
470 
471     svp = hv_fetch(hv, "FINAL", 5, FALSE);
472     if (svp) {
473 	final = SvUV(*svp);
474 	havefinal = TRUE;
475     }
476 
477     if (grows) {
478 	/* d needs to be bigger than s, in case e.g. upgrading is required */
479 	New(0, d, len*3+UTF8_MAXLEN, U8);
480 	dend = d + len * 3;
481 	dstart = d;
482     }
483     else {
484 	dstart = d = s;
485 	dend = d + len;
486     }
487 
488     if (squash) {
489 	UV puv = 0xfeedface;
490 	while (s < send) {
491 	    uv = swash_fetch(rv, s, TRUE);
492 
493 	    if (d > dend) {
494 	        STRLEN clen = d - dstart;
495 		STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
496 		if (!grows)
497 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
498 		Renew(dstart, nlen+UTF8_MAXLEN, U8);
499 		d = dstart + clen;
500 		dend = dstart + nlen;
501 	    }
502 	    if (uv < none) {
503 		matches++;
504 		s += UTF8SKIP(s);
505 		if (uv != puv) {
506 		    d = uvuni_to_utf8(d, uv);
507 		    puv = uv;
508 		}
509 		continue;
510 	    }
511 	    else if (uv == none) {	/* "none" is unmapped character */
512 		int i = UTF8SKIP(s);
513 		Copy(s, d, i, U8);
514 		d += i;
515 		s += i;
516 		puv = 0xfeedface;
517 		continue;
518 	    }
519 	    else if (uv == extra && !del) {
520 		matches++;
521 		if (havefinal) {
522 		    s += UTF8SKIP(s);
523 		    if (puv != final) {
524 			d = uvuni_to_utf8(d, final);
525 			puv = final;
526 		    }
527 		}
528 		else {
529 		    STRLEN len;
530 		    uv = utf8_to_uvuni(s, &len);
531 		    if (uv != puv) {
532 			Copy(s, d, len, U8);
533 			d += len;
534 			puv = uv;
535 		    }
536 		    s += len;
537 		}
538 		continue;
539 	    }
540 	    matches++;			/* "none+1" is delete character */
541 	    s += UTF8SKIP(s);
542 	}
543     }
544     else {
545 	while (s < send) {
546 	    uv = swash_fetch(rv, s, TRUE);
547 	    if (d > dend) {
548 	        STRLEN clen = d - dstart;
549 		STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
550 		if (!grows)
551 		    Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
552 		Renew(dstart, nlen+UTF8_MAXLEN, U8);
553 		d = dstart + clen;
554 		dend = dstart + nlen;
555 	    }
556 	    if (uv < none) {
557 		matches++;
558 		s += UTF8SKIP(s);
559 		d = uvuni_to_utf8(d, uv);
560 		continue;
561 	    }
562 	    else if (uv == none) {	/* "none" is unmapped character */
563 		int i = UTF8SKIP(s);
564 		Copy(s, d, i, U8);
565 		d += i;
566 		s += i;
567 		continue;
568 	    }
569 	    else if (uv == extra && !del) {
570 		matches++;
571 		s += UTF8SKIP(s);
572 		d = uvuni_to_utf8(d, final);
573 		continue;
574 	    }
575 	    matches++;			/* "none+1" is delete character */
576 	    s += UTF8SKIP(s);
577 	}
578     }
579     if (grows || hibit) {
580 	sv_setpvn(sv, (char*)dstart, d - dstart);
581 	Safefree(dstart);
582 	if (grows && hibit)
583 	    Safefree(start);
584     }
585     else {
586 	*d = '\0';
587 	SvCUR_set(sv, d - dstart);
588     }
589     SvUTF8_on(sv);
590     SvSETMAGIC(sv);
591 
592     return matches;
593 }
594 
595 I32
596 Perl_do_trans(pTHX_ SV *sv)
597 {
598     STRLEN len;
599     I32 hasutf = (PL_op->op_private &
600                     (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
601 
602     if (SvREADONLY(sv)) {
603         if (SvFAKE(sv))
604             sv_force_normal(sv);
605         if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
606             Perl_croak(aTHX_ PL_no_modify);
607     }
608     (void)SvPV(sv, len);
609     if (!len)
610 	return 0;
611     if (!SvPOKp(sv))
612 	(void)SvPV_force(sv, len);
613     if (!(PL_op->op_private & OPpTRANS_IDENTICAL))
614 	(void)SvPOK_only_UTF8(sv);
615 
616     DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
617 
618     switch (PL_op->op_private & ~hasutf & 63) {
619     case 0:
620 	if (hasutf)
621 	    return do_trans_simple_utf8(sv);
622 	else
623 	    return do_trans_simple(sv);
624 
625     case OPpTRANS_IDENTICAL:
626     case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
627 	if (hasutf)
628 	    return do_trans_count_utf8(sv);
629 	else
630 	    return do_trans_count(sv);
631 
632     default:
633 	if (hasutf)
634 	    return do_trans_complex_utf8(sv);
635 	else
636 	    return do_trans_complex(sv);
637     }
638 }
639 
640 void
641 Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
642 {
643     SV **oldmark = mark;
644     register I32 items = sp - mark;
645     register STRLEN len;
646     STRLEN delimlen;
647     STRLEN tmplen;
648 
649     (void) SvPV(del, delimlen); /* stringify and get the delimlen */
650     /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
651 
652     mark++;
653     len = (items > 0 ? (delimlen * (items - 1) ) : 0);
654     (void)SvUPGRADE(sv, SVt_PV);
655     if (SvLEN(sv) < len + items) {	/* current length is way too short */
656 	while (items-- > 0) {
657 	    if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
658 		SvPV(*mark, tmplen);
659 		len += tmplen;
660 	    }
661 	    mark++;
662 	}
663 	SvGROW(sv, len + 1);		/* so try to pre-extend */
664 
665 	mark = oldmark;
666 	items = sp - mark;
667 	++mark;
668     }
669 
670     sv_setpv(sv, "");
671     if (PL_tainting && SvMAGICAL(sv))
672 	SvTAINTED_off(sv);
673 
674     if (items-- > 0) {
675 	if (*mark)
676 	    sv_catsv(sv, *mark);
677 	mark++;
678     }
679 
680     if (delimlen) {
681 	for (; items > 0; items--,mark++) {
682 	    sv_catsv(sv,del);
683 	    sv_catsv(sv,*mark);
684 	}
685     }
686     else {
687 	for (; items > 0; items--,mark++)
688 	    sv_catsv(sv,*mark);
689     }
690     SvSETMAGIC(sv);
691 }
692 
693 void
694 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
695 {
696     STRLEN patlen;
697     char *pat = SvPV(*sarg, patlen);
698     bool do_taint = FALSE;
699 
700     SvUTF8_off(sv);
701     if (DO_UTF8(*sarg))
702         SvUTF8_on(sv);
703     sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
704     SvSETMAGIC(sv);
705     if (do_taint)
706 	SvTAINTED_on(sv);
707 }
708 
709 /* currently converts input to bytes if possible, but doesn't sweat failure */
710 UV
711 Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
712 {
713     STRLEN srclen, len;
714     unsigned char *s = (unsigned char *) SvPV(sv, srclen);
715     UV retnum = 0;
716 
717     if (offset < 0)
718 	return retnum;
719     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
720 	Perl_croak(aTHX_ "Illegal number of bits in vec");
721 
722     if (SvUTF8(sv))
723 	(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
724 
725     offset *= size;	/* turn into bit offset */
726     len = (offset + size + 7) / 8;	/* required number of bytes */
727     if (len > srclen) {
728 	if (size <= 8)
729 	    retnum = 0;
730 	else {
731 	    offset >>= 3;	/* turn into byte offset */
732 	    if (size == 16) {
733 		if ((STRLEN)offset >= srclen)
734 		    retnum = 0;
735 		else
736 		    retnum = (UV) s[offset] <<  8;
737 	    }
738 	    else if (size == 32) {
739 		if ((STRLEN)offset >= srclen)
740 		    retnum = 0;
741 		else if ((STRLEN)(offset + 1) >= srclen)
742 		    retnum =
743 			((UV) s[offset    ] << 24);
744 		else if ((STRLEN)(offset + 2) >= srclen)
745 		    retnum =
746 			((UV) s[offset    ] << 24) +
747 			((UV) s[offset + 1] << 16);
748 		else
749 		    retnum =
750 			((UV) s[offset    ] << 24) +
751 			((UV) s[offset + 1] << 16) +
752 			(     s[offset + 2] <<  8);
753 	    }
754 #ifdef UV_IS_QUAD
755 	    else if (size == 64) {
756 		if (ckWARN(WARN_PORTABLE))
757 		    Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
758 				"Bit vector size > 32 non-portable");
759 		if (offset >= srclen)
760 		    retnum = 0;
761 		else if (offset + 1 >= srclen)
762 		    retnum =
763 			(UV) s[offset     ] << 56;
764 		else if (offset + 2 >= srclen)
765 		    retnum =
766 			((UV) s[offset    ] << 56) +
767 			((UV) s[offset + 1] << 48);
768 		else if (offset + 3 >= srclen)
769 		    retnum =
770 			((UV) s[offset    ] << 56) +
771 			((UV) s[offset + 1] << 48) +
772 			((UV) s[offset + 2] << 40);
773 		else if (offset + 4 >= srclen)
774 		    retnum =
775 			((UV) s[offset    ] << 56) +
776 			((UV) s[offset + 1] << 48) +
777 			((UV) s[offset + 2] << 40) +
778 			((UV) s[offset + 3] << 32);
779 		else if (offset + 5 >= srclen)
780 		    retnum =
781 			((UV) s[offset    ] << 56) +
782 			((UV) s[offset + 1] << 48) +
783 			((UV) s[offset + 2] << 40) +
784 			((UV) s[offset + 3] << 32) +
785 			(     s[offset + 4] << 24);
786 		else if (offset + 6 >= srclen)
787 		    retnum =
788 			((UV) s[offset    ] << 56) +
789 			((UV) s[offset + 1] << 48) +
790 			((UV) s[offset + 2] << 40) +
791 			((UV) s[offset + 3] << 32) +
792 			((UV) s[offset + 4] << 24) +
793 			((UV) s[offset + 5] << 16);
794 		else
795 		    retnum =
796 			((UV) s[offset    ] << 56) +
797 			((UV) s[offset + 1] << 48) +
798 			((UV) s[offset + 2] << 40) +
799 			((UV) s[offset + 3] << 32) +
800 			((UV) s[offset + 4] << 24) +
801 			((UV) s[offset + 5] << 16) +
802 			(     s[offset + 6] <<  8);
803 	    }
804 #endif
805 	}
806     }
807     else if (size < 8)
808 	retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
809     else {
810 	offset >>= 3;	/* turn into byte offset */
811 	if (size == 8)
812 	    retnum = s[offset];
813 	else if (size == 16)
814 	    retnum =
815 		((UV) s[offset] <<      8) +
816 		      s[offset + 1];
817 	else if (size == 32)
818 	    retnum =
819 		((UV) s[offset    ] << 24) +
820 		((UV) s[offset + 1] << 16) +
821 		(     s[offset + 2] <<  8) +
822 		      s[offset + 3];
823 #ifdef UV_IS_QUAD
824 	else if (size == 64) {
825 	    if (ckWARN(WARN_PORTABLE))
826 		Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
827 			    "Bit vector size > 32 non-portable");
828 	    retnum =
829 		((UV) s[offset    ] << 56) +
830 		((UV) s[offset + 1] << 48) +
831 		((UV) s[offset + 2] << 40) +
832 		((UV) s[offset + 3] << 32) +
833 		((UV) s[offset + 4] << 24) +
834 		((UV) s[offset + 5] << 16) +
835 		(     s[offset + 6] <<  8) +
836 		      s[offset + 7];
837 	}
838 #endif
839     }
840 
841     return retnum;
842 }
843 
844 /* currently converts input to bytes if possible but doesn't sweat failures,
845  * although it does ensure that the string it clobbers is not marked as
846  * utf8-valid any more
847  */
848 void
849 Perl_do_vecset(pTHX_ SV *sv)
850 {
851     SV *targ = LvTARG(sv);
852     register I32 offset;
853     register I32 size;
854     register unsigned char *s;
855     register UV lval;
856     I32 mask;
857     STRLEN targlen;
858     STRLEN len;
859 
860     if (!targ)
861 	return;
862     s = (unsigned char*)SvPV_force(targ, targlen);
863     if (SvUTF8(targ)) {
864 	/* This is handled by the SvPOK_only below...
865 	if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
866 	    SvUTF8_off(targ);
867 	 */
868 	(void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
869     }
870 
871     (void)SvPOK_only(targ);
872     lval = SvUV(sv);
873     offset = LvTARGOFF(sv);
874     if (offset < 0)
875 	Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
876     size = LvTARGLEN(sv);
877     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
878 	Perl_croak(aTHX_ "Illegal number of bits in vec");
879 
880     offset *= size;			/* turn into bit offset */
881     len = (offset + size + 7) / 8;	/* required number of bytes */
882     if (len > targlen) {
883 	s = (unsigned char*)SvGROW(targ, len + 1);
884 	(void)memzero((char *)(s + targlen), len - targlen + 1);
885 	SvCUR_set(targ, len);
886     }
887 
888     if (size < 8) {
889 	mask = (1 << size) - 1;
890 	size = offset & 7;
891 	lval &= mask;
892 	offset >>= 3;			/* turn into byte offset */
893 	s[offset] &= ~(mask << size);
894 	s[offset] |= lval << size;
895     }
896     else {
897 	offset >>= 3;			/* turn into byte offset */
898 	if (size == 8)
899 	    s[offset  ] = (U8)( lval        & 0xff);
900 	else if (size == 16) {
901 	    s[offset  ] = (U8)((lval >>  8) & 0xff);
902 	    s[offset+1] = (U8)( lval        & 0xff);
903 	}
904 	else if (size == 32) {
905 	    s[offset  ] = (U8)((lval >> 24) & 0xff);
906 	    s[offset+1] = (U8)((lval >> 16) & 0xff);
907 	    s[offset+2] = (U8)((lval >>  8) & 0xff);
908 	    s[offset+3] = (U8)( lval        & 0xff);
909 	}
910 #ifdef UV_IS_QUAD
911 	else if (size == 64) {
912 	    if (ckWARN(WARN_PORTABLE))
913 		Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
914 			    "Bit vector size > 32 non-portable");
915 	    s[offset  ] = (U8)((lval >> 56) & 0xff);
916 	    s[offset+1] = (U8)((lval >> 48) & 0xff);
917 	    s[offset+2] = (U8)((lval >> 40) & 0xff);
918 	    s[offset+3] = (U8)((lval >> 32) & 0xff);
919 	    s[offset+4] = (U8)((lval >> 24) & 0xff);
920 	    s[offset+5] = (U8)((lval >> 16) & 0xff);
921 	    s[offset+6] = (U8)((lval >>  8) & 0xff);
922 	    s[offset+7] = (U8)( lval        & 0xff);
923 	}
924 #endif
925     }
926     SvSETMAGIC(targ);
927 }
928 
929 void
930 Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
931 {
932     STRLEN len;
933     char *s;
934 
935     if (SvTYPE(sv) == SVt_PVAV) {
936 	register I32 i;
937         I32 max;
938 	AV* av = (AV*)sv;
939         max = AvFILL(av);
940         for (i = 0; i <= max; i++) {
941 	    sv = (SV*)av_fetch(av, i, FALSE);
942 	    if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
943 		do_chop(astr, sv);
944 	}
945         return;
946     }
947     else if (SvTYPE(sv) == SVt_PVHV) {
948         HV* hv = (HV*)sv;
949 	HE* entry;
950         (void)hv_iterinit(hv);
951         /*SUPPRESS 560*/
952         while ((entry = hv_iternext(hv)))
953             do_chop(astr,hv_iterval(hv,entry));
954         return;
955     }
956     else if (SvREADONLY(sv)) {
957         if (SvFAKE(sv)) {
958             /* SV is copy-on-write */
959 	    sv_force_normal_flags(sv, 0);
960         }
961         if (SvREADONLY(sv))
962             Perl_croak(aTHX_ PL_no_modify);
963     }
964     s = SvPV(sv, len);
965     if (len && !SvPOK(sv))
966 	s = SvPV_force(sv, len);
967     if (DO_UTF8(sv)) {
968 	if (s && len) {
969 	    char *send = s + len;
970 	    char *start = s;
971 	    s = send - 1;
972 	    while (s > start && UTF8_IS_CONTINUATION(*s))
973 		s--;
974 	    if (utf8_to_uvchr((U8*)s, 0)) {
975 		sv_setpvn(astr, s, send - s);
976 		*s = '\0';
977 		SvCUR_set(sv, s - start);
978 		SvNIOK_off(sv);
979 		SvUTF8_on(astr);
980 	    }
981 	}
982 	else
983 	    sv_setpvn(astr, "", 0);
984     }
985     else if (s && len) {
986 	s += --len;
987 	sv_setpvn(astr, s, 1);
988 	*s = '\0';
989 	SvCUR_set(sv, len);
990 	SvUTF8_off(sv);
991 	SvNIOK_off(sv);
992     }
993     else
994 	sv_setpvn(astr, "", 0);
995     SvSETMAGIC(sv);
996 }
997 
998 I32
999 Perl_do_chomp(pTHX_ register SV *sv)
1000 {
1001     register I32 count;
1002     STRLEN len;
1003     STRLEN n_a;
1004     char *s;
1005 
1006     if (RsSNARF(PL_rs))
1007 	return 0;
1008     if (RsRECORD(PL_rs))
1009       return 0;
1010     count = 0;
1011     if (SvTYPE(sv) == SVt_PVAV) {
1012 	register I32 i;
1013         I32 max;
1014 	AV* av = (AV*)sv;
1015         max = AvFILL(av);
1016         for (i = 0; i <= max; i++) {
1017 	    sv = (SV*)av_fetch(av, i, FALSE);
1018 	    if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
1019 		count += do_chomp(sv);
1020 	}
1021         return count;
1022     }
1023     else if (SvTYPE(sv) == SVt_PVHV) {
1024         HV* hv = (HV*)sv;
1025 	HE* entry;
1026         (void)hv_iterinit(hv);
1027         /*SUPPRESS 560*/
1028         while ((entry = hv_iternext(hv)))
1029             count += do_chomp(hv_iterval(hv,entry));
1030         return count;
1031     }
1032     else if (SvREADONLY(sv)) {
1033         if (SvFAKE(sv)) {
1034             /* SV is copy-on-write */
1035 	    sv_force_normal_flags(sv, 0);
1036         }
1037         if (SvREADONLY(sv))
1038             Perl_croak(aTHX_ PL_no_modify);
1039     }
1040     s = SvPV(sv, len);
1041     if (s && len) {
1042 	s += --len;
1043 	if (RsPARA(PL_rs)) {
1044 	    if (*s != '\n')
1045 		goto nope;
1046 	    ++count;
1047 	    while (len && s[-1] == '\n') {
1048 		--len;
1049 		--s;
1050 		++count;
1051 	    }
1052 	}
1053 	else {
1054 	    STRLEN rslen;
1055 	    char *rsptr = SvPV(PL_rs, rslen);
1056 	    if (rslen == 1) {
1057 		if (*s != *rsptr)
1058 		    goto nope;
1059 		++count;
1060 	    }
1061 	    else {
1062 		if (len < rslen - 1)
1063 		    goto nope;
1064 		len -= rslen - 1;
1065 		s -= rslen - 1;
1066 		if (memNE(s, rsptr, rslen))
1067 		    goto nope;
1068 		count += rslen;
1069 	    }
1070 	}
1071 	s = SvPV_force(sv, n_a);
1072 	SvCUR_set(sv, len);
1073 	*SvEND(sv) = '\0';
1074 	SvNIOK_off(sv);
1075 	SvSETMAGIC(sv);
1076     }
1077   nope:
1078     return count;
1079 }
1080 
1081 void
1082 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1083 {
1084 #ifdef LIBERAL
1085     register long *dl;
1086     register long *ll;
1087     register long *rl;
1088 #endif
1089     register char *dc;
1090     STRLEN leftlen;
1091     STRLEN rightlen;
1092     register char *lc;
1093     register char *rc;
1094     register I32 len;
1095     I32 lensave;
1096     char *lsave;
1097     char *rsave;
1098     bool left_utf = DO_UTF8(left);
1099     bool right_utf = DO_UTF8(right);
1100     I32 needlen = 0;
1101 
1102     if (left_utf && !right_utf)
1103 	sv_utf8_upgrade(right);
1104     else if (!left_utf && right_utf)
1105 	sv_utf8_upgrade(left);
1106 
1107     if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
1108 	sv_setpvn(sv, "", 0);	/* avoid undef warning on |= and ^= */
1109     lsave = lc = SvPV(left, leftlen);
1110     rsave = rc = SvPV(right, rightlen);
1111     len = leftlen < rightlen ? leftlen : rightlen;
1112     lensave = len;
1113     if ((left_utf || right_utf) && (sv == left || sv == right)) {
1114 	needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
1115 	Newz(801, dc, needlen + 1, char);
1116     }
1117     else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
1118 	STRLEN n_a;
1119 	dc = SvPV_force(sv, n_a);
1120 	if (SvCUR(sv) < (STRLEN)len) {
1121 	    dc = SvGROW(sv, (STRLEN)(len + 1));
1122 	    (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1123 	}
1124 	if (optype != OP_BIT_AND && (left_utf || right_utf))
1125 	    dc = SvGROW(sv, leftlen + rightlen + 1);
1126     }
1127     else {
1128 	needlen = ((optype == OP_BIT_AND)
1129 		    ? len : (leftlen > rightlen ? leftlen : rightlen));
1130 	Newz(801, dc, needlen + 1, char);
1131 	(void)sv_usepvn(sv, dc, needlen);
1132 	dc = SvPVX(sv);		/* sv_usepvn() calls Renew() */
1133     }
1134     SvCUR_set(sv, len);
1135     (void)SvPOK_only(sv);
1136     if (left_utf || right_utf) {
1137 	UV duc, luc, ruc;
1138 	char *dcsave = dc;
1139 	STRLEN lulen = leftlen;
1140 	STRLEN rulen = rightlen;
1141 	STRLEN ulen;
1142 
1143 	switch (optype) {
1144 	case OP_BIT_AND:
1145 	    while (lulen && rulen) {
1146 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1147 		lc += ulen;
1148 		lulen -= ulen;
1149 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1150 		rc += ulen;
1151 		rulen -= ulen;
1152 		duc = luc & ruc;
1153 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1154 	    }
1155 	    if (sv == left || sv == right)
1156 		(void)sv_usepvn(sv, dcsave, needlen);
1157 	    SvCUR_set(sv, dc - dcsave);
1158 	    break;
1159 	case OP_BIT_XOR:
1160 	    while (lulen && rulen) {
1161 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1162 		lc += ulen;
1163 		lulen -= ulen;
1164 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1165 		rc += ulen;
1166 		rulen -= ulen;
1167 		duc = luc ^ ruc;
1168 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1169 	    }
1170 	    goto mop_up_utf;
1171 	case OP_BIT_OR:
1172 	    while (lulen && rulen) {
1173 		luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1174 		lc += ulen;
1175 		lulen -= ulen;
1176 		ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1177 		rc += ulen;
1178 		rulen -= ulen;
1179 		duc = luc | ruc;
1180 		dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1181 	    }
1182 	  mop_up_utf:
1183 	    if (sv == left || sv == right)
1184 		(void)sv_usepvn(sv, dcsave, needlen);
1185 	    SvCUR_set(sv, dc - dcsave);
1186 	    if (rulen)
1187 		sv_catpvn(sv, rc, rulen);
1188 	    else if (lulen)
1189 		sv_catpvn(sv, lc, lulen);
1190 	    else
1191 		*SvEND(sv) = '\0';
1192 	    break;
1193 	}
1194 	SvUTF8_on(sv);
1195 	goto finish;
1196     }
1197     else
1198 #ifdef LIBERAL
1199     if (len >= sizeof(long)*4 &&
1200 	!((long)dc % sizeof(long)) &&
1201 	!((long)lc % sizeof(long)) &&
1202 	!((long)rc % sizeof(long)))	/* It's almost always aligned... */
1203     {
1204 	I32 remainder = len % (sizeof(long)*4);
1205 	len /= (sizeof(long)*4);
1206 
1207 	dl = (long*)dc;
1208 	ll = (long*)lc;
1209 	rl = (long*)rc;
1210 
1211 	switch (optype) {
1212 	case OP_BIT_AND:
1213 	    while (len--) {
1214 		*dl++ = *ll++ & *rl++;
1215 		*dl++ = *ll++ & *rl++;
1216 		*dl++ = *ll++ & *rl++;
1217 		*dl++ = *ll++ & *rl++;
1218 	    }
1219 	    break;
1220 	case OP_BIT_XOR:
1221 	    while (len--) {
1222 		*dl++ = *ll++ ^ *rl++;
1223 		*dl++ = *ll++ ^ *rl++;
1224 		*dl++ = *ll++ ^ *rl++;
1225 		*dl++ = *ll++ ^ *rl++;
1226 	    }
1227 	    break;
1228 	case OP_BIT_OR:
1229 	    while (len--) {
1230 		*dl++ = *ll++ | *rl++;
1231 		*dl++ = *ll++ | *rl++;
1232 		*dl++ = *ll++ | *rl++;
1233 		*dl++ = *ll++ | *rl++;
1234 	    }
1235 	}
1236 
1237 	dc = (char*)dl;
1238 	lc = (char*)ll;
1239 	rc = (char*)rl;
1240 
1241 	len = remainder;
1242     }
1243 #endif
1244     {
1245 	switch (optype) {
1246 	case OP_BIT_AND:
1247 	    while (len--)
1248 		*dc++ = *lc++ & *rc++;
1249 	    break;
1250 	case OP_BIT_XOR:
1251 	    while (len--)
1252 		*dc++ = *lc++ ^ *rc++;
1253 	    goto mop_up;
1254 	case OP_BIT_OR:
1255 	    while (len--)
1256 		*dc++ = *lc++ | *rc++;
1257 	  mop_up:
1258 	    len = lensave;
1259 	    if (rightlen > (STRLEN)len)
1260 		sv_catpvn(sv, rsave + len, rightlen - len);
1261 	    else if (leftlen > (STRLEN)len)
1262 		sv_catpvn(sv, lsave + len, leftlen - len);
1263 	    else
1264 		*SvEND(sv) = '\0';
1265 	    break;
1266 	}
1267     }
1268 finish:
1269     SvTAINT(sv);
1270 }
1271 
1272 OP *
1273 Perl_do_kv(pTHX)
1274 {
1275     dSP;
1276     HV *hv = (HV*)POPs;
1277     HV *keys;
1278     register HE *entry;
1279     SV *tmpstr;
1280     I32 gimme = GIMME_V;
1281     I32 dokeys =   (PL_op->op_type == OP_KEYS);
1282     I32 dovalues = (PL_op->op_type == OP_VALUES);
1283     I32 realhv = (SvTYPE(hv) == SVt_PVHV);
1284 
1285     if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
1286 	dokeys = dovalues = TRUE;
1287 
1288     if (!hv) {
1289 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1290 	    dTARGET;		/* make sure to clear its target here */
1291 	    if (SvTYPE(TARG) == SVt_PVLV)
1292 		LvTARG(TARG) = Nullsv;
1293 	    PUSHs(TARG);
1294 	}
1295 	RETURN;
1296     }
1297 
1298     keys = realhv ? hv : avhv_keys((AV*)hv);
1299     (void)hv_iterinit(keys);	/* always reset iterator regardless */
1300 
1301     if (gimme == G_VOID)
1302 	RETURN;
1303 
1304     if (gimme == G_SCALAR) {
1305 	IV i;
1306 	dTARGET;
1307 
1308 	if (PL_op->op_flags & OPf_MOD || LVRET) {	/* lvalue */
1309 	    if (SvTYPE(TARG) < SVt_PVLV) {
1310 		sv_upgrade(TARG, SVt_PVLV);
1311 		sv_magic(TARG, Nullsv, PERL_MAGIC_nkeys, Nullch, 0);
1312 	    }
1313 	    LvTYPE(TARG) = 'k';
1314 	    if (LvTARG(TARG) != (SV*)keys) {
1315 		if (LvTARG(TARG))
1316 		    SvREFCNT_dec(LvTARG(TARG));
1317 		LvTARG(TARG) = SvREFCNT_inc(keys);
1318 	    }
1319 	    PUSHs(TARG);
1320 	    RETURN;
1321 	}
1322 
1323 	if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied))
1324 	    i = HvKEYS(keys);
1325 	else {
1326 	    i = 0;
1327 	    /*SUPPRESS 560*/
1328 	    while (hv_iternext(keys)) i++;
1329 	}
1330 	PUSHi( i );
1331 	RETURN;
1332     }
1333 
1334     EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
1335 
1336     PUTBACK;	/* hv_iternext and hv_iterval might clobber stack_sp */
1337     while ((entry = hv_iternext(keys))) {
1338 	SPAGAIN;
1339 	if (dokeys) {
1340 	    SV* sv = hv_iterkeysv(entry);
1341 	    XPUSHs(sv);	/* won't clobber stack_sp */
1342 	}
1343 	if (dovalues) {
1344 	    PUTBACK;
1345 	    tmpstr = realhv ?
1346 		     hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
1347 	    DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
1348 			    (unsigned long)HeHASH(entry),
1349 			    HvMAX(keys)+1,
1350 			    (unsigned long)(HeHASH(entry) & HvMAX(keys))));
1351 	    SPAGAIN;
1352 	    XPUSHs(tmpstr);
1353 	}
1354 	PUTBACK;
1355     }
1356     return NORMAL;
1357 }
1358 
1359