1 /* doop.c 2 * 3 * Copyright (c) 1991-2001, Larry Wall 4 * 5 * You may distribute under the terms of either the GNU General Public 6 * License or the Artistic License, as specified in the README file. 7 * 8 */ 9 10 /* 11 * "'So that was the job I felt I had to do when I started,' thought Sam." 12 */ 13 14 #include "EXTERN.h" 15 #define PERL_IN_DOOP_C 16 #include "perl.h" 17 18 #if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX) 19 #include <signal.h> 20 #endif 21 22 STATIC I32 23 S_do_trans_simple(pTHX_ SV *sv) 24 { 25 U8 *s; 26 U8 *d; 27 U8 *send; 28 U8 *dstart; 29 I32 matches = 0; 30 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 31 STRLEN len; 32 short *tbl; 33 I32 ch; 34 35 tbl = (short*)cPVOP->op_pv; 36 if (!tbl) 37 Perl_croak(aTHX_ "panic: do_trans_simple"); 38 39 s = (U8*)SvPV(sv, len); 40 send = s + len; 41 42 /* First, take care of non-UTF8 input strings, because they're easy */ 43 if (!SvUTF8(sv)) { 44 while (s < send) { 45 if ((ch = tbl[*s]) >= 0) { 46 matches++; 47 *s++ = ch; 48 } 49 else 50 s++; 51 } 52 SvSETMAGIC(sv); 53 return matches; 54 } 55 56 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ 57 if (grows) 58 New(0, d, len*2+1, U8); 59 else 60 d = s; 61 dstart = d; 62 while (s < send) { 63 STRLEN ulen; 64 UV c; 65 66 /* Need to check this, otherwise 128..255 won't match */ 67 c = utf8_to_uv(s, send - s, &ulen, 0); 68 if (c < 0x100 && (ch = tbl[c]) >= 0) { 69 matches++; 70 if (UTF8_IS_ASCII(ch)) 71 *d++ = ch; 72 else 73 d = uv_to_utf8(d,ch); 74 s += ulen; 75 } 76 else { /* No match -> copy */ 77 Copy(s, d, ulen, U8); 78 d += ulen; 79 s += ulen; 80 } 81 } 82 if (grows) { 83 sv_setpvn(sv, (char*)dstart, d - dstart); 84 Safefree(dstart); 85 } 86 else { 87 *d = '\0'; 88 SvCUR_set(sv, d - dstart); 89 } 90 SvUTF8_on(sv); 91 SvSETMAGIC(sv); 92 return matches; 93 } 94 95 STATIC I32 96 S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ 97 { 98 U8 *s; 99 U8 *send; 100 I32 matches = 0; 101 STRLEN len; 102 short *tbl; 103 104 tbl = (short*)cPVOP->op_pv; 105 if (!tbl) 106 Perl_croak(aTHX_ "panic: do_trans_count"); 107 108 s = (U8*)SvPV(sv, len); 109 send = s + len; 110 111 if (!SvUTF8(sv)) 112 while (s < send) { 113 if (tbl[*s++] >= 0) 114 matches++; 115 } 116 else 117 while (s < send) { 118 UV c; 119 STRLEN ulen; 120 c = utf8_to_uv(s, send - s, &ulen, 0); 121 if (c < 0x100 && tbl[c] >= 0) 122 matches++; 123 s += ulen; 124 } 125 126 return matches; 127 } 128 129 STATIC I32 130 S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ 131 { 132 U8 *s; 133 U8 *send; 134 U8 *d; 135 U8 *dstart; 136 I32 isutf8; 137 I32 matches = 0; 138 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 139 STRLEN len; 140 short *tbl; 141 I32 ch; 142 143 tbl = (short*)cPVOP->op_pv; 144 if (!tbl) 145 Perl_croak(aTHX_ "panic: do_trans_complex"); 146 147 s = (U8*)SvPV(sv, len); 148 isutf8 = SvUTF8(sv); 149 send = s + len; 150 151 if (!isutf8) { 152 dstart = d = s; 153 if (PL_op->op_private & OPpTRANS_SQUASH) { 154 U8* p = send; 155 while (s < send) { 156 if ((ch = tbl[*s]) >= 0) { 157 *d = ch; 158 matches++; 159 if (p != d - 1 || *p != *d) 160 p = d++; 161 } 162 else if (ch == -1) /* -1 is unmapped character */ 163 *d++ = *s; 164 else if (ch == -2) /* -2 is delete character */ 165 matches++; 166 s++; 167 } 168 } 169 else { 170 while (s < send) { 171 if ((ch = tbl[*s]) >= 0) { 172 matches++; 173 *d++ = ch; 174 } 175 else if (ch == -1) /* -1 is unmapped character */ 176 *d++ = *s; 177 else if (ch == -2) /* -2 is delete character */ 178 matches++; 179 s++; 180 } 181 } 182 *d = '\0'; 183 SvCUR_set(sv, d - dstart); 184 } 185 else { /* isutf8 */ 186 if (grows) 187 New(0, d, len*2+1, U8); 188 else 189 d = s; 190 dstart = d; 191 192 #ifdef MACOS_TRADITIONAL 193 #define comp CoMP /* "comp" is a keyword in some compilers ... */ 194 #endif 195 196 if (PL_op->op_private & OPpTRANS_SQUASH) { 197 U8* p = send; 198 UV pch = 0xfeedface; 199 while (s < send) { 200 STRLEN len; 201 UV comp = utf8_to_uv_simple(s, &len); 202 203 if (comp > 0xff) { /* always unmapped */ 204 Copy(s, d, len, U8); 205 d += len; 206 } 207 else if ((ch = tbl[comp]) >= 0) { 208 matches++; 209 if (ch != pch) { 210 d = uv_to_utf8(d, ch); 211 pch = ch; 212 } 213 s += len; 214 continue; 215 } 216 else if (ch == -1) { /* -1 is unmapped character */ 217 Copy(s, d, len, U8); 218 d += len; 219 } 220 else if (ch == -2) /* -2 is delete character */ 221 matches++; 222 s += len; 223 pch = 0xfeedface; 224 } 225 } 226 else { 227 while (s < send) { 228 STRLEN len; 229 UV comp = utf8_to_uv_simple(s, &len); 230 if (comp > 0xff) { /* always unmapped */ 231 Copy(s, d, len, U8); 232 d += len; 233 } 234 else if ((ch = tbl[comp]) >= 0) { 235 d = uv_to_utf8(d, ch); 236 matches++; 237 } 238 else if (ch == -1) { /* -1 is unmapped character */ 239 Copy(s, d, len, U8); 240 d += len; 241 } 242 else if (ch == -2) /* -2 is delete character */ 243 matches++; 244 s += len; 245 } 246 } 247 if (grows) { 248 sv_setpvn(sv, (char*)dstart, d - dstart); 249 Safefree(dstart); 250 } 251 else { 252 *d = '\0'; 253 SvCUR_set(sv, d - dstart); 254 } 255 SvUTF8_on(sv); 256 } 257 SvSETMAGIC(sv); 258 return matches; 259 } 260 261 STATIC I32 262 S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ 263 { 264 U8 *s; 265 U8 *send; 266 U8 *d; 267 U8 *start; 268 U8 *dstart, *dend; 269 I32 matches = 0; 270 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 271 STRLEN len; 272 273 SV* rv = (SV*)cSVOP->op_sv; 274 HV* hv = (HV*)SvRV(rv); 275 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 276 UV none = svp ? SvUV(*svp) : 0x7fffffff; 277 UV extra = none + 1; 278 UV final; 279 UV uv; 280 I32 isutf8; 281 U8 hibit = 0; 282 283 s = (U8*)SvPV(sv, len); 284 isutf8 = SvUTF8(sv); 285 if (!isutf8) { 286 U8 *t = s, *e = s + len; 287 while (t < e) 288 if ((hibit = UTF8_IS_CONTINUED(*t++))) 289 break; 290 if (hibit) 291 s = bytes_to_utf8(s, &len); 292 } 293 send = s + len; 294 start = s; 295 296 svp = hv_fetch(hv, "FINAL", 5, FALSE); 297 if (svp) 298 final = SvUV(*svp); 299 300 if (grows) { 301 /* d needs to be bigger than s, in case e.g. upgrading is required */ 302 New(0, d, len*3+UTF8_MAXLEN, U8); 303 dend = d + len * 3; 304 dstart = d; 305 } 306 else { 307 dstart = d = s; 308 dend = d + len; 309 } 310 311 while (s < send) { 312 if ((uv = swash_fetch(rv, s)) < none) { 313 s += UTF8SKIP(s); 314 matches++; 315 d = uv_to_utf8(d, uv); 316 } 317 else if (uv == none) { 318 int i = UTF8SKIP(s); 319 Copy(s, d, i, U8); 320 d += i; 321 s += i; 322 } 323 else if (uv == extra) { 324 int i = UTF8SKIP(s); 325 s += i; 326 matches++; 327 d = uv_to_utf8(d, final); 328 } 329 else 330 s += UTF8SKIP(s); 331 332 if (d > dend) { 333 STRLEN clen = d - dstart; 334 STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; 335 if (!grows) 336 Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); 337 Renew(dstart, nlen+UTF8_MAXLEN, U8); 338 d = dstart + clen; 339 dend = dstart + nlen; 340 } 341 } 342 if (grows || hibit) { 343 sv_setpvn(sv, (char*)dstart, d - dstart); 344 Safefree(dstart); 345 if (grows && hibit) 346 Safefree(start); 347 } 348 else { 349 *d = '\0'; 350 SvCUR_set(sv, d - dstart); 351 } 352 SvSETMAGIC(sv); 353 SvUTF8_on(sv); 354 if (!isutf8 && !(PL_hints & HINT_UTF8)) 355 sv_utf8_downgrade(sv, TRUE); 356 357 return matches; 358 } 359 360 STATIC I32 361 S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */ 362 { 363 U8 *s; 364 U8 *start, *send; 365 I32 matches = 0; 366 STRLEN len; 367 368 SV* rv = (SV*)cSVOP->op_sv; 369 HV* hv = (HV*)SvRV(rv); 370 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 371 UV none = svp ? SvUV(*svp) : 0x7fffffff; 372 UV uv; 373 U8 hibit = 0; 374 375 s = (U8*)SvPV(sv, len); 376 if (!SvUTF8(sv)) { 377 U8 *t = s, *e = s + len; 378 while (t < e) 379 if ((hibit = !UTF8_IS_ASCII(*t++))) 380 break; 381 if (hibit) 382 start = s = bytes_to_utf8(s, &len); 383 } 384 send = s + len; 385 386 while (s < send) { 387 if ((uv = swash_fetch(rv, s)) < none) 388 matches++; 389 s += UTF8SKIP(s); 390 } 391 if (hibit) 392 Safefree(start); 393 394 return matches; 395 } 396 397 STATIC I32 398 S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ 399 { 400 U8 *s; 401 U8 *start, *send; 402 U8 *d; 403 I32 matches = 0; 404 I32 squash = PL_op->op_private & OPpTRANS_SQUASH; 405 I32 del = PL_op->op_private & OPpTRANS_DELETE; 406 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 407 SV* rv = (SV*)cSVOP->op_sv; 408 HV* hv = (HV*)SvRV(rv); 409 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 410 UV none = svp ? SvUV(*svp) : 0x7fffffff; 411 UV extra = none + 1; 412 UV final; 413 UV uv; 414 STRLEN len; 415 U8 *dstart, *dend; 416 I32 isutf8; 417 U8 hibit = 0; 418 419 s = (U8*)SvPV(sv, len); 420 isutf8 = SvUTF8(sv); 421 if (!isutf8) { 422 U8 *t = s, *e = s + len; 423 while (t < e) 424 if ((hibit = !UTF8_IS_ASCII(*t++))) 425 break; 426 if (hibit) 427 s = bytes_to_utf8(s, &len); 428 } 429 send = s + len; 430 start = s; 431 432 svp = hv_fetch(hv, "FINAL", 5, FALSE); 433 if (svp) 434 final = SvUV(*svp); 435 436 if (grows) { 437 /* d needs to be bigger than s, in case e.g. upgrading is required */ 438 New(0, d, len*3+UTF8_MAXLEN, U8); 439 dend = d + len * 3; 440 dstart = d; 441 } 442 else { 443 dstart = d = s; 444 dend = d + len; 445 } 446 447 if (squash) { 448 UV puv = 0xfeedface; 449 while (s < send) { 450 uv = swash_fetch(rv, s); 451 452 if (d > dend) { 453 STRLEN clen = d - dstart; 454 STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; 455 if (!grows) 456 Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); 457 Renew(dstart, nlen+UTF8_MAXLEN, U8); 458 d = dstart + clen; 459 dend = dstart + nlen; 460 } 461 if (uv < none) { 462 matches++; 463 if (uv != puv) { 464 d = uv_to_utf8(d, uv); 465 puv = uv; 466 } 467 s += UTF8SKIP(s); 468 continue; 469 } 470 else if (uv == none) { /* "none" is unmapped character */ 471 int i = UTF8SKIP(s); 472 Copy(s, d, i, U8); 473 d += i; 474 s += i; 475 puv = 0xfeedface; 476 continue; 477 } 478 else if (uv == extra && !del) { 479 matches++; 480 if (uv != puv) { 481 d = uv_to_utf8(d, final); 482 puv = final; 483 } 484 s += UTF8SKIP(s); 485 continue; 486 } 487 matches++; /* "none+1" is delete character */ 488 s += UTF8SKIP(s); 489 } 490 } 491 else { 492 while (s < send) { 493 uv = swash_fetch(rv, s); 494 if (d > dend) { 495 STRLEN clen = d - dstart; 496 STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; 497 if (!grows) 498 Perl_croak(aTHX_ "panic: do_trans_complex_utf8"); 499 Renew(dstart, nlen+UTF8_MAXLEN, U8); 500 d = dstart + clen; 501 dend = dstart + nlen; 502 } 503 if (uv < none) { 504 matches++; 505 d = uv_to_utf8(d, uv); 506 s += UTF8SKIP(s); 507 continue; 508 } 509 else if (uv == none) { /* "none" is unmapped character */ 510 int i = UTF8SKIP(s); 511 Copy(s, d, i, U8); 512 d += i; 513 s += i; 514 continue; 515 } 516 else if (uv == extra && !del) { 517 matches++; 518 d = uv_to_utf8(d, final); 519 s += UTF8SKIP(s); 520 continue; 521 } 522 matches++; /* "none+1" is delete character */ 523 s += UTF8SKIP(s); 524 } 525 } 526 if (grows || hibit) { 527 sv_setpvn(sv, (char*)dstart, d - dstart); 528 Safefree(dstart); 529 if (grows && hibit) 530 Safefree(start); 531 } 532 else { 533 *d = '\0'; 534 SvCUR_set(sv, d - dstart); 535 } 536 SvUTF8_on(sv); 537 if (!isutf8 && !(PL_hints & HINT_UTF8)) 538 sv_utf8_downgrade(sv, TRUE); 539 SvSETMAGIC(sv); 540 541 return matches; 542 } 543 544 I32 545 Perl_do_trans(pTHX_ SV *sv) 546 { 547 STRLEN len; 548 I32 hasutf = (PL_op->op_private & 549 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); 550 551 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) 552 Perl_croak(aTHX_ PL_no_modify); 553 554 (void)SvPV(sv, len); 555 if (!len) 556 return 0; 557 if (!SvPOKp(sv)) 558 (void)SvPV_force(sv, len); 559 if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) 560 (void)SvPOK_only_UTF8(sv); 561 562 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); 563 564 switch (PL_op->op_private & ~hasutf & 63) { 565 case 0: 566 if (hasutf) 567 return do_trans_simple_utf8(sv); 568 else 569 return do_trans_simple(sv); 570 571 case OPpTRANS_IDENTICAL: 572 if (hasutf) 573 return do_trans_count_utf8(sv); 574 else 575 return do_trans_count(sv); 576 577 default: 578 if (hasutf) 579 return do_trans_complex_utf8(sv); 580 else 581 return do_trans_complex(sv); 582 } 583 } 584 585 void 586 Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp) 587 { 588 SV **oldmark = mark; 589 register I32 items = sp - mark; 590 register STRLEN len; 591 STRLEN delimlen; 592 register char *delim = SvPV(del, delimlen); 593 STRLEN tmplen; 594 595 mark++; 596 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 597 (void)SvUPGRADE(sv, SVt_PV); 598 if (SvLEN(sv) < len + items) { /* current length is way too short */ 599 while (items-- > 0) { 600 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 601 SvPV(*mark, tmplen); 602 len += tmplen; 603 } 604 mark++; 605 } 606 SvGROW(sv, len + 1); /* so try to pre-extend */ 607 608 mark = oldmark; 609 items = sp - mark; 610 ++mark; 611 } 612 613 if (items-- > 0) { 614 sv_setpv(sv, ""); 615 if (*mark) 616 sv_catsv(sv, *mark); 617 mark++; 618 } 619 else 620 sv_setpv(sv,""); 621 if (delimlen) { 622 for (; items > 0; items--,mark++) { 623 sv_catsv(sv,del); 624 sv_catsv(sv,*mark); 625 } 626 } 627 else { 628 for (; items > 0; items--,mark++) 629 sv_catsv(sv,*mark); 630 } 631 SvSETMAGIC(sv); 632 } 633 634 void 635 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) 636 { 637 STRLEN patlen; 638 char *pat = SvPV(*sarg, patlen); 639 bool do_taint = FALSE; 640 641 sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint); 642 SvSETMAGIC(sv); 643 if (do_taint) 644 SvTAINTED_on(sv); 645 } 646 647 /* currently converts input to bytes if possible, but doesn't sweat failure */ 648 UV 649 Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) 650 { 651 STRLEN srclen, len; 652 unsigned char *s = (unsigned char *) SvPV(sv, srclen); 653 UV retnum = 0; 654 655 if (offset < 0) 656 return retnum; 657 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 658 Perl_croak(aTHX_ "Illegal number of bits in vec"); 659 660 if (SvUTF8(sv)) 661 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); 662 663 offset *= size; /* turn into bit offset */ 664 len = (offset + size + 7) / 8; /* required number of bytes */ 665 if (len > srclen) { 666 if (size <= 8) 667 retnum = 0; 668 else { 669 offset >>= 3; /* turn into byte offset */ 670 if (size == 16) { 671 if (offset >= srclen) 672 retnum = 0; 673 else 674 retnum = (UV) s[offset] << 8; 675 } 676 else if (size == 32) { 677 if (offset >= srclen) 678 retnum = 0; 679 else if (offset + 1 >= srclen) 680 retnum = 681 ((UV) s[offset ] << 24); 682 else if (offset + 2 >= srclen) 683 retnum = 684 ((UV) s[offset ] << 24) + 685 ((UV) s[offset + 1] << 16); 686 else 687 retnum = 688 ((UV) s[offset ] << 24) + 689 ((UV) s[offset + 1] << 16) + 690 ( s[offset + 2] << 8); 691 } 692 #ifdef UV_IS_QUAD 693 else if (size == 64) { 694 if (ckWARN(WARN_PORTABLE)) 695 Perl_warner(aTHX_ WARN_PORTABLE, 696 "Bit vector size > 32 non-portable"); 697 if (offset >= srclen) 698 retnum = 0; 699 else if (offset + 1 >= srclen) 700 retnum = 701 (UV) s[offset ] << 56; 702 else if (offset + 2 >= srclen) 703 retnum = 704 ((UV) s[offset ] << 56) + 705 ((UV) s[offset + 1] << 48); 706 else if (offset + 3 >= srclen) 707 retnum = 708 ((UV) s[offset ] << 56) + 709 ((UV) s[offset + 1] << 48) + 710 ((UV) s[offset + 2] << 40); 711 else if (offset + 4 >= srclen) 712 retnum = 713 ((UV) s[offset ] << 56) + 714 ((UV) s[offset + 1] << 48) + 715 ((UV) s[offset + 2] << 40) + 716 ((UV) s[offset + 3] << 32); 717 else if (offset + 5 >= srclen) 718 retnum = 719 ((UV) s[offset ] << 56) + 720 ((UV) s[offset + 1] << 48) + 721 ((UV) s[offset + 2] << 40) + 722 ((UV) s[offset + 3] << 32) + 723 ( s[offset + 4] << 24); 724 else if (offset + 6 >= srclen) 725 retnum = 726 ((UV) s[offset ] << 56) + 727 ((UV) s[offset + 1] << 48) + 728 ((UV) s[offset + 2] << 40) + 729 ((UV) s[offset + 3] << 32) + 730 ((UV) s[offset + 4] << 24) + 731 ((UV) s[offset + 5] << 16); 732 else 733 retnum = 734 ((UV) s[offset ] << 56) + 735 ((UV) s[offset + 1] << 48) + 736 ((UV) s[offset + 2] << 40) + 737 ((UV) s[offset + 3] << 32) + 738 ((UV) s[offset + 4] << 24) + 739 ((UV) s[offset + 5] << 16) + 740 ( s[offset + 6] << 8); 741 } 742 #endif 743 } 744 } 745 else if (size < 8) 746 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1); 747 else { 748 offset >>= 3; /* turn into byte offset */ 749 if (size == 8) 750 retnum = s[offset]; 751 else if (size == 16) 752 retnum = 753 ((UV) s[offset] << 8) + 754 s[offset + 1]; 755 else if (size == 32) 756 retnum = 757 ((UV) s[offset ] << 24) + 758 ((UV) s[offset + 1] << 16) + 759 ( s[offset + 2] << 8) + 760 s[offset + 3]; 761 #ifdef UV_IS_QUAD 762 else if (size == 64) { 763 if (ckWARN(WARN_PORTABLE)) 764 Perl_warner(aTHX_ WARN_PORTABLE, 765 "Bit vector size > 32 non-portable"); 766 retnum = 767 ((UV) s[offset ] << 56) + 768 ((UV) s[offset + 1] << 48) + 769 ((UV) s[offset + 2] << 40) + 770 ((UV) s[offset + 3] << 32) + 771 ((UV) s[offset + 4] << 24) + 772 ((UV) s[offset + 5] << 16) + 773 ( s[offset + 6] << 8) + 774 s[offset + 7]; 775 } 776 #endif 777 } 778 779 return retnum; 780 } 781 782 /* currently converts input to bytes if possible but doesn't sweat failures, 783 * although it does ensure that the string it clobbers is not marked as 784 * utf8-valid any more 785 */ 786 void 787 Perl_do_vecset(pTHX_ SV *sv) 788 { 789 SV *targ = LvTARG(sv); 790 register I32 offset; 791 register I32 size; 792 register unsigned char *s; 793 register UV lval; 794 I32 mask; 795 STRLEN targlen; 796 STRLEN len; 797 798 if (!targ) 799 return; 800 s = (unsigned char*)SvPV_force(targ, targlen); 801 if (SvUTF8(targ)) { 802 /* This is handled by the SvPOK_only below... 803 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE)) 804 SvUTF8_off(targ); 805 */ 806 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE); 807 } 808 809 (void)SvPOK_only(targ); 810 lval = SvUV(sv); 811 offset = LvTARGOFF(sv); 812 if (offset < 0) 813 Perl_croak(aTHX_ "Assigning to negative offset in vec"); 814 size = LvTARGLEN(sv); 815 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 816 Perl_croak(aTHX_ "Illegal number of bits in vec"); 817 818 offset *= size; /* turn into bit offset */ 819 len = (offset + size + 7) / 8; /* required number of bytes */ 820 if (len > targlen) { 821 s = (unsigned char*)SvGROW(targ, len + 1); 822 (void)memzero(s + targlen, len - targlen + 1); 823 SvCUR_set(targ, len); 824 } 825 826 if (size < 8) { 827 mask = (1 << size) - 1; 828 size = offset & 7; 829 lval &= mask; 830 offset >>= 3; /* turn into byte offset */ 831 s[offset] &= ~(mask << size); 832 s[offset] |= lval << size; 833 } 834 else { 835 offset >>= 3; /* turn into byte offset */ 836 if (size == 8) 837 s[offset ] = lval & 0xff; 838 else if (size == 16) { 839 s[offset ] = (lval >> 8) & 0xff; 840 s[offset+1] = lval & 0xff; 841 } 842 else if (size == 32) { 843 s[offset ] = (lval >> 24) & 0xff; 844 s[offset+1] = (lval >> 16) & 0xff; 845 s[offset+2] = (lval >> 8) & 0xff; 846 s[offset+3] = lval & 0xff; 847 } 848 #ifdef UV_IS_QUAD 849 else if (size == 64) { 850 if (ckWARN(WARN_PORTABLE)) 851 Perl_warner(aTHX_ WARN_PORTABLE, 852 "Bit vector size > 32 non-portable"); 853 s[offset ] = (lval >> 56) & 0xff; 854 s[offset+1] = (lval >> 48) & 0xff; 855 s[offset+2] = (lval >> 40) & 0xff; 856 s[offset+3] = (lval >> 32) & 0xff; 857 s[offset+4] = (lval >> 24) & 0xff; 858 s[offset+5] = (lval >> 16) & 0xff; 859 s[offset+6] = (lval >> 8) & 0xff; 860 s[offset+7] = lval & 0xff; 861 } 862 #endif 863 } 864 SvSETMAGIC(targ); 865 } 866 867 void 868 Perl_do_chop(pTHX_ register SV *astr, register SV *sv) 869 { 870 STRLEN len; 871 char *s; 872 873 if (SvTYPE(sv) == SVt_PVAV) { 874 register I32 i; 875 I32 max; 876 AV* av = (AV*)sv; 877 max = AvFILL(av); 878 for (i = 0; i <= max; i++) { 879 sv = (SV*)av_fetch(av, i, FALSE); 880 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 881 do_chop(astr, sv); 882 } 883 return; 884 } 885 else if (SvTYPE(sv) == SVt_PVHV) { 886 HV* hv = (HV*)sv; 887 HE* entry; 888 (void)hv_iterinit(hv); 889 /*SUPPRESS 560*/ 890 while ((entry = hv_iternext(hv))) 891 do_chop(astr,hv_iterval(hv,entry)); 892 return; 893 } 894 else if (SvREADONLY(sv)) 895 Perl_croak(aTHX_ PL_no_modify); 896 s = SvPV(sv, len); 897 if (len && !SvPOK(sv)) 898 s = SvPV_force(sv, len); 899 if (DO_UTF8(sv)) { 900 if (s && len) { 901 char *send = s + len; 902 char *start = s; 903 s = send - 1; 904 while (s > start && UTF8_IS_CONTINUATION(*s)) 905 s--; 906 if (utf8_to_uv_simple((U8*)s, 0)) { 907 sv_setpvn(astr, s, send - s); 908 *s = '\0'; 909 SvCUR_set(sv, s - start); 910 SvNIOK_off(sv); 911 SvUTF8_on(astr); 912 } 913 } 914 else 915 sv_setpvn(astr, "", 0); 916 } 917 else if (s && len) { 918 s += --len; 919 sv_setpvn(astr, s, 1); 920 *s = '\0'; 921 SvCUR_set(sv, len); 922 SvUTF8_off(sv); 923 SvNIOK_off(sv); 924 } 925 else 926 sv_setpvn(astr, "", 0); 927 SvSETMAGIC(sv); 928 } 929 930 I32 931 Perl_do_chomp(pTHX_ register SV *sv) 932 { 933 register I32 count; 934 STRLEN len; 935 char *s; 936 937 if (RsSNARF(PL_rs)) 938 return 0; 939 if (RsRECORD(PL_rs)) 940 return 0; 941 count = 0; 942 if (SvTYPE(sv) == SVt_PVAV) { 943 register I32 i; 944 I32 max; 945 AV* av = (AV*)sv; 946 max = AvFILL(av); 947 for (i = 0; i <= max; i++) { 948 sv = (SV*)av_fetch(av, i, FALSE); 949 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 950 count += do_chomp(sv); 951 } 952 return count; 953 } 954 else if (SvTYPE(sv) == SVt_PVHV) { 955 HV* hv = (HV*)sv; 956 HE* entry; 957 (void)hv_iterinit(hv); 958 /*SUPPRESS 560*/ 959 while ((entry = hv_iternext(hv))) 960 count += do_chomp(hv_iterval(hv,entry)); 961 return count; 962 } 963 else if (SvREADONLY(sv)) 964 Perl_croak(aTHX_ PL_no_modify); 965 s = SvPV(sv, len); 966 if (len && !SvPOKp(sv)) 967 s = SvPV_force(sv, len); 968 if (s && len) { 969 s += --len; 970 if (RsPARA(PL_rs)) { 971 if (*s != '\n') 972 goto nope; 973 ++count; 974 while (len && s[-1] == '\n') { 975 --len; 976 --s; 977 ++count; 978 } 979 } 980 else { 981 STRLEN rslen; 982 char *rsptr = SvPV(PL_rs, rslen); 983 if (rslen == 1) { 984 if (*s != *rsptr) 985 goto nope; 986 ++count; 987 } 988 else { 989 if (len < rslen - 1) 990 goto nope; 991 len -= rslen - 1; 992 s -= rslen - 1; 993 if (memNE(s, rsptr, rslen)) 994 goto nope; 995 count += rslen; 996 } 997 } 998 *s = '\0'; 999 SvCUR_set(sv, len); 1000 SvNIOK_off(sv); 1001 } 1002 nope: 1003 SvSETMAGIC(sv); 1004 return count; 1005 } 1006 1007 void 1008 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 1009 { 1010 #ifdef LIBERAL 1011 register long *dl; 1012 register long *ll; 1013 register long *rl; 1014 #endif 1015 register char *dc; 1016 STRLEN leftlen; 1017 STRLEN rightlen; 1018 register char *lc; 1019 register char *rc; 1020 register I32 len; 1021 I32 lensave; 1022 char *lsave; 1023 char *rsave; 1024 bool left_utf = DO_UTF8(left); 1025 bool right_utf = DO_UTF8(right); 1026 I32 needlen; 1027 1028 if (left_utf && !right_utf) 1029 sv_utf8_upgrade(right); 1030 else if (!left_utf && right_utf) 1031 sv_utf8_upgrade(left); 1032 1033 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv))) 1034 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */ 1035 lsave = lc = SvPV(left, leftlen); 1036 rsave = rc = SvPV(right, rightlen); 1037 len = leftlen < rightlen ? leftlen : rightlen; 1038 lensave = len; 1039 if ((left_utf || right_utf) && (sv == left || sv == right)) { 1040 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; 1041 Newz(801, dc, needlen + 1, char); 1042 } 1043 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1044 STRLEN n_a; 1045 dc = SvPV_force(sv, n_a); 1046 if (SvCUR(sv) < len) { 1047 dc = SvGROW(sv, len + 1); 1048 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1049 } 1050 if (optype != OP_BIT_AND && (left_utf || right_utf)) 1051 dc = SvGROW(sv, leftlen + rightlen + 1); 1052 } 1053 else { 1054 needlen = ((optype == OP_BIT_AND) 1055 ? len : (leftlen > rightlen ? leftlen : rightlen)); 1056 Newz(801, dc, needlen + 1, char); 1057 (void)sv_usepvn(sv, dc, needlen); 1058 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1059 } 1060 SvCUR_set(sv, len); 1061 (void)SvPOK_only(sv); 1062 if (left_utf || right_utf) { 1063 UV duc, luc, ruc; 1064 char *dcsave = dc; 1065 STRLEN lulen = leftlen; 1066 STRLEN rulen = rightlen; 1067 STRLEN ulen; 1068 1069 switch (optype) { 1070 case OP_BIT_AND: 1071 while (lulen && rulen) { 1072 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1073 lc += ulen; 1074 lulen -= ulen; 1075 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1076 rc += ulen; 1077 rulen -= ulen; 1078 duc = luc & ruc; 1079 dc = (char*)uv_to_utf8((U8*)dc, duc); 1080 } 1081 if (sv == left || sv == right) 1082 (void)sv_usepvn(sv, dcsave, needlen); 1083 SvCUR_set(sv, dc - dcsave); 1084 break; 1085 case OP_BIT_XOR: 1086 while (lulen && rulen) { 1087 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1088 lc += ulen; 1089 lulen -= ulen; 1090 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1091 rc += ulen; 1092 rulen -= ulen; 1093 duc = luc ^ ruc; 1094 dc = (char*)uv_to_utf8((U8*)dc, duc); 1095 } 1096 goto mop_up_utf; 1097 case OP_BIT_OR: 1098 while (lulen && rulen) { 1099 luc = utf8_to_uv((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1100 lc += ulen; 1101 lulen -= ulen; 1102 ruc = utf8_to_uv((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1103 rc += ulen; 1104 rulen -= ulen; 1105 duc = luc | ruc; 1106 dc = (char*)uv_to_utf8((U8*)dc, duc); 1107 } 1108 mop_up_utf: 1109 if (sv == left || sv == right) 1110 (void)sv_usepvn(sv, dcsave, needlen); 1111 SvCUR_set(sv, dc - dcsave); 1112 if (rulen) 1113 sv_catpvn(sv, rc, rulen); 1114 else if (lulen) 1115 sv_catpvn(sv, lc, lulen); 1116 else 1117 *SvEND(sv) = '\0'; 1118 break; 1119 } 1120 SvUTF8_on(sv); 1121 goto finish; 1122 } 1123 else 1124 #ifdef LIBERAL 1125 if (len >= sizeof(long)*4 && 1126 !((long)dc % sizeof(long)) && 1127 !((long)lc % sizeof(long)) && 1128 !((long)rc % sizeof(long))) /* It's almost always aligned... */ 1129 { 1130 I32 remainder = len % (sizeof(long)*4); 1131 len /= (sizeof(long)*4); 1132 1133 dl = (long*)dc; 1134 ll = (long*)lc; 1135 rl = (long*)rc; 1136 1137 switch (optype) { 1138 case OP_BIT_AND: 1139 while (len--) { 1140 *dl++ = *ll++ & *rl++; 1141 *dl++ = *ll++ & *rl++; 1142 *dl++ = *ll++ & *rl++; 1143 *dl++ = *ll++ & *rl++; 1144 } 1145 break; 1146 case OP_BIT_XOR: 1147 while (len--) { 1148 *dl++ = *ll++ ^ *rl++; 1149 *dl++ = *ll++ ^ *rl++; 1150 *dl++ = *ll++ ^ *rl++; 1151 *dl++ = *ll++ ^ *rl++; 1152 } 1153 break; 1154 case OP_BIT_OR: 1155 while (len--) { 1156 *dl++ = *ll++ | *rl++; 1157 *dl++ = *ll++ | *rl++; 1158 *dl++ = *ll++ | *rl++; 1159 *dl++ = *ll++ | *rl++; 1160 } 1161 } 1162 1163 dc = (char*)dl; 1164 lc = (char*)ll; 1165 rc = (char*)rl; 1166 1167 len = remainder; 1168 } 1169 #endif 1170 { 1171 switch (optype) { 1172 case OP_BIT_AND: 1173 while (len--) 1174 *dc++ = *lc++ & *rc++; 1175 break; 1176 case OP_BIT_XOR: 1177 while (len--) 1178 *dc++ = *lc++ ^ *rc++; 1179 goto mop_up; 1180 case OP_BIT_OR: 1181 while (len--) 1182 *dc++ = *lc++ | *rc++; 1183 mop_up: 1184 len = lensave; 1185 if (rightlen > len) 1186 sv_catpvn(sv, rsave + len, rightlen - len); 1187 else if (leftlen > len) 1188 sv_catpvn(sv, lsave + len, leftlen - len); 1189 else 1190 *SvEND(sv) = '\0'; 1191 break; 1192 } 1193 } 1194 finish: 1195 SvTAINT(sv); 1196 } 1197 1198 OP * 1199 Perl_do_kv(pTHX) 1200 { 1201 dSP; 1202 HV *hv = (HV*)POPs; 1203 HV *keys; 1204 register HE *entry; 1205 SV *tmpstr; 1206 I32 gimme = GIMME_V; 1207 I32 dokeys = (PL_op->op_type == OP_KEYS); 1208 I32 dovalues = (PL_op->op_type == OP_VALUES); 1209 I32 realhv = (SvTYPE(hv) == SVt_PVHV); 1210 1211 if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV) 1212 dokeys = dovalues = TRUE; 1213 1214 if (!hv) { 1215 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1216 dTARGET; /* make sure to clear its target here */ 1217 if (SvTYPE(TARG) == SVt_PVLV) 1218 LvTARG(TARG) = Nullsv; 1219 PUSHs(TARG); 1220 } 1221 RETURN; 1222 } 1223 1224 keys = realhv ? hv : avhv_keys((AV*)hv); 1225 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1226 1227 if (gimme == G_VOID) 1228 RETURN; 1229 1230 if (gimme == G_SCALAR) { 1231 IV i; 1232 dTARGET; 1233 1234 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1235 if (SvTYPE(TARG) < SVt_PVLV) { 1236 sv_upgrade(TARG, SVt_PVLV); 1237 sv_magic(TARG, Nullsv, 'k', Nullch, 0); 1238 } 1239 LvTYPE(TARG) = 'k'; 1240 if (LvTARG(TARG) != (SV*)keys) { 1241 if (LvTARG(TARG)) 1242 SvREFCNT_dec(LvTARG(TARG)); 1243 LvTARG(TARG) = SvREFCNT_inc(keys); 1244 } 1245 PUSHs(TARG); 1246 RETURN; 1247 } 1248 1249 if (! SvTIED_mg((SV*)keys, 'P')) 1250 i = HvKEYS(keys); 1251 else { 1252 i = 0; 1253 /*SUPPRESS 560*/ 1254 while (hv_iternext(keys)) i++; 1255 } 1256 PUSHi( i ); 1257 RETURN; 1258 } 1259 1260 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues)); 1261 1262 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */ 1263 while ((entry = hv_iternext(keys))) { 1264 SPAGAIN; 1265 if (dokeys) 1266 XPUSHs(hv_iterkeysv(entry)); /* won't clobber stack_sp */ 1267 if (dovalues) { 1268 PUTBACK; 1269 tmpstr = realhv ? 1270 hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry); 1271 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu", 1272 (unsigned long)HeHASH(entry), 1273 HvMAX(keys)+1, 1274 (unsigned long)(HeHASH(entry) & HvMAX(keys)))); 1275 SPAGAIN; 1276 XPUSHs(tmpstr); 1277 } 1278 PUTBACK; 1279 } 1280 return NORMAL; 1281 } 1282 1283