1 /* doop.c 2 * 3 * Copyright (c) 1991-2002, Larry Wall 4 * 5 * You may distribute under the terms of either the GNU General Public 6 * License or the Artistic License, as specified in the README file. 7 * 8 */ 9 10 /* 11 * "'So that was the job I felt I had to do when I started,' thought Sam." 12 */ 13 14 #include "EXTERN.h" 15 #define PERL_IN_DOOP_C 16 #include "perl.h" 17 18 #ifndef PERL_MICRO 19 #if !defined(NSIG) || defined(M_UNIX) || defined(M_XENIX) 20 #include <signal.h> 21 #endif 22 #endif 23 24 STATIC I32 25 S_do_trans_simple(pTHX_ SV *sv) 26 { 27 U8 *s; 28 U8 *d; 29 U8 *send; 30 U8 *dstart; 31 I32 matches = 0; 32 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 33 STRLEN len; 34 short *tbl; 35 I32 ch; 36 37 tbl = (short*)cPVOP->op_pv; 38 if (!tbl) 39 Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); 40 41 s = (U8*)SvPV(sv, len); 42 send = s + len; 43 44 /* First, take care of non-UTF8 input strings, because they're easy */ 45 if (!SvUTF8(sv)) { 46 while (s < send) { 47 if ((ch = tbl[*s]) >= 0) { 48 matches++; 49 *s++ = (U8)ch; 50 } 51 else 52 s++; 53 } 54 SvSETMAGIC(sv); 55 return matches; 56 } 57 58 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ 59 if (grows) 60 New(0, d, len*2+1, U8); 61 else 62 d = s; 63 dstart = d; 64 while (s < send) { 65 STRLEN ulen; 66 UV c; 67 68 /* Need to check this, otherwise 128..255 won't match */ 69 c = utf8n_to_uvchr(s, send - s, &ulen, 0); 70 if (c < 0x100 && (ch = tbl[c]) >= 0) { 71 matches++; 72 d = uvchr_to_utf8(d, ch); 73 s += ulen; 74 } 75 else { /* No match -> copy */ 76 Copy(s, d, ulen, U8); 77 d += ulen; 78 s += ulen; 79 } 80 } 81 if (grows) { 82 sv_setpvn(sv, (char*)dstart, d - dstart); 83 Safefree(dstart); 84 } 85 else { 86 *d = '\0'; 87 SvCUR_set(sv, d - dstart); 88 } 89 SvUTF8_on(sv); 90 SvSETMAGIC(sv); 91 return matches; 92 } 93 94 STATIC I32 95 S_do_trans_count(pTHX_ SV *sv) 96 { 97 U8 *s; 98 U8 *send; 99 I32 matches = 0; 100 STRLEN len; 101 short *tbl; 102 I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 103 104 tbl = (short*)cPVOP->op_pv; 105 if (!tbl) 106 Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__); 107 108 s = (U8*)SvPV(sv, len); 109 send = s + len; 110 111 if (!SvUTF8(sv)) 112 while (s < send) { 113 if (tbl[*s++] >= 0) 114 matches++; 115 } 116 else 117 while (s < send) { 118 UV c; 119 STRLEN ulen; 120 c = utf8n_to_uvchr(s, send - s, &ulen, 0); 121 if (c < 0x100) { 122 if (tbl[c] >= 0) 123 matches++; 124 } else if (complement) 125 matches++; 126 s += ulen; 127 } 128 129 return matches; 130 } 131 132 STATIC I32 133 S_do_trans_complex(pTHX_ SV *sv) 134 { 135 U8 *s; 136 U8 *send; 137 U8 *d; 138 U8 *dstart; 139 I32 isutf8; 140 I32 matches = 0; 141 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 142 I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 143 I32 del = PL_op->op_private & OPpTRANS_DELETE; 144 STRLEN len, rlen = 0; 145 short *tbl; 146 I32 ch; 147 148 tbl = (short*)cPVOP->op_pv; 149 if (!tbl) 150 Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); 151 152 s = (U8*)SvPV(sv, len); 153 isutf8 = SvUTF8(sv); 154 send = s + len; 155 156 if (!isutf8) { 157 dstart = d = s; 158 if (PL_op->op_private & OPpTRANS_SQUASH) { 159 U8* p = send; 160 while (s < send) { 161 if ((ch = tbl[*s]) >= 0) { 162 *d = (U8)ch; 163 matches++; 164 if (p != d - 1 || *p != *d) 165 p = d++; 166 } 167 else if (ch == -1) /* -1 is unmapped character */ 168 *d++ = *s; 169 else if (ch == -2) /* -2 is delete character */ 170 matches++; 171 s++; 172 } 173 } 174 else { 175 while (s < send) { 176 if ((ch = tbl[*s]) >= 0) { 177 matches++; 178 *d++ = (U8)ch; 179 } 180 else if (ch == -1) /* -1 is unmapped character */ 181 *d++ = *s; 182 else if (ch == -2) /* -2 is delete character */ 183 matches++; 184 s++; 185 } 186 } 187 *d = '\0'; 188 SvCUR_set(sv, d - dstart); 189 } 190 else { /* isutf8 */ 191 if (grows) 192 New(0, d, len*2+1, U8); 193 else 194 d = s; 195 dstart = d; 196 if (complement && !del) 197 rlen = tbl[0x100]; 198 199 #ifdef MACOS_TRADITIONAL 200 #define comp CoMP /* "comp" is a keyword in some compilers ... */ 201 #endif 202 203 if (PL_op->op_private & OPpTRANS_SQUASH) { 204 UV pch = 0xfeedface; 205 while (s < send) { 206 STRLEN len; 207 UV comp = utf8_to_uvchr(s, &len); 208 209 if (comp > 0xff) { 210 if (!complement) { 211 Copy(s, d, len, U8); 212 d += len; 213 } 214 else { 215 matches++; 216 if (!del) { 217 ch = (rlen == 0) ? comp : 218 (comp - 0x100 < rlen) ? 219 tbl[comp+1] : tbl[0x100+rlen]; 220 if ((UV)ch != pch) { 221 d = uvchr_to_utf8(d, ch); 222 pch = (UV)ch; 223 } 224 s += len; 225 continue; 226 } 227 } 228 } 229 else if ((ch = tbl[comp]) >= 0) { 230 matches++; 231 if ((UV)ch != pch) { 232 d = uvchr_to_utf8(d, ch); 233 pch = (UV)ch; 234 } 235 s += len; 236 continue; 237 } 238 else if (ch == -1) { /* -1 is unmapped character */ 239 Copy(s, d, len, U8); 240 d += len; 241 } 242 else if (ch == -2) /* -2 is delete character */ 243 matches++; 244 s += len; 245 pch = 0xfeedface; 246 } 247 } 248 else { 249 while (s < send) { 250 STRLEN len; 251 UV comp = utf8_to_uvchr(s, &len); 252 if (comp > 0xff) { 253 if (!complement) { 254 Copy(s, d, len, U8); 255 d += len; 256 } 257 else { 258 matches++; 259 if (!del) { 260 if (comp - 0x100 < rlen) 261 d = uvchr_to_utf8(d, tbl[comp+1]); 262 else 263 d = uvchr_to_utf8(d, tbl[0x100+rlen]); 264 } 265 } 266 } 267 else if ((ch = tbl[comp]) >= 0) { 268 d = uvchr_to_utf8(d, ch); 269 matches++; 270 } 271 else if (ch == -1) { /* -1 is unmapped character */ 272 Copy(s, d, len, U8); 273 d += len; 274 } 275 else if (ch == -2) /* -2 is delete character */ 276 matches++; 277 s += len; 278 } 279 } 280 if (grows) { 281 sv_setpvn(sv, (char*)dstart, d - dstart); 282 Safefree(dstart); 283 } 284 else { 285 *d = '\0'; 286 SvCUR_set(sv, d - dstart); 287 } 288 SvUTF8_on(sv); 289 } 290 SvSETMAGIC(sv); 291 return matches; 292 } 293 294 STATIC I32 295 S_do_trans_simple_utf8(pTHX_ SV *sv) 296 { 297 U8 *s; 298 U8 *send; 299 U8 *d; 300 U8 *start; 301 U8 *dstart, *dend; 302 I32 matches = 0; 303 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 304 STRLEN len; 305 306 SV* rv = (SV*)cSVOP->op_sv; 307 HV* hv = (HV*)SvRV(rv); 308 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 309 UV none = svp ? SvUV(*svp) : 0x7fffffff; 310 UV extra = none + 1; 311 UV final = 0; 312 UV uv; 313 I32 isutf8; 314 U8 hibit = 0; 315 316 s = (U8*)SvPV(sv, len); 317 isutf8 = SvUTF8(sv); 318 if (!isutf8) { 319 U8 *t = s, *e = s + len; 320 while (t < e) { 321 U8 ch = *t++; 322 if ((hibit = !NATIVE_IS_INVARIANT(ch))) 323 break; 324 } 325 if (hibit) 326 s = bytes_to_utf8(s, &len); 327 } 328 send = s + len; 329 start = s; 330 331 svp = hv_fetch(hv, "FINAL", 5, FALSE); 332 if (svp) 333 final = SvUV(*svp); 334 335 if (grows) { 336 /* d needs to be bigger than s, in case e.g. upgrading is required */ 337 New(0, d, len*3+UTF8_MAXLEN, U8); 338 dend = d + len * 3; 339 dstart = d; 340 } 341 else { 342 dstart = d = s; 343 dend = d + len; 344 } 345 346 while (s < send) { 347 if ((uv = swash_fetch(rv, s, TRUE)) < none) { 348 s += UTF8SKIP(s); 349 matches++; 350 d = uvuni_to_utf8(d, uv); 351 } 352 else if (uv == none) { 353 int i = UTF8SKIP(s); 354 Copy(s, d, i, U8); 355 d += i; 356 s += i; 357 } 358 else if (uv == extra) { 359 int i = UTF8SKIP(s); 360 s += i; 361 matches++; 362 d = uvuni_to_utf8(d, final); 363 } 364 else 365 s += UTF8SKIP(s); 366 367 if (d > dend) { 368 STRLEN clen = d - dstart; 369 STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; 370 if (!grows) 371 Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__); 372 Renew(dstart, nlen+UTF8_MAXLEN, U8); 373 d = dstart + clen; 374 dend = dstart + nlen; 375 } 376 } 377 if (grows || hibit) { 378 sv_setpvn(sv, (char*)dstart, d - dstart); 379 Safefree(dstart); 380 if (grows && hibit) 381 Safefree(start); 382 } 383 else { 384 *d = '\0'; 385 SvCUR_set(sv, d - dstart); 386 } 387 SvSETMAGIC(sv); 388 SvUTF8_on(sv); 389 390 return matches; 391 } 392 393 STATIC I32 394 S_do_trans_count_utf8(pTHX_ SV *sv) 395 { 396 U8 *s; 397 U8 *start = 0, *send; 398 I32 matches = 0; 399 STRLEN len; 400 401 SV* rv = (SV*)cSVOP->op_sv; 402 HV* hv = (HV*)SvRV(rv); 403 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 404 UV none = svp ? SvUV(*svp) : 0x7fffffff; 405 UV extra = none + 1; 406 UV uv; 407 U8 hibit = 0; 408 409 s = (U8*)SvPV(sv, len); 410 if (!SvUTF8(sv)) { 411 U8 *t = s, *e = s + len; 412 while (t < e) { 413 U8 ch = *t++; 414 if ((hibit = !NATIVE_IS_INVARIANT(ch))) 415 break; 416 } 417 if (hibit) 418 start = s = bytes_to_utf8(s, &len); 419 } 420 send = s + len; 421 422 while (s < send) { 423 if ((uv = swash_fetch(rv, s, TRUE)) < none || uv == extra) 424 matches++; 425 s += UTF8SKIP(s); 426 } 427 if (hibit) 428 Safefree(start); 429 430 return matches; 431 } 432 433 STATIC I32 434 S_do_trans_complex_utf8(pTHX_ SV *sv) 435 { 436 U8 *s; 437 U8 *start, *send; 438 U8 *d; 439 I32 matches = 0; 440 I32 squash = PL_op->op_private & OPpTRANS_SQUASH; 441 I32 del = PL_op->op_private & OPpTRANS_DELETE; 442 I32 grows = PL_op->op_private & OPpTRANS_GROWS; 443 SV* rv = (SV*)cSVOP->op_sv; 444 HV* hv = (HV*)SvRV(rv); 445 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 446 UV none = svp ? SvUV(*svp) : 0x7fffffff; 447 UV extra = none + 1; 448 UV final = 0; 449 bool havefinal = FALSE; 450 UV uv; 451 STRLEN len; 452 U8 *dstart, *dend; 453 I32 isutf8; 454 U8 hibit = 0; 455 456 s = (U8*)SvPV(sv, len); 457 isutf8 = SvUTF8(sv); 458 if (!isutf8) { 459 U8 *t = s, *e = s + len; 460 while (t < e) { 461 U8 ch = *t++; 462 if ((hibit = !NATIVE_IS_INVARIANT(ch))) 463 break; 464 } 465 if (hibit) 466 s = bytes_to_utf8(s, &len); 467 } 468 send = s + len; 469 start = s; 470 471 svp = hv_fetch(hv, "FINAL", 5, FALSE); 472 if (svp) { 473 final = SvUV(*svp); 474 havefinal = TRUE; 475 } 476 477 if (grows) { 478 /* d needs to be bigger than s, in case e.g. upgrading is required */ 479 New(0, d, len*3+UTF8_MAXLEN, U8); 480 dend = d + len * 3; 481 dstart = d; 482 } 483 else { 484 dstart = d = s; 485 dend = d + len; 486 } 487 488 if (squash) { 489 UV puv = 0xfeedface; 490 while (s < send) { 491 uv = swash_fetch(rv, s, TRUE); 492 493 if (d > dend) { 494 STRLEN clen = d - dstart; 495 STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; 496 if (!grows) 497 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 498 Renew(dstart, nlen+UTF8_MAXLEN, U8); 499 d = dstart + clen; 500 dend = dstart + nlen; 501 } 502 if (uv < none) { 503 matches++; 504 s += UTF8SKIP(s); 505 if (uv != puv) { 506 d = uvuni_to_utf8(d, uv); 507 puv = uv; 508 } 509 continue; 510 } 511 else if (uv == none) { /* "none" is unmapped character */ 512 int i = UTF8SKIP(s); 513 Copy(s, d, i, U8); 514 d += i; 515 s += i; 516 puv = 0xfeedface; 517 continue; 518 } 519 else if (uv == extra && !del) { 520 matches++; 521 if (havefinal) { 522 s += UTF8SKIP(s); 523 if (puv != final) { 524 d = uvuni_to_utf8(d, final); 525 puv = final; 526 } 527 } 528 else { 529 STRLEN len; 530 uv = utf8_to_uvuni(s, &len); 531 if (uv != puv) { 532 Copy(s, d, len, U8); 533 d += len; 534 puv = uv; 535 } 536 s += len; 537 } 538 continue; 539 } 540 matches++; /* "none+1" is delete character */ 541 s += UTF8SKIP(s); 542 } 543 } 544 else { 545 while (s < send) { 546 uv = swash_fetch(rv, s, TRUE); 547 if (d > dend) { 548 STRLEN clen = d - dstart; 549 STRLEN nlen = dend - dstart + len + UTF8_MAXLEN; 550 if (!grows) 551 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 552 Renew(dstart, nlen+UTF8_MAXLEN, U8); 553 d = dstart + clen; 554 dend = dstart + nlen; 555 } 556 if (uv < none) { 557 matches++; 558 s += UTF8SKIP(s); 559 d = uvuni_to_utf8(d, uv); 560 continue; 561 } 562 else if (uv == none) { /* "none" is unmapped character */ 563 int i = UTF8SKIP(s); 564 Copy(s, d, i, U8); 565 d += i; 566 s += i; 567 continue; 568 } 569 else if (uv == extra && !del) { 570 matches++; 571 s += UTF8SKIP(s); 572 d = uvuni_to_utf8(d, final); 573 continue; 574 } 575 matches++; /* "none+1" is delete character */ 576 s += UTF8SKIP(s); 577 } 578 } 579 if (grows || hibit) { 580 sv_setpvn(sv, (char*)dstart, d - dstart); 581 Safefree(dstart); 582 if (grows && hibit) 583 Safefree(start); 584 } 585 else { 586 *d = '\0'; 587 SvCUR_set(sv, d - dstart); 588 } 589 SvUTF8_on(sv); 590 SvSETMAGIC(sv); 591 592 return matches; 593 } 594 595 I32 596 Perl_do_trans(pTHX_ SV *sv) 597 { 598 STRLEN len; 599 I32 hasutf = (PL_op->op_private & 600 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); 601 602 if (SvREADONLY(sv)) { 603 if (SvFAKE(sv)) 604 sv_force_normal(sv); 605 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) 606 Perl_croak(aTHX_ PL_no_modify); 607 } 608 (void)SvPV(sv, len); 609 if (!len) 610 return 0; 611 if (!SvPOKp(sv)) 612 (void)SvPV_force(sv, len); 613 if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) 614 (void)SvPOK_only_UTF8(sv); 615 616 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); 617 618 switch (PL_op->op_private & ~hasutf & 63) { 619 case 0: 620 if (hasutf) 621 return do_trans_simple_utf8(sv); 622 else 623 return do_trans_simple(sv); 624 625 case OPpTRANS_IDENTICAL: 626 case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT: 627 if (hasutf) 628 return do_trans_count_utf8(sv); 629 else 630 return do_trans_count(sv); 631 632 default: 633 if (hasutf) 634 return do_trans_complex_utf8(sv); 635 else 636 return do_trans_complex(sv); 637 } 638 } 639 640 void 641 Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp) 642 { 643 SV **oldmark = mark; 644 register I32 items = sp - mark; 645 register STRLEN len; 646 STRLEN delimlen; 647 STRLEN tmplen; 648 649 (void) SvPV(del, delimlen); /* stringify and get the delimlen */ 650 /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */ 651 652 mark++; 653 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 654 (void)SvUPGRADE(sv, SVt_PV); 655 if (SvLEN(sv) < len + items) { /* current length is way too short */ 656 while (items-- > 0) { 657 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 658 SvPV(*mark, tmplen); 659 len += tmplen; 660 } 661 mark++; 662 } 663 SvGROW(sv, len + 1); /* so try to pre-extend */ 664 665 mark = oldmark; 666 items = sp - mark; 667 ++mark; 668 } 669 670 sv_setpv(sv, ""); 671 if (PL_tainting && SvMAGICAL(sv)) 672 SvTAINTED_off(sv); 673 674 if (items-- > 0) { 675 if (*mark) 676 sv_catsv(sv, *mark); 677 mark++; 678 } 679 680 if (delimlen) { 681 for (; items > 0; items--,mark++) { 682 sv_catsv(sv,del); 683 sv_catsv(sv,*mark); 684 } 685 } 686 else { 687 for (; items > 0; items--,mark++) 688 sv_catsv(sv,*mark); 689 } 690 SvSETMAGIC(sv); 691 } 692 693 void 694 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) 695 { 696 STRLEN patlen; 697 char *pat = SvPV(*sarg, patlen); 698 bool do_taint = FALSE; 699 700 SvUTF8_off(sv); 701 if (DO_UTF8(*sarg)) 702 SvUTF8_on(sv); 703 sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint); 704 SvSETMAGIC(sv); 705 if (do_taint) 706 SvTAINTED_on(sv); 707 } 708 709 /* currently converts input to bytes if possible, but doesn't sweat failure */ 710 UV 711 Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) 712 { 713 STRLEN srclen, len; 714 unsigned char *s = (unsigned char *) SvPV(sv, srclen); 715 UV retnum = 0; 716 717 if (offset < 0) 718 return retnum; 719 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 720 Perl_croak(aTHX_ "Illegal number of bits in vec"); 721 722 if (SvUTF8(sv)) 723 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); 724 725 offset *= size; /* turn into bit offset */ 726 len = (offset + size + 7) / 8; /* required number of bytes */ 727 if (len > srclen) { 728 if (size <= 8) 729 retnum = 0; 730 else { 731 offset >>= 3; /* turn into byte offset */ 732 if (size == 16) { 733 if ((STRLEN)offset >= srclen) 734 retnum = 0; 735 else 736 retnum = (UV) s[offset] << 8; 737 } 738 else if (size == 32) { 739 if ((STRLEN)offset >= srclen) 740 retnum = 0; 741 else if ((STRLEN)(offset + 1) >= srclen) 742 retnum = 743 ((UV) s[offset ] << 24); 744 else if ((STRLEN)(offset + 2) >= srclen) 745 retnum = 746 ((UV) s[offset ] << 24) + 747 ((UV) s[offset + 1] << 16); 748 else 749 retnum = 750 ((UV) s[offset ] << 24) + 751 ((UV) s[offset + 1] << 16) + 752 ( s[offset + 2] << 8); 753 } 754 #ifdef UV_IS_QUAD 755 else if (size == 64) { 756 if (ckWARN(WARN_PORTABLE)) 757 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 758 "Bit vector size > 32 non-portable"); 759 if (offset >= srclen) 760 retnum = 0; 761 else if (offset + 1 >= srclen) 762 retnum = 763 (UV) s[offset ] << 56; 764 else if (offset + 2 >= srclen) 765 retnum = 766 ((UV) s[offset ] << 56) + 767 ((UV) s[offset + 1] << 48); 768 else if (offset + 3 >= srclen) 769 retnum = 770 ((UV) s[offset ] << 56) + 771 ((UV) s[offset + 1] << 48) + 772 ((UV) s[offset + 2] << 40); 773 else if (offset + 4 >= srclen) 774 retnum = 775 ((UV) s[offset ] << 56) + 776 ((UV) s[offset + 1] << 48) + 777 ((UV) s[offset + 2] << 40) + 778 ((UV) s[offset + 3] << 32); 779 else if (offset + 5 >= srclen) 780 retnum = 781 ((UV) s[offset ] << 56) + 782 ((UV) s[offset + 1] << 48) + 783 ((UV) s[offset + 2] << 40) + 784 ((UV) s[offset + 3] << 32) + 785 ( s[offset + 4] << 24); 786 else if (offset + 6 >= srclen) 787 retnum = 788 ((UV) s[offset ] << 56) + 789 ((UV) s[offset + 1] << 48) + 790 ((UV) s[offset + 2] << 40) + 791 ((UV) s[offset + 3] << 32) + 792 ((UV) s[offset + 4] << 24) + 793 ((UV) s[offset + 5] << 16); 794 else 795 retnum = 796 ((UV) s[offset ] << 56) + 797 ((UV) s[offset + 1] << 48) + 798 ((UV) s[offset + 2] << 40) + 799 ((UV) s[offset + 3] << 32) + 800 ((UV) s[offset + 4] << 24) + 801 ((UV) s[offset + 5] << 16) + 802 ( s[offset + 6] << 8); 803 } 804 #endif 805 } 806 } 807 else if (size < 8) 808 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1); 809 else { 810 offset >>= 3; /* turn into byte offset */ 811 if (size == 8) 812 retnum = s[offset]; 813 else if (size == 16) 814 retnum = 815 ((UV) s[offset] << 8) + 816 s[offset + 1]; 817 else if (size == 32) 818 retnum = 819 ((UV) s[offset ] << 24) + 820 ((UV) s[offset + 1] << 16) + 821 ( s[offset + 2] << 8) + 822 s[offset + 3]; 823 #ifdef UV_IS_QUAD 824 else if (size == 64) { 825 if (ckWARN(WARN_PORTABLE)) 826 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 827 "Bit vector size > 32 non-portable"); 828 retnum = 829 ((UV) s[offset ] << 56) + 830 ((UV) s[offset + 1] << 48) + 831 ((UV) s[offset + 2] << 40) + 832 ((UV) s[offset + 3] << 32) + 833 ((UV) s[offset + 4] << 24) + 834 ((UV) s[offset + 5] << 16) + 835 ( s[offset + 6] << 8) + 836 s[offset + 7]; 837 } 838 #endif 839 } 840 841 return retnum; 842 } 843 844 /* currently converts input to bytes if possible but doesn't sweat failures, 845 * although it does ensure that the string it clobbers is not marked as 846 * utf8-valid any more 847 */ 848 void 849 Perl_do_vecset(pTHX_ SV *sv) 850 { 851 SV *targ = LvTARG(sv); 852 register I32 offset; 853 register I32 size; 854 register unsigned char *s; 855 register UV lval; 856 I32 mask; 857 STRLEN targlen; 858 STRLEN len; 859 860 if (!targ) 861 return; 862 s = (unsigned char*)SvPV_force(targ, targlen); 863 if (SvUTF8(targ)) { 864 /* This is handled by the SvPOK_only below... 865 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE)) 866 SvUTF8_off(targ); 867 */ 868 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE); 869 } 870 871 (void)SvPOK_only(targ); 872 lval = SvUV(sv); 873 offset = LvTARGOFF(sv); 874 if (offset < 0) 875 Perl_croak(aTHX_ "Negative offset to vec in lvalue context"); 876 size = LvTARGLEN(sv); 877 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 878 Perl_croak(aTHX_ "Illegal number of bits in vec"); 879 880 offset *= size; /* turn into bit offset */ 881 len = (offset + size + 7) / 8; /* required number of bytes */ 882 if (len > targlen) { 883 s = (unsigned char*)SvGROW(targ, len + 1); 884 (void)memzero((char *)(s + targlen), len - targlen + 1); 885 SvCUR_set(targ, len); 886 } 887 888 if (size < 8) { 889 mask = (1 << size) - 1; 890 size = offset & 7; 891 lval &= mask; 892 offset >>= 3; /* turn into byte offset */ 893 s[offset] &= ~(mask << size); 894 s[offset] |= lval << size; 895 } 896 else { 897 offset >>= 3; /* turn into byte offset */ 898 if (size == 8) 899 s[offset ] = (U8)( lval & 0xff); 900 else if (size == 16) { 901 s[offset ] = (U8)((lval >> 8) & 0xff); 902 s[offset+1] = (U8)( lval & 0xff); 903 } 904 else if (size == 32) { 905 s[offset ] = (U8)((lval >> 24) & 0xff); 906 s[offset+1] = (U8)((lval >> 16) & 0xff); 907 s[offset+2] = (U8)((lval >> 8) & 0xff); 908 s[offset+3] = (U8)( lval & 0xff); 909 } 910 #ifdef UV_IS_QUAD 911 else if (size == 64) { 912 if (ckWARN(WARN_PORTABLE)) 913 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 914 "Bit vector size > 32 non-portable"); 915 s[offset ] = (U8)((lval >> 56) & 0xff); 916 s[offset+1] = (U8)((lval >> 48) & 0xff); 917 s[offset+2] = (U8)((lval >> 40) & 0xff); 918 s[offset+3] = (U8)((lval >> 32) & 0xff); 919 s[offset+4] = (U8)((lval >> 24) & 0xff); 920 s[offset+5] = (U8)((lval >> 16) & 0xff); 921 s[offset+6] = (U8)((lval >> 8) & 0xff); 922 s[offset+7] = (U8)( lval & 0xff); 923 } 924 #endif 925 } 926 SvSETMAGIC(targ); 927 } 928 929 void 930 Perl_do_chop(pTHX_ register SV *astr, register SV *sv) 931 { 932 STRLEN len; 933 char *s; 934 935 if (SvTYPE(sv) == SVt_PVAV) { 936 register I32 i; 937 I32 max; 938 AV* av = (AV*)sv; 939 max = AvFILL(av); 940 for (i = 0; i <= max; i++) { 941 sv = (SV*)av_fetch(av, i, FALSE); 942 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 943 do_chop(astr, sv); 944 } 945 return; 946 } 947 else if (SvTYPE(sv) == SVt_PVHV) { 948 HV* hv = (HV*)sv; 949 HE* entry; 950 (void)hv_iterinit(hv); 951 /*SUPPRESS 560*/ 952 while ((entry = hv_iternext(hv))) 953 do_chop(astr,hv_iterval(hv,entry)); 954 return; 955 } 956 else if (SvREADONLY(sv)) { 957 if (SvFAKE(sv)) { 958 /* SV is copy-on-write */ 959 sv_force_normal_flags(sv, 0); 960 } 961 if (SvREADONLY(sv)) 962 Perl_croak(aTHX_ PL_no_modify); 963 } 964 s = SvPV(sv, len); 965 if (len && !SvPOK(sv)) 966 s = SvPV_force(sv, len); 967 if (DO_UTF8(sv)) { 968 if (s && len) { 969 char *send = s + len; 970 char *start = s; 971 s = send - 1; 972 while (s > start && UTF8_IS_CONTINUATION(*s)) 973 s--; 974 if (utf8_to_uvchr((U8*)s, 0)) { 975 sv_setpvn(astr, s, send - s); 976 *s = '\0'; 977 SvCUR_set(sv, s - start); 978 SvNIOK_off(sv); 979 SvUTF8_on(astr); 980 } 981 } 982 else 983 sv_setpvn(astr, "", 0); 984 } 985 else if (s && len) { 986 s += --len; 987 sv_setpvn(astr, s, 1); 988 *s = '\0'; 989 SvCUR_set(sv, len); 990 SvUTF8_off(sv); 991 SvNIOK_off(sv); 992 } 993 else 994 sv_setpvn(astr, "", 0); 995 SvSETMAGIC(sv); 996 } 997 998 I32 999 Perl_do_chomp(pTHX_ register SV *sv) 1000 { 1001 register I32 count; 1002 STRLEN len; 1003 STRLEN n_a; 1004 char *s; 1005 1006 if (RsSNARF(PL_rs)) 1007 return 0; 1008 if (RsRECORD(PL_rs)) 1009 return 0; 1010 count = 0; 1011 if (SvTYPE(sv) == SVt_PVAV) { 1012 register I32 i; 1013 I32 max; 1014 AV* av = (AV*)sv; 1015 max = AvFILL(av); 1016 for (i = 0; i <= max; i++) { 1017 sv = (SV*)av_fetch(av, i, FALSE); 1018 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 1019 count += do_chomp(sv); 1020 } 1021 return count; 1022 } 1023 else if (SvTYPE(sv) == SVt_PVHV) { 1024 HV* hv = (HV*)sv; 1025 HE* entry; 1026 (void)hv_iterinit(hv); 1027 /*SUPPRESS 560*/ 1028 while ((entry = hv_iternext(hv))) 1029 count += do_chomp(hv_iterval(hv,entry)); 1030 return count; 1031 } 1032 else if (SvREADONLY(sv)) { 1033 if (SvFAKE(sv)) { 1034 /* SV is copy-on-write */ 1035 sv_force_normal_flags(sv, 0); 1036 } 1037 if (SvREADONLY(sv)) 1038 Perl_croak(aTHX_ PL_no_modify); 1039 } 1040 s = SvPV(sv, len); 1041 if (s && len) { 1042 s += --len; 1043 if (RsPARA(PL_rs)) { 1044 if (*s != '\n') 1045 goto nope; 1046 ++count; 1047 while (len && s[-1] == '\n') { 1048 --len; 1049 --s; 1050 ++count; 1051 } 1052 } 1053 else { 1054 STRLEN rslen; 1055 char *rsptr = SvPV(PL_rs, rslen); 1056 if (rslen == 1) { 1057 if (*s != *rsptr) 1058 goto nope; 1059 ++count; 1060 } 1061 else { 1062 if (len < rslen - 1) 1063 goto nope; 1064 len -= rslen - 1; 1065 s -= rslen - 1; 1066 if (memNE(s, rsptr, rslen)) 1067 goto nope; 1068 count += rslen; 1069 } 1070 } 1071 s = SvPV_force(sv, n_a); 1072 SvCUR_set(sv, len); 1073 *SvEND(sv) = '\0'; 1074 SvNIOK_off(sv); 1075 SvSETMAGIC(sv); 1076 } 1077 nope: 1078 return count; 1079 } 1080 1081 void 1082 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 1083 { 1084 #ifdef LIBERAL 1085 register long *dl; 1086 register long *ll; 1087 register long *rl; 1088 #endif 1089 register char *dc; 1090 STRLEN leftlen; 1091 STRLEN rightlen; 1092 register char *lc; 1093 register char *rc; 1094 register I32 len; 1095 I32 lensave; 1096 char *lsave; 1097 char *rsave; 1098 bool left_utf = DO_UTF8(left); 1099 bool right_utf = DO_UTF8(right); 1100 I32 needlen = 0; 1101 1102 if (left_utf && !right_utf) 1103 sv_utf8_upgrade(right); 1104 else if (!left_utf && right_utf) 1105 sv_utf8_upgrade(left); 1106 1107 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv))) 1108 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */ 1109 lsave = lc = SvPV(left, leftlen); 1110 rsave = rc = SvPV(right, rightlen); 1111 len = leftlen < rightlen ? leftlen : rightlen; 1112 lensave = len; 1113 if ((left_utf || right_utf) && (sv == left || sv == right)) { 1114 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; 1115 Newz(801, dc, needlen + 1, char); 1116 } 1117 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1118 STRLEN n_a; 1119 dc = SvPV_force(sv, n_a); 1120 if (SvCUR(sv) < (STRLEN)len) { 1121 dc = SvGROW(sv, (STRLEN)(len + 1)); 1122 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1123 } 1124 if (optype != OP_BIT_AND && (left_utf || right_utf)) 1125 dc = SvGROW(sv, leftlen + rightlen + 1); 1126 } 1127 else { 1128 needlen = ((optype == OP_BIT_AND) 1129 ? len : (leftlen > rightlen ? leftlen : rightlen)); 1130 Newz(801, dc, needlen + 1, char); 1131 (void)sv_usepvn(sv, dc, needlen); 1132 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1133 } 1134 SvCUR_set(sv, len); 1135 (void)SvPOK_only(sv); 1136 if (left_utf || right_utf) { 1137 UV duc, luc, ruc; 1138 char *dcsave = dc; 1139 STRLEN lulen = leftlen; 1140 STRLEN rulen = rightlen; 1141 STRLEN ulen; 1142 1143 switch (optype) { 1144 case OP_BIT_AND: 1145 while (lulen && rulen) { 1146 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1147 lc += ulen; 1148 lulen -= ulen; 1149 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1150 rc += ulen; 1151 rulen -= ulen; 1152 duc = luc & ruc; 1153 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1154 } 1155 if (sv == left || sv == right) 1156 (void)sv_usepvn(sv, dcsave, needlen); 1157 SvCUR_set(sv, dc - dcsave); 1158 break; 1159 case OP_BIT_XOR: 1160 while (lulen && rulen) { 1161 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1162 lc += ulen; 1163 lulen -= ulen; 1164 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1165 rc += ulen; 1166 rulen -= ulen; 1167 duc = luc ^ ruc; 1168 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1169 } 1170 goto mop_up_utf; 1171 case OP_BIT_OR: 1172 while (lulen && rulen) { 1173 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1174 lc += ulen; 1175 lulen -= ulen; 1176 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1177 rc += ulen; 1178 rulen -= ulen; 1179 duc = luc | ruc; 1180 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1181 } 1182 mop_up_utf: 1183 if (sv == left || sv == right) 1184 (void)sv_usepvn(sv, dcsave, needlen); 1185 SvCUR_set(sv, dc - dcsave); 1186 if (rulen) 1187 sv_catpvn(sv, rc, rulen); 1188 else if (lulen) 1189 sv_catpvn(sv, lc, lulen); 1190 else 1191 *SvEND(sv) = '\0'; 1192 break; 1193 } 1194 SvUTF8_on(sv); 1195 goto finish; 1196 } 1197 else 1198 #ifdef LIBERAL 1199 if (len >= sizeof(long)*4 && 1200 !((long)dc % sizeof(long)) && 1201 !((long)lc % sizeof(long)) && 1202 !((long)rc % sizeof(long))) /* It's almost always aligned... */ 1203 { 1204 I32 remainder = len % (sizeof(long)*4); 1205 len /= (sizeof(long)*4); 1206 1207 dl = (long*)dc; 1208 ll = (long*)lc; 1209 rl = (long*)rc; 1210 1211 switch (optype) { 1212 case OP_BIT_AND: 1213 while (len--) { 1214 *dl++ = *ll++ & *rl++; 1215 *dl++ = *ll++ & *rl++; 1216 *dl++ = *ll++ & *rl++; 1217 *dl++ = *ll++ & *rl++; 1218 } 1219 break; 1220 case OP_BIT_XOR: 1221 while (len--) { 1222 *dl++ = *ll++ ^ *rl++; 1223 *dl++ = *ll++ ^ *rl++; 1224 *dl++ = *ll++ ^ *rl++; 1225 *dl++ = *ll++ ^ *rl++; 1226 } 1227 break; 1228 case OP_BIT_OR: 1229 while (len--) { 1230 *dl++ = *ll++ | *rl++; 1231 *dl++ = *ll++ | *rl++; 1232 *dl++ = *ll++ | *rl++; 1233 *dl++ = *ll++ | *rl++; 1234 } 1235 } 1236 1237 dc = (char*)dl; 1238 lc = (char*)ll; 1239 rc = (char*)rl; 1240 1241 len = remainder; 1242 } 1243 #endif 1244 { 1245 switch (optype) { 1246 case OP_BIT_AND: 1247 while (len--) 1248 *dc++ = *lc++ & *rc++; 1249 break; 1250 case OP_BIT_XOR: 1251 while (len--) 1252 *dc++ = *lc++ ^ *rc++; 1253 goto mop_up; 1254 case OP_BIT_OR: 1255 while (len--) 1256 *dc++ = *lc++ | *rc++; 1257 mop_up: 1258 len = lensave; 1259 if (rightlen > (STRLEN)len) 1260 sv_catpvn(sv, rsave + len, rightlen - len); 1261 else if (leftlen > (STRLEN)len) 1262 sv_catpvn(sv, lsave + len, leftlen - len); 1263 else 1264 *SvEND(sv) = '\0'; 1265 break; 1266 } 1267 } 1268 finish: 1269 SvTAINT(sv); 1270 } 1271 1272 OP * 1273 Perl_do_kv(pTHX) 1274 { 1275 dSP; 1276 HV *hv = (HV*)POPs; 1277 HV *keys; 1278 register HE *entry; 1279 SV *tmpstr; 1280 I32 gimme = GIMME_V; 1281 I32 dokeys = (PL_op->op_type == OP_KEYS); 1282 I32 dovalues = (PL_op->op_type == OP_VALUES); 1283 I32 realhv = (SvTYPE(hv) == SVt_PVHV); 1284 1285 if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV) 1286 dokeys = dovalues = TRUE; 1287 1288 if (!hv) { 1289 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1290 dTARGET; /* make sure to clear its target here */ 1291 if (SvTYPE(TARG) == SVt_PVLV) 1292 LvTARG(TARG) = Nullsv; 1293 PUSHs(TARG); 1294 } 1295 RETURN; 1296 } 1297 1298 keys = realhv ? hv : avhv_keys((AV*)hv); 1299 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1300 1301 if (gimme == G_VOID) 1302 RETURN; 1303 1304 if (gimme == G_SCALAR) { 1305 IV i; 1306 dTARGET; 1307 1308 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1309 if (SvTYPE(TARG) < SVt_PVLV) { 1310 sv_upgrade(TARG, SVt_PVLV); 1311 sv_magic(TARG, Nullsv, PERL_MAGIC_nkeys, Nullch, 0); 1312 } 1313 LvTYPE(TARG) = 'k'; 1314 if (LvTARG(TARG) != (SV*)keys) { 1315 if (LvTARG(TARG)) 1316 SvREFCNT_dec(LvTARG(TARG)); 1317 LvTARG(TARG) = SvREFCNT_inc(keys); 1318 } 1319 PUSHs(TARG); 1320 RETURN; 1321 } 1322 1323 if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied)) 1324 i = HvKEYS(keys); 1325 else { 1326 i = 0; 1327 /*SUPPRESS 560*/ 1328 while (hv_iternext(keys)) i++; 1329 } 1330 PUSHi( i ); 1331 RETURN; 1332 } 1333 1334 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues)); 1335 1336 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */ 1337 while ((entry = hv_iternext(keys))) { 1338 SPAGAIN; 1339 if (dokeys) { 1340 SV* sv = hv_iterkeysv(entry); 1341 XPUSHs(sv); /* won't clobber stack_sp */ 1342 } 1343 if (dovalues) { 1344 PUTBACK; 1345 tmpstr = realhv ? 1346 hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry); 1347 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu", 1348 (unsigned long)HeHASH(entry), 1349 HvMAX(keys)+1, 1350 (unsigned long)(HeHASH(entry) & HvMAX(keys)))); 1351 SPAGAIN; 1352 XPUSHs(tmpstr); 1353 } 1354 PUTBACK; 1355 } 1356 return NORMAL; 1357 } 1358 1359