1 /* doop.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 4 * 2000, 2001, 2002, 2004, 2005, 2006, by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * "'So that was the job I felt I had to do when I started,' thought Sam." 13 */ 14 15 /* This file contains some common functions needed to carry out certain 16 * ops. For example both pp_schomp() and pp_chomp() - scalar and array 17 * chomp operations - call the function do_chomp() found in this file. 18 */ 19 20 #include "EXTERN.h" 21 #define PERL_IN_DOOP_C 22 #include "perl.h" 23 24 #ifndef PERL_MICRO 25 #include <signal.h> 26 #endif 27 28 STATIC I32 29 S_do_trans_simple(pTHX_ SV *sv) 30 { 31 U8 *s; 32 U8 *d; 33 const U8 *send; 34 U8 *dstart; 35 I32 matches = 0; 36 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 37 STRLEN len; 38 39 const short *tbl = (short*)cPVOP->op_pv; 40 if (!tbl) 41 Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); 42 43 s = (U8*)SvPV(sv, len); 44 send = s + len; 45 46 /* First, take care of non-UTF-8 input strings, because they're easy */ 47 if (!SvUTF8(sv)) { 48 while (s < send) { 49 const I32 ch = tbl[*s]; 50 if (ch >= 0) { 51 matches++; 52 *s++ = (U8)ch; 53 } 54 else 55 s++; 56 } 57 SvSETMAGIC(sv); 58 return matches; 59 } 60 61 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ 62 if (grows) 63 Newx(d, len*2+1, U8); 64 else 65 d = s; 66 dstart = d; 67 while (s < send) { 68 STRLEN ulen; 69 I32 ch; 70 71 /* Need to check this, otherwise 128..255 won't match */ 72 const UV c = utf8n_to_uvchr(s, send - s, &ulen, 0); 73 if (c < 0x100 && (ch = tbl[c]) >= 0) { 74 matches++; 75 d = uvchr_to_utf8(d, ch); 76 s += ulen; 77 } 78 else { /* No match -> copy */ 79 Move(s, d, ulen, U8); 80 d += ulen; 81 s += ulen; 82 } 83 } 84 if (grows) { 85 sv_setpvn(sv, (char*)dstart, d - dstart); 86 Safefree(dstart); 87 } 88 else { 89 *d = '\0'; 90 SvCUR_set(sv, d - dstart); 91 } 92 SvUTF8_on(sv); 93 SvSETMAGIC(sv); 94 return matches; 95 } 96 97 STATIC I32 98 S_do_trans_count(pTHX_ SV *sv) 99 { 100 const U8 *s; 101 const U8 *send; 102 I32 matches = 0; 103 STRLEN len; 104 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 105 106 const short * const tbl = (short*)cPVOP->op_pv; 107 if (!tbl) 108 Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__); 109 110 s = (const U8*)SvPV_const(sv, len); 111 send = s + len; 112 113 if (!SvUTF8(sv)) 114 while (s < send) { 115 if (tbl[*s++] >= 0) 116 matches++; 117 } 118 else 119 while (s < send) { 120 STRLEN ulen; 121 const UV c = utf8n_to_uvchr((U8 *)s, send - s, &ulen, 0); 122 if (c < 0x100) { 123 if (tbl[c] >= 0) 124 matches++; 125 } else if (complement) 126 matches++; 127 s += ulen; 128 } 129 130 return matches; 131 } 132 133 STATIC I32 134 S_do_trans_complex(pTHX_ SV *sv) 135 { 136 U8 *s; 137 U8 *send; 138 U8 *d; 139 U8 *dstart; 140 I32 isutf8; 141 I32 matches = 0; 142 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 143 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 144 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 145 STRLEN len, rlen = 0; 146 147 const short * const tbl = (short*)cPVOP->op_pv; 148 if (!tbl) 149 Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); 150 151 s = (U8*)SvPV(sv, len); 152 isutf8 = SvUTF8(sv); 153 send = s + len; 154 155 if (!isutf8) { 156 dstart = d = s; 157 if (PL_op->op_private & OPpTRANS_SQUASH) { 158 const U8* p = send; 159 while (s < send) { 160 const I32 ch = tbl[*s]; 161 if (ch >= 0) { 162 *d = (U8)ch; 163 matches++; 164 if (p != d - 1 || *p != *d) 165 p = d++; 166 } 167 else if (ch == -1) /* -1 is unmapped character */ 168 *d++ = *s; 169 else if (ch == -2) /* -2 is delete character */ 170 matches++; 171 s++; 172 } 173 } 174 else { 175 while (s < send) { 176 const I32 ch = tbl[*s]; 177 if (ch >= 0) { 178 matches++; 179 *d++ = (U8)ch; 180 } 181 else if (ch == -1) /* -1 is unmapped character */ 182 *d++ = *s; 183 else if (ch == -2) /* -2 is delete character */ 184 matches++; 185 s++; 186 } 187 } 188 *d = '\0'; 189 SvCUR_set(sv, d - dstart); 190 } 191 else { /* isutf8 */ 192 if (grows) 193 Newx(d, len*2+1, U8); 194 else 195 d = s; 196 dstart = d; 197 if (complement && !del) 198 rlen = tbl[0x100]; 199 200 #ifdef MACOS_TRADITIONAL 201 #define comp CoMP /* "comp" is a keyword in some compilers ... */ 202 #endif 203 204 if (PL_op->op_private & OPpTRANS_SQUASH) { 205 UV pch = 0xfeedface; 206 while (s < send) { 207 STRLEN len; 208 const UV comp = utf8_to_uvchr(s, &len); 209 I32 ch; 210 211 if (comp > 0xff) { 212 if (!complement) { 213 Copy(s, d, len, U8); 214 d += len; 215 } 216 else { 217 matches++; 218 if (!del) { 219 ch = (rlen == 0) ? comp : 220 (comp - 0x100 < rlen) ? 221 tbl[comp+1] : tbl[0x100+rlen]; 222 if ((UV)ch != pch) { 223 d = uvchr_to_utf8(d, ch); 224 pch = (UV)ch; 225 } 226 s += len; 227 continue; 228 } 229 } 230 } 231 else if ((ch = tbl[comp]) >= 0) { 232 matches++; 233 if ((UV)ch != pch) { 234 d = uvchr_to_utf8(d, ch); 235 pch = (UV)ch; 236 } 237 s += len; 238 continue; 239 } 240 else if (ch == -1) { /* -1 is unmapped character */ 241 Copy(s, d, len, U8); 242 d += len; 243 } 244 else if (ch == -2) /* -2 is delete character */ 245 matches++; 246 s += len; 247 pch = 0xfeedface; 248 } 249 } 250 else { 251 while (s < send) { 252 STRLEN len; 253 const UV comp = utf8_to_uvchr(s, &len); 254 I32 ch; 255 if (comp > 0xff) { 256 if (!complement) { 257 Move(s, d, len, U8); 258 d += len; 259 } 260 else { 261 matches++; 262 if (!del) { 263 if (comp - 0x100 < rlen) 264 d = uvchr_to_utf8(d, tbl[comp+1]); 265 else 266 d = uvchr_to_utf8(d, tbl[0x100+rlen]); 267 } 268 } 269 } 270 else if ((ch = tbl[comp]) >= 0) { 271 d = uvchr_to_utf8(d, ch); 272 matches++; 273 } 274 else if (ch == -1) { /* -1 is unmapped character */ 275 Copy(s, d, len, U8); 276 d += len; 277 } 278 else if (ch == -2) /* -2 is delete character */ 279 matches++; 280 s += len; 281 } 282 } 283 if (grows) { 284 sv_setpvn(sv, (char*)dstart, d - dstart); 285 Safefree(dstart); 286 } 287 else { 288 *d = '\0'; 289 SvCUR_set(sv, d - dstart); 290 } 291 SvUTF8_on(sv); 292 } 293 SvSETMAGIC(sv); 294 return matches; 295 } 296 297 STATIC I32 298 S_do_trans_simple_utf8(pTHX_ SV *sv) 299 { 300 U8 *s; 301 U8 *send; 302 U8 *d; 303 U8 *start; 304 U8 *dstart, *dend; 305 I32 matches = 0; 306 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 307 STRLEN len; 308 309 SV* const rv = (SV*)cSVOP->op_sv; 310 HV* const hv = (HV*)SvRV(rv); 311 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 312 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 313 const UV extra = none + 1; 314 UV final = 0; 315 UV uv; 316 I32 isutf8; 317 U8 hibit = 0; 318 319 s = (U8*)SvPV(sv, len); 320 isutf8 = SvUTF8(sv); 321 if (!isutf8) { 322 const U8 *t = s, *e = s + len; 323 while (t < e) { 324 const U8 ch = *t++; 325 if ((hibit = !NATIVE_IS_INVARIANT(ch))) 326 break; 327 } 328 if (hibit) 329 s = bytes_to_utf8(s, &len); 330 } 331 send = s + len; 332 start = s; 333 334 svp = hv_fetch(hv, "FINAL", 5, FALSE); 335 if (svp) 336 final = SvUV(*svp); 337 338 if (grows) { 339 /* d needs to be bigger than s, in case e.g. upgrading is required */ 340 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 341 dend = d + len * 3; 342 dstart = d; 343 } 344 else { 345 dstart = d = s; 346 dend = d + len; 347 } 348 349 while (s < send) { 350 if ((uv = swash_fetch(rv, s, TRUE)) < none) { 351 s += UTF8SKIP(s); 352 matches++; 353 d = uvuni_to_utf8(d, uv); 354 } 355 else if (uv == none) { 356 const int i = UTF8SKIP(s); 357 Move(s, d, i, U8); 358 d += i; 359 s += i; 360 } 361 else if (uv == extra) { 362 s += UTF8SKIP(s); 363 matches++; 364 d = uvuni_to_utf8(d, final); 365 } 366 else 367 s += UTF8SKIP(s); 368 369 if (d > dend) { 370 const STRLEN clen = d - dstart; 371 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 372 if (!grows) 373 Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__); 374 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 375 d = dstart + clen; 376 dend = dstart + nlen; 377 } 378 } 379 if (grows || hibit) { 380 sv_setpvn(sv, (char*)dstart, d - dstart); 381 Safefree(dstart); 382 if (grows && hibit) 383 Safefree(start); 384 } 385 else { 386 *d = '\0'; 387 SvCUR_set(sv, d - dstart); 388 } 389 SvSETMAGIC(sv); 390 SvUTF8_on(sv); 391 392 return matches; 393 } 394 395 STATIC I32 396 S_do_trans_count_utf8(pTHX_ SV *sv) 397 { 398 const U8 *s; 399 const U8 *start = 0, *send; 400 I32 matches = 0; 401 STRLEN len; 402 403 SV* const rv = (SV*)cSVOP->op_sv; 404 HV* const hv = (HV*)SvRV(rv); 405 SV** const svp = hv_fetch(hv, "NONE", 4, FALSE); 406 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 407 const UV extra = none + 1; 408 U8 hibit = 0; 409 410 s = (const U8*)SvPV_const(sv, len); 411 if (!SvUTF8(sv)) { 412 const U8 *t = s; 413 const U8 *e = s + len; 414 while (t < e) { 415 const U8 ch = *t++; 416 if ((hibit = !NATIVE_IS_INVARIANT(ch))) 417 break; 418 } 419 if (hibit) 420 start = s = bytes_to_utf8((U8 *)s, &len); 421 } 422 send = s + len; 423 424 while (s < send) { 425 UV uv; 426 if ((uv = swash_fetch(rv, (U8 *)s, TRUE)) < none || uv == extra) 427 matches++; 428 s += UTF8SKIP(s); 429 } 430 if (hibit) 431 Safefree(start); 432 433 return matches; 434 } 435 436 STATIC I32 437 S_do_trans_complex_utf8(pTHX_ SV *sv) 438 { 439 U8 *start, *send; 440 U8 *d; 441 I32 matches = 0; 442 const I32 squash = PL_op->op_private & OPpTRANS_SQUASH; 443 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 444 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 445 SV * const rv = (SV*)cSVOP->op_sv; 446 HV * const hv = (HV*)SvRV(rv); 447 SV** svp = hv_fetch(hv, "NONE", 4, FALSE); 448 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 449 const UV extra = none + 1; 450 UV final = 0; 451 bool havefinal = FALSE; 452 STRLEN len; 453 U8 *dstart, *dend; 454 U8 hibit = 0; 455 456 U8 *s = (U8*)SvPV(sv, len); 457 const I32 isutf8 = SvUTF8(sv); 458 if (!isutf8) { 459 const U8 *t = s; 460 const U8 * const e = s + len; 461 while (t < e) { 462 const U8 ch = *t++; 463 if ((hibit = !NATIVE_IS_INVARIANT(ch))) 464 break; 465 } 466 if (hibit) 467 s = bytes_to_utf8(s, &len); 468 } 469 send = s + len; 470 start = s; 471 472 svp = hv_fetch(hv, "FINAL", 5, FALSE); 473 if (svp) { 474 final = SvUV(*svp); 475 havefinal = TRUE; 476 } 477 478 if (grows) { 479 /* d needs to be bigger than s, in case e.g. upgrading is required */ 480 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 481 dend = d + len * 3; 482 dstart = d; 483 } 484 else { 485 dstart = d = s; 486 dend = d + len; 487 } 488 489 if (squash) { 490 UV puv = 0xfeedface; 491 while (s < send) { 492 UV uv = swash_fetch(rv, s, TRUE); 493 494 if (d > dend) { 495 const STRLEN clen = d - dstart; 496 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 497 if (!grows) 498 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 499 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 500 d = dstart + clen; 501 dend = dstart + nlen; 502 } 503 if (uv < none) { 504 matches++; 505 s += UTF8SKIP(s); 506 if (uv != puv) { 507 d = uvuni_to_utf8(d, uv); 508 puv = uv; 509 } 510 continue; 511 } 512 else if (uv == none) { /* "none" is unmapped character */ 513 const int i = UTF8SKIP(s); 514 Move(s, d, i, U8); 515 d += i; 516 s += i; 517 puv = 0xfeedface; 518 continue; 519 } 520 else if (uv == extra && !del) { 521 matches++; 522 if (havefinal) { 523 s += UTF8SKIP(s); 524 if (puv != final) { 525 d = uvuni_to_utf8(d, final); 526 puv = final; 527 } 528 } 529 else { 530 STRLEN len; 531 uv = utf8_to_uvuni(s, &len); 532 if (uv != puv) { 533 Move(s, d, len, U8); 534 d += len; 535 puv = uv; 536 } 537 s += len; 538 } 539 continue; 540 } 541 matches++; /* "none+1" is delete character */ 542 s += UTF8SKIP(s); 543 } 544 } 545 else { 546 while (s < send) { 547 const UV uv = swash_fetch(rv, s, TRUE); 548 if (d > dend) { 549 const STRLEN clen = d - dstart; 550 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 551 if (!grows) 552 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 553 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 554 d = dstart + clen; 555 dend = dstart + nlen; 556 } 557 if (uv < none) { 558 matches++; 559 s += UTF8SKIP(s); 560 d = uvuni_to_utf8(d, uv); 561 continue; 562 } 563 else if (uv == none) { /* "none" is unmapped character */ 564 const int i = UTF8SKIP(s); 565 Move(s, d, i, U8); 566 d += i; 567 s += i; 568 continue; 569 } 570 else if (uv == extra && !del) { 571 matches++; 572 s += UTF8SKIP(s); 573 d = uvuni_to_utf8(d, final); 574 continue; 575 } 576 matches++; /* "none+1" is delete character */ 577 s += UTF8SKIP(s); 578 } 579 } 580 if (grows || hibit) { 581 sv_setpvn(sv, (char*)dstart, d - dstart); 582 Safefree(dstart); 583 if (grows && hibit) 584 Safefree(start); 585 } 586 else { 587 *d = '\0'; 588 SvCUR_set(sv, d - dstart); 589 } 590 SvUTF8_on(sv); 591 SvSETMAGIC(sv); 592 593 return matches; 594 } 595 596 I32 597 Perl_do_trans(pTHX_ SV *sv) 598 { 599 STRLEN len; 600 const I32 hasutf = (PL_op->op_private & 601 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); 602 603 if (SvREADONLY(sv)) { 604 if (SvFAKE(sv)) 605 sv_force_normal(sv); 606 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) 607 Perl_croak(aTHX_ PL_no_modify); 608 } 609 (void)SvPV_const(sv, len); 610 if (!len) 611 return 0; 612 if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) { 613 if (!SvPOKp(sv)) 614 (void)SvPV_force(sv, len); 615 (void)SvPOK_only_UTF8(sv); 616 } 617 618 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); 619 620 switch (PL_op->op_private & ~hasutf & ( 621 OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL| 622 OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { 623 case 0: 624 if (hasutf) 625 return do_trans_simple_utf8(sv); 626 else 627 return do_trans_simple(sv); 628 629 case OPpTRANS_IDENTICAL: 630 case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT: 631 if (hasutf) 632 return do_trans_count_utf8(sv); 633 else 634 return do_trans_count(sv); 635 636 default: 637 if (hasutf) 638 return do_trans_complex_utf8(sv); 639 else 640 return do_trans_complex(sv); 641 } 642 } 643 644 void 645 Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp) 646 { 647 SV ** const oldmark = mark; 648 register I32 items = sp - mark; 649 register STRLEN len; 650 STRLEN delimlen; 651 652 (void) SvPV_const(del, delimlen); /* stringify and get the delimlen */ 653 /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */ 654 655 mark++; 656 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 657 (void)SvUPGRADE(sv, SVt_PV); 658 if (SvLEN(sv) < len + items) { /* current length is way too short */ 659 while (items-- > 0) { 660 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 661 STRLEN tmplen; 662 SvPV_const(*mark, tmplen); 663 len += tmplen; 664 } 665 mark++; 666 } 667 SvGROW(sv, len + 1); /* so try to pre-extend */ 668 669 mark = oldmark; 670 items = sp - mark; 671 ++mark; 672 } 673 674 sv_setpvn(sv, "", 0); 675 /* sv_setpv retains old UTF8ness [perl #24846] */ 676 SvUTF8_off(sv); 677 678 if (PL_tainting && SvMAGICAL(sv)) 679 SvTAINTED_off(sv); 680 681 if (items-- > 0) { 682 if (*mark) 683 sv_catsv(sv, *mark); 684 mark++; 685 } 686 687 if (delimlen) { 688 for (; items > 0; items--,mark++) { 689 sv_catsv(sv,del); 690 sv_catsv(sv,*mark); 691 } 692 } 693 else { 694 for (; items > 0; items--,mark++) 695 sv_catsv(sv,*mark); 696 } 697 SvSETMAGIC(sv); 698 } 699 700 void 701 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) 702 { 703 STRLEN patlen; 704 const char * const pat = SvPV_const(*sarg, patlen); 705 bool do_taint = FALSE; 706 707 SvUTF8_off(sv); 708 if (DO_UTF8(*sarg)) 709 SvUTF8_on(sv); 710 sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint); 711 SvSETMAGIC(sv); 712 if (do_taint) 713 SvTAINTED_on(sv); 714 } 715 716 /* currently converts input to bytes if possible, but doesn't sweat failure */ 717 UV 718 Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size) 719 { 720 STRLEN srclen, len; 721 const unsigned char *s = (const unsigned char *) SvPV_const(sv, srclen); 722 UV retnum = 0; 723 724 if (offset < 0) 725 return retnum; 726 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 727 Perl_croak(aTHX_ "Illegal number of bits in vec"); 728 729 if (SvUTF8(sv)) 730 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); 731 732 offset *= size; /* turn into bit offset */ 733 len = (offset + size + 7) / 8; /* required number of bytes */ 734 if (len > srclen) { 735 if (size <= 8) 736 retnum = 0; 737 else { 738 offset >>= 3; /* turn into byte offset */ 739 if (size == 16) { 740 if ((STRLEN)offset >= srclen) 741 retnum = 0; 742 else 743 retnum = (UV) s[offset] << 8; 744 } 745 else if (size == 32) { 746 if ((STRLEN)offset >= srclen) 747 retnum = 0; 748 else if ((STRLEN)(offset + 1) >= srclen) 749 retnum = 750 ((UV) s[offset ] << 24); 751 else if ((STRLEN)(offset + 2) >= srclen) 752 retnum = 753 ((UV) s[offset ] << 24) + 754 ((UV) s[offset + 1] << 16); 755 else 756 retnum = 757 ((UV) s[offset ] << 24) + 758 ((UV) s[offset + 1] << 16) + 759 ( s[offset + 2] << 8); 760 } 761 #ifdef UV_IS_QUAD 762 else if (size == 64) { 763 if (ckWARN(WARN_PORTABLE)) 764 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 765 "Bit vector size > 32 non-portable"); 766 if (offset >= srclen) 767 retnum = 0; 768 else if (offset + 1 >= srclen) 769 retnum = 770 (UV) s[offset ] << 56; 771 else if (offset + 2 >= srclen) 772 retnum = 773 ((UV) s[offset ] << 56) + 774 ((UV) s[offset + 1] << 48); 775 else if (offset + 3 >= srclen) 776 retnum = 777 ((UV) s[offset ] << 56) + 778 ((UV) s[offset + 1] << 48) + 779 ((UV) s[offset + 2] << 40); 780 else if (offset + 4 >= srclen) 781 retnum = 782 ((UV) s[offset ] << 56) + 783 ((UV) s[offset + 1] << 48) + 784 ((UV) s[offset + 2] << 40) + 785 ((UV) s[offset + 3] << 32); 786 else if (offset + 5 >= srclen) 787 retnum = 788 ((UV) s[offset ] << 56) + 789 ((UV) s[offset + 1] << 48) + 790 ((UV) s[offset + 2] << 40) + 791 ((UV) s[offset + 3] << 32) + 792 ( s[offset + 4] << 24); 793 else if (offset + 6 >= srclen) 794 retnum = 795 ((UV) s[offset ] << 56) + 796 ((UV) s[offset + 1] << 48) + 797 ((UV) s[offset + 2] << 40) + 798 ((UV) s[offset + 3] << 32) + 799 ((UV) s[offset + 4] << 24) + 800 ((UV) s[offset + 5] << 16); 801 else 802 retnum = 803 ((UV) s[offset ] << 56) + 804 ((UV) s[offset + 1] << 48) + 805 ((UV) s[offset + 2] << 40) + 806 ((UV) s[offset + 3] << 32) + 807 ((UV) s[offset + 4] << 24) + 808 ((UV) s[offset + 5] << 16) + 809 ( s[offset + 6] << 8); 810 } 811 #endif 812 } 813 } 814 else if (size < 8) 815 retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1); 816 else { 817 offset >>= 3; /* turn into byte offset */ 818 if (size == 8) 819 retnum = s[offset]; 820 else if (size == 16) 821 retnum = 822 ((UV) s[offset] << 8) + 823 s[offset + 1]; 824 else if (size == 32) 825 retnum = 826 ((UV) s[offset ] << 24) + 827 ((UV) s[offset + 1] << 16) + 828 ( s[offset + 2] << 8) + 829 s[offset + 3]; 830 #ifdef UV_IS_QUAD 831 else if (size == 64) { 832 if (ckWARN(WARN_PORTABLE)) 833 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 834 "Bit vector size > 32 non-portable"); 835 retnum = 836 ((UV) s[offset ] << 56) + 837 ((UV) s[offset + 1] << 48) + 838 ((UV) s[offset + 2] << 40) + 839 ((UV) s[offset + 3] << 32) + 840 ((UV) s[offset + 4] << 24) + 841 ((UV) s[offset + 5] << 16) + 842 ( s[offset + 6] << 8) + 843 s[offset + 7]; 844 } 845 #endif 846 } 847 848 return retnum; 849 } 850 851 /* currently converts input to bytes if possible but doesn't sweat failures, 852 * although it does ensure that the string it clobbers is not marked as 853 * utf8-valid any more 854 */ 855 void 856 Perl_do_vecset(pTHX_ SV *sv) 857 { 858 SV *targ = LvTARG(sv); 859 register I32 offset; 860 register I32 size; 861 register unsigned char *s; 862 register UV lval; 863 I32 mask; 864 STRLEN targlen; 865 STRLEN len; 866 867 if (!targ) 868 return; 869 s = (unsigned char*)SvPV_force(targ, targlen); 870 if (SvUTF8(targ)) { 871 /* This is handled by the SvPOK_only below... 872 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE)) 873 SvUTF8_off(targ); 874 */ 875 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE); 876 } 877 878 (void)SvPOK_only(targ); 879 lval = SvUV(sv); 880 offset = LvTARGOFF(sv); 881 if (offset < 0) 882 Perl_croak(aTHX_ "Negative offset to vec in lvalue context"); 883 size = LvTARGLEN(sv); 884 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 885 Perl_croak(aTHX_ "Illegal number of bits in vec"); 886 887 offset *= size; /* turn into bit offset */ 888 len = (offset + size + 7) / 8; /* required number of bytes */ 889 if (len > targlen) { 890 s = (unsigned char*)SvGROW(targ, len + 1); 891 (void)memzero((char *)(s + targlen), len - targlen + 1); 892 SvCUR_set(targ, len); 893 } 894 895 if (size < 8) { 896 mask = (1 << size) - 1; 897 size = offset & 7; 898 lval &= mask; 899 offset >>= 3; /* turn into byte offset */ 900 s[offset] &= ~(mask << size); 901 s[offset] |= lval << size; 902 } 903 else { 904 offset >>= 3; /* turn into byte offset */ 905 if (size == 8) 906 s[offset ] = (U8)( lval & 0xff); 907 else if (size == 16) { 908 s[offset ] = (U8)((lval >> 8) & 0xff); 909 s[offset+1] = (U8)( lval & 0xff); 910 } 911 else if (size == 32) { 912 s[offset ] = (U8)((lval >> 24) & 0xff); 913 s[offset+1] = (U8)((lval >> 16) & 0xff); 914 s[offset+2] = (U8)((lval >> 8) & 0xff); 915 s[offset+3] = (U8)( lval & 0xff); 916 } 917 #ifdef UV_IS_QUAD 918 else if (size == 64) { 919 if (ckWARN(WARN_PORTABLE)) 920 Perl_warner(aTHX_ packWARN(WARN_PORTABLE), 921 "Bit vector size > 32 non-portable"); 922 s[offset ] = (U8)((lval >> 56) & 0xff); 923 s[offset+1] = (U8)((lval >> 48) & 0xff); 924 s[offset+2] = (U8)((lval >> 40) & 0xff); 925 s[offset+3] = (U8)((lval >> 32) & 0xff); 926 s[offset+4] = (U8)((lval >> 24) & 0xff); 927 s[offset+5] = (U8)((lval >> 16) & 0xff); 928 s[offset+6] = (U8)((lval >> 8) & 0xff); 929 s[offset+7] = (U8)( lval & 0xff); 930 } 931 #endif 932 } 933 SvSETMAGIC(targ); 934 } 935 936 void 937 Perl_do_chop(pTHX_ register SV *astr, register SV *sv) 938 { 939 STRLEN len; 940 char *s; 941 942 if (SvTYPE(sv) == SVt_PVAV) { 943 register I32 i; 944 AV* av = (AV*)sv; 945 const I32 max = AvFILL(av); 946 947 for (i = 0; i <= max; i++) { 948 sv = (SV*)av_fetch(av, i, FALSE); 949 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 950 do_chop(astr, sv); 951 } 952 return; 953 } 954 else if (SvTYPE(sv) == SVt_PVHV) { 955 HV* hv = (HV*)sv; 956 HE* entry; 957 (void)hv_iterinit(hv); 958 while ((entry = hv_iternext(hv))) 959 do_chop(astr,hv_iterval(hv,entry)); 960 return; 961 } 962 else if (SvREADONLY(sv)) { 963 if (SvFAKE(sv)) { 964 /* SV is copy-on-write */ 965 sv_force_normal_flags(sv, 0); 966 } 967 if (SvREADONLY(sv)) 968 Perl_croak(aTHX_ PL_no_modify); 969 } 970 971 if (PL_encoding && !SvUTF8(sv)) { 972 /* like in do_chomp(), utf8-ize the sv as a side-effect 973 * if we're using encoding. */ 974 sv_recode_to_utf8(sv, PL_encoding); 975 } 976 977 s = SvPV(sv, len); 978 if (len && !SvPOK(sv)) 979 s = SvPV_force(sv, len); 980 if (DO_UTF8(sv)) { 981 if (s && len) { 982 char *send = s + len; 983 char *start = s; 984 s = send - 1; 985 while (s > start && UTF8_IS_CONTINUATION(*s)) 986 s--; 987 if (is_utf8_string((U8*)s, send - s)) { 988 sv_setpvn(astr, s, send - s); 989 *s = '\0'; 990 SvCUR_set(sv, s - start); 991 SvNIOK_off(sv); 992 SvUTF8_on(astr); 993 } 994 } 995 else 996 sv_setpvn(astr, "", 0); 997 } 998 else if (s && len) { 999 s += --len; 1000 sv_setpvn(astr, s, 1); 1001 *s = '\0'; 1002 SvCUR_set(sv, len); 1003 SvUTF8_off(sv); 1004 SvNIOK_off(sv); 1005 } 1006 else 1007 sv_setpvn(astr, "", 0); 1008 SvSETMAGIC(sv); 1009 } 1010 1011 I32 1012 Perl_do_chomp(pTHX_ register SV *sv) 1013 { 1014 register I32 count; 1015 STRLEN len; 1016 char *s; 1017 char *temp_buffer = NULL; 1018 SV* svrecode = Nullsv; 1019 1020 if (RsSNARF(PL_rs)) 1021 return 0; 1022 if (RsRECORD(PL_rs)) 1023 return 0; 1024 count = 0; 1025 if (SvTYPE(sv) == SVt_PVAV) { 1026 register I32 i; 1027 AV* const av = (AV*)sv; 1028 const I32 max = AvFILL(av); 1029 1030 for (i = 0; i <= max; i++) { 1031 sv = (SV*)av_fetch(av, i, FALSE); 1032 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 1033 count += do_chomp(sv); 1034 } 1035 return count; 1036 } 1037 else if (SvTYPE(sv) == SVt_PVHV) { 1038 HV* const hv = (HV*)sv; 1039 HE* entry; 1040 (void)hv_iterinit(hv); 1041 while ((entry = hv_iternext(hv))) 1042 count += do_chomp(hv_iterval(hv,entry)); 1043 return count; 1044 } 1045 else if (SvREADONLY(sv)) { 1046 if (SvFAKE(sv)) { 1047 /* SV is copy-on-write */ 1048 sv_force_normal_flags(sv, 0); 1049 } 1050 if (SvREADONLY(sv)) 1051 Perl_croak(aTHX_ PL_no_modify); 1052 } 1053 1054 if (PL_encoding) { 1055 if (!SvUTF8(sv)) { 1056 /* XXX, here sv is utf8-ized as a side-effect! 1057 If encoding.pm is used properly, almost string-generating 1058 operations, including literal strings, chr(), input data, etc. 1059 should have been utf8-ized already, right? 1060 */ 1061 sv_recode_to_utf8(sv, PL_encoding); 1062 } 1063 } 1064 1065 s = SvPV(sv, len); 1066 if (s && len) { 1067 s += --len; 1068 if (RsPARA(PL_rs)) { 1069 if (*s != '\n') 1070 goto nope; 1071 ++count; 1072 while (len && s[-1] == '\n') { 1073 --len; 1074 --s; 1075 ++count; 1076 } 1077 } 1078 else { 1079 STRLEN rslen, rs_charlen; 1080 const char *rsptr = SvPV_const(PL_rs, rslen); 1081 1082 rs_charlen = SvUTF8(PL_rs) 1083 ? sv_len_utf8(PL_rs) 1084 : rslen; 1085 1086 if (SvUTF8(PL_rs) != SvUTF8(sv)) { 1087 /* Assumption is that rs is shorter than the scalar. */ 1088 if (SvUTF8(PL_rs)) { 1089 /* RS is utf8, scalar is 8 bit. */ 1090 bool is_utf8 = TRUE; 1091 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, 1092 &rslen, &is_utf8); 1093 if (is_utf8) { 1094 /* Cannot downgrade, therefore cannot possibly match 1095 */ 1096 assert (temp_buffer == rsptr); 1097 temp_buffer = NULL; 1098 goto nope; 1099 } 1100 rsptr = temp_buffer; 1101 } 1102 else if (PL_encoding) { 1103 /* RS is 8 bit, encoding.pm is used. 1104 * Do not recode PL_rs as a side-effect. */ 1105 svrecode = newSVpvn(rsptr, rslen); 1106 sv_recode_to_utf8(svrecode, PL_encoding); 1107 rsptr = SvPV_const(svrecode, rslen); 1108 rs_charlen = sv_len_utf8(svrecode); 1109 } 1110 else { 1111 /* RS is 8 bit, scalar is utf8. */ 1112 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); 1113 rsptr = temp_buffer; 1114 } 1115 } 1116 if (rslen == 1) { 1117 if (*s != *rsptr) 1118 goto nope; 1119 ++count; 1120 } 1121 else { 1122 if (len < rslen - 1) 1123 goto nope; 1124 len -= rslen - 1; 1125 s -= rslen - 1; 1126 if (memNE(s, rsptr, rslen)) 1127 goto nope; 1128 count += rs_charlen; 1129 } 1130 } 1131 s = SvPV_force_nolen(sv); 1132 SvCUR_set(sv, len); 1133 *SvEND(sv) = '\0'; 1134 SvNIOK_off(sv); 1135 SvSETMAGIC(sv); 1136 } 1137 nope: 1138 1139 if (svrecode) 1140 SvREFCNT_dec(svrecode); 1141 1142 Safefree(temp_buffer); 1143 return count; 1144 } 1145 1146 void 1147 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 1148 { 1149 #ifdef LIBERAL 1150 register long *dl; 1151 register long *ll; 1152 register long *rl; 1153 #endif 1154 register char *dc; 1155 STRLEN leftlen; 1156 STRLEN rightlen; 1157 register const char *lc; 1158 register const char *rc; 1159 register I32 len; 1160 I32 lensave; 1161 const char *lsave; 1162 const char *rsave; 1163 const bool left_utf = DO_UTF8(left); 1164 const bool right_utf = DO_UTF8(right); 1165 I32 needlen = 0; 1166 1167 if (left_utf && !right_utf) 1168 sv_utf8_upgrade(right); 1169 else if (!left_utf && right_utf) 1170 sv_utf8_upgrade(left); 1171 1172 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv))) 1173 sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */ 1174 lsave = lc = SvPV_const(left, leftlen); 1175 rsave = rc = SvPV_const(right, rightlen); 1176 len = leftlen < rightlen ? leftlen : rightlen; 1177 lensave = len; 1178 if ((left_utf || right_utf) && (sv == left || sv == right)) { 1179 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; 1180 Newxz(dc, needlen + 1, char); 1181 } 1182 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1183 /* Fix this to nong when change 22613 is integrated. 1184 (Which in turn awaits merging sv_2iv and sv_2uv) */ 1185 dc = SvPV_force_nolen(sv); 1186 if (SvLEN(sv) < (STRLEN)(len + 1)) { 1187 dc = SvGROW(sv, (STRLEN)(len + 1)); 1188 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1189 } 1190 if (optype != OP_BIT_AND && (left_utf || right_utf)) 1191 dc = SvGROW(sv, leftlen + rightlen + 1); 1192 } 1193 else { 1194 needlen = ((optype == OP_BIT_AND) 1195 ? len : (leftlen > rightlen ? leftlen : rightlen)); 1196 Newxz(dc, needlen + 1, char); 1197 (void)sv_usepvn(sv, dc, needlen); 1198 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1199 } 1200 SvCUR_set(sv, len); 1201 (void)SvPOK_only(sv); 1202 if (left_utf || right_utf) { 1203 UV duc, luc, ruc; 1204 char *dcsave = dc; 1205 STRLEN lulen = leftlen; 1206 STRLEN rulen = rightlen; 1207 STRLEN ulen; 1208 1209 switch (optype) { 1210 case OP_BIT_AND: 1211 while (lulen && rulen) { 1212 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1213 lc += ulen; 1214 lulen -= ulen; 1215 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1216 rc += ulen; 1217 rulen -= ulen; 1218 duc = luc & ruc; 1219 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1220 } 1221 if (sv == left || sv == right) 1222 (void)sv_usepvn(sv, dcsave, needlen); 1223 SvCUR_set(sv, dc - dcsave); 1224 break; 1225 case OP_BIT_XOR: 1226 while (lulen && rulen) { 1227 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1228 lc += ulen; 1229 lulen -= ulen; 1230 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1231 rc += ulen; 1232 rulen -= ulen; 1233 duc = luc ^ ruc; 1234 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1235 } 1236 goto mop_up_utf; 1237 case OP_BIT_OR: 1238 while (lulen && rulen) { 1239 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1240 lc += ulen; 1241 lulen -= ulen; 1242 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1243 rc += ulen; 1244 rulen -= ulen; 1245 duc = luc | ruc; 1246 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1247 } 1248 mop_up_utf: 1249 if (sv == left || sv == right) 1250 (void)sv_usepvn(sv, dcsave, needlen); 1251 SvCUR_set(sv, dc - dcsave); 1252 if (rulen) 1253 sv_catpvn(sv, rc, rulen); 1254 else if (lulen) 1255 sv_catpvn(sv, lc, lulen); 1256 else 1257 *SvEND(sv) = '\0'; 1258 break; 1259 } 1260 SvUTF8_on(sv); 1261 goto finish; 1262 } 1263 else 1264 #ifdef LIBERAL 1265 if (len >= sizeof(long)*4 && 1266 !((long)dc % sizeof(long)) && 1267 !((long)lc % sizeof(long)) && 1268 !((long)rc % sizeof(long))) /* It's almost always aligned... */ 1269 { 1270 const I32 remainder = len % (sizeof(long)*4); 1271 len /= (sizeof(long)*4); 1272 1273 dl = (long*)dc; 1274 ll = (long*)lc; 1275 rl = (long*)rc; 1276 1277 switch (optype) { 1278 case OP_BIT_AND: 1279 while (len--) { 1280 *dl++ = *ll++ & *rl++; 1281 *dl++ = *ll++ & *rl++; 1282 *dl++ = *ll++ & *rl++; 1283 *dl++ = *ll++ & *rl++; 1284 } 1285 break; 1286 case OP_BIT_XOR: 1287 while (len--) { 1288 *dl++ = *ll++ ^ *rl++; 1289 *dl++ = *ll++ ^ *rl++; 1290 *dl++ = *ll++ ^ *rl++; 1291 *dl++ = *ll++ ^ *rl++; 1292 } 1293 break; 1294 case OP_BIT_OR: 1295 while (len--) { 1296 *dl++ = *ll++ | *rl++; 1297 *dl++ = *ll++ | *rl++; 1298 *dl++ = *ll++ | *rl++; 1299 *dl++ = *ll++ | *rl++; 1300 } 1301 } 1302 1303 dc = (char*)dl; 1304 lc = (char*)ll; 1305 rc = (char*)rl; 1306 1307 len = remainder; 1308 } 1309 #endif 1310 { 1311 switch (optype) { 1312 case OP_BIT_AND: 1313 while (len--) 1314 *dc++ = *lc++ & *rc++; 1315 *dc = '\0'; 1316 break; 1317 case OP_BIT_XOR: 1318 while (len--) 1319 *dc++ = *lc++ ^ *rc++; 1320 goto mop_up; 1321 case OP_BIT_OR: 1322 while (len--) 1323 *dc++ = *lc++ | *rc++; 1324 mop_up: 1325 len = lensave; 1326 if (rightlen > (STRLEN)len) 1327 sv_catpvn(sv, rsave + len, rightlen - len); 1328 else if (leftlen > (STRLEN)len) 1329 sv_catpvn(sv, lsave + len, leftlen - len); 1330 else 1331 *SvEND(sv) = '\0'; 1332 break; 1333 } 1334 } 1335 finish: 1336 SvTAINT(sv); 1337 } 1338 1339 OP * 1340 Perl_do_kv(pTHX) 1341 { 1342 dSP; 1343 HV *hv = (HV*)POPs; 1344 HV *keys; 1345 register HE *entry; 1346 const I32 gimme = GIMME_V; 1347 const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV); 1348 const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS); 1349 const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES); 1350 I32 realhv = (SvTYPE(hv) == SVt_PVHV); 1351 1352 if (!hv) { 1353 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1354 dTARGET; /* make sure to clear its target here */ 1355 if (SvTYPE(TARG) == SVt_PVLV) 1356 LvTARG(TARG) = Nullsv; 1357 PUSHs(TARG); 1358 } 1359 RETURN; 1360 } 1361 1362 keys = realhv ? hv : avhv_keys((AV*)hv); 1363 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1364 1365 if (gimme == G_VOID) 1366 RETURN; 1367 1368 if (gimme == G_SCALAR) { 1369 IV i; 1370 dTARGET; 1371 1372 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1373 if (SvTYPE(TARG) < SVt_PVLV) { 1374 sv_upgrade(TARG, SVt_PVLV); 1375 sv_magic(TARG, Nullsv, PERL_MAGIC_nkeys, Nullch, 0); 1376 } 1377 LvTYPE(TARG) = 'k'; 1378 if (LvTARG(TARG) != (SV*)keys) { 1379 if (LvTARG(TARG)) 1380 SvREFCNT_dec(LvTARG(TARG)); 1381 LvTARG(TARG) = SvREFCNT_inc(keys); 1382 } 1383 PUSHs(TARG); 1384 RETURN; 1385 } 1386 1387 if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied)) 1388 i = HvKEYS(keys); 1389 else { 1390 i = 0; 1391 while (hv_iternext(keys)) i++; 1392 } 1393 PUSHi( i ); 1394 RETURN; 1395 } 1396 1397 EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues)); 1398 1399 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */ 1400 while ((entry = hv_iternext(keys))) { 1401 SPAGAIN; 1402 if (dokeys) { 1403 SV* const sv = hv_iterkeysv(entry); 1404 XPUSHs(sv); /* won't clobber stack_sp */ 1405 } 1406 if (dovalues) { 1407 SV *tmpstr; 1408 PUTBACK; 1409 tmpstr = realhv ? 1410 hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry); 1411 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu", 1412 (unsigned long)HeHASH(entry), 1413 (int)HvMAX(keys)+1, 1414 (unsigned long)(HeHASH(entry) & HvMAX(keys)))); 1415 SPAGAIN; 1416 XPUSHs(tmpstr); 1417 } 1418 PUTBACK; 1419 } 1420 return NORMAL; 1421 } 1422 1423 /* 1424 * Local variables: 1425 * c-indentation-style: bsd 1426 * c-basic-offset: 4 1427 * indent-tabs-mode: t 1428 * End: 1429 * 1430 * ex: set ts=8 sts=4 sw=4 noet: 1431 */ 1432