1 /* doop.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'So that was the job I felt I had to do when I started,' thought Sam. 13 * 14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"] 15 */ 16 17 /* This file contains some common functions needed to carry out certain 18 * ops. For example both pp_schomp() and pp_chomp() - scalar and array 19 * chomp operations - call the function do_chomp() found in this file. 20 */ 21 22 #include "EXTERN.h" 23 #define PERL_IN_DOOP_C 24 #include "perl.h" 25 26 #ifndef PERL_MICRO 27 #include <signal.h> 28 #endif 29 30 STATIC I32 31 S_do_trans_simple(pTHX_ SV * const sv) 32 { 33 dVAR; 34 I32 matches = 0; 35 STRLEN len; 36 U8 *s = (U8*)SvPV_nomg(sv,len); 37 U8 * const send = s+len; 38 const short * const tbl = (short*)cPVOP->op_pv; 39 40 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; 41 42 if (!tbl) 43 Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); 44 45 /* First, take care of non-UTF-8 input strings, because they're easy */ 46 if (!SvUTF8(sv)) { 47 while (s < send) { 48 const I32 ch = tbl[*s]; 49 if (ch >= 0) { 50 matches++; 51 *s = (U8)ch; 52 } 53 s++; 54 } 55 SvSETMAGIC(sv); 56 } 57 else { 58 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 59 U8 *d; 60 U8 *dstart; 61 62 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ 63 if (grows) 64 Newx(d, len*2+1, U8); 65 else 66 d = s; 67 dstart = d; 68 while (s < send) { 69 STRLEN ulen; 70 I32 ch; 71 72 /* Need to check this, otherwise 128..255 won't match */ 73 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 74 if (c < 0x100 && (ch = tbl[c]) >= 0) { 75 matches++; 76 d = uvchr_to_utf8(d, ch); 77 s += ulen; 78 } 79 else { /* No match -> copy */ 80 Move(s, d, ulen, U8); 81 d += ulen; 82 s += ulen; 83 } 84 } 85 if (grows) { 86 sv_setpvn(sv, (char*)dstart, d - dstart); 87 Safefree(dstart); 88 } 89 else { 90 *d = '\0'; 91 SvCUR_set(sv, d - dstart); 92 } 93 SvUTF8_on(sv); 94 SvSETMAGIC(sv); 95 } 96 return matches; 97 } 98 99 STATIC I32 100 S_do_trans_count(pTHX_ SV * const sv) 101 { 102 dVAR; 103 STRLEN len; 104 const U8 *s = (const U8*)SvPV_nomg_const(sv, len); 105 const U8 * const send = s + len; 106 I32 matches = 0; 107 const short * const tbl = (short*)cPVOP->op_pv; 108 109 PERL_ARGS_ASSERT_DO_TRANS_COUNT; 110 111 if (!tbl) 112 Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__); 113 114 if (!SvUTF8(sv)) { 115 while (s < send) { 116 if (tbl[*s++] >= 0) 117 matches++; 118 } 119 } 120 else { 121 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 122 while (s < send) { 123 STRLEN ulen; 124 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 125 if (c < 0x100) { 126 if (tbl[c] >= 0) 127 matches++; 128 } else if (complement) 129 matches++; 130 s += ulen; 131 } 132 } 133 134 return matches; 135 } 136 137 STATIC I32 138 S_do_trans_complex(pTHX_ SV * const sv) 139 { 140 dVAR; 141 STRLEN len; 142 U8 *s = (U8*)SvPV_nomg(sv, len); 143 U8 * const send = s+len; 144 I32 matches = 0; 145 const short * const tbl = (short*)cPVOP->op_pv; 146 147 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; 148 149 if (!tbl) 150 Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); 151 152 if (!SvUTF8(sv)) { 153 U8 *d = s; 154 U8 * const dstart = d; 155 156 if (PL_op->op_private & OPpTRANS_SQUASH) { 157 const U8* p = send; 158 while (s < send) { 159 const I32 ch = tbl[*s]; 160 if (ch >= 0) { 161 *d = (U8)ch; 162 matches++; 163 if (p != d - 1 || *p != *d) 164 p = d++; 165 } 166 else if (ch == -1) /* -1 is unmapped character */ 167 *d++ = *s; 168 else if (ch == -2) /* -2 is delete character */ 169 matches++; 170 s++; 171 } 172 } 173 else { 174 while (s < send) { 175 const I32 ch = tbl[*s]; 176 if (ch >= 0) { 177 matches++; 178 *d++ = (U8)ch; 179 } 180 else if (ch == -1) /* -1 is unmapped character */ 181 *d++ = *s; 182 else if (ch == -2) /* -2 is delete character */ 183 matches++; 184 s++; 185 } 186 } 187 *d = '\0'; 188 SvCUR_set(sv, d - dstart); 189 } 190 else { /* is utf8 */ 191 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 192 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 193 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 194 U8 *d; 195 U8 *dstart; 196 STRLEN rlen = 0; 197 198 if (grows) 199 Newx(d, len*2+1, U8); 200 else 201 d = s; 202 dstart = d; 203 if (complement && !del) 204 rlen = tbl[0x100]; 205 206 if (PL_op->op_private & OPpTRANS_SQUASH) { 207 UV pch = 0xfeedface; 208 while (s < send) { 209 STRLEN len; 210 const UV comp = utf8n_to_uvchr(s, send - s, &len, 211 UTF8_ALLOW_DEFAULT); 212 I32 ch; 213 214 if (comp > 0xff) { 215 if (!complement) { 216 Move(s, d, len, U8); 217 d += len; 218 } 219 else { 220 matches++; 221 if (!del) { 222 ch = (rlen == 0) ? (I32)comp : 223 (comp - 0x100 < rlen) ? 224 tbl[comp+1] : tbl[0x100+rlen]; 225 if ((UV)ch != pch) { 226 d = uvchr_to_utf8(d, ch); 227 pch = (UV)ch; 228 } 229 s += len; 230 continue; 231 } 232 } 233 } 234 else if ((ch = tbl[comp]) >= 0) { 235 matches++; 236 if ((UV)ch != pch) { 237 d = uvchr_to_utf8(d, ch); 238 pch = (UV)ch; 239 } 240 s += len; 241 continue; 242 } 243 else if (ch == -1) { /* -1 is unmapped character */ 244 Move(s, d, len, U8); 245 d += len; 246 } 247 else if (ch == -2) /* -2 is delete character */ 248 matches++; 249 s += len; 250 pch = 0xfeedface; 251 } 252 } 253 else { 254 while (s < send) { 255 STRLEN len; 256 const UV comp = utf8n_to_uvchr(s, send - s, &len, 257 UTF8_ALLOW_DEFAULT); 258 I32 ch; 259 if (comp > 0xff) { 260 if (!complement) { 261 Move(s, d, len, U8); 262 d += len; 263 } 264 else { 265 matches++; 266 if (!del) { 267 if (comp - 0x100 < rlen) 268 d = uvchr_to_utf8(d, tbl[comp+1]); 269 else 270 d = uvchr_to_utf8(d, tbl[0x100+rlen]); 271 } 272 } 273 } 274 else if ((ch = tbl[comp]) >= 0) { 275 d = uvchr_to_utf8(d, ch); 276 matches++; 277 } 278 else if (ch == -1) { /* -1 is unmapped character */ 279 Move(s, d, len, U8); 280 d += len; 281 } 282 else if (ch == -2) /* -2 is delete character */ 283 matches++; 284 s += len; 285 } 286 } 287 if (grows) { 288 sv_setpvn(sv, (char*)dstart, d - dstart); 289 Safefree(dstart); 290 } 291 else { 292 *d = '\0'; 293 SvCUR_set(sv, d - dstart); 294 } 295 SvUTF8_on(sv); 296 } 297 SvSETMAGIC(sv); 298 return matches; 299 } 300 301 STATIC I32 302 S_do_trans_simple_utf8(pTHX_ SV * const sv) 303 { 304 dVAR; 305 U8 *s; 306 U8 *send; 307 U8 *d; 308 U8 *start; 309 U8 *dstart, *dend; 310 I32 matches = 0; 311 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 312 STRLEN len; 313 SV* const rv = 314 #ifdef USE_ITHREADS 315 PAD_SVl(cPADOP->op_padix); 316 #else 317 MUTABLE_SV(cSVOP->op_sv); 318 #endif 319 HV* const hv = MUTABLE_HV(SvRV(rv)); 320 SV* const * svp = hv_fetchs(hv, "NONE", FALSE); 321 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 322 const UV extra = none + 1; 323 UV final = 0; 324 U8 hibit = 0; 325 326 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8; 327 328 s = (U8*)SvPV_nomg(sv, len); 329 if (!SvUTF8(sv)) { 330 const U8 *t = s; 331 const U8 * const e = s + len; 332 while (t < e) { 333 const U8 ch = *t++; 334 hibit = !NATIVE_IS_INVARIANT(ch); 335 if (hibit) { 336 s = bytes_to_utf8(s, &len); 337 break; 338 } 339 } 340 } 341 send = s + len; 342 start = s; 343 344 svp = hv_fetchs(hv, "FINAL", FALSE); 345 if (svp) 346 final = SvUV(*svp); 347 348 if (grows) { 349 /* d needs to be bigger than s, in case e.g. upgrading is required */ 350 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 351 dend = d + len * 3; 352 dstart = d; 353 } 354 else { 355 dstart = d = s; 356 dend = d + len; 357 } 358 359 while (s < send) { 360 const UV uv = swash_fetch(rv, s, TRUE); 361 if (uv < none) { 362 s += UTF8SKIP(s); 363 matches++; 364 d = uvuni_to_utf8(d, uv); 365 } 366 else if (uv == none) { 367 const int i = UTF8SKIP(s); 368 Move(s, d, i, U8); 369 d += i; 370 s += i; 371 } 372 else if (uv == extra) { 373 s += UTF8SKIP(s); 374 matches++; 375 d = uvuni_to_utf8(d, final); 376 } 377 else 378 s += UTF8SKIP(s); 379 380 if (d > dend) { 381 const STRLEN clen = d - dstart; 382 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 383 if (!grows) 384 Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__); 385 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 386 d = dstart + clen; 387 dend = dstart + nlen; 388 } 389 } 390 if (grows || hibit) { 391 sv_setpvn(sv, (char*)dstart, d - dstart); 392 Safefree(dstart); 393 if (grows && hibit) 394 Safefree(start); 395 } 396 else { 397 *d = '\0'; 398 SvCUR_set(sv, d - dstart); 399 } 400 SvSETMAGIC(sv); 401 SvUTF8_on(sv); 402 403 return matches; 404 } 405 406 STATIC I32 407 S_do_trans_count_utf8(pTHX_ SV * const sv) 408 { 409 dVAR; 410 const U8 *s; 411 const U8 *start = NULL; 412 const U8 *send; 413 I32 matches = 0; 414 STRLEN len; 415 SV* const rv = 416 #ifdef USE_ITHREADS 417 PAD_SVl(cPADOP->op_padix); 418 #else 419 MUTABLE_SV(cSVOP->op_sv); 420 #endif 421 HV* const hv = MUTABLE_HV(SvRV(rv)); 422 SV* const * const svp = hv_fetchs(hv, "NONE", FALSE); 423 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 424 const UV extra = none + 1; 425 U8 hibit = 0; 426 427 PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8; 428 429 s = (const U8*)SvPV_nomg_const(sv, len); 430 if (!SvUTF8(sv)) { 431 const U8 *t = s; 432 const U8 * const e = s + len; 433 while (t < e) { 434 const U8 ch = *t++; 435 hibit = !NATIVE_IS_INVARIANT(ch); 436 if (hibit) { 437 start = s = bytes_to_utf8(s, &len); 438 break; 439 } 440 } 441 } 442 send = s + len; 443 444 while (s < send) { 445 const UV uv = swash_fetch(rv, s, TRUE); 446 if (uv < none || uv == extra) 447 matches++; 448 s += UTF8SKIP(s); 449 } 450 if (hibit) 451 Safefree(start); 452 453 return matches; 454 } 455 456 STATIC I32 457 S_do_trans_complex_utf8(pTHX_ SV * const sv) 458 { 459 dVAR; 460 U8 *start, *send; 461 U8 *d; 462 I32 matches = 0; 463 const I32 squash = PL_op->op_private & OPpTRANS_SQUASH; 464 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 465 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 466 SV* const rv = 467 #ifdef USE_ITHREADS 468 PAD_SVl(cPADOP->op_padix); 469 #else 470 MUTABLE_SV(cSVOP->op_sv); 471 #endif 472 HV * const hv = MUTABLE_HV(SvRV(rv)); 473 SV * const *svp = hv_fetchs(hv, "NONE", FALSE); 474 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 475 const UV extra = none + 1; 476 UV final = 0; 477 bool havefinal = FALSE; 478 STRLEN len; 479 U8 *dstart, *dend; 480 U8 hibit = 0; 481 U8 *s = (U8*)SvPV_nomg(sv, len); 482 483 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8; 484 485 if (!SvUTF8(sv)) { 486 const U8 *t = s; 487 const U8 * const e = s + len; 488 while (t < e) { 489 const U8 ch = *t++; 490 hibit = !NATIVE_IS_INVARIANT(ch); 491 if (hibit) { 492 s = bytes_to_utf8(s, &len); 493 break; 494 } 495 } 496 } 497 send = s + len; 498 start = s; 499 500 svp = hv_fetchs(hv, "FINAL", FALSE); 501 if (svp) { 502 final = SvUV(*svp); 503 havefinal = TRUE; 504 } 505 506 if (grows) { 507 /* d needs to be bigger than s, in case e.g. upgrading is required */ 508 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 509 dend = d + len * 3; 510 dstart = d; 511 } 512 else { 513 dstart = d = s; 514 dend = d + len; 515 } 516 517 if (squash) { 518 UV puv = 0xfeedface; 519 while (s < send) { 520 UV uv = swash_fetch(rv, s, TRUE); 521 522 if (d > dend) { 523 const STRLEN clen = d - dstart; 524 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 525 if (!grows) 526 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 527 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 528 d = dstart + clen; 529 dend = dstart + nlen; 530 } 531 if (uv < none) { 532 matches++; 533 s += UTF8SKIP(s); 534 if (uv != puv) { 535 d = uvuni_to_utf8(d, uv); 536 puv = uv; 537 } 538 continue; 539 } 540 else if (uv == none) { /* "none" is unmapped character */ 541 const int i = UTF8SKIP(s); 542 Move(s, d, i, U8); 543 d += i; 544 s += i; 545 puv = 0xfeedface; 546 continue; 547 } 548 else if (uv == extra && !del) { 549 matches++; 550 if (havefinal) { 551 s += UTF8SKIP(s); 552 if (puv != final) { 553 d = uvuni_to_utf8(d, final); 554 puv = final; 555 } 556 } 557 else { 558 STRLEN len; 559 uv = utf8n_to_uvuni(s, send - s, &len, UTF8_ALLOW_DEFAULT); 560 if (uv != puv) { 561 Move(s, d, len, U8); 562 d += len; 563 puv = uv; 564 } 565 s += len; 566 } 567 continue; 568 } 569 matches++; /* "none+1" is delete character */ 570 s += UTF8SKIP(s); 571 } 572 } 573 else { 574 while (s < send) { 575 const UV uv = swash_fetch(rv, s, TRUE); 576 if (d > dend) { 577 const STRLEN clen = d - dstart; 578 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 579 if (!grows) 580 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 581 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 582 d = dstart + clen; 583 dend = dstart + nlen; 584 } 585 if (uv < none) { 586 matches++; 587 s += UTF8SKIP(s); 588 d = uvuni_to_utf8(d, uv); 589 continue; 590 } 591 else if (uv == none) { /* "none" is unmapped character */ 592 const int i = UTF8SKIP(s); 593 Move(s, d, i, U8); 594 d += i; 595 s += i; 596 continue; 597 } 598 else if (uv == extra && !del) { 599 matches++; 600 s += UTF8SKIP(s); 601 d = uvuni_to_utf8(d, final); 602 continue; 603 } 604 matches++; /* "none+1" is delete character */ 605 s += UTF8SKIP(s); 606 } 607 } 608 if (grows || hibit) { 609 sv_setpvn(sv, (char*)dstart, d - dstart); 610 Safefree(dstart); 611 if (grows && hibit) 612 Safefree(start); 613 } 614 else { 615 *d = '\0'; 616 SvCUR_set(sv, d - dstart); 617 } 618 SvUTF8_on(sv); 619 SvSETMAGIC(sv); 620 621 return matches; 622 } 623 624 I32 625 Perl_do_trans(pTHX_ SV *sv) 626 { 627 dVAR; 628 STRLEN len; 629 const I32 hasutf = (PL_op->op_private & 630 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); 631 632 PERL_ARGS_ASSERT_DO_TRANS; 633 634 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) { 635 if (!SvIsCOW(sv)) 636 Perl_croak_no_modify(); 637 } 638 (void)SvPV_const(sv, len); 639 if (!len) 640 return 0; 641 if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) { 642 if (!SvPOKp(sv) || SvTHINKFIRST(sv)) 643 (void)SvPV_force_nomg(sv, len); 644 (void)SvPOK_only_UTF8(sv); 645 } 646 647 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); 648 649 switch (PL_op->op_private & ~hasutf & ( 650 OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL| 651 OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { 652 case 0: 653 if (hasutf) 654 return do_trans_simple_utf8(sv); 655 else 656 return do_trans_simple(sv); 657 658 case OPpTRANS_IDENTICAL: 659 case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT: 660 if (hasutf) 661 return do_trans_count_utf8(sv); 662 else 663 return do_trans_count(sv); 664 665 default: 666 if (hasutf) 667 return do_trans_complex_utf8(sv); 668 else 669 return do_trans_complex(sv); 670 } 671 } 672 673 void 674 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) 675 { 676 dVAR; 677 SV ** const oldmark = mark; 678 I32 items = sp - mark; 679 STRLEN len; 680 STRLEN delimlen; 681 682 PERL_ARGS_ASSERT_DO_JOIN; 683 684 (void) SvPV_const(delim, delimlen); /* stringify and get the delimlen */ 685 /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */ 686 687 mark++; 688 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 689 SvUPGRADE(sv, SVt_PV); 690 if (SvLEN(sv) < len + items) { /* current length is way too short */ 691 while (items-- > 0) { 692 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 693 STRLEN tmplen; 694 SvPV_const(*mark, tmplen); 695 len += tmplen; 696 } 697 mark++; 698 } 699 SvGROW(sv, len + 1); /* so try to pre-extend */ 700 701 mark = oldmark; 702 items = sp - mark; 703 ++mark; 704 } 705 706 sv_setpvs(sv, ""); 707 /* sv_setpv retains old UTF8ness [perl #24846] */ 708 SvUTF8_off(sv); 709 710 if (TAINTING_get && SvMAGICAL(sv)) 711 SvTAINTED_off(sv); 712 713 if (items-- > 0) { 714 if (*mark) 715 sv_catsv(sv, *mark); 716 mark++; 717 } 718 719 if (delimlen) { 720 for (; items > 0; items--,mark++) { 721 sv_catsv_nomg(sv,delim); 722 sv_catsv(sv,*mark); 723 } 724 } 725 else { 726 for (; items > 0; items--,mark++) 727 sv_catsv(sv,*mark); 728 } 729 SvSETMAGIC(sv); 730 } 731 732 void 733 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) 734 { 735 dVAR; 736 STRLEN patlen; 737 const char * const pat = SvPV_const(*sarg, patlen); 738 bool do_taint = FALSE; 739 740 PERL_ARGS_ASSERT_DO_SPRINTF; 741 742 if (SvTAINTED(*sarg)) 743 TAINT_PROPER( 744 (PL_op && PL_op->op_type < OP_max) 745 ? (PL_op->op_type == OP_PRTF) 746 ? "printf" 747 : PL_op_name[PL_op->op_type] 748 : "(unknown)" 749 ); 750 SvUTF8_off(sv); 751 if (DO_UTF8(*sarg)) 752 SvUTF8_on(sv); 753 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint); 754 SvSETMAGIC(sv); 755 if (do_taint) 756 SvTAINTED_on(sv); 757 } 758 759 /* currently converts input to bytes if possible, but doesn't sweat failure */ 760 UV 761 Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) 762 { 763 dVAR; 764 STRLEN srclen, len, uoffset, bitoffs = 0; 765 const unsigned char *s = (const unsigned char *) SvPV_flags_const(sv, srclen, 766 SV_GMAGIC | ((PL_op->op_flags & OPf_MOD || LVRET) 767 ? SV_UNDEF_RETURNS_NULL : 0)); 768 UV retnum = 0; 769 770 if (!s) { 771 s = (const unsigned char *)""; 772 } 773 774 PERL_ARGS_ASSERT_DO_VECGET; 775 776 if (offset < 0) 777 return 0; 778 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 779 Perl_croak(aTHX_ "Illegal number of bits in vec"); 780 781 if (SvUTF8(sv)) 782 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); 783 784 if (size < 8) { 785 bitoffs = ((offset%8)*size)%8; 786 uoffset = offset/(8/size); 787 } 788 else if (size > 8) 789 uoffset = offset*(size/8); 790 else 791 uoffset = offset; 792 793 len = uoffset + (bitoffs + size + 7)/8; /* required number of bytes */ 794 if (len > srclen) { 795 if (size <= 8) 796 retnum = 0; 797 else { 798 if (size == 16) { 799 if (uoffset >= srclen) 800 retnum = 0; 801 else 802 retnum = (UV) s[uoffset] << 8; 803 } 804 else if (size == 32) { 805 if (uoffset >= srclen) 806 retnum = 0; 807 else if (uoffset + 1 >= srclen) 808 retnum = 809 ((UV) s[uoffset ] << 24); 810 else if (uoffset + 2 >= srclen) 811 retnum = 812 ((UV) s[uoffset ] << 24) + 813 ((UV) s[uoffset + 1] << 16); 814 else 815 retnum = 816 ((UV) s[uoffset ] << 24) + 817 ((UV) s[uoffset + 1] << 16) + 818 ( s[uoffset + 2] << 8); 819 } 820 #ifdef UV_IS_QUAD 821 else if (size == 64) { 822 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 823 "Bit vector size > 32 non-portable"); 824 if (uoffset >= srclen) 825 retnum = 0; 826 else if (uoffset + 1 >= srclen) 827 retnum = 828 (UV) s[uoffset ] << 56; 829 else if (uoffset + 2 >= srclen) 830 retnum = 831 ((UV) s[uoffset ] << 56) + 832 ((UV) s[uoffset + 1] << 48); 833 else if (uoffset + 3 >= srclen) 834 retnum = 835 ((UV) s[uoffset ] << 56) + 836 ((UV) s[uoffset + 1] << 48) + 837 ((UV) s[uoffset + 2] << 40); 838 else if (uoffset + 4 >= srclen) 839 retnum = 840 ((UV) s[uoffset ] << 56) + 841 ((UV) s[uoffset + 1] << 48) + 842 ((UV) s[uoffset + 2] << 40) + 843 ((UV) s[uoffset + 3] << 32); 844 else if (uoffset + 5 >= srclen) 845 retnum = 846 ((UV) s[uoffset ] << 56) + 847 ((UV) s[uoffset + 1] << 48) + 848 ((UV) s[uoffset + 2] << 40) + 849 ((UV) s[uoffset + 3] << 32) + 850 ( s[uoffset + 4] << 24); 851 else if (uoffset + 6 >= srclen) 852 retnum = 853 ((UV) s[uoffset ] << 56) + 854 ((UV) s[uoffset + 1] << 48) + 855 ((UV) s[uoffset + 2] << 40) + 856 ((UV) s[uoffset + 3] << 32) + 857 ((UV) s[uoffset + 4] << 24) + 858 ((UV) s[uoffset + 5] << 16); 859 else 860 retnum = 861 ((UV) s[uoffset ] << 56) + 862 ((UV) s[uoffset + 1] << 48) + 863 ((UV) s[uoffset + 2] << 40) + 864 ((UV) s[uoffset + 3] << 32) + 865 ((UV) s[uoffset + 4] << 24) + 866 ((UV) s[uoffset + 5] << 16) + 867 ( s[uoffset + 6] << 8); 868 } 869 #endif 870 } 871 } 872 else if (size < 8) 873 retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1); 874 else { 875 if (size == 8) 876 retnum = s[uoffset]; 877 else if (size == 16) 878 retnum = 879 ((UV) s[uoffset] << 8) + 880 s[uoffset + 1]; 881 else if (size == 32) 882 retnum = 883 ((UV) s[uoffset ] << 24) + 884 ((UV) s[uoffset + 1] << 16) + 885 ( s[uoffset + 2] << 8) + 886 s[uoffset + 3]; 887 #ifdef UV_IS_QUAD 888 else if (size == 64) { 889 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 890 "Bit vector size > 32 non-portable"); 891 retnum = 892 ((UV) s[uoffset ] << 56) + 893 ((UV) s[uoffset + 1] << 48) + 894 ((UV) s[uoffset + 2] << 40) + 895 ((UV) s[uoffset + 3] << 32) + 896 ((UV) s[uoffset + 4] << 24) + 897 ((UV) s[uoffset + 5] << 16) + 898 ( s[uoffset + 6] << 8) + 899 s[uoffset + 7]; 900 } 901 #endif 902 } 903 904 return retnum; 905 } 906 907 /* currently converts input to bytes if possible but doesn't sweat failures, 908 * although it does ensure that the string it clobbers is not marked as 909 * utf8-valid any more 910 */ 911 void 912 Perl_do_vecset(pTHX_ SV *sv) 913 { 914 dVAR; 915 SSize_t offset, bitoffs = 0; 916 int size; 917 unsigned char *s; 918 UV lval; 919 I32 mask; 920 STRLEN targlen; 921 STRLEN len; 922 SV * const targ = LvTARG(sv); 923 924 PERL_ARGS_ASSERT_DO_VECSET; 925 926 if (!targ) 927 return; 928 s = (unsigned char*)SvPV_force_flags(targ, targlen, 929 SV_GMAGIC | SV_UNDEF_RETURNS_NULL); 930 if (SvUTF8(targ)) { 931 /* This is handled by the SvPOK_only below... 932 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE)) 933 SvUTF8_off(targ); 934 */ 935 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE); 936 } 937 938 (void)SvPOK_only(targ); 939 lval = SvUV(sv); 940 offset = LvTARGOFF(sv); 941 if (offset < 0) 942 Perl_croak(aTHX_ "Negative offset to vec in lvalue context"); 943 size = LvTARGLEN(sv); 944 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 945 Perl_croak(aTHX_ "Illegal number of bits in vec"); 946 947 if (size < 8) { 948 bitoffs = ((offset%8)*size)%8; 949 offset /= 8/size; 950 } 951 else if (size > 8) 952 offset *= size/8; 953 954 len = offset + (bitoffs + size + 7)/8; /* required number of bytes */ 955 if (len > targlen) { 956 s = (unsigned char*)SvGROW(targ, len + 1); 957 (void)memzero((char *)(s + targlen), len - targlen + 1); 958 SvCUR_set(targ, len); 959 } 960 961 if (size < 8) { 962 mask = (1 << size) - 1; 963 lval &= mask; 964 s[offset] &= ~(mask << bitoffs); 965 s[offset] |= lval << bitoffs; 966 } 967 else { 968 if (size == 8) 969 s[offset ] = (U8)( lval & 0xff); 970 else if (size == 16) { 971 s[offset ] = (U8)((lval >> 8) & 0xff); 972 s[offset+1] = (U8)( lval & 0xff); 973 } 974 else if (size == 32) { 975 s[offset ] = (U8)((lval >> 24) & 0xff); 976 s[offset+1] = (U8)((lval >> 16) & 0xff); 977 s[offset+2] = (U8)((lval >> 8) & 0xff); 978 s[offset+3] = (U8)( lval & 0xff); 979 } 980 #ifdef UV_IS_QUAD 981 else if (size == 64) { 982 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 983 "Bit vector size > 32 non-portable"); 984 s[offset ] = (U8)((lval >> 56) & 0xff); 985 s[offset+1] = (U8)((lval >> 48) & 0xff); 986 s[offset+2] = (U8)((lval >> 40) & 0xff); 987 s[offset+3] = (U8)((lval >> 32) & 0xff); 988 s[offset+4] = (U8)((lval >> 24) & 0xff); 989 s[offset+5] = (U8)((lval >> 16) & 0xff); 990 s[offset+6] = (U8)((lval >> 8) & 0xff); 991 s[offset+7] = (U8)( lval & 0xff); 992 } 993 #endif 994 } 995 SvSETMAGIC(targ); 996 } 997 998 void 999 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 1000 { 1001 dVAR; 1002 #ifdef LIBERAL 1003 long *dl; 1004 long *ll; 1005 long *rl; 1006 #endif 1007 char *dc; 1008 STRLEN leftlen; 1009 STRLEN rightlen; 1010 const char *lc; 1011 const char *rc; 1012 STRLEN len; 1013 STRLEN lensave; 1014 const char *lsave; 1015 const char *rsave; 1016 bool left_utf; 1017 bool right_utf; 1018 STRLEN needlen = 0; 1019 1020 PERL_ARGS_ASSERT_DO_VOP; 1021 1022 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv))) 1023 sv_setpvs(sv, ""); /* avoid undef warning on |= and ^= */ 1024 if (sv == left) { 1025 lsave = lc = SvPV_force_nomg(left, leftlen); 1026 } 1027 else { 1028 lsave = lc = SvPV_nomg_const(left, leftlen); 1029 SvPV_force_nomg_nolen(sv); 1030 } 1031 rsave = rc = SvPV_nomg_const(right, rightlen); 1032 1033 /* This need to come after SvPV to ensure that string overloading has 1034 fired off. */ 1035 1036 left_utf = DO_UTF8(left); 1037 right_utf = DO_UTF8(right); 1038 1039 if (left_utf && !right_utf) { 1040 /* Avoid triggering overloading again by using temporaries. 1041 Maybe there should be a variant of sv_utf8_upgrade that takes pvn 1042 */ 1043 right = newSVpvn_flags(rsave, rightlen, SVs_TEMP); 1044 sv_utf8_upgrade(right); 1045 rsave = rc = SvPV_nomg_const(right, rightlen); 1046 right_utf = TRUE; 1047 } 1048 else if (!left_utf && right_utf) { 1049 left = newSVpvn_flags(lsave, leftlen, SVs_TEMP); 1050 sv_utf8_upgrade(left); 1051 lsave = lc = SvPV_nomg_const(left, leftlen); 1052 left_utf = TRUE; 1053 } 1054 1055 len = leftlen < rightlen ? leftlen : rightlen; 1056 lensave = len; 1057 SvCUR_set(sv, len); 1058 (void)SvPOK_only(sv); 1059 if ((left_utf || right_utf) && (sv == left || sv == right)) { 1060 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; 1061 Newxz(dc, needlen + 1, char); 1062 } 1063 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1064 dc = SvPV_force_nomg_nolen(sv); 1065 if (SvLEN(sv) < len + 1) { 1066 dc = SvGROW(sv, len + 1); 1067 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1068 } 1069 if (optype != OP_BIT_AND && (left_utf || right_utf)) 1070 dc = SvGROW(sv, leftlen + rightlen + 1); 1071 } 1072 else { 1073 needlen = optype == OP_BIT_AND 1074 ? len : (leftlen > rightlen ? leftlen : rightlen); 1075 Newxz(dc, needlen + 1, char); 1076 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL); 1077 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1078 } 1079 if (left_utf || right_utf) { 1080 UV duc, luc, ruc; 1081 char *dcorig = dc; 1082 char *dcsave = NULL; 1083 STRLEN lulen = leftlen; 1084 STRLEN rulen = rightlen; 1085 STRLEN ulen; 1086 1087 switch (optype) { 1088 case OP_BIT_AND: 1089 while (lulen && rulen) { 1090 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1091 lc += ulen; 1092 lulen -= ulen; 1093 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1094 rc += ulen; 1095 rulen -= ulen; 1096 duc = luc & ruc; 1097 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1098 } 1099 if (sv == left || sv == right) 1100 (void)sv_usepvn(sv, dcorig, needlen); 1101 SvCUR_set(sv, dc - dcorig); 1102 break; 1103 case OP_BIT_XOR: 1104 while (lulen && rulen) { 1105 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1106 lc += ulen; 1107 lulen -= ulen; 1108 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1109 rc += ulen; 1110 rulen -= ulen; 1111 duc = luc ^ ruc; 1112 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1113 } 1114 goto mop_up_utf; 1115 case OP_BIT_OR: 1116 while (lulen && rulen) { 1117 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1118 lc += ulen; 1119 lulen -= ulen; 1120 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1121 rc += ulen; 1122 rulen -= ulen; 1123 duc = luc | ruc; 1124 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1125 } 1126 mop_up_utf: 1127 if (rulen) 1128 dcsave = savepvn(rc, rulen); 1129 else if (lulen) 1130 dcsave = savepvn(lc, lulen); 1131 if (sv == left || sv == right) 1132 (void)sv_usepvn(sv, dcorig, needlen); /* uses Renew(); defaults to nomg */ 1133 SvCUR_set(sv, dc - dcorig); 1134 if (rulen) 1135 sv_catpvn_nomg(sv, dcsave, rulen); 1136 else if (lulen) 1137 sv_catpvn_nomg(sv, dcsave, lulen); 1138 else 1139 *SvEND(sv) = '\0'; 1140 Safefree(dcsave); 1141 break; 1142 default: 1143 if (sv == left || sv == right) 1144 Safefree(dcorig); 1145 Perl_croak(aTHX_ "panic: do_vop called for op %u (%s)", 1146 (unsigned)optype, PL_op_name[optype]); 1147 } 1148 SvUTF8_on(sv); 1149 goto finish; 1150 } 1151 else 1152 #ifdef LIBERAL 1153 if (len >= sizeof(long)*4 && 1154 !((unsigned long)dc % sizeof(long)) && 1155 !((unsigned long)lc % sizeof(long)) && 1156 !((unsigned long)rc % sizeof(long))) /* It's almost always aligned... */ 1157 { 1158 const STRLEN remainder = len % (sizeof(long)*4); 1159 len /= (sizeof(long)*4); 1160 1161 dl = (long*)dc; 1162 ll = (long*)lc; 1163 rl = (long*)rc; 1164 1165 switch (optype) { 1166 case OP_BIT_AND: 1167 while (len--) { 1168 *dl++ = *ll++ & *rl++; 1169 *dl++ = *ll++ & *rl++; 1170 *dl++ = *ll++ & *rl++; 1171 *dl++ = *ll++ & *rl++; 1172 } 1173 break; 1174 case OP_BIT_XOR: 1175 while (len--) { 1176 *dl++ = *ll++ ^ *rl++; 1177 *dl++ = *ll++ ^ *rl++; 1178 *dl++ = *ll++ ^ *rl++; 1179 *dl++ = *ll++ ^ *rl++; 1180 } 1181 break; 1182 case OP_BIT_OR: 1183 while (len--) { 1184 *dl++ = *ll++ | *rl++; 1185 *dl++ = *ll++ | *rl++; 1186 *dl++ = *ll++ | *rl++; 1187 *dl++ = *ll++ | *rl++; 1188 } 1189 } 1190 1191 dc = (char*)dl; 1192 lc = (char*)ll; 1193 rc = (char*)rl; 1194 1195 len = remainder; 1196 } 1197 #endif 1198 { 1199 switch (optype) { 1200 case OP_BIT_AND: 1201 while (len--) 1202 *dc++ = *lc++ & *rc++; 1203 *dc = '\0'; 1204 break; 1205 case OP_BIT_XOR: 1206 while (len--) 1207 *dc++ = *lc++ ^ *rc++; 1208 goto mop_up; 1209 case OP_BIT_OR: 1210 while (len--) 1211 *dc++ = *lc++ | *rc++; 1212 mop_up: 1213 len = lensave; 1214 if (rightlen > len) 1215 sv_catpvn_nomg(sv, rsave + len, rightlen - len); 1216 else if (leftlen > (STRLEN)len) 1217 sv_catpvn_nomg(sv, lsave + len, leftlen - len); 1218 else 1219 *SvEND(sv) = '\0'; 1220 break; 1221 } 1222 } 1223 finish: 1224 SvTAINT(sv); 1225 } 1226 1227 OP * 1228 Perl_do_kv(pTHX) 1229 { 1230 dVAR; 1231 dSP; 1232 HV * const keys = MUTABLE_HV(POPs); 1233 HE *entry; 1234 const I32 gimme = GIMME_V; 1235 const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV); 1236 /* op_type is OP_RKEYS/OP_RVALUES if pp_rkeys delegated to here */ 1237 const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS || PL_op->op_type == OP_RKEYS); 1238 const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES || PL_op->op_type == OP_RVALUES); 1239 1240 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1241 1242 if (gimme == G_VOID) 1243 RETURN; 1244 1245 if (gimme == G_SCALAR) { 1246 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1247 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 1248 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0); 1249 LvTYPE(ret) = 'k'; 1250 LvTARG(ret) = SvREFCNT_inc_simple(keys); 1251 PUSHs(ret); 1252 } 1253 else { 1254 IV i; 1255 dTARGET; 1256 1257 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) { 1258 i = HvUSEDKEYS(keys); 1259 } 1260 else { 1261 i = 0; 1262 while (hv_iternext(keys)) i++; 1263 } 1264 PUSHi( i ); 1265 } 1266 RETURN; 1267 } 1268 1269 EXTEND(SP, HvUSEDKEYS(keys) * (dokeys + dovalues)); 1270 1271 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */ 1272 while ((entry = hv_iternext(keys))) { 1273 SPAGAIN; 1274 if (dokeys) { 1275 SV* const sv = hv_iterkeysv(entry); 1276 XPUSHs(sv); /* won't clobber stack_sp */ 1277 } 1278 if (dovalues) { 1279 SV *tmpstr; 1280 PUTBACK; 1281 tmpstr = hv_iterval(keys,entry); 1282 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu", 1283 (unsigned long)HeHASH(entry), 1284 (int)HvMAX(keys)+1, 1285 (unsigned long)(HeHASH(entry) & HvMAX(keys)))); 1286 SPAGAIN; 1287 XPUSHs(tmpstr); 1288 } 1289 PUTBACK; 1290 } 1291 return NORMAL; 1292 } 1293 1294 /* 1295 * Local variables: 1296 * c-indentation-style: bsd 1297 * c-basic-offset: 4 1298 * indent-tabs-mode: nil 1299 * End: 1300 * 1301 * ex: set ts=8 sts=4 sw=4 et: 1302 */ 1303