1 /* doop.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'So that was the job I felt I had to do when I started,' thought Sam. 13 * 14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"] 15 */ 16 17 /* This file contains some common functions needed to carry out certain 18 * ops. For example, both pp_sprintf() and pp_prtf() call the function 19 * do_sprintf() found in this file. 20 */ 21 22 #include "EXTERN.h" 23 #define PERL_IN_DOOP_C 24 #include "perl.h" 25 26 #ifndef PERL_MICRO 27 #include <signal.h> 28 #endif 29 30 STATIC I32 31 S_do_trans_simple(pTHX_ SV * const sv) 32 { 33 I32 matches = 0; 34 STRLEN len; 35 U8 *s = (U8*)SvPV_nomg(sv,len); 36 U8 * const send = s+len; 37 const short * const tbl = (short*)cPVOP->op_pv; 38 39 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; 40 41 if (!tbl) 42 Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); 43 44 /* First, take care of non-UTF-8 input strings, because they're easy */ 45 if (!SvUTF8(sv)) { 46 while (s < send) { 47 const I32 ch = tbl[*s]; 48 if (ch >= 0) { 49 matches++; 50 *s = (U8)ch; 51 } 52 s++; 53 } 54 SvSETMAGIC(sv); 55 } 56 else { 57 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 58 U8 *d; 59 U8 *dstart; 60 61 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ 62 if (grows) 63 Newx(d, len*2+1, U8); 64 else 65 d = s; 66 dstart = d; 67 while (s < send) { 68 STRLEN ulen; 69 I32 ch; 70 71 /* Need to check this, otherwise 128..255 won't match */ 72 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 73 if (c < 0x100 && (ch = tbl[c]) >= 0) { 74 matches++; 75 d = uvchr_to_utf8(d, ch); 76 s += ulen; 77 } 78 else { /* No match -> copy */ 79 Move(s, d, ulen, U8); 80 d += ulen; 81 s += ulen; 82 } 83 } 84 if (grows) { 85 sv_setpvn(sv, (char*)dstart, d - dstart); 86 Safefree(dstart); 87 } 88 else { 89 *d = '\0'; 90 SvCUR_set(sv, d - dstart); 91 } 92 SvUTF8_on(sv); 93 SvSETMAGIC(sv); 94 } 95 return matches; 96 } 97 98 STATIC I32 99 S_do_trans_count(pTHX_ SV * const sv) 100 { 101 STRLEN len; 102 const U8 *s = (const U8*)SvPV_nomg_const(sv, len); 103 const U8 * const send = s + len; 104 I32 matches = 0; 105 const short * const tbl = (short*)cPVOP->op_pv; 106 107 PERL_ARGS_ASSERT_DO_TRANS_COUNT; 108 109 if (!tbl) 110 Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__); 111 112 if (!SvUTF8(sv)) { 113 while (s < send) { 114 if (tbl[*s++] >= 0) 115 matches++; 116 } 117 } 118 else { 119 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 120 while (s < send) { 121 STRLEN ulen; 122 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 123 if (c < 0x100) { 124 if (tbl[c] >= 0) 125 matches++; 126 } else if (complement) 127 matches++; 128 s += ulen; 129 } 130 } 131 132 return matches; 133 } 134 135 STATIC I32 136 S_do_trans_complex(pTHX_ SV * const sv) 137 { 138 STRLEN len; 139 U8 *s = (U8*)SvPV_nomg(sv, len); 140 U8 * const send = s+len; 141 I32 matches = 0; 142 const short * const tbl = (short*)cPVOP->op_pv; 143 144 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; 145 146 if (!tbl) 147 Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); 148 149 if (!SvUTF8(sv)) { 150 U8 *d = s; 151 U8 * const dstart = d; 152 153 if (PL_op->op_private & OPpTRANS_SQUASH) { 154 const U8* p = send; 155 while (s < send) { 156 const I32 ch = tbl[*s]; 157 if (ch >= 0) { 158 *d = (U8)ch; 159 matches++; 160 if (p != d - 1 || *p != *d) 161 p = d++; 162 } 163 else if (ch == -1) /* -1 is unmapped character */ 164 *d++ = *s; 165 else if (ch == -2) /* -2 is delete character */ 166 matches++; 167 s++; 168 } 169 } 170 else { 171 while (s < send) { 172 const I32 ch = tbl[*s]; 173 if (ch >= 0) { 174 matches++; 175 *d++ = (U8)ch; 176 } 177 else if (ch == -1) /* -1 is unmapped character */ 178 *d++ = *s; 179 else if (ch == -2) /* -2 is delete character */ 180 matches++; 181 s++; 182 } 183 } 184 *d = '\0'; 185 SvCUR_set(sv, d - dstart); 186 } 187 else { /* is utf8 */ 188 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 189 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 190 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 191 U8 *d; 192 U8 *dstart; 193 STRLEN rlen = 0; 194 195 if (grows) 196 Newx(d, len*2+1, U8); 197 else 198 d = s; 199 dstart = d; 200 if (complement && !del) 201 rlen = tbl[0x100]; 202 203 if (PL_op->op_private & OPpTRANS_SQUASH) { 204 UV pch = 0xfeedface; 205 while (s < send) { 206 STRLEN len; 207 const UV comp = utf8n_to_uvchr(s, send - s, &len, 208 UTF8_ALLOW_DEFAULT); 209 I32 ch; 210 211 if (comp > 0xff) { 212 if (!complement) { 213 Move(s, d, len, U8); 214 d += len; 215 } 216 else { 217 matches++; 218 if (!del) { 219 ch = (rlen == 0) ? (I32)comp : 220 (comp - 0x100 < rlen) ? 221 tbl[comp+1] : tbl[0x100+rlen]; 222 if ((UV)ch != pch) { 223 d = uvchr_to_utf8(d, ch); 224 pch = (UV)ch; 225 } 226 s += len; 227 continue; 228 } 229 } 230 } 231 else if ((ch = tbl[comp]) >= 0) { 232 matches++; 233 if ((UV)ch != pch) { 234 d = uvchr_to_utf8(d, ch); 235 pch = (UV)ch; 236 } 237 s += len; 238 continue; 239 } 240 else if (ch == -1) { /* -1 is unmapped character */ 241 Move(s, d, len, U8); 242 d += len; 243 } 244 else if (ch == -2) /* -2 is delete character */ 245 matches++; 246 s += len; 247 pch = 0xfeedface; 248 } 249 } 250 else { 251 while (s < send) { 252 STRLEN len; 253 const UV comp = utf8n_to_uvchr(s, send - s, &len, 254 UTF8_ALLOW_DEFAULT); 255 I32 ch; 256 if (comp > 0xff) { 257 if (!complement) { 258 Move(s, d, len, U8); 259 d += len; 260 } 261 else { 262 matches++; 263 if (!del) { 264 if (comp - 0x100 < rlen) 265 d = uvchr_to_utf8(d, tbl[comp+1]); 266 else 267 d = uvchr_to_utf8(d, tbl[0x100+rlen]); 268 } 269 } 270 } 271 else if ((ch = tbl[comp]) >= 0) { 272 d = uvchr_to_utf8(d, ch); 273 matches++; 274 } 275 else if (ch == -1) { /* -1 is unmapped character */ 276 Move(s, d, len, U8); 277 d += len; 278 } 279 else if (ch == -2) /* -2 is delete character */ 280 matches++; 281 s += len; 282 } 283 } 284 if (grows) { 285 sv_setpvn(sv, (char*)dstart, d - dstart); 286 Safefree(dstart); 287 } 288 else { 289 *d = '\0'; 290 SvCUR_set(sv, d - dstart); 291 } 292 SvUTF8_on(sv); 293 } 294 SvSETMAGIC(sv); 295 return matches; 296 } 297 298 STATIC I32 299 S_do_trans_simple_utf8(pTHX_ SV * const sv) 300 { 301 U8 *s; 302 U8 *send; 303 U8 *d; 304 U8 *start; 305 U8 *dstart, *dend; 306 I32 matches = 0; 307 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 308 STRLEN len; 309 SV* const rv = 310 #ifdef USE_ITHREADS 311 PAD_SVl(cPADOP->op_padix); 312 #else 313 MUTABLE_SV(cSVOP->op_sv); 314 #endif 315 HV* const hv = MUTABLE_HV(SvRV(rv)); 316 SV* const * svp = hv_fetchs(hv, "NONE", FALSE); 317 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 318 const UV extra = none + 1; 319 UV final = 0; 320 U8 hibit = 0; 321 322 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8; 323 324 s = (U8*)SvPV_nomg(sv, len); 325 if (!SvUTF8(sv)) { 326 const U8 *t = s; 327 const U8 * const e = s + len; 328 while (t < e) { 329 const U8 ch = *t++; 330 hibit = !NATIVE_BYTE_IS_INVARIANT(ch); 331 if (hibit) { 332 s = bytes_to_utf8(s, &len); 333 break; 334 } 335 } 336 } 337 send = s + len; 338 start = s; 339 340 svp = hv_fetchs(hv, "FINAL", FALSE); 341 if (svp) 342 final = SvUV(*svp); 343 344 if (grows) { 345 /* d needs to be bigger than s, in case e.g. upgrading is required */ 346 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 347 dend = d + len * 3; 348 dstart = d; 349 } 350 else { 351 dstart = d = s; 352 dend = d + len; 353 } 354 355 while (s < send) { 356 const UV uv = swash_fetch(rv, s, TRUE); 357 if (uv < none) { 358 s += UTF8SKIP(s); 359 matches++; 360 d = uvchr_to_utf8(d, uv); 361 } 362 else if (uv == none) { 363 const int i = UTF8SKIP(s); 364 Move(s, d, i, U8); 365 d += i; 366 s += i; 367 } 368 else if (uv == extra) { 369 s += UTF8SKIP(s); 370 matches++; 371 d = uvchr_to_utf8(d, final); 372 } 373 else 374 s += UTF8SKIP(s); 375 376 if (d > dend) { 377 const STRLEN clen = d - dstart; 378 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 379 if (!grows) 380 Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__); 381 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 382 d = dstart + clen; 383 dend = dstart + nlen; 384 } 385 } 386 if (grows || hibit) { 387 sv_setpvn(sv, (char*)dstart, d - dstart); 388 Safefree(dstart); 389 if (grows && hibit) 390 Safefree(start); 391 } 392 else { 393 *d = '\0'; 394 SvCUR_set(sv, d - dstart); 395 } 396 SvSETMAGIC(sv); 397 SvUTF8_on(sv); 398 399 return matches; 400 } 401 402 STATIC I32 403 S_do_trans_count_utf8(pTHX_ SV * const sv) 404 { 405 const U8 *s; 406 const U8 *start = NULL; 407 const U8 *send; 408 I32 matches = 0; 409 STRLEN len; 410 SV* const rv = 411 #ifdef USE_ITHREADS 412 PAD_SVl(cPADOP->op_padix); 413 #else 414 MUTABLE_SV(cSVOP->op_sv); 415 #endif 416 HV* const hv = MUTABLE_HV(SvRV(rv)); 417 SV* const * const svp = hv_fetchs(hv, "NONE", FALSE); 418 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 419 const UV extra = none + 1; 420 U8 hibit = 0; 421 422 PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8; 423 424 s = (const U8*)SvPV_nomg_const(sv, len); 425 if (!SvUTF8(sv)) { 426 const U8 *t = s; 427 const U8 * const e = s + len; 428 while (t < e) { 429 const U8 ch = *t++; 430 hibit = !NATIVE_BYTE_IS_INVARIANT(ch); 431 if (hibit) { 432 start = s = bytes_to_utf8(s, &len); 433 break; 434 } 435 } 436 } 437 send = s + len; 438 439 while (s < send) { 440 const UV uv = swash_fetch(rv, s, TRUE); 441 if (uv < none || uv == extra) 442 matches++; 443 s += UTF8SKIP(s); 444 } 445 if (hibit) 446 Safefree(start); 447 448 return matches; 449 } 450 451 STATIC I32 452 S_do_trans_complex_utf8(pTHX_ SV * const sv) 453 { 454 U8 *start, *send; 455 U8 *d; 456 I32 matches = 0; 457 const I32 squash = PL_op->op_private & OPpTRANS_SQUASH; 458 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 459 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 460 SV* const rv = 461 #ifdef USE_ITHREADS 462 PAD_SVl(cPADOP->op_padix); 463 #else 464 MUTABLE_SV(cSVOP->op_sv); 465 #endif 466 HV * const hv = MUTABLE_HV(SvRV(rv)); 467 SV * const *svp = hv_fetchs(hv, "NONE", FALSE); 468 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 469 const UV extra = none + 1; 470 UV final = 0; 471 bool havefinal = FALSE; 472 STRLEN len; 473 U8 *dstart, *dend; 474 U8 hibit = 0; 475 U8 *s = (U8*)SvPV_nomg(sv, len); 476 477 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8; 478 479 if (!SvUTF8(sv)) { 480 const U8 *t = s; 481 const U8 * const e = s + len; 482 while (t < e) { 483 const U8 ch = *t++; 484 hibit = !NATIVE_BYTE_IS_INVARIANT(ch); 485 if (hibit) { 486 s = bytes_to_utf8(s, &len); 487 break; 488 } 489 } 490 } 491 send = s + len; 492 start = s; 493 494 svp = hv_fetchs(hv, "FINAL", FALSE); 495 if (svp) { 496 final = SvUV(*svp); 497 havefinal = TRUE; 498 } 499 500 if (grows) { 501 /* d needs to be bigger than s, in case e.g. upgrading is required */ 502 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 503 dend = d + len * 3; 504 dstart = d; 505 } 506 else { 507 dstart = d = s; 508 dend = d + len; 509 } 510 511 if (squash) { 512 UV puv = 0xfeedface; 513 while (s < send) { 514 UV uv = swash_fetch(rv, s, TRUE); 515 516 if (d > dend) { 517 const STRLEN clen = d - dstart; 518 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 519 if (!grows) 520 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 521 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 522 d = dstart + clen; 523 dend = dstart + nlen; 524 } 525 if (uv < none) { 526 matches++; 527 s += UTF8SKIP(s); 528 if (uv != puv) { 529 d = uvchr_to_utf8(d, uv); 530 puv = uv; 531 } 532 continue; 533 } 534 else if (uv == none) { /* "none" is unmapped character */ 535 const int i = UTF8SKIP(s); 536 Move(s, d, i, U8); 537 d += i; 538 s += i; 539 puv = 0xfeedface; 540 continue; 541 } 542 else if (uv == extra && !del) { 543 matches++; 544 if (havefinal) { 545 s += UTF8SKIP(s); 546 if (puv != final) { 547 d = uvchr_to_utf8(d, final); 548 puv = final; 549 } 550 } 551 else { 552 STRLEN len; 553 uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT); 554 if (uv != puv) { 555 Move(s, d, len, U8); 556 d += len; 557 puv = uv; 558 } 559 s += len; 560 } 561 continue; 562 } 563 matches++; /* "none+1" is delete character */ 564 s += UTF8SKIP(s); 565 } 566 } 567 else { 568 while (s < send) { 569 const UV uv = swash_fetch(rv, s, TRUE); 570 if (d > dend) { 571 const STRLEN clen = d - dstart; 572 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 573 if (!grows) 574 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 575 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 576 d = dstart + clen; 577 dend = dstart + nlen; 578 } 579 if (uv < none) { 580 matches++; 581 s += UTF8SKIP(s); 582 d = uvchr_to_utf8(d, uv); 583 continue; 584 } 585 else if (uv == none) { /* "none" is unmapped character */ 586 const int i = UTF8SKIP(s); 587 Move(s, d, i, U8); 588 d += i; 589 s += i; 590 continue; 591 } 592 else if (uv == extra && !del) { 593 matches++; 594 s += UTF8SKIP(s); 595 d = uvchr_to_utf8(d, final); 596 continue; 597 } 598 matches++; /* "none+1" is delete character */ 599 s += UTF8SKIP(s); 600 } 601 } 602 if (grows || hibit) { 603 sv_setpvn(sv, (char*)dstart, d - dstart); 604 Safefree(dstart); 605 if (grows && hibit) 606 Safefree(start); 607 } 608 else { 609 *d = '\0'; 610 SvCUR_set(sv, d - dstart); 611 } 612 SvUTF8_on(sv); 613 SvSETMAGIC(sv); 614 615 return matches; 616 } 617 618 I32 619 Perl_do_trans(pTHX_ SV *sv) 620 { 621 STRLEN len; 622 const I32 flags = PL_op->op_private; 623 const I32 hasutf = flags & (OPpTRANS_FROM_UTF | OPpTRANS_TO_UTF); 624 625 PERL_ARGS_ASSERT_DO_TRANS; 626 627 if (SvREADONLY(sv) && !(flags & OPpTRANS_IDENTICAL)) { 628 Perl_croak_no_modify(); 629 } 630 (void)SvPV_const(sv, len); 631 if (!len) 632 return 0; 633 if (!(flags & OPpTRANS_IDENTICAL)) { 634 if (!SvPOKp(sv) || SvTHINKFIRST(sv)) 635 (void)SvPV_force_nomg(sv, len); 636 (void)SvPOK_only_UTF8(sv); 637 } 638 639 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); 640 641 /* If we use only OPpTRANS_IDENTICAL to bypass the READONLY check, 642 * we must also rely on it to choose the readonly strategy. 643 */ 644 if (flags & OPpTRANS_IDENTICAL) { 645 return hasutf ? do_trans_count_utf8(sv) : do_trans_count(sv); 646 } else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { 647 return hasutf ? do_trans_complex_utf8(sv) : do_trans_complex(sv); 648 } else { 649 return hasutf ? do_trans_simple_utf8(sv) : do_trans_simple(sv); 650 } 651 } 652 653 void 654 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) 655 { 656 SV ** const oldmark = mark; 657 I32 items = sp - mark; 658 STRLEN len; 659 STRLEN delimlen; 660 const char * const delims = SvPV_const(delim, delimlen); 661 662 PERL_ARGS_ASSERT_DO_JOIN; 663 664 mark++; 665 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 666 SvUPGRADE(sv, SVt_PV); 667 if (SvLEN(sv) < len + items) { /* current length is way too short */ 668 while (items-- > 0) { 669 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 670 STRLEN tmplen; 671 SvPV_const(*mark, tmplen); 672 len += tmplen; 673 } 674 mark++; 675 } 676 SvGROW(sv, len + 1); /* so try to pre-extend */ 677 678 mark = oldmark; 679 items = sp - mark; 680 ++mark; 681 } 682 683 sv_setpvs(sv, ""); 684 /* sv_setpv retains old UTF8ness [perl #24846] */ 685 SvUTF8_off(sv); 686 687 if (TAINTING_get && SvMAGICAL(sv)) 688 SvTAINTED_off(sv); 689 690 if (items-- > 0) { 691 if (*mark) 692 sv_catsv(sv, *mark); 693 mark++; 694 } 695 696 if (delimlen) { 697 const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES; 698 for (; items > 0; items--,mark++) { 699 STRLEN len; 700 const char *s; 701 sv_catpvn_flags(sv,delims,delimlen,delimflag); 702 s = SvPV_const(*mark,len); 703 sv_catpvn_flags(sv,s,len, 704 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 705 } 706 } 707 else { 708 for (; items > 0; items--,mark++) 709 { 710 STRLEN len; 711 const char *s = SvPV_const(*mark,len); 712 sv_catpvn_flags(sv,s,len, 713 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 714 } 715 } 716 SvSETMAGIC(sv); 717 } 718 719 void 720 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) 721 { 722 STRLEN patlen; 723 const char * const pat = SvPV_const(*sarg, patlen); 724 bool do_taint = FALSE; 725 726 PERL_ARGS_ASSERT_DO_SPRINTF; 727 728 if (SvTAINTED(*sarg)) 729 TAINT_PROPER( 730 (PL_op && PL_op->op_type < OP_max) 731 ? (PL_op->op_type == OP_PRTF) 732 ? "printf" 733 : PL_op_name[PL_op->op_type] 734 : "(unknown)" 735 ); 736 SvUTF8_off(sv); 737 if (DO_UTF8(*sarg)) 738 SvUTF8_on(sv); 739 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint); 740 SvSETMAGIC(sv); 741 if (do_taint) 742 SvTAINTED_on(sv); 743 } 744 745 /* currently converts input to bytes if possible, but doesn't sweat failure */ 746 UV 747 Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) 748 { 749 STRLEN srclen, len, uoffset, bitoffs = 0; 750 const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET) 751 ? SV_UNDEF_RETURNS_NULL : 0); 752 unsigned char *s = (unsigned char *) 753 SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC)); 754 UV retnum = 0; 755 756 if (!s) { 757 s = (unsigned char *)""; 758 } 759 760 PERL_ARGS_ASSERT_DO_VECGET; 761 762 if (offset < 0) 763 return 0; 764 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 765 Perl_croak(aTHX_ "Illegal number of bits in vec"); 766 767 if (SvUTF8(sv)) { 768 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); 769 /* PVX may have changed */ 770 s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags); 771 } 772 773 if (size < 8) { 774 bitoffs = ((offset%8)*size)%8; 775 uoffset = offset/(8/size); 776 } 777 else if (size > 8) 778 uoffset = offset*(size/8); 779 else 780 uoffset = offset; 781 782 len = uoffset + (bitoffs + size + 7)/8; /* required number of bytes */ 783 if (len > srclen) { 784 if (size <= 8) 785 retnum = 0; 786 else { 787 if (size == 16) { 788 if (uoffset >= srclen) 789 retnum = 0; 790 else 791 retnum = (UV) s[uoffset] << 8; 792 } 793 else if (size == 32) { 794 if (uoffset >= srclen) 795 retnum = 0; 796 else if (uoffset + 1 >= srclen) 797 retnum = 798 ((UV) s[uoffset ] << 24); 799 else if (uoffset + 2 >= srclen) 800 retnum = 801 ((UV) s[uoffset ] << 24) + 802 ((UV) s[uoffset + 1] << 16); 803 else 804 retnum = 805 ((UV) s[uoffset ] << 24) + 806 ((UV) s[uoffset + 1] << 16) + 807 ( s[uoffset + 2] << 8); 808 } 809 #ifdef UV_IS_QUAD 810 else if (size == 64) { 811 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 812 "Bit vector size > 32 non-portable"); 813 if (uoffset >= srclen) 814 retnum = 0; 815 else if (uoffset + 1 >= srclen) 816 retnum = 817 (UV) s[uoffset ] << 56; 818 else if (uoffset + 2 >= srclen) 819 retnum = 820 ((UV) s[uoffset ] << 56) + 821 ((UV) s[uoffset + 1] << 48); 822 else if (uoffset + 3 >= srclen) 823 retnum = 824 ((UV) s[uoffset ] << 56) + 825 ((UV) s[uoffset + 1] << 48) + 826 ((UV) s[uoffset + 2] << 40); 827 else if (uoffset + 4 >= srclen) 828 retnum = 829 ((UV) s[uoffset ] << 56) + 830 ((UV) s[uoffset + 1] << 48) + 831 ((UV) s[uoffset + 2] << 40) + 832 ((UV) s[uoffset + 3] << 32); 833 else if (uoffset + 5 >= srclen) 834 retnum = 835 ((UV) s[uoffset ] << 56) + 836 ((UV) s[uoffset + 1] << 48) + 837 ((UV) s[uoffset + 2] << 40) + 838 ((UV) s[uoffset + 3] << 32) + 839 ((UV) s[uoffset + 4] << 24); 840 else if (uoffset + 6 >= srclen) 841 retnum = 842 ((UV) s[uoffset ] << 56) + 843 ((UV) s[uoffset + 1] << 48) + 844 ((UV) s[uoffset + 2] << 40) + 845 ((UV) s[uoffset + 3] << 32) + 846 ((UV) s[uoffset + 4] << 24) + 847 ((UV) s[uoffset + 5] << 16); 848 else 849 retnum = 850 ((UV) s[uoffset ] << 56) + 851 ((UV) s[uoffset + 1] << 48) + 852 ((UV) s[uoffset + 2] << 40) + 853 ((UV) s[uoffset + 3] << 32) + 854 ((UV) s[uoffset + 4] << 24) + 855 ((UV) s[uoffset + 5] << 16) + 856 ((UV) s[uoffset + 6] << 8); 857 } 858 #endif 859 } 860 } 861 else if (size < 8) 862 retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1); 863 else { 864 if (size == 8) 865 retnum = s[uoffset]; 866 else if (size == 16) 867 retnum = 868 ((UV) s[uoffset] << 8) + 869 s[uoffset + 1]; 870 else if (size == 32) 871 retnum = 872 ((UV) s[uoffset ] << 24) + 873 ((UV) s[uoffset + 1] << 16) + 874 ( s[uoffset + 2] << 8) + 875 s[uoffset + 3]; 876 #ifdef UV_IS_QUAD 877 else if (size == 64) { 878 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 879 "Bit vector size > 32 non-portable"); 880 retnum = 881 ((UV) s[uoffset ] << 56) + 882 ((UV) s[uoffset + 1] << 48) + 883 ((UV) s[uoffset + 2] << 40) + 884 ((UV) s[uoffset + 3] << 32) + 885 ((UV) s[uoffset + 4] << 24) + 886 ((UV) s[uoffset + 5] << 16) + 887 ( s[uoffset + 6] << 8) + 888 s[uoffset + 7]; 889 } 890 #endif 891 } 892 893 return retnum; 894 } 895 896 /* currently converts input to bytes if possible but doesn't sweat failures, 897 * although it does ensure that the string it clobbers is not marked as 898 * utf8-valid any more 899 */ 900 void 901 Perl_do_vecset(pTHX_ SV *sv) 902 { 903 SSize_t offset, bitoffs = 0; 904 int size; 905 unsigned char *s; 906 UV lval; 907 I32 mask; 908 STRLEN targlen; 909 STRLEN len; 910 SV * const targ = LvTARG(sv); 911 912 PERL_ARGS_ASSERT_DO_VECSET; 913 914 if (!targ) 915 return; 916 s = (unsigned char*)SvPV_force_flags(targ, targlen, 917 SV_GMAGIC | SV_UNDEF_RETURNS_NULL); 918 if (SvUTF8(targ)) { 919 /* This is handled by the SvPOK_only below... 920 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE)) 921 SvUTF8_off(targ); 922 */ 923 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE); 924 } 925 926 (void)SvPOK_only(targ); 927 lval = SvUV(sv); 928 offset = LvTARGOFF(sv); 929 if (offset < 0) 930 Perl_croak(aTHX_ "Negative offset to vec in lvalue context"); 931 size = LvTARGLEN(sv); 932 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 933 Perl_croak(aTHX_ "Illegal number of bits in vec"); 934 935 if (size < 8) { 936 bitoffs = ((offset%8)*size)%8; 937 offset /= 8/size; 938 } 939 else if (size > 8) 940 offset *= size/8; 941 942 len = offset + (bitoffs + size + 7)/8; /* required number of bytes */ 943 if (len > targlen) { 944 s = (unsigned char*)SvGROW(targ, len + 1); 945 (void)memzero((char *)(s + targlen), len - targlen + 1); 946 SvCUR_set(targ, len); 947 } 948 949 if (size < 8) { 950 mask = (1 << size) - 1; 951 lval &= mask; 952 s[offset] &= ~(mask << bitoffs); 953 s[offset] |= lval << bitoffs; 954 } 955 else { 956 if (size == 8) 957 s[offset ] = (U8)( lval & 0xff); 958 else if (size == 16) { 959 s[offset ] = (U8)((lval >> 8) & 0xff); 960 s[offset+1] = (U8)( lval & 0xff); 961 } 962 else if (size == 32) { 963 s[offset ] = (U8)((lval >> 24) & 0xff); 964 s[offset+1] = (U8)((lval >> 16) & 0xff); 965 s[offset+2] = (U8)((lval >> 8) & 0xff); 966 s[offset+3] = (U8)( lval & 0xff); 967 } 968 #ifdef UV_IS_QUAD 969 else if (size == 64) { 970 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 971 "Bit vector size > 32 non-portable"); 972 s[offset ] = (U8)((lval >> 56) & 0xff); 973 s[offset+1] = (U8)((lval >> 48) & 0xff); 974 s[offset+2] = (U8)((lval >> 40) & 0xff); 975 s[offset+3] = (U8)((lval >> 32) & 0xff); 976 s[offset+4] = (U8)((lval >> 24) & 0xff); 977 s[offset+5] = (U8)((lval >> 16) & 0xff); 978 s[offset+6] = (U8)((lval >> 8) & 0xff); 979 s[offset+7] = (U8)( lval & 0xff); 980 } 981 #endif 982 } 983 SvSETMAGIC(targ); 984 } 985 986 void 987 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 988 { 989 #ifdef LIBERAL 990 long *dl; 991 long *ll; 992 long *rl; 993 #endif 994 char *dc; 995 STRLEN leftlen; 996 STRLEN rightlen; 997 const char *lc; 998 const char *rc; 999 STRLEN len; 1000 STRLEN lensave; 1001 const char *lsave; 1002 const char *rsave; 1003 bool left_utf; 1004 bool right_utf; 1005 bool do_warn_above_ff = ckWARN_d(WARN_DEPRECATED); 1006 STRLEN needlen = 0; 1007 1008 PERL_ARGS_ASSERT_DO_VOP; 1009 1010 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv))) 1011 sv_setpvs(sv, ""); /* avoid undef warning on |= and ^= */ 1012 if (sv == left) { 1013 lsave = lc = SvPV_force_nomg(left, leftlen); 1014 } 1015 else { 1016 lsave = lc = SvPV_nomg_const(left, leftlen); 1017 SvPV_force_nomg_nolen(sv); 1018 } 1019 rsave = rc = SvPV_nomg_const(right, rightlen); 1020 1021 /* This needs to come after SvPV to ensure that string overloading has 1022 fired off. */ 1023 1024 left_utf = DO_UTF8(left); 1025 right_utf = DO_UTF8(right); 1026 1027 if (left_utf && !right_utf) { 1028 /* Avoid triggering overloading again by using temporaries. 1029 Maybe there should be a variant of sv_utf8_upgrade that takes pvn 1030 */ 1031 right = newSVpvn_flags(rsave, rightlen, SVs_TEMP); 1032 sv_utf8_upgrade(right); 1033 rsave = rc = SvPV_nomg_const(right, rightlen); 1034 right_utf = TRUE; 1035 } 1036 else if (!left_utf && right_utf) { 1037 left = newSVpvn_flags(lsave, leftlen, SVs_TEMP); 1038 sv_utf8_upgrade(left); 1039 lsave = lc = SvPV_nomg_const(left, leftlen); 1040 left_utf = TRUE; 1041 } 1042 1043 len = leftlen < rightlen ? leftlen : rightlen; 1044 lensave = len; 1045 SvCUR_set(sv, len); 1046 (void)SvPOK_only(sv); 1047 if ((left_utf || right_utf) && (sv == left || sv == right)) { 1048 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; 1049 Newxz(dc, needlen + 1, char); 1050 } 1051 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1052 dc = SvPV_force_nomg_nolen(sv); 1053 if (SvLEN(sv) < len + 1) { 1054 dc = SvGROW(sv, len + 1); 1055 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1056 } 1057 if (optype != OP_BIT_AND && (left_utf || right_utf)) 1058 dc = SvGROW(sv, leftlen + rightlen + 1); 1059 } 1060 else { 1061 needlen = optype == OP_BIT_AND 1062 ? len : (leftlen > rightlen ? leftlen : rightlen); 1063 Newxz(dc, needlen + 1, char); 1064 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL); 1065 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1066 } 1067 if (left_utf || right_utf) { 1068 UV duc, luc, ruc; 1069 char *dcorig = dc; 1070 char *dcsave = NULL; 1071 STRLEN lulen = leftlen; 1072 STRLEN rulen = rightlen; 1073 STRLEN ulen; 1074 1075 switch (optype) { 1076 case OP_BIT_AND: 1077 while (lulen && rulen) { 1078 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1079 lc += ulen; 1080 lulen -= ulen; 1081 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1082 rc += ulen; 1083 rulen -= ulen; 1084 duc = luc & ruc; 1085 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1086 if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { 1087 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 1088 deprecated_above_ff_msg, PL_op_desc[optype]); 1089 /* Warn only once per operation */ 1090 do_warn_above_ff = FALSE; 1091 } 1092 } 1093 if (sv == left || sv == right) 1094 (void)sv_usepvn(sv, dcorig, needlen); 1095 SvCUR_set(sv, dc - dcorig); 1096 break; 1097 case OP_BIT_XOR: 1098 while (lulen && rulen) { 1099 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1100 lc += ulen; 1101 lulen -= ulen; 1102 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1103 rc += ulen; 1104 rulen -= ulen; 1105 duc = luc ^ ruc; 1106 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1107 if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { 1108 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 1109 deprecated_above_ff_msg, PL_op_desc[optype]); 1110 do_warn_above_ff = FALSE; 1111 } 1112 } 1113 goto mop_up_utf; 1114 case OP_BIT_OR: 1115 while (lulen && rulen) { 1116 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1117 lc += ulen; 1118 lulen -= ulen; 1119 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1120 rc += ulen; 1121 rulen -= ulen; 1122 duc = luc | ruc; 1123 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1124 if (do_warn_above_ff && (luc > 0xff || ruc > 0xff)) { 1125 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), 1126 deprecated_above_ff_msg, PL_op_desc[optype]); 1127 do_warn_above_ff = FALSE; 1128 } 1129 } 1130 mop_up_utf: 1131 if (rulen) 1132 dcsave = savepvn(rc, rulen); 1133 else if (lulen) 1134 dcsave = savepvn(lc, lulen); 1135 if (sv == left || sv == right) 1136 (void)sv_usepvn(sv, dcorig, needlen); /* uses Renew(); defaults to nomg */ 1137 SvCUR_set(sv, dc - dcorig); 1138 if (rulen) 1139 sv_catpvn_nomg(sv, dcsave, rulen); 1140 else if (lulen) 1141 sv_catpvn_nomg(sv, dcsave, lulen); 1142 else 1143 *SvEND(sv) = '\0'; 1144 Safefree(dcsave); 1145 break; 1146 default: 1147 if (sv == left || sv == right) 1148 Safefree(dcorig); 1149 Perl_croak(aTHX_ "panic: do_vop called for op %u (%s)", 1150 (unsigned)optype, PL_op_name[optype]); 1151 } 1152 SvUTF8_on(sv); 1153 goto finish; 1154 } 1155 else 1156 #ifdef LIBERAL 1157 if (len >= sizeof(long)*4 && 1158 !((unsigned long)dc % sizeof(long)) && 1159 !((unsigned long)lc % sizeof(long)) && 1160 !((unsigned long)rc % sizeof(long))) /* It's almost always aligned... */ 1161 { 1162 const STRLEN remainder = len % (sizeof(long)*4); 1163 len /= (sizeof(long)*4); 1164 1165 dl = (long*)dc; 1166 ll = (long*)lc; 1167 rl = (long*)rc; 1168 1169 switch (optype) { 1170 case OP_BIT_AND: 1171 while (len--) { 1172 *dl++ = *ll++ & *rl++; 1173 *dl++ = *ll++ & *rl++; 1174 *dl++ = *ll++ & *rl++; 1175 *dl++ = *ll++ & *rl++; 1176 } 1177 break; 1178 case OP_BIT_XOR: 1179 while (len--) { 1180 *dl++ = *ll++ ^ *rl++; 1181 *dl++ = *ll++ ^ *rl++; 1182 *dl++ = *ll++ ^ *rl++; 1183 *dl++ = *ll++ ^ *rl++; 1184 } 1185 break; 1186 case OP_BIT_OR: 1187 while (len--) { 1188 *dl++ = *ll++ | *rl++; 1189 *dl++ = *ll++ | *rl++; 1190 *dl++ = *ll++ | *rl++; 1191 *dl++ = *ll++ | *rl++; 1192 } 1193 } 1194 1195 dc = (char*)dl; 1196 lc = (char*)ll; 1197 rc = (char*)rl; 1198 1199 len = remainder; 1200 } 1201 #endif 1202 { 1203 switch (optype) { 1204 case OP_BIT_AND: 1205 while (len--) 1206 *dc++ = *lc++ & *rc++; 1207 *dc = '\0'; 1208 break; 1209 case OP_BIT_XOR: 1210 while (len--) 1211 *dc++ = *lc++ ^ *rc++; 1212 goto mop_up; 1213 case OP_BIT_OR: 1214 while (len--) 1215 *dc++ = *lc++ | *rc++; 1216 mop_up: 1217 len = lensave; 1218 if (rightlen > len) 1219 sv_catpvn_nomg(sv, rsave + len, rightlen - len); 1220 else if (leftlen > (STRLEN)len) 1221 sv_catpvn_nomg(sv, lsave + len, leftlen - len); 1222 else 1223 *SvEND(sv) = '\0'; 1224 break; 1225 } 1226 } 1227 finish: 1228 SvTAINT(sv); 1229 } 1230 1231 1232 /* used for: pp_keys(), pp_values() */ 1233 1234 OP * 1235 Perl_do_kv(pTHX) 1236 { 1237 dSP; 1238 HV * const keys = MUTABLE_HV(POPs); 1239 HE *entry; 1240 SSize_t extend_size; 1241 const U8 gimme = GIMME_V; 1242 const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV); 1243 /* op_type is OP_RKEYS/OP_RVALUES if pp_rkeys delegated to here */ 1244 const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS); 1245 const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES); 1246 1247 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1248 1249 if (gimme == G_VOID) 1250 RETURN; 1251 1252 if (gimme == G_SCALAR) { 1253 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1254 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 1255 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0); 1256 LvTYPE(ret) = 'k'; 1257 LvTARG(ret) = SvREFCNT_inc_simple(keys); 1258 PUSHs(ret); 1259 } 1260 else { 1261 IV i; 1262 dTARGET; 1263 1264 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) { 1265 i = HvUSEDKEYS(keys); 1266 } 1267 else { 1268 i = 0; 1269 while (hv_iternext(keys)) i++; 1270 } 1271 PUSHi( i ); 1272 } 1273 RETURN; 1274 } 1275 1276 /* 2*HvUSEDKEYS() should never be big enough to truncate or wrap */ 1277 assert(HvUSEDKEYS(keys) <= (SSize_t_MAX >> 1)); 1278 extend_size = (SSize_t)HvUSEDKEYS(keys) * (dokeys + dovalues); 1279 EXTEND(SP, extend_size); 1280 1281 while ((entry = hv_iternext(keys))) { 1282 if (dokeys) { 1283 SV* const sv = hv_iterkeysv(entry); 1284 XPUSHs(sv); 1285 } 1286 if (dovalues) { 1287 SV *tmpstr = hv_iterval(keys,entry); 1288 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu", 1289 (unsigned long)HeHASH(entry), 1290 (int)HvMAX(keys)+1, 1291 (unsigned long)(HeHASH(entry) & HvMAX(keys)))); 1292 XPUSHs(tmpstr); 1293 } 1294 } 1295 RETURN; 1296 } 1297 1298 /* 1299 * ex: set ts=8 sts=4 sw=4 et: 1300 */ 1301