1 /* doop.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'So that was the job I felt I had to do when I started,' thought Sam. 13 * 14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"] 15 */ 16 17 /* This file contains some common functions needed to carry out certain 18 * ops. For example, both pp_sprintf() and pp_prtf() call the function 19 * do_sprintf() found in this file. 20 */ 21 22 #include "EXTERN.h" 23 #define PERL_IN_DOOP_C 24 #include "perl.h" 25 #include "invlist_inline.h" 26 27 #include <signal.h> 28 29 30 /* Helper function for do_trans(). 31 * Handles cases where the search and replacement charlists aren't UTF-8, 32 * aren't identical, and neither the /d nor /s flag is present. 33 * 34 * sv may or may not be utf8. Note that no code point above 255 can possibly 35 * be in the to-translate set 36 */ 37 38 STATIC Size_t 39 S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl) 40 { 41 Size_t matches = 0; 42 STRLEN len; 43 U8 *s = (U8*)SvPV_nomg(sv,len); 44 U8 * const send = s+len; 45 46 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; 47 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_simple:" 48 " input sv:\n", 49 __FILE__, __LINE__)); 50 DEBUG_y(sv_dump(sv)); 51 52 /* First, take care of non-UTF-8 input strings, because they're easy */ 53 if (!SvUTF8(sv)) { 54 while (s < send) { 55 const short ch = tbl->map[*s]; 56 if (ch >= 0) { 57 matches++; 58 *s = (U8)ch; 59 } 60 s++; 61 } 62 SvSETMAGIC(sv); 63 } 64 else { 65 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); 66 U8 *d; 67 U8 *dstart; 68 69 /* Allow for worst-case expansion: Each input byte can become 2. For a 70 * given input character, this happens when it occupies a single byte 71 * under UTF-8, but is to be translated to something that occupies two: 72 * $_="a".chr(400); tr/a/\xFE/, FE needs encoding. */ 73 if (grows) 74 Newx(d, len*2+1, U8); 75 else 76 d = s; 77 dstart = d; 78 while (s < send) { 79 STRLEN ulen; 80 short ch; 81 82 /* Need to check this, otherwise 128..255 won't match */ 83 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 84 if (c < 0x100 && (ch = tbl->map[c]) >= 0) { 85 matches++; 86 d = uvchr_to_utf8(d, (UV)ch); 87 s += ulen; 88 } 89 else { /* No match -> copy */ 90 Move(s, d, ulen, U8); 91 d += ulen; 92 s += ulen; 93 } 94 } 95 if (grows) { 96 sv_setpvn(sv, (char*)dstart, d - dstart); 97 Safefree(dstart); 98 } 99 else { 100 *d = '\0'; 101 SvCUR_set(sv, d - dstart); 102 } 103 SvUTF8_on(sv); 104 SvSETMAGIC(sv); 105 } 106 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 107 __FILE__, __LINE__, matches)); 108 DEBUG_y(sv_dump(sv)); 109 return matches; 110 } 111 112 113 /* Helper function for do_trans(). 114 * Handles cases where the search and replacement charlists are identical and 115 * non-utf8: so the string isn't modified, and only a count of modifiable 116 * chars is needed. 117 * 118 * Note that it doesn't handle /d or /s, since these modify the string even if 119 * the replacement list is empty. 120 * 121 * sv may or may not be utf8. Note that no code point above 255 can possibly 122 * be in the to-translate set 123 */ 124 125 STATIC Size_t 126 S_do_trans_count(pTHX_ SV * const sv, const OPtrans_map * const tbl) 127 { 128 STRLEN len; 129 const U8 *s = (const U8*)SvPV_nomg_const(sv, len); 130 const U8 * const send = s + len; 131 Size_t matches = 0; 132 133 PERL_ARGS_ASSERT_DO_TRANS_COUNT; 134 135 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_count:" 136 " input sv:\n", 137 __FILE__, __LINE__)); 138 DEBUG_y(sv_dump(sv)); 139 140 if (!SvUTF8(sv)) { 141 while (s < send) { 142 if (tbl->map[*s++] >= 0) 143 matches++; 144 } 145 } 146 else { 147 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT); 148 while (s < send) { 149 STRLEN ulen; 150 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 151 if (c < 0x100) { 152 if (tbl->map[c] >= 0) 153 matches++; 154 } else if (complement) 155 matches++; 156 s += ulen; 157 } 158 } 159 160 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: count returning %zu\n", 161 __FILE__, __LINE__, matches)); 162 return matches; 163 } 164 165 166 /* Helper function for do_trans(). 167 * Handles cases where the search and replacement charlists aren't identical 168 * and both are non-utf8, and one or both of /d, /s is specified. 169 * 170 * sv may or may not be utf8. Note that no code point above 255 can possibly 171 * be in the to-translate set 172 */ 173 174 STATIC Size_t 175 S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl) 176 { 177 STRLEN len; 178 U8 *s = (U8*)SvPV_nomg(sv, len); 179 U8 * const send = s+len; 180 Size_t matches = 0; 181 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT); 182 183 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; 184 185 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_complex:" 186 " input sv:\n", 187 __FILE__, __LINE__)); 188 DEBUG_y(sv_dump(sv)); 189 190 if (!SvUTF8(sv)) { 191 U8 *d = s; 192 U8 * const dstart = d; 193 194 if (PL_op->op_private & OPpTRANS_SQUASH) { 195 196 /* What the mapping of the previous character was to. If the new 197 * character has the same mapping, it is squashed from the output 198 * (but still is included in the count) */ 199 short previous_map = (short) TR_OOB; 200 201 while (s < send) { 202 const short this_map = tbl->map[*s]; 203 if (this_map >= 0) { 204 matches++; 205 if (this_map != previous_map) { 206 *d++ = (U8)this_map; 207 previous_map = this_map; 208 } 209 } 210 else { 211 if (this_map == (short) TR_UNMAPPED) { 212 *d++ = *s; 213 previous_map = (short) TR_OOB; 214 } 215 else { 216 assert(this_map == (short) TR_DELETE); 217 matches++; 218 } 219 } 220 221 s++; 222 } 223 } 224 else { /* Not to squash */ 225 while (s < send) { 226 const short this_map = tbl->map[*s]; 227 if (this_map >= 0) { 228 matches++; 229 *d++ = (U8)this_map; 230 } 231 else if (this_map == (short) TR_UNMAPPED) 232 *d++ = *s; 233 else if (this_map == (short) TR_DELETE) 234 matches++; 235 s++; 236 } 237 } 238 *d = '\0'; 239 SvCUR_set(sv, d - dstart); 240 } 241 else { /* is utf8 */ 242 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); 243 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); 244 U8 *d; 245 U8 *dstart; 246 Size_t size = tbl->size; 247 248 /* What the mapping of the previous character was to. If the new 249 * character has the same mapping, it is squashed from the output (but 250 * still is included in the count) */ 251 UV pch = TR_OOB; 252 253 if (grows) 254 /* Allow for worst-case expansion: Each input byte can become 2. 255 * For a given input character, this happens when it occupies a 256 * single byte under UTF-8, but is to be translated to something 257 * that occupies two: */ 258 Newx(d, len*2+1, U8); 259 else 260 d = s; 261 dstart = d; 262 263 while (s < send) { 264 STRLEN len; 265 const UV comp = utf8n_to_uvchr(s, send - s, &len, 266 UTF8_ALLOW_DEFAULT); 267 UV ch; 268 short sch; 269 270 sch = (comp < size) 271 ? tbl->map[comp] 272 : (! complement) 273 ? (short) TR_UNMAPPED 274 : tbl->map[size]; 275 276 if (sch >= 0) { 277 ch = (UV)sch; 278 replace: 279 matches++; 280 if (LIKELY(!squash || ch != pch)) { 281 d = uvchr_to_utf8(d, ch); 282 pch = ch; 283 } 284 s += len; 285 continue; 286 } 287 else if (sch == (short) TR_UNMAPPED) { 288 Move(s, d, len, U8); 289 d += len; 290 pch = TR_OOB; 291 } 292 else if (sch == (short) TR_DELETE) 293 matches++; 294 else { 295 assert(sch == (short) TR_R_EMPTY); /* empty replacement */ 296 ch = comp; 297 goto replace; 298 } 299 300 s += len; 301 } 302 303 if (grows) { 304 sv_setpvn(sv, (char*)dstart, d - dstart); 305 Safefree(dstart); 306 } 307 else { 308 *d = '\0'; 309 SvCUR_set(sv, d - dstart); 310 } 311 SvUTF8_on(sv); 312 } 313 SvSETMAGIC(sv); 314 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 315 __FILE__, __LINE__, matches)); 316 DEBUG_y(sv_dump(sv)); 317 return matches; 318 } 319 320 321 /* Helper function for do_trans(). 322 * Handles cases where an inversion map implementation is to be used and the 323 * search and replacement charlists are identical: so the string isn't 324 * modified, and only a count of modifiable chars is needed. 325 * 326 * Note that it doesn't handle /d nor /s, since these modify the string 327 * even if the replacement charlist is empty. 328 * 329 * sv may or may not be utf8. 330 */ 331 332 STATIC Size_t 333 S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap) 334 { 335 U8 *s; 336 U8 *send; 337 Size_t matches = 0; 338 STRLEN len; 339 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE); 340 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE); 341 SV* from_invlist = *from_invlist_ptr; 342 SV* to_invmap_sv = *to_invmap_ptr; 343 UV* map = (UV *) SvPVX(to_invmap_sv); 344 345 PERL_ARGS_ASSERT_DO_TRANS_COUNT_INVMAP; 346 347 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d:" 348 "entering do_trans_count_invmap:" 349 " input sv:\n", 350 __FILE__, __LINE__)); 351 DEBUG_y(sv_dump(sv)); 352 DEBUG_y(PerlIO_printf(Perl_debug_log, "mapping:\n")); 353 DEBUG_y(invmap_dump(from_invlist, (UV *) SvPVX(to_invmap_sv))); 354 355 s = (U8*)SvPV_nomg(sv, len); 356 357 send = s + len; 358 359 while (s < send) { 360 UV from; 361 SSize_t i; 362 STRLEN s_len; 363 364 /* Get the code point of the next character in the string */ 365 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) { 366 from = *s; 367 s_len = 1; 368 } 369 else { 370 from = utf8_to_uvchr_buf(s, send, &s_len); 371 if (from == 0 && *s != '\0') { 372 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE); 373 } 374 } 375 376 /* Look the code point up in the data structure for this tr/// to get 377 * what it maps to */ 378 i = _invlist_search(from_invlist, from); 379 assert(i >= 0); 380 381 if (map[i] != (UV) TR_UNLISTED) { 382 matches++; 383 } 384 385 s += s_len; 386 } 387 388 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 389 __FILE__, __LINE__, matches)); 390 return matches; 391 } 392 393 /* Helper function for do_trans(). 394 * Handles cases where an inversion map implementation is to be used and the 395 * search and replacement charlists are either not identical or flags are 396 * present. 397 * 398 * sv may or may not be utf8. 399 */ 400 401 STATIC Size_t 402 S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap) 403 { 404 U8 *s; 405 U8 *send; 406 U8 *d; 407 U8 *s0; 408 U8 *d0; 409 Size_t matches = 0; 410 STRLEN len; 411 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE); 412 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE); 413 SV** const to_expansion_ptr = av_fetch(invmap, 2, TRUE); 414 NV max_expansion = SvNV(*to_expansion_ptr); 415 SV* from_invlist = *from_invlist_ptr; 416 SV* to_invmap_sv = *to_invmap_ptr; 417 UV* map = (UV *) SvPVX(to_invmap_sv); 418 UV previous_map = TR_OOB; 419 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); 420 const bool delete_unfound = cBOOL(PL_op->op_private & OPpTRANS_DELETE); 421 bool inplace = ! cBOOL(PL_op->op_private & OPpTRANS_GROWS); 422 const UV* from_array = invlist_array(from_invlist); 423 UV final_map = TR_OOB; 424 bool out_is_utf8 = cBOOL(SvUTF8(sv)); 425 STRLEN s_len; 426 427 PERL_ARGS_ASSERT_DO_TRANS_INVMAP; 428 429 /* A third element in the array indicates that the replacement list was 430 * shorter than the search list, and this element contains the value to use 431 * for the items that don't correspond */ 432 if (av_top_index(invmap) >= 3) { 433 SV** const final_map_ptr = av_fetch(invmap, 3, TRUE); 434 SV* const final_map_sv = *final_map_ptr; 435 final_map = SvUV(final_map_sv); 436 } 437 438 /* If there is something in the transliteration that could force the input 439 * to be changed to UTF-8, we don't know if we can do it in place, so 440 * assume cannot */ 441 if (! out_is_utf8 && (PL_op->op_private & OPpTRANS_CAN_FORCE_UTF8)) { 442 inplace = FALSE; 443 } 444 445 s = (U8*)SvPV_nomg(sv, len); 446 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_invmap:" 447 " input sv:\n", 448 __FILE__, __LINE__)); 449 DEBUG_y(sv_dump(sv)); 450 DEBUG_y(PerlIO_printf(Perl_debug_log, "mapping:\n")); 451 DEBUG_y(invmap_dump(from_invlist, map)); 452 453 send = s + len; 454 s0 = s; 455 456 /* We know by now if there are some possible input strings whose 457 * transliterations are longer than the input. If none can, we just edit 458 * in place. */ 459 if (inplace) { 460 d0 = d = s; 461 } 462 else { 463 /* Here, we can't edit in place. We have no idea how much, if any, 464 * this particular input string will grow. However, the compilation 465 * calculated the maximum expansion possible. Use that to allocate 466 * based on the worst case scenario. (First +1 is to round up; 2nd is 467 * for \0) */ 468 Newx(d, (STRLEN) (len * max_expansion + 1 + 1), U8); 469 d0 = d; 470 } 471 472 restart: 473 474 /* Do the actual transliteration */ 475 while (s < send) { 476 UV from; 477 UV to; 478 SSize_t i; 479 STRLEN s_len; 480 481 /* Get the code point of the next character in the string */ 482 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) { 483 from = *s; 484 s_len = 1; 485 } 486 else { 487 from = utf8_to_uvchr_buf(s, send, &s_len); 488 if (from == 0 && *s != '\0') { 489 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE); 490 } 491 } 492 493 /* Look the code point up in the data structure for this tr/// to get 494 * what it maps to */ 495 i = _invlist_search(from_invlist, from); 496 assert(i >= 0); 497 498 to = map[i]; 499 500 if (to == (UV) TR_UNLISTED) { /* Just copy the unreplaced character */ 501 if (UVCHR_IS_INVARIANT(from) || ! out_is_utf8) { 502 *d++ = (U8) from; 503 } 504 else if (SvUTF8(sv)) { 505 Move(s, d, s_len, U8); 506 d += s_len; 507 } 508 else { /* Convert to UTF-8 */ 509 append_utf8_from_native_byte(*s, &d); 510 } 511 512 previous_map = to; 513 s += s_len; 514 continue; 515 } 516 517 /* Everything else is counted as a match */ 518 matches++; 519 520 if (to == (UV) TR_SPECIAL_HANDLING) { 521 if (delete_unfound) { 522 s += s_len; 523 continue; 524 } 525 526 /* Use the final character in the replacement list */ 527 to = final_map; 528 } 529 else { /* Here the input code point is to be remapped. The actual 530 value is offset from the base of this entry */ 531 to += from - from_array[i]; 532 } 533 534 /* If copying all occurrences, or this is the first occurrence, copy it 535 * to the output */ 536 if (! squash || to != previous_map) { 537 if (out_is_utf8) { 538 d = uvchr_to_utf8(d, to); 539 } 540 else { 541 if (to >= 256) { /* If need to convert to UTF-8, restart */ 542 out_is_utf8 = TRUE; 543 s = s0; 544 d = d0; 545 matches = 0; 546 goto restart; 547 } 548 *d++ = (U8) to; 549 } 550 } 551 552 previous_map = to; 553 s += s_len; 554 } 555 556 s_len = 0; 557 s += s_len; 558 if (! inplace) { 559 sv_setpvn(sv, (char*)d0, d - d0); 560 Safefree(d0); 561 } 562 else { 563 *d = '\0'; 564 SvCUR_set(sv, d - d0); 565 } 566 567 if (! SvUTF8(sv) && out_is_utf8) { 568 SvUTF8_on(sv); 569 } 570 SvSETMAGIC(sv); 571 572 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 573 __FILE__, __LINE__, matches)); 574 DEBUG_y(sv_dump(sv)); 575 return matches; 576 } 577 578 /* Execute a tr//. sv is the value to be translated, while PL_op 579 * should be an OP_TRANS or OP_TRANSR op, whose op_pv field contains a 580 * translation table or whose op_sv field contains an inversion map. 581 * 582 * Returns a count of number of characters translated 583 */ 584 585 Size_t 586 Perl_do_trans(pTHX_ SV *sv) 587 { 588 STRLEN len; 589 const U8 flags = PL_op->op_private; 590 bool use_utf8_fcns = cBOOL(flags & OPpTRANS_USE_SVOP); 591 bool identical = cBOOL(flags & OPpTRANS_IDENTICAL); 592 593 PERL_ARGS_ASSERT_DO_TRANS; 594 595 if (SvREADONLY(sv) && ! identical) { 596 Perl_croak_no_modify(); 597 } 598 (void)SvPV_const(sv, len); 599 if (!len) 600 return 0; 601 if (! identical) { 602 if (!SvPOKp(sv) || SvTHINKFIRST(sv)) 603 (void)SvPV_force_nomg(sv, len); 604 (void)SvPOK_only_UTF8(sv); 605 } 606 607 if (use_utf8_fcns) { 608 SV* const map = 609 #ifdef USE_ITHREADS 610 PAD_SVl(cPADOP->op_padix); 611 #else 612 MUTABLE_SV(cSVOP->op_sv); 613 #endif 614 615 if (identical) { 616 return do_trans_count_invmap(sv, (AV *) map); 617 } 618 else { 619 return do_trans_invmap(sv, (AV *) map); 620 } 621 } 622 else { 623 const OPtrans_map * const map = (OPtrans_map*)cPVOP->op_pv; 624 625 if (identical) { 626 return do_trans_count(sv, map); 627 } 628 else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { 629 return do_trans_complex(sv, map); 630 } 631 else 632 return do_trans_simple(sv, map); 633 } 634 } 635 636 #ifdef DEBUGGING 637 /* make it small to exercise the logic */ 638 # define JOIN_DELIM_BUFSIZE 2 639 #else 640 # define JOIN_DELIM_BUFSIZE 40 641 #endif 642 643 /* 644 =for apidoc_section $string 645 =for apidoc do_join 646 647 This performs a Perl L<C<join>|perlfunc/join>, placing the joined output 648 into C<sv>. 649 650 The elements to join are in SVs, stored in a C array of pointers to SVs, from 651 C<**mark> to S<C<**sp - 1>>. Hence C<*mark> is a reference to the first SV. 652 Each SV will be coerced into a PV if not one already. 653 654 C<delim> contains the string (or coerced into a string) that is to separate 655 each of the joined elements. 656 657 If any component is in UTF-8, the result will be as well, and all non-UTF-8 658 components will be converted to UTF-8 as necessary. 659 660 Magic and tainting are handled. 661 662 =cut 663 */ 664 665 void 666 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) 667 { 668 PERL_ARGS_ASSERT_DO_JOIN; 669 670 SV ** const oldmark = mark; 671 SSize_t items = sp - mark; 672 STRLEN len; 673 STRLEN delimlen; 674 const char * delimpv = SvPV_const(delim, delimlen); 675 char delim_buf[JOIN_DELIM_BUFSIZE]; 676 bool delim_do_utf8 = DO_UTF8(delim); 677 678 if (items >= 2) { 679 /* Make a copy of the delim, since G or A magic may modify the delim SV. 680 Use a local buffer if possible to avoid the cost of allocation and 681 clean up. 682 */ 683 if (delimlen <= JOIN_DELIM_BUFSIZE) { 684 Copy(delimpv, delim_buf, delimlen, char); 685 delimpv = delim_buf; 686 } 687 else { 688 delimpv = savepvn(delimpv, delimlen); 689 SAVEFREEPV(delimpv); 690 } 691 } 692 693 mark++; 694 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 695 SvUPGRADE(sv, SVt_PV); 696 if (SvLEN(sv) < len + items) { /* current length is way too short */ 697 while (items-- > 0) { 698 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 699 STRLEN tmplen; 700 SvPV_const(*mark, tmplen); 701 len += tmplen; 702 } 703 mark++; 704 } 705 SvGROW(sv, len + 1); /* so try to pre-extend */ 706 707 mark = oldmark; 708 items = sp - mark; 709 ++mark; 710 } 711 712 SvPVCLEAR(sv); 713 /* sv_setpv retains old UTF8ness [perl #24846] */ 714 SvUTF8_off(sv); 715 716 if (TAINTING_get && SvMAGICAL(sv)) 717 SvTAINTED_off(sv); 718 719 if (items-- > 0) { 720 if (*mark) 721 sv_catsv(sv, *mark); 722 mark++; 723 } 724 725 if (delimlen) { 726 const U32 delimflag = delim_do_utf8 ? SV_CATUTF8 : SV_CATBYTES; 727 for (; items > 0; items--,mark++) { 728 STRLEN len; 729 const char *s; 730 sv_catpvn_flags(sv, delimpv, delimlen, delimflag); 731 s = SvPV_const(*mark,len); 732 sv_catpvn_flags(sv,s,len, 733 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 734 } 735 } 736 else { 737 for (; items > 0; items--,mark++) 738 { 739 STRLEN len; 740 const char *s = SvPV_const(*mark,len); 741 sv_catpvn_flags(sv,s,len, 742 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 743 } 744 } 745 SvSETMAGIC(sv); 746 } 747 748 /* 749 =for apidoc_section $string 750 =for apidoc do_sprintf 751 752 This performs a Perl L<C<sprintf>|perlfunc/sprintf> placing the string output 753 into C<sv>. 754 755 The elements to format are in SVs, stored in a C array of pointers to SVs of 756 length C<len>> and beginning at C<**sarg>. The element referenced by C<*sarg> 757 is the format. 758 759 Magic and tainting are handled. 760 761 =cut 762 */ 763 764 void 765 Perl_do_sprintf(pTHX_ SV *sv, SSize_t len, SV **sarg) 766 { 767 STRLEN patlen; 768 const char * const pat = SvPV_const(*sarg, patlen); 769 bool do_taint = FALSE; 770 771 PERL_ARGS_ASSERT_DO_SPRINTF; 772 assert(len >= 1); 773 774 if (SvTAINTED(*sarg)) 775 TAINT_PROPER( 776 (PL_op && PL_op->op_type < OP_max) 777 ? (PL_op->op_type == OP_PRTF) 778 ? "printf" 779 : PL_op_name[PL_op->op_type] 780 : "(unknown)" 781 ); 782 SvUTF8_off(sv); 783 if (DO_UTF8(*sarg)) 784 SvUTF8_on(sv); 785 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, (Size_t)(len - 1), &do_taint); 786 SvSETMAGIC(sv); 787 if (do_taint) 788 SvTAINTED_on(sv); 789 } 790 791 UV 792 Perl_do_vecget(pTHX_ SV *sv, STRLEN offset, int size) 793 { 794 STRLEN srclen; 795 const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET) 796 ? SV_UNDEF_RETURNS_NULL : 0); 797 unsigned char *s = (unsigned char *) 798 SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC)); 799 UV retnum = 0; 800 801 if (!s) { 802 s = (unsigned char *)""; 803 } 804 805 PERL_ARGS_ASSERT_DO_VECGET; 806 807 if (size < 1 || ! isPOWER_OF_2(size)) 808 Perl_croak(aTHX_ "Illegal number of bits in vec"); 809 810 if (SvUTF8(sv)) { 811 if (Perl_sv_utf8_downgrade_flags(aTHX_ sv, TRUE, 0)) { 812 /* PVX may have changed */ 813 s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags); 814 } 815 else { 816 Perl_croak(aTHX_ "Use of strings with code points over 0xFF" 817 " as arguments to vec is forbidden"); 818 } 819 } 820 821 if (size <= 8) { 822 STRLEN bitoffs = ((offset % 8) * size) % 8; 823 STRLEN uoffset = offset / (8 / size); 824 825 if (uoffset >= srclen) 826 return 0; 827 828 retnum = (s[uoffset] >> bitoffs) & nBIT_MASK(size); 829 } 830 else { 831 int n = size / 8; /* required number of bytes */ 832 SSize_t uoffset; 833 834 #ifdef UV_IS_QUAD 835 836 if (size == 64) { 837 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 838 "Bit vector size > 32 non-portable"); 839 } 840 #endif 841 if (offset > Size_t_MAX / n - 1) /* would overflow */ 842 return 0; 843 844 uoffset = offset * n; 845 846 /* Switch on the number of bytes available, but no more than the number 847 * required */ 848 switch (MIN(n, (SSize_t) srclen - uoffset)) { 849 850 #ifdef UV_IS_QUAD 851 852 case 8: 853 retnum += ((UV) s[uoffset + 7]); 854 /* FALLTHROUGH */ 855 case 7: 856 retnum += ((UV) s[uoffset + 6] << 8); /* = size - 56 */ 857 /* FALLTHROUGH */ 858 case 6: 859 retnum += ((UV) s[uoffset + 5] << 16); /* = size - 48 */ 860 /* FALLTHROUGH */ 861 case 5: 862 retnum += ((UV) s[uoffset + 4] << 24); /* = size - 40 */ 863 #endif 864 /* FALLTHROUGH */ 865 case 4: 866 retnum += ((UV) s[uoffset + 3] << (size - 32)); 867 /* FALLTHROUGH */ 868 case 3: 869 retnum += ((UV) s[uoffset + 2] << (size - 24)); 870 /* FALLTHROUGH */ 871 case 2: 872 retnum += ((UV) s[uoffset + 1] << (size - 16)); 873 /* FALLTHROUGH */ 874 case 1: 875 retnum += ((UV) s[uoffset ] << (size - 8)); 876 break; 877 878 default: 879 return 0; 880 } 881 } 882 883 return retnum; 884 } 885 886 /* currently converts input to bytes if possible but doesn't sweat failures, 887 * although it does ensure that the string it clobbers is not marked as 888 * utf8-valid any more 889 */ 890 void 891 Perl_do_vecset(pTHX_ SV *sv) 892 { 893 STRLEN offset, bitoffs = 0; 894 int size; 895 unsigned char *s; 896 UV lval; 897 I32 mask; 898 STRLEN targlen; 899 STRLEN len; 900 SV * const targ = LvTARG(sv); 901 char errflags = LvFLAGS(sv); 902 903 PERL_ARGS_ASSERT_DO_VECSET; 904 905 /* some out-of-range errors have been deferred if/until the LV is 906 * actually written to: f(vec($s,-1,8)) is not always fatal */ 907 if (errflags) { 908 assert(!(errflags & ~(LVf_NEG_OFF|LVf_OUT_OF_RANGE))); 909 if (errflags & LVf_NEG_OFF) 910 Perl_croak_nocontext("Negative offset to vec in lvalue context"); 911 Perl_croak_nocontext("Out of memory during vec in lvalue context"); 912 } 913 914 if (!targ) 915 return; 916 s = (unsigned char*)SvPV_force_flags(targ, targlen, 917 SV_GMAGIC | SV_UNDEF_RETURNS_NULL); 918 if (SvUTF8(targ)) { 919 /* This is handled by the SvPOK_only below... 920 if (!Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0)) 921 SvUTF8_off(targ); 922 */ 923 (void) Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0); 924 } 925 926 (void)SvPOK_only(targ); 927 lval = SvUV(sv); 928 offset = LvTARGOFF(sv); 929 size = LvTARGLEN(sv); 930 931 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 932 Perl_croak(aTHX_ "Illegal number of bits in vec"); 933 934 if (size < 8) { 935 bitoffs = ((offset%8)*size)%8; 936 offset /= 8/size; 937 } 938 else if (size > 8) { 939 int n = size/8; 940 if (offset > Size_t_MAX / n - 1) /* would overflow */ 941 Perl_croak_nocontext("Out of memory during vec in lvalue context"); 942 offset *= n; 943 } 944 945 len = (bitoffs + size + 7)/8; /* required number of bytes */ 946 if (targlen < offset || targlen - offset < len) { 947 STRLEN newlen = offset > Size_t_MAX - len - 1 ? /* avoid overflow */ 948 Size_t_MAX : offset + len + 1; 949 s = (unsigned char*)SvGROW(targ, newlen); 950 (void)memzero((char *)(s + targlen), newlen - targlen); 951 SvCUR_set(targ, newlen - 1); 952 } 953 954 if (size < 8) { 955 mask = nBIT_MASK(size); 956 lval &= mask; 957 s[offset] &= ~(mask << bitoffs); 958 s[offset] |= lval << bitoffs; 959 } 960 else switch (size) { 961 962 #ifdef UV_IS_QUAD 963 964 case 64: 965 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 966 "Bit vector size > 32 non-portable"); 967 s[offset+7] = (U8)( lval ); /* = size - 64 */ 968 s[offset+6] = (U8)( lval >> 8); /* = size - 56 */ 969 s[offset+5] = (U8)( lval >> 16); /* = size - 48 */ 970 s[offset+4] = (U8)( lval >> 24); /* = size - 40 */ 971 #endif 972 /* FALLTHROUGH */ 973 case 32: 974 s[offset+3] = (U8)( lval >> (size - 32)); 975 s[offset+2] = (U8)( lval >> (size - 24)); 976 /* FALLTHROUGH */ 977 case 16: 978 s[offset+1] = (U8)( lval >> (size - 16)); 979 /* FALLTHROUGH */ 980 case 8: 981 s[offset ] = (U8)( lval >> (size - 8)); 982 } 983 SvSETMAGIC(targ); 984 } 985 986 void 987 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 988 { 989 long *dl; 990 long *ll; 991 long *rl; 992 char *dc; 993 STRLEN leftlen; 994 STRLEN rightlen; 995 const char *lc; 996 const char *rc; 997 STRLEN len = 0; 998 STRLEN lensave; 999 const char *lsave; 1000 const char *rsave; 1001 STRLEN needlen = 0; 1002 bool result_needs_to_be_utf8 = FALSE; 1003 bool left_utf8 = FALSE; 1004 bool right_utf8 = FALSE; 1005 U8 * left_non_downgraded = NULL; 1006 U8 * right_non_downgraded = NULL; 1007 Size_t left_non_downgraded_len = 0; 1008 Size_t right_non_downgraded_len = 0; 1009 char * non_downgraded = NULL; 1010 Size_t non_downgraded_len = 0; 1011 1012 PERL_ARGS_ASSERT_DO_VOP; 1013 1014 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv))) 1015 SvPVCLEAR(sv); /* avoid undef warning on |= and ^= */ 1016 if (sv == left) { 1017 lc = SvPV_force_nomg(left, leftlen); 1018 } 1019 else { 1020 lc = SvPV_nomg_const(left, leftlen); 1021 SvPV_force_nomg_nolen(sv); 1022 } 1023 rc = SvPV_nomg_const(right, rightlen); 1024 1025 /* This needs to come after SvPV to ensure that string overloading has 1026 fired off. */ 1027 1028 /* Create downgraded temporaries of any UTF-8 encoded operands */ 1029 if (DO_UTF8(left)) { 1030 const U8 * save_lc = (U8 *) lc; 1031 1032 left_utf8 = TRUE; 1033 result_needs_to_be_utf8 = TRUE; 1034 1035 left_non_downgraded_len = leftlen; 1036 lc = (char *) bytes_from_utf8_loc((const U8 *) lc, &leftlen, 1037 &left_utf8, 1038 (const U8 **) &left_non_downgraded); 1039 /* Calculate the number of trailing unconvertible bytes. This quantity 1040 * is the original length minus the length of the converted portion. */ 1041 left_non_downgraded_len -= left_non_downgraded - save_lc; 1042 SAVEFREEPV(lc); 1043 } 1044 if (DO_UTF8(right)) { 1045 const U8 * save_rc = (U8 *) rc; 1046 1047 right_utf8 = TRUE; 1048 result_needs_to_be_utf8 = TRUE; 1049 1050 right_non_downgraded_len = rightlen; 1051 rc = (char *) bytes_from_utf8_loc((const U8 *) rc, &rightlen, 1052 &right_utf8, 1053 (const U8 **) &right_non_downgraded); 1054 right_non_downgraded_len -= right_non_downgraded - save_rc; 1055 SAVEFREEPV(rc); 1056 } 1057 1058 /* We set 'len' to the length that the operation actually operates on. The 1059 * dangling part of the longer operand doesn't actually participate in the 1060 * operation. What happens is that we pretend that the shorter operand has 1061 * been extended to the right by enough imaginary zeros to match the length 1062 * of the longer one. But we know in advance the result of the operation 1063 * on zeros without having to do it. In the case of '&', the result is 1064 * zero, and the dangling portion is simply discarded. For '|' and '^', the 1065 * result is the same as the other operand, so the dangling part is just 1066 * appended to the final result, unchanged. As of perl-5.32, we no longer 1067 * accept above-FF code points in the dangling portion. 1068 */ 1069 if (left_utf8 || right_utf8) { 1070 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]); 1071 } 1072 else { /* Neither is UTF-8 */ 1073 len = MIN(leftlen, rightlen); 1074 } 1075 1076 lensave = len; 1077 lsave = lc; 1078 rsave = rc; 1079 1080 (void)SvPOK_only(sv); 1081 if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1082 dc = SvPV_force_nomg_nolen(sv); 1083 if (SvLEN(sv) < len + 1) { 1084 dc = SvGROW(sv, len + 1); 1085 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1086 } 1087 } 1088 else { 1089 needlen = optype == OP_BIT_AND 1090 ? len : (leftlen > rightlen ? leftlen : rightlen); 1091 Newxz(dc, needlen + 1, char); 1092 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL); 1093 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1094 } 1095 SvCUR_set(sv, len); 1096 1097 if (len >= sizeof(long)*4 && 1098 !(PTR2nat(dc) % sizeof(long)) && 1099 !(PTR2nat(lc) % sizeof(long)) && 1100 !(PTR2nat(rc) % sizeof(long))) /* It's almost always aligned... */ 1101 { 1102 const STRLEN remainder = len % (sizeof(long)*4); 1103 len /= (sizeof(long)*4); 1104 1105 dl = (long*)dc; 1106 ll = (long*)lc; 1107 rl = (long*)rc; 1108 1109 switch (optype) { 1110 case OP_BIT_AND: 1111 while (len--) { 1112 *dl++ = *ll++ & *rl++; 1113 *dl++ = *ll++ & *rl++; 1114 *dl++ = *ll++ & *rl++; 1115 *dl++ = *ll++ & *rl++; 1116 } 1117 break; 1118 case OP_BIT_XOR: 1119 while (len--) { 1120 *dl++ = *ll++ ^ *rl++; 1121 *dl++ = *ll++ ^ *rl++; 1122 *dl++ = *ll++ ^ *rl++; 1123 *dl++ = *ll++ ^ *rl++; 1124 } 1125 break; 1126 case OP_BIT_OR: 1127 while (len--) { 1128 *dl++ = *ll++ | *rl++; 1129 *dl++ = *ll++ | *rl++; 1130 *dl++ = *ll++ | *rl++; 1131 *dl++ = *ll++ | *rl++; 1132 } 1133 } 1134 1135 dc = (char*)dl; 1136 lc = (char*)ll; 1137 rc = (char*)rl; 1138 1139 len = remainder; 1140 } 1141 1142 switch (optype) { 1143 case OP_BIT_AND: 1144 while (len--) 1145 *dc++ = *lc++ & *rc++; 1146 *dc = '\0'; 1147 break; 1148 case OP_BIT_XOR: 1149 while (len--) 1150 *dc++ = *lc++ ^ *rc++; 1151 goto mop_up; 1152 case OP_BIT_OR: 1153 while (len--) 1154 *dc++ = *lc++ | *rc++; 1155 mop_up: 1156 len = lensave; 1157 if (rightlen > len) { 1158 if (dc == rc) 1159 SvCUR_set(sv, rightlen); 1160 else 1161 sv_catpvn_nomg(sv, rsave + len, rightlen - len); 1162 } 1163 else if (leftlen > len) { 1164 if (dc == lc) 1165 SvCUR_set(sv, leftlen); 1166 else 1167 sv_catpvn_nomg(sv, lsave + len, leftlen - len); 1168 } 1169 *SvEND(sv) = '\0'; 1170 1171 /* If there is trailing stuff that couldn't be converted from UTF-8, it 1172 * is appended as-is for the ^ and | operators. This preserves 1173 * backwards compatibility */ 1174 if (right_non_downgraded) { 1175 non_downgraded = (char *) right_non_downgraded; 1176 non_downgraded_len = right_non_downgraded_len; 1177 } 1178 else if (left_non_downgraded) { 1179 non_downgraded = (char *) left_non_downgraded; 1180 non_downgraded_len = left_non_downgraded_len; 1181 } 1182 1183 break; 1184 } 1185 1186 if (result_needs_to_be_utf8) { 1187 sv_utf8_upgrade_nomg(sv); 1188 1189 /* Append any trailing UTF-8 as-is. */ 1190 if (non_downgraded) { 1191 sv_catpvn_nomg(sv, non_downgraded, non_downgraded_len); 1192 } 1193 } 1194 1195 SvTAINT(sv); 1196 } 1197 1198 1199 /* Perl_do_kv() may be: 1200 * * called directly as the pp function for pp_keys() and pp_values(); 1201 * * It may also be called directly when the op is OP_AVHVSWITCH, to 1202 * implement CORE::keys(), CORE::values(). 1203 * 1204 * In all cases it expects an HV on the stack and returns a list of keys, 1205 * values, or key-value pairs, depending on PL_op. 1206 */ 1207 1208 PP(do_kv) 1209 { 1210 HV * const keys = MUTABLE_HV(*PL_stack_sp); 1211 const U8 gimme = GIMME_V; 1212 1213 const I32 dokeys = (PL_op->op_type == OP_KEYS) 1214 || ( PL_op->op_type == OP_AVHVSWITCH 1215 && (PL_op->op_private & OPpAVHVSWITCH_MASK) 1216 + OP_EACH == OP_KEYS); 1217 1218 const I32 dovalues = (PL_op->op_type == OP_VALUES) 1219 || ( PL_op->op_type == OP_AVHVSWITCH 1220 && (PL_op->op_private & OPpAVHVSWITCH_MASK) 1221 + OP_EACH == OP_VALUES); 1222 1223 assert( PL_op->op_type == OP_KEYS 1224 || PL_op->op_type == OP_VALUES 1225 || PL_op->op_type == OP_AVHVSWITCH); 1226 1227 assert(!( PL_op->op_type == OP_VALUES 1228 && (PL_op->op_private & OPpMAYBE_LVSUB))); 1229 1230 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1231 1232 if (gimme == G_VOID) { 1233 rpp_popfree_1(); 1234 return NORMAL; 1235 } 1236 1237 if (gimme == G_SCALAR) { 1238 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1239 SV * const ret = newSV_type_mortal(SVt_PVLV); /* Not TARG RT#67838 */ 1240 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0); 1241 LvTYPE(ret) = 'k'; 1242 LvTARG(ret) = SvREFCNT_inc_simple(keys); 1243 rpp_replace_1_1(ret); 1244 } 1245 else { 1246 IV i; 1247 dTARGET; 1248 1249 /* note that in 'scalar(keys %h)' the OP_KEYS is usually 1250 * optimised away and the action is performed directly by the 1251 * padhv or rv2hv op. We now only get here via OP_AVHVSWITCH 1252 * and \&CORE::keys 1253 */ 1254 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) { 1255 i = HvUSEDKEYS(keys); 1256 } 1257 else { 1258 i = 0; 1259 while (hv_iternext(keys)) i++; 1260 } 1261 TARGi(i,1); 1262 rpp_replace_1_1(targ); 1263 } 1264 return NORMAL; 1265 } 1266 1267 /* list context only here */ 1268 1269 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) { 1270 const I32 flags = is_lvalue_sub(); 1271 if (flags && !(flags & OPpENTERSUB_INARGS)) 1272 /* diag_listed_as: Can't modify %s in %s */ 1273 Perl_croak(aTHX_ "Can't modify keys in list assignment"); 1274 } 1275 1276 /* push all keys and/or values onto stack */ 1277 #ifdef PERL_RC_STACK 1278 SSize_t sp_base = PL_stack_sp - PL_stack_base; 1279 hv_pushkv(keys, (dokeys | (dovalues << 1))); 1280 /* Now safe to free the original arg on the stack and shuffle 1281 * down one place anything pushed on top of it */ 1282 SSize_t nitems = PL_stack_sp - (PL_stack_base + sp_base); 1283 SV *old_sv = PL_stack_sp[-nitems]; 1284 if (nitems) 1285 Move(PL_stack_sp - nitems + 1, 1286 PL_stack_sp - nitems, nitems, SV*); 1287 PL_stack_sp--; 1288 SvREFCNT_dec_NN(old_sv); 1289 #else 1290 rpp_popfree_1(); 1291 hv_pushkv(keys, (dokeys | (dovalues << 1))); 1292 #endif 1293 return NORMAL; 1294 } 1295 1296 /* 1297 * ex: set ts=8 sts=4 sw=4 et: 1298 */ 1299