1 /* pp.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It's a big house this, and very peculiar. Always a bit more 13 * to discover, and no knowing what you'll find round a corner. 14 * And Elves, sir!' --Samwise Gamgee 15 * 16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"] 17 */ 18 19 /* This file contains general pp ("push/pop") functions that execute the 20 * opcodes that make up a perl program. A typical pp function expects to 21 * find its arguments on the stack, and usually pushes its results onto 22 * the stack, hence the 'pp' terminology. Each OP structure contains 23 * a pointer to the relevant pp_foo() function. 24 */ 25 26 #include "EXTERN.h" 27 #define PERL_IN_PP_C 28 #include "perl.h" 29 #include "keywords.h" 30 31 #include "invlist_inline.h" 32 #include "reentr.h" 33 #include "regcharclass.h" 34 35 /* variations on pp_null */ 36 37 PP(pp_stub) 38 { 39 dSP; 40 if (GIMME_V == G_SCALAR) 41 XPUSHs(&PL_sv_undef); 42 RETURN; 43 } 44 45 /* Pushy stuff. */ 46 47 48 49 PP(pp_padcv) 50 { 51 dSP; dTARGET; 52 assert(SvTYPE(TARG) == SVt_PVCV); 53 XPUSHs(TARG); 54 RETURN; 55 } 56 57 PP(pp_introcv) 58 { 59 dTARGET; 60 SvPADSTALE_off(TARG); 61 return NORMAL; 62 } 63 64 PP(pp_clonecv) 65 { 66 dTARGET; 67 CV * const protocv = PadnamePROTOCV( 68 PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG] 69 ); 70 assert(SvTYPE(TARG) == SVt_PVCV); 71 assert(protocv); 72 if (CvISXSUB(protocv)) { /* constant */ 73 /* XXX Should we clone it here? */ 74 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV 75 to introcv and remove the SvPADSTALE_off. */ 76 SAVEPADSVANDMORTALIZE(ARGTARG); 77 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(protocv); 78 } 79 else { 80 if (CvROOT(protocv)) { 81 assert(CvCLONE(protocv)); 82 assert(!CvCLONED(protocv)); 83 } 84 cv_clone_into(protocv,(CV *)TARG); 85 SAVECLEARSV(PAD_SVl(ARGTARG)); 86 } 87 return NORMAL; 88 } 89 90 /* Translations. */ 91 92 /* In some cases this function inspects PL_op. If this function is called 93 for new op types, more bool parameters may need to be added in place of 94 the checks. 95 96 When noinit is true, the absence of a gv will cause a retval of undef. 97 This is unrelated to the cv-to-gv assignment case. 98 */ 99 100 static SV * 101 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, 102 const bool noinit) 103 { 104 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv); 105 if (SvROK(sv)) { 106 if (SvAMAGIC(sv)) { 107 sv = amagic_deref_call(sv, to_gv_amg); 108 } 109 wasref: 110 sv = SvRV(sv); 111 if (SvTYPE(sv) == SVt_PVIO) { 112 GV * const gv = MUTABLE_GV(sv_newmortal()); 113 gv_init(gv, 0, "__ANONIO__", 10, 0); 114 GvIOp(gv) = MUTABLE_IO(sv); 115 SvREFCNT_inc_void_NN(sv); 116 sv = MUTABLE_SV(gv); 117 } 118 else if (!isGV_with_GP(sv)) { 119 Perl_die(aTHX_ "Not a GLOB reference"); 120 } 121 } 122 else { 123 if (!isGV_with_GP(sv)) { 124 if (!SvOK(sv)) { 125 /* If this is a 'my' scalar and flag is set then vivify 126 * NI-S 1999/05/07 127 */ 128 if (vivify_sv && sv != &PL_sv_undef) { 129 GV *gv; 130 HV *stash; 131 if (SvREADONLY(sv)) 132 Perl_croak_no_modify(); 133 gv = MUTABLE_GV(newSV_type(SVt_NULL)); 134 stash = CopSTASH(PL_curcop); 135 if (SvTYPE(stash) != SVt_PVHV) stash = NULL; 136 if (cUNOP->op_targ) { 137 SV * const namesv = PAD_SV(cUNOP->op_targ); 138 gv_init_sv(gv, stash, namesv, 0); 139 } 140 else { 141 gv_init_pv(gv, stash, "__ANONIO__", 0); 142 } 143 sv_setrv_noinc_mg(sv, MUTABLE_SV(gv)); 144 goto wasref; 145 } 146 if (PL_op->op_flags & OPf_REF || strict) { 147 Perl_die(aTHX_ PL_no_usym, "a symbol"); 148 } 149 if (ckWARN(WARN_UNINITIALIZED)) 150 report_uninit(sv); 151 return &PL_sv_undef; 152 } 153 if (noinit) 154 { 155 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg( 156 sv, GV_ADDMG, SVt_PVGV 157 )))) 158 return &PL_sv_undef; 159 } 160 else { 161 if (strict) { 162 Perl_die(aTHX_ 163 PL_no_symref_sv, 164 sv, 165 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), 166 "a symbol" 167 ); 168 } 169 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV)) 170 == OPpDONT_INIT_GV) { 171 /* We are the target of a coderef assignment. Return 172 the scalar unchanged, and let pp_sasssign deal with 173 things. */ 174 return sv; 175 } 176 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV)); 177 } 178 /* FAKE globs in the symbol table cause weird bugs (#77810) */ 179 SvFAKE_off(sv); 180 } 181 } 182 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) { 183 SV *newsv = sv_mortalcopy_flags(sv, 0); 184 SvFAKE_off(newsv); 185 sv = newsv; 186 } 187 return sv; 188 } 189 190 PP(pp_rv2gv) 191 { 192 dSP; dTOPss; 193 194 sv = S_rv2gv(aTHX_ 195 sv, PL_op->op_private & OPpDEREF, 196 PL_op->op_private & HINT_STRICT_REFS, 197 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD)) 198 || PL_op->op_type == OP_READLINE 199 ); 200 if (PL_op->op_private & OPpLVAL_INTRO) 201 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL)); 202 SETs(sv); 203 RETURN; 204 } 205 206 /* Helper function for pp_rv2sv and pp_rv2av */ 207 GV * 208 Perl_softref2xv(pTHX_ SV *const sv, const char *const what, 209 const svtype type, SV ***spp) 210 { 211 GV *gv; 212 213 PERL_ARGS_ASSERT_SOFTREF2XV; 214 215 if (PL_op->op_private & HINT_STRICT_REFS) { 216 if (SvOK(sv)) 217 Perl_die(aTHX_ PL_no_symref_sv, sv, 218 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what); 219 else 220 Perl_die(aTHX_ PL_no_usym, what); 221 } 222 if (!SvOK(sv)) { 223 if ( 224 PL_op->op_flags & OPf_REF 225 ) 226 Perl_die(aTHX_ PL_no_usym, what); 227 if (ckWARN(WARN_UNINITIALIZED)) 228 report_uninit(sv); 229 if (type != SVt_PV && GIMME_V == G_LIST) { 230 (*spp)--; 231 return NULL; 232 } 233 **spp = &PL_sv_undef; 234 return NULL; 235 } 236 if ((PL_op->op_flags & OPf_SPECIAL) && 237 !(PL_op->op_flags & OPf_MOD)) 238 { 239 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type))) 240 { 241 **spp = &PL_sv_undef; 242 return NULL; 243 } 244 } 245 else { 246 gv = gv_fetchsv_nomg(sv, GV_ADD, type); 247 } 248 return gv; 249 } 250 251 PP(pp_rv2sv) 252 { 253 dSP; dTOPss; 254 GV *gv = NULL; 255 256 SvGETMAGIC(sv); 257 if (SvROK(sv)) { 258 if (SvAMAGIC(sv)) { 259 sv = amagic_deref_call(sv, to_sv_amg); 260 } 261 262 sv = SvRV(sv); 263 if (SvTYPE(sv) >= SVt_PVAV) 264 DIE(aTHX_ "Not a SCALAR reference"); 265 } 266 else { 267 gv = MUTABLE_GV(sv); 268 269 if (!isGV_with_GP(gv)) { 270 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp); 271 if (!gv) 272 RETURN; 273 } 274 sv = GvSVn(gv); 275 } 276 if (PL_op->op_flags & OPf_MOD) { 277 if (PL_op->op_private & OPpLVAL_INTRO) { 278 if (cUNOP->op_first->op_type == OP_NULL) 279 sv = save_scalar(MUTABLE_GV(TOPs)); 280 else if (gv) 281 sv = save_scalar(gv); 282 else 283 Perl_croak(aTHX_ "%s", PL_no_localize_ref); 284 } 285 else if (PL_op->op_private & OPpDEREF) 286 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF); 287 } 288 SPAGAIN; /* in case chasing soft refs reallocated the stack */ 289 SETs(sv); 290 RETURN; 291 } 292 293 PP(pp_av2arylen) 294 { 295 dSP; 296 AV * const av = MUTABLE_AV(TOPs); 297 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 298 if (lvalue) { 299 SV ** const svp = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av)); 300 if (!*svp) { 301 *svp = newSV_type(SVt_PVMG); 302 sv_magic(*svp, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0); 303 } 304 SETs(*svp); 305 } else { 306 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av))))); 307 } 308 RETURN; 309 } 310 311 PP(pp_pos) 312 { 313 dSP; dTOPss; 314 315 if (PL_op->op_flags & OPf_MOD || LVRET) { 316 SV * const ret = newSV_type_mortal(SVt_PVLV);/* Not TARG RT#67838 */ 317 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0); 318 LvTYPE(ret) = '.'; 319 LvTARG(ret) = SvREFCNT_inc_simple(sv); 320 SETs(ret); /* no SvSETMAGIC */ 321 } 322 else { 323 const MAGIC * const mg = mg_find_mglob(sv); 324 if (mg && mg->mg_len != -1) { 325 STRLEN i = mg->mg_len; 326 if (PL_op->op_private & OPpTRUEBOOL) 327 SETs(i ? &PL_sv_yes : &PL_sv_zero); 328 else { 329 dTARGET; 330 if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv)) 331 i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN); 332 SETu(i); 333 } 334 return NORMAL; 335 } 336 SETs(&PL_sv_undef); 337 } 338 return NORMAL; 339 } 340 341 PP(pp_rv2cv) 342 { 343 dSP; 344 GV *gv; 345 HV *stash_unused; 346 const I32 flags = (PL_op->op_flags & OPf_SPECIAL) 347 ? GV_ADDMG 348 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) 349 == OPpMAY_RETURN_CONSTANT) 350 ? GV_ADD|GV_NOEXPAND 351 : GV_ADD; 352 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */ 353 /* (But not in defined().) */ 354 355 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags); 356 if (cv) NOOP; 357 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) { 358 cv = SvTYPE(SvRV(gv)) == SVt_PVCV 359 ? MUTABLE_CV(SvRV(gv)) 360 : MUTABLE_CV(gv); 361 } 362 else 363 cv = MUTABLE_CV(&PL_sv_undef); 364 SETs(MUTABLE_SV(cv)); 365 return NORMAL; 366 } 367 368 PP(pp_prototype) 369 { 370 dSP; 371 CV *cv; 372 HV *stash; 373 GV *gv; 374 SV *ret = &PL_sv_undef; 375 376 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs)); 377 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) { 378 const char * s = SvPVX_const(TOPs); 379 if (memBEGINs(s, SvCUR(TOPs), "CORE::")) { 380 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1); 381 if (!code) 382 DIE(aTHX_ "Can't find an opnumber for \"%" UTF8f "\"", 383 UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6)); 384 { 385 SV * const sv = core_prototype(NULL, s + 6, code, NULL); 386 if (sv) ret = sv; 387 } 388 goto set; 389 } 390 } 391 cv = sv_2cv(TOPs, &stash, &gv, 0); 392 if (cv && SvPOK(cv)) 393 ret = newSVpvn_flags( 394 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv) 395 ); 396 set: 397 SETs(ret); 398 RETURN; 399 } 400 401 PP(pp_anoncode) 402 { 403 dSP; 404 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ)); 405 if (CvCLONE(cv)) 406 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv)))); 407 EXTEND(SP,1); 408 PUSHs(MUTABLE_SV(cv)); 409 RETURN; 410 } 411 412 PP(pp_srefgen) 413 { 414 dSP; 415 *SP = refto(*SP); 416 return NORMAL; 417 } 418 419 PP(pp_refgen) 420 { 421 dSP; dMARK; 422 if (GIMME_V != G_LIST) { 423 if (++MARK <= SP) 424 *MARK = *SP; 425 else 426 { 427 MEXTEND(SP, 1); 428 *MARK = &PL_sv_undef; 429 } 430 *MARK = refto(*MARK); 431 SP = MARK; 432 RETURN; 433 } 434 EXTEND_MORTAL(SP - MARK); 435 while (++MARK <= SP) 436 *MARK = refto(*MARK); 437 RETURN; 438 } 439 440 STATIC SV* 441 S_refto(pTHX_ SV *sv) 442 { 443 SV* rv; 444 445 PERL_ARGS_ASSERT_REFTO; 446 447 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') { 448 if (LvTARGLEN(sv)) 449 vivify_defelem(sv); 450 if (!(sv = LvTARG(sv))) 451 sv = &PL_sv_undef; 452 else 453 SvREFCNT_inc_void_NN(sv); 454 } 455 else if (SvTYPE(sv) == SVt_PVAV) { 456 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv)) 457 av_reify(MUTABLE_AV(sv)); 458 SvTEMP_off(sv); 459 SvREFCNT_inc_void_NN(sv); 460 } 461 else if (SvPADTMP(sv)) { 462 sv = newSVsv(sv); 463 } 464 else if (UNLIKELY(SvSMAGICAL(sv) && mg_find(sv, PERL_MAGIC_nonelem))) 465 sv_unmagic(SvREFCNT_inc_simple_NN(sv), PERL_MAGIC_nonelem); 466 else { 467 SvTEMP_off(sv); 468 SvREFCNT_inc_void_NN(sv); 469 } 470 rv = newSV_type_mortal(SVt_IV); 471 sv_setrv_noinc(rv, sv); 472 return rv; 473 } 474 475 PP(pp_ref) 476 { 477 dSP; 478 SV * const sv = TOPs; 479 480 SvGETMAGIC(sv); 481 if (!SvROK(sv)) { 482 SETs(&PL_sv_no); 483 return NORMAL; 484 } 485 486 /* op is in boolean context? */ 487 if ( (PL_op->op_private & OPpTRUEBOOL) 488 || ( (PL_op->op_private & OPpMAYBE_TRUEBOOL) 489 && block_gimme() == G_VOID)) 490 { 491 /* refs are always true - unless it's to an object blessed into a 492 * class with a false name, i.e. "0". So we have to check for 493 * that remote possibility. The following is is basically an 494 * unrolled SvTRUE(sv_reftype(rv)) */ 495 SV * const rv = SvRV(sv); 496 if (SvOBJECT(rv)) { 497 HV *stash = SvSTASH(rv); 498 HEK *hek = HvNAME_HEK(stash); 499 if (hek) { 500 I32 len = HEK_LEN(hek); 501 /* bail out and do it the hard way? */ 502 if (UNLIKELY( 503 len == HEf_SVKEY 504 || (len == 1 && HEK_KEY(hek)[0] == '0') 505 )) 506 goto do_sv_ref; 507 } 508 } 509 SETs(&PL_sv_yes); 510 return NORMAL; 511 } 512 513 do_sv_ref: 514 { 515 dTARGET; 516 SETs(TARG); 517 sv_ref(TARG, SvRV(sv), TRUE); 518 SvSETMAGIC(TARG); 519 return NORMAL; 520 } 521 522 } 523 524 525 PP(pp_bless) 526 { 527 dSP; 528 HV *stash; 529 530 if (MAXARG == 1) 531 { 532 curstash: 533 stash = CopSTASH(PL_curcop); 534 if (SvTYPE(stash) != SVt_PVHV) 535 Perl_croak(aTHX_ "Attempt to bless into a freed package"); 536 } 537 else { 538 SV * const ssv = POPs; 539 STRLEN len; 540 const char *ptr; 541 542 if (!ssv) goto curstash; 543 SvGETMAGIC(ssv); 544 if (SvROK(ssv)) { 545 if (!SvAMAGIC(ssv)) { 546 frog: 547 Perl_croak(aTHX_ "Attempt to bless into a reference"); 548 } 549 /* SvAMAGIC is on here, but it only means potentially overloaded, 550 so after stringification: */ 551 ptr = SvPV_nomg_const(ssv,len); 552 /* We need to check the flag again: */ 553 if (!SvAMAGIC(ssv)) goto frog; 554 } 555 else ptr = SvPV_nomg_const(ssv,len); 556 if (len == 0) 557 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 558 "Explicit blessing to '' (assuming package main)"); 559 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv)); 560 } 561 562 (void)sv_bless(TOPs, stash); 563 RETURN; 564 } 565 566 PP(pp_gelem) 567 { 568 dSP; 569 570 SV *sv = POPs; 571 STRLEN len; 572 const char * const elem = SvPV_const(sv, len); 573 GV * const gv = MUTABLE_GV(TOPs); 574 SV * tmpRef = NULL; 575 576 sv = NULL; 577 if (elem) { 578 /* elem will always be NUL terminated. */ 579 switch (*elem) { 580 case 'A': 581 if (memEQs(elem, len, "ARRAY")) 582 { 583 tmpRef = MUTABLE_SV(GvAV(gv)); 584 if (tmpRef && !AvREAL((const AV *)tmpRef) 585 && AvREIFY((const AV *)tmpRef)) 586 av_reify(MUTABLE_AV(tmpRef)); 587 } 588 break; 589 case 'C': 590 if (memEQs(elem, len, "CODE")) 591 tmpRef = MUTABLE_SV(GvCVu(gv)); 592 break; 593 case 'F': 594 if (memEQs(elem, len, "FILEHANDLE")) { 595 tmpRef = MUTABLE_SV(GvIOp(gv)); 596 } 597 else 598 if (memEQs(elem, len, "FORMAT")) 599 tmpRef = MUTABLE_SV(GvFORM(gv)); 600 break; 601 case 'G': 602 if (memEQs(elem, len, "GLOB")) 603 tmpRef = MUTABLE_SV(gv); 604 break; 605 case 'H': 606 if (memEQs(elem, len, "HASH")) 607 tmpRef = MUTABLE_SV(GvHV(gv)); 608 break; 609 case 'I': 610 if (memEQs(elem, len, "IO")) 611 tmpRef = MUTABLE_SV(GvIOp(gv)); 612 break; 613 case 'N': 614 if (memEQs(elem, len, "NAME")) 615 sv = newSVhek(GvNAME_HEK(gv)); 616 break; 617 case 'P': 618 if (memEQs(elem, len, "PACKAGE")) { 619 const HV * const stash = GvSTASH(gv); 620 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL; 621 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__"); 622 } 623 break; 624 case 'S': 625 if (memEQs(elem, len, "SCALAR")) 626 tmpRef = GvSVn(gv); 627 break; 628 } 629 } 630 if (tmpRef) 631 sv = newRV(tmpRef); 632 if (sv) 633 sv_2mortal(sv); 634 else 635 sv = &PL_sv_undef; 636 SETs(sv); 637 RETURN; 638 } 639 640 /* Pattern matching */ 641 642 PP(pp_study) 643 { 644 dSP; dTOPss; 645 STRLEN len; 646 647 (void)SvPV(sv, len); 648 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) { 649 /* Historically, study was skipped in these cases. */ 650 SETs(&PL_sv_no); 651 return NORMAL; 652 } 653 654 /* Make study a no-op. It's no longer useful and its existence 655 complicates matters elsewhere. */ 656 SETs(&PL_sv_yes); 657 return NORMAL; 658 } 659 660 661 /* also used for: pp_transr() */ 662 663 PP(pp_trans) 664 { 665 dSP; 666 SV *sv; 667 668 if (PL_op->op_flags & OPf_STACKED) 669 sv = POPs; 670 else { 671 EXTEND(SP,1); 672 if (ARGTARG) 673 sv = PAD_SV(ARGTARG); 674 else { 675 sv = DEFSV; 676 } 677 } 678 if(PL_op->op_type == OP_TRANSR) { 679 STRLEN len; 680 const char * const pv = SvPV(sv,len); 681 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv)); 682 do_trans(newsv); 683 PUSHs(newsv); 684 } 685 else { 686 Size_t i = do_trans(sv); 687 mPUSHi((UV)i); 688 } 689 RETURN; 690 } 691 692 /* Lvalue operators. */ 693 694 static size_t 695 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) 696 { 697 STRLEN len; 698 char *s; 699 size_t count = 0; 700 701 PERL_ARGS_ASSERT_DO_CHOMP; 702 703 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs))) 704 return 0; 705 if (SvTYPE(sv) == SVt_PVAV) { 706 I32 i; 707 AV *const av = MUTABLE_AV(sv); 708 const I32 max = AvFILL(av); 709 710 for (i = 0; i <= max; i++) { 711 sv = MUTABLE_SV(av_fetch(av, i, FALSE)); 712 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 713 count += do_chomp(retval, sv, chomping); 714 } 715 return count; 716 } 717 else if (SvTYPE(sv) == SVt_PVHV) { 718 HV* const hv = MUTABLE_HV(sv); 719 HE* entry; 720 (void)hv_iterinit(hv); 721 while ((entry = hv_iternext(hv))) 722 count += do_chomp(retval, hv_iterval(hv,entry), chomping); 723 return count; 724 } 725 else if (SvREADONLY(sv)) { 726 Perl_croak_no_modify(); 727 } 728 729 s = SvPV(sv, len); 730 if (chomping) { 731 if (s && len) { 732 char *temp_buffer = NULL; 733 SV *svrecode = NULL; 734 s += --len; 735 if (RsPARA(PL_rs)) { 736 if (*s != '\n') 737 goto nope_free_nothing; 738 ++count; 739 while (len && s[-1] == '\n') { 740 --len; 741 --s; 742 ++count; 743 } 744 } 745 else { 746 STRLEN rslen, rs_charlen; 747 const char *rsptr = SvPV_const(PL_rs, rslen); 748 749 rs_charlen = SvUTF8(PL_rs) 750 ? sv_len_utf8(PL_rs) 751 : rslen; 752 753 if (SvUTF8(PL_rs) != SvUTF8(sv)) { 754 /* Assumption is that rs is shorter than the scalar. */ 755 if (SvUTF8(PL_rs)) { 756 /* RS is utf8, scalar is 8 bit. */ 757 bool is_utf8 = TRUE; 758 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, 759 &rslen, &is_utf8); 760 if (is_utf8) { 761 /* Cannot downgrade, therefore cannot possibly match. 762 At this point, temp_buffer is not alloced, and 763 is the buffer inside PL_rs, so dont free it. 764 */ 765 assert (temp_buffer == rsptr); 766 goto nope_free_sv; 767 } 768 rsptr = temp_buffer; 769 } 770 else { 771 /* RS is 8 bit, scalar is utf8. */ 772 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); 773 rsptr = temp_buffer; 774 } 775 } 776 if (rslen == 1) { 777 if (*s != *rsptr) 778 goto nope_free_all; 779 ++count; 780 } 781 else { 782 if (len < rslen - 1) 783 goto nope_free_all; 784 len -= rslen - 1; 785 s -= rslen - 1; 786 if (memNE(s, rsptr, rslen)) 787 goto nope_free_all; 788 count += rs_charlen; 789 } 790 } 791 SvPV_force_nomg_nolen(sv); 792 SvCUR_set(sv, len); 793 *SvEND(sv) = '\0'; 794 SvNIOK_off(sv); 795 SvSETMAGIC(sv); 796 797 nope_free_all: 798 Safefree(temp_buffer); 799 nope_free_sv: 800 SvREFCNT_dec(svrecode); 801 nope_free_nothing: ; 802 } 803 } else { 804 if (len && (!SvPOK(sv) || SvIsCOW(sv))) 805 s = SvPV_force_nomg(sv, len); 806 if (DO_UTF8(sv)) { 807 if (s && len) { 808 char * const send = s + len; 809 char * const start = s; 810 s = send - 1; 811 while (s > start && UTF8_IS_CONTINUATION(*s)) 812 s--; 813 if (is_utf8_string((U8*)s, send - s)) { 814 sv_setpvn(retval, s, send - s); 815 *s = '\0'; 816 SvCUR_set(sv, s - start); 817 SvNIOK_off(sv); 818 SvUTF8_on(retval); 819 } 820 } 821 else 822 SvPVCLEAR(retval); 823 } 824 else if (s && len) { 825 s += --len; 826 sv_setpvn(retval, s, 1); 827 *s = '\0'; 828 SvCUR_set(sv, len); 829 SvUTF8_off(sv); 830 SvNIOK_off(sv); 831 } 832 else 833 SvPVCLEAR(retval); 834 SvSETMAGIC(sv); 835 } 836 return count; 837 } 838 839 840 /* also used for: pp_schomp() */ 841 842 PP(pp_schop) 843 { 844 dSP; dTARGET; 845 const bool chomping = PL_op->op_type == OP_SCHOMP; 846 847 const size_t count = do_chomp(TARG, TOPs, chomping); 848 if (chomping) 849 sv_setiv(TARG, count); 850 SETTARG; 851 return NORMAL; 852 } 853 854 855 /* also used for: pp_chomp() */ 856 857 PP(pp_chop) 858 { 859 dSP; dMARK; dTARGET; dORIGMARK; 860 const bool chomping = PL_op->op_type == OP_CHOMP; 861 size_t count = 0; 862 863 while (MARK < SP) 864 count += do_chomp(TARG, *++MARK, chomping); 865 if (chomping) 866 sv_setiv(TARG, count); 867 SP = ORIGMARK; 868 XPUSHTARG; 869 RETURN; 870 } 871 872 PP(pp_undef) 873 { 874 dSP; 875 SV *sv; 876 877 if (!PL_op->op_private) { 878 EXTEND(SP, 1); 879 RETPUSHUNDEF; 880 } 881 882 sv = TOPs; 883 if (!sv) 884 { 885 SETs(&PL_sv_undef); 886 return NORMAL; 887 } 888 889 if (SvTHINKFIRST(sv)) 890 sv_force_normal_flags(sv, SV_COW_DROP_PV|SV_IMMEDIATE_UNREF); 891 892 switch (SvTYPE(sv)) { 893 case SVt_NULL: 894 break; 895 case SVt_PVAV: 896 av_undef(MUTABLE_AV(sv)); 897 break; 898 case SVt_PVHV: 899 hv_undef(MUTABLE_HV(sv)); 900 break; 901 case SVt_PVCV: 902 if (cv_const_sv((const CV *)sv)) 903 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 904 "Constant subroutine %" SVf " undefined", 905 SVfARG(CvANON((const CV *)sv) 906 ? newSVpvs_flags("(anonymous)", SVs_TEMP) 907 : sv_2mortal(newSVhek( 908 CvNAMED(sv) 909 ? CvNAME_HEK((CV *)sv) 910 : GvENAME_HEK(CvGV((const CV *)sv)) 911 )) 912 )); 913 /* FALLTHROUGH */ 914 case SVt_PVFM: 915 /* let user-undef'd sub keep its identity */ 916 cv_undef_flags(MUTABLE_CV(sv), CV_UNDEF_KEEP_NAME); 917 break; 918 case SVt_PVGV: 919 assert(isGV_with_GP(sv)); 920 assert(!SvFAKE(sv)); 921 { 922 GP *gp; 923 HV *stash; 924 925 /* undef *Pkg::meth_name ... */ 926 bool method_changed 927 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv)) 928 && HvENAME_get(stash); 929 /* undef *Foo:: */ 930 if((stash = GvHV((const GV *)sv))) { 931 if(HvENAME_get(stash)) 932 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash)); 933 else stash = NULL; 934 } 935 936 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv)); 937 gp_free(MUTABLE_GV(sv)); 938 Newxz(gp, 1, GP); 939 GvGP_set(sv, gp_ref(gp)); 940 #ifndef PERL_DONT_CREATE_GVSV 941 GvSV(sv) = newSV_type(SVt_NULL); 942 #endif 943 GvLINE(sv) = CopLINE(PL_curcop); 944 GvEGV(sv) = MUTABLE_GV(sv); 945 GvMULTI_on(sv); 946 947 if(stash) 948 mro_package_moved(NULL, stash, (const GV *)sv, 0); 949 stash = NULL; 950 /* undef *Foo::ISA */ 951 if( strEQ(GvNAME((const GV *)sv), "ISA") 952 && (stash = GvSTASH((const GV *)sv)) 953 && (method_changed || HvENAME(stash)) ) 954 mro_isa_changed_in(stash); 955 else if(method_changed) 956 mro_method_changed_in( 957 GvSTASH((const GV *)sv) 958 ); 959 960 break; 961 } 962 default: 963 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) { 964 SvPV_free(sv); 965 SvPV_set(sv, NULL); 966 SvLEN_set(sv, 0); 967 } 968 SvOK_off(sv); 969 SvSETMAGIC(sv); 970 } 971 972 SETs(&PL_sv_undef); 973 return NORMAL; 974 } 975 976 977 /* common "slow" code for pp_postinc and pp_postdec */ 978 979 static OP * 980 S_postincdec_common(pTHX_ SV *sv, SV *targ) 981 { 982 dSP; 983 const bool inc = 984 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC; 985 986 if (SvROK(sv)) 987 TARG = sv_newmortal(); 988 sv_setsv(TARG, sv); 989 if (inc) 990 sv_inc_nomg(sv); 991 else 992 sv_dec_nomg(sv); 993 SvSETMAGIC(sv); 994 /* special case for undef: see thread at 2003-03/msg00536.html in archive */ 995 if (inc && !SvOK(TARG)) 996 sv_setiv(TARG, 0); 997 SETTARG; 998 return NORMAL; 999 } 1000 1001 1002 /* also used for: pp_i_postinc() */ 1003 1004 PP(pp_postinc) 1005 { 1006 dSP; dTARGET; 1007 SV *sv = TOPs; 1008 1009 /* special-case sv being a simple integer */ 1010 if (LIKELY(((sv->sv_flags & 1011 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV| 1012 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK)) 1013 == SVf_IOK)) 1014 && SvIVX(sv) != IV_MAX) 1015 { 1016 IV iv = SvIVX(sv); 1017 SvIV_set(sv, iv + 1); 1018 TARGi(iv, 0); /* arg not GMG, so can't be tainted */ 1019 SETs(TARG); 1020 return NORMAL; 1021 } 1022 1023 return S_postincdec_common(aTHX_ sv, TARG); 1024 } 1025 1026 1027 /* also used for: pp_i_postdec() */ 1028 1029 PP(pp_postdec) 1030 { 1031 dSP; dTARGET; 1032 SV *sv = TOPs; 1033 1034 /* special-case sv being a simple integer */ 1035 if (LIKELY(((sv->sv_flags & 1036 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV| 1037 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK)) 1038 == SVf_IOK)) 1039 && SvIVX(sv) != IV_MIN) 1040 { 1041 IV iv = SvIVX(sv); 1042 SvIV_set(sv, iv - 1); 1043 TARGi(iv, 0); /* arg not GMG, so can't be tainted */ 1044 SETs(TARG); 1045 return NORMAL; 1046 } 1047 1048 return S_postincdec_common(aTHX_ sv, TARG); 1049 } 1050 1051 1052 /* Ordinary operators. */ 1053 1054 PP(pp_pow) 1055 { 1056 dSP; dATARGET; SV *svl, *svr; 1057 #ifdef PERL_PRESERVE_IVUV 1058 bool is_int = 0; 1059 #endif 1060 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric); 1061 svr = TOPs; 1062 svl = TOPm1s; 1063 #ifdef PERL_PRESERVE_IVUV 1064 /* For integer to integer power, we do the calculation by hand wherever 1065 we're sure it is safe; otherwise we call pow() and try to convert to 1066 integer afterwards. */ 1067 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) { 1068 UV power; 1069 bool baseuok; 1070 UV baseuv; 1071 1072 if (SvUOK(svr)) { 1073 power = SvUVX(svr); 1074 } else { 1075 const IV iv = SvIVX(svr); 1076 if (iv >= 0) { 1077 power = iv; 1078 } else { 1079 goto float_it; /* Can't do negative powers this way. */ 1080 } 1081 } 1082 1083 baseuok = SvUOK(svl); 1084 if (baseuok) { 1085 baseuv = SvUVX(svl); 1086 } else { 1087 const IV iv = SvIVX(svl); 1088 if (iv >= 0) { 1089 baseuv = iv; 1090 baseuok = TRUE; /* effectively it's a UV now */ 1091 } else { 1092 baseuv = -iv; /* abs, baseuok == false records sign */ 1093 } 1094 } 1095 /* now we have integer ** positive integer. */ 1096 is_int = 1; 1097 1098 /* foo & (foo - 1) is zero only for a power of 2. */ 1099 if (!(baseuv & (baseuv - 1))) { 1100 /* We are raising power-of-2 to a positive integer. 1101 The logic here will work for any base (even non-integer 1102 bases) but it can be less accurate than 1103 pow (base,power) or exp (power * log (base)) when the 1104 intermediate values start to spill out of the mantissa. 1105 With powers of 2 we know this can't happen. 1106 And powers of 2 are the favourite thing for perl 1107 programmers to notice ** not doing what they mean. */ 1108 NV result = 1.0; 1109 NV base = baseuok ? baseuv : -(NV)baseuv; 1110 1111 if (power & 1) { 1112 result *= base; 1113 } 1114 while (power >>= 1) { 1115 base *= base; 1116 if (power & 1) { 1117 result *= base; 1118 } 1119 } 1120 SP--; 1121 SETn( result ); 1122 SvIV_please_nomg(svr); 1123 RETURN; 1124 } else { 1125 unsigned int highbit = 8 * sizeof(UV); 1126 unsigned int diff = 8 * sizeof(UV); 1127 while (diff >>= 1) { 1128 highbit -= diff; 1129 if (baseuv >> highbit) { 1130 highbit += diff; 1131 } 1132 } 1133 /* we now have baseuv < 2 ** highbit */ 1134 if (power * highbit <= 8 * sizeof(UV)) { 1135 /* result will definitely fit in UV, so use UV math 1136 on same algorithm as above */ 1137 UV result = 1; 1138 UV base = baseuv; 1139 const bool odd_power = cBOOL(power & 1); 1140 if (odd_power) { 1141 result *= base; 1142 } 1143 while (power >>= 1) { 1144 base *= base; 1145 if (power & 1) { 1146 result *= base; 1147 } 1148 } 1149 SP--; 1150 if (baseuok || !odd_power) 1151 /* answer is positive */ 1152 SETu( result ); 1153 else if (result <= (UV)IV_MAX) 1154 /* answer negative, fits in IV */ 1155 SETi( -(IV)result ); 1156 else if (result == (UV)IV_MIN) 1157 /* 2's complement assumption: special case IV_MIN */ 1158 SETi( IV_MIN ); 1159 else 1160 /* answer negative, doesn't fit */ 1161 SETn( -(NV)result ); 1162 RETURN; 1163 } 1164 } 1165 } 1166 float_it: 1167 #endif 1168 { 1169 NV right = SvNV_nomg(svr); 1170 NV left = SvNV_nomg(svl); 1171 (void)POPs; 1172 1173 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG) 1174 /* 1175 We are building perl with long double support and are on an AIX OS 1176 afflicted with a powl() function that wrongly returns NaNQ for any 1177 negative base. This was reported to IBM as PMR #23047-379 on 1178 03/06/2006. The problem exists in at least the following versions 1179 of AIX and the libm fileset, and no doubt others as well: 1180 1181 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50 1182 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29 1183 AIX 5.2.0 bos.adt.libm 5.2.0.85 1184 1185 So, until IBM fixes powl(), we provide the following workaround to 1186 handle the problem ourselves. Our logic is as follows: for 1187 negative bases (left), we use fmod(right, 2) to check if the 1188 exponent is an odd or even integer: 1189 1190 - if odd, powl(left, right) == -powl(-left, right) 1191 - if even, powl(left, right) == powl(-left, right) 1192 1193 If the exponent is not an integer, the result is rightly NaNQ, so 1194 we just return that (as NV_NAN). 1195 */ 1196 1197 if (left < 0.0) { 1198 NV mod2 = Perl_fmod( right, 2.0 ); 1199 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */ 1200 SETn( -Perl_pow( -left, right) ); 1201 } else if (mod2 == 0.0) { /* even integer */ 1202 SETn( Perl_pow( -left, right) ); 1203 } else { /* fractional power */ 1204 SETn( NV_NAN ); 1205 } 1206 } else { 1207 SETn( Perl_pow( left, right) ); 1208 } 1209 #else 1210 SETn( Perl_pow( left, right) ); 1211 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */ 1212 1213 #ifdef PERL_PRESERVE_IVUV 1214 if (is_int) 1215 SvIV_please_nomg(svr); 1216 #endif 1217 RETURN; 1218 } 1219 } 1220 1221 PP(pp_multiply) 1222 { 1223 dSP; dATARGET; SV *svl, *svr; 1224 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric); 1225 svr = TOPs; 1226 svl = TOPm1s; 1227 1228 #ifdef PERL_PRESERVE_IVUV 1229 1230 /* special-case some simple common cases */ 1231 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) { 1232 IV il, ir; 1233 U32 flags = (svl->sv_flags & svr->sv_flags); 1234 if (flags & SVf_IOK) { 1235 /* both args are simple IVs */ 1236 UV topl, topr; 1237 il = SvIVX(svl); 1238 ir = SvIVX(svr); 1239 do_iv: 1240 topl = ((UV)il) >> (UVSIZE * 4 - 1); 1241 topr = ((UV)ir) >> (UVSIZE * 4 - 1); 1242 1243 /* if both are in a range that can't under/overflow, do a 1244 * simple integer multiply: if the top halves(*) of both numbers 1245 * are 00...00 or 11...11, then it's safe. 1246 * (*) for 32-bits, the "top half" is the top 17 bits, 1247 * for 64-bits, its 33 bits */ 1248 if (!( 1249 ((topl+1) | (topr+1)) 1250 & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */ 1251 )) { 1252 SP--; 1253 TARGi(il * ir, 0); /* args not GMG, so can't be tainted */ 1254 SETs(TARG); 1255 RETURN; 1256 } 1257 goto generic; 1258 } 1259 else if (flags & SVf_NOK) { 1260 /* both args are NVs */ 1261 NV nl = SvNVX(svl); 1262 NV nr = SvNVX(svr); 1263 NV result; 1264 1265 if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) { 1266 /* nothing was lost by converting to IVs */ 1267 goto do_iv; 1268 } 1269 SP--; 1270 result = nl * nr; 1271 # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16 1272 if (Perl_isinf(result)) { 1273 Zero((U8*)&result + 8, 8, U8); 1274 } 1275 # endif 1276 TARGn(result, 0); /* args not GMG, so can't be tainted */ 1277 SETs(TARG); 1278 RETURN; 1279 } 1280 } 1281 1282 generic: 1283 1284 if (SvIV_please_nomg(svr)) { 1285 /* Unless the left argument is integer in range we are going to have to 1286 use NV maths. Hence only attempt to coerce the right argument if 1287 we know the left is integer. */ 1288 /* Left operand is defined, so is it IV? */ 1289 if (SvIV_please_nomg(svl)) { 1290 bool auvok = SvUOK(svl); 1291 bool buvok = SvUOK(svr); 1292 const UV topmask = (~ (UV)0) << (4 * sizeof (UV)); 1293 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV))); 1294 UV alow; 1295 UV ahigh; 1296 UV blow; 1297 UV bhigh; 1298 1299 if (auvok) { 1300 alow = SvUVX(svl); 1301 } else { 1302 const IV aiv = SvIVX(svl); 1303 if (aiv >= 0) { 1304 alow = aiv; 1305 auvok = TRUE; /* effectively it's a UV now */ 1306 } else { 1307 /* abs, auvok == false records sign; Using 0- here and 1308 * later to silence bogus warning from MS VC */ 1309 alow = (UV) (0 - (UV) aiv); 1310 } 1311 } 1312 if (buvok) { 1313 blow = SvUVX(svr); 1314 } else { 1315 const IV biv = SvIVX(svr); 1316 if (biv >= 0) { 1317 blow = biv; 1318 buvok = TRUE; /* effectively it's a UV now */ 1319 } else { 1320 /* abs, buvok == false records sign */ 1321 blow = (UV) (0 - (UV) biv); 1322 } 1323 } 1324 1325 /* If this does sign extension on unsigned it's time for plan B */ 1326 ahigh = alow >> (4 * sizeof (UV)); 1327 alow &= botmask; 1328 bhigh = blow >> (4 * sizeof (UV)); 1329 blow &= botmask; 1330 if (ahigh && bhigh) { 1331 NOOP; 1332 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000 1333 which is overflow. Drop to NVs below. */ 1334 } else if (!ahigh && !bhigh) { 1335 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001 1336 so the unsigned multiply cannot overflow. */ 1337 const UV product = alow * blow; 1338 if (auvok == buvok) { 1339 /* -ve * -ve or +ve * +ve gives a +ve result. */ 1340 SP--; 1341 SETu( product ); 1342 RETURN; 1343 } else if (product <= (UV)IV_MIN) { 1344 /* 2s complement assumption that (UV)-IV_MIN is correct. */ 1345 /* -ve result, which could overflow an IV */ 1346 SP--; 1347 /* can't negate IV_MIN, but there are aren't two 1348 * integers such that !ahigh && !bhigh, where the 1349 * product equals 0x800....000 */ 1350 assert(product != (UV)IV_MIN); 1351 SETi( -(IV)product ); 1352 RETURN; 1353 } /* else drop to NVs below. */ 1354 } else { 1355 /* One operand is large, 1 small */ 1356 UV product_middle; 1357 if (bhigh) { 1358 /* swap the operands */ 1359 ahigh = bhigh; 1360 bhigh = blow; /* bhigh now the temp var for the swap */ 1361 blow = alow; 1362 alow = bhigh; 1363 } 1364 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow) 1365 multiplies can't overflow. shift can, add can, -ve can. */ 1366 product_middle = ahigh * blow; 1367 if (!(product_middle & topmask)) { 1368 /* OK, (ahigh * blow) won't lose bits when we shift it. */ 1369 UV product_low; 1370 product_middle <<= (4 * sizeof (UV)); 1371 product_low = alow * blow; 1372 1373 /* as for pp_add, UV + something mustn't get smaller. 1374 IIRC ANSI mandates this wrapping *behaviour* for 1375 unsigned whatever the actual representation*/ 1376 product_low += product_middle; 1377 if (product_low >= product_middle) { 1378 /* didn't overflow */ 1379 if (auvok == buvok) { 1380 /* -ve * -ve or +ve * +ve gives a +ve result. */ 1381 SP--; 1382 SETu( product_low ); 1383 RETURN; 1384 } else if (product_low <= (UV)IV_MIN) { 1385 /* 2s complement assumption again */ 1386 /* -ve result, which could overflow an IV */ 1387 SP--; 1388 SETi(product_low == (UV)IV_MIN 1389 ? IV_MIN : -(IV)product_low); 1390 RETURN; 1391 } /* else drop to NVs below. */ 1392 } 1393 } /* product_middle too large */ 1394 } /* ahigh && bhigh */ 1395 } /* SvIOK(svl) */ 1396 } /* SvIOK(svr) */ 1397 #endif 1398 { 1399 NV right = SvNV_nomg(svr); 1400 NV left = SvNV_nomg(svl); 1401 NV result = left * right; 1402 1403 (void)POPs; 1404 #if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16 1405 if (Perl_isinf(result)) { 1406 Zero((U8*)&result + 8, 8, U8); 1407 } 1408 #endif 1409 SETn(result); 1410 RETURN; 1411 } 1412 } 1413 1414 PP(pp_divide) 1415 { 1416 dSP; dATARGET; SV *svl, *svr; 1417 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric); 1418 svr = TOPs; 1419 svl = TOPm1s; 1420 /* Only try to do UV divide first 1421 if ((SLOPPYDIVIDE is true) or 1422 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large 1423 to preserve)) 1424 The assumption is that it is better to use floating point divide 1425 whenever possible, only doing integer divide first if we can't be sure. 1426 If NV_PRESERVES_UV is true then we know at compile time that no UV 1427 can be too large to preserve, so don't need to compile the code to 1428 test the size of UVs. */ 1429 1430 #if defined(SLOPPYDIVIDE) || (defined(PERL_PRESERVE_IVUV) && !defined(NV_PRESERVES_UV)) 1431 # define PERL_TRY_UV_DIVIDE 1432 /* ensure that 20./5. == 4. */ 1433 #endif 1434 1435 #ifdef PERL_TRY_UV_DIVIDE 1436 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) { 1437 bool left_non_neg = SvUOK(svl); 1438 bool right_non_neg = SvUOK(svr); 1439 UV left; 1440 UV right; 1441 1442 if (right_non_neg) { 1443 right = SvUVX(svr); 1444 } 1445 else { 1446 const IV biv = SvIVX(svr); 1447 if (biv >= 0) { 1448 right = biv; 1449 right_non_neg = TRUE; /* effectively it's a UV now */ 1450 } 1451 else { 1452 right = -(UV)biv; 1453 } 1454 } 1455 /* historically undef()/0 gives a "Use of uninitialized value" 1456 warning before dieing, hence this test goes here. 1457 If it were immediately before the second SvIV_please, then 1458 DIE() would be invoked before left was even inspected, so 1459 no inspection would give no warning. */ 1460 if (right == 0) 1461 DIE(aTHX_ "Illegal division by zero"); 1462 1463 if (left_non_neg) { 1464 left = SvUVX(svl); 1465 } 1466 else { 1467 const IV aiv = SvIVX(svl); 1468 if (aiv >= 0) { 1469 left = aiv; 1470 left_non_neg = TRUE; /* effectively it's a UV now */ 1471 } 1472 else { 1473 left = -(UV)aiv; 1474 } 1475 } 1476 1477 if (left >= right 1478 #ifdef SLOPPYDIVIDE 1479 /* For sloppy divide we always attempt integer division. */ 1480 #else 1481 /* Otherwise we only attempt it if either or both operands 1482 would not be preserved by an NV. If both fit in NVs 1483 we fall through to the NV divide code below. However, 1484 as left >= right to ensure integer result here, we know that 1485 we can skip the test on the right operand - right big 1486 enough not to be preserved can't get here unless left is 1487 also too big. */ 1488 1489 && (left > ((UV)1 << NV_PRESERVES_UV_BITS)) 1490 #endif 1491 ) { 1492 /* Integer division can't overflow, but it can be imprecise. */ 1493 1494 /* Modern compilers optimize division followed by 1495 * modulo into a single div instruction */ 1496 const UV result = left / right; 1497 if (left % right == 0) { 1498 SP--; /* result is valid */ 1499 if (left_non_neg == right_non_neg) { 1500 /* signs identical, result is positive. */ 1501 SETu( result ); 1502 RETURN; 1503 } 1504 /* 2s complement assumption */ 1505 if (result <= (UV)IV_MIN) 1506 SETi(result == (UV)IV_MIN ? IV_MIN : -(IV)result); 1507 else { 1508 /* It's exact but too negative for IV. */ 1509 SETn( -(NV)result ); 1510 } 1511 RETURN; 1512 } /* tried integer divide but it was not an integer result */ 1513 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */ 1514 } /* one operand wasn't SvIOK */ 1515 #endif /* PERL_TRY_UV_DIVIDE */ 1516 { 1517 NV right = SvNV_nomg(svr); 1518 NV left = SvNV_nomg(svl); 1519 (void)POPs;(void)POPs; 1520 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 1521 if (! Perl_isnan(right) && right == 0.0) 1522 #else 1523 if (right == 0.0) 1524 #endif 1525 DIE(aTHX_ "Illegal division by zero"); 1526 PUSHn( left / right ); 1527 RETURN; 1528 } 1529 } 1530 1531 PP(pp_modulo) 1532 { 1533 dSP; dATARGET; 1534 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric); 1535 { 1536 UV left = 0; 1537 UV right = 0; 1538 bool left_neg = FALSE; 1539 bool right_neg = FALSE; 1540 bool use_double = FALSE; 1541 bool dright_valid = FALSE; 1542 NV dright = 0.0; 1543 NV dleft = 0.0; 1544 SV * const svr = TOPs; 1545 SV * const svl = TOPm1s; 1546 if (SvIV_please_nomg(svr)) { 1547 right_neg = !SvUOK(svr); 1548 if (!right_neg) { 1549 right = SvUVX(svr); 1550 } else { 1551 const IV biv = SvIVX(svr); 1552 if (biv >= 0) { 1553 right = biv; 1554 right_neg = FALSE; /* effectively it's a UV now */ 1555 } else { 1556 right = (UV) (0 - (UV) biv); 1557 } 1558 } 1559 } 1560 else { 1561 dright = SvNV_nomg(svr); 1562 right_neg = dright < 0; 1563 if (right_neg) 1564 dright = -dright; 1565 if (dright < UV_MAX_P1) { 1566 right = U_V(dright); 1567 dright_valid = TRUE; /* In case we need to use double below. */ 1568 } else { 1569 use_double = TRUE; 1570 } 1571 } 1572 1573 /* At this point use_double is only true if right is out of range for 1574 a UV. In range NV has been rounded down to nearest UV and 1575 use_double false. */ 1576 if (!use_double && SvIV_please_nomg(svl)) { 1577 left_neg = !SvUOK(svl); 1578 if (!left_neg) { 1579 left = SvUVX(svl); 1580 } else { 1581 const IV aiv = SvIVX(svl); 1582 if (aiv >= 0) { 1583 left = aiv; 1584 left_neg = FALSE; /* effectively it's a UV now */ 1585 } else { 1586 left = (UV) (0 - (UV) aiv); 1587 } 1588 } 1589 } 1590 else { 1591 dleft = SvNV_nomg(svl); 1592 left_neg = dleft < 0; 1593 if (left_neg) 1594 dleft = -dleft; 1595 1596 /* This should be exactly the 5.6 behaviour - if left and right are 1597 both in range for UV then use U_V() rather than floor. */ 1598 if (!use_double) { 1599 if (dleft < UV_MAX_P1) { 1600 /* right was in range, so is dleft, so use UVs not double. 1601 */ 1602 left = U_V(dleft); 1603 } 1604 /* left is out of range for UV, right was in range, so promote 1605 right (back) to double. */ 1606 else { 1607 /* The +0.5 is used in 5.6 even though it is not strictly 1608 consistent with the implicit +0 floor in the U_V() 1609 inside the #if 1. */ 1610 dleft = Perl_floor(dleft + 0.5); 1611 use_double = TRUE; 1612 if (dright_valid) 1613 dright = Perl_floor(dright + 0.5); 1614 else 1615 dright = right; 1616 } 1617 } 1618 } 1619 sp -= 2; 1620 if (use_double) { 1621 NV dans; 1622 1623 if (!dright) 1624 DIE(aTHX_ "Illegal modulus zero"); 1625 1626 dans = Perl_fmod(dleft, dright); 1627 if ((left_neg != right_neg) && dans) 1628 dans = dright - dans; 1629 if (right_neg) 1630 dans = -dans; 1631 sv_setnv(TARG, dans); 1632 } 1633 else { 1634 UV ans; 1635 1636 if (!right) 1637 DIE(aTHX_ "Illegal modulus zero"); 1638 1639 ans = left % right; 1640 if ((left_neg != right_neg) && ans) 1641 ans = right - ans; 1642 if (right_neg) { 1643 /* XXX may warn: unary minus operator applied to unsigned type */ 1644 /* could change -foo to be (~foo)+1 instead */ 1645 if (ans <= ~((UV)IV_MAX)+1) 1646 sv_setiv(TARG, ~ans+1); 1647 else 1648 sv_setnv(TARG, -(NV)ans); 1649 } 1650 else 1651 sv_setuv(TARG, ans); 1652 } 1653 PUSHTARG; 1654 RETURN; 1655 } 1656 } 1657 1658 PP(pp_repeat) 1659 { 1660 dSP; dATARGET; 1661 IV count; 1662 SV *sv; 1663 bool infnan = FALSE; 1664 const U8 gimme = GIMME_V; 1665 1666 if (gimme == G_LIST && PL_op->op_private & OPpREPEAT_DOLIST) { 1667 /* TODO: think of some way of doing list-repeat overloading ??? */ 1668 sv = POPs; 1669 SvGETMAGIC(sv); 1670 } 1671 else { 1672 if (UNLIKELY(PL_op->op_private & OPpREPEAT_DOLIST)) { 1673 /* The parser saw this as a list repeat, and there 1674 are probably several items on the stack. But we're 1675 in scalar/void context, and there's no pp_list to save us 1676 now. So drop the rest of the items -- robin@kitsite.com 1677 */ 1678 dMARK; 1679 if (MARK + 1 < SP) { 1680 MARK[1] = TOPm1s; 1681 MARK[2] = TOPs; 1682 } 1683 else { 1684 dTOPss; 1685 ASSUME(MARK + 1 == SP); 1686 MEXTEND(SP, 1); 1687 PUSHs(sv); 1688 MARK[1] = &PL_sv_undef; 1689 } 1690 SP = MARK + 2; 1691 } 1692 tryAMAGICbin_MG(repeat_amg, AMGf_assign); 1693 sv = POPs; 1694 } 1695 1696 if (SvIOKp(sv)) { 1697 if (SvUOK(sv)) { 1698 const UV uv = SvUV_nomg(sv); 1699 if (uv > IV_MAX) 1700 count = IV_MAX; /* The best we can do? */ 1701 else 1702 count = uv; 1703 } else { 1704 count = SvIV_nomg(sv); 1705 } 1706 } 1707 else if (SvNOKp(sv)) { 1708 const NV nv = SvNV_nomg(sv); 1709 infnan = Perl_isinfnan(nv); 1710 if (UNLIKELY(infnan)) { 1711 count = 0; 1712 } else { 1713 if (nv < 0.0) 1714 count = -1; /* An arbitrary negative integer */ 1715 else 1716 count = (IV)nv; 1717 } 1718 } 1719 else 1720 count = SvIV_nomg(sv); 1721 1722 if (infnan) { 1723 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC), 1724 "Non-finite repeat count does nothing"); 1725 } else if (count < 0) { 1726 count = 0; 1727 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC), 1728 "Negative repeat count does nothing"); 1729 } 1730 1731 if (gimme == G_LIST && PL_op->op_private & OPpREPEAT_DOLIST) { 1732 dMARK; 1733 const SSize_t items = SP - MARK; 1734 const U8 mod = PL_op->op_flags & OPf_MOD; 1735 1736 if (count > 1) { 1737 SSize_t max; 1738 1739 if ( items > SSize_t_MAX / count /* max would overflow */ 1740 /* repeatcpy would overflow */ 1741 || items > I32_MAX / (I32)sizeof(SV *) 1742 ) 1743 Perl_croak(aTHX_ "%s","Out of memory during list extend"); 1744 max = items * count; 1745 MEXTEND(MARK, max); 1746 1747 while (SP > MARK) { 1748 if (*SP) { 1749 if (mod && SvPADTMP(*SP)) { 1750 *SP = sv_mortalcopy(*SP); 1751 } 1752 SvTEMP_off((*SP)); 1753 } 1754 SP--; 1755 } 1756 MARK++; 1757 repeatcpy((char*)(MARK + items), (char*)MARK, 1758 items * sizeof(const SV *), count - 1); 1759 SP += max; 1760 } 1761 else if (count <= 0) 1762 SP = MARK; 1763 } 1764 else { /* Note: mark already snarfed by pp_list */ 1765 SV * const tmpstr = POPs; 1766 STRLEN len; 1767 bool isutf; 1768 1769 if (TARG != tmpstr) 1770 sv_setsv_nomg(TARG, tmpstr); 1771 SvPV_force_nomg(TARG, len); 1772 isutf = DO_UTF8(TARG); 1773 if (count != 1) { 1774 if (count < 1) 1775 SvCUR_set(TARG, 0); 1776 else { 1777 STRLEN max; 1778 1779 if ( len > (MEM_SIZE_MAX-1) / (UV)count /* max would overflow */ 1780 || len > (U32)I32_MAX /* repeatcpy would overflow */ 1781 ) 1782 Perl_croak(aTHX_ "%s", 1783 "Out of memory during string extend"); 1784 max = (UV)count * len + 1; 1785 SvGROW(TARG, max); 1786 1787 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1); 1788 SvCUR_set(TARG, SvCUR(TARG) * count); 1789 } 1790 *SvEND(TARG) = '\0'; 1791 } 1792 if (isutf) 1793 (void)SvPOK_only_UTF8(TARG); 1794 else 1795 (void)SvPOK_only(TARG); 1796 1797 PUSHTARG; 1798 } 1799 RETURN; 1800 } 1801 1802 PP(pp_subtract) 1803 { 1804 dSP; dATARGET; bool useleft; SV *svl, *svr; 1805 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric); 1806 svr = TOPs; 1807 svl = TOPm1s; 1808 1809 #ifdef PERL_PRESERVE_IVUV 1810 1811 /* special-case some simple common cases */ 1812 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) { 1813 IV il, ir; 1814 U32 flags = (svl->sv_flags & svr->sv_flags); 1815 if (flags & SVf_IOK) { 1816 /* both args are simple IVs */ 1817 UV topl, topr; 1818 il = SvIVX(svl); 1819 ir = SvIVX(svr); 1820 do_iv: 1821 topl = ((UV)il) >> (UVSIZE * 8 - 2); 1822 topr = ((UV)ir) >> (UVSIZE * 8 - 2); 1823 1824 /* if both are in a range that can't under/overflow, do a 1825 * simple integer subtract: if the top of both numbers 1826 * are 00 or 11, then it's safe */ 1827 if (!( ((topl+1) | (topr+1)) & 2)) { 1828 SP--; 1829 TARGi(il - ir, 0); /* args not GMG, so can't be tainted */ 1830 SETs(TARG); 1831 RETURN; 1832 } 1833 goto generic; 1834 } 1835 else if (flags & SVf_NOK) { 1836 /* both args are NVs */ 1837 NV nl = SvNVX(svl); 1838 NV nr = SvNVX(svr); 1839 1840 if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) { 1841 /* nothing was lost by converting to IVs */ 1842 goto do_iv; 1843 } 1844 SP--; 1845 TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */ 1846 SETs(TARG); 1847 RETURN; 1848 } 1849 } 1850 1851 generic: 1852 1853 useleft = USE_LEFT(svl); 1854 /* See comments in pp_add (in pp_hot.c) about Overflow, and how 1855 "bad things" happen if you rely on signed integers wrapping. */ 1856 if (SvIV_please_nomg(svr)) { 1857 /* Unless the left argument is integer in range we are going to have to 1858 use NV maths. Hence only attempt to coerce the right argument if 1859 we know the left is integer. */ 1860 UV auv = 0; 1861 bool auvok = FALSE; 1862 bool a_valid = 0; 1863 1864 if (!useleft) { 1865 auv = 0; 1866 a_valid = auvok = 1; 1867 /* left operand is undef, treat as zero. */ 1868 } else { 1869 /* Left operand is defined, so is it IV? */ 1870 if (SvIV_please_nomg(svl)) { 1871 if ((auvok = SvUOK(svl))) 1872 auv = SvUVX(svl); 1873 else { 1874 const IV aiv = SvIVX(svl); 1875 if (aiv >= 0) { 1876 auv = aiv; 1877 auvok = 1; /* Now acting as a sign flag. */ 1878 } else { 1879 auv = (UV) (0 - (UV) aiv); 1880 } 1881 } 1882 a_valid = 1; 1883 } 1884 } 1885 if (a_valid) { 1886 bool result_good = 0; 1887 UV result; 1888 UV buv; 1889 bool buvok = SvUOK(svr); 1890 1891 if (buvok) 1892 buv = SvUVX(svr); 1893 else { 1894 const IV biv = SvIVX(svr); 1895 if (biv >= 0) { 1896 buv = biv; 1897 buvok = 1; 1898 } else 1899 buv = (UV) (0 - (UV) biv); 1900 } 1901 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve, 1902 else "IV" now, independent of how it came in. 1903 if a, b represents positive, A, B negative, a maps to -A etc 1904 a - b => (a - b) 1905 A - b => -(a + b) 1906 a - B => (a + b) 1907 A - B => -(a - b) 1908 all UV maths. negate result if A negative. 1909 subtract if signs same, add if signs differ. */ 1910 1911 if (auvok ^ buvok) { 1912 /* Signs differ. */ 1913 result = auv + buv; 1914 if (result >= auv) 1915 result_good = 1; 1916 } else { 1917 /* Signs same */ 1918 if (auv >= buv) { 1919 result = auv - buv; 1920 /* Must get smaller */ 1921 if (result <= auv) 1922 result_good = 1; 1923 } else { 1924 result = buv - auv; 1925 if (result <= buv) { 1926 /* result really should be -(auv-buv). as its negation 1927 of true value, need to swap our result flag */ 1928 auvok = !auvok; 1929 result_good = 1; 1930 } 1931 } 1932 } 1933 if (result_good) { 1934 SP--; 1935 if (auvok) 1936 SETu( result ); 1937 else { 1938 /* Negate result */ 1939 if (result <= (UV)IV_MIN) 1940 SETi(result == (UV)IV_MIN 1941 ? IV_MIN : -(IV)result); 1942 else { 1943 /* result valid, but out of range for IV. */ 1944 SETn( -(NV)result ); 1945 } 1946 } 1947 RETURN; 1948 } /* Overflow, drop through to NVs. */ 1949 } 1950 } 1951 #else 1952 useleft = USE_LEFT(svl); 1953 #endif 1954 { 1955 NV value = SvNV_nomg(svr); 1956 (void)POPs; 1957 1958 if (!useleft) { 1959 /* left operand is undef, treat as zero - value */ 1960 SETn(-value); 1961 RETURN; 1962 } 1963 SETn( SvNV_nomg(svl) - value ); 1964 RETURN; 1965 } 1966 } 1967 1968 #define IV_BITS (IVSIZE * 8) 1969 1970 /* Taking the right operand of bitwise shift operators, returns an int 1971 * indicating the shift amount clipped to the range [-IV_BITS, +IV_BITS]. 1972 */ 1973 static int 1974 S_shift_amount(pTHX_ SV *const svr) 1975 { 1976 const IV iv = SvIV_nomg(svr); 1977 1978 /* Note that [INT_MIN, INT_MAX] cannot be used as the clipping bound; 1979 * INT_MIN will cause overflow in "shift = -shift;" in S_{iv,uv}_shift. 1980 */ 1981 if (SvIsUV(svr)) 1982 return SvUVX(svr) > IV_BITS ? IV_BITS : (int)SvUVX(svr); 1983 return iv < -IV_BITS ? -IV_BITS : iv > IV_BITS ? IV_BITS : (int)iv; 1984 } 1985 1986 static UV S_uv_shift(UV uv, int shift, bool left) 1987 { 1988 if (shift < 0) { 1989 shift = -shift; 1990 left = !left; 1991 } 1992 if (UNLIKELY(shift >= IV_BITS)) { 1993 return 0; 1994 } 1995 return left ? uv << shift : uv >> shift; 1996 } 1997 1998 static IV S_iv_shift(IV iv, int shift, bool left) 1999 { 2000 if (shift < 0) { 2001 shift = -shift; 2002 left = !left; 2003 } 2004 2005 if (UNLIKELY(shift >= IV_BITS)) { 2006 return iv < 0 && !left ? -1 : 0; 2007 } 2008 2009 /* For left shifts, perl 5 has chosen to treat the value as unsigned for 2010 * the purposes of shifting, then cast back to signed. This is very 2011 * different from Raku: 2012 * 2013 * $ raku -e 'say -2 +< 5' 2014 * -64 2015 * 2016 * $ ./perl -le 'print -2 << 5' 2017 * 18446744073709551552 2018 * */ 2019 if (left) { 2020 return (IV) (((UV) iv) << shift); 2021 } 2022 2023 /* Here is right shift */ 2024 return iv >> shift; 2025 } 2026 2027 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE) 2028 #define UV_RIGHT_SHIFT(uv, shift) S_uv_shift(uv, shift, FALSE) 2029 #define IV_LEFT_SHIFT(iv, shift) S_iv_shift(iv, shift, TRUE) 2030 #define IV_RIGHT_SHIFT(iv, shift) S_iv_shift(iv, shift, FALSE) 2031 2032 PP(pp_left_shift) 2033 { 2034 dSP; dATARGET; SV *svl, *svr; 2035 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric); 2036 svr = POPs; 2037 svl = TOPs; 2038 { 2039 const int shift = S_shift_amount(aTHX_ svr); 2040 if (PL_op->op_private & OPpUSEINT) { 2041 SETi(IV_LEFT_SHIFT(SvIV_nomg(svl), shift)); 2042 } 2043 else { 2044 SETu(UV_LEFT_SHIFT(SvUV_nomg(svl), shift)); 2045 } 2046 RETURN; 2047 } 2048 } 2049 2050 PP(pp_right_shift) 2051 { 2052 dSP; dATARGET; SV *svl, *svr; 2053 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric); 2054 svr = POPs; 2055 svl = TOPs; 2056 { 2057 const int shift = S_shift_amount(aTHX_ svr); 2058 if (PL_op->op_private & OPpUSEINT) { 2059 SETi(IV_RIGHT_SHIFT(SvIV_nomg(svl), shift)); 2060 } 2061 else { 2062 SETu(UV_RIGHT_SHIFT(SvUV_nomg(svl), shift)); 2063 } 2064 RETURN; 2065 } 2066 } 2067 2068 PP(pp_lt) 2069 { 2070 dSP; 2071 SV *left, *right; 2072 U32 flags_and, flags_or; 2073 2074 tryAMAGICbin_MG(lt_amg, AMGf_numeric); 2075 right = POPs; 2076 left = TOPs; 2077 flags_and = SvFLAGS(left) & SvFLAGS(right); 2078 flags_or = SvFLAGS(left) | SvFLAGS(right); 2079 2080 SETs(boolSV( 2081 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) 2082 ? (SvIVX(left) < SvIVX(right)) 2083 : (flags_and & SVf_NOK) 2084 ? (SvNVX(left) < SvNVX(right)) 2085 : (do_ncmp(left, right) == -1) 2086 )); 2087 RETURN; 2088 } 2089 2090 PP(pp_gt) 2091 { 2092 dSP; 2093 SV *left, *right; 2094 U32 flags_and, flags_or; 2095 2096 tryAMAGICbin_MG(gt_amg, AMGf_numeric); 2097 right = POPs; 2098 left = TOPs; 2099 flags_and = SvFLAGS(left) & SvFLAGS(right); 2100 flags_or = SvFLAGS(left) | SvFLAGS(right); 2101 2102 SETs(boolSV( 2103 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) 2104 ? (SvIVX(left) > SvIVX(right)) 2105 : (flags_and & SVf_NOK) 2106 ? (SvNVX(left) > SvNVX(right)) 2107 : (do_ncmp(left, right) == 1) 2108 )); 2109 RETURN; 2110 } 2111 2112 PP(pp_le) 2113 { 2114 dSP; 2115 SV *left, *right; 2116 U32 flags_and, flags_or; 2117 2118 tryAMAGICbin_MG(le_amg, AMGf_numeric); 2119 right = POPs; 2120 left = TOPs; 2121 flags_and = SvFLAGS(left) & SvFLAGS(right); 2122 flags_or = SvFLAGS(left) | SvFLAGS(right); 2123 2124 SETs(boolSV( 2125 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) 2126 ? (SvIVX(left) <= SvIVX(right)) 2127 : (flags_and & SVf_NOK) 2128 ? (SvNVX(left) <= SvNVX(right)) 2129 : (do_ncmp(left, right) <= 0) 2130 )); 2131 RETURN; 2132 } 2133 2134 PP(pp_ge) 2135 { 2136 dSP; 2137 SV *left, *right; 2138 U32 flags_and, flags_or; 2139 2140 tryAMAGICbin_MG(ge_amg, AMGf_numeric); 2141 right = POPs; 2142 left = TOPs; 2143 flags_and = SvFLAGS(left) & SvFLAGS(right); 2144 flags_or = SvFLAGS(left) | SvFLAGS(right); 2145 2146 SETs(boolSV( 2147 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) 2148 ? (SvIVX(left) >= SvIVX(right)) 2149 : (flags_and & SVf_NOK) 2150 ? (SvNVX(left) >= SvNVX(right)) 2151 : ( (do_ncmp(left, right) & 2) == 0) 2152 )); 2153 RETURN; 2154 } 2155 2156 PP(pp_ne) 2157 { 2158 dSP; 2159 SV *left, *right; 2160 U32 flags_and, flags_or; 2161 2162 tryAMAGICbin_MG(ne_amg, AMGf_numeric); 2163 right = POPs; 2164 left = TOPs; 2165 flags_and = SvFLAGS(left) & SvFLAGS(right); 2166 flags_or = SvFLAGS(left) | SvFLAGS(right); 2167 2168 SETs(boolSV( 2169 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) 2170 ? (SvIVX(left) != SvIVX(right)) 2171 : (flags_and & SVf_NOK) 2172 ? (SvNVX(left) != SvNVX(right)) 2173 : (do_ncmp(left, right) != 0) 2174 )); 2175 RETURN; 2176 } 2177 2178 /* compare left and right SVs. Returns: 2179 * -1: < 2180 * 0: == 2181 * 1: > 2182 * 2: left or right was a NaN 2183 */ 2184 I32 2185 Perl_do_ncmp(pTHX_ SV* const left, SV * const right) 2186 { 2187 PERL_ARGS_ASSERT_DO_NCMP; 2188 #ifdef PERL_PRESERVE_IVUV 2189 /* Fortunately it seems NaN isn't IOK */ 2190 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) { 2191 if (!SvUOK(left)) { 2192 const IV leftiv = SvIVX(left); 2193 if (!SvUOK(right)) { 2194 /* ## IV <=> IV ## */ 2195 const IV rightiv = SvIVX(right); 2196 return (leftiv > rightiv) - (leftiv < rightiv); 2197 } 2198 /* ## IV <=> UV ## */ 2199 if (leftiv < 0) 2200 /* As (b) is a UV, it's >=0, so it must be < */ 2201 return -1; 2202 { 2203 const UV rightuv = SvUVX(right); 2204 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv); 2205 } 2206 } 2207 2208 if (SvUOK(right)) { 2209 /* ## UV <=> UV ## */ 2210 const UV leftuv = SvUVX(left); 2211 const UV rightuv = SvUVX(right); 2212 return (leftuv > rightuv) - (leftuv < rightuv); 2213 } 2214 /* ## UV <=> IV ## */ 2215 { 2216 const IV rightiv = SvIVX(right); 2217 if (rightiv < 0) 2218 /* As (a) is a UV, it's >=0, so it cannot be < */ 2219 return 1; 2220 { 2221 const UV leftuv = SvUVX(left); 2222 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv); 2223 } 2224 } 2225 NOT_REACHED; /* NOTREACHED */ 2226 } 2227 #endif 2228 { 2229 NV const rnv = SvNV_nomg(right); 2230 NV const lnv = SvNV_nomg(left); 2231 2232 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2233 if (Perl_isnan(lnv) || Perl_isnan(rnv)) { 2234 return 2; 2235 } 2236 return (lnv > rnv) - (lnv < rnv); 2237 #else 2238 if (lnv < rnv) 2239 return -1; 2240 if (lnv > rnv) 2241 return 1; 2242 if (lnv == rnv) 2243 return 0; 2244 return 2; 2245 #endif 2246 } 2247 } 2248 2249 2250 PP(pp_ncmp) 2251 { 2252 dSP; 2253 SV *left, *right; 2254 I32 value; 2255 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric); 2256 right = POPs; 2257 left = TOPs; 2258 value = do_ncmp(left, right); 2259 if (value == 2) { 2260 SETs(&PL_sv_undef); 2261 } 2262 else { 2263 dTARGET; 2264 SETi(value); 2265 } 2266 RETURN; 2267 } 2268 2269 2270 /* also used for: pp_sge() pp_sgt() pp_slt() */ 2271 2272 PP(pp_sle) 2273 { 2274 dSP; 2275 2276 int amg_type = sle_amg; 2277 int multiplier = 1; 2278 int rhs = 1; 2279 2280 switch (PL_op->op_type) { 2281 case OP_SLT: 2282 amg_type = slt_amg; 2283 /* cmp < 0 */ 2284 rhs = 0; 2285 break; 2286 case OP_SGT: 2287 amg_type = sgt_amg; 2288 /* cmp > 0 */ 2289 multiplier = -1; 2290 rhs = 0; 2291 break; 2292 case OP_SGE: 2293 amg_type = sge_amg; 2294 /* cmp >= 0 */ 2295 multiplier = -1; 2296 break; 2297 } 2298 2299 tryAMAGICbin_MG(amg_type, 0); 2300 { 2301 dPOPTOPssrl; 2302 const int cmp = 2303 #ifdef USE_LOCALE_COLLATE 2304 (IN_LC_RUNTIME(LC_COLLATE)) 2305 ? sv_cmp_locale_flags(left, right, 0) 2306 : 2307 #endif 2308 sv_cmp_flags(left, right, 0); 2309 SETs(boolSV(cmp * multiplier < rhs)); 2310 RETURN; 2311 } 2312 } 2313 2314 PP(pp_seq) 2315 { 2316 dSP; 2317 tryAMAGICbin_MG(seq_amg, 0); 2318 { 2319 dPOPTOPssrl; 2320 SETs(boolSV(sv_eq_flags(left, right, 0))); 2321 RETURN; 2322 } 2323 } 2324 2325 PP(pp_sne) 2326 { 2327 dSP; 2328 tryAMAGICbin_MG(sne_amg, 0); 2329 { 2330 dPOPTOPssrl; 2331 SETs(boolSV(!sv_eq_flags(left, right, 0))); 2332 RETURN; 2333 } 2334 } 2335 2336 PP(pp_scmp) 2337 { 2338 dSP; dTARGET; 2339 tryAMAGICbin_MG(scmp_amg, 0); 2340 { 2341 dPOPTOPssrl; 2342 const int cmp = 2343 #ifdef USE_LOCALE_COLLATE 2344 (IN_LC_RUNTIME(LC_COLLATE)) 2345 ? sv_cmp_locale_flags(left, right, 0) 2346 : 2347 #endif 2348 sv_cmp_flags(left, right, 0); 2349 SETi( cmp ); 2350 RETURN; 2351 } 2352 } 2353 2354 PP(pp_bit_and) 2355 { 2356 dSP; dATARGET; 2357 tryAMAGICbin_MG(band_amg, AMGf_assign); 2358 { 2359 dPOPTOPssrl; 2360 if (SvNIOKp(left) || SvNIOKp(right)) { 2361 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); 2362 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); 2363 if (PL_op->op_private & OPpUSEINT) { 2364 const IV i = SvIV_nomg(left) & SvIV_nomg(right); 2365 SETi(i); 2366 } 2367 else { 2368 const UV u = SvUV_nomg(left) & SvUV_nomg(right); 2369 SETu(u); 2370 } 2371 if (left_ro_nonnum && left != TARG) SvNIOK_off(left); 2372 if (right_ro_nonnum) SvNIOK_off(right); 2373 } 2374 else { 2375 do_vop(PL_op->op_type, TARG, left, right); 2376 SETTARG; 2377 } 2378 RETURN; 2379 } 2380 } 2381 2382 PP(pp_nbit_and) 2383 { 2384 dSP; 2385 tryAMAGICbin_MG(band_amg, AMGf_assign|AMGf_numarg); 2386 { 2387 dATARGET; dPOPTOPssrl; 2388 if (PL_op->op_private & OPpUSEINT) { 2389 const IV i = SvIV_nomg(left) & SvIV_nomg(right); 2390 SETi(i); 2391 } 2392 else { 2393 const UV u = SvUV_nomg(left) & SvUV_nomg(right); 2394 SETu(u); 2395 } 2396 } 2397 RETURN; 2398 } 2399 2400 PP(pp_sbit_and) 2401 { 2402 dSP; 2403 tryAMAGICbin_MG(sband_amg, AMGf_assign); 2404 { 2405 dATARGET; dPOPTOPssrl; 2406 do_vop(OP_BIT_AND, TARG, left, right); 2407 RETSETTARG; 2408 } 2409 } 2410 2411 /* also used for: pp_bit_xor() */ 2412 2413 PP(pp_bit_or) 2414 { 2415 dSP; dATARGET; 2416 const int op_type = PL_op->op_type; 2417 2418 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign); 2419 { 2420 dPOPTOPssrl; 2421 if (SvNIOKp(left) || SvNIOKp(right)) { 2422 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); 2423 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); 2424 if (PL_op->op_private & OPpUSEINT) { 2425 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); 2426 const IV r = SvIV_nomg(right); 2427 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); 2428 SETi(result); 2429 } 2430 else { 2431 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0); 2432 const UV r = SvUV_nomg(right); 2433 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); 2434 SETu(result); 2435 } 2436 if (left_ro_nonnum && left != TARG) SvNIOK_off(left); 2437 if (right_ro_nonnum) SvNIOK_off(right); 2438 } 2439 else { 2440 do_vop(op_type, TARG, left, right); 2441 SETTARG; 2442 } 2443 RETURN; 2444 } 2445 } 2446 2447 /* also used for: pp_nbit_xor() */ 2448 2449 PP(pp_nbit_or) 2450 { 2451 dSP; 2452 const int op_type = PL_op->op_type; 2453 2454 tryAMAGICbin_MG((op_type == OP_NBIT_OR ? bor_amg : bxor_amg), 2455 AMGf_assign|AMGf_numarg); 2456 { 2457 dATARGET; dPOPTOPssrl; 2458 if (PL_op->op_private & OPpUSEINT) { 2459 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); 2460 const IV r = SvIV_nomg(right); 2461 const IV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r); 2462 SETi(result); 2463 } 2464 else { 2465 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0); 2466 const UV r = SvUV_nomg(right); 2467 const UV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r); 2468 SETu(result); 2469 } 2470 } 2471 RETURN; 2472 } 2473 2474 /* also used for: pp_sbit_xor() */ 2475 2476 PP(pp_sbit_or) 2477 { 2478 dSP; 2479 const int op_type = PL_op->op_type; 2480 2481 tryAMAGICbin_MG((op_type == OP_SBIT_OR ? sbor_amg : sbxor_amg), 2482 AMGf_assign); 2483 { 2484 dATARGET; dPOPTOPssrl; 2485 do_vop(op_type == OP_SBIT_OR ? OP_BIT_OR : OP_BIT_XOR, TARG, left, 2486 right); 2487 RETSETTARG; 2488 } 2489 } 2490 2491 PERL_STATIC_INLINE bool 2492 S_negate_string(pTHX) 2493 { 2494 dTARGET; dSP; 2495 STRLEN len; 2496 const char *s; 2497 SV * const sv = TOPs; 2498 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv))) 2499 return FALSE; 2500 s = SvPV_nomg_const(sv, len); 2501 if (isIDFIRST(*s)) { 2502 sv_setpvs(TARG, "-"); 2503 sv_catsv(TARG, sv); 2504 } 2505 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) { 2506 sv_setsv_nomg(TARG, sv); 2507 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-'; 2508 } 2509 else return FALSE; 2510 SETTARG; 2511 return TRUE; 2512 } 2513 2514 PP(pp_negate) 2515 { 2516 dSP; dTARGET; 2517 tryAMAGICun_MG(neg_amg, AMGf_numeric); 2518 if (S_negate_string(aTHX)) return NORMAL; 2519 { 2520 SV * const sv = TOPs; 2521 2522 if (SvIOK(sv)) { 2523 /* It's publicly an integer */ 2524 oops_its_an_int: 2525 if (SvIsUV(sv)) { 2526 if (SvIVX(sv) == IV_MIN) { 2527 /* 2s complement assumption. */ 2528 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == 2529 IV_MIN */ 2530 return NORMAL; 2531 } 2532 else if (SvUVX(sv) <= IV_MAX) { 2533 SETi(-SvIVX(sv)); 2534 return NORMAL; 2535 } 2536 } 2537 else if (SvIVX(sv) != IV_MIN) { 2538 SETi(-SvIVX(sv)); 2539 return NORMAL; 2540 } 2541 #ifdef PERL_PRESERVE_IVUV 2542 else { 2543 SETu((UV)IV_MIN); 2544 return NORMAL; 2545 } 2546 #endif 2547 } 2548 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv))) 2549 SETn(-SvNV_nomg(sv)); 2550 else if (SvPOKp(sv) && SvIV_please_nomg(sv)) 2551 goto oops_its_an_int; 2552 else 2553 SETn(-SvNV_nomg(sv)); 2554 } 2555 return NORMAL; 2556 } 2557 2558 PP(pp_not) 2559 { 2560 dSP; 2561 SV *sv; 2562 2563 tryAMAGICun_MG(not_amg, 0); 2564 sv = *PL_stack_sp; 2565 *PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv)); 2566 return NORMAL; 2567 } 2568 2569 static void 2570 S_scomplement(pTHX_ SV *targ, SV *sv) 2571 { 2572 U8 *tmps; 2573 I32 anum; 2574 STRLEN len; 2575 2576 sv_copypv_nomg(TARG, sv); 2577 tmps = (U8*)SvPV_nomg(TARG, len); 2578 2579 if (SvUTF8(TARG)) { 2580 if (len && ! utf8_to_bytes(tmps, &len)) { 2581 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[PL_op->op_type]); 2582 } 2583 SvCUR_set(TARG, len); 2584 SvUTF8_off(TARG); 2585 } 2586 2587 anum = len; 2588 2589 { 2590 long *tmpl; 2591 for ( ; anum && PTR2nat(tmps) % sizeof(long); anum--, tmps++) 2592 *tmps = ~*tmps; 2593 tmpl = (long*)tmps; 2594 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++) 2595 *tmpl = ~*tmpl; 2596 tmps = (U8*)tmpl; 2597 } 2598 2599 for ( ; anum > 0; anum--, tmps++) 2600 *tmps = ~*tmps; 2601 } 2602 2603 PP(pp_complement) 2604 { 2605 dSP; dTARGET; 2606 tryAMAGICun_MG(compl_amg, AMGf_numeric); 2607 { 2608 dTOPss; 2609 if (SvNIOKp(sv)) { 2610 if (PL_op->op_private & OPpUSEINT) { 2611 const IV i = ~SvIV_nomg(sv); 2612 SETi(i); 2613 } 2614 else { 2615 const UV u = ~SvUV_nomg(sv); 2616 SETu(u); 2617 } 2618 } 2619 else { 2620 S_scomplement(aTHX_ TARG, sv); 2621 SETTARG; 2622 } 2623 return NORMAL; 2624 } 2625 } 2626 2627 PP(pp_ncomplement) 2628 { 2629 dSP; 2630 tryAMAGICun_MG(compl_amg, AMGf_numeric|AMGf_numarg); 2631 { 2632 dTARGET; dTOPss; 2633 if (PL_op->op_private & OPpUSEINT) { 2634 const IV i = ~SvIV_nomg(sv); 2635 SETi(i); 2636 } 2637 else { 2638 const UV u = ~SvUV_nomg(sv); 2639 SETu(u); 2640 } 2641 } 2642 return NORMAL; 2643 } 2644 2645 PP(pp_scomplement) 2646 { 2647 dSP; 2648 tryAMAGICun_MG(scompl_amg, AMGf_numeric); 2649 { 2650 dTARGET; dTOPss; 2651 S_scomplement(aTHX_ TARG, sv); 2652 SETTARG; 2653 return NORMAL; 2654 } 2655 } 2656 2657 /* integer versions of some of the above */ 2658 2659 PP(pp_i_multiply) 2660 { 2661 dSP; dATARGET; 2662 tryAMAGICbin_MG(mult_amg, AMGf_assign); 2663 { 2664 dPOPTOPiirl_nomg; 2665 SETi( left * right ); 2666 RETURN; 2667 } 2668 } 2669 2670 PP(pp_i_divide) 2671 { 2672 IV num; 2673 dSP; dATARGET; 2674 tryAMAGICbin_MG(div_amg, AMGf_assign); 2675 { 2676 dPOPTOPssrl; 2677 IV value = SvIV_nomg(right); 2678 if (value == 0) 2679 DIE(aTHX_ "Illegal division by zero"); 2680 num = SvIV_nomg(left); 2681 2682 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */ 2683 if (value == -1) 2684 value = - num; 2685 else 2686 value = num / value; 2687 SETi(value); 2688 RETURN; 2689 } 2690 } 2691 2692 PP(pp_i_modulo) 2693 { 2694 dSP; dATARGET; 2695 tryAMAGICbin_MG(modulo_amg, AMGf_assign); 2696 { 2697 dPOPTOPiirl_nomg; 2698 if (!right) 2699 DIE(aTHX_ "Illegal modulus zero"); 2700 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ 2701 if (right == -1) 2702 SETi( 0 ); 2703 else 2704 SETi( left % right ); 2705 RETURN; 2706 } 2707 } 2708 2709 PP(pp_i_add) 2710 { 2711 dSP; dATARGET; 2712 tryAMAGICbin_MG(add_amg, AMGf_assign); 2713 { 2714 dPOPTOPiirl_ul_nomg; 2715 SETi( left + right ); 2716 RETURN; 2717 } 2718 } 2719 2720 PP(pp_i_subtract) 2721 { 2722 dSP; dATARGET; 2723 tryAMAGICbin_MG(subtr_amg, AMGf_assign); 2724 { 2725 dPOPTOPiirl_ul_nomg; 2726 SETi( left - right ); 2727 RETURN; 2728 } 2729 } 2730 2731 PP(pp_i_lt) 2732 { 2733 dSP; 2734 tryAMAGICbin_MG(lt_amg, 0); 2735 { 2736 dPOPTOPiirl_nomg; 2737 SETs(boolSV(left < right)); 2738 RETURN; 2739 } 2740 } 2741 2742 PP(pp_i_gt) 2743 { 2744 dSP; 2745 tryAMAGICbin_MG(gt_amg, 0); 2746 { 2747 dPOPTOPiirl_nomg; 2748 SETs(boolSV(left > right)); 2749 RETURN; 2750 } 2751 } 2752 2753 PP(pp_i_le) 2754 { 2755 dSP; 2756 tryAMAGICbin_MG(le_amg, 0); 2757 { 2758 dPOPTOPiirl_nomg; 2759 SETs(boolSV(left <= right)); 2760 RETURN; 2761 } 2762 } 2763 2764 PP(pp_i_ge) 2765 { 2766 dSP; 2767 tryAMAGICbin_MG(ge_amg, 0); 2768 { 2769 dPOPTOPiirl_nomg; 2770 SETs(boolSV(left >= right)); 2771 RETURN; 2772 } 2773 } 2774 2775 PP(pp_i_eq) 2776 { 2777 dSP; 2778 tryAMAGICbin_MG(eq_amg, 0); 2779 { 2780 dPOPTOPiirl_nomg; 2781 SETs(boolSV(left == right)); 2782 RETURN; 2783 } 2784 } 2785 2786 PP(pp_i_ne) 2787 { 2788 dSP; 2789 tryAMAGICbin_MG(ne_amg, 0); 2790 { 2791 dPOPTOPiirl_nomg; 2792 SETs(boolSV(left != right)); 2793 RETURN; 2794 } 2795 } 2796 2797 PP(pp_i_ncmp) 2798 { 2799 dSP; dTARGET; 2800 tryAMAGICbin_MG(ncmp_amg, 0); 2801 { 2802 dPOPTOPiirl_nomg; 2803 I32 value; 2804 2805 if (left > right) 2806 value = 1; 2807 else if (left < right) 2808 value = -1; 2809 else 2810 value = 0; 2811 SETi(value); 2812 RETURN; 2813 } 2814 } 2815 2816 PP(pp_i_negate) 2817 { 2818 dSP; dTARGET; 2819 tryAMAGICun_MG(neg_amg, 0); 2820 if (S_negate_string(aTHX)) return NORMAL; 2821 { 2822 SV * const sv = TOPs; 2823 IV const i = SvIV_nomg(sv); 2824 SETi(-i); 2825 return NORMAL; 2826 } 2827 } 2828 2829 /* High falutin' math. */ 2830 2831 PP(pp_atan2) 2832 { 2833 dSP; dTARGET; 2834 tryAMAGICbin_MG(atan2_amg, 0); 2835 { 2836 dPOPTOPnnrl_nomg; 2837 SETn(Perl_atan2(left, right)); 2838 RETURN; 2839 } 2840 } 2841 2842 2843 /* also used for: pp_cos() pp_exp() pp_log() pp_sqrt() */ 2844 2845 PP(pp_sin) 2846 { 2847 dSP; dTARGET; 2848 int amg_type = fallback_amg; 2849 const char *neg_report = NULL; 2850 const int op_type = PL_op->op_type; 2851 2852 switch (op_type) { 2853 case OP_SIN: amg_type = sin_amg; break; 2854 case OP_COS: amg_type = cos_amg; break; 2855 case OP_EXP: amg_type = exp_amg; break; 2856 case OP_LOG: amg_type = log_amg; neg_report = "log"; break; 2857 case OP_SQRT: amg_type = sqrt_amg; neg_report = "sqrt"; break; 2858 } 2859 2860 assert(amg_type != fallback_amg); 2861 2862 tryAMAGICun_MG(amg_type, 0); 2863 { 2864 SV * const arg = TOPs; 2865 const NV value = SvNV_nomg(arg); 2866 #ifdef NV_NAN 2867 NV result = NV_NAN; 2868 #else 2869 NV result = 0.0; 2870 #endif 2871 if (neg_report) { /* log or sqrt */ 2872 if ( 2873 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2874 ! Perl_isnan(value) && 2875 #endif 2876 (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0))) { 2877 SET_NUMERIC_STANDARD(); 2878 /* diag_listed_as: Can't take log of %g */ 2879 DIE(aTHX_ "Can't take %s of %" NVgf, neg_report, value); 2880 } 2881 } 2882 switch (op_type) { 2883 default: 2884 case OP_SIN: result = Perl_sin(value); break; 2885 case OP_COS: result = Perl_cos(value); break; 2886 case OP_EXP: result = Perl_exp(value); break; 2887 case OP_LOG: result = Perl_log(value); break; 2888 case OP_SQRT: result = Perl_sqrt(value); break; 2889 } 2890 SETn(result); 2891 return NORMAL; 2892 } 2893 } 2894 2895 /* Support Configure command-line overrides for rand() functions. 2896 After 5.005, perhaps we should replace this by Configure support 2897 for drand48(), random(), or rand(). For 5.005, though, maintain 2898 compatibility by calling rand() but allow the user to override it. 2899 See INSTALL for details. --Andy Dougherty 15 July 1998 2900 */ 2901 /* Now it's after 5.005, and Configure supports drand48() and random(), 2902 in addition to rand(). So the overrides should not be needed any more. 2903 --Jarkko Hietaniemi 27 September 1998 2904 */ 2905 2906 PP(pp_rand) 2907 { 2908 if (!PL_srand_called) { 2909 (void)seedDrand01((Rand_seed_t)seed()); 2910 PL_srand_called = TRUE; 2911 } 2912 { 2913 dSP; 2914 NV value; 2915 2916 if (MAXARG < 1) 2917 { 2918 EXTEND(SP, 1); 2919 value = 1.0; 2920 } 2921 else { 2922 SV * const sv = POPs; 2923 if(!sv) 2924 value = 1.0; 2925 else 2926 value = SvNV(sv); 2927 } 2928 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */ 2929 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2930 if (! Perl_isnan(value) && value == 0.0) 2931 #else 2932 if (value == 0.0) 2933 #endif 2934 value = 1.0; 2935 { 2936 dTARGET; 2937 PUSHs(TARG); 2938 PUTBACK; 2939 value *= Drand01(); 2940 sv_setnv_mg(TARG, value); 2941 } 2942 } 2943 return NORMAL; 2944 } 2945 2946 PP(pp_srand) 2947 { 2948 dSP; dTARGET; 2949 UV anum; 2950 2951 if (MAXARG >= 1 && (TOPs || POPs)) { 2952 SV *top; 2953 char *pv; 2954 STRLEN len; 2955 int flags; 2956 2957 top = POPs; 2958 pv = SvPV(top, len); 2959 flags = grok_number(pv, len, &anum); 2960 2961 if (!(flags & IS_NUMBER_IN_UV)) { 2962 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 2963 "Integer overflow in srand"); 2964 anum = UV_MAX; 2965 } 2966 (void)srand48_deterministic((Rand_seed_t)anum); 2967 } 2968 else { 2969 anum = seed(); 2970 (void)seedDrand01((Rand_seed_t)anum); 2971 } 2972 2973 PL_srand_called = TRUE; 2974 if (anum) 2975 XPUSHu(anum); 2976 else { 2977 /* Historically srand always returned true. We can avoid breaking 2978 that like this: */ 2979 sv_setpvs(TARG, "0 but true"); 2980 XPUSHTARG; 2981 } 2982 RETURN; 2983 } 2984 2985 PP(pp_int) 2986 { 2987 dSP; dTARGET; 2988 tryAMAGICun_MG(int_amg, AMGf_numeric); 2989 { 2990 SV * const sv = TOPs; 2991 const IV iv = SvIV_nomg(sv); 2992 /* XXX it's arguable that compiler casting to IV might be subtly 2993 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which 2994 else preferring IV has introduced a subtle behaviour change bug. OTOH 2995 relying on floating point to be accurate is a bug. */ 2996 2997 if (!SvOK(sv)) { 2998 SETu(0); 2999 } 3000 else if (SvIOK(sv)) { 3001 if (SvIsUV(sv)) 3002 SETu(SvUV_nomg(sv)); 3003 else 3004 SETi(iv); 3005 } 3006 else { 3007 const NV value = SvNV_nomg(sv); 3008 if (UNLIKELY(Perl_isinfnan(value))) 3009 SETn(value); 3010 else if (value >= 0.0) { 3011 if (value < (NV)UV_MAX + 0.5) { 3012 SETu(U_V(value)); 3013 } else { 3014 SETn(Perl_floor(value)); 3015 } 3016 } 3017 else { 3018 if (value > (NV)IV_MIN - 0.5) { 3019 SETi(I_V(value)); 3020 } else { 3021 SETn(Perl_ceil(value)); 3022 } 3023 } 3024 } 3025 } 3026 return NORMAL; 3027 } 3028 3029 PP(pp_abs) 3030 { 3031 dSP; dTARGET; 3032 tryAMAGICun_MG(abs_amg, AMGf_numeric); 3033 { 3034 SV * const sv = TOPs; 3035 /* This will cache the NV value if string isn't actually integer */ 3036 const IV iv = SvIV_nomg(sv); 3037 UV uv; 3038 3039 if (!SvOK(sv)) { 3040 uv = 0; 3041 goto set_uv; 3042 } 3043 else if (SvIOK(sv)) { 3044 /* IVX is precise */ 3045 if (SvIsUV(sv)) { 3046 uv = SvUVX(sv); /* force it to be numeric only */ 3047 } else { 3048 if (iv >= 0) { 3049 uv = (UV)iv; 3050 } else { 3051 /* "(UV)-(iv + 1) + 1" below is mathematically "-iv", but 3052 transformed so that every subexpression will never trigger 3053 overflows even on 2's complement representation (note that 3054 iv is always < 0 here), and modern compilers could optimize 3055 this to a single negation. */ 3056 uv = (UV)-(iv + 1) + 1; 3057 } 3058 } 3059 set_uv: 3060 SETu(uv); 3061 } else{ 3062 const NV value = SvNV_nomg(sv); 3063 SETn(Perl_fabs(value)); 3064 } 3065 } 3066 return NORMAL; 3067 } 3068 3069 3070 /* also used for: pp_hex() */ 3071 3072 PP(pp_oct) 3073 { 3074 dSP; dTARGET; 3075 const char *tmps; 3076 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 3077 STRLEN len; 3078 NV result_nv; 3079 UV result_uv; 3080 SV* const sv = TOPs; 3081 3082 tmps = (SvPV_const(sv, len)); 3083 if (DO_UTF8(sv)) { 3084 /* If Unicode, try to downgrade 3085 * If not possible, croak. */ 3086 SV* const tsv = sv_2mortal(newSVsv(sv)); 3087 3088 SvUTF8_on(tsv); 3089 sv_utf8_downgrade(tsv, FALSE); 3090 tmps = SvPV_const(tsv, len); 3091 } 3092 if (PL_op->op_type == OP_HEX) 3093 goto hex; 3094 3095 while (*tmps && len && isSPACE(*tmps)) 3096 tmps++, len--; 3097 if (*tmps == '0') 3098 tmps++, len--; 3099 if (isALPHA_FOLD_EQ(*tmps, 'x')) { 3100 tmps++, len--; 3101 flags |= PERL_SCAN_DISALLOW_PREFIX; 3102 hex: 3103 result_uv = grok_hex (tmps, &len, &flags, &result_nv); 3104 } 3105 else if (isALPHA_FOLD_EQ(*tmps, 'b')) { 3106 tmps++, len--; 3107 flags |= PERL_SCAN_DISALLOW_PREFIX; 3108 result_uv = grok_bin (tmps, &len, &flags, &result_nv); 3109 } 3110 else { 3111 if (isALPHA_FOLD_EQ(*tmps, 'o')) { 3112 tmps++, len--; 3113 } 3114 result_uv = grok_oct (tmps, &len, &flags, &result_nv); 3115 } 3116 3117 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) { 3118 SETn(result_nv); 3119 } 3120 else { 3121 SETu(result_uv); 3122 } 3123 return NORMAL; 3124 } 3125 3126 /* String stuff. */ 3127 3128 3129 PP(pp_length) 3130 { 3131 dSP; dTARGET; 3132 SV * const sv = TOPs; 3133 3134 U32 in_bytes = IN_BYTES; 3135 /* Simplest case shortcut: 3136 * set svflags to just the SVf_POK|SVs_GMG|SVf_UTF8 from the SV, 3137 * with the SVf_UTF8 flag inverted if under 'use bytes' (HINT_BYTES 3138 * set) 3139 */ 3140 U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8); 3141 3142 STATIC_ASSERT_STMT(SVf_UTF8 == (HINT_BYTES << 26)); 3143 SETs(TARG); 3144 3145 if (LIKELY(svflags == SVf_POK)) 3146 goto simple_pv; 3147 3148 if (svflags & SVs_GMG) 3149 mg_get(sv); 3150 3151 if (SvOK(sv)) { 3152 STRLEN len; 3153 if (!IN_BYTES) { /* reread to avoid using an C auto/register */ 3154 if ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == SVf_POK) 3155 goto simple_pv; 3156 if ( SvPOK(sv) && (PL_op->op_private & OPpTRUEBOOL)) { 3157 /* no need to convert from bytes to chars */ 3158 len = SvCUR(sv); 3159 goto return_bool; 3160 } 3161 len = sv_len_utf8_nomg(sv); 3162 } 3163 else { 3164 /* unrolled SvPV_nomg_const(sv,len) */ 3165 if (SvPOK_nog(sv)) { 3166 simple_pv: 3167 len = SvCUR(sv); 3168 if (PL_op->op_private & OPpTRUEBOOL) { 3169 return_bool: 3170 SETs(len ? &PL_sv_yes : &PL_sv_zero); 3171 return NORMAL; 3172 } 3173 } 3174 else { 3175 (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN); 3176 } 3177 } 3178 TARGi((IV)(len), 1); 3179 } 3180 else { 3181 if (!SvPADTMP(TARG)) { 3182 /* OPpTARGET_MY: targ is var in '$lex = length()' */ 3183 sv_set_undef(TARG); 3184 SvSETMAGIC(TARG); 3185 } 3186 else 3187 /* TARG is on stack at this point and is overwriten by SETs. 3188 * This branch is the odd one out, so put TARG by default on 3189 * stack earlier to let local SP go out of liveness sooner */ 3190 SETs(&PL_sv_undef); 3191 } 3192 return NORMAL; /* no putback, SP didn't move in this opcode */ 3193 } 3194 3195 3196 /* Returns false if substring is completely outside original string. 3197 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must 3198 always be true for an explicit 0. 3199 */ 3200 bool 3201 Perl_translate_substr_offsets( STRLEN curlen, IV pos1_iv, 3202 bool pos1_is_uv, IV len_iv, 3203 bool len_is_uv, STRLEN *posp, 3204 STRLEN *lenp) 3205 { 3206 IV pos2_iv; 3207 int pos2_is_uv; 3208 3209 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS; 3210 3211 if (!pos1_is_uv && pos1_iv < 0 && curlen) { 3212 pos1_is_uv = curlen-1 > ~(UV)pos1_iv; 3213 pos1_iv += curlen; 3214 } 3215 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen) 3216 return FALSE; 3217 3218 if (len_iv || len_is_uv) { 3219 if (!len_is_uv && len_iv < 0) { 3220 pos2_iv = curlen + len_iv; 3221 if (curlen) 3222 pos2_is_uv = curlen-1 > ~(UV)len_iv; 3223 else 3224 pos2_is_uv = 0; 3225 } else { /* len_iv >= 0 */ 3226 if (!pos1_is_uv && pos1_iv < 0) { 3227 pos2_iv = pos1_iv + len_iv; 3228 pos2_is_uv = (UV)len_iv > (UV)IV_MAX; 3229 } else { 3230 if ((UV)len_iv > curlen-(UV)pos1_iv) 3231 pos2_iv = curlen; 3232 else 3233 pos2_iv = pos1_iv+len_iv; 3234 pos2_is_uv = 1; 3235 } 3236 } 3237 } 3238 else { 3239 pos2_iv = curlen; 3240 pos2_is_uv = 1; 3241 } 3242 3243 if (!pos2_is_uv && pos2_iv < 0) { 3244 if (!pos1_is_uv && pos1_iv < 0) 3245 return FALSE; 3246 pos2_iv = 0; 3247 } 3248 else if (!pos1_is_uv && pos1_iv < 0) 3249 pos1_iv = 0; 3250 3251 if ((UV)pos2_iv < (UV)pos1_iv) 3252 pos2_iv = pos1_iv; 3253 if ((UV)pos2_iv > curlen) 3254 pos2_iv = curlen; 3255 3256 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */ 3257 *posp = (STRLEN)( (UV)pos1_iv ); 3258 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv ); 3259 3260 return TRUE; 3261 } 3262 3263 PP(pp_substr) 3264 { 3265 dSP; dTARGET; 3266 SV *sv; 3267 STRLEN curlen; 3268 STRLEN utf8_curlen; 3269 SV * pos_sv; 3270 IV pos1_iv; 3271 int pos1_is_uv; 3272 SV * len_sv; 3273 IV len_iv = 0; 3274 int len_is_uv = 0; 3275 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 3276 const bool rvalue = (GIMME_V != G_VOID); 3277 const char *tmps; 3278 SV *repl_sv = NULL; 3279 const char *repl = NULL; 3280 STRLEN repl_len; 3281 int num_args = PL_op->op_private & 7; 3282 bool repl_need_utf8_upgrade = FALSE; 3283 3284 if (num_args > 2) { 3285 if (num_args > 3) { 3286 if(!(repl_sv = POPs)) num_args--; 3287 } 3288 if ((len_sv = POPs)) { 3289 len_iv = SvIV(len_sv); 3290 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1; 3291 } 3292 else num_args--; 3293 } 3294 pos_sv = POPs; 3295 pos1_iv = SvIV(pos_sv); 3296 pos1_is_uv = SvIOK_UV(pos_sv); 3297 sv = POPs; 3298 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) { 3299 assert(!repl_sv); 3300 repl_sv = POPs; 3301 } 3302 if (lvalue && !repl_sv) { 3303 SV * ret; 3304 ret = newSV_type_mortal(SVt_PVLV); /* Not TARG RT#67838 */ 3305 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); 3306 LvTYPE(ret) = 'x'; 3307 LvTARG(ret) = SvREFCNT_inc_simple(sv); 3308 LvTARGOFF(ret) = 3309 pos1_is_uv || pos1_iv >= 0 3310 ? (STRLEN)(UV)pos1_iv 3311 : (LvFLAGS(ret) |= LVf_NEG_OFF, (STRLEN)(UV)-pos1_iv); 3312 LvTARGLEN(ret) = 3313 len_is_uv || len_iv > 0 3314 ? (STRLEN)(UV)len_iv 3315 : (LvFLAGS(ret) |= LVf_NEG_LEN, (STRLEN)(UV)-len_iv); 3316 3317 PUSHs(ret); /* avoid SvSETMAGIC here */ 3318 RETURN; 3319 } 3320 if (repl_sv) { 3321 repl = SvPV_const(repl_sv, repl_len); 3322 SvGETMAGIC(sv); 3323 if (SvROK(sv)) 3324 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), 3325 "Attempt to use reference as lvalue in substr" 3326 ); 3327 tmps = SvPV_force_nomg(sv, curlen); 3328 if (DO_UTF8(repl_sv) && repl_len) { 3329 if (!DO_UTF8(sv)) { 3330 /* Upgrade the dest, and recalculate tmps in case the buffer 3331 * got reallocated; curlen may also have been changed */ 3332 sv_utf8_upgrade_nomg(sv); 3333 tmps = SvPV_nomg(sv, curlen); 3334 } 3335 } 3336 else if (DO_UTF8(sv)) 3337 repl_need_utf8_upgrade = TRUE; 3338 } 3339 else tmps = SvPV_const(sv, curlen); 3340 if (DO_UTF8(sv)) { 3341 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen); 3342 if (utf8_curlen == curlen) 3343 utf8_curlen = 0; 3344 else 3345 curlen = utf8_curlen; 3346 } 3347 else 3348 utf8_curlen = 0; 3349 3350 { 3351 STRLEN pos, len, byte_len, byte_pos; 3352 3353 if (!translate_substr_offsets( 3354 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len 3355 )) goto bound_fail; 3356 3357 byte_len = len; 3358 byte_pos = utf8_curlen 3359 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos; 3360 3361 tmps += byte_pos; 3362 3363 if (rvalue) { 3364 SvTAINTED_off(TARG); /* decontaminate */ 3365 SvUTF8_off(TARG); /* decontaminate */ 3366 sv_setpvn(TARG, tmps, byte_len); 3367 #ifdef USE_LOCALE_COLLATE 3368 sv_unmagic(TARG, PERL_MAGIC_collxfrm); 3369 #endif 3370 if (utf8_curlen) 3371 SvUTF8_on(TARG); 3372 } 3373 3374 if (repl) { 3375 SV* repl_sv_copy = NULL; 3376 3377 if (repl_need_utf8_upgrade) { 3378 repl_sv_copy = newSVsv(repl_sv); 3379 sv_utf8_upgrade(repl_sv_copy); 3380 repl = SvPV_const(repl_sv_copy, repl_len); 3381 } 3382 if (!SvOK(sv)) 3383 SvPVCLEAR(sv); 3384 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0); 3385 SvREFCNT_dec(repl_sv_copy); 3386 } 3387 } 3388 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) 3389 SP++; 3390 else if (rvalue) { 3391 SvSETMAGIC(TARG); 3392 PUSHs(TARG); 3393 } 3394 RETURN; 3395 3396 bound_fail: 3397 if (repl) 3398 Perl_croak(aTHX_ "substr outside of string"); 3399 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); 3400 RETPUSHUNDEF; 3401 } 3402 3403 PP(pp_vec) 3404 { 3405 dSP; 3406 const IV size = POPi; 3407 SV* offsetsv = POPs; 3408 SV * const src = POPs; 3409 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 3410 SV * ret; 3411 UV retuv; 3412 STRLEN offset = 0; 3413 char errflags = 0; 3414 3415 /* extract a STRLEN-ranged integer value from offsetsv into offset, 3416 * or flag that its out of range */ 3417 { 3418 IV iv = SvIV(offsetsv); 3419 3420 /* avoid a large UV being wrapped to a negative value */ 3421 if (SvIOK_UV(offsetsv) && SvUVX(offsetsv) > (UV)IV_MAX) 3422 errflags = LVf_OUT_OF_RANGE; 3423 else if (iv < 0) 3424 errflags = (LVf_NEG_OFF|LVf_OUT_OF_RANGE); 3425 #if PTRSIZE < IVSIZE 3426 else if (iv > Size_t_MAX) 3427 errflags = LVf_OUT_OF_RANGE; 3428 #endif 3429 else 3430 offset = (STRLEN)iv; 3431 } 3432 3433 retuv = errflags ? 0 : do_vecget(src, offset, size); 3434 3435 if (lvalue) { /* it's an lvalue! */ 3436 ret = newSV_type_mortal(SVt_PVLV); /* Not TARG RT#67838 */ 3437 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0); 3438 LvTYPE(ret) = 'v'; 3439 LvTARG(ret) = SvREFCNT_inc_simple(src); 3440 LvTARGOFF(ret) = offset; 3441 LvTARGLEN(ret) = size; 3442 LvFLAGS(ret) = errflags; 3443 } 3444 else { 3445 dTARGET; 3446 SvTAINTED_off(TARG); /* decontaminate */ 3447 ret = TARG; 3448 } 3449 3450 sv_setuv(ret, retuv); 3451 if (!lvalue) 3452 SvSETMAGIC(ret); 3453 PUSHs(ret); 3454 RETURN; 3455 } 3456 3457 3458 /* also used for: pp_rindex() */ 3459 3460 PP(pp_index) 3461 { 3462 dSP; dTARGET; 3463 SV *big; 3464 SV *little; 3465 SV *temp = NULL; 3466 STRLEN biglen; 3467 STRLEN llen = 0; 3468 SSize_t offset = 0; 3469 SSize_t retval; 3470 const char *big_p; 3471 const char *little_p; 3472 bool big_utf8; 3473 bool little_utf8; 3474 const bool is_index = PL_op->op_type == OP_INDEX; 3475 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0)); 3476 3477 if (threeargs) 3478 offset = POPi; 3479 little = POPs; 3480 big = POPs; 3481 big_p = SvPV_const(big, biglen); 3482 little_p = SvPV_const(little, llen); 3483 3484 big_utf8 = DO_UTF8(big); 3485 little_utf8 = DO_UTF8(little); 3486 if (big_utf8 ^ little_utf8) { 3487 /* One needs to be upgraded. */ 3488 if (little_utf8) { 3489 /* Well, maybe instead we might be able to downgrade the small 3490 string? */ 3491 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen, 3492 &little_utf8); 3493 if (little_utf8) { 3494 /* If the large string is ISO-8859-1, and it's not possible to 3495 convert the small string to ISO-8859-1, then there is no 3496 way that it could be found anywhere by index. */ 3497 retval = -1; 3498 goto push_result; 3499 } 3500 3501 /* At this point, pv is a malloc()ed string. So donate it to temp 3502 to ensure it will get free()d */ 3503 little = temp = newSV_type(SVt_NULL); 3504 sv_usepvn(temp, pv, llen); 3505 little_p = SvPVX(little); 3506 } else { 3507 temp = newSVpvn(little_p, llen); 3508 3509 sv_utf8_upgrade(temp); 3510 little = temp; 3511 little_p = SvPV_const(little, llen); 3512 } 3513 } 3514 if (SvGAMAGIC(big)) { 3515 /* Life just becomes a lot easier if I use a temporary here. 3516 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously) 3517 will trigger magic and overloading again, as will fbm_instr() 3518 */ 3519 big = newSVpvn_flags(big_p, biglen, 3520 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0)); 3521 big_p = SvPVX(big); 3522 } 3523 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) { 3524 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will 3525 warn on undef, and we've already triggered a warning with the 3526 SvPV_const some lines above. We can't remove that, as we need to 3527 call some SvPV to trigger overloading early and find out if the 3528 string is UTF-8. 3529 This is all getting too messy. The API isn't quite clean enough, 3530 because data access has side effects. 3531 */ 3532 little = newSVpvn_flags(little_p, llen, 3533 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0)); 3534 little_p = SvPVX(little); 3535 } 3536 3537 if (!threeargs) 3538 offset = is_index ? 0 : biglen; 3539 else { 3540 if (big_utf8 && offset > 0) 3541 offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN); 3542 if (!is_index) 3543 offset += llen; 3544 } 3545 if (offset < 0) 3546 offset = 0; 3547 else if (offset > (SSize_t)biglen) 3548 offset = biglen; 3549 if (!(little_p = is_index 3550 ? fbm_instr((unsigned char*)big_p + offset, 3551 (unsigned char*)big_p + biglen, little, 0) 3552 : rninstr(big_p, big_p + offset, 3553 little_p, little_p + llen))) 3554 retval = -1; 3555 else { 3556 retval = little_p - big_p; 3557 if (retval > 1 && big_utf8) 3558 retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN); 3559 } 3560 SvREFCNT_dec(temp); 3561 3562 push_result: 3563 /* OPpTRUEBOOL indicates an '== -1' has been optimised away */ 3564 if (PL_op->op_private & OPpTRUEBOOL) { 3565 SV *result = ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG)) 3566 ? &PL_sv_yes : &PL_sv_no; 3567 if (PL_op->op_private & OPpTARGET_MY) { 3568 /* $lex = (index() == -1) */ 3569 sv_setsv_mg(TARG, result); 3570 PUSHs(TARG); 3571 } 3572 else { 3573 PUSHs(result); 3574 } 3575 } 3576 else 3577 PUSHi(retval); 3578 RETURN; 3579 } 3580 3581 PP(pp_sprintf) 3582 { 3583 dSP; dMARK; dORIGMARK; dTARGET; 3584 SvTAINTED_off(TARG); 3585 do_sprintf(TARG, SP-MARK, MARK+1); 3586 TAINT_IF(SvTAINTED(TARG)); 3587 SP = ORIGMARK; 3588 PUSHTARG; 3589 RETURN; 3590 } 3591 3592 PP(pp_ord) 3593 { 3594 dSP; dTARGET; 3595 3596 SV *argsv = TOPs; 3597 STRLEN len; 3598 const U8 *s = (U8*)SvPV_const(argsv, len); 3599 3600 SETu(DO_UTF8(argsv) 3601 ? (len ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) : 0) 3602 : (UV)(*s)); 3603 3604 return NORMAL; 3605 } 3606 3607 PP(pp_chr) 3608 { 3609 dSP; dTARGET; 3610 char *tmps; 3611 UV value; 3612 SV *top = TOPs; 3613 3614 SvGETMAGIC(top); 3615 if (UNLIKELY(SvAMAGIC(top))) 3616 top = sv_2num(top); 3617 if (UNLIKELY(isinfnansv(top))) 3618 Perl_croak(aTHX_ "Cannot chr %" NVgf, SvNV(top)); 3619 else { 3620 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */ 3621 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0) 3622 || 3623 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top))) 3624 && SvNV_nomg(top) < 0.0))) 3625 { 3626 if (ckWARN(WARN_UTF8)) { 3627 if (SvGMAGICAL(top)) { 3628 SV *top2 = sv_newmortal(); 3629 sv_setsv_nomg(top2, top); 3630 top = top2; 3631 } 3632 Perl_warner(aTHX_ packWARN(WARN_UTF8), 3633 "Invalid negative number (%" SVf ") in chr", SVfARG(top)); 3634 } 3635 value = UNICODE_REPLACEMENT; 3636 } else { 3637 value = SvUV_nomg(top); 3638 } 3639 } 3640 3641 SvUPGRADE(TARG,SVt_PV); 3642 3643 if (value > 255 && !IN_BYTES) { 3644 SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1); 3645 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0); 3646 SvCUR_set(TARG, tmps - SvPVX_const(TARG)); 3647 *tmps = '\0'; 3648 (void)SvPOK_only(TARG); 3649 SvUTF8_on(TARG); 3650 SETTARG; 3651 return NORMAL; 3652 } 3653 3654 SvGROW(TARG,2); 3655 SvCUR_set(TARG, 1); 3656 tmps = SvPVX(TARG); 3657 *tmps++ = (char)value; 3658 *tmps = '\0'; 3659 (void)SvPOK_only(TARG); 3660 3661 SETTARG; 3662 return NORMAL; 3663 } 3664 3665 PP(pp_crypt) 3666 { 3667 #ifdef HAS_CRYPT 3668 dSP; dTARGET; 3669 dPOPTOPssrl; 3670 STRLEN len; 3671 const char *tmps = SvPV_const(left, len); 3672 3673 if (DO_UTF8(left)) { 3674 /* If Unicode, try to downgrade. 3675 * If not possible, croak. 3676 * Yes, we made this up. */ 3677 SV* const tsv = newSVpvn_flags(tmps, len, SVf_UTF8|SVs_TEMP); 3678 3679 sv_utf8_downgrade(tsv, FALSE); 3680 tmps = SvPV_const(tsv, len); 3681 } 3682 # ifdef USE_ITHREADS 3683 # ifdef HAS_CRYPT_R 3684 if (!PL_reentrant_buffer->_crypt_struct_buffer) { 3685 /* This should be threadsafe because in ithreads there is only 3686 * one thread per interpreter. If this would not be true, 3687 * we would need a mutex to protect this malloc. */ 3688 PL_reentrant_buffer->_crypt_struct_buffer = 3689 (struct crypt_data *)safemalloc(sizeof(struct crypt_data)); 3690 # if defined(__GLIBC__) || defined(__EMX__) 3691 if (PL_reentrant_buffer->_crypt_struct_buffer) { 3692 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0; 3693 } 3694 # endif 3695 } 3696 # endif /* HAS_CRYPT_R */ 3697 # endif /* USE_ITHREADS */ 3698 3699 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right))); 3700 3701 SvUTF8_off(TARG); 3702 SETTARG; 3703 RETURN; 3704 #else 3705 DIE(aTHX_ 3706 "The crypt() function is unimplemented due to excessive paranoia."); 3707 #endif 3708 } 3709 3710 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So 3711 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ 3712 3713 3714 /* also used for: pp_lcfirst() */ 3715 3716 PP(pp_ucfirst) 3717 { 3718 /* Actually is both lcfirst() and ucfirst(). Only the first character 3719 * changes. This means that possibly we can change in-place, ie., just 3720 * take the source and change that one character and store it back, but not 3721 * if read-only etc, or if the length changes */ 3722 3723 dSP; 3724 SV *source = TOPs; 3725 STRLEN slen; /* slen is the byte length of the whole SV. */ 3726 STRLEN need; 3727 SV *dest; 3728 bool inplace; /* ? Convert first char only, in-place */ 3729 bool doing_utf8 = FALSE; /* ? using utf8 */ 3730 bool convert_source_to_utf8 = FALSE; /* ? need to convert */ 3731 const int op_type = PL_op->op_type; 3732 const U8 *s; 3733 U8 *d; 3734 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 3735 STRLEN ulen; /* ulen is the byte length of the original Unicode character 3736 * stored as UTF-8 at s. */ 3737 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or 3738 * lowercased) character stored in tmpbuf. May be either 3739 * UTF-8 or not, but in either case is the number of bytes */ 3740 bool remove_dot_above = FALSE; 3741 3742 s = (const U8*)SvPV_const(source, slen); 3743 3744 /* We may be able to get away with changing only the first character, in 3745 * place, but not if read-only, etc. Later we may discover more reasons to 3746 * not convert in-place. */ 3747 inplace = !SvREADONLY(source) && SvPADTMP(source); 3748 3749 #ifdef USE_LOCALE_CTYPE 3750 3751 if (IN_LC_RUNTIME(LC_CTYPE)) { 3752 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 3753 } 3754 3755 #endif 3756 3757 /* First calculate what the changed first character should be. This affects 3758 * whether we can just swap it out, leaving the rest of the string unchanged, 3759 * or even if have to convert the dest to UTF-8 when the source isn't */ 3760 3761 if (! slen) { /* If empty */ 3762 need = 1; /* still need a trailing NUL */ 3763 ulen = 0; 3764 *tmpbuf = '\0'; 3765 } 3766 else if (DO_UTF8(source)) { /* Is the source utf8? */ 3767 doing_utf8 = TRUE; 3768 ulen = UTF8SKIP(s); 3769 3770 if (op_type == OP_UCFIRST) { 3771 #ifdef USE_LOCALE_CTYPE 3772 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); 3773 #else 3774 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, 0); 3775 #endif 3776 } 3777 else { 3778 3779 #ifdef USE_LOCALE_CTYPE 3780 3781 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); 3782 3783 /* In turkic locales, lower casing an 'I' normally yields U+0131, 3784 * LATIN SMALL LETTER DOTLESS I, but not if the grapheme also 3785 * contains a COMBINING DOT ABOVE. Instead it is treated like 3786 * LATIN CAPITAL LETTER I WITH DOT ABOVE lowercased to 'i'. The 3787 * call to lowercase above has handled this. But SpecialCasing.txt 3788 * says we are supposed to remove the COMBINING DOT ABOVE. We can 3789 * tell if we have this situation if I ==> i in a turkic locale. */ 3790 if ( UNLIKELY(PL_in_utf8_turkic_locale) 3791 && IN_LC_RUNTIME(LC_CTYPE) 3792 && (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i'))) 3793 { 3794 /* Here, we know there was a COMBINING DOT ABOVE. We won't be 3795 * able to handle this in-place. */ 3796 inplace = FALSE; 3797 3798 /* It seems likely that the DOT will immediately follow the 3799 * 'I'. If so, we can remove it simply by indicating to the 3800 * code below to start copying the source just beyond the DOT. 3801 * We know its length is 2 */ 3802 if (LIKELY(memBEGINs(s + 1, s + slen, COMBINING_DOT_ABOVE_UTF8))) { 3803 ulen += 2; 3804 } 3805 else { /* But if it doesn't follow immediately, set a flag for 3806 the code below */ 3807 remove_dot_above = TRUE; 3808 } 3809 } 3810 #else 3811 PERL_UNUSED_VAR(remove_dot_above); 3812 3813 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0); 3814 #endif 3815 3816 } 3817 3818 /* we can't do in-place if the length changes. */ 3819 if (ulen != tculen) inplace = FALSE; 3820 need = slen + 1 - ulen + tculen; 3821 } 3822 else { /* Non-zero length, non-UTF-8, Need to consider locale and if 3823 * latin1 is treated as caseless. Note that a locale takes 3824 * precedence */ 3825 ulen = 1; /* Original character is 1 byte */ 3826 tculen = 1; /* Most characters will require one byte, but this will 3827 * need to be overridden for the tricky ones */ 3828 need = slen + 1; 3829 3830 3831 #ifdef USE_LOCALE_CTYPE 3832 3833 if (IN_LC_RUNTIME(LC_CTYPE)) { 3834 if ( UNLIKELY(PL_in_utf8_turkic_locale) 3835 && ( (op_type == OP_LCFIRST && UNLIKELY(*s == 'I')) 3836 || (op_type == OP_UCFIRST && UNLIKELY(*s == 'i')))) 3837 { 3838 if (*s == 'I') { /* lcfirst('I') */ 3839 tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); 3840 tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I); 3841 } 3842 else { /* ucfirst('i') */ 3843 tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 3844 tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 3845 } 3846 tculen = 2; 3847 inplace = FALSE; 3848 doing_utf8 = TRUE; 3849 convert_source_to_utf8 = TRUE; 3850 need += variant_under_utf8_count(s, s + slen); 3851 } 3852 else if (op_type == OP_LCFIRST) { 3853 3854 /* For lc, there are no gotchas for UTF-8 locales (other than 3855 * the turkish ones already handled above) */ 3856 *tmpbuf = toLOWER_LC(*s); 3857 } 3858 else { /* ucfirst */ 3859 3860 /* But for uc, some characters require special handling */ 3861 if (IN_UTF8_CTYPE_LOCALE) { 3862 goto do_uni_rules; 3863 } 3864 3865 /* This would be a bug if any locales have upper and title case 3866 * different */ 3867 *tmpbuf = (U8) toUPPER_LC(*s); 3868 } 3869 } 3870 else 3871 #endif 3872 /* Here, not in locale. If not using Unicode rules, is a simple 3873 * lower/upper, depending */ 3874 if (! IN_UNI_8_BIT) { 3875 *tmpbuf = (op_type == OP_LCFIRST) 3876 ? toLOWER(*s) 3877 : toUPPER(*s); 3878 } 3879 else if (op_type == OP_LCFIRST) { 3880 /* lower case the first letter: no trickiness for any character */ 3881 *tmpbuf = toLOWER_LATIN1(*s); 3882 } 3883 else { 3884 /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is 3885 * non-turkic UTF-8, which we treat as not in locale), and cased 3886 * latin1 */ 3887 UV title_ord; 3888 #ifdef USE_LOCALE_CTYPE 3889 do_uni_rules: 3890 #endif 3891 3892 title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's'); 3893 if (tculen > 1) { 3894 assert(tculen == 2); 3895 3896 /* If the result is an upper Latin1-range character, it can 3897 * still be represented in one byte, which is its ordinal */ 3898 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) { 3899 *tmpbuf = (U8) title_ord; 3900 tculen = 1; 3901 } 3902 else { 3903 /* Otherwise it became more than one ASCII character (in 3904 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to 3905 * beyond Latin1, so the number of bytes changed, so can't 3906 * replace just the first character in place. */ 3907 inplace = FALSE; 3908 3909 /* If the result won't fit in a byte, the entire result 3910 * will have to be in UTF-8. Allocate enough space for the 3911 * expanded first byte, and if UTF-8, the rest of the input 3912 * string, some or all of which may also expand to two 3913 * bytes, plus the terminating NUL. */ 3914 if (title_ord > 255) { 3915 doing_utf8 = TRUE; 3916 convert_source_to_utf8 = TRUE; 3917 need = slen 3918 + variant_under_utf8_count(s, s + slen) 3919 + 1; 3920 3921 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all 3922 * characters whose title case is above 255 is 3923 * 2. */ 3924 ulen = 2; 3925 } 3926 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */ 3927 need = slen + 1 + 1; 3928 } 3929 } 3930 } 3931 } /* End of use Unicode (Latin1) semantics */ 3932 } /* End of changing the case of the first character */ 3933 3934 /* Here, have the first character's changed case stored in tmpbuf. Ready to 3935 * generate the result */ 3936 if (inplace) { 3937 3938 /* We can convert in place. This means we change just the first 3939 * character without disturbing the rest; no need to grow */ 3940 dest = source; 3941 s = d = (U8*)SvPV_force_nomg(source, slen); 3942 } else { 3943 dTARGET; 3944 3945 dest = TARG; 3946 3947 /* Here, we can't convert in place; we earlier calculated how much 3948 * space we will need, so grow to accommodate that */ 3949 SvUPGRADE(dest, SVt_PV); 3950 d = (U8*)SvGROW(dest, need); 3951 (void)SvPOK_only(dest); 3952 3953 SETs(dest); 3954 } 3955 3956 if (doing_utf8) { 3957 if (! inplace) { 3958 if (! convert_source_to_utf8) { 3959 3960 /* Here both source and dest are in UTF-8, but have to create 3961 * the entire output. We initialize the result to be the 3962 * title/lower cased first character, and then append the rest 3963 * of the string. */ 3964 sv_setpvn(dest, (char*)tmpbuf, tculen); 3965 if (slen > ulen) { 3966 3967 /* But this boolean being set means we are in a turkic 3968 * locale, and there is a DOT character that needs to be 3969 * removed, and it isn't immediately after the current 3970 * character. Keep concatenating characters to the output 3971 * one at a time, until we find the DOT, which we simply 3972 * skip */ 3973 if (UNLIKELY(remove_dot_above)) { 3974 do { 3975 Size_t this_len = UTF8SKIP(s + ulen); 3976 3977 sv_catpvn(dest, (char*)(s + ulen), this_len); 3978 3979 ulen += this_len; 3980 if (memBEGINs(s + ulen, s + slen, COMBINING_DOT_ABOVE_UTF8)) { 3981 ulen += 2; 3982 break; 3983 } 3984 } while (s + ulen < s + slen); 3985 } 3986 3987 /* The rest of the string can be concatenated unchanged, 3988 * all at once */ 3989 sv_catpvn(dest, (char*)(s + ulen), slen - ulen); 3990 } 3991 } 3992 else { 3993 const U8 *const send = s + slen; 3994 3995 /* Here the dest needs to be in UTF-8, but the source isn't, 3996 * except we earlier UTF-8'd the first character of the source 3997 * into tmpbuf. First put that into dest, and then append the 3998 * rest of the source, converting it to UTF-8 as we go. */ 3999 4000 /* Assert tculen is 2 here because the only characters that 4001 * get to this part of the code have 2-byte UTF-8 equivalents */ 4002 assert(tculen == 2); 4003 *d++ = *tmpbuf; 4004 *d++ = *(tmpbuf + 1); 4005 s++; /* We have just processed the 1st char */ 4006 4007 while (s < send) { 4008 append_utf8_from_native_byte(*s, &d); 4009 s++; 4010 } 4011 4012 *d = '\0'; 4013 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4014 } 4015 SvUTF8_on(dest); 4016 } 4017 else { /* in-place UTF-8. Just overwrite the first character */ 4018 Copy(tmpbuf, d, tculen, U8); 4019 SvCUR_set(dest, need - 1); 4020 } 4021 4022 } 4023 else { /* Neither source nor dest are, nor need to be UTF-8 */ 4024 if (slen) { 4025 if (inplace) { /* in-place, only need to change the 1st char */ 4026 *d = *tmpbuf; 4027 } 4028 else { /* Not in-place */ 4029 4030 /* Copy the case-changed character(s) from tmpbuf */ 4031 Copy(tmpbuf, d, tculen, U8); 4032 d += tculen - 1; /* Code below expects d to point to final 4033 * character stored */ 4034 } 4035 } 4036 else { /* empty source */ 4037 /* See bug #39028: Don't taint if empty */ 4038 *d = *s; 4039 } 4040 4041 /* In a "use bytes" we don't treat the source as UTF-8, but, still want 4042 * the destination to retain that flag */ 4043 if (DO_UTF8(source)) 4044 SvUTF8_on(dest); 4045 4046 if (!inplace) { /* Finish the rest of the string, unchanged */ 4047 /* This will copy the trailing NUL */ 4048 Copy(s + 1, d + 1, slen, U8); 4049 SvCUR_set(dest, need - 1); 4050 } 4051 } 4052 #ifdef USE_LOCALE_CTYPE 4053 if (IN_LC_RUNTIME(LC_CTYPE)) { 4054 TAINT; 4055 SvTAINTED_on(dest); 4056 } 4057 #endif 4058 if (dest != source && SvTAINTED(source)) 4059 SvTAINT(dest); 4060 SvSETMAGIC(dest); 4061 return NORMAL; 4062 } 4063 4064 PP(pp_uc) 4065 { 4066 dSP; 4067 SV *source = TOPs; 4068 STRLEN len; 4069 STRLEN min; 4070 SV *dest; 4071 const U8 *s; 4072 U8 *d; 4073 4074 SvGETMAGIC(source); 4075 4076 if ( SvPADTMP(source) 4077 && !SvREADONLY(source) && SvPOK(source) 4078 && !DO_UTF8(source) 4079 && ( 4080 #ifdef USE_LOCALE_CTYPE 4081 (IN_LC_RUNTIME(LC_CTYPE)) 4082 ? ! IN_UTF8_CTYPE_LOCALE 4083 : 4084 #endif 4085 ! IN_UNI_8_BIT)) 4086 { 4087 4088 /* We can convert in place. The reason we can't if in UNI_8_BIT is to 4089 * make the loop tight, so we overwrite the source with the dest before 4090 * looking at it, and we need to look at the original source 4091 * afterwards. There would also need to be code added to handle 4092 * switching to not in-place in midstream if we run into characters 4093 * that change the length. Since being in locale overrides UNI_8_BIT, 4094 * that latter becomes irrelevant in the above test; instead for 4095 * locale, the size can't normally change, except if the locale is a 4096 * UTF-8 one */ 4097 dest = source; 4098 s = d = (U8*)SvPV_force_nomg(source, len); 4099 min = len + 1; 4100 } else { 4101 dTARGET; 4102 4103 dest = TARG; 4104 4105 s = (const U8*)SvPV_nomg_const(source, len); 4106 min = len + 1; 4107 4108 SvUPGRADE(dest, SVt_PV); 4109 d = (U8*)SvGROW(dest, min); 4110 (void)SvPOK_only(dest); 4111 4112 SETs(dest); 4113 } 4114 4115 #ifdef USE_LOCALE_CTYPE 4116 4117 if (IN_LC_RUNTIME(LC_CTYPE)) { 4118 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 4119 } 4120 4121 #endif 4122 4123 /* Overloaded values may have toggled the UTF-8 flag on source, so we need 4124 to check DO_UTF8 again here. */ 4125 4126 if (DO_UTF8(source)) { 4127 const U8 *const send = s + len; 4128 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 4129 4130 #define GREEK_CAPITAL_LETTER_IOTA 0x0399 4131 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 4132 /* All occurrences of these are to be moved to follow any other marks. 4133 * This is context-dependent. We may not be passed enough context to 4134 * move the iota subscript beyond all of them, but we do the best we can 4135 * with what we're given. The result is always better than if we 4136 * hadn't done this. And, the problem would only arise if we are 4137 * passed a character without all its combining marks, which would be 4138 * the caller's mistake. The information this is based on comes from a 4139 * comment in Unicode SpecialCasing.txt, (and the Standard's text 4140 * itself) and so can't be checked properly to see if it ever gets 4141 * revised. But the likelihood of it changing is remote */ 4142 bool in_iota_subscript = FALSE; 4143 4144 while (s < send) { 4145 STRLEN u; 4146 STRLEN ulen; 4147 UV uv; 4148 if (UNLIKELY(in_iota_subscript)) { 4149 UV cp = utf8_to_uvchr_buf(s, send, NULL); 4150 4151 if (! _invlist_contains_cp(PL_utf8_mark, cp)) { 4152 4153 /* A non-mark. Time to output the iota subscript */ 4154 *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA); 4155 *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA); 4156 in_iota_subscript = FALSE; 4157 } 4158 } 4159 4160 /* Then handle the current character. Get the changed case value 4161 * and copy it to the output buffer */ 4162 4163 u = UTF8SKIP(s); 4164 #ifdef USE_LOCALE_CTYPE 4165 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); 4166 #else 4167 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0); 4168 #endif 4169 if (uv == GREEK_CAPITAL_LETTER_IOTA 4170 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI) 4171 { 4172 in_iota_subscript = TRUE; 4173 } 4174 else { 4175 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4176 /* If the eventually required minimum size outgrows the 4177 * available space, we need to grow. */ 4178 const UV o = d - (U8*)SvPVX_const(dest); 4179 4180 /* If someone uppercases one million U+03B0s we SvGROW() 4181 * one million times. Or we could try guessing how much to 4182 * allocate without allocating too much. But we can't 4183 * really guess without examining the rest of the string. 4184 * Such is life. See corresponding comment in lc code for 4185 * another option */ 4186 d = o + (U8*) SvGROW(dest, min); 4187 } 4188 Copy(tmpbuf, d, ulen, U8); 4189 d += ulen; 4190 } 4191 s += u; 4192 } 4193 if (in_iota_subscript) { 4194 *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA); 4195 *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA); 4196 } 4197 SvUTF8_on(dest); 4198 *d = '\0'; 4199 4200 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4201 } 4202 else { /* Not UTF-8 */ 4203 if (len) { 4204 const U8 *const send = s + len; 4205 4206 /* Use locale casing if in locale; regular style if not treating 4207 * latin1 as having case; otherwise the latin1 casing. Do the 4208 * whole thing in a tight loop, for speed, */ 4209 #ifdef USE_LOCALE_CTYPE 4210 if (IN_LC_RUNTIME(LC_CTYPE)) { 4211 if (IN_UTF8_CTYPE_LOCALE) { 4212 goto do_uni_rules; 4213 } 4214 for (; s < send; d++, s++) 4215 *d = (U8) toUPPER_LC(*s); 4216 } 4217 else 4218 #endif 4219 if (! IN_UNI_8_BIT) { 4220 for (; s < send; d++, s++) { 4221 *d = toUPPER(*s); 4222 } 4223 } 4224 else { 4225 #ifdef USE_LOCALE_CTYPE 4226 do_uni_rules: 4227 #endif 4228 for (; s < send; d++, s++) { 4229 Size_t extra; 4230 4231 *d = toUPPER_LATIN1_MOD(*s); 4232 if ( LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) 4233 4234 #ifdef USE_LOCALE_CTYPE 4235 4236 && (LIKELY( ! PL_in_utf8_turkic_locale 4237 || ! IN_LC_RUNTIME(LC_CTYPE)) 4238 || *s != 'i') 4239 #endif 4240 4241 ) { 4242 continue; 4243 } 4244 4245 /* The mainstream case is the tight loop above. To avoid 4246 * extra tests in that, all three characters that always 4247 * require special handling are mapped by the MOD to the 4248 * one tested just above. Use the source to distinguish 4249 * between those cases */ 4250 4251 #if UNICODE_MAJOR_VERSION > 2 \ 4252 || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \ 4253 && UNICODE_DOT_DOT_VERSION >= 8) 4254 if (*s == LATIN_SMALL_LETTER_SHARP_S) { 4255 4256 /* uc() of this requires 2 characters, but they are 4257 * ASCII. If not enough room, grow the string */ 4258 if (SvLEN(dest) < ++min) { 4259 const UV o = d - (U8*)SvPVX_const(dest); 4260 d = o + (U8*) SvGROW(dest, min); 4261 } 4262 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */ 4263 continue; /* Back to the tight loop; still in ASCII */ 4264 } 4265 #endif 4266 4267 /* The other special handling characters have their 4268 * upper cases outside the latin1 range, hence need to be 4269 * in UTF-8, so the whole result needs to be in UTF-8. 4270 * 4271 * So, here we are somewhere in the middle of processing a 4272 * non-UTF-8 string, and realize that we will have to 4273 * convert the whole thing to UTF-8. What to do? There 4274 * are several possibilities. The simplest to code is to 4275 * convert what we have so far, set a flag, and continue on 4276 * in the loop. The flag would be tested each time through 4277 * the loop, and if set, the next character would be 4278 * converted to UTF-8 and stored. But, I (khw) didn't want 4279 * to slow down the mainstream case at all for this fairly 4280 * rare case, so I didn't want to add a test that didn't 4281 * absolutely have to be there in the loop, besides the 4282 * possibility that it would get too complicated for 4283 * optimizers to deal with. Another possibility is to just 4284 * give up, convert the source to UTF-8, and restart the 4285 * function that way. Another possibility is to convert 4286 * both what has already been processed and what is yet to 4287 * come separately to UTF-8, then jump into the loop that 4288 * handles UTF-8. But the most efficient time-wise of the 4289 * ones I could think of is what follows, and turned out to 4290 * not require much extra code. 4291 * 4292 * First, calculate the extra space needed for the 4293 * remainder of the source needing to be in UTF-8. Except 4294 * for the 'i' in Turkic locales, in UTF-8 strings, the 4295 * uppercase of a character below 256 occupies the same 4296 * number of bytes as the original. Therefore, the space 4297 * needed is the that number plus the number of characters 4298 * that become two bytes when converted to UTF-8, plus, in 4299 * turkish locales, the number of 'i's. */ 4300 4301 extra = send - s + variant_under_utf8_count(s, send); 4302 4303 #ifdef USE_LOCALE_CTYPE 4304 4305 if (UNLIKELY(*s == 'i')) { /* We wouldn't get an 'i' here 4306 unless are in a Turkic 4307 locale */ 4308 const U8 * s_peek = s; 4309 4310 do { 4311 extra++; 4312 4313 s_peek = (U8 *) memchr(s_peek + 1, 'i', 4314 send - (s_peek + 1)); 4315 } while (s_peek != NULL); 4316 } 4317 #endif 4318 4319 /* Convert what we have so far into UTF-8, telling the 4320 * function that we know it should be converted, and to 4321 * allow extra space for what we haven't processed yet. 4322 * 4323 * This may cause the string pointer to move, so need to 4324 * save and re-find it. */ 4325 4326 len = d - (U8*)SvPVX_const(dest); 4327 SvCUR_set(dest, len); 4328 len = sv_utf8_upgrade_flags_grow(dest, 4329 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4330 extra 4331 + 1 /* trailing NUL */ ); 4332 d = (U8*)SvPVX(dest) + len; 4333 4334 /* Now process the remainder of the source, simultaneously 4335 * converting to upper and UTF-8. 4336 * 4337 * To avoid extra tests in the loop body, and since the 4338 * loop is so simple, split out the rare Turkic case into 4339 * its own loop */ 4340 4341 #ifdef USE_LOCALE_CTYPE 4342 if ( UNLIKELY(PL_in_utf8_turkic_locale) 4343 && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))) 4344 { 4345 for (; s < send; s++) { 4346 if (*s == 'i') { 4347 *d++ = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 4348 *d++ = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 4349 } 4350 else { 4351 (void) _to_upper_title_latin1(*s, d, &len, 'S'); 4352 d += len; 4353 } 4354 } 4355 } 4356 else 4357 #endif 4358 for (; s < send; s++) { 4359 (void) _to_upper_title_latin1(*s, d, &len, 'S'); 4360 d += len; 4361 } 4362 4363 /* Here have processed the whole source; no need to 4364 * continue with the outer loop. Each character has been 4365 * converted to upper case and converted to UTF-8. */ 4366 break; 4367 } /* End of processing all latin1-style chars */ 4368 } /* End of processing all chars */ 4369 } /* End of source is not empty */ 4370 4371 if (source != dest) { 4372 *d = '\0'; /* Here d points to 1 after last char, add NUL */ 4373 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4374 } 4375 } /* End of isn't utf8 */ 4376 #ifdef USE_LOCALE_CTYPE 4377 if (IN_LC_RUNTIME(LC_CTYPE)) { 4378 TAINT; 4379 SvTAINTED_on(dest); 4380 } 4381 #endif 4382 if (dest != source && SvTAINTED(source)) 4383 SvTAINT(dest); 4384 SvSETMAGIC(dest); 4385 return NORMAL; 4386 } 4387 4388 PP(pp_lc) 4389 { 4390 dSP; 4391 SV *source = TOPs; 4392 STRLEN len; 4393 STRLEN min; 4394 SV *dest; 4395 const U8 *s; 4396 U8 *d; 4397 bool has_turkic_I = FALSE; 4398 4399 SvGETMAGIC(source); 4400 4401 if ( SvPADTMP(source) 4402 && !SvREADONLY(source) && SvPOK(source) 4403 && !DO_UTF8(source) 4404 4405 #ifdef USE_LOCALE_CTYPE 4406 4407 && ( LIKELY(! IN_LC_RUNTIME(LC_CTYPE)) 4408 || LIKELY(! PL_in_utf8_turkic_locale)) 4409 4410 #endif 4411 4412 ) { 4413 4414 /* We can convert in place, as, outside of Turkic UTF-8 locales, 4415 * lowercasing anything in the latin1 range (or else DO_UTF8 would have 4416 * been on) doesn't lengthen it. */ 4417 dest = source; 4418 s = d = (U8*)SvPV_force_nomg(source, len); 4419 min = len + 1; 4420 } else { 4421 dTARGET; 4422 4423 dest = TARG; 4424 4425 s = (const U8*)SvPV_nomg_const(source, len); 4426 min = len + 1; 4427 4428 SvUPGRADE(dest, SVt_PV); 4429 d = (U8*)SvGROW(dest, min); 4430 (void)SvPOK_only(dest); 4431 4432 SETs(dest); 4433 } 4434 4435 #ifdef USE_LOCALE_CTYPE 4436 4437 if (IN_LC_RUNTIME(LC_CTYPE)) { 4438 const U8 * next_I; 4439 4440 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 4441 4442 /* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become 4443 * UTF-8 for the single case of the character 'I' */ 4444 if ( UNLIKELY(PL_in_utf8_turkic_locale) 4445 && ! DO_UTF8(source) 4446 && (next_I = (U8 *) memchr(s, 'I', len))) 4447 { 4448 Size_t I_count = 0; 4449 const U8 *const send = s + len; 4450 4451 do { 4452 I_count++; 4453 4454 next_I = (U8 *) memchr(next_I + 1, 'I', 4455 send - (next_I + 1)); 4456 } while (next_I != NULL); 4457 4458 /* Except for the 'I', in UTF-8 strings, the lower case of a 4459 * character below 256 occupies the same number of bytes as the 4460 * original. Therefore, the space needed is the original length 4461 * plus I_count plus the number of characters that become two bytes 4462 * when converted to UTF-8 */ 4463 sv_utf8_upgrade_flags_grow(dest, 0, len 4464 + I_count 4465 + variant_under_utf8_count(s, send) 4466 + 1 /* Trailing NUL */ ); 4467 d = (U8*)SvPVX(dest); 4468 has_turkic_I = TRUE; 4469 } 4470 } 4471 4472 #else 4473 PERL_UNUSED_VAR(has_turkic_I); 4474 #endif 4475 4476 /* Overloaded values may have toggled the UTF-8 flag on source, so we need 4477 to check DO_UTF8 again here. */ 4478 4479 if (DO_UTF8(source)) { 4480 const U8 *const send = s + len; 4481 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 4482 bool remove_dot_above = FALSE; 4483 4484 while (s < send) { 4485 const STRLEN u = UTF8SKIP(s); 4486 STRLEN ulen; 4487 4488 #ifdef USE_LOCALE_CTYPE 4489 4490 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); 4491 4492 /* If we are in a Turkic locale, we have to do more work. As noted 4493 * in the comments for lcfirst, there is a special case if a 'I' 4494 * is in a grapheme with COMBINING DOT ABOVE UTF8. It turns into a 4495 * 'i', and the DOT must be removed. We check for that situation, 4496 * and set a flag if the DOT is there. Then each time through the 4497 * loop, we have to see if we need to remove the next DOT above, 4498 * and if so, do it. We know that there is a DOT because 4499 * _toLOWER_utf8_flags() wouldn't have returned 'i' unless there 4500 * was one in a proper position. */ 4501 if ( UNLIKELY(PL_in_utf8_turkic_locale) 4502 && IN_LC_RUNTIME(LC_CTYPE)) 4503 { 4504 if ( UNLIKELY(remove_dot_above) 4505 && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8)) 4506 { 4507 s += u; 4508 remove_dot_above = FALSE; 4509 continue; 4510 } 4511 else if (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')) { 4512 remove_dot_above = TRUE; 4513 } 4514 } 4515 #else 4516 PERL_UNUSED_VAR(remove_dot_above); 4517 4518 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0); 4519 #endif 4520 4521 /* Here is where we would do context-sensitive actions for the 4522 * Greek final sigma. See the commit message for 86510fb15 for why 4523 * there isn't any */ 4524 4525 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4526 4527 /* If the eventually required minimum size outgrows the 4528 * available space, we need to grow. */ 4529 const UV o = d - (U8*)SvPVX_const(dest); 4530 4531 /* If someone lowercases one million U+0130s we SvGROW() one 4532 * million times. Or we could try guessing how much to 4533 * allocate without allocating too much. Such is life. 4534 * Another option would be to grow an extra byte or two more 4535 * each time we need to grow, which would cut down the million 4536 * to 500K, with little waste */ 4537 d = o + (U8*) SvGROW(dest, min); 4538 } 4539 4540 /* Copy the newly lowercased letter to the output buffer we're 4541 * building */ 4542 Copy(tmpbuf, d, ulen, U8); 4543 d += ulen; 4544 s += u; 4545 } /* End of looping through the source string */ 4546 SvUTF8_on(dest); 4547 *d = '\0'; 4548 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4549 } else { /* 'source' not utf8 */ 4550 if (len) { 4551 const U8 *const send = s + len; 4552 4553 /* Use locale casing if in locale; regular style if not treating 4554 * latin1 as having case; otherwise the latin1 casing. Do the 4555 * whole thing in a tight loop, for speed, */ 4556 #ifdef USE_LOCALE_CTYPE 4557 if (IN_LC_RUNTIME(LC_CTYPE)) { 4558 if (LIKELY( ! has_turkic_I)) { 4559 for (; s < send; d++, s++) 4560 *d = toLOWER_LC(*s); 4561 } 4562 else { /* This is the only case where lc() converts 'dest' 4563 into UTF-8 from a non-UTF-8 'source' */ 4564 for (; s < send; s++) { 4565 if (*s == 'I') { 4566 *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); 4567 *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I); 4568 } 4569 else { 4570 append_utf8_from_native_byte(toLOWER_LATIN1(*s), &d); 4571 } 4572 } 4573 } 4574 } 4575 else 4576 #endif 4577 if (! IN_UNI_8_BIT) { 4578 for (; s < send; d++, s++) { 4579 *d = toLOWER(*s); 4580 } 4581 } 4582 else { 4583 for (; s < send; d++, s++) { 4584 *d = toLOWER_LATIN1(*s); 4585 } 4586 } 4587 } 4588 if (source != dest) { 4589 *d = '\0'; 4590 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4591 } 4592 } 4593 #ifdef USE_LOCALE_CTYPE 4594 if (IN_LC_RUNTIME(LC_CTYPE)) { 4595 TAINT; 4596 SvTAINTED_on(dest); 4597 } 4598 #endif 4599 if (dest != source && SvTAINTED(source)) 4600 SvTAINT(dest); 4601 SvSETMAGIC(dest); 4602 return NORMAL; 4603 } 4604 4605 PP(pp_quotemeta) 4606 { 4607 dSP; dTARGET; 4608 SV * const sv = TOPs; 4609 STRLEN len; 4610 const char *s = SvPV_const(sv,len); 4611 4612 SvUTF8_off(TARG); /* decontaminate */ 4613 if (len) { 4614 char *d; 4615 SvUPGRADE(TARG, SVt_PV); 4616 SvGROW(TARG, (len * 2) + 1); 4617 d = SvPVX(TARG); 4618 if (DO_UTF8(sv)) { 4619 while (len) { 4620 STRLEN ulen = UTF8SKIP(s); 4621 bool to_quote = FALSE; 4622 4623 if (UTF8_IS_INVARIANT(*s)) { 4624 if (_isQUOTEMETA(*s)) { 4625 to_quote = TRUE; 4626 } 4627 } 4628 else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, s + len)) { 4629 if ( 4630 #ifdef USE_LOCALE_CTYPE 4631 /* In locale, we quote all non-ASCII Latin1 chars. 4632 * Otherwise use the quoting rules */ 4633 4634 IN_LC_RUNTIME(LC_CTYPE) 4635 || 4636 #endif 4637 _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1)))) 4638 { 4639 to_quote = TRUE; 4640 } 4641 } 4642 else if (is_QUOTEMETA_high(s)) { 4643 to_quote = TRUE; 4644 } 4645 4646 if (to_quote) { 4647 *d++ = '\\'; 4648 } 4649 if (ulen > len) 4650 ulen = len; 4651 len -= ulen; 4652 while (ulen--) 4653 *d++ = *s++; 4654 } 4655 SvUTF8_on(TARG); 4656 } 4657 else if (IN_UNI_8_BIT) { 4658 while (len--) { 4659 if (_isQUOTEMETA(*s)) 4660 *d++ = '\\'; 4661 *d++ = *s++; 4662 } 4663 } 4664 else { 4665 /* For non UNI_8_BIT (and hence in locale) just quote all \W 4666 * including everything above ASCII */ 4667 while (len--) { 4668 if (!isWORDCHAR_A(*s)) 4669 *d++ = '\\'; 4670 *d++ = *s++; 4671 } 4672 } 4673 *d = '\0'; 4674 SvCUR_set(TARG, d - SvPVX_const(TARG)); 4675 (void)SvPOK_only_UTF8(TARG); 4676 } 4677 else 4678 sv_setpvn(TARG, s, len); 4679 SETTARG; 4680 return NORMAL; 4681 } 4682 4683 PP(pp_fc) 4684 { 4685 dTARGET; 4686 dSP; 4687 SV *source = TOPs; 4688 STRLEN len; 4689 STRLEN min; 4690 SV *dest; 4691 const U8 *s; 4692 const U8 *send; 4693 U8 *d; 4694 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1]; 4695 #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \ 4696 || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \ 4697 || UNICODE_DOT_DOT_VERSION > 0) 4698 const bool full_folding = TRUE; /* This variable is here so we can easily 4699 move to more generality later */ 4700 #else 4701 const bool full_folding = FALSE; 4702 #endif 4703 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 ) 4704 #ifdef USE_LOCALE_CTYPE 4705 | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 ) 4706 #endif 4707 ; 4708 4709 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me. 4710 * You are welcome(?) -Hugmeir 4711 */ 4712 4713 SvGETMAGIC(source); 4714 4715 dest = TARG; 4716 4717 if (SvOK(source)) { 4718 s = (const U8*)SvPV_nomg_const(source, len); 4719 } else { 4720 if (ckWARN(WARN_UNINITIALIZED)) 4721 report_uninit(source); 4722 s = (const U8*)""; 4723 len = 0; 4724 } 4725 4726 min = len + 1; 4727 4728 SvUPGRADE(dest, SVt_PV); 4729 d = (U8*)SvGROW(dest, min); 4730 (void)SvPOK_only(dest); 4731 4732 SETs(dest); 4733 4734 send = s + len; 4735 4736 #ifdef USE_LOCALE_CTYPE 4737 4738 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */ 4739 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 4740 } 4741 4742 #endif 4743 4744 if (DO_UTF8(source)) { /* UTF-8 flagged string. */ 4745 while (s < send) { 4746 const STRLEN u = UTF8SKIP(s); 4747 STRLEN ulen; 4748 4749 _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags); 4750 4751 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4752 const UV o = d - (U8*)SvPVX_const(dest); 4753 d = o + (U8*) SvGROW(dest, min); 4754 } 4755 4756 Copy(tmpbuf, d, ulen, U8); 4757 d += ulen; 4758 s += u; 4759 } 4760 SvUTF8_on(dest); 4761 } /* Unflagged string */ 4762 else if (len) { 4763 #ifdef USE_LOCALE_CTYPE 4764 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */ 4765 if (IN_UTF8_CTYPE_LOCALE) { 4766 goto do_uni_folding; 4767 } 4768 for (; s < send; d++, s++) 4769 *d = (U8) toFOLD_LC(*s); 4770 } 4771 else 4772 #endif 4773 if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */ 4774 for (; s < send; d++, s++) 4775 *d = toFOLD(*s); 4776 } 4777 else { 4778 #ifdef USE_LOCALE_CTYPE 4779 do_uni_folding: 4780 #endif 4781 /* For ASCII and the Latin-1 range, there's potentially three 4782 * troublesome folds: 4783 * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full 4784 * casefolding becomes 'ss'; 4785 * \x{B5} (\N{MICRO SIGN}), which under any fold becomes 4786 * \x{3BC} (\N{GREEK SMALL LETTER MU}) 4787 * I only in Turkic locales, this folds to \x{131} 4788 * \N{LATIN SMALL LETTER DOTLESS I} 4789 * For the rest, the casefold is their lowercase. */ 4790 for (; s < send; d++, s++) { 4791 if ( UNLIKELY(*s == MICRO_SIGN) 4792 #ifdef USE_LOCALE_CTYPE 4793 || ( UNLIKELY(PL_in_utf8_turkic_locale) 4794 && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)) 4795 && UNLIKELY(*s == 'I')) 4796 #endif 4797 ) { 4798 Size_t extra = send - s 4799 + variant_under_utf8_count(s, send); 4800 4801 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, 4802 * and 'I' in Turkic locales is \N{LATIN SMALL LETTER 4803 * DOTLESS I} both of which are outside of the latin-1 4804 * range. There's a couple of ways to deal with this -- khw 4805 * discusses them in pp_lc/uc, so go there :) What we do 4806 * here is upgrade what we had already casefolded, then 4807 * enter an inner loop that appends the rest of the 4808 * characters as UTF-8. 4809 * 4810 * First we calculate the needed size of the upgraded dest 4811 * beyond what's been processed already (the upgrade 4812 * function figures that out). Except for the 'I' in 4813 * Turkic locales, in UTF-8 strings, the fold case of a 4814 * character below 256 occupies the same number of bytes as 4815 * the original (even the Sharp S). Therefore, the space 4816 * needed is the number of bytes remaining plus the number 4817 * of characters that become two bytes when converted to 4818 * UTF-8 plus, in turkish locales, the number of 'I's */ 4819 4820 if (UNLIKELY(*s == 'I')) { 4821 const U8 * s_peek = s; 4822 4823 do { 4824 extra++; 4825 4826 s_peek = (U8 *) memchr(s_peek + 1, 'I', 4827 send - (s_peek + 1)); 4828 } while (s_peek != NULL); 4829 } 4830 4831 /* Growing may move things, so have to save and recalculate 4832 * 'd' */ 4833 len = d - (U8*)SvPVX_const(dest); 4834 SvCUR_set(dest, len); 4835 len = sv_utf8_upgrade_flags_grow(dest, 4836 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4837 extra 4838 + 1 /* Trailing NUL */ ); 4839 d = (U8*)SvPVX(dest) + len; 4840 4841 if (*s == 'I') { 4842 *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); 4843 *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I); 4844 } 4845 else { 4846 *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU); 4847 *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU); 4848 } 4849 s++; 4850 4851 for (; s < send; s++) { 4852 STRLEN ulen; 4853 _to_uni_fold_flags(*s, d, &ulen, flags); 4854 d += ulen; 4855 } 4856 break; 4857 } 4858 else if ( UNLIKELY(*s == LATIN_SMALL_LETTER_SHARP_S) 4859 && full_folding) 4860 { 4861 /* Under full casefolding, LATIN SMALL LETTER SHARP S 4862 * becomes "ss", which may require growing the SV. */ 4863 if (SvLEN(dest) < ++min) { 4864 const UV o = d - (U8*)SvPVX_const(dest); 4865 d = o + (U8*) SvGROW(dest, min); 4866 } 4867 *(d)++ = 's'; 4868 *d = 's'; 4869 } 4870 else { /* Else, the fold is the lower case */ 4871 *d = toLOWER_LATIN1(*s); 4872 } 4873 } 4874 } 4875 } 4876 *d = '\0'; 4877 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4878 4879 #ifdef USE_LOCALE_CTYPE 4880 if (IN_LC_RUNTIME(LC_CTYPE)) { 4881 TAINT; 4882 SvTAINTED_on(dest); 4883 } 4884 #endif 4885 if (SvTAINTED(source)) 4886 SvTAINT(dest); 4887 SvSETMAGIC(dest); 4888 RETURN; 4889 } 4890 4891 /* Arrays. */ 4892 4893 PP(pp_aslice) 4894 { 4895 dSP; dMARK; dORIGMARK; 4896 AV *const av = MUTABLE_AV(POPs); 4897 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); 4898 4899 if (SvTYPE(av) == SVt_PVAV) { 4900 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 4901 bool can_preserve = FALSE; 4902 4903 if (localizing) { 4904 MAGIC *mg; 4905 HV *stash; 4906 4907 can_preserve = SvCANEXISTDELETE(av); 4908 } 4909 4910 if (lval && localizing) { 4911 SV **svp; 4912 SSize_t max = -1; 4913 for (svp = MARK + 1; svp <= SP; svp++) { 4914 const SSize_t elem = SvIV(*svp); 4915 if (elem > max) 4916 max = elem; 4917 } 4918 if (max > AvMAX(av)) 4919 av_extend(av, max); 4920 } 4921 4922 while (++MARK <= SP) { 4923 SV **svp; 4924 SSize_t elem = SvIV(*MARK); 4925 bool preeminent = TRUE; 4926 4927 if (localizing && can_preserve) { 4928 /* If we can determine whether the element exist, 4929 * Try to preserve the existenceness of a tied array 4930 * element by using EXISTS and DELETE if possible. 4931 * Fallback to FETCH and STORE otherwise. */ 4932 preeminent = av_exists(av, elem); 4933 } 4934 4935 svp = av_fetch(av, elem, lval); 4936 if (lval) { 4937 if (!svp || !*svp) 4938 DIE(aTHX_ PL_no_aelem, elem); 4939 if (localizing) { 4940 if (preeminent) 4941 save_aelem(av, elem, svp); 4942 else 4943 SAVEADELETE(av, elem); 4944 } 4945 } 4946 *MARK = svp ? *svp : &PL_sv_undef; 4947 } 4948 } 4949 if (GIMME_V != G_LIST) { 4950 MARK = ORIGMARK; 4951 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef; 4952 SP = MARK; 4953 } 4954 RETURN; 4955 } 4956 4957 PP(pp_kvaslice) 4958 { 4959 dSP; dMARK; 4960 AV *const av = MUTABLE_AV(POPs); 4961 I32 lval = (PL_op->op_flags & OPf_MOD); 4962 SSize_t items = SP - MARK; 4963 4964 if (PL_op->op_private & OPpMAYBE_LVSUB) { 4965 const I32 flags = is_lvalue_sub(); 4966 if (flags) { 4967 if (!(flags & OPpENTERSUB_INARGS)) 4968 /* diag_listed_as: Can't modify %s in %s */ 4969 Perl_croak(aTHX_ "Can't modify index/value array slice in list assignment"); 4970 lval = flags; 4971 } 4972 } 4973 4974 MEXTEND(SP,items); 4975 while (items > 1) { 4976 *(MARK+items*2-1) = *(MARK+items); 4977 items--; 4978 } 4979 items = SP-MARK; 4980 SP += items; 4981 4982 while (++MARK <= SP) { 4983 SV **svp; 4984 4985 svp = av_fetch(av, SvIV(*MARK), lval); 4986 if (lval) { 4987 if (!svp || !*svp || *svp == &PL_sv_undef) { 4988 DIE(aTHX_ PL_no_aelem, SvIV(*MARK)); 4989 } 4990 *MARK = sv_mortalcopy(*MARK); 4991 } 4992 *++MARK = svp ? *svp : &PL_sv_undef; 4993 } 4994 if (GIMME_V != G_LIST) { 4995 MARK = SP - items*2; 4996 *++MARK = items > 0 ? *SP : &PL_sv_undef; 4997 SP = MARK; 4998 } 4999 RETURN; 5000 } 5001 5002 5003 PP(pp_aeach) 5004 { 5005 dSP; 5006 AV *array = MUTABLE_AV(POPs); 5007 const U8 gimme = GIMME_V; 5008 IV *iterp = Perl_av_iter_p(aTHX_ array); 5009 const IV current = (*iterp)++; 5010 5011 if (current > av_top_index(array)) { 5012 *iterp = 0; 5013 if (gimme == G_SCALAR) 5014 RETPUSHUNDEF; 5015 else 5016 RETURN; 5017 } 5018 5019 EXTEND(SP, 2); 5020 mPUSHi(current); 5021 if (gimme == G_LIST) { 5022 SV **const element = av_fetch(array, current, 0); 5023 PUSHs(element ? *element : &PL_sv_undef); 5024 } 5025 RETURN; 5026 } 5027 5028 /* also used for: pp_avalues()*/ 5029 PP(pp_akeys) 5030 { 5031 dSP; 5032 AV *array = MUTABLE_AV(POPs); 5033 const U8 gimme = GIMME_V; 5034 5035 *Perl_av_iter_p(aTHX_ array) = 0; 5036 5037 if (gimme == G_SCALAR) { 5038 dTARGET; 5039 PUSHi(av_count(array)); 5040 } 5041 else if (gimme == G_LIST) { 5042 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) { 5043 const I32 flags = is_lvalue_sub(); 5044 if (flags && !(flags & OPpENTERSUB_INARGS)) 5045 /* diag_listed_as: Can't modify %s in %s */ 5046 Perl_croak(aTHX_ 5047 "Can't modify keys on array in list assignment"); 5048 } 5049 { 5050 IV n = av_top_index(array); 5051 IV i; 5052 5053 EXTEND(SP, n + 1); 5054 5055 if ( PL_op->op_type == OP_AKEYS 5056 || ( PL_op->op_type == OP_AVHVSWITCH 5057 && (PL_op->op_private & 3) + OP_AEACH == OP_AKEYS )) 5058 { 5059 for (i = 0; i <= n; i++) { 5060 mPUSHi(i); 5061 } 5062 } 5063 else { 5064 for (i = 0; i <= n; i++) { 5065 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0); 5066 PUSHs(elem ? *elem : &PL_sv_undef); 5067 } 5068 } 5069 } 5070 } 5071 RETURN; 5072 } 5073 5074 /* Associative arrays. */ 5075 5076 PP(pp_each) 5077 { 5078 dSP; 5079 HV * hash = MUTABLE_HV(POPs); 5080 HE *entry; 5081 const U8 gimme = GIMME_V; 5082 5083 entry = hv_iternext(hash); 5084 5085 EXTEND(SP, 2); 5086 if (entry) { 5087 SV* const sv = hv_iterkeysv(entry); 5088 PUSHs(sv); 5089 if (gimme == G_LIST) { 5090 SV *val; 5091 val = hv_iterval(hash, entry); 5092 PUSHs(val); 5093 } 5094 } 5095 else if (gimme == G_SCALAR) 5096 RETPUSHUNDEF; 5097 5098 RETURN; 5099 } 5100 5101 STATIC OP * 5102 S_do_delete_local(pTHX) 5103 { 5104 dSP; 5105 const U8 gimme = GIMME_V; 5106 const MAGIC *mg; 5107 HV *stash; 5108 const bool sliced = !!(PL_op->op_private & OPpSLICE); 5109 SV **unsliced_keysv = sliced ? NULL : sp--; 5110 SV * const osv = POPs; 5111 SV **mark = sliced ? PL_stack_base + POPMARK : unsliced_keysv-1; 5112 dORIGMARK; 5113 const bool tied = SvRMAGICAL(osv) 5114 && mg_find((const SV *)osv, PERL_MAGIC_tied); 5115 const bool can_preserve = SvCANEXISTDELETE(osv); 5116 const U32 type = SvTYPE(osv); 5117 SV ** const end = sliced ? SP : unsliced_keysv; 5118 5119 if (type == SVt_PVHV) { /* hash element */ 5120 HV * const hv = MUTABLE_HV(osv); 5121 while (++MARK <= end) { 5122 SV * const keysv = *MARK; 5123 SV *sv = NULL; 5124 bool preeminent = TRUE; 5125 if (can_preserve) 5126 preeminent = hv_exists_ent(hv, keysv, 0); 5127 if (tied) { 5128 HE *he = hv_fetch_ent(hv, keysv, 1, 0); 5129 if (he) 5130 sv = HeVAL(he); 5131 else 5132 preeminent = FALSE; 5133 } 5134 else { 5135 sv = hv_delete_ent(hv, keysv, 0, 0); 5136 if (preeminent) 5137 SvREFCNT_inc_simple_void(sv); /* De-mortalize */ 5138 } 5139 if (preeminent) { 5140 if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 5141 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM); 5142 if (tied) { 5143 *MARK = sv_mortalcopy(sv); 5144 mg_clear(sv); 5145 } else 5146 *MARK = sv; 5147 } 5148 else { 5149 SAVEHDELETE(hv, keysv); 5150 *MARK = &PL_sv_undef; 5151 } 5152 } 5153 } 5154 else if (type == SVt_PVAV) { /* array element */ 5155 if (PL_op->op_flags & OPf_SPECIAL) { 5156 AV * const av = MUTABLE_AV(osv); 5157 while (++MARK <= end) { 5158 SSize_t idx = SvIV(*MARK); 5159 SV *sv = NULL; 5160 bool preeminent = TRUE; 5161 if (can_preserve) 5162 preeminent = av_exists(av, idx); 5163 if (tied) { 5164 SV **svp = av_fetch(av, idx, 1); 5165 if (svp) 5166 sv = *svp; 5167 else 5168 preeminent = FALSE; 5169 } 5170 else { 5171 sv = av_delete(av, idx, 0); 5172 if (preeminent) 5173 SvREFCNT_inc_simple_void(sv); /* De-mortalize */ 5174 } 5175 if (preeminent) { 5176 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM); 5177 if (tied) { 5178 *MARK = sv_mortalcopy(sv); 5179 mg_clear(sv); 5180 } else 5181 *MARK = sv; 5182 } 5183 else { 5184 SAVEADELETE(av, idx); 5185 *MARK = &PL_sv_undef; 5186 } 5187 } 5188 } 5189 else 5190 DIE(aTHX_ "panic: avhv_delete no longer supported"); 5191 } 5192 else 5193 DIE(aTHX_ "Not a HASH reference"); 5194 if (sliced) { 5195 if (gimme == G_VOID) 5196 SP = ORIGMARK; 5197 else if (gimme == G_SCALAR) { 5198 MARK = ORIGMARK; 5199 if (SP > MARK) 5200 *++MARK = *SP; 5201 else 5202 *++MARK = &PL_sv_undef; 5203 SP = MARK; 5204 } 5205 } 5206 else if (gimme != G_VOID) 5207 PUSHs(*unsliced_keysv); 5208 5209 RETURN; 5210 } 5211 5212 PP(pp_delete) 5213 { 5214 dSP; 5215 U8 gimme; 5216 I32 discard; 5217 5218 if (PL_op->op_private & OPpLVAL_INTRO) 5219 return do_delete_local(); 5220 5221 gimme = GIMME_V; 5222 discard = (gimme == G_VOID) ? G_DISCARD : 0; 5223 5224 if (PL_op->op_private & (OPpSLICE|OPpKVSLICE)) { 5225 dMARK; dORIGMARK; 5226 HV * const hv = MUTABLE_HV(POPs); 5227 const U32 hvtype = SvTYPE(hv); 5228 int skip = 0; 5229 if (PL_op->op_private & OPpKVSLICE) { 5230 SSize_t items = SP - MARK; 5231 5232 MEXTEND(SP,items); 5233 while (items > 1) { 5234 *(MARK+items*2-1) = *(MARK+items); 5235 items--; 5236 } 5237 items = SP - MARK; 5238 SP += items; 5239 skip = 1; 5240 } 5241 if (hvtype == SVt_PVHV) { /* hash element */ 5242 while ((MARK += (1+skip)) <= SP) { 5243 SV * const sv = hv_delete_ent(hv, *(MARK-skip), discard, 0); 5244 *MARK = sv ? sv : &PL_sv_undef; 5245 } 5246 } 5247 else if (hvtype == SVt_PVAV) { /* array element */ 5248 if (PL_op->op_flags & OPf_SPECIAL) { 5249 while ((MARK += (1+skip)) <= SP) { 5250 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*(MARK-skip)), discard); 5251 *MARK = sv ? sv : &PL_sv_undef; 5252 } 5253 } 5254 } 5255 else 5256 DIE(aTHX_ "Not a HASH reference"); 5257 if (discard) 5258 SP = ORIGMARK; 5259 else if (gimme == G_SCALAR) { 5260 MARK = ORIGMARK; 5261 if (SP > MARK) 5262 *++MARK = *SP; 5263 else 5264 *++MARK = &PL_sv_undef; 5265 SP = MARK; 5266 } 5267 } 5268 else { 5269 SV *keysv = POPs; 5270 HV * const hv = MUTABLE_HV(POPs); 5271 SV *sv = NULL; 5272 if (SvTYPE(hv) == SVt_PVHV) 5273 sv = hv_delete_ent(hv, keysv, discard, 0); 5274 else if (SvTYPE(hv) == SVt_PVAV) { 5275 if (PL_op->op_flags & OPf_SPECIAL) 5276 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard); 5277 else 5278 DIE(aTHX_ "panic: avhv_delete no longer supported"); 5279 } 5280 else 5281 DIE(aTHX_ "Not a HASH reference"); 5282 if (!sv) 5283 sv = &PL_sv_undef; 5284 if (!discard) 5285 PUSHs(sv); 5286 } 5287 RETURN; 5288 } 5289 5290 PP(pp_exists) 5291 { 5292 dSP; 5293 SV *tmpsv; 5294 HV *hv; 5295 5296 if (UNLIKELY( PL_op->op_private & OPpEXISTS_SUB )) { 5297 GV *gv; 5298 SV * const sv = POPs; 5299 CV * const cv = sv_2cv(sv, &hv, &gv, 0); 5300 if (cv) 5301 RETPUSHYES; 5302 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv)) 5303 RETPUSHYES; 5304 RETPUSHNO; 5305 } 5306 tmpsv = POPs; 5307 hv = MUTABLE_HV(POPs); 5308 if (LIKELY( SvTYPE(hv) == SVt_PVHV )) { 5309 if (hv_exists_ent(hv, tmpsv, 0)) 5310 RETPUSHYES; 5311 } 5312 else if (SvTYPE(hv) == SVt_PVAV) { 5313 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */ 5314 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv))) 5315 RETPUSHYES; 5316 } 5317 } 5318 else { 5319 DIE(aTHX_ "Not a HASH reference"); 5320 } 5321 RETPUSHNO; 5322 } 5323 5324 PP(pp_hslice) 5325 { 5326 dSP; dMARK; dORIGMARK; 5327 HV * const hv = MUTABLE_HV(POPs); 5328 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); 5329 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 5330 bool can_preserve = FALSE; 5331 5332 if (localizing) { 5333 MAGIC *mg; 5334 HV *stash; 5335 5336 if (SvCANEXISTDELETE(hv)) 5337 can_preserve = TRUE; 5338 } 5339 5340 while (++MARK <= SP) { 5341 SV * const keysv = *MARK; 5342 SV **svp; 5343 HE *he; 5344 bool preeminent = TRUE; 5345 5346 if (localizing && can_preserve) { 5347 /* If we can determine whether the element exist, 5348 * try to preserve the existenceness of a tied hash 5349 * element by using EXISTS and DELETE if possible. 5350 * Fallback to FETCH and STORE otherwise. */ 5351 preeminent = hv_exists_ent(hv, keysv, 0); 5352 } 5353 5354 he = hv_fetch_ent(hv, keysv, lval, 0); 5355 svp = he ? &HeVAL(he) : NULL; 5356 5357 if (lval) { 5358 if (!svp || !*svp || *svp == &PL_sv_undef) { 5359 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 5360 } 5361 if (localizing) { 5362 if (HvNAME_get(hv) && isGV_or_RVCV(*svp)) 5363 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL)); 5364 else if (preeminent) 5365 save_helem_flags(hv, keysv, svp, 5366 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC); 5367 else 5368 SAVEHDELETE(hv, keysv); 5369 } 5370 } 5371 *MARK = svp && *svp ? *svp : &PL_sv_undef; 5372 } 5373 if (GIMME_V != G_LIST) { 5374 MARK = ORIGMARK; 5375 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef; 5376 SP = MARK; 5377 } 5378 RETURN; 5379 } 5380 5381 PP(pp_kvhslice) 5382 { 5383 dSP; dMARK; 5384 HV * const hv = MUTABLE_HV(POPs); 5385 I32 lval = (PL_op->op_flags & OPf_MOD); 5386 SSize_t items = SP - MARK; 5387 5388 if (PL_op->op_private & OPpMAYBE_LVSUB) { 5389 const I32 flags = is_lvalue_sub(); 5390 if (flags) { 5391 if (!(flags & OPpENTERSUB_INARGS)) 5392 /* diag_listed_as: Can't modify %s in %s */ 5393 Perl_croak(aTHX_ "Can't modify key/value hash slice in %s assignment", 5394 GIMME_V == G_LIST ? "list" : "scalar"); 5395 lval = flags; 5396 } 5397 } 5398 5399 MEXTEND(SP,items); 5400 while (items > 1) { 5401 *(MARK+items*2-1) = *(MARK+items); 5402 items--; 5403 } 5404 items = SP-MARK; 5405 SP += items; 5406 5407 while (++MARK <= SP) { 5408 SV * const keysv = *MARK; 5409 SV **svp; 5410 HE *he; 5411 5412 he = hv_fetch_ent(hv, keysv, lval, 0); 5413 svp = he ? &HeVAL(he) : NULL; 5414 5415 if (lval) { 5416 if (!svp || !*svp || *svp == &PL_sv_undef) { 5417 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 5418 } 5419 *MARK = sv_mortalcopy(*MARK); 5420 } 5421 *++MARK = svp && *svp ? *svp : &PL_sv_undef; 5422 } 5423 if (GIMME_V != G_LIST) { 5424 MARK = SP - items*2; 5425 *++MARK = items > 0 ? *SP : &PL_sv_undef; 5426 SP = MARK; 5427 } 5428 RETURN; 5429 } 5430 5431 /* List operators. */ 5432 5433 PP(pp_list) 5434 { 5435 I32 markidx = POPMARK; 5436 if (GIMME_V != G_LIST) { 5437 /* don't initialize mark here, EXTEND() may move the stack */ 5438 SV **mark; 5439 dSP; 5440 EXTEND(SP, 1); /* in case no arguments, as in @empty */ 5441 mark = PL_stack_base + markidx; 5442 if (++MARK <= SP) 5443 *MARK = *SP; /* unwanted list, return last item */ 5444 else 5445 *MARK = &PL_sv_undef; 5446 SP = MARK; 5447 PUTBACK; 5448 } 5449 return NORMAL; 5450 } 5451 5452 PP(pp_lslice) 5453 { 5454 dSP; 5455 SV ** const lastrelem = PL_stack_sp; 5456 SV ** const lastlelem = PL_stack_base + POPMARK; 5457 SV ** const firstlelem = PL_stack_base + POPMARK + 1; 5458 SV ** const firstrelem = lastlelem + 1; 5459 const U8 mod = PL_op->op_flags & OPf_MOD; 5460 5461 const I32 max = lastrelem - lastlelem; 5462 SV **lelem; 5463 5464 if (GIMME_V != G_LIST) { 5465 if (lastlelem < firstlelem) { 5466 EXTEND(SP, 1); 5467 *firstlelem = &PL_sv_undef; 5468 } 5469 else { 5470 I32 ix = SvIV(*lastlelem); 5471 if (ix < 0) 5472 ix += max; 5473 if (ix < 0 || ix >= max) 5474 *firstlelem = &PL_sv_undef; 5475 else 5476 *firstlelem = firstrelem[ix]; 5477 } 5478 SP = firstlelem; 5479 RETURN; 5480 } 5481 5482 if (max == 0) { 5483 SP = firstlelem - 1; 5484 RETURN; 5485 } 5486 5487 for (lelem = firstlelem; lelem <= lastlelem; lelem++) { 5488 I32 ix = SvIV(*lelem); 5489 if (ix < 0) 5490 ix += max; 5491 if (ix < 0 || ix >= max) 5492 *lelem = &PL_sv_undef; 5493 else { 5494 if (!(*lelem = firstrelem[ix])) 5495 *lelem = &PL_sv_undef; 5496 else if (mod && SvPADTMP(*lelem)) { 5497 *lelem = firstrelem[ix] = sv_mortalcopy(*lelem); 5498 } 5499 } 5500 } 5501 SP = lastlelem; 5502 RETURN; 5503 } 5504 5505 PP(pp_anonlist) 5506 { 5507 dSP; dMARK; 5508 const I32 items = SP - MARK; 5509 SV * const av = MUTABLE_SV(av_make(items, MARK+1)); 5510 SP = MARK; 5511 mXPUSHs((PL_op->op_flags & OPf_SPECIAL) 5512 ? newRV_noinc(av) : av); 5513 RETURN; 5514 } 5515 5516 PP(pp_anonhash) 5517 { 5518 dSP; dMARK; dORIGMARK; 5519 HV* const hv = newHV(); 5520 SV* const retval = sv_2mortal( PL_op->op_flags & OPf_SPECIAL 5521 ? newRV_noinc(MUTABLE_SV(hv)) 5522 : MUTABLE_SV(hv) ); 5523 /* This isn't quite true for an odd sized list (it's one too few) but it's 5524 not worth the runtime +1 just to optimise for the warning case. */ 5525 SSize_t pairs = (SP - MARK) >> 1; 5526 if (pairs > PERL_HASH_DEFAULT_HvMAX) { 5527 hv_ksplit(hv, pairs); 5528 } 5529 5530 while (MARK < SP) { 5531 SV * const key = 5532 (MARK++, SvGMAGICAL(*MARK) ? sv_mortalcopy(*MARK) : *MARK); 5533 SV *val; 5534 if (MARK < SP) 5535 { 5536 MARK++; 5537 SvGETMAGIC(*MARK); 5538 val = newSV_type(SVt_NULL); 5539 sv_setsv_nomg(val, *MARK); 5540 } 5541 else 5542 { 5543 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash"); 5544 val = newSV_type(SVt_NULL); 5545 } 5546 (void)hv_store_ent(hv,key,val,0); 5547 } 5548 SP = ORIGMARK; 5549 XPUSHs(retval); 5550 RETURN; 5551 } 5552 5553 PP(pp_splice) 5554 { 5555 dSP; dMARK; dORIGMARK; 5556 int num_args = (SP - MARK); 5557 AV *ary = MUTABLE_AV(*++MARK); 5558 SV **src; 5559 SV **dst; 5560 SSize_t i; 5561 SSize_t offset; 5562 SSize_t length; 5563 SSize_t newlen; 5564 SSize_t after; 5565 SSize_t diff; 5566 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5567 5568 if (mg) { 5569 return Perl_tied_method(aTHX_ SV_CONST(SPLICE), mark - 1, MUTABLE_SV(ary), mg, 5570 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, 5571 sp - mark); 5572 } 5573 5574 if (SvREADONLY(ary)) 5575 Perl_croak_no_modify(); 5576 5577 SP++; 5578 5579 if (++MARK < SP) { 5580 offset = i = SvIV(*MARK); 5581 if (offset < 0) 5582 offset += AvFILLp(ary) + 1; 5583 if (offset < 0) 5584 DIE(aTHX_ PL_no_aelem, i); 5585 if (++MARK < SP) { 5586 length = SvIVx(*MARK++); 5587 if (length < 0) { 5588 length += AvFILLp(ary) - offset + 1; 5589 if (length < 0) 5590 length = 0; 5591 } 5592 } 5593 else 5594 length = AvMAX(ary) + 1; /* close enough to infinity */ 5595 } 5596 else { 5597 offset = 0; 5598 length = AvMAX(ary) + 1; 5599 } 5600 if (offset > AvFILLp(ary) + 1) { 5601 if (num_args > 2) 5602 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" ); 5603 offset = AvFILLp(ary) + 1; 5604 } 5605 after = AvFILLp(ary) + 1 - (offset + length); 5606 if (after < 0) { /* not that much array */ 5607 length += after; /* offset+length now in array */ 5608 after = 0; 5609 if (!AvALLOC(ary)) 5610 av_extend(ary, 0); 5611 } 5612 5613 /* At this point, MARK .. SP-1 is our new LIST */ 5614 5615 newlen = SP - MARK; 5616 diff = newlen - length; 5617 if (newlen && !AvREAL(ary) && AvREIFY(ary)) 5618 av_reify(ary); 5619 5620 /* make new elements SVs now: avoid problems if they're from the array */ 5621 for (dst = MARK, i = newlen; i; i--) { 5622 SV * const h = *dst; 5623 *dst++ = newSVsv(h); 5624 } 5625 5626 if (diff < 0) { /* shrinking the area */ 5627 SV **tmparyval = NULL; 5628 if (newlen) { 5629 Newx(tmparyval, newlen, SV*); /* so remember insertion */ 5630 Copy(MARK, tmparyval, newlen, SV*); 5631 } 5632 5633 MARK = ORIGMARK + 1; 5634 if (GIMME_V == G_LIST) { /* copy return vals to stack */ 5635 const bool real = cBOOL(AvREAL(ary)); 5636 MEXTEND(MARK, length); 5637 if (real) 5638 EXTEND_MORTAL(length); 5639 for (i = 0, dst = MARK; i < length; i++) { 5640 if ((*dst = AvARRAY(ary)[i+offset])) { 5641 if (real) 5642 sv_2mortal(*dst); /* free them eventually */ 5643 } 5644 else 5645 *dst = &PL_sv_undef; 5646 dst++; 5647 } 5648 MARK += length - 1; 5649 } 5650 else { 5651 *MARK = AvARRAY(ary)[offset+length-1]; 5652 if (AvREAL(ary)) { 5653 sv_2mortal(*MARK); 5654 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--) 5655 SvREFCNT_dec(*dst++); /* free them now */ 5656 } 5657 if (!*MARK) 5658 *MARK = &PL_sv_undef; 5659 } 5660 AvFILLp(ary) += diff; 5661 5662 /* pull up or down? */ 5663 5664 if (offset < after) { /* easier to pull up */ 5665 if (offset) { /* esp. if nothing to pull */ 5666 src = &AvARRAY(ary)[offset-1]; 5667 dst = src - diff; /* diff is negative */ 5668 for (i = offset; i > 0; i--) /* can't trust Copy */ 5669 *dst-- = *src--; 5670 } 5671 dst = AvARRAY(ary); 5672 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */ 5673 AvMAX(ary) += diff; 5674 } 5675 else { 5676 if (after) { /* anything to pull down? */ 5677 src = AvARRAY(ary) + offset + length; 5678 dst = src + diff; /* diff is negative */ 5679 Move(src, dst, after, SV*); 5680 } 5681 dst = &AvARRAY(ary)[AvFILLp(ary)+1]; 5682 /* avoid later double free */ 5683 } 5684 i = -diff; 5685 while (i) 5686 dst[--i] = NULL; 5687 5688 if (newlen) { 5689 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* ); 5690 Safefree(tmparyval); 5691 } 5692 } 5693 else { /* no, expanding (or same) */ 5694 SV** tmparyval = NULL; 5695 if (length) { 5696 Newx(tmparyval, length, SV*); /* so remember deletion */ 5697 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*); 5698 } 5699 5700 if (diff > 0) { /* expanding */ 5701 /* push up or down? */ 5702 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) { 5703 if (offset) { 5704 src = AvARRAY(ary); 5705 dst = src - diff; 5706 Move(src, dst, offset, SV*); 5707 } 5708 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */ 5709 AvMAX(ary) += diff; 5710 AvFILLp(ary) += diff; 5711 } 5712 else { 5713 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */ 5714 av_extend(ary, AvFILLp(ary) + diff); 5715 AvFILLp(ary) += diff; 5716 5717 if (after) { 5718 dst = AvARRAY(ary) + AvFILLp(ary); 5719 src = dst - diff; 5720 for (i = after; i; i--) { 5721 *dst-- = *src--; 5722 } 5723 } 5724 } 5725 } 5726 5727 if (newlen) { 5728 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* ); 5729 } 5730 5731 MARK = ORIGMARK + 1; 5732 if (GIMME_V == G_LIST) { /* copy return vals to stack */ 5733 if (length) { 5734 const bool real = cBOOL(AvREAL(ary)); 5735 if (real) 5736 EXTEND_MORTAL(length); 5737 for (i = 0, dst = MARK; i < length; i++) { 5738 if ((*dst = tmparyval[i])) { 5739 if (real) 5740 sv_2mortal(*dst); /* free them eventually */ 5741 } 5742 else *dst = &PL_sv_undef; 5743 dst++; 5744 } 5745 } 5746 MARK += length - 1; 5747 } 5748 else if (length--) { 5749 *MARK = tmparyval[length]; 5750 if (AvREAL(ary)) { 5751 sv_2mortal(*MARK); 5752 while (length-- > 0) 5753 SvREFCNT_dec(tmparyval[length]); 5754 } 5755 if (!*MARK) 5756 *MARK = &PL_sv_undef; 5757 } 5758 else 5759 *MARK = &PL_sv_undef; 5760 Safefree(tmparyval); 5761 } 5762 5763 if (SvMAGICAL(ary)) 5764 mg_set(MUTABLE_SV(ary)); 5765 5766 SP = MARK; 5767 RETURN; 5768 } 5769 5770 PP(pp_push) 5771 { 5772 dSP; dMARK; dORIGMARK; dTARGET; 5773 AV * const ary = MUTABLE_AV(*++MARK); 5774 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5775 5776 if (mg) { 5777 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); 5778 PUSHMARK(MARK); 5779 PUTBACK; 5780 ENTER_with_name("call_PUSH"); 5781 call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); 5782 LEAVE_with_name("call_PUSH"); 5783 /* SPAGAIN; not needed: SP is assigned to immediately below */ 5784 } 5785 else { 5786 /* PL_delaymagic is restored by JUMPENV_POP on dieing, so we 5787 * only need to save locally, not on the save stack */ 5788 U16 old_delaymagic = PL_delaymagic; 5789 5790 if (SvREADONLY(ary) && MARK < SP) Perl_croak_no_modify(); 5791 PL_delaymagic = DM_DELAY; 5792 for (++MARK; MARK <= SP; MARK++) { 5793 SV *sv; 5794 if (*MARK) SvGETMAGIC(*MARK); 5795 sv = newSV_type(SVt_NULL); 5796 if (*MARK) 5797 sv_setsv_nomg(sv, *MARK); 5798 av_store(ary, AvFILLp(ary)+1, sv); 5799 } 5800 if (PL_delaymagic & DM_ARRAY_ISA) 5801 mg_set(MUTABLE_SV(ary)); 5802 PL_delaymagic = old_delaymagic; 5803 } 5804 SP = ORIGMARK; 5805 if (OP_GIMME(PL_op, 0) != G_VOID) { 5806 PUSHi( AvFILL(ary) + 1 ); 5807 } 5808 RETURN; 5809 } 5810 5811 /* also used for: pp_pop()*/ 5812 PP(pp_shift) 5813 { 5814 dSP; 5815 AV * const av = PL_op->op_flags & OPf_SPECIAL 5816 ? MUTABLE_AV(GvAVn(PL_defgv)) : MUTABLE_AV(POPs); 5817 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av); 5818 EXTEND(SP, 1); 5819 assert (sv); 5820 if (AvREAL(av)) 5821 (void)sv_2mortal(sv); 5822 PUSHs(sv); 5823 RETURN; 5824 } 5825 5826 PP(pp_unshift) 5827 { 5828 dSP; dMARK; dORIGMARK; dTARGET; 5829 AV *ary = MUTABLE_AV(*++MARK); 5830 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5831 5832 if (mg) { 5833 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); 5834 PUSHMARK(MARK); 5835 PUTBACK; 5836 ENTER_with_name("call_UNSHIFT"); 5837 call_sv(SV_CONST(UNSHIFT),G_SCALAR|G_DISCARD|G_METHOD_NAMED); 5838 LEAVE_with_name("call_UNSHIFT"); 5839 /* SPAGAIN; not needed: SP is assigned to immediately below */ 5840 } 5841 else { 5842 /* PL_delaymagic is restored by JUMPENV_POP on dieing, so we 5843 * only need to save locally, not on the save stack */ 5844 U16 old_delaymagic = PL_delaymagic; 5845 SSize_t i = 0; 5846 5847 av_unshift(ary, SP - MARK); 5848 PL_delaymagic = DM_DELAY; 5849 while (MARK < SP) { 5850 SV * const sv = newSVsv(*++MARK); 5851 (void)av_store(ary, i++, sv); 5852 } 5853 if (PL_delaymagic & DM_ARRAY_ISA) 5854 mg_set(MUTABLE_SV(ary)); 5855 PL_delaymagic = old_delaymagic; 5856 } 5857 SP = ORIGMARK; 5858 if (OP_GIMME(PL_op, 0) != G_VOID) { 5859 PUSHi( AvFILL(ary) + 1 ); 5860 } 5861 RETURN; 5862 } 5863 5864 PP(pp_reverse) 5865 { 5866 dSP; dMARK; 5867 5868 if (GIMME_V == G_LIST) { 5869 if (PL_op->op_private & OPpREVERSE_INPLACE) { 5870 AV *av; 5871 5872 /* See pp_sort() */ 5873 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV); 5874 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */ 5875 av = MUTABLE_AV((*SP)); 5876 /* In-place reversing only happens in void context for the array 5877 * assignment. We don't need to push anything on the stack. */ 5878 SP = MARK; 5879 5880 if (SvMAGICAL(av)) { 5881 SSize_t i, j; 5882 SV *tmp = sv_newmortal(); 5883 /* For SvCANEXISTDELETE */ 5884 HV *stash; 5885 const MAGIC *mg; 5886 bool can_preserve = SvCANEXISTDELETE(av); 5887 5888 for (i = 0, j = av_top_index(av); i < j; ++i, --j) { 5889 SV *begin, *end; 5890 5891 if (can_preserve) { 5892 if (!av_exists(av, i)) { 5893 if (av_exists(av, j)) { 5894 SV *sv = av_delete(av, j, 0); 5895 begin = *av_fetch(av, i, TRUE); 5896 sv_setsv_mg(begin, sv); 5897 } 5898 continue; 5899 } 5900 else if (!av_exists(av, j)) { 5901 SV *sv = av_delete(av, i, 0); 5902 end = *av_fetch(av, j, TRUE); 5903 sv_setsv_mg(end, sv); 5904 continue; 5905 } 5906 } 5907 5908 begin = *av_fetch(av, i, TRUE); 5909 end = *av_fetch(av, j, TRUE); 5910 sv_setsv(tmp, begin); 5911 sv_setsv_mg(begin, end); 5912 sv_setsv_mg(end, tmp); 5913 } 5914 } 5915 else { 5916 SV **begin = AvARRAY(av); 5917 5918 if (begin) { 5919 SV **end = begin + AvFILLp(av); 5920 5921 while (begin < end) { 5922 SV * const tmp = *begin; 5923 *begin++ = *end; 5924 *end-- = tmp; 5925 } 5926 } 5927 } 5928 } 5929 else { 5930 SV **oldsp = SP; 5931 MARK++; 5932 while (MARK < SP) { 5933 SV * const tmp = *MARK; 5934 *MARK++ = *SP; 5935 *SP-- = tmp; 5936 } 5937 /* safe as long as stack cannot get extended in the above */ 5938 SP = oldsp; 5939 } 5940 } 5941 else { 5942 char *up; 5943 dTARGET; 5944 STRLEN len; 5945 5946 SvUTF8_off(TARG); /* decontaminate */ 5947 if (SP - MARK > 1) { 5948 do_join(TARG, &PL_sv_no, MARK, SP); 5949 SP = MARK + 1; 5950 SETs(TARG); 5951 } else if (SP > MARK) { 5952 sv_setsv(TARG, *SP); 5953 SETs(TARG); 5954 } else { 5955 sv_setsv(TARG, DEFSV); 5956 XPUSHs(TARG); 5957 } 5958 SvSETMAGIC(TARG); /* remove any utf8 length magic */ 5959 5960 up = SvPV_force(TARG, len); 5961 if (len > 1) { 5962 char *down; 5963 if (DO_UTF8(TARG)) { /* first reverse each character */ 5964 U8* s = (U8*)SvPVX(TARG); 5965 const U8* send = (U8*)(s + len); 5966 while (s < send) { 5967 if (UTF8_IS_INVARIANT(*s)) { 5968 s++; 5969 continue; 5970 } 5971 else { 5972 if (!utf8_to_uvchr_buf(s, send, 0)) 5973 break; 5974 up = (char*)s; 5975 s += UTF8SKIP(s); 5976 down = (char*)(s - 1); 5977 /* reverse this character */ 5978 while (down > up) { 5979 const char tmp = *up; 5980 *up++ = *down; 5981 *down-- = tmp; 5982 } 5983 } 5984 } 5985 up = SvPVX(TARG); 5986 } 5987 down = SvPVX(TARG) + len - 1; 5988 while (down > up) { 5989 const char tmp = *up; 5990 *up++ = *down; 5991 *down-- = tmp; 5992 } 5993 (void)SvPOK_only_UTF8(TARG); 5994 } 5995 } 5996 RETURN; 5997 } 5998 5999 PP(pp_split) 6000 { 6001 dSP; dTARG; 6002 AV *ary = ( (PL_op->op_private & OPpSPLIT_ASSIGN) /* @a = split */ 6003 && (PL_op->op_flags & OPf_STACKED)) /* @{expr} = split */ 6004 ? (AV *)POPs : NULL; 6005 IV limit = POPi; /* note, negative is forever */ 6006 SV * const sv = POPs; 6007 STRLEN len; 6008 const char *s = SvPV_const(sv, len); 6009 const bool do_utf8 = DO_UTF8(sv); 6010 const bool in_uni_8_bit = IN_UNI_8_BIT; 6011 const char *strend = s + len; 6012 PMOP *pm = cPMOPx(PL_op); 6013 REGEXP *rx; 6014 SV *dstr; 6015 const char *m; 6016 SSize_t iters = 0; 6017 const STRLEN slen = do_utf8 6018 ? utf8_length((U8*)s, (U8*)strend) 6019 : (STRLEN)(strend - s); 6020 SSize_t maxiters = slen + 10; 6021 I32 trailing_empty = 0; 6022 const char *orig; 6023 const IV origlimit = limit; 6024 bool realarray = 0; 6025 I32 base; 6026 const U8 gimme = GIMME_V; 6027 bool gimme_scalar; 6028 I32 oldsave = PL_savestack_ix; 6029 U32 flags = (do_utf8 ? SVf_UTF8 : 0) | 6030 SVs_TEMP; /* Make mortal SVs by default */ 6031 MAGIC *mg = NULL; 6032 6033 rx = PM_GETRE(pm); 6034 6035 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && 6036 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); 6037 6038 /* handle @ary = split(...) optimisation */ 6039 if (PL_op->op_private & OPpSPLIT_ASSIGN) { 6040 realarray = 1; 6041 if (!(PL_op->op_flags & OPf_STACKED)) { 6042 if (PL_op->op_private & OPpSPLIT_LEX) { 6043 if (PL_op->op_private & OPpLVAL_INTRO) 6044 SAVECLEARSV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)); 6045 ary = (AV *)PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff); 6046 } 6047 else { 6048 GV *gv = 6049 #ifdef USE_ITHREADS 6050 MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)); 6051 #else 6052 pm->op_pmreplrootu.op_pmtargetgv; 6053 #endif 6054 if (PL_op->op_private & OPpLVAL_INTRO) 6055 ary = save_ary(gv); 6056 else 6057 ary = GvAVn(gv); 6058 } 6059 /* skip anything pushed by OPpLVAL_INTRO above */ 6060 oldsave = PL_savestack_ix; 6061 } 6062 6063 /* Some defence against stack-not-refcounted bugs */ 6064 (void)sv_2mortal(SvREFCNT_inc_simple_NN(ary)); 6065 6066 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) { 6067 PUSHMARK(SP); 6068 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg)); 6069 } else { 6070 flags &= ~SVs_TEMP; /* SVs will not be mortal */ 6071 } 6072 } 6073 6074 base = SP - PL_stack_base; 6075 orig = s; 6076 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) { 6077 if (do_utf8) { 6078 while (s < strend && isSPACE_utf8_safe(s, strend)) 6079 s += UTF8SKIP(s); 6080 } 6081 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { 6082 while (s < strend && isSPACE_LC(*s)) 6083 s++; 6084 } 6085 else if (in_uni_8_bit) { 6086 while (s < strend && isSPACE_L1(*s)) 6087 s++; 6088 } 6089 else { 6090 while (s < strend && isSPACE(*s)) 6091 s++; 6092 } 6093 } 6094 6095 gimme_scalar = gimme == G_SCALAR && !ary; 6096 6097 if (!limit) 6098 limit = maxiters + 2; 6099 if (RX_EXTFLAGS(rx) & RXf_WHITE) { 6100 while (--limit) { 6101 m = s; 6102 /* this one uses 'm' and is a negative test */ 6103 if (do_utf8) { 6104 while (m < strend && ! isSPACE_utf8_safe(m, strend) ) { 6105 const int t = UTF8SKIP(m); 6106 /* isSPACE_utf8_safe returns FALSE for malform utf8 */ 6107 if (strend - m < t) 6108 m = strend; 6109 else 6110 m += t; 6111 } 6112 } 6113 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) 6114 { 6115 while (m < strend && !isSPACE_LC(*m)) 6116 ++m; 6117 } 6118 else if (in_uni_8_bit) { 6119 while (m < strend && !isSPACE_L1(*m)) 6120 ++m; 6121 } else { 6122 while (m < strend && !isSPACE(*m)) 6123 ++m; 6124 } 6125 if (m >= strend) 6126 break; 6127 6128 if (gimme_scalar) { 6129 iters++; 6130 if (m-s == 0) 6131 trailing_empty++; 6132 else 6133 trailing_empty = 0; 6134 } else { 6135 dstr = newSVpvn_flags(s, m-s, flags); 6136 XPUSHs(dstr); 6137 } 6138 6139 /* skip the whitespace found last */ 6140 if (do_utf8) 6141 s = m + UTF8SKIP(m); 6142 else 6143 s = m + 1; 6144 6145 /* this one uses 's' and is a positive test */ 6146 if (do_utf8) { 6147 while (s < strend && isSPACE_utf8_safe(s, strend) ) 6148 s += UTF8SKIP(s); 6149 } 6150 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) 6151 { 6152 while (s < strend && isSPACE_LC(*s)) 6153 ++s; 6154 } 6155 else if (in_uni_8_bit) { 6156 while (s < strend && isSPACE_L1(*s)) 6157 ++s; 6158 } else { 6159 while (s < strend && isSPACE(*s)) 6160 ++s; 6161 } 6162 } 6163 } 6164 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) { 6165 while (--limit) { 6166 for (m = s; m < strend && *m != '\n'; m++) 6167 ; 6168 m++; 6169 if (m >= strend) 6170 break; 6171 6172 if (gimme_scalar) { 6173 iters++; 6174 if (m-s == 0) 6175 trailing_empty++; 6176 else 6177 trailing_empty = 0; 6178 } else { 6179 dstr = newSVpvn_flags(s, m-s, flags); 6180 XPUSHs(dstr); 6181 } 6182 s = m; 6183 } 6184 } 6185 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) { 6186 /* This case boils down to deciding which is the smaller of: 6187 * limit - effectively a number of characters 6188 * slen - which already contains the number of characters in s 6189 * 6190 * The resulting number is the number of iters (for gimme_scalar) 6191 * or the number of SVs to create (!gimme_scalar). */ 6192 6193 /* setting it to -1 will trigger a panic in EXTEND() */ 6194 const SSize_t sslen = slen > SSize_t_MAX ? -1 : (SSize_t)slen; 6195 const IV items = limit - 1; 6196 if (sslen < items || items < 0) { 6197 iters = slen -1; 6198 limit = slen + 1; 6199 /* Note: The same result is returned if the following block 6200 * is removed, because of the "keep field after final delim?" 6201 * adjustment, but having the following makes the "correct" 6202 * behaviour more apparent. */ 6203 if (gimme_scalar) { 6204 s = strend; 6205 iters++; 6206 } 6207 } else { 6208 iters = items; 6209 } 6210 if (!gimme_scalar) { 6211 /* 6212 Pre-extend the stack, either the number of bytes or 6213 characters in the string or a limited amount, triggered by: 6214 my ($x, $y) = split //, $str; 6215 or 6216 split //, $str, $i; 6217 */ 6218 EXTEND(SP, limit); 6219 if (do_utf8) { 6220 while (--limit) { 6221 m = s; 6222 s += UTF8SKIP(s); 6223 dstr = newSVpvn_flags(m, s-m, flags); 6224 PUSHs(dstr); 6225 } 6226 } else { 6227 while (--limit) { 6228 dstr = newSVpvn_flags(s, 1, flags); 6229 PUSHs(dstr); 6230 s++; 6231 } 6232 } 6233 } 6234 } 6235 else if (do_utf8 == (RX_UTF8(rx) != 0) && 6236 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx) 6237 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL) 6238 && !(RX_EXTFLAGS(rx) & RXf_IS_ANCHORED)) { 6239 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL); 6240 SV * const csv = CALLREG_INTUIT_STRING(rx); 6241 6242 len = RX_MINLENRET(rx); 6243 if (len == 1 && !RX_UTF8(rx) && !tail) { 6244 const char c = *SvPV_nolen_const(csv); 6245 while (--limit) { 6246 for (m = s; m < strend && *m != c; m++) 6247 ; 6248 if (m >= strend) 6249 break; 6250 if (gimme_scalar) { 6251 iters++; 6252 if (m-s == 0) 6253 trailing_empty++; 6254 else 6255 trailing_empty = 0; 6256 } else { 6257 dstr = newSVpvn_flags(s, m-s, flags); 6258 XPUSHs(dstr); 6259 } 6260 /* The rx->minlen is in characters but we want to step 6261 * s ahead by bytes. */ 6262 if (do_utf8) 6263 s = (char*)utf8_hop_forward((U8*) m, len, (U8*) strend); 6264 else 6265 s = m + len; /* Fake \n at the end */ 6266 } 6267 } 6268 else { 6269 const bool multiline = (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) ? 1 : 0; 6270 6271 while (s < strend && --limit && 6272 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend, 6273 csv, multiline ? FBMrf_MULTILINE : 0)) ) 6274 { 6275 if (gimme_scalar) { 6276 iters++; 6277 if (m-s == 0) 6278 trailing_empty++; 6279 else 6280 trailing_empty = 0; 6281 } else { 6282 dstr = newSVpvn_flags(s, m-s, flags); 6283 XPUSHs(dstr); 6284 } 6285 /* The rx->minlen is in characters but we want to step 6286 * s ahead by bytes. */ 6287 if (do_utf8) 6288 s = (char*)utf8_hop_forward((U8*)m, len, (U8 *) strend); 6289 else 6290 s = m + len; /* Fake \n at the end */ 6291 } 6292 } 6293 } 6294 else { 6295 maxiters += slen * RX_NPARENS(rx); 6296 while (s < strend && --limit) 6297 { 6298 I32 rex_return; 6299 PUTBACK; 6300 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1, 6301 sv, NULL, 0); 6302 SPAGAIN; 6303 if (rex_return == 0) 6304 break; 6305 TAINT_IF(RX_MATCH_TAINTED(rx)); 6306 /* we never pass the REXEC_COPY_STR flag, so it should 6307 * never get copied */ 6308 assert(!RX_MATCH_COPIED(rx)); 6309 m = RX_OFFS(rx)[0].start + orig; 6310 6311 if (gimme_scalar) { 6312 iters++; 6313 if (m-s == 0) 6314 trailing_empty++; 6315 else 6316 trailing_empty = 0; 6317 } else { 6318 dstr = newSVpvn_flags(s, m-s, flags); 6319 XPUSHs(dstr); 6320 } 6321 if (RX_NPARENS(rx)) { 6322 I32 i; 6323 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) { 6324 s = RX_OFFS(rx)[i].start + orig; 6325 m = RX_OFFS(rx)[i].end + orig; 6326 6327 /* japhy (07/27/01) -- the (m && s) test doesn't catch 6328 parens that didn't match -- they should be set to 6329 undef, not the empty string */ 6330 if (gimme_scalar) { 6331 iters++; 6332 if (m-s == 0) 6333 trailing_empty++; 6334 else 6335 trailing_empty = 0; 6336 } else { 6337 if (m >= orig && s >= orig) { 6338 dstr = newSVpvn_flags(s, m-s, flags); 6339 } 6340 else 6341 dstr = &PL_sv_undef; /* undef, not "" */ 6342 XPUSHs(dstr); 6343 } 6344 6345 } 6346 } 6347 s = RX_OFFS(rx)[0].end + orig; 6348 } 6349 } 6350 6351 if (!gimme_scalar) { 6352 iters = (SP - PL_stack_base) - base; 6353 } 6354 if (iters > maxiters) 6355 DIE(aTHX_ "Split loop"); 6356 6357 /* keep field after final delim? */ 6358 if (s < strend || (iters && origlimit)) { 6359 if (!gimme_scalar) { 6360 const STRLEN l = strend - s; 6361 dstr = newSVpvn_flags(s, l, flags); 6362 XPUSHs(dstr); 6363 } 6364 iters++; 6365 } 6366 else if (!origlimit) { 6367 if (gimme_scalar) { 6368 iters -= trailing_empty; 6369 } else { 6370 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) { 6371 if (TOPs && !(flags & SVs_TEMP)) 6372 sv_2mortal(TOPs); 6373 *SP-- = NULL; 6374 iters--; 6375 } 6376 } 6377 } 6378 6379 PUTBACK; 6380 LEAVE_SCOPE(oldsave); 6381 SPAGAIN; 6382 if (realarray) { 6383 if (!mg) { 6384 PUTBACK; 6385 if(AvREAL(ary)) { 6386 if (av_count(ary) > 0) 6387 av_clear(ary); 6388 } else { 6389 AvREAL_on(ary); 6390 AvREIFY_off(ary); 6391 6392 if (AvMAX(ary) > -1) { 6393 /* don't free mere refs */ 6394 Zero(AvARRAY(ary), AvMAX(ary), SV*); 6395 } 6396 } 6397 if(AvMAX(ary) < iters) 6398 av_extend(ary,iters); 6399 SPAGAIN; 6400 6401 /* Need to copy the SV*s from the stack into ary */ 6402 Copy(SP + 1 - iters, AvARRAY(ary), iters, SV*); 6403 AvFILLp(ary) = iters - 1; 6404 6405 if (SvSMAGICAL(ary)) { 6406 PUTBACK; 6407 mg_set(MUTABLE_SV(ary)); 6408 SPAGAIN; 6409 } 6410 6411 if (gimme != G_LIST) { 6412 /* SP points to the final SV* pushed to the stack. But the SV* */ 6413 /* are not going to be used from the stack. Point SP to below */ 6414 /* the first of these SV*. */ 6415 SP -= iters; 6416 PUTBACK; 6417 } 6418 } 6419 else { 6420 PUTBACK; 6421 av_extend(ary,iters); 6422 av_clear(ary); 6423 6424 ENTER_with_name("call_PUSH"); 6425 call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); 6426 LEAVE_with_name("call_PUSH"); 6427 SPAGAIN; 6428 6429 if (gimme == G_LIST) { 6430 SSize_t i; 6431 /* EXTEND should not be needed - we just popped them */ 6432 EXTEND_SKIP(SP, iters); 6433 for (i=0; i < iters; i++) { 6434 SV **svp = av_fetch(ary, i, FALSE); 6435 PUSHs((svp) ? *svp : &PL_sv_undef); 6436 } 6437 RETURN; 6438 } 6439 } 6440 } 6441 6442 if (gimme != G_LIST) { 6443 GETTARGET; 6444 XPUSHi(iters); 6445 } 6446 6447 RETURN; 6448 } 6449 6450 PP(pp_once) 6451 { 6452 dSP; 6453 SV *const sv = PAD_SVl(PL_op->op_targ); 6454 6455 if (SvPADSTALE(sv)) { 6456 /* First time. */ 6457 SvPADSTALE_off(sv); 6458 RETURNOP(cLOGOP->op_other); 6459 } 6460 RETURNOP(cLOGOP->op_next); 6461 } 6462 6463 PP(pp_lock) 6464 { 6465 dSP; 6466 dTOPss; 6467 SV *retsv = sv; 6468 SvLOCK(sv); 6469 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV 6470 || SvTYPE(retsv) == SVt_PVCV) { 6471 retsv = refto(retsv); 6472 } 6473 SETs(retsv); 6474 RETURN; 6475 } 6476 6477 6478 /* used for: pp_padany(), pp_custom(); plus any system ops 6479 * that aren't implemented on a particular platform */ 6480 6481 PP(unimplemented_op) 6482 { 6483 const Optype op_type = PL_op->op_type; 6484 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope 6485 with out of range op numbers - it only "special" cases op_custom. 6486 Secondly, as the three ops we "panic" on are padmy, mapstart and custom, 6487 if we get here for a custom op then that means that the custom op didn't 6488 have an implementation. Given that OP_NAME() looks up the custom op 6489 by its op_ppaddr, likely it will return NULL, unless someone (unhelpfully) 6490 registers &Perl_unimplemented_op as the address of their custom op. 6491 NULL doesn't generate a useful error message. "custom" does. */ 6492 const char *const name = op_type >= OP_max 6493 ? "[out of range]" : PL_op_name[op_type]; 6494 if(OP_IS_SOCKET(op_type)) 6495 DIE(aTHX_ PL_no_sock_func, name); 6496 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); 6497 } 6498 6499 static void 6500 S_maybe_unwind_defav(pTHX) 6501 { 6502 if (CX_CUR()->cx_type & CXp_HASARGS) { 6503 PERL_CONTEXT *cx = CX_CUR(); 6504 6505 assert(CxHASARGS(cx)); 6506 cx_popsub_args(cx); 6507 cx->cx_type &= ~CXp_HASARGS; 6508 } 6509 } 6510 6511 /* For sorting out arguments passed to a &CORE:: subroutine */ 6512 PP(pp_coreargs) 6513 { 6514 dSP; 6515 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0; 6516 int defgv = PL_opargs[opnum] & OA_DEFGV ||opnum==OP_GLOB, whicharg = 0; 6517 AV * const at_ = GvAV(PL_defgv); 6518 SV **svp = at_ ? AvARRAY(at_) : NULL; 6519 I32 minargs = 0, maxargs = 0, numargs = at_ ? AvFILLp(at_)+1 : 0; 6520 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0; 6521 bool seen_question = 0; 6522 const char *err = NULL; 6523 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK; 6524 6525 /* Count how many args there are first, to get some idea how far to 6526 extend the stack. */ 6527 while (oa) { 6528 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; } 6529 maxargs++; 6530 if (oa & OA_OPTIONAL) seen_question = 1; 6531 if (!seen_question) minargs++; 6532 oa >>= 4; 6533 } 6534 6535 if(numargs < minargs) err = "Not enough"; 6536 else if(numargs > maxargs) err = "Too many"; 6537 if (err) 6538 /* diag_listed_as: Too many arguments for %s */ 6539 Perl_croak(aTHX_ 6540 "%s arguments for %s", err, 6541 opnum ? PL_op_desc[opnum] : SvPV_nolen_const(cSVOP_sv) 6542 ); 6543 6544 /* Reset the stack pointer. Without this, we end up returning our own 6545 arguments in list context, in addition to the values we are supposed 6546 to return. nextstate usually does this on sub entry, but we need 6547 to run the next op with the caller's hints, so we cannot have a 6548 nextstate. */ 6549 SP = PL_stack_base + CX_CUR()->blk_oldsp; 6550 6551 if(!maxargs) RETURN; 6552 6553 /* We do this here, rather than with a separate pushmark op, as it has 6554 to come in between two things this function does (stack reset and 6555 arg pushing). This seems the easiest way to do it. */ 6556 if (pushmark) { 6557 PUTBACK; 6558 (void)Perl_pp_pushmark(aTHX); 6559 } 6560 6561 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs); 6562 PUTBACK; /* The code below can die in various places. */ 6563 6564 oa = PL_opargs[opnum] >> OASHIFT; 6565 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) { 6566 whicharg++; 6567 switch (oa & 7) { 6568 case OA_SCALAR: 6569 try_defsv: 6570 if (!numargs && defgv && whicharg == minargs + 1) { 6571 PUSHs(DEFSV); 6572 } 6573 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL); 6574 break; 6575 case OA_LIST: 6576 while (numargs--) { 6577 PUSHs(svp && *svp ? *svp : &PL_sv_undef); 6578 svp++; 6579 } 6580 RETURN; 6581 case OA_AVREF: 6582 if (!numargs) { 6583 GV *gv; 6584 if (CvUNIQUE(find_runcv_where(FIND_RUNCV_level_eq,1,NULL))) 6585 gv = PL_argvgv; 6586 else { 6587 S_maybe_unwind_defav(aTHX); 6588 gv = PL_defgv; 6589 } 6590 PUSHs((SV *)GvAVn(gv)); 6591 break; 6592 } 6593 if (!svp || !*svp || !SvROK(*svp) 6594 || SvTYPE(SvRV(*svp)) != SVt_PVAV) 6595 DIE(aTHX_ 6596 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/ 6597 "Type of arg %d to &CORE::%s must be array reference", 6598 whicharg, PL_op_desc[opnum] 6599 ); 6600 PUSHs(SvRV(*svp)); 6601 break; 6602 case OA_HVREF: 6603 if (!svp || !*svp || !SvROK(*svp) 6604 || ( SvTYPE(SvRV(*svp)) != SVt_PVHV 6605 && ( opnum == OP_DBMCLOSE || opnum == OP_DBMOPEN 6606 || SvTYPE(SvRV(*svp)) != SVt_PVAV ))) 6607 DIE(aTHX_ 6608 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/ 6609 "Type of arg %d to &CORE::%s must be hash%s reference", 6610 whicharg, PL_op_desc[opnum], 6611 opnum == OP_DBMCLOSE || opnum == OP_DBMOPEN 6612 ? "" 6613 : " or array" 6614 ); 6615 PUSHs(SvRV(*svp)); 6616 break; 6617 case OA_FILEREF: 6618 if (!numargs) PUSHs(NULL); 6619 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp))) 6620 /* no magic here, as the prototype will have added an extra 6621 refgen and we just want what was there before that */ 6622 PUSHs(SvRV(*svp)); 6623 else { 6624 const bool constr = PL_op->op_private & whicharg; 6625 PUSHs(S_rv2gv(aTHX_ 6626 svp && *svp ? *svp : &PL_sv_undef, 6627 constr, cBOOL(CopHINTS_get(PL_curcop) & HINT_STRICT_REFS), 6628 !constr 6629 )); 6630 } 6631 break; 6632 case OA_SCALARREF: 6633 if (!numargs) goto try_defsv; 6634 else { 6635 const bool wantscalar = 6636 PL_op->op_private & OPpCOREARGS_SCALARMOD; 6637 if (!svp || !*svp || !SvROK(*svp) 6638 /* We have to permit globrefs even for the \$ proto, as 6639 *foo is indistinguishable from ${\*foo}, and the proto- 6640 type permits the latter. */ 6641 || SvTYPE(SvRV(*svp)) > ( 6642 wantscalar ? SVt_PVLV 6643 : opnum == OP_LOCK || opnum == OP_UNDEF 6644 ? SVt_PVCV 6645 : SVt_PVHV 6646 ) 6647 ) 6648 DIE(aTHX_ 6649 "Type of arg %d to &CORE::%s must be %s", 6650 whicharg, PL_op_name[opnum], 6651 wantscalar 6652 ? "scalar reference" 6653 : opnum == OP_LOCK || opnum == OP_UNDEF 6654 ? "reference to one of [$@%&*]" 6655 : "reference to one of [$@%*]" 6656 ); 6657 PUSHs(SvRV(*svp)); 6658 if (opnum == OP_UNDEF && SvRV(*svp) == (SV *)PL_defgv) { 6659 /* Undo @_ localisation, so that sub exit does not undo 6660 part of our undeffing. */ 6661 S_maybe_unwind_defav(aTHX); 6662 } 6663 } 6664 break; 6665 default: 6666 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7)); 6667 } 6668 oa = oa >> 4; 6669 } 6670 6671 RETURN; 6672 } 6673 6674 /* Implement CORE::keys(),values(),each(). 6675 * 6676 * We won't know until run-time whether the arg is an array or hash, 6677 * so this op calls 6678 * 6679 * pp_keys/pp_values/pp_each 6680 * or 6681 * pp_akeys/pp_avalues/pp_aeach 6682 * 6683 * as appropriate (or whatever pp function actually implements the OP_FOO 6684 * functionality for each FOO). 6685 */ 6686 6687 PP(pp_avhvswitch) 6688 { 6689 dSP; 6690 return PL_ppaddr[ 6691 (SvTYPE(TOPs) == SVt_PVAV ? OP_AEACH : OP_EACH) 6692 + (PL_op->op_private & OPpAVHVSWITCH_MASK) 6693 ](aTHX); 6694 } 6695 6696 PP(pp_runcv) 6697 { 6698 dSP; 6699 CV *cv; 6700 if (PL_op->op_private & OPpOFFBYONE) { 6701 cv = find_runcv_where(FIND_RUNCV_level_eq, 1, NULL); 6702 } 6703 else cv = find_runcv(NULL); 6704 XPUSHs(CvEVAL(cv) ? &PL_sv_undef : sv_2mortal(newRV((SV *)cv))); 6705 RETURN; 6706 } 6707 6708 static void 6709 S_localise_aelem_lval(pTHX_ AV * const av, SV * const keysv, 6710 const bool can_preserve) 6711 { 6712 const SSize_t ix = SvIV(keysv); 6713 if (can_preserve ? av_exists(av, ix) : TRUE) { 6714 SV ** const svp = av_fetch(av, ix, 1); 6715 if (!svp || !*svp) 6716 Perl_croak(aTHX_ PL_no_aelem, ix); 6717 save_aelem(av, ix, svp); 6718 } 6719 else 6720 SAVEADELETE(av, ix); 6721 } 6722 6723 static void 6724 S_localise_helem_lval(pTHX_ HV * const hv, SV * const keysv, 6725 const bool can_preserve) 6726 { 6727 if (can_preserve ? hv_exists_ent(hv, keysv, 0) : TRUE) { 6728 HE * const he = hv_fetch_ent(hv, keysv, 1, 0); 6729 SV ** const svp = he ? &HeVAL(he) : NULL; 6730 if (!svp || !*svp) 6731 Perl_croak(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 6732 save_helem_flags(hv, keysv, svp, 0); 6733 } 6734 else 6735 SAVEHDELETE(hv, keysv); 6736 } 6737 6738 static void 6739 S_localise_gv_slot(pTHX_ GV *gv, U8 type) 6740 { 6741 if (type == OPpLVREF_SV) { 6742 save_pushptrptr(gv, SvREFCNT_inc_simple(GvSV(gv)), SAVEt_GVSV); 6743 GvSV(gv) = 0; 6744 } 6745 else if (type == OPpLVREF_AV) 6746 /* XXX Inefficient, as it creates a new AV, which we are 6747 about to clobber. */ 6748 save_ary(gv); 6749 else { 6750 assert(type == OPpLVREF_HV); 6751 /* XXX Likewise inefficient. */ 6752 save_hash(gv); 6753 } 6754 } 6755 6756 6757 PP(pp_refassign) 6758 { 6759 dSP; 6760 SV * const key = PL_op->op_private & OPpLVREF_ELEM ? POPs : NULL; 6761 SV * const left = PL_op->op_flags & OPf_STACKED ? POPs : NULL; 6762 dTOPss; 6763 const char *bad = NULL; 6764 const U8 type = PL_op->op_private & OPpLVREF_TYPE; 6765 if (!SvROK(sv)) DIE(aTHX_ "Assigned value is not a reference"); 6766 switch (type) { 6767 case OPpLVREF_SV: 6768 if (SvTYPE(SvRV(sv)) > SVt_PVLV) 6769 bad = " SCALAR"; 6770 break; 6771 case OPpLVREF_AV: 6772 if (SvTYPE(SvRV(sv)) != SVt_PVAV) 6773 bad = "n ARRAY"; 6774 break; 6775 case OPpLVREF_HV: 6776 if (SvTYPE(SvRV(sv)) != SVt_PVHV) 6777 bad = " HASH"; 6778 break; 6779 case OPpLVREF_CV: 6780 if (SvTYPE(SvRV(sv)) != SVt_PVCV) 6781 bad = " CODE"; 6782 } 6783 if (bad) 6784 /* diag_listed_as: Assigned value is not %s reference */ 6785 DIE(aTHX_ "Assigned value is not a%s reference", bad); 6786 { 6787 MAGIC *mg; 6788 HV *stash; 6789 switch (left ? SvTYPE(left) : 0) { 6790 case 0: 6791 { 6792 SV * const old = PAD_SV(ARGTARG); 6793 PAD_SETSV(ARGTARG, SvREFCNT_inc_NN(SvRV(sv))); 6794 SvREFCNT_dec(old); 6795 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) 6796 == OPpLVAL_INTRO) 6797 SAVECLEARSV(PAD_SVl(ARGTARG)); 6798 break; 6799 } 6800 case SVt_PVGV: 6801 if (PL_op->op_private & OPpLVAL_INTRO) { 6802 S_localise_gv_slot(aTHX_ (GV *)left, type); 6803 } 6804 gv_setref(left, sv); 6805 SvSETMAGIC(left); 6806 break; 6807 case SVt_PVAV: 6808 assert(key); 6809 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) { 6810 S_localise_aelem_lval(aTHX_ (AV *)left, key, 6811 SvCANEXISTDELETE(left)); 6812 } 6813 av_store((AV *)left, SvIV(key), SvREFCNT_inc_simple_NN(SvRV(sv))); 6814 break; 6815 case SVt_PVHV: 6816 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) { 6817 assert(key); 6818 S_localise_helem_lval(aTHX_ (HV *)left, key, 6819 SvCANEXISTDELETE(left)); 6820 } 6821 (void)hv_store_ent((HV *)left, key, SvREFCNT_inc_simple_NN(SvRV(sv)), 0); 6822 } 6823 if (PL_op->op_flags & OPf_MOD) 6824 SETs(sv_2mortal(newSVsv(sv))); 6825 /* XXX else can weak references go stale before they are read, e.g., 6826 in leavesub? */ 6827 RETURN; 6828 } 6829 } 6830 6831 PP(pp_lvref) 6832 { 6833 dSP; 6834 SV * const ret = newSV_type_mortal(SVt_PVMG); 6835 SV * const elem = PL_op->op_private & OPpLVREF_ELEM ? POPs : NULL; 6836 SV * const arg = PL_op->op_flags & OPf_STACKED ? POPs : NULL; 6837 MAGIC * const mg = sv_magicext(ret, arg, PERL_MAGIC_lvref, 6838 &PL_vtbl_lvref, (char *)elem, 6839 elem ? HEf_SVKEY : (I32)ARGTARG); 6840 mg->mg_private = PL_op->op_private; 6841 if (PL_op->op_private & OPpLVREF_ITER) 6842 mg->mg_flags |= MGf_PERSIST; 6843 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) { 6844 if (elem) { 6845 MAGIC *mg; 6846 HV *stash; 6847 assert(arg); 6848 { 6849 const bool can_preserve = SvCANEXISTDELETE(arg); 6850 if (SvTYPE(arg) == SVt_PVAV) 6851 S_localise_aelem_lval(aTHX_ (AV *)arg, elem, can_preserve); 6852 else 6853 S_localise_helem_lval(aTHX_ (HV *)arg, elem, can_preserve); 6854 } 6855 } 6856 else if (arg) { 6857 S_localise_gv_slot(aTHX_ (GV *)arg, 6858 PL_op->op_private & OPpLVREF_TYPE); 6859 } 6860 else if (!(PL_op->op_private & OPpPAD_STATE)) 6861 SAVECLEARSV(PAD_SVl(ARGTARG)); 6862 } 6863 XPUSHs(ret); 6864 RETURN; 6865 } 6866 6867 PP(pp_lvrefslice) 6868 { 6869 dSP; dMARK; 6870 AV * const av = (AV *)POPs; 6871 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 6872 bool can_preserve = FALSE; 6873 6874 if (UNLIKELY(localizing)) { 6875 MAGIC *mg; 6876 HV *stash; 6877 SV **svp; 6878 6879 can_preserve = SvCANEXISTDELETE(av); 6880 6881 if (SvTYPE(av) == SVt_PVAV) { 6882 SSize_t max = -1; 6883 6884 for (svp = MARK + 1; svp <= SP; svp++) { 6885 const SSize_t elem = SvIV(*svp); 6886 if (elem > max) 6887 max = elem; 6888 } 6889 if (max > AvMAX(av)) 6890 av_extend(av, max); 6891 } 6892 } 6893 6894 while (++MARK <= SP) { 6895 SV * const elemsv = *MARK; 6896 if (UNLIKELY(localizing)) { 6897 if (SvTYPE(av) == SVt_PVAV) 6898 S_localise_aelem_lval(aTHX_ av, elemsv, can_preserve); 6899 else 6900 S_localise_helem_lval(aTHX_ (HV *)av, elemsv, can_preserve); 6901 } 6902 *MARK = newSV_type_mortal(SVt_PVMG); 6903 sv_magic(*MARK,(SV *)av,PERL_MAGIC_lvref,(char *)elemsv,HEf_SVKEY); 6904 } 6905 RETURN; 6906 } 6907 6908 PP(pp_lvavref) 6909 { 6910 if (PL_op->op_flags & OPf_STACKED) 6911 Perl_pp_rv2av(aTHX); 6912 else 6913 Perl_pp_padav(aTHX); 6914 { 6915 dSP; 6916 dTOPss; 6917 SETs(0); /* special alias marker that aassign recognises */ 6918 XPUSHs(sv); 6919 RETURN; 6920 } 6921 } 6922 6923 PP(pp_anonconst) 6924 { 6925 dSP; 6926 dTOPss; 6927 SETs(sv_2mortal((SV *)newCONSTSUB(SvTYPE(CopSTASH(PL_curcop))==SVt_PVHV 6928 ? CopSTASH(PL_curcop) 6929 : NULL, 6930 NULL, SvREFCNT_inc_simple_NN(sv)))); 6931 RETURN; 6932 } 6933 6934 6935 /* process one subroutine argument - typically when the sub has a signature: 6936 * introduce PL_curpad[op_targ] and assign to it the value 6937 * for $: (OPf_STACKED ? *sp : $_[N]) 6938 * for @/%: @_[N..$#_] 6939 * 6940 * It's equivalent to 6941 * my $foo = $_[N]; 6942 * or 6943 * my $foo = (value-on-stack) 6944 * or 6945 * my @foo = @_[N..$#_] 6946 * etc 6947 */ 6948 6949 PP(pp_argelem) 6950 { 6951 dTARG; 6952 SV *val; 6953 SV ** padentry; 6954 OP *o = PL_op; 6955 AV *defav = GvAV(PL_defgv); /* @_ */ 6956 IV ix = PTR2IV(cUNOP_AUXo->op_aux); 6957 IV argc; 6958 6959 /* do 'my $var, @var or %var' action */ 6960 padentry = &(PAD_SVl(o->op_targ)); 6961 save_clearsv(padentry); 6962 targ = *padentry; 6963 6964 if ((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_SV) { 6965 if (o->op_flags & OPf_STACKED) { 6966 dSP; 6967 val = POPs; 6968 PUTBACK; 6969 } 6970 else { 6971 SV **svp; 6972 /* should already have been checked */ 6973 assert(ix >= 0); 6974 #if IVSIZE > PTRSIZE 6975 assert(ix <= SSize_t_MAX); 6976 #endif 6977 6978 svp = av_fetch(defav, ix, FALSE); 6979 val = svp ? *svp : &PL_sv_undef; 6980 } 6981 6982 /* $var = $val */ 6983 6984 /* cargo-culted from pp_sassign */ 6985 assert(TAINTING_get || !TAINT_get); 6986 if (UNLIKELY(TAINT_get) && !SvTAINTED(val)) 6987 TAINT_NOT; 6988 6989 SvSetMagicSV(targ, val); 6990 return o->op_next; 6991 } 6992 6993 /* must be AV or HV */ 6994 6995 assert(!(o->op_flags & OPf_STACKED)); 6996 argc = ((IV)AvFILL(defav) + 1) - ix; 6997 6998 /* This is a copy of the relevant parts of pp_aassign(). 6999 */ 7000 if ((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_AV) { 7001 IV i; 7002 7003 if (AvFILL((AV*)targ) > -1) { 7004 /* target should usually be empty. If we get get 7005 * here, someone's been doing some weird closure tricks. 7006 * Make a copy of all args before clearing the array, 7007 * to avoid the equivalent of @a = ($a[0]) prematurely freeing 7008 * elements. See similar code in pp_aassign. 7009 */ 7010 for (i = 0; i < argc; i++) { 7011 SV **svp = av_fetch(defav, ix + i, FALSE); 7012 SV *newsv = newSVsv_flags(svp ? *svp : &PL_sv_undef, 7013 (SV_DO_COW_SVSETSV|SV_NOSTEAL)); 7014 if (!av_store(defav, ix + i, newsv)) 7015 SvREFCNT_dec_NN(newsv); 7016 } 7017 av_clear((AV*)targ); 7018 } 7019 7020 if (argc <= 0) 7021 return o->op_next; 7022 7023 av_extend((AV*)targ, argc); 7024 7025 i = 0; 7026 while (argc--) { 7027 SV *tmpsv; 7028 SV **svp = av_fetch(defav, ix + i, FALSE); 7029 SV *val = svp ? *svp : &PL_sv_undef; 7030 tmpsv = newSV_type(SVt_NULL); 7031 sv_setsv(tmpsv, val); 7032 av_store((AV*)targ, i++, tmpsv); 7033 TAINT_NOT; 7034 } 7035 7036 } 7037 else { 7038 IV i; 7039 7040 assert((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_HV); 7041 7042 if (SvRMAGICAL(targ) || HvUSEDKEYS((HV*)targ)) { 7043 /* see "target should usually be empty" comment above */ 7044 for (i = 0; i < argc; i++) { 7045 SV **svp = av_fetch(defav, ix + i, FALSE); 7046 SV *newsv = newSV_type(SVt_NULL); 7047 sv_setsv_flags(newsv, 7048 svp ? *svp : &PL_sv_undef, 7049 (SV_DO_COW_SVSETSV|SV_NOSTEAL)); 7050 if (!av_store(defav, ix + i, newsv)) 7051 SvREFCNT_dec_NN(newsv); 7052 } 7053 hv_clear((HV*)targ); 7054 } 7055 7056 if (argc <= 0) 7057 return o->op_next; 7058 assert(argc % 2 == 0); 7059 7060 i = 0; 7061 while (argc) { 7062 SV *tmpsv; 7063 SV **svp; 7064 SV *key; 7065 SV *val; 7066 7067 svp = av_fetch(defav, ix + i++, FALSE); 7068 key = svp ? *svp : &PL_sv_undef; 7069 svp = av_fetch(defav, ix + i++, FALSE); 7070 val = svp ? *svp : &PL_sv_undef; 7071 7072 argc -= 2; 7073 if (UNLIKELY(SvGMAGICAL(key))) 7074 key = sv_mortalcopy(key); 7075 tmpsv = newSV_type(SVt_NULL); 7076 sv_setsv(tmpsv, val); 7077 hv_store_ent((HV*)targ, key, tmpsv, 0); 7078 TAINT_NOT; 7079 } 7080 } 7081 7082 return o->op_next; 7083 } 7084 7085 /* Handle a default value for one subroutine argument (typically as part 7086 * of a subroutine signature). 7087 * It's equivalent to 7088 * @_ > op_targ ? $_[op_targ] : result_of(op_other) 7089 * 7090 * Intended to be used where op_next is an OP_ARGELEM 7091 * 7092 * We abuse the op_targ field slightly: it's an index into @_ rather than 7093 * into PL_curpad. 7094 */ 7095 7096 PP(pp_argdefelem) 7097 { 7098 OP * const o = PL_op; 7099 AV *defav = GvAV(PL_defgv); /* @_ */ 7100 IV ix = (IV)o->op_targ; 7101 7102 assert(ix >= 0); 7103 #if IVSIZE > PTRSIZE 7104 assert(ix <= SSize_t_MAX); 7105 #endif 7106 7107 if (AvFILL(defav) >= ix) { 7108 dSP; 7109 SV **svp = av_fetch(defav, ix, FALSE); 7110 SV *val = svp ? *svp : &PL_sv_undef; 7111 XPUSHs(val); 7112 RETURN; 7113 } 7114 return cLOGOPo->op_other; 7115 } 7116 7117 7118 static SV * 7119 S_find_runcv_name(void) 7120 { 7121 dTHX; 7122 CV *cv; 7123 GV *gv; 7124 SV *sv; 7125 7126 cv = find_runcv(0); 7127 if (!cv) 7128 return &PL_sv_no; 7129 7130 gv = CvGV(cv); 7131 if (!gv) 7132 return &PL_sv_no; 7133 7134 sv = sv_newmortal(); 7135 gv_fullname4(sv, gv, NULL, TRUE); 7136 return sv; 7137 } 7138 7139 /* Check a sub's arguments - i.e. that it has the correct number of args 7140 * (and anything else we might think of in future). Typically used with 7141 * signatured subs. 7142 */ 7143 7144 PP(pp_argcheck) 7145 { 7146 OP * const o = PL_op; 7147 struct op_argcheck_aux *aux = (struct op_argcheck_aux *)cUNOP_AUXo->op_aux; 7148 UV params = aux->params; 7149 UV opt_params = aux->opt_params; 7150 char slurpy = aux->slurpy; 7151 AV *defav = GvAV(PL_defgv); /* @_ */ 7152 UV argc; 7153 bool too_few; 7154 7155 assert(!SvMAGICAL(defav)); 7156 argc = (UV)(AvFILLp(defav) + 1); 7157 too_few = (argc < (params - opt_params)); 7158 7159 if (UNLIKELY(too_few || (!slurpy && argc > params))) 7160 7161 /* diag_listed_as: Too few arguments for subroutine '%s' (got %d; expected %d) */ 7162 /* diag_listed_as: Too few arguments for subroutine '%s' (got %d; expected at least %d) */ 7163 /* diag_listed_as: Too many arguments for subroutine '%s' (got %d; expected %d) */ 7164 /* diag_listed_as: Too many arguments for subroutine '%s' (got %d; expected at most %d)*/ 7165 Perl_croak_caller("Too %s arguments for subroutine '%" SVf "' (got %" UVuf "; expected %s%" UVuf ")", 7166 too_few ? "few" : "many", 7167 S_find_runcv_name(), 7168 argc, 7169 too_few ? (slurpy || opt_params ? "at least " : "") : (opt_params ? "at most " : ""), 7170 too_few ? (params - opt_params) : params); 7171 7172 if (UNLIKELY(slurpy == '%' && argc > params && (argc - params) % 2)) 7173 /* diag_listed_as: Odd name/value argument for subroutine '%s' */ 7174 Perl_croak_caller("Odd name/value argument for subroutine '%" SVf "'", 7175 S_find_runcv_name()); 7176 7177 return NORMAL; 7178 } 7179 7180 PP(pp_isa) 7181 { 7182 dSP; 7183 SV *left, *right; 7184 7185 right = POPs; 7186 left = TOPs; 7187 7188 SETs(boolSV(sv_isa_sv(left, right))); 7189 RETURN; 7190 } 7191 7192 PP(pp_cmpchain_and) 7193 { 7194 dSP; 7195 SV *result = POPs; 7196 PUTBACK; 7197 if (SvTRUE_NN(result)) { 7198 return cLOGOP->op_other; 7199 } else { 7200 TOPs = result; 7201 return NORMAL; 7202 } 7203 } 7204 7205 PP(pp_cmpchain_dup) 7206 { 7207 dSP; 7208 SV *right = TOPs; 7209 SV *left = TOPm1s; 7210 TOPm1s = right; 7211 TOPs = left; 7212 XPUSHs(right); 7213 RETURN; 7214 } 7215 7216 PP(pp_is_bool) 7217 { 7218 dSP; 7219 dTARGET; 7220 SV *arg = POPs; 7221 7222 SvGETMAGIC(arg); 7223 7224 sv_setbool_mg(TARG, SvIsBOOL(arg)); 7225 PUSHs(TARG); 7226 RETURN; 7227 } 7228 7229 PP(pp_is_weak) 7230 { 7231 dSP; 7232 dTARGET; 7233 SV *arg = POPs; 7234 7235 SvGETMAGIC(arg); 7236 7237 sv_setbool_mg(TARG, SvROK(arg) && SvWEAKREF(arg)); 7238 PUSHs(TARG); 7239 RETURN; 7240 } 7241 7242 PP(pp_weaken) 7243 { 7244 dSP; 7245 SV *arg = POPs; 7246 7247 sv_rvweaken(arg); 7248 RETURN; 7249 } 7250 7251 PP(pp_unweaken) 7252 { 7253 dSP; 7254 SV *arg = POPs; 7255 7256 sv_rvunweaken(arg); 7257 RETURN; 7258 } 7259 7260 PP(pp_blessed) 7261 { 7262 dSP; 7263 SV *arg = TOPs; 7264 SV *rv; 7265 7266 SvGETMAGIC(arg); 7267 7268 if(!SvROK(arg) || !SvOBJECT((rv = SvRV(arg)))) { 7269 SETs(&PL_sv_undef); 7270 RETURN; 7271 } 7272 7273 if((PL_op->op_private & OPpTRUEBOOL) || 7274 ((PL_op->op_private & OPpMAYBE_TRUEBOOL) && (block_gimme() == G_VOID))) { 7275 /* We only care about the boolean truth, not the specific string value. 7276 * We just have to check for the annoying cornercase of the package 7277 * named "0" */ 7278 HV *stash = SvSTASH(rv); 7279 HEK *hek = HvNAME_HEK(stash); 7280 if(!hek) 7281 goto fallback; 7282 I32 len = HEK_LEN(hek); 7283 if(UNLIKELY(len == HEf_SVKEY || (len == 1 && HEK_KEY(hek)[0] == '0'))) 7284 goto fallback; 7285 7286 SETs(&PL_sv_yes); 7287 } 7288 else { 7289 fallback: 7290 SETs(sv_ref(NULL, rv, TRUE)); 7291 } 7292 7293 RETURN; 7294 } 7295 7296 PP(pp_refaddr) 7297 { 7298 dSP; 7299 dTARGET; 7300 SV *arg = POPs; 7301 7302 SvGETMAGIC(arg); 7303 7304 if(SvROK(arg)) 7305 sv_setuv_mg(TARG, PTR2UV(SvRV(arg))); 7306 else 7307 sv_setsv(TARG, &PL_sv_undef); 7308 7309 PUSHs(TARG); 7310 RETURN; 7311 } 7312 7313 PP(pp_reftype) 7314 { 7315 dSP; 7316 dTARGET; 7317 SV *arg = POPs; 7318 7319 SvGETMAGIC(arg); 7320 7321 if(SvROK(arg)) 7322 sv_setpv_mg(TARG, sv_reftype(SvRV(arg), FALSE)); 7323 else 7324 sv_setsv(TARG, &PL_sv_undef); 7325 7326 PUSHs(TARG); 7327 RETURN; 7328 } 7329 7330 PP(pp_ceil) 7331 { 7332 dSP; 7333 dTARGET; 7334 PUSHn(Perl_ceil(POPn)); 7335 RETURN; 7336 } 7337 7338 PP(pp_floor) 7339 { 7340 dSP; 7341 dTARGET; 7342 PUSHn(Perl_floor(POPn)); 7343 RETURN; 7344 } 7345 7346 /* 7347 * ex: set ts=8 sts=4 sw=4 et: 7348 */ 7349