1 /* pp.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It's a big house this, and very peculiar. Always a bit more 13 * to discover, and no knowing what you'll find round a corner. 14 * And Elves, sir!' --Samwise Gamgee 15 * 16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"] 17 */ 18 19 /* This file contains general pp ("push/pop") functions that execute the 20 * opcodes that make up a perl program. A typical pp function expects to 21 * find its arguments on the stack, and usually pushes its results onto 22 * the stack, hence the 'pp' terminology. Each OP structure contains 23 * a pointer to the relevant pp_foo() function. 24 */ 25 26 #include "EXTERN.h" 27 #define PERL_IN_PP_C 28 #include "perl.h" 29 #include "keywords.h" 30 31 #include "invlist_inline.h" 32 #include "reentr.h" 33 #include "regcharclass.h" 34 35 /* variations on pp_null */ 36 37 PP(pp_stub) 38 { 39 dSP; 40 if (GIMME_V == G_SCALAR) 41 XPUSHs(&PL_sv_undef); 42 RETURN; 43 } 44 45 /* Pushy stuff. */ 46 47 48 49 PP(pp_padcv) 50 { 51 dSP; dTARGET; 52 assert(SvTYPE(TARG) == SVt_PVCV); 53 XPUSHs(TARG); 54 RETURN; 55 } 56 57 PP(pp_introcv) 58 { 59 dTARGET; 60 SvPADSTALE_off(TARG); 61 return NORMAL; 62 } 63 64 PP(pp_clonecv) 65 { 66 dTARGET; 67 CV * const protocv = PadnamePROTOCV( 68 PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG] 69 ); 70 assert(SvTYPE(TARG) == SVt_PVCV); 71 assert(protocv); 72 if (CvISXSUB(protocv)) { /* constant */ 73 /* XXX Should we clone it here? */ 74 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV 75 to introcv and remove the SvPADSTALE_off. */ 76 SAVEPADSVANDMORTALIZE(ARGTARG); 77 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(protocv); 78 } 79 else { 80 if (CvROOT(protocv)) { 81 assert(CvCLONE(protocv)); 82 assert(!CvCLONED(protocv)); 83 } 84 cv_clone_into(protocv,(CV *)TARG); 85 SAVECLEARSV(PAD_SVl(ARGTARG)); 86 } 87 return NORMAL; 88 } 89 90 /* Translations. */ 91 92 /* In some cases this function inspects PL_op. If this function is called 93 for new op types, more bool parameters may need to be added in place of 94 the checks. 95 96 When noinit is true, the absence of a gv will cause a retval of undef. 97 This is unrelated to the cv-to-gv assignment case. 98 */ 99 100 static SV * 101 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, 102 const bool noinit) 103 { 104 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv); 105 if (SvROK(sv)) { 106 if (SvAMAGIC(sv)) { 107 sv = amagic_deref_call(sv, to_gv_amg); 108 } 109 wasref: 110 sv = SvRV(sv); 111 if (SvTYPE(sv) == SVt_PVIO) { 112 GV * const gv = MUTABLE_GV(sv_newmortal()); 113 gv_init(gv, 0, "__ANONIO__", 10, 0); 114 GvIOp(gv) = MUTABLE_IO(sv); 115 SvREFCNT_inc_void_NN(sv); 116 sv = MUTABLE_SV(gv); 117 } 118 else if (!isGV_with_GP(sv)) { 119 Perl_die(aTHX_ "Not a GLOB reference"); 120 } 121 } 122 else { 123 if (!isGV_with_GP(sv)) { 124 if (!SvOK(sv)) { 125 /* If this is a 'my' scalar and flag is set then vivify 126 * NI-S 1999/05/07 127 */ 128 if (vivify_sv && sv != &PL_sv_undef) { 129 GV *gv; 130 HV *stash; 131 if (SvREADONLY(sv)) 132 Perl_croak_no_modify(); 133 gv = MUTABLE_GV(newSV(0)); 134 stash = CopSTASH(PL_curcop); 135 if (SvTYPE(stash) != SVt_PVHV) stash = NULL; 136 if (cUNOP->op_targ) { 137 SV * const namesv = PAD_SV(cUNOP->op_targ); 138 gv_init_sv(gv, stash, namesv, 0); 139 } 140 else { 141 gv_init_pv(gv, stash, "__ANONIO__", 0); 142 } 143 prepare_SV_for_RV(sv); 144 SvRV_set(sv, MUTABLE_SV(gv)); 145 SvROK_on(sv); 146 SvSETMAGIC(sv); 147 goto wasref; 148 } 149 if (PL_op->op_flags & OPf_REF || strict) { 150 Perl_die(aTHX_ PL_no_usym, "a symbol"); 151 } 152 if (ckWARN(WARN_UNINITIALIZED)) 153 report_uninit(sv); 154 return &PL_sv_undef; 155 } 156 if (noinit) 157 { 158 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg( 159 sv, GV_ADDMG, SVt_PVGV 160 )))) 161 return &PL_sv_undef; 162 } 163 else { 164 if (strict) { 165 Perl_die(aTHX_ 166 PL_no_symref_sv, 167 sv, 168 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), 169 "a symbol" 170 ); 171 } 172 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV)) 173 == OPpDONT_INIT_GV) { 174 /* We are the target of a coderef assignment. Return 175 the scalar unchanged, and let pp_sasssign deal with 176 things. */ 177 return sv; 178 } 179 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV)); 180 } 181 /* FAKE globs in the symbol table cause weird bugs (#77810) */ 182 SvFAKE_off(sv); 183 } 184 } 185 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) { 186 SV *newsv = sv_newmortal(); 187 sv_setsv_flags(newsv, sv, 0); 188 SvFAKE_off(newsv); 189 sv = newsv; 190 } 191 return sv; 192 } 193 194 PP(pp_rv2gv) 195 { 196 dSP; dTOPss; 197 198 sv = S_rv2gv(aTHX_ 199 sv, PL_op->op_private & OPpDEREF, 200 PL_op->op_private & HINT_STRICT_REFS, 201 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD)) 202 || PL_op->op_type == OP_READLINE 203 ); 204 if (PL_op->op_private & OPpLVAL_INTRO) 205 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL)); 206 SETs(sv); 207 RETURN; 208 } 209 210 /* Helper function for pp_rv2sv and pp_rv2av */ 211 GV * 212 Perl_softref2xv(pTHX_ SV *const sv, const char *const what, 213 const svtype type, SV ***spp) 214 { 215 GV *gv; 216 217 PERL_ARGS_ASSERT_SOFTREF2XV; 218 219 if (PL_op->op_private & HINT_STRICT_REFS) { 220 if (SvOK(sv)) 221 Perl_die(aTHX_ PL_no_symref_sv, sv, 222 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what); 223 else 224 Perl_die(aTHX_ PL_no_usym, what); 225 } 226 if (!SvOK(sv)) { 227 if ( 228 PL_op->op_flags & OPf_REF 229 ) 230 Perl_die(aTHX_ PL_no_usym, what); 231 if (ckWARN(WARN_UNINITIALIZED)) 232 report_uninit(sv); 233 if (type != SVt_PV && GIMME_V == G_ARRAY) { 234 (*spp)--; 235 return NULL; 236 } 237 **spp = &PL_sv_undef; 238 return NULL; 239 } 240 if ((PL_op->op_flags & OPf_SPECIAL) && 241 !(PL_op->op_flags & OPf_MOD)) 242 { 243 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type))) 244 { 245 **spp = &PL_sv_undef; 246 return NULL; 247 } 248 } 249 else { 250 gv = gv_fetchsv_nomg(sv, GV_ADD, type); 251 } 252 return gv; 253 } 254 255 PP(pp_rv2sv) 256 { 257 dSP; dTOPss; 258 GV *gv = NULL; 259 260 SvGETMAGIC(sv); 261 if (SvROK(sv)) { 262 if (SvAMAGIC(sv)) { 263 sv = amagic_deref_call(sv, to_sv_amg); 264 } 265 266 sv = SvRV(sv); 267 if (SvTYPE(sv) >= SVt_PVAV) 268 DIE(aTHX_ "Not a SCALAR reference"); 269 } 270 else { 271 gv = MUTABLE_GV(sv); 272 273 if (!isGV_with_GP(gv)) { 274 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp); 275 if (!gv) 276 RETURN; 277 } 278 sv = GvSVn(gv); 279 } 280 if (PL_op->op_flags & OPf_MOD) { 281 if (PL_op->op_private & OPpLVAL_INTRO) { 282 if (cUNOP->op_first->op_type == OP_NULL) 283 sv = save_scalar(MUTABLE_GV(TOPs)); 284 else if (gv) 285 sv = save_scalar(gv); 286 else 287 Perl_croak(aTHX_ "%s", PL_no_localize_ref); 288 } 289 else if (PL_op->op_private & OPpDEREF) 290 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF); 291 } 292 SPAGAIN; /* in case chasing soft refs reallocated the stack */ 293 SETs(sv); 294 RETURN; 295 } 296 297 PP(pp_av2arylen) 298 { 299 dSP; 300 AV * const av = MUTABLE_AV(TOPs); 301 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 302 if (lvalue) { 303 SV ** const svp = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av)); 304 if (!*svp) { 305 *svp = newSV_type(SVt_PVMG); 306 sv_magic(*svp, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0); 307 } 308 SETs(*svp); 309 } else { 310 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av))))); 311 } 312 RETURN; 313 } 314 315 PP(pp_pos) 316 { 317 dSP; dTOPss; 318 319 if (PL_op->op_flags & OPf_MOD || LVRET) { 320 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */ 321 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0); 322 LvTYPE(ret) = '.'; 323 LvTARG(ret) = SvREFCNT_inc_simple(sv); 324 SETs(ret); /* no SvSETMAGIC */ 325 } 326 else { 327 const MAGIC * const mg = mg_find_mglob(sv); 328 if (mg && mg->mg_len != -1) { 329 STRLEN i = mg->mg_len; 330 if (PL_op->op_private & OPpTRUEBOOL) 331 SETs(i ? &PL_sv_yes : &PL_sv_zero); 332 else { 333 dTARGET; 334 if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv)) 335 i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN); 336 SETu(i); 337 } 338 return NORMAL; 339 } 340 SETs(&PL_sv_undef); 341 } 342 return NORMAL; 343 } 344 345 PP(pp_rv2cv) 346 { 347 dSP; 348 GV *gv; 349 HV *stash_unused; 350 const I32 flags = (PL_op->op_flags & OPf_SPECIAL) 351 ? GV_ADDMG 352 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) 353 == OPpMAY_RETURN_CONSTANT) 354 ? GV_ADD|GV_NOEXPAND 355 : GV_ADD; 356 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */ 357 /* (But not in defined().) */ 358 359 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags); 360 if (cv) NOOP; 361 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) { 362 cv = SvTYPE(SvRV(gv)) == SVt_PVCV 363 ? MUTABLE_CV(SvRV(gv)) 364 : MUTABLE_CV(gv); 365 } 366 else 367 cv = MUTABLE_CV(&PL_sv_undef); 368 SETs(MUTABLE_SV(cv)); 369 return NORMAL; 370 } 371 372 PP(pp_prototype) 373 { 374 dSP; 375 CV *cv; 376 HV *stash; 377 GV *gv; 378 SV *ret = &PL_sv_undef; 379 380 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs)); 381 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) { 382 const char * s = SvPVX_const(TOPs); 383 if (memBEGINs(s, SvCUR(TOPs), "CORE::")) { 384 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1); 385 if (!code) 386 DIE(aTHX_ "Can't find an opnumber for \"%" UTF8f "\"", 387 UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6)); 388 { 389 SV * const sv = core_prototype(NULL, s + 6, code, NULL); 390 if (sv) ret = sv; 391 } 392 goto set; 393 } 394 } 395 cv = sv_2cv(TOPs, &stash, &gv, 0); 396 if (cv && SvPOK(cv)) 397 ret = newSVpvn_flags( 398 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv) 399 ); 400 set: 401 SETs(ret); 402 RETURN; 403 } 404 405 PP(pp_anoncode) 406 { 407 dSP; 408 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ)); 409 if (CvCLONE(cv)) 410 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv)))); 411 EXTEND(SP,1); 412 PUSHs(MUTABLE_SV(cv)); 413 RETURN; 414 } 415 416 PP(pp_srefgen) 417 { 418 dSP; 419 *SP = refto(*SP); 420 return NORMAL; 421 } 422 423 PP(pp_refgen) 424 { 425 dSP; dMARK; 426 if (GIMME_V != G_ARRAY) { 427 if (++MARK <= SP) 428 *MARK = *SP; 429 else 430 { 431 MEXTEND(SP, 1); 432 *MARK = &PL_sv_undef; 433 } 434 *MARK = refto(*MARK); 435 SP = MARK; 436 RETURN; 437 } 438 EXTEND_MORTAL(SP - MARK); 439 while (++MARK <= SP) 440 *MARK = refto(*MARK); 441 RETURN; 442 } 443 444 STATIC SV* 445 S_refto(pTHX_ SV *sv) 446 { 447 SV* rv; 448 449 PERL_ARGS_ASSERT_REFTO; 450 451 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') { 452 if (LvTARGLEN(sv)) 453 vivify_defelem(sv); 454 if (!(sv = LvTARG(sv))) 455 sv = &PL_sv_undef; 456 else 457 SvREFCNT_inc_void_NN(sv); 458 } 459 else if (SvTYPE(sv) == SVt_PVAV) { 460 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv)) 461 av_reify(MUTABLE_AV(sv)); 462 SvTEMP_off(sv); 463 SvREFCNT_inc_void_NN(sv); 464 } 465 else if (SvPADTMP(sv)) { 466 sv = newSVsv(sv); 467 } 468 else if (UNLIKELY(SvSMAGICAL(sv) && mg_find(sv, PERL_MAGIC_nonelem))) 469 sv_unmagic(SvREFCNT_inc_simple_NN(sv), PERL_MAGIC_nonelem); 470 else { 471 SvTEMP_off(sv); 472 SvREFCNT_inc_void_NN(sv); 473 } 474 rv = sv_newmortal(); 475 sv_upgrade(rv, SVt_IV); 476 SvRV_set(rv, sv); 477 SvROK_on(rv); 478 return rv; 479 } 480 481 PP(pp_ref) 482 { 483 dSP; 484 SV * const sv = TOPs; 485 486 SvGETMAGIC(sv); 487 if (!SvROK(sv)) { 488 SETs(&PL_sv_no); 489 return NORMAL; 490 } 491 492 /* op is in boolean context? */ 493 if ( (PL_op->op_private & OPpTRUEBOOL) 494 || ( (PL_op->op_private & OPpMAYBE_TRUEBOOL) 495 && block_gimme() == G_VOID)) 496 { 497 /* refs are always true - unless it's to an object blessed into a 498 * class with a false name, i.e. "0". So we have to check for 499 * that remote possibility. The following is is basically an 500 * unrolled SvTRUE(sv_reftype(rv)) */ 501 SV * const rv = SvRV(sv); 502 if (SvOBJECT(rv)) { 503 HV *stash = SvSTASH(rv); 504 HEK *hek = HvNAME_HEK(stash); 505 if (hek) { 506 I32 len = HEK_LEN(hek); 507 /* bail out and do it the hard way? */ 508 if (UNLIKELY( 509 len == HEf_SVKEY 510 || (len == 1 && HEK_KEY(hek)[0] == '0') 511 )) 512 goto do_sv_ref; 513 } 514 } 515 SETs(&PL_sv_yes); 516 return NORMAL; 517 } 518 519 do_sv_ref: 520 { 521 dTARGET; 522 SETs(TARG); 523 sv_ref(TARG, SvRV(sv), TRUE); 524 SvSETMAGIC(TARG); 525 return NORMAL; 526 } 527 528 } 529 530 531 PP(pp_bless) 532 { 533 dSP; 534 HV *stash; 535 536 if (MAXARG == 1) 537 { 538 curstash: 539 stash = CopSTASH(PL_curcop); 540 if (SvTYPE(stash) != SVt_PVHV) 541 Perl_croak(aTHX_ "Attempt to bless into a freed package"); 542 } 543 else { 544 SV * const ssv = POPs; 545 STRLEN len; 546 const char *ptr; 547 548 if (!ssv) goto curstash; 549 SvGETMAGIC(ssv); 550 if (SvROK(ssv)) { 551 if (!SvAMAGIC(ssv)) { 552 frog: 553 Perl_croak(aTHX_ "Attempt to bless into a reference"); 554 } 555 /* SvAMAGIC is on here, but it only means potentially overloaded, 556 so after stringification: */ 557 ptr = SvPV_nomg_const(ssv,len); 558 /* We need to check the flag again: */ 559 if (!SvAMAGIC(ssv)) goto frog; 560 } 561 else ptr = SvPV_nomg_const(ssv,len); 562 if (len == 0) 563 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 564 "Explicit blessing to '' (assuming package main)"); 565 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv)); 566 } 567 568 (void)sv_bless(TOPs, stash); 569 RETURN; 570 } 571 572 PP(pp_gelem) 573 { 574 dSP; 575 576 SV *sv = POPs; 577 STRLEN len; 578 const char * const elem = SvPV_const(sv, len); 579 GV * const gv = MUTABLE_GV(TOPs); 580 SV * tmpRef = NULL; 581 582 sv = NULL; 583 if (elem) { 584 /* elem will always be NUL terminated. */ 585 switch (*elem) { 586 case 'A': 587 if (memEQs(elem, len, "ARRAY")) 588 { 589 tmpRef = MUTABLE_SV(GvAV(gv)); 590 if (tmpRef && !AvREAL((const AV *)tmpRef) 591 && AvREIFY((const AV *)tmpRef)) 592 av_reify(MUTABLE_AV(tmpRef)); 593 } 594 break; 595 case 'C': 596 if (memEQs(elem, len, "CODE")) 597 tmpRef = MUTABLE_SV(GvCVu(gv)); 598 break; 599 case 'F': 600 if (memEQs(elem, len, "FILEHANDLE")) { 601 tmpRef = MUTABLE_SV(GvIOp(gv)); 602 } 603 else 604 if (memEQs(elem, len, "FORMAT")) 605 tmpRef = MUTABLE_SV(GvFORM(gv)); 606 break; 607 case 'G': 608 if (memEQs(elem, len, "GLOB")) 609 tmpRef = MUTABLE_SV(gv); 610 break; 611 case 'H': 612 if (memEQs(elem, len, "HASH")) 613 tmpRef = MUTABLE_SV(GvHV(gv)); 614 break; 615 case 'I': 616 if (memEQs(elem, len, "IO")) 617 tmpRef = MUTABLE_SV(GvIOp(gv)); 618 break; 619 case 'N': 620 if (memEQs(elem, len, "NAME")) 621 sv = newSVhek(GvNAME_HEK(gv)); 622 break; 623 case 'P': 624 if (memEQs(elem, len, "PACKAGE")) { 625 const HV * const stash = GvSTASH(gv); 626 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL; 627 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__"); 628 } 629 break; 630 case 'S': 631 if (memEQs(elem, len, "SCALAR")) 632 tmpRef = GvSVn(gv); 633 break; 634 } 635 } 636 if (tmpRef) 637 sv = newRV(tmpRef); 638 if (sv) 639 sv_2mortal(sv); 640 else 641 sv = &PL_sv_undef; 642 SETs(sv); 643 RETURN; 644 } 645 646 /* Pattern matching */ 647 648 PP(pp_study) 649 { 650 dSP; dTOPss; 651 STRLEN len; 652 653 (void)SvPV(sv, len); 654 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) { 655 /* Historically, study was skipped in these cases. */ 656 SETs(&PL_sv_no); 657 return NORMAL; 658 } 659 660 /* Make study a no-op. It's no longer useful and its existence 661 complicates matters elsewhere. */ 662 SETs(&PL_sv_yes); 663 return NORMAL; 664 } 665 666 667 /* also used for: pp_transr() */ 668 669 PP(pp_trans) 670 { 671 dSP; 672 SV *sv; 673 674 if (PL_op->op_flags & OPf_STACKED) 675 sv = POPs; 676 else { 677 EXTEND(SP,1); 678 if (ARGTARG) 679 sv = PAD_SV(ARGTARG); 680 else { 681 sv = DEFSV; 682 } 683 } 684 if(PL_op->op_type == OP_TRANSR) { 685 STRLEN len; 686 const char * const pv = SvPV(sv,len); 687 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv)); 688 do_trans(newsv); 689 PUSHs(newsv); 690 } 691 else { 692 Size_t i = do_trans(sv); 693 mPUSHi((UV)i); 694 } 695 RETURN; 696 } 697 698 /* Lvalue operators. */ 699 700 static size_t 701 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) 702 { 703 STRLEN len; 704 char *s; 705 size_t count = 0; 706 707 PERL_ARGS_ASSERT_DO_CHOMP; 708 709 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs))) 710 return 0; 711 if (SvTYPE(sv) == SVt_PVAV) { 712 I32 i; 713 AV *const av = MUTABLE_AV(sv); 714 const I32 max = AvFILL(av); 715 716 for (i = 0; i <= max; i++) { 717 sv = MUTABLE_SV(av_fetch(av, i, FALSE)); 718 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 719 count += do_chomp(retval, sv, chomping); 720 } 721 return count; 722 } 723 else if (SvTYPE(sv) == SVt_PVHV) { 724 HV* const hv = MUTABLE_HV(sv); 725 HE* entry; 726 (void)hv_iterinit(hv); 727 while ((entry = hv_iternext(hv))) 728 count += do_chomp(retval, hv_iterval(hv,entry), chomping); 729 return count; 730 } 731 else if (SvREADONLY(sv)) { 732 Perl_croak_no_modify(); 733 } 734 735 s = SvPV(sv, len); 736 if (chomping) { 737 if (s && len) { 738 char *temp_buffer = NULL; 739 SV *svrecode = NULL; 740 s += --len; 741 if (RsPARA(PL_rs)) { 742 if (*s != '\n') 743 goto nope_free_nothing; 744 ++count; 745 while (len && s[-1] == '\n') { 746 --len; 747 --s; 748 ++count; 749 } 750 } 751 else { 752 STRLEN rslen, rs_charlen; 753 const char *rsptr = SvPV_const(PL_rs, rslen); 754 755 rs_charlen = SvUTF8(PL_rs) 756 ? sv_len_utf8(PL_rs) 757 : rslen; 758 759 if (SvUTF8(PL_rs) != SvUTF8(sv)) { 760 /* Assumption is that rs is shorter than the scalar. */ 761 if (SvUTF8(PL_rs)) { 762 /* RS is utf8, scalar is 8 bit. */ 763 bool is_utf8 = TRUE; 764 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, 765 &rslen, &is_utf8); 766 if (is_utf8) { 767 /* Cannot downgrade, therefore cannot possibly match. 768 At this point, temp_buffer is not alloced, and 769 is the buffer inside PL_rs, so dont free it. 770 */ 771 assert (temp_buffer == rsptr); 772 goto nope_free_sv; 773 } 774 rsptr = temp_buffer; 775 } 776 else { 777 /* RS is 8 bit, scalar is utf8. */ 778 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); 779 rsptr = temp_buffer; 780 } 781 } 782 if (rslen == 1) { 783 if (*s != *rsptr) 784 goto nope_free_all; 785 ++count; 786 } 787 else { 788 if (len < rslen - 1) 789 goto nope_free_all; 790 len -= rslen - 1; 791 s -= rslen - 1; 792 if (memNE(s, rsptr, rslen)) 793 goto nope_free_all; 794 count += rs_charlen; 795 } 796 } 797 SvPV_force_nomg_nolen(sv); 798 SvCUR_set(sv, len); 799 *SvEND(sv) = '\0'; 800 SvNIOK_off(sv); 801 SvSETMAGIC(sv); 802 803 nope_free_all: 804 Safefree(temp_buffer); 805 nope_free_sv: 806 SvREFCNT_dec(svrecode); 807 nope_free_nothing: ; 808 } 809 } else { 810 if (len && (!SvPOK(sv) || SvIsCOW(sv))) 811 s = SvPV_force_nomg(sv, len); 812 if (DO_UTF8(sv)) { 813 if (s && len) { 814 char * const send = s + len; 815 char * const start = s; 816 s = send - 1; 817 while (s > start && UTF8_IS_CONTINUATION(*s)) 818 s--; 819 if (is_utf8_string((U8*)s, send - s)) { 820 sv_setpvn(retval, s, send - s); 821 *s = '\0'; 822 SvCUR_set(sv, s - start); 823 SvNIOK_off(sv); 824 SvUTF8_on(retval); 825 } 826 } 827 else 828 SvPVCLEAR(retval); 829 } 830 else if (s && len) { 831 s += --len; 832 sv_setpvn(retval, s, 1); 833 *s = '\0'; 834 SvCUR_set(sv, len); 835 SvUTF8_off(sv); 836 SvNIOK_off(sv); 837 } 838 else 839 SvPVCLEAR(retval); 840 SvSETMAGIC(sv); 841 } 842 return count; 843 } 844 845 846 /* also used for: pp_schomp() */ 847 848 PP(pp_schop) 849 { 850 dSP; dTARGET; 851 const bool chomping = PL_op->op_type == OP_SCHOMP; 852 853 const size_t count = do_chomp(TARG, TOPs, chomping); 854 if (chomping) 855 sv_setiv(TARG, count); 856 SETTARG; 857 return NORMAL; 858 } 859 860 861 /* also used for: pp_chomp() */ 862 863 PP(pp_chop) 864 { 865 dSP; dMARK; dTARGET; dORIGMARK; 866 const bool chomping = PL_op->op_type == OP_CHOMP; 867 size_t count = 0; 868 869 while (MARK < SP) 870 count += do_chomp(TARG, *++MARK, chomping); 871 if (chomping) 872 sv_setiv(TARG, count); 873 SP = ORIGMARK; 874 XPUSHTARG; 875 RETURN; 876 } 877 878 PP(pp_undef) 879 { 880 dSP; 881 SV *sv; 882 883 if (!PL_op->op_private) { 884 EXTEND(SP, 1); 885 RETPUSHUNDEF; 886 } 887 888 sv = TOPs; 889 if (!sv) 890 { 891 SETs(&PL_sv_undef); 892 return NORMAL; 893 } 894 895 if (SvTHINKFIRST(sv)) 896 sv_force_normal_flags(sv, SV_COW_DROP_PV|SV_IMMEDIATE_UNREF); 897 898 switch (SvTYPE(sv)) { 899 case SVt_NULL: 900 break; 901 case SVt_PVAV: 902 av_undef(MUTABLE_AV(sv)); 903 break; 904 case SVt_PVHV: 905 hv_undef(MUTABLE_HV(sv)); 906 break; 907 case SVt_PVCV: 908 if (cv_const_sv((const CV *)sv)) 909 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 910 "Constant subroutine %" SVf " undefined", 911 SVfARG(CvANON((const CV *)sv) 912 ? newSVpvs_flags("(anonymous)", SVs_TEMP) 913 : sv_2mortal(newSVhek( 914 CvNAMED(sv) 915 ? CvNAME_HEK((CV *)sv) 916 : GvENAME_HEK(CvGV((const CV *)sv)) 917 )) 918 )); 919 /* FALLTHROUGH */ 920 case SVt_PVFM: 921 /* let user-undef'd sub keep its identity */ 922 cv_undef_flags(MUTABLE_CV(sv), CV_UNDEF_KEEP_NAME); 923 break; 924 case SVt_PVGV: 925 assert(isGV_with_GP(sv)); 926 assert(!SvFAKE(sv)); 927 { 928 GP *gp; 929 HV *stash; 930 931 /* undef *Pkg::meth_name ... */ 932 bool method_changed 933 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv)) 934 && HvENAME_get(stash); 935 /* undef *Foo:: */ 936 if((stash = GvHV((const GV *)sv))) { 937 if(HvENAME_get(stash)) 938 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash)); 939 else stash = NULL; 940 } 941 942 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv)); 943 gp_free(MUTABLE_GV(sv)); 944 Newxz(gp, 1, GP); 945 GvGP_set(sv, gp_ref(gp)); 946 #ifndef PERL_DONT_CREATE_GVSV 947 GvSV(sv) = newSV(0); 948 #endif 949 GvLINE(sv) = CopLINE(PL_curcop); 950 GvEGV(sv) = MUTABLE_GV(sv); 951 GvMULTI_on(sv); 952 953 if(stash) 954 mro_package_moved(NULL, stash, (const GV *)sv, 0); 955 stash = NULL; 956 /* undef *Foo::ISA */ 957 if( strEQ(GvNAME((const GV *)sv), "ISA") 958 && (stash = GvSTASH((const GV *)sv)) 959 && (method_changed || HvENAME(stash)) ) 960 mro_isa_changed_in(stash); 961 else if(method_changed) 962 mro_method_changed_in( 963 GvSTASH((const GV *)sv) 964 ); 965 966 break; 967 } 968 default: 969 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) { 970 SvPV_free(sv); 971 SvPV_set(sv, NULL); 972 SvLEN_set(sv, 0); 973 } 974 SvOK_off(sv); 975 SvSETMAGIC(sv); 976 } 977 978 SETs(&PL_sv_undef); 979 return NORMAL; 980 } 981 982 983 /* common "slow" code for pp_postinc and pp_postdec */ 984 985 static OP * 986 S_postincdec_common(pTHX_ SV *sv, SV *targ) 987 { 988 dSP; 989 const bool inc = 990 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC; 991 992 if (SvROK(sv)) 993 TARG = sv_newmortal(); 994 sv_setsv(TARG, sv); 995 if (inc) 996 sv_inc_nomg(sv); 997 else 998 sv_dec_nomg(sv); 999 SvSETMAGIC(sv); 1000 /* special case for undef: see thread at 2003-03/msg00536.html in archive */ 1001 if (inc && !SvOK(TARG)) 1002 sv_setiv(TARG, 0); 1003 SETTARG; 1004 return NORMAL; 1005 } 1006 1007 1008 /* also used for: pp_i_postinc() */ 1009 1010 PP(pp_postinc) 1011 { 1012 dSP; dTARGET; 1013 SV *sv = TOPs; 1014 1015 /* special-case sv being a simple integer */ 1016 if (LIKELY(((sv->sv_flags & 1017 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV| 1018 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK)) 1019 == SVf_IOK)) 1020 && SvIVX(sv) != IV_MAX) 1021 { 1022 IV iv = SvIVX(sv); 1023 SvIV_set(sv, iv + 1); 1024 TARGi(iv, 0); /* arg not GMG, so can't be tainted */ 1025 SETs(TARG); 1026 return NORMAL; 1027 } 1028 1029 return S_postincdec_common(aTHX_ sv, TARG); 1030 } 1031 1032 1033 /* also used for: pp_i_postdec() */ 1034 1035 PP(pp_postdec) 1036 { 1037 dSP; dTARGET; 1038 SV *sv = TOPs; 1039 1040 /* special-case sv being a simple integer */ 1041 if (LIKELY(((sv->sv_flags & 1042 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV| 1043 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK)) 1044 == SVf_IOK)) 1045 && SvIVX(sv) != IV_MIN) 1046 { 1047 IV iv = SvIVX(sv); 1048 SvIV_set(sv, iv - 1); 1049 TARGi(iv, 0); /* arg not GMG, so can't be tainted */ 1050 SETs(TARG); 1051 return NORMAL; 1052 } 1053 1054 return S_postincdec_common(aTHX_ sv, TARG); 1055 } 1056 1057 1058 /* Ordinary operators. */ 1059 1060 PP(pp_pow) 1061 { 1062 dSP; dATARGET; SV *svl, *svr; 1063 #ifdef PERL_PRESERVE_IVUV 1064 bool is_int = 0; 1065 #endif 1066 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric); 1067 svr = TOPs; 1068 svl = TOPm1s; 1069 #ifdef PERL_PRESERVE_IVUV 1070 /* For integer to integer power, we do the calculation by hand wherever 1071 we're sure it is safe; otherwise we call pow() and try to convert to 1072 integer afterwards. */ 1073 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) { 1074 UV power; 1075 bool baseuok; 1076 UV baseuv; 1077 1078 if (SvUOK(svr)) { 1079 power = SvUVX(svr); 1080 } else { 1081 const IV iv = SvIVX(svr); 1082 if (iv >= 0) { 1083 power = iv; 1084 } else { 1085 goto float_it; /* Can't do negative powers this way. */ 1086 } 1087 } 1088 1089 baseuok = SvUOK(svl); 1090 if (baseuok) { 1091 baseuv = SvUVX(svl); 1092 } else { 1093 const IV iv = SvIVX(svl); 1094 if (iv >= 0) { 1095 baseuv = iv; 1096 baseuok = TRUE; /* effectively it's a UV now */ 1097 } else { 1098 baseuv = -iv; /* abs, baseuok == false records sign */ 1099 } 1100 } 1101 /* now we have integer ** positive integer. */ 1102 is_int = 1; 1103 1104 /* foo & (foo - 1) is zero only for a power of 2. */ 1105 if (!(baseuv & (baseuv - 1))) { 1106 /* We are raising power-of-2 to a positive integer. 1107 The logic here will work for any base (even non-integer 1108 bases) but it can be less accurate than 1109 pow (base,power) or exp (power * log (base)) when the 1110 intermediate values start to spill out of the mantissa. 1111 With powers of 2 we know this can't happen. 1112 And powers of 2 are the favourite thing for perl 1113 programmers to notice ** not doing what they mean. */ 1114 NV result = 1.0; 1115 NV base = baseuok ? baseuv : -(NV)baseuv; 1116 1117 if (power & 1) { 1118 result *= base; 1119 } 1120 while (power >>= 1) { 1121 base *= base; 1122 if (power & 1) { 1123 result *= base; 1124 } 1125 } 1126 SP--; 1127 SETn( result ); 1128 SvIV_please_nomg(svr); 1129 RETURN; 1130 } else { 1131 unsigned int highbit = 8 * sizeof(UV); 1132 unsigned int diff = 8 * sizeof(UV); 1133 while (diff >>= 1) { 1134 highbit -= diff; 1135 if (baseuv >> highbit) { 1136 highbit += diff; 1137 } 1138 } 1139 /* we now have baseuv < 2 ** highbit */ 1140 if (power * highbit <= 8 * sizeof(UV)) { 1141 /* result will definitely fit in UV, so use UV math 1142 on same algorithm as above */ 1143 UV result = 1; 1144 UV base = baseuv; 1145 const bool odd_power = cBOOL(power & 1); 1146 if (odd_power) { 1147 result *= base; 1148 } 1149 while (power >>= 1) { 1150 base *= base; 1151 if (power & 1) { 1152 result *= base; 1153 } 1154 } 1155 SP--; 1156 if (baseuok || !odd_power) 1157 /* answer is positive */ 1158 SETu( result ); 1159 else if (result <= (UV)IV_MAX) 1160 /* answer negative, fits in IV */ 1161 SETi( -(IV)result ); 1162 else if (result == (UV)IV_MIN) 1163 /* 2's complement assumption: special case IV_MIN */ 1164 SETi( IV_MIN ); 1165 else 1166 /* answer negative, doesn't fit */ 1167 SETn( -(NV)result ); 1168 RETURN; 1169 } 1170 } 1171 } 1172 float_it: 1173 #endif 1174 { 1175 NV right = SvNV_nomg(svr); 1176 NV left = SvNV_nomg(svl); 1177 (void)POPs; 1178 1179 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG) 1180 /* 1181 We are building perl with long double support and are on an AIX OS 1182 afflicted with a powl() function that wrongly returns NaNQ for any 1183 negative base. This was reported to IBM as PMR #23047-379 on 1184 03/06/2006. The problem exists in at least the following versions 1185 of AIX and the libm fileset, and no doubt others as well: 1186 1187 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50 1188 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29 1189 AIX 5.2.0 bos.adt.libm 5.2.0.85 1190 1191 So, until IBM fixes powl(), we provide the following workaround to 1192 handle the problem ourselves. Our logic is as follows: for 1193 negative bases (left), we use fmod(right, 2) to check if the 1194 exponent is an odd or even integer: 1195 1196 - if odd, powl(left, right) == -powl(-left, right) 1197 - if even, powl(left, right) == powl(-left, right) 1198 1199 If the exponent is not an integer, the result is rightly NaNQ, so 1200 we just return that (as NV_NAN). 1201 */ 1202 1203 if (left < 0.0) { 1204 NV mod2 = Perl_fmod( right, 2.0 ); 1205 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */ 1206 SETn( -Perl_pow( -left, right) ); 1207 } else if (mod2 == 0.0) { /* even integer */ 1208 SETn( Perl_pow( -left, right) ); 1209 } else { /* fractional power */ 1210 SETn( NV_NAN ); 1211 } 1212 } else { 1213 SETn( Perl_pow( left, right) ); 1214 } 1215 #else 1216 SETn( Perl_pow( left, right) ); 1217 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */ 1218 1219 #ifdef PERL_PRESERVE_IVUV 1220 if (is_int) 1221 SvIV_please_nomg(svr); 1222 #endif 1223 RETURN; 1224 } 1225 } 1226 1227 PP(pp_multiply) 1228 { 1229 dSP; dATARGET; SV *svl, *svr; 1230 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric); 1231 svr = TOPs; 1232 svl = TOPm1s; 1233 1234 #ifdef PERL_PRESERVE_IVUV 1235 1236 /* special-case some simple common cases */ 1237 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) { 1238 IV il, ir; 1239 U32 flags = (svl->sv_flags & svr->sv_flags); 1240 if (flags & SVf_IOK) { 1241 /* both args are simple IVs */ 1242 UV topl, topr; 1243 il = SvIVX(svl); 1244 ir = SvIVX(svr); 1245 do_iv: 1246 topl = ((UV)il) >> (UVSIZE * 4 - 1); 1247 topr = ((UV)ir) >> (UVSIZE * 4 - 1); 1248 1249 /* if both are in a range that can't under/overflow, do a 1250 * simple integer multiply: if the top halves(*) of both numbers 1251 * are 00...00 or 11...11, then it's safe. 1252 * (*) for 32-bits, the "top half" is the top 17 bits, 1253 * for 64-bits, its 33 bits */ 1254 if (!( 1255 ((topl+1) | (topr+1)) 1256 & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */ 1257 )) { 1258 SP--; 1259 TARGi(il * ir, 0); /* args not GMG, so can't be tainted */ 1260 SETs(TARG); 1261 RETURN; 1262 } 1263 goto generic; 1264 } 1265 else if (flags & SVf_NOK) { 1266 /* both args are NVs */ 1267 NV nl = SvNVX(svl); 1268 NV nr = SvNVX(svr); 1269 NV result; 1270 1271 if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) { 1272 /* nothing was lost by converting to IVs */ 1273 goto do_iv; 1274 } 1275 SP--; 1276 result = nl * nr; 1277 # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16 1278 if (Perl_isinf(result)) { 1279 Zero((U8*)&result + 8, 8, U8); 1280 } 1281 # endif 1282 TARGn(result, 0); /* args not GMG, so can't be tainted */ 1283 SETs(TARG); 1284 RETURN; 1285 } 1286 } 1287 1288 generic: 1289 1290 if (SvIV_please_nomg(svr)) { 1291 /* Unless the left argument is integer in range we are going to have to 1292 use NV maths. Hence only attempt to coerce the right argument if 1293 we know the left is integer. */ 1294 /* Left operand is defined, so is it IV? */ 1295 if (SvIV_please_nomg(svl)) { 1296 bool auvok = SvUOK(svl); 1297 bool buvok = SvUOK(svr); 1298 const UV topmask = (~ (UV)0) << (4 * sizeof (UV)); 1299 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV))); 1300 UV alow; 1301 UV ahigh; 1302 UV blow; 1303 UV bhigh; 1304 1305 if (auvok) { 1306 alow = SvUVX(svl); 1307 } else { 1308 const IV aiv = SvIVX(svl); 1309 if (aiv >= 0) { 1310 alow = aiv; 1311 auvok = TRUE; /* effectively it's a UV now */ 1312 } else { 1313 /* abs, auvok == false records sign; Using 0- here and 1314 * later to silence bogus warning from MS VC */ 1315 alow = (UV) (0 - (UV) aiv); 1316 } 1317 } 1318 if (buvok) { 1319 blow = SvUVX(svr); 1320 } else { 1321 const IV biv = SvIVX(svr); 1322 if (biv >= 0) { 1323 blow = biv; 1324 buvok = TRUE; /* effectively it's a UV now */ 1325 } else { 1326 /* abs, buvok == false records sign */ 1327 blow = (UV) (0 - (UV) biv); 1328 } 1329 } 1330 1331 /* If this does sign extension on unsigned it's time for plan B */ 1332 ahigh = alow >> (4 * sizeof (UV)); 1333 alow &= botmask; 1334 bhigh = blow >> (4 * sizeof (UV)); 1335 blow &= botmask; 1336 if (ahigh && bhigh) { 1337 NOOP; 1338 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000 1339 which is overflow. Drop to NVs below. */ 1340 } else if (!ahigh && !bhigh) { 1341 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001 1342 so the unsigned multiply cannot overflow. */ 1343 const UV product = alow * blow; 1344 if (auvok == buvok) { 1345 /* -ve * -ve or +ve * +ve gives a +ve result. */ 1346 SP--; 1347 SETu( product ); 1348 RETURN; 1349 } else if (product <= (UV)IV_MIN) { 1350 /* 2s complement assumption that (UV)-IV_MIN is correct. */ 1351 /* -ve result, which could overflow an IV */ 1352 SP--; 1353 /* can't negate IV_MIN, but there are aren't two 1354 * integers such that !ahigh && !bhigh, where the 1355 * product equals 0x800....000 */ 1356 assert(product != (UV)IV_MIN); 1357 SETi( -(IV)product ); 1358 RETURN; 1359 } /* else drop to NVs below. */ 1360 } else { 1361 /* One operand is large, 1 small */ 1362 UV product_middle; 1363 if (bhigh) { 1364 /* swap the operands */ 1365 ahigh = bhigh; 1366 bhigh = blow; /* bhigh now the temp var for the swap */ 1367 blow = alow; 1368 alow = bhigh; 1369 } 1370 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow) 1371 multiplies can't overflow. shift can, add can, -ve can. */ 1372 product_middle = ahigh * blow; 1373 if (!(product_middle & topmask)) { 1374 /* OK, (ahigh * blow) won't lose bits when we shift it. */ 1375 UV product_low; 1376 product_middle <<= (4 * sizeof (UV)); 1377 product_low = alow * blow; 1378 1379 /* as for pp_add, UV + something mustn't get smaller. 1380 IIRC ANSI mandates this wrapping *behaviour* for 1381 unsigned whatever the actual representation*/ 1382 product_low += product_middle; 1383 if (product_low >= product_middle) { 1384 /* didn't overflow */ 1385 if (auvok == buvok) { 1386 /* -ve * -ve or +ve * +ve gives a +ve result. */ 1387 SP--; 1388 SETu( product_low ); 1389 RETURN; 1390 } else if (product_low <= (UV)IV_MIN) { 1391 /* 2s complement assumption again */ 1392 /* -ve result, which could overflow an IV */ 1393 SP--; 1394 SETi(product_low == (UV)IV_MIN 1395 ? IV_MIN : -(IV)product_low); 1396 RETURN; 1397 } /* else drop to NVs below. */ 1398 } 1399 } /* product_middle too large */ 1400 } /* ahigh && bhigh */ 1401 } /* SvIOK(svl) */ 1402 } /* SvIOK(svr) */ 1403 #endif 1404 { 1405 NV right = SvNV_nomg(svr); 1406 NV left = SvNV_nomg(svl); 1407 NV result = left * right; 1408 1409 (void)POPs; 1410 #if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16 1411 if (Perl_isinf(result)) { 1412 Zero((U8*)&result + 8, 8, U8); 1413 } 1414 #endif 1415 SETn(result); 1416 RETURN; 1417 } 1418 } 1419 1420 PP(pp_divide) 1421 { 1422 dSP; dATARGET; SV *svl, *svr; 1423 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric); 1424 svr = TOPs; 1425 svl = TOPm1s; 1426 /* Only try to do UV divide first 1427 if ((SLOPPYDIVIDE is true) or 1428 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large 1429 to preserve)) 1430 The assumption is that it is better to use floating point divide 1431 whenever possible, only doing integer divide first if we can't be sure. 1432 If NV_PRESERVES_UV is true then we know at compile time that no UV 1433 can be too large to preserve, so don't need to compile the code to 1434 test the size of UVs. */ 1435 1436 #if defined(SLOPPYDIVIDE) || (defined(PERL_PRESERVE_IVUV) && !defined(NV_PRESERVES_UV)) 1437 # define PERL_TRY_UV_DIVIDE 1438 /* ensure that 20./5. == 4. */ 1439 #endif 1440 1441 #ifdef PERL_TRY_UV_DIVIDE 1442 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) { 1443 bool left_non_neg = SvUOK(svl); 1444 bool right_non_neg = SvUOK(svr); 1445 UV left; 1446 UV right; 1447 1448 if (right_non_neg) { 1449 right = SvUVX(svr); 1450 } 1451 else { 1452 const IV biv = SvIVX(svr); 1453 if (biv >= 0) { 1454 right = biv; 1455 right_non_neg = TRUE; /* effectively it's a UV now */ 1456 } 1457 else { 1458 right = -(UV)biv; 1459 } 1460 } 1461 /* historically undef()/0 gives a "Use of uninitialized value" 1462 warning before dieing, hence this test goes here. 1463 If it were immediately before the second SvIV_please, then 1464 DIE() would be invoked before left was even inspected, so 1465 no inspection would give no warning. */ 1466 if (right == 0) 1467 DIE(aTHX_ "Illegal division by zero"); 1468 1469 if (left_non_neg) { 1470 left = SvUVX(svl); 1471 } 1472 else { 1473 const IV aiv = SvIVX(svl); 1474 if (aiv >= 0) { 1475 left = aiv; 1476 left_non_neg = TRUE; /* effectively it's a UV now */ 1477 } 1478 else { 1479 left = -(UV)aiv; 1480 } 1481 } 1482 1483 if (left >= right 1484 #ifdef SLOPPYDIVIDE 1485 /* For sloppy divide we always attempt integer division. */ 1486 #else 1487 /* Otherwise we only attempt it if either or both operands 1488 would not be preserved by an NV. If both fit in NVs 1489 we fall through to the NV divide code below. However, 1490 as left >= right to ensure integer result here, we know that 1491 we can skip the test on the right operand - right big 1492 enough not to be preserved can't get here unless left is 1493 also too big. */ 1494 1495 && (left > ((UV)1 << NV_PRESERVES_UV_BITS)) 1496 #endif 1497 ) { 1498 /* Integer division can't overflow, but it can be imprecise. */ 1499 1500 /* Modern compilers optimize division followed by 1501 * modulo into a single div instruction */ 1502 const UV result = left / right; 1503 if (left % right == 0) { 1504 SP--; /* result is valid */ 1505 if (left_non_neg == right_non_neg) { 1506 /* signs identical, result is positive. */ 1507 SETu( result ); 1508 RETURN; 1509 } 1510 /* 2s complement assumption */ 1511 if (result <= (UV)IV_MIN) 1512 SETi(result == (UV)IV_MIN ? IV_MIN : -(IV)result); 1513 else { 1514 /* It's exact but too negative for IV. */ 1515 SETn( -(NV)result ); 1516 } 1517 RETURN; 1518 } /* tried integer divide but it was not an integer result */ 1519 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */ 1520 } /* one operand wasn't SvIOK */ 1521 #endif /* PERL_TRY_UV_DIVIDE */ 1522 { 1523 NV right = SvNV_nomg(svr); 1524 NV left = SvNV_nomg(svl); 1525 (void)POPs;(void)POPs; 1526 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 1527 if (! Perl_isnan(right) && right == 0.0) 1528 #else 1529 if (right == 0.0) 1530 #endif 1531 DIE(aTHX_ "Illegal division by zero"); 1532 PUSHn( left / right ); 1533 RETURN; 1534 } 1535 } 1536 1537 PP(pp_modulo) 1538 { 1539 dSP; dATARGET; 1540 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric); 1541 { 1542 UV left = 0; 1543 UV right = 0; 1544 bool left_neg = FALSE; 1545 bool right_neg = FALSE; 1546 bool use_double = FALSE; 1547 bool dright_valid = FALSE; 1548 NV dright = 0.0; 1549 NV dleft = 0.0; 1550 SV * const svr = TOPs; 1551 SV * const svl = TOPm1s; 1552 if (SvIV_please_nomg(svr)) { 1553 right_neg = !SvUOK(svr); 1554 if (!right_neg) { 1555 right = SvUVX(svr); 1556 } else { 1557 const IV biv = SvIVX(svr); 1558 if (biv >= 0) { 1559 right = biv; 1560 right_neg = FALSE; /* effectively it's a UV now */ 1561 } else { 1562 right = (UV) (0 - (UV) biv); 1563 } 1564 } 1565 } 1566 else { 1567 dright = SvNV_nomg(svr); 1568 right_neg = dright < 0; 1569 if (right_neg) 1570 dright = -dright; 1571 if (dright < UV_MAX_P1) { 1572 right = U_V(dright); 1573 dright_valid = TRUE; /* In case we need to use double below. */ 1574 } else { 1575 use_double = TRUE; 1576 } 1577 } 1578 1579 /* At this point use_double is only true if right is out of range for 1580 a UV. In range NV has been rounded down to nearest UV and 1581 use_double false. */ 1582 if (!use_double && SvIV_please_nomg(svl)) { 1583 left_neg = !SvUOK(svl); 1584 if (!left_neg) { 1585 left = SvUVX(svl); 1586 } else { 1587 const IV aiv = SvIVX(svl); 1588 if (aiv >= 0) { 1589 left = aiv; 1590 left_neg = FALSE; /* effectively it's a UV now */ 1591 } else { 1592 left = (UV) (0 - (UV) aiv); 1593 } 1594 } 1595 } 1596 else { 1597 dleft = SvNV_nomg(svl); 1598 left_neg = dleft < 0; 1599 if (left_neg) 1600 dleft = -dleft; 1601 1602 /* This should be exactly the 5.6 behaviour - if left and right are 1603 both in range for UV then use U_V() rather than floor. */ 1604 if (!use_double) { 1605 if (dleft < UV_MAX_P1) { 1606 /* right was in range, so is dleft, so use UVs not double. 1607 */ 1608 left = U_V(dleft); 1609 } 1610 /* left is out of range for UV, right was in range, so promote 1611 right (back) to double. */ 1612 else { 1613 /* The +0.5 is used in 5.6 even though it is not strictly 1614 consistent with the implicit +0 floor in the U_V() 1615 inside the #if 1. */ 1616 dleft = Perl_floor(dleft + 0.5); 1617 use_double = TRUE; 1618 if (dright_valid) 1619 dright = Perl_floor(dright + 0.5); 1620 else 1621 dright = right; 1622 } 1623 } 1624 } 1625 sp -= 2; 1626 if (use_double) { 1627 NV dans; 1628 1629 if (!dright) 1630 DIE(aTHX_ "Illegal modulus zero"); 1631 1632 dans = Perl_fmod(dleft, dright); 1633 if ((left_neg != right_neg) && dans) 1634 dans = dright - dans; 1635 if (right_neg) 1636 dans = -dans; 1637 sv_setnv(TARG, dans); 1638 } 1639 else { 1640 UV ans; 1641 1642 if (!right) 1643 DIE(aTHX_ "Illegal modulus zero"); 1644 1645 ans = left % right; 1646 if ((left_neg != right_neg) && ans) 1647 ans = right - ans; 1648 if (right_neg) { 1649 /* XXX may warn: unary minus operator applied to unsigned type */ 1650 /* could change -foo to be (~foo)+1 instead */ 1651 if (ans <= ~((UV)IV_MAX)+1) 1652 sv_setiv(TARG, ~ans+1); 1653 else 1654 sv_setnv(TARG, -(NV)ans); 1655 } 1656 else 1657 sv_setuv(TARG, ans); 1658 } 1659 PUSHTARG; 1660 RETURN; 1661 } 1662 } 1663 1664 PP(pp_repeat) 1665 { 1666 dSP; dATARGET; 1667 IV count; 1668 SV *sv; 1669 bool infnan = FALSE; 1670 const U8 gimme = GIMME_V; 1671 1672 if (gimme == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { 1673 /* TODO: think of some way of doing list-repeat overloading ??? */ 1674 sv = POPs; 1675 SvGETMAGIC(sv); 1676 } 1677 else { 1678 if (UNLIKELY(PL_op->op_private & OPpREPEAT_DOLIST)) { 1679 /* The parser saw this as a list repeat, and there 1680 are probably several items on the stack. But we're 1681 in scalar/void context, and there's no pp_list to save us 1682 now. So drop the rest of the items -- robin@kitsite.com 1683 */ 1684 dMARK; 1685 if (MARK + 1 < SP) { 1686 MARK[1] = TOPm1s; 1687 MARK[2] = TOPs; 1688 } 1689 else { 1690 dTOPss; 1691 ASSUME(MARK + 1 == SP); 1692 MEXTEND(SP, 1); 1693 PUSHs(sv); 1694 MARK[1] = &PL_sv_undef; 1695 } 1696 SP = MARK + 2; 1697 } 1698 tryAMAGICbin_MG(repeat_amg, AMGf_assign); 1699 sv = POPs; 1700 } 1701 1702 if (SvIOKp(sv)) { 1703 if (SvUOK(sv)) { 1704 const UV uv = SvUV_nomg(sv); 1705 if (uv > IV_MAX) 1706 count = IV_MAX; /* The best we can do? */ 1707 else 1708 count = uv; 1709 } else { 1710 count = SvIV_nomg(sv); 1711 } 1712 } 1713 else if (SvNOKp(sv)) { 1714 const NV nv = SvNV_nomg(sv); 1715 infnan = Perl_isinfnan(nv); 1716 if (UNLIKELY(infnan)) { 1717 count = 0; 1718 } else { 1719 if (nv < 0.0) 1720 count = -1; /* An arbitrary negative integer */ 1721 else 1722 count = (IV)nv; 1723 } 1724 } 1725 else 1726 count = SvIV_nomg(sv); 1727 1728 if (infnan) { 1729 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC), 1730 "Non-finite repeat count does nothing"); 1731 } else if (count < 0) { 1732 count = 0; 1733 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC), 1734 "Negative repeat count does nothing"); 1735 } 1736 1737 if (gimme == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { 1738 dMARK; 1739 const SSize_t items = SP - MARK; 1740 const U8 mod = PL_op->op_flags & OPf_MOD; 1741 1742 if (count > 1) { 1743 SSize_t max; 1744 1745 if ( items > SSize_t_MAX / count /* max would overflow */ 1746 /* repeatcpy would overflow */ 1747 || items > I32_MAX / (I32)sizeof(SV *) 1748 ) 1749 Perl_croak(aTHX_ "%s","Out of memory during list extend"); 1750 max = items * count; 1751 MEXTEND(MARK, max); 1752 1753 while (SP > MARK) { 1754 if (*SP) { 1755 if (mod && SvPADTMP(*SP)) { 1756 *SP = sv_mortalcopy(*SP); 1757 } 1758 SvTEMP_off((*SP)); 1759 } 1760 SP--; 1761 } 1762 MARK++; 1763 repeatcpy((char*)(MARK + items), (char*)MARK, 1764 items * sizeof(const SV *), count - 1); 1765 SP += max; 1766 } 1767 else if (count <= 0) 1768 SP = MARK; 1769 } 1770 else { /* Note: mark already snarfed by pp_list */ 1771 SV * const tmpstr = POPs; 1772 STRLEN len; 1773 bool isutf; 1774 1775 if (TARG != tmpstr) 1776 sv_setsv_nomg(TARG, tmpstr); 1777 SvPV_force_nomg(TARG, len); 1778 isutf = DO_UTF8(TARG); 1779 if (count != 1) { 1780 if (count < 1) 1781 SvCUR_set(TARG, 0); 1782 else { 1783 STRLEN max; 1784 1785 if ( len > (MEM_SIZE_MAX-1) / (UV)count /* max would overflow */ 1786 || len > (U32)I32_MAX /* repeatcpy would overflow */ 1787 ) 1788 Perl_croak(aTHX_ "%s", 1789 "Out of memory during string extend"); 1790 max = (UV)count * len + 1; 1791 SvGROW(TARG, max); 1792 1793 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1); 1794 SvCUR_set(TARG, SvCUR(TARG) * count); 1795 } 1796 *SvEND(TARG) = '\0'; 1797 } 1798 if (isutf) 1799 (void)SvPOK_only_UTF8(TARG); 1800 else 1801 (void)SvPOK_only(TARG); 1802 1803 PUSHTARG; 1804 } 1805 RETURN; 1806 } 1807 1808 PP(pp_subtract) 1809 { 1810 dSP; dATARGET; bool useleft; SV *svl, *svr; 1811 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric); 1812 svr = TOPs; 1813 svl = TOPm1s; 1814 1815 #ifdef PERL_PRESERVE_IVUV 1816 1817 /* special-case some simple common cases */ 1818 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) { 1819 IV il, ir; 1820 U32 flags = (svl->sv_flags & svr->sv_flags); 1821 if (flags & SVf_IOK) { 1822 /* both args are simple IVs */ 1823 UV topl, topr; 1824 il = SvIVX(svl); 1825 ir = SvIVX(svr); 1826 do_iv: 1827 topl = ((UV)il) >> (UVSIZE * 8 - 2); 1828 topr = ((UV)ir) >> (UVSIZE * 8 - 2); 1829 1830 /* if both are in a range that can't under/overflow, do a 1831 * simple integer subtract: if the top of both numbers 1832 * are 00 or 11, then it's safe */ 1833 if (!( ((topl+1) | (topr+1)) & 2)) { 1834 SP--; 1835 TARGi(il - ir, 0); /* args not GMG, so can't be tainted */ 1836 SETs(TARG); 1837 RETURN; 1838 } 1839 goto generic; 1840 } 1841 else if (flags & SVf_NOK) { 1842 /* both args are NVs */ 1843 NV nl = SvNVX(svl); 1844 NV nr = SvNVX(svr); 1845 1846 if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) { 1847 /* nothing was lost by converting to IVs */ 1848 goto do_iv; 1849 } 1850 SP--; 1851 TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */ 1852 SETs(TARG); 1853 RETURN; 1854 } 1855 } 1856 1857 generic: 1858 1859 useleft = USE_LEFT(svl); 1860 /* See comments in pp_add (in pp_hot.c) about Overflow, and how 1861 "bad things" happen if you rely on signed integers wrapping. */ 1862 if (SvIV_please_nomg(svr)) { 1863 /* Unless the left argument is integer in range we are going to have to 1864 use NV maths. Hence only attempt to coerce the right argument if 1865 we know the left is integer. */ 1866 UV auv = 0; 1867 bool auvok = FALSE; 1868 bool a_valid = 0; 1869 1870 if (!useleft) { 1871 auv = 0; 1872 a_valid = auvok = 1; 1873 /* left operand is undef, treat as zero. */ 1874 } else { 1875 /* Left operand is defined, so is it IV? */ 1876 if (SvIV_please_nomg(svl)) { 1877 if ((auvok = SvUOK(svl))) 1878 auv = SvUVX(svl); 1879 else { 1880 const IV aiv = SvIVX(svl); 1881 if (aiv >= 0) { 1882 auv = aiv; 1883 auvok = 1; /* Now acting as a sign flag. */ 1884 } else { 1885 auv = (UV) (0 - (UV) aiv); 1886 } 1887 } 1888 a_valid = 1; 1889 } 1890 } 1891 if (a_valid) { 1892 bool result_good = 0; 1893 UV result; 1894 UV buv; 1895 bool buvok = SvUOK(svr); 1896 1897 if (buvok) 1898 buv = SvUVX(svr); 1899 else { 1900 const IV biv = SvIVX(svr); 1901 if (biv >= 0) { 1902 buv = biv; 1903 buvok = 1; 1904 } else 1905 buv = (UV) (0 - (UV) biv); 1906 } 1907 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve, 1908 else "IV" now, independent of how it came in. 1909 if a, b represents positive, A, B negative, a maps to -A etc 1910 a - b => (a - b) 1911 A - b => -(a + b) 1912 a - B => (a + b) 1913 A - B => -(a - b) 1914 all UV maths. negate result if A negative. 1915 subtract if signs same, add if signs differ. */ 1916 1917 if (auvok ^ buvok) { 1918 /* Signs differ. */ 1919 result = auv + buv; 1920 if (result >= auv) 1921 result_good = 1; 1922 } else { 1923 /* Signs same */ 1924 if (auv >= buv) { 1925 result = auv - buv; 1926 /* Must get smaller */ 1927 if (result <= auv) 1928 result_good = 1; 1929 } else { 1930 result = buv - auv; 1931 if (result <= buv) { 1932 /* result really should be -(auv-buv). as its negation 1933 of true value, need to swap our result flag */ 1934 auvok = !auvok; 1935 result_good = 1; 1936 } 1937 } 1938 } 1939 if (result_good) { 1940 SP--; 1941 if (auvok) 1942 SETu( result ); 1943 else { 1944 /* Negate result */ 1945 if (result <= (UV)IV_MIN) 1946 SETi(result == (UV)IV_MIN 1947 ? IV_MIN : -(IV)result); 1948 else { 1949 /* result valid, but out of range for IV. */ 1950 SETn( -(NV)result ); 1951 } 1952 } 1953 RETURN; 1954 } /* Overflow, drop through to NVs. */ 1955 } 1956 } 1957 #else 1958 useleft = USE_LEFT(svl); 1959 #endif 1960 { 1961 NV value = SvNV_nomg(svr); 1962 (void)POPs; 1963 1964 if (!useleft) { 1965 /* left operand is undef, treat as zero - value */ 1966 SETn(-value); 1967 RETURN; 1968 } 1969 SETn( SvNV_nomg(svl) - value ); 1970 RETURN; 1971 } 1972 } 1973 1974 #define IV_BITS (IVSIZE * 8) 1975 1976 static UV S_uv_shift(UV uv, int shift, bool left) 1977 { 1978 if (shift < 0) { 1979 shift = -shift; 1980 left = !left; 1981 } 1982 if (UNLIKELY(shift >= IV_BITS)) { 1983 return 0; 1984 } 1985 return left ? uv << shift : uv >> shift; 1986 } 1987 1988 static IV S_iv_shift(IV iv, int shift, bool left) 1989 { 1990 if (shift < 0) { 1991 shift = -shift; 1992 left = !left; 1993 } 1994 1995 if (UNLIKELY(shift >= IV_BITS)) { 1996 return iv < 0 && !left ? -1 : 0; 1997 } 1998 1999 /* For left shifts, perl 5 has chosen to treat the value as unsigned for 2000 * the * purposes of shifting, then cast back to signed. This is very 2001 * different from Raku: 2002 * 2003 * $ raku -e 'say -2 +< 5' 2004 * -64 2005 * 2006 * $ ./perl -le 'print -2 << 5' 2007 * 18446744073709551552 2008 * */ 2009 if (left) { 2010 if (iv == IV_MIN) { /* Casting this to a UV is undefined behavior */ 2011 return 0; 2012 } 2013 return (IV) (((UV) iv) << shift); 2014 } 2015 2016 /* Here is right shift */ 2017 return iv >> shift; 2018 } 2019 2020 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE) 2021 #define UV_RIGHT_SHIFT(uv, shift) S_uv_shift(uv, shift, FALSE) 2022 #define IV_LEFT_SHIFT(iv, shift) S_iv_shift(iv, shift, TRUE) 2023 #define IV_RIGHT_SHIFT(iv, shift) S_iv_shift(iv, shift, FALSE) 2024 2025 PP(pp_left_shift) 2026 { 2027 dSP; dATARGET; SV *svl, *svr; 2028 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric); 2029 svr = POPs; 2030 svl = TOPs; 2031 { 2032 const IV shift = SvIV_nomg(svr); 2033 if (PL_op->op_private & HINT_INTEGER) { 2034 SETi(IV_LEFT_SHIFT(SvIV_nomg(svl), shift)); 2035 } 2036 else { 2037 SETu(UV_LEFT_SHIFT(SvUV_nomg(svl), shift)); 2038 } 2039 RETURN; 2040 } 2041 } 2042 2043 PP(pp_right_shift) 2044 { 2045 dSP; dATARGET; SV *svl, *svr; 2046 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric); 2047 svr = POPs; 2048 svl = TOPs; 2049 { 2050 const IV shift = SvIV_nomg(svr); 2051 if (PL_op->op_private & HINT_INTEGER) { 2052 SETi(IV_RIGHT_SHIFT(SvIV_nomg(svl), shift)); 2053 } 2054 else { 2055 SETu(UV_RIGHT_SHIFT(SvUV_nomg(svl), shift)); 2056 } 2057 RETURN; 2058 } 2059 } 2060 2061 PP(pp_lt) 2062 { 2063 dSP; 2064 SV *left, *right; 2065 2066 tryAMAGICbin_MG(lt_amg, AMGf_numeric); 2067 right = POPs; 2068 left = TOPs; 2069 SETs(boolSV( 2070 (SvIOK_notUV(left) && SvIOK_notUV(right)) 2071 ? (SvIVX(left) < SvIVX(right)) 2072 : (do_ncmp(left, right) == -1) 2073 )); 2074 RETURN; 2075 } 2076 2077 PP(pp_gt) 2078 { 2079 dSP; 2080 SV *left, *right; 2081 2082 tryAMAGICbin_MG(gt_amg, AMGf_numeric); 2083 right = POPs; 2084 left = TOPs; 2085 SETs(boolSV( 2086 (SvIOK_notUV(left) && SvIOK_notUV(right)) 2087 ? (SvIVX(left) > SvIVX(right)) 2088 : (do_ncmp(left, right) == 1) 2089 )); 2090 RETURN; 2091 } 2092 2093 PP(pp_le) 2094 { 2095 dSP; 2096 SV *left, *right; 2097 2098 tryAMAGICbin_MG(le_amg, AMGf_numeric); 2099 right = POPs; 2100 left = TOPs; 2101 SETs(boolSV( 2102 (SvIOK_notUV(left) && SvIOK_notUV(right)) 2103 ? (SvIVX(left) <= SvIVX(right)) 2104 : (do_ncmp(left, right) <= 0) 2105 )); 2106 RETURN; 2107 } 2108 2109 PP(pp_ge) 2110 { 2111 dSP; 2112 SV *left, *right; 2113 2114 tryAMAGICbin_MG(ge_amg, AMGf_numeric); 2115 right = POPs; 2116 left = TOPs; 2117 SETs(boolSV( 2118 (SvIOK_notUV(left) && SvIOK_notUV(right)) 2119 ? (SvIVX(left) >= SvIVX(right)) 2120 : ( (do_ncmp(left, right) & 2) == 0) 2121 )); 2122 RETURN; 2123 } 2124 2125 PP(pp_ne) 2126 { 2127 dSP; 2128 SV *left, *right; 2129 2130 tryAMAGICbin_MG(ne_amg, AMGf_numeric); 2131 right = POPs; 2132 left = TOPs; 2133 SETs(boolSV( 2134 (SvIOK_notUV(left) && SvIOK_notUV(right)) 2135 ? (SvIVX(left) != SvIVX(right)) 2136 : (do_ncmp(left, right) != 0) 2137 )); 2138 RETURN; 2139 } 2140 2141 /* compare left and right SVs. Returns: 2142 * -1: < 2143 * 0: == 2144 * 1: > 2145 * 2: left or right was a NaN 2146 */ 2147 I32 2148 Perl_do_ncmp(pTHX_ SV* const left, SV * const right) 2149 { 2150 PERL_ARGS_ASSERT_DO_NCMP; 2151 #ifdef PERL_PRESERVE_IVUV 2152 /* Fortunately it seems NaN isn't IOK */ 2153 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) { 2154 if (!SvUOK(left)) { 2155 const IV leftiv = SvIVX(left); 2156 if (!SvUOK(right)) { 2157 /* ## IV <=> IV ## */ 2158 const IV rightiv = SvIVX(right); 2159 return (leftiv > rightiv) - (leftiv < rightiv); 2160 } 2161 /* ## IV <=> UV ## */ 2162 if (leftiv < 0) 2163 /* As (b) is a UV, it's >=0, so it must be < */ 2164 return -1; 2165 { 2166 const UV rightuv = SvUVX(right); 2167 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv); 2168 } 2169 } 2170 2171 if (SvUOK(right)) { 2172 /* ## UV <=> UV ## */ 2173 const UV leftuv = SvUVX(left); 2174 const UV rightuv = SvUVX(right); 2175 return (leftuv > rightuv) - (leftuv < rightuv); 2176 } 2177 /* ## UV <=> IV ## */ 2178 { 2179 const IV rightiv = SvIVX(right); 2180 if (rightiv < 0) 2181 /* As (a) is a UV, it's >=0, so it cannot be < */ 2182 return 1; 2183 { 2184 const UV leftuv = SvUVX(left); 2185 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv); 2186 } 2187 } 2188 NOT_REACHED; /* NOTREACHED */ 2189 } 2190 #endif 2191 { 2192 NV const rnv = SvNV_nomg(right); 2193 NV const lnv = SvNV_nomg(left); 2194 2195 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2196 if (Perl_isnan(lnv) || Perl_isnan(rnv)) { 2197 return 2; 2198 } 2199 return (lnv > rnv) - (lnv < rnv); 2200 #else 2201 if (lnv < rnv) 2202 return -1; 2203 if (lnv > rnv) 2204 return 1; 2205 if (lnv == rnv) 2206 return 0; 2207 return 2; 2208 #endif 2209 } 2210 } 2211 2212 2213 PP(pp_ncmp) 2214 { 2215 dSP; 2216 SV *left, *right; 2217 I32 value; 2218 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric); 2219 right = POPs; 2220 left = TOPs; 2221 value = do_ncmp(left, right); 2222 if (value == 2) { 2223 SETs(&PL_sv_undef); 2224 } 2225 else { 2226 dTARGET; 2227 SETi(value); 2228 } 2229 RETURN; 2230 } 2231 2232 2233 /* also used for: pp_sge() pp_sgt() pp_slt() */ 2234 2235 PP(pp_sle) 2236 { 2237 dSP; 2238 2239 int amg_type = sle_amg; 2240 int multiplier = 1; 2241 int rhs = 1; 2242 2243 switch (PL_op->op_type) { 2244 case OP_SLT: 2245 amg_type = slt_amg; 2246 /* cmp < 0 */ 2247 rhs = 0; 2248 break; 2249 case OP_SGT: 2250 amg_type = sgt_amg; 2251 /* cmp > 0 */ 2252 multiplier = -1; 2253 rhs = 0; 2254 break; 2255 case OP_SGE: 2256 amg_type = sge_amg; 2257 /* cmp >= 0 */ 2258 multiplier = -1; 2259 break; 2260 } 2261 2262 tryAMAGICbin_MG(amg_type, 0); 2263 { 2264 dPOPTOPssrl; 2265 const int cmp = 2266 #ifdef USE_LOCALE_COLLATE 2267 (IN_LC_RUNTIME(LC_COLLATE)) 2268 ? sv_cmp_locale_flags(left, right, 0) 2269 : 2270 #endif 2271 sv_cmp_flags(left, right, 0); 2272 SETs(boolSV(cmp * multiplier < rhs)); 2273 RETURN; 2274 } 2275 } 2276 2277 PP(pp_seq) 2278 { 2279 dSP; 2280 tryAMAGICbin_MG(seq_amg, 0); 2281 { 2282 dPOPTOPssrl; 2283 SETs(boolSV(sv_eq_flags(left, right, 0))); 2284 RETURN; 2285 } 2286 } 2287 2288 PP(pp_sne) 2289 { 2290 dSP; 2291 tryAMAGICbin_MG(sne_amg, 0); 2292 { 2293 dPOPTOPssrl; 2294 SETs(boolSV(!sv_eq_flags(left, right, 0))); 2295 RETURN; 2296 } 2297 } 2298 2299 PP(pp_scmp) 2300 { 2301 dSP; dTARGET; 2302 tryAMAGICbin_MG(scmp_amg, 0); 2303 { 2304 dPOPTOPssrl; 2305 const int cmp = 2306 #ifdef USE_LOCALE_COLLATE 2307 (IN_LC_RUNTIME(LC_COLLATE)) 2308 ? sv_cmp_locale_flags(left, right, 0) 2309 : 2310 #endif 2311 sv_cmp_flags(left, right, 0); 2312 SETi( cmp ); 2313 RETURN; 2314 } 2315 } 2316 2317 PP(pp_bit_and) 2318 { 2319 dSP; dATARGET; 2320 tryAMAGICbin_MG(band_amg, AMGf_assign); 2321 { 2322 dPOPTOPssrl; 2323 if (SvNIOKp(left) || SvNIOKp(right)) { 2324 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); 2325 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); 2326 if (PL_op->op_private & HINT_INTEGER) { 2327 const IV i = SvIV_nomg(left) & SvIV_nomg(right); 2328 SETi(i); 2329 } 2330 else { 2331 const UV u = SvUV_nomg(left) & SvUV_nomg(right); 2332 SETu(u); 2333 } 2334 if (left_ro_nonnum && left != TARG) SvNIOK_off(left); 2335 if (right_ro_nonnum) SvNIOK_off(right); 2336 } 2337 else { 2338 do_vop(PL_op->op_type, TARG, left, right); 2339 SETTARG; 2340 } 2341 RETURN; 2342 } 2343 } 2344 2345 PP(pp_nbit_and) 2346 { 2347 dSP; 2348 tryAMAGICbin_MG(band_amg, AMGf_assign|AMGf_numarg); 2349 { 2350 dATARGET; dPOPTOPssrl; 2351 if (PL_op->op_private & HINT_INTEGER) { 2352 const IV i = SvIV_nomg(left) & SvIV_nomg(right); 2353 SETi(i); 2354 } 2355 else { 2356 const UV u = SvUV_nomg(left) & SvUV_nomg(right); 2357 SETu(u); 2358 } 2359 } 2360 RETURN; 2361 } 2362 2363 PP(pp_sbit_and) 2364 { 2365 dSP; 2366 tryAMAGICbin_MG(sband_amg, AMGf_assign); 2367 { 2368 dATARGET; dPOPTOPssrl; 2369 do_vop(OP_BIT_AND, TARG, left, right); 2370 RETSETTARG; 2371 } 2372 } 2373 2374 /* also used for: pp_bit_xor() */ 2375 2376 PP(pp_bit_or) 2377 { 2378 dSP; dATARGET; 2379 const int op_type = PL_op->op_type; 2380 2381 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign); 2382 { 2383 dPOPTOPssrl; 2384 if (SvNIOKp(left) || SvNIOKp(right)) { 2385 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); 2386 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); 2387 if (PL_op->op_private & HINT_INTEGER) { 2388 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); 2389 const IV r = SvIV_nomg(right); 2390 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); 2391 SETi(result); 2392 } 2393 else { 2394 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0); 2395 const UV r = SvUV_nomg(right); 2396 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); 2397 SETu(result); 2398 } 2399 if (left_ro_nonnum && left != TARG) SvNIOK_off(left); 2400 if (right_ro_nonnum) SvNIOK_off(right); 2401 } 2402 else { 2403 do_vop(op_type, TARG, left, right); 2404 SETTARG; 2405 } 2406 RETURN; 2407 } 2408 } 2409 2410 /* also used for: pp_nbit_xor() */ 2411 2412 PP(pp_nbit_or) 2413 { 2414 dSP; 2415 const int op_type = PL_op->op_type; 2416 2417 tryAMAGICbin_MG((op_type == OP_NBIT_OR ? bor_amg : bxor_amg), 2418 AMGf_assign|AMGf_numarg); 2419 { 2420 dATARGET; dPOPTOPssrl; 2421 if (PL_op->op_private & HINT_INTEGER) { 2422 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); 2423 const IV r = SvIV_nomg(right); 2424 const IV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r); 2425 SETi(result); 2426 } 2427 else { 2428 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0); 2429 const UV r = SvUV_nomg(right); 2430 const UV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r); 2431 SETu(result); 2432 } 2433 } 2434 RETURN; 2435 } 2436 2437 /* also used for: pp_sbit_xor() */ 2438 2439 PP(pp_sbit_or) 2440 { 2441 dSP; 2442 const int op_type = PL_op->op_type; 2443 2444 tryAMAGICbin_MG((op_type == OP_SBIT_OR ? sbor_amg : sbxor_amg), 2445 AMGf_assign); 2446 { 2447 dATARGET; dPOPTOPssrl; 2448 do_vop(op_type == OP_SBIT_OR ? OP_BIT_OR : OP_BIT_XOR, TARG, left, 2449 right); 2450 RETSETTARG; 2451 } 2452 } 2453 2454 PERL_STATIC_INLINE bool 2455 S_negate_string(pTHX) 2456 { 2457 dTARGET; dSP; 2458 STRLEN len; 2459 const char *s; 2460 SV * const sv = TOPs; 2461 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv))) 2462 return FALSE; 2463 s = SvPV_nomg_const(sv, len); 2464 if (isIDFIRST(*s)) { 2465 sv_setpvs(TARG, "-"); 2466 sv_catsv(TARG, sv); 2467 } 2468 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) { 2469 sv_setsv_nomg(TARG, sv); 2470 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-'; 2471 } 2472 else return FALSE; 2473 SETTARG; 2474 return TRUE; 2475 } 2476 2477 PP(pp_negate) 2478 { 2479 dSP; dTARGET; 2480 tryAMAGICun_MG(neg_amg, AMGf_numeric); 2481 if (S_negate_string(aTHX)) return NORMAL; 2482 { 2483 SV * const sv = TOPs; 2484 2485 if (SvIOK(sv)) { 2486 /* It's publicly an integer */ 2487 oops_its_an_int: 2488 if (SvIsUV(sv)) { 2489 if (SvIVX(sv) == IV_MIN) { 2490 /* 2s complement assumption. */ 2491 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == 2492 IV_MIN */ 2493 return NORMAL; 2494 } 2495 else if (SvUVX(sv) <= IV_MAX) { 2496 SETi(-SvIVX(sv)); 2497 return NORMAL; 2498 } 2499 } 2500 else if (SvIVX(sv) != IV_MIN) { 2501 SETi(-SvIVX(sv)); 2502 return NORMAL; 2503 } 2504 #ifdef PERL_PRESERVE_IVUV 2505 else { 2506 SETu((UV)IV_MIN); 2507 return NORMAL; 2508 } 2509 #endif 2510 } 2511 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv))) 2512 SETn(-SvNV_nomg(sv)); 2513 else if (SvPOKp(sv) && SvIV_please_nomg(sv)) 2514 goto oops_its_an_int; 2515 else 2516 SETn(-SvNV_nomg(sv)); 2517 } 2518 return NORMAL; 2519 } 2520 2521 PP(pp_not) 2522 { 2523 dSP; 2524 SV *sv; 2525 2526 tryAMAGICun_MG(not_amg, 0); 2527 sv = *PL_stack_sp; 2528 *PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv)); 2529 return NORMAL; 2530 } 2531 2532 static void 2533 S_scomplement(pTHX_ SV *targ, SV *sv) 2534 { 2535 U8 *tmps; 2536 I32 anum; 2537 STRLEN len; 2538 2539 sv_copypv_nomg(TARG, sv); 2540 tmps = (U8*)SvPV_nomg(TARG, len); 2541 2542 if (SvUTF8(TARG)) { 2543 if (len && ! utf8_to_bytes(tmps, &len)) { 2544 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[PL_op->op_type]); 2545 } 2546 SvCUR_set(TARG, len); 2547 SvUTF8_off(TARG); 2548 } 2549 2550 anum = len; 2551 2552 { 2553 long *tmpl; 2554 for ( ; anum && PTR2nat(tmps) % sizeof(long); anum--, tmps++) 2555 *tmps = ~*tmps; 2556 tmpl = (long*)tmps; 2557 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++) 2558 *tmpl = ~*tmpl; 2559 tmps = (U8*)tmpl; 2560 } 2561 2562 for ( ; anum > 0; anum--, tmps++) 2563 *tmps = ~*tmps; 2564 } 2565 2566 PP(pp_complement) 2567 { 2568 dSP; dTARGET; 2569 tryAMAGICun_MG(compl_amg, AMGf_numeric); 2570 { 2571 dTOPss; 2572 if (SvNIOKp(sv)) { 2573 if (PL_op->op_private & HINT_INTEGER) { 2574 const IV i = ~SvIV_nomg(sv); 2575 SETi(i); 2576 } 2577 else { 2578 const UV u = ~SvUV_nomg(sv); 2579 SETu(u); 2580 } 2581 } 2582 else { 2583 S_scomplement(aTHX_ TARG, sv); 2584 SETTARG; 2585 } 2586 return NORMAL; 2587 } 2588 } 2589 2590 PP(pp_ncomplement) 2591 { 2592 dSP; 2593 tryAMAGICun_MG(compl_amg, AMGf_numeric|AMGf_numarg); 2594 { 2595 dTARGET; dTOPss; 2596 if (PL_op->op_private & HINT_INTEGER) { 2597 const IV i = ~SvIV_nomg(sv); 2598 SETi(i); 2599 } 2600 else { 2601 const UV u = ~SvUV_nomg(sv); 2602 SETu(u); 2603 } 2604 } 2605 return NORMAL; 2606 } 2607 2608 PP(pp_scomplement) 2609 { 2610 dSP; 2611 tryAMAGICun_MG(scompl_amg, AMGf_numeric); 2612 { 2613 dTARGET; dTOPss; 2614 S_scomplement(aTHX_ TARG, sv); 2615 SETTARG; 2616 return NORMAL; 2617 } 2618 } 2619 2620 /* integer versions of some of the above */ 2621 2622 PP(pp_i_multiply) 2623 { 2624 dSP; dATARGET; 2625 tryAMAGICbin_MG(mult_amg, AMGf_assign); 2626 { 2627 dPOPTOPiirl_nomg; 2628 SETi( left * right ); 2629 RETURN; 2630 } 2631 } 2632 2633 PP(pp_i_divide) 2634 { 2635 IV num; 2636 dSP; dATARGET; 2637 tryAMAGICbin_MG(div_amg, AMGf_assign); 2638 { 2639 dPOPTOPssrl; 2640 IV value = SvIV_nomg(right); 2641 if (value == 0) 2642 DIE(aTHX_ "Illegal division by zero"); 2643 num = SvIV_nomg(left); 2644 2645 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */ 2646 if (value == -1) 2647 value = - num; 2648 else 2649 value = num / value; 2650 SETi(value); 2651 RETURN; 2652 } 2653 } 2654 2655 PP(pp_i_modulo) 2656 { 2657 dSP; dATARGET; 2658 tryAMAGICbin_MG(modulo_amg, AMGf_assign); 2659 { 2660 dPOPTOPiirl_nomg; 2661 if (!right) 2662 DIE(aTHX_ "Illegal modulus zero"); 2663 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ 2664 if (right == -1) 2665 SETi( 0 ); 2666 else 2667 SETi( left % right ); 2668 RETURN; 2669 } 2670 } 2671 2672 PP(pp_i_add) 2673 { 2674 dSP; dATARGET; 2675 tryAMAGICbin_MG(add_amg, AMGf_assign); 2676 { 2677 dPOPTOPiirl_ul_nomg; 2678 SETi( left + right ); 2679 RETURN; 2680 } 2681 } 2682 2683 PP(pp_i_subtract) 2684 { 2685 dSP; dATARGET; 2686 tryAMAGICbin_MG(subtr_amg, AMGf_assign); 2687 { 2688 dPOPTOPiirl_ul_nomg; 2689 SETi( left - right ); 2690 RETURN; 2691 } 2692 } 2693 2694 PP(pp_i_lt) 2695 { 2696 dSP; 2697 tryAMAGICbin_MG(lt_amg, 0); 2698 { 2699 dPOPTOPiirl_nomg; 2700 SETs(boolSV(left < right)); 2701 RETURN; 2702 } 2703 } 2704 2705 PP(pp_i_gt) 2706 { 2707 dSP; 2708 tryAMAGICbin_MG(gt_amg, 0); 2709 { 2710 dPOPTOPiirl_nomg; 2711 SETs(boolSV(left > right)); 2712 RETURN; 2713 } 2714 } 2715 2716 PP(pp_i_le) 2717 { 2718 dSP; 2719 tryAMAGICbin_MG(le_amg, 0); 2720 { 2721 dPOPTOPiirl_nomg; 2722 SETs(boolSV(left <= right)); 2723 RETURN; 2724 } 2725 } 2726 2727 PP(pp_i_ge) 2728 { 2729 dSP; 2730 tryAMAGICbin_MG(ge_amg, 0); 2731 { 2732 dPOPTOPiirl_nomg; 2733 SETs(boolSV(left >= right)); 2734 RETURN; 2735 } 2736 } 2737 2738 PP(pp_i_eq) 2739 { 2740 dSP; 2741 tryAMAGICbin_MG(eq_amg, 0); 2742 { 2743 dPOPTOPiirl_nomg; 2744 SETs(boolSV(left == right)); 2745 RETURN; 2746 } 2747 } 2748 2749 PP(pp_i_ne) 2750 { 2751 dSP; 2752 tryAMAGICbin_MG(ne_amg, 0); 2753 { 2754 dPOPTOPiirl_nomg; 2755 SETs(boolSV(left != right)); 2756 RETURN; 2757 } 2758 } 2759 2760 PP(pp_i_ncmp) 2761 { 2762 dSP; dTARGET; 2763 tryAMAGICbin_MG(ncmp_amg, 0); 2764 { 2765 dPOPTOPiirl_nomg; 2766 I32 value; 2767 2768 if (left > right) 2769 value = 1; 2770 else if (left < right) 2771 value = -1; 2772 else 2773 value = 0; 2774 SETi(value); 2775 RETURN; 2776 } 2777 } 2778 2779 PP(pp_i_negate) 2780 { 2781 dSP; dTARGET; 2782 tryAMAGICun_MG(neg_amg, 0); 2783 if (S_negate_string(aTHX)) return NORMAL; 2784 { 2785 SV * const sv = TOPs; 2786 IV const i = SvIV_nomg(sv); 2787 SETi(-i); 2788 return NORMAL; 2789 } 2790 } 2791 2792 /* High falutin' math. */ 2793 2794 PP(pp_atan2) 2795 { 2796 dSP; dTARGET; 2797 tryAMAGICbin_MG(atan2_amg, 0); 2798 { 2799 dPOPTOPnnrl_nomg; 2800 SETn(Perl_atan2(left, right)); 2801 RETURN; 2802 } 2803 } 2804 2805 2806 /* also used for: pp_cos() pp_exp() pp_log() pp_sqrt() */ 2807 2808 PP(pp_sin) 2809 { 2810 dSP; dTARGET; 2811 int amg_type = fallback_amg; 2812 const char *neg_report = NULL; 2813 const int op_type = PL_op->op_type; 2814 2815 switch (op_type) { 2816 case OP_SIN: amg_type = sin_amg; break; 2817 case OP_COS: amg_type = cos_amg; break; 2818 case OP_EXP: amg_type = exp_amg; break; 2819 case OP_LOG: amg_type = log_amg; neg_report = "log"; break; 2820 case OP_SQRT: amg_type = sqrt_amg; neg_report = "sqrt"; break; 2821 } 2822 2823 assert(amg_type != fallback_amg); 2824 2825 tryAMAGICun_MG(amg_type, 0); 2826 { 2827 SV * const arg = TOPs; 2828 const NV value = SvNV_nomg(arg); 2829 #ifdef NV_NAN 2830 NV result = NV_NAN; 2831 #else 2832 NV result = 0.0; 2833 #endif 2834 if (neg_report) { /* log or sqrt */ 2835 if ( 2836 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2837 ! Perl_isnan(value) && 2838 #endif 2839 (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0))) { 2840 SET_NUMERIC_STANDARD(); 2841 /* diag_listed_as: Can't take log of %g */ 2842 DIE(aTHX_ "Can't take %s of %" NVgf, neg_report, value); 2843 } 2844 } 2845 switch (op_type) { 2846 default: 2847 case OP_SIN: result = Perl_sin(value); break; 2848 case OP_COS: result = Perl_cos(value); break; 2849 case OP_EXP: result = Perl_exp(value); break; 2850 case OP_LOG: result = Perl_log(value); break; 2851 case OP_SQRT: result = Perl_sqrt(value); break; 2852 } 2853 SETn(result); 2854 return NORMAL; 2855 } 2856 } 2857 2858 /* Support Configure command-line overrides for rand() functions. 2859 After 5.005, perhaps we should replace this by Configure support 2860 for drand48(), random(), or rand(). For 5.005, though, maintain 2861 compatibility by calling rand() but allow the user to override it. 2862 See INSTALL for details. --Andy Dougherty 15 July 1998 2863 */ 2864 /* Now it's after 5.005, and Configure supports drand48() and random(), 2865 in addition to rand(). So the overrides should not be needed any more. 2866 --Jarkko Hietaniemi 27 September 1998 2867 */ 2868 2869 PP(pp_rand) 2870 { 2871 if (!PL_srand_called) { 2872 (void)seedDrand01((Rand_seed_t)seed()); 2873 PL_srand_called = TRUE; 2874 } 2875 { 2876 dSP; 2877 NV value; 2878 2879 if (MAXARG < 1) 2880 { 2881 EXTEND(SP, 1); 2882 value = 1.0; 2883 } 2884 else { 2885 SV * const sv = POPs; 2886 if(!sv) 2887 value = 1.0; 2888 else 2889 value = SvNV(sv); 2890 } 2891 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */ 2892 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2893 if (! Perl_isnan(value) && value == 0.0) 2894 #else 2895 if (value == 0.0) 2896 #endif 2897 value = 1.0; 2898 { 2899 dTARGET; 2900 PUSHs(TARG); 2901 PUTBACK; 2902 value *= Drand01(); 2903 sv_setnv_mg(TARG, value); 2904 } 2905 } 2906 return NORMAL; 2907 } 2908 2909 PP(pp_srand) 2910 { 2911 dSP; dTARGET; 2912 UV anum; 2913 2914 if (MAXARG >= 1 && (TOPs || POPs)) { 2915 SV *top; 2916 char *pv; 2917 STRLEN len; 2918 int flags; 2919 2920 top = POPs; 2921 pv = SvPV(top, len); 2922 flags = grok_number(pv, len, &anum); 2923 2924 if (!(flags & IS_NUMBER_IN_UV)) { 2925 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 2926 "Integer overflow in srand"); 2927 anum = UV_MAX; 2928 } 2929 (void)srand48_deterministic((Rand_seed_t)anum); 2930 } 2931 else { 2932 anum = seed(); 2933 (void)seedDrand01((Rand_seed_t)anum); 2934 } 2935 2936 PL_srand_called = TRUE; 2937 if (anum) 2938 XPUSHu(anum); 2939 else { 2940 /* Historically srand always returned true. We can avoid breaking 2941 that like this: */ 2942 sv_setpvs(TARG, "0 but true"); 2943 XPUSHTARG; 2944 } 2945 RETURN; 2946 } 2947 2948 PP(pp_int) 2949 { 2950 dSP; dTARGET; 2951 tryAMAGICun_MG(int_amg, AMGf_numeric); 2952 { 2953 SV * const sv = TOPs; 2954 const IV iv = SvIV_nomg(sv); 2955 /* XXX it's arguable that compiler casting to IV might be subtly 2956 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which 2957 else preferring IV has introduced a subtle behaviour change bug. OTOH 2958 relying on floating point to be accurate is a bug. */ 2959 2960 if (!SvOK(sv)) { 2961 SETu(0); 2962 } 2963 else if (SvIOK(sv)) { 2964 if (SvIsUV(sv)) 2965 SETu(SvUV_nomg(sv)); 2966 else 2967 SETi(iv); 2968 } 2969 else { 2970 const NV value = SvNV_nomg(sv); 2971 if (UNLIKELY(Perl_isinfnan(value))) 2972 SETn(value); 2973 else if (value >= 0.0) { 2974 if (value < (NV)UV_MAX + 0.5) { 2975 SETu(U_V(value)); 2976 } else { 2977 SETn(Perl_floor(value)); 2978 } 2979 } 2980 else { 2981 if (value > (NV)IV_MIN - 0.5) { 2982 SETi(I_V(value)); 2983 } else { 2984 SETn(Perl_ceil(value)); 2985 } 2986 } 2987 } 2988 } 2989 return NORMAL; 2990 } 2991 2992 PP(pp_abs) 2993 { 2994 dSP; dTARGET; 2995 tryAMAGICun_MG(abs_amg, AMGf_numeric); 2996 { 2997 SV * const sv = TOPs; 2998 /* This will cache the NV value if string isn't actually integer */ 2999 const IV iv = SvIV_nomg(sv); 3000 3001 if (!SvOK(sv)) { 3002 SETu(0); 3003 } 3004 else if (SvIOK(sv)) { 3005 /* IVX is precise */ 3006 if (SvIsUV(sv)) { 3007 SETu(SvUV_nomg(sv)); /* force it to be numeric only */ 3008 } else { 3009 if (iv >= 0) { 3010 SETi(iv); 3011 } else { 3012 if (iv != IV_MIN) { 3013 SETi(-iv); 3014 } else { 3015 /* 2s complement assumption. Also, not really needed as 3016 IV_MIN and -IV_MIN should both be %100...00 and NV-able */ 3017 SETu((UV)IV_MIN); 3018 } 3019 } 3020 } 3021 } else{ 3022 const NV value = SvNV_nomg(sv); 3023 if (value < 0.0) 3024 SETn(-value); 3025 else 3026 SETn(value); 3027 } 3028 } 3029 return NORMAL; 3030 } 3031 3032 3033 /* also used for: pp_hex() */ 3034 3035 PP(pp_oct) 3036 { 3037 dSP; dTARGET; 3038 const char *tmps; 3039 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 3040 STRLEN len; 3041 NV result_nv; 3042 UV result_uv; 3043 SV* const sv = TOPs; 3044 3045 tmps = (SvPV_const(sv, len)); 3046 if (DO_UTF8(sv)) { 3047 /* If Unicode, try to downgrade 3048 * If not possible, croak. */ 3049 SV* const tsv = sv_2mortal(newSVsv(sv)); 3050 3051 SvUTF8_on(tsv); 3052 sv_utf8_downgrade(tsv, FALSE); 3053 tmps = SvPV_const(tsv, len); 3054 } 3055 if (PL_op->op_type == OP_HEX) 3056 goto hex; 3057 3058 while (*tmps && len && isSPACE(*tmps)) 3059 tmps++, len--; 3060 if (*tmps == '0') 3061 tmps++, len--; 3062 if (isALPHA_FOLD_EQ(*tmps, 'x')) { 3063 tmps++, len--; 3064 flags |= PERL_SCAN_DISALLOW_PREFIX; 3065 hex: 3066 result_uv = grok_hex (tmps, &len, &flags, &result_nv); 3067 } 3068 else if (isALPHA_FOLD_EQ(*tmps, 'b')) { 3069 tmps++, len--; 3070 flags |= PERL_SCAN_DISALLOW_PREFIX; 3071 result_uv = grok_bin (tmps, &len, &flags, &result_nv); 3072 } 3073 else 3074 result_uv = grok_oct (tmps, &len, &flags, &result_nv); 3075 3076 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) { 3077 SETn(result_nv); 3078 } 3079 else { 3080 SETu(result_uv); 3081 } 3082 return NORMAL; 3083 } 3084 3085 /* String stuff. */ 3086 3087 3088 PP(pp_length) 3089 { 3090 dSP; dTARGET; 3091 SV * const sv = TOPs; 3092 3093 U32 in_bytes = IN_BYTES; 3094 /* Simplest case shortcut: 3095 * set svflags to just the SVf_POK|SVs_GMG|SVf_UTF8 from the SV, 3096 * with the SVf_UTF8 flag inverted if under 'use bytes' (HINT_BYTES 3097 * set) 3098 */ 3099 U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8); 3100 3101 STATIC_ASSERT_STMT(SVf_UTF8 == (HINT_BYTES << 26)); 3102 SETs(TARG); 3103 3104 if (LIKELY(svflags == SVf_POK)) 3105 goto simple_pv; 3106 3107 if (svflags & SVs_GMG) 3108 mg_get(sv); 3109 3110 if (SvOK(sv)) { 3111 STRLEN len; 3112 if (!IN_BYTES) { /* reread to avoid using an C auto/register */ 3113 if ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == SVf_POK) 3114 goto simple_pv; 3115 if ( SvPOK(sv) && (PL_op->op_private & OPpTRUEBOOL)) { 3116 /* no need to convert from bytes to chars */ 3117 len = SvCUR(sv); 3118 goto return_bool; 3119 } 3120 len = sv_len_utf8_nomg(sv); 3121 } 3122 else { 3123 /* unrolled SvPV_nomg_const(sv,len) */ 3124 if (SvPOK_nog(sv)) { 3125 simple_pv: 3126 len = SvCUR(sv); 3127 if (PL_op->op_private & OPpTRUEBOOL) { 3128 return_bool: 3129 SETs(len ? &PL_sv_yes : &PL_sv_zero); 3130 return NORMAL; 3131 } 3132 } 3133 else { 3134 (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN); 3135 } 3136 } 3137 TARGi((IV)(len), 1); 3138 } 3139 else { 3140 if (!SvPADTMP(TARG)) { 3141 /* OPpTARGET_MY: targ is var in '$lex = length()' */ 3142 sv_set_undef(TARG); 3143 SvSETMAGIC(TARG); 3144 } 3145 else 3146 /* TARG is on stack at this point and is overwriten by SETs. 3147 * This branch is the odd one out, so put TARG by default on 3148 * stack earlier to let local SP go out of liveness sooner */ 3149 SETs(&PL_sv_undef); 3150 } 3151 return NORMAL; /* no putback, SP didn't move in this opcode */ 3152 } 3153 3154 3155 /* Returns false if substring is completely outside original string. 3156 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must 3157 always be true for an explicit 0. 3158 */ 3159 bool 3160 Perl_translate_substr_offsets( STRLEN curlen, IV pos1_iv, 3161 bool pos1_is_uv, IV len_iv, 3162 bool len_is_uv, STRLEN *posp, 3163 STRLEN *lenp) 3164 { 3165 IV pos2_iv; 3166 int pos2_is_uv; 3167 3168 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS; 3169 3170 if (!pos1_is_uv && pos1_iv < 0 && curlen) { 3171 pos1_is_uv = curlen-1 > ~(UV)pos1_iv; 3172 pos1_iv += curlen; 3173 } 3174 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen) 3175 return FALSE; 3176 3177 if (len_iv || len_is_uv) { 3178 if (!len_is_uv && len_iv < 0) { 3179 pos2_iv = curlen + len_iv; 3180 if (curlen) 3181 pos2_is_uv = curlen-1 > ~(UV)len_iv; 3182 else 3183 pos2_is_uv = 0; 3184 } else { /* len_iv >= 0 */ 3185 if (!pos1_is_uv && pos1_iv < 0) { 3186 pos2_iv = pos1_iv + len_iv; 3187 pos2_is_uv = (UV)len_iv > (UV)IV_MAX; 3188 } else { 3189 if ((UV)len_iv > curlen-(UV)pos1_iv) 3190 pos2_iv = curlen; 3191 else 3192 pos2_iv = pos1_iv+len_iv; 3193 pos2_is_uv = 1; 3194 } 3195 } 3196 } 3197 else { 3198 pos2_iv = curlen; 3199 pos2_is_uv = 1; 3200 } 3201 3202 if (!pos2_is_uv && pos2_iv < 0) { 3203 if (!pos1_is_uv && pos1_iv < 0) 3204 return FALSE; 3205 pos2_iv = 0; 3206 } 3207 else if (!pos1_is_uv && pos1_iv < 0) 3208 pos1_iv = 0; 3209 3210 if ((UV)pos2_iv < (UV)pos1_iv) 3211 pos2_iv = pos1_iv; 3212 if ((UV)pos2_iv > curlen) 3213 pos2_iv = curlen; 3214 3215 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */ 3216 *posp = (STRLEN)( (UV)pos1_iv ); 3217 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv ); 3218 3219 return TRUE; 3220 } 3221 3222 PP(pp_substr) 3223 { 3224 dSP; dTARGET; 3225 SV *sv; 3226 STRLEN curlen; 3227 STRLEN utf8_curlen; 3228 SV * pos_sv; 3229 IV pos1_iv; 3230 int pos1_is_uv; 3231 SV * len_sv; 3232 IV len_iv = 0; 3233 int len_is_uv = 0; 3234 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 3235 const bool rvalue = (GIMME_V != G_VOID); 3236 const char *tmps; 3237 SV *repl_sv = NULL; 3238 const char *repl = NULL; 3239 STRLEN repl_len; 3240 int num_args = PL_op->op_private & 7; 3241 bool repl_need_utf8_upgrade = FALSE; 3242 3243 if (num_args > 2) { 3244 if (num_args > 3) { 3245 if(!(repl_sv = POPs)) num_args--; 3246 } 3247 if ((len_sv = POPs)) { 3248 len_iv = SvIV(len_sv); 3249 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1; 3250 } 3251 else num_args--; 3252 } 3253 pos_sv = POPs; 3254 pos1_iv = SvIV(pos_sv); 3255 pos1_is_uv = SvIOK_UV(pos_sv); 3256 sv = POPs; 3257 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) { 3258 assert(!repl_sv); 3259 repl_sv = POPs; 3260 } 3261 if (lvalue && !repl_sv) { 3262 SV * ret; 3263 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 3264 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); 3265 LvTYPE(ret) = 'x'; 3266 LvTARG(ret) = SvREFCNT_inc_simple(sv); 3267 LvTARGOFF(ret) = 3268 pos1_is_uv || pos1_iv >= 0 3269 ? (STRLEN)(UV)pos1_iv 3270 : (LvFLAGS(ret) |= LVf_NEG_OFF, (STRLEN)(UV)-pos1_iv); 3271 LvTARGLEN(ret) = 3272 len_is_uv || len_iv > 0 3273 ? (STRLEN)(UV)len_iv 3274 : (LvFLAGS(ret) |= LVf_NEG_LEN, (STRLEN)(UV)-len_iv); 3275 3276 PUSHs(ret); /* avoid SvSETMAGIC here */ 3277 RETURN; 3278 } 3279 if (repl_sv) { 3280 repl = SvPV_const(repl_sv, repl_len); 3281 SvGETMAGIC(sv); 3282 if (SvROK(sv)) 3283 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), 3284 "Attempt to use reference as lvalue in substr" 3285 ); 3286 tmps = SvPV_force_nomg(sv, curlen); 3287 if (DO_UTF8(repl_sv) && repl_len) { 3288 if (!DO_UTF8(sv)) { 3289 /* Upgrade the dest, and recalculate tmps in case the buffer 3290 * got reallocated; curlen may also have been changed */ 3291 sv_utf8_upgrade_nomg(sv); 3292 tmps = SvPV_nomg(sv, curlen); 3293 } 3294 } 3295 else if (DO_UTF8(sv)) 3296 repl_need_utf8_upgrade = TRUE; 3297 } 3298 else tmps = SvPV_const(sv, curlen); 3299 if (DO_UTF8(sv)) { 3300 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen); 3301 if (utf8_curlen == curlen) 3302 utf8_curlen = 0; 3303 else 3304 curlen = utf8_curlen; 3305 } 3306 else 3307 utf8_curlen = 0; 3308 3309 { 3310 STRLEN pos, len, byte_len, byte_pos; 3311 3312 if (!translate_substr_offsets( 3313 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len 3314 )) goto bound_fail; 3315 3316 byte_len = len; 3317 byte_pos = utf8_curlen 3318 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos; 3319 3320 tmps += byte_pos; 3321 3322 if (rvalue) { 3323 SvTAINTED_off(TARG); /* decontaminate */ 3324 SvUTF8_off(TARG); /* decontaminate */ 3325 sv_setpvn(TARG, tmps, byte_len); 3326 #ifdef USE_LOCALE_COLLATE 3327 sv_unmagic(TARG, PERL_MAGIC_collxfrm); 3328 #endif 3329 if (utf8_curlen) 3330 SvUTF8_on(TARG); 3331 } 3332 3333 if (repl) { 3334 SV* repl_sv_copy = NULL; 3335 3336 if (repl_need_utf8_upgrade) { 3337 repl_sv_copy = newSVsv(repl_sv); 3338 sv_utf8_upgrade(repl_sv_copy); 3339 repl = SvPV_const(repl_sv_copy, repl_len); 3340 } 3341 if (!SvOK(sv)) 3342 SvPVCLEAR(sv); 3343 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0); 3344 SvREFCNT_dec(repl_sv_copy); 3345 } 3346 } 3347 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) 3348 SP++; 3349 else if (rvalue) { 3350 SvSETMAGIC(TARG); 3351 PUSHs(TARG); 3352 } 3353 RETURN; 3354 3355 bound_fail: 3356 if (repl) 3357 Perl_croak(aTHX_ "substr outside of string"); 3358 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); 3359 RETPUSHUNDEF; 3360 } 3361 3362 PP(pp_vec) 3363 { 3364 dSP; 3365 const IV size = POPi; 3366 SV* offsetsv = POPs; 3367 SV * const src = POPs; 3368 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 3369 SV * ret; 3370 UV retuv; 3371 STRLEN offset = 0; 3372 char errflags = 0; 3373 3374 /* extract a STRLEN-ranged integer value from offsetsv into offset, 3375 * or flag that its out of range */ 3376 { 3377 IV iv = SvIV(offsetsv); 3378 3379 /* avoid a large UV being wrapped to a negative value */ 3380 if (SvIOK_UV(offsetsv) && SvUVX(offsetsv) > (UV)IV_MAX) 3381 errflags = LVf_OUT_OF_RANGE; 3382 else if (iv < 0) 3383 errflags = (LVf_NEG_OFF|LVf_OUT_OF_RANGE); 3384 #if PTRSIZE < IVSIZE 3385 else if (iv > Size_t_MAX) 3386 errflags = LVf_OUT_OF_RANGE; 3387 #endif 3388 else 3389 offset = (STRLEN)iv; 3390 } 3391 3392 retuv = errflags ? 0 : do_vecget(src, offset, size); 3393 3394 if (lvalue) { /* it's an lvalue! */ 3395 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 3396 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0); 3397 LvTYPE(ret) = 'v'; 3398 LvTARG(ret) = SvREFCNT_inc_simple(src); 3399 LvTARGOFF(ret) = offset; 3400 LvTARGLEN(ret) = size; 3401 LvFLAGS(ret) = errflags; 3402 } 3403 else { 3404 dTARGET; 3405 SvTAINTED_off(TARG); /* decontaminate */ 3406 ret = TARG; 3407 } 3408 3409 sv_setuv(ret, retuv); 3410 if (!lvalue) 3411 SvSETMAGIC(ret); 3412 PUSHs(ret); 3413 RETURN; 3414 } 3415 3416 3417 /* also used for: pp_rindex() */ 3418 3419 PP(pp_index) 3420 { 3421 dSP; dTARGET; 3422 SV *big; 3423 SV *little; 3424 SV *temp = NULL; 3425 STRLEN biglen; 3426 STRLEN llen = 0; 3427 SSize_t offset = 0; 3428 SSize_t retval; 3429 const char *big_p; 3430 const char *little_p; 3431 bool big_utf8; 3432 bool little_utf8; 3433 const bool is_index = PL_op->op_type == OP_INDEX; 3434 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0)); 3435 3436 if (threeargs) 3437 offset = POPi; 3438 little = POPs; 3439 big = POPs; 3440 big_p = SvPV_const(big, biglen); 3441 little_p = SvPV_const(little, llen); 3442 3443 big_utf8 = DO_UTF8(big); 3444 little_utf8 = DO_UTF8(little); 3445 if (big_utf8 ^ little_utf8) { 3446 /* One needs to be upgraded. */ 3447 if (little_utf8) { 3448 /* Well, maybe instead we might be able to downgrade the small 3449 string? */ 3450 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen, 3451 &little_utf8); 3452 if (little_utf8) { 3453 /* If the large string is ISO-8859-1, and it's not possible to 3454 convert the small string to ISO-8859-1, then there is no 3455 way that it could be found anywhere by index. */ 3456 retval = -1; 3457 goto push_result; 3458 } 3459 3460 /* At this point, pv is a malloc()ed string. So donate it to temp 3461 to ensure it will get free()d */ 3462 little = temp = newSV(0); 3463 sv_usepvn(temp, pv, llen); 3464 little_p = SvPVX(little); 3465 } else { 3466 temp = newSVpvn(little_p, llen); 3467 3468 sv_utf8_upgrade(temp); 3469 little = temp; 3470 little_p = SvPV_const(little, llen); 3471 } 3472 } 3473 if (SvGAMAGIC(big)) { 3474 /* Life just becomes a lot easier if I use a temporary here. 3475 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously) 3476 will trigger magic and overloading again, as will fbm_instr() 3477 */ 3478 big = newSVpvn_flags(big_p, biglen, 3479 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0)); 3480 big_p = SvPVX(big); 3481 } 3482 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) { 3483 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will 3484 warn on undef, and we've already triggered a warning with the 3485 SvPV_const some lines above. We can't remove that, as we need to 3486 call some SvPV to trigger overloading early and find out if the 3487 string is UTF-8. 3488 This is all getting too messy. The API isn't quite clean enough, 3489 because data access has side effects. 3490 */ 3491 little = newSVpvn_flags(little_p, llen, 3492 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0)); 3493 little_p = SvPVX(little); 3494 } 3495 3496 if (!threeargs) 3497 offset = is_index ? 0 : biglen; 3498 else { 3499 if (big_utf8 && offset > 0) 3500 offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN); 3501 if (!is_index) 3502 offset += llen; 3503 } 3504 if (offset < 0) 3505 offset = 0; 3506 else if (offset > (SSize_t)biglen) 3507 offset = biglen; 3508 if (!(little_p = is_index 3509 ? fbm_instr((unsigned char*)big_p + offset, 3510 (unsigned char*)big_p + biglen, little, 0) 3511 : rninstr(big_p, big_p + offset, 3512 little_p, little_p + llen))) 3513 retval = -1; 3514 else { 3515 retval = little_p - big_p; 3516 if (retval > 1 && big_utf8) 3517 retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN); 3518 } 3519 SvREFCNT_dec(temp); 3520 3521 push_result: 3522 /* OPpTRUEBOOL indicates an '== -1' has been optimised away */ 3523 if (PL_op->op_private & OPpTRUEBOOL) { 3524 SV *result = ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG)) 3525 ? &PL_sv_yes : &PL_sv_no; 3526 if (PL_op->op_private & OPpTARGET_MY) { 3527 /* $lex = (index() == -1) */ 3528 sv_setsv_mg(TARG, result); 3529 PUSHs(TARG); 3530 } 3531 else { 3532 PUSHs(result); 3533 } 3534 } 3535 else 3536 PUSHi(retval); 3537 RETURN; 3538 } 3539 3540 PP(pp_sprintf) 3541 { 3542 dSP; dMARK; dORIGMARK; dTARGET; 3543 SvTAINTED_off(TARG); 3544 do_sprintf(TARG, SP-MARK, MARK+1); 3545 TAINT_IF(SvTAINTED(TARG)); 3546 SP = ORIGMARK; 3547 PUSHTARG; 3548 RETURN; 3549 } 3550 3551 PP(pp_ord) 3552 { 3553 dSP; dTARGET; 3554 3555 SV *argsv = TOPs; 3556 STRLEN len; 3557 const U8 *s = (U8*)SvPV_const(argsv, len); 3558 3559 SETu(DO_UTF8(argsv) 3560 ? (len ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) : 0) 3561 : (UV)(*s)); 3562 3563 return NORMAL; 3564 } 3565 3566 PP(pp_chr) 3567 { 3568 dSP; dTARGET; 3569 char *tmps; 3570 UV value; 3571 SV *top = TOPs; 3572 3573 SvGETMAGIC(top); 3574 if (UNLIKELY(SvAMAGIC(top))) 3575 top = sv_2num(top); 3576 if (UNLIKELY(isinfnansv(top))) 3577 Perl_croak(aTHX_ "Cannot chr %" NVgf, SvNV(top)); 3578 else { 3579 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */ 3580 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0) 3581 || 3582 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top))) 3583 && SvNV_nomg(top) < 0.0))) 3584 { 3585 if (ckWARN(WARN_UTF8)) { 3586 if (SvGMAGICAL(top)) { 3587 SV *top2 = sv_newmortal(); 3588 sv_setsv_nomg(top2, top); 3589 top = top2; 3590 } 3591 Perl_warner(aTHX_ packWARN(WARN_UTF8), 3592 "Invalid negative number (%" SVf ") in chr", SVfARG(top)); 3593 } 3594 value = UNICODE_REPLACEMENT; 3595 } else { 3596 value = SvUV_nomg(top); 3597 } 3598 } 3599 3600 SvUPGRADE(TARG,SVt_PV); 3601 3602 if (value > 255 && !IN_BYTES) { 3603 SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1); 3604 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0); 3605 SvCUR_set(TARG, tmps - SvPVX_const(TARG)); 3606 *tmps = '\0'; 3607 (void)SvPOK_only(TARG); 3608 SvUTF8_on(TARG); 3609 SETTARG; 3610 return NORMAL; 3611 } 3612 3613 SvGROW(TARG,2); 3614 SvCUR_set(TARG, 1); 3615 tmps = SvPVX(TARG); 3616 *tmps++ = (char)value; 3617 *tmps = '\0'; 3618 (void)SvPOK_only(TARG); 3619 3620 SETTARG; 3621 return NORMAL; 3622 } 3623 3624 PP(pp_crypt) 3625 { 3626 #ifdef HAS_CRYPT 3627 dSP; dTARGET; 3628 dPOPTOPssrl; 3629 STRLEN len; 3630 const char *tmps = SvPV_const(left, len); 3631 3632 if (DO_UTF8(left)) { 3633 /* If Unicode, try to downgrade. 3634 * If not possible, croak. 3635 * Yes, we made this up. */ 3636 SV* const tsv = newSVpvn_flags(tmps, len, SVf_UTF8|SVs_TEMP); 3637 3638 sv_utf8_downgrade(tsv, FALSE); 3639 tmps = SvPV_const(tsv, len); 3640 } 3641 # ifdef USE_ITHREADS 3642 # ifdef HAS_CRYPT_R 3643 if (!PL_reentrant_buffer->_crypt_struct_buffer) { 3644 /* This should be threadsafe because in ithreads there is only 3645 * one thread per interpreter. If this would not be true, 3646 * we would need a mutex to protect this malloc. */ 3647 PL_reentrant_buffer->_crypt_struct_buffer = 3648 (struct crypt_data *)safemalloc(sizeof(struct crypt_data)); 3649 # if defined(__GLIBC__) || defined(__EMX__) 3650 if (PL_reentrant_buffer->_crypt_struct_buffer) { 3651 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0; 3652 } 3653 # endif 3654 } 3655 # endif /* HAS_CRYPT_R */ 3656 # endif /* USE_ITHREADS */ 3657 3658 # ifdef FCRYPT 3659 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right))); 3660 # else 3661 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right))); 3662 # endif 3663 SvUTF8_off(TARG); 3664 SETTARG; 3665 RETURN; 3666 #else 3667 DIE(aTHX_ 3668 "The crypt() function is unimplemented due to excessive paranoia."); 3669 #endif 3670 } 3671 3672 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So 3673 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ 3674 3675 3676 /* also used for: pp_lcfirst() */ 3677 3678 PP(pp_ucfirst) 3679 { 3680 /* Actually is both lcfirst() and ucfirst(). Only the first character 3681 * changes. This means that possibly we can change in-place, ie., just 3682 * take the source and change that one character and store it back, but not 3683 * if read-only etc, or if the length changes */ 3684 3685 dSP; 3686 SV *source = TOPs; 3687 STRLEN slen; /* slen is the byte length of the whole SV. */ 3688 STRLEN need; 3689 SV *dest; 3690 bool inplace; /* ? Convert first char only, in-place */ 3691 bool doing_utf8 = FALSE; /* ? using utf8 */ 3692 bool convert_source_to_utf8 = FALSE; /* ? need to convert */ 3693 const int op_type = PL_op->op_type; 3694 const U8 *s; 3695 U8 *d; 3696 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 3697 STRLEN ulen; /* ulen is the byte length of the original Unicode character 3698 * stored as UTF-8 at s. */ 3699 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or 3700 * lowercased) character stored in tmpbuf. May be either 3701 * UTF-8 or not, but in either case is the number of bytes */ 3702 bool remove_dot_above = FALSE; 3703 3704 s = (const U8*)SvPV_const(source, slen); 3705 3706 /* We may be able to get away with changing only the first character, in 3707 * place, but not if read-only, etc. Later we may discover more reasons to 3708 * not convert in-place. */ 3709 inplace = !SvREADONLY(source) && SvPADTMP(source); 3710 3711 #ifdef USE_LOCALE_CTYPE 3712 3713 if (IN_LC_RUNTIME(LC_CTYPE)) { 3714 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 3715 } 3716 3717 #endif 3718 3719 /* First calculate what the changed first character should be. This affects 3720 * whether we can just swap it out, leaving the rest of the string unchanged, 3721 * or even if have to convert the dest to UTF-8 when the source isn't */ 3722 3723 if (! slen) { /* If empty */ 3724 need = 1; /* still need a trailing NUL */ 3725 ulen = 0; 3726 *tmpbuf = '\0'; 3727 } 3728 else if (DO_UTF8(source)) { /* Is the source utf8? */ 3729 doing_utf8 = TRUE; 3730 ulen = UTF8SKIP(s); 3731 3732 if (op_type == OP_UCFIRST) { 3733 #ifdef USE_LOCALE_CTYPE 3734 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); 3735 #else 3736 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, 0); 3737 #endif 3738 } 3739 else { 3740 3741 #ifdef USE_LOCALE_CTYPE 3742 3743 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); 3744 3745 /* In turkic locales, lower casing an 'I' normally yields U+0131, 3746 * LATIN SMALL LETTER DOTLESS I, but not if the grapheme also 3747 * contains a COMBINING DOT ABOVE. Instead it is treated like 3748 * LATIN CAPITAL LETTER I WITH DOT ABOVE lowercased to 'i'. The 3749 * call to lowercase above has handled this. But SpecialCasing.txt 3750 * says we are supposed to remove the COMBINING DOT ABOVE. We can 3751 * tell if we have this situation if I ==> i in a turkic locale. */ 3752 if ( UNLIKELY(PL_in_utf8_turkic_locale) 3753 && IN_LC_RUNTIME(LC_CTYPE) 3754 && (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i'))) 3755 { 3756 /* Here, we know there was a COMBINING DOT ABOVE. We won't be 3757 * able to handle this in-place. */ 3758 inplace = FALSE; 3759 3760 /* It seems likely that the DOT will immediately follow the 3761 * 'I'. If so, we can remove it simply by indicating to the 3762 * code below to start copying the source just beyond the DOT. 3763 * We know its length is 2 */ 3764 if (LIKELY(memBEGINs(s + 1, s + slen, COMBINING_DOT_ABOVE_UTF8))) { 3765 ulen += 2; 3766 } 3767 else { /* But if it doesn't follow immediately, set a flag for 3768 the code below */ 3769 remove_dot_above = TRUE; 3770 } 3771 } 3772 #else 3773 PERL_UNUSED_VAR(remove_dot_above); 3774 3775 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0); 3776 #endif 3777 3778 } 3779 3780 /* we can't do in-place if the length changes. */ 3781 if (ulen != tculen) inplace = FALSE; 3782 need = slen + 1 - ulen + tculen; 3783 } 3784 else { /* Non-zero length, non-UTF-8, Need to consider locale and if 3785 * latin1 is treated as caseless. Note that a locale takes 3786 * precedence */ 3787 ulen = 1; /* Original character is 1 byte */ 3788 tculen = 1; /* Most characters will require one byte, but this will 3789 * need to be overridden for the tricky ones */ 3790 need = slen + 1; 3791 3792 3793 #ifdef USE_LOCALE_CTYPE 3794 3795 if (IN_LC_RUNTIME(LC_CTYPE)) { 3796 if ( UNLIKELY(PL_in_utf8_turkic_locale) 3797 && ( (op_type == OP_LCFIRST && UNLIKELY(*s == 'I')) 3798 || (op_type == OP_UCFIRST && UNLIKELY(*s == 'i')))) 3799 { 3800 if (*s == 'I') { /* lcfirst('I') */ 3801 tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); 3802 tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I); 3803 } 3804 else { /* ucfirst('i') */ 3805 tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 3806 tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 3807 } 3808 tculen = 2; 3809 inplace = FALSE; 3810 doing_utf8 = TRUE; 3811 convert_source_to_utf8 = TRUE; 3812 need += variant_under_utf8_count(s, s + slen); 3813 } 3814 else if (op_type == OP_LCFIRST) { 3815 3816 /* For lc, there are no gotchas for UTF-8 locales (other than 3817 * the turkish ones already handled above) */ 3818 *tmpbuf = toLOWER_LC(*s); 3819 } 3820 else { /* ucfirst */ 3821 3822 /* But for uc, some characters require special handling */ 3823 if (IN_UTF8_CTYPE_LOCALE) { 3824 goto do_uni_rules; 3825 } 3826 3827 /* This would be a bug if any locales have upper and title case 3828 * different */ 3829 *tmpbuf = (U8) toUPPER_LC(*s); 3830 } 3831 } 3832 else 3833 #endif 3834 /* Here, not in locale. If not using Unicode rules, is a simple 3835 * lower/upper, depending */ 3836 if (! IN_UNI_8_BIT) { 3837 *tmpbuf = (op_type == OP_LCFIRST) 3838 ? toLOWER(*s) 3839 : toUPPER(*s); 3840 } 3841 else if (op_type == OP_LCFIRST) { 3842 /* lower case the first letter: no trickiness for any character */ 3843 *tmpbuf = toLOWER_LATIN1(*s); 3844 } 3845 else { 3846 /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is 3847 * non-turkic UTF-8, which we treat as not in locale), and cased 3848 * latin1 */ 3849 UV title_ord; 3850 #ifdef USE_LOCALE_CTYPE 3851 do_uni_rules: 3852 #endif 3853 3854 title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's'); 3855 if (tculen > 1) { 3856 assert(tculen == 2); 3857 3858 /* If the result is an upper Latin1-range character, it can 3859 * still be represented in one byte, which is its ordinal */ 3860 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) { 3861 *tmpbuf = (U8) title_ord; 3862 tculen = 1; 3863 } 3864 else { 3865 /* Otherwise it became more than one ASCII character (in 3866 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to 3867 * beyond Latin1, so the number of bytes changed, so can't 3868 * replace just the first character in place. */ 3869 inplace = FALSE; 3870 3871 /* If the result won't fit in a byte, the entire result 3872 * will have to be in UTF-8. Allocate enough space for the 3873 * expanded first byte, and if UTF-8, the rest of the input 3874 * string, some or all of which may also expand to two 3875 * bytes, plus the terminating NUL. */ 3876 if (title_ord > 255) { 3877 doing_utf8 = TRUE; 3878 convert_source_to_utf8 = TRUE; 3879 need = slen 3880 + variant_under_utf8_count(s, s + slen) 3881 + 1; 3882 3883 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all 3884 * characters whose title case is above 255 is 3885 * 2. */ 3886 ulen = 2; 3887 } 3888 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */ 3889 need = slen + 1 + 1; 3890 } 3891 } 3892 } 3893 } /* End of use Unicode (Latin1) semantics */ 3894 } /* End of changing the case of the first character */ 3895 3896 /* Here, have the first character's changed case stored in tmpbuf. Ready to 3897 * generate the result */ 3898 if (inplace) { 3899 3900 /* We can convert in place. This means we change just the first 3901 * character without disturbing the rest; no need to grow */ 3902 dest = source; 3903 s = d = (U8*)SvPV_force_nomg(source, slen); 3904 } else { 3905 dTARGET; 3906 3907 dest = TARG; 3908 3909 /* Here, we can't convert in place; we earlier calculated how much 3910 * space we will need, so grow to accommodate that */ 3911 SvUPGRADE(dest, SVt_PV); 3912 d = (U8*)SvGROW(dest, need); 3913 (void)SvPOK_only(dest); 3914 3915 SETs(dest); 3916 } 3917 3918 if (doing_utf8) { 3919 if (! inplace) { 3920 if (! convert_source_to_utf8) { 3921 3922 /* Here both source and dest are in UTF-8, but have to create 3923 * the entire output. We initialize the result to be the 3924 * title/lower cased first character, and then append the rest 3925 * of the string. */ 3926 sv_setpvn(dest, (char*)tmpbuf, tculen); 3927 if (slen > ulen) { 3928 3929 /* But this boolean being set means we are in a turkic 3930 * locale, and there is a DOT character that needs to be 3931 * removed, and it isn't immediately after the current 3932 * character. Keep concatenating characters to the output 3933 * one at a time, until we find the DOT, which we simply 3934 * skip */ 3935 if (UNLIKELY(remove_dot_above)) { 3936 do { 3937 Size_t this_len = UTF8SKIP(s + ulen); 3938 3939 sv_catpvn(dest, (char*)(s + ulen), this_len); 3940 3941 ulen += this_len; 3942 if (memBEGINs(s + ulen, s + slen, COMBINING_DOT_ABOVE_UTF8)) { 3943 ulen += 2; 3944 break; 3945 } 3946 } while (s + ulen < s + slen); 3947 } 3948 3949 /* The rest of the string can be concatenated unchanged, 3950 * all at once */ 3951 sv_catpvn(dest, (char*)(s + ulen), slen - ulen); 3952 } 3953 } 3954 else { 3955 const U8 *const send = s + slen; 3956 3957 /* Here the dest needs to be in UTF-8, but the source isn't, 3958 * except we earlier UTF-8'd the first character of the source 3959 * into tmpbuf. First put that into dest, and then append the 3960 * rest of the source, converting it to UTF-8 as we go. */ 3961 3962 /* Assert tculen is 2 here because the only characters that 3963 * get to this part of the code have 2-byte UTF-8 equivalents */ 3964 assert(tculen == 2); 3965 *d++ = *tmpbuf; 3966 *d++ = *(tmpbuf + 1); 3967 s++; /* We have just processed the 1st char */ 3968 3969 while (s < send) { 3970 append_utf8_from_native_byte(*s, &d); 3971 s++; 3972 } 3973 3974 *d = '\0'; 3975 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 3976 } 3977 SvUTF8_on(dest); 3978 } 3979 else { /* in-place UTF-8. Just overwrite the first character */ 3980 Copy(tmpbuf, d, tculen, U8); 3981 SvCUR_set(dest, need - 1); 3982 } 3983 3984 } 3985 else { /* Neither source nor dest are, nor need to be UTF-8 */ 3986 if (slen) { 3987 if (inplace) { /* in-place, only need to change the 1st char */ 3988 *d = *tmpbuf; 3989 } 3990 else { /* Not in-place */ 3991 3992 /* Copy the case-changed character(s) from tmpbuf */ 3993 Copy(tmpbuf, d, tculen, U8); 3994 d += tculen - 1; /* Code below expects d to point to final 3995 * character stored */ 3996 } 3997 } 3998 else { /* empty source */ 3999 /* See bug #39028: Don't taint if empty */ 4000 *d = *s; 4001 } 4002 4003 /* In a "use bytes" we don't treat the source as UTF-8, but, still want 4004 * the destination to retain that flag */ 4005 if (DO_UTF8(source)) 4006 SvUTF8_on(dest); 4007 4008 if (!inplace) { /* Finish the rest of the string, unchanged */ 4009 /* This will copy the trailing NUL */ 4010 Copy(s + 1, d + 1, slen, U8); 4011 SvCUR_set(dest, need - 1); 4012 } 4013 } 4014 #ifdef USE_LOCALE_CTYPE 4015 if (IN_LC_RUNTIME(LC_CTYPE)) { 4016 TAINT; 4017 SvTAINTED_on(dest); 4018 } 4019 #endif 4020 if (dest != source && SvTAINTED(source)) 4021 SvTAINT(dest); 4022 SvSETMAGIC(dest); 4023 return NORMAL; 4024 } 4025 4026 PP(pp_uc) 4027 { 4028 dVAR; 4029 dSP; 4030 SV *source = TOPs; 4031 STRLEN len; 4032 STRLEN min; 4033 SV *dest; 4034 const U8 *s; 4035 U8 *d; 4036 4037 SvGETMAGIC(source); 4038 4039 if ( SvPADTMP(source) 4040 && !SvREADONLY(source) && SvPOK(source) 4041 && !DO_UTF8(source) 4042 && ( 4043 #ifdef USE_LOCALE_CTYPE 4044 (IN_LC_RUNTIME(LC_CTYPE)) 4045 ? ! IN_UTF8_CTYPE_LOCALE 4046 : 4047 #endif 4048 ! IN_UNI_8_BIT)) 4049 { 4050 4051 /* We can convert in place. The reason we can't if in UNI_8_BIT is to 4052 * make the loop tight, so we overwrite the source with the dest before 4053 * looking at it, and we need to look at the original source 4054 * afterwards. There would also need to be code added to handle 4055 * switching to not in-place in midstream if we run into characters 4056 * that change the length. Since being in locale overrides UNI_8_BIT, 4057 * that latter becomes irrelevant in the above test; instead for 4058 * locale, the size can't normally change, except if the locale is a 4059 * UTF-8 one */ 4060 dest = source; 4061 s = d = (U8*)SvPV_force_nomg(source, len); 4062 min = len + 1; 4063 } else { 4064 dTARGET; 4065 4066 dest = TARG; 4067 4068 s = (const U8*)SvPV_nomg_const(source, len); 4069 min = len + 1; 4070 4071 SvUPGRADE(dest, SVt_PV); 4072 d = (U8*)SvGROW(dest, min); 4073 (void)SvPOK_only(dest); 4074 4075 SETs(dest); 4076 } 4077 4078 #ifdef USE_LOCALE_CTYPE 4079 4080 if (IN_LC_RUNTIME(LC_CTYPE)) { 4081 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 4082 } 4083 4084 #endif 4085 4086 /* Overloaded values may have toggled the UTF-8 flag on source, so we need 4087 to check DO_UTF8 again here. */ 4088 4089 if (DO_UTF8(source)) { 4090 const U8 *const send = s + len; 4091 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 4092 4093 #define GREEK_CAPITAL_LETTER_IOTA 0x0399 4094 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 4095 /* All occurrences of these are to be moved to follow any other marks. 4096 * This is context-dependent. We may not be passed enough context to 4097 * move the iota subscript beyond all of them, but we do the best we can 4098 * with what we're given. The result is always better than if we 4099 * hadn't done this. And, the problem would only arise if we are 4100 * passed a character without all its combining marks, which would be 4101 * the caller's mistake. The information this is based on comes from a 4102 * comment in Unicode SpecialCasing.txt, (and the Standard's text 4103 * itself) and so can't be checked properly to see if it ever gets 4104 * revised. But the likelihood of it changing is remote */ 4105 bool in_iota_subscript = FALSE; 4106 4107 while (s < send) { 4108 STRLEN u; 4109 STRLEN ulen; 4110 UV uv; 4111 if (UNLIKELY(in_iota_subscript)) { 4112 UV cp = utf8_to_uvchr_buf(s, send, NULL); 4113 4114 if (! _invlist_contains_cp(PL_utf8_mark, cp)) { 4115 4116 /* A non-mark. Time to output the iota subscript */ 4117 *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA); 4118 *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA); 4119 in_iota_subscript = FALSE; 4120 } 4121 } 4122 4123 /* Then handle the current character. Get the changed case value 4124 * and copy it to the output buffer */ 4125 4126 u = UTF8SKIP(s); 4127 #ifdef USE_LOCALE_CTYPE 4128 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); 4129 #else 4130 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0); 4131 #endif 4132 if (uv == GREEK_CAPITAL_LETTER_IOTA 4133 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI) 4134 { 4135 in_iota_subscript = TRUE; 4136 } 4137 else { 4138 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4139 /* If the eventually required minimum size outgrows the 4140 * available space, we need to grow. */ 4141 const UV o = d - (U8*)SvPVX_const(dest); 4142 4143 /* If someone uppercases one million U+03B0s we SvGROW() 4144 * one million times. Or we could try guessing how much to 4145 * allocate without allocating too much. But we can't 4146 * really guess without examining the rest of the string. 4147 * Such is life. See corresponding comment in lc code for 4148 * another option */ 4149 d = o + (U8*) SvGROW(dest, min); 4150 } 4151 Copy(tmpbuf, d, ulen, U8); 4152 d += ulen; 4153 } 4154 s += u; 4155 } 4156 if (in_iota_subscript) { 4157 *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA); 4158 *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA); 4159 } 4160 SvUTF8_on(dest); 4161 *d = '\0'; 4162 4163 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4164 } 4165 else { /* Not UTF-8 */ 4166 if (len) { 4167 const U8 *const send = s + len; 4168 4169 /* Use locale casing if in locale; regular style if not treating 4170 * latin1 as having case; otherwise the latin1 casing. Do the 4171 * whole thing in a tight loop, for speed, */ 4172 #ifdef USE_LOCALE_CTYPE 4173 if (IN_LC_RUNTIME(LC_CTYPE)) { 4174 if (IN_UTF8_CTYPE_LOCALE) { 4175 goto do_uni_rules; 4176 } 4177 for (; s < send; d++, s++) 4178 *d = (U8) toUPPER_LC(*s); 4179 } 4180 else 4181 #endif 4182 if (! IN_UNI_8_BIT) { 4183 for (; s < send; d++, s++) { 4184 *d = toUPPER(*s); 4185 } 4186 } 4187 else { 4188 #ifdef USE_LOCALE_CTYPE 4189 do_uni_rules: 4190 #endif 4191 for (; s < send; d++, s++) { 4192 Size_t extra; 4193 4194 *d = toUPPER_LATIN1_MOD(*s); 4195 if ( LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) 4196 4197 #ifdef USE_LOCALE_CTYPE 4198 4199 && (LIKELY( ! PL_in_utf8_turkic_locale 4200 || ! IN_LC_RUNTIME(LC_CTYPE)) 4201 || *s != 'i') 4202 #endif 4203 4204 ) { 4205 continue; 4206 } 4207 4208 /* The mainstream case is the tight loop above. To avoid 4209 * extra tests in that, all three characters that always 4210 * require special handling are mapped by the MOD to the 4211 * one tested just above. Use the source to distinguish 4212 * between those cases */ 4213 4214 #if UNICODE_MAJOR_VERSION > 2 \ 4215 || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \ 4216 && UNICODE_DOT_DOT_VERSION >= 8) 4217 if (*s == LATIN_SMALL_LETTER_SHARP_S) { 4218 4219 /* uc() of this requires 2 characters, but they are 4220 * ASCII. If not enough room, grow the string */ 4221 if (SvLEN(dest) < ++min) { 4222 const UV o = d - (U8*)SvPVX_const(dest); 4223 d = o + (U8*) SvGROW(dest, min); 4224 } 4225 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */ 4226 continue; /* Back to the tight loop; still in ASCII */ 4227 } 4228 #endif 4229 4230 /* The other special handling characters have their 4231 * upper cases outside the latin1 range, hence need to be 4232 * in UTF-8, so the whole result needs to be in UTF-8. 4233 * 4234 * So, here we are somewhere in the middle of processing a 4235 * non-UTF-8 string, and realize that we will have to 4236 * convert the whole thing to UTF-8. What to do? There 4237 * are several possibilities. The simplest to code is to 4238 * convert what we have so far, set a flag, and continue on 4239 * in the loop. The flag would be tested each time through 4240 * the loop, and if set, the next character would be 4241 * converted to UTF-8 and stored. But, I (khw) didn't want 4242 * to slow down the mainstream case at all for this fairly 4243 * rare case, so I didn't want to add a test that didn't 4244 * absolutely have to be there in the loop, besides the 4245 * possibility that it would get too complicated for 4246 * optimizers to deal with. Another possibility is to just 4247 * give up, convert the source to UTF-8, and restart the 4248 * function that way. Another possibility is to convert 4249 * both what has already been processed and what is yet to 4250 * come separately to UTF-8, then jump into the loop that 4251 * handles UTF-8. But the most efficient time-wise of the 4252 * ones I could think of is what follows, and turned out to 4253 * not require much extra code. 4254 * 4255 * First, calculate the extra space needed for the 4256 * remainder of the source needing to be in UTF-8. Except 4257 * for the 'i' in Turkic locales, in UTF-8 strings, the 4258 * uppercase of a character below 256 occupies the same 4259 * number of bytes as the original. Therefore, the space 4260 * needed is the that number plus the number of characters 4261 * that become two bytes when converted to UTF-8, plus, in 4262 * turkish locales, the number of 'i's. */ 4263 4264 extra = send - s + variant_under_utf8_count(s, send); 4265 4266 #ifdef USE_LOCALE_CTYPE 4267 4268 if (UNLIKELY(*s == 'i')) { /* We wouldn't get an 'i' here 4269 unless are in a Turkic 4270 locale */ 4271 const U8 * s_peek = s; 4272 4273 do { 4274 extra++; 4275 4276 s_peek = (U8 *) memchr(s_peek + 1, 'i', 4277 send - (s_peek + 1)); 4278 } while (s_peek != NULL); 4279 } 4280 #endif 4281 4282 /* Convert what we have so far into UTF-8, telling the 4283 * function that we know it should be converted, and to 4284 * allow extra space for what we haven't processed yet. 4285 * 4286 * This may cause the string pointer to move, so need to 4287 * save and re-find it. */ 4288 4289 len = d - (U8*)SvPVX_const(dest); 4290 SvCUR_set(dest, len); 4291 len = sv_utf8_upgrade_flags_grow(dest, 4292 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4293 extra 4294 + 1 /* trailing NUL */ ); 4295 d = (U8*)SvPVX(dest) + len; 4296 4297 /* Now process the remainder of the source, simultaneously 4298 * converting to upper and UTF-8. 4299 * 4300 * To avoid extra tests in the loop body, and since the 4301 * loop is so simple, split out the rare Turkic case into 4302 * its own loop */ 4303 4304 #ifdef USE_LOCALE_CTYPE 4305 if ( UNLIKELY(PL_in_utf8_turkic_locale) 4306 && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))) 4307 { 4308 for (; s < send; s++) { 4309 if (*s == 'i') { 4310 *d++ = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 4311 *d++ = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); 4312 } 4313 else { 4314 (void) _to_upper_title_latin1(*s, d, &len, 'S'); 4315 d += len; 4316 } 4317 } 4318 } 4319 else 4320 #endif 4321 for (; s < send; s++) { 4322 (void) _to_upper_title_latin1(*s, d, &len, 'S'); 4323 d += len; 4324 } 4325 4326 /* Here have processed the whole source; no need to 4327 * continue with the outer loop. Each character has been 4328 * converted to upper case and converted to UTF-8. */ 4329 break; 4330 } /* End of processing all latin1-style chars */ 4331 } /* End of processing all chars */ 4332 } /* End of source is not empty */ 4333 4334 if (source != dest) { 4335 *d = '\0'; /* Here d points to 1 after last char, add NUL */ 4336 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4337 } 4338 } /* End of isn't utf8 */ 4339 #ifdef USE_LOCALE_CTYPE 4340 if (IN_LC_RUNTIME(LC_CTYPE)) { 4341 TAINT; 4342 SvTAINTED_on(dest); 4343 } 4344 #endif 4345 if (dest != source && SvTAINTED(source)) 4346 SvTAINT(dest); 4347 SvSETMAGIC(dest); 4348 return NORMAL; 4349 } 4350 4351 PP(pp_lc) 4352 { 4353 dSP; 4354 SV *source = TOPs; 4355 STRLEN len; 4356 STRLEN min; 4357 SV *dest; 4358 const U8 *s; 4359 U8 *d; 4360 bool has_turkic_I = FALSE; 4361 4362 SvGETMAGIC(source); 4363 4364 if ( SvPADTMP(source) 4365 && !SvREADONLY(source) && SvPOK(source) 4366 && !DO_UTF8(source) 4367 4368 #ifdef USE_LOCALE_CTYPE 4369 4370 && ( LIKELY(! IN_LC_RUNTIME(LC_CTYPE)) 4371 || LIKELY(! PL_in_utf8_turkic_locale)) 4372 4373 #endif 4374 4375 ) { 4376 4377 /* We can convert in place, as, outside of Turkic UTF-8 locales, 4378 * lowercasing anything in the latin1 range (or else DO_UTF8 would have 4379 * been on) doesn't lengthen it. */ 4380 dest = source; 4381 s = d = (U8*)SvPV_force_nomg(source, len); 4382 min = len + 1; 4383 } else { 4384 dTARGET; 4385 4386 dest = TARG; 4387 4388 s = (const U8*)SvPV_nomg_const(source, len); 4389 min = len + 1; 4390 4391 SvUPGRADE(dest, SVt_PV); 4392 d = (U8*)SvGROW(dest, min); 4393 (void)SvPOK_only(dest); 4394 4395 SETs(dest); 4396 } 4397 4398 #ifdef USE_LOCALE_CTYPE 4399 4400 if (IN_LC_RUNTIME(LC_CTYPE)) { 4401 const U8 * next_I; 4402 4403 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 4404 4405 /* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become 4406 * UTF-8 for the single case of the character 'I' */ 4407 if ( UNLIKELY(PL_in_utf8_turkic_locale) 4408 && ! DO_UTF8(source) 4409 && (next_I = (U8 *) memchr(s, 'I', len))) 4410 { 4411 Size_t I_count = 0; 4412 const U8 *const send = s + len; 4413 4414 do { 4415 I_count++; 4416 4417 next_I = (U8 *) memchr(next_I + 1, 'I', 4418 send - (next_I + 1)); 4419 } while (next_I != NULL); 4420 4421 /* Except for the 'I', in UTF-8 strings, the lower case of a 4422 * character below 256 occupies the same number of bytes as the 4423 * original. Therefore, the space needed is the original length 4424 * plus I_count plus the number of characters that become two bytes 4425 * when converted to UTF-8 */ 4426 sv_utf8_upgrade_flags_grow(dest, 0, len 4427 + I_count 4428 + variant_under_utf8_count(s, send) 4429 + 1 /* Trailing NUL */ ); 4430 d = (U8*)SvPVX(dest); 4431 has_turkic_I = TRUE; 4432 } 4433 } 4434 4435 #endif 4436 4437 /* Overloaded values may have toggled the UTF-8 flag on source, so we need 4438 to check DO_UTF8 again here. */ 4439 4440 if (DO_UTF8(source)) { 4441 const U8 *const send = s + len; 4442 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 4443 bool remove_dot_above = FALSE; 4444 4445 while (s < send) { 4446 const STRLEN u = UTF8SKIP(s); 4447 STRLEN ulen; 4448 4449 #ifdef USE_LOCALE_CTYPE 4450 4451 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); 4452 4453 /* If we are in a Turkic locale, we have to do more work. As noted 4454 * in the comments for lcfirst, there is a special case if a 'I' 4455 * is in a grapheme with COMBINING DOT ABOVE UTF8. It turns into a 4456 * 'i', and the DOT must be removed. We check for that situation, 4457 * and set a flag if the DOT is there. Then each time through the 4458 * loop, we have to see if we need to remove the next DOT above, 4459 * and if so, do it. We know that there is a DOT because 4460 * _toLOWER_utf8_flags() wouldn't have returned 'i' unless there 4461 * was one in a proper position. */ 4462 if ( UNLIKELY(PL_in_utf8_turkic_locale) 4463 && IN_LC_RUNTIME(LC_CTYPE)) 4464 { 4465 if ( UNLIKELY(remove_dot_above) 4466 && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8)) 4467 { 4468 s += u; 4469 remove_dot_above = FALSE; 4470 continue; 4471 } 4472 else if (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')) { 4473 remove_dot_above = TRUE; 4474 } 4475 } 4476 #else 4477 PERL_UNUSED_VAR(remove_dot_above); 4478 4479 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0); 4480 #endif 4481 4482 /* Here is where we would do context-sensitive actions for the 4483 * Greek final sigma. See the commit message for 86510fb15 for why 4484 * there isn't any */ 4485 4486 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4487 4488 /* If the eventually required minimum size outgrows the 4489 * available space, we need to grow. */ 4490 const UV o = d - (U8*)SvPVX_const(dest); 4491 4492 /* If someone lowercases one million U+0130s we SvGROW() one 4493 * million times. Or we could try guessing how much to 4494 * allocate without allocating too much. Such is life. 4495 * Another option would be to grow an extra byte or two more 4496 * each time we need to grow, which would cut down the million 4497 * to 500K, with little waste */ 4498 d = o + (U8*) SvGROW(dest, min); 4499 } 4500 4501 /* Copy the newly lowercased letter to the output buffer we're 4502 * building */ 4503 Copy(tmpbuf, d, ulen, U8); 4504 d += ulen; 4505 s += u; 4506 } /* End of looping through the source string */ 4507 SvUTF8_on(dest); 4508 *d = '\0'; 4509 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4510 } else { /* 'source' not utf8 */ 4511 if (len) { 4512 const U8 *const send = s + len; 4513 4514 /* Use locale casing if in locale; regular style if not treating 4515 * latin1 as having case; otherwise the latin1 casing. Do the 4516 * whole thing in a tight loop, for speed, */ 4517 #ifdef USE_LOCALE_CTYPE 4518 if (IN_LC_RUNTIME(LC_CTYPE)) { 4519 if (LIKELY( ! has_turkic_I)) { 4520 for (; s < send; d++, s++) 4521 *d = toLOWER_LC(*s); 4522 } 4523 else { /* This is the only case where lc() converts 'dest' 4524 into UTF-8 from a non-UTF-8 'source' */ 4525 for (; s < send; s++) { 4526 if (*s == 'I') { 4527 *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); 4528 *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I); 4529 } 4530 else { 4531 append_utf8_from_native_byte(toLOWER_LATIN1(*s), &d); 4532 } 4533 } 4534 } 4535 } 4536 else 4537 #endif 4538 if (! IN_UNI_8_BIT) { 4539 for (; s < send; d++, s++) { 4540 *d = toLOWER(*s); 4541 } 4542 } 4543 else { 4544 for (; s < send; d++, s++) { 4545 *d = toLOWER_LATIN1(*s); 4546 } 4547 } 4548 } 4549 if (source != dest) { 4550 *d = '\0'; 4551 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4552 } 4553 } 4554 #ifdef USE_LOCALE_CTYPE 4555 if (IN_LC_RUNTIME(LC_CTYPE)) { 4556 TAINT; 4557 SvTAINTED_on(dest); 4558 } 4559 #endif 4560 if (dest != source && SvTAINTED(source)) 4561 SvTAINT(dest); 4562 SvSETMAGIC(dest); 4563 return NORMAL; 4564 } 4565 4566 PP(pp_quotemeta) 4567 { 4568 dSP; dTARGET; 4569 SV * const sv = TOPs; 4570 STRLEN len; 4571 const char *s = SvPV_const(sv,len); 4572 4573 SvUTF8_off(TARG); /* decontaminate */ 4574 if (len) { 4575 char *d; 4576 SvUPGRADE(TARG, SVt_PV); 4577 SvGROW(TARG, (len * 2) + 1); 4578 d = SvPVX(TARG); 4579 if (DO_UTF8(sv)) { 4580 while (len) { 4581 STRLEN ulen = UTF8SKIP(s); 4582 bool to_quote = FALSE; 4583 4584 if (UTF8_IS_INVARIANT(*s)) { 4585 if (_isQUOTEMETA(*s)) { 4586 to_quote = TRUE; 4587 } 4588 } 4589 else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, s + len)) { 4590 if ( 4591 #ifdef USE_LOCALE_CTYPE 4592 /* In locale, we quote all non-ASCII Latin1 chars. 4593 * Otherwise use the quoting rules */ 4594 4595 IN_LC_RUNTIME(LC_CTYPE) 4596 || 4597 #endif 4598 _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1)))) 4599 { 4600 to_quote = TRUE; 4601 } 4602 } 4603 else if (is_QUOTEMETA_high(s)) { 4604 to_quote = TRUE; 4605 } 4606 4607 if (to_quote) { 4608 *d++ = '\\'; 4609 } 4610 if (ulen > len) 4611 ulen = len; 4612 len -= ulen; 4613 while (ulen--) 4614 *d++ = *s++; 4615 } 4616 SvUTF8_on(TARG); 4617 } 4618 else if (IN_UNI_8_BIT) { 4619 while (len--) { 4620 if (_isQUOTEMETA(*s)) 4621 *d++ = '\\'; 4622 *d++ = *s++; 4623 } 4624 } 4625 else { 4626 /* For non UNI_8_BIT (and hence in locale) just quote all \W 4627 * including everything above ASCII */ 4628 while (len--) { 4629 if (!isWORDCHAR_A(*s)) 4630 *d++ = '\\'; 4631 *d++ = *s++; 4632 } 4633 } 4634 *d = '\0'; 4635 SvCUR_set(TARG, d - SvPVX_const(TARG)); 4636 (void)SvPOK_only_UTF8(TARG); 4637 } 4638 else 4639 sv_setpvn(TARG, s, len); 4640 SETTARG; 4641 return NORMAL; 4642 } 4643 4644 PP(pp_fc) 4645 { 4646 dTARGET; 4647 dSP; 4648 SV *source = TOPs; 4649 STRLEN len; 4650 STRLEN min; 4651 SV *dest; 4652 const U8 *s; 4653 const U8 *send; 4654 U8 *d; 4655 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1]; 4656 #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \ 4657 || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \ 4658 || UNICODE_DOT_DOT_VERSION > 0) 4659 const bool full_folding = TRUE; /* This variable is here so we can easily 4660 move to more generality later */ 4661 #else 4662 const bool full_folding = FALSE; 4663 #endif 4664 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 ) 4665 #ifdef USE_LOCALE_CTYPE 4666 | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 ) 4667 #endif 4668 ; 4669 4670 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me. 4671 * You are welcome(?) -Hugmeir 4672 */ 4673 4674 SvGETMAGIC(source); 4675 4676 dest = TARG; 4677 4678 if (SvOK(source)) { 4679 s = (const U8*)SvPV_nomg_const(source, len); 4680 } else { 4681 if (ckWARN(WARN_UNINITIALIZED)) 4682 report_uninit(source); 4683 s = (const U8*)""; 4684 len = 0; 4685 } 4686 4687 min = len + 1; 4688 4689 SvUPGRADE(dest, SVt_PV); 4690 d = (U8*)SvGROW(dest, min); 4691 (void)SvPOK_only(dest); 4692 4693 SETs(dest); 4694 4695 send = s + len; 4696 4697 #ifdef USE_LOCALE_CTYPE 4698 4699 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */ 4700 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; 4701 } 4702 4703 #endif 4704 4705 if (DO_UTF8(source)) { /* UTF-8 flagged string. */ 4706 while (s < send) { 4707 const STRLEN u = UTF8SKIP(s); 4708 STRLEN ulen; 4709 4710 _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags); 4711 4712 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4713 const UV o = d - (U8*)SvPVX_const(dest); 4714 d = o + (U8*) SvGROW(dest, min); 4715 } 4716 4717 Copy(tmpbuf, d, ulen, U8); 4718 d += ulen; 4719 s += u; 4720 } 4721 SvUTF8_on(dest); 4722 } /* Unflagged string */ 4723 else if (len) { 4724 #ifdef USE_LOCALE_CTYPE 4725 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */ 4726 if (IN_UTF8_CTYPE_LOCALE) { 4727 goto do_uni_folding; 4728 } 4729 for (; s < send; d++, s++) 4730 *d = (U8) toFOLD_LC(*s); 4731 } 4732 else 4733 #endif 4734 if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */ 4735 for (; s < send; d++, s++) 4736 *d = toFOLD(*s); 4737 } 4738 else { 4739 #ifdef USE_LOCALE_CTYPE 4740 do_uni_folding: 4741 #endif 4742 /* For ASCII and the Latin-1 range, there's potentially three 4743 * troublesome folds: 4744 * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full 4745 * casefolding becomes 'ss'; 4746 * \x{B5} (\N{MICRO SIGN}), which under any fold becomes 4747 * \x{3BC} (\N{GREEK SMALL LETTER MU}) 4748 * I only in Turkic locales, this folds to \x{131} 4749 * \N{LATIN SMALL LETTER DOTLESS I} 4750 * For the rest, the casefold is their lowercase. */ 4751 for (; s < send; d++, s++) { 4752 if ( UNLIKELY(*s == MICRO_SIGN) 4753 #ifdef USE_LOCALE_CTYPE 4754 || ( UNLIKELY(PL_in_utf8_turkic_locale) 4755 && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)) 4756 && UNLIKELY(*s == 'I')) 4757 #endif 4758 ) { 4759 Size_t extra = send - s 4760 + variant_under_utf8_count(s, send); 4761 4762 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, 4763 * and 'I' in Turkic locales is \N{LATIN SMALL LETTER 4764 * DOTLESS I} both of which are outside of the latin-1 4765 * range. There's a couple of ways to deal with this -- khw 4766 * discusses them in pp_lc/uc, so go there :) What we do 4767 * here is upgrade what we had already casefolded, then 4768 * enter an inner loop that appends the rest of the 4769 * characters as UTF-8. 4770 * 4771 * First we calculate the needed size of the upgraded dest 4772 * beyond what's been processed already (the upgrade 4773 * function figures that out). Except for the 'I' in 4774 * Turkic locales, in UTF-8 strings, the fold case of a 4775 * character below 256 occupies the same number of bytes as 4776 * the original (even the Sharp S). Therefore, the space 4777 * needed is the number of bytes remaining plus the number 4778 * of characters that become two bytes when converted to 4779 * UTF-8 plus, in turkish locales, the number of 'I's */ 4780 4781 if (UNLIKELY(*s == 'I')) { 4782 const U8 * s_peek = s; 4783 4784 do { 4785 extra++; 4786 4787 s_peek = (U8 *) memchr(s_peek + 1, 'i', 4788 send - (s_peek + 1)); 4789 } while (s_peek != NULL); 4790 } 4791 4792 /* Growing may move things, so have to save and recalculate 4793 * 'd' */ 4794 len = d - (U8*)SvPVX_const(dest); 4795 SvCUR_set(dest, len); 4796 len = sv_utf8_upgrade_flags_grow(dest, 4797 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4798 extra 4799 + 1 /* Trailing NUL */ ); 4800 d = (U8*)SvPVX(dest) + len; 4801 4802 *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU); 4803 *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU); 4804 s++; 4805 4806 for (; s < send; s++) { 4807 STRLEN ulen; 4808 _to_uni_fold_flags(*s, d, &ulen, flags); 4809 d += ulen; 4810 } 4811 break; 4812 } 4813 else if ( UNLIKELY(*s == LATIN_SMALL_LETTER_SHARP_S) 4814 && full_folding) 4815 { 4816 /* Under full casefolding, LATIN SMALL LETTER SHARP S 4817 * becomes "ss", which may require growing the SV. */ 4818 if (SvLEN(dest) < ++min) { 4819 const UV o = d - (U8*)SvPVX_const(dest); 4820 d = o + (U8*) SvGROW(dest, min); 4821 } 4822 *(d)++ = 's'; 4823 *d = 's'; 4824 } 4825 else { /* Else, the fold is the lower case */ 4826 *d = toLOWER_LATIN1(*s); 4827 } 4828 } 4829 } 4830 } 4831 *d = '\0'; 4832 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4833 4834 #ifdef USE_LOCALE_CTYPE 4835 if (IN_LC_RUNTIME(LC_CTYPE)) { 4836 TAINT; 4837 SvTAINTED_on(dest); 4838 } 4839 #endif 4840 if (SvTAINTED(source)) 4841 SvTAINT(dest); 4842 SvSETMAGIC(dest); 4843 RETURN; 4844 } 4845 4846 /* Arrays. */ 4847 4848 PP(pp_aslice) 4849 { 4850 dSP; dMARK; dORIGMARK; 4851 AV *const av = MUTABLE_AV(POPs); 4852 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); 4853 4854 if (SvTYPE(av) == SVt_PVAV) { 4855 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 4856 bool can_preserve = FALSE; 4857 4858 if (localizing) { 4859 MAGIC *mg; 4860 HV *stash; 4861 4862 can_preserve = SvCANEXISTDELETE(av); 4863 } 4864 4865 if (lval && localizing) { 4866 SV **svp; 4867 SSize_t max = -1; 4868 for (svp = MARK + 1; svp <= SP; svp++) { 4869 const SSize_t elem = SvIV(*svp); 4870 if (elem > max) 4871 max = elem; 4872 } 4873 if (max > AvMAX(av)) 4874 av_extend(av, max); 4875 } 4876 4877 while (++MARK <= SP) { 4878 SV **svp; 4879 SSize_t elem = SvIV(*MARK); 4880 bool preeminent = TRUE; 4881 4882 if (localizing && can_preserve) { 4883 /* If we can determine whether the element exist, 4884 * Try to preserve the existenceness of a tied array 4885 * element by using EXISTS and DELETE if possible. 4886 * Fallback to FETCH and STORE otherwise. */ 4887 preeminent = av_exists(av, elem); 4888 } 4889 4890 svp = av_fetch(av, elem, lval); 4891 if (lval) { 4892 if (!svp || !*svp) 4893 DIE(aTHX_ PL_no_aelem, elem); 4894 if (localizing) { 4895 if (preeminent) 4896 save_aelem(av, elem, svp); 4897 else 4898 SAVEADELETE(av, elem); 4899 } 4900 } 4901 *MARK = svp ? *svp : &PL_sv_undef; 4902 } 4903 } 4904 if (GIMME_V != G_ARRAY) { 4905 MARK = ORIGMARK; 4906 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef; 4907 SP = MARK; 4908 } 4909 RETURN; 4910 } 4911 4912 PP(pp_kvaslice) 4913 { 4914 dSP; dMARK; 4915 AV *const av = MUTABLE_AV(POPs); 4916 I32 lval = (PL_op->op_flags & OPf_MOD); 4917 SSize_t items = SP - MARK; 4918 4919 if (PL_op->op_private & OPpMAYBE_LVSUB) { 4920 const I32 flags = is_lvalue_sub(); 4921 if (flags) { 4922 if (!(flags & OPpENTERSUB_INARGS)) 4923 /* diag_listed_as: Can't modify %s in %s */ 4924 Perl_croak(aTHX_ "Can't modify index/value array slice in list assignment"); 4925 lval = flags; 4926 } 4927 } 4928 4929 MEXTEND(SP,items); 4930 while (items > 1) { 4931 *(MARK+items*2-1) = *(MARK+items); 4932 items--; 4933 } 4934 items = SP-MARK; 4935 SP += items; 4936 4937 while (++MARK <= SP) { 4938 SV **svp; 4939 4940 svp = av_fetch(av, SvIV(*MARK), lval); 4941 if (lval) { 4942 if (!svp || !*svp || *svp == &PL_sv_undef) { 4943 DIE(aTHX_ PL_no_aelem, SvIV(*MARK)); 4944 } 4945 *MARK = sv_mortalcopy(*MARK); 4946 } 4947 *++MARK = svp ? *svp : &PL_sv_undef; 4948 } 4949 if (GIMME_V != G_ARRAY) { 4950 MARK = SP - items*2; 4951 *++MARK = items > 0 ? *SP : &PL_sv_undef; 4952 SP = MARK; 4953 } 4954 RETURN; 4955 } 4956 4957 4958 PP(pp_aeach) 4959 { 4960 dSP; 4961 AV *array = MUTABLE_AV(POPs); 4962 const U8 gimme = GIMME_V; 4963 IV *iterp = Perl_av_iter_p(aTHX_ array); 4964 const IV current = (*iterp)++; 4965 4966 if (current > av_tindex(array)) { 4967 *iterp = 0; 4968 if (gimme == G_SCALAR) 4969 RETPUSHUNDEF; 4970 else 4971 RETURN; 4972 } 4973 4974 EXTEND(SP, 2); 4975 mPUSHi(current); 4976 if (gimme == G_ARRAY) { 4977 SV **const element = av_fetch(array, current, 0); 4978 PUSHs(element ? *element : &PL_sv_undef); 4979 } 4980 RETURN; 4981 } 4982 4983 /* also used for: pp_avalues()*/ 4984 PP(pp_akeys) 4985 { 4986 dSP; 4987 AV *array = MUTABLE_AV(POPs); 4988 const U8 gimme = GIMME_V; 4989 4990 *Perl_av_iter_p(aTHX_ array) = 0; 4991 4992 if (gimme == G_SCALAR) { 4993 dTARGET; 4994 PUSHi(av_tindex(array) + 1); 4995 } 4996 else if (gimme == G_ARRAY) { 4997 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) { 4998 const I32 flags = is_lvalue_sub(); 4999 if (flags && !(flags & OPpENTERSUB_INARGS)) 5000 /* diag_listed_as: Can't modify %s in %s */ 5001 Perl_croak(aTHX_ 5002 "Can't modify keys on array in list assignment"); 5003 } 5004 { 5005 IV n = Perl_av_len(aTHX_ array); 5006 IV i; 5007 5008 EXTEND(SP, n + 1); 5009 5010 if ( PL_op->op_type == OP_AKEYS 5011 || ( PL_op->op_type == OP_AVHVSWITCH 5012 && (PL_op->op_private & 3) + OP_AEACH == OP_AKEYS )) 5013 { 5014 for (i = 0; i <= n; i++) { 5015 mPUSHi(i); 5016 } 5017 } 5018 else { 5019 for (i = 0; i <= n; i++) { 5020 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0); 5021 PUSHs(elem ? *elem : &PL_sv_undef); 5022 } 5023 } 5024 } 5025 } 5026 RETURN; 5027 } 5028 5029 /* Associative arrays. */ 5030 5031 PP(pp_each) 5032 { 5033 dSP; 5034 HV * hash = MUTABLE_HV(POPs); 5035 HE *entry; 5036 const U8 gimme = GIMME_V; 5037 5038 entry = hv_iternext(hash); 5039 5040 EXTEND(SP, 2); 5041 if (entry) { 5042 SV* const sv = hv_iterkeysv(entry); 5043 PUSHs(sv); 5044 if (gimme == G_ARRAY) { 5045 SV *val; 5046 val = hv_iterval(hash, entry); 5047 PUSHs(val); 5048 } 5049 } 5050 else if (gimme == G_SCALAR) 5051 RETPUSHUNDEF; 5052 5053 RETURN; 5054 } 5055 5056 STATIC OP * 5057 S_do_delete_local(pTHX) 5058 { 5059 dSP; 5060 const U8 gimme = GIMME_V; 5061 const MAGIC *mg; 5062 HV *stash; 5063 const bool sliced = !!(PL_op->op_private & OPpSLICE); 5064 SV **unsliced_keysv = sliced ? NULL : sp--; 5065 SV * const osv = POPs; 5066 SV **mark = sliced ? PL_stack_base + POPMARK : unsliced_keysv-1; 5067 dORIGMARK; 5068 const bool tied = SvRMAGICAL(osv) 5069 && mg_find((const SV *)osv, PERL_MAGIC_tied); 5070 const bool can_preserve = SvCANEXISTDELETE(osv); 5071 const U32 type = SvTYPE(osv); 5072 SV ** const end = sliced ? SP : unsliced_keysv; 5073 5074 if (type == SVt_PVHV) { /* hash element */ 5075 HV * const hv = MUTABLE_HV(osv); 5076 while (++MARK <= end) { 5077 SV * const keysv = *MARK; 5078 SV *sv = NULL; 5079 bool preeminent = TRUE; 5080 if (can_preserve) 5081 preeminent = hv_exists_ent(hv, keysv, 0); 5082 if (tied) { 5083 HE *he = hv_fetch_ent(hv, keysv, 1, 0); 5084 if (he) 5085 sv = HeVAL(he); 5086 else 5087 preeminent = FALSE; 5088 } 5089 else { 5090 sv = hv_delete_ent(hv, keysv, 0, 0); 5091 if (preeminent) 5092 SvREFCNT_inc_simple_void(sv); /* De-mortalize */ 5093 } 5094 if (preeminent) { 5095 if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 5096 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM); 5097 if (tied) { 5098 *MARK = sv_mortalcopy(sv); 5099 mg_clear(sv); 5100 } else 5101 *MARK = sv; 5102 } 5103 else { 5104 SAVEHDELETE(hv, keysv); 5105 *MARK = &PL_sv_undef; 5106 } 5107 } 5108 } 5109 else if (type == SVt_PVAV) { /* array element */ 5110 if (PL_op->op_flags & OPf_SPECIAL) { 5111 AV * const av = MUTABLE_AV(osv); 5112 while (++MARK <= end) { 5113 SSize_t idx = SvIV(*MARK); 5114 SV *sv = NULL; 5115 bool preeminent = TRUE; 5116 if (can_preserve) 5117 preeminent = av_exists(av, idx); 5118 if (tied) { 5119 SV **svp = av_fetch(av, idx, 1); 5120 if (svp) 5121 sv = *svp; 5122 else 5123 preeminent = FALSE; 5124 } 5125 else { 5126 sv = av_delete(av, idx, 0); 5127 if (preeminent) 5128 SvREFCNT_inc_simple_void(sv); /* De-mortalize */ 5129 } 5130 if (preeminent) { 5131 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM); 5132 if (tied) { 5133 *MARK = sv_mortalcopy(sv); 5134 mg_clear(sv); 5135 } else 5136 *MARK = sv; 5137 } 5138 else { 5139 SAVEADELETE(av, idx); 5140 *MARK = &PL_sv_undef; 5141 } 5142 } 5143 } 5144 else 5145 DIE(aTHX_ "panic: avhv_delete no longer supported"); 5146 } 5147 else 5148 DIE(aTHX_ "Not a HASH reference"); 5149 if (sliced) { 5150 if (gimme == G_VOID) 5151 SP = ORIGMARK; 5152 else if (gimme == G_SCALAR) { 5153 MARK = ORIGMARK; 5154 if (SP > MARK) 5155 *++MARK = *SP; 5156 else 5157 *++MARK = &PL_sv_undef; 5158 SP = MARK; 5159 } 5160 } 5161 else if (gimme != G_VOID) 5162 PUSHs(*unsliced_keysv); 5163 5164 RETURN; 5165 } 5166 5167 PP(pp_delete) 5168 { 5169 dSP; 5170 U8 gimme; 5171 I32 discard; 5172 5173 if (PL_op->op_private & OPpLVAL_INTRO) 5174 return do_delete_local(); 5175 5176 gimme = GIMME_V; 5177 discard = (gimme == G_VOID) ? G_DISCARD : 0; 5178 5179 if (PL_op->op_private & (OPpSLICE|OPpKVSLICE)) { 5180 dMARK; dORIGMARK; 5181 HV * const hv = MUTABLE_HV(POPs); 5182 const U32 hvtype = SvTYPE(hv); 5183 int skip = 0; 5184 if (PL_op->op_private & OPpKVSLICE) { 5185 SSize_t items = SP - MARK; 5186 5187 MEXTEND(SP,items); 5188 while (items > 1) { 5189 *(MARK+items*2-1) = *(MARK+items); 5190 items--; 5191 } 5192 items = SP - MARK; 5193 SP += items; 5194 skip = 1; 5195 } 5196 if (hvtype == SVt_PVHV) { /* hash element */ 5197 while ((MARK += (1+skip)) <= SP) { 5198 SV * const sv = hv_delete_ent(hv, *(MARK-skip), discard, 0); 5199 *MARK = sv ? sv : &PL_sv_undef; 5200 } 5201 } 5202 else if (hvtype == SVt_PVAV) { /* array element */ 5203 if (PL_op->op_flags & OPf_SPECIAL) { 5204 while ((MARK += (1+skip)) <= SP) { 5205 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*(MARK-skip)), discard); 5206 *MARK = sv ? sv : &PL_sv_undef; 5207 } 5208 } 5209 } 5210 else 5211 DIE(aTHX_ "Not a HASH reference"); 5212 if (discard) 5213 SP = ORIGMARK; 5214 else if (gimme == G_SCALAR) { 5215 MARK = ORIGMARK; 5216 if (SP > MARK) 5217 *++MARK = *SP; 5218 else 5219 *++MARK = &PL_sv_undef; 5220 SP = MARK; 5221 } 5222 } 5223 else { 5224 SV *keysv = POPs; 5225 HV * const hv = MUTABLE_HV(POPs); 5226 SV *sv = NULL; 5227 if (SvTYPE(hv) == SVt_PVHV) 5228 sv = hv_delete_ent(hv, keysv, discard, 0); 5229 else if (SvTYPE(hv) == SVt_PVAV) { 5230 if (PL_op->op_flags & OPf_SPECIAL) 5231 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard); 5232 else 5233 DIE(aTHX_ "panic: avhv_delete no longer supported"); 5234 } 5235 else 5236 DIE(aTHX_ "Not a HASH reference"); 5237 if (!sv) 5238 sv = &PL_sv_undef; 5239 if (!discard) 5240 PUSHs(sv); 5241 } 5242 RETURN; 5243 } 5244 5245 PP(pp_exists) 5246 { 5247 dSP; 5248 SV *tmpsv; 5249 HV *hv; 5250 5251 if (UNLIKELY( PL_op->op_private & OPpEXISTS_SUB )) { 5252 GV *gv; 5253 SV * const sv = POPs; 5254 CV * const cv = sv_2cv(sv, &hv, &gv, 0); 5255 if (cv) 5256 RETPUSHYES; 5257 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv)) 5258 RETPUSHYES; 5259 RETPUSHNO; 5260 } 5261 tmpsv = POPs; 5262 hv = MUTABLE_HV(POPs); 5263 if (LIKELY( SvTYPE(hv) == SVt_PVHV )) { 5264 if (hv_exists_ent(hv, tmpsv, 0)) 5265 RETPUSHYES; 5266 } 5267 else if (SvTYPE(hv) == SVt_PVAV) { 5268 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */ 5269 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv))) 5270 RETPUSHYES; 5271 } 5272 } 5273 else { 5274 DIE(aTHX_ "Not a HASH reference"); 5275 } 5276 RETPUSHNO; 5277 } 5278 5279 PP(pp_hslice) 5280 { 5281 dSP; dMARK; dORIGMARK; 5282 HV * const hv = MUTABLE_HV(POPs); 5283 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); 5284 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 5285 bool can_preserve = FALSE; 5286 5287 if (localizing) { 5288 MAGIC *mg; 5289 HV *stash; 5290 5291 if (SvCANEXISTDELETE(hv)) 5292 can_preserve = TRUE; 5293 } 5294 5295 while (++MARK <= SP) { 5296 SV * const keysv = *MARK; 5297 SV **svp; 5298 HE *he; 5299 bool preeminent = TRUE; 5300 5301 if (localizing && can_preserve) { 5302 /* If we can determine whether the element exist, 5303 * try to preserve the existenceness of a tied hash 5304 * element by using EXISTS and DELETE if possible. 5305 * Fallback to FETCH and STORE otherwise. */ 5306 preeminent = hv_exists_ent(hv, keysv, 0); 5307 } 5308 5309 he = hv_fetch_ent(hv, keysv, lval, 0); 5310 svp = he ? &HeVAL(he) : NULL; 5311 5312 if (lval) { 5313 if (!svp || !*svp || *svp == &PL_sv_undef) { 5314 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 5315 } 5316 if (localizing) { 5317 if (HvNAME_get(hv) && isGV_or_RVCV(*svp)) 5318 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL)); 5319 else if (preeminent) 5320 save_helem_flags(hv, keysv, svp, 5321 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC); 5322 else 5323 SAVEHDELETE(hv, keysv); 5324 } 5325 } 5326 *MARK = svp && *svp ? *svp : &PL_sv_undef; 5327 } 5328 if (GIMME_V != G_ARRAY) { 5329 MARK = ORIGMARK; 5330 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef; 5331 SP = MARK; 5332 } 5333 RETURN; 5334 } 5335 5336 PP(pp_kvhslice) 5337 { 5338 dSP; dMARK; 5339 HV * const hv = MUTABLE_HV(POPs); 5340 I32 lval = (PL_op->op_flags & OPf_MOD); 5341 SSize_t items = SP - MARK; 5342 5343 if (PL_op->op_private & OPpMAYBE_LVSUB) { 5344 const I32 flags = is_lvalue_sub(); 5345 if (flags) { 5346 if (!(flags & OPpENTERSUB_INARGS)) 5347 /* diag_listed_as: Can't modify %s in %s */ 5348 Perl_croak(aTHX_ "Can't modify key/value hash slice in %s assignment", 5349 GIMME_V == G_ARRAY ? "list" : "scalar"); 5350 lval = flags; 5351 } 5352 } 5353 5354 MEXTEND(SP,items); 5355 while (items > 1) { 5356 *(MARK+items*2-1) = *(MARK+items); 5357 items--; 5358 } 5359 items = SP-MARK; 5360 SP += items; 5361 5362 while (++MARK <= SP) { 5363 SV * const keysv = *MARK; 5364 SV **svp; 5365 HE *he; 5366 5367 he = hv_fetch_ent(hv, keysv, lval, 0); 5368 svp = he ? &HeVAL(he) : NULL; 5369 5370 if (lval) { 5371 if (!svp || !*svp || *svp == &PL_sv_undef) { 5372 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 5373 } 5374 *MARK = sv_mortalcopy(*MARK); 5375 } 5376 *++MARK = svp && *svp ? *svp : &PL_sv_undef; 5377 } 5378 if (GIMME_V != G_ARRAY) { 5379 MARK = SP - items*2; 5380 *++MARK = items > 0 ? *SP : &PL_sv_undef; 5381 SP = MARK; 5382 } 5383 RETURN; 5384 } 5385 5386 /* List operators. */ 5387 5388 PP(pp_list) 5389 { 5390 I32 markidx = POPMARK; 5391 if (GIMME_V != G_ARRAY) { 5392 /* don't initialize mark here, EXTEND() may move the stack */ 5393 SV **mark; 5394 dSP; 5395 EXTEND(SP, 1); /* in case no arguments, as in @empty */ 5396 mark = PL_stack_base + markidx; 5397 if (++MARK <= SP) 5398 *MARK = *SP; /* unwanted list, return last item */ 5399 else 5400 *MARK = &PL_sv_undef; 5401 SP = MARK; 5402 PUTBACK; 5403 } 5404 return NORMAL; 5405 } 5406 5407 PP(pp_lslice) 5408 { 5409 dSP; 5410 SV ** const lastrelem = PL_stack_sp; 5411 SV ** const lastlelem = PL_stack_base + POPMARK; 5412 SV ** const firstlelem = PL_stack_base + POPMARK + 1; 5413 SV ** const firstrelem = lastlelem + 1; 5414 const U8 mod = PL_op->op_flags & OPf_MOD; 5415 5416 const I32 max = lastrelem - lastlelem; 5417 SV **lelem; 5418 5419 if (GIMME_V != G_ARRAY) { 5420 if (lastlelem < firstlelem) { 5421 EXTEND(SP, 1); 5422 *firstlelem = &PL_sv_undef; 5423 } 5424 else { 5425 I32 ix = SvIV(*lastlelem); 5426 if (ix < 0) 5427 ix += max; 5428 if (ix < 0 || ix >= max) 5429 *firstlelem = &PL_sv_undef; 5430 else 5431 *firstlelem = firstrelem[ix]; 5432 } 5433 SP = firstlelem; 5434 RETURN; 5435 } 5436 5437 if (max == 0) { 5438 SP = firstlelem - 1; 5439 RETURN; 5440 } 5441 5442 for (lelem = firstlelem; lelem <= lastlelem; lelem++) { 5443 I32 ix = SvIV(*lelem); 5444 if (ix < 0) 5445 ix += max; 5446 if (ix < 0 || ix >= max) 5447 *lelem = &PL_sv_undef; 5448 else { 5449 if (!(*lelem = firstrelem[ix])) 5450 *lelem = &PL_sv_undef; 5451 else if (mod && SvPADTMP(*lelem)) { 5452 *lelem = firstrelem[ix] = sv_mortalcopy(*lelem); 5453 } 5454 } 5455 } 5456 SP = lastlelem; 5457 RETURN; 5458 } 5459 5460 PP(pp_anonlist) 5461 { 5462 dSP; dMARK; 5463 const I32 items = SP - MARK; 5464 SV * const av = MUTABLE_SV(av_make(items, MARK+1)); 5465 SP = MARK; 5466 mXPUSHs((PL_op->op_flags & OPf_SPECIAL) 5467 ? newRV_noinc(av) : av); 5468 RETURN; 5469 } 5470 5471 PP(pp_anonhash) 5472 { 5473 dSP; dMARK; dORIGMARK; 5474 HV* const hv = newHV(); 5475 SV* const retval = sv_2mortal( PL_op->op_flags & OPf_SPECIAL 5476 ? newRV_noinc(MUTABLE_SV(hv)) 5477 : MUTABLE_SV(hv) ); 5478 5479 while (MARK < SP) { 5480 SV * const key = 5481 (MARK++, SvGMAGICAL(*MARK) ? sv_mortalcopy(*MARK) : *MARK); 5482 SV *val; 5483 if (MARK < SP) 5484 { 5485 MARK++; 5486 SvGETMAGIC(*MARK); 5487 val = newSV(0); 5488 sv_setsv_nomg(val, *MARK); 5489 } 5490 else 5491 { 5492 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash"); 5493 val = newSV(0); 5494 } 5495 (void)hv_store_ent(hv,key,val,0); 5496 } 5497 SP = ORIGMARK; 5498 XPUSHs(retval); 5499 RETURN; 5500 } 5501 5502 PP(pp_splice) 5503 { 5504 dSP; dMARK; dORIGMARK; 5505 int num_args = (SP - MARK); 5506 AV *ary = MUTABLE_AV(*++MARK); 5507 SV **src; 5508 SV **dst; 5509 SSize_t i; 5510 SSize_t offset; 5511 SSize_t length; 5512 SSize_t newlen; 5513 SSize_t after; 5514 SSize_t diff; 5515 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5516 5517 if (mg) { 5518 return Perl_tied_method(aTHX_ SV_CONST(SPLICE), mark - 1, MUTABLE_SV(ary), mg, 5519 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, 5520 sp - mark); 5521 } 5522 5523 if (SvREADONLY(ary)) 5524 Perl_croak_no_modify(); 5525 5526 SP++; 5527 5528 if (++MARK < SP) { 5529 offset = i = SvIV(*MARK); 5530 if (offset < 0) 5531 offset += AvFILLp(ary) + 1; 5532 if (offset < 0) 5533 DIE(aTHX_ PL_no_aelem, i); 5534 if (++MARK < SP) { 5535 length = SvIVx(*MARK++); 5536 if (length < 0) { 5537 length += AvFILLp(ary) - offset + 1; 5538 if (length < 0) 5539 length = 0; 5540 } 5541 } 5542 else 5543 length = AvMAX(ary) + 1; /* close enough to infinity */ 5544 } 5545 else { 5546 offset = 0; 5547 length = AvMAX(ary) + 1; 5548 } 5549 if (offset > AvFILLp(ary) + 1) { 5550 if (num_args > 2) 5551 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" ); 5552 offset = AvFILLp(ary) + 1; 5553 } 5554 after = AvFILLp(ary) + 1 - (offset + length); 5555 if (after < 0) { /* not that much array */ 5556 length += after; /* offset+length now in array */ 5557 after = 0; 5558 if (!AvALLOC(ary)) 5559 av_extend(ary, 0); 5560 } 5561 5562 /* At this point, MARK .. SP-1 is our new LIST */ 5563 5564 newlen = SP - MARK; 5565 diff = newlen - length; 5566 if (newlen && !AvREAL(ary) && AvREIFY(ary)) 5567 av_reify(ary); 5568 5569 /* make new elements SVs now: avoid problems if they're from the array */ 5570 for (dst = MARK, i = newlen; i; i--) { 5571 SV * const h = *dst; 5572 *dst++ = newSVsv(h); 5573 } 5574 5575 if (diff < 0) { /* shrinking the area */ 5576 SV **tmparyval = NULL; 5577 if (newlen) { 5578 Newx(tmparyval, newlen, SV*); /* so remember insertion */ 5579 Copy(MARK, tmparyval, newlen, SV*); 5580 } 5581 5582 MARK = ORIGMARK + 1; 5583 if (GIMME_V == G_ARRAY) { /* copy return vals to stack */ 5584 const bool real = cBOOL(AvREAL(ary)); 5585 MEXTEND(MARK, length); 5586 if (real) 5587 EXTEND_MORTAL(length); 5588 for (i = 0, dst = MARK; i < length; i++) { 5589 if ((*dst = AvARRAY(ary)[i+offset])) { 5590 if (real) 5591 sv_2mortal(*dst); /* free them eventually */ 5592 } 5593 else 5594 *dst = &PL_sv_undef; 5595 dst++; 5596 } 5597 MARK += length - 1; 5598 } 5599 else { 5600 *MARK = AvARRAY(ary)[offset+length-1]; 5601 if (AvREAL(ary)) { 5602 sv_2mortal(*MARK); 5603 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--) 5604 SvREFCNT_dec(*dst++); /* free them now */ 5605 } 5606 if (!*MARK) 5607 *MARK = &PL_sv_undef; 5608 } 5609 AvFILLp(ary) += diff; 5610 5611 /* pull up or down? */ 5612 5613 if (offset < after) { /* easier to pull up */ 5614 if (offset) { /* esp. if nothing to pull */ 5615 src = &AvARRAY(ary)[offset-1]; 5616 dst = src - diff; /* diff is negative */ 5617 for (i = offset; i > 0; i--) /* can't trust Copy */ 5618 *dst-- = *src--; 5619 } 5620 dst = AvARRAY(ary); 5621 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */ 5622 AvMAX(ary) += diff; 5623 } 5624 else { 5625 if (after) { /* anything to pull down? */ 5626 src = AvARRAY(ary) + offset + length; 5627 dst = src + diff; /* diff is negative */ 5628 Move(src, dst, after, SV*); 5629 } 5630 dst = &AvARRAY(ary)[AvFILLp(ary)+1]; 5631 /* avoid later double free */ 5632 } 5633 i = -diff; 5634 while (i) 5635 dst[--i] = NULL; 5636 5637 if (newlen) { 5638 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* ); 5639 Safefree(tmparyval); 5640 } 5641 } 5642 else { /* no, expanding (or same) */ 5643 SV** tmparyval = NULL; 5644 if (length) { 5645 Newx(tmparyval, length, SV*); /* so remember deletion */ 5646 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*); 5647 } 5648 5649 if (diff > 0) { /* expanding */ 5650 /* push up or down? */ 5651 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) { 5652 if (offset) { 5653 src = AvARRAY(ary); 5654 dst = src - diff; 5655 Move(src, dst, offset, SV*); 5656 } 5657 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */ 5658 AvMAX(ary) += diff; 5659 AvFILLp(ary) += diff; 5660 } 5661 else { 5662 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */ 5663 av_extend(ary, AvFILLp(ary) + diff); 5664 AvFILLp(ary) += diff; 5665 5666 if (after) { 5667 dst = AvARRAY(ary) + AvFILLp(ary); 5668 src = dst - diff; 5669 for (i = after; i; i--) { 5670 *dst-- = *src--; 5671 } 5672 } 5673 } 5674 } 5675 5676 if (newlen) { 5677 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* ); 5678 } 5679 5680 MARK = ORIGMARK + 1; 5681 if (GIMME_V == G_ARRAY) { /* copy return vals to stack */ 5682 if (length) { 5683 const bool real = cBOOL(AvREAL(ary)); 5684 if (real) 5685 EXTEND_MORTAL(length); 5686 for (i = 0, dst = MARK; i < length; i++) { 5687 if ((*dst = tmparyval[i])) { 5688 if (real) 5689 sv_2mortal(*dst); /* free them eventually */ 5690 } 5691 else *dst = &PL_sv_undef; 5692 dst++; 5693 } 5694 } 5695 MARK += length - 1; 5696 } 5697 else if (length--) { 5698 *MARK = tmparyval[length]; 5699 if (AvREAL(ary)) { 5700 sv_2mortal(*MARK); 5701 while (length-- > 0) 5702 SvREFCNT_dec(tmparyval[length]); 5703 } 5704 if (!*MARK) 5705 *MARK = &PL_sv_undef; 5706 } 5707 else 5708 *MARK = &PL_sv_undef; 5709 Safefree(tmparyval); 5710 } 5711 5712 if (SvMAGICAL(ary)) 5713 mg_set(MUTABLE_SV(ary)); 5714 5715 SP = MARK; 5716 RETURN; 5717 } 5718 5719 PP(pp_push) 5720 { 5721 dSP; dMARK; dORIGMARK; dTARGET; 5722 AV * const ary = MUTABLE_AV(*++MARK); 5723 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5724 5725 if (mg) { 5726 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); 5727 PUSHMARK(MARK); 5728 PUTBACK; 5729 ENTER_with_name("call_PUSH"); 5730 call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); 5731 LEAVE_with_name("call_PUSH"); 5732 /* SPAGAIN; not needed: SP is assigned to immediately below */ 5733 } 5734 else { 5735 /* PL_delaymagic is restored by JUMPENV_POP on dieing, so we 5736 * only need to save locally, not on the save stack */ 5737 U16 old_delaymagic = PL_delaymagic; 5738 5739 if (SvREADONLY(ary) && MARK < SP) Perl_croak_no_modify(); 5740 PL_delaymagic = DM_DELAY; 5741 for (++MARK; MARK <= SP; MARK++) { 5742 SV *sv; 5743 if (*MARK) SvGETMAGIC(*MARK); 5744 sv = newSV(0); 5745 if (*MARK) 5746 sv_setsv_nomg(sv, *MARK); 5747 av_store(ary, AvFILLp(ary)+1, sv); 5748 } 5749 if (PL_delaymagic & DM_ARRAY_ISA) 5750 mg_set(MUTABLE_SV(ary)); 5751 PL_delaymagic = old_delaymagic; 5752 } 5753 SP = ORIGMARK; 5754 if (OP_GIMME(PL_op, 0) != G_VOID) { 5755 PUSHi( AvFILL(ary) + 1 ); 5756 } 5757 RETURN; 5758 } 5759 5760 /* also used for: pp_pop()*/ 5761 PP(pp_shift) 5762 { 5763 dSP; 5764 AV * const av = PL_op->op_flags & OPf_SPECIAL 5765 ? MUTABLE_AV(GvAVn(PL_defgv)) : MUTABLE_AV(POPs); 5766 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av); 5767 EXTEND(SP, 1); 5768 assert (sv); 5769 if (AvREAL(av)) 5770 (void)sv_2mortal(sv); 5771 PUSHs(sv); 5772 RETURN; 5773 } 5774 5775 PP(pp_unshift) 5776 { 5777 dSP; dMARK; dORIGMARK; dTARGET; 5778 AV *ary = MUTABLE_AV(*++MARK); 5779 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5780 5781 if (mg) { 5782 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); 5783 PUSHMARK(MARK); 5784 PUTBACK; 5785 ENTER_with_name("call_UNSHIFT"); 5786 call_sv(SV_CONST(UNSHIFT),G_SCALAR|G_DISCARD|G_METHOD_NAMED); 5787 LEAVE_with_name("call_UNSHIFT"); 5788 /* SPAGAIN; not needed: SP is assigned to immediately below */ 5789 } 5790 else { 5791 /* PL_delaymagic is restored by JUMPENV_POP on dieing, so we 5792 * only need to save locally, not on the save stack */ 5793 U16 old_delaymagic = PL_delaymagic; 5794 SSize_t i = 0; 5795 5796 av_unshift(ary, SP - MARK); 5797 PL_delaymagic = DM_DELAY; 5798 while (MARK < SP) { 5799 SV * const sv = newSVsv(*++MARK); 5800 (void)av_store(ary, i++, sv); 5801 } 5802 if (PL_delaymagic & DM_ARRAY_ISA) 5803 mg_set(MUTABLE_SV(ary)); 5804 PL_delaymagic = old_delaymagic; 5805 } 5806 SP = ORIGMARK; 5807 if (OP_GIMME(PL_op, 0) != G_VOID) { 5808 PUSHi( AvFILL(ary) + 1 ); 5809 } 5810 RETURN; 5811 } 5812 5813 PP(pp_reverse) 5814 { 5815 dSP; dMARK; 5816 5817 if (GIMME_V == G_ARRAY) { 5818 if (PL_op->op_private & OPpREVERSE_INPLACE) { 5819 AV *av; 5820 5821 /* See pp_sort() */ 5822 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV); 5823 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */ 5824 av = MUTABLE_AV((*SP)); 5825 /* In-place reversing only happens in void context for the array 5826 * assignment. We don't need to push anything on the stack. */ 5827 SP = MARK; 5828 5829 if (SvMAGICAL(av)) { 5830 SSize_t i, j; 5831 SV *tmp = sv_newmortal(); 5832 /* For SvCANEXISTDELETE */ 5833 HV *stash; 5834 const MAGIC *mg; 5835 bool can_preserve = SvCANEXISTDELETE(av); 5836 5837 for (i = 0, j = av_tindex(av); i < j; ++i, --j) { 5838 SV *begin, *end; 5839 5840 if (can_preserve) { 5841 if (!av_exists(av, i)) { 5842 if (av_exists(av, j)) { 5843 SV *sv = av_delete(av, j, 0); 5844 begin = *av_fetch(av, i, TRUE); 5845 sv_setsv_mg(begin, sv); 5846 } 5847 continue; 5848 } 5849 else if (!av_exists(av, j)) { 5850 SV *sv = av_delete(av, i, 0); 5851 end = *av_fetch(av, j, TRUE); 5852 sv_setsv_mg(end, sv); 5853 continue; 5854 } 5855 } 5856 5857 begin = *av_fetch(av, i, TRUE); 5858 end = *av_fetch(av, j, TRUE); 5859 sv_setsv(tmp, begin); 5860 sv_setsv_mg(begin, end); 5861 sv_setsv_mg(end, tmp); 5862 } 5863 } 5864 else { 5865 SV **begin = AvARRAY(av); 5866 5867 if (begin) { 5868 SV **end = begin + AvFILLp(av); 5869 5870 while (begin < end) { 5871 SV * const tmp = *begin; 5872 *begin++ = *end; 5873 *end-- = tmp; 5874 } 5875 } 5876 } 5877 } 5878 else { 5879 SV **oldsp = SP; 5880 MARK++; 5881 while (MARK < SP) { 5882 SV * const tmp = *MARK; 5883 *MARK++ = *SP; 5884 *SP-- = tmp; 5885 } 5886 /* safe as long as stack cannot get extended in the above */ 5887 SP = oldsp; 5888 } 5889 } 5890 else { 5891 char *up; 5892 dTARGET; 5893 STRLEN len; 5894 5895 SvUTF8_off(TARG); /* decontaminate */ 5896 if (SP - MARK > 1) { 5897 do_join(TARG, &PL_sv_no, MARK, SP); 5898 SP = MARK + 1; 5899 SETs(TARG); 5900 } else if (SP > MARK) { 5901 sv_setsv(TARG, *SP); 5902 SETs(TARG); 5903 } else { 5904 sv_setsv(TARG, DEFSV); 5905 XPUSHs(TARG); 5906 } 5907 SvSETMAGIC(TARG); /* remove any utf8 length magic */ 5908 5909 up = SvPV_force(TARG, len); 5910 if (len > 1) { 5911 char *down; 5912 if (DO_UTF8(TARG)) { /* first reverse each character */ 5913 U8* s = (U8*)SvPVX(TARG); 5914 const U8* send = (U8*)(s + len); 5915 while (s < send) { 5916 if (UTF8_IS_INVARIANT(*s)) { 5917 s++; 5918 continue; 5919 } 5920 else { 5921 if (!utf8_to_uvchr_buf(s, send, 0)) 5922 break; 5923 up = (char*)s; 5924 s += UTF8SKIP(s); 5925 down = (char*)(s - 1); 5926 /* reverse this character */ 5927 while (down > up) { 5928 const char tmp = *up; 5929 *up++ = *down; 5930 *down-- = tmp; 5931 } 5932 } 5933 } 5934 up = SvPVX(TARG); 5935 } 5936 down = SvPVX(TARG) + len - 1; 5937 while (down > up) { 5938 const char tmp = *up; 5939 *up++ = *down; 5940 *down-- = tmp; 5941 } 5942 (void)SvPOK_only_UTF8(TARG); 5943 } 5944 } 5945 RETURN; 5946 } 5947 5948 PP(pp_split) 5949 { 5950 dSP; dTARG; 5951 AV *ary = ( (PL_op->op_private & OPpSPLIT_ASSIGN) /* @a = split */ 5952 && (PL_op->op_flags & OPf_STACKED)) /* @{expr} = split */ 5953 ? (AV *)POPs : NULL; 5954 IV limit = POPi; /* note, negative is forever */ 5955 SV * const sv = POPs; 5956 STRLEN len; 5957 const char *s = SvPV_const(sv, len); 5958 const bool do_utf8 = DO_UTF8(sv); 5959 const bool in_uni_8_bit = IN_UNI_8_BIT; 5960 const char *strend = s + len; 5961 PMOP *pm = cPMOPx(PL_op); 5962 REGEXP *rx; 5963 SV *dstr; 5964 const char *m; 5965 SSize_t iters = 0; 5966 const STRLEN slen = do_utf8 5967 ? utf8_length((U8*)s, (U8*)strend) 5968 : (STRLEN)(strend - s); 5969 SSize_t maxiters = slen + 10; 5970 I32 trailing_empty = 0; 5971 const char *orig; 5972 const IV origlimit = limit; 5973 I32 realarray = 0; 5974 I32 base; 5975 const U8 gimme = GIMME_V; 5976 bool gimme_scalar; 5977 I32 oldsave = PL_savestack_ix; 5978 U32 make_mortal = SVs_TEMP; 5979 bool multiline = 0; 5980 MAGIC *mg = NULL; 5981 5982 rx = PM_GETRE(pm); 5983 5984 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && 5985 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); 5986 5987 /* handle @ary = split(...) optimisation */ 5988 if (PL_op->op_private & OPpSPLIT_ASSIGN) { 5989 if (!(PL_op->op_flags & OPf_STACKED)) { 5990 if (PL_op->op_private & OPpSPLIT_LEX) { 5991 if (PL_op->op_private & OPpLVAL_INTRO) 5992 SAVECLEARSV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)); 5993 ary = (AV *)PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff); 5994 } 5995 else { 5996 GV *gv = 5997 #ifdef USE_ITHREADS 5998 MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)); 5999 #else 6000 pm->op_pmreplrootu.op_pmtargetgv; 6001 #endif 6002 if (PL_op->op_private & OPpLVAL_INTRO) 6003 ary = save_ary(gv); 6004 else 6005 ary = GvAVn(gv); 6006 } 6007 /* skip anything pushed by OPpLVAL_INTRO above */ 6008 oldsave = PL_savestack_ix; 6009 } 6010 6011 realarray = 1; 6012 PUTBACK; 6013 av_extend(ary,0); 6014 (void)sv_2mortal(SvREFCNT_inc_simple_NN(sv)); 6015 av_clear(ary); 6016 SPAGAIN; 6017 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) { 6018 PUSHMARK(SP); 6019 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg)); 6020 } 6021 else { 6022 if (!AvREAL(ary)) { 6023 I32 i; 6024 AvREAL_on(ary); 6025 AvREIFY_off(ary); 6026 for (i = AvFILLp(ary); i >= 0; i--) 6027 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */ 6028 } 6029 /* temporarily switch stacks */ 6030 SAVESWITCHSTACK(PL_curstack, ary); 6031 make_mortal = 0; 6032 } 6033 } 6034 6035 base = SP - PL_stack_base; 6036 orig = s; 6037 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) { 6038 if (do_utf8) { 6039 while (s < strend && isSPACE_utf8_safe(s, strend)) 6040 s += UTF8SKIP(s); 6041 } 6042 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { 6043 while (s < strend && isSPACE_LC(*s)) 6044 s++; 6045 } 6046 else if (in_uni_8_bit) { 6047 while (s < strend && isSPACE_L1(*s)) 6048 s++; 6049 } 6050 else { 6051 while (s < strend && isSPACE(*s)) 6052 s++; 6053 } 6054 } 6055 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) { 6056 multiline = 1; 6057 } 6058 6059 gimme_scalar = gimme == G_SCALAR && !ary; 6060 6061 if (!limit) 6062 limit = maxiters + 2; 6063 if (RX_EXTFLAGS(rx) & RXf_WHITE) { 6064 while (--limit) { 6065 m = s; 6066 /* this one uses 'm' and is a negative test */ 6067 if (do_utf8) { 6068 while (m < strend && ! isSPACE_utf8_safe(m, strend) ) { 6069 const int t = UTF8SKIP(m); 6070 /* isSPACE_utf8_safe returns FALSE for malform utf8 */ 6071 if (strend - m < t) 6072 m = strend; 6073 else 6074 m += t; 6075 } 6076 } 6077 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) 6078 { 6079 while (m < strend && !isSPACE_LC(*m)) 6080 ++m; 6081 } 6082 else if (in_uni_8_bit) { 6083 while (m < strend && !isSPACE_L1(*m)) 6084 ++m; 6085 } else { 6086 while (m < strend && !isSPACE(*m)) 6087 ++m; 6088 } 6089 if (m >= strend) 6090 break; 6091 6092 if (gimme_scalar) { 6093 iters++; 6094 if (m-s == 0) 6095 trailing_empty++; 6096 else 6097 trailing_empty = 0; 6098 } else { 6099 dstr = newSVpvn_flags(s, m-s, 6100 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 6101 XPUSHs(dstr); 6102 } 6103 6104 /* skip the whitespace found last */ 6105 if (do_utf8) 6106 s = m + UTF8SKIP(m); 6107 else 6108 s = m + 1; 6109 6110 /* this one uses 's' and is a positive test */ 6111 if (do_utf8) { 6112 while (s < strend && isSPACE_utf8_safe(s, strend) ) 6113 s += UTF8SKIP(s); 6114 } 6115 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) 6116 { 6117 while (s < strend && isSPACE_LC(*s)) 6118 ++s; 6119 } 6120 else if (in_uni_8_bit) { 6121 while (s < strend && isSPACE_L1(*s)) 6122 ++s; 6123 } else { 6124 while (s < strend && isSPACE(*s)) 6125 ++s; 6126 } 6127 } 6128 } 6129 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) { 6130 while (--limit) { 6131 for (m = s; m < strend && *m != '\n'; m++) 6132 ; 6133 m++; 6134 if (m >= strend) 6135 break; 6136 6137 if (gimme_scalar) { 6138 iters++; 6139 if (m-s == 0) 6140 trailing_empty++; 6141 else 6142 trailing_empty = 0; 6143 } else { 6144 dstr = newSVpvn_flags(s, m-s, 6145 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 6146 XPUSHs(dstr); 6147 } 6148 s = m; 6149 } 6150 } 6151 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) { 6152 /* 6153 Pre-extend the stack, either the number of bytes or 6154 characters in the string or a limited amount, triggered by: 6155 6156 my ($x, $y) = split //, $str; 6157 or 6158 split //, $str, $i; 6159 */ 6160 if (!gimme_scalar) { 6161 const IV items = limit - 1; 6162 /* setting it to -1 will trigger a panic in EXTEND() */ 6163 const SSize_t sslen = slen > SSize_t_MAX ? -1 : (SSize_t)slen; 6164 if (items >=0 && items < sslen) 6165 EXTEND(SP, items); 6166 else 6167 EXTEND(SP, sslen); 6168 } 6169 6170 if (do_utf8) { 6171 while (--limit) { 6172 /* keep track of how many bytes we skip over */ 6173 m = s; 6174 s += UTF8SKIP(s); 6175 if (gimme_scalar) { 6176 iters++; 6177 if (s-m == 0) 6178 trailing_empty++; 6179 else 6180 trailing_empty = 0; 6181 } else { 6182 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal); 6183 6184 PUSHs(dstr); 6185 } 6186 6187 if (s >= strend) 6188 break; 6189 } 6190 } else { 6191 while (--limit) { 6192 if (gimme_scalar) { 6193 iters++; 6194 } else { 6195 dstr = newSVpvn(s, 1); 6196 6197 6198 if (make_mortal) 6199 sv_2mortal(dstr); 6200 6201 PUSHs(dstr); 6202 } 6203 6204 s++; 6205 6206 if (s >= strend) 6207 break; 6208 } 6209 } 6210 } 6211 else if (do_utf8 == (RX_UTF8(rx) != 0) && 6212 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx) 6213 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL) 6214 && !(RX_EXTFLAGS(rx) & RXf_IS_ANCHORED)) { 6215 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL); 6216 SV * const csv = CALLREG_INTUIT_STRING(rx); 6217 6218 len = RX_MINLENRET(rx); 6219 if (len == 1 && !RX_UTF8(rx) && !tail) { 6220 const char c = *SvPV_nolen_const(csv); 6221 while (--limit) { 6222 for (m = s; m < strend && *m != c; m++) 6223 ; 6224 if (m >= strend) 6225 break; 6226 if (gimme_scalar) { 6227 iters++; 6228 if (m-s == 0) 6229 trailing_empty++; 6230 else 6231 trailing_empty = 0; 6232 } else { 6233 dstr = newSVpvn_flags(s, m-s, 6234 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 6235 XPUSHs(dstr); 6236 } 6237 /* The rx->minlen is in characters but we want to step 6238 * s ahead by bytes. */ 6239 if (do_utf8) 6240 s = (char*)utf8_hop_forward((U8*) m, len, (U8*) strend); 6241 else 6242 s = m + len; /* Fake \n at the end */ 6243 } 6244 } 6245 else { 6246 while (s < strend && --limit && 6247 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend, 6248 csv, multiline ? FBMrf_MULTILINE : 0)) ) 6249 { 6250 if (gimme_scalar) { 6251 iters++; 6252 if (m-s == 0) 6253 trailing_empty++; 6254 else 6255 trailing_empty = 0; 6256 } else { 6257 dstr = newSVpvn_flags(s, m-s, 6258 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 6259 XPUSHs(dstr); 6260 } 6261 /* The rx->minlen is in characters but we want to step 6262 * s ahead by bytes. */ 6263 if (do_utf8) 6264 s = (char*)utf8_hop_forward((U8*)m, len, (U8 *) strend); 6265 else 6266 s = m + len; /* Fake \n at the end */ 6267 } 6268 } 6269 } 6270 else { 6271 maxiters += slen * RX_NPARENS(rx); 6272 while (s < strend && --limit) 6273 { 6274 I32 rex_return; 6275 PUTBACK; 6276 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1, 6277 sv, NULL, 0); 6278 SPAGAIN; 6279 if (rex_return == 0) 6280 break; 6281 TAINT_IF(RX_MATCH_TAINTED(rx)); 6282 /* we never pass the REXEC_COPY_STR flag, so it should 6283 * never get copied */ 6284 assert(!RX_MATCH_COPIED(rx)); 6285 m = RX_OFFS(rx)[0].start + orig; 6286 6287 if (gimme_scalar) { 6288 iters++; 6289 if (m-s == 0) 6290 trailing_empty++; 6291 else 6292 trailing_empty = 0; 6293 } else { 6294 dstr = newSVpvn_flags(s, m-s, 6295 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 6296 XPUSHs(dstr); 6297 } 6298 if (RX_NPARENS(rx)) { 6299 I32 i; 6300 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) { 6301 s = RX_OFFS(rx)[i].start + orig; 6302 m = RX_OFFS(rx)[i].end + orig; 6303 6304 /* japhy (07/27/01) -- the (m && s) test doesn't catch 6305 parens that didn't match -- they should be set to 6306 undef, not the empty string */ 6307 if (gimme_scalar) { 6308 iters++; 6309 if (m-s == 0) 6310 trailing_empty++; 6311 else 6312 trailing_empty = 0; 6313 } else { 6314 if (m >= orig && s >= orig) { 6315 dstr = newSVpvn_flags(s, m-s, 6316 (do_utf8 ? SVf_UTF8 : 0) 6317 | make_mortal); 6318 } 6319 else 6320 dstr = &PL_sv_undef; /* undef, not "" */ 6321 XPUSHs(dstr); 6322 } 6323 6324 } 6325 } 6326 s = RX_OFFS(rx)[0].end + orig; 6327 } 6328 } 6329 6330 if (!gimme_scalar) { 6331 iters = (SP - PL_stack_base) - base; 6332 } 6333 if (iters > maxiters) 6334 DIE(aTHX_ "Split loop"); 6335 6336 /* keep field after final delim? */ 6337 if (s < strend || (iters && origlimit)) { 6338 if (!gimme_scalar) { 6339 const STRLEN l = strend - s; 6340 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 6341 XPUSHs(dstr); 6342 } 6343 iters++; 6344 } 6345 else if (!origlimit) { 6346 if (gimme_scalar) { 6347 iters -= trailing_empty; 6348 } else { 6349 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) { 6350 if (TOPs && !make_mortal) 6351 sv_2mortal(TOPs); 6352 *SP-- = NULL; 6353 iters--; 6354 } 6355 } 6356 } 6357 6358 PUTBACK; 6359 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */ 6360 SPAGAIN; 6361 if (realarray) { 6362 if (!mg) { 6363 if (SvSMAGICAL(ary)) { 6364 PUTBACK; 6365 mg_set(MUTABLE_SV(ary)); 6366 SPAGAIN; 6367 } 6368 if (gimme == G_ARRAY) { 6369 EXTEND(SP, iters); 6370 Copy(AvARRAY(ary), SP + 1, iters, SV*); 6371 SP += iters; 6372 RETURN; 6373 } 6374 } 6375 else { 6376 PUTBACK; 6377 ENTER_with_name("call_PUSH"); 6378 call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); 6379 LEAVE_with_name("call_PUSH"); 6380 SPAGAIN; 6381 if (gimme == G_ARRAY) { 6382 SSize_t i; 6383 /* EXTEND should not be needed - we just popped them */ 6384 EXTEND(SP, iters); 6385 for (i=0; i < iters; i++) { 6386 SV **svp = av_fetch(ary, i, FALSE); 6387 PUSHs((svp) ? *svp : &PL_sv_undef); 6388 } 6389 RETURN; 6390 } 6391 } 6392 } 6393 else { 6394 if (gimme == G_ARRAY) 6395 RETURN; 6396 } 6397 6398 GETTARGET; 6399 XPUSHi(iters); 6400 RETURN; 6401 } 6402 6403 PP(pp_once) 6404 { 6405 dSP; 6406 SV *const sv = PAD_SVl(PL_op->op_targ); 6407 6408 if (SvPADSTALE(sv)) { 6409 /* First time. */ 6410 SvPADSTALE_off(sv); 6411 RETURNOP(cLOGOP->op_other); 6412 } 6413 RETURNOP(cLOGOP->op_next); 6414 } 6415 6416 PP(pp_lock) 6417 { 6418 dSP; 6419 dTOPss; 6420 SV *retsv = sv; 6421 SvLOCK(sv); 6422 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV 6423 || SvTYPE(retsv) == SVt_PVCV) { 6424 retsv = refto(retsv); 6425 } 6426 SETs(retsv); 6427 RETURN; 6428 } 6429 6430 6431 /* used for: pp_padany(), pp_custom(); plus any system ops 6432 * that aren't implemented on a particular platform */ 6433 6434 PP(unimplemented_op) 6435 { 6436 const Optype op_type = PL_op->op_type; 6437 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope 6438 with out of range op numbers - it only "special" cases op_custom. 6439 Secondly, as the three ops we "panic" on are padmy, mapstart and custom, 6440 if we get here for a custom op then that means that the custom op didn't 6441 have an implementation. Given that OP_NAME() looks up the custom op 6442 by its op_ppaddr, likely it will return NULL, unless someone (unhelpfully) 6443 registers &Perl_unimplemented_op as the address of their custom op. 6444 NULL doesn't generate a useful error message. "custom" does. */ 6445 const char *const name = op_type >= OP_max 6446 ? "[out of range]" : PL_op_name[op_type]; 6447 if(OP_IS_SOCKET(op_type)) 6448 DIE(aTHX_ PL_no_sock_func, name); 6449 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); 6450 } 6451 6452 static void 6453 S_maybe_unwind_defav(pTHX) 6454 { 6455 if (CX_CUR()->cx_type & CXp_HASARGS) { 6456 PERL_CONTEXT *cx = CX_CUR(); 6457 6458 assert(CxHASARGS(cx)); 6459 cx_popsub_args(cx); 6460 cx->cx_type &= ~CXp_HASARGS; 6461 } 6462 } 6463 6464 /* For sorting out arguments passed to a &CORE:: subroutine */ 6465 PP(pp_coreargs) 6466 { 6467 dSP; 6468 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0; 6469 int defgv = PL_opargs[opnum] & OA_DEFGV ||opnum==OP_GLOB, whicharg = 0; 6470 AV * const at_ = GvAV(PL_defgv); 6471 SV **svp = at_ ? AvARRAY(at_) : NULL; 6472 I32 minargs = 0, maxargs = 0, numargs = at_ ? AvFILLp(at_)+1 : 0; 6473 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0; 6474 bool seen_question = 0; 6475 const char *err = NULL; 6476 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK; 6477 6478 /* Count how many args there are first, to get some idea how far to 6479 extend the stack. */ 6480 while (oa) { 6481 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; } 6482 maxargs++; 6483 if (oa & OA_OPTIONAL) seen_question = 1; 6484 if (!seen_question) minargs++; 6485 oa >>= 4; 6486 } 6487 6488 if(numargs < minargs) err = "Not enough"; 6489 else if(numargs > maxargs) err = "Too many"; 6490 if (err) 6491 /* diag_listed_as: Too many arguments for %s */ 6492 Perl_croak(aTHX_ 6493 "%s arguments for %s", err, 6494 opnum ? PL_op_desc[opnum] : SvPV_nolen_const(cSVOP_sv) 6495 ); 6496 6497 /* Reset the stack pointer. Without this, we end up returning our own 6498 arguments in list context, in addition to the values we are supposed 6499 to return. nextstate usually does this on sub entry, but we need 6500 to run the next op with the caller's hints, so we cannot have a 6501 nextstate. */ 6502 SP = PL_stack_base + CX_CUR()->blk_oldsp; 6503 6504 if(!maxargs) RETURN; 6505 6506 /* We do this here, rather than with a separate pushmark op, as it has 6507 to come in between two things this function does (stack reset and 6508 arg pushing). This seems the easiest way to do it. */ 6509 if (pushmark) { 6510 PUTBACK; 6511 (void)Perl_pp_pushmark(aTHX); 6512 } 6513 6514 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs); 6515 PUTBACK; /* The code below can die in various places. */ 6516 6517 oa = PL_opargs[opnum] >> OASHIFT; 6518 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) { 6519 whicharg++; 6520 switch (oa & 7) { 6521 case OA_SCALAR: 6522 try_defsv: 6523 if (!numargs && defgv && whicharg == minargs + 1) { 6524 PUSHs(DEFSV); 6525 } 6526 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL); 6527 break; 6528 case OA_LIST: 6529 while (numargs--) { 6530 PUSHs(svp && *svp ? *svp : &PL_sv_undef); 6531 svp++; 6532 } 6533 RETURN; 6534 case OA_AVREF: 6535 if (!numargs) { 6536 GV *gv; 6537 if (CvUNIQUE(find_runcv_where(FIND_RUNCV_level_eq,1,NULL))) 6538 gv = PL_argvgv; 6539 else { 6540 S_maybe_unwind_defav(aTHX); 6541 gv = PL_defgv; 6542 } 6543 PUSHs((SV *)GvAVn(gv)); 6544 break; 6545 } 6546 if (!svp || !*svp || !SvROK(*svp) 6547 || SvTYPE(SvRV(*svp)) != SVt_PVAV) 6548 DIE(aTHX_ 6549 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/ 6550 "Type of arg %d to &CORE::%s must be array reference", 6551 whicharg, PL_op_desc[opnum] 6552 ); 6553 PUSHs(SvRV(*svp)); 6554 break; 6555 case OA_HVREF: 6556 if (!svp || !*svp || !SvROK(*svp) 6557 || ( SvTYPE(SvRV(*svp)) != SVt_PVHV 6558 && ( opnum == OP_DBMCLOSE || opnum == OP_DBMOPEN 6559 || SvTYPE(SvRV(*svp)) != SVt_PVAV ))) 6560 DIE(aTHX_ 6561 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/ 6562 "Type of arg %d to &CORE::%s must be hash%s reference", 6563 whicharg, PL_op_desc[opnum], 6564 opnum == OP_DBMCLOSE || opnum == OP_DBMOPEN 6565 ? "" 6566 : " or array" 6567 ); 6568 PUSHs(SvRV(*svp)); 6569 break; 6570 case OA_FILEREF: 6571 if (!numargs) PUSHs(NULL); 6572 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp))) 6573 /* no magic here, as the prototype will have added an extra 6574 refgen and we just want what was there before that */ 6575 PUSHs(SvRV(*svp)); 6576 else { 6577 const bool constr = PL_op->op_private & whicharg; 6578 PUSHs(S_rv2gv(aTHX_ 6579 svp && *svp ? *svp : &PL_sv_undef, 6580 constr, cBOOL(CopHINTS_get(PL_curcop) & HINT_STRICT_REFS), 6581 !constr 6582 )); 6583 } 6584 break; 6585 case OA_SCALARREF: 6586 if (!numargs) goto try_defsv; 6587 else { 6588 const bool wantscalar = 6589 PL_op->op_private & OPpCOREARGS_SCALARMOD; 6590 if (!svp || !*svp || !SvROK(*svp) 6591 /* We have to permit globrefs even for the \$ proto, as 6592 *foo is indistinguishable from ${\*foo}, and the proto- 6593 type permits the latter. */ 6594 || SvTYPE(SvRV(*svp)) > ( 6595 wantscalar ? SVt_PVLV 6596 : opnum == OP_LOCK || opnum == OP_UNDEF 6597 ? SVt_PVCV 6598 : SVt_PVHV 6599 ) 6600 ) 6601 DIE(aTHX_ 6602 "Type of arg %d to &CORE::%s must be %s", 6603 whicharg, PL_op_name[opnum], 6604 wantscalar 6605 ? "scalar reference" 6606 : opnum == OP_LOCK || opnum == OP_UNDEF 6607 ? "reference to one of [$@%&*]" 6608 : "reference to one of [$@%*]" 6609 ); 6610 PUSHs(SvRV(*svp)); 6611 if (opnum == OP_UNDEF && SvRV(*svp) == (SV *)PL_defgv) { 6612 /* Undo @_ localisation, so that sub exit does not undo 6613 part of our undeffing. */ 6614 S_maybe_unwind_defav(aTHX); 6615 } 6616 } 6617 break; 6618 default: 6619 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7)); 6620 } 6621 oa = oa >> 4; 6622 } 6623 6624 RETURN; 6625 } 6626 6627 /* Implement CORE::keys(),values(),each(). 6628 * 6629 * We won't know until run-time whether the arg is an array or hash, 6630 * so this op calls 6631 * 6632 * pp_keys/pp_values/pp_each 6633 * or 6634 * pp_akeys/pp_avalues/pp_aeach 6635 * 6636 * as appropriate (or whatever pp function actually implements the OP_FOO 6637 * functionality for each FOO). 6638 */ 6639 6640 PP(pp_avhvswitch) 6641 { 6642 dVAR; dSP; 6643 return PL_ppaddr[ 6644 (SvTYPE(TOPs) == SVt_PVAV ? OP_AEACH : OP_EACH) 6645 + (PL_op->op_private & OPpAVHVSWITCH_MASK) 6646 ](aTHX); 6647 } 6648 6649 PP(pp_runcv) 6650 { 6651 dSP; 6652 CV *cv; 6653 if (PL_op->op_private & OPpOFFBYONE) { 6654 cv = find_runcv_where(FIND_RUNCV_level_eq, 1, NULL); 6655 } 6656 else cv = find_runcv(NULL); 6657 XPUSHs(CvEVAL(cv) ? &PL_sv_undef : sv_2mortal(newRV((SV *)cv))); 6658 RETURN; 6659 } 6660 6661 static void 6662 S_localise_aelem_lval(pTHX_ AV * const av, SV * const keysv, 6663 const bool can_preserve) 6664 { 6665 const SSize_t ix = SvIV(keysv); 6666 if (can_preserve ? av_exists(av, ix) : TRUE) { 6667 SV ** const svp = av_fetch(av, ix, 1); 6668 if (!svp || !*svp) 6669 Perl_croak(aTHX_ PL_no_aelem, ix); 6670 save_aelem(av, ix, svp); 6671 } 6672 else 6673 SAVEADELETE(av, ix); 6674 } 6675 6676 static void 6677 S_localise_helem_lval(pTHX_ HV * const hv, SV * const keysv, 6678 const bool can_preserve) 6679 { 6680 if (can_preserve ? hv_exists_ent(hv, keysv, 0) : TRUE) { 6681 HE * const he = hv_fetch_ent(hv, keysv, 1, 0); 6682 SV ** const svp = he ? &HeVAL(he) : NULL; 6683 if (!svp || !*svp) 6684 Perl_croak(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 6685 save_helem_flags(hv, keysv, svp, 0); 6686 } 6687 else 6688 SAVEHDELETE(hv, keysv); 6689 } 6690 6691 static void 6692 S_localise_gv_slot(pTHX_ GV *gv, U8 type) 6693 { 6694 if (type == OPpLVREF_SV) { 6695 save_pushptrptr(gv, SvREFCNT_inc_simple(GvSV(gv)), SAVEt_GVSV); 6696 GvSV(gv) = 0; 6697 } 6698 else if (type == OPpLVREF_AV) 6699 /* XXX Inefficient, as it creates a new AV, which we are 6700 about to clobber. */ 6701 save_ary(gv); 6702 else { 6703 assert(type == OPpLVREF_HV); 6704 /* XXX Likewise inefficient. */ 6705 save_hash(gv); 6706 } 6707 } 6708 6709 6710 PP(pp_refassign) 6711 { 6712 dSP; 6713 SV * const key = PL_op->op_private & OPpLVREF_ELEM ? POPs : NULL; 6714 SV * const left = PL_op->op_flags & OPf_STACKED ? POPs : NULL; 6715 dTOPss; 6716 const char *bad = NULL; 6717 const U8 type = PL_op->op_private & OPpLVREF_TYPE; 6718 if (!SvROK(sv)) DIE(aTHX_ "Assigned value is not a reference"); 6719 switch (type) { 6720 case OPpLVREF_SV: 6721 if (SvTYPE(SvRV(sv)) > SVt_PVLV) 6722 bad = " SCALAR"; 6723 break; 6724 case OPpLVREF_AV: 6725 if (SvTYPE(SvRV(sv)) != SVt_PVAV) 6726 bad = "n ARRAY"; 6727 break; 6728 case OPpLVREF_HV: 6729 if (SvTYPE(SvRV(sv)) != SVt_PVHV) 6730 bad = " HASH"; 6731 break; 6732 case OPpLVREF_CV: 6733 if (SvTYPE(SvRV(sv)) != SVt_PVCV) 6734 bad = " CODE"; 6735 } 6736 if (bad) 6737 /* diag_listed_as: Assigned value is not %s reference */ 6738 DIE(aTHX_ "Assigned value is not a%s reference", bad); 6739 { 6740 MAGIC *mg; 6741 HV *stash; 6742 switch (left ? SvTYPE(left) : 0) { 6743 case 0: 6744 { 6745 SV * const old = PAD_SV(ARGTARG); 6746 PAD_SETSV(ARGTARG, SvREFCNT_inc_NN(SvRV(sv))); 6747 SvREFCNT_dec(old); 6748 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) 6749 == OPpLVAL_INTRO) 6750 SAVECLEARSV(PAD_SVl(ARGTARG)); 6751 break; 6752 } 6753 case SVt_PVGV: 6754 if (PL_op->op_private & OPpLVAL_INTRO) { 6755 S_localise_gv_slot(aTHX_ (GV *)left, type); 6756 } 6757 gv_setref(left, sv); 6758 SvSETMAGIC(left); 6759 break; 6760 case SVt_PVAV: 6761 assert(key); 6762 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) { 6763 S_localise_aelem_lval(aTHX_ (AV *)left, key, 6764 SvCANEXISTDELETE(left)); 6765 } 6766 av_store((AV *)left, SvIV(key), SvREFCNT_inc_simple_NN(SvRV(sv))); 6767 break; 6768 case SVt_PVHV: 6769 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) { 6770 assert(key); 6771 S_localise_helem_lval(aTHX_ (HV *)left, key, 6772 SvCANEXISTDELETE(left)); 6773 } 6774 (void)hv_store_ent((HV *)left, key, SvREFCNT_inc_simple_NN(SvRV(sv)), 0); 6775 } 6776 if (PL_op->op_flags & OPf_MOD) 6777 SETs(sv_2mortal(newSVsv(sv))); 6778 /* XXX else can weak references go stale before they are read, e.g., 6779 in leavesub? */ 6780 RETURN; 6781 } 6782 } 6783 6784 PP(pp_lvref) 6785 { 6786 dSP; 6787 SV * const ret = sv_2mortal(newSV_type(SVt_PVMG)); 6788 SV * const elem = PL_op->op_private & OPpLVREF_ELEM ? POPs : NULL; 6789 SV * const arg = PL_op->op_flags & OPf_STACKED ? POPs : NULL; 6790 MAGIC * const mg = sv_magicext(ret, arg, PERL_MAGIC_lvref, 6791 &PL_vtbl_lvref, (char *)elem, 6792 elem ? HEf_SVKEY : (I32)ARGTARG); 6793 mg->mg_private = PL_op->op_private; 6794 if (PL_op->op_private & OPpLVREF_ITER) 6795 mg->mg_flags |= MGf_PERSIST; 6796 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) { 6797 if (elem) { 6798 MAGIC *mg; 6799 HV *stash; 6800 assert(arg); 6801 { 6802 const bool can_preserve = SvCANEXISTDELETE(arg); 6803 if (SvTYPE(arg) == SVt_PVAV) 6804 S_localise_aelem_lval(aTHX_ (AV *)arg, elem, can_preserve); 6805 else 6806 S_localise_helem_lval(aTHX_ (HV *)arg, elem, can_preserve); 6807 } 6808 } 6809 else if (arg) { 6810 S_localise_gv_slot(aTHX_ (GV *)arg, 6811 PL_op->op_private & OPpLVREF_TYPE); 6812 } 6813 else if (!(PL_op->op_private & OPpPAD_STATE)) 6814 SAVECLEARSV(PAD_SVl(ARGTARG)); 6815 } 6816 XPUSHs(ret); 6817 RETURN; 6818 } 6819 6820 PP(pp_lvrefslice) 6821 { 6822 dSP; dMARK; 6823 AV * const av = (AV *)POPs; 6824 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 6825 bool can_preserve = FALSE; 6826 6827 if (UNLIKELY(localizing)) { 6828 MAGIC *mg; 6829 HV *stash; 6830 SV **svp; 6831 6832 can_preserve = SvCANEXISTDELETE(av); 6833 6834 if (SvTYPE(av) == SVt_PVAV) { 6835 SSize_t max = -1; 6836 6837 for (svp = MARK + 1; svp <= SP; svp++) { 6838 const SSize_t elem = SvIV(*svp); 6839 if (elem > max) 6840 max = elem; 6841 } 6842 if (max > AvMAX(av)) 6843 av_extend(av, max); 6844 } 6845 } 6846 6847 while (++MARK <= SP) { 6848 SV * const elemsv = *MARK; 6849 if (UNLIKELY(localizing)) { 6850 if (SvTYPE(av) == SVt_PVAV) 6851 S_localise_aelem_lval(aTHX_ av, elemsv, can_preserve); 6852 else 6853 S_localise_helem_lval(aTHX_ (HV *)av, elemsv, can_preserve); 6854 } 6855 *MARK = sv_2mortal(newSV_type(SVt_PVMG)); 6856 sv_magic(*MARK,(SV *)av,PERL_MAGIC_lvref,(char *)elemsv,HEf_SVKEY); 6857 } 6858 RETURN; 6859 } 6860 6861 PP(pp_lvavref) 6862 { 6863 if (PL_op->op_flags & OPf_STACKED) 6864 Perl_pp_rv2av(aTHX); 6865 else 6866 Perl_pp_padav(aTHX); 6867 { 6868 dSP; 6869 dTOPss; 6870 SETs(0); /* special alias marker that aassign recognises */ 6871 XPUSHs(sv); 6872 RETURN; 6873 } 6874 } 6875 6876 PP(pp_anonconst) 6877 { 6878 dSP; 6879 dTOPss; 6880 SETs(sv_2mortal((SV *)newCONSTSUB(SvTYPE(CopSTASH(PL_curcop))==SVt_PVHV 6881 ? CopSTASH(PL_curcop) 6882 : NULL, 6883 NULL, SvREFCNT_inc_simple_NN(sv)))); 6884 RETURN; 6885 } 6886 6887 6888 /* process one subroutine argument - typically when the sub has a signature: 6889 * introduce PL_curpad[op_targ] and assign to it the value 6890 * for $: (OPf_STACKED ? *sp : $_[N]) 6891 * for @/%: @_[N..$#_] 6892 * 6893 * It's equivalent to 6894 * my $foo = $_[N]; 6895 * or 6896 * my $foo = (value-on-stack) 6897 * or 6898 * my @foo = @_[N..$#_] 6899 * etc 6900 */ 6901 6902 PP(pp_argelem) 6903 { 6904 dTARG; 6905 SV *val; 6906 SV ** padentry; 6907 OP *o = PL_op; 6908 AV *defav = GvAV(PL_defgv); /* @_ */ 6909 IV ix = PTR2IV(cUNOP_AUXo->op_aux); 6910 IV argc; 6911 6912 /* do 'my $var, @var or %var' action */ 6913 padentry = &(PAD_SVl(o->op_targ)); 6914 save_clearsv(padentry); 6915 targ = *padentry; 6916 6917 if ((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_SV) { 6918 if (o->op_flags & OPf_STACKED) { 6919 dSP; 6920 val = POPs; 6921 PUTBACK; 6922 } 6923 else { 6924 SV **svp; 6925 /* should already have been checked */ 6926 assert(ix >= 0); 6927 #if IVSIZE > PTRSIZE 6928 assert(ix <= SSize_t_MAX); 6929 #endif 6930 6931 svp = av_fetch(defav, ix, FALSE); 6932 val = svp ? *svp : &PL_sv_undef; 6933 } 6934 6935 /* $var = $val */ 6936 6937 /* cargo-culted from pp_sassign */ 6938 assert(TAINTING_get || !TAINT_get); 6939 if (UNLIKELY(TAINT_get) && !SvTAINTED(val)) 6940 TAINT_NOT; 6941 6942 SvSetMagicSV(targ, val); 6943 return o->op_next; 6944 } 6945 6946 /* must be AV or HV */ 6947 6948 assert(!(o->op_flags & OPf_STACKED)); 6949 argc = ((IV)AvFILL(defav) + 1) - ix; 6950 6951 /* This is a copy of the relevant parts of pp_aassign(). 6952 */ 6953 if ((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_AV) { 6954 IV i; 6955 6956 if (AvFILL((AV*)targ) > -1) { 6957 /* target should usually be empty. If we get get 6958 * here, someone's been doing some weird closure tricks. 6959 * Make a copy of all args before clearing the array, 6960 * to avoid the equivalent of @a = ($a[0]) prematurely freeing 6961 * elements. See similar code in pp_aassign. 6962 */ 6963 for (i = 0; i < argc; i++) { 6964 SV **svp = av_fetch(defav, ix + i, FALSE); 6965 SV *newsv = newSV(0); 6966 sv_setsv_flags(newsv, 6967 svp ? *svp : &PL_sv_undef, 6968 (SV_DO_COW_SVSETSV|SV_NOSTEAL)); 6969 if (!av_store(defav, ix + i, newsv)) 6970 SvREFCNT_dec_NN(newsv); 6971 } 6972 av_clear((AV*)targ); 6973 } 6974 6975 if (argc <= 0) 6976 return o->op_next; 6977 6978 av_extend((AV*)targ, argc); 6979 6980 i = 0; 6981 while (argc--) { 6982 SV *tmpsv; 6983 SV **svp = av_fetch(defav, ix + i, FALSE); 6984 SV *val = svp ? *svp : &PL_sv_undef; 6985 tmpsv = newSV(0); 6986 sv_setsv(tmpsv, val); 6987 av_store((AV*)targ, i++, tmpsv); 6988 TAINT_NOT; 6989 } 6990 6991 } 6992 else { 6993 IV i; 6994 6995 assert((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_HV); 6996 6997 if (SvRMAGICAL(targ) || HvUSEDKEYS((HV*)targ)) { 6998 /* see "target should usually be empty" comment above */ 6999 for (i = 0; i < argc; i++) { 7000 SV **svp = av_fetch(defav, ix + i, FALSE); 7001 SV *newsv = newSV(0); 7002 sv_setsv_flags(newsv, 7003 svp ? *svp : &PL_sv_undef, 7004 (SV_DO_COW_SVSETSV|SV_NOSTEAL)); 7005 if (!av_store(defav, ix + i, newsv)) 7006 SvREFCNT_dec_NN(newsv); 7007 } 7008 hv_clear((HV*)targ); 7009 } 7010 7011 if (argc <= 0) 7012 return o->op_next; 7013 assert(argc % 2 == 0); 7014 7015 i = 0; 7016 while (argc) { 7017 SV *tmpsv; 7018 SV **svp; 7019 SV *key; 7020 SV *val; 7021 7022 svp = av_fetch(defav, ix + i++, FALSE); 7023 key = svp ? *svp : &PL_sv_undef; 7024 svp = av_fetch(defav, ix + i++, FALSE); 7025 val = svp ? *svp : &PL_sv_undef; 7026 7027 argc -= 2; 7028 if (UNLIKELY(SvGMAGICAL(key))) 7029 key = sv_mortalcopy(key); 7030 tmpsv = newSV(0); 7031 sv_setsv(tmpsv, val); 7032 hv_store_ent((HV*)targ, key, tmpsv, 0); 7033 TAINT_NOT; 7034 } 7035 } 7036 7037 return o->op_next; 7038 } 7039 7040 /* Handle a default value for one subroutine argument (typically as part 7041 * of a subroutine signature). 7042 * It's equivalent to 7043 * @_ > op_targ ? $_[op_targ] : result_of(op_other) 7044 * 7045 * Intended to be used where op_next is an OP_ARGELEM 7046 * 7047 * We abuse the op_targ field slightly: it's an index into @_ rather than 7048 * into PL_curpad. 7049 */ 7050 7051 PP(pp_argdefelem) 7052 { 7053 OP * const o = PL_op; 7054 AV *defav = GvAV(PL_defgv); /* @_ */ 7055 IV ix = (IV)o->op_targ; 7056 7057 assert(ix >= 0); 7058 #if IVSIZE > PTRSIZE 7059 assert(ix <= SSize_t_MAX); 7060 #endif 7061 7062 if (AvFILL(defav) >= ix) { 7063 dSP; 7064 SV **svp = av_fetch(defav, ix, FALSE); 7065 SV *val = svp ? *svp : &PL_sv_undef; 7066 XPUSHs(val); 7067 RETURN; 7068 } 7069 return cLOGOPo->op_other; 7070 } 7071 7072 7073 static SV * 7074 S_find_runcv_name(void) 7075 { 7076 dTHX; 7077 CV *cv; 7078 GV *gv; 7079 SV *sv; 7080 7081 cv = find_runcv(0); 7082 if (!cv) 7083 return &PL_sv_no; 7084 7085 gv = CvGV(cv); 7086 if (!gv) 7087 return &PL_sv_no; 7088 7089 sv = sv_2mortal(newSV(0)); 7090 gv_fullname4(sv, gv, NULL, TRUE); 7091 return sv; 7092 } 7093 7094 /* Check a sub's arguments - i.e. that it has the correct number of args 7095 * (and anything else we might think of in future). Typically used with 7096 * signatured subs. 7097 */ 7098 7099 PP(pp_argcheck) 7100 { 7101 OP * const o = PL_op; 7102 struct op_argcheck_aux *aux = (struct op_argcheck_aux *)cUNOP_AUXo->op_aux; 7103 UV params = aux->params; 7104 UV opt_params = aux->opt_params; 7105 char slurpy = aux->slurpy; 7106 AV *defav = GvAV(PL_defgv); /* @_ */ 7107 UV argc; 7108 bool too_few; 7109 7110 assert(!SvMAGICAL(defav)); 7111 argc = (UV)(AvFILLp(defav) + 1); 7112 too_few = (argc < (params - opt_params)); 7113 7114 if (UNLIKELY(too_few || (!slurpy && argc > params))) 7115 /* diag_listed_as: Too few arguments for subroutine '%s' */ 7116 /* diag_listed_as: Too many arguments for subroutine '%s' */ 7117 Perl_croak_caller("Too %s arguments for subroutine '%" SVf "'", 7118 too_few ? "few" : "many", S_find_runcv_name()); 7119 7120 if (UNLIKELY(slurpy == '%' && argc > params && (argc - params) % 2)) 7121 /* diag_listed_as: Odd name/value argument for subroutine '%s' */ 7122 Perl_croak_caller("Odd name/value argument for subroutine '%" SVf "'", 7123 S_find_runcv_name()); 7124 7125 return NORMAL; 7126 } 7127 7128 PP(pp_isa) 7129 { 7130 dSP; 7131 SV *left, *right; 7132 7133 right = POPs; 7134 left = TOPs; 7135 7136 SETs(boolSV(sv_isa_sv(left, right))); 7137 RETURN; 7138 } 7139 7140 PP(pp_cmpchain_and) 7141 { 7142 dSP; 7143 SV *result = POPs; 7144 PUTBACK; 7145 if (SvTRUE_NN(result)) { 7146 return cLOGOP->op_other; 7147 } else { 7148 TOPs = result; 7149 return NORMAL; 7150 } 7151 } 7152 7153 PP(pp_cmpchain_dup) 7154 { 7155 dSP; 7156 SV *right = TOPs; 7157 SV *left = TOPm1s; 7158 TOPm1s = right; 7159 TOPs = left; 7160 XPUSHs(right); 7161 RETURN; 7162 } 7163 7164 /* 7165 * ex: set ts=8 sts=4 sw=4 et: 7166 */ 7167