1 /* pp.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'It's a big house this, and very peculiar. Always a bit more 13 * to discover, and no knowing what you'll find round a corner. 14 * And Elves, sir!' --Samwise Gamgee 15 * 16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"] 17 */ 18 19 /* This file contains general pp ("push/pop") functions that execute the 20 * opcodes that make up a perl program. A typical pp function expects to 21 * find its arguments on the stack, and usually pushes its results onto 22 * the stack, hence the 'pp' terminology. Each OP structure contains 23 * a pointer to the relevant pp_foo() function. 24 */ 25 26 #include "EXTERN.h" 27 #define PERL_IN_PP_C 28 #include "perl.h" 29 #include "keywords.h" 30 31 #include "reentr.h" 32 #include "regcharclass.h" 33 34 /* XXX I can't imagine anyone who doesn't have this actually _needs_ 35 it, since pid_t is an integral type. 36 --AD 2/20/1998 37 */ 38 #ifdef NEED_GETPID_PROTO 39 extern Pid_t getpid (void); 40 #endif 41 42 /* 43 * Some BSDs and Cygwin default to POSIX math instead of IEEE. 44 * This switches them over to IEEE. 45 */ 46 #if defined(LIBM_LIB_VERSION) 47 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_; 48 #endif 49 50 /* variations on pp_null */ 51 52 PP(pp_stub) 53 { 54 dVAR; 55 dSP; 56 if (GIMME_V == G_SCALAR) 57 XPUSHs(&PL_sv_undef); 58 RETURN; 59 } 60 61 /* Pushy stuff. */ 62 63 PP(pp_padav) 64 { 65 dVAR; dSP; dTARGET; 66 I32 gimme; 67 assert(SvTYPE(TARG) == SVt_PVAV); 68 if (PL_op->op_private & OPpLVAL_INTRO) 69 if (!(PL_op->op_private & OPpPAD_STATE)) 70 SAVECLEARSV(PAD_SVl(PL_op->op_targ)); 71 EXTEND(SP, 1); 72 if (PL_op->op_flags & OPf_REF) { 73 PUSHs(TARG); 74 RETURN; 75 } else if (PL_op->op_private & OPpMAYBE_LVSUB) { 76 const I32 flags = is_lvalue_sub(); 77 if (flags && !(flags & OPpENTERSUB_INARGS)) { 78 if (GIMME == G_SCALAR) 79 /* diag_listed_as: Can't return %s to lvalue scalar context */ 80 Perl_croak(aTHX_ "Can't return array to lvalue scalar context"); 81 PUSHs(TARG); 82 RETURN; 83 } 84 } 85 gimme = GIMME_V; 86 if (gimme == G_ARRAY) { 87 /* XXX see also S_pushav in pp_hot.c */ 88 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1; 89 EXTEND(SP, maxarg); 90 if (SvMAGICAL(TARG)) { 91 U32 i; 92 for (i=0; i < (U32)maxarg; i++) { 93 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE); 94 SP[i+1] = (svp) ? *svp : &PL_sv_undef; 95 } 96 } 97 else { 98 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*); 99 } 100 SP += maxarg; 101 } 102 else if (gimme == G_SCALAR) { 103 SV* const sv = sv_newmortal(); 104 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1; 105 sv_setiv(sv, maxarg); 106 PUSHs(sv); 107 } 108 RETURN; 109 } 110 111 PP(pp_padhv) 112 { 113 dVAR; dSP; dTARGET; 114 I32 gimme; 115 116 assert(SvTYPE(TARG) == SVt_PVHV); 117 XPUSHs(TARG); 118 if (PL_op->op_private & OPpLVAL_INTRO) 119 if (!(PL_op->op_private & OPpPAD_STATE)) 120 SAVECLEARSV(PAD_SVl(PL_op->op_targ)); 121 if (PL_op->op_flags & OPf_REF) 122 RETURN; 123 else if (PL_op->op_private & OPpMAYBE_LVSUB) { 124 const I32 flags = is_lvalue_sub(); 125 if (flags && !(flags & OPpENTERSUB_INARGS)) { 126 if (GIMME == G_SCALAR) 127 /* diag_listed_as: Can't return %s to lvalue scalar context */ 128 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context"); 129 RETURN; 130 } 131 } 132 gimme = GIMME_V; 133 if (gimme == G_ARRAY) { 134 RETURNOP(Perl_do_kv(aTHX)); 135 } 136 else if ((PL_op->op_private & OPpTRUEBOOL 137 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL 138 && block_gimme() == G_VOID )) 139 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied))) 140 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : sv_2mortal(newSViv(0))); 141 else if (gimme == G_SCALAR) { 142 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG)); 143 SETs(sv); 144 } 145 RETURN; 146 } 147 148 PP(pp_padcv) 149 { 150 dVAR; dSP; dTARGET; 151 assert(SvTYPE(TARG) == SVt_PVCV); 152 XPUSHs(TARG); 153 RETURN; 154 } 155 156 PP(pp_introcv) 157 { 158 dVAR; dTARGET; 159 SvPADSTALE_off(TARG); 160 return NORMAL; 161 } 162 163 PP(pp_clonecv) 164 { 165 dVAR; dTARGET; 166 MAGIC * const mg = 167 mg_find(PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG], 168 PERL_MAGIC_proto); 169 assert(SvTYPE(TARG) == SVt_PVCV); 170 assert(mg); 171 assert(mg->mg_obj); 172 if (CvISXSUB(mg->mg_obj)) { /* constant */ 173 /* XXX Should we clone it here? */ 174 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV 175 to introcv and remove the SvPADSTALE_off. */ 176 SAVEPADSVANDMORTALIZE(ARGTARG); 177 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(mg->mg_obj); 178 } 179 else { 180 if (CvROOT(mg->mg_obj)) { 181 assert(CvCLONE(mg->mg_obj)); 182 assert(!CvCLONED(mg->mg_obj)); 183 } 184 cv_clone_into((CV *)mg->mg_obj,(CV *)TARG); 185 SAVECLEARSV(PAD_SVl(ARGTARG)); 186 } 187 return NORMAL; 188 } 189 190 /* Translations. */ 191 192 static const char S_no_symref_sv[] = 193 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use"; 194 195 /* In some cases this function inspects PL_op. If this function is called 196 for new op types, more bool parameters may need to be added in place of 197 the checks. 198 199 When noinit is true, the absence of a gv will cause a retval of undef. 200 This is unrelated to the cv-to-gv assignment case. 201 */ 202 203 static SV * 204 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, 205 const bool noinit) 206 { 207 dVAR; 208 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv); 209 if (SvROK(sv)) { 210 if (SvAMAGIC(sv)) { 211 sv = amagic_deref_call(sv, to_gv_amg); 212 } 213 wasref: 214 sv = SvRV(sv); 215 if (SvTYPE(sv) == SVt_PVIO) { 216 GV * const gv = MUTABLE_GV(sv_newmortal()); 217 gv_init(gv, 0, "__ANONIO__", 10, 0); 218 GvIOp(gv) = MUTABLE_IO(sv); 219 SvREFCNT_inc_void_NN(sv); 220 sv = MUTABLE_SV(gv); 221 } 222 else if (!isGV_with_GP(sv)) 223 return (SV *)Perl_die(aTHX_ "Not a GLOB reference"); 224 } 225 else { 226 if (!isGV_with_GP(sv)) { 227 if (!SvOK(sv)) { 228 /* If this is a 'my' scalar and flag is set then vivify 229 * NI-S 1999/05/07 230 */ 231 if (vivify_sv && sv != &PL_sv_undef) { 232 GV *gv; 233 if (SvREADONLY(sv)) 234 Perl_croak_no_modify(); 235 if (cUNOP->op_targ) { 236 SV * const namesv = PAD_SV(cUNOP->op_targ); 237 gv = MUTABLE_GV(newSV(0)); 238 gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0); 239 } 240 else { 241 const char * const name = CopSTASHPV(PL_curcop); 242 gv = newGVgen_flags(name, 243 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 ); 244 } 245 prepare_SV_for_RV(sv); 246 SvRV_set(sv, MUTABLE_SV(gv)); 247 SvROK_on(sv); 248 SvSETMAGIC(sv); 249 goto wasref; 250 } 251 if (PL_op->op_flags & OPf_REF || strict) 252 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol"); 253 if (ckWARN(WARN_UNINITIALIZED)) 254 report_uninit(sv); 255 return &PL_sv_undef; 256 } 257 if (noinit) 258 { 259 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg( 260 sv, GV_ADDMG, SVt_PVGV 261 )))) 262 return &PL_sv_undef; 263 } 264 else { 265 if (strict) 266 return 267 (SV *)Perl_die(aTHX_ 268 S_no_symref_sv, 269 sv, 270 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), 271 "a symbol" 272 ); 273 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV)) 274 == OPpDONT_INIT_GV) { 275 /* We are the target of a coderef assignment. Return 276 the scalar unchanged, and let pp_sasssign deal with 277 things. */ 278 return sv; 279 } 280 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV)); 281 } 282 /* FAKE globs in the symbol table cause weird bugs (#77810) */ 283 SvFAKE_off(sv); 284 } 285 } 286 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) { 287 SV *newsv = sv_newmortal(); 288 sv_setsv_flags(newsv, sv, 0); 289 SvFAKE_off(newsv); 290 sv = newsv; 291 } 292 return sv; 293 } 294 295 PP(pp_rv2gv) 296 { 297 dVAR; dSP; dTOPss; 298 299 sv = S_rv2gv(aTHX_ 300 sv, PL_op->op_private & OPpDEREF, 301 PL_op->op_private & HINT_STRICT_REFS, 302 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD)) 303 || PL_op->op_type == OP_READLINE 304 ); 305 if (PL_op->op_private & OPpLVAL_INTRO) 306 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL)); 307 SETs(sv); 308 RETURN; 309 } 310 311 /* Helper function for pp_rv2sv and pp_rv2av */ 312 GV * 313 Perl_softref2xv(pTHX_ SV *const sv, const char *const what, 314 const svtype type, SV ***spp) 315 { 316 dVAR; 317 GV *gv; 318 319 PERL_ARGS_ASSERT_SOFTREF2XV; 320 321 if (PL_op->op_private & HINT_STRICT_REFS) { 322 if (SvOK(sv)) 323 Perl_die(aTHX_ S_no_symref_sv, sv, 324 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what); 325 else 326 Perl_die(aTHX_ PL_no_usym, what); 327 } 328 if (!SvOK(sv)) { 329 if ( 330 PL_op->op_flags & OPf_REF 331 ) 332 Perl_die(aTHX_ PL_no_usym, what); 333 if (ckWARN(WARN_UNINITIALIZED)) 334 report_uninit(sv); 335 if (type != SVt_PV && GIMME_V == G_ARRAY) { 336 (*spp)--; 337 return NULL; 338 } 339 **spp = &PL_sv_undef; 340 return NULL; 341 } 342 if ((PL_op->op_flags & OPf_SPECIAL) && 343 !(PL_op->op_flags & OPf_MOD)) 344 { 345 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type))) 346 { 347 **spp = &PL_sv_undef; 348 return NULL; 349 } 350 } 351 else { 352 gv = gv_fetchsv_nomg(sv, GV_ADD, type); 353 } 354 return gv; 355 } 356 357 PP(pp_rv2sv) 358 { 359 dVAR; dSP; dTOPss; 360 GV *gv = NULL; 361 362 SvGETMAGIC(sv); 363 if (SvROK(sv)) { 364 if (SvAMAGIC(sv)) { 365 sv = amagic_deref_call(sv, to_sv_amg); 366 } 367 368 sv = SvRV(sv); 369 switch (SvTYPE(sv)) { 370 case SVt_PVAV: 371 case SVt_PVHV: 372 case SVt_PVCV: 373 case SVt_PVFM: 374 case SVt_PVIO: 375 DIE(aTHX_ "Not a SCALAR reference"); 376 default: NOOP; 377 } 378 } 379 else { 380 gv = MUTABLE_GV(sv); 381 382 if (!isGV_with_GP(gv)) { 383 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp); 384 if (!gv) 385 RETURN; 386 } 387 sv = GvSVn(gv); 388 } 389 if (PL_op->op_flags & OPf_MOD) { 390 if (PL_op->op_private & OPpLVAL_INTRO) { 391 if (cUNOP->op_first->op_type == OP_NULL) 392 sv = save_scalar(MUTABLE_GV(TOPs)); 393 else if (gv) 394 sv = save_scalar(gv); 395 else 396 Perl_croak(aTHX_ "%s", PL_no_localize_ref); 397 } 398 else if (PL_op->op_private & OPpDEREF) 399 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF); 400 } 401 SETs(sv); 402 RETURN; 403 } 404 405 PP(pp_av2arylen) 406 { 407 dVAR; dSP; 408 AV * const av = MUTABLE_AV(TOPs); 409 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 410 if (lvalue) { 411 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av)); 412 if (!*sv) { 413 *sv = newSV_type(SVt_PVMG); 414 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0); 415 } 416 SETs(*sv); 417 } else { 418 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av))))); 419 } 420 RETURN; 421 } 422 423 PP(pp_pos) 424 { 425 dVAR; dSP; dPOPss; 426 427 if (PL_op->op_flags & OPf_MOD || LVRET) { 428 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */ 429 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0); 430 LvTYPE(ret) = '.'; 431 LvTARG(ret) = SvREFCNT_inc_simple(sv); 432 PUSHs(ret); /* no SvSETMAGIC */ 433 RETURN; 434 } 435 else { 436 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { 437 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global); 438 if (mg && mg->mg_len >= 0) { 439 dTARGET; 440 I32 i = mg->mg_len; 441 if (DO_UTF8(sv)) 442 sv_pos_b2u(sv, &i); 443 PUSHi(i); 444 RETURN; 445 } 446 } 447 RETPUSHUNDEF; 448 } 449 } 450 451 PP(pp_rv2cv) 452 { 453 dVAR; dSP; 454 GV *gv; 455 HV *stash_unused; 456 const I32 flags = (PL_op->op_flags & OPf_SPECIAL) 457 ? GV_ADDMG 458 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) 459 == OPpMAY_RETURN_CONSTANT) 460 ? GV_ADD|GV_NOEXPAND 461 : GV_ADD; 462 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */ 463 /* (But not in defined().) */ 464 465 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags); 466 if (cv) NOOP; 467 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) { 468 cv = MUTABLE_CV(gv); 469 } 470 else 471 cv = MUTABLE_CV(&PL_sv_undef); 472 SETs(MUTABLE_SV(cv)); 473 RETURN; 474 } 475 476 PP(pp_prototype) 477 { 478 dVAR; dSP; 479 CV *cv; 480 HV *stash; 481 GV *gv; 482 SV *ret = &PL_sv_undef; 483 484 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs)); 485 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) { 486 const char * s = SvPVX_const(TOPs); 487 if (strnEQ(s, "CORE::", 6)) { 488 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1); 489 if (!code || code == -KEY_CORE) 490 DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"", 491 SVfARG(newSVpvn_flags( 492 s+6, SvCUR(TOPs)-6, 493 (SvFLAGS(TOPs) & SVf_UTF8)|SVs_TEMP 494 ))); 495 { 496 SV * const sv = core_prototype(NULL, s + 6, code, NULL); 497 if (sv) ret = sv; 498 } 499 goto set; 500 } 501 } 502 cv = sv_2cv(TOPs, &stash, &gv, 0); 503 if (cv && SvPOK(cv)) 504 ret = newSVpvn_flags( 505 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv) 506 ); 507 set: 508 SETs(ret); 509 RETURN; 510 } 511 512 PP(pp_anoncode) 513 { 514 dVAR; dSP; 515 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ)); 516 if (CvCLONE(cv)) 517 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv)))); 518 EXTEND(SP,1); 519 PUSHs(MUTABLE_SV(cv)); 520 RETURN; 521 } 522 523 PP(pp_srefgen) 524 { 525 dVAR; dSP; 526 *SP = refto(*SP); 527 RETURN; 528 } 529 530 PP(pp_refgen) 531 { 532 dVAR; dSP; dMARK; 533 if (GIMME != G_ARRAY) { 534 if (++MARK <= SP) 535 *MARK = *SP; 536 else 537 *MARK = &PL_sv_undef; 538 *MARK = refto(*MARK); 539 SP = MARK; 540 RETURN; 541 } 542 EXTEND_MORTAL(SP - MARK); 543 while (++MARK <= SP) 544 *MARK = refto(*MARK); 545 RETURN; 546 } 547 548 STATIC SV* 549 S_refto(pTHX_ SV *sv) 550 { 551 dVAR; 552 SV* rv; 553 554 PERL_ARGS_ASSERT_REFTO; 555 556 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') { 557 if (LvTARGLEN(sv)) 558 vivify_defelem(sv); 559 if (!(sv = LvTARG(sv))) 560 sv = &PL_sv_undef; 561 else 562 SvREFCNT_inc_void_NN(sv); 563 } 564 else if (SvTYPE(sv) == SVt_PVAV) { 565 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv)) 566 av_reify(MUTABLE_AV(sv)); 567 SvTEMP_off(sv); 568 SvREFCNT_inc_void_NN(sv); 569 } 570 else if (SvPADTMP(sv) && !IS_PADGV(sv)) 571 sv = newSVsv(sv); 572 else { 573 SvTEMP_off(sv); 574 SvREFCNT_inc_void_NN(sv); 575 } 576 rv = sv_newmortal(); 577 sv_upgrade(rv, SVt_IV); 578 SvRV_set(rv, sv); 579 SvROK_on(rv); 580 return rv; 581 } 582 583 PP(pp_ref) 584 { 585 dVAR; dSP; dTARGET; 586 SV * const sv = POPs; 587 588 if (sv) 589 SvGETMAGIC(sv); 590 591 if (!sv || !SvROK(sv)) 592 RETPUSHNO; 593 594 (void)sv_ref(TARG,SvRV(sv),TRUE); 595 PUSHTARG; 596 RETURN; 597 } 598 599 PP(pp_bless) 600 { 601 dVAR; dSP; 602 HV *stash; 603 604 if (MAXARG == 1) 605 curstash: 606 stash = CopSTASH(PL_curcop); 607 else { 608 SV * const ssv = POPs; 609 STRLEN len; 610 const char *ptr; 611 612 if (!ssv) goto curstash; 613 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv)) 614 Perl_croak(aTHX_ "Attempt to bless into a reference"); 615 ptr = SvPV_const(ssv,len); 616 if (len == 0) 617 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 618 "Explicit blessing to '' (assuming package main)"); 619 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv)); 620 } 621 622 (void)sv_bless(TOPs, stash); 623 RETURN; 624 } 625 626 PP(pp_gelem) 627 { 628 dVAR; dSP; 629 630 SV *sv = POPs; 631 STRLEN len; 632 const char * const elem = SvPV_const(sv, len); 633 GV * const gv = MUTABLE_GV(POPs); 634 SV * tmpRef = NULL; 635 636 sv = NULL; 637 if (elem) { 638 /* elem will always be NUL terminated. */ 639 const char * const second_letter = elem + 1; 640 switch (*elem) { 641 case 'A': 642 if (len == 5 && strEQ(second_letter, "RRAY")) 643 { 644 tmpRef = MUTABLE_SV(GvAV(gv)); 645 if (tmpRef && !AvREAL((const AV *)tmpRef) 646 && AvREIFY((const AV *)tmpRef)) 647 av_reify(MUTABLE_AV(tmpRef)); 648 } 649 break; 650 case 'C': 651 if (len == 4 && strEQ(second_letter, "ODE")) 652 tmpRef = MUTABLE_SV(GvCVu(gv)); 653 break; 654 case 'F': 655 if (len == 10 && strEQ(second_letter, "ILEHANDLE")) { 656 /* finally deprecated in 5.8.0 */ 657 deprecate("*glob{FILEHANDLE}"); 658 tmpRef = MUTABLE_SV(GvIOp(gv)); 659 } 660 else 661 if (len == 6 && strEQ(second_letter, "ORMAT")) 662 tmpRef = MUTABLE_SV(GvFORM(gv)); 663 break; 664 case 'G': 665 if (len == 4 && strEQ(second_letter, "LOB")) 666 tmpRef = MUTABLE_SV(gv); 667 break; 668 case 'H': 669 if (len == 4 && strEQ(second_letter, "ASH")) 670 tmpRef = MUTABLE_SV(GvHV(gv)); 671 break; 672 case 'I': 673 if (*second_letter == 'O' && !elem[2] && len == 2) 674 tmpRef = MUTABLE_SV(GvIOp(gv)); 675 break; 676 case 'N': 677 if (len == 4 && strEQ(second_letter, "AME")) 678 sv = newSVhek(GvNAME_HEK(gv)); 679 break; 680 case 'P': 681 if (len == 7 && strEQ(second_letter, "ACKAGE")) { 682 const HV * const stash = GvSTASH(gv); 683 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL; 684 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__"); 685 } 686 break; 687 case 'S': 688 if (len == 6 && strEQ(second_letter, "CALAR")) 689 tmpRef = GvSVn(gv); 690 break; 691 } 692 } 693 if (tmpRef) 694 sv = newRV(tmpRef); 695 if (sv) 696 sv_2mortal(sv); 697 else 698 sv = &PL_sv_undef; 699 XPUSHs(sv); 700 RETURN; 701 } 702 703 /* Pattern matching */ 704 705 PP(pp_study) 706 { 707 dVAR; dSP; dPOPss; 708 STRLEN len; 709 710 (void)SvPV(sv, len); 711 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) { 712 /* Historically, study was skipped in these cases. */ 713 RETPUSHNO; 714 } 715 716 /* Make study a no-op. It's no longer useful and its existence 717 complicates matters elsewhere. */ 718 RETPUSHYES; 719 } 720 721 PP(pp_trans) 722 { 723 dVAR; dSP; dTARG; 724 SV *sv; 725 726 if (PL_op->op_flags & OPf_STACKED) 727 sv = POPs; 728 else if (PL_op->op_private & OPpTARGET_MY) 729 sv = GETTARGET; 730 else { 731 sv = DEFSV; 732 EXTEND(SP,1); 733 } 734 if(PL_op->op_type == OP_TRANSR) { 735 STRLEN len; 736 const char * const pv = SvPV(sv,len); 737 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv)); 738 do_trans(newsv); 739 PUSHs(newsv); 740 } 741 else { 742 TARG = sv_newmortal(); 743 PUSHi(do_trans(sv)); 744 } 745 RETURN; 746 } 747 748 /* Lvalue operators. */ 749 750 static void 751 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping) 752 { 753 dVAR; 754 STRLEN len; 755 char *s; 756 757 PERL_ARGS_ASSERT_DO_CHOMP; 758 759 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs))) 760 return; 761 if (SvTYPE(sv) == SVt_PVAV) { 762 I32 i; 763 AV *const av = MUTABLE_AV(sv); 764 const I32 max = AvFILL(av); 765 766 for (i = 0; i <= max; i++) { 767 sv = MUTABLE_SV(av_fetch(av, i, FALSE)); 768 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef)) 769 do_chomp(retval, sv, chomping); 770 } 771 return; 772 } 773 else if (SvTYPE(sv) == SVt_PVHV) { 774 HV* const hv = MUTABLE_HV(sv); 775 HE* entry; 776 (void)hv_iterinit(hv); 777 while ((entry = hv_iternext(hv))) 778 do_chomp(retval, hv_iterval(hv,entry), chomping); 779 return; 780 } 781 else if (SvREADONLY(sv)) { 782 Perl_croak_no_modify(); 783 } 784 else if (SvIsCOW(sv)) { 785 sv_force_normal_flags(sv, 0); 786 } 787 788 if (PL_encoding) { 789 if (!SvUTF8(sv)) { 790 /* XXX, here sv is utf8-ized as a side-effect! 791 If encoding.pm is used properly, almost string-generating 792 operations, including literal strings, chr(), input data, etc. 793 should have been utf8-ized already, right? 794 */ 795 sv_recode_to_utf8(sv, PL_encoding); 796 } 797 } 798 799 s = SvPV(sv, len); 800 if (chomping) { 801 char *temp_buffer = NULL; 802 SV *svrecode = NULL; 803 804 if (s && len) { 805 s += --len; 806 if (RsPARA(PL_rs)) { 807 if (*s != '\n') 808 goto nope; 809 ++SvIVX(retval); 810 while (len && s[-1] == '\n') { 811 --len; 812 --s; 813 ++SvIVX(retval); 814 } 815 } 816 else { 817 STRLEN rslen, rs_charlen; 818 const char *rsptr = SvPV_const(PL_rs, rslen); 819 820 rs_charlen = SvUTF8(PL_rs) 821 ? sv_len_utf8(PL_rs) 822 : rslen; 823 824 if (SvUTF8(PL_rs) != SvUTF8(sv)) { 825 /* Assumption is that rs is shorter than the scalar. */ 826 if (SvUTF8(PL_rs)) { 827 /* RS is utf8, scalar is 8 bit. */ 828 bool is_utf8 = TRUE; 829 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, 830 &rslen, &is_utf8); 831 if (is_utf8) { 832 /* Cannot downgrade, therefore cannot possibly match 833 */ 834 assert (temp_buffer == rsptr); 835 temp_buffer = NULL; 836 goto nope; 837 } 838 rsptr = temp_buffer; 839 } 840 else if (PL_encoding) { 841 /* RS is 8 bit, encoding.pm is used. 842 * Do not recode PL_rs as a side-effect. */ 843 svrecode = newSVpvn(rsptr, rslen); 844 sv_recode_to_utf8(svrecode, PL_encoding); 845 rsptr = SvPV_const(svrecode, rslen); 846 rs_charlen = sv_len_utf8(svrecode); 847 } 848 else { 849 /* RS is 8 bit, scalar is utf8. */ 850 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); 851 rsptr = temp_buffer; 852 } 853 } 854 if (rslen == 1) { 855 if (*s != *rsptr) 856 goto nope; 857 ++SvIVX(retval); 858 } 859 else { 860 if (len < rslen - 1) 861 goto nope; 862 len -= rslen - 1; 863 s -= rslen - 1; 864 if (memNE(s, rsptr, rslen)) 865 goto nope; 866 SvIVX(retval) += rs_charlen; 867 } 868 } 869 s = SvPV_force_nomg_nolen(sv); 870 SvCUR_set(sv, len); 871 *SvEND(sv) = '\0'; 872 SvNIOK_off(sv); 873 SvSETMAGIC(sv); 874 } 875 nope: 876 877 SvREFCNT_dec(svrecode); 878 879 Safefree(temp_buffer); 880 } else { 881 if (len && !SvPOK(sv)) 882 s = SvPV_force_nomg(sv, len); 883 if (DO_UTF8(sv)) { 884 if (s && len) { 885 char * const send = s + len; 886 char * const start = s; 887 s = send - 1; 888 while (s > start && UTF8_IS_CONTINUATION(*s)) 889 s--; 890 if (is_utf8_string((U8*)s, send - s)) { 891 sv_setpvn(retval, s, send - s); 892 *s = '\0'; 893 SvCUR_set(sv, s - start); 894 SvNIOK_off(sv); 895 SvUTF8_on(retval); 896 } 897 } 898 else 899 sv_setpvs(retval, ""); 900 } 901 else if (s && len) { 902 s += --len; 903 sv_setpvn(retval, s, 1); 904 *s = '\0'; 905 SvCUR_set(sv, len); 906 SvUTF8_off(sv); 907 SvNIOK_off(sv); 908 } 909 else 910 sv_setpvs(retval, ""); 911 SvSETMAGIC(sv); 912 } 913 } 914 915 PP(pp_schop) 916 { 917 dVAR; dSP; dTARGET; 918 const bool chomping = PL_op->op_type == OP_SCHOMP; 919 920 if (chomping) 921 sv_setiv(TARG, 0); 922 do_chomp(TARG, TOPs, chomping); 923 SETTARG; 924 RETURN; 925 } 926 927 PP(pp_chop) 928 { 929 dVAR; dSP; dMARK; dTARGET; dORIGMARK; 930 const bool chomping = PL_op->op_type == OP_CHOMP; 931 932 if (chomping) 933 sv_setiv(TARG, 0); 934 while (MARK < SP) 935 do_chomp(TARG, *++MARK, chomping); 936 SP = ORIGMARK; 937 XPUSHTARG; 938 RETURN; 939 } 940 941 PP(pp_undef) 942 { 943 dVAR; dSP; 944 SV *sv; 945 946 if (!PL_op->op_private) { 947 EXTEND(SP, 1); 948 RETPUSHUNDEF; 949 } 950 951 sv = POPs; 952 if (!sv) 953 RETPUSHUNDEF; 954 955 SV_CHECK_THINKFIRST_COW_DROP(sv); 956 957 switch (SvTYPE(sv)) { 958 case SVt_NULL: 959 break; 960 case SVt_PVAV: 961 av_undef(MUTABLE_AV(sv)); 962 break; 963 case SVt_PVHV: 964 hv_undef(MUTABLE_HV(sv)); 965 break; 966 case SVt_PVCV: 967 if (cv_const_sv((const CV *)sv)) 968 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), 969 "Constant subroutine %"SVf" undefined", 970 SVfARG(CvANON((const CV *)sv) 971 ? newSVpvs_flags("(anonymous)", SVs_TEMP) 972 : sv_2mortal(newSVhek( 973 CvNAMED(sv) 974 ? CvNAME_HEK((CV *)sv) 975 : GvENAME_HEK(CvGV((const CV *)sv)) 976 )) 977 )); 978 /* FALLTHROUGH */ 979 case SVt_PVFM: 980 { 981 /* let user-undef'd sub keep its identity */ 982 GV* const gv = CvGV((const CV *)sv); 983 HEK * const hek = CvNAME_HEK((CV *)sv); 984 if (hek) share_hek_hek(hek); 985 cv_undef(MUTABLE_CV(sv)); 986 if (gv) CvGV_set(MUTABLE_CV(sv), gv); 987 else if (hek) { 988 SvANY((CV *)sv)->xcv_gv_u.xcv_hek = hek; 989 CvNAMED_on(sv); 990 } 991 } 992 break; 993 case SVt_PVGV: 994 assert(isGV_with_GP(sv)); 995 assert(!SvFAKE(sv)); 996 { 997 GP *gp; 998 HV *stash; 999 1000 /* undef *Pkg::meth_name ... */ 1001 bool method_changed 1002 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv)) 1003 && HvENAME_get(stash); 1004 /* undef *Foo:: */ 1005 if((stash = GvHV((const GV *)sv))) { 1006 if(HvENAME_get(stash)) 1007 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash)); 1008 else stash = NULL; 1009 } 1010 1011 gp_free(MUTABLE_GV(sv)); 1012 Newxz(gp, 1, GP); 1013 GvGP_set(sv, gp_ref(gp)); 1014 GvSV(sv) = newSV(0); 1015 GvLINE(sv) = CopLINE(PL_curcop); 1016 GvEGV(sv) = MUTABLE_GV(sv); 1017 GvMULTI_on(sv); 1018 1019 if(stash) 1020 mro_package_moved(NULL, stash, (const GV *)sv, 0); 1021 stash = NULL; 1022 /* undef *Foo::ISA */ 1023 if( strEQ(GvNAME((const GV *)sv), "ISA") 1024 && (stash = GvSTASH((const GV *)sv)) 1025 && (method_changed || HvENAME(stash)) ) 1026 mro_isa_changed_in(stash); 1027 else if(method_changed) 1028 mro_method_changed_in( 1029 GvSTASH((const GV *)sv) 1030 ); 1031 1032 break; 1033 } 1034 default: 1035 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) { 1036 SvPV_free(sv); 1037 SvPV_set(sv, NULL); 1038 SvLEN_set(sv, 0); 1039 } 1040 SvOK_off(sv); 1041 SvSETMAGIC(sv); 1042 } 1043 1044 RETPUSHUNDEF; 1045 } 1046 1047 PP(pp_postinc) 1048 { 1049 dVAR; dSP; dTARGET; 1050 const bool inc = 1051 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC; 1052 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs))) 1053 Perl_croak_no_modify(); 1054 if (SvROK(TOPs)) 1055 TARG = sv_newmortal(); 1056 sv_setsv(TARG, TOPs); 1057 if (!SvREADONLY(TOPs) && !SvGMAGICAL(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs) 1058 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN)) 1059 { 1060 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1)); 1061 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK); 1062 } 1063 else if (inc) 1064 sv_inc_nomg(TOPs); 1065 else sv_dec_nomg(TOPs); 1066 SvSETMAGIC(TOPs); 1067 /* special case for undef: see thread at 2003-03/msg00536.html in archive */ 1068 if (inc && !SvOK(TARG)) 1069 sv_setiv(TARG, 0); 1070 SETs(TARG); 1071 return NORMAL; 1072 } 1073 1074 /* Ordinary operators. */ 1075 1076 PP(pp_pow) 1077 { 1078 dVAR; dSP; dATARGET; SV *svl, *svr; 1079 #ifdef PERL_PRESERVE_IVUV 1080 bool is_int = 0; 1081 #endif 1082 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric); 1083 svr = TOPs; 1084 svl = TOPm1s; 1085 #ifdef PERL_PRESERVE_IVUV 1086 /* For integer to integer power, we do the calculation by hand wherever 1087 we're sure it is safe; otherwise we call pow() and try to convert to 1088 integer afterwards. */ 1089 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) { 1090 UV power; 1091 bool baseuok; 1092 UV baseuv; 1093 1094 if (SvUOK(svr)) { 1095 power = SvUVX(svr); 1096 } else { 1097 const IV iv = SvIVX(svr); 1098 if (iv >= 0) { 1099 power = iv; 1100 } else { 1101 goto float_it; /* Can't do negative powers this way. */ 1102 } 1103 } 1104 1105 baseuok = SvUOK(svl); 1106 if (baseuok) { 1107 baseuv = SvUVX(svl); 1108 } else { 1109 const IV iv = SvIVX(svl); 1110 if (iv >= 0) { 1111 baseuv = iv; 1112 baseuok = TRUE; /* effectively it's a UV now */ 1113 } else { 1114 baseuv = -iv; /* abs, baseuok == false records sign */ 1115 } 1116 } 1117 /* now we have integer ** positive integer. */ 1118 is_int = 1; 1119 1120 /* foo & (foo - 1) is zero only for a power of 2. */ 1121 if (!(baseuv & (baseuv - 1))) { 1122 /* We are raising power-of-2 to a positive integer. 1123 The logic here will work for any base (even non-integer 1124 bases) but it can be less accurate than 1125 pow (base,power) or exp (power * log (base)) when the 1126 intermediate values start to spill out of the mantissa. 1127 With powers of 2 we know this can't happen. 1128 And powers of 2 are the favourite thing for perl 1129 programmers to notice ** not doing what they mean. */ 1130 NV result = 1.0; 1131 NV base = baseuok ? baseuv : -(NV)baseuv; 1132 1133 if (power & 1) { 1134 result *= base; 1135 } 1136 while (power >>= 1) { 1137 base *= base; 1138 if (power & 1) { 1139 result *= base; 1140 } 1141 } 1142 SP--; 1143 SETn( result ); 1144 SvIV_please_nomg(svr); 1145 RETURN; 1146 } else { 1147 unsigned int highbit = 8 * sizeof(UV); 1148 unsigned int diff = 8 * sizeof(UV); 1149 while (diff >>= 1) { 1150 highbit -= diff; 1151 if (baseuv >> highbit) { 1152 highbit += diff; 1153 } 1154 } 1155 /* we now have baseuv < 2 ** highbit */ 1156 if (power * highbit <= 8 * sizeof(UV)) { 1157 /* result will definitely fit in UV, so use UV math 1158 on same algorithm as above */ 1159 UV result = 1; 1160 UV base = baseuv; 1161 const bool odd_power = cBOOL(power & 1); 1162 if (odd_power) { 1163 result *= base; 1164 } 1165 while (power >>= 1) { 1166 base *= base; 1167 if (power & 1) { 1168 result *= base; 1169 } 1170 } 1171 SP--; 1172 if (baseuok || !odd_power) 1173 /* answer is positive */ 1174 SETu( result ); 1175 else if (result <= (UV)IV_MAX) 1176 /* answer negative, fits in IV */ 1177 SETi( -(IV)result ); 1178 else if (result == (UV)IV_MIN) 1179 /* 2's complement assumption: special case IV_MIN */ 1180 SETi( IV_MIN ); 1181 else 1182 /* answer negative, doesn't fit */ 1183 SETn( -(NV)result ); 1184 RETURN; 1185 } 1186 } 1187 } 1188 float_it: 1189 #endif 1190 { 1191 NV right = SvNV_nomg(svr); 1192 NV left = SvNV_nomg(svl); 1193 (void)POPs; 1194 1195 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG) 1196 /* 1197 We are building perl with long double support and are on an AIX OS 1198 afflicted with a powl() function that wrongly returns NaNQ for any 1199 negative base. This was reported to IBM as PMR #23047-379 on 1200 03/06/2006. The problem exists in at least the following versions 1201 of AIX and the libm fileset, and no doubt others as well: 1202 1203 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50 1204 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29 1205 AIX 5.2.0 bos.adt.libm 5.2.0.85 1206 1207 So, until IBM fixes powl(), we provide the following workaround to 1208 handle the problem ourselves. Our logic is as follows: for 1209 negative bases (left), we use fmod(right, 2) to check if the 1210 exponent is an odd or even integer: 1211 1212 - if odd, powl(left, right) == -powl(-left, right) 1213 - if even, powl(left, right) == powl(-left, right) 1214 1215 If the exponent is not an integer, the result is rightly NaNQ, so 1216 we just return that (as NV_NAN). 1217 */ 1218 1219 if (left < 0.0) { 1220 NV mod2 = Perl_fmod( right, 2.0 ); 1221 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */ 1222 SETn( -Perl_pow( -left, right) ); 1223 } else if (mod2 == 0.0) { /* even integer */ 1224 SETn( Perl_pow( -left, right) ); 1225 } else { /* fractional power */ 1226 SETn( NV_NAN ); 1227 } 1228 } else { 1229 SETn( Perl_pow( left, right) ); 1230 } 1231 #else 1232 SETn( Perl_pow( left, right) ); 1233 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */ 1234 1235 #ifdef PERL_PRESERVE_IVUV 1236 if (is_int) 1237 SvIV_please_nomg(svr); 1238 #endif 1239 RETURN; 1240 } 1241 } 1242 1243 PP(pp_multiply) 1244 { 1245 dVAR; dSP; dATARGET; SV *svl, *svr; 1246 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric); 1247 svr = TOPs; 1248 svl = TOPm1s; 1249 #ifdef PERL_PRESERVE_IVUV 1250 if (SvIV_please_nomg(svr)) { 1251 /* Unless the left argument is integer in range we are going to have to 1252 use NV maths. Hence only attempt to coerce the right argument if 1253 we know the left is integer. */ 1254 /* Left operand is defined, so is it IV? */ 1255 if (SvIV_please_nomg(svl)) { 1256 bool auvok = SvUOK(svl); 1257 bool buvok = SvUOK(svr); 1258 const UV topmask = (~ (UV)0) << (4 * sizeof (UV)); 1259 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV))); 1260 UV alow; 1261 UV ahigh; 1262 UV blow; 1263 UV bhigh; 1264 1265 if (auvok) { 1266 alow = SvUVX(svl); 1267 } else { 1268 const IV aiv = SvIVX(svl); 1269 if (aiv >= 0) { 1270 alow = aiv; 1271 auvok = TRUE; /* effectively it's a UV now */ 1272 } else { 1273 alow = -aiv; /* abs, auvok == false records sign */ 1274 } 1275 } 1276 if (buvok) { 1277 blow = SvUVX(svr); 1278 } else { 1279 const IV biv = SvIVX(svr); 1280 if (biv >= 0) { 1281 blow = biv; 1282 buvok = TRUE; /* effectively it's a UV now */ 1283 } else { 1284 blow = -biv; /* abs, buvok == false records sign */ 1285 } 1286 } 1287 1288 /* If this does sign extension on unsigned it's time for plan B */ 1289 ahigh = alow >> (4 * sizeof (UV)); 1290 alow &= botmask; 1291 bhigh = blow >> (4 * sizeof (UV)); 1292 blow &= botmask; 1293 if (ahigh && bhigh) { 1294 NOOP; 1295 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000 1296 which is overflow. Drop to NVs below. */ 1297 } else if (!ahigh && !bhigh) { 1298 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001 1299 so the unsigned multiply cannot overflow. */ 1300 const UV product = alow * blow; 1301 if (auvok == buvok) { 1302 /* -ve * -ve or +ve * +ve gives a +ve result. */ 1303 SP--; 1304 SETu( product ); 1305 RETURN; 1306 } else if (product <= (UV)IV_MIN) { 1307 /* 2s complement assumption that (UV)-IV_MIN is correct. */ 1308 /* -ve result, which could overflow an IV */ 1309 SP--; 1310 SETi( -(IV)product ); 1311 RETURN; 1312 } /* else drop to NVs below. */ 1313 } else { 1314 /* One operand is large, 1 small */ 1315 UV product_middle; 1316 if (bhigh) { 1317 /* swap the operands */ 1318 ahigh = bhigh; 1319 bhigh = blow; /* bhigh now the temp var for the swap */ 1320 blow = alow; 1321 alow = bhigh; 1322 } 1323 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow) 1324 multiplies can't overflow. shift can, add can, -ve can. */ 1325 product_middle = ahigh * blow; 1326 if (!(product_middle & topmask)) { 1327 /* OK, (ahigh * blow) won't lose bits when we shift it. */ 1328 UV product_low; 1329 product_middle <<= (4 * sizeof (UV)); 1330 product_low = alow * blow; 1331 1332 /* as for pp_add, UV + something mustn't get smaller. 1333 IIRC ANSI mandates this wrapping *behaviour* for 1334 unsigned whatever the actual representation*/ 1335 product_low += product_middle; 1336 if (product_low >= product_middle) { 1337 /* didn't overflow */ 1338 if (auvok == buvok) { 1339 /* -ve * -ve or +ve * +ve gives a +ve result. */ 1340 SP--; 1341 SETu( product_low ); 1342 RETURN; 1343 } else if (product_low <= (UV)IV_MIN) { 1344 /* 2s complement assumption again */ 1345 /* -ve result, which could overflow an IV */ 1346 SP--; 1347 SETi( -(IV)product_low ); 1348 RETURN; 1349 } /* else drop to NVs below. */ 1350 } 1351 } /* product_middle too large */ 1352 } /* ahigh && bhigh */ 1353 } /* SvIOK(svl) */ 1354 } /* SvIOK(svr) */ 1355 #endif 1356 { 1357 NV right = SvNV_nomg(svr); 1358 NV left = SvNV_nomg(svl); 1359 (void)POPs; 1360 SETn( left * right ); 1361 RETURN; 1362 } 1363 } 1364 1365 PP(pp_divide) 1366 { 1367 dVAR; dSP; dATARGET; SV *svl, *svr; 1368 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric); 1369 svr = TOPs; 1370 svl = TOPm1s; 1371 /* Only try to do UV divide first 1372 if ((SLOPPYDIVIDE is true) or 1373 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large 1374 to preserve)) 1375 The assumption is that it is better to use floating point divide 1376 whenever possible, only doing integer divide first if we can't be sure. 1377 If NV_PRESERVES_UV is true then we know at compile time that no UV 1378 can be too large to preserve, so don't need to compile the code to 1379 test the size of UVs. */ 1380 1381 #ifdef SLOPPYDIVIDE 1382 # define PERL_TRY_UV_DIVIDE 1383 /* ensure that 20./5. == 4. */ 1384 #else 1385 # ifdef PERL_PRESERVE_IVUV 1386 # ifndef NV_PRESERVES_UV 1387 # define PERL_TRY_UV_DIVIDE 1388 # endif 1389 # endif 1390 #endif 1391 1392 #ifdef PERL_TRY_UV_DIVIDE 1393 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) { 1394 bool left_non_neg = SvUOK(svl); 1395 bool right_non_neg = SvUOK(svr); 1396 UV left; 1397 UV right; 1398 1399 if (right_non_neg) { 1400 right = SvUVX(svr); 1401 } 1402 else { 1403 const IV biv = SvIVX(svr); 1404 if (biv >= 0) { 1405 right = biv; 1406 right_non_neg = TRUE; /* effectively it's a UV now */ 1407 } 1408 else { 1409 right = -biv; 1410 } 1411 } 1412 /* historically undef()/0 gives a "Use of uninitialized value" 1413 warning before dieing, hence this test goes here. 1414 If it were immediately before the second SvIV_please, then 1415 DIE() would be invoked before left was even inspected, so 1416 no inspection would give no warning. */ 1417 if (right == 0) 1418 DIE(aTHX_ "Illegal division by zero"); 1419 1420 if (left_non_neg) { 1421 left = SvUVX(svl); 1422 } 1423 else { 1424 const IV aiv = SvIVX(svl); 1425 if (aiv >= 0) { 1426 left = aiv; 1427 left_non_neg = TRUE; /* effectively it's a UV now */ 1428 } 1429 else { 1430 left = -aiv; 1431 } 1432 } 1433 1434 if (left >= right 1435 #ifdef SLOPPYDIVIDE 1436 /* For sloppy divide we always attempt integer division. */ 1437 #else 1438 /* Otherwise we only attempt it if either or both operands 1439 would not be preserved by an NV. If both fit in NVs 1440 we fall through to the NV divide code below. However, 1441 as left >= right to ensure integer result here, we know that 1442 we can skip the test on the right operand - right big 1443 enough not to be preserved can't get here unless left is 1444 also too big. */ 1445 1446 && (left > ((UV)1 << NV_PRESERVES_UV_BITS)) 1447 #endif 1448 ) { 1449 /* Integer division can't overflow, but it can be imprecise. */ 1450 const UV result = left / right; 1451 if (result * right == left) { 1452 SP--; /* result is valid */ 1453 if (left_non_neg == right_non_neg) { 1454 /* signs identical, result is positive. */ 1455 SETu( result ); 1456 RETURN; 1457 } 1458 /* 2s complement assumption */ 1459 if (result <= (UV)IV_MIN) 1460 SETi( -(IV)result ); 1461 else { 1462 /* It's exact but too negative for IV. */ 1463 SETn( -(NV)result ); 1464 } 1465 RETURN; 1466 } /* tried integer divide but it was not an integer result */ 1467 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */ 1468 } /* one operand wasn't SvIOK */ 1469 #endif /* PERL_TRY_UV_DIVIDE */ 1470 { 1471 NV right = SvNV_nomg(svr); 1472 NV left = SvNV_nomg(svl); 1473 (void)POPs;(void)POPs; 1474 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 1475 if (! Perl_isnan(right) && right == 0.0) 1476 #else 1477 if (right == 0.0) 1478 #endif 1479 DIE(aTHX_ "Illegal division by zero"); 1480 PUSHn( left / right ); 1481 RETURN; 1482 } 1483 } 1484 1485 PP(pp_modulo) 1486 { 1487 dVAR; dSP; dATARGET; 1488 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric); 1489 { 1490 UV left = 0; 1491 UV right = 0; 1492 bool left_neg = FALSE; 1493 bool right_neg = FALSE; 1494 bool use_double = FALSE; 1495 bool dright_valid = FALSE; 1496 NV dright = 0.0; 1497 NV dleft = 0.0; 1498 SV * const svr = TOPs; 1499 SV * const svl = TOPm1s; 1500 if (SvIV_please_nomg(svr)) { 1501 right_neg = !SvUOK(svr); 1502 if (!right_neg) { 1503 right = SvUVX(svr); 1504 } else { 1505 const IV biv = SvIVX(svr); 1506 if (biv >= 0) { 1507 right = biv; 1508 right_neg = FALSE; /* effectively it's a UV now */ 1509 } else { 1510 right = -biv; 1511 } 1512 } 1513 } 1514 else { 1515 dright = SvNV_nomg(svr); 1516 right_neg = dright < 0; 1517 if (right_neg) 1518 dright = -dright; 1519 if (dright < UV_MAX_P1) { 1520 right = U_V(dright); 1521 dright_valid = TRUE; /* In case we need to use double below. */ 1522 } else { 1523 use_double = TRUE; 1524 } 1525 } 1526 1527 /* At this point use_double is only true if right is out of range for 1528 a UV. In range NV has been rounded down to nearest UV and 1529 use_double false. */ 1530 if (!use_double && SvIV_please_nomg(svl)) { 1531 left_neg = !SvUOK(svl); 1532 if (!left_neg) { 1533 left = SvUVX(svl); 1534 } else { 1535 const IV aiv = SvIVX(svl); 1536 if (aiv >= 0) { 1537 left = aiv; 1538 left_neg = FALSE; /* effectively it's a UV now */ 1539 } else { 1540 left = -aiv; 1541 } 1542 } 1543 } 1544 else { 1545 dleft = SvNV_nomg(svl); 1546 left_neg = dleft < 0; 1547 if (left_neg) 1548 dleft = -dleft; 1549 1550 /* This should be exactly the 5.6 behaviour - if left and right are 1551 both in range for UV then use U_V() rather than floor. */ 1552 if (!use_double) { 1553 if (dleft < UV_MAX_P1) { 1554 /* right was in range, so is dleft, so use UVs not double. 1555 */ 1556 left = U_V(dleft); 1557 } 1558 /* left is out of range for UV, right was in range, so promote 1559 right (back) to double. */ 1560 else { 1561 /* The +0.5 is used in 5.6 even though it is not strictly 1562 consistent with the implicit +0 floor in the U_V() 1563 inside the #if 1. */ 1564 dleft = Perl_floor(dleft + 0.5); 1565 use_double = TRUE; 1566 if (dright_valid) 1567 dright = Perl_floor(dright + 0.5); 1568 else 1569 dright = right; 1570 } 1571 } 1572 } 1573 sp -= 2; 1574 if (use_double) { 1575 NV dans; 1576 1577 if (!dright) 1578 DIE(aTHX_ "Illegal modulus zero"); 1579 1580 dans = Perl_fmod(dleft, dright); 1581 if ((left_neg != right_neg) && dans) 1582 dans = dright - dans; 1583 if (right_neg) 1584 dans = -dans; 1585 sv_setnv(TARG, dans); 1586 } 1587 else { 1588 UV ans; 1589 1590 if (!right) 1591 DIE(aTHX_ "Illegal modulus zero"); 1592 1593 ans = left % right; 1594 if ((left_neg != right_neg) && ans) 1595 ans = right - ans; 1596 if (right_neg) { 1597 /* XXX may warn: unary minus operator applied to unsigned type */ 1598 /* could change -foo to be (~foo)+1 instead */ 1599 if (ans <= ~((UV)IV_MAX)+1) 1600 sv_setiv(TARG, ~ans+1); 1601 else 1602 sv_setnv(TARG, -(NV)ans); 1603 } 1604 else 1605 sv_setuv(TARG, ans); 1606 } 1607 PUSHTARG; 1608 RETURN; 1609 } 1610 } 1611 1612 PP(pp_repeat) 1613 { 1614 dVAR; dSP; dATARGET; 1615 IV count; 1616 SV *sv; 1617 1618 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { 1619 /* TODO: think of some way of doing list-repeat overloading ??? */ 1620 sv = POPs; 1621 SvGETMAGIC(sv); 1622 } 1623 else { 1624 tryAMAGICbin_MG(repeat_amg, AMGf_assign); 1625 sv = POPs; 1626 } 1627 1628 if (SvIOKp(sv)) { 1629 if (SvUOK(sv)) { 1630 const UV uv = SvUV_nomg(sv); 1631 if (uv > IV_MAX) 1632 count = IV_MAX; /* The best we can do? */ 1633 else 1634 count = uv; 1635 } else { 1636 const IV iv = SvIV_nomg(sv); 1637 if (iv < 0) 1638 count = 0; 1639 else 1640 count = iv; 1641 } 1642 } 1643 else if (SvNOKp(sv)) { 1644 const NV nv = SvNV_nomg(sv); 1645 if (nv < 0.0) 1646 count = 0; 1647 else 1648 count = (IV)nv; 1649 } 1650 else 1651 count = SvIV_nomg(sv); 1652 1653 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { 1654 dMARK; 1655 static const char* const oom_list_extend = "Out of memory during list extend"; 1656 const I32 items = SP - MARK; 1657 const I32 max = items * count; 1658 1659 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend); 1660 /* Did the max computation overflow? */ 1661 if (items > 0 && max > 0 && (max < items || max < count)) 1662 Perl_croak(aTHX_ "%s", oom_list_extend); 1663 MEXTEND(MARK, max); 1664 if (count > 1) { 1665 while (SP > MARK) { 1666 #if 0 1667 /* This code was intended to fix 20010809.028: 1668 1669 $x = 'abcd'; 1670 for (($x =~ /./g) x 2) { 1671 print chop; # "abcdabcd" expected as output. 1672 } 1673 1674 * but that change (#11635) broke this code: 1675 1676 $x = [("foo")x2]; # only one "foo" ended up in the anonlist. 1677 1678 * I can't think of a better fix that doesn't introduce 1679 * an efficiency hit by copying the SVs. The stack isn't 1680 * refcounted, and mortalisation obviously doesn't 1681 * Do The Right Thing when the stack has more than 1682 * one pointer to the same mortal value. 1683 * .robin. 1684 */ 1685 if (*SP) { 1686 *SP = sv_2mortal(newSVsv(*SP)); 1687 SvREADONLY_on(*SP); 1688 } 1689 #else 1690 if (*SP) 1691 SvTEMP_off((*SP)); 1692 #endif 1693 SP--; 1694 } 1695 MARK++; 1696 repeatcpy((char*)(MARK + items), (char*)MARK, 1697 items * sizeof(const SV *), count - 1); 1698 SP += max; 1699 } 1700 else if (count <= 0) 1701 SP -= items; 1702 } 1703 else { /* Note: mark already snarfed by pp_list */ 1704 SV * const tmpstr = POPs; 1705 STRLEN len; 1706 bool isutf; 1707 static const char* const oom_string_extend = 1708 "Out of memory during string extend"; 1709 1710 if (TARG != tmpstr) 1711 sv_setsv_nomg(TARG, tmpstr); 1712 SvPV_force_nomg(TARG, len); 1713 isutf = DO_UTF8(TARG); 1714 if (count != 1) { 1715 if (count < 1) 1716 SvCUR_set(TARG, 0); 1717 else { 1718 const STRLEN max = (UV)count * len; 1719 if (len > MEM_SIZE_MAX / count) 1720 Perl_croak(aTHX_ "%s", oom_string_extend); 1721 MEM_WRAP_CHECK_1(max, char, oom_string_extend); 1722 SvGROW(TARG, max + 1); 1723 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1); 1724 SvCUR_set(TARG, SvCUR(TARG) * count); 1725 } 1726 *SvEND(TARG) = '\0'; 1727 } 1728 if (isutf) 1729 (void)SvPOK_only_UTF8(TARG); 1730 else 1731 (void)SvPOK_only(TARG); 1732 1733 if (PL_op->op_private & OPpREPEAT_DOLIST) { 1734 /* The parser saw this as a list repeat, and there 1735 are probably several items on the stack. But we're 1736 in scalar context, and there's no pp_list to save us 1737 now. So drop the rest of the items -- robin@kitsite.com 1738 */ 1739 dMARK; 1740 SP = MARK; 1741 } 1742 PUSHTARG; 1743 } 1744 RETURN; 1745 } 1746 1747 PP(pp_subtract) 1748 { 1749 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr; 1750 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric); 1751 svr = TOPs; 1752 svl = TOPm1s; 1753 useleft = USE_LEFT(svl); 1754 #ifdef PERL_PRESERVE_IVUV 1755 /* See comments in pp_add (in pp_hot.c) about Overflow, and how 1756 "bad things" happen if you rely on signed integers wrapping. */ 1757 if (SvIV_please_nomg(svr)) { 1758 /* Unless the left argument is integer in range we are going to have to 1759 use NV maths. Hence only attempt to coerce the right argument if 1760 we know the left is integer. */ 1761 UV auv = 0; 1762 bool auvok = FALSE; 1763 bool a_valid = 0; 1764 1765 if (!useleft) { 1766 auv = 0; 1767 a_valid = auvok = 1; 1768 /* left operand is undef, treat as zero. */ 1769 } else { 1770 /* Left operand is defined, so is it IV? */ 1771 if (SvIV_please_nomg(svl)) { 1772 if ((auvok = SvUOK(svl))) 1773 auv = SvUVX(svl); 1774 else { 1775 const IV aiv = SvIVX(svl); 1776 if (aiv >= 0) { 1777 auv = aiv; 1778 auvok = 1; /* Now acting as a sign flag. */ 1779 } else { /* 2s complement assumption for IV_MIN */ 1780 auv = (UV)-aiv; 1781 } 1782 } 1783 a_valid = 1; 1784 } 1785 } 1786 if (a_valid) { 1787 bool result_good = 0; 1788 UV result; 1789 UV buv; 1790 bool buvok = SvUOK(svr); 1791 1792 if (buvok) 1793 buv = SvUVX(svr); 1794 else { 1795 const IV biv = SvIVX(svr); 1796 if (biv >= 0) { 1797 buv = biv; 1798 buvok = 1; 1799 } else 1800 buv = (UV)-biv; 1801 } 1802 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve, 1803 else "IV" now, independent of how it came in. 1804 if a, b represents positive, A, B negative, a maps to -A etc 1805 a - b => (a - b) 1806 A - b => -(a + b) 1807 a - B => (a + b) 1808 A - B => -(a - b) 1809 all UV maths. negate result if A negative. 1810 subtract if signs same, add if signs differ. */ 1811 1812 if (auvok ^ buvok) { 1813 /* Signs differ. */ 1814 result = auv + buv; 1815 if (result >= auv) 1816 result_good = 1; 1817 } else { 1818 /* Signs same */ 1819 if (auv >= buv) { 1820 result = auv - buv; 1821 /* Must get smaller */ 1822 if (result <= auv) 1823 result_good = 1; 1824 } else { 1825 result = buv - auv; 1826 if (result <= buv) { 1827 /* result really should be -(auv-buv). as its negation 1828 of true value, need to swap our result flag */ 1829 auvok = !auvok; 1830 result_good = 1; 1831 } 1832 } 1833 } 1834 if (result_good) { 1835 SP--; 1836 if (auvok) 1837 SETu( result ); 1838 else { 1839 /* Negate result */ 1840 if (result <= (UV)IV_MIN) 1841 SETi( -(IV)result ); 1842 else { 1843 /* result valid, but out of range for IV. */ 1844 SETn( -(NV)result ); 1845 } 1846 } 1847 RETURN; 1848 } /* Overflow, drop through to NVs. */ 1849 } 1850 } 1851 #endif 1852 { 1853 NV value = SvNV_nomg(svr); 1854 (void)POPs; 1855 1856 if (!useleft) { 1857 /* left operand is undef, treat as zero - value */ 1858 SETn(-value); 1859 RETURN; 1860 } 1861 SETn( SvNV_nomg(svl) - value ); 1862 RETURN; 1863 } 1864 } 1865 1866 PP(pp_left_shift) 1867 { 1868 dVAR; dSP; dATARGET; SV *svl, *svr; 1869 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric); 1870 svr = POPs; 1871 svl = TOPs; 1872 { 1873 const IV shift = SvIV_nomg(svr); 1874 if (PL_op->op_private & HINT_INTEGER) { 1875 const IV i = SvIV_nomg(svl); 1876 SETi(i << shift); 1877 } 1878 else { 1879 const UV u = SvUV_nomg(svl); 1880 SETu(u << shift); 1881 } 1882 RETURN; 1883 } 1884 } 1885 1886 PP(pp_right_shift) 1887 { 1888 dVAR; dSP; dATARGET; SV *svl, *svr; 1889 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric); 1890 svr = POPs; 1891 svl = TOPs; 1892 { 1893 const IV shift = SvIV_nomg(svr); 1894 if (PL_op->op_private & HINT_INTEGER) { 1895 const IV i = SvIV_nomg(svl); 1896 SETi(i >> shift); 1897 } 1898 else { 1899 const UV u = SvUV_nomg(svl); 1900 SETu(u >> shift); 1901 } 1902 RETURN; 1903 } 1904 } 1905 1906 PP(pp_lt) 1907 { 1908 dVAR; dSP; 1909 SV *left, *right; 1910 1911 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric); 1912 right = POPs; 1913 left = TOPs; 1914 SETs(boolSV( 1915 (SvIOK_notUV(left) && SvIOK_notUV(right)) 1916 ? (SvIVX(left) < SvIVX(right)) 1917 : (do_ncmp(left, right) == -1) 1918 )); 1919 RETURN; 1920 } 1921 1922 PP(pp_gt) 1923 { 1924 dVAR; dSP; 1925 SV *left, *right; 1926 1927 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric); 1928 right = POPs; 1929 left = TOPs; 1930 SETs(boolSV( 1931 (SvIOK_notUV(left) && SvIOK_notUV(right)) 1932 ? (SvIVX(left) > SvIVX(right)) 1933 : (do_ncmp(left, right) == 1) 1934 )); 1935 RETURN; 1936 } 1937 1938 PP(pp_le) 1939 { 1940 dVAR; dSP; 1941 SV *left, *right; 1942 1943 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric); 1944 right = POPs; 1945 left = TOPs; 1946 SETs(boolSV( 1947 (SvIOK_notUV(left) && SvIOK_notUV(right)) 1948 ? (SvIVX(left) <= SvIVX(right)) 1949 : (do_ncmp(left, right) <= 0) 1950 )); 1951 RETURN; 1952 } 1953 1954 PP(pp_ge) 1955 { 1956 dVAR; dSP; 1957 SV *left, *right; 1958 1959 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric); 1960 right = POPs; 1961 left = TOPs; 1962 SETs(boolSV( 1963 (SvIOK_notUV(left) && SvIOK_notUV(right)) 1964 ? (SvIVX(left) >= SvIVX(right)) 1965 : ( (do_ncmp(left, right) & 2) == 0) 1966 )); 1967 RETURN; 1968 } 1969 1970 PP(pp_ne) 1971 { 1972 dVAR; dSP; 1973 SV *left, *right; 1974 1975 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric); 1976 right = POPs; 1977 left = TOPs; 1978 SETs(boolSV( 1979 (SvIOK_notUV(left) && SvIOK_notUV(right)) 1980 ? (SvIVX(left) != SvIVX(right)) 1981 : (do_ncmp(left, right) != 0) 1982 )); 1983 RETURN; 1984 } 1985 1986 /* compare left and right SVs. Returns: 1987 * -1: < 1988 * 0: == 1989 * 1: > 1990 * 2: left or right was a NaN 1991 */ 1992 I32 1993 Perl_do_ncmp(pTHX_ SV* const left, SV * const right) 1994 { 1995 dVAR; 1996 1997 PERL_ARGS_ASSERT_DO_NCMP; 1998 #ifdef PERL_PRESERVE_IVUV 1999 /* Fortunately it seems NaN isn't IOK */ 2000 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) { 2001 if (!SvUOK(left)) { 2002 const IV leftiv = SvIVX(left); 2003 if (!SvUOK(right)) { 2004 /* ## IV <=> IV ## */ 2005 const IV rightiv = SvIVX(right); 2006 return (leftiv > rightiv) - (leftiv < rightiv); 2007 } 2008 /* ## IV <=> UV ## */ 2009 if (leftiv < 0) 2010 /* As (b) is a UV, it's >=0, so it must be < */ 2011 return -1; 2012 { 2013 const UV rightuv = SvUVX(right); 2014 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv); 2015 } 2016 } 2017 2018 if (SvUOK(right)) { 2019 /* ## UV <=> UV ## */ 2020 const UV leftuv = SvUVX(left); 2021 const UV rightuv = SvUVX(right); 2022 return (leftuv > rightuv) - (leftuv < rightuv); 2023 } 2024 /* ## UV <=> IV ## */ 2025 { 2026 const IV rightiv = SvIVX(right); 2027 if (rightiv < 0) 2028 /* As (a) is a UV, it's >=0, so it cannot be < */ 2029 return 1; 2030 { 2031 const UV leftuv = SvUVX(left); 2032 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv); 2033 } 2034 } 2035 assert(0); /* NOTREACHED */ 2036 } 2037 #endif 2038 { 2039 NV const rnv = SvNV_nomg(right); 2040 NV const lnv = SvNV_nomg(left); 2041 2042 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2043 if (Perl_isnan(lnv) || Perl_isnan(rnv)) { 2044 return 2; 2045 } 2046 return (lnv > rnv) - (lnv < rnv); 2047 #else 2048 if (lnv < rnv) 2049 return -1; 2050 if (lnv > rnv) 2051 return 1; 2052 if (lnv == rnv) 2053 return 0; 2054 return 2; 2055 #endif 2056 } 2057 } 2058 2059 2060 PP(pp_ncmp) 2061 { 2062 dVAR; dSP; 2063 SV *left, *right; 2064 I32 value; 2065 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric); 2066 right = POPs; 2067 left = TOPs; 2068 value = do_ncmp(left, right); 2069 if (value == 2) { 2070 SETs(&PL_sv_undef); 2071 } 2072 else { 2073 dTARGET; 2074 SETi(value); 2075 } 2076 RETURN; 2077 } 2078 2079 PP(pp_sle) 2080 { 2081 dVAR; dSP; 2082 2083 int amg_type = sle_amg; 2084 int multiplier = 1; 2085 int rhs = 1; 2086 2087 switch (PL_op->op_type) { 2088 case OP_SLT: 2089 amg_type = slt_amg; 2090 /* cmp < 0 */ 2091 rhs = 0; 2092 break; 2093 case OP_SGT: 2094 amg_type = sgt_amg; 2095 /* cmp > 0 */ 2096 multiplier = -1; 2097 rhs = 0; 2098 break; 2099 case OP_SGE: 2100 amg_type = sge_amg; 2101 /* cmp >= 0 */ 2102 multiplier = -1; 2103 break; 2104 } 2105 2106 tryAMAGICbin_MG(amg_type, AMGf_set); 2107 { 2108 dPOPTOPssrl; 2109 const int cmp = (IN_LOCALE_RUNTIME 2110 ? sv_cmp_locale_flags(left, right, 0) 2111 : sv_cmp_flags(left, right, 0)); 2112 SETs(boolSV(cmp * multiplier < rhs)); 2113 RETURN; 2114 } 2115 } 2116 2117 PP(pp_seq) 2118 { 2119 dVAR; dSP; 2120 tryAMAGICbin_MG(seq_amg, AMGf_set); 2121 { 2122 dPOPTOPssrl; 2123 SETs(boolSV(sv_eq_flags(left, right, 0))); 2124 RETURN; 2125 } 2126 } 2127 2128 PP(pp_sne) 2129 { 2130 dVAR; dSP; 2131 tryAMAGICbin_MG(sne_amg, AMGf_set); 2132 { 2133 dPOPTOPssrl; 2134 SETs(boolSV(!sv_eq_flags(left, right, 0))); 2135 RETURN; 2136 } 2137 } 2138 2139 PP(pp_scmp) 2140 { 2141 dVAR; dSP; dTARGET; 2142 tryAMAGICbin_MG(scmp_amg, 0); 2143 { 2144 dPOPTOPssrl; 2145 const int cmp = (IN_LOCALE_RUNTIME 2146 ? sv_cmp_locale_flags(left, right, 0) 2147 : sv_cmp_flags(left, right, 0)); 2148 SETi( cmp ); 2149 RETURN; 2150 } 2151 } 2152 2153 PP(pp_bit_and) 2154 { 2155 dVAR; dSP; dATARGET; 2156 tryAMAGICbin_MG(band_amg, AMGf_assign); 2157 { 2158 dPOPTOPssrl; 2159 if (SvNIOKp(left) || SvNIOKp(right)) { 2160 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); 2161 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); 2162 if (PL_op->op_private & HINT_INTEGER) { 2163 const IV i = SvIV_nomg(left) & SvIV_nomg(right); 2164 SETi(i); 2165 } 2166 else { 2167 const UV u = SvUV_nomg(left) & SvUV_nomg(right); 2168 SETu(u); 2169 } 2170 if (left_ro_nonnum && left != TARG) SvNIOK_off(left); 2171 if (right_ro_nonnum) SvNIOK_off(right); 2172 } 2173 else { 2174 do_vop(PL_op->op_type, TARG, left, right); 2175 SETTARG; 2176 } 2177 RETURN; 2178 } 2179 } 2180 2181 PP(pp_bit_or) 2182 { 2183 dVAR; dSP; dATARGET; 2184 const int op_type = PL_op->op_type; 2185 2186 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign); 2187 { 2188 dPOPTOPssrl; 2189 if (SvNIOKp(left) || SvNIOKp(right)) { 2190 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left); 2191 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right); 2192 if (PL_op->op_private & HINT_INTEGER) { 2193 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0); 2194 const IV r = SvIV_nomg(right); 2195 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); 2196 SETi(result); 2197 } 2198 else { 2199 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0); 2200 const UV r = SvUV_nomg(right); 2201 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r); 2202 SETu(result); 2203 } 2204 if (left_ro_nonnum && left != TARG) SvNIOK_off(left); 2205 if (right_ro_nonnum) SvNIOK_off(right); 2206 } 2207 else { 2208 do_vop(op_type, TARG, left, right); 2209 SETTARG; 2210 } 2211 RETURN; 2212 } 2213 } 2214 2215 PERL_STATIC_INLINE bool 2216 S_negate_string(pTHX) 2217 { 2218 dTARGET; dSP; 2219 STRLEN len; 2220 const char *s; 2221 SV * const sv = TOPs; 2222 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv))) 2223 return FALSE; 2224 s = SvPV_nomg_const(sv, len); 2225 if (isIDFIRST(*s)) { 2226 sv_setpvs(TARG, "-"); 2227 sv_catsv(TARG, sv); 2228 } 2229 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) { 2230 sv_setsv_nomg(TARG, sv); 2231 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-'; 2232 } 2233 else return FALSE; 2234 SETTARG; PUTBACK; 2235 return TRUE; 2236 } 2237 2238 PP(pp_negate) 2239 { 2240 dVAR; dSP; dTARGET; 2241 tryAMAGICun_MG(neg_amg, AMGf_numeric); 2242 if (S_negate_string(aTHX)) return NORMAL; 2243 { 2244 SV * const sv = TOPs; 2245 2246 if (SvIOK(sv)) { 2247 /* It's publicly an integer */ 2248 oops_its_an_int: 2249 if (SvIsUV(sv)) { 2250 if (SvIVX(sv) == IV_MIN) { 2251 /* 2s complement assumption. */ 2252 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == 2253 IV_MIN */ 2254 RETURN; 2255 } 2256 else if (SvUVX(sv) <= IV_MAX) { 2257 SETi(-SvIVX(sv)); 2258 RETURN; 2259 } 2260 } 2261 else if (SvIVX(sv) != IV_MIN) { 2262 SETi(-SvIVX(sv)); 2263 RETURN; 2264 } 2265 #ifdef PERL_PRESERVE_IVUV 2266 else { 2267 SETu((UV)IV_MIN); 2268 RETURN; 2269 } 2270 #endif 2271 } 2272 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv))) 2273 SETn(-SvNV_nomg(sv)); 2274 else if (SvPOKp(sv) && SvIV_please_nomg(sv)) 2275 goto oops_its_an_int; 2276 else 2277 SETn(-SvNV_nomg(sv)); 2278 } 2279 RETURN; 2280 } 2281 2282 PP(pp_not) 2283 { 2284 dVAR; dSP; 2285 tryAMAGICun_MG(not_amg, AMGf_set); 2286 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp)); 2287 return NORMAL; 2288 } 2289 2290 PP(pp_complement) 2291 { 2292 dVAR; dSP; dTARGET; 2293 tryAMAGICun_MG(compl_amg, AMGf_numeric); 2294 { 2295 dTOPss; 2296 if (SvNIOKp(sv)) { 2297 if (PL_op->op_private & HINT_INTEGER) { 2298 const IV i = ~SvIV_nomg(sv); 2299 SETi(i); 2300 } 2301 else { 2302 const UV u = ~SvUV_nomg(sv); 2303 SETu(u); 2304 } 2305 } 2306 else { 2307 U8 *tmps; 2308 I32 anum; 2309 STRLEN len; 2310 2311 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */ 2312 sv_setsv_nomg(TARG, sv); 2313 tmps = (U8*)SvPV_force_nomg(TARG, len); 2314 anum = len; 2315 if (SvUTF8(TARG)) { 2316 /* Calculate exact length, let's not estimate. */ 2317 STRLEN targlen = 0; 2318 STRLEN l; 2319 UV nchar = 0; 2320 UV nwide = 0; 2321 U8 * const send = tmps + len; 2322 U8 * const origtmps = tmps; 2323 const UV utf8flags = UTF8_ALLOW_ANYUV; 2324 2325 while (tmps < send) { 2326 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags); 2327 tmps += l; 2328 targlen += UNISKIP(~c); 2329 nchar++; 2330 if (c > 0xff) 2331 nwide++; 2332 } 2333 2334 /* Now rewind strings and write them. */ 2335 tmps = origtmps; 2336 2337 if (nwide) { 2338 U8 *result; 2339 U8 *p; 2340 2341 Newx(result, targlen + 1, U8); 2342 p = result; 2343 while (tmps < send) { 2344 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags); 2345 tmps += l; 2346 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY); 2347 } 2348 *p = '\0'; 2349 sv_usepvn_flags(TARG, (char*)result, targlen, 2350 SV_HAS_TRAILING_NUL); 2351 SvUTF8_on(TARG); 2352 } 2353 else { 2354 U8 *result; 2355 U8 *p; 2356 2357 Newx(result, nchar + 1, U8); 2358 p = result; 2359 while (tmps < send) { 2360 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags); 2361 tmps += l; 2362 *p++ = ~c; 2363 } 2364 *p = '\0'; 2365 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL); 2366 SvUTF8_off(TARG); 2367 } 2368 SETTARG; 2369 RETURN; 2370 } 2371 #ifdef LIBERAL 2372 { 2373 long *tmpl; 2374 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++) 2375 *tmps = ~*tmps; 2376 tmpl = (long*)tmps; 2377 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++) 2378 *tmpl = ~*tmpl; 2379 tmps = (U8*)tmpl; 2380 } 2381 #endif 2382 for ( ; anum > 0; anum--, tmps++) 2383 *tmps = ~*tmps; 2384 SETTARG; 2385 } 2386 RETURN; 2387 } 2388 } 2389 2390 /* integer versions of some of the above */ 2391 2392 PP(pp_i_multiply) 2393 { 2394 dVAR; dSP; dATARGET; 2395 tryAMAGICbin_MG(mult_amg, AMGf_assign); 2396 { 2397 dPOPTOPiirl_nomg; 2398 SETi( left * right ); 2399 RETURN; 2400 } 2401 } 2402 2403 PP(pp_i_divide) 2404 { 2405 IV num; 2406 dVAR; dSP; dATARGET; 2407 tryAMAGICbin_MG(div_amg, AMGf_assign); 2408 { 2409 dPOPTOPssrl; 2410 IV value = SvIV_nomg(right); 2411 if (value == 0) 2412 DIE(aTHX_ "Illegal division by zero"); 2413 num = SvIV_nomg(left); 2414 2415 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */ 2416 if (value == -1) 2417 value = - num; 2418 else 2419 value = num / value; 2420 SETi(value); 2421 RETURN; 2422 } 2423 } 2424 2425 #if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS) 2426 STATIC 2427 PP(pp_i_modulo_0) 2428 #else 2429 PP(pp_i_modulo) 2430 #endif 2431 { 2432 /* This is the vanilla old i_modulo. */ 2433 dVAR; dSP; dATARGET; 2434 tryAMAGICbin_MG(modulo_amg, AMGf_assign); 2435 { 2436 dPOPTOPiirl_nomg; 2437 if (!right) 2438 DIE(aTHX_ "Illegal modulus zero"); 2439 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ 2440 if (right == -1) 2441 SETi( 0 ); 2442 else 2443 SETi( left % right ); 2444 RETURN; 2445 } 2446 } 2447 2448 #if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS) 2449 STATIC 2450 PP(pp_i_modulo_1) 2451 2452 { 2453 /* This is the i_modulo with the workaround for the _moddi3 bug 2454 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround). 2455 * See below for pp_i_modulo. */ 2456 dVAR; dSP; dATARGET; 2457 tryAMAGICbin_MG(modulo_amg, AMGf_assign); 2458 { 2459 dPOPTOPiirl_nomg; 2460 if (!right) 2461 DIE(aTHX_ "Illegal modulus zero"); 2462 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ 2463 if (right == -1) 2464 SETi( 0 ); 2465 else 2466 SETi( left % PERL_ABS(right) ); 2467 RETURN; 2468 } 2469 } 2470 2471 PP(pp_i_modulo) 2472 { 2473 dVAR; dSP; dATARGET; 2474 tryAMAGICbin_MG(modulo_amg, AMGf_assign); 2475 { 2476 dPOPTOPiirl_nomg; 2477 if (!right) 2478 DIE(aTHX_ "Illegal modulus zero"); 2479 /* The assumption is to use hereafter the old vanilla version... */ 2480 PL_op->op_ppaddr = 2481 PL_ppaddr[OP_I_MODULO] = 2482 Perl_pp_i_modulo_0; 2483 /* .. but if we have glibc, we might have a buggy _moddi3 2484 * (at least glicb 2.2.5 is known to have this bug), in other 2485 * words our integer modulus with negative quad as the second 2486 * argument might be broken. Test for this and re-patch the 2487 * opcode dispatch table if that is the case, remembering to 2488 * also apply the workaround so that this first round works 2489 * right, too. See [perl #9402] for more information. */ 2490 { 2491 IV l = 3; 2492 IV r = -10; 2493 /* Cannot do this check with inlined IV constants since 2494 * that seems to work correctly even with the buggy glibc. */ 2495 if (l % r == -3) { 2496 /* Yikes, we have the bug. 2497 * Patch in the workaround version. */ 2498 PL_op->op_ppaddr = 2499 PL_ppaddr[OP_I_MODULO] = 2500 &Perl_pp_i_modulo_1; 2501 /* Make certain we work right this time, too. */ 2502 right = PERL_ABS(right); 2503 } 2504 } 2505 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ 2506 if (right == -1) 2507 SETi( 0 ); 2508 else 2509 SETi( left % right ); 2510 RETURN; 2511 } 2512 } 2513 #endif 2514 2515 PP(pp_i_add) 2516 { 2517 dVAR; dSP; dATARGET; 2518 tryAMAGICbin_MG(add_amg, AMGf_assign); 2519 { 2520 dPOPTOPiirl_ul_nomg; 2521 SETi( left + right ); 2522 RETURN; 2523 } 2524 } 2525 2526 PP(pp_i_subtract) 2527 { 2528 dVAR; dSP; dATARGET; 2529 tryAMAGICbin_MG(subtr_amg, AMGf_assign); 2530 { 2531 dPOPTOPiirl_ul_nomg; 2532 SETi( left - right ); 2533 RETURN; 2534 } 2535 } 2536 2537 PP(pp_i_lt) 2538 { 2539 dVAR; dSP; 2540 tryAMAGICbin_MG(lt_amg, AMGf_set); 2541 { 2542 dPOPTOPiirl_nomg; 2543 SETs(boolSV(left < right)); 2544 RETURN; 2545 } 2546 } 2547 2548 PP(pp_i_gt) 2549 { 2550 dVAR; dSP; 2551 tryAMAGICbin_MG(gt_amg, AMGf_set); 2552 { 2553 dPOPTOPiirl_nomg; 2554 SETs(boolSV(left > right)); 2555 RETURN; 2556 } 2557 } 2558 2559 PP(pp_i_le) 2560 { 2561 dVAR; dSP; 2562 tryAMAGICbin_MG(le_amg, AMGf_set); 2563 { 2564 dPOPTOPiirl_nomg; 2565 SETs(boolSV(left <= right)); 2566 RETURN; 2567 } 2568 } 2569 2570 PP(pp_i_ge) 2571 { 2572 dVAR; dSP; 2573 tryAMAGICbin_MG(ge_amg, AMGf_set); 2574 { 2575 dPOPTOPiirl_nomg; 2576 SETs(boolSV(left >= right)); 2577 RETURN; 2578 } 2579 } 2580 2581 PP(pp_i_eq) 2582 { 2583 dVAR; dSP; 2584 tryAMAGICbin_MG(eq_amg, AMGf_set); 2585 { 2586 dPOPTOPiirl_nomg; 2587 SETs(boolSV(left == right)); 2588 RETURN; 2589 } 2590 } 2591 2592 PP(pp_i_ne) 2593 { 2594 dVAR; dSP; 2595 tryAMAGICbin_MG(ne_amg, AMGf_set); 2596 { 2597 dPOPTOPiirl_nomg; 2598 SETs(boolSV(left != right)); 2599 RETURN; 2600 } 2601 } 2602 2603 PP(pp_i_ncmp) 2604 { 2605 dVAR; dSP; dTARGET; 2606 tryAMAGICbin_MG(ncmp_amg, 0); 2607 { 2608 dPOPTOPiirl_nomg; 2609 I32 value; 2610 2611 if (left > right) 2612 value = 1; 2613 else if (left < right) 2614 value = -1; 2615 else 2616 value = 0; 2617 SETi(value); 2618 RETURN; 2619 } 2620 } 2621 2622 PP(pp_i_negate) 2623 { 2624 dVAR; dSP; dTARGET; 2625 tryAMAGICun_MG(neg_amg, 0); 2626 if (S_negate_string(aTHX)) return NORMAL; 2627 { 2628 SV * const sv = TOPs; 2629 IV const i = SvIV_nomg(sv); 2630 SETi(-i); 2631 RETURN; 2632 } 2633 } 2634 2635 /* High falutin' math. */ 2636 2637 PP(pp_atan2) 2638 { 2639 dVAR; dSP; dTARGET; 2640 tryAMAGICbin_MG(atan2_amg, 0); 2641 { 2642 dPOPTOPnnrl_nomg; 2643 SETn(Perl_atan2(left, right)); 2644 RETURN; 2645 } 2646 } 2647 2648 PP(pp_sin) 2649 { 2650 dVAR; dSP; dTARGET; 2651 int amg_type = sin_amg; 2652 const char *neg_report = NULL; 2653 NV (*func)(NV) = Perl_sin; 2654 const int op_type = PL_op->op_type; 2655 2656 switch (op_type) { 2657 case OP_COS: 2658 amg_type = cos_amg; 2659 func = Perl_cos; 2660 break; 2661 case OP_EXP: 2662 amg_type = exp_amg; 2663 func = Perl_exp; 2664 break; 2665 case OP_LOG: 2666 amg_type = log_amg; 2667 func = Perl_log; 2668 neg_report = "log"; 2669 break; 2670 case OP_SQRT: 2671 amg_type = sqrt_amg; 2672 func = Perl_sqrt; 2673 neg_report = "sqrt"; 2674 break; 2675 } 2676 2677 2678 tryAMAGICun_MG(amg_type, 0); 2679 { 2680 SV * const arg = POPs; 2681 const NV value = SvNV_nomg(arg); 2682 if (neg_report) { 2683 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) { 2684 SET_NUMERIC_STANDARD(); 2685 /* diag_listed_as: Can't take log of %g */ 2686 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value); 2687 } 2688 } 2689 XPUSHn(func(value)); 2690 RETURN; 2691 } 2692 } 2693 2694 /* Support Configure command-line overrides for rand() functions. 2695 After 5.005, perhaps we should replace this by Configure support 2696 for drand48(), random(), or rand(). For 5.005, though, maintain 2697 compatibility by calling rand() but allow the user to override it. 2698 See INSTALL for details. --Andy Dougherty 15 July 1998 2699 */ 2700 /* Now it's after 5.005, and Configure supports drand48() and random(), 2701 in addition to rand(). So the overrides should not be needed any more. 2702 --Jarkko Hietaniemi 27 September 1998 2703 */ 2704 2705 #ifndef HAS_DRAND48_PROTO 2706 extern double drand48 (void); 2707 #endif 2708 2709 PP(pp_rand) 2710 { 2711 dVAR; 2712 if (!PL_srand_called) { 2713 (void)seedDrand01((Rand_seed_t)seed()); 2714 PL_srand_called = TRUE; 2715 } 2716 { 2717 dSP; 2718 NV value; 2719 EXTEND(SP, 1); 2720 2721 if (MAXARG < 1) 2722 value = 1.0; 2723 else { 2724 SV * const sv = POPs; 2725 if(!sv) 2726 value = 1.0; 2727 else 2728 value = SvNV(sv); 2729 } 2730 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */ 2731 if (value == 0.0) 2732 value = 1.0; 2733 { 2734 dTARGET; 2735 PUSHs(TARG); 2736 PUTBACK; 2737 value *= Drand01(); 2738 sv_setnv_mg(TARG, value); 2739 } 2740 } 2741 return NORMAL; 2742 } 2743 2744 PP(pp_srand) 2745 { 2746 dVAR; dSP; dTARGET; 2747 UV anum; 2748 2749 if (MAXARG >= 1 && (TOPs || POPs)) { 2750 SV *top; 2751 char *pv; 2752 STRLEN len; 2753 int flags; 2754 2755 top = POPs; 2756 pv = SvPV(top, len); 2757 flags = grok_number(pv, len, &anum); 2758 2759 if (!(flags & IS_NUMBER_IN_UV)) { 2760 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 2761 "Integer overflow in srand"); 2762 anum = UV_MAX; 2763 } 2764 } 2765 else { 2766 anum = seed(); 2767 } 2768 2769 (void)seedDrand01((Rand_seed_t)anum); 2770 PL_srand_called = TRUE; 2771 if (anum) 2772 XPUSHu(anum); 2773 else { 2774 /* Historically srand always returned true. We can avoid breaking 2775 that like this: */ 2776 sv_setpvs(TARG, "0 but true"); 2777 XPUSHTARG; 2778 } 2779 RETURN; 2780 } 2781 2782 PP(pp_int) 2783 { 2784 dVAR; dSP; dTARGET; 2785 tryAMAGICun_MG(int_amg, AMGf_numeric); 2786 { 2787 SV * const sv = TOPs; 2788 const IV iv = SvIV_nomg(sv); 2789 /* XXX it's arguable that compiler casting to IV might be subtly 2790 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which 2791 else preferring IV has introduced a subtle behaviour change bug. OTOH 2792 relying on floating point to be accurate is a bug. */ 2793 2794 if (!SvOK(sv)) { 2795 SETu(0); 2796 } 2797 else if (SvIOK(sv)) { 2798 if (SvIsUV(sv)) 2799 SETu(SvUV_nomg(sv)); 2800 else 2801 SETi(iv); 2802 } 2803 else { 2804 const NV value = SvNV_nomg(sv); 2805 if (value >= 0.0) { 2806 if (value < (NV)UV_MAX + 0.5) { 2807 SETu(U_V(value)); 2808 } else { 2809 SETn(Perl_floor(value)); 2810 } 2811 } 2812 else { 2813 if (value > (NV)IV_MIN - 0.5) { 2814 SETi(I_V(value)); 2815 } else { 2816 SETn(Perl_ceil(value)); 2817 } 2818 } 2819 } 2820 } 2821 RETURN; 2822 } 2823 2824 PP(pp_abs) 2825 { 2826 dVAR; dSP; dTARGET; 2827 tryAMAGICun_MG(abs_amg, AMGf_numeric); 2828 { 2829 SV * const sv = TOPs; 2830 /* This will cache the NV value if string isn't actually integer */ 2831 const IV iv = SvIV_nomg(sv); 2832 2833 if (!SvOK(sv)) { 2834 SETu(0); 2835 } 2836 else if (SvIOK(sv)) { 2837 /* IVX is precise */ 2838 if (SvIsUV(sv)) { 2839 SETu(SvUV_nomg(sv)); /* force it to be numeric only */ 2840 } else { 2841 if (iv >= 0) { 2842 SETi(iv); 2843 } else { 2844 if (iv != IV_MIN) { 2845 SETi(-iv); 2846 } else { 2847 /* 2s complement assumption. Also, not really needed as 2848 IV_MIN and -IV_MIN should both be %100...00 and NV-able */ 2849 SETu(IV_MIN); 2850 } 2851 } 2852 } 2853 } else{ 2854 const NV value = SvNV_nomg(sv); 2855 if (value < 0.0) 2856 SETn(-value); 2857 else 2858 SETn(value); 2859 } 2860 } 2861 RETURN; 2862 } 2863 2864 PP(pp_oct) 2865 { 2866 dVAR; dSP; dTARGET; 2867 const char *tmps; 2868 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 2869 STRLEN len; 2870 NV result_nv; 2871 UV result_uv; 2872 SV* const sv = POPs; 2873 2874 tmps = (SvPV_const(sv, len)); 2875 if (DO_UTF8(sv)) { 2876 /* If Unicode, try to downgrade 2877 * If not possible, croak. */ 2878 SV* const tsv = sv_2mortal(newSVsv(sv)); 2879 2880 SvUTF8_on(tsv); 2881 sv_utf8_downgrade(tsv, FALSE); 2882 tmps = SvPV_const(tsv, len); 2883 } 2884 if (PL_op->op_type == OP_HEX) 2885 goto hex; 2886 2887 while (*tmps && len && isSPACE(*tmps)) 2888 tmps++, len--; 2889 if (*tmps == '0') 2890 tmps++, len--; 2891 if (*tmps == 'x' || *tmps == 'X') { 2892 hex: 2893 result_uv = grok_hex (tmps, &len, &flags, &result_nv); 2894 } 2895 else if (*tmps == 'b' || *tmps == 'B') 2896 result_uv = grok_bin (tmps, &len, &flags, &result_nv); 2897 else 2898 result_uv = grok_oct (tmps, &len, &flags, &result_nv); 2899 2900 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) { 2901 XPUSHn(result_nv); 2902 } 2903 else { 2904 XPUSHu(result_uv); 2905 } 2906 RETURN; 2907 } 2908 2909 /* String stuff. */ 2910 2911 PP(pp_length) 2912 { 2913 dVAR; dSP; dTARGET; 2914 SV * const sv = TOPs; 2915 2916 SvGETMAGIC(sv); 2917 if (SvOK(sv)) { 2918 if (!IN_BYTES) 2919 SETi(sv_len_utf8_nomg(sv)); 2920 else 2921 { 2922 STRLEN len; 2923 (void)SvPV_nomg_const(sv,len); 2924 SETi(len); 2925 } 2926 } else { 2927 if (!SvPADTMP(TARG)) { 2928 sv_setsv_nomg(TARG, &PL_sv_undef); 2929 SETTARG; 2930 } 2931 SETs(&PL_sv_undef); 2932 } 2933 RETURN; 2934 } 2935 2936 /* Returns false if substring is completely outside original string. 2937 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must 2938 always be true for an explicit 0. 2939 */ 2940 bool 2941 Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv, 2942 bool pos1_is_uv, IV len_iv, 2943 bool len_is_uv, STRLEN *posp, 2944 STRLEN *lenp) 2945 { 2946 IV pos2_iv; 2947 int pos2_is_uv; 2948 2949 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS; 2950 2951 if (!pos1_is_uv && pos1_iv < 0 && curlen) { 2952 pos1_is_uv = curlen-1 > ~(UV)pos1_iv; 2953 pos1_iv += curlen; 2954 } 2955 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen) 2956 return FALSE; 2957 2958 if (len_iv || len_is_uv) { 2959 if (!len_is_uv && len_iv < 0) { 2960 pos2_iv = curlen + len_iv; 2961 if (curlen) 2962 pos2_is_uv = curlen-1 > ~(UV)len_iv; 2963 else 2964 pos2_is_uv = 0; 2965 } else { /* len_iv >= 0 */ 2966 if (!pos1_is_uv && pos1_iv < 0) { 2967 pos2_iv = pos1_iv + len_iv; 2968 pos2_is_uv = (UV)len_iv > (UV)IV_MAX; 2969 } else { 2970 if ((UV)len_iv > curlen-(UV)pos1_iv) 2971 pos2_iv = curlen; 2972 else 2973 pos2_iv = pos1_iv+len_iv; 2974 pos2_is_uv = 1; 2975 } 2976 } 2977 } 2978 else { 2979 pos2_iv = curlen; 2980 pos2_is_uv = 1; 2981 } 2982 2983 if (!pos2_is_uv && pos2_iv < 0) { 2984 if (!pos1_is_uv && pos1_iv < 0) 2985 return FALSE; 2986 pos2_iv = 0; 2987 } 2988 else if (!pos1_is_uv && pos1_iv < 0) 2989 pos1_iv = 0; 2990 2991 if ((UV)pos2_iv < (UV)pos1_iv) 2992 pos2_iv = pos1_iv; 2993 if ((UV)pos2_iv > curlen) 2994 pos2_iv = curlen; 2995 2996 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */ 2997 *posp = (STRLEN)( (UV)pos1_iv ); 2998 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv ); 2999 3000 return TRUE; 3001 } 3002 3003 PP(pp_substr) 3004 { 3005 dVAR; dSP; dTARGET; 3006 SV *sv; 3007 STRLEN curlen; 3008 STRLEN utf8_curlen; 3009 SV * pos_sv; 3010 IV pos1_iv; 3011 int pos1_is_uv; 3012 SV * len_sv; 3013 IV len_iv = 0; 3014 int len_is_uv = 0; 3015 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 3016 const bool rvalue = (GIMME_V != G_VOID); 3017 const char *tmps; 3018 SV *repl_sv = NULL; 3019 const char *repl = NULL; 3020 STRLEN repl_len; 3021 int num_args = PL_op->op_private & 7; 3022 bool repl_need_utf8_upgrade = FALSE; 3023 3024 if (num_args > 2) { 3025 if (num_args > 3) { 3026 if(!(repl_sv = POPs)) num_args--; 3027 } 3028 if ((len_sv = POPs)) { 3029 len_iv = SvIV(len_sv); 3030 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1; 3031 } 3032 else num_args--; 3033 } 3034 pos_sv = POPs; 3035 pos1_iv = SvIV(pos_sv); 3036 pos1_is_uv = SvIOK_UV(pos_sv); 3037 sv = POPs; 3038 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) { 3039 assert(!repl_sv); 3040 repl_sv = POPs; 3041 } 3042 PUTBACK; 3043 if (lvalue && !repl_sv) { 3044 SV * ret; 3045 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 3046 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); 3047 LvTYPE(ret) = 'x'; 3048 LvTARG(ret) = SvREFCNT_inc_simple(sv); 3049 LvTARGOFF(ret) = 3050 pos1_is_uv || pos1_iv >= 0 3051 ? (STRLEN)(UV)pos1_iv 3052 : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv); 3053 LvTARGLEN(ret) = 3054 len_is_uv || len_iv > 0 3055 ? (STRLEN)(UV)len_iv 3056 : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv); 3057 3058 SPAGAIN; 3059 PUSHs(ret); /* avoid SvSETMAGIC here */ 3060 RETURN; 3061 } 3062 if (repl_sv) { 3063 repl = SvPV_const(repl_sv, repl_len); 3064 SvGETMAGIC(sv); 3065 if (SvROK(sv)) 3066 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), 3067 "Attempt to use reference as lvalue in substr" 3068 ); 3069 tmps = SvPV_force_nomg(sv, curlen); 3070 if (DO_UTF8(repl_sv) && repl_len) { 3071 if (!DO_UTF8(sv)) { 3072 sv_utf8_upgrade_nomg(sv); 3073 curlen = SvCUR(sv); 3074 } 3075 } 3076 else if (DO_UTF8(sv)) 3077 repl_need_utf8_upgrade = TRUE; 3078 } 3079 else tmps = SvPV_const(sv, curlen); 3080 if (DO_UTF8(sv)) { 3081 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen); 3082 if (utf8_curlen == curlen) 3083 utf8_curlen = 0; 3084 else 3085 curlen = utf8_curlen; 3086 } 3087 else 3088 utf8_curlen = 0; 3089 3090 { 3091 STRLEN pos, len, byte_len, byte_pos; 3092 3093 if (!translate_substr_offsets( 3094 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len 3095 )) goto bound_fail; 3096 3097 byte_len = len; 3098 byte_pos = utf8_curlen 3099 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos; 3100 3101 tmps += byte_pos; 3102 3103 if (rvalue) { 3104 SvTAINTED_off(TARG); /* decontaminate */ 3105 SvUTF8_off(TARG); /* decontaminate */ 3106 sv_setpvn(TARG, tmps, byte_len); 3107 #ifdef USE_LOCALE_COLLATE 3108 sv_unmagic(TARG, PERL_MAGIC_collxfrm); 3109 #endif 3110 if (utf8_curlen) 3111 SvUTF8_on(TARG); 3112 } 3113 3114 if (repl) { 3115 SV* repl_sv_copy = NULL; 3116 3117 if (repl_need_utf8_upgrade) { 3118 repl_sv_copy = newSVsv(repl_sv); 3119 sv_utf8_upgrade(repl_sv_copy); 3120 repl = SvPV_const(repl_sv_copy, repl_len); 3121 } 3122 if (!SvOK(sv)) 3123 sv_setpvs(sv, ""); 3124 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0); 3125 SvREFCNT_dec(repl_sv_copy); 3126 } 3127 } 3128 SPAGAIN; 3129 if (rvalue) { 3130 SvSETMAGIC(TARG); 3131 PUSHs(TARG); 3132 } 3133 RETURN; 3134 3135 bound_fail: 3136 if (repl) 3137 Perl_croak(aTHX_ "substr outside of string"); 3138 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); 3139 RETPUSHUNDEF; 3140 } 3141 3142 PP(pp_vec) 3143 { 3144 dVAR; dSP; 3145 const IV size = POPi; 3146 const IV offset = POPi; 3147 SV * const src = POPs; 3148 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; 3149 SV * ret; 3150 3151 if (lvalue) { /* it's an lvalue! */ 3152 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 3153 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0); 3154 LvTYPE(ret) = 'v'; 3155 LvTARG(ret) = SvREFCNT_inc_simple(src); 3156 LvTARGOFF(ret) = offset; 3157 LvTARGLEN(ret) = size; 3158 } 3159 else { 3160 dTARGET; 3161 SvTAINTED_off(TARG); /* decontaminate */ 3162 ret = TARG; 3163 } 3164 3165 sv_setuv(ret, do_vecget(src, offset, size)); 3166 PUSHs(ret); 3167 RETURN; 3168 } 3169 3170 PP(pp_index) 3171 { 3172 dVAR; dSP; dTARGET; 3173 SV *big; 3174 SV *little; 3175 SV *temp = NULL; 3176 STRLEN biglen; 3177 STRLEN llen = 0; 3178 I32 offset; 3179 I32 retval; 3180 const char *big_p; 3181 const char *little_p; 3182 bool big_utf8; 3183 bool little_utf8; 3184 const bool is_index = PL_op->op_type == OP_INDEX; 3185 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0)); 3186 3187 if (threeargs) 3188 offset = POPi; 3189 little = POPs; 3190 big = POPs; 3191 big_p = SvPV_const(big, biglen); 3192 little_p = SvPV_const(little, llen); 3193 3194 big_utf8 = DO_UTF8(big); 3195 little_utf8 = DO_UTF8(little); 3196 if (big_utf8 ^ little_utf8) { 3197 /* One needs to be upgraded. */ 3198 if (little_utf8 && !PL_encoding) { 3199 /* Well, maybe instead we might be able to downgrade the small 3200 string? */ 3201 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen, 3202 &little_utf8); 3203 if (little_utf8) { 3204 /* If the large string is ISO-8859-1, and it's not possible to 3205 convert the small string to ISO-8859-1, then there is no 3206 way that it could be found anywhere by index. */ 3207 retval = -1; 3208 goto fail; 3209 } 3210 3211 /* At this point, pv is a malloc()ed string. So donate it to temp 3212 to ensure it will get free()d */ 3213 little = temp = newSV(0); 3214 sv_usepvn(temp, pv, llen); 3215 little_p = SvPVX(little); 3216 } else { 3217 temp = little_utf8 3218 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen); 3219 3220 if (PL_encoding) { 3221 sv_recode_to_utf8(temp, PL_encoding); 3222 } else { 3223 sv_utf8_upgrade(temp); 3224 } 3225 if (little_utf8) { 3226 big = temp; 3227 big_utf8 = TRUE; 3228 big_p = SvPV_const(big, biglen); 3229 } else { 3230 little = temp; 3231 little_p = SvPV_const(little, llen); 3232 } 3233 } 3234 } 3235 if (SvGAMAGIC(big)) { 3236 /* Life just becomes a lot easier if I use a temporary here. 3237 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously) 3238 will trigger magic and overloading again, as will fbm_instr() 3239 */ 3240 big = newSVpvn_flags(big_p, biglen, 3241 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0)); 3242 big_p = SvPVX(big); 3243 } 3244 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) { 3245 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will 3246 warn on undef, and we've already triggered a warning with the 3247 SvPV_const some lines above. We can't remove that, as we need to 3248 call some SvPV to trigger overloading early and find out if the 3249 string is UTF-8. 3250 This is all getting to messy. The API isn't quite clean enough, 3251 because data access has side effects. 3252 */ 3253 little = newSVpvn_flags(little_p, llen, 3254 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0)); 3255 little_p = SvPVX(little); 3256 } 3257 3258 if (!threeargs) 3259 offset = is_index ? 0 : biglen; 3260 else { 3261 if (big_utf8 && offset > 0) 3262 sv_pos_u2b(big, &offset, 0); 3263 if (!is_index) 3264 offset += llen; 3265 } 3266 if (offset < 0) 3267 offset = 0; 3268 else if (offset > (I32)biglen) 3269 offset = biglen; 3270 if (!(little_p = is_index 3271 ? fbm_instr((unsigned char*)big_p + offset, 3272 (unsigned char*)big_p + biglen, little, 0) 3273 : rninstr(big_p, big_p + offset, 3274 little_p, little_p + llen))) 3275 retval = -1; 3276 else { 3277 retval = little_p - big_p; 3278 if (retval > 0 && big_utf8) 3279 sv_pos_b2u(big, &retval); 3280 } 3281 SvREFCNT_dec(temp); 3282 fail: 3283 PUSHi(retval); 3284 RETURN; 3285 } 3286 3287 PP(pp_sprintf) 3288 { 3289 dVAR; dSP; dMARK; dORIGMARK; dTARGET; 3290 SvTAINTED_off(TARG); 3291 do_sprintf(TARG, SP-MARK, MARK+1); 3292 TAINT_IF(SvTAINTED(TARG)); 3293 SP = ORIGMARK; 3294 PUSHTARG; 3295 RETURN; 3296 } 3297 3298 PP(pp_ord) 3299 { 3300 dVAR; dSP; dTARGET; 3301 3302 SV *argsv = POPs; 3303 STRLEN len; 3304 const U8 *s = (U8*)SvPV_const(argsv, len); 3305 3306 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) { 3307 SV * const tmpsv = sv_2mortal(newSVsv(argsv)); 3308 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding); 3309 argsv = tmpsv; 3310 } 3311 3312 XPUSHu(DO_UTF8(argsv) ? 3313 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) : 3314 (UV)(*s & 0xff)); 3315 3316 RETURN; 3317 } 3318 3319 PP(pp_chr) 3320 { 3321 dVAR; dSP; dTARGET; 3322 char *tmps; 3323 UV value; 3324 SV *top = POPs; 3325 3326 SvGETMAGIC(top); 3327 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */ 3328 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0) 3329 || 3330 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top))) 3331 && SvNV_nomg(top) < 0.0))) { 3332 if (ckWARN(WARN_UTF8)) { 3333 if (SvGMAGICAL(top)) { 3334 SV *top2 = sv_newmortal(); 3335 sv_setsv_nomg(top2, top); 3336 top = top2; 3337 } 3338 Perl_warner(aTHX_ packWARN(WARN_UTF8), 3339 "Invalid negative number (%"SVf") in chr", top); 3340 } 3341 value = UNICODE_REPLACEMENT; 3342 } else { 3343 value = SvUV_nomg(top); 3344 } 3345 3346 SvUPGRADE(TARG,SVt_PV); 3347 3348 if (value > 255 && !IN_BYTES) { 3349 SvGROW(TARG, (STRLEN)UNISKIP(value)+1); 3350 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0); 3351 SvCUR_set(TARG, tmps - SvPVX_const(TARG)); 3352 *tmps = '\0'; 3353 (void)SvPOK_only(TARG); 3354 SvUTF8_on(TARG); 3355 XPUSHs(TARG); 3356 RETURN; 3357 } 3358 3359 SvGROW(TARG,2); 3360 SvCUR_set(TARG, 1); 3361 tmps = SvPVX(TARG); 3362 *tmps++ = (char)value; 3363 *tmps = '\0'; 3364 (void)SvPOK_only(TARG); 3365 3366 if (PL_encoding && !IN_BYTES) { 3367 sv_recode_to_utf8(TARG, PL_encoding); 3368 tmps = SvPVX(TARG); 3369 if (SvCUR(TARG) == 0 3370 || ! is_utf8_string((U8*)tmps, SvCUR(TARG)) 3371 || UTF8_IS_REPLACEMENT((U8*) tmps, (U8*) tmps + SvCUR(TARG))) 3372 { 3373 SvGROW(TARG, 2); 3374 tmps = SvPVX(TARG); 3375 SvCUR_set(TARG, 1); 3376 *tmps++ = (char)value; 3377 *tmps = '\0'; 3378 SvUTF8_off(TARG); 3379 } 3380 } 3381 3382 XPUSHs(TARG); 3383 RETURN; 3384 } 3385 3386 PP(pp_crypt) 3387 { 3388 #ifdef HAS_CRYPT 3389 dVAR; dSP; dTARGET; 3390 dPOPTOPssrl; 3391 STRLEN len; 3392 const char *tmps = SvPV_const(left, len); 3393 3394 if (DO_UTF8(left)) { 3395 /* If Unicode, try to downgrade. 3396 * If not possible, croak. 3397 * Yes, we made this up. */ 3398 SV* const tsv = sv_2mortal(newSVsv(left)); 3399 3400 SvUTF8_on(tsv); 3401 sv_utf8_downgrade(tsv, FALSE); 3402 tmps = SvPV_const(tsv, len); 3403 } 3404 # ifdef USE_ITHREADS 3405 # ifdef HAS_CRYPT_R 3406 if (!PL_reentrant_buffer->_crypt_struct_buffer) { 3407 /* This should be threadsafe because in ithreads there is only 3408 * one thread per interpreter. If this would not be true, 3409 * we would need a mutex to protect this malloc. */ 3410 PL_reentrant_buffer->_crypt_struct_buffer = 3411 (struct crypt_data *)safemalloc(sizeof(struct crypt_data)); 3412 #if defined(__GLIBC__) || defined(__EMX__) 3413 if (PL_reentrant_buffer->_crypt_struct_buffer) { 3414 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0; 3415 /* work around glibc-2.2.5 bug */ 3416 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0; 3417 } 3418 #endif 3419 } 3420 # endif /* HAS_CRYPT_R */ 3421 # endif /* USE_ITHREADS */ 3422 # ifdef FCRYPT 3423 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right))); 3424 # else 3425 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right))); 3426 # endif 3427 SETTARG; 3428 RETURN; 3429 #else 3430 DIE(aTHX_ 3431 "The crypt() function is unimplemented due to excessive paranoia."); 3432 #endif 3433 } 3434 3435 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So 3436 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ 3437 3438 /* Generates code to store a unicode codepoint c that is known to occupy 3439 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1, 3440 * and p is advanced to point to the next available byte after the two bytes */ 3441 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \ 3442 STMT_START { \ 3443 *(p)++ = UTF8_TWO_BYTE_HI(c); \ 3444 *((p)++) = UTF8_TWO_BYTE_LO(c); \ 3445 } STMT_END 3446 3447 PP(pp_ucfirst) 3448 { 3449 /* Actually is both lcfirst() and ucfirst(). Only the first character 3450 * changes. This means that possibly we can change in-place, ie., just 3451 * take the source and change that one character and store it back, but not 3452 * if read-only etc, or if the length changes */ 3453 3454 dVAR; 3455 dSP; 3456 SV *source = TOPs; 3457 STRLEN slen; /* slen is the byte length of the whole SV. */ 3458 STRLEN need; 3459 SV *dest; 3460 bool inplace; /* ? Convert first char only, in-place */ 3461 bool doing_utf8 = FALSE; /* ? using utf8 */ 3462 bool convert_source_to_utf8 = FALSE; /* ? need to convert */ 3463 const int op_type = PL_op->op_type; 3464 const U8 *s; 3465 U8 *d; 3466 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 3467 STRLEN ulen; /* ulen is the byte length of the original Unicode character 3468 * stored as UTF-8 at s. */ 3469 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or 3470 * lowercased) character stored in tmpbuf. May be either 3471 * UTF-8 or not, but in either case is the number of bytes */ 3472 bool tainted = FALSE; 3473 3474 SvGETMAGIC(source); 3475 if (SvOK(source)) { 3476 s = (const U8*)SvPV_nomg_const(source, slen); 3477 } else { 3478 if (ckWARN(WARN_UNINITIALIZED)) 3479 report_uninit(source); 3480 s = (const U8*)""; 3481 slen = 0; 3482 } 3483 3484 /* We may be able to get away with changing only the first character, in 3485 * place, but not if read-only, etc. Later we may discover more reasons to 3486 * not convert in-place. */ 3487 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source); 3488 3489 /* First calculate what the changed first character should be. This affects 3490 * whether we can just swap it out, leaving the rest of the string unchanged, 3491 * or even if have to convert the dest to UTF-8 when the source isn't */ 3492 3493 if (! slen) { /* If empty */ 3494 need = 1; /* still need a trailing NUL */ 3495 ulen = 0; 3496 } 3497 else if (DO_UTF8(source)) { /* Is the source utf8? */ 3498 doing_utf8 = TRUE; 3499 ulen = UTF8SKIP(s); 3500 if (op_type == OP_UCFIRST) { 3501 _to_utf8_title_flags(s, tmpbuf, &tculen, 3502 cBOOL(IN_LOCALE_RUNTIME), &tainted); 3503 } 3504 else { 3505 _to_utf8_lower_flags(s, tmpbuf, &tculen, 3506 cBOOL(IN_LOCALE_RUNTIME), &tainted); 3507 } 3508 3509 /* we can't do in-place if the length changes. */ 3510 if (ulen != tculen) inplace = FALSE; 3511 need = slen + 1 - ulen + tculen; 3512 } 3513 else { /* Non-zero length, non-UTF-8, Need to consider locale and if 3514 * latin1 is treated as caseless. Note that a locale takes 3515 * precedence */ 3516 ulen = 1; /* Original character is 1 byte */ 3517 tculen = 1; /* Most characters will require one byte, but this will 3518 * need to be overridden for the tricky ones */ 3519 need = slen + 1; 3520 3521 if (op_type == OP_LCFIRST) { 3522 3523 /* lower case the first letter: no trickiness for any character */ 3524 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) : 3525 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s)); 3526 } 3527 /* is ucfirst() */ 3528 else if (IN_LOCALE_RUNTIME) { 3529 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales 3530 * have upper and title case different 3531 */ 3532 } 3533 else if (! IN_UNI_8_BIT) { 3534 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or 3535 * on EBCDIC machines whatever the 3536 * native function does */ 3537 } 3538 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */ 3539 UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's'); 3540 if (tculen > 1) { 3541 assert(tculen == 2); 3542 3543 /* If the result is an upper Latin1-range character, it can 3544 * still be represented in one byte, which is its ordinal */ 3545 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) { 3546 *tmpbuf = (U8) title_ord; 3547 tculen = 1; 3548 } 3549 else { 3550 /* Otherwise it became more than one ASCII character (in 3551 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to 3552 * beyond Latin1, so the number of bytes changed, so can't 3553 * replace just the first character in place. */ 3554 inplace = FALSE; 3555 3556 /* If the result won't fit in a byte, the entire result 3557 * will have to be in UTF-8. Assume worst case sizing in 3558 * conversion. (all latin1 characters occupy at most two 3559 * bytes in utf8) */ 3560 if (title_ord > 255) { 3561 doing_utf8 = TRUE; 3562 convert_source_to_utf8 = TRUE; 3563 need = slen * 2 + 1; 3564 3565 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all 3566 * (both) characters whose title case is above 255 is 3567 * 2. */ 3568 ulen = 2; 3569 } 3570 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */ 3571 need = slen + 1 + 1; 3572 } 3573 } 3574 } 3575 } /* End of use Unicode (Latin1) semantics */ 3576 } /* End of changing the case of the first character */ 3577 3578 /* Here, have the first character's changed case stored in tmpbuf. Ready to 3579 * generate the result */ 3580 if (inplace) { 3581 3582 /* We can convert in place. This means we change just the first 3583 * character without disturbing the rest; no need to grow */ 3584 dest = source; 3585 s = d = (U8*)SvPV_force_nomg(source, slen); 3586 } else { 3587 dTARGET; 3588 3589 dest = TARG; 3590 3591 /* Here, we can't convert in place; we earlier calculated how much 3592 * space we will need, so grow to accommodate that */ 3593 SvUPGRADE(dest, SVt_PV); 3594 d = (U8*)SvGROW(dest, need); 3595 (void)SvPOK_only(dest); 3596 3597 SETs(dest); 3598 } 3599 3600 if (doing_utf8) { 3601 if (! inplace) { 3602 if (! convert_source_to_utf8) { 3603 3604 /* Here both source and dest are in UTF-8, but have to create 3605 * the entire output. We initialize the result to be the 3606 * title/lower cased first character, and then append the rest 3607 * of the string. */ 3608 sv_setpvn(dest, (char*)tmpbuf, tculen); 3609 if (slen > ulen) { 3610 sv_catpvn(dest, (char*)(s + ulen), slen - ulen); 3611 } 3612 } 3613 else { 3614 const U8 *const send = s + slen; 3615 3616 /* Here the dest needs to be in UTF-8, but the source isn't, 3617 * except we earlier UTF-8'd the first character of the source 3618 * into tmpbuf. First put that into dest, and then append the 3619 * rest of the source, converting it to UTF-8 as we go. */ 3620 3621 /* Assert tculen is 2 here because the only two characters that 3622 * get to this part of the code have 2-byte UTF-8 equivalents */ 3623 *d++ = *tmpbuf; 3624 *d++ = *(tmpbuf + 1); 3625 s++; /* We have just processed the 1st char */ 3626 3627 for (; s < send; s++) { 3628 d = uvchr_to_utf8(d, *s); 3629 } 3630 *d = '\0'; 3631 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 3632 } 3633 SvUTF8_on(dest); 3634 } 3635 else { /* in-place UTF-8. Just overwrite the first character */ 3636 Copy(tmpbuf, d, tculen, U8); 3637 SvCUR_set(dest, need - 1); 3638 } 3639 3640 if (tainted) { 3641 TAINT; 3642 SvTAINTED_on(dest); 3643 } 3644 } 3645 else { /* Neither source nor dest are in or need to be UTF-8 */ 3646 if (slen) { 3647 if (IN_LOCALE_RUNTIME) { 3648 TAINT; 3649 SvTAINTED_on(dest); 3650 } 3651 if (inplace) { /* in-place, only need to change the 1st char */ 3652 *d = *tmpbuf; 3653 } 3654 else { /* Not in-place */ 3655 3656 /* Copy the case-changed character(s) from tmpbuf */ 3657 Copy(tmpbuf, d, tculen, U8); 3658 d += tculen - 1; /* Code below expects d to point to final 3659 * character stored */ 3660 } 3661 } 3662 else { /* empty source */ 3663 /* See bug #39028: Don't taint if empty */ 3664 *d = *s; 3665 } 3666 3667 /* In a "use bytes" we don't treat the source as UTF-8, but, still want 3668 * the destination to retain that flag */ 3669 if (SvUTF8(source)) 3670 SvUTF8_on(dest); 3671 3672 if (!inplace) { /* Finish the rest of the string, unchanged */ 3673 /* This will copy the trailing NUL */ 3674 Copy(s + 1, d + 1, slen, U8); 3675 SvCUR_set(dest, need - 1); 3676 } 3677 } 3678 if (dest != source && SvTAINTED(source)) 3679 SvTAINT(dest); 3680 SvSETMAGIC(dest); 3681 RETURN; 3682 } 3683 3684 /* There's so much setup/teardown code common between uc and lc, I wonder if 3685 it would be worth merging the two, and just having a switch outside each 3686 of the three tight loops. There is less and less commonality though */ 3687 PP(pp_uc) 3688 { 3689 dVAR; 3690 dSP; 3691 SV *source = TOPs; 3692 STRLEN len; 3693 STRLEN min; 3694 SV *dest; 3695 const U8 *s; 3696 U8 *d; 3697 3698 SvGETMAGIC(source); 3699 3700 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source) 3701 && SvTEMP(source) && !DO_UTF8(source) 3702 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) { 3703 3704 /* We can convert in place. The reason we can't if in UNI_8_BIT is to 3705 * make the loop tight, so we overwrite the source with the dest before 3706 * looking at it, and we need to look at the original source 3707 * afterwards. There would also need to be code added to handle 3708 * switching to not in-place in midstream if we run into characters 3709 * that change the length. 3710 */ 3711 dest = source; 3712 s = d = (U8*)SvPV_force_nomg(source, len); 3713 min = len + 1; 3714 } else { 3715 dTARGET; 3716 3717 dest = TARG; 3718 3719 /* The old implementation would copy source into TARG at this point. 3720 This had the side effect that if source was undef, TARG was now 3721 an undefined SV with PADTMP set, and they don't warn inside 3722 sv_2pv_flags(). However, we're now getting the PV direct from 3723 source, which doesn't have PADTMP set, so it would warn. Hence the 3724 little games. */ 3725 3726 if (SvOK(source)) { 3727 s = (const U8*)SvPV_nomg_const(source, len); 3728 } else { 3729 if (ckWARN(WARN_UNINITIALIZED)) 3730 report_uninit(source); 3731 s = (const U8*)""; 3732 len = 0; 3733 } 3734 min = len + 1; 3735 3736 SvUPGRADE(dest, SVt_PV); 3737 d = (U8*)SvGROW(dest, min); 3738 (void)SvPOK_only(dest); 3739 3740 SETs(dest); 3741 } 3742 3743 /* Overloaded values may have toggled the UTF-8 flag on source, so we need 3744 to check DO_UTF8 again here. */ 3745 3746 if (DO_UTF8(source)) { 3747 const U8 *const send = s + len; 3748 U8 tmpbuf[UTF8_MAXBYTES+1]; 3749 bool tainted = FALSE; 3750 3751 /* All occurrences of these are to be moved to follow any other marks. 3752 * This is context-dependent. We may not be passed enough context to 3753 * move the iota subscript beyond all of them, but we do the best we can 3754 * with what we're given. The result is always better than if we 3755 * hadn't done this. And, the problem would only arise if we are 3756 * passed a character without all its combining marks, which would be 3757 * the caller's mistake. The information this is based on comes from a 3758 * comment in Unicode SpecialCasing.txt, (and the Standard's text 3759 * itself) and so can't be checked properly to see if it ever gets 3760 * revised. But the likelihood of it changing is remote */ 3761 bool in_iota_subscript = FALSE; 3762 3763 while (s < send) { 3764 STRLEN u; 3765 STRLEN ulen; 3766 UV uv; 3767 if (in_iota_subscript && ! _is_utf8_mark(s)) { 3768 3769 /* A non-mark. Time to output the iota subscript */ 3770 #define GREEK_CAPITAL_LETTER_IOTA 0x0399 3771 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 3772 3773 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); 3774 in_iota_subscript = FALSE; 3775 } 3776 3777 /* Then handle the current character. Get the changed case value 3778 * and copy it to the output buffer */ 3779 3780 u = UTF8SKIP(s); 3781 uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, 3782 cBOOL(IN_LOCALE_RUNTIME), &tainted); 3783 if (uv == GREEK_CAPITAL_LETTER_IOTA 3784 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI) 3785 { 3786 in_iota_subscript = TRUE; 3787 } 3788 else { 3789 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 3790 /* If the eventually required minimum size outgrows the 3791 * available space, we need to grow. */ 3792 const UV o = d - (U8*)SvPVX_const(dest); 3793 3794 /* If someone uppercases one million U+03B0s we SvGROW() 3795 * one million times. Or we could try guessing how much to 3796 * allocate without allocating too much. Such is life. 3797 * See corresponding comment in lc code for another option 3798 * */ 3799 SvGROW(dest, min); 3800 d = (U8*)SvPVX(dest) + o; 3801 } 3802 Copy(tmpbuf, d, ulen, U8); 3803 d += ulen; 3804 } 3805 s += u; 3806 } 3807 if (in_iota_subscript) { 3808 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); 3809 } 3810 SvUTF8_on(dest); 3811 *d = '\0'; 3812 3813 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 3814 if (tainted) { 3815 TAINT; 3816 SvTAINTED_on(dest); 3817 } 3818 } 3819 else { /* Not UTF-8 */ 3820 if (len) { 3821 const U8 *const send = s + len; 3822 3823 /* Use locale casing if in locale; regular style if not treating 3824 * latin1 as having case; otherwise the latin1 casing. Do the 3825 * whole thing in a tight loop, for speed, */ 3826 if (IN_LOCALE_RUNTIME) { 3827 TAINT; 3828 SvTAINTED_on(dest); 3829 for (; s < send; d++, s++) 3830 *d = toUPPER_LC(*s); 3831 } 3832 else if (! IN_UNI_8_BIT) { 3833 for (; s < send; d++, s++) { 3834 *d = toUPPER(*s); 3835 } 3836 } 3837 else { 3838 for (; s < send; d++, s++) { 3839 *d = toUPPER_LATIN1_MOD(*s); 3840 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) { 3841 continue; 3842 } 3843 3844 /* The mainstream case is the tight loop above. To avoid 3845 * extra tests in that, all three characters that require 3846 * special handling are mapped by the MOD to the one tested 3847 * just above. 3848 * Use the source to distinguish between the three cases */ 3849 3850 if (*s == LATIN_SMALL_LETTER_SHARP_S) { 3851 3852 /* uc() of this requires 2 characters, but they are 3853 * ASCII. If not enough room, grow the string */ 3854 if (SvLEN(dest) < ++min) { 3855 const UV o = d - (U8*)SvPVX_const(dest); 3856 SvGROW(dest, min); 3857 d = (U8*)SvPVX(dest) + o; 3858 } 3859 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */ 3860 continue; /* Back to the tight loop; still in ASCII */ 3861 } 3862 3863 /* The other two special handling characters have their 3864 * upper cases outside the latin1 range, hence need to be 3865 * in UTF-8, so the whole result needs to be in UTF-8. So, 3866 * here we are somewhere in the middle of processing a 3867 * non-UTF-8 string, and realize that we will have to convert 3868 * the whole thing to UTF-8. What to do? There are 3869 * several possibilities. The simplest to code is to 3870 * convert what we have so far, set a flag, and continue on 3871 * in the loop. The flag would be tested each time through 3872 * the loop, and if set, the next character would be 3873 * converted to UTF-8 and stored. But, I (khw) didn't want 3874 * to slow down the mainstream case at all for this fairly 3875 * rare case, so I didn't want to add a test that didn't 3876 * absolutely have to be there in the loop, besides the 3877 * possibility that it would get too complicated for 3878 * optimizers to deal with. Another possibility is to just 3879 * give up, convert the source to UTF-8, and restart the 3880 * function that way. Another possibility is to convert 3881 * both what has already been processed and what is yet to 3882 * come separately to UTF-8, then jump into the loop that 3883 * handles UTF-8. But the most efficient time-wise of the 3884 * ones I could think of is what follows, and turned out to 3885 * not require much extra code. */ 3886 3887 /* Convert what we have so far into UTF-8, telling the 3888 * function that we know it should be converted, and to 3889 * allow extra space for what we haven't processed yet. 3890 * Assume the worst case space requirements for converting 3891 * what we haven't processed so far: that it will require 3892 * two bytes for each remaining source character, plus the 3893 * NUL at the end. This may cause the string pointer to 3894 * move, so re-find it. */ 3895 3896 len = d - (U8*)SvPVX_const(dest); 3897 SvCUR_set(dest, len); 3898 len = sv_utf8_upgrade_flags_grow(dest, 3899 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 3900 (send -s) * 2 + 1); 3901 d = (U8*)SvPVX(dest) + len; 3902 3903 /* Now process the remainder of the source, converting to 3904 * upper and UTF-8. If a resulting byte is invariant in 3905 * UTF-8, output it as-is, otherwise convert to UTF-8 and 3906 * append it to the output. */ 3907 for (; s < send; s++) { 3908 (void) _to_upper_title_latin1(*s, d, &len, 'S'); 3909 d += len; 3910 } 3911 3912 /* Here have processed the whole source; no need to continue 3913 * with the outer loop. Each character has been converted 3914 * to upper case and converted to UTF-8 */ 3915 3916 break; 3917 } /* End of processing all latin1-style chars */ 3918 } /* End of processing all chars */ 3919 } /* End of source is not empty */ 3920 3921 if (source != dest) { 3922 *d = '\0'; /* Here d points to 1 after last char, add NUL */ 3923 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 3924 } 3925 } /* End of isn't utf8 */ 3926 if (dest != source && SvTAINTED(source)) 3927 SvTAINT(dest); 3928 SvSETMAGIC(dest); 3929 RETURN; 3930 } 3931 3932 PP(pp_lc) 3933 { 3934 dVAR; 3935 dSP; 3936 SV *source = TOPs; 3937 STRLEN len; 3938 STRLEN min; 3939 SV *dest; 3940 const U8 *s; 3941 U8 *d; 3942 3943 SvGETMAGIC(source); 3944 3945 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source) 3946 && SvTEMP(source) && !DO_UTF8(source)) { 3947 3948 /* We can convert in place, as lowercasing anything in the latin1 range 3949 * (or else DO_UTF8 would have been on) doesn't lengthen it */ 3950 dest = source; 3951 s = d = (U8*)SvPV_force_nomg(source, len); 3952 min = len + 1; 3953 } else { 3954 dTARGET; 3955 3956 dest = TARG; 3957 3958 /* The old implementation would copy source into TARG at this point. 3959 This had the side effect that if source was undef, TARG was now 3960 an undefined SV with PADTMP set, and they don't warn inside 3961 sv_2pv_flags(). However, we're now getting the PV direct from 3962 source, which doesn't have PADTMP set, so it would warn. Hence the 3963 little games. */ 3964 3965 if (SvOK(source)) { 3966 s = (const U8*)SvPV_nomg_const(source, len); 3967 } else { 3968 if (ckWARN(WARN_UNINITIALIZED)) 3969 report_uninit(source); 3970 s = (const U8*)""; 3971 len = 0; 3972 } 3973 min = len + 1; 3974 3975 SvUPGRADE(dest, SVt_PV); 3976 d = (U8*)SvGROW(dest, min); 3977 (void)SvPOK_only(dest); 3978 3979 SETs(dest); 3980 } 3981 3982 /* Overloaded values may have toggled the UTF-8 flag on source, so we need 3983 to check DO_UTF8 again here. */ 3984 3985 if (DO_UTF8(source)) { 3986 const U8 *const send = s + len; 3987 U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; 3988 bool tainted = FALSE; 3989 3990 while (s < send) { 3991 const STRLEN u = UTF8SKIP(s); 3992 STRLEN ulen; 3993 3994 _to_utf8_lower_flags(s, tmpbuf, &ulen, 3995 cBOOL(IN_LOCALE_RUNTIME), &tainted); 3996 3997 /* Here is where we would do context-sensitive actions. See the 3998 * commit message for this comment for why there isn't any */ 3999 4000 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4001 4002 /* If the eventually required minimum size outgrows the 4003 * available space, we need to grow. */ 4004 const UV o = d - (U8*)SvPVX_const(dest); 4005 4006 /* If someone lowercases one million U+0130s we SvGROW() one 4007 * million times. Or we could try guessing how much to 4008 * allocate without allocating too much. Such is life. 4009 * Another option would be to grow an extra byte or two more 4010 * each time we need to grow, which would cut down the million 4011 * to 500K, with little waste */ 4012 SvGROW(dest, min); 4013 d = (U8*)SvPVX(dest) + o; 4014 } 4015 4016 /* Copy the newly lowercased letter to the output buffer we're 4017 * building */ 4018 Copy(tmpbuf, d, ulen, U8); 4019 d += ulen; 4020 s += u; 4021 } /* End of looping through the source string */ 4022 SvUTF8_on(dest); 4023 *d = '\0'; 4024 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4025 if (tainted) { 4026 TAINT; 4027 SvTAINTED_on(dest); 4028 } 4029 } else { /* Not utf8 */ 4030 if (len) { 4031 const U8 *const send = s + len; 4032 4033 /* Use locale casing if in locale; regular style if not treating 4034 * latin1 as having case; otherwise the latin1 casing. Do the 4035 * whole thing in a tight loop, for speed, */ 4036 if (IN_LOCALE_RUNTIME) { 4037 TAINT; 4038 SvTAINTED_on(dest); 4039 for (; s < send; d++, s++) 4040 *d = toLOWER_LC(*s); 4041 } 4042 else if (! IN_UNI_8_BIT) { 4043 for (; s < send; d++, s++) { 4044 *d = toLOWER(*s); 4045 } 4046 } 4047 else { 4048 for (; s < send; d++, s++) { 4049 *d = toLOWER_LATIN1(*s); 4050 } 4051 } 4052 } 4053 if (source != dest) { 4054 *d = '\0'; 4055 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4056 } 4057 } 4058 if (dest != source && SvTAINTED(source)) 4059 SvTAINT(dest); 4060 SvSETMAGIC(dest); 4061 RETURN; 4062 } 4063 4064 PP(pp_quotemeta) 4065 { 4066 dVAR; dSP; dTARGET; 4067 SV * const sv = TOPs; 4068 STRLEN len; 4069 const char *s = SvPV_const(sv,len); 4070 4071 SvUTF8_off(TARG); /* decontaminate */ 4072 if (len) { 4073 char *d; 4074 SvUPGRADE(TARG, SVt_PV); 4075 SvGROW(TARG, (len * 2) + 1); 4076 d = SvPVX(TARG); 4077 if (DO_UTF8(sv)) { 4078 while (len) { 4079 STRLEN ulen = UTF8SKIP(s); 4080 bool to_quote = FALSE; 4081 4082 if (UTF8_IS_INVARIANT(*s)) { 4083 if (_isQUOTEMETA(*s)) { 4084 to_quote = TRUE; 4085 } 4086 } 4087 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { 4088 4089 /* In locale, we quote all non-ASCII Latin1 chars. 4090 * Otherwise use the quoting rules */ 4091 if (IN_LOCALE_RUNTIME 4092 || _isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1)))) 4093 { 4094 to_quote = TRUE; 4095 } 4096 } 4097 else if (is_QUOTEMETA_high(s)) { 4098 to_quote = TRUE; 4099 } 4100 4101 if (to_quote) { 4102 *d++ = '\\'; 4103 } 4104 if (ulen > len) 4105 ulen = len; 4106 len -= ulen; 4107 while (ulen--) 4108 *d++ = *s++; 4109 } 4110 SvUTF8_on(TARG); 4111 } 4112 else if (IN_UNI_8_BIT) { 4113 while (len--) { 4114 if (_isQUOTEMETA(*s)) 4115 *d++ = '\\'; 4116 *d++ = *s++; 4117 } 4118 } 4119 else { 4120 /* For non UNI_8_BIT (and hence in locale) just quote all \W 4121 * including everything above ASCII */ 4122 while (len--) { 4123 if (!isWORDCHAR_A(*s)) 4124 *d++ = '\\'; 4125 *d++ = *s++; 4126 } 4127 } 4128 *d = '\0'; 4129 SvCUR_set(TARG, d - SvPVX_const(TARG)); 4130 (void)SvPOK_only_UTF8(TARG); 4131 } 4132 else 4133 sv_setpvn(TARG, s, len); 4134 SETTARG; 4135 RETURN; 4136 } 4137 4138 PP(pp_fc) 4139 { 4140 dVAR; 4141 dTARGET; 4142 dSP; 4143 SV *source = TOPs; 4144 STRLEN len; 4145 STRLEN min; 4146 SV *dest; 4147 const U8 *s; 4148 const U8 *send; 4149 U8 *d; 4150 U8 tmpbuf[UTF8_MAXBYTES * UTF8_MAX_FOLD_CHAR_EXPAND + 1]; 4151 const bool full_folding = TRUE; 4152 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 ) 4153 | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 ); 4154 4155 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me. 4156 * You are welcome(?) -Hugmeir 4157 */ 4158 4159 SvGETMAGIC(source); 4160 4161 dest = TARG; 4162 4163 if (SvOK(source)) { 4164 s = (const U8*)SvPV_nomg_const(source, len); 4165 } else { 4166 if (ckWARN(WARN_UNINITIALIZED)) 4167 report_uninit(source); 4168 s = (const U8*)""; 4169 len = 0; 4170 } 4171 4172 min = len + 1; 4173 4174 SvUPGRADE(dest, SVt_PV); 4175 d = (U8*)SvGROW(dest, min); 4176 (void)SvPOK_only(dest); 4177 4178 SETs(dest); 4179 4180 send = s + len; 4181 if (DO_UTF8(source)) { /* UTF-8 flagged string. */ 4182 bool tainted = FALSE; 4183 while (s < send) { 4184 const STRLEN u = UTF8SKIP(s); 4185 STRLEN ulen; 4186 4187 _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted); 4188 4189 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { 4190 const UV o = d - (U8*)SvPVX_const(dest); 4191 SvGROW(dest, min); 4192 d = (U8*)SvPVX(dest) + o; 4193 } 4194 4195 Copy(tmpbuf, d, ulen, U8); 4196 d += ulen; 4197 s += u; 4198 } 4199 SvUTF8_on(dest); 4200 if (tainted) { 4201 TAINT; 4202 SvTAINTED_on(dest); 4203 } 4204 } /* Unflagged string */ 4205 else if (len) { 4206 /* For locale, bytes, and nothing, the behavior is supposed to be the 4207 * same as lc(). 4208 */ 4209 if ( IN_LOCALE_RUNTIME ) { /* Under locale */ 4210 TAINT; 4211 SvTAINTED_on(dest); 4212 for (; s < send; d++, s++) 4213 *d = toLOWER_LC(*s); 4214 } 4215 else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */ 4216 for (; s < send; d++, s++) 4217 *d = toLOWER(*s); 4218 } 4219 else { 4220 /* For ASCII and the Latin-1 range, there's only two troublesome 4221 * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full 4222 * casefolding becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which 4223 * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) -- 4224 * For the rest, the casefold is their lowercase. */ 4225 for (; s < send; d++, s++) { 4226 if (*s == MICRO_SIGN) { 4227 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, 4228 * which is outside of the latin-1 range. There's a couple 4229 * of ways to deal with this -- khw discusses them in 4230 * pp_lc/uc, so go there :) What we do here is upgrade what 4231 * we had already casefolded, then enter an inner loop that 4232 * appends the rest of the characters as UTF-8. */ 4233 len = d - (U8*)SvPVX_const(dest); 4234 SvCUR_set(dest, len); 4235 len = sv_utf8_upgrade_flags_grow(dest, 4236 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, 4237 /* The max expansion for latin1 4238 * chars is 1 byte becomes 2 */ 4239 (send -s) * 2 + 1); 4240 d = (U8*)SvPVX(dest) + len; 4241 4242 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU); 4243 s++; 4244 for (; s < send; s++) { 4245 STRLEN ulen; 4246 UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags); 4247 if UNI_IS_INVARIANT(fc) { 4248 if (full_folding 4249 && *s == LATIN_SMALL_LETTER_SHARP_S) 4250 { 4251 *d++ = 's'; 4252 *d++ = 's'; 4253 } 4254 else 4255 *d++ = (U8)fc; 4256 } 4257 else { 4258 Copy(tmpbuf, d, ulen, U8); 4259 d += ulen; 4260 } 4261 } 4262 break; 4263 } 4264 else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) { 4265 /* Under full casefolding, LATIN SMALL LETTER SHARP S 4266 * becomes "ss", which may require growing the SV. */ 4267 if (SvLEN(dest) < ++min) { 4268 const UV o = d - (U8*)SvPVX_const(dest); 4269 SvGROW(dest, min); 4270 d = (U8*)SvPVX(dest) + o; 4271 } 4272 *(d)++ = 's'; 4273 *d = 's'; 4274 } 4275 else { /* If it's not one of those two, the fold is their lower 4276 case */ 4277 *d = toLOWER_LATIN1(*s); 4278 } 4279 } 4280 } 4281 } 4282 *d = '\0'; 4283 SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); 4284 4285 if (SvTAINTED(source)) 4286 SvTAINT(dest); 4287 SvSETMAGIC(dest); 4288 RETURN; 4289 } 4290 4291 /* Arrays. */ 4292 4293 PP(pp_aslice) 4294 { 4295 dVAR; dSP; dMARK; dORIGMARK; 4296 AV *const av = MUTABLE_AV(POPs); 4297 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); 4298 4299 if (SvTYPE(av) == SVt_PVAV) { 4300 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 4301 bool can_preserve = FALSE; 4302 4303 if (localizing) { 4304 MAGIC *mg; 4305 HV *stash; 4306 4307 can_preserve = SvCANEXISTDELETE(av); 4308 } 4309 4310 if (lval && localizing) { 4311 SV **svp; 4312 I32 max = -1; 4313 for (svp = MARK + 1; svp <= SP; svp++) { 4314 const I32 elem = SvIV(*svp); 4315 if (elem > max) 4316 max = elem; 4317 } 4318 if (max > AvMAX(av)) 4319 av_extend(av, max); 4320 } 4321 4322 while (++MARK <= SP) { 4323 SV **svp; 4324 I32 elem = SvIV(*MARK); 4325 bool preeminent = TRUE; 4326 4327 if (localizing && can_preserve) { 4328 /* If we can determine whether the element exist, 4329 * Try to preserve the existenceness of a tied array 4330 * element by using EXISTS and DELETE if possible. 4331 * Fallback to FETCH and STORE otherwise. */ 4332 preeminent = av_exists(av, elem); 4333 } 4334 4335 svp = av_fetch(av, elem, lval); 4336 if (lval) { 4337 if (!svp || *svp == &PL_sv_undef) 4338 DIE(aTHX_ PL_no_aelem, elem); 4339 if (localizing) { 4340 if (preeminent) 4341 save_aelem(av, elem, svp); 4342 else 4343 SAVEADELETE(av, elem); 4344 } 4345 } 4346 *MARK = svp ? *svp : &PL_sv_undef; 4347 } 4348 } 4349 if (GIMME != G_ARRAY) { 4350 MARK = ORIGMARK; 4351 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef; 4352 SP = MARK; 4353 } 4354 RETURN; 4355 } 4356 4357 /* Smart dereferencing for keys, values and each */ 4358 PP(pp_rkeys) 4359 { 4360 dVAR; 4361 dSP; 4362 dPOPss; 4363 4364 SvGETMAGIC(sv); 4365 4366 if ( 4367 !SvROK(sv) 4368 || (sv = SvRV(sv), 4369 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV) 4370 || SvOBJECT(sv) 4371 ) 4372 ) { 4373 DIE(aTHX_ 4374 "Type of argument to %s must be unblessed hashref or arrayref", 4375 PL_op_desc[PL_op->op_type] ); 4376 } 4377 4378 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV) 4379 DIE(aTHX_ 4380 "Can't modify %s in %s", 4381 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type] 4382 ); 4383 4384 /* Delegate to correct function for op type */ 4385 PUSHs(sv); 4386 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) { 4387 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX); 4388 } 4389 else { 4390 return (SvTYPE(sv) == SVt_PVHV) 4391 ? Perl_pp_each(aTHX) 4392 : Perl_pp_aeach(aTHX); 4393 } 4394 } 4395 4396 PP(pp_aeach) 4397 { 4398 dVAR; 4399 dSP; 4400 AV *array = MUTABLE_AV(POPs); 4401 const I32 gimme = GIMME_V; 4402 IV *iterp = Perl_av_iter_p(aTHX_ array); 4403 const IV current = (*iterp)++; 4404 4405 if (current > av_len(array)) { 4406 *iterp = 0; 4407 if (gimme == G_SCALAR) 4408 RETPUSHUNDEF; 4409 else 4410 RETURN; 4411 } 4412 4413 EXTEND(SP, 2); 4414 mPUSHi(current); 4415 if (gimme == G_ARRAY) { 4416 SV **const element = av_fetch(array, current, 0); 4417 PUSHs(element ? *element : &PL_sv_undef); 4418 } 4419 RETURN; 4420 } 4421 4422 PP(pp_akeys) 4423 { 4424 dVAR; 4425 dSP; 4426 AV *array = MUTABLE_AV(POPs); 4427 const I32 gimme = GIMME_V; 4428 4429 *Perl_av_iter_p(aTHX_ array) = 0; 4430 4431 if (gimme == G_SCALAR) { 4432 dTARGET; 4433 PUSHi(av_len(array) + 1); 4434 } 4435 else if (gimme == G_ARRAY) { 4436 IV n = Perl_av_len(aTHX_ array); 4437 IV i; 4438 4439 EXTEND(SP, n + 1); 4440 4441 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) { 4442 for (i = 0; i <= n; i++) { 4443 mPUSHi(i); 4444 } 4445 } 4446 else { 4447 for (i = 0; i <= n; i++) { 4448 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0); 4449 PUSHs(elem ? *elem : &PL_sv_undef); 4450 } 4451 } 4452 } 4453 RETURN; 4454 } 4455 4456 /* Associative arrays. */ 4457 4458 PP(pp_each) 4459 { 4460 dVAR; 4461 dSP; 4462 HV * hash = MUTABLE_HV(POPs); 4463 HE *entry; 4464 const I32 gimme = GIMME_V; 4465 4466 PUTBACK; 4467 /* might clobber stack_sp */ 4468 entry = hv_iternext(hash); 4469 SPAGAIN; 4470 4471 EXTEND(SP, 2); 4472 if (entry) { 4473 SV* const sv = hv_iterkeysv(entry); 4474 PUSHs(sv); /* won't clobber stack_sp */ 4475 if (gimme == G_ARRAY) { 4476 SV *val; 4477 PUTBACK; 4478 /* might clobber stack_sp */ 4479 val = hv_iterval(hash, entry); 4480 SPAGAIN; 4481 PUSHs(val); 4482 } 4483 } 4484 else if (gimme == G_SCALAR) 4485 RETPUSHUNDEF; 4486 4487 RETURN; 4488 } 4489 4490 STATIC OP * 4491 S_do_delete_local(pTHX) 4492 { 4493 dVAR; 4494 dSP; 4495 const I32 gimme = GIMME_V; 4496 const MAGIC *mg; 4497 HV *stash; 4498 const bool sliced = !!(PL_op->op_private & OPpSLICE); 4499 SV *unsliced_keysv = sliced ? NULL : POPs; 4500 SV * const osv = POPs; 4501 SV **mark = sliced ? PL_stack_base + POPMARK : &unsliced_keysv-1; 4502 dORIGMARK; 4503 const bool tied = SvRMAGICAL(osv) 4504 && mg_find((const SV *)osv, PERL_MAGIC_tied); 4505 const bool can_preserve = SvCANEXISTDELETE(osv); 4506 const U32 type = SvTYPE(osv); 4507 SV ** const end = sliced ? SP : &unsliced_keysv; 4508 4509 if (type == SVt_PVHV) { /* hash element */ 4510 HV * const hv = MUTABLE_HV(osv); 4511 while (++MARK <= end) { 4512 SV * const keysv = *MARK; 4513 SV *sv = NULL; 4514 bool preeminent = TRUE; 4515 if (can_preserve) 4516 preeminent = hv_exists_ent(hv, keysv, 0); 4517 if (tied) { 4518 HE *he = hv_fetch_ent(hv, keysv, 1, 0); 4519 if (he) 4520 sv = HeVAL(he); 4521 else 4522 preeminent = FALSE; 4523 } 4524 else { 4525 sv = hv_delete_ent(hv, keysv, 0, 0); 4526 SvREFCNT_inc_simple_void(sv); /* De-mortalize */ 4527 } 4528 if (preeminent) { 4529 if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 4530 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM); 4531 if (tied) { 4532 *MARK = sv_mortalcopy(sv); 4533 mg_clear(sv); 4534 } else 4535 *MARK = sv; 4536 } 4537 else { 4538 SAVEHDELETE(hv, keysv); 4539 *MARK = &PL_sv_undef; 4540 } 4541 } 4542 } 4543 else if (type == SVt_PVAV) { /* array element */ 4544 if (PL_op->op_flags & OPf_SPECIAL) { 4545 AV * const av = MUTABLE_AV(osv); 4546 while (++MARK <= end) { 4547 I32 idx = SvIV(*MARK); 4548 SV *sv = NULL; 4549 bool preeminent = TRUE; 4550 if (can_preserve) 4551 preeminent = av_exists(av, idx); 4552 if (tied) { 4553 SV **svp = av_fetch(av, idx, 1); 4554 if (svp) 4555 sv = *svp; 4556 else 4557 preeminent = FALSE; 4558 } 4559 else { 4560 sv = av_delete(av, idx, 0); 4561 SvREFCNT_inc_simple_void(sv); /* De-mortalize */ 4562 } 4563 if (preeminent) { 4564 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM); 4565 if (tied) { 4566 *MARK = sv_mortalcopy(sv); 4567 mg_clear(sv); 4568 } else 4569 *MARK = sv; 4570 } 4571 else { 4572 SAVEADELETE(av, idx); 4573 *MARK = &PL_sv_undef; 4574 } 4575 } 4576 } 4577 else 4578 DIE(aTHX_ "panic: avhv_delete no longer supported"); 4579 } 4580 else 4581 DIE(aTHX_ "Not a HASH reference"); 4582 if (sliced) { 4583 if (gimme == G_VOID) 4584 SP = ORIGMARK; 4585 else if (gimme == G_SCALAR) { 4586 MARK = ORIGMARK; 4587 if (SP > MARK) 4588 *++MARK = *SP; 4589 else 4590 *++MARK = &PL_sv_undef; 4591 SP = MARK; 4592 } 4593 } 4594 else if (gimme != G_VOID) 4595 PUSHs(unsliced_keysv); 4596 4597 RETURN; 4598 } 4599 4600 PP(pp_delete) 4601 { 4602 dVAR; 4603 dSP; 4604 I32 gimme; 4605 I32 discard; 4606 4607 if (PL_op->op_private & OPpLVAL_INTRO) 4608 return do_delete_local(); 4609 4610 gimme = GIMME_V; 4611 discard = (gimme == G_VOID) ? G_DISCARD : 0; 4612 4613 if (PL_op->op_private & OPpSLICE) { 4614 dMARK; dORIGMARK; 4615 HV * const hv = MUTABLE_HV(POPs); 4616 const U32 hvtype = SvTYPE(hv); 4617 if (hvtype == SVt_PVHV) { /* hash element */ 4618 while (++MARK <= SP) { 4619 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0); 4620 *MARK = sv ? sv : &PL_sv_undef; 4621 } 4622 } 4623 else if (hvtype == SVt_PVAV) { /* array element */ 4624 if (PL_op->op_flags & OPf_SPECIAL) { 4625 while (++MARK <= SP) { 4626 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard); 4627 *MARK = sv ? sv : &PL_sv_undef; 4628 } 4629 } 4630 } 4631 else 4632 DIE(aTHX_ "Not a HASH reference"); 4633 if (discard) 4634 SP = ORIGMARK; 4635 else if (gimme == G_SCALAR) { 4636 MARK = ORIGMARK; 4637 if (SP > MARK) 4638 *++MARK = *SP; 4639 else 4640 *++MARK = &PL_sv_undef; 4641 SP = MARK; 4642 } 4643 } 4644 else { 4645 SV *keysv = POPs; 4646 HV * const hv = MUTABLE_HV(POPs); 4647 SV *sv = NULL; 4648 if (SvTYPE(hv) == SVt_PVHV) 4649 sv = hv_delete_ent(hv, keysv, discard, 0); 4650 else if (SvTYPE(hv) == SVt_PVAV) { 4651 if (PL_op->op_flags & OPf_SPECIAL) 4652 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard); 4653 else 4654 DIE(aTHX_ "panic: avhv_delete no longer supported"); 4655 } 4656 else 4657 DIE(aTHX_ "Not a HASH reference"); 4658 if (!sv) 4659 sv = &PL_sv_undef; 4660 if (!discard) 4661 PUSHs(sv); 4662 } 4663 RETURN; 4664 } 4665 4666 PP(pp_exists) 4667 { 4668 dVAR; 4669 dSP; 4670 SV *tmpsv; 4671 HV *hv; 4672 4673 if (PL_op->op_private & OPpEXISTS_SUB) { 4674 GV *gv; 4675 SV * const sv = POPs; 4676 CV * const cv = sv_2cv(sv, &hv, &gv, 0); 4677 if (cv) 4678 RETPUSHYES; 4679 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv)) 4680 RETPUSHYES; 4681 RETPUSHNO; 4682 } 4683 tmpsv = POPs; 4684 hv = MUTABLE_HV(POPs); 4685 if (SvTYPE(hv) == SVt_PVHV) { 4686 if (hv_exists_ent(hv, tmpsv, 0)) 4687 RETPUSHYES; 4688 } 4689 else if (SvTYPE(hv) == SVt_PVAV) { 4690 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */ 4691 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv))) 4692 RETPUSHYES; 4693 } 4694 } 4695 else { 4696 DIE(aTHX_ "Not a HASH reference"); 4697 } 4698 RETPUSHNO; 4699 } 4700 4701 PP(pp_hslice) 4702 { 4703 dVAR; dSP; dMARK; dORIGMARK; 4704 HV * const hv = MUTABLE_HV(POPs); 4705 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET); 4706 const bool localizing = PL_op->op_private & OPpLVAL_INTRO; 4707 bool can_preserve = FALSE; 4708 4709 if (localizing) { 4710 MAGIC *mg; 4711 HV *stash; 4712 4713 if (SvCANEXISTDELETE(hv)) 4714 can_preserve = TRUE; 4715 } 4716 4717 while (++MARK <= SP) { 4718 SV * const keysv = *MARK; 4719 SV **svp; 4720 HE *he; 4721 bool preeminent = TRUE; 4722 4723 if (localizing && can_preserve) { 4724 /* If we can determine whether the element exist, 4725 * try to preserve the existenceness of a tied hash 4726 * element by using EXISTS and DELETE if possible. 4727 * Fallback to FETCH and STORE otherwise. */ 4728 preeminent = hv_exists_ent(hv, keysv, 0); 4729 } 4730 4731 he = hv_fetch_ent(hv, keysv, lval, 0); 4732 svp = he ? &HeVAL(he) : NULL; 4733 4734 if (lval) { 4735 if (!svp || !*svp || *svp == &PL_sv_undef) { 4736 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); 4737 } 4738 if (localizing) { 4739 if (HvNAME_get(hv) && isGV(*svp)) 4740 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL)); 4741 else if (preeminent) 4742 save_helem_flags(hv, keysv, svp, 4743 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC); 4744 else 4745 SAVEHDELETE(hv, keysv); 4746 } 4747 } 4748 *MARK = svp && *svp ? *svp : &PL_sv_undef; 4749 } 4750 if (GIMME != G_ARRAY) { 4751 MARK = ORIGMARK; 4752 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef; 4753 SP = MARK; 4754 } 4755 RETURN; 4756 } 4757 4758 /* List operators. */ 4759 4760 PP(pp_list) 4761 { 4762 dVAR; dSP; dMARK; 4763 if (GIMME != G_ARRAY) { 4764 if (++MARK <= SP) 4765 *MARK = *SP; /* unwanted list, return last item */ 4766 else 4767 *MARK = &PL_sv_undef; 4768 SP = MARK; 4769 } 4770 RETURN; 4771 } 4772 4773 PP(pp_lslice) 4774 { 4775 dVAR; 4776 dSP; 4777 SV ** const lastrelem = PL_stack_sp; 4778 SV ** const lastlelem = PL_stack_base + POPMARK; 4779 SV ** const firstlelem = PL_stack_base + POPMARK + 1; 4780 SV ** const firstrelem = lastlelem + 1; 4781 I32 is_something_there = FALSE; 4782 4783 const I32 max = lastrelem - lastlelem; 4784 SV **lelem; 4785 4786 if (GIMME != G_ARRAY) { 4787 I32 ix = SvIV(*lastlelem); 4788 if (ix < 0) 4789 ix += max; 4790 if (ix < 0 || ix >= max) 4791 *firstlelem = &PL_sv_undef; 4792 else 4793 *firstlelem = firstrelem[ix]; 4794 SP = firstlelem; 4795 RETURN; 4796 } 4797 4798 if (max == 0) { 4799 SP = firstlelem - 1; 4800 RETURN; 4801 } 4802 4803 for (lelem = firstlelem; lelem <= lastlelem; lelem++) { 4804 I32 ix = SvIV(*lelem); 4805 if (ix < 0) 4806 ix += max; 4807 if (ix < 0 || ix >= max) 4808 *lelem = &PL_sv_undef; 4809 else { 4810 is_something_there = TRUE; 4811 if (!(*lelem = firstrelem[ix])) 4812 *lelem = &PL_sv_undef; 4813 } 4814 } 4815 if (is_something_there) 4816 SP = lastlelem; 4817 else 4818 SP = firstlelem - 1; 4819 RETURN; 4820 } 4821 4822 PP(pp_anonlist) 4823 { 4824 dVAR; dSP; dMARK; dORIGMARK; 4825 const I32 items = SP - MARK; 4826 SV * const av = MUTABLE_SV(av_make(items, MARK+1)); 4827 SP = ORIGMARK; /* av_make() might realloc stack_sp */ 4828 mXPUSHs((PL_op->op_flags & OPf_SPECIAL) 4829 ? newRV_noinc(av) : av); 4830 RETURN; 4831 } 4832 4833 PP(pp_anonhash) 4834 { 4835 dVAR; dSP; dMARK; dORIGMARK; 4836 HV* const hv = (HV *)sv_2mortal((SV *)newHV()); 4837 4838 while (MARK < SP) { 4839 SV * const key = 4840 (MARK++, SvGMAGICAL(*MARK) ? sv_mortalcopy(*MARK) : *MARK); 4841 SV *val; 4842 if (MARK < SP) 4843 { 4844 MARK++; 4845 SvGETMAGIC(*MARK); 4846 val = newSV(0); 4847 sv_setsv(val, *MARK); 4848 } 4849 else 4850 { 4851 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash"); 4852 val = newSV(0); 4853 } 4854 (void)hv_store_ent(hv,key,val,0); 4855 } 4856 SP = ORIGMARK; 4857 if (PL_op->op_flags & OPf_SPECIAL) 4858 mXPUSHs(newRV_inc(MUTABLE_SV(hv))); 4859 else XPUSHs(MUTABLE_SV(hv)); 4860 RETURN; 4861 } 4862 4863 static AV * 4864 S_deref_plain_array(pTHX_ AV *ary) 4865 { 4866 if (SvTYPE(ary) == SVt_PVAV) return ary; 4867 SvGETMAGIC((SV *)ary); 4868 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV) 4869 Perl_die(aTHX_ "Not an ARRAY reference"); 4870 else if (SvOBJECT(SvRV(ary))) 4871 Perl_die(aTHX_ "Not an unblessed ARRAY reference"); 4872 return (AV *)SvRV(ary); 4873 } 4874 4875 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) 4876 # define DEREF_PLAIN_ARRAY(ary) \ 4877 ({ \ 4878 AV *aRrRay = ary; \ 4879 SvTYPE(aRrRay) == SVt_PVAV \ 4880 ? aRrRay \ 4881 : S_deref_plain_array(aTHX_ aRrRay); \ 4882 }) 4883 #else 4884 # define DEREF_PLAIN_ARRAY(ary) \ 4885 ( \ 4886 PL_Sv = (SV *)(ary), \ 4887 SvTYPE(PL_Sv) == SVt_PVAV \ 4888 ? (AV *)PL_Sv \ 4889 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \ 4890 ) 4891 #endif 4892 4893 PP(pp_splice) 4894 { 4895 dVAR; dSP; dMARK; dORIGMARK; 4896 int num_args = (SP - MARK); 4897 AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); 4898 SV **src; 4899 SV **dst; 4900 I32 i; 4901 I32 offset; 4902 I32 length; 4903 I32 newlen; 4904 I32 after; 4905 I32 diff; 4906 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 4907 4908 if (mg) { 4909 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg, 4910 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, 4911 sp - mark); 4912 } 4913 4914 SP++; 4915 4916 if (++MARK < SP) { 4917 offset = i = SvIV(*MARK); 4918 if (offset < 0) 4919 offset += AvFILLp(ary) + 1; 4920 if (offset < 0) 4921 DIE(aTHX_ PL_no_aelem, i); 4922 if (++MARK < SP) { 4923 length = SvIVx(*MARK++); 4924 if (length < 0) { 4925 length += AvFILLp(ary) - offset + 1; 4926 if (length < 0) 4927 length = 0; 4928 } 4929 } 4930 else 4931 length = AvMAX(ary) + 1; /* close enough to infinity */ 4932 } 4933 else { 4934 offset = 0; 4935 length = AvMAX(ary) + 1; 4936 } 4937 if (offset > AvFILLp(ary) + 1) { 4938 if (num_args > 2) 4939 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" ); 4940 offset = AvFILLp(ary) + 1; 4941 } 4942 after = AvFILLp(ary) + 1 - (offset + length); 4943 if (after < 0) { /* not that much array */ 4944 length += after; /* offset+length now in array */ 4945 after = 0; 4946 if (!AvALLOC(ary)) 4947 av_extend(ary, 0); 4948 } 4949 4950 /* At this point, MARK .. SP-1 is our new LIST */ 4951 4952 newlen = SP - MARK; 4953 diff = newlen - length; 4954 if (newlen && !AvREAL(ary) && AvREIFY(ary)) 4955 av_reify(ary); 4956 4957 /* make new elements SVs now: avoid problems if they're from the array */ 4958 for (dst = MARK, i = newlen; i; i--) { 4959 SV * const h = *dst; 4960 *dst++ = newSVsv(h); 4961 } 4962 4963 if (diff < 0) { /* shrinking the area */ 4964 SV **tmparyval = NULL; 4965 if (newlen) { 4966 Newx(tmparyval, newlen, SV*); /* so remember insertion */ 4967 Copy(MARK, tmparyval, newlen, SV*); 4968 } 4969 4970 MARK = ORIGMARK + 1; 4971 if (GIMME == G_ARRAY) { /* copy return vals to stack */ 4972 MEXTEND(MARK, length); 4973 Copy(AvARRAY(ary)+offset, MARK, length, SV*); 4974 if (AvREAL(ary)) { 4975 EXTEND_MORTAL(length); 4976 for (i = length, dst = MARK; i; i--) { 4977 sv_2mortal(*dst); /* free them eventually */ 4978 dst++; 4979 } 4980 } 4981 MARK += length - 1; 4982 } 4983 else { 4984 *MARK = AvARRAY(ary)[offset+length-1]; 4985 if (AvREAL(ary)) { 4986 sv_2mortal(*MARK); 4987 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--) 4988 SvREFCNT_dec(*dst++); /* free them now */ 4989 } 4990 } 4991 AvFILLp(ary) += diff; 4992 4993 /* pull up or down? */ 4994 4995 if (offset < after) { /* easier to pull up */ 4996 if (offset) { /* esp. if nothing to pull */ 4997 src = &AvARRAY(ary)[offset-1]; 4998 dst = src - diff; /* diff is negative */ 4999 for (i = offset; i > 0; i--) /* can't trust Copy */ 5000 *dst-- = *src--; 5001 } 5002 dst = AvARRAY(ary); 5003 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */ 5004 AvMAX(ary) += diff; 5005 } 5006 else { 5007 if (after) { /* anything to pull down? */ 5008 src = AvARRAY(ary) + offset + length; 5009 dst = src + diff; /* diff is negative */ 5010 Move(src, dst, after, SV*); 5011 } 5012 dst = &AvARRAY(ary)[AvFILLp(ary)+1]; 5013 /* avoid later double free */ 5014 } 5015 i = -diff; 5016 while (i) 5017 dst[--i] = &PL_sv_undef; 5018 5019 if (newlen) { 5020 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* ); 5021 Safefree(tmparyval); 5022 } 5023 } 5024 else { /* no, expanding (or same) */ 5025 SV** tmparyval = NULL; 5026 if (length) { 5027 Newx(tmparyval, length, SV*); /* so remember deletion */ 5028 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*); 5029 } 5030 5031 if (diff > 0) { /* expanding */ 5032 /* push up or down? */ 5033 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) { 5034 if (offset) { 5035 src = AvARRAY(ary); 5036 dst = src - diff; 5037 Move(src, dst, offset, SV*); 5038 } 5039 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */ 5040 AvMAX(ary) += diff; 5041 AvFILLp(ary) += diff; 5042 } 5043 else { 5044 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */ 5045 av_extend(ary, AvFILLp(ary) + diff); 5046 AvFILLp(ary) += diff; 5047 5048 if (after) { 5049 dst = AvARRAY(ary) + AvFILLp(ary); 5050 src = dst - diff; 5051 for (i = after; i; i--) { 5052 *dst-- = *src--; 5053 } 5054 } 5055 } 5056 } 5057 5058 if (newlen) { 5059 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* ); 5060 } 5061 5062 MARK = ORIGMARK + 1; 5063 if (GIMME == G_ARRAY) { /* copy return vals to stack */ 5064 if (length) { 5065 Copy(tmparyval, MARK, length, SV*); 5066 if (AvREAL(ary)) { 5067 EXTEND_MORTAL(length); 5068 for (i = length, dst = MARK; i; i--) { 5069 sv_2mortal(*dst); /* free them eventually */ 5070 dst++; 5071 } 5072 } 5073 } 5074 MARK += length - 1; 5075 } 5076 else if (length--) { 5077 *MARK = tmparyval[length]; 5078 if (AvREAL(ary)) { 5079 sv_2mortal(*MARK); 5080 while (length-- > 0) 5081 SvREFCNT_dec(tmparyval[length]); 5082 } 5083 } 5084 else 5085 *MARK = &PL_sv_undef; 5086 Safefree(tmparyval); 5087 } 5088 5089 if (SvMAGICAL(ary)) 5090 mg_set(MUTABLE_SV(ary)); 5091 5092 SP = MARK; 5093 RETURN; 5094 } 5095 5096 PP(pp_push) 5097 { 5098 dVAR; dSP; dMARK; dORIGMARK; dTARGET; 5099 AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); 5100 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5101 5102 if (mg) { 5103 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); 5104 PUSHMARK(MARK); 5105 PUTBACK; 5106 ENTER_with_name("call_PUSH"); 5107 call_method("PUSH",G_SCALAR|G_DISCARD); 5108 LEAVE_with_name("call_PUSH"); 5109 SPAGAIN; 5110 } 5111 else { 5112 if (SvREADONLY(ary) && MARK < SP) Perl_croak_no_modify(); 5113 PL_delaymagic = DM_DELAY; 5114 for (++MARK; MARK <= SP; MARK++) { 5115 SV *sv; 5116 if (*MARK) SvGETMAGIC(*MARK); 5117 sv = newSV(0); 5118 if (*MARK) 5119 sv_setsv_nomg(sv, *MARK); 5120 av_store(ary, AvFILLp(ary)+1, sv); 5121 } 5122 if (PL_delaymagic & DM_ARRAY_ISA) 5123 mg_set(MUTABLE_SV(ary)); 5124 5125 PL_delaymagic = 0; 5126 } 5127 SP = ORIGMARK; 5128 if (OP_GIMME(PL_op, 0) != G_VOID) { 5129 PUSHi( AvFILL(ary) + 1 ); 5130 } 5131 RETURN; 5132 } 5133 5134 PP(pp_shift) 5135 { 5136 dVAR; 5137 dSP; 5138 AV * const av = PL_op->op_flags & OPf_SPECIAL 5139 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs)); 5140 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av); 5141 EXTEND(SP, 1); 5142 assert (sv); 5143 if (AvREAL(av)) 5144 (void)sv_2mortal(sv); 5145 PUSHs(sv); 5146 RETURN; 5147 } 5148 5149 PP(pp_unshift) 5150 { 5151 dVAR; dSP; dMARK; dORIGMARK; dTARGET; 5152 AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); 5153 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); 5154 5155 if (mg) { 5156 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg); 5157 PUSHMARK(MARK); 5158 PUTBACK; 5159 ENTER_with_name("call_UNSHIFT"); 5160 call_method("UNSHIFT",G_SCALAR|G_DISCARD); 5161 LEAVE_with_name("call_UNSHIFT"); 5162 SPAGAIN; 5163 } 5164 else { 5165 I32 i = 0; 5166 av_unshift(ary, SP - MARK); 5167 while (MARK < SP) { 5168 SV * const sv = newSVsv(*++MARK); 5169 (void)av_store(ary, i++, sv); 5170 } 5171 } 5172 SP = ORIGMARK; 5173 if (OP_GIMME(PL_op, 0) != G_VOID) { 5174 PUSHi( AvFILL(ary) + 1 ); 5175 } 5176 RETURN; 5177 } 5178 5179 PP(pp_reverse) 5180 { 5181 dVAR; dSP; dMARK; 5182 5183 if (GIMME == G_ARRAY) { 5184 if (PL_op->op_private & OPpREVERSE_INPLACE) { 5185 AV *av; 5186 5187 /* See pp_sort() */ 5188 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV); 5189 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */ 5190 av = MUTABLE_AV((*SP)); 5191 /* In-place reversing only happens in void context for the array 5192 * assignment. We don't need to push anything on the stack. */ 5193 SP = MARK; 5194 5195 if (SvMAGICAL(av)) { 5196 I32 i, j; 5197 SV *tmp = sv_newmortal(); 5198 /* For SvCANEXISTDELETE */ 5199 HV *stash; 5200 const MAGIC *mg; 5201 bool can_preserve = SvCANEXISTDELETE(av); 5202 5203 for (i = 0, j = av_len(av); i < j; ++i, --j) { 5204 SV *begin, *end; 5205 5206 if (can_preserve) { 5207 if (!av_exists(av, i)) { 5208 if (av_exists(av, j)) { 5209 SV *sv = av_delete(av, j, 0); 5210 begin = *av_fetch(av, i, TRUE); 5211 sv_setsv_mg(begin, sv); 5212 } 5213 continue; 5214 } 5215 else if (!av_exists(av, j)) { 5216 SV *sv = av_delete(av, i, 0); 5217 end = *av_fetch(av, j, TRUE); 5218 sv_setsv_mg(end, sv); 5219 continue; 5220 } 5221 } 5222 5223 begin = *av_fetch(av, i, TRUE); 5224 end = *av_fetch(av, j, TRUE); 5225 sv_setsv(tmp, begin); 5226 sv_setsv_mg(begin, end); 5227 sv_setsv_mg(end, tmp); 5228 } 5229 } 5230 else { 5231 SV **begin = AvARRAY(av); 5232 5233 if (begin) { 5234 SV **end = begin + AvFILLp(av); 5235 5236 while (begin < end) { 5237 SV * const tmp = *begin; 5238 *begin++ = *end; 5239 *end-- = tmp; 5240 } 5241 } 5242 } 5243 } 5244 else { 5245 SV **oldsp = SP; 5246 MARK++; 5247 while (MARK < SP) { 5248 SV * const tmp = *MARK; 5249 *MARK++ = *SP; 5250 *SP-- = tmp; 5251 } 5252 /* safe as long as stack cannot get extended in the above */ 5253 SP = oldsp; 5254 } 5255 } 5256 else { 5257 char *up; 5258 char *down; 5259 I32 tmp; 5260 dTARGET; 5261 STRLEN len; 5262 5263 SvUTF8_off(TARG); /* decontaminate */ 5264 if (SP - MARK > 1) 5265 do_join(TARG, &PL_sv_no, MARK, SP); 5266 else { 5267 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv()); 5268 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED)) 5269 report_uninit(TARG); 5270 } 5271 5272 up = SvPV_force(TARG, len); 5273 if (len > 1) { 5274 if (DO_UTF8(TARG)) { /* first reverse each character */ 5275 U8* s = (U8*)SvPVX(TARG); 5276 const U8* send = (U8*)(s + len); 5277 while (s < send) { 5278 if (UTF8_IS_INVARIANT(*s)) { 5279 s++; 5280 continue; 5281 } 5282 else { 5283 if (!utf8_to_uvchr_buf(s, send, 0)) 5284 break; 5285 up = (char*)s; 5286 s += UTF8SKIP(s); 5287 down = (char*)(s - 1); 5288 /* reverse this character */ 5289 while (down > up) { 5290 tmp = *up; 5291 *up++ = *down; 5292 *down-- = (char)tmp; 5293 } 5294 } 5295 } 5296 up = SvPVX(TARG); 5297 } 5298 down = SvPVX(TARG) + len - 1; 5299 while (down > up) { 5300 tmp = *up; 5301 *up++ = *down; 5302 *down-- = (char)tmp; 5303 } 5304 (void)SvPOK_only_UTF8(TARG); 5305 } 5306 SP = MARK + 1; 5307 SETTARG; 5308 } 5309 RETURN; 5310 } 5311 5312 PP(pp_split) 5313 { 5314 dVAR; dSP; dTARG; 5315 AV *ary; 5316 IV limit = POPi; /* note, negative is forever */ 5317 SV * const sv = POPs; 5318 STRLEN len; 5319 const char *s = SvPV_const(sv, len); 5320 const bool do_utf8 = DO_UTF8(sv); 5321 const char *strend = s + len; 5322 PMOP *pm; 5323 REGEXP *rx; 5324 SV *dstr; 5325 const char *m; 5326 I32 iters = 0; 5327 const STRLEN slen = do_utf8 5328 ? utf8_length((U8*)s, (U8*)strend) 5329 : (STRLEN)(strend - s); 5330 I32 maxiters = slen + 10; 5331 I32 trailing_empty = 0; 5332 const char *orig; 5333 const I32 origlimit = limit; 5334 I32 realarray = 0; 5335 I32 base; 5336 const I32 gimme = GIMME_V; 5337 bool gimme_scalar; 5338 const I32 oldsave = PL_savestack_ix; 5339 U32 make_mortal = SVs_TEMP; 5340 bool multiline = 0; 5341 MAGIC *mg = NULL; 5342 5343 #ifdef DEBUGGING 5344 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*); 5345 #else 5346 pm = (PMOP*)POPs; 5347 #endif 5348 if (!pm || !s) 5349 DIE(aTHX_ "panic: pp_split, pm=%p, s=%p", pm, s); 5350 rx = PM_GETRE(pm); 5351 5352 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && 5353 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); 5354 5355 RX_MATCH_UTF8_set(rx, do_utf8); 5356 5357 #ifdef USE_ITHREADS 5358 if (pm->op_pmreplrootu.op_pmtargetoff) { 5359 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff))); 5360 } 5361 #else 5362 if (pm->op_pmreplrootu.op_pmtargetgv) { 5363 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv); 5364 } 5365 #endif 5366 else 5367 ary = NULL; 5368 if (ary) { 5369 realarray = 1; 5370 PUTBACK; 5371 av_extend(ary,0); 5372 av_clear(ary); 5373 SPAGAIN; 5374 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) { 5375 PUSHMARK(SP); 5376 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg)); 5377 } 5378 else { 5379 if (!AvREAL(ary)) { 5380 I32 i; 5381 AvREAL_on(ary); 5382 AvREIFY_off(ary); 5383 for (i = AvFILLp(ary); i >= 0; i--) 5384 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */ 5385 } 5386 /* temporarily switch stacks */ 5387 SAVESWITCHSTACK(PL_curstack, ary); 5388 make_mortal = 0; 5389 } 5390 } 5391 base = SP - PL_stack_base; 5392 orig = s; 5393 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) { 5394 if (do_utf8) { 5395 while (isSPACE_utf8(s)) 5396 s += UTF8SKIP(s); 5397 } 5398 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { 5399 while (isSPACE_LC(*s)) 5400 s++; 5401 } 5402 else { 5403 while (isSPACE(*s)) 5404 s++; 5405 } 5406 } 5407 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) { 5408 multiline = 1; 5409 } 5410 5411 gimme_scalar = gimme == G_SCALAR && !ary; 5412 5413 if (!limit) 5414 limit = maxiters + 2; 5415 if (RX_EXTFLAGS(rx) & RXf_WHITE) { 5416 while (--limit) { 5417 m = s; 5418 /* this one uses 'm' and is a negative test */ 5419 if (do_utf8) { 5420 while (m < strend && ! isSPACE_utf8(m) ) { 5421 const int t = UTF8SKIP(m); 5422 /* isSPACE_utf8 returns FALSE for malform utf8 */ 5423 if (strend - m < t) 5424 m = strend; 5425 else 5426 m += t; 5427 } 5428 } 5429 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) 5430 { 5431 while (m < strend && !isSPACE_LC(*m)) 5432 ++m; 5433 } else { 5434 while (m < strend && !isSPACE(*m)) 5435 ++m; 5436 } 5437 if (m >= strend) 5438 break; 5439 5440 if (gimme_scalar) { 5441 iters++; 5442 if (m-s == 0) 5443 trailing_empty++; 5444 else 5445 trailing_empty = 0; 5446 } else { 5447 dstr = newSVpvn_flags(s, m-s, 5448 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 5449 XPUSHs(dstr); 5450 } 5451 5452 /* skip the whitespace found last */ 5453 if (do_utf8) 5454 s = m + UTF8SKIP(m); 5455 else 5456 s = m + 1; 5457 5458 /* this one uses 's' and is a positive test */ 5459 if (do_utf8) { 5460 while (s < strend && isSPACE_utf8(s) ) 5461 s += UTF8SKIP(s); 5462 } 5463 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) 5464 { 5465 while (s < strend && isSPACE_LC(*s)) 5466 ++s; 5467 } else { 5468 while (s < strend && isSPACE(*s)) 5469 ++s; 5470 } 5471 } 5472 } 5473 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) { 5474 while (--limit) { 5475 for (m = s; m < strend && *m != '\n'; m++) 5476 ; 5477 m++; 5478 if (m >= strend) 5479 break; 5480 5481 if (gimme_scalar) { 5482 iters++; 5483 if (m-s == 0) 5484 trailing_empty++; 5485 else 5486 trailing_empty = 0; 5487 } else { 5488 dstr = newSVpvn_flags(s, m-s, 5489 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 5490 XPUSHs(dstr); 5491 } 5492 s = m; 5493 } 5494 } 5495 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) { 5496 /* 5497 Pre-extend the stack, either the number of bytes or 5498 characters in the string or a limited amount, triggered by: 5499 5500 my ($x, $y) = split //, $str; 5501 or 5502 split //, $str, $i; 5503 */ 5504 if (!gimme_scalar) { 5505 const U32 items = limit - 1; 5506 if (items < slen) 5507 EXTEND(SP, items); 5508 else 5509 EXTEND(SP, slen); 5510 } 5511 5512 if (do_utf8) { 5513 while (--limit) { 5514 /* keep track of how many bytes we skip over */ 5515 m = s; 5516 s += UTF8SKIP(s); 5517 if (gimme_scalar) { 5518 iters++; 5519 if (s-m == 0) 5520 trailing_empty++; 5521 else 5522 trailing_empty = 0; 5523 } else { 5524 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal); 5525 5526 PUSHs(dstr); 5527 } 5528 5529 if (s >= strend) 5530 break; 5531 } 5532 } else { 5533 while (--limit) { 5534 if (gimme_scalar) { 5535 iters++; 5536 } else { 5537 dstr = newSVpvn(s, 1); 5538 5539 5540 if (make_mortal) 5541 sv_2mortal(dstr); 5542 5543 PUSHs(dstr); 5544 } 5545 5546 s++; 5547 5548 if (s >= strend) 5549 break; 5550 } 5551 } 5552 } 5553 else if (do_utf8 == (RX_UTF8(rx) != 0) && 5554 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx) 5555 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL) 5556 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) { 5557 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL); 5558 SV * const csv = CALLREG_INTUIT_STRING(rx); 5559 5560 len = RX_MINLENRET(rx); 5561 if (len == 1 && !RX_UTF8(rx) && !tail) { 5562 const char c = *SvPV_nolen_const(csv); 5563 while (--limit) { 5564 for (m = s; m < strend && *m != c; m++) 5565 ; 5566 if (m >= strend) 5567 break; 5568 if (gimme_scalar) { 5569 iters++; 5570 if (m-s == 0) 5571 trailing_empty++; 5572 else 5573 trailing_empty = 0; 5574 } else { 5575 dstr = newSVpvn_flags(s, m-s, 5576 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 5577 XPUSHs(dstr); 5578 } 5579 /* The rx->minlen is in characters but we want to step 5580 * s ahead by bytes. */ 5581 if (do_utf8) 5582 s = (char*)utf8_hop((U8*)m, len); 5583 else 5584 s = m + len; /* Fake \n at the end */ 5585 } 5586 } 5587 else { 5588 while (s < strend && --limit && 5589 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend, 5590 csv, multiline ? FBMrf_MULTILINE : 0)) ) 5591 { 5592 if (gimme_scalar) { 5593 iters++; 5594 if (m-s == 0) 5595 trailing_empty++; 5596 else 5597 trailing_empty = 0; 5598 } else { 5599 dstr = newSVpvn_flags(s, m-s, 5600 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 5601 XPUSHs(dstr); 5602 } 5603 /* The rx->minlen is in characters but we want to step 5604 * s ahead by bytes. */ 5605 if (do_utf8) 5606 s = (char*)utf8_hop((U8*)m, len); 5607 else 5608 s = m + len; /* Fake \n at the end */ 5609 } 5610 } 5611 } 5612 else { 5613 maxiters += slen * RX_NPARENS(rx); 5614 while (s < strend && --limit) 5615 { 5616 I32 rex_return; 5617 PUTBACK; 5618 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1, 5619 sv, NULL, 0); 5620 SPAGAIN; 5621 if (rex_return == 0) 5622 break; 5623 TAINT_IF(RX_MATCH_TAINTED(rx)); 5624 /* we never pass the REXEC_COPY_STR flag, so it should 5625 * never get copied */ 5626 assert(!RX_MATCH_COPIED(rx)); 5627 m = RX_OFFS(rx)[0].start + orig; 5628 5629 if (gimme_scalar) { 5630 iters++; 5631 if (m-s == 0) 5632 trailing_empty++; 5633 else 5634 trailing_empty = 0; 5635 } else { 5636 dstr = newSVpvn_flags(s, m-s, 5637 (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 5638 XPUSHs(dstr); 5639 } 5640 if (RX_NPARENS(rx)) { 5641 I32 i; 5642 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) { 5643 s = RX_OFFS(rx)[i].start + orig; 5644 m = RX_OFFS(rx)[i].end + orig; 5645 5646 /* japhy (07/27/01) -- the (m && s) test doesn't catch 5647 parens that didn't match -- they should be set to 5648 undef, not the empty string */ 5649 if (gimme_scalar) { 5650 iters++; 5651 if (m-s == 0) 5652 trailing_empty++; 5653 else 5654 trailing_empty = 0; 5655 } else { 5656 if (m >= orig && s >= orig) { 5657 dstr = newSVpvn_flags(s, m-s, 5658 (do_utf8 ? SVf_UTF8 : 0) 5659 | make_mortal); 5660 } 5661 else 5662 dstr = &PL_sv_undef; /* undef, not "" */ 5663 XPUSHs(dstr); 5664 } 5665 5666 } 5667 } 5668 s = RX_OFFS(rx)[0].end + orig; 5669 } 5670 } 5671 5672 if (!gimme_scalar) { 5673 iters = (SP - PL_stack_base) - base; 5674 } 5675 if (iters > maxiters) 5676 DIE(aTHX_ "Split loop"); 5677 5678 /* keep field after final delim? */ 5679 if (s < strend || (iters && origlimit)) { 5680 if (!gimme_scalar) { 5681 const STRLEN l = strend - s; 5682 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal); 5683 XPUSHs(dstr); 5684 } 5685 iters++; 5686 } 5687 else if (!origlimit) { 5688 if (gimme_scalar) { 5689 iters -= trailing_empty; 5690 } else { 5691 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) { 5692 if (TOPs && !make_mortal) 5693 sv_2mortal(TOPs); 5694 *SP-- = &PL_sv_undef; 5695 iters--; 5696 } 5697 } 5698 } 5699 5700 PUTBACK; 5701 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */ 5702 SPAGAIN; 5703 if (realarray) { 5704 if (!mg) { 5705 if (SvSMAGICAL(ary)) { 5706 PUTBACK; 5707 mg_set(MUTABLE_SV(ary)); 5708 SPAGAIN; 5709 } 5710 if (gimme == G_ARRAY) { 5711 EXTEND(SP, iters); 5712 Copy(AvARRAY(ary), SP + 1, iters, SV*); 5713 SP += iters; 5714 RETURN; 5715 } 5716 } 5717 else { 5718 PUTBACK; 5719 ENTER_with_name("call_PUSH"); 5720 call_method("PUSH",G_SCALAR|G_DISCARD); 5721 LEAVE_with_name("call_PUSH"); 5722 SPAGAIN; 5723 if (gimme == G_ARRAY) { 5724 I32 i; 5725 /* EXTEND should not be needed - we just popped them */ 5726 EXTEND(SP, iters); 5727 for (i=0; i < iters; i++) { 5728 SV **svp = av_fetch(ary, i, FALSE); 5729 PUSHs((svp) ? *svp : &PL_sv_undef); 5730 } 5731 RETURN; 5732 } 5733 } 5734 } 5735 else { 5736 if (gimme == G_ARRAY) 5737 RETURN; 5738 } 5739 5740 GETTARGET; 5741 PUSHi(iters); 5742 RETURN; 5743 } 5744 5745 PP(pp_once) 5746 { 5747 dSP; 5748 SV *const sv = PAD_SVl(PL_op->op_targ); 5749 5750 if (SvPADSTALE(sv)) { 5751 /* First time. */ 5752 SvPADSTALE_off(sv); 5753 RETURNOP(cLOGOP->op_other); 5754 } 5755 RETURNOP(cLOGOP->op_next); 5756 } 5757 5758 PP(pp_lock) 5759 { 5760 dVAR; 5761 dSP; 5762 dTOPss; 5763 SV *retsv = sv; 5764 SvLOCK(sv); 5765 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV 5766 || SvTYPE(retsv) == SVt_PVCV) { 5767 retsv = refto(retsv); 5768 } 5769 SETs(retsv); 5770 RETURN; 5771 } 5772 5773 5774 PP(unimplemented_op) 5775 { 5776 dVAR; 5777 const Optype op_type = PL_op->op_type; 5778 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope 5779 with out of range op numbers - it only "special" cases op_custom. 5780 Secondly, as the three ops we "panic" on are padmy, mapstart and custom, 5781 if we get here for a custom op then that means that the custom op didn't 5782 have an implementation. Given that OP_NAME() looks up the custom op 5783 by its pp_addr, likely it will return NULL, unless someone (unhelpfully) 5784 registers &PL_unimplemented_op as the address of their custom op. 5785 NULL doesn't generate a useful error message. "custom" does. */ 5786 const char *const name = op_type >= OP_max 5787 ? "[out of range]" : PL_op_name[PL_op->op_type]; 5788 if(OP_IS_SOCKET(op_type)) 5789 DIE(aTHX_ PL_no_sock_func, name); 5790 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); 5791 } 5792 5793 /* For sorting out arguments passed to a &CORE:: subroutine */ 5794 PP(pp_coreargs) 5795 { 5796 dSP; 5797 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0; 5798 int defgv = PL_opargs[opnum] & OA_DEFGV ||opnum==OP_GLOB, whicharg = 0; 5799 AV * const at_ = GvAV(PL_defgv); 5800 SV **svp = at_ ? AvARRAY(at_) : NULL; 5801 I32 minargs = 0, maxargs = 0, numargs = at_ ? AvFILLp(at_)+1 : 0; 5802 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0; 5803 bool seen_question = 0; 5804 const char *err = NULL; 5805 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK; 5806 5807 /* Count how many args there are first, to get some idea how far to 5808 extend the stack. */ 5809 while (oa) { 5810 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; } 5811 maxargs++; 5812 if (oa & OA_OPTIONAL) seen_question = 1; 5813 if (!seen_question) minargs++; 5814 oa >>= 4; 5815 } 5816 5817 if(numargs < minargs) err = "Not enough"; 5818 else if(numargs > maxargs) err = "Too many"; 5819 if (err) 5820 /* diag_listed_as: Too many arguments for %s */ 5821 Perl_croak(aTHX_ 5822 "%s arguments for %s", err, 5823 opnum ? PL_op_desc[opnum] : SvPV_nolen_const(cSVOP_sv) 5824 ); 5825 5826 /* Reset the stack pointer. Without this, we end up returning our own 5827 arguments in list context, in addition to the values we are supposed 5828 to return. nextstate usually does this on sub entry, but we need 5829 to run the next op with the caller's hints, so we cannot have a 5830 nextstate. */ 5831 SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp; 5832 5833 if(!maxargs) RETURN; 5834 5835 /* We do this here, rather than with a separate pushmark op, as it has 5836 to come in between two things this function does (stack reset and 5837 arg pushing). This seems the easiest way to do it. */ 5838 if (pushmark) { 5839 PUTBACK; 5840 (void)Perl_pp_pushmark(aTHX); 5841 } 5842 5843 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs); 5844 PUTBACK; /* The code below can die in various places. */ 5845 5846 oa = PL_opargs[opnum] >> OASHIFT; 5847 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) { 5848 whicharg++; 5849 switch (oa & 7) { 5850 case OA_SCALAR: 5851 try_defsv: 5852 if (!numargs && defgv && whicharg == minargs + 1) { 5853 PUSHs(find_rundefsv2( 5854 find_runcv_where(FIND_RUNCV_level_eq, 1, NULL), 5855 cxstack[cxstack_ix].blk_oldcop->cop_seq 5856 )); 5857 } 5858 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL); 5859 break; 5860 case OA_LIST: 5861 while (numargs--) { 5862 PUSHs(svp && *svp ? *svp : &PL_sv_undef); 5863 svp++; 5864 } 5865 RETURN; 5866 case OA_HVREF: 5867 if (!svp || !*svp || !SvROK(*svp) 5868 || SvTYPE(SvRV(*svp)) != SVt_PVHV) 5869 DIE(aTHX_ 5870 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/ 5871 "Type of arg %d to &CORE::%s must be hash reference", 5872 whicharg, OP_DESC(PL_op->op_next) 5873 ); 5874 PUSHs(SvRV(*svp)); 5875 break; 5876 case OA_FILEREF: 5877 if (!numargs) PUSHs(NULL); 5878 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp))) 5879 /* no magic here, as the prototype will have added an extra 5880 refgen and we just want what was there before that */ 5881 PUSHs(SvRV(*svp)); 5882 else { 5883 const bool constr = PL_op->op_private & whicharg; 5884 PUSHs(S_rv2gv(aTHX_ 5885 svp && *svp ? *svp : &PL_sv_undef, 5886 constr, CopHINTS_get(PL_curcop) & HINT_STRICT_REFS, 5887 !constr 5888 )); 5889 } 5890 break; 5891 case OA_SCALARREF: 5892 if (!numargs) goto try_defsv; 5893 else { 5894 const bool wantscalar = 5895 PL_op->op_private & OPpCOREARGS_SCALARMOD; 5896 if (!svp || !*svp || !SvROK(*svp) 5897 /* We have to permit globrefs even for the \$ proto, as 5898 *foo is indistinguishable from ${\*foo}, and the proto- 5899 type permits the latter. */ 5900 || SvTYPE(SvRV(*svp)) > ( 5901 wantscalar ? SVt_PVLV 5902 : opnum == OP_LOCK || opnum == OP_UNDEF 5903 ? SVt_PVCV 5904 : SVt_PVHV 5905 ) 5906 ) 5907 DIE(aTHX_ 5908 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/ 5909 "Type of arg %d to &CORE::%s must be %s", 5910 whicharg, PL_op_name[opnum], 5911 wantscalar 5912 ? "scalar reference" 5913 : opnum == OP_LOCK || opnum == OP_UNDEF 5914 ? "reference to one of [$@%&*]" 5915 : "reference to one of [$@%*]" 5916 ); 5917 PUSHs(SvRV(*svp)); 5918 if (opnum == OP_UNDEF && SvRV(*svp) == (SV *)PL_defgv 5919 && cxstack[cxstack_ix].cx_type & CXp_HASARGS) { 5920 /* Undo @_ localisation, so that sub exit does not undo 5921 part of our undeffing. */ 5922 PERL_CONTEXT *cx = &cxstack[cxstack_ix]; 5923 POP_SAVEARRAY(); 5924 cx->cx_type &= ~ CXp_HASARGS; 5925 assert(!AvREAL(cx->blk_sub.argarray)); 5926 } 5927 } 5928 break; 5929 default: 5930 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7)); 5931 } 5932 oa = oa >> 4; 5933 } 5934 5935 RETURN; 5936 } 5937 5938 PP(pp_runcv) 5939 { 5940 dSP; 5941 CV *cv; 5942 if (PL_op->op_private & OPpOFFBYONE) { 5943 cv = find_runcv_where(FIND_RUNCV_level_eq, 1, NULL); 5944 } 5945 else cv = find_runcv(NULL); 5946 XPUSHs(CvEVAL(cv) ? &PL_sv_undef : sv_2mortal(newRV((SV *)cv))); 5947 RETURN; 5948 } 5949 5950 5951 /* 5952 * Local variables: 5953 * c-indentation-style: bsd 5954 * c-basic-offset: 4 5955 * indent-tabs-mode: nil 5956 * End: 5957 * 5958 * ex: set ts=8 sts=4 sw=4 et: 5959 */ 5960