1 /* 2 $Id: Encode.xs,v 2.27 2014/04/29 16:25:06 dankogai Exp dankogai $ 3 */ 4 5 #define PERL_NO_GET_CONTEXT 6 #include "EXTERN.h" 7 #include "perl.h" 8 #include "XSUB.h" 9 #include "encode.h" 10 11 # define PERLIO_MODNAME "PerlIO::encoding" 12 # define PERLIO_FILENAME "PerlIO/encoding.pm" 13 14 /* set 1 or more to profile. t/encoding.t dumps core because of 15 Perl_warner and PerlIO don't work well */ 16 #define ENCODE_XS_PROFILE 0 17 18 /* set 0 to disable floating point to calculate buffer size for 19 encode_method(). 1 is recommended. 2 restores NI-S original */ 20 #define ENCODE_XS_USEFP 1 21 22 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ 23 Perl_croak(aTHX_ "panic_unimplemented"); \ 24 return (y)0; /* fool picky compilers */ \ 25 } 26 /**/ 27 28 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) 29 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) 30 31 #ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE 32 # define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE 33 #else 34 # define UTF8_ALLOW_STRICT 0 35 #endif 36 37 #define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \ 38 ~(UTF8_ALLOW_CONTINUATION | \ 39 UTF8_ALLOW_NON_CONTINUATION | \ 40 UTF8_ALLOW_LONG)) 41 42 void 43 Encode_XSEncoding(pTHX_ encode_t * enc) 44 { 45 dSP; 46 HV *stash = gv_stashpv("Encode::XS", TRUE); 47 SV *iv = newSViv(PTR2IV(enc)); 48 SV *sv = sv_bless(newRV_noinc(iv),stash); 49 int i = 0; 50 /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's 51 constness, in the hope that perl won't mess with it. */ 52 assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); 53 SvFLAGS(iv) |= SVp_POK; 54 SvPVX(iv) = (char*) enc->name[0]; 55 PUSHMARK(sp); 56 XPUSHs(sv); 57 while (enc->name[i]) { 58 const char *name = enc->name[i++]; 59 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); 60 } 61 PUTBACK; 62 call_pv("Encode::define_encoding", G_DISCARD); 63 SvREFCNT_dec(sv); 64 } 65 66 void 67 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) 68 { 69 /* Exists for breakpointing */ 70 } 71 72 73 #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" 74 #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" 75 76 static SV * 77 do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) 78 { 79 dSP; 80 int argc; 81 SV *retval = newSVpv("",0); 82 ENTER; 83 SAVETMPS; 84 PUSHMARK(sp); 85 XPUSHs(sv_2mortal(newSVnv((UV)ch))); 86 PUTBACK; 87 argc = call_sv(fallback_cb, G_SCALAR); 88 SPAGAIN; 89 if (argc != 1){ 90 croak("fallback sub must return scalar!"); 91 } 92 sv_catsv(retval, POPs); 93 PUTBACK; 94 FREETMPS; 95 LEAVE; 96 return retval; 97 } 98 99 static SV * 100 encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, 101 int check, STRLEN * offset, SV * term, int * retcode, 102 SV *fallback_cb) 103 { 104 STRLEN slen; 105 U8 *s = (U8 *) SvPV(src, slen); 106 STRLEN tlen = slen; 107 STRLEN ddone = 0; 108 STRLEN sdone = 0; 109 /* We allocate slen+1. 110 PerlIO dumps core if this value is smaller than this. */ 111 SV *dst = sv_2mortal(newSV(slen+1)); 112 U8 *d = (U8 *)SvPVX(dst); 113 STRLEN dlen = SvLEN(dst)-1; 114 int code = 0; 115 STRLEN trmlen = 0; 116 U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; 117 118 if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ 119 120 if (offset) { 121 s += *offset; 122 if (slen > *offset){ /* safeguard against slen overflow */ 123 slen -= *offset; 124 }else{ 125 slen = 0; 126 } 127 tlen = slen; 128 } 129 130 if (slen == 0){ 131 SvCUR_set(dst, 0); 132 SvPOK_only(dst); 133 goto ENCODE_END; 134 } 135 136 while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, 137 trm, trmlen)) ) 138 { 139 SvCUR_set(dst, dlen+ddone); 140 SvPOK_only(dst); 141 142 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || 143 code == ENCODE_FOUND_TERM) { 144 break; 145 } 146 switch (code) { 147 case ENCODE_NOSPACE: 148 { 149 STRLEN more = 0; /* make sure you initialize! */ 150 STRLEN sleft; 151 sdone += slen; 152 ddone += dlen; 153 sleft = tlen - sdone; 154 #if ENCODE_XS_PROFILE >= 2 155 Perl_warn(aTHX_ 156 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", 157 more, sdone, sleft, SvLEN(dst)); 158 #endif 159 if (sdone != 0) { /* has src ever been processed ? */ 160 #if ENCODE_XS_USEFP == 2 161 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone 162 - SvLEN(dst); 163 #elif ENCODE_XS_USEFP 164 more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); 165 #else 166 /* safe until SvLEN(dst) == MAX_INT/16 */ 167 more = (16*SvLEN(dst)+1)/sdone/16 * sleft; 168 #endif 169 } 170 more += UTF8_MAXLEN; /* insurance policy */ 171 d = (U8 *) SvGROW(dst, SvLEN(dst) + more); 172 /* dst need to grow need MORE bytes! */ 173 if (ddone >= SvLEN(dst)) { 174 Perl_croak(aTHX_ "Destination couldn't be grown."); 175 } 176 dlen = SvLEN(dst)-ddone-1; 177 d += ddone; 178 s += slen; 179 slen = tlen-sdone; 180 continue; 181 } 182 case ENCODE_NOREP: 183 /* encoding */ 184 if (dir == enc->f_utf8) { 185 STRLEN clen; 186 UV ch = 187 utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), 188 &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); 189 /* if non-representable multibyte prefix at end of current buffer - break*/ 190 if (clen > tlen - sdone) break; 191 if (check & ENCODE_DIE_ON_ERR) { 192 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, 193 (UV)ch, enc->name[0]); 194 return &PL_sv_undef; /* never reaches but be safe */ 195 } 196 if (check & ENCODE_WARN_ON_ERR){ 197 Perl_warner(aTHX_ packWARN(WARN_UTF8), 198 ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); 199 } 200 if (check & ENCODE_RETURN_ON_ERR){ 201 goto ENCODE_SET_SRC; 202 } 203 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 204 SV* subchar = 205 (fallback_cb != &PL_sv_undef) 206 ? do_fallback_cb(aTHX_ ch, fallback_cb) 207 : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : 208 check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : 209 "&#x%" UVxf ";", (UV)ch); 210 SvUTF8_off(subchar); /* make sure no decoded string gets in */ 211 sdone += slen + clen; 212 ddone += dlen + SvCUR(subchar); 213 sv_catsv(dst, subchar); 214 SvREFCNT_dec(subchar); 215 } else { 216 /* fallback char */ 217 sdone += slen + clen; 218 ddone += dlen + enc->replen; 219 sv_catpvn(dst, (char*)enc->rep, enc->replen); 220 } 221 } 222 /* decoding */ 223 else { 224 if (check & ENCODE_DIE_ON_ERR){ 225 Perl_croak(aTHX_ ERR_DECODE_NOMAP, 226 enc->name[0], (UV)s[slen]); 227 return &PL_sv_undef; /* never reaches but be safe */ 228 } 229 if (check & ENCODE_WARN_ON_ERR){ 230 Perl_warner( 231 aTHX_ packWARN(WARN_UTF8), 232 ERR_DECODE_NOMAP, 233 enc->name[0], (UV)s[slen]); 234 } 235 if (check & ENCODE_RETURN_ON_ERR){ 236 goto ENCODE_SET_SRC; 237 } 238 if (check & 239 (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 240 SV* subchar = 241 (fallback_cb != &PL_sv_undef) 242 ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) 243 : newSVpvf("\\x%02" UVXf, (UV)s[slen]); 244 sdone += slen + 1; 245 ddone += dlen + SvCUR(subchar); 246 sv_catsv(dst, subchar); 247 SvREFCNT_dec(subchar); 248 } else { 249 sdone += slen + 1; 250 ddone += dlen + strlen(FBCHAR_UTF8); 251 sv_catpv(dst, FBCHAR_UTF8); 252 } 253 } 254 /* settle variables when fallback */ 255 d = (U8 *)SvEND(dst); 256 dlen = SvLEN(dst) - ddone - 1; 257 s = (U8*)SvPVX(src) + sdone; 258 slen = tlen - sdone; 259 break; 260 261 default: 262 Perl_croak(aTHX_ "Unexpected code %d converting %s %s", 263 code, (dir == enc->f_utf8) ? "to" : "from", 264 enc->name[0]); 265 return &PL_sv_undef; 266 } 267 } 268 ENCODE_SET_SRC: 269 if (check && !(check & ENCODE_LEAVE_SRC)){ 270 sdone = SvCUR(src) - (slen+sdone); 271 if (sdone) { 272 sv_setpvn(src, (char*)s+slen, sdone); 273 } 274 SvCUR_set(src, sdone); 275 } 276 /* warn("check = 0x%X, code = 0x%d\n", check, code); */ 277 278 SvCUR_set(dst, dlen+ddone); 279 SvPOK_only(dst); 280 281 #if ENCODE_XS_PROFILE 282 if (SvCUR(dst) > SvCUR(src)){ 283 Perl_warn(aTHX_ 284 "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", 285 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), 286 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); 287 } 288 #endif 289 290 if (offset) 291 *offset += sdone + slen; 292 293 ENCODE_END: 294 *SvEND(dst) = '\0'; 295 if (retcode) *retcode = code; 296 return dst; 297 } 298 299 static bool 300 strict_utf8(pTHX_ SV* sv) 301 { 302 HV* hv; 303 SV** svp; 304 sv = SvRV(sv); 305 if (!sv || SvTYPE(sv) != SVt_PVHV) 306 return 0; 307 hv = (HV*)sv; 308 svp = hv_fetch(hv, "strict_utf8", 11, 0); 309 if (!svp) 310 return 0; 311 return SvTRUE(*svp); 312 } 313 314 static U8* 315 process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, 316 bool encode, bool strict, bool stop_at_partial) 317 { 318 UV uv; 319 STRLEN ulen; 320 SV *fallback_cb; 321 int check; 322 323 if (SvROK(check_sv)) { 324 /* croak("UTF-8 decoder doesn't support callback CHECK"); */ 325 fallback_cb = check_sv; 326 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */ 327 } 328 else { 329 fallback_cb = &PL_sv_undef; 330 check = SvIV(check_sv); 331 } 332 333 SvPOK_only(dst); 334 SvCUR_set(dst,0); 335 336 while (s < e) { 337 if (UTF8_IS_INVARIANT(*s)) { 338 sv_catpvn(dst, (char *)s, 1); 339 s++; 340 continue; 341 } 342 343 if (UTF8_IS_START(*s)) { 344 U8 skip = UTF8SKIP(s); 345 if ((s + skip) > e) { 346 /* Partial character */ 347 /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */ 348 if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) 349 break; 350 351 goto malformed_byte; 352 } 353 354 uv = utf8n_to_uvuni(s, e - s, &ulen, 355 UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT : 356 UTF8_ALLOW_NONSTRICT) 357 ); 358 #if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */ 359 if (strict && uv > PERL_UNICODE_MAX) 360 ulen = (STRLEN) -1; 361 #endif 362 if (ulen == -1) { 363 if (strict) { 364 uv = utf8n_to_uvuni(s, e - s, &ulen, 365 UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); 366 if (ulen == -1) 367 goto malformed_byte; 368 goto malformed; 369 } 370 goto malformed_byte; 371 } 372 373 374 /* Whole char is good */ 375 sv_catpvn(dst,(char *)s,skip); 376 s += skip; 377 continue; 378 } 379 380 /* If we get here there is something wrong with alleged UTF-8 */ 381 malformed_byte: 382 uv = (UV)*s; 383 ulen = 1; 384 385 malformed: 386 if (check & ENCODE_DIE_ON_ERR){ 387 if (encode) 388 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8"); 389 else 390 Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv); 391 } 392 if (check & ENCODE_WARN_ON_ERR){ 393 if (encode) 394 Perl_warner(aTHX_ packWARN(WARN_UTF8), 395 ERR_ENCODE_NOMAP, uv, "utf8"); 396 else 397 Perl_warner(aTHX_ packWARN(WARN_UTF8), 398 ERR_DECODE_NOMAP, "utf8", uv); 399 } 400 if (check & ENCODE_RETURN_ON_ERR) { 401 break; 402 } 403 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 404 SV* subchar = 405 (fallback_cb != &PL_sv_undef) 406 ? do_fallback_cb(aTHX_ uv, fallback_cb) 407 : newSVpvf(check & ENCODE_PERLQQ 408 ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") 409 : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" 410 : "&#x%" UVxf ";", uv); 411 if (encode){ 412 SvUTF8_off(subchar); /* make sure no decoded string gets in */ 413 } 414 sv_catsv(dst, subchar); 415 SvREFCNT_dec(subchar); 416 } else { 417 sv_catpv(dst, FBCHAR_UTF8); 418 } 419 s += ulen; 420 } 421 *SvEND(dst) = '\0'; 422 423 return s; 424 } 425 426 427 MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ 428 429 PROTOTYPES: DISABLE 430 431 void 432 Method_decode_xs(obj,src,check_sv = &PL_sv_no) 433 SV * obj 434 SV * src 435 SV * check_sv 436 PREINIT: 437 STRLEN slen; 438 U8 *s; 439 U8 *e; 440 SV *dst; 441 bool renewed = 0; 442 int check; 443 CODE: 444 { 445 dSP; ENTER; SAVETMPS; 446 if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); 447 s = (U8 *) SvPV(src, slen); 448 e = (U8 *) SvEND(src); 449 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); 450 /* 451 * PerlIO check -- we assume the object is of PerlIO if renewed 452 */ 453 PUSHMARK(sp); 454 XPUSHs(obj); 455 PUTBACK; 456 if (call_method("renewed",G_SCALAR) == 1) { 457 SPAGAIN; 458 renewed = (bool)POPi; 459 PUTBACK; 460 #if 0 461 fprintf(stderr, "renewed == %d\n", renewed); 462 #endif 463 } 464 FREETMPS; LEAVE; 465 /* end PerlIO check */ 466 467 if (SvUTF8(src)) { 468 s = utf8_to_bytes(s,&slen); 469 if (s) { 470 SvCUR_set(src,slen); 471 SvUTF8_off(src); 472 e = s+slen; 473 } 474 else { 475 croak("Cannot decode string with wide characters"); 476 } 477 } 478 479 dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ 480 s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); 481 482 /* Clear out translated part of source unless asked not to */ 483 if (check && !(check & ENCODE_LEAVE_SRC)){ 484 slen = e-s; 485 if (slen) { 486 sv_setpvn(src, (char*)s, slen); 487 } 488 SvCUR_set(src, slen); 489 } 490 SvUTF8_on(dst); 491 if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ 492 ST(0) = dst; 493 XSRETURN(1); 494 } 495 496 void 497 Method_encode_xs(obj,src,check_sv = &PL_sv_no) 498 SV * obj 499 SV * src 500 SV * check_sv 501 PREINIT: 502 STRLEN slen; 503 U8 *s; 504 U8 *e; 505 SV *dst; 506 bool renewed = 0; 507 int check; 508 CODE: 509 { 510 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); 511 if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); 512 s = (U8 *) SvPV(src, slen); 513 e = (U8 *) SvEND(src); 514 dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ 515 if (SvUTF8(src)) { 516 /* Already encoded */ 517 if (strict_utf8(aTHX_ obj)) { 518 s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0); 519 } 520 else { 521 /* trust it and just copy the octets */ 522 sv_setpvn(dst,(char *)s,(e-s)); 523 s = e; 524 } 525 } 526 else { 527 /* Native bytes - can always encode */ 528 U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ 529 while (s < e) { 530 UV uv = NATIVE_TO_UNI((UV) *s); 531 s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */ 532 if (UNI_IS_INVARIANT(uv)) 533 *d++ = (U8)UTF_TO_NATIVE(uv); 534 else { 535 *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); 536 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); 537 } 538 } 539 SvCUR_set(dst, d- (U8 *)SvPVX(dst)); 540 *SvEND(dst) = '\0'; 541 } 542 543 /* Clear out translated part of source unless asked not to */ 544 if (check && !(check & ENCODE_LEAVE_SRC)){ 545 slen = e-s; 546 if (slen) { 547 sv_setpvn(src, (char*)s, slen); 548 } 549 SvCUR_set(src, slen); 550 } 551 SvPOK_only(dst); 552 SvUTF8_off(dst); 553 if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ 554 ST(0) = dst; 555 XSRETURN(1); 556 } 557 558 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ 559 560 PROTOTYPES: ENABLE 561 562 void 563 Method_renew(obj) 564 SV * obj 565 CODE: 566 { 567 XSRETURN(1); 568 } 569 570 int 571 Method_renewed(obj) 572 SV * obj 573 CODE: 574 RETVAL = 0; 575 OUTPUT: 576 RETVAL 577 578 void 579 Method_name(obj) 580 SV * obj 581 CODE: 582 { 583 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 584 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); 585 XSRETURN(1); 586 } 587 588 void 589 Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) 590 SV * obj 591 SV * dst 592 SV * src 593 SV * off 594 SV * term 595 SV * check_sv 596 CODE: 597 { 598 int check; 599 SV *fallback_cb = &PL_sv_undef; 600 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 601 STRLEN offset = (STRLEN)SvIV(off); 602 int code = 0; 603 if (SvUTF8(src)) { 604 sv_utf8_downgrade(src, FALSE); 605 } 606 if (SvROK(check_sv)){ 607 fallback_cb = check_sv; 608 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 609 }else{ 610 check = SvIV(check_sv); 611 } 612 sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, 613 &offset, term, &code, fallback_cb)); 614 SvIV_set(off, (IV)offset); 615 if (code == ENCODE_FOUND_TERM) { 616 ST(0) = &PL_sv_yes; 617 }else{ 618 ST(0) = &PL_sv_no; 619 } 620 XSRETURN(1); 621 } 622 623 void 624 Method_decode(obj,src,check_sv = &PL_sv_no) 625 SV * obj 626 SV * src 627 SV * check_sv 628 CODE: 629 { 630 int check; 631 SV *fallback_cb = &PL_sv_undef; 632 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 633 if (SvUTF8(src)) { 634 sv_utf8_downgrade(src, FALSE); 635 } 636 if (SvROK(check_sv)){ 637 fallback_cb = check_sv; 638 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 639 }else{ 640 check = SvIV(check_sv); 641 } 642 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, 643 NULL, Nullsv, NULL, fallback_cb); 644 SvUTF8_on(ST(0)); 645 XSRETURN(1); 646 } 647 648 void 649 Method_encode(obj,src,check_sv = &PL_sv_no) 650 SV * obj 651 SV * src 652 SV * check_sv 653 CODE: 654 { 655 int check; 656 SV *fallback_cb = &PL_sv_undef; 657 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 658 sv_utf8_upgrade(src); 659 if (SvROK(check_sv)){ 660 fallback_cb = check_sv; 661 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 662 }else{ 663 check = SvIV(check_sv); 664 } 665 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, 666 NULL, Nullsv, NULL, fallback_cb); 667 XSRETURN(1); 668 } 669 670 void 671 Method_needs_lines(obj) 672 SV * obj 673 CODE: 674 { 675 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 676 ST(0) = &PL_sv_no; 677 XSRETURN(1); 678 } 679 680 void 681 Method_perlio_ok(obj) 682 SV * obj 683 CODE: 684 { 685 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 686 /* require_pv(PERLIO_FILENAME); */ 687 688 eval_pv("require PerlIO::encoding", 0); 689 690 if (SvTRUE(get_sv("@", 0))) { 691 ST(0) = &PL_sv_no; 692 }else{ 693 ST(0) = &PL_sv_yes; 694 } 695 XSRETURN(1); 696 } 697 698 void 699 Method_mime_name(obj) 700 SV * obj 701 CODE: 702 { 703 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 704 SV *retval; 705 eval_pv("require Encode::MIME::Name", 0); 706 707 if (SvTRUE(get_sv("@", 0))) { 708 ST(0) = &PL_sv_undef; 709 }else{ 710 ENTER; 711 SAVETMPS; 712 PUSHMARK(sp); 713 XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); 714 PUTBACK; 715 call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); 716 SPAGAIN; 717 retval = newSVsv(POPs); 718 PUTBACK; 719 FREETMPS; 720 LEAVE; 721 /* enc->name[0] */ 722 ST(0) = retval; 723 } 724 XSRETURN(1); 725 } 726 727 MODULE = Encode PACKAGE = Encode 728 729 PROTOTYPES: ENABLE 730 731 I32 732 _bytes_to_utf8(sv, ...) 733 SV * sv 734 CODE: 735 { 736 SV * encoding = items == 2 ? ST(1) : Nullsv; 737 738 if (encoding) 739 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); 740 else { 741 STRLEN len; 742 U8* s = (U8*)SvPV(sv, len); 743 U8* converted; 744 745 converted = bytes_to_utf8(s, &len); /* This allocs */ 746 sv_setpvn(sv, (char *)converted, len); 747 SvUTF8_on(sv); /* XXX Should we? */ 748 Safefree(converted); /* ... so free it */ 749 RETVAL = len; 750 } 751 } 752 OUTPUT: 753 RETVAL 754 755 I32 756 _utf8_to_bytes(sv, ...) 757 SV * sv 758 CODE: 759 { 760 SV * to = items > 1 ? ST(1) : Nullsv; 761 SV * check = items > 2 ? ST(2) : Nullsv; 762 763 if (to) { 764 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); 765 } else { 766 STRLEN len; 767 U8 *s = (U8*)SvPV(sv, len); 768 769 RETVAL = 0; 770 if (SvTRUE(check)) { 771 /* Must do things the slow way */ 772 U8 *dest; 773 /* We need a copy to pass to check() */ 774 U8 *src = s; 775 U8 *send = s + len; 776 U8 *d0; 777 778 New(83, dest, len, U8); /* I think */ 779 d0 = dest; 780 781 while (s < send) { 782 if (*s < 0x80){ 783 *dest++ = *s++; 784 } else { 785 STRLEN ulen; 786 UV uv = *s++; 787 788 /* Have to do it all ourselves because of error routine, 789 aargh. */ 790 if (!(uv & 0x40)){ goto failure; } 791 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } 792 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } 793 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } 794 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } 795 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } 796 else if (!(uv & 0x01)) { ulen = 7; uv = 0; } 797 else { ulen = 13; uv = 0; } 798 799 /* Note change to utf8.c variable naming, for variety */ 800 while (ulen--) { 801 if ((*s & 0xc0) != 0x80){ 802 goto failure; 803 } else { 804 uv = (uv << 6) | (*s++ & 0x3f); 805 } 806 } 807 if (uv > 256) { 808 failure: 809 call_failure(check, s, dest, src); 810 /* Now what happens? */ 811 } 812 *dest++ = (U8)uv; 813 } 814 } 815 RETVAL = dest - d0; 816 sv_usepvn(sv, (char *)dest, RETVAL); 817 SvUTF8_off(sv); 818 } else { 819 RETVAL = (utf8_to_bytes(s, &len) ? len : 0); 820 } 821 } 822 } 823 OUTPUT: 824 RETVAL 825 826 bool 827 is_utf8(sv, check = 0) 828 SV * sv 829 int check 830 CODE: 831 { 832 if (SvGMAGICAL(sv)) /* it could be $1, for example */ 833 sv = newSVsv(sv); /* GMAGIG will be done */ 834 RETVAL = SvUTF8(sv) ? TRUE : FALSE; 835 if (RETVAL && 836 check && 837 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 838 RETVAL = FALSE; 839 if (sv != ST(0)) 840 SvREFCNT_dec(sv); /* it was a temp copy */ 841 } 842 OUTPUT: 843 RETVAL 844 845 #ifndef SvIsCOW 846 # define SvIsCOW (SvREADONLY(sv) && SvFAKE(sv)) 847 #endif 848 849 SV * 850 _utf8_on(sv) 851 SV * sv 852 CODE: 853 { 854 if (SvPOK(sv)) { 855 SV *rsv = newSViv(SvUTF8(sv)); 856 RETVAL = rsv; 857 if (SvIsCOW(sv)) sv_force_normal(sv); 858 SvUTF8_on(sv); 859 } else { 860 RETVAL = &PL_sv_undef; 861 } 862 } 863 OUTPUT: 864 RETVAL 865 866 SV * 867 _utf8_off(sv) 868 SV * sv 869 CODE: 870 { 871 if (SvPOK(sv)) { 872 SV *rsv = newSViv(SvUTF8(sv)); 873 RETVAL = rsv; 874 if (SvIsCOW(sv)) sv_force_normal(sv); 875 SvUTF8_off(sv); 876 } else { 877 RETVAL = &PL_sv_undef; 878 } 879 } 880 OUTPUT: 881 RETVAL 882 883 int 884 DIE_ON_ERR() 885 CODE: 886 RETVAL = ENCODE_DIE_ON_ERR; 887 OUTPUT: 888 RETVAL 889 890 int 891 WARN_ON_ERR() 892 CODE: 893 RETVAL = ENCODE_WARN_ON_ERR; 894 OUTPUT: 895 RETVAL 896 897 int 898 LEAVE_SRC() 899 CODE: 900 RETVAL = ENCODE_LEAVE_SRC; 901 OUTPUT: 902 RETVAL 903 904 int 905 RETURN_ON_ERR() 906 CODE: 907 RETVAL = ENCODE_RETURN_ON_ERR; 908 OUTPUT: 909 RETVAL 910 911 int 912 PERLQQ() 913 CODE: 914 RETVAL = ENCODE_PERLQQ; 915 OUTPUT: 916 RETVAL 917 918 int 919 HTMLCREF() 920 CODE: 921 RETVAL = ENCODE_HTMLCREF; 922 OUTPUT: 923 RETVAL 924 925 int 926 XMLCREF() 927 CODE: 928 RETVAL = ENCODE_XMLCREF; 929 OUTPUT: 930 RETVAL 931 932 int 933 STOP_AT_PARTIAL() 934 CODE: 935 RETVAL = ENCODE_STOP_AT_PARTIAL; 936 OUTPUT: 937 RETVAL 938 939 int 940 FB_DEFAULT() 941 CODE: 942 RETVAL = ENCODE_FB_DEFAULT; 943 OUTPUT: 944 RETVAL 945 946 int 947 FB_CROAK() 948 CODE: 949 RETVAL = ENCODE_FB_CROAK; 950 OUTPUT: 951 RETVAL 952 953 int 954 FB_QUIET() 955 CODE: 956 RETVAL = ENCODE_FB_QUIET; 957 OUTPUT: 958 RETVAL 959 960 int 961 FB_WARN() 962 CODE: 963 RETVAL = ENCODE_FB_WARN; 964 OUTPUT: 965 RETVAL 966 967 int 968 FB_PERLQQ() 969 CODE: 970 RETVAL = ENCODE_FB_PERLQQ; 971 OUTPUT: 972 RETVAL 973 974 int 975 FB_HTMLCREF() 976 CODE: 977 RETVAL = ENCODE_FB_HTMLCREF; 978 OUTPUT: 979 RETVAL 980 981 int 982 FB_XMLCREF() 983 CODE: 984 RETVAL = ENCODE_FB_XMLCREF; 985 OUTPUT: 986 RETVAL 987 988 BOOT: 989 { 990 #include "def_t.h" 991 #include "def_t.exh" 992 } 993