1 /* 2 $Id: Encode.xs,v 2.21 2013/03/05 03:12:49 dankogai Exp dankogai $ 3 */ 4 5 #define PERL_NO_GET_CONTEXT 6 #include "EXTERN.h" 7 #include "perl.h" 8 #include "XSUB.h" 9 #define U8 U8 10 #include "encode.h" 11 12 # define PERLIO_MODNAME "PerlIO::encoding" 13 # define PERLIO_FILENAME "PerlIO/encoding.pm" 14 15 /* set 1 or more to profile. t/encoding.t dumps core because of 16 Perl_warner and PerlIO don't work well */ 17 #define ENCODE_XS_PROFILE 0 18 19 /* set 0 to disable floating point to calculate buffer size for 20 encode_method(). 1 is recommended. 2 restores NI-S original */ 21 #define ENCODE_XS_USEFP 1 22 23 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ 24 Perl_croak(aTHX_ "panic_unimplemented"); \ 25 return (y)0; /* fool picky compilers */ \ 26 } 27 /**/ 28 29 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) 30 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) 31 32 #ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE 33 # define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE 34 #else 35 # define UTF8_ALLOW_STRICT 0 36 #endif 37 38 #define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \ 39 ~(UTF8_ALLOW_CONTINUATION | \ 40 UTF8_ALLOW_NON_CONTINUATION | \ 41 UTF8_ALLOW_LONG)) 42 43 void 44 Encode_XSEncoding(pTHX_ encode_t * enc) 45 { 46 dSP; 47 HV *stash = gv_stashpv("Encode::XS", TRUE); 48 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); 49 int i = 0; 50 PUSHMARK(sp); 51 XPUSHs(sv); 52 while (enc->name[i]) { 53 const char *name = enc->name[i++]; 54 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); 55 } 56 PUTBACK; 57 call_pv("Encode::define_encoding", G_DISCARD); 58 SvREFCNT_dec(sv); 59 } 60 61 void 62 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) 63 { 64 /* Exists for breakpointing */ 65 } 66 67 68 #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" 69 #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" 70 71 static SV * 72 do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) 73 { 74 dSP; 75 int argc; 76 SV *retval = newSVpv("",0); 77 ENTER; 78 SAVETMPS; 79 PUSHMARK(sp); 80 XPUSHs(sv_2mortal(newSVnv((UV)ch))); 81 PUTBACK; 82 argc = call_sv(fallback_cb, G_SCALAR); 83 SPAGAIN; 84 if (argc != 1){ 85 croak("fallback sub must return scalar!"); 86 } 87 sv_catsv(retval, POPs); 88 PUTBACK; 89 FREETMPS; 90 LEAVE; 91 return retval; 92 } 93 94 static SV * 95 encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, 96 int check, STRLEN * offset, SV * term, int * retcode, 97 SV *fallback_cb) 98 { 99 STRLEN slen; 100 U8 *s = (U8 *) SvPV(src, slen); 101 STRLEN tlen = slen; 102 STRLEN ddone = 0; 103 STRLEN sdone = 0; 104 105 /* We allocate slen+1. 106 PerlIO dumps core if this value is smaller than this. */ 107 SV *dst = sv_2mortal(newSV(slen+1)); 108 U8 *d = (U8 *)SvPVX(dst); 109 STRLEN dlen = SvLEN(dst)-1; 110 int code = 0; 111 STRLEN trmlen = 0; 112 U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; 113 114 if (offset) { 115 s += *offset; 116 if (slen > *offset){ /* safeguard against slen overflow */ 117 slen -= *offset; 118 }else{ 119 slen = 0; 120 } 121 tlen = slen; 122 } 123 124 if (slen == 0){ 125 SvCUR_set(dst, 0); 126 SvPOK_only(dst); 127 goto ENCODE_END; 128 } 129 130 while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, 131 trm, trmlen)) ) 132 { 133 SvCUR_set(dst, dlen+ddone); 134 SvPOK_only(dst); 135 136 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || 137 code == ENCODE_FOUND_TERM) { 138 break; 139 } 140 switch (code) { 141 case ENCODE_NOSPACE: 142 { 143 STRLEN more = 0; /* make sure you initialize! */ 144 STRLEN sleft; 145 sdone += slen; 146 ddone += dlen; 147 sleft = tlen - sdone; 148 #if ENCODE_XS_PROFILE >= 2 149 Perl_warn(aTHX_ 150 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", 151 more, sdone, sleft, SvLEN(dst)); 152 #endif 153 if (sdone != 0) { /* has src ever been processed ? */ 154 #if ENCODE_XS_USEFP == 2 155 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone 156 - SvLEN(dst); 157 #elif ENCODE_XS_USEFP 158 more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); 159 #else 160 /* safe until SvLEN(dst) == MAX_INT/16 */ 161 more = (16*SvLEN(dst)+1)/sdone/16 * sleft; 162 #endif 163 } 164 more += UTF8_MAXLEN; /* insurance policy */ 165 d = (U8 *) SvGROW(dst, SvLEN(dst) + more); 166 /* dst need to grow need MORE bytes! */ 167 if (ddone >= SvLEN(dst)) { 168 Perl_croak(aTHX_ "Destination couldn't be grown."); 169 } 170 dlen = SvLEN(dst)-ddone-1; 171 d += ddone; 172 s += slen; 173 slen = tlen-sdone; 174 continue; 175 } 176 case ENCODE_NOREP: 177 /* encoding */ 178 if (dir == enc->f_utf8) { 179 STRLEN clen; 180 UV ch = 181 utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), 182 &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); 183 /* if non-representable multibyte prefix at end of current buffer - break*/ 184 if (clen > tlen - sdone) break; 185 if (check & ENCODE_DIE_ON_ERR) { 186 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, 187 (UV)ch, enc->name[0]); 188 return &PL_sv_undef; /* never reaches but be safe */ 189 } 190 if (check & ENCODE_WARN_ON_ERR){ 191 Perl_warner(aTHX_ packWARN(WARN_UTF8), 192 ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); 193 } 194 if (check & ENCODE_RETURN_ON_ERR){ 195 goto ENCODE_SET_SRC; 196 } 197 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 198 SV* subchar = 199 (fallback_cb != &PL_sv_undef) 200 ? do_fallback_cb(aTHX_ ch, fallback_cb) 201 : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : 202 check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : 203 "&#x%" UVxf ";", (UV)ch); 204 SvUTF8_off(subchar); /* make sure no decoded string gets in */ 205 sdone += slen + clen; 206 ddone += dlen + SvCUR(subchar); 207 sv_catsv(dst, subchar); 208 SvREFCNT_dec(subchar); 209 } else { 210 /* fallback char */ 211 sdone += slen + clen; 212 ddone += dlen + enc->replen; 213 sv_catpvn(dst, (char*)enc->rep, enc->replen); 214 } 215 } 216 /* decoding */ 217 else { 218 if (check & ENCODE_DIE_ON_ERR){ 219 Perl_croak(aTHX_ ERR_DECODE_NOMAP, 220 enc->name[0], (UV)s[slen]); 221 return &PL_sv_undef; /* never reaches but be safe */ 222 } 223 if (check & ENCODE_WARN_ON_ERR){ 224 Perl_warner( 225 aTHX_ packWARN(WARN_UTF8), 226 ERR_DECODE_NOMAP, 227 enc->name[0], (UV)s[slen]); 228 } 229 if (check & ENCODE_RETURN_ON_ERR){ 230 goto ENCODE_SET_SRC; 231 } 232 if (check & 233 (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 234 SV* subchar = 235 (fallback_cb != &PL_sv_undef) 236 ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) 237 : newSVpvf("\\x%02" UVXf, (UV)s[slen]); 238 sdone += slen + 1; 239 ddone += dlen + SvCUR(subchar); 240 sv_catsv(dst, subchar); 241 SvREFCNT_dec(subchar); 242 } else { 243 sdone += slen + 1; 244 ddone += dlen + strlen(FBCHAR_UTF8); 245 sv_catpv(dst, FBCHAR_UTF8); 246 } 247 } 248 /* settle variables when fallback */ 249 d = (U8 *)SvEND(dst); 250 dlen = SvLEN(dst) - ddone - 1; 251 s = (U8*)SvPVX(src) + sdone; 252 slen = tlen - sdone; 253 break; 254 255 default: 256 Perl_croak(aTHX_ "Unexpected code %d converting %s %s", 257 code, (dir == enc->f_utf8) ? "to" : "from", 258 enc->name[0]); 259 return &PL_sv_undef; 260 } 261 } 262 ENCODE_SET_SRC: 263 if (check && !(check & ENCODE_LEAVE_SRC)){ 264 sdone = SvCUR(src) - (slen+sdone); 265 if (sdone) { 266 sv_setpvn(src, (char*)s+slen, sdone); 267 } 268 SvCUR_set(src, sdone); 269 } 270 /* warn("check = 0x%X, code = 0x%d\n", check, code); */ 271 272 SvCUR_set(dst, dlen+ddone); 273 SvPOK_only(dst); 274 275 #if ENCODE_XS_PROFILE 276 if (SvCUR(dst) > SvCUR(src)){ 277 Perl_warn(aTHX_ 278 "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", 279 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), 280 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); 281 } 282 #endif 283 284 if (offset) 285 *offset += sdone + slen; 286 287 ENCODE_END: 288 *SvEND(dst) = '\0'; 289 if (retcode) *retcode = code; 290 return dst; 291 } 292 293 static bool 294 strict_utf8(pTHX_ SV* sv) 295 { 296 HV* hv; 297 SV** svp; 298 sv = SvRV(sv); 299 if (!sv || SvTYPE(sv) != SVt_PVHV) 300 return 0; 301 hv = (HV*)sv; 302 svp = hv_fetch(hv, "strict_utf8", 11, 0); 303 if (!svp) 304 return 0; 305 return SvTRUE(*svp); 306 } 307 308 static U8* 309 process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, 310 bool encode, bool strict, bool stop_at_partial) 311 { 312 UV uv; 313 STRLEN ulen; 314 SV *fallback_cb; 315 int check; 316 317 if (SvROK(check_sv)) { 318 /* croak("UTF-8 decoder doesn't support callback CHECK"); */ 319 fallback_cb = check_sv; 320 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */ 321 } 322 else { 323 fallback_cb = &PL_sv_undef; 324 check = SvIV(check_sv); 325 } 326 327 SvPOK_only(dst); 328 SvCUR_set(dst,0); 329 330 while (s < e) { 331 if (UTF8_IS_INVARIANT(*s)) { 332 sv_catpvn(dst, (char *)s, 1); 333 s++; 334 continue; 335 } 336 337 if (UTF8_IS_START(*s)) { 338 U8 skip = UTF8SKIP(s); 339 if ((s + skip) > e) { 340 /* Partial character */ 341 /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */ 342 if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) 343 break; 344 345 goto malformed_byte; 346 } 347 348 uv = utf8n_to_uvuni(s, e - s, &ulen, 349 UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT : 350 UTF8_ALLOW_NONSTRICT) 351 ); 352 #if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */ 353 if (strict && uv > PERL_UNICODE_MAX) 354 ulen = (STRLEN) -1; 355 #endif 356 if (ulen == -1) { 357 if (strict) { 358 uv = utf8n_to_uvuni(s, e - s, &ulen, 359 UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); 360 if (ulen == -1) 361 goto malformed_byte; 362 goto malformed; 363 } 364 goto malformed_byte; 365 } 366 367 368 /* Whole char is good */ 369 sv_catpvn(dst,(char *)s,skip); 370 s += skip; 371 continue; 372 } 373 374 /* If we get here there is something wrong with alleged UTF-8 */ 375 malformed_byte: 376 uv = (UV)*s; 377 ulen = 1; 378 379 malformed: 380 if (check & ENCODE_DIE_ON_ERR){ 381 if (encode) 382 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8"); 383 else 384 Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv); 385 } 386 if (check & ENCODE_WARN_ON_ERR){ 387 if (encode) 388 Perl_warner(aTHX_ packWARN(WARN_UTF8), 389 ERR_ENCODE_NOMAP, uv, "utf8"); 390 else 391 Perl_warner(aTHX_ packWARN(WARN_UTF8), 392 ERR_DECODE_NOMAP, "utf8", uv); 393 } 394 if (check & ENCODE_RETURN_ON_ERR) { 395 break; 396 } 397 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ 398 SV* subchar = 399 (fallback_cb != &PL_sv_undef) 400 ? do_fallback_cb(aTHX_ uv, fallback_cb) 401 : newSVpvf(check & ENCODE_PERLQQ 402 ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") 403 : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" 404 : "&#x%" UVxf ";", uv); 405 if (encode){ 406 SvUTF8_off(subchar); /* make sure no decoded string gets in */ 407 } 408 sv_catsv(dst, subchar); 409 SvREFCNT_dec(subchar); 410 } else { 411 sv_catpv(dst, FBCHAR_UTF8); 412 } 413 s += ulen; 414 } 415 *SvEND(dst) = '\0'; 416 417 return s; 418 } 419 420 421 MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ 422 423 PROTOTYPES: DISABLE 424 425 void 426 Method_decode_xs(obj,src,check_sv = &PL_sv_no) 427 SV * obj 428 SV * src 429 SV * check_sv 430 PREINIT: 431 STRLEN slen; 432 U8 *s; 433 U8 *e; 434 SV *dst; 435 bool renewed = 0; 436 int check; 437 CODE: 438 { 439 dSP; ENTER; SAVETMPS; 440 if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); 441 s = (U8 *) SvPV(src, slen); 442 e = (U8 *) SvEND(src); 443 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); 444 /* 445 * PerlIO check -- we assume the object is of PerlIO if renewed 446 */ 447 PUSHMARK(sp); 448 XPUSHs(obj); 449 PUTBACK; 450 if (call_method("renewed",G_SCALAR) == 1) { 451 SPAGAIN; 452 renewed = (bool)POPi; 453 PUTBACK; 454 #if 0 455 fprintf(stderr, "renewed == %d\n", renewed); 456 #endif 457 } 458 FREETMPS; LEAVE; 459 /* end PerlIO check */ 460 461 if (SvUTF8(src)) { 462 s = utf8_to_bytes(s,&slen); 463 if (s) { 464 SvCUR_set(src,slen); 465 SvUTF8_off(src); 466 e = s+slen; 467 } 468 else { 469 croak("Cannot decode string with wide characters"); 470 } 471 } 472 473 dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ 474 s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); 475 476 /* Clear out translated part of source unless asked not to */ 477 if (check && !(check & ENCODE_LEAVE_SRC)){ 478 slen = e-s; 479 if (slen) { 480 sv_setpvn(src, (char*)s, slen); 481 } 482 SvCUR_set(src, slen); 483 } 484 SvUTF8_on(dst); 485 ST(0) = dst; 486 XSRETURN(1); 487 } 488 489 void 490 Method_encode_xs(obj,src,check_sv = &PL_sv_no) 491 SV * obj 492 SV * src 493 SV * check_sv 494 PREINIT: 495 STRLEN slen; 496 U8 *s; 497 U8 *e; 498 SV *dst; 499 bool renewed = 0; 500 int check; 501 CODE: 502 { 503 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); 504 if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); 505 s = (U8 *) SvPV(src, slen); 506 e = (U8 *) SvEND(src); 507 dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ 508 if (SvUTF8(src)) { 509 /* Already encoded */ 510 if (strict_utf8(aTHX_ obj)) { 511 s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0); 512 } 513 else { 514 /* trust it and just copy the octets */ 515 sv_setpvn(dst,(char *)s,(e-s)); 516 s = e; 517 } 518 } 519 else { 520 /* Native bytes - can always encode */ 521 U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ 522 while (s < e) { 523 UV uv = NATIVE_TO_UNI((UV) *s); 524 s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */ 525 if (UNI_IS_INVARIANT(uv)) 526 *d++ = (U8)UTF_TO_NATIVE(uv); 527 else { 528 *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); 529 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); 530 } 531 } 532 SvCUR_set(dst, d- (U8 *)SvPVX(dst)); 533 *SvEND(dst) = '\0'; 534 } 535 536 /* Clear out translated part of source unless asked not to */ 537 if (check && !(check & ENCODE_LEAVE_SRC)){ 538 slen = e-s; 539 if (slen) { 540 sv_setpvn(src, (char*)s, slen); 541 } 542 SvCUR_set(src, slen); 543 } 544 SvPOK_only(dst); 545 SvUTF8_off(dst); 546 ST(0) = dst; 547 XSRETURN(1); 548 } 549 550 MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ 551 552 PROTOTYPES: ENABLE 553 554 void 555 Method_renew(obj) 556 SV * obj 557 CODE: 558 { 559 XSRETURN(1); 560 } 561 562 int 563 Method_renewed(obj) 564 SV * obj 565 CODE: 566 RETVAL = 0; 567 OUTPUT: 568 RETVAL 569 570 void 571 Method_name(obj) 572 SV * obj 573 CODE: 574 { 575 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 576 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); 577 XSRETURN(1); 578 } 579 580 void 581 Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) 582 SV * obj 583 SV * dst 584 SV * src 585 SV * off 586 SV * term 587 SV * check_sv 588 CODE: 589 { 590 int check; 591 SV *fallback_cb = &PL_sv_undef; 592 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 593 STRLEN offset = (STRLEN)SvIV(off); 594 int code = 0; 595 if (SvUTF8(src)) { 596 sv_utf8_downgrade(src, FALSE); 597 } 598 if (SvROK(check_sv)){ 599 fallback_cb = check_sv; 600 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 601 }else{ 602 check = SvIV(check_sv); 603 } 604 sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, 605 &offset, term, &code, fallback_cb)); 606 SvIV_set(off, (IV)offset); 607 if (code == ENCODE_FOUND_TERM) { 608 ST(0) = &PL_sv_yes; 609 }else{ 610 ST(0) = &PL_sv_no; 611 } 612 XSRETURN(1); 613 } 614 615 void 616 Method_decode(obj,src,check_sv = &PL_sv_no) 617 SV * obj 618 SV * src 619 SV * check_sv 620 CODE: 621 { 622 int check; 623 SV *fallback_cb = &PL_sv_undef; 624 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 625 if (SvUTF8(src)) { 626 sv_utf8_downgrade(src, FALSE); 627 } 628 if (SvROK(check_sv)){ 629 fallback_cb = check_sv; 630 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 631 }else{ 632 check = SvIV(check_sv); 633 } 634 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, 635 NULL, Nullsv, NULL, fallback_cb); 636 SvUTF8_on(ST(0)); 637 XSRETURN(1); 638 } 639 640 void 641 Method_encode(obj,src,check_sv = &PL_sv_no) 642 SV * obj 643 SV * src 644 SV * check_sv 645 CODE: 646 { 647 int check; 648 SV *fallback_cb = &PL_sv_undef; 649 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 650 sv_utf8_upgrade(src); 651 if (SvROK(check_sv)){ 652 fallback_cb = check_sv; 653 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ 654 }else{ 655 check = SvIV(check_sv); 656 } 657 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, 658 NULL, Nullsv, NULL, fallback_cb); 659 XSRETURN(1); 660 } 661 662 void 663 Method_needs_lines(obj) 664 SV * obj 665 CODE: 666 { 667 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 668 ST(0) = &PL_sv_no; 669 XSRETURN(1); 670 } 671 672 void 673 Method_perlio_ok(obj) 674 SV * obj 675 CODE: 676 { 677 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ 678 /* require_pv(PERLIO_FILENAME); */ 679 680 eval_pv("require PerlIO::encoding", 0); 681 682 if (SvTRUE(get_sv("@", 0))) { 683 ST(0) = &PL_sv_no; 684 }else{ 685 ST(0) = &PL_sv_yes; 686 } 687 XSRETURN(1); 688 } 689 690 void 691 Method_mime_name(obj) 692 SV * obj 693 CODE: 694 { 695 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); 696 SV *retval; 697 eval_pv("require Encode::MIME::Name", 0); 698 699 if (SvTRUE(get_sv("@", 0))) { 700 ST(0) = &PL_sv_undef; 701 }else{ 702 ENTER; 703 SAVETMPS; 704 PUSHMARK(sp); 705 XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); 706 PUTBACK; 707 call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); 708 SPAGAIN; 709 retval = newSVsv(POPs); 710 PUTBACK; 711 FREETMPS; 712 LEAVE; 713 /* enc->name[0] */ 714 ST(0) = retval; 715 } 716 XSRETURN(1); 717 } 718 719 MODULE = Encode PACKAGE = Encode 720 721 PROTOTYPES: ENABLE 722 723 I32 724 _bytes_to_utf8(sv, ...) 725 SV * sv 726 CODE: 727 { 728 SV * encoding = items == 2 ? ST(1) : Nullsv; 729 730 if (encoding) 731 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); 732 else { 733 STRLEN len; 734 U8* s = (U8*)SvPV(sv, len); 735 U8* converted; 736 737 converted = bytes_to_utf8(s, &len); /* This allocs */ 738 sv_setpvn(sv, (char *)converted, len); 739 SvUTF8_on(sv); /* XXX Should we? */ 740 Safefree(converted); /* ... so free it */ 741 RETVAL = len; 742 } 743 } 744 OUTPUT: 745 RETVAL 746 747 I32 748 _utf8_to_bytes(sv, ...) 749 SV * sv 750 CODE: 751 { 752 SV * to = items > 1 ? ST(1) : Nullsv; 753 SV * check = items > 2 ? ST(2) : Nullsv; 754 755 if (to) { 756 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); 757 } else { 758 STRLEN len; 759 U8 *s = (U8*)SvPV(sv, len); 760 761 RETVAL = 0; 762 if (SvTRUE(check)) { 763 /* Must do things the slow way */ 764 U8 *dest; 765 /* We need a copy to pass to check() */ 766 U8 *src = s; 767 U8 *send = s + len; 768 U8 *d0; 769 770 New(83, dest, len, U8); /* I think */ 771 d0 = dest; 772 773 while (s < send) { 774 if (*s < 0x80){ 775 *dest++ = *s++; 776 } else { 777 STRLEN ulen; 778 UV uv = *s++; 779 780 /* Have to do it all ourselves because of error routine, 781 aargh. */ 782 if (!(uv & 0x40)){ goto failure; } 783 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } 784 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } 785 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } 786 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } 787 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } 788 else if (!(uv & 0x01)) { ulen = 7; uv = 0; } 789 else { ulen = 13; uv = 0; } 790 791 /* Note change to utf8.c variable naming, for variety */ 792 while (ulen--) { 793 if ((*s & 0xc0) != 0x80){ 794 goto failure; 795 } else { 796 uv = (uv << 6) | (*s++ & 0x3f); 797 } 798 } 799 if (uv > 256) { 800 failure: 801 call_failure(check, s, dest, src); 802 /* Now what happens? */ 803 } 804 *dest++ = (U8)uv; 805 } 806 } 807 RETVAL = dest - d0; 808 sv_usepvn(sv, (char *)dest, RETVAL); 809 SvUTF8_off(sv); 810 } else { 811 RETVAL = (utf8_to_bytes(s, &len) ? len : 0); 812 } 813 } 814 } 815 OUTPUT: 816 RETVAL 817 818 bool 819 is_utf8(sv, check = 0) 820 SV * sv 821 int check 822 CODE: 823 { 824 if (SvGMAGICAL(sv)) /* it could be $1, for example */ 825 sv = newSVsv(sv); /* GMAGIG will be done */ 826 RETVAL = SvUTF8(sv) ? TRUE : FALSE; 827 if (RETVAL && 828 check && 829 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) 830 RETVAL = FALSE; 831 if (sv != ST(0)) 832 SvREFCNT_dec(sv); /* it was a temp copy */ 833 } 834 OUTPUT: 835 RETVAL 836 837 SV * 838 _utf8_on(sv) 839 SV * sv 840 CODE: 841 { 842 if (SvPOK(sv)) { 843 SV *rsv = newSViv(SvUTF8(sv)); 844 RETVAL = rsv; 845 SvUTF8_on(sv); 846 } else { 847 RETVAL = &PL_sv_undef; 848 } 849 } 850 OUTPUT: 851 RETVAL 852 853 SV * 854 _utf8_off(sv) 855 SV * sv 856 CODE: 857 { 858 if (SvPOK(sv)) { 859 SV *rsv = newSViv(SvUTF8(sv)); 860 RETVAL = rsv; 861 SvUTF8_off(sv); 862 } else { 863 RETVAL = &PL_sv_undef; 864 } 865 } 866 OUTPUT: 867 RETVAL 868 869 int 870 DIE_ON_ERR() 871 CODE: 872 RETVAL = ENCODE_DIE_ON_ERR; 873 OUTPUT: 874 RETVAL 875 876 int 877 WARN_ON_ERR() 878 CODE: 879 RETVAL = ENCODE_WARN_ON_ERR; 880 OUTPUT: 881 RETVAL 882 883 int 884 LEAVE_SRC() 885 CODE: 886 RETVAL = ENCODE_LEAVE_SRC; 887 OUTPUT: 888 RETVAL 889 890 int 891 RETURN_ON_ERR() 892 CODE: 893 RETVAL = ENCODE_RETURN_ON_ERR; 894 OUTPUT: 895 RETVAL 896 897 int 898 PERLQQ() 899 CODE: 900 RETVAL = ENCODE_PERLQQ; 901 OUTPUT: 902 RETVAL 903 904 int 905 HTMLCREF() 906 CODE: 907 RETVAL = ENCODE_HTMLCREF; 908 OUTPUT: 909 RETVAL 910 911 int 912 XMLCREF() 913 CODE: 914 RETVAL = ENCODE_XMLCREF; 915 OUTPUT: 916 RETVAL 917 918 int 919 STOP_AT_PARTIAL() 920 CODE: 921 RETVAL = ENCODE_STOP_AT_PARTIAL; 922 OUTPUT: 923 RETVAL 924 925 int 926 FB_DEFAULT() 927 CODE: 928 RETVAL = ENCODE_FB_DEFAULT; 929 OUTPUT: 930 RETVAL 931 932 int 933 FB_CROAK() 934 CODE: 935 RETVAL = ENCODE_FB_CROAK; 936 OUTPUT: 937 RETVAL 938 939 int 940 FB_QUIET() 941 CODE: 942 RETVAL = ENCODE_FB_QUIET; 943 OUTPUT: 944 RETVAL 945 946 int 947 FB_WARN() 948 CODE: 949 RETVAL = ENCODE_FB_WARN; 950 OUTPUT: 951 RETVAL 952 953 int 954 FB_PERLQQ() 955 CODE: 956 RETVAL = ENCODE_FB_PERLQQ; 957 OUTPUT: 958 RETVAL 959 960 int 961 FB_HTMLCREF() 962 CODE: 963 RETVAL = ENCODE_FB_HTMLCREF; 964 OUTPUT: 965 RETVAL 966 967 int 968 FB_XMLCREF() 969 CODE: 970 RETVAL = ENCODE_FB_XMLCREF; 971 OUTPUT: 972 RETVAL 973 974 BOOT: 975 { 976 #include "def_t.h" 977 #include "def_t.exh" 978 } 979