xref: /openbsd-src/gnu/usr.bin/perl/cpan/Encode/Encode.xs (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*
2  $Id: Encode.xs,v 2.27 2014/04/29 16:25:06 dankogai Exp dankogai $
3  */
4 
5 #define PERL_NO_GET_CONTEXT
6 #include "EXTERN.h"
7 #include "perl.h"
8 #include "XSUB.h"
9 #include "encode.h"
10 
11 # define PERLIO_MODNAME  "PerlIO::encoding"
12 # define PERLIO_FILENAME "PerlIO/encoding.pm"
13 
14 /* set 1 or more to profile.  t/encoding.t dumps core because of
15    Perl_warner and PerlIO don't work well */
16 #define ENCODE_XS_PROFILE 0
17 
18 /* set 0 to disable floating point to calculate buffer size for
19    encode_method().  1 is recommended. 2 restores NI-S original */
20 #define ENCODE_XS_USEFP   1
21 
22 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX;   \
23                          Perl_croak(aTHX_ "panic_unimplemented"); \
24              return (y)0; /* fool picky compilers */ \
25                          }
26 /**/
27 
28 UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
29 UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
30 
31 #ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
32 #   define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE
33 #else
34 #   define UTF8_ALLOW_STRICT 0
35 #endif
36 
37 #define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY &                    \
38                               ~(UTF8_ALLOW_CONTINUATION |         \
39                                 UTF8_ALLOW_NON_CONTINUATION |     \
40                                 UTF8_ALLOW_LONG))
41 
42 void
43 Encode_XSEncoding(pTHX_ encode_t * enc)
44 {
45     dSP;
46     HV *stash = gv_stashpv("Encode::XS", TRUE);
47     SV *iv    = newSViv(PTR2IV(enc));
48     SV *sv    = sv_bless(newRV_noinc(iv),stash);
49     int i = 0;
50     /* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's
51     constness, in the hope that perl won't mess with it. */
52     assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0);
53     SvFLAGS(iv) |= SVp_POK;
54     SvPVX(iv) = (char*) enc->name[0];
55     PUSHMARK(sp);
56     XPUSHs(sv);
57     while (enc->name[i]) {
58     const char *name = enc->name[i++];
59     XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
60     }
61     PUTBACK;
62     call_pv("Encode::define_encoding", G_DISCARD);
63     SvREFCNT_dec(sv);
64 }
65 
66 void
67 call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
68 {
69     /* Exists for breakpointing */
70 }
71 
72 
73 #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s"
74 #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
75 
76 static SV *
77 do_fallback_cb(pTHX_ UV ch, SV *fallback_cb)
78 {
79     dSP;
80     int argc;
81     SV *retval = newSVpv("",0);
82     ENTER;
83     SAVETMPS;
84     PUSHMARK(sp);
85     XPUSHs(sv_2mortal(newSVnv((UV)ch)));
86     PUTBACK;
87     argc = call_sv(fallback_cb, G_SCALAR);
88     SPAGAIN;
89     if (argc != 1){
90 	croak("fallback sub must return scalar!");
91     }
92     sv_catsv(retval, POPs);
93     PUTBACK;
94     FREETMPS;
95     LEAVE;
96     return retval;
97 }
98 
99 static SV *
100 encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
101 	      int check, STRLEN * offset, SV * term, int * retcode,
102 	      SV *fallback_cb)
103 {
104     STRLEN slen;
105     U8 *s = (U8 *) SvPV(src, slen);
106     STRLEN tlen  = slen;
107     STRLEN ddone = 0;
108     STRLEN sdone = 0;
109     /* We allocate slen+1.
110        PerlIO dumps core if this value is smaller than this. */
111     SV *dst = sv_2mortal(newSV(slen+1));
112     U8 *d = (U8 *)SvPVX(dst);
113     STRLEN dlen = SvLEN(dst)-1;
114     int code = 0;
115     STRLEN trmlen = 0;
116     U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL;
117 
118     if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
119 
120     if (offset) {
121       s += *offset;
122       if (slen > *offset){ /* safeguard against slen overflow */
123       slen -= *offset;
124       }else{
125       slen = 0;
126       }
127       tlen = slen;
128     }
129 
130     if (slen == 0){
131     SvCUR_set(dst, 0);
132     SvPOK_only(dst);
133     goto ENCODE_END;
134     }
135 
136     while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check,
137                  trm, trmlen)) )
138     {
139     SvCUR_set(dst, dlen+ddone);
140     SvPOK_only(dst);
141 
142     if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL ||
143         code == ENCODE_FOUND_TERM) {
144         break;
145     }
146     switch (code) {
147     case ENCODE_NOSPACE:
148     {
149         STRLEN more = 0; /* make sure you initialize! */
150         STRLEN sleft;
151         sdone += slen;
152         ddone += dlen;
153         sleft = tlen - sdone;
154 #if ENCODE_XS_PROFILE >= 2
155         Perl_warn(aTHX_
156               "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
157               more, sdone, sleft, SvLEN(dst));
158 #endif
159         if (sdone != 0) { /* has src ever been processed ? */
160 #if   ENCODE_XS_USEFP == 2
161         more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
162             - SvLEN(dst);
163 #elif ENCODE_XS_USEFP
164         more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft);
165 #else
166         /* safe until SvLEN(dst) == MAX_INT/16 */
167         more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
168 #endif
169         }
170         more += UTF8_MAXLEN; /* insurance policy */
171         d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
172         /* dst need to grow need MORE bytes! */
173         if (ddone >= SvLEN(dst)) {
174         Perl_croak(aTHX_ "Destination couldn't be grown.");
175         }
176         dlen = SvLEN(dst)-ddone-1;
177         d   += ddone;
178         s   += slen;
179         slen = tlen-sdone;
180         continue;
181     }
182     case ENCODE_NOREP:
183         /* encoding */
184         if (dir == enc->f_utf8) {
185         STRLEN clen;
186         UV ch =
187             utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
188                    &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
189         /* if non-representable multibyte prefix at end of current buffer - break*/
190         if (clen > tlen - sdone) break;
191         if (check & ENCODE_DIE_ON_ERR) {
192             Perl_croak(aTHX_ ERR_ENCODE_NOMAP,
193                    (UV)ch, enc->name[0]);
194             return &PL_sv_undef; /* never reaches but be safe */
195         }
196         if (check & ENCODE_WARN_ON_ERR){
197             Perl_warner(aTHX_ packWARN(WARN_UTF8),
198                 ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]);
199         }
200         if (check & ENCODE_RETURN_ON_ERR){
201             goto ENCODE_SET_SRC;
202         }
203         if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
204             SV* subchar =
205             (fallback_cb != &PL_sv_undef)
206 		? do_fallback_cb(aTHX_ ch, fallback_cb)
207 		: newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
208                  check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
209                  "&#x%" UVxf ";", (UV)ch);
210 	    SvUTF8_off(subchar); /* make sure no decoded string gets in */
211             sdone += slen + clen;
212             ddone += dlen + SvCUR(subchar);
213             sv_catsv(dst, subchar);
214             SvREFCNT_dec(subchar);
215         } else {
216             /* fallback char */
217             sdone += slen + clen;
218             ddone += dlen + enc->replen;
219             sv_catpvn(dst, (char*)enc->rep, enc->replen);
220         }
221         }
222         /* decoding */
223         else {
224         if (check & ENCODE_DIE_ON_ERR){
225             Perl_croak(aTHX_ ERR_DECODE_NOMAP,
226                               enc->name[0], (UV)s[slen]);
227             return &PL_sv_undef; /* never reaches but be safe */
228         }
229         if (check & ENCODE_WARN_ON_ERR){
230             Perl_warner(
231             aTHX_ packWARN(WARN_UTF8),
232             ERR_DECODE_NOMAP,
233                	        enc->name[0], (UV)s[slen]);
234         }
235         if (check & ENCODE_RETURN_ON_ERR){
236             goto ENCODE_SET_SRC;
237         }
238         if (check &
239             (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
240             SV* subchar =
241             (fallback_cb != &PL_sv_undef)
242 		? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb)
243 		: newSVpvf("\\x%02" UVXf, (UV)s[slen]);
244             sdone += slen + 1;
245             ddone += dlen + SvCUR(subchar);
246             sv_catsv(dst, subchar);
247             SvREFCNT_dec(subchar);
248         } else {
249             sdone += slen + 1;
250             ddone += dlen + strlen(FBCHAR_UTF8);
251             sv_catpv(dst, FBCHAR_UTF8);
252         }
253         }
254         /* settle variables when fallback */
255         d    = (U8 *)SvEND(dst);
256             dlen = SvLEN(dst) - ddone - 1;
257         s    = (U8*)SvPVX(src) + sdone;
258         slen = tlen - sdone;
259         break;
260 
261     default:
262         Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
263                code, (dir == enc->f_utf8) ? "to" : "from",
264                enc->name[0]);
265         return &PL_sv_undef;
266     }
267     }
268  ENCODE_SET_SRC:
269     if (check && !(check & ENCODE_LEAVE_SRC)){
270     sdone = SvCUR(src) - (slen+sdone);
271     if (sdone) {
272         sv_setpvn(src, (char*)s+slen, sdone);
273     }
274     SvCUR_set(src, sdone);
275     }
276     /* warn("check = 0x%X, code = 0x%d\n", check, code); */
277 
278     SvCUR_set(dst, dlen+ddone);
279     SvPOK_only(dst);
280 
281 #if ENCODE_XS_PROFILE
282     if (SvCUR(dst) > SvCUR(src)){
283     Perl_warn(aTHX_
284           "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n",
285           SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
286           (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
287     }
288 #endif
289 
290     if (offset)
291       *offset += sdone + slen;
292 
293  ENCODE_END:
294     *SvEND(dst) = '\0';
295     if (retcode) *retcode = code;
296     return dst;
297 }
298 
299 static bool
300 strict_utf8(pTHX_ SV* sv)
301 {
302     HV* hv;
303     SV** svp;
304     sv = SvRV(sv);
305     if (!sv || SvTYPE(sv) != SVt_PVHV)
306         return 0;
307     hv = (HV*)sv;
308     svp = hv_fetch(hv, "strict_utf8", 11, 0);
309     if (!svp)
310         return 0;
311     return SvTRUE(*svp);
312 }
313 
314 static U8*
315 process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
316              bool encode, bool strict, bool stop_at_partial)
317 {
318     UV uv;
319     STRLEN ulen;
320     SV *fallback_cb;
321     int check;
322 
323     if (SvROK(check_sv)) {
324 	/* croak("UTF-8 decoder doesn't support callback CHECK"); */
325 	fallback_cb = check_sv;
326 	check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */
327     }
328     else {
329 	fallback_cb = &PL_sv_undef;
330 	check = SvIV(check_sv);
331     }
332 
333     SvPOK_only(dst);
334     SvCUR_set(dst,0);
335 
336     while (s < e) {
337         if (UTF8_IS_INVARIANT(*s)) {
338             sv_catpvn(dst, (char *)s, 1);
339             s++;
340             continue;
341         }
342 
343         if (UTF8_IS_START(*s)) {
344             U8 skip = UTF8SKIP(s);
345             if ((s + skip) > e) {
346                 /* Partial character */
347                 /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */
348                 if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL))
349                     break;
350 
351                 goto malformed_byte;
352             }
353 
354             uv = utf8n_to_uvuni(s, e - s, &ulen,
355                                 UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT :
356                                                             UTF8_ALLOW_NONSTRICT)
357                                );
358 #if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */
359         if (strict && uv > PERL_UNICODE_MAX)
360         ulen = (STRLEN) -1;
361 #endif
362             if (ulen == -1) {
363                 if (strict) {
364                     uv = utf8n_to_uvuni(s, e - s, &ulen,
365                                         UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT);
366                     if (ulen == -1)
367                         goto malformed_byte;
368                     goto malformed;
369                 }
370                 goto malformed_byte;
371             }
372 
373 
374              /* Whole char is good */
375              sv_catpvn(dst,(char *)s,skip);
376              s += skip;
377              continue;
378         }
379 
380         /* If we get here there is something wrong with alleged UTF-8 */
381     malformed_byte:
382         uv = (UV)*s;
383         ulen = 1;
384 
385     malformed:
386         if (check & ENCODE_DIE_ON_ERR){
387             if (encode)
388                 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8");
389             else
390                 Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv);
391         }
392         if (check & ENCODE_WARN_ON_ERR){
393             if (encode)
394                 Perl_warner(aTHX_ packWARN(WARN_UTF8),
395                             ERR_ENCODE_NOMAP, uv, "utf8");
396             else
397                 Perl_warner(aTHX_ packWARN(WARN_UTF8),
398                             ERR_DECODE_NOMAP, "utf8", uv);
399         }
400         if (check & ENCODE_RETURN_ON_ERR) {
401                 break;
402         }
403         if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
404 	    SV* subchar =
405 		(fallback_cb != &PL_sv_undef)
406 		? do_fallback_cb(aTHX_ uv, fallback_cb)
407 		: newSVpvf(check & ENCODE_PERLQQ
408 			   ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}")
409 			   :  check & ENCODE_HTMLCREF ? "&#%" UVuf ";"
410 			   : "&#x%" UVxf ";", uv);
411 	    if (encode){
412 		SvUTF8_off(subchar); /* make sure no decoded string gets in */
413 	    }
414             sv_catsv(dst, subchar);
415             SvREFCNT_dec(subchar);
416         } else {
417             sv_catpv(dst, FBCHAR_UTF8);
418         }
419         s += ulen;
420     }
421     *SvEND(dst) = '\0';
422 
423     return s;
424 }
425 
426 
427 MODULE = Encode		PACKAGE = Encode::utf8	PREFIX = Method_
428 
429 PROTOTYPES: DISABLE
430 
431 void
432 Method_decode_xs(obj,src,check_sv = &PL_sv_no)
433 SV *	obj
434 SV *	src
435 SV *	check_sv
436 PREINIT:
437     STRLEN slen;
438     U8 *s;
439     U8 *e;
440     SV *dst;
441     bool renewed = 0;
442     int check;
443 CODE:
444 {
445     dSP; ENTER; SAVETMPS;
446     if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
447     s = (U8 *) SvPV(src, slen);
448     e = (U8 *) SvEND(src);
449     check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
450     /*
451      * PerlIO check -- we assume the object is of PerlIO if renewed
452      */
453     PUSHMARK(sp);
454     XPUSHs(obj);
455     PUTBACK;
456     if (call_method("renewed",G_SCALAR) == 1) {
457     SPAGAIN;
458     renewed = (bool)POPi;
459     PUTBACK;
460 #if 0
461     fprintf(stderr, "renewed == %d\n", renewed);
462 #endif
463     }
464     FREETMPS; LEAVE;
465     /* end PerlIO check */
466 
467     if (SvUTF8(src)) {
468     s = utf8_to_bytes(s,&slen);
469     if (s) {
470         SvCUR_set(src,slen);
471         SvUTF8_off(src);
472         e = s+slen;
473     }
474     else {
475         croak("Cannot decode string with wide characters");
476     }
477     }
478 
479     dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
480     s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed);
481 
482     /* Clear out translated part of source unless asked not to */
483     if (check && !(check & ENCODE_LEAVE_SRC)){
484     slen = e-s;
485     if (slen) {
486         sv_setpvn(src, (char*)s, slen);
487     }
488     SvCUR_set(src, slen);
489     }
490     SvUTF8_on(dst);
491     if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
492     ST(0) = dst;
493     XSRETURN(1);
494 }
495 
496 void
497 Method_encode_xs(obj,src,check_sv = &PL_sv_no)
498 SV *	obj
499 SV *	src
500 SV *	check_sv
501 PREINIT:
502     STRLEN slen;
503     U8 *s;
504     U8 *e;
505     SV *dst;
506     bool renewed = 0;
507     int check;
508 CODE:
509 {
510     check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
511     if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
512     s = (U8 *) SvPV(src, slen);
513     e = (U8 *) SvEND(src);
514     dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
515     if (SvUTF8(src)) {
516     /* Already encoded */
517     if (strict_utf8(aTHX_ obj)) {
518         s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0);
519     }
520         else {
521             /* trust it and just copy the octets */
522     	    sv_setpvn(dst,(char *)s,(e-s));
523         s = e;
524         }
525     }
526     else {
527     	/* Native bytes - can always encode */
528     U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */
529     	while (s < e) {
530     	    UV uv = NATIVE_TO_UNI((UV) *s);
531 	    s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */
532             if (UNI_IS_INVARIANT(uv))
533             	*d++ = (U8)UTF_TO_NATIVE(uv);
534             else {
535     	        *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
536                 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
537             }
538     }
539         SvCUR_set(dst, d- (U8 *)SvPVX(dst));
540     	*SvEND(dst) = '\0';
541     }
542 
543     /* Clear out translated part of source unless asked not to */
544     if (check && !(check & ENCODE_LEAVE_SRC)){
545     slen = e-s;
546     if (slen) {
547         sv_setpvn(src, (char*)s, slen);
548     }
549     SvCUR_set(src, slen);
550     }
551     SvPOK_only(dst);
552     SvUTF8_off(dst);
553     if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
554     ST(0) = dst;
555     XSRETURN(1);
556 }
557 
558 MODULE = Encode		PACKAGE = Encode::XS	PREFIX = Method_
559 
560 PROTOTYPES: ENABLE
561 
562 void
563 Method_renew(obj)
564 SV *	obj
565 CODE:
566 {
567     XSRETURN(1);
568 }
569 
570 int
571 Method_renewed(obj)
572 SV *    obj
573 CODE:
574     RETVAL = 0;
575 OUTPUT:
576     RETVAL
577 
578 void
579 Method_name(obj)
580 SV *	obj
581 CODE:
582 {
583     encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
584     ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
585     XSRETURN(1);
586 }
587 
588 void
589 Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no)
590 SV *	obj
591 SV *	dst
592 SV *	src
593 SV *	off
594 SV *	term
595 SV *    check_sv
596 CODE:
597 {
598     int check;
599     SV *fallback_cb = &PL_sv_undef;
600     encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
601     STRLEN offset = (STRLEN)SvIV(off);
602     int code = 0;
603     if (SvUTF8(src)) {
604     	sv_utf8_downgrade(src, FALSE);
605     }
606     if (SvROK(check_sv)){
607 	fallback_cb = check_sv;
608 	check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
609     }else{
610 	check = SvIV(check_sv);
611     }
612     sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check,
613                 &offset, term, &code, fallback_cb));
614     SvIV_set(off, (IV)offset);
615     if (code == ENCODE_FOUND_TERM) {
616     ST(0) = &PL_sv_yes;
617     }else{
618     ST(0) = &PL_sv_no;
619     }
620     XSRETURN(1);
621 }
622 
623 void
624 Method_decode(obj,src,check_sv = &PL_sv_no)
625 SV *	obj
626 SV *	src
627 SV *	check_sv
628 CODE:
629 {
630     int check;
631     SV *fallback_cb = &PL_sv_undef;
632     encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
633     if (SvUTF8(src)) {
634     	sv_utf8_downgrade(src, FALSE);
635     }
636     if (SvROK(check_sv)){
637 	fallback_cb = check_sv;
638 	check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
639     }else{
640 	check = SvIV(check_sv);
641     }
642     ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check,
643               NULL, Nullsv, NULL, fallback_cb);
644     SvUTF8_on(ST(0));
645     XSRETURN(1);
646 }
647 
648 void
649 Method_encode(obj,src,check_sv = &PL_sv_no)
650 SV *	obj
651 SV *	src
652 SV *	check_sv
653 CODE:
654 {
655     int check;
656     SV *fallback_cb = &PL_sv_undef;
657     encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
658     sv_utf8_upgrade(src);
659     if (SvROK(check_sv)){
660 	fallback_cb = check_sv;
661 	check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
662     }else{
663 	check = SvIV(check_sv);
664     }
665     ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check,
666               NULL, Nullsv, NULL, fallback_cb);
667     XSRETURN(1);
668 }
669 
670 void
671 Method_needs_lines(obj)
672 SV *	obj
673 CODE:
674 {
675     /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
676     ST(0) = &PL_sv_no;
677     XSRETURN(1);
678 }
679 
680 void
681 Method_perlio_ok(obj)
682 SV *	obj
683 CODE:
684 {
685     /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
686     /* require_pv(PERLIO_FILENAME); */
687 
688     eval_pv("require PerlIO::encoding", 0);
689 
690     if (SvTRUE(get_sv("@", 0))) {
691     ST(0) = &PL_sv_no;
692     }else{
693     ST(0) = &PL_sv_yes;
694     }
695     XSRETURN(1);
696 }
697 
698 void
699 Method_mime_name(obj)
700 SV *	obj
701 CODE:
702 {
703     encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
704     SV *retval;
705     eval_pv("require Encode::MIME::Name", 0);
706 
707     if (SvTRUE(get_sv("@", 0))) {
708 	ST(0) = &PL_sv_undef;
709     }else{
710 	ENTER;
711 	SAVETMPS;
712 	PUSHMARK(sp);
713 	XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0]))));
714 	PUTBACK;
715 	call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR);
716 	SPAGAIN;
717 	retval = newSVsv(POPs);
718 	PUTBACK;
719 	FREETMPS;
720 	LEAVE;
721 	/* enc->name[0] */
722 	ST(0) = retval;
723     }
724     XSRETURN(1);
725 }
726 
727 MODULE = Encode         PACKAGE = Encode
728 
729 PROTOTYPES: ENABLE
730 
731 I32
732 _bytes_to_utf8(sv, ...)
733 SV *    sv
734 CODE:
735 {
736     SV * encoding = items == 2 ? ST(1) : Nullsv;
737 
738     if (encoding)
739     RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
740     else {
741     STRLEN len;
742     U8*    s = (U8*)SvPV(sv, len);
743     U8*    converted;
744 
745     converted = bytes_to_utf8(s, &len); /* This allocs */
746     sv_setpvn(sv, (char *)converted, len);
747     SvUTF8_on(sv); /* XXX Should we? */
748     Safefree(converted);                /* ... so free it */
749     RETVAL = len;
750     }
751 }
752 OUTPUT:
753     RETVAL
754 
755 I32
756 _utf8_to_bytes(sv, ...)
757 SV *    sv
758 CODE:
759 {
760     SV * to    = items > 1 ? ST(1) : Nullsv;
761     SV * check = items > 2 ? ST(2) : Nullsv;
762 
763     if (to) {
764     RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
765     } else {
766     STRLEN len;
767     U8 *s = (U8*)SvPV(sv, len);
768 
769     RETVAL = 0;
770     if (SvTRUE(check)) {
771         /* Must do things the slow way */
772         U8 *dest;
773             /* We need a copy to pass to check() */
774         U8 *src  = s;
775         U8 *send = s + len;
776         U8 *d0;
777 
778         New(83, dest, len, U8); /* I think */
779         d0 = dest;
780 
781         while (s < send) {
782                 if (*s < 0x80){
783             *dest++ = *s++;
784                 } else {
785             STRLEN ulen;
786             UV uv = *s++;
787 
788             /* Have to do it all ourselves because of error routine,
789                aargh. */
790             if (!(uv & 0x40)){ goto failure; }
791             if      (!(uv & 0x20)) { ulen = 2;  uv &= 0x1f; }
792             else if (!(uv & 0x10)) { ulen = 3;  uv &= 0x0f; }
793             else if (!(uv & 0x08)) { ulen = 4;  uv &= 0x07; }
794             else if (!(uv & 0x04)) { ulen = 5;  uv &= 0x03; }
795             else if (!(uv & 0x02)) { ulen = 6;  uv &= 0x01; }
796             else if (!(uv & 0x01)) { ulen = 7;  uv = 0; }
797             else                   { ulen = 13; uv = 0; }
798 
799             /* Note change to utf8.c variable naming, for variety */
800             while (ulen--) {
801             if ((*s & 0xc0) != 0x80){
802                 goto failure;
803             } else {
804                 uv = (uv << 6) | (*s++ & 0x3f);
805             }
806           }
807           if (uv > 256) {
808           failure:
809               call_failure(check, s, dest, src);
810               /* Now what happens? */
811           }
812           *dest++ = (U8)uv;
813         }
814         }
815         RETVAL = dest - d0;
816         sv_usepvn(sv, (char *)dest, RETVAL);
817         SvUTF8_off(sv);
818     } else {
819         RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
820     }
821     }
822 }
823 OUTPUT:
824     RETVAL
825 
826 bool
827 is_utf8(sv, check = 0)
828 SV *	sv
829 int	check
830 CODE:
831 {
832     if (SvGMAGICAL(sv)) /* it could be $1, for example */
833     sv = newSVsv(sv); /* GMAGIG will be done */
834     RETVAL = SvUTF8(sv) ? TRUE : FALSE;
835     if (RETVAL &&
836         check  &&
837         !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
838         RETVAL = FALSE;
839     if (sv != ST(0))
840     SvREFCNT_dec(sv); /* it was a temp copy */
841 }
842 OUTPUT:
843     RETVAL
844 
845 #ifndef SvIsCOW
846 # define SvIsCOW (SvREADONLY(sv) && SvFAKE(sv))
847 #endif
848 
849 SV *
850 _utf8_on(sv)
851 SV *	sv
852 CODE:
853 {
854     if (SvPOK(sv)) {
855     SV *rsv = newSViv(SvUTF8(sv));
856     RETVAL = rsv;
857     if (SvIsCOW(sv)) sv_force_normal(sv);
858     SvUTF8_on(sv);
859     } else {
860     RETVAL = &PL_sv_undef;
861     }
862 }
863 OUTPUT:
864     RETVAL
865 
866 SV *
867 _utf8_off(sv)
868 SV *	sv
869 CODE:
870 {
871     if (SvPOK(sv)) {
872     SV *rsv = newSViv(SvUTF8(sv));
873     RETVAL = rsv;
874     if (SvIsCOW(sv)) sv_force_normal(sv);
875     SvUTF8_off(sv);
876     } else {
877     RETVAL = &PL_sv_undef;
878     }
879 }
880 OUTPUT:
881     RETVAL
882 
883 int
884 DIE_ON_ERR()
885 CODE:
886     RETVAL = ENCODE_DIE_ON_ERR;
887 OUTPUT:
888     RETVAL
889 
890 int
891 WARN_ON_ERR()
892 CODE:
893     RETVAL = ENCODE_WARN_ON_ERR;
894 OUTPUT:
895     RETVAL
896 
897 int
898 LEAVE_SRC()
899 CODE:
900     RETVAL = ENCODE_LEAVE_SRC;
901 OUTPUT:
902     RETVAL
903 
904 int
905 RETURN_ON_ERR()
906 CODE:
907     RETVAL = ENCODE_RETURN_ON_ERR;
908 OUTPUT:
909     RETVAL
910 
911 int
912 PERLQQ()
913 CODE:
914     RETVAL = ENCODE_PERLQQ;
915 OUTPUT:
916     RETVAL
917 
918 int
919 HTMLCREF()
920 CODE:
921     RETVAL = ENCODE_HTMLCREF;
922 OUTPUT:
923     RETVAL
924 
925 int
926 XMLCREF()
927 CODE:
928     RETVAL = ENCODE_XMLCREF;
929 OUTPUT:
930     RETVAL
931 
932 int
933 STOP_AT_PARTIAL()
934 CODE:
935     RETVAL = ENCODE_STOP_AT_PARTIAL;
936 OUTPUT:
937     RETVAL
938 
939 int
940 FB_DEFAULT()
941 CODE:
942     RETVAL = ENCODE_FB_DEFAULT;
943 OUTPUT:
944     RETVAL
945 
946 int
947 FB_CROAK()
948 CODE:
949     RETVAL = ENCODE_FB_CROAK;
950 OUTPUT:
951     RETVAL
952 
953 int
954 FB_QUIET()
955 CODE:
956     RETVAL = ENCODE_FB_QUIET;
957 OUTPUT:
958     RETVAL
959 
960 int
961 FB_WARN()
962 CODE:
963     RETVAL = ENCODE_FB_WARN;
964 OUTPUT:
965     RETVAL
966 
967 int
968 FB_PERLQQ()
969 CODE:
970     RETVAL = ENCODE_FB_PERLQQ;
971 OUTPUT:
972     RETVAL
973 
974 int
975 FB_HTMLCREF()
976 CODE:
977     RETVAL = ENCODE_FB_HTMLCREF;
978 OUTPUT:
979     RETVAL
980 
981 int
982 FB_XMLCREF()
983 CODE:
984     RETVAL = ENCODE_FB_XMLCREF;
985 OUTPUT:
986     RETVAL
987 
988 BOOT:
989 {
990 #include "def_t.h"
991 #include "def_t.exh"
992 }
993