1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
3*0Sstevel@tonic-gate  */
4*0Sstevel@tonic-gate 
5*0Sstevel@tonic-gate #define PERL_NO_GET_CONTEXT
6*0Sstevel@tonic-gate #include "EXTERN.h"
7*0Sstevel@tonic-gate #include "perl.h"
8*0Sstevel@tonic-gate #include "XSUB.h"
9*0Sstevel@tonic-gate #define U8 U8
10*0Sstevel@tonic-gate 
11*0Sstevel@tonic-gate #define OUR_DEFAULT_FB	"Encode::PERLQQ"
12*0Sstevel@tonic-gate 
13*0Sstevel@tonic-gate #if defined(USE_PERLIO) && !defined(USE_SFIO)
14*0Sstevel@tonic-gate 
15*0Sstevel@tonic-gate /* Define an encoding "layer" in the perliol.h sense.
16*0Sstevel@tonic-gate 
17*0Sstevel@tonic-gate    The layer defined here "inherits" in an object-oriented sense from
18*0Sstevel@tonic-gate    the "perlio" layer with its PerlIOBuf_* "methods".  The
19*0Sstevel@tonic-gate    implementation is particularly efficient as until Encode settles
20*0Sstevel@tonic-gate    down there is no point in tryint to tune it.
21*0Sstevel@tonic-gate 
22*0Sstevel@tonic-gate    The layer works by overloading the "fill" and "flush" methods.
23*0Sstevel@tonic-gate 
24*0Sstevel@tonic-gate    "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
25*0Sstevel@tonic-gate    perl API to convert the encoded data to UTF-8 form, then copies it
26*0Sstevel@tonic-gate    back to the buffer. The "base class's" read methods then see the
27*0Sstevel@tonic-gate    UTF-8 data.
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate    "flush" transforms the UTF-8 data deposited by the "base class's
30*0Sstevel@tonic-gate    write method in the buffer back into the encoded form using the
31*0Sstevel@tonic-gate    encode OO perl API, then copies data back into the buffer and calls
32*0Sstevel@tonic-gate    "SUPER::flush.
33*0Sstevel@tonic-gate 
34*0Sstevel@tonic-gate    Note that "flush" is _also_ called for read mode - we still do the
35*0Sstevel@tonic-gate    (back)-translate so that the base class's "flush" sees the
36*0Sstevel@tonic-gate    correct number of encoded chars for positioning the seek
37*0Sstevel@tonic-gate    pointer. (This double translation is the worst performance issue -
38*0Sstevel@tonic-gate    particularly with all-perl encode engine.)
39*0Sstevel@tonic-gate 
40*0Sstevel@tonic-gate */
41*0Sstevel@tonic-gate 
42*0Sstevel@tonic-gate #include "perliol.h"
43*0Sstevel@tonic-gate 
44*0Sstevel@tonic-gate typedef struct {
45*0Sstevel@tonic-gate     PerlIOBuf base;		/* PerlIOBuf stuff */
46*0Sstevel@tonic-gate     SV *bufsv;			/* buffer seen by layers above */
47*0Sstevel@tonic-gate     SV *dataSV;			/* data we have read from layer below */
48*0Sstevel@tonic-gate     SV *enc;			/* the encoding object */
49*0Sstevel@tonic-gate     SV *chk;                    /* CHECK in Encode methods */
50*0Sstevel@tonic-gate     int flags;			/* Flags currently just needs lines */
51*0Sstevel@tonic-gate } PerlIOEncode;
52*0Sstevel@tonic-gate 
53*0Sstevel@tonic-gate #define NEEDS_LINES	1
54*0Sstevel@tonic-gate 
55*0Sstevel@tonic-gate SV *
PerlIOEncode_getarg(pTHX_ PerlIO * f,CLONE_PARAMS * param,int flags)56*0Sstevel@tonic-gate PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
57*0Sstevel@tonic-gate {
58*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
59*0Sstevel@tonic-gate     SV *sv = &PL_sv_undef;
60*0Sstevel@tonic-gate     if (e->enc) {
61*0Sstevel@tonic-gate 	dSP;
62*0Sstevel@tonic-gate 	/* Not 100% sure stack swap is right thing to do during dup ... */
63*0Sstevel@tonic-gate 	PUSHSTACKi(PERLSI_MAGIC);
64*0Sstevel@tonic-gate 	SPAGAIN;
65*0Sstevel@tonic-gate 	ENTER;
66*0Sstevel@tonic-gate 	SAVETMPS;
67*0Sstevel@tonic-gate 	PUSHMARK(sp);
68*0Sstevel@tonic-gate 	XPUSHs(e->enc);
69*0Sstevel@tonic-gate 	PUTBACK;
70*0Sstevel@tonic-gate 	if (call_method("name", G_SCALAR) == 1) {
71*0Sstevel@tonic-gate 	    SPAGAIN;
72*0Sstevel@tonic-gate 	    sv = newSVsv(POPs);
73*0Sstevel@tonic-gate 	    PUTBACK;
74*0Sstevel@tonic-gate 	}
75*0Sstevel@tonic-gate 	FREETMPS;
76*0Sstevel@tonic-gate 	LEAVE;
77*0Sstevel@tonic-gate 	POPSTACK;
78*0Sstevel@tonic-gate     }
79*0Sstevel@tonic-gate     return sv;
80*0Sstevel@tonic-gate }
81*0Sstevel@tonic-gate 
82*0Sstevel@tonic-gate IV
PerlIOEncode_pushed(pTHX_ PerlIO * f,const char * mode,SV * arg,PerlIO_funcs * tab)83*0Sstevel@tonic-gate PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg, PerlIO_funcs *tab)
84*0Sstevel@tonic-gate {
85*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
86*0Sstevel@tonic-gate     dSP;
87*0Sstevel@tonic-gate     IV  code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv,tab);
88*0Sstevel@tonic-gate     SV *result = Nullsv;
89*0Sstevel@tonic-gate 
90*0Sstevel@tonic-gate     PUSHSTACKi(PERLSI_MAGIC);
91*0Sstevel@tonic-gate     SPAGAIN;
92*0Sstevel@tonic-gate 
93*0Sstevel@tonic-gate     ENTER;
94*0Sstevel@tonic-gate     SAVETMPS;
95*0Sstevel@tonic-gate 
96*0Sstevel@tonic-gate     PUSHMARK(sp);
97*0Sstevel@tonic-gate     XPUSHs(arg);
98*0Sstevel@tonic-gate     PUTBACK;
99*0Sstevel@tonic-gate     if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
100*0Sstevel@tonic-gate 	/* should never happen */
101*0Sstevel@tonic-gate 	Perl_die(aTHX_ "Encode::find_encoding did not return a value");
102*0Sstevel@tonic-gate 	return -1;
103*0Sstevel@tonic-gate     }
104*0Sstevel@tonic-gate     SPAGAIN;
105*0Sstevel@tonic-gate     result = POPs;
106*0Sstevel@tonic-gate     PUTBACK;
107*0Sstevel@tonic-gate 
108*0Sstevel@tonic-gate     if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
109*0Sstevel@tonic-gate 	e->enc = Nullsv;
110*0Sstevel@tonic-gate 	Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
111*0Sstevel@tonic-gate 		    arg);
112*0Sstevel@tonic-gate 	errno = EINVAL;
113*0Sstevel@tonic-gate 	code = -1;
114*0Sstevel@tonic-gate     }
115*0Sstevel@tonic-gate     else {
116*0Sstevel@tonic-gate 
117*0Sstevel@tonic-gate        /* $enc->renew */
118*0Sstevel@tonic-gate 	PUSHMARK(sp);
119*0Sstevel@tonic-gate 	XPUSHs(result);
120*0Sstevel@tonic-gate 	PUTBACK;
121*0Sstevel@tonic-gate 	if (call_method("renew",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
122*0Sstevel@tonic-gate 	    Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support renew method",
123*0Sstevel@tonic-gate 			arg);
124*0Sstevel@tonic-gate 	}
125*0Sstevel@tonic-gate 	else {
126*0Sstevel@tonic-gate 	    SPAGAIN;
127*0Sstevel@tonic-gate 	    result = POPs;
128*0Sstevel@tonic-gate 	    PUTBACK;
129*0Sstevel@tonic-gate 	}
130*0Sstevel@tonic-gate 	e->enc = newSVsv(result);
131*0Sstevel@tonic-gate 	PUSHMARK(sp);
132*0Sstevel@tonic-gate 	XPUSHs(e->enc);
133*0Sstevel@tonic-gate 	PUTBACK;
134*0Sstevel@tonic-gate 	if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
135*0Sstevel@tonic-gate 	    Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
136*0Sstevel@tonic-gate 			arg);
137*0Sstevel@tonic-gate 	}
138*0Sstevel@tonic-gate 	else {
139*0Sstevel@tonic-gate 	    SPAGAIN;
140*0Sstevel@tonic-gate 	    result = POPs;
141*0Sstevel@tonic-gate 	    PUTBACK;
142*0Sstevel@tonic-gate 	    if (SvTRUE(result)) {
143*0Sstevel@tonic-gate 		e->flags |= NEEDS_LINES;
144*0Sstevel@tonic-gate 	    }
145*0Sstevel@tonic-gate 	}
146*0Sstevel@tonic-gate 	PerlIOBase(f)->flags |= PERLIO_F_UTF8;
147*0Sstevel@tonic-gate     }
148*0Sstevel@tonic-gate 
149*0Sstevel@tonic-gate     e->chk = newSVsv(get_sv("PerlIO::encoding::fallback", 0));
150*0Sstevel@tonic-gate 
151*0Sstevel@tonic-gate     FREETMPS;
152*0Sstevel@tonic-gate     LEAVE;
153*0Sstevel@tonic-gate     POPSTACK;
154*0Sstevel@tonic-gate     return code;
155*0Sstevel@tonic-gate }
156*0Sstevel@tonic-gate 
157*0Sstevel@tonic-gate IV
PerlIOEncode_popped(pTHX_ PerlIO * f)158*0Sstevel@tonic-gate PerlIOEncode_popped(pTHX_ PerlIO * f)
159*0Sstevel@tonic-gate {
160*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
161*0Sstevel@tonic-gate     if (e->enc) {
162*0Sstevel@tonic-gate 	SvREFCNT_dec(e->enc);
163*0Sstevel@tonic-gate 	e->enc = Nullsv;
164*0Sstevel@tonic-gate     }
165*0Sstevel@tonic-gate     if (e->bufsv) {
166*0Sstevel@tonic-gate 	SvREFCNT_dec(e->bufsv);
167*0Sstevel@tonic-gate 	e->bufsv = Nullsv;
168*0Sstevel@tonic-gate     }
169*0Sstevel@tonic-gate     if (e->dataSV) {
170*0Sstevel@tonic-gate 	SvREFCNT_dec(e->dataSV);
171*0Sstevel@tonic-gate 	e->dataSV = Nullsv;
172*0Sstevel@tonic-gate     }
173*0Sstevel@tonic-gate     if (e->chk) {
174*0Sstevel@tonic-gate 	SvREFCNT_dec(e->chk);
175*0Sstevel@tonic-gate 	e->chk = Nullsv;
176*0Sstevel@tonic-gate     }
177*0Sstevel@tonic-gate     return 0;
178*0Sstevel@tonic-gate }
179*0Sstevel@tonic-gate 
180*0Sstevel@tonic-gate STDCHAR *
PerlIOEncode_get_base(pTHX_ PerlIO * f)181*0Sstevel@tonic-gate PerlIOEncode_get_base(pTHX_ PerlIO * f)
182*0Sstevel@tonic-gate {
183*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
184*0Sstevel@tonic-gate     if (!e->base.bufsiz)
185*0Sstevel@tonic-gate 	e->base.bufsiz = 1024;
186*0Sstevel@tonic-gate     if (!e->bufsv) {
187*0Sstevel@tonic-gate 	e->bufsv = newSV(e->base.bufsiz);
188*0Sstevel@tonic-gate 	sv_setpvn(e->bufsv, "", 0);
189*0Sstevel@tonic-gate     }
190*0Sstevel@tonic-gate     e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
191*0Sstevel@tonic-gate     if (!e->base.ptr)
192*0Sstevel@tonic-gate 	e->base.ptr = e->base.buf;
193*0Sstevel@tonic-gate     if (!e->base.end)
194*0Sstevel@tonic-gate 	e->base.end = e->base.buf;
195*0Sstevel@tonic-gate     if (e->base.ptr < e->base.buf
196*0Sstevel@tonic-gate 	|| e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
197*0Sstevel@tonic-gate 	Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
198*0Sstevel@tonic-gate 		  e->base.buf + SvLEN(e->bufsv));
199*0Sstevel@tonic-gate 	abort();
200*0Sstevel@tonic-gate     }
201*0Sstevel@tonic-gate     if (SvLEN(e->bufsv) < e->base.bufsiz) {
202*0Sstevel@tonic-gate 	SSize_t poff = e->base.ptr - e->base.buf;
203*0Sstevel@tonic-gate 	SSize_t eoff = e->base.end - e->base.buf;
204*0Sstevel@tonic-gate 	e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
205*0Sstevel@tonic-gate 	e->base.ptr = e->base.buf + poff;
206*0Sstevel@tonic-gate 	e->base.end = e->base.buf + eoff;
207*0Sstevel@tonic-gate     }
208*0Sstevel@tonic-gate     if (e->base.ptr < e->base.buf
209*0Sstevel@tonic-gate 	|| e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
210*0Sstevel@tonic-gate 	Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
211*0Sstevel@tonic-gate 		  e->base.buf + SvLEN(e->bufsv));
212*0Sstevel@tonic-gate 	abort();
213*0Sstevel@tonic-gate     }
214*0Sstevel@tonic-gate     return e->base.buf;
215*0Sstevel@tonic-gate }
216*0Sstevel@tonic-gate 
217*0Sstevel@tonic-gate IV
PerlIOEncode_fill(pTHX_ PerlIO * f)218*0Sstevel@tonic-gate PerlIOEncode_fill(pTHX_ PerlIO * f)
219*0Sstevel@tonic-gate {
220*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
221*0Sstevel@tonic-gate     dSP;
222*0Sstevel@tonic-gate     IV code = 0;
223*0Sstevel@tonic-gate     PerlIO *n;
224*0Sstevel@tonic-gate     SSize_t avail;
225*0Sstevel@tonic-gate 
226*0Sstevel@tonic-gate     if (PerlIO_flush(f) != 0)
227*0Sstevel@tonic-gate 	return -1;
228*0Sstevel@tonic-gate     n  = PerlIONext(f);
229*0Sstevel@tonic-gate     if (!PerlIO_fast_gets(n)) {
230*0Sstevel@tonic-gate 	/* Things get too messy if we don't have a buffer layer
231*0Sstevel@tonic-gate 	   push a :perlio to do the job */
232*0Sstevel@tonic-gate 	char mode[8];
233*0Sstevel@tonic-gate 	n  = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
234*0Sstevel@tonic-gate 	if (!n) {
235*0Sstevel@tonic-gate 	    Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
236*0Sstevel@tonic-gate 	}
237*0Sstevel@tonic-gate     }
238*0Sstevel@tonic-gate     PUSHSTACKi(PERLSI_MAGIC);
239*0Sstevel@tonic-gate     SPAGAIN;
240*0Sstevel@tonic-gate     ENTER;
241*0Sstevel@tonic-gate     SAVETMPS;
242*0Sstevel@tonic-gate   retry:
243*0Sstevel@tonic-gate     avail = PerlIO_get_cnt(n);
244*0Sstevel@tonic-gate     if (avail <= 0) {
245*0Sstevel@tonic-gate 	avail = PerlIO_fill(n);
246*0Sstevel@tonic-gate 	if (avail == 0) {
247*0Sstevel@tonic-gate 	    avail = PerlIO_get_cnt(n);
248*0Sstevel@tonic-gate 	}
249*0Sstevel@tonic-gate 	else {
250*0Sstevel@tonic-gate 	    if (!PerlIO_error(n) && PerlIO_eof(n))
251*0Sstevel@tonic-gate 		avail = 0;
252*0Sstevel@tonic-gate 	}
253*0Sstevel@tonic-gate     }
254*0Sstevel@tonic-gate     if (avail > 0 || (e->flags & NEEDS_LINES)) {
255*0Sstevel@tonic-gate 	STDCHAR *ptr = PerlIO_get_ptr(n);
256*0Sstevel@tonic-gate 	SSize_t use  = (avail >= 0) ? avail : 0;
257*0Sstevel@tonic-gate 	SV *uni;
258*0Sstevel@tonic-gate 	char *s;
259*0Sstevel@tonic-gate 	STRLEN len = 0;
260*0Sstevel@tonic-gate 	e->base.ptr = e->base.end = (STDCHAR *) Nullch;
261*0Sstevel@tonic-gate 	(void) PerlIOEncode_get_base(aTHX_ f);
262*0Sstevel@tonic-gate 	if (!e->dataSV)
263*0Sstevel@tonic-gate 	    e->dataSV = newSV(0);
264*0Sstevel@tonic-gate 	if (SvTYPE(e->dataSV) < SVt_PV) {
265*0Sstevel@tonic-gate 	    sv_upgrade(e->dataSV,SVt_PV);
266*0Sstevel@tonic-gate 	}
267*0Sstevel@tonic-gate 	if (e->flags & NEEDS_LINES) {
268*0Sstevel@tonic-gate 	    /* Encoding needs whole lines (e.g. iso-2022-*)
269*0Sstevel@tonic-gate 	       search back from end of available data for
270*0Sstevel@tonic-gate 	       and line marker
271*0Sstevel@tonic-gate 	     */
272*0Sstevel@tonic-gate 	    STDCHAR *nl = ptr+use-1;
273*0Sstevel@tonic-gate 	    while (nl >= ptr) {
274*0Sstevel@tonic-gate 		if (*nl == '\n') {
275*0Sstevel@tonic-gate 		    break;
276*0Sstevel@tonic-gate 		}
277*0Sstevel@tonic-gate 		nl--;
278*0Sstevel@tonic-gate 	    }
279*0Sstevel@tonic-gate 	    if (nl >= ptr && *nl == '\n') {
280*0Sstevel@tonic-gate 		/* found a line - take up to and including that */
281*0Sstevel@tonic-gate 		use = (nl+1)-ptr;
282*0Sstevel@tonic-gate 	    }
283*0Sstevel@tonic-gate 	    else if (avail > 0) {
284*0Sstevel@tonic-gate 		/* No line, but not EOF - append avail to the pending data */
285*0Sstevel@tonic-gate 		sv_catpvn(e->dataSV, (char*)ptr, use);
286*0Sstevel@tonic-gate 		PerlIO_set_ptrcnt(n, ptr+use, 0);
287*0Sstevel@tonic-gate 		goto retry;
288*0Sstevel@tonic-gate 	    }
289*0Sstevel@tonic-gate 	    else if (!SvCUR(e->dataSV)) {
290*0Sstevel@tonic-gate 		goto end_of_file;
291*0Sstevel@tonic-gate 	    }
292*0Sstevel@tonic-gate 	}
293*0Sstevel@tonic-gate 	if (SvCUR(e->dataSV)) {
294*0Sstevel@tonic-gate 	    /* something left over from last time - create a normal
295*0Sstevel@tonic-gate 	       SV with new data appended
296*0Sstevel@tonic-gate 	     */
297*0Sstevel@tonic-gate 	    if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
298*0Sstevel@tonic-gate 		if (e->flags & NEEDS_LINES) {
299*0Sstevel@tonic-gate 		    /* Have to grow buffer */
300*0Sstevel@tonic-gate 		    e->base.bufsiz = use + SvCUR(e->dataSV);
301*0Sstevel@tonic-gate 		    PerlIOEncode_get_base(aTHX_ f);
302*0Sstevel@tonic-gate 		}
303*0Sstevel@tonic-gate 		else {
304*0Sstevel@tonic-gate 	       use = e->base.bufsiz - SvCUR(e->dataSV);
305*0Sstevel@tonic-gate 	    }
306*0Sstevel@tonic-gate 	    }
307*0Sstevel@tonic-gate 	    sv_catpvn(e->dataSV,(char*)ptr,use);
308*0Sstevel@tonic-gate 	}
309*0Sstevel@tonic-gate 	else {
310*0Sstevel@tonic-gate 	    /* Create a "dummy" SV to represent the available data from layer below */
311*0Sstevel@tonic-gate 	    if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
312*0Sstevel@tonic-gate 		Safefree(SvPVX(e->dataSV));
313*0Sstevel@tonic-gate 	    }
314*0Sstevel@tonic-gate 	    if (use > (SSize_t)e->base.bufsiz) {
315*0Sstevel@tonic-gate 		if (e->flags & NEEDS_LINES) {
316*0Sstevel@tonic-gate 		    /* Have to grow buffer */
317*0Sstevel@tonic-gate 		    e->base.bufsiz = use;
318*0Sstevel@tonic-gate 		    PerlIOEncode_get_base(aTHX_ f);
319*0Sstevel@tonic-gate 		}
320*0Sstevel@tonic-gate 		else {
321*0Sstevel@tonic-gate 	       use = e->base.bufsiz;
322*0Sstevel@tonic-gate 	    }
323*0Sstevel@tonic-gate 	    }
324*0Sstevel@tonic-gate 	    SvPVX(e->dataSV) = (char *) ptr;
325*0Sstevel@tonic-gate 	    SvLEN(e->dataSV) = 0;  /* Hands off sv.c - it isn't yours */
326*0Sstevel@tonic-gate 	    SvCUR_set(e->dataSV,use);
327*0Sstevel@tonic-gate 	    SvPOK_only(e->dataSV);
328*0Sstevel@tonic-gate 	}
329*0Sstevel@tonic-gate 	SvUTF8_off(e->dataSV);
330*0Sstevel@tonic-gate 	PUSHMARK(sp);
331*0Sstevel@tonic-gate 	XPUSHs(e->enc);
332*0Sstevel@tonic-gate 	XPUSHs(e->dataSV);
333*0Sstevel@tonic-gate 	XPUSHs(e->chk);
334*0Sstevel@tonic-gate 	PUTBACK;
335*0Sstevel@tonic-gate 	if (call_method("decode", G_SCALAR) != 1) {
336*0Sstevel@tonic-gate 	    Perl_die(aTHX_ "panic: decode did not return a value");
337*0Sstevel@tonic-gate 	}
338*0Sstevel@tonic-gate 	SPAGAIN;
339*0Sstevel@tonic-gate 	uni = POPs;
340*0Sstevel@tonic-gate 	PUTBACK;
341*0Sstevel@tonic-gate 	/* Now get translated string (forced to UTF-8) and use as buffer */
342*0Sstevel@tonic-gate 	if (SvPOK(uni)) {
343*0Sstevel@tonic-gate 	    s = SvPVutf8(uni, len);
344*0Sstevel@tonic-gate #ifdef PARANOID_ENCODE_CHECKS
345*0Sstevel@tonic-gate 	    if (len && !is_utf8_string((U8*)s,len)) {
346*0Sstevel@tonic-gate 		Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
347*0Sstevel@tonic-gate 	    }
348*0Sstevel@tonic-gate #endif
349*0Sstevel@tonic-gate 	}
350*0Sstevel@tonic-gate 	if (len > 0) {
351*0Sstevel@tonic-gate 	    /* Got _something */
352*0Sstevel@tonic-gate 	    /* if decode gave us back dataSV then data may vanish when
353*0Sstevel@tonic-gate 	       we do ptrcnt adjust - so take our copy now.
354*0Sstevel@tonic-gate 	       (The copy is a pain - need a put-it-here option for decode.)
355*0Sstevel@tonic-gate 	     */
356*0Sstevel@tonic-gate 	    sv_setpvn(e->bufsv,s,len);
357*0Sstevel@tonic-gate 	    e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
358*0Sstevel@tonic-gate 	    e->base.end = e->base.ptr + SvCUR(e->bufsv);
359*0Sstevel@tonic-gate 	    PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
360*0Sstevel@tonic-gate 	    SvUTF8_on(e->bufsv);
361*0Sstevel@tonic-gate 
362*0Sstevel@tonic-gate 	    /* Adjust ptr/cnt not taking anything which
363*0Sstevel@tonic-gate 	       did not translate - not clear this is a win */
364*0Sstevel@tonic-gate 	    /* compute amount we took */
365*0Sstevel@tonic-gate 	    use -= SvCUR(e->dataSV);
366*0Sstevel@tonic-gate 	    PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
367*0Sstevel@tonic-gate 	    /* and as we did not take it it isn't pending */
368*0Sstevel@tonic-gate 	    SvCUR_set(e->dataSV,0);
369*0Sstevel@tonic-gate 	} else {
370*0Sstevel@tonic-gate 	    /* Got nothing - assume partial character so we need some more */
371*0Sstevel@tonic-gate 	    /* Make sure e->dataSV is a normal SV before re-filling as
372*0Sstevel@tonic-gate 	       buffer alias will change under us
373*0Sstevel@tonic-gate 	     */
374*0Sstevel@tonic-gate 	    s = SvPV(e->dataSV,len);
375*0Sstevel@tonic-gate 	    sv_setpvn(e->dataSV,s,len);
376*0Sstevel@tonic-gate 	    PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
377*0Sstevel@tonic-gate 	    goto retry;
378*0Sstevel@tonic-gate 	}
379*0Sstevel@tonic-gate     }
380*0Sstevel@tonic-gate     else {
381*0Sstevel@tonic-gate     end_of_file:
382*0Sstevel@tonic-gate 	code = -1;
383*0Sstevel@tonic-gate 	if (avail == 0)
384*0Sstevel@tonic-gate 	    PerlIOBase(f)->flags |= PERLIO_F_EOF;
385*0Sstevel@tonic-gate 	else
386*0Sstevel@tonic-gate 	    PerlIOBase(f)->flags |= PERLIO_F_ERROR;
387*0Sstevel@tonic-gate     }
388*0Sstevel@tonic-gate     FREETMPS;
389*0Sstevel@tonic-gate     LEAVE;
390*0Sstevel@tonic-gate     POPSTACK;
391*0Sstevel@tonic-gate     return code;
392*0Sstevel@tonic-gate }
393*0Sstevel@tonic-gate 
394*0Sstevel@tonic-gate IV
PerlIOEncode_flush(pTHX_ PerlIO * f)395*0Sstevel@tonic-gate PerlIOEncode_flush(pTHX_ PerlIO * f)
396*0Sstevel@tonic-gate {
397*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
398*0Sstevel@tonic-gate     IV code = 0;
399*0Sstevel@tonic-gate 
400*0Sstevel@tonic-gate     if (e->bufsv) {
401*0Sstevel@tonic-gate 	dSP;
402*0Sstevel@tonic-gate 	SV *str;
403*0Sstevel@tonic-gate 	char *s;
404*0Sstevel@tonic-gate 	STRLEN len;
405*0Sstevel@tonic-gate 	SSize_t count = 0;
406*0Sstevel@tonic-gate 	if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
407*0Sstevel@tonic-gate 	    /* Write case - encode the buffer and write() to layer below */
408*0Sstevel@tonic-gate 	    PUSHSTACKi(PERLSI_MAGIC);
409*0Sstevel@tonic-gate 	    SPAGAIN;
410*0Sstevel@tonic-gate 	    ENTER;
411*0Sstevel@tonic-gate 	    SAVETMPS;
412*0Sstevel@tonic-gate 	    PUSHMARK(sp);
413*0Sstevel@tonic-gate 	    XPUSHs(e->enc);
414*0Sstevel@tonic-gate 	    SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
415*0Sstevel@tonic-gate 	    SvUTF8_on(e->bufsv);
416*0Sstevel@tonic-gate 	    XPUSHs(e->bufsv);
417*0Sstevel@tonic-gate 	    XPUSHs(e->chk);
418*0Sstevel@tonic-gate 	    PUTBACK;
419*0Sstevel@tonic-gate 	    if (call_method("encode", G_SCALAR) != 1) {
420*0Sstevel@tonic-gate 		Perl_die(aTHX_ "panic: encode did not return a value");
421*0Sstevel@tonic-gate 	    }
422*0Sstevel@tonic-gate 	    SPAGAIN;
423*0Sstevel@tonic-gate 	    str = POPs;
424*0Sstevel@tonic-gate 	    PUTBACK;
425*0Sstevel@tonic-gate 	    s = SvPV(str, len);
426*0Sstevel@tonic-gate 	    count = PerlIO_write(PerlIONext(f),s,len);
427*0Sstevel@tonic-gate 	    if ((STRLEN)count != len) {
428*0Sstevel@tonic-gate 		code = -1;
429*0Sstevel@tonic-gate 	    }
430*0Sstevel@tonic-gate 	    FREETMPS;
431*0Sstevel@tonic-gate 	    LEAVE;
432*0Sstevel@tonic-gate 	    POPSTACK;
433*0Sstevel@tonic-gate 	    if (PerlIO_flush(PerlIONext(f)) != 0) {
434*0Sstevel@tonic-gate 		code = -1;
435*0Sstevel@tonic-gate 	    }
436*0Sstevel@tonic-gate 	    if (SvCUR(e->bufsv)) {
437*0Sstevel@tonic-gate 		/* Did not all translate */
438*0Sstevel@tonic-gate 		e->base.ptr = e->base.buf+SvCUR(e->bufsv);
439*0Sstevel@tonic-gate 		return code;
440*0Sstevel@tonic-gate 	    }
441*0Sstevel@tonic-gate 	}
442*0Sstevel@tonic-gate 	else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
443*0Sstevel@tonic-gate 	    /* read case */
444*0Sstevel@tonic-gate 	    /* if we have any untranslated stuff then unread that first */
445*0Sstevel@tonic-gate 	    /* FIXME - unread is fragile is there a better way ? */
446*0Sstevel@tonic-gate 	    if (e->dataSV && SvCUR(e->dataSV)) {
447*0Sstevel@tonic-gate 		s = SvPV(e->dataSV, len);
448*0Sstevel@tonic-gate 		count = PerlIO_unread(PerlIONext(f),s,len);
449*0Sstevel@tonic-gate 		if ((STRLEN)count != len) {
450*0Sstevel@tonic-gate 		    code = -1;
451*0Sstevel@tonic-gate 		}
452*0Sstevel@tonic-gate 		SvCUR_set(e->dataSV,0);
453*0Sstevel@tonic-gate 	    }
454*0Sstevel@tonic-gate 	    /* See if there is anything left in the buffer */
455*0Sstevel@tonic-gate 	    if (e->base.ptr < e->base.end) {
456*0Sstevel@tonic-gate 		/* Bother - have unread data.
457*0Sstevel@tonic-gate 		   re-encode and unread() to layer below
458*0Sstevel@tonic-gate 		 */
459*0Sstevel@tonic-gate 		PUSHSTACKi(PERLSI_MAGIC);
460*0Sstevel@tonic-gate 		SPAGAIN;
461*0Sstevel@tonic-gate 		ENTER;
462*0Sstevel@tonic-gate 		SAVETMPS;
463*0Sstevel@tonic-gate 		str = sv_newmortal();
464*0Sstevel@tonic-gate 		sv_upgrade(str, SVt_PV);
465*0Sstevel@tonic-gate 		SvPVX(str) = (char*)e->base.ptr;
466*0Sstevel@tonic-gate 		SvLEN(str) = 0;
467*0Sstevel@tonic-gate 		SvCUR_set(str, e->base.end - e->base.ptr);
468*0Sstevel@tonic-gate 		SvPOK_only(str);
469*0Sstevel@tonic-gate 		SvUTF8_on(str);
470*0Sstevel@tonic-gate 		PUSHMARK(sp);
471*0Sstevel@tonic-gate 		XPUSHs(e->enc);
472*0Sstevel@tonic-gate 		XPUSHs(str);
473*0Sstevel@tonic-gate 		XPUSHs(e->chk);
474*0Sstevel@tonic-gate 		PUTBACK;
475*0Sstevel@tonic-gate 		if (call_method("encode", G_SCALAR) != 1) {
476*0Sstevel@tonic-gate 		     Perl_die(aTHX_ "panic: encode did not return a value");
477*0Sstevel@tonic-gate 		}
478*0Sstevel@tonic-gate 		SPAGAIN;
479*0Sstevel@tonic-gate 		str = POPs;
480*0Sstevel@tonic-gate 		PUTBACK;
481*0Sstevel@tonic-gate 		s = SvPV(str, len);
482*0Sstevel@tonic-gate 		count = PerlIO_unread(PerlIONext(f),s,len);
483*0Sstevel@tonic-gate 		if ((STRLEN)count != len) {
484*0Sstevel@tonic-gate 		    code = -1;
485*0Sstevel@tonic-gate 		}
486*0Sstevel@tonic-gate 		FREETMPS;
487*0Sstevel@tonic-gate 		LEAVE;
488*0Sstevel@tonic-gate 		POPSTACK;
489*0Sstevel@tonic-gate 	    }
490*0Sstevel@tonic-gate 	}
491*0Sstevel@tonic-gate 	e->base.ptr = e->base.end = e->base.buf;
492*0Sstevel@tonic-gate 	PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
493*0Sstevel@tonic-gate     }
494*0Sstevel@tonic-gate     return code;
495*0Sstevel@tonic-gate }
496*0Sstevel@tonic-gate 
497*0Sstevel@tonic-gate IV
PerlIOEncode_close(pTHX_ PerlIO * f)498*0Sstevel@tonic-gate PerlIOEncode_close(pTHX_ PerlIO * f)
499*0Sstevel@tonic-gate {
500*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
501*0Sstevel@tonic-gate     IV code;
502*0Sstevel@tonic-gate     if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
503*0Sstevel@tonic-gate 	/* Discard partial character */
504*0Sstevel@tonic-gate 	if (e->dataSV) {
505*0Sstevel@tonic-gate 	    SvCUR_set(e->dataSV,0);
506*0Sstevel@tonic-gate 	}
507*0Sstevel@tonic-gate 	/* Don't back decode and unread any pending data */
508*0Sstevel@tonic-gate 	e->base.ptr = e->base.end = e->base.buf;
509*0Sstevel@tonic-gate     }
510*0Sstevel@tonic-gate     code = PerlIOBase_close(aTHX_ f);
511*0Sstevel@tonic-gate     if (e->bufsv) {
512*0Sstevel@tonic-gate 	/* This should only fire for write case */
513*0Sstevel@tonic-gate 	if (e->base.buf && e->base.ptr > e->base.buf) {
514*0Sstevel@tonic-gate 	    Perl_croak(aTHX_ "Close with partial character");
515*0Sstevel@tonic-gate 	}
516*0Sstevel@tonic-gate 	SvREFCNT_dec(e->bufsv);
517*0Sstevel@tonic-gate 	e->bufsv = Nullsv;
518*0Sstevel@tonic-gate     }
519*0Sstevel@tonic-gate     e->base.buf = NULL;
520*0Sstevel@tonic-gate     e->base.ptr = NULL;
521*0Sstevel@tonic-gate     e->base.end = NULL;
522*0Sstevel@tonic-gate     PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
523*0Sstevel@tonic-gate     return code;
524*0Sstevel@tonic-gate }
525*0Sstevel@tonic-gate 
526*0Sstevel@tonic-gate Off_t
PerlIOEncode_tell(pTHX_ PerlIO * f)527*0Sstevel@tonic-gate PerlIOEncode_tell(pTHX_ PerlIO * f)
528*0Sstevel@tonic-gate {
529*0Sstevel@tonic-gate     PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
530*0Sstevel@tonic-gate     /* Unfortunately the only way to get a postion is to (re-)translate,
531*0Sstevel@tonic-gate        the UTF8 we have in bufefr and then ask layer below
532*0Sstevel@tonic-gate      */
533*0Sstevel@tonic-gate     PerlIO_flush(f);
534*0Sstevel@tonic-gate     if (b->buf && b->ptr > b->buf) {
535*0Sstevel@tonic-gate 	Perl_croak(aTHX_ "Cannot tell at partial character");
536*0Sstevel@tonic-gate     }
537*0Sstevel@tonic-gate     return PerlIO_tell(PerlIONext(f));
538*0Sstevel@tonic-gate }
539*0Sstevel@tonic-gate 
540*0Sstevel@tonic-gate PerlIO *
PerlIOEncode_dup(pTHX_ PerlIO * f,PerlIO * o,CLONE_PARAMS * params,int flags)541*0Sstevel@tonic-gate PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
542*0Sstevel@tonic-gate 		 CLONE_PARAMS * params, int flags)
543*0Sstevel@tonic-gate {
544*0Sstevel@tonic-gate     if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
545*0Sstevel@tonic-gate 	PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
546*0Sstevel@tonic-gate 	PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
547*0Sstevel@tonic-gate 	if (oe->enc) {
548*0Sstevel@tonic-gate 	    fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
549*0Sstevel@tonic-gate 	}
550*0Sstevel@tonic-gate     }
551*0Sstevel@tonic-gate     return f;
552*0Sstevel@tonic-gate }
553*0Sstevel@tonic-gate 
554*0Sstevel@tonic-gate SSize_t
PerlIOEncode_write(pTHX_ PerlIO * f,const void * vbuf,Size_t count)555*0Sstevel@tonic-gate PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
556*0Sstevel@tonic-gate {
557*0Sstevel@tonic-gate     PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
558*0Sstevel@tonic-gate     if (e->flags & NEEDS_LINES) {
559*0Sstevel@tonic-gate 	SSize_t done = 0;
560*0Sstevel@tonic-gate 	const char *ptr = (const char *) vbuf;
561*0Sstevel@tonic-gate 	const char *end = ptr+count;
562*0Sstevel@tonic-gate 	while (ptr < end) {
563*0Sstevel@tonic-gate 	    const char *nl = ptr;
564*0Sstevel@tonic-gate 	    while (nl < end && *nl++ != '\n') /* empty body */;
565*0Sstevel@tonic-gate 	    done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
566*0Sstevel@tonic-gate 	    if (done != nl-ptr) {
567*0Sstevel@tonic-gate 		if (done > 0) {
568*0Sstevel@tonic-gate 		    ptr += done;
569*0Sstevel@tonic-gate 		}
570*0Sstevel@tonic-gate 		break;
571*0Sstevel@tonic-gate 	    }
572*0Sstevel@tonic-gate 	    ptr += done;
573*0Sstevel@tonic-gate 	    if (ptr[-1] == '\n') {
574*0Sstevel@tonic-gate 		if (PerlIOEncode_flush(aTHX_ f) != 0) {
575*0Sstevel@tonic-gate 		    break;
576*0Sstevel@tonic-gate 		}
577*0Sstevel@tonic-gate 	    }
578*0Sstevel@tonic-gate 	}
579*0Sstevel@tonic-gate 	return (SSize_t) (ptr - (const char *) vbuf);
580*0Sstevel@tonic-gate     }
581*0Sstevel@tonic-gate     else {
582*0Sstevel@tonic-gate 	return PerlIOBuf_write(aTHX_ f, vbuf, count);
583*0Sstevel@tonic-gate     }
584*0Sstevel@tonic-gate }
585*0Sstevel@tonic-gate 
586*0Sstevel@tonic-gate PerlIO_funcs PerlIO_encode = {
587*0Sstevel@tonic-gate     sizeof(PerlIO_funcs),
588*0Sstevel@tonic-gate     "encoding",
589*0Sstevel@tonic-gate     sizeof(PerlIOEncode),
590*0Sstevel@tonic-gate     PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
591*0Sstevel@tonic-gate     PerlIOEncode_pushed,
592*0Sstevel@tonic-gate     PerlIOEncode_popped,
593*0Sstevel@tonic-gate     PerlIOBuf_open,
594*0Sstevel@tonic-gate     NULL, /* binmode - always pop */
595*0Sstevel@tonic-gate     PerlIOEncode_getarg,
596*0Sstevel@tonic-gate     PerlIOBase_fileno,
597*0Sstevel@tonic-gate     PerlIOEncode_dup,
598*0Sstevel@tonic-gate     PerlIOBuf_read,
599*0Sstevel@tonic-gate     PerlIOBuf_unread,
600*0Sstevel@tonic-gate     PerlIOEncode_write,
601*0Sstevel@tonic-gate     PerlIOBuf_seek,
602*0Sstevel@tonic-gate     PerlIOEncode_tell,
603*0Sstevel@tonic-gate     PerlIOEncode_close,
604*0Sstevel@tonic-gate     PerlIOEncode_flush,
605*0Sstevel@tonic-gate     PerlIOEncode_fill,
606*0Sstevel@tonic-gate     PerlIOBase_eof,
607*0Sstevel@tonic-gate     PerlIOBase_error,
608*0Sstevel@tonic-gate     PerlIOBase_clearerr,
609*0Sstevel@tonic-gate     PerlIOBase_setlinebuf,
610*0Sstevel@tonic-gate     PerlIOEncode_get_base,
611*0Sstevel@tonic-gate     PerlIOBuf_bufsiz,
612*0Sstevel@tonic-gate     PerlIOBuf_get_ptr,
613*0Sstevel@tonic-gate     PerlIOBuf_get_cnt,
614*0Sstevel@tonic-gate     PerlIOBuf_set_ptrcnt,
615*0Sstevel@tonic-gate };
616*0Sstevel@tonic-gate #endif				/* encode layer */
617*0Sstevel@tonic-gate 
618*0Sstevel@tonic-gate MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
619*0Sstevel@tonic-gate 
620*0Sstevel@tonic-gate PROTOTYPES: ENABLE
621*0Sstevel@tonic-gate 
622*0Sstevel@tonic-gate BOOT:
623*0Sstevel@tonic-gate {
624*0Sstevel@tonic-gate     SV *chk = get_sv("PerlIO::encoding::fallback", GV_ADD|GV_ADDMULTI);
625*0Sstevel@tonic-gate     /*
626*0Sstevel@tonic-gate      * we now "use Encode ()" here instead of
627*0Sstevel@tonic-gate      * PerlIO/encoding.pm.  This avoids SEGV when ":encoding()"
628*0Sstevel@tonic-gate      * is invoked without prior "use Encode". -- dankogai
629*0Sstevel@tonic-gate      */
630*0Sstevel@tonic-gate     PUSHSTACKi(PERLSI_MAGIC);
631*0Sstevel@tonic-gate     SPAGAIN;
632*0Sstevel@tonic-gate     if (!get_cv(OUR_DEFAULT_FB, 0)) {
633*0Sstevel@tonic-gate #if 0
634*0Sstevel@tonic-gate 	/* This would just be an irritant now loading works */
635*0Sstevel@tonic-gate 	Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
636*0Sstevel@tonic-gate #endif
637*0Sstevel@tonic-gate 	ENTER;
638*0Sstevel@tonic-gate 	/* Encode needs a lot of stack - it is likely to move ... */
639*0Sstevel@tonic-gate 	PUTBACK;
640*0Sstevel@tonic-gate 	/* The SV is magically freed by load_module */
641*0Sstevel@tonic-gate 	load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv);
642*0Sstevel@tonic-gate 	SPAGAIN;
643*0Sstevel@tonic-gate 	LEAVE;
644*0Sstevel@tonic-gate     }
645*0Sstevel@tonic-gate     PUSHMARK(sp);
646*0Sstevel@tonic-gate     PUTBACK;
647*0Sstevel@tonic-gate     if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
648*0Sstevel@tonic-gate 	    /* should never happen */
649*0Sstevel@tonic-gate 	    Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
650*0Sstevel@tonic-gate     }
651*0Sstevel@tonic-gate     SPAGAIN;
652*0Sstevel@tonic-gate     sv_setsv(chk, POPs);
653*0Sstevel@tonic-gate     PUTBACK;
654*0Sstevel@tonic-gate #ifdef PERLIO_LAYERS
655*0Sstevel@tonic-gate     PerlIO_define_layer(aTHX_ &PerlIO_encode);
656*0Sstevel@tonic-gate #endif
657*0Sstevel@tonic-gate     POPSTACK;
658*0Sstevel@tonic-gate }
659