1*0Sstevel@tonic-gate /* doop.c
2*0Sstevel@tonic-gate *
3*0Sstevel@tonic-gate * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4*0Sstevel@tonic-gate * 2000, 2001, 2002, 2004, by Larry Wall and others
5*0Sstevel@tonic-gate *
6*0Sstevel@tonic-gate * You may distribute under the terms of either the GNU General Public
7*0Sstevel@tonic-gate * License or the Artistic License, as specified in the README file.
8*0Sstevel@tonic-gate *
9*0Sstevel@tonic-gate */
10*0Sstevel@tonic-gate
11*0Sstevel@tonic-gate /*
12*0Sstevel@tonic-gate * "'So that was the job I felt I had to do when I started,' thought Sam."
13*0Sstevel@tonic-gate */
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gate #include "EXTERN.h"
16*0Sstevel@tonic-gate #define PERL_IN_DOOP_C
17*0Sstevel@tonic-gate #include "perl.h"
18*0Sstevel@tonic-gate
19*0Sstevel@tonic-gate #ifndef PERL_MICRO
20*0Sstevel@tonic-gate #include <signal.h>
21*0Sstevel@tonic-gate #endif
22*0Sstevel@tonic-gate
23*0Sstevel@tonic-gate STATIC I32
S_do_trans_simple(pTHX_ SV * sv)24*0Sstevel@tonic-gate S_do_trans_simple(pTHX_ SV *sv)
25*0Sstevel@tonic-gate {
26*0Sstevel@tonic-gate U8 *s;
27*0Sstevel@tonic-gate U8 *d;
28*0Sstevel@tonic-gate U8 *send;
29*0Sstevel@tonic-gate U8 *dstart;
30*0Sstevel@tonic-gate I32 matches = 0;
31*0Sstevel@tonic-gate I32 grows = PL_op->op_private & OPpTRANS_GROWS;
32*0Sstevel@tonic-gate STRLEN len;
33*0Sstevel@tonic-gate short *tbl;
34*0Sstevel@tonic-gate I32 ch;
35*0Sstevel@tonic-gate
36*0Sstevel@tonic-gate tbl = (short*)cPVOP->op_pv;
37*0Sstevel@tonic-gate if (!tbl)
38*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
39*0Sstevel@tonic-gate
40*0Sstevel@tonic-gate s = (U8*)SvPV(sv, len);
41*0Sstevel@tonic-gate send = s + len;
42*0Sstevel@tonic-gate
43*0Sstevel@tonic-gate /* First, take care of non-UTF-8 input strings, because they're easy */
44*0Sstevel@tonic-gate if (!SvUTF8(sv)) {
45*0Sstevel@tonic-gate while (s < send) {
46*0Sstevel@tonic-gate if ((ch = tbl[*s]) >= 0) {
47*0Sstevel@tonic-gate matches++;
48*0Sstevel@tonic-gate *s++ = (U8)ch;
49*0Sstevel@tonic-gate }
50*0Sstevel@tonic-gate else
51*0Sstevel@tonic-gate s++;
52*0Sstevel@tonic-gate }
53*0Sstevel@tonic-gate SvSETMAGIC(sv);
54*0Sstevel@tonic-gate return matches;
55*0Sstevel@tonic-gate }
56*0Sstevel@tonic-gate
57*0Sstevel@tonic-gate /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
58*0Sstevel@tonic-gate if (grows)
59*0Sstevel@tonic-gate New(0, d, len*2+1, U8);
60*0Sstevel@tonic-gate else
61*0Sstevel@tonic-gate d = s;
62*0Sstevel@tonic-gate dstart = d;
63*0Sstevel@tonic-gate while (s < send) {
64*0Sstevel@tonic-gate STRLEN ulen;
65*0Sstevel@tonic-gate UV c;
66*0Sstevel@tonic-gate
67*0Sstevel@tonic-gate /* Need to check this, otherwise 128..255 won't match */
68*0Sstevel@tonic-gate c = utf8n_to_uvchr(s, send - s, &ulen, 0);
69*0Sstevel@tonic-gate if (c < 0x100 && (ch = tbl[c]) >= 0) {
70*0Sstevel@tonic-gate matches++;
71*0Sstevel@tonic-gate d = uvchr_to_utf8(d, ch);
72*0Sstevel@tonic-gate s += ulen;
73*0Sstevel@tonic-gate }
74*0Sstevel@tonic-gate else { /* No match -> copy */
75*0Sstevel@tonic-gate Move(s, d, ulen, U8);
76*0Sstevel@tonic-gate d += ulen;
77*0Sstevel@tonic-gate s += ulen;
78*0Sstevel@tonic-gate }
79*0Sstevel@tonic-gate }
80*0Sstevel@tonic-gate if (grows) {
81*0Sstevel@tonic-gate sv_setpvn(sv, (char*)dstart, d - dstart);
82*0Sstevel@tonic-gate Safefree(dstart);
83*0Sstevel@tonic-gate }
84*0Sstevel@tonic-gate else {
85*0Sstevel@tonic-gate *d = '\0';
86*0Sstevel@tonic-gate SvCUR_set(sv, d - dstart);
87*0Sstevel@tonic-gate }
88*0Sstevel@tonic-gate SvUTF8_on(sv);
89*0Sstevel@tonic-gate SvSETMAGIC(sv);
90*0Sstevel@tonic-gate return matches;
91*0Sstevel@tonic-gate }
92*0Sstevel@tonic-gate
93*0Sstevel@tonic-gate STATIC I32
S_do_trans_count(pTHX_ SV * sv)94*0Sstevel@tonic-gate S_do_trans_count(pTHX_ SV *sv)
95*0Sstevel@tonic-gate {
96*0Sstevel@tonic-gate U8 *s;
97*0Sstevel@tonic-gate U8 *send;
98*0Sstevel@tonic-gate I32 matches = 0;
99*0Sstevel@tonic-gate STRLEN len;
100*0Sstevel@tonic-gate short *tbl;
101*0Sstevel@tonic-gate I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
102*0Sstevel@tonic-gate
103*0Sstevel@tonic-gate tbl = (short*)cPVOP->op_pv;
104*0Sstevel@tonic-gate if (!tbl)
105*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
106*0Sstevel@tonic-gate
107*0Sstevel@tonic-gate s = (U8*)SvPV(sv, len);
108*0Sstevel@tonic-gate send = s + len;
109*0Sstevel@tonic-gate
110*0Sstevel@tonic-gate if (!SvUTF8(sv))
111*0Sstevel@tonic-gate while (s < send) {
112*0Sstevel@tonic-gate if (tbl[*s++] >= 0)
113*0Sstevel@tonic-gate matches++;
114*0Sstevel@tonic-gate }
115*0Sstevel@tonic-gate else
116*0Sstevel@tonic-gate while (s < send) {
117*0Sstevel@tonic-gate UV c;
118*0Sstevel@tonic-gate STRLEN ulen;
119*0Sstevel@tonic-gate c = utf8n_to_uvchr(s, send - s, &ulen, 0);
120*0Sstevel@tonic-gate if (c < 0x100) {
121*0Sstevel@tonic-gate if (tbl[c] >= 0)
122*0Sstevel@tonic-gate matches++;
123*0Sstevel@tonic-gate } else if (complement)
124*0Sstevel@tonic-gate matches++;
125*0Sstevel@tonic-gate s += ulen;
126*0Sstevel@tonic-gate }
127*0Sstevel@tonic-gate
128*0Sstevel@tonic-gate return matches;
129*0Sstevel@tonic-gate }
130*0Sstevel@tonic-gate
131*0Sstevel@tonic-gate STATIC I32
S_do_trans_complex(pTHX_ SV * sv)132*0Sstevel@tonic-gate S_do_trans_complex(pTHX_ SV *sv)
133*0Sstevel@tonic-gate {
134*0Sstevel@tonic-gate U8 *s;
135*0Sstevel@tonic-gate U8 *send;
136*0Sstevel@tonic-gate U8 *d;
137*0Sstevel@tonic-gate U8 *dstart;
138*0Sstevel@tonic-gate I32 isutf8;
139*0Sstevel@tonic-gate I32 matches = 0;
140*0Sstevel@tonic-gate I32 grows = PL_op->op_private & OPpTRANS_GROWS;
141*0Sstevel@tonic-gate I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
142*0Sstevel@tonic-gate I32 del = PL_op->op_private & OPpTRANS_DELETE;
143*0Sstevel@tonic-gate STRLEN len, rlen = 0;
144*0Sstevel@tonic-gate short *tbl;
145*0Sstevel@tonic-gate I32 ch;
146*0Sstevel@tonic-gate
147*0Sstevel@tonic-gate tbl = (short*)cPVOP->op_pv;
148*0Sstevel@tonic-gate if (!tbl)
149*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
150*0Sstevel@tonic-gate
151*0Sstevel@tonic-gate s = (U8*)SvPV(sv, len);
152*0Sstevel@tonic-gate isutf8 = SvUTF8(sv);
153*0Sstevel@tonic-gate send = s + len;
154*0Sstevel@tonic-gate
155*0Sstevel@tonic-gate if (!isutf8) {
156*0Sstevel@tonic-gate dstart = d = s;
157*0Sstevel@tonic-gate if (PL_op->op_private & OPpTRANS_SQUASH) {
158*0Sstevel@tonic-gate U8* p = send;
159*0Sstevel@tonic-gate while (s < send) {
160*0Sstevel@tonic-gate if ((ch = tbl[*s]) >= 0) {
161*0Sstevel@tonic-gate *d = (U8)ch;
162*0Sstevel@tonic-gate matches++;
163*0Sstevel@tonic-gate if (p != d - 1 || *p != *d)
164*0Sstevel@tonic-gate p = d++;
165*0Sstevel@tonic-gate }
166*0Sstevel@tonic-gate else if (ch == -1) /* -1 is unmapped character */
167*0Sstevel@tonic-gate *d++ = *s;
168*0Sstevel@tonic-gate else if (ch == -2) /* -2 is delete character */
169*0Sstevel@tonic-gate matches++;
170*0Sstevel@tonic-gate s++;
171*0Sstevel@tonic-gate }
172*0Sstevel@tonic-gate }
173*0Sstevel@tonic-gate else {
174*0Sstevel@tonic-gate while (s < send) {
175*0Sstevel@tonic-gate if ((ch = tbl[*s]) >= 0) {
176*0Sstevel@tonic-gate matches++;
177*0Sstevel@tonic-gate *d++ = (U8)ch;
178*0Sstevel@tonic-gate }
179*0Sstevel@tonic-gate else if (ch == -1) /* -1 is unmapped character */
180*0Sstevel@tonic-gate *d++ = *s;
181*0Sstevel@tonic-gate else if (ch == -2) /* -2 is delete character */
182*0Sstevel@tonic-gate matches++;
183*0Sstevel@tonic-gate s++;
184*0Sstevel@tonic-gate }
185*0Sstevel@tonic-gate }
186*0Sstevel@tonic-gate *d = '\0';
187*0Sstevel@tonic-gate SvCUR_set(sv, d - dstart);
188*0Sstevel@tonic-gate }
189*0Sstevel@tonic-gate else { /* isutf8 */
190*0Sstevel@tonic-gate if (grows)
191*0Sstevel@tonic-gate New(0, d, len*2+1, U8);
192*0Sstevel@tonic-gate else
193*0Sstevel@tonic-gate d = s;
194*0Sstevel@tonic-gate dstart = d;
195*0Sstevel@tonic-gate if (complement && !del)
196*0Sstevel@tonic-gate rlen = tbl[0x100];
197*0Sstevel@tonic-gate
198*0Sstevel@tonic-gate #ifdef MACOS_TRADITIONAL
199*0Sstevel@tonic-gate #define comp CoMP /* "comp" is a keyword in some compilers ... */
200*0Sstevel@tonic-gate #endif
201*0Sstevel@tonic-gate
202*0Sstevel@tonic-gate if (PL_op->op_private & OPpTRANS_SQUASH) {
203*0Sstevel@tonic-gate UV pch = 0xfeedface;
204*0Sstevel@tonic-gate while (s < send) {
205*0Sstevel@tonic-gate STRLEN len;
206*0Sstevel@tonic-gate UV comp = utf8_to_uvchr(s, &len);
207*0Sstevel@tonic-gate
208*0Sstevel@tonic-gate if (comp > 0xff) {
209*0Sstevel@tonic-gate if (!complement) {
210*0Sstevel@tonic-gate Copy(s, d, len, U8);
211*0Sstevel@tonic-gate d += len;
212*0Sstevel@tonic-gate }
213*0Sstevel@tonic-gate else {
214*0Sstevel@tonic-gate matches++;
215*0Sstevel@tonic-gate if (!del) {
216*0Sstevel@tonic-gate ch = (rlen == 0) ? comp :
217*0Sstevel@tonic-gate (comp - 0x100 < rlen) ?
218*0Sstevel@tonic-gate tbl[comp+1] : tbl[0x100+rlen];
219*0Sstevel@tonic-gate if ((UV)ch != pch) {
220*0Sstevel@tonic-gate d = uvchr_to_utf8(d, ch);
221*0Sstevel@tonic-gate pch = (UV)ch;
222*0Sstevel@tonic-gate }
223*0Sstevel@tonic-gate s += len;
224*0Sstevel@tonic-gate continue;
225*0Sstevel@tonic-gate }
226*0Sstevel@tonic-gate }
227*0Sstevel@tonic-gate }
228*0Sstevel@tonic-gate else if ((ch = tbl[comp]) >= 0) {
229*0Sstevel@tonic-gate matches++;
230*0Sstevel@tonic-gate if ((UV)ch != pch) {
231*0Sstevel@tonic-gate d = uvchr_to_utf8(d, ch);
232*0Sstevel@tonic-gate pch = (UV)ch;
233*0Sstevel@tonic-gate }
234*0Sstevel@tonic-gate s += len;
235*0Sstevel@tonic-gate continue;
236*0Sstevel@tonic-gate }
237*0Sstevel@tonic-gate else if (ch == -1) { /* -1 is unmapped character */
238*0Sstevel@tonic-gate Copy(s, d, len, U8);
239*0Sstevel@tonic-gate d += len;
240*0Sstevel@tonic-gate }
241*0Sstevel@tonic-gate else if (ch == -2) /* -2 is delete character */
242*0Sstevel@tonic-gate matches++;
243*0Sstevel@tonic-gate s += len;
244*0Sstevel@tonic-gate pch = 0xfeedface;
245*0Sstevel@tonic-gate }
246*0Sstevel@tonic-gate }
247*0Sstevel@tonic-gate else {
248*0Sstevel@tonic-gate while (s < send) {
249*0Sstevel@tonic-gate STRLEN len;
250*0Sstevel@tonic-gate UV comp = utf8_to_uvchr(s, &len);
251*0Sstevel@tonic-gate if (comp > 0xff) {
252*0Sstevel@tonic-gate if (!complement) {
253*0Sstevel@tonic-gate Move(s, d, len, U8);
254*0Sstevel@tonic-gate d += len;
255*0Sstevel@tonic-gate }
256*0Sstevel@tonic-gate else {
257*0Sstevel@tonic-gate matches++;
258*0Sstevel@tonic-gate if (!del) {
259*0Sstevel@tonic-gate if (comp - 0x100 < rlen)
260*0Sstevel@tonic-gate d = uvchr_to_utf8(d, tbl[comp+1]);
261*0Sstevel@tonic-gate else
262*0Sstevel@tonic-gate d = uvchr_to_utf8(d, tbl[0x100+rlen]);
263*0Sstevel@tonic-gate }
264*0Sstevel@tonic-gate }
265*0Sstevel@tonic-gate }
266*0Sstevel@tonic-gate else if ((ch = tbl[comp]) >= 0) {
267*0Sstevel@tonic-gate d = uvchr_to_utf8(d, ch);
268*0Sstevel@tonic-gate matches++;
269*0Sstevel@tonic-gate }
270*0Sstevel@tonic-gate else if (ch == -1) { /* -1 is unmapped character */
271*0Sstevel@tonic-gate Copy(s, d, len, U8);
272*0Sstevel@tonic-gate d += len;
273*0Sstevel@tonic-gate }
274*0Sstevel@tonic-gate else if (ch == -2) /* -2 is delete character */
275*0Sstevel@tonic-gate matches++;
276*0Sstevel@tonic-gate s += len;
277*0Sstevel@tonic-gate }
278*0Sstevel@tonic-gate }
279*0Sstevel@tonic-gate if (grows) {
280*0Sstevel@tonic-gate sv_setpvn(sv, (char*)dstart, d - dstart);
281*0Sstevel@tonic-gate Safefree(dstart);
282*0Sstevel@tonic-gate }
283*0Sstevel@tonic-gate else {
284*0Sstevel@tonic-gate *d = '\0';
285*0Sstevel@tonic-gate SvCUR_set(sv, d - dstart);
286*0Sstevel@tonic-gate }
287*0Sstevel@tonic-gate SvUTF8_on(sv);
288*0Sstevel@tonic-gate }
289*0Sstevel@tonic-gate SvSETMAGIC(sv);
290*0Sstevel@tonic-gate return matches;
291*0Sstevel@tonic-gate }
292*0Sstevel@tonic-gate
293*0Sstevel@tonic-gate STATIC I32
S_do_trans_simple_utf8(pTHX_ SV * sv)294*0Sstevel@tonic-gate S_do_trans_simple_utf8(pTHX_ SV *sv)
295*0Sstevel@tonic-gate {
296*0Sstevel@tonic-gate U8 *s;
297*0Sstevel@tonic-gate U8 *send;
298*0Sstevel@tonic-gate U8 *d;
299*0Sstevel@tonic-gate U8 *start;
300*0Sstevel@tonic-gate U8 *dstart, *dend;
301*0Sstevel@tonic-gate I32 matches = 0;
302*0Sstevel@tonic-gate I32 grows = PL_op->op_private & OPpTRANS_GROWS;
303*0Sstevel@tonic-gate STRLEN len;
304*0Sstevel@tonic-gate
305*0Sstevel@tonic-gate SV* rv = (SV*)cSVOP->op_sv;
306*0Sstevel@tonic-gate HV* hv = (HV*)SvRV(rv);
307*0Sstevel@tonic-gate SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
308*0Sstevel@tonic-gate UV none = svp ? SvUV(*svp) : 0x7fffffff;
309*0Sstevel@tonic-gate UV extra = none + 1;
310*0Sstevel@tonic-gate UV final = 0;
311*0Sstevel@tonic-gate UV uv;
312*0Sstevel@tonic-gate I32 isutf8;
313*0Sstevel@tonic-gate U8 hibit = 0;
314*0Sstevel@tonic-gate
315*0Sstevel@tonic-gate s = (U8*)SvPV(sv, len);
316*0Sstevel@tonic-gate isutf8 = SvUTF8(sv);
317*0Sstevel@tonic-gate if (!isutf8) {
318*0Sstevel@tonic-gate U8 *t = s, *e = s + len;
319*0Sstevel@tonic-gate while (t < e) {
320*0Sstevel@tonic-gate U8 ch = *t++;
321*0Sstevel@tonic-gate if ((hibit = !NATIVE_IS_INVARIANT(ch)))
322*0Sstevel@tonic-gate break;
323*0Sstevel@tonic-gate }
324*0Sstevel@tonic-gate if (hibit)
325*0Sstevel@tonic-gate s = bytes_to_utf8(s, &len);
326*0Sstevel@tonic-gate }
327*0Sstevel@tonic-gate send = s + len;
328*0Sstevel@tonic-gate start = s;
329*0Sstevel@tonic-gate
330*0Sstevel@tonic-gate svp = hv_fetch(hv, "FINAL", 5, FALSE);
331*0Sstevel@tonic-gate if (svp)
332*0Sstevel@tonic-gate final = SvUV(*svp);
333*0Sstevel@tonic-gate
334*0Sstevel@tonic-gate if (grows) {
335*0Sstevel@tonic-gate /* d needs to be bigger than s, in case e.g. upgrading is required */
336*0Sstevel@tonic-gate New(0, d, len*3+UTF8_MAXLEN, U8);
337*0Sstevel@tonic-gate dend = d + len * 3;
338*0Sstevel@tonic-gate dstart = d;
339*0Sstevel@tonic-gate }
340*0Sstevel@tonic-gate else {
341*0Sstevel@tonic-gate dstart = d = s;
342*0Sstevel@tonic-gate dend = d + len;
343*0Sstevel@tonic-gate }
344*0Sstevel@tonic-gate
345*0Sstevel@tonic-gate while (s < send) {
346*0Sstevel@tonic-gate if ((uv = swash_fetch(rv, s, TRUE)) < none) {
347*0Sstevel@tonic-gate s += UTF8SKIP(s);
348*0Sstevel@tonic-gate matches++;
349*0Sstevel@tonic-gate d = uvuni_to_utf8(d, uv);
350*0Sstevel@tonic-gate }
351*0Sstevel@tonic-gate else if (uv == none) {
352*0Sstevel@tonic-gate int i = UTF8SKIP(s);
353*0Sstevel@tonic-gate Move(s, d, i, U8);
354*0Sstevel@tonic-gate d += i;
355*0Sstevel@tonic-gate s += i;
356*0Sstevel@tonic-gate }
357*0Sstevel@tonic-gate else if (uv == extra) {
358*0Sstevel@tonic-gate int i = UTF8SKIP(s);
359*0Sstevel@tonic-gate s += i;
360*0Sstevel@tonic-gate matches++;
361*0Sstevel@tonic-gate d = uvuni_to_utf8(d, final);
362*0Sstevel@tonic-gate }
363*0Sstevel@tonic-gate else
364*0Sstevel@tonic-gate s += UTF8SKIP(s);
365*0Sstevel@tonic-gate
366*0Sstevel@tonic-gate if (d > dend) {
367*0Sstevel@tonic-gate STRLEN clen = d - dstart;
368*0Sstevel@tonic-gate STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
369*0Sstevel@tonic-gate if (!grows)
370*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
371*0Sstevel@tonic-gate Renew(dstart, nlen+UTF8_MAXLEN, U8);
372*0Sstevel@tonic-gate d = dstart + clen;
373*0Sstevel@tonic-gate dend = dstart + nlen;
374*0Sstevel@tonic-gate }
375*0Sstevel@tonic-gate }
376*0Sstevel@tonic-gate if (grows || hibit) {
377*0Sstevel@tonic-gate sv_setpvn(sv, (char*)dstart, d - dstart);
378*0Sstevel@tonic-gate Safefree(dstart);
379*0Sstevel@tonic-gate if (grows && hibit)
380*0Sstevel@tonic-gate Safefree(start);
381*0Sstevel@tonic-gate }
382*0Sstevel@tonic-gate else {
383*0Sstevel@tonic-gate *d = '\0';
384*0Sstevel@tonic-gate SvCUR_set(sv, d - dstart);
385*0Sstevel@tonic-gate }
386*0Sstevel@tonic-gate SvSETMAGIC(sv);
387*0Sstevel@tonic-gate SvUTF8_on(sv);
388*0Sstevel@tonic-gate
389*0Sstevel@tonic-gate return matches;
390*0Sstevel@tonic-gate }
391*0Sstevel@tonic-gate
392*0Sstevel@tonic-gate STATIC I32
S_do_trans_count_utf8(pTHX_ SV * sv)393*0Sstevel@tonic-gate S_do_trans_count_utf8(pTHX_ SV *sv)
394*0Sstevel@tonic-gate {
395*0Sstevel@tonic-gate U8 *s;
396*0Sstevel@tonic-gate U8 *start = 0, *send;
397*0Sstevel@tonic-gate I32 matches = 0;
398*0Sstevel@tonic-gate STRLEN len;
399*0Sstevel@tonic-gate
400*0Sstevel@tonic-gate SV* rv = (SV*)cSVOP->op_sv;
401*0Sstevel@tonic-gate HV* hv = (HV*)SvRV(rv);
402*0Sstevel@tonic-gate SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
403*0Sstevel@tonic-gate UV none = svp ? SvUV(*svp) : 0x7fffffff;
404*0Sstevel@tonic-gate UV extra = none + 1;
405*0Sstevel@tonic-gate UV uv;
406*0Sstevel@tonic-gate U8 hibit = 0;
407*0Sstevel@tonic-gate
408*0Sstevel@tonic-gate s = (U8*)SvPV(sv, len);
409*0Sstevel@tonic-gate if (!SvUTF8(sv)) {
410*0Sstevel@tonic-gate U8 *t = s, *e = s + len;
411*0Sstevel@tonic-gate while (t < e) {
412*0Sstevel@tonic-gate U8 ch = *t++;
413*0Sstevel@tonic-gate if ((hibit = !NATIVE_IS_INVARIANT(ch)))
414*0Sstevel@tonic-gate break;
415*0Sstevel@tonic-gate }
416*0Sstevel@tonic-gate if (hibit)
417*0Sstevel@tonic-gate start = s = bytes_to_utf8(s, &len);
418*0Sstevel@tonic-gate }
419*0Sstevel@tonic-gate send = s + len;
420*0Sstevel@tonic-gate
421*0Sstevel@tonic-gate while (s < send) {
422*0Sstevel@tonic-gate if ((uv = swash_fetch(rv, s, TRUE)) < none || uv == extra)
423*0Sstevel@tonic-gate matches++;
424*0Sstevel@tonic-gate s += UTF8SKIP(s);
425*0Sstevel@tonic-gate }
426*0Sstevel@tonic-gate if (hibit)
427*0Sstevel@tonic-gate Safefree(start);
428*0Sstevel@tonic-gate
429*0Sstevel@tonic-gate return matches;
430*0Sstevel@tonic-gate }
431*0Sstevel@tonic-gate
432*0Sstevel@tonic-gate STATIC I32
S_do_trans_complex_utf8(pTHX_ SV * sv)433*0Sstevel@tonic-gate S_do_trans_complex_utf8(pTHX_ SV *sv)
434*0Sstevel@tonic-gate {
435*0Sstevel@tonic-gate U8 *s;
436*0Sstevel@tonic-gate U8 *start, *send;
437*0Sstevel@tonic-gate U8 *d;
438*0Sstevel@tonic-gate I32 matches = 0;
439*0Sstevel@tonic-gate I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
440*0Sstevel@tonic-gate I32 del = PL_op->op_private & OPpTRANS_DELETE;
441*0Sstevel@tonic-gate I32 grows = PL_op->op_private & OPpTRANS_GROWS;
442*0Sstevel@tonic-gate SV* rv = (SV*)cSVOP->op_sv;
443*0Sstevel@tonic-gate HV* hv = (HV*)SvRV(rv);
444*0Sstevel@tonic-gate SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
445*0Sstevel@tonic-gate UV none = svp ? SvUV(*svp) : 0x7fffffff;
446*0Sstevel@tonic-gate UV extra = none + 1;
447*0Sstevel@tonic-gate UV final = 0;
448*0Sstevel@tonic-gate bool havefinal = FALSE;
449*0Sstevel@tonic-gate UV uv;
450*0Sstevel@tonic-gate STRLEN len;
451*0Sstevel@tonic-gate U8 *dstart, *dend;
452*0Sstevel@tonic-gate I32 isutf8;
453*0Sstevel@tonic-gate U8 hibit = 0;
454*0Sstevel@tonic-gate
455*0Sstevel@tonic-gate s = (U8*)SvPV(sv, len);
456*0Sstevel@tonic-gate isutf8 = SvUTF8(sv);
457*0Sstevel@tonic-gate if (!isutf8) {
458*0Sstevel@tonic-gate U8 *t = s, *e = s + len;
459*0Sstevel@tonic-gate while (t < e) {
460*0Sstevel@tonic-gate U8 ch = *t++;
461*0Sstevel@tonic-gate if ((hibit = !NATIVE_IS_INVARIANT(ch)))
462*0Sstevel@tonic-gate break;
463*0Sstevel@tonic-gate }
464*0Sstevel@tonic-gate if (hibit)
465*0Sstevel@tonic-gate s = bytes_to_utf8(s, &len);
466*0Sstevel@tonic-gate }
467*0Sstevel@tonic-gate send = s + len;
468*0Sstevel@tonic-gate start = s;
469*0Sstevel@tonic-gate
470*0Sstevel@tonic-gate svp = hv_fetch(hv, "FINAL", 5, FALSE);
471*0Sstevel@tonic-gate if (svp) {
472*0Sstevel@tonic-gate final = SvUV(*svp);
473*0Sstevel@tonic-gate havefinal = TRUE;
474*0Sstevel@tonic-gate }
475*0Sstevel@tonic-gate
476*0Sstevel@tonic-gate if (grows) {
477*0Sstevel@tonic-gate /* d needs to be bigger than s, in case e.g. upgrading is required */
478*0Sstevel@tonic-gate New(0, d, len*3+UTF8_MAXLEN, U8);
479*0Sstevel@tonic-gate dend = d + len * 3;
480*0Sstevel@tonic-gate dstart = d;
481*0Sstevel@tonic-gate }
482*0Sstevel@tonic-gate else {
483*0Sstevel@tonic-gate dstart = d = s;
484*0Sstevel@tonic-gate dend = d + len;
485*0Sstevel@tonic-gate }
486*0Sstevel@tonic-gate
487*0Sstevel@tonic-gate if (squash) {
488*0Sstevel@tonic-gate UV puv = 0xfeedface;
489*0Sstevel@tonic-gate while (s < send) {
490*0Sstevel@tonic-gate uv = swash_fetch(rv, s, TRUE);
491*0Sstevel@tonic-gate
492*0Sstevel@tonic-gate if (d > dend) {
493*0Sstevel@tonic-gate STRLEN clen = d - dstart;
494*0Sstevel@tonic-gate STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
495*0Sstevel@tonic-gate if (!grows)
496*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
497*0Sstevel@tonic-gate Renew(dstart, nlen+UTF8_MAXLEN, U8);
498*0Sstevel@tonic-gate d = dstart + clen;
499*0Sstevel@tonic-gate dend = dstart + nlen;
500*0Sstevel@tonic-gate }
501*0Sstevel@tonic-gate if (uv < none) {
502*0Sstevel@tonic-gate matches++;
503*0Sstevel@tonic-gate s += UTF8SKIP(s);
504*0Sstevel@tonic-gate if (uv != puv) {
505*0Sstevel@tonic-gate d = uvuni_to_utf8(d, uv);
506*0Sstevel@tonic-gate puv = uv;
507*0Sstevel@tonic-gate }
508*0Sstevel@tonic-gate continue;
509*0Sstevel@tonic-gate }
510*0Sstevel@tonic-gate else if (uv == none) { /* "none" is unmapped character */
511*0Sstevel@tonic-gate int i = UTF8SKIP(s);
512*0Sstevel@tonic-gate Move(s, d, i, U8);
513*0Sstevel@tonic-gate d += i;
514*0Sstevel@tonic-gate s += i;
515*0Sstevel@tonic-gate puv = 0xfeedface;
516*0Sstevel@tonic-gate continue;
517*0Sstevel@tonic-gate }
518*0Sstevel@tonic-gate else if (uv == extra && !del) {
519*0Sstevel@tonic-gate matches++;
520*0Sstevel@tonic-gate if (havefinal) {
521*0Sstevel@tonic-gate s += UTF8SKIP(s);
522*0Sstevel@tonic-gate if (puv != final) {
523*0Sstevel@tonic-gate d = uvuni_to_utf8(d, final);
524*0Sstevel@tonic-gate puv = final;
525*0Sstevel@tonic-gate }
526*0Sstevel@tonic-gate }
527*0Sstevel@tonic-gate else {
528*0Sstevel@tonic-gate STRLEN len;
529*0Sstevel@tonic-gate uv = utf8_to_uvuni(s, &len);
530*0Sstevel@tonic-gate if (uv != puv) {
531*0Sstevel@tonic-gate Move(s, d, len, U8);
532*0Sstevel@tonic-gate d += len;
533*0Sstevel@tonic-gate puv = uv;
534*0Sstevel@tonic-gate }
535*0Sstevel@tonic-gate s += len;
536*0Sstevel@tonic-gate }
537*0Sstevel@tonic-gate continue;
538*0Sstevel@tonic-gate }
539*0Sstevel@tonic-gate matches++; /* "none+1" is delete character */
540*0Sstevel@tonic-gate s += UTF8SKIP(s);
541*0Sstevel@tonic-gate }
542*0Sstevel@tonic-gate }
543*0Sstevel@tonic-gate else {
544*0Sstevel@tonic-gate while (s < send) {
545*0Sstevel@tonic-gate uv = swash_fetch(rv, s, TRUE);
546*0Sstevel@tonic-gate if (d > dend) {
547*0Sstevel@tonic-gate STRLEN clen = d - dstart;
548*0Sstevel@tonic-gate STRLEN nlen = dend - dstart + len + UTF8_MAXLEN;
549*0Sstevel@tonic-gate if (!grows)
550*0Sstevel@tonic-gate Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
551*0Sstevel@tonic-gate Renew(dstart, nlen+UTF8_MAXLEN, U8);
552*0Sstevel@tonic-gate d = dstart + clen;
553*0Sstevel@tonic-gate dend = dstart + nlen;
554*0Sstevel@tonic-gate }
555*0Sstevel@tonic-gate if (uv < none) {
556*0Sstevel@tonic-gate matches++;
557*0Sstevel@tonic-gate s += UTF8SKIP(s);
558*0Sstevel@tonic-gate d = uvuni_to_utf8(d, uv);
559*0Sstevel@tonic-gate continue;
560*0Sstevel@tonic-gate }
561*0Sstevel@tonic-gate else if (uv == none) { /* "none" is unmapped character */
562*0Sstevel@tonic-gate int i = UTF8SKIP(s);
563*0Sstevel@tonic-gate Move(s, d, i, U8);
564*0Sstevel@tonic-gate d += i;
565*0Sstevel@tonic-gate s += i;
566*0Sstevel@tonic-gate continue;
567*0Sstevel@tonic-gate }
568*0Sstevel@tonic-gate else if (uv == extra && !del) {
569*0Sstevel@tonic-gate matches++;
570*0Sstevel@tonic-gate s += UTF8SKIP(s);
571*0Sstevel@tonic-gate d = uvuni_to_utf8(d, final);
572*0Sstevel@tonic-gate continue;
573*0Sstevel@tonic-gate }
574*0Sstevel@tonic-gate matches++; /* "none+1" is delete character */
575*0Sstevel@tonic-gate s += UTF8SKIP(s);
576*0Sstevel@tonic-gate }
577*0Sstevel@tonic-gate }
578*0Sstevel@tonic-gate if (grows || hibit) {
579*0Sstevel@tonic-gate sv_setpvn(sv, (char*)dstart, d - dstart);
580*0Sstevel@tonic-gate Safefree(dstart);
581*0Sstevel@tonic-gate if (grows && hibit)
582*0Sstevel@tonic-gate Safefree(start);
583*0Sstevel@tonic-gate }
584*0Sstevel@tonic-gate else {
585*0Sstevel@tonic-gate *d = '\0';
586*0Sstevel@tonic-gate SvCUR_set(sv, d - dstart);
587*0Sstevel@tonic-gate }
588*0Sstevel@tonic-gate SvUTF8_on(sv);
589*0Sstevel@tonic-gate SvSETMAGIC(sv);
590*0Sstevel@tonic-gate
591*0Sstevel@tonic-gate return matches;
592*0Sstevel@tonic-gate }
593*0Sstevel@tonic-gate
594*0Sstevel@tonic-gate I32
Perl_do_trans(pTHX_ SV * sv)595*0Sstevel@tonic-gate Perl_do_trans(pTHX_ SV *sv)
596*0Sstevel@tonic-gate {
597*0Sstevel@tonic-gate STRLEN len;
598*0Sstevel@tonic-gate I32 hasutf = (PL_op->op_private &
599*0Sstevel@tonic-gate (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
600*0Sstevel@tonic-gate
601*0Sstevel@tonic-gate if (SvREADONLY(sv)) {
602*0Sstevel@tonic-gate if (SvFAKE(sv))
603*0Sstevel@tonic-gate sv_force_normal(sv);
604*0Sstevel@tonic-gate if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
605*0Sstevel@tonic-gate Perl_croak(aTHX_ PL_no_modify);
606*0Sstevel@tonic-gate }
607*0Sstevel@tonic-gate (void)SvPV(sv, len);
608*0Sstevel@tonic-gate if (!len)
609*0Sstevel@tonic-gate return 0;
610*0Sstevel@tonic-gate if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) {
611*0Sstevel@tonic-gate if (!SvPOKp(sv))
612*0Sstevel@tonic-gate (void)SvPV_force(sv, len);
613*0Sstevel@tonic-gate (void)SvPOK_only_UTF8(sv);
614*0Sstevel@tonic-gate }
615*0Sstevel@tonic-gate
616*0Sstevel@tonic-gate DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
617*0Sstevel@tonic-gate
618*0Sstevel@tonic-gate switch (PL_op->op_private & ~hasutf & (
619*0Sstevel@tonic-gate OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL|
620*0Sstevel@tonic-gate OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
621*0Sstevel@tonic-gate case 0:
622*0Sstevel@tonic-gate if (hasutf)
623*0Sstevel@tonic-gate return do_trans_simple_utf8(sv);
624*0Sstevel@tonic-gate else
625*0Sstevel@tonic-gate return do_trans_simple(sv);
626*0Sstevel@tonic-gate
627*0Sstevel@tonic-gate case OPpTRANS_IDENTICAL:
628*0Sstevel@tonic-gate case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
629*0Sstevel@tonic-gate if (hasutf)
630*0Sstevel@tonic-gate return do_trans_count_utf8(sv);
631*0Sstevel@tonic-gate else
632*0Sstevel@tonic-gate return do_trans_count(sv);
633*0Sstevel@tonic-gate
634*0Sstevel@tonic-gate default:
635*0Sstevel@tonic-gate if (hasutf)
636*0Sstevel@tonic-gate return do_trans_complex_utf8(sv);
637*0Sstevel@tonic-gate else
638*0Sstevel@tonic-gate return do_trans_complex(sv);
639*0Sstevel@tonic-gate }
640*0Sstevel@tonic-gate }
641*0Sstevel@tonic-gate
642*0Sstevel@tonic-gate void
Perl_do_join(pTHX_ register SV * sv,SV * del,register SV ** mark,register SV ** sp)643*0Sstevel@tonic-gate Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
644*0Sstevel@tonic-gate {
645*0Sstevel@tonic-gate SV **oldmark = mark;
646*0Sstevel@tonic-gate register I32 items = sp - mark;
647*0Sstevel@tonic-gate register STRLEN len;
648*0Sstevel@tonic-gate STRLEN delimlen;
649*0Sstevel@tonic-gate STRLEN tmplen;
650*0Sstevel@tonic-gate
651*0Sstevel@tonic-gate (void) SvPV(del, delimlen); /* stringify and get the delimlen */
652*0Sstevel@tonic-gate /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
653*0Sstevel@tonic-gate
654*0Sstevel@tonic-gate mark++;
655*0Sstevel@tonic-gate len = (items > 0 ? (delimlen * (items - 1) ) : 0);
656*0Sstevel@tonic-gate (void)SvUPGRADE(sv, SVt_PV);
657*0Sstevel@tonic-gate if (SvLEN(sv) < len + items) { /* current length is way too short */
658*0Sstevel@tonic-gate while (items-- > 0) {
659*0Sstevel@tonic-gate if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
660*0Sstevel@tonic-gate SvPV(*mark, tmplen);
661*0Sstevel@tonic-gate len += tmplen;
662*0Sstevel@tonic-gate }
663*0Sstevel@tonic-gate mark++;
664*0Sstevel@tonic-gate }
665*0Sstevel@tonic-gate SvGROW(sv, len + 1); /* so try to pre-extend */
666*0Sstevel@tonic-gate
667*0Sstevel@tonic-gate mark = oldmark;
668*0Sstevel@tonic-gate items = sp - mark;
669*0Sstevel@tonic-gate ++mark;
670*0Sstevel@tonic-gate }
671*0Sstevel@tonic-gate
672*0Sstevel@tonic-gate sv_setpvn(sv, "", 0);
673*0Sstevel@tonic-gate /* sv_setpv retains old UTF8ness [perl #24846] */
674*0Sstevel@tonic-gate if (SvUTF8(sv))
675*0Sstevel@tonic-gate SvUTF8_off(sv);
676*0Sstevel@tonic-gate
677*0Sstevel@tonic-gate if (PL_tainting && SvMAGICAL(sv))
678*0Sstevel@tonic-gate SvTAINTED_off(sv);
679*0Sstevel@tonic-gate
680*0Sstevel@tonic-gate if (items-- > 0) {
681*0Sstevel@tonic-gate if (*mark)
682*0Sstevel@tonic-gate sv_catsv(sv, *mark);
683*0Sstevel@tonic-gate mark++;
684*0Sstevel@tonic-gate }
685*0Sstevel@tonic-gate
686*0Sstevel@tonic-gate if (delimlen) {
687*0Sstevel@tonic-gate for (; items > 0; items--,mark++) {
688*0Sstevel@tonic-gate sv_catsv(sv,del);
689*0Sstevel@tonic-gate sv_catsv(sv,*mark);
690*0Sstevel@tonic-gate }
691*0Sstevel@tonic-gate }
692*0Sstevel@tonic-gate else {
693*0Sstevel@tonic-gate for (; items > 0; items--,mark++)
694*0Sstevel@tonic-gate sv_catsv(sv,*mark);
695*0Sstevel@tonic-gate }
696*0Sstevel@tonic-gate SvSETMAGIC(sv);
697*0Sstevel@tonic-gate }
698*0Sstevel@tonic-gate
699*0Sstevel@tonic-gate void
Perl_do_sprintf(pTHX_ SV * sv,I32 len,SV ** sarg)700*0Sstevel@tonic-gate Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
701*0Sstevel@tonic-gate {
702*0Sstevel@tonic-gate STRLEN patlen;
703*0Sstevel@tonic-gate char *pat = SvPV(*sarg, patlen);
704*0Sstevel@tonic-gate bool do_taint = FALSE;
705*0Sstevel@tonic-gate
706*0Sstevel@tonic-gate SvUTF8_off(sv);
707*0Sstevel@tonic-gate if (DO_UTF8(*sarg))
708*0Sstevel@tonic-gate SvUTF8_on(sv);
709*0Sstevel@tonic-gate sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
710*0Sstevel@tonic-gate SvSETMAGIC(sv);
711*0Sstevel@tonic-gate if (do_taint)
712*0Sstevel@tonic-gate SvTAINTED_on(sv);
713*0Sstevel@tonic-gate }
714*0Sstevel@tonic-gate
715*0Sstevel@tonic-gate /* currently converts input to bytes if possible, but doesn't sweat failure */
716*0Sstevel@tonic-gate UV
Perl_do_vecget(pTHX_ SV * sv,I32 offset,I32 size)717*0Sstevel@tonic-gate Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
718*0Sstevel@tonic-gate {
719*0Sstevel@tonic-gate STRLEN srclen, len;
720*0Sstevel@tonic-gate unsigned char *s = (unsigned char *) SvPV(sv, srclen);
721*0Sstevel@tonic-gate UV retnum = 0;
722*0Sstevel@tonic-gate
723*0Sstevel@tonic-gate if (offset < 0)
724*0Sstevel@tonic-gate return retnum;
725*0Sstevel@tonic-gate if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
726*0Sstevel@tonic-gate Perl_croak(aTHX_ "Illegal number of bits in vec");
727*0Sstevel@tonic-gate
728*0Sstevel@tonic-gate if (SvUTF8(sv))
729*0Sstevel@tonic-gate (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
730*0Sstevel@tonic-gate
731*0Sstevel@tonic-gate offset *= size; /* turn into bit offset */
732*0Sstevel@tonic-gate len = (offset + size + 7) / 8; /* required number of bytes */
733*0Sstevel@tonic-gate if (len > srclen) {
734*0Sstevel@tonic-gate if (size <= 8)
735*0Sstevel@tonic-gate retnum = 0;
736*0Sstevel@tonic-gate else {
737*0Sstevel@tonic-gate offset >>= 3; /* turn into byte offset */
738*0Sstevel@tonic-gate if (size == 16) {
739*0Sstevel@tonic-gate if ((STRLEN)offset >= srclen)
740*0Sstevel@tonic-gate retnum = 0;
741*0Sstevel@tonic-gate else
742*0Sstevel@tonic-gate retnum = (UV) s[offset] << 8;
743*0Sstevel@tonic-gate }
744*0Sstevel@tonic-gate else if (size == 32) {
745*0Sstevel@tonic-gate if ((STRLEN)offset >= srclen)
746*0Sstevel@tonic-gate retnum = 0;
747*0Sstevel@tonic-gate else if ((STRLEN)(offset + 1) >= srclen)
748*0Sstevel@tonic-gate retnum =
749*0Sstevel@tonic-gate ((UV) s[offset ] << 24);
750*0Sstevel@tonic-gate else if ((STRLEN)(offset + 2) >= srclen)
751*0Sstevel@tonic-gate retnum =
752*0Sstevel@tonic-gate ((UV) s[offset ] << 24) +
753*0Sstevel@tonic-gate ((UV) s[offset + 1] << 16);
754*0Sstevel@tonic-gate else
755*0Sstevel@tonic-gate retnum =
756*0Sstevel@tonic-gate ((UV) s[offset ] << 24) +
757*0Sstevel@tonic-gate ((UV) s[offset + 1] << 16) +
758*0Sstevel@tonic-gate ( s[offset + 2] << 8);
759*0Sstevel@tonic-gate }
760*0Sstevel@tonic-gate #ifdef UV_IS_QUAD
761*0Sstevel@tonic-gate else if (size == 64) {
762*0Sstevel@tonic-gate if (ckWARN(WARN_PORTABLE))
763*0Sstevel@tonic-gate Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
764*0Sstevel@tonic-gate "Bit vector size > 32 non-portable");
765*0Sstevel@tonic-gate if (offset >= srclen)
766*0Sstevel@tonic-gate retnum = 0;
767*0Sstevel@tonic-gate else if (offset + 1 >= srclen)
768*0Sstevel@tonic-gate retnum =
769*0Sstevel@tonic-gate (UV) s[offset ] << 56;
770*0Sstevel@tonic-gate else if (offset + 2 >= srclen)
771*0Sstevel@tonic-gate retnum =
772*0Sstevel@tonic-gate ((UV) s[offset ] << 56) +
773*0Sstevel@tonic-gate ((UV) s[offset + 1] << 48);
774*0Sstevel@tonic-gate else if (offset + 3 >= srclen)
775*0Sstevel@tonic-gate retnum =
776*0Sstevel@tonic-gate ((UV) s[offset ] << 56) +
777*0Sstevel@tonic-gate ((UV) s[offset + 1] << 48) +
778*0Sstevel@tonic-gate ((UV) s[offset + 2] << 40);
779*0Sstevel@tonic-gate else if (offset + 4 >= srclen)
780*0Sstevel@tonic-gate retnum =
781*0Sstevel@tonic-gate ((UV) s[offset ] << 56) +
782*0Sstevel@tonic-gate ((UV) s[offset + 1] << 48) +
783*0Sstevel@tonic-gate ((UV) s[offset + 2] << 40) +
784*0Sstevel@tonic-gate ((UV) s[offset + 3] << 32);
785*0Sstevel@tonic-gate else if (offset + 5 >= srclen)
786*0Sstevel@tonic-gate retnum =
787*0Sstevel@tonic-gate ((UV) s[offset ] << 56) +
788*0Sstevel@tonic-gate ((UV) s[offset + 1] << 48) +
789*0Sstevel@tonic-gate ((UV) s[offset + 2] << 40) +
790*0Sstevel@tonic-gate ((UV) s[offset + 3] << 32) +
791*0Sstevel@tonic-gate ( s[offset + 4] << 24);
792*0Sstevel@tonic-gate else if (offset + 6 >= srclen)
793*0Sstevel@tonic-gate retnum =
794*0Sstevel@tonic-gate ((UV) s[offset ] << 56) +
795*0Sstevel@tonic-gate ((UV) s[offset + 1] << 48) +
796*0Sstevel@tonic-gate ((UV) s[offset + 2] << 40) +
797*0Sstevel@tonic-gate ((UV) s[offset + 3] << 32) +
798*0Sstevel@tonic-gate ((UV) s[offset + 4] << 24) +
799*0Sstevel@tonic-gate ((UV) s[offset + 5] << 16);
800*0Sstevel@tonic-gate else
801*0Sstevel@tonic-gate retnum =
802*0Sstevel@tonic-gate ((UV) s[offset ] << 56) +
803*0Sstevel@tonic-gate ((UV) s[offset + 1] << 48) +
804*0Sstevel@tonic-gate ((UV) s[offset + 2] << 40) +
805*0Sstevel@tonic-gate ((UV) s[offset + 3] << 32) +
806*0Sstevel@tonic-gate ((UV) s[offset + 4] << 24) +
807*0Sstevel@tonic-gate ((UV) s[offset + 5] << 16) +
808*0Sstevel@tonic-gate ( s[offset + 6] << 8);
809*0Sstevel@tonic-gate }
810*0Sstevel@tonic-gate #endif
811*0Sstevel@tonic-gate }
812*0Sstevel@tonic-gate }
813*0Sstevel@tonic-gate else if (size < 8)
814*0Sstevel@tonic-gate retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
815*0Sstevel@tonic-gate else {
816*0Sstevel@tonic-gate offset >>= 3; /* turn into byte offset */
817*0Sstevel@tonic-gate if (size == 8)
818*0Sstevel@tonic-gate retnum = s[offset];
819*0Sstevel@tonic-gate else if (size == 16)
820*0Sstevel@tonic-gate retnum =
821*0Sstevel@tonic-gate ((UV) s[offset] << 8) +
822*0Sstevel@tonic-gate s[offset + 1];
823*0Sstevel@tonic-gate else if (size == 32)
824*0Sstevel@tonic-gate retnum =
825*0Sstevel@tonic-gate ((UV) s[offset ] << 24) +
826*0Sstevel@tonic-gate ((UV) s[offset + 1] << 16) +
827*0Sstevel@tonic-gate ( s[offset + 2] << 8) +
828*0Sstevel@tonic-gate s[offset + 3];
829*0Sstevel@tonic-gate #ifdef UV_IS_QUAD
830*0Sstevel@tonic-gate else if (size == 64) {
831*0Sstevel@tonic-gate if (ckWARN(WARN_PORTABLE))
832*0Sstevel@tonic-gate Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
833*0Sstevel@tonic-gate "Bit vector size > 32 non-portable");
834*0Sstevel@tonic-gate retnum =
835*0Sstevel@tonic-gate ((UV) s[offset ] << 56) +
836*0Sstevel@tonic-gate ((UV) s[offset + 1] << 48) +
837*0Sstevel@tonic-gate ((UV) s[offset + 2] << 40) +
838*0Sstevel@tonic-gate ((UV) s[offset + 3] << 32) +
839*0Sstevel@tonic-gate ((UV) s[offset + 4] << 24) +
840*0Sstevel@tonic-gate ((UV) s[offset + 5] << 16) +
841*0Sstevel@tonic-gate ( s[offset + 6] << 8) +
842*0Sstevel@tonic-gate s[offset + 7];
843*0Sstevel@tonic-gate }
844*0Sstevel@tonic-gate #endif
845*0Sstevel@tonic-gate }
846*0Sstevel@tonic-gate
847*0Sstevel@tonic-gate return retnum;
848*0Sstevel@tonic-gate }
849*0Sstevel@tonic-gate
850*0Sstevel@tonic-gate /* currently converts input to bytes if possible but doesn't sweat failures,
851*0Sstevel@tonic-gate * although it does ensure that the string it clobbers is not marked as
852*0Sstevel@tonic-gate * utf8-valid any more
853*0Sstevel@tonic-gate */
854*0Sstevel@tonic-gate void
Perl_do_vecset(pTHX_ SV * sv)855*0Sstevel@tonic-gate Perl_do_vecset(pTHX_ SV *sv)
856*0Sstevel@tonic-gate {
857*0Sstevel@tonic-gate SV *targ = LvTARG(sv);
858*0Sstevel@tonic-gate register I32 offset;
859*0Sstevel@tonic-gate register I32 size;
860*0Sstevel@tonic-gate register unsigned char *s;
861*0Sstevel@tonic-gate register UV lval;
862*0Sstevel@tonic-gate I32 mask;
863*0Sstevel@tonic-gate STRLEN targlen;
864*0Sstevel@tonic-gate STRLEN len;
865*0Sstevel@tonic-gate
866*0Sstevel@tonic-gate if (!targ)
867*0Sstevel@tonic-gate return;
868*0Sstevel@tonic-gate s = (unsigned char*)SvPV_force(targ, targlen);
869*0Sstevel@tonic-gate if (SvUTF8(targ)) {
870*0Sstevel@tonic-gate /* This is handled by the SvPOK_only below...
871*0Sstevel@tonic-gate if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
872*0Sstevel@tonic-gate SvUTF8_off(targ);
873*0Sstevel@tonic-gate */
874*0Sstevel@tonic-gate (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
875*0Sstevel@tonic-gate }
876*0Sstevel@tonic-gate
877*0Sstevel@tonic-gate (void)SvPOK_only(targ);
878*0Sstevel@tonic-gate lval = SvUV(sv);
879*0Sstevel@tonic-gate offset = LvTARGOFF(sv);
880*0Sstevel@tonic-gate if (offset < 0)
881*0Sstevel@tonic-gate Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
882*0Sstevel@tonic-gate size = LvTARGLEN(sv);
883*0Sstevel@tonic-gate if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
884*0Sstevel@tonic-gate Perl_croak(aTHX_ "Illegal number of bits in vec");
885*0Sstevel@tonic-gate
886*0Sstevel@tonic-gate offset *= size; /* turn into bit offset */
887*0Sstevel@tonic-gate len = (offset + size + 7) / 8; /* required number of bytes */
888*0Sstevel@tonic-gate if (len > targlen) {
889*0Sstevel@tonic-gate s = (unsigned char*)SvGROW(targ, len + 1);
890*0Sstevel@tonic-gate (void)memzero((char *)(s + targlen), len - targlen + 1);
891*0Sstevel@tonic-gate SvCUR_set(targ, len);
892*0Sstevel@tonic-gate }
893*0Sstevel@tonic-gate
894*0Sstevel@tonic-gate if (size < 8) {
895*0Sstevel@tonic-gate mask = (1 << size) - 1;
896*0Sstevel@tonic-gate size = offset & 7;
897*0Sstevel@tonic-gate lval &= mask;
898*0Sstevel@tonic-gate offset >>= 3; /* turn into byte offset */
899*0Sstevel@tonic-gate s[offset] &= ~(mask << size);
900*0Sstevel@tonic-gate s[offset] |= lval << size;
901*0Sstevel@tonic-gate }
902*0Sstevel@tonic-gate else {
903*0Sstevel@tonic-gate offset >>= 3; /* turn into byte offset */
904*0Sstevel@tonic-gate if (size == 8)
905*0Sstevel@tonic-gate s[offset ] = (U8)( lval & 0xff);
906*0Sstevel@tonic-gate else if (size == 16) {
907*0Sstevel@tonic-gate s[offset ] = (U8)((lval >> 8) & 0xff);
908*0Sstevel@tonic-gate s[offset+1] = (U8)( lval & 0xff);
909*0Sstevel@tonic-gate }
910*0Sstevel@tonic-gate else if (size == 32) {
911*0Sstevel@tonic-gate s[offset ] = (U8)((lval >> 24) & 0xff);
912*0Sstevel@tonic-gate s[offset+1] = (U8)((lval >> 16) & 0xff);
913*0Sstevel@tonic-gate s[offset+2] = (U8)((lval >> 8) & 0xff);
914*0Sstevel@tonic-gate s[offset+3] = (U8)( lval & 0xff);
915*0Sstevel@tonic-gate }
916*0Sstevel@tonic-gate #ifdef UV_IS_QUAD
917*0Sstevel@tonic-gate else if (size == 64) {
918*0Sstevel@tonic-gate if (ckWARN(WARN_PORTABLE))
919*0Sstevel@tonic-gate Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
920*0Sstevel@tonic-gate "Bit vector size > 32 non-portable");
921*0Sstevel@tonic-gate s[offset ] = (U8)((lval >> 56) & 0xff);
922*0Sstevel@tonic-gate s[offset+1] = (U8)((lval >> 48) & 0xff);
923*0Sstevel@tonic-gate s[offset+2] = (U8)((lval >> 40) & 0xff);
924*0Sstevel@tonic-gate s[offset+3] = (U8)((lval >> 32) & 0xff);
925*0Sstevel@tonic-gate s[offset+4] = (U8)((lval >> 24) & 0xff);
926*0Sstevel@tonic-gate s[offset+5] = (U8)((lval >> 16) & 0xff);
927*0Sstevel@tonic-gate s[offset+6] = (U8)((lval >> 8) & 0xff);
928*0Sstevel@tonic-gate s[offset+7] = (U8)( lval & 0xff);
929*0Sstevel@tonic-gate }
930*0Sstevel@tonic-gate #endif
931*0Sstevel@tonic-gate }
932*0Sstevel@tonic-gate SvSETMAGIC(targ);
933*0Sstevel@tonic-gate }
934*0Sstevel@tonic-gate
935*0Sstevel@tonic-gate void
Perl_do_chop(pTHX_ register SV * astr,register SV * sv)936*0Sstevel@tonic-gate Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
937*0Sstevel@tonic-gate {
938*0Sstevel@tonic-gate STRLEN len;
939*0Sstevel@tonic-gate char *s;
940*0Sstevel@tonic-gate
941*0Sstevel@tonic-gate if (SvTYPE(sv) == SVt_PVAV) {
942*0Sstevel@tonic-gate register I32 i;
943*0Sstevel@tonic-gate I32 max;
944*0Sstevel@tonic-gate AV* av = (AV*)sv;
945*0Sstevel@tonic-gate max = AvFILL(av);
946*0Sstevel@tonic-gate for (i = 0; i <= max; i++) {
947*0Sstevel@tonic-gate sv = (SV*)av_fetch(av, i, FALSE);
948*0Sstevel@tonic-gate if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
949*0Sstevel@tonic-gate do_chop(astr, sv);
950*0Sstevel@tonic-gate }
951*0Sstevel@tonic-gate return;
952*0Sstevel@tonic-gate }
953*0Sstevel@tonic-gate else if (SvTYPE(sv) == SVt_PVHV) {
954*0Sstevel@tonic-gate HV* hv = (HV*)sv;
955*0Sstevel@tonic-gate HE* entry;
956*0Sstevel@tonic-gate (void)hv_iterinit(hv);
957*0Sstevel@tonic-gate /*SUPPRESS 560*/
958*0Sstevel@tonic-gate while ((entry = hv_iternext(hv)))
959*0Sstevel@tonic-gate do_chop(astr,hv_iterval(hv,entry));
960*0Sstevel@tonic-gate return;
961*0Sstevel@tonic-gate }
962*0Sstevel@tonic-gate else if (SvREADONLY(sv)) {
963*0Sstevel@tonic-gate if (SvFAKE(sv)) {
964*0Sstevel@tonic-gate /* SV is copy-on-write */
965*0Sstevel@tonic-gate sv_force_normal_flags(sv, 0);
966*0Sstevel@tonic-gate }
967*0Sstevel@tonic-gate if (SvREADONLY(sv))
968*0Sstevel@tonic-gate Perl_croak(aTHX_ PL_no_modify);
969*0Sstevel@tonic-gate }
970*0Sstevel@tonic-gate s = SvPV(sv, len);
971*0Sstevel@tonic-gate if (len && !SvPOK(sv))
972*0Sstevel@tonic-gate s = SvPV_force(sv, len);
973*0Sstevel@tonic-gate if (DO_UTF8(sv)) {
974*0Sstevel@tonic-gate if (s && len) {
975*0Sstevel@tonic-gate char *send = s + len;
976*0Sstevel@tonic-gate char *start = s;
977*0Sstevel@tonic-gate s = send - 1;
978*0Sstevel@tonic-gate while (s > start && UTF8_IS_CONTINUATION(*s))
979*0Sstevel@tonic-gate s--;
980*0Sstevel@tonic-gate if (utf8_to_uvchr((U8*)s, 0)) {
981*0Sstevel@tonic-gate sv_setpvn(astr, s, send - s);
982*0Sstevel@tonic-gate *s = '\0';
983*0Sstevel@tonic-gate SvCUR_set(sv, s - start);
984*0Sstevel@tonic-gate SvNIOK_off(sv);
985*0Sstevel@tonic-gate SvUTF8_on(astr);
986*0Sstevel@tonic-gate }
987*0Sstevel@tonic-gate }
988*0Sstevel@tonic-gate else
989*0Sstevel@tonic-gate sv_setpvn(astr, "", 0);
990*0Sstevel@tonic-gate }
991*0Sstevel@tonic-gate else if (s && len) {
992*0Sstevel@tonic-gate s += --len;
993*0Sstevel@tonic-gate sv_setpvn(astr, s, 1);
994*0Sstevel@tonic-gate *s = '\0';
995*0Sstevel@tonic-gate SvCUR_set(sv, len);
996*0Sstevel@tonic-gate SvUTF8_off(sv);
997*0Sstevel@tonic-gate SvNIOK_off(sv);
998*0Sstevel@tonic-gate }
999*0Sstevel@tonic-gate else
1000*0Sstevel@tonic-gate sv_setpvn(astr, "", 0);
1001*0Sstevel@tonic-gate SvSETMAGIC(sv);
1002*0Sstevel@tonic-gate }
1003*0Sstevel@tonic-gate
1004*0Sstevel@tonic-gate I32
Perl_do_chomp(pTHX_ register SV * sv)1005*0Sstevel@tonic-gate Perl_do_chomp(pTHX_ register SV *sv)
1006*0Sstevel@tonic-gate {
1007*0Sstevel@tonic-gate register I32 count;
1008*0Sstevel@tonic-gate STRLEN len;
1009*0Sstevel@tonic-gate STRLEN n_a;
1010*0Sstevel@tonic-gate char *s;
1011*0Sstevel@tonic-gate char *temp_buffer = NULL;
1012*0Sstevel@tonic-gate SV* svrecode = Nullsv;
1013*0Sstevel@tonic-gate
1014*0Sstevel@tonic-gate if (RsSNARF(PL_rs))
1015*0Sstevel@tonic-gate return 0;
1016*0Sstevel@tonic-gate if (RsRECORD(PL_rs))
1017*0Sstevel@tonic-gate return 0;
1018*0Sstevel@tonic-gate count = 0;
1019*0Sstevel@tonic-gate if (SvTYPE(sv) == SVt_PVAV) {
1020*0Sstevel@tonic-gate register I32 i;
1021*0Sstevel@tonic-gate I32 max;
1022*0Sstevel@tonic-gate AV* av = (AV*)sv;
1023*0Sstevel@tonic-gate max = AvFILL(av);
1024*0Sstevel@tonic-gate for (i = 0; i <= max; i++) {
1025*0Sstevel@tonic-gate sv = (SV*)av_fetch(av, i, FALSE);
1026*0Sstevel@tonic-gate if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
1027*0Sstevel@tonic-gate count += do_chomp(sv);
1028*0Sstevel@tonic-gate }
1029*0Sstevel@tonic-gate return count;
1030*0Sstevel@tonic-gate }
1031*0Sstevel@tonic-gate else if (SvTYPE(sv) == SVt_PVHV) {
1032*0Sstevel@tonic-gate HV* hv = (HV*)sv;
1033*0Sstevel@tonic-gate HE* entry;
1034*0Sstevel@tonic-gate (void)hv_iterinit(hv);
1035*0Sstevel@tonic-gate /*SUPPRESS 560*/
1036*0Sstevel@tonic-gate while ((entry = hv_iternext(hv)))
1037*0Sstevel@tonic-gate count += do_chomp(hv_iterval(hv,entry));
1038*0Sstevel@tonic-gate return count;
1039*0Sstevel@tonic-gate }
1040*0Sstevel@tonic-gate else if (SvREADONLY(sv)) {
1041*0Sstevel@tonic-gate if (SvFAKE(sv)) {
1042*0Sstevel@tonic-gate /* SV is copy-on-write */
1043*0Sstevel@tonic-gate sv_force_normal_flags(sv, 0);
1044*0Sstevel@tonic-gate }
1045*0Sstevel@tonic-gate if (SvREADONLY(sv))
1046*0Sstevel@tonic-gate Perl_croak(aTHX_ PL_no_modify);
1047*0Sstevel@tonic-gate }
1048*0Sstevel@tonic-gate
1049*0Sstevel@tonic-gate if (PL_encoding) {
1050*0Sstevel@tonic-gate if (!SvUTF8(sv)) {
1051*0Sstevel@tonic-gate /* XXX, here sv is utf8-ized as a side-effect!
1052*0Sstevel@tonic-gate If encoding.pm is used properly, almost string-generating
1053*0Sstevel@tonic-gate operations, including literal strings, chr(), input data, etc.
1054*0Sstevel@tonic-gate should have been utf8-ized already, right?
1055*0Sstevel@tonic-gate */
1056*0Sstevel@tonic-gate sv_recode_to_utf8(sv, PL_encoding);
1057*0Sstevel@tonic-gate }
1058*0Sstevel@tonic-gate }
1059*0Sstevel@tonic-gate
1060*0Sstevel@tonic-gate s = SvPV(sv, len);
1061*0Sstevel@tonic-gate if (s && len) {
1062*0Sstevel@tonic-gate s += --len;
1063*0Sstevel@tonic-gate if (RsPARA(PL_rs)) {
1064*0Sstevel@tonic-gate if (*s != '\n')
1065*0Sstevel@tonic-gate goto nope;
1066*0Sstevel@tonic-gate ++count;
1067*0Sstevel@tonic-gate while (len && s[-1] == '\n') {
1068*0Sstevel@tonic-gate --len;
1069*0Sstevel@tonic-gate --s;
1070*0Sstevel@tonic-gate ++count;
1071*0Sstevel@tonic-gate }
1072*0Sstevel@tonic-gate }
1073*0Sstevel@tonic-gate else {
1074*0Sstevel@tonic-gate STRLEN rslen, rs_charlen;
1075*0Sstevel@tonic-gate char *rsptr = SvPV(PL_rs, rslen);
1076*0Sstevel@tonic-gate
1077*0Sstevel@tonic-gate rs_charlen = SvUTF8(PL_rs)
1078*0Sstevel@tonic-gate ? sv_len_utf8(PL_rs)
1079*0Sstevel@tonic-gate : rslen;
1080*0Sstevel@tonic-gate
1081*0Sstevel@tonic-gate if (SvUTF8(PL_rs) != SvUTF8(sv)) {
1082*0Sstevel@tonic-gate /* Assumption is that rs is shorter than the scalar. */
1083*0Sstevel@tonic-gate if (SvUTF8(PL_rs)) {
1084*0Sstevel@tonic-gate /* RS is utf8, scalar is 8 bit. */
1085*0Sstevel@tonic-gate bool is_utf8 = TRUE;
1086*0Sstevel@tonic-gate temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
1087*0Sstevel@tonic-gate &rslen, &is_utf8);
1088*0Sstevel@tonic-gate if (is_utf8) {
1089*0Sstevel@tonic-gate /* Cannot downgrade, therefore cannot possibly match
1090*0Sstevel@tonic-gate */
1091*0Sstevel@tonic-gate assert (temp_buffer == rsptr);
1092*0Sstevel@tonic-gate temp_buffer = NULL;
1093*0Sstevel@tonic-gate goto nope;
1094*0Sstevel@tonic-gate }
1095*0Sstevel@tonic-gate rsptr = temp_buffer;
1096*0Sstevel@tonic-gate }
1097*0Sstevel@tonic-gate else if (PL_encoding) {
1098*0Sstevel@tonic-gate /* RS is 8 bit, encoding.pm is used.
1099*0Sstevel@tonic-gate * Do not recode PL_rs as a side-effect. */
1100*0Sstevel@tonic-gate svrecode = newSVpvn(rsptr, rslen);
1101*0Sstevel@tonic-gate sv_recode_to_utf8(svrecode, PL_encoding);
1102*0Sstevel@tonic-gate rsptr = SvPV(svrecode, rslen);
1103*0Sstevel@tonic-gate rs_charlen = sv_len_utf8(svrecode);
1104*0Sstevel@tonic-gate }
1105*0Sstevel@tonic-gate else {
1106*0Sstevel@tonic-gate /* RS is 8 bit, scalar is utf8. */
1107*0Sstevel@tonic-gate temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
1108*0Sstevel@tonic-gate rsptr = temp_buffer;
1109*0Sstevel@tonic-gate }
1110*0Sstevel@tonic-gate }
1111*0Sstevel@tonic-gate if (rslen == 1) {
1112*0Sstevel@tonic-gate if (*s != *rsptr)
1113*0Sstevel@tonic-gate goto nope;
1114*0Sstevel@tonic-gate ++count;
1115*0Sstevel@tonic-gate }
1116*0Sstevel@tonic-gate else {
1117*0Sstevel@tonic-gate if (len < rslen - 1)
1118*0Sstevel@tonic-gate goto nope;
1119*0Sstevel@tonic-gate len -= rslen - 1;
1120*0Sstevel@tonic-gate s -= rslen - 1;
1121*0Sstevel@tonic-gate if (memNE(s, rsptr, rslen))
1122*0Sstevel@tonic-gate goto nope;
1123*0Sstevel@tonic-gate count += rs_charlen;
1124*0Sstevel@tonic-gate }
1125*0Sstevel@tonic-gate }
1126*0Sstevel@tonic-gate s = SvPV_force(sv, n_a);
1127*0Sstevel@tonic-gate SvCUR_set(sv, len);
1128*0Sstevel@tonic-gate *SvEND(sv) = '\0';
1129*0Sstevel@tonic-gate SvNIOK_off(sv);
1130*0Sstevel@tonic-gate SvSETMAGIC(sv);
1131*0Sstevel@tonic-gate }
1132*0Sstevel@tonic-gate nope:
1133*0Sstevel@tonic-gate
1134*0Sstevel@tonic-gate if (svrecode)
1135*0Sstevel@tonic-gate SvREFCNT_dec(svrecode);
1136*0Sstevel@tonic-gate
1137*0Sstevel@tonic-gate Safefree(temp_buffer);
1138*0Sstevel@tonic-gate return count;
1139*0Sstevel@tonic-gate }
1140*0Sstevel@tonic-gate
1141*0Sstevel@tonic-gate void
Perl_do_vop(pTHX_ I32 optype,SV * sv,SV * left,SV * right)1142*0Sstevel@tonic-gate Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1143*0Sstevel@tonic-gate {
1144*0Sstevel@tonic-gate #ifdef LIBERAL
1145*0Sstevel@tonic-gate register long *dl;
1146*0Sstevel@tonic-gate register long *ll;
1147*0Sstevel@tonic-gate register long *rl;
1148*0Sstevel@tonic-gate #endif
1149*0Sstevel@tonic-gate register char *dc;
1150*0Sstevel@tonic-gate STRLEN leftlen;
1151*0Sstevel@tonic-gate STRLEN rightlen;
1152*0Sstevel@tonic-gate register char *lc;
1153*0Sstevel@tonic-gate register char *rc;
1154*0Sstevel@tonic-gate register I32 len;
1155*0Sstevel@tonic-gate I32 lensave;
1156*0Sstevel@tonic-gate char *lsave;
1157*0Sstevel@tonic-gate char *rsave;
1158*0Sstevel@tonic-gate bool left_utf = DO_UTF8(left);
1159*0Sstevel@tonic-gate bool right_utf = DO_UTF8(right);
1160*0Sstevel@tonic-gate I32 needlen = 0;
1161*0Sstevel@tonic-gate
1162*0Sstevel@tonic-gate if (left_utf && !right_utf)
1163*0Sstevel@tonic-gate sv_utf8_upgrade(right);
1164*0Sstevel@tonic-gate else if (!left_utf && right_utf)
1165*0Sstevel@tonic-gate sv_utf8_upgrade(left);
1166*0Sstevel@tonic-gate
1167*0Sstevel@tonic-gate if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
1168*0Sstevel@tonic-gate sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
1169*0Sstevel@tonic-gate lsave = lc = SvPV(left, leftlen);
1170*0Sstevel@tonic-gate rsave = rc = SvPV(right, rightlen);
1171*0Sstevel@tonic-gate len = leftlen < rightlen ? leftlen : rightlen;
1172*0Sstevel@tonic-gate lensave = len;
1173*0Sstevel@tonic-gate if ((left_utf || right_utf) && (sv == left || sv == right)) {
1174*0Sstevel@tonic-gate needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
1175*0Sstevel@tonic-gate Newz(801, dc, needlen + 1, char);
1176*0Sstevel@tonic-gate }
1177*0Sstevel@tonic-gate else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
1178*0Sstevel@tonic-gate STRLEN n_a;
1179*0Sstevel@tonic-gate dc = SvPV_force(sv, n_a);
1180*0Sstevel@tonic-gate if (SvCUR(sv) < (STRLEN)len) {
1181*0Sstevel@tonic-gate dc = SvGROW(sv, (STRLEN)(len + 1));
1182*0Sstevel@tonic-gate (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
1183*0Sstevel@tonic-gate }
1184*0Sstevel@tonic-gate if (optype != OP_BIT_AND && (left_utf || right_utf))
1185*0Sstevel@tonic-gate dc = SvGROW(sv, leftlen + rightlen + 1);
1186*0Sstevel@tonic-gate }
1187*0Sstevel@tonic-gate else {
1188*0Sstevel@tonic-gate needlen = ((optype == OP_BIT_AND)
1189*0Sstevel@tonic-gate ? len : (leftlen > rightlen ? leftlen : rightlen));
1190*0Sstevel@tonic-gate Newz(801, dc, needlen + 1, char);
1191*0Sstevel@tonic-gate (void)sv_usepvn(sv, dc, needlen);
1192*0Sstevel@tonic-gate dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
1193*0Sstevel@tonic-gate }
1194*0Sstevel@tonic-gate SvCUR_set(sv, len);
1195*0Sstevel@tonic-gate (void)SvPOK_only(sv);
1196*0Sstevel@tonic-gate if (left_utf || right_utf) {
1197*0Sstevel@tonic-gate UV duc, luc, ruc;
1198*0Sstevel@tonic-gate char *dcsave = dc;
1199*0Sstevel@tonic-gate STRLEN lulen = leftlen;
1200*0Sstevel@tonic-gate STRLEN rulen = rightlen;
1201*0Sstevel@tonic-gate STRLEN ulen;
1202*0Sstevel@tonic-gate
1203*0Sstevel@tonic-gate switch (optype) {
1204*0Sstevel@tonic-gate case OP_BIT_AND:
1205*0Sstevel@tonic-gate while (lulen && rulen) {
1206*0Sstevel@tonic-gate luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1207*0Sstevel@tonic-gate lc += ulen;
1208*0Sstevel@tonic-gate lulen -= ulen;
1209*0Sstevel@tonic-gate ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1210*0Sstevel@tonic-gate rc += ulen;
1211*0Sstevel@tonic-gate rulen -= ulen;
1212*0Sstevel@tonic-gate duc = luc & ruc;
1213*0Sstevel@tonic-gate dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1214*0Sstevel@tonic-gate }
1215*0Sstevel@tonic-gate if (sv == left || sv == right)
1216*0Sstevel@tonic-gate (void)sv_usepvn(sv, dcsave, needlen);
1217*0Sstevel@tonic-gate SvCUR_set(sv, dc - dcsave);
1218*0Sstevel@tonic-gate break;
1219*0Sstevel@tonic-gate case OP_BIT_XOR:
1220*0Sstevel@tonic-gate while (lulen && rulen) {
1221*0Sstevel@tonic-gate luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1222*0Sstevel@tonic-gate lc += ulen;
1223*0Sstevel@tonic-gate lulen -= ulen;
1224*0Sstevel@tonic-gate ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1225*0Sstevel@tonic-gate rc += ulen;
1226*0Sstevel@tonic-gate rulen -= ulen;
1227*0Sstevel@tonic-gate duc = luc ^ ruc;
1228*0Sstevel@tonic-gate dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1229*0Sstevel@tonic-gate }
1230*0Sstevel@tonic-gate goto mop_up_utf;
1231*0Sstevel@tonic-gate case OP_BIT_OR:
1232*0Sstevel@tonic-gate while (lulen && rulen) {
1233*0Sstevel@tonic-gate luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
1234*0Sstevel@tonic-gate lc += ulen;
1235*0Sstevel@tonic-gate lulen -= ulen;
1236*0Sstevel@tonic-gate ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
1237*0Sstevel@tonic-gate rc += ulen;
1238*0Sstevel@tonic-gate rulen -= ulen;
1239*0Sstevel@tonic-gate duc = luc | ruc;
1240*0Sstevel@tonic-gate dc = (char*)uvchr_to_utf8((U8*)dc, duc);
1241*0Sstevel@tonic-gate }
1242*0Sstevel@tonic-gate mop_up_utf:
1243*0Sstevel@tonic-gate if (sv == left || sv == right)
1244*0Sstevel@tonic-gate (void)sv_usepvn(sv, dcsave, needlen);
1245*0Sstevel@tonic-gate SvCUR_set(sv, dc - dcsave);
1246*0Sstevel@tonic-gate if (rulen)
1247*0Sstevel@tonic-gate sv_catpvn(sv, rc, rulen);
1248*0Sstevel@tonic-gate else if (lulen)
1249*0Sstevel@tonic-gate sv_catpvn(sv, lc, lulen);
1250*0Sstevel@tonic-gate else
1251*0Sstevel@tonic-gate *SvEND(sv) = '\0';
1252*0Sstevel@tonic-gate break;
1253*0Sstevel@tonic-gate }
1254*0Sstevel@tonic-gate SvUTF8_on(sv);
1255*0Sstevel@tonic-gate goto finish;
1256*0Sstevel@tonic-gate }
1257*0Sstevel@tonic-gate else
1258*0Sstevel@tonic-gate #ifdef LIBERAL
1259*0Sstevel@tonic-gate if (len >= sizeof(long)*4 &&
1260*0Sstevel@tonic-gate !((long)dc % sizeof(long)) &&
1261*0Sstevel@tonic-gate !((long)lc % sizeof(long)) &&
1262*0Sstevel@tonic-gate !((long)rc % sizeof(long))) /* It's almost always aligned... */
1263*0Sstevel@tonic-gate {
1264*0Sstevel@tonic-gate I32 remainder = len % (sizeof(long)*4);
1265*0Sstevel@tonic-gate len /= (sizeof(long)*4);
1266*0Sstevel@tonic-gate
1267*0Sstevel@tonic-gate dl = (long*)dc;
1268*0Sstevel@tonic-gate ll = (long*)lc;
1269*0Sstevel@tonic-gate rl = (long*)rc;
1270*0Sstevel@tonic-gate
1271*0Sstevel@tonic-gate switch (optype) {
1272*0Sstevel@tonic-gate case OP_BIT_AND:
1273*0Sstevel@tonic-gate while (len--) {
1274*0Sstevel@tonic-gate *dl++ = *ll++ & *rl++;
1275*0Sstevel@tonic-gate *dl++ = *ll++ & *rl++;
1276*0Sstevel@tonic-gate *dl++ = *ll++ & *rl++;
1277*0Sstevel@tonic-gate *dl++ = *ll++ & *rl++;
1278*0Sstevel@tonic-gate }
1279*0Sstevel@tonic-gate break;
1280*0Sstevel@tonic-gate case OP_BIT_XOR:
1281*0Sstevel@tonic-gate while (len--) {
1282*0Sstevel@tonic-gate *dl++ = *ll++ ^ *rl++;
1283*0Sstevel@tonic-gate *dl++ = *ll++ ^ *rl++;
1284*0Sstevel@tonic-gate *dl++ = *ll++ ^ *rl++;
1285*0Sstevel@tonic-gate *dl++ = *ll++ ^ *rl++;
1286*0Sstevel@tonic-gate }
1287*0Sstevel@tonic-gate break;
1288*0Sstevel@tonic-gate case OP_BIT_OR:
1289*0Sstevel@tonic-gate while (len--) {
1290*0Sstevel@tonic-gate *dl++ = *ll++ | *rl++;
1291*0Sstevel@tonic-gate *dl++ = *ll++ | *rl++;
1292*0Sstevel@tonic-gate *dl++ = *ll++ | *rl++;
1293*0Sstevel@tonic-gate *dl++ = *ll++ | *rl++;
1294*0Sstevel@tonic-gate }
1295*0Sstevel@tonic-gate }
1296*0Sstevel@tonic-gate
1297*0Sstevel@tonic-gate dc = (char*)dl;
1298*0Sstevel@tonic-gate lc = (char*)ll;
1299*0Sstevel@tonic-gate rc = (char*)rl;
1300*0Sstevel@tonic-gate
1301*0Sstevel@tonic-gate len = remainder;
1302*0Sstevel@tonic-gate }
1303*0Sstevel@tonic-gate #endif
1304*0Sstevel@tonic-gate {
1305*0Sstevel@tonic-gate switch (optype) {
1306*0Sstevel@tonic-gate case OP_BIT_AND:
1307*0Sstevel@tonic-gate while (len--)
1308*0Sstevel@tonic-gate *dc++ = *lc++ & *rc++;
1309*0Sstevel@tonic-gate break;
1310*0Sstevel@tonic-gate case OP_BIT_XOR:
1311*0Sstevel@tonic-gate while (len--)
1312*0Sstevel@tonic-gate *dc++ = *lc++ ^ *rc++;
1313*0Sstevel@tonic-gate goto mop_up;
1314*0Sstevel@tonic-gate case OP_BIT_OR:
1315*0Sstevel@tonic-gate while (len--)
1316*0Sstevel@tonic-gate *dc++ = *lc++ | *rc++;
1317*0Sstevel@tonic-gate mop_up:
1318*0Sstevel@tonic-gate len = lensave;
1319*0Sstevel@tonic-gate if (rightlen > (STRLEN)len)
1320*0Sstevel@tonic-gate sv_catpvn(sv, rsave + len, rightlen - len);
1321*0Sstevel@tonic-gate else if (leftlen > (STRLEN)len)
1322*0Sstevel@tonic-gate sv_catpvn(sv, lsave + len, leftlen - len);
1323*0Sstevel@tonic-gate else
1324*0Sstevel@tonic-gate *SvEND(sv) = '\0';
1325*0Sstevel@tonic-gate break;
1326*0Sstevel@tonic-gate }
1327*0Sstevel@tonic-gate }
1328*0Sstevel@tonic-gate finish:
1329*0Sstevel@tonic-gate SvTAINT(sv);
1330*0Sstevel@tonic-gate }
1331*0Sstevel@tonic-gate
1332*0Sstevel@tonic-gate OP *
Perl_do_kv(pTHX)1333*0Sstevel@tonic-gate Perl_do_kv(pTHX)
1334*0Sstevel@tonic-gate {
1335*0Sstevel@tonic-gate dSP;
1336*0Sstevel@tonic-gate HV *hv = (HV*)POPs;
1337*0Sstevel@tonic-gate HV *keys;
1338*0Sstevel@tonic-gate register HE *entry;
1339*0Sstevel@tonic-gate SV *tmpstr;
1340*0Sstevel@tonic-gate I32 gimme = GIMME_V;
1341*0Sstevel@tonic-gate I32 dokeys = (PL_op->op_type == OP_KEYS);
1342*0Sstevel@tonic-gate I32 dovalues = (PL_op->op_type == OP_VALUES);
1343*0Sstevel@tonic-gate I32 realhv = (SvTYPE(hv) == SVt_PVHV);
1344*0Sstevel@tonic-gate
1345*0Sstevel@tonic-gate if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
1346*0Sstevel@tonic-gate dokeys = dovalues = TRUE;
1347*0Sstevel@tonic-gate
1348*0Sstevel@tonic-gate if (!hv) {
1349*0Sstevel@tonic-gate if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
1350*0Sstevel@tonic-gate dTARGET; /* make sure to clear its target here */
1351*0Sstevel@tonic-gate if (SvTYPE(TARG) == SVt_PVLV)
1352*0Sstevel@tonic-gate LvTARG(TARG) = Nullsv;
1353*0Sstevel@tonic-gate PUSHs(TARG);
1354*0Sstevel@tonic-gate }
1355*0Sstevel@tonic-gate RETURN;
1356*0Sstevel@tonic-gate }
1357*0Sstevel@tonic-gate
1358*0Sstevel@tonic-gate keys = realhv ? hv : avhv_keys((AV*)hv);
1359*0Sstevel@tonic-gate (void)hv_iterinit(keys); /* always reset iterator regardless */
1360*0Sstevel@tonic-gate
1361*0Sstevel@tonic-gate if (gimme == G_VOID)
1362*0Sstevel@tonic-gate RETURN;
1363*0Sstevel@tonic-gate
1364*0Sstevel@tonic-gate if (gimme == G_SCALAR) {
1365*0Sstevel@tonic-gate IV i;
1366*0Sstevel@tonic-gate dTARGET;
1367*0Sstevel@tonic-gate
1368*0Sstevel@tonic-gate if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
1369*0Sstevel@tonic-gate if (SvTYPE(TARG) < SVt_PVLV) {
1370*0Sstevel@tonic-gate sv_upgrade(TARG, SVt_PVLV);
1371*0Sstevel@tonic-gate sv_magic(TARG, Nullsv, PERL_MAGIC_nkeys, Nullch, 0);
1372*0Sstevel@tonic-gate }
1373*0Sstevel@tonic-gate LvTYPE(TARG) = 'k';
1374*0Sstevel@tonic-gate if (LvTARG(TARG) != (SV*)keys) {
1375*0Sstevel@tonic-gate if (LvTARG(TARG))
1376*0Sstevel@tonic-gate SvREFCNT_dec(LvTARG(TARG));
1377*0Sstevel@tonic-gate LvTARG(TARG) = SvREFCNT_inc(keys);
1378*0Sstevel@tonic-gate }
1379*0Sstevel@tonic-gate PUSHs(TARG);
1380*0Sstevel@tonic-gate RETURN;
1381*0Sstevel@tonic-gate }
1382*0Sstevel@tonic-gate
1383*0Sstevel@tonic-gate if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied))
1384*0Sstevel@tonic-gate i = HvKEYS(keys);
1385*0Sstevel@tonic-gate else {
1386*0Sstevel@tonic-gate i = 0;
1387*0Sstevel@tonic-gate /*SUPPRESS 560*/
1388*0Sstevel@tonic-gate while (hv_iternext(keys)) i++;
1389*0Sstevel@tonic-gate }
1390*0Sstevel@tonic-gate PUSHi( i );
1391*0Sstevel@tonic-gate RETURN;
1392*0Sstevel@tonic-gate }
1393*0Sstevel@tonic-gate
1394*0Sstevel@tonic-gate EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
1395*0Sstevel@tonic-gate
1396*0Sstevel@tonic-gate PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
1397*0Sstevel@tonic-gate while ((entry = hv_iternext(keys))) {
1398*0Sstevel@tonic-gate SPAGAIN;
1399*0Sstevel@tonic-gate if (dokeys) {
1400*0Sstevel@tonic-gate SV* sv = hv_iterkeysv(entry);
1401*0Sstevel@tonic-gate XPUSHs(sv); /* won't clobber stack_sp */
1402*0Sstevel@tonic-gate }
1403*0Sstevel@tonic-gate if (dovalues) {
1404*0Sstevel@tonic-gate PUTBACK;
1405*0Sstevel@tonic-gate tmpstr = realhv ?
1406*0Sstevel@tonic-gate hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
1407*0Sstevel@tonic-gate DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
1408*0Sstevel@tonic-gate (unsigned long)HeHASH(entry),
1409*0Sstevel@tonic-gate HvMAX(keys)+1,
1410*0Sstevel@tonic-gate (unsigned long)(HeHASH(entry) & HvMAX(keys))));
1411*0Sstevel@tonic-gate SPAGAIN;
1412*0Sstevel@tonic-gate XPUSHs(tmpstr);
1413*0Sstevel@tonic-gate }
1414*0Sstevel@tonic-gate PUTBACK;
1415*0Sstevel@tonic-gate }
1416*0Sstevel@tonic-gate return NORMAL;
1417*0Sstevel@tonic-gate }
1418*0Sstevel@tonic-gate
1419