1 #define PERL_NO_GET_CONTEXT
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5
6 static I32 num_q (char *s, STRLEN slen);
7 static I32 esc_q (char *dest, char *src, STRLEN slen);
8 static I32 esc_q_utf8 (pTHX_ SV *sv, char *src, STRLEN slen);
9 static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n);
10 static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval,
11 HV *seenhv, AV *postav, I32 *levelp, I32 indent,
12 SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
13 SV *freezer, SV *toaster,
14 I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
15 I32 maxdepth, SV *sortkeys);
16
17 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
18
19 # ifdef EBCDIC
20 # define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
21 # else
22 # define UNI_TO_NATIVE(ch) (ch)
23 # endif
24
25 UV
Perl_utf8_to_uvchr(pTHX_ U8 * s,STRLEN * retlen)26 Perl_utf8_to_uvchr(pTHX_ U8 *s, STRLEN *retlen)
27 {
28 UV uv = utf8_to_uv(s, UTF8_MAXLEN, retlen,
29 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
30 return UNI_TO_NATIVE(uv);
31 }
32
33 # if !defined(PERL_IMPLICIT_CONTEXT)
34 # define utf8_to_uvchr Perl_utf8_to_uvchr
35 # else
36 # define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
37 # endif
38
39 #endif /* PERL_VERSION <= 6 */
40
41 /* Changes in 5.7 series mean that now IOK is only set if scalar is
42 precisely integer but in 5.6 and earlier we need to do a more
43 complex test */
44 #if PERL_VERSION <= 6
45 #define DD_is_integer(sv) (SvIOK(sv) && (SvIsUV(val) ? SvUV(sv) == SvNV(sv) : SvIV(sv) == SvNV(sv)))
46 #else
47 #define DD_is_integer(sv) SvIOK(sv)
48 #endif
49
50 /* does a string need to be protected? */
51 static I32
needs_quote(register char * s)52 needs_quote(register char *s)
53 {
54 TOP:
55 if (s[0] == ':') {
56 if (*++s) {
57 if (*s++ != ':')
58 return 1;
59 }
60 else
61 return 1;
62 }
63 if (isIDFIRST(*s)) {
64 while (*++s)
65 if (!isALNUM(*s)) {
66 if (*s == ':')
67 goto TOP;
68 else
69 return 1;
70 }
71 }
72 else
73 return 1;
74 return 0;
75 }
76
77 /* count the number of "'"s and "\"s in string */
78 static I32
num_q(register char * s,register STRLEN slen)79 num_q(register char *s, register STRLEN slen)
80 {
81 register I32 ret = 0;
82
83 while (slen > 0) {
84 if (*s == '\'' || *s == '\\')
85 ++ret;
86 ++s;
87 --slen;
88 }
89 return ret;
90 }
91
92
93 /* returns number of chars added to escape "'"s and "\"s in s */
94 /* slen number of characters in s will be escaped */
95 /* destination must be long enough for additional chars */
96 static I32
esc_q(register char * d,register char * s,register STRLEN slen)97 esc_q(register char *d, register char *s, register STRLEN slen)
98 {
99 register I32 ret = 0;
100
101 while (slen > 0) {
102 switch (*s) {
103 case '\'':
104 case '\\':
105 *d = '\\';
106 ++d; ++ret;
107 default:
108 *d = *s;
109 ++d; ++s; --slen;
110 break;
111 }
112 }
113 return ret;
114 }
115
116 static I32
esc_q_utf8(pTHX_ SV * sv,register char * src,register STRLEN slen)117 esc_q_utf8(pTHX_ SV* sv, register char *src, register STRLEN slen)
118 {
119 char *s, *send, *r, *rstart;
120 STRLEN j, cur = SvCUR(sv);
121 /* Could count 128-255 and 256+ in two variables, if we want to
122 be like &qquote and make a distinction. */
123 STRLEN grow = 0; /* bytes needed to represent chars 128+ */
124 /* STRLEN topbit_grow = 0; bytes needed to represent chars 128-255 */
125 STRLEN backslashes = 0;
126 STRLEN single_quotes = 0;
127 STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */
128 STRLEN normal = 0;
129
130 /* this will need EBCDICification */
131 for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) {
132 UV k = utf8_to_uvchr((U8*)s, NULL);
133
134 if (k > 127) {
135 /* 4: \x{} then count the number of hex digits. */
136 grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
137 #if UVSIZE == 4
138 8 /* We may allocate a bit more than the minimum here. */
139 #else
140 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
141 #endif
142 );
143 } else if (k == '\\') {
144 backslashes++;
145 } else if (k == '\'') {
146 single_quotes++;
147 } else if (k == '"' || k == '$' || k == '@') {
148 qq_escapables++;
149 } else {
150 normal++;
151 }
152 }
153 if (grow) {
154 /* We have something needing hex. 3 is ""\0 */
155 sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
156 + 2*qq_escapables + normal);
157 rstart = r = SvPVX(sv) + cur;
158
159 *r++ = '"';
160
161 for (s = src; s < send; s += UTF8SKIP(s)) {
162 UV k = utf8_to_uvchr((U8*)s, NULL);
163
164 if (k == '"' || k == '\\' || k == '$' || k == '@') {
165 *r++ = '\\';
166 *r++ = (char)k;
167 }
168 else if (k < 0x80)
169 *r++ = (char)k;
170 else {
171 /* The return value of sprintf() is unportable.
172 * In modern systems it returns (int) the number of characters,
173 * but in older systems it might return (char*) the original
174 * buffer, or it might even be (void). The easiest portable
175 * thing to do is probably use sprintf() in void context and
176 * then strlen(buffer) for the length. The more proper way
177 * would of course be to figure out the prototype of sprintf.
178 * --jhi */
179 sprintf(r, "\\x{%"UVxf"}", k);
180 r += strlen(r);
181 }
182 }
183 *r++ = '"';
184 } else {
185 /* Single quotes. */
186 sv_grow(sv, cur + 3 + 2*backslashes + 2*single_quotes
187 + qq_escapables + normal);
188 rstart = r = SvPVX(sv) + cur;
189 *r++ = '\'';
190 for (s = src; s < send; s ++) {
191 char k = *s;
192 if (k == '\'' || k == '\\')
193 *r++ = '\\';
194 *r++ = k;
195 }
196 *r++ = '\'';
197 }
198 *r = '\0';
199 j = r - rstart;
200 SvCUR_set(sv, cur + j);
201
202 return j;
203 }
204
205 /* append a repeated string to an SV */
206 static SV *
sv_x(pTHX_ SV * sv,register char * str,STRLEN len,I32 n)207 sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n)
208 {
209 if (sv == Nullsv)
210 sv = newSVpvn("", 0);
211 else
212 assert(SvTYPE(sv) >= SVt_PV);
213
214 if (n > 0) {
215 SvGROW(sv, len*n + SvCUR(sv) + 1);
216 if (len == 1) {
217 char *start = SvPVX(sv) + SvCUR(sv);
218 SvCUR(sv) += n;
219 start[n] = '\0';
220 while (n > 0)
221 start[--n] = str[0];
222 }
223 else
224 while (n > 0) {
225 sv_catpvn(sv, str, len);
226 --n;
227 }
228 }
229 return sv;
230 }
231
232 /*
233 * This ought to be split into smaller functions. (it is one long function since
234 * it exactly parallels the perl version, which was one long thing for
235 * efficiency raisins.) Ugggh!
236 */
237 static I32
DD_dump(pTHX_ SV * val,char * name,STRLEN namelen,SV * retval,HV * seenhv,AV * postav,I32 * levelp,I32 indent,SV * pad,SV * xpad,SV * apad,SV * sep,SV * pair,SV * freezer,SV * toaster,I32 purity,I32 deepcopy,I32 quotekeys,SV * bless,I32 maxdepth,SV * sortkeys)238 DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
239 AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
240 SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
241 I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
242 {
243 char tmpbuf[128];
244 U32 i;
245 char *c, *r, *realpack, id[128];
246 SV **svp;
247 SV *sv, *ipad, *ival;
248 SV *blesspad = Nullsv;
249 AV *seenentry = Nullav;
250 char *iname;
251 STRLEN inamelen, idlen = 0;
252 U32 realtype;
253
254 if (!val)
255 return 0;
256
257 realtype = SvTYPE(val);
258
259 if (SvGMAGICAL(val))
260 mg_get(val);
261 if (SvROK(val)) {
262
263 if (SvOBJECT(SvRV(val)) && freezer &&
264 SvPOK(freezer) && SvCUR(freezer))
265 {
266 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
267 XPUSHs(val); PUTBACK;
268 i = perl_call_method(SvPVX(freezer), G_EVAL|G_SCALAR);
269 SPAGAIN;
270 if (SvTRUE(ERRSV))
271 warn("WARNING(Freezer method call failed): %"SVf"", ERRSV);
272 else if (i)
273 val = newSVsv(POPs);
274 PUTBACK; FREETMPS; LEAVE;
275 if (i)
276 (void)sv_2mortal(val);
277 }
278
279 ival = SvRV(val);
280 realtype = SvTYPE(ival);
281 (void) sprintf(id, "0x%"UVxf, PTR2UV(ival));
282 idlen = strlen(id);
283 if (SvOBJECT(ival))
284 realpack = HvNAME(SvSTASH(ival));
285 else
286 realpack = Nullch;
287
288 /* if it has a name, we need to either look it up, or keep a tab
289 * on it so we know when we hit it later
290 */
291 if (namelen) {
292 if ((svp = hv_fetch(seenhv, id, idlen, FALSE))
293 && (sv = *svp) && SvROK(sv) && (seenentry = (AV*)SvRV(sv)))
294 {
295 SV *othername;
296 if ((svp = av_fetch(seenentry, 0, FALSE))
297 && (othername = *svp))
298 {
299 if (purity && *levelp > 0) {
300 SV *postentry;
301
302 if (realtype == SVt_PVHV)
303 sv_catpvn(retval, "{}", 2);
304 else if (realtype == SVt_PVAV)
305 sv_catpvn(retval, "[]", 2);
306 else
307 sv_catpvn(retval, "do{my $o}", 9);
308 postentry = newSVpvn(name, namelen);
309 sv_catpvn(postentry, " = ", 3);
310 sv_catsv(postentry, othername);
311 av_push(postav, postentry);
312 }
313 else {
314 if (name[0] == '@' || name[0] == '%') {
315 if ((SvPVX(othername))[0] == '\\' &&
316 (SvPVX(othername))[1] == name[0]) {
317 sv_catpvn(retval, SvPVX(othername)+1,
318 SvCUR(othername)-1);
319 }
320 else {
321 sv_catpvn(retval, name, 1);
322 sv_catpvn(retval, "{", 1);
323 sv_catsv(retval, othername);
324 sv_catpvn(retval, "}", 1);
325 }
326 }
327 else
328 sv_catsv(retval, othername);
329 }
330 return 1;
331 }
332 else {
333 warn("ref name not found for %s", id);
334 return 0;
335 }
336 }
337 else { /* store our name and continue */
338 SV *namesv;
339 if (name[0] == '@' || name[0] == '%') {
340 namesv = newSVpvn("\\", 1);
341 sv_catpvn(namesv, name, namelen);
342 }
343 else if (realtype == SVt_PVCV && name[0] == '*') {
344 namesv = newSVpvn("\\", 2);
345 sv_catpvn(namesv, name, namelen);
346 (SvPVX(namesv))[1] = '&';
347 }
348 else
349 namesv = newSVpvn(name, namelen);
350 seenentry = newAV();
351 av_push(seenentry, namesv);
352 (void)SvREFCNT_inc(val);
353 av_push(seenentry, val);
354 (void)hv_store(seenhv, id, strlen(id),
355 newRV_inc((SV*)seenentry), 0);
356 SvREFCNT_dec(seenentry);
357 }
358 }
359
360 if (realpack && *realpack == 'R' && strEQ(realpack, "Regexp")) {
361 STRLEN rlen;
362 char *rval = SvPV(val, rlen);
363 char *slash = strchr(rval, '/');
364 sv_catpvn(retval, "qr/", 3);
365 while (slash) {
366 sv_catpvn(retval, rval, slash-rval);
367 sv_catpvn(retval, "\\/", 2);
368 rlen -= slash-rval+1;
369 rval = slash+1;
370 slash = strchr(rval, '/');
371 }
372 sv_catpvn(retval, rval, rlen);
373 sv_catpvn(retval, "/", 1);
374 return 1;
375 }
376
377 /* If purity is not set and maxdepth is set, then check depth:
378 * if we have reached maximum depth, return the string
379 * representation of the thing we are currently examining
380 * at this depth (i.e., 'Foo=ARRAY(0xdeadbeef)').
381 */
382 if (!purity && maxdepth > 0 && *levelp >= maxdepth) {
383 STRLEN vallen;
384 char *valstr = SvPV(val,vallen);
385 sv_catpvn(retval, "'", 1);
386 sv_catpvn(retval, valstr, vallen);
387 sv_catpvn(retval, "'", 1);
388 return 1;
389 }
390
391 if (realpack) { /* we have a blessed ref */
392 STRLEN blesslen;
393 char *blessstr = SvPV(bless, blesslen);
394 sv_catpvn(retval, blessstr, blesslen);
395 sv_catpvn(retval, "( ", 2);
396 if (indent >= 2) {
397 blesspad = apad;
398 apad = newSVsv(apad);
399 sv_x(aTHX_ apad, " ", 1, blesslen+2);
400 }
401 }
402
403 (*levelp)++;
404 ipad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp);
405
406 if (realtype <= SVt_PVBM) { /* scalar ref */
407 SV *namesv = newSVpvn("${", 2);
408 sv_catpvn(namesv, name, namelen);
409 sv_catpvn(namesv, "}", 1);
410 if (realpack) { /* blessed */
411 sv_catpvn(retval, "do{\\(my $o = ", 13);
412 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
413 postav, levelp, indent, pad, xpad, apad, sep, pair,
414 freezer, toaster, purity, deepcopy, quotekeys, bless,
415 maxdepth, sortkeys);
416 sv_catpvn(retval, ")}", 2);
417 } /* plain */
418 else {
419 sv_catpvn(retval, "\\", 1);
420 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
421 postav, levelp, indent, pad, xpad, apad, sep, pair,
422 freezer, toaster, purity, deepcopy, quotekeys, bless,
423 maxdepth, sortkeys);
424 }
425 SvREFCNT_dec(namesv);
426 }
427 else if (realtype == SVt_PVGV) { /* glob ref */
428 SV *namesv = newSVpvn("*{", 2);
429 sv_catpvn(namesv, name, namelen);
430 sv_catpvn(namesv, "}", 1);
431 sv_catpvn(retval, "\\", 1);
432 DD_dump(aTHX_ ival, SvPVX(namesv), SvCUR(namesv), retval, seenhv,
433 postav, levelp, indent, pad, xpad, apad, sep, pair,
434 freezer, toaster, purity, deepcopy, quotekeys, bless,
435 maxdepth, sortkeys);
436 SvREFCNT_dec(namesv);
437 }
438 else if (realtype == SVt_PVAV) {
439 SV *totpad;
440 I32 ix = 0;
441 I32 ixmax = av_len((AV *)ival);
442
443 SV *ixsv = newSViv(0);
444 /* allowing for a 24 char wide array index */
445 New(0, iname, namelen+28, char);
446 (void)strcpy(iname, name);
447 inamelen = namelen;
448 if (name[0] == '@') {
449 sv_catpvn(retval, "(", 1);
450 iname[0] = '$';
451 }
452 else {
453 sv_catpvn(retval, "[", 1);
454 /* omit "->" in $foo{bar}->[0], but not in ${$foo}->[0] */
455 /*if (namelen > 0
456 && name[namelen-1] != ']' && name[namelen-1] != '}'
457 && (namelen < 4 || (name[1] != '{' && name[2] != '{')))*/
458 if ((namelen > 0
459 && name[namelen-1] != ']' && name[namelen-1] != '}')
460 || (namelen > 4
461 && (name[1] == '{'
462 || (name[0] == '\\' && name[2] == '{'))))
463 {
464 iname[inamelen++] = '-'; iname[inamelen++] = '>';
465 iname[inamelen] = '\0';
466 }
467 }
468 if (iname[0] == '*' && iname[inamelen-1] == '}' && inamelen >= 8 &&
469 (instr(iname+inamelen-8, "{SCALAR}") ||
470 instr(iname+inamelen-7, "{ARRAY}") ||
471 instr(iname+inamelen-6, "{HASH}"))) {
472 iname[inamelen++] = '-'; iname[inamelen++] = '>';
473 }
474 iname[inamelen++] = '['; iname[inamelen] = '\0';
475 totpad = newSVsv(sep);
476 sv_catsv(totpad, pad);
477 sv_catsv(totpad, apad);
478
479 for (ix = 0; ix <= ixmax; ++ix) {
480 STRLEN ilen;
481 SV *elem;
482 svp = av_fetch((AV*)ival, ix, FALSE);
483 if (svp)
484 elem = *svp;
485 else
486 elem = &PL_sv_undef;
487
488 ilen = inamelen;
489 sv_setiv(ixsv, ix);
490 (void) sprintf(iname+ilen, "%"IVdf, (IV)ix);
491 ilen = strlen(iname);
492 iname[ilen++] = ']'; iname[ilen] = '\0';
493 if (indent >= 3) {
494 sv_catsv(retval, totpad);
495 sv_catsv(retval, ipad);
496 sv_catpvn(retval, "#", 1);
497 sv_catsv(retval, ixsv);
498 }
499 sv_catsv(retval, totpad);
500 sv_catsv(retval, ipad);
501 DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
502 levelp, indent, pad, xpad, apad, sep, pair,
503 freezer, toaster, purity, deepcopy, quotekeys, bless,
504 maxdepth, sortkeys);
505 if (ix < ixmax)
506 sv_catpvn(retval, ",", 1);
507 }
508 if (ixmax >= 0) {
509 SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), (*levelp)-1);
510 sv_catsv(retval, totpad);
511 sv_catsv(retval, opad);
512 SvREFCNT_dec(opad);
513 }
514 if (name[0] == '@')
515 sv_catpvn(retval, ")", 1);
516 else
517 sv_catpvn(retval, "]", 1);
518 SvREFCNT_dec(ixsv);
519 SvREFCNT_dec(totpad);
520 Safefree(iname);
521 }
522 else if (realtype == SVt_PVHV) {
523 SV *totpad, *newapad;
524 SV *iname, *sname;
525 HE *entry;
526 char *key;
527 I32 klen;
528 SV *hval;
529 AV *keys = Nullav;
530
531 iname = newSVpvn(name, namelen);
532 if (name[0] == '%') {
533 sv_catpvn(retval, "(", 1);
534 (SvPVX(iname))[0] = '$';
535 }
536 else {
537 sv_catpvn(retval, "{", 1);
538 /* omit "->" in $foo[0]->{bar}, but not in ${$foo}->{bar} */
539 if ((namelen > 0
540 && name[namelen-1] != ']' && name[namelen-1] != '}')
541 || (namelen > 4
542 && (name[1] == '{'
543 || (name[0] == '\\' && name[2] == '{'))))
544 {
545 sv_catpvn(iname, "->", 2);
546 }
547 }
548 if (name[0] == '*' && name[namelen-1] == '}' && namelen >= 8 &&
549 (instr(name+namelen-8, "{SCALAR}") ||
550 instr(name+namelen-7, "{ARRAY}") ||
551 instr(name+namelen-6, "{HASH}"))) {
552 sv_catpvn(iname, "->", 2);
553 }
554 sv_catpvn(iname, "{", 1);
555 totpad = newSVsv(sep);
556 sv_catsv(totpad, pad);
557 sv_catsv(totpad, apad);
558
559 /* If requested, get a sorted/filtered array of hash keys */
560 if (sortkeys) {
561 if (sortkeys == &PL_sv_yes) {
562 #if PERL_VERSION < 8
563 sortkeys = sv_2mortal(newSVpvn("Data::Dumper::_sortkeys", 23));
564 #else
565 keys = newAV();
566 (void)hv_iterinit((HV*)ival);
567 while ((entry = hv_iternext((HV*)ival))) {
568 sv = hv_iterkeysv(entry);
569 SvREFCNT_inc(sv);
570 av_push(keys, sv);
571 }
572 # ifdef USE_LOCALE_NUMERIC
573 sortsv(AvARRAY(keys),
574 av_len(keys)+1,
575 IN_LOCALE ? Perl_sv_cmp_locale : Perl_sv_cmp);
576 # else
577 sortsv(AvARRAY(keys),
578 av_len(keys)+1,
579 Perl_sv_cmp);
580 # endif
581 #endif
582 }
583 if (sortkeys != &PL_sv_yes) {
584 dSP; ENTER; SAVETMPS; PUSHMARK(sp);
585 XPUSHs(sv_2mortal(newRV_inc(ival))); PUTBACK;
586 i = perl_call_sv(sortkeys, G_SCALAR | G_EVAL);
587 SPAGAIN;
588 if (i) {
589 sv = POPs;
590 if (SvROK(sv) && (SvTYPE(SvRV(sv)) == SVt_PVAV))
591 keys = (AV*)SvREFCNT_inc(SvRV(sv));
592 }
593 if (! keys)
594 warn("Sortkeys subroutine did not return ARRAYREF\n");
595 PUTBACK; FREETMPS; LEAVE;
596 }
597 if (keys)
598 sv_2mortal((SV*)keys);
599 }
600 else
601 (void)hv_iterinit((HV*)ival);
602
603 /* foreach (keys %hash) */
604 for (i = 0; 1; i++) {
605 char *nkey;
606 char *nkey_buffer = NULL;
607 I32 nticks = 0;
608 SV* keysv;
609 STRLEN keylen;
610 I32 nlen;
611 bool do_utf8 = FALSE;
612
613 if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
614 !(entry = hv_iternext((HV *)ival)))
615 break;
616
617 if (i)
618 sv_catpvn(retval, ",", 1);
619
620 if (sortkeys) {
621 char *key;
622 svp = av_fetch(keys, i, FALSE);
623 keysv = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
624 key = SvPV(keysv, keylen);
625 svp = hv_fetch((HV*)ival, key,
626 SvUTF8(keysv) ? -(I32)keylen : keylen, 0);
627 hval = svp ? *svp : sv_mortalcopy(&PL_sv_undef);
628 }
629 else {
630 keysv = hv_iterkeysv(entry);
631 hval = hv_iterval((HV*)ival, entry);
632 }
633
634 do_utf8 = DO_UTF8(keysv);
635 key = SvPV(keysv, keylen);
636 klen = keylen;
637
638 sv_catsv(retval, totpad);
639 sv_catsv(retval, ipad);
640 /* old logic was first to check utf8 flag, and if utf8 always
641 call esc_q_utf8. This caused test to break under -Mutf8,
642 because there even strings like 'c' have utf8 flag on.
643 Hence with quotekeys == 0 the XS code would still '' quote
644 them based on flags, whereas the perl code would not,
645 based on regexps.
646 The perl code is correct.
647 needs_quote() decides that anything that isn't a valid
648 perl identifier needs to be quoted, hence only correctly
649 formed strings with no characters outside [A-Za-z0-9_:]
650 won't need quoting. None of those characters are used in
651 the byte encoding of utf8, so anything with utf8
652 encoded characters in will need quoting. Hence strings
653 with utf8 encoded characters in will end up inside do_utf8
654 just like before, but now strings with utf8 flag set but
655 only ascii characters will end up in the unquoted section.
656
657 There should also be less tests for the (probably currently)
658 more common doesn't need quoting case.
659 The code is also smaller (22044 vs 22260) because I've been
660 able to pull the common logic out to both sides. */
661 if (quotekeys || needs_quote(key)) {
662 if (do_utf8) {
663 STRLEN ocur = SvCUR(retval);
664 nlen = esc_q_utf8(aTHX_ retval, key, klen);
665 nkey = SvPVX(retval) + ocur;
666 }
667 else {
668 nticks = num_q(key, klen);
669 New(0, nkey_buffer, klen+nticks+3, char);
670 nkey = nkey_buffer;
671 nkey[0] = '\'';
672 if (nticks)
673 klen += esc_q(nkey+1, key, klen);
674 else
675 (void)Copy(key, nkey+1, klen, char);
676 nkey[++klen] = '\'';
677 nkey[++klen] = '\0';
678 nlen = klen;
679 sv_catpvn(retval, nkey, klen);
680 }
681 }
682 else {
683 nkey = key;
684 nlen = klen;
685 sv_catpvn(retval, nkey, klen);
686 }
687 sname = newSVsv(iname);
688 sv_catpvn(sname, nkey, nlen);
689 sv_catpvn(sname, "}", 1);
690
691 sv_catsv(retval, pair);
692 if (indent >= 2) {
693 char *extra;
694 I32 elen = 0;
695 newapad = newSVsv(apad);
696 New(0, extra, klen+4+1, char);
697 while (elen < (klen+4))
698 extra[elen++] = ' ';
699 extra[elen] = '\0';
700 sv_catpvn(newapad, extra, elen);
701 Safefree(extra);
702 }
703 else
704 newapad = apad;
705
706 DD_dump(aTHX_ hval, SvPVX(sname), SvCUR(sname), retval, seenhv,
707 postav, levelp, indent, pad, xpad, newapad, sep, pair,
708 freezer, toaster, purity, deepcopy, quotekeys, bless,
709 maxdepth, sortkeys);
710 SvREFCNT_dec(sname);
711 Safefree(nkey_buffer);
712 if (indent >= 2)
713 SvREFCNT_dec(newapad);
714 }
715 if (i) {
716 SV *opad = sv_x(aTHX_ Nullsv, SvPVX(xpad), SvCUR(xpad), *levelp-1);
717 sv_catsv(retval, totpad);
718 sv_catsv(retval, opad);
719 SvREFCNT_dec(opad);
720 }
721 if (name[0] == '%')
722 sv_catpvn(retval, ")", 1);
723 else
724 sv_catpvn(retval, "}", 1);
725 SvREFCNT_dec(iname);
726 SvREFCNT_dec(totpad);
727 }
728 else if (realtype == SVt_PVCV) {
729 sv_catpvn(retval, "sub { \"DUMMY\" }", 15);
730 if (purity)
731 warn("Encountered CODE ref, using dummy placeholder");
732 }
733 else {
734 warn("cannot handle ref type %ld", realtype);
735 }
736
737 if (realpack) { /* free blessed allocs */
738 if (indent >= 2) {
739 SvREFCNT_dec(apad);
740 apad = blesspad;
741 }
742 sv_catpvn(retval, ", '", 3);
743 sv_catpvn(retval, realpack, strlen(realpack));
744 sv_catpvn(retval, "' )", 3);
745 if (toaster && SvPOK(toaster) && SvCUR(toaster)) {
746 sv_catpvn(retval, "->", 2);
747 sv_catsv(retval, toaster);
748 sv_catpvn(retval, "()", 2);
749 }
750 }
751 SvREFCNT_dec(ipad);
752 (*levelp)--;
753 }
754 else {
755 STRLEN i;
756
757 if (namelen) {
758 (void) sprintf(id, "0x%"UVxf, PTR2UV(val));
759 if ((svp = hv_fetch(seenhv, id, (idlen = strlen(id)), FALSE)) &&
760 (sv = *svp) && SvROK(sv) &&
761 (seenentry = (AV*)SvRV(sv)))
762 {
763 SV *othername;
764 if ((svp = av_fetch(seenentry, 0, FALSE)) && (othername = *svp)
765 && (svp = av_fetch(seenentry, 2, FALSE)) && *svp && SvIV(*svp) > 0)
766 {
767 sv_catpvn(retval, "${", 2);
768 sv_catsv(retval, othername);
769 sv_catpvn(retval, "}", 1);
770 return 1;
771 }
772 }
773 else if (val != &PL_sv_undef) {
774 SV *namesv;
775 namesv = newSVpvn("\\", 1);
776 sv_catpvn(namesv, name, namelen);
777 seenentry = newAV();
778 av_push(seenentry, namesv);
779 av_push(seenentry, newRV_inc(val));
780 (void)hv_store(seenhv, id, strlen(id), newRV_inc((SV*)seenentry), 0);
781 SvREFCNT_dec(seenentry);
782 }
783 }
784
785 if (DD_is_integer(val)) {
786 STRLEN len;
787 if (SvIsUV(val))
788 (void) sprintf(tmpbuf, "%"UVuf, SvUV(val));
789 else
790 (void) sprintf(tmpbuf, "%"IVdf, SvIV(val));
791 len = strlen(tmpbuf);
792 if (SvPOK(val)) {
793 /* Need to check to see if this is a string such as " 0".
794 I'm assuming from sprintf isn't going to clash with utf8.
795 Is this valid on EBCDIC? */
796 STRLEN pvlen;
797 const char *pv = SvPV(val, pvlen);
798 if (pvlen != len || memNE(pv, tmpbuf, len))
799 goto integer_came_from_string;
800 }
801 if (len > 10) {
802 /* Looks like we're on a 64 bit system. Make it a string so that
803 if a 32 bit system reads the number it will cope better. */
804 sv_catpvf(retval, "'%s'", tmpbuf);
805 } else
806 sv_catpvn(retval, tmpbuf, len);
807 }
808 else if (realtype == SVt_PVGV) {/* GLOBs can end up with scribbly names */
809 c = SvPV(val, i);
810 ++c; --i; /* just get the name */
811 if (i >= 6 && strncmp(c, "main::", 6) == 0) {
812 c += 4;
813 i -= 4;
814 }
815 if (needs_quote(c)) {
816 sv_grow(retval, SvCUR(retval)+6+2*i);
817 r = SvPVX(retval)+SvCUR(retval);
818 r[0] = '*'; r[1] = '{'; r[2] = '\'';
819 i += esc_q(r+3, c, i);
820 i += 3;
821 r[i++] = '\''; r[i++] = '}';
822 r[i] = '\0';
823 }
824 else {
825 sv_grow(retval, SvCUR(retval)+i+2);
826 r = SvPVX(retval)+SvCUR(retval);
827 r[0] = '*'; strcpy(r+1, c);
828 i++;
829 }
830 SvCUR_set(retval, SvCUR(retval)+i);
831
832 if (purity) {
833 static char *entries[] = { "{SCALAR}", "{ARRAY}", "{HASH}" };
834 static STRLEN sizes[] = { 8, 7, 6 };
835 SV *e;
836 SV *nname = newSVpvn("", 0);
837 SV *newapad = newSVpvn("", 0);
838 GV *gv = (GV*)val;
839 I32 j;
840
841 for (j=0; j<3; j++) {
842 e = ((j == 0) ? GvSV(gv) : (j == 1) ? (SV*)GvAV(gv) : (SV*)GvHV(gv));
843 if (!e)
844 continue;
845 if (j == 0 && !SvOK(e))
846 continue;
847
848 {
849 I32 nlevel = 0;
850 SV *postentry = newSVpvn(r,i);
851
852 sv_setsv(nname, postentry);
853 sv_catpvn(nname, entries[j], sizes[j]);
854 sv_catpvn(postentry, " = ", 3);
855 av_push(postav, postentry);
856 e = newRV_inc(e);
857
858 SvCUR(newapad) = 0;
859 if (indent >= 2)
860 (void)sv_x(aTHX_ newapad, " ", 1, SvCUR(postentry));
861
862 DD_dump(aTHX_ e, SvPVX(nname), SvCUR(nname), postentry,
863 seenhv, postav, &nlevel, indent, pad, xpad,
864 newapad, sep, pair, freezer, toaster, purity,
865 deepcopy, quotekeys, bless, maxdepth,
866 sortkeys);
867 SvREFCNT_dec(e);
868 }
869 }
870
871 SvREFCNT_dec(newapad);
872 SvREFCNT_dec(nname);
873 }
874 }
875 else if (val == &PL_sv_undef || !SvOK(val)) {
876 sv_catpvn(retval, "undef", 5);
877 }
878 else {
879 integer_came_from_string:
880 c = SvPV(val, i);
881 if (DO_UTF8(val))
882 i += esc_q_utf8(aTHX_ retval, c, i);
883 else {
884 sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
885 r = SvPVX(retval) + SvCUR(retval);
886 r[0] = '\'';
887 i += esc_q(r+1, c, i);
888 ++i;
889 r[i++] = '\'';
890 r[i] = '\0';
891 SvCUR_set(retval, SvCUR(retval)+i);
892 }
893 }
894 }
895
896 if (idlen) {
897 if (deepcopy)
898 (void)hv_delete(seenhv, id, idlen, G_DISCARD);
899 else if (namelen && seenentry) {
900 SV *mark = *av_fetch(seenentry, 2, TRUE);
901 sv_setiv(mark,1);
902 }
903 }
904 return 1;
905 }
906
907
908 MODULE = Data::Dumper PACKAGE = Data::Dumper PREFIX = Data_Dumper_
909
910 #
911 # This is the exact equivalent of Dump. Well, almost. The things that are
912 # different as of now (due to Laziness):
913 # * doesnt do double-quotes yet.
914 #
915
916 void
917 Data_Dumper_Dumpxs(href, ...)
918 SV *href;
919 PROTOTYPE: $;$$
920 PPCODE:
921 {
922 HV *hv;
923 SV *retval, *valstr;
924 HV *seenhv = Nullhv;
925 AV *postav, *todumpav, *namesav;
926 I32 level = 0;
927 I32 indent, terse, i, imax, postlen;
928 SV **svp;
929 SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
930 SV *freezer, *toaster, *bless, *sortkeys;
931 I32 purity, deepcopy, quotekeys, maxdepth = 0;
932 char tmpbuf[1024];
933 I32 gimme = GIMME;
934
935 if (!SvROK(href)) { /* call new to get an object first */
936 if (items < 2)
937 croak("Usage: Data::Dumper::Dumpxs(PACKAGE, VAL_ARY_REF, [NAME_ARY_REF])");
938
939 ENTER;
940 SAVETMPS;
941
942 PUSHMARK(sp);
943 XPUSHs(href);
944 XPUSHs(sv_2mortal(newSVsv(ST(1))));
945 if (items >= 3)
946 XPUSHs(sv_2mortal(newSVsv(ST(2))));
947 PUTBACK;
948 i = perl_call_method("new", G_SCALAR);
949 SPAGAIN;
950 if (i)
951 href = newSVsv(POPs);
952
953 PUTBACK;
954 FREETMPS;
955 LEAVE;
956 if (i)
957 (void)sv_2mortal(href);
958 }
959
960 todumpav = namesav = Nullav;
961 seenhv = Nullhv;
962 val = pad = xpad = apad = sep = pair = varname
963 = freezer = toaster = bless = &PL_sv_undef;
964 name = sv_newmortal();
965 indent = 2;
966 terse = purity = deepcopy = 0;
967 quotekeys = 1;
968
969 retval = newSVpvn("", 0);
970 if (SvROK(href)
971 && (hv = (HV*)SvRV((SV*)href))
972 && SvTYPE(hv) == SVt_PVHV) {
973
974 if ((svp = hv_fetch(hv, "seen", 4, FALSE)) && SvROK(*svp))
975 seenhv = (HV*)SvRV(*svp);
976 if ((svp = hv_fetch(hv, "todump", 6, FALSE)) && SvROK(*svp))
977 todumpav = (AV*)SvRV(*svp);
978 if ((svp = hv_fetch(hv, "names", 5, FALSE)) && SvROK(*svp))
979 namesav = (AV*)SvRV(*svp);
980 if ((svp = hv_fetch(hv, "indent", 6, FALSE)))
981 indent = SvIV(*svp);
982 if ((svp = hv_fetch(hv, "purity", 6, FALSE)))
983 purity = SvIV(*svp);
984 if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
985 terse = SvTRUE(*svp);
986 #if 0 /* useqq currently unused */
987 if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
988 useqq = SvTRUE(*svp);
989 #endif
990 if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
991 pad = *svp;
992 if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
993 xpad = *svp;
994 if ((svp = hv_fetch(hv, "apad", 4, FALSE)))
995 apad = *svp;
996 if ((svp = hv_fetch(hv, "sep", 3, FALSE)))
997 sep = *svp;
998 if ((svp = hv_fetch(hv, "pair", 4, FALSE)))
999 pair = *svp;
1000 if ((svp = hv_fetch(hv, "varname", 7, FALSE)))
1001 varname = *svp;
1002 if ((svp = hv_fetch(hv, "freezer", 7, FALSE)))
1003 freezer = *svp;
1004 if ((svp = hv_fetch(hv, "toaster", 7, FALSE)))
1005 toaster = *svp;
1006 if ((svp = hv_fetch(hv, "deepcopy", 8, FALSE)))
1007 deepcopy = SvTRUE(*svp);
1008 if ((svp = hv_fetch(hv, "quotekeys", 9, FALSE)))
1009 quotekeys = SvTRUE(*svp);
1010 if ((svp = hv_fetch(hv, "bless", 5, FALSE)))
1011 bless = *svp;
1012 if ((svp = hv_fetch(hv, "maxdepth", 8, FALSE)))
1013 maxdepth = SvIV(*svp);
1014 if ((svp = hv_fetch(hv, "sortkeys", 8, FALSE))) {
1015 sortkeys = *svp;
1016 if (! SvTRUE(sortkeys))
1017 sortkeys = NULL;
1018 else if (! (SvROK(sortkeys) &&
1019 SvTYPE(SvRV(sortkeys)) == SVt_PVCV) )
1020 {
1021 /* flag to use qsortsv() for sorting hash keys */
1022 sortkeys = &PL_sv_yes;
1023 }
1024 }
1025 postav = newAV();
1026
1027 if (todumpav)
1028 imax = av_len(todumpav);
1029 else
1030 imax = -1;
1031 valstr = newSVpvn("",0);
1032 for (i = 0; i <= imax; ++i) {
1033 SV *newapad;
1034
1035 av_clear(postav);
1036 if ((svp = av_fetch(todumpav, i, FALSE)))
1037 val = *svp;
1038 else
1039 val = &PL_sv_undef;
1040 if ((svp = av_fetch(namesav, i, TRUE)))
1041 sv_setsv(name, *svp);
1042 else
1043 (void)SvOK_off(name);
1044
1045 if (SvOK(name)) {
1046 if ((SvPVX(name))[0] == '*') {
1047 if (SvROK(val)) {
1048 switch (SvTYPE(SvRV(val))) {
1049 case SVt_PVAV:
1050 (SvPVX(name))[0] = '@';
1051 break;
1052 case SVt_PVHV:
1053 (SvPVX(name))[0] = '%';
1054 break;
1055 case SVt_PVCV:
1056 (SvPVX(name))[0] = '*';
1057 break;
1058 default:
1059 (SvPVX(name))[0] = '$';
1060 break;
1061 }
1062 }
1063 else
1064 (SvPVX(name))[0] = '$';
1065 }
1066 else if ((SvPVX(name))[0] != '$')
1067 sv_insert(name, 0, 0, "$", 1);
1068 }
1069 else {
1070 STRLEN nchars = 0;
1071 sv_setpvn(name, "$", 1);
1072 sv_catsv(name, varname);
1073 (void) sprintf(tmpbuf, "%"IVdf, (IV)(i+1));
1074 nchars = strlen(tmpbuf);
1075 sv_catpvn(name, tmpbuf, nchars);
1076 }
1077
1078 if (indent >= 2) {
1079 SV *tmpsv = sv_x(aTHX_ Nullsv, " ", 1, SvCUR(name)+3);
1080 newapad = newSVsv(apad);
1081 sv_catsv(newapad, tmpsv);
1082 SvREFCNT_dec(tmpsv);
1083 }
1084 else
1085 newapad = apad;
1086
1087 DD_dump(aTHX_ val, SvPVX(name), SvCUR(name), valstr, seenhv,
1088 postav, &level, indent, pad, xpad, newapad, sep, pair,
1089 freezer, toaster, purity, deepcopy, quotekeys,
1090 bless, maxdepth, sortkeys);
1091
1092 if (indent >= 2)
1093 SvREFCNT_dec(newapad);
1094
1095 postlen = av_len(postav);
1096 if (postlen >= 0 || !terse) {
1097 sv_insert(valstr, 0, 0, " = ", 3);
1098 sv_insert(valstr, 0, 0, SvPVX(name), SvCUR(name));
1099 sv_catpvn(valstr, ";", 1);
1100 }
1101 sv_catsv(retval, pad);
1102 sv_catsv(retval, valstr);
1103 sv_catsv(retval, sep);
1104 if (postlen >= 0) {
1105 I32 i;
1106 sv_catsv(retval, pad);
1107 for (i = 0; i <= postlen; ++i) {
1108 SV *elem;
1109 svp = av_fetch(postav, i, FALSE);
1110 if (svp && (elem = *svp)) {
1111 sv_catsv(retval, elem);
1112 if (i < postlen) {
1113 sv_catpvn(retval, ";", 1);
1114 sv_catsv(retval, sep);
1115 sv_catsv(retval, pad);
1116 }
1117 }
1118 }
1119 sv_catpvn(retval, ";", 1);
1120 sv_catsv(retval, sep);
1121 }
1122 sv_setpvn(valstr, "", 0);
1123 if (gimme == G_ARRAY) {
1124 XPUSHs(sv_2mortal(retval));
1125 if (i < imax) /* not the last time thro ? */
1126 retval = newSVpvn("",0);
1127 }
1128 }
1129 SvREFCNT_dec(postav);
1130 SvREFCNT_dec(valstr);
1131 }
1132 else
1133 croak("Call to new() method failed to return HASH ref");
1134 if (gimme == G_SCALAR)
1135 XPUSHs(sv_2mortal(retval));
1136 }
1137