xref: /openbsd-src/gnu/usr.bin/perl/ext/File-Glob/Glob.xs (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 #define PERL_NO_GET_CONTEXT
2 
3 #include "EXTERN.h"
4 #include "perl.h"
5 #include "XSUB.h"
6 
7 #include "bsd_glob.h"
8 
9 #define MY_CXT_KEY "File::Glob::_guts" XS_VERSION
10 
11 typedef struct {
12 #ifdef USE_ITHREADS
13     tTHX interp;
14 #endif
15     int		x_GLOB_ERROR;
16     HV *	x_GLOB_ENTRIES;
17     Perl_ophook_t	x_GLOB_OLD_OPHOOK;
18 } my_cxt_t;
19 
20 START_MY_CXT
21 
22 #define GLOB_ERROR	(MY_CXT.x_GLOB_ERROR)
23 
24 #include "const-c.inc"
25 
26 #ifdef WIN32
27 #define errfunc		NULL
28 #else
29 static int
30 errfunc(const char *foo, int bar) {
31   PERL_UNUSED_ARG(foo);
32   return !(bar == EACCES || bar == ENOENT || bar == ENOTDIR);
33 }
34 #endif
35 
36 static void
37 doglob(pTHX_ const char *pattern, int flags)
38 {
39     dSP;
40     glob_t pglob;
41     int i;
42     int retval;
43     SV *tmp;
44     {
45 	dMY_CXT;
46 
47 	/* call glob */
48 	memset(&pglob, 0, sizeof(glob_t));
49 	retval = bsd_glob(pattern, flags, errfunc, &pglob);
50 	GLOB_ERROR = retval;
51 
52 	/* return any matches found */
53 	EXTEND(sp, pglob.gl_pathc);
54 	for (i = 0; i < pglob.gl_pathc; i++) {
55 	    /* printf("# bsd_glob: %s\n", pglob.gl_pathv[i]); */
56 	    tmp = newSVpvn_flags(pglob.gl_pathv[i], strlen(pglob.gl_pathv[i]),
57 				 SVs_TEMP);
58 	    TAINT;
59 	    SvTAINT(tmp);
60 	    PUSHs(tmp);
61 	}
62 	PUTBACK;
63 
64 	bsd_globfree(&pglob);
65     }
66 }
67 
68 static void
69 iterate(pTHX_ bool(*globber)(pTHX_ AV *entries, const char *pat, STRLEN len, bool is_utf8))
70 {
71     dSP;
72     dMY_CXT;
73 
74     const char * const cxixpv = (char *)&PL_op;
75     STRLEN const cxixlen = sizeof(OP *);
76     AV *entries;
77     U32 const gimme = GIMME_V;
78     SV *patsv = POPs;
79     bool on_stack = FALSE;
80 
81     if (!MY_CXT.x_GLOB_ENTRIES) MY_CXT.x_GLOB_ENTRIES = newHV();
82     entries = (AV *)*(hv_fetch(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, 1));
83 
84     /* if we're just beginning, do it all first */
85     if (SvTYPE(entries) != SVt_PVAV) {
86         const char *pat;
87         STRLEN len;
88         bool is_utf8;
89 
90         /* glob without args defaults to $_ */
91         SvGETMAGIC(patsv);
92         if (
93             !SvOK(patsv)
94               && (patsv = DEFSV, SvGETMAGIC(patsv), !SvOK(patsv))
95             ) {
96             pat = "";
97             len = 0;
98             is_utf8 = 0;
99         }
100         else {
101             pat = SvPV_nomg(patsv,len);
102             is_utf8 = !!SvUTF8(patsv);
103             /* the lower-level code expects a null-terminated string */
104             if (!SvPOK(patsv) || pat != SvPVX(patsv) || pat[len] != '\0') {
105                 SV *newpatsv = newSVpvn_flags(pat, len, SVs_TEMP);
106                 pat = SvPV_nomg(newpatsv,len);
107             }
108         }
109 
110         if (!IS_SAFE_SYSCALL(pat, len, "pattern", "glob")) {
111             if (gimme != G_ARRAY)
112                 PUSHs(&PL_sv_undef);
113             PUTBACK;
114             return;
115         }
116 
117 	PUTBACK;
118 	on_stack = globber(aTHX_ entries, pat, len, is_utf8);
119 	SPAGAIN;
120     }
121 
122     /* chuck it all out, quick or slow */
123     if (gimme == G_ARRAY) {
124 	if (!on_stack) {
125 	    EXTEND(SP, AvFILLp(entries)+1);
126 	    Copy(AvARRAY(entries), SP+1, AvFILLp(entries)+1, SV *);
127 	    SP += AvFILLp(entries)+1;
128 	}
129 	/* No G_DISCARD here!  It will free the stack items. */
130 	(void)hv_delete(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, 0);
131     }
132     else {
133 	if (AvFILLp(entries) + 1) {
134 	    mPUSHs(av_shift(entries));
135 	}
136 	else {
137 	    /* return undef for EOL */
138 	    (void)hv_delete(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, G_DISCARD);
139 	    PUSHs(&PL_sv_undef);
140 	}
141     }
142     PUTBACK;
143 }
144 
145 /* returns true if the items are on the stack already, but only in
146    list context */
147 static bool
148 csh_glob(pTHX_ AV *entries, const char *pat, STRLEN len, bool is_utf8)
149 {
150 	dSP;
151 	AV *patav = NULL;
152 	const char *patend;
153 	const char *s = NULL;
154 	const char *piece = NULL;
155 	SV *word = NULL;
156 	int const flags =
157 	    (int)SvIV(get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD));
158 	U32 const gimme = GIMME_V;
159 
160 	patend = pat + len;
161 
162 	assert(SvTYPE(entries) != SVt_PVAV);
163 	sv_upgrade((SV *)entries, SVt_PVAV);
164 
165 	/* extract patterns */
166 	s = pat-1;
167 	while (++s < patend) {
168 	    switch (*s) {
169 	    case '\'':
170 	    case '"' :
171 	      {
172 		bool found = FALSE;
173 		const char quote = *s;
174 		if (!word) {
175 		    word = newSVpvs("");
176 		    if (is_utf8) SvUTF8_on(word);
177 		}
178 		if (piece) sv_catpvn(word, piece, s-piece);
179 		piece = s+1;
180 		while (++s < patend)
181 		    if (*s == '\\') {
182 			s++;
183 			/* If the backslash is here to escape a quote,
184 			   obliterate it. */
185 			if (s < patend && *s == quote)
186 			    sv_catpvn(word, piece, s-piece-1), piece = s;
187 		    }
188 		    else if (*s == quote) {
189 			sv_catpvn(word, piece, s-piece);
190 			piece = NULL;
191 			found = TRUE;
192 			break;
193 		    }
194 		if (!found) { /* unmatched quote */
195 		    /* Give up on tokenisation and treat the whole string
196 		       as a single token, but with whitespace stripped. */
197 		    piece = pat;
198 		    while (isSPACE(*pat)) pat++;
199 		    while (isSPACE(*(patend-1))) patend--;
200 		    /* bsd_glob expects a trailing null, but we cannot mod-
201 		       ify the original */
202 		    if (patend < pat + len) {
203 			if (word) sv_setpvn(word, pat, patend-pat);
204 			else
205 			    word = newSVpvn_flags(
206 				pat, patend-pat, SVf_UTF8*is_utf8
207 			    );
208 			piece = NULL;
209 		    }
210 		    else {
211 			if (word) SvREFCNT_dec(word), word=NULL;
212 			piece = pat;
213 			s = patend;
214 		    }
215 		    goto end_of_parsing;
216 		}
217 		break;
218 	      }
219 	    case '\\':
220 		if (!piece) piece = s;
221 		s++;
222 		/* If the backslash is here to escape a quote,
223 		   obliterate it. */
224 		if (s < patend && (*s == '"' || *s == '\'')) {
225 		    if (!word) {
226 			word = newSVpvn(piece,s-piece-1);
227 			if (is_utf8) SvUTF8_on(word);
228 		    }
229 		    else sv_catpvn(word, piece, s-piece-1);
230 		    piece = s;
231 		}
232 		break;
233 	    default:
234 		if (isSPACE(*s)) {
235 		    if (piece) {
236 			if (!word) {
237 			    word = newSVpvn(piece,s-piece);
238 			    if (is_utf8) SvUTF8_on(word);
239 			}
240 			else sv_catpvn(word, piece, s-piece);
241 		    }
242 		    if (!word) break;
243 		    if (!patav) patav = (AV *)sv_2mortal((SV *)newAV());
244 		    av_push(patav, word);
245 		    word = NULL;
246 		    piece = NULL;
247 		}
248 		else if (!piece) piece = s;
249 		break;
250 	    }
251 	}
252       end_of_parsing:
253 
254 	if (patav) {
255 	    I32 items = AvFILLp(patav) + 1;
256 	    SV **svp = AvARRAY(patav);
257 	    while (items--) {
258 		PUSHMARK(SP);
259 		PUTBACK;
260 		doglob(aTHX_ SvPVXx(*svp++), flags);
261 		SPAGAIN;
262 		{
263 		    dMARK;
264 		    dORIGMARK;
265 		    while (++MARK <= SP)
266 			av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
267 		    SP = ORIGMARK;
268 		}
269 	    }
270 	}
271 	/* piece is set at this point if there is no trailing whitespace.
272 	   It is the beginning of the last token or quote-delimited
273 	   piece thereof.  word is set at this point if the last token has
274 	   multiple quoted pieces. */
275 	if (piece || word) {
276 	    if (word) {
277 		if (piece) sv_catpvn(word, piece, s-piece);
278 		piece = SvPVX(word);
279 	    }
280 	    PUSHMARK(SP);
281 	    PUTBACK;
282 	    doglob(aTHX_ piece, flags);
283 	    if (word) SvREFCNT_dec(word);
284 	    SPAGAIN;
285 	    {
286 		dMARK;
287 		dORIGMARK;
288 		/* short-circuit here for a fairly common case */
289 		if (!patav && gimme == G_ARRAY) { PUTBACK; return TRUE; }
290 		while (++MARK <= SP)
291 		    av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
292 
293 		SP = ORIGMARK;
294 	    }
295 	}
296 	PUTBACK;
297 	return FALSE;
298 }
299 
300 static void
301 csh_glob_iter(pTHX)
302 {
303     iterate(aTHX_ csh_glob);
304 }
305 
306 /* wrapper around doglob that can be passed to the iterator */
307 static bool
308 doglob_iter_wrapper(pTHX_ AV *entries, const char *pattern, STRLEN len, bool is_utf8)
309 {
310     dSP;
311     int const flags =
312 	    (int)SvIV(get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD));
313 
314     PERL_UNUSED_VAR(len); /* we use \0 termination instead */
315     /* XXX we currently just use the underlying bytes of the passed SV.
316      * Some day someone needs to make glob utf8 aware */
317     PERL_UNUSED_VAR(is_utf8);
318 
319     PUSHMARK(SP);
320     PUTBACK;
321     doglob(aTHX_ pattern, flags);
322     SPAGAIN;
323     {
324 	dMARK;
325 	dORIGMARK;
326 	if (GIMME_V == G_ARRAY) { PUTBACK; return TRUE; }
327 	sv_upgrade((SV *)entries, SVt_PVAV);
328 	while (++MARK <= SP)
329 	    av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
330 	SP = ORIGMARK;
331     }
332     return FALSE;
333 }
334 
335 static void
336 glob_ophook(pTHX_ OP *o)
337 {
338   if (PL_dirty) return;
339   {
340     dMY_CXT;
341     if (MY_CXT.x_GLOB_ENTRIES
342      && (o->op_type == OP_GLOB || o->op_type == OP_ENTERSUB))
343 	(void)hv_delete(MY_CXT.x_GLOB_ENTRIES, (char *)&o, sizeof(OP *),
344 		  G_DISCARD);
345     if (MY_CXT.x_GLOB_OLD_OPHOOK) MY_CXT.x_GLOB_OLD_OPHOOK(aTHX_ o);
346   }
347 }
348 
349 MODULE = File::Glob		PACKAGE = File::Glob
350 
351 int
352 GLOB_ERROR()
353     PREINIT:
354 	dMY_CXT;
355     CODE:
356 	RETVAL = GLOB_ERROR;
357     OUTPUT:
358 	RETVAL
359 
360 void
361 bsd_glob(pattern_sv,...)
362     SV *pattern_sv
363 PREINIT:
364     int flags = 0;
365     char *pattern;
366     STRLEN len;
367 PPCODE:
368     {
369         pattern = SvPV(pattern_sv, len);
370         if (!IS_SAFE_SYSCALL(pattern, len, "pattern", "bsd_glob"))
371             XSRETURN(0);
372 	/* allow for optional flags argument */
373 	if (items > 1) {
374 	    flags = (int) SvIV(ST(1));
375 	    /* remove unsupported flags */
376 	    flags &= ~(GLOB_APPEND | GLOB_DOOFFS | GLOB_ALTDIRFUNC | GLOB_MAGCHAR);
377 	} else {
378 	    flags = (int) SvIV(get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD));
379 	}
380 
381 	PUTBACK;
382 	doglob(aTHX_ pattern, flags);
383 	SPAGAIN;
384     }
385 
386 PROTOTYPES: DISABLE
387 void
388 csh_glob(...)
389 PPCODE:
390     /* For backward-compatibility with the original Perl function, we sim-
391      * ply take the first argument, regardless of how many there are.
392      */
393     if (items) SP ++;
394     else {
395 	XPUSHs(&PL_sv_undef);
396     }
397     PUTBACK;
398     csh_glob_iter(aTHX);
399     SPAGAIN;
400 
401 void
402 bsd_glob_override(...)
403 PPCODE:
404     if (items) SP ++;
405     else {
406 	XPUSHs(&PL_sv_undef);
407     }
408     PUTBACK;
409     iterate(aTHX_ doglob_iter_wrapper);
410     SPAGAIN;
411 
412 #ifdef USE_ITHREADS
413 
414 void
415 CLONE(...)
416 INIT:
417     HV *glob_entries_clone = NULL;
418 CODE:
419     PERL_UNUSED_ARG(items);
420     {
421         dMY_CXT;
422         if ( MY_CXT.x_GLOB_ENTRIES ) {
423             CLONE_PARAMS param;
424             param.stashes    = NULL;
425             param.flags      = 0;
426             param.proto_perl = MY_CXT.interp;
427 
428             glob_entries_clone = MUTABLE_HV(sv_dup_inc((SV*)MY_CXT.x_GLOB_ENTRIES, &param));
429         }
430     }
431     {
432         MY_CXT_CLONE;
433         MY_CXT.x_GLOB_ENTRIES = glob_entries_clone;
434         MY_CXT.interp = aTHX;
435     }
436 
437 #endif
438 
439 BOOT:
440 {
441 #ifndef PERL_EXTERNAL_GLOB
442     /* Don't do this at home! The globhook interface is highly volatile. */
443     PL_globhook = csh_glob_iter;
444 #endif
445 }
446 
447 BOOT:
448 {
449     MY_CXT_INIT;
450     {
451 	dMY_CXT;
452 	MY_CXT.x_GLOB_ENTRIES = NULL;
453 	MY_CXT.x_GLOB_OLD_OPHOOK = PL_opfreehook;
454 #ifdef USE_ITHREADS
455         MY_CXT.interp = aTHX;
456 #endif
457 	PL_opfreehook = glob_ophook;
458     }
459 }
460 
461 INCLUDE: const-xs.inc
462