xref: /openbsd-src/usr.bin/less/filename.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  * Modified for use with illumos by Garrett D'Amore.
4  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to mess around with filenames (and files).
14  * Much of this is very OS dependent.
15  *
16  * Modified for illumos/POSIX -- it uses native glob(3C) rather than
17  * popen to a shell to perform the expansion.
18  */
19 
20 #include <sys/stat.h>
21 
22 #include <glob.h>
23 #include <stdarg.h>
24 
25 #include "less.h"
26 
27 extern int force_open;
28 extern int secure;
29 extern int use_lessopen;
30 extern int ctldisp;
31 extern int utf_mode;
32 extern IFILE curr_ifile;
33 extern IFILE old_ifile;
34 extern char openquote;
35 extern char closequote;
36 
37 /*
38  * Remove quotes around a filename.
39  */
40 char *
41 shell_unquote(char *str)
42 {
43 	char *name;
44 	char *p;
45 
46 	name = p = ecalloc(strlen(str)+1, sizeof (char));
47 	if (*str == openquote) {
48 		str++;
49 		while (*str != '\0') {
50 			if (*str == closequote) {
51 				if (str[1] != closequote)
52 					break;
53 				str++;
54 			}
55 			*p++ = *str++;
56 		}
57 	} else {
58 		char *esc = get_meta_escape();
59 		int esclen = strlen(esc);
60 		while (*str != '\0') {
61 			if (esclen > 0 && strncmp(str, esc, esclen) == 0)
62 				str += esclen;
63 			*p++ = *str++;
64 		}
65 	}
66 	*p = '\0';
67 	return (name);
68 }
69 
/*
 * Return the string used to escape shell metacharacters: the value of
 * LESSMETAESCAPE when lgetenv() finds one, otherwise a single backslash.
 * The result may be a string literal, so callers must not free it.
 */
char *
get_meta_escape(void)
{
	char *escape = lgetenv("LESSMETAESCAPE");

	return ((escape != NULL) ? escape : "\\");
}
83 
84 /*
85  * Get the characters which the shell considers to be "metacharacters".
86  */
87 static char *
88 metachars(void)
89 {
90 	static char *mchars = NULL;
91 
92 	if (mchars == NULL) {
93 		mchars = lgetenv("LESSMETACHARS");
94 		if (mchars == NULL)
95 			mchars = DEF_METACHARS;
96 	}
97 	return (mchars);
98 }
99 
/*
 * Report whether c appears in the metacharacter set returned by
 * metachars().  Returns nonzero if it does, zero otherwise.
 */
static int
metachar(char c)
{
	const char *hit = strchr(metachars(), c);

	return (hit != NULL);
}
108 
109 /*
110  * Insert a backslash before each metacharacter in a string.
111  */
112 char *
113 shell_quote(const char *s)
114 {
115 	const char *p;
116 	char *r;
117 	char *newstr;
118 	int len;
119 	char *esc = get_meta_escape();
120 	int esclen = strlen(esc);
121 	int use_quotes = 0;
122 	int have_quotes = 0;
123 
124 	/*
125 	 * Determine how big a string we need to allocate.
126 	 */
127 	len = 1; /* Trailing null byte */
128 	for (p = s;  *p != '\0';  p++) {
129 		len++;
130 		if (*p == openquote || *p == closequote)
131 			have_quotes = 1;
132 		if (metachar(*p)) {
133 			if (esclen == 0) {
134 				/*
135 				 * We've got a metachar, but this shell
136 				 * doesn't support escape chars.  Use quotes.
137 				 */
138 				use_quotes = 1;
139 			} else {
140 				/*
141 				 * Allow space for the escape char.
142 				 */
143 				len += esclen;
144 			}
145 		}
146 	}
147 	/*
148 	 * Allocate and construct the new string.
149 	 */
150 	if (use_quotes) {
151 		/* We can't quote a string that contains quotes. */
152 		if (have_quotes)
153 			return (NULL);
154 		newstr  = easprintf("%c%s%c", openquote, s, closequote);
155 	} else {
156 		newstr = r = ecalloc(len, sizeof (char));
157 		while (*s != '\0') {
158 			if (metachar(*s)) {
159 				/*
160 				 * Add the escape char.
161 				 */
162 				(void) strlcpy(r, esc, newstr + len - p);
163 				r += esclen;
164 			}
165 			*r++ = *s++;
166 		}
167 		*r = '\0';
168 	}
169 	return (newstr);
170 }
171 
/*
 * Build "dirname/filename" and return it if that file can be opened for
 * reading.  Returns NULL when dirname is NULL or empty, or when the open
 * fails.  A non-NULL result is newly allocated and belongs to the caller.
 */
static char *
dirfile(const char *dirname, const char *filename)
{
	char *candidate;
	char *unquoted;
	int fd;

	if (dirname == NULL || *dirname == '\0')
		return (NULL);

	candidate = easprintf("%s/%s", dirname, filename);

	/* Probe for the file using the unquoted form of the path. */
	unquoted = shell_unquote(candidate);
	fd = open(unquoted, O_RDONLY);
	free(unquoted);

	if (fd < 0) {
		free(candidate);
		return (NULL);
	}
	(void) close(fd);
	return (candidate);
}
203 
/*
 * Look for the given file in the directory named by HOME.
 * Returns whatever dirfile() returns: an allocated pathname, or NULL.
 */
char *
homefile(char *filename)
{
	const char *home = lgetenv("HOME");

	return (dirfile(home, filename));
}
212 
213 /*
214  * Expand a string, substituting any "%" with the current filename,
215  * and any "#" with the previous filename.
216  * But a string of N "%"s is just replaced with N-1 "%"s.
217  * Likewise for a string of N "#"s.
218  * {{ This is a lot of work just to support % and #. }}
219  */
220 char *
221 fexpand(char *s)
222 {
223 	char *fr, *to;
224 	int n;
225 	char *e;
226 	IFILE ifile;
227 
228 #define	fchar_ifile(c) \
229 	((c) == '%' ? curr_ifile : (c) == '#' ? old_ifile : NULL)
230 
231 	/*
232 	 * Make one pass to see how big a buffer we
233 	 * need to allocate for the expanded string.
234 	 */
235 	n = 0;
236 	for (fr = s;  *fr != '\0';  fr++) {
237 		switch (*fr) {
238 		case '%':
239 		case '#':
240 			if (fr > s && fr[-1] == *fr) {
241 				/*
242 				 * Second (or later) char in a string
243 				 * of identical chars.  Treat as normal.
244 				 */
245 				n++;
246 			} else if (fr[1] != *fr) {
247 				/*
248 				 * Single char (not repeated).  Treat specially.
249 				 */
250 				ifile = fchar_ifile(*fr);
251 				if (ifile == NULL)
252 					n++;
253 				else
254 					n += strlen(get_filename(ifile));
255 			}
256 			/*
257 			 * Else it is the first char in a string of
258 			 * identical chars.  Just discard it.
259 			 */
260 			break;
261 		default:
262 			n++;
263 			break;
264 		}
265 	}
266 
267 	e = ecalloc(n+1, sizeof (char));
268 
269 	/*
270 	 * Now copy the string, expanding any "%" or "#".
271 	 */
272 	to = e;
273 	for (fr = s;  *fr != '\0';  fr++) {
274 		switch (*fr) {
275 		case '%':
276 		case '#':
277 			if (fr > s && fr[-1] == *fr) {
278 				*to++ = *fr;
279 			} else if (fr[1] != *fr) {
280 				ifile = fchar_ifile(*fr);
281 				if (ifile == NULL) {
282 					*to++ = *fr;
283 				} else {
284 					(void) strlcpy(to, get_filename(ifile),
285 					    e + n + 1 - to);
286 					to += strlen(to);
287 				}
288 			}
289 			break;
290 		default:
291 			*to++ = *fr;
292 			break;
293 		}
294 	}
295 	*to = '\0';
296 	return (e);
297 }
298 
299 /*
300  * Return a blank-separated list of filenames which "complete"
301  * the given string.
302  */
303 char *
304 fcomplete(char *s)
305 {
306 	char *fpat;
307 	char *qs;
308 
309 	if (secure)
310 		return (NULL);
311 	/*
312 	 * Complete the filename "s" by globbing "s*".
313 	 */
314 	fpat =  easprintf("%s*", s);
315 
316 	qs = lglob(fpat);
317 	s = shell_unquote(qs);
318 	if (strcmp(s, fpat) == 0) {
319 		/*
320 		 * The filename didn't expand.
321 		 */
322 		free(qs);
323 		qs = NULL;
324 	}
325 	free(s);
326 	free(fpat);
327 	return (qs);
328 }
329 
330 /*
331  * Try to determine if a file is "binary".
332  * This is just a guess, and we need not try too hard to make it accurate.
333  */
334 int
335 bin_file(int f)
336 {
337 	int n;
338 	int bin_count = 0;
339 	char data[256];
340 	char *p;
341 	char *pend;
342 
343 	if (!seekable(f))
344 		return (0);
345 	if (lseek(f, (off_t)0, SEEK_SET) == (off_t)-1)
346 		return (0);
347 	n = read(f, data, sizeof (data));
348 	pend = &data[n];
349 	for (p = data; p < pend; ) {
350 		LWCHAR c = step_char(&p, +1, pend);
351 		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) {
352 			do {
353 				c = step_char(&p, +1, pend);
354 			} while (p < pend && is_ansi_middle(c));
355 		} else if (binary_char(c))
356 			bin_count++;
357 	}
358 	/*
359 	 * Call it a binary file if there are more than 5 binary characters
360 	 * in the first 256 bytes of the file.
361 	 */
362 	return (bin_count > 5);
363 }
364 
/*
 * Determine the size of a file by seeking to its end.
 * Returns the resulting offset, or -1 if the seek fails.
 * Note that this moves the file position of f to the end.
 */
static off_t
seek_filesize(int f)
{
	off_t end = lseek(f, (off_t)0, SEEK_END);

	return ((end == (off_t)-1) ? (off_t)-1 : end);
}
378 
/*
 * Read one line from a stream: characters up to, but not including,
 * the first newline or end of file.
 * Returns the line as a newly allocated, NUL-terminated string that
 * the caller frees.
 */
static char *
readfd(FILE *fd)
{
	int cap = 100;		/* initial guess at the line length */
	char *line = ecalloc(cap, sizeof (char));
	char *cur = line;
	int c;

	while ((c = getc(fd)) != '\n' && c != EOF) {
		if (cur >= line + cap - 1) {
			/*
			 * Out of room (one byte is kept for the NUL).
			 * Move what we have into a buffer twice as big.
			 */
			char *bigger;

			cap *= 2;
			*cur = '\0';
			bigger = ecalloc(cap, sizeof (char));
			strlcpy(bigger, line, cap);
			free(line);
			line = bigger;
			cur = line + strlen(line);
		}
		*cur++ = (char)c;
	}
	*cur = '\0';
	return (line);
}
418 
/*
 * Execute a shell command and return a stream connected to its
 * standard output (as opened by popen), or NULL if popen fails.
 *
 * If SHELL is set and non-empty and the command can be quoted, the
 * command run is "$SHELL -c <quoted cmd>"; otherwise cmd is handed to
 * popen unchanged.
 */
static FILE *
shellcmd(char *cmd)
{
	char *shell = lgetenv("SHELL");
	char *quoted;
	char *full;
	FILE *out;

	if (shell == NULL || *shell == '\0')
		return (popen(cmd, "r"));

	/* Escape any metacharacters in the command. */
	quoted = shell_quote(cmd);
	if (quoted == NULL)
		return (popen(cmd, "r"));

	full = easprintf("%s -c %s", shell, quoted);
	free(quoted);
	out = popen(full, "r");
	free(full);
	return (out);
}
457 
458 /*
459  * Expand a filename, doing any system-specific metacharacter substitutions.
460  */
461 char *
462 lglob(char *filename)
463 {
464 	char *gfilename;
465 	char *ofilename;
466 	glob_t list;
467 	int i;
468 	int length;
469 	char *p;
470 	char *qfilename;
471 
472 	ofilename = fexpand(filename);
473 	if (secure)
474 		return (ofilename);
475 	filename = shell_unquote(ofilename);
476 
477 	/*
478 	 * The globbing function returns a list of names.
479 	 */
480 
481 #ifndef	GLOB_TILDE
482 #define	GLOB_TILDE	0
483 #endif
484 #ifndef	GLOB_LIMIT
485 #define	GLOB_LIMIT	0
486 #endif
487 	if (glob(filename, GLOB_TILDE | GLOB_LIMIT, NULL, &list) != 0) {
488 		free(filename);
489 		return (ofilename);
490 	}
491 	length = 1; /* Room for trailing null byte */
492 	for (i = 0; i < list.gl_pathc; i++) {
493 		p = list.gl_pathv[i];
494 		qfilename = shell_quote(p);
495 		if (qfilename != NULL) {
496 			length += strlen(qfilename) + 1;
497 			free(qfilename);
498 		}
499 	}
500 	gfilename = ecalloc(length, sizeof (char));
501 	for (i = 0; i < list.gl_pathc; i++) {
502 		p = list.gl_pathv[i];
503 		qfilename = shell_quote(p);
504 		if (qfilename != NULL) {
505 			if (i != 0) {
506 				(void) strlcat(gfilename, " ", length);
507 			}
508 			(void) strlcat(gfilename, qfilename, length);
509 			free(qfilename);
510 		}
511 	}
512 	globfree(&list);
513 	free(filename);
514 	free(ofilename);
515 	return (gfilename);
516 }
517 
/*
 * Expand LESSOPEN or LESSCLOSE.  Returns a newly allocated string
 * on success, NULL otherwise.
 *
 * fmt is copied with each "%s" replaced by the next string argument and
 * "%%" replaced by a single "%".  The string arguments follow fmt and
 * are terminated by NULL; at most two are used.  Any other character
 * after "%", or a "%s" with no argument left for it, is an error.
 */
static char *
expand_pct_s(const char *fmt, ...)
{
	int		n;
	int		len;
	char		*r, *d;
	const char	*f[3];		/* max expansions + 1 for NULL */
	va_list		ap;

	/*
	 * Collect the string arguments up front; ap is not used again
	 * after the va_end() below.
	 */
	va_start(ap, fmt);
	for (n = 0; n < (int)((sizeof (f)/sizeof (f[0])) - 1); n++) {
		f[n] = va_arg(ap, const char *);
		if (f[n] == NULL) {
			break;
		}
	}
	va_end(ap);
	f[n] = NULL;	/* terminate list */

	len = strlen(fmt) + 1;
	for (n = 0; f[n] != NULL; n++) {
		len += strlen(f[n]);	/* technically could -2 for "%s" */
	}
	r = ecalloc(len, sizeof (char));

	for (n = 0, d = r; *fmt != 0; ) {
		if (*fmt != '%') {
			*d++ = *fmt++;
			continue;
		}
		fmt++;
		/* Permit embedded "%%" */
		switch (*fmt) {
		case '%':
			*d++ = '%';
			fmt++;
			break;
		case 's':
			if (f[n] == NULL) {
				/* More "%s" than arguments. */
				free(r);
				return (NULL);
			}
			(void) strlcpy(d, f[n++], r + len - d);
			fmt++;
			d += strlen(d);
			break;
		default:
			/* Unsupported conversion, or "%" at end of fmt. */
			free(r);
			return (NULL);
		}
	}
	*d = '\0';
	return (r);
}
578 
579 /*
580  * See if we should open a "replacement file"
581  * instead of the file we're about to open.
582  */
583 char *
584 open_altfile(char *filename, int *pf, void **pfd)
585 {
586 	char *lessopen;
587 	char *cmd;
588 	FILE *fd;
589 	int returnfd = 0;
590 
591 	if (!use_lessopen || secure)
592 		return (NULL);
593 	ch_ungetchar(-1);
594 	if ((lessopen = lgetenv("LESSOPEN")) == NULL)
595 		return (NULL);
596 	while (*lessopen == '|') {
597 		/*
598 		 * If LESSOPEN starts with a |, it indicates
599 		 * a "pipe preprocessor".
600 		 */
601 		lessopen++;
602 		returnfd++;
603 	}
604 	if (*lessopen == '-') {
605 		/*
606 		 * Lessopen preprocessor will accept "-" as a filename.
607 		 */
608 		lessopen++;
609 	} else {
610 		if (strcmp(filename, "-") == 0)
611 			return (NULL);
612 	}
613 
614 	if ((cmd = expand_pct_s(lessopen, filename, NULL)) == NULL) {
615 		error("Invalid LESSOPEN variable", NULL);
616 		return (NULL);
617 	}
618 	fd = shellcmd(cmd);
619 	free(cmd);
620 	if (fd == NULL) {
621 		/*
622 		 * Cannot create the pipe.
623 		 */
624 		return (NULL);
625 	}
626 	if (returnfd) {
627 		int f;
628 		char c;
629 
630 		/*
631 		 * Read one char to see if the pipe will produce any data.
632 		 * If it does, push the char back on the pipe.
633 		 */
634 		f = fileno(fd);
635 		if (read(f, &c, 1) != 1) {
636 			/*
637 			 * Pipe is empty.
638 			 * If more than 1 pipe char was specified,
639 			 * the exit status tells whether the file itself
640 			 * is empty, or if there is no alt file.
641 			 * If only one pipe char, just assume no alt file.
642 			 */
643 			int status = pclose(fd);
644 			if (returnfd > 1 && status == 0) {
645 				*pfd = NULL;
646 				*pf = -1;
647 				return (estrdup(FAKE_EMPTYFILE));
648 			}
649 			return (NULL);
650 		}
651 		ch_ungetchar(c);
652 		*pfd = (void *) fd;
653 		*pf = f;
654 		return (estrdup("-"));
655 	}
656 	cmd = readfd(fd);
657 	pclose(fd);
658 	if (*cmd == '\0')
659 		/*
660 		 * Pipe is empty.  This means there is no alt file.
661 		 */
662 		return (NULL);
663 	return (cmd);
664 }
665 
666 /*
667  * Close a replacement file.
668  */
669 void
670 close_altfile(char *altfilename, char *filename, void *pipefd)
671 {
672 	char *lessclose;
673 	FILE *fd;
674 	char *cmd;
675 
676 	if (secure)
677 		return;
678 	if (pipefd != NULL) {
679 		pclose((FILE *)pipefd);
680 	}
681 	if ((lessclose = lgetenv("LESSCLOSE")) == NULL)
682 		return;
683 	cmd = expand_pct_s(lessclose, filename, altfilename, NULL);
684 	if (cmd == NULL) {
685 		error("Invalid LESSCLOSE variable", NULL);
686 		return;
687 	}
688 	fd = shellcmd(cmd);
689 	free(cmd);
690 	if (fd != NULL)
691 		(void) pclose(fd);
692 }
693 
/*
 * Is the specified file a directory?
 * The name is unquoted with shell_unquote() before it is examined.
 * Returns nonzero if stat() succeeds and reports a directory, else 0.
 */
int
is_dir(char *filename)
{
	struct stat sb;
	char *path;
	int isdir;

	path = shell_unquote(filename);
	isdir = (stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode));
	free(path);
	return (isdir);
}
711 
712 /*
713  * Returns NULL if the file can be opened and
714  * is an ordinary file, otherwise an error message
715  * (if it cannot be opened or is a directory, etc.)
716  */
717 char *
718 bad_file(char *filename)
719 {
720 	char *m = NULL;
721 
722 	filename = shell_unquote(filename);
723 	if (!force_open && is_dir(filename)) {
724 		m = easprintf("%s is a directory", filename);
725 	} else {
726 		int r;
727 		struct stat statbuf;
728 
729 		r = stat(filename, &statbuf);
730 		if (r < 0) {
731 			m = errno_message(filename);
732 		} else if (force_open) {
733 			m = NULL;
734 		} else if (!S_ISREG(statbuf.st_mode)) {
735 			m = easprintf("%s is not a regular file (use -f to "
736 			    "see it)", filename);
737 		}
738 	}
739 	free(filename);
740 	return (m);
741 }
742 
/*
 * Return the size of a file, as cheaply as possible.
 * The size reported by fstat() is used when that call succeeds;
 * otherwise fall back to seeking to the end of the file.
 */
off_t
filesize(int f)
{
	struct stat sb;

	if (fstat(f, &sb) < 0)
		return (seek_filesize(f));
	return (sb.st_size);
}
756 
/*
 * Return last component of a pathname: a pointer to the character
 * after the final '/' in name, or name itself if it has no '/'.
 * The result points into name; nothing is allocated.
 */
char *
last_component(char *name)
{
	char *sep = strrchr(name, '/');

	if (sep == NULL)
		return (name);
	return (sep + 1);
}
772