/*-
 * Copyright (c) 2015-2021 Baptiste Daroussin <bapt@FreeBSD.org>
 * Copyright (c) 2015 Xin LI <delphij@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/capsicum.h>
#include <sys/sbuf.h>

#include <capsicum_helpers.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <xlocale.h>

/*
 * States of the keyword recognizer.  The comment next to each state names
 * the input that moves the recognizer on to the following state.
 */
typedef enum {
	INIT,		/* waiting for '$' */
	DELIM_SEEN,	/* '$' seen; a letter starts the keyword */
	KEYWORD,	/* inside the keyword; ':' ends it */
	PUNC_SEEN,	/* one ':' seen; ':' -> _SVN, space -> TEXT */
	PUNC_SEEN_SVN,	/* "::" seen; space -> TEXT */
	TEXT		/* inside the value; '$' ends the ident */
} analyzer_states;

52c048a83fSBaptiste Daroussin static int
scan(FILE * fp,const char * name,bool quiet)53c048a83fSBaptiste Daroussin scan(FILE *fp, const char *name, bool quiet)
54c048a83fSBaptiste Daroussin {
55c048a83fSBaptiste Daroussin 	int c;
56c048a83fSBaptiste Daroussin 	bool hasid = false;
577d477dc3SXin LI 	bool subversion = false;
587d477dc3SXin LI 	analyzer_states state = INIT;
59*1a4d5f13SBaptiste Daroussin 	FILE* buffp;
60*1a4d5f13SBaptiste Daroussin 	char *buf;
61*1a4d5f13SBaptiste Daroussin 	size_t sz;
62c048a83fSBaptiste Daroussin 	locale_t l;
63c048a83fSBaptiste Daroussin 
64c048a83fSBaptiste Daroussin 	l = newlocale(LC_ALL_MASK, "C", NULL);
65*1a4d5f13SBaptiste Daroussin 	sz = 0;
66*1a4d5f13SBaptiste Daroussin 	buf = NULL;
67*1a4d5f13SBaptiste Daroussin 	buffp = open_memstream(&buf, &sz);
68*1a4d5f13SBaptiste Daroussin 	if (buffp == NULL)
69*1a4d5f13SBaptiste Daroussin 		err(EXIT_FAILURE, "open_memstream()");
70c048a83fSBaptiste Daroussin 
71c048a83fSBaptiste Daroussin 	if (name != NULL)
72c048a83fSBaptiste Daroussin 		printf("%s:\n", name);
73c048a83fSBaptiste Daroussin 
74c048a83fSBaptiste Daroussin 	while ((c = fgetc(fp)) != EOF) {
757d477dc3SXin LI 		switch (state) {
767d477dc3SXin LI 		case INIT:
77c048a83fSBaptiste Daroussin 			if (c == '$') {
787d477dc3SXin LI 				/* Transit to DELIM_SEEN if we see $ */
797d477dc3SXin LI 				state = DELIM_SEEN;
807d477dc3SXin LI 			} else {
817d477dc3SXin LI 				/* Otherwise, stay in INIT state */
827d477dc3SXin LI 				continue;
837d477dc3SXin LI 			}
847d477dc3SXin LI 			break;
857d477dc3SXin LI 		case DELIM_SEEN:
867d477dc3SXin LI 			if (isalpha_l(c, l)) {
877d477dc3SXin LI 				/* Transit to KEYWORD if we see letter */
88*1a4d5f13SBaptiste Daroussin 				if (buf != NULL)
89*1a4d5f13SBaptiste Daroussin 					memset(buf, 0, sz);
90*1a4d5f13SBaptiste Daroussin 				rewind(buffp);
91*1a4d5f13SBaptiste Daroussin 				fputc('$', buffp);
92*1a4d5f13SBaptiste Daroussin 				fputc(c, buffp);
937d477dc3SXin LI 				state = KEYWORD;
947d477dc3SXin LI 
957d477dc3SXin LI 				continue;
967d477dc3SXin LI 			} else if (c == '$') {
977d477dc3SXin LI 				/* Or, stay in DELIM_SEEN if more $ */
987d477dc3SXin LI 				continue;
997d477dc3SXin LI 			} else {
1007d477dc3SXin LI 				/* Otherwise, transit back to INIT */
1017d477dc3SXin LI 				state = INIT;
1027d477dc3SXin LI 			}
1037d477dc3SXin LI 			break;
1047d477dc3SXin LI 		case KEYWORD:
105*1a4d5f13SBaptiste Daroussin 			fputc(c, buffp);
1067d477dc3SXin LI 
1077d477dc3SXin LI 			if (isalpha_l(c, l)) {
1087d477dc3SXin LI 				/*
1097d477dc3SXin LI 				 * Stay in KEYWORD if additional letter is seen
1107d477dc3SXin LI 				 */
1117d477dc3SXin LI 				continue;
1127d477dc3SXin LI 			} else if (c == ':') {
1137d477dc3SXin LI 				/*
1147d477dc3SXin LI 				 * See ':' for the first time, transit to
1157d477dc3SXin LI 				 * PUNC_SEEN.
1167d477dc3SXin LI 				 */
1177d477dc3SXin LI 				state = PUNC_SEEN;
1187d477dc3SXin LI 				subversion = false;
1197d477dc3SXin LI 			} else if (c == '$') {
1207d477dc3SXin LI 				/*
1217d477dc3SXin LI 				 * Incomplete ident.  Go back to DELIM_SEEN
1227d477dc3SXin LI 				 * state because we see a '$' which could be
1237d477dc3SXin LI 				 * the beginning of a keyword.
1247d477dc3SXin LI 				 */
1257d477dc3SXin LI 				state = DELIM_SEEN;
1267d477dc3SXin LI 			} else {
1277d477dc3SXin LI 				/*
1287d477dc3SXin LI 				 * Go back to INIT state otherwise.
1297d477dc3SXin LI 				 */
1307d477dc3SXin LI 				state = INIT;
1317d477dc3SXin LI 			}
1327d477dc3SXin LI 			break;
1337d477dc3SXin LI 		case PUNC_SEEN:
1347d477dc3SXin LI 		case PUNC_SEEN_SVN:
135*1a4d5f13SBaptiste Daroussin 			fputc(c, buffp);
1367d477dc3SXin LI 
1377d477dc3SXin LI 			switch (c) {
1387d477dc3SXin LI 			case ':':
1397d477dc3SXin LI 				/*
1407d477dc3SXin LI 				 * If we see '::' (seen : in PUNC_SEEN),
1417d477dc3SXin LI 				 * activate subversion treatment and transit
1427d477dc3SXin LI 				 * to PUNC_SEEN_SVN state.
1437d477dc3SXin LI 				 *
1447d477dc3SXin LI 				 * If more than two :'s were seen, the ident
1457d477dc3SXin LI 				 * is invalid and we would therefore go back
1467d477dc3SXin LI 				 * to INIT state.
1477d477dc3SXin LI 				 */
1487d477dc3SXin LI 				if (state == PUNC_SEEN) {
1497d477dc3SXin LI 					state = PUNC_SEEN_SVN;
1507d477dc3SXin LI 					subversion = true;
1517d477dc3SXin LI 				} else {
1527d477dc3SXin LI 					state = INIT;
1537d477dc3SXin LI 				}
1547d477dc3SXin LI 				break;
1557d477dc3SXin LI 			case ' ':
1567d477dc3SXin LI 				/*
1577d477dc3SXin LI 				 * A space after ':' or '::' indicates we are at the
1587d477dc3SXin LI 				 * last component of potential ident.
1597d477dc3SXin LI 				 */
1607d477dc3SXin LI 				state = TEXT;
1617d477dc3SXin LI 				break;
1627d477dc3SXin LI 			default:
1637d477dc3SXin LI 				/* All other characters are invalid */
1647d477dc3SXin LI 				state = INIT;
1657d477dc3SXin LI 				break;
1667d477dc3SXin LI 			}
1677d477dc3SXin LI 			break;
1687d477dc3SXin LI 		case TEXT:
169*1a4d5f13SBaptiste Daroussin 			fputc(c, buffp);
1707d477dc3SXin LI 
1717d477dc3SXin LI 			if (iscntrl_l(c, l)) {
1727d477dc3SXin LI 				/* Control characters are not allowed in this state */
1737d477dc3SXin LI 				state = INIT;
1747d477dc3SXin LI 			} else if (c == '$') {
175*1a4d5f13SBaptiste Daroussin 				fflush(buffp);
1767d477dc3SXin LI 				/*
1777d477dc3SXin LI 				 * valid ident should end with a space.
1787d477dc3SXin LI 				 *
1797d477dc3SXin LI 				 * subversion extension uses '#' to indicate that
1807d477dc3SXin LI 				 * the keyword expansion have exceeded the fixed
1817d477dc3SXin LI 				 * width, so it is also permitted if we are in
1827d477dc3SXin LI 				 * subversion mode.  No length check is enforced
1837d477dc3SXin LI 				 * because GNU RCS ident(1) does not do it either.
1847d477dc3SXin LI 				 */
185*1a4d5f13SBaptiste Daroussin 				c = buf[strlen(buf) -2 ];
1867d477dc3SXin LI 				if (c == ' ' || (subversion && c == '#')) {
187*1a4d5f13SBaptiste Daroussin 					printf("     %s\n", buf);
188c048a83fSBaptiste Daroussin 					hasid = true;
189c048a83fSBaptiste Daroussin 				}
1907d477dc3SXin LI 				state = INIT;
1917d477dc3SXin LI 			}
1927d477dc3SXin LI 			/* Other characters: stay in the state */
1937d477dc3SXin LI 			break;
1947d477dc3SXin LI 		}
195c048a83fSBaptiste Daroussin 	}
196*1a4d5f13SBaptiste Daroussin 	fclose(buffp);
197*1a4d5f13SBaptiste Daroussin 	free(buf);
198c048a83fSBaptiste Daroussin 	freelocale(l);
199c048a83fSBaptiste Daroussin 
200c048a83fSBaptiste Daroussin 	if (!hasid) {
201c048a83fSBaptiste Daroussin 		if (!quiet)
202c048a83fSBaptiste Daroussin 			fprintf(stderr, "%s warning: no id keywords in %s\n",
203c048a83fSBaptiste Daroussin 			    getprogname(), name ? name : "standard input");
204c048a83fSBaptiste Daroussin 
205c048a83fSBaptiste Daroussin 		return (EXIT_FAILURE);
206c048a83fSBaptiste Daroussin 	}
207c048a83fSBaptiste Daroussin 
208c048a83fSBaptiste Daroussin 	return (EXIT_SUCCESS);
209c048a83fSBaptiste Daroussin }
210c048a83fSBaptiste Daroussin 
211c048a83fSBaptiste Daroussin int
main(int argc,char ** argv)212c048a83fSBaptiste Daroussin main(int argc, char **argv)
213c048a83fSBaptiste Daroussin {
214c048a83fSBaptiste Daroussin 	bool quiet = false;
2151ea1b275SConrad Meyer 	int ch, i, *fds, fd;
216c048a83fSBaptiste Daroussin 	int ret = EXIT_SUCCESS;
2171ea1b275SConrad Meyer 	size_t nfds;
218c048a83fSBaptiste Daroussin 	FILE *fp;
219c048a83fSBaptiste Daroussin 
220c048a83fSBaptiste Daroussin 	while ((ch = getopt(argc, argv, "qV")) != -1) {
221c048a83fSBaptiste Daroussin 		switch (ch) {
222c048a83fSBaptiste Daroussin 		case 'q':
223c048a83fSBaptiste Daroussin 			quiet = true;
224c048a83fSBaptiste Daroussin 			break;
225c048a83fSBaptiste Daroussin 		case 'V':
226c048a83fSBaptiste Daroussin 			/* Do nothing, compat with GNU rcs's ident */
227c048a83fSBaptiste Daroussin 			return (EXIT_SUCCESS);
228c048a83fSBaptiste Daroussin 		default:
229c048a83fSBaptiste Daroussin 			errx(EXIT_FAILURE, "usage: %s [-q] [-V] [file...]",
230c048a83fSBaptiste Daroussin 			    getprogname());
231c048a83fSBaptiste Daroussin 		}
232c048a83fSBaptiste Daroussin 	}
233c048a83fSBaptiste Daroussin 
234c048a83fSBaptiste Daroussin 	argc -= optind;
235c048a83fSBaptiste Daroussin 	argv += optind;
236c048a83fSBaptiste Daroussin 
2371ea1b275SConrad Meyer 	if (caph_limit_stdio() < 0)
2381ea1b275SConrad Meyer 		err(EXIT_FAILURE, "unable to limit stdio");
2391ea1b275SConrad Meyer 
2401ea1b275SConrad Meyer 	if (argc == 0) {
2411ea1b275SConrad Meyer 		nfds = 1;
2421ea1b275SConrad Meyer 		fds = malloc(sizeof(*fds));
2431ea1b275SConrad Meyer 		if (fds == NULL)
2441ea1b275SConrad Meyer 			err(EXIT_FAILURE, "unable to allocate fds array");
2451ea1b275SConrad Meyer 		fds[0] = STDIN_FILENO;
2461ea1b275SConrad Meyer 	} else {
2471ea1b275SConrad Meyer 		nfds = argc;
2481ea1b275SConrad Meyer 		fds = malloc(sizeof(*fds) * nfds);
2491ea1b275SConrad Meyer 		if (fds == NULL)
2501ea1b275SConrad Meyer 			err(EXIT_FAILURE, "unable to allocate fds array");
251c048a83fSBaptiste Daroussin 
252c048a83fSBaptiste Daroussin 		for (i = 0; i < argc; i++) {
2531ea1b275SConrad Meyer 			fds[i] = fd = open(argv[i], O_RDONLY);
2541ea1b275SConrad Meyer 			if (fd < 0) {
2551ea1b275SConrad Meyer 				warn("%s", argv[i]);
2561ea1b275SConrad Meyer 				ret = EXIT_FAILURE;
2571ea1b275SConrad Meyer 				continue;
2581ea1b275SConrad Meyer 			}
2591ea1b275SConrad Meyer 			if (caph_limit_stream(fd, CAPH_READ) < 0)
2601ea1b275SConrad Meyer 				err(EXIT_FAILURE,
2611ea1b275SConrad Meyer 				    "unable to limit fcntls/rights for %s",
2621ea1b275SConrad Meyer 				    argv[i]);
2631ea1b275SConrad Meyer 		}
2641ea1b275SConrad Meyer 	}
2651ea1b275SConrad Meyer 
2661ea1b275SConrad Meyer 	/* Enter Capsicum sandbox. */
2677672a014SMariusz Zaborski 	if (caph_enter() < 0)
2681ea1b275SConrad Meyer 		err(EXIT_FAILURE, "unable to enter capability mode");
2691ea1b275SConrad Meyer 
2701ea1b275SConrad Meyer 	for (i = 0; i < (int)nfds; i++) {
2711ea1b275SConrad Meyer 		if (fds[i] < 0)
2721ea1b275SConrad Meyer 			continue;
2731ea1b275SConrad Meyer 
2741ea1b275SConrad Meyer 		fp = fdopen(fds[i], "r");
275c048a83fSBaptiste Daroussin 		if (fp == NULL) {
276c048a83fSBaptiste Daroussin 			warn("%s", argv[i]);
277c048a83fSBaptiste Daroussin 			ret = EXIT_FAILURE;
278c048a83fSBaptiste Daroussin 			continue;
279c048a83fSBaptiste Daroussin 		}
2801ea1b275SConrad Meyer 		if (scan(fp, argc == 0 ? NULL : argv[i], quiet) != EXIT_SUCCESS)
281c048a83fSBaptiste Daroussin 			ret = EXIT_FAILURE;
282c048a83fSBaptiste Daroussin 		fclose(fp);
283c048a83fSBaptiste Daroussin 	}
284c048a83fSBaptiste Daroussin 
285c048a83fSBaptiste Daroussin 	return (ret);
286c048a83fSBaptiste Daroussin }
287