xref: /openbsd-src/usr.bin/locate/locate/util.c (revision 607abeab0a215de2924d06cbf5ebd42ef6eb615d)
1 /*	$OpenBSD: util.c,v 1.17 2023/04/28 20:22:35 tb Exp $
2  *
3  * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * James A. Woods.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 
36 #include <stdlib.h>
37 #include <string.h>
38 #include <err.h>
39 #include <stdio.h>
40 #include <limits.h>
41 
42 #include "locate.h"
43 
44 char 	**colon(char **, char*, char*);
45 char 	*patprep(char *);
46 int 	getwm(caddr_t);
47 int 	getwf(FILE *);
48 int	check_bigram_char(int);
49 
50 /*
51  * Validate bigram chars. If the test failed the database is corrupt
52  * or the database is obviously not a locate database.
53  */
54 int
check_bigram_char(int ch)55 check_bigram_char(int ch)
56 {
57 	/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
58 	if (ch == 0 ||
59 	    (ch >= ASCII_MIN && ch <= ASCII_MAX))
60 		return(ch);
61 
62 	(void)fprintf(stderr, "locate database header corrupt, bigram ");
63 	(void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
64 	    ASCII_MIN, ASCII_MAX, ch);
65 	exit(1);
66 }
67 
/*
 * Split a colon separated string into a NULL-terminated char vector,
 * appending the components to dbv.
 *
 * "bla:foo" -> {"bla", "foo"}
 * "bla:"    -> {"bla", dot}
 * ":"       -> {dot}
 * "bla"     -> {"bla"}
 * ""	     -> do nothing (a warning is printed)
 *
 * dbv:  existing NULL-terminated vector to extend, or NULL to start a
 *       new one.  The vector may be reallocated, so callers must use
 *       the returned pointer.
 * path: colon separated list to split; it is not modified.
 * dot:  pointer stored as-is (not copied) for every empty component.
 *
 * Non-empty components are stored as freshly allocated copies.
 * Exits via err(3) if memory cannot be allocated.
 */
char **
colon(char **dbv, char *path, char *dot)
{
	size_t vlen, slen;
	char *start, *ch, *p;
	char **pv, **newdbv;

	if (dbv == NULL) {
		if ((dbv = malloc(sizeof(*dbv))) == NULL)
			err(1, "malloc");
		*dbv = NULL;
	}

	/* empty string */
	if (*path == '\0') {
		(void)fprintf(stderr, "empty database name, ignored\n");
		return (dbv);
	}

	/* number of entries already in the vector */
	vlen = 0;
	for (pv = dbv; *pv != NULL; pv++)
		vlen++;

	for (ch = start = path; ; ch++) {
		/*
		 * A component ends at every ':' and at the end of the
		 * string, except that a path consisting of a lone ":"
		 * yields a single dot entry rather than two.
		 */
		if (*ch == ':' ||
		    (*ch == '\0' && !(ch == path + 1 && *path == ':'))) {
			if (ch == start) {
				/* empty component -> dot */
				p = dot;
			} else {
				/* a string: copy it out of path */
				slen = (size_t)(ch - start);
				if ((p = calloc(slen + 1, sizeof(*p))) == NULL)
					err(1, "malloc");
				/* calloc() zeroed the buffer: p stays terminated */
				memcpy(p, start, slen);
			}

			/* grow dbv by one slot, keeping the NULL sentinel */
			newdbv = reallocarray(dbv, vlen + 2, sizeof(*newdbv));
			if (newdbv == NULL)
				err(1, "realloc");
			dbv = newdbv;
			dbv[vlen++] = p;
			dbv[vlen] = NULL;
			start = ch + 1;
		}
		if (*ch == '\0')
			break;
	}
	return (dbv);
}
130 
131 
132 /*
133  * extract last glob-free subpattern in name for fast pre-match; prepend
134  * '\0' for backwards match; return end of new pattern
135  */
136 static char globfree[100];
137 
138 char *
patprep(char * name)139 patprep(char *name)
140 {
141 	char *endmark, *p, *subp;
142 
143 	subp = globfree;
144 	*subp++ = '\0';   /* set first element to '\0' */
145 	p = name + strlen(name) - 1;
146 
147 	/* skip trailing metacharacters */
148 	for (; p >= name; p--)
149 		if (strchr(LOCATE_REG, *p) == NULL)
150 			break;
151 
152 	/*
153 	 * check if maybe we are in a character class
154 	 *
155 	 * 'foo.[ch]'
156 	 *        |----< p
157 	 */
158 	if (p >= name &&
159 	    (strchr(p, '[') != NULL || strchr(p, ']') != NULL)) {
160 		for (p = name; *p != '\0'; p++)
161 			if (*p == ']' || *p == '[')
162 				break;
163 		p--;
164 
165 		/*
166 		 * cannot find a non-meta character, give up
167 		 * '*\*[a-z]'
168 		 *    |-------< p
169 		 */
170 		if (p >= name && strchr(LOCATE_REG, *p) != NULL)
171 			p = name - 1;
172 	}
173 
174 	if (p < name)
175 		/* only meta chars: "???", force '/' search */
176 		*subp++ = '/';
177 
178 	else {
179 		for (endmark = p; p >= name; p--)
180 			if (strchr(LOCATE_REG, *p) != NULL)
181 				break;
182 		for (++p;
183 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
184 			*subp++ = *p++;
185 	}
186 	*subp = '\0';
187 	return(--subp);
188 }
189 
190 
/*
 * Read integer from mmap pointer.
 * Essentially a simple ``return *(int *)p'', but the bytes are copied
 * into a properly aligned int first to avoid SIGBUS on misaligned
 * addresses.
 *
 * Convert network byte order to host byte order if necessary: a value
 * outside +-PATH_MAX is passed through ntohl() and checked again.  If it
 * is still out of range a diagnostic is printed to stderr and the
 * program exits with status 1.
 */

int
getwm(caddr_t p)
{
	int word;

	memcpy(&word, p, sizeof(word));

	if (word >= -(PATH_MAX) && word <= PATH_MAX)
		return (word);

	/* out of range as read: retry with the bytes swapped */
	word = ntohl(word);
	if (word >= -(PATH_MAX) && word <= PATH_MAX)
		return (word);

	(void)fprintf(stderr,
	    "integer out of +-PATH_MAX (%d): %d\n",
	    PATH_MAX, word);
	exit(1);
}
224 
/*
 * Read integer from stream.
 *
 * Convert network byte order to host byte order if necessary.
 * So we can read on FreeBSD/i386 (little endian) a locate database
 * which was built on SunOS/sparc (big endian).
 *
 * A value outside +-PATH_MAX is passed through ntohl() and checked
 * again; if it is still out of range a diagnostic is printed to stderr
 * and the program exits with status 1.  The result of getw() is not
 * checked for EOF here, so an EOF return is handed back to the caller
 * like any other in-range value.
 */

int
getwf(FILE *fp)
{
	int word = getw(fp);

	if (word >= -(PATH_MAX) && word <= PATH_MAX)
		return (word);

	/* out of range as read: retry with the bytes swapped */
	word = ntohl(word);
	if (word >= -(PATH_MAX) && word <= PATH_MAX)
		return (word);

	(void)fprintf(stderr,
	    "integer out of +-PATH_MAX (%d): %d\n",
	    PATH_MAX, word);
	exit(1);
}
251