xref: /openbsd-src/usr.sbin/rpki-client/rsync.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: rsync.c,v 1.9 2020/09/12 15:46:48 claudio Exp $ */
2 /*
3  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include <sys/stat.h>
19 #include <sys/wait.h>
20 #include <netinet/in.h>
21 #include <assert.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <poll.h>
25 #include <resolv.h>
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "extern.h"
33 
/*
 * Bookkeeping record for one rsync child process.
 * Several children may be running at the same time, so every record
 * ties a child's pid back to the request that caused it to be started.
 */
struct rsyncproc {
	char	*uri;	/* URI this rsync process was started for */
	size_t	 id;	/* identifier of the originating request */
	pid_t	 pid;	/* pid of the child, or 0 if the record is unused */
};
44 
45 /*
46  * Conforms to RFC 5781.
47  * Note that "Source" is broken down into the module, path, and also
48  * file type relevant to RPKI.
49  * Any of the pointers (except "uri") may be NULL.
50  * Returns zero on failure, non-zero on success.
51  */
52 int
53 rsync_uri_parse(const char **hostp, size_t *hostsz,
54     const char **modulep, size_t *modulesz,
55     const char **pathp, size_t *pathsz,
56     enum rtype *rtypep, const char *uri)
57 {
58 	const char	*host, *module, *path;
59 	size_t		 sz;
60 
61 	/* Initialise all output values to NULL or 0. */
62 
63 	if (hostsz != NULL)
64 		*hostsz = 0;
65 	if (modulesz != NULL)
66 		*modulesz = 0;
67 	if (pathsz != NULL)
68 		*pathsz = 0;
69 	if (hostp != NULL)
70 		*hostp = 0;
71 	if (modulep != NULL)
72 		*modulep = 0;
73 	if (pathp != NULL)
74 		*pathp = 0;
75 	if (rtypep != NULL)
76 		*rtypep = RTYPE_EOF;
77 
78 	/* Case-insensitive rsync URI. */
79 
80 	if (strncasecmp(uri, "rsync://", 8)) {
81 		warnx("%s: not using rsync schema", uri);
82 		return 0;
83 	}
84 
85 	/* Parse the non-zero-length hostname. */
86 
87 	host = uri + 8;
88 
89 	if ((module = strchr(host, '/')) == NULL) {
90 		warnx("%s: missing rsync module", uri);
91 		return 0;
92 	} else if (module == host) {
93 		warnx("%s: zero-length rsync host", uri);
94 		return 0;
95 	}
96 
97 	if (hostp != NULL)
98 		*hostp = host;
99 	if (hostsz != NULL)
100 		*hostsz = module - host;
101 
102 	/* The non-zero-length module follows the hostname. */
103 
104 	if (module[1] == '\0') {
105 		warnx("%s: zero-length rsync module", uri);
106 		return 0;
107 	}
108 
109 	module++;
110 
111 	/* The path component is optional. */
112 
113 	if ((path = strchr(module, '/')) == NULL) {
114 		assert(*module != '\0');
115 		if (modulep != NULL)
116 			*modulep = module;
117 		if (modulesz != NULL)
118 			*modulesz = strlen(module);
119 		return 1;
120 	} else if (path == module) {
121 		warnx("%s: zero-length module", uri);
122 		return 0;
123 	}
124 
125 	if (modulep != NULL)
126 		*modulep = module;
127 	if (modulesz != NULL)
128 		*modulesz = path - module;
129 
130 	path++;
131 	sz = strlen(path);
132 
133 	if (pathp != NULL)
134 		*pathp = path;
135 	if (pathsz != NULL)
136 		*pathsz = sz;
137 
138 	if (rtypep != NULL && sz > 4) {
139 		if (strcasecmp(path + sz - 4, ".roa") == 0)
140 			*rtypep = RTYPE_ROA;
141 		else if (strcasecmp(path + sz - 4, ".mft") == 0)
142 			*rtypep = RTYPE_MFT;
143 		else if (strcasecmp(path + sz - 4, ".cer") == 0)
144 			*rtypep = RTYPE_CER;
145 		else if (strcasecmp(path + sz - 4, ".crl") == 0)
146 			*rtypep = RTYPE_CRL;
147 	}
148 
149 	return 1;
150 }
151 
/*
 * SIGCHLD handler.
 * Deliberately empty: it does no work of its own and exists only so
 * that a handler is installed for the signal.
 */
static void
proc_child(int signal)
{
	(void)signal;	/* unused */
}
158 
159 /*
160  * Process used for synchronising repositories.
161  * This simply waits to be told which repository to synchronise, then
162  * does so.
163  * It then responds with the identifier of the repo that it updated.
164  * It only exits cleanly when fd is closed.
165  * FIXME: this should use buffered output to prevent deadlocks, but it's
166  * very unlikely that we're going to fill our buffer, so whatever.
167  * FIXME: limit the number of simultaneous process.
168  * Currently, an attacker can trivially specify thousands of different
169  * repositories and saturate our system.
170  */
171 void
172 proc_rsync(char *prog, char *bind_addr, int fd)
173 {
174 	size_t			 id, i, idsz = 0;
175 	ssize_t			 ssz;
176 	char			*host = NULL, *mod = NULL, *uri = NULL,
177 				*dst = NULL, *path, *save, *cmd;
178 	const char		*pp;
179 	pid_t			 pid;
180 	char			*args[32];
181 	int			 st, rc = 0;
182 	struct stat		 stt;
183 	struct pollfd		 pfd;
184 	sigset_t		 mask, oldmask;
185 	struct rsyncproc	*ids = NULL;
186 
187 	pfd.fd = fd;
188 	pfd.events = POLLIN;
189 
190 	/*
191 	 * Unveil the command we want to run.
192 	 * If this has a pathname component in it, interpret as a file
193 	 * and unveil the file directly.
194 	 * Otherwise, look up the command in our PATH.
195 	 */
196 
197 	if (strchr(prog, '/') == NULL) {
198 		if (getenv("PATH") == NULL)
199 			errx(1, "PATH is unset");
200 		if ((path = strdup(getenv("PATH"))) == NULL)
201 			err(1, "strdup");
202 		save = path;
203 		while ((pp = strsep(&path, ":")) != NULL) {
204 			if (*pp == '\0')
205 				continue;
206 			if (asprintf(&cmd, "%s/%s", pp, prog) == -1)
207 				err(1, "asprintf");
208 			if (lstat(cmd, &stt) == -1) {
209 				free(cmd);
210 				continue;
211 			} else if (unveil(cmd, "x") == -1)
212 				err(1, "%s: unveil", cmd);
213 			free(cmd);
214 			break;
215 		}
216 		free(save);
217 	} else if (unveil(prog, "x") == -1)
218 		err(1, "%s: unveil", prog);
219 
220 	/* Unveil the repository directory and terminate unveiling. */
221 
222 	if (unveil(".", "c") == -1)
223 		err(1, "unveil");
224 	if (unveil(NULL, NULL) == -1)
225 		err(1, "unveil");
226 
227 	/* Initialise retriever for children exiting. */
228 
229 	if (sigemptyset(&mask) == -1)
230 		err(1, NULL);
231 	if (signal(SIGCHLD, proc_child) == SIG_ERR)
232 		err(1, NULL);
233 	if (sigaddset(&mask, SIGCHLD) == -1)
234 		err(1, NULL);
235 	if (sigprocmask(SIG_BLOCK, &mask, &oldmask) == -1)
236 		err(1, NULL);
237 
238 	for (;;) {
239 		if (ppoll(&pfd, 1, NULL, &oldmask) == -1) {
240 			if (errno != EINTR)
241 				err(1, "ppoll");
242 
243 			/*
244 			 * If we've received an EINTR, it means that one
245 			 * of our children has exited and we can reap it
246 			 * and look up its identifier.
247 			 * Then we respond to the parent.
248 			 */
249 
250 			while ((pid = waitpid(WAIT_ANY, &st, WNOHANG)) > 0) {
251 				int ok = 1;
252 
253 				for (i = 0; i < idsz; i++)
254 					if (ids[i].pid == pid)
255 						break;
256 				assert(i < idsz);
257 
258 				if (!WIFEXITED(st)) {
259 					warnx("rsync %s terminated abnormally",
260 					    ids[i].uri);
261 					rc = 1;
262 					ok = 0;
263 				} else if (WEXITSTATUS(st) != 0) {
264 					warnx("rsync %s failed", ids[i].uri);
265 					ok = 0;
266 				}
267 
268 				io_simple_write(fd, &ids[i].id, sizeof(size_t));
269 				io_simple_write(fd, &ok, sizeof(ok));
270 				free(ids[i].uri);
271 				ids[i].uri = NULL;
272 				ids[i].pid = 0;
273 				ids[i].id = 0;
274 			}
275 			if (pid == -1 && errno != ECHILD)
276 				err(1, "waitpid");
277 			continue;
278 		}
279 
280 		/*
281 		 * Read til the parent exits.
282 		 * That will mean that we can safely exit.
283 		 */
284 
285 		if ((ssz = read(fd, &id, sizeof(size_t))) == -1)
286 			err(1, "read");
287 		if (ssz == 0)
288 			break;
289 
290 		/* Read host and module. */
291 
292 		io_str_read(fd, &host);
293 		io_str_read(fd, &mod);
294 
295 		/*
296 		 * Create source and destination locations.
297 		 * Build up the tree to this point because GPL rsync(1)
298 		 * will not build the destination for us.
299 		 */
300 
301 		if (mkdir(host, 0700) == -1 && EEXIST != errno)
302 			err(1, "%s", host);
303 
304 		if (asprintf(&dst, "%s/%s", host, mod) == -1)
305 			err(1, NULL);
306 		if (mkdir(dst, 0700) == -1 && EEXIST != errno)
307 			err(1, "%s", dst);
308 
309 		if (asprintf(&uri, "rsync://%s/%s", host, mod) == -1)
310 			err(1, NULL);
311 
312 		/* Run process itself, wait for exit, check error. */
313 
314 		if ((pid = fork()) == -1)
315 			err(1, "fork");
316 
317 		if (pid == 0) {
318 			if (pledge("stdio exec", NULL) == -1)
319 				err(1, "pledge");
320 			i = 0;
321 			args[i++] = (char *)prog;
322 			args[i++] = "-rt";
323 			if (bind_addr != NULL) {
324 				args[i++] = "--address";
325 				args[i++] = (char *)bind_addr;
326 			}
327 			args[i++] = uri;
328 			args[i++] = dst;
329 			args[i] = NULL;
330 			execvp(args[0], args);
331 			err(1, "%s: execvp", prog);
332 		}
333 
334 		/* Augment the list of running processes. */
335 
336 		for (i = 0; i < idsz; i++)
337 			if (ids[i].pid == 0)
338 				break;
339 		if (i == idsz) {
340 			ids = reallocarray(ids, idsz + 1, sizeof(*ids));
341 			if (ids == NULL)
342 				err(1, NULL);
343 			idsz++;
344 		}
345 
346 		ids[i].id = id;
347 		ids[i].pid = pid;
348 		ids[i].uri = uri;
349 
350 		/* Clean up temporary values. */
351 
352 		free(mod);
353 		free(dst);
354 		free(host);
355 	}
356 
357 	/* No need for these to be hanging around. */
358 	for (i = 0; i < idsz; i++)
359 		if (ids[i].pid > 0) {
360 			kill(ids[i].pid, SIGTERM);
361 			free(ids[i].uri);
362 		}
363 
364 	free(ids);
365 	exit(rc);
366 	/* NOTREACHED */
367 }
368