xref: /openbsd-src/usr.bin/rsync/flist.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$Id: flist.c,v 1.24 2019/04/04 04:19:54 bket Exp $ */
2 /*
3  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2019 Florian Obser <florian@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/param.h>
19 #include <sys/stat.h>
20 
21 #include <assert.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <fts.h>
25 #include <inttypes.h>
26 #include <search.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "extern.h"
33 
/*
 * We allocate our file list in chunks so as not to grow it one entry
 * at a time.
 * Preferably we get by with one or two allocations.
 * The value is the number of list entries added per growth step.
 */
#define	FLIST_CHUNK_SIZE (1024)
40 
/*
 * These flags are part of the rsync protocol.
 * They are sent as the first byte for a file transmission and encode
 * information that affects subsequent transmissions.
 * A flag byte of zero terminates the list, so every real entry must
 * carry at least one flag.
 * The "SAME" flags tell the receiver to reuse the corresponding field
 * of the previously received entry instead of reading it.
 */
#define FLIST_TOP_LEVEL	 0x0001 /* needed for remote --delete */
#define FLIST_MODE_SAME  0x0002 /* mode is repeat */
#define	FLIST_RDEV_SAME  0x0004 /* rdev is repeat */
#define	FLIST_UID_SAME	 0x0008 /* uid is repeat */
#define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
#define	FLIST_NAME_SAME  0x0020 /* name shares a prefix with the last */
#define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
#define FLIST_TIME_SAME  0x0080 /* time is repeat */
54 
55 /*
56  * Requied way to sort a filename list.
57  */
58 static int
59 flist_cmp(const void *p1, const void *p2)
60 {
61 	const struct flist *f1 = p1, *f2 = p2;
62 
63 	return strcmp(f1->wpath, f2->wpath);
64 }
65 
66 /*
67  * Deduplicate our file list (which may be zero-length).
68  * Returns zero on failure, non-zero on success.
69  */
70 static int
71 flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
72 {
73 	size_t		 i, j;
74 	struct flist	*new;
75 	struct flist	*f, *fnext;
76 
77 	if (*sz == 0)
78 		return 1;
79 
80 	/* Create a new buffer, "new", and copy. */
81 
82 	new = calloc(*sz, sizeof(struct flist));
83 	if (new == NULL) {
84 		ERR(sess, "calloc");
85 		return 0;
86 	}
87 
88 	for (i = j = 0; i < *sz - 1; i++) {
89 		f = &(*fl)[i];
90 		fnext = &(*fl)[i + 1];
91 
92 		if (strcmp(f->wpath, fnext->wpath)) {
93 			new[j++] = *f;
94 			continue;
95 		}
96 
97 		/*
98 		 * Our working (destination) paths are the same.
99 		 * If the actual file is the same (as given on the
100 		 * command-line), then we can just discard the first.
101 		 * Otherwise, we need to bail out: it means we have two
102 		 * different files with the relative path on the
103 		 * destination side.
104 		 */
105 
106 		if (strcmp(f->path, fnext->path) == 0) {
107 			new[j++] = *f;
108 			i++;
109 			WARNX(sess, "%s: duplicate path: %s",
110 			    f->wpath, f->path);
111 			free(fnext->path);
112 			free(fnext->link);
113 			fnext->path = fnext->link = NULL;
114 			continue;
115 		}
116 
117 		ERRX(sess, "%s: duplicate working path for "
118 		    "possibly different file: %s, %s",
119 		    f->wpath, f->path, fnext->path);
120 		free(new);
121 		return 0;
122 	}
123 
124 	/* Don't forget the last entry. */
125 
126 	if (i == *sz - 1)
127 		new[j++] = (*fl)[i];
128 
129 	/*
130 	 * Reassign to the deduplicated array.
131 	 * If we started out with *sz > 0, which we check for at the
132 	 * beginning, then we'll always continue having *sz > 0.
133 	 */
134 
135 	free(*fl);
136 	*fl = new;
137 	*sz = j;
138 	assert(*sz);
139 	return 1;
140 }
141 
142 /*
143  * We're now going to find our top-level directories.
144  * This only applies to recursive mode.
145  * If we have the first element as the ".", then that's the "top
146  * directory" of our transfer.
147  * Otherwise, mark up all top-level directories in the set.
148  * XXX: the FLIST_TOP_LEVEL flag should indicate what is and what isn't
149  * a top-level directory, but I'm not sure if GPL rsync(1) respects it
150  * the same way.
151  */
152 static void
153 flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
154 {
155 	size_t		 i;
156 	const char	*cp;
157 
158 	if (!sess->opts->recursive)
159 		return;
160 
161 	if (flsz && strcmp(fl[0].wpath, ".")) {
162 		for (i = 0; i < flsz; i++) {
163 			if (!S_ISDIR(fl[i].st.mode))
164 				continue;
165 			cp = strchr(fl[i].wpath, '/');
166 			if (cp != NULL && cp[1] != '\0')
167 				continue;
168 			fl[i].st.flags |= FLSTAT_TOP_DIR;
169 			LOG4(sess, "%s: top-level", fl[i].wpath);
170 		}
171 	} else if (flsz) {
172 		fl[0].st.flags |= FLSTAT_TOP_DIR;
173 		LOG4(sess, "%s: top-level", fl[0].wpath);
174 	}
175 }
176 
177 /*
178  * Filter through the fts() file information.
179  * We want directories (pre-order), regular files, and symlinks.
180  * Everything else is skipped and possibly warned about.
181  * Return zero to skip, non-zero to examine.
182  */
183 static int
184 flist_fts_check(struct sess *sess, FTSENT *ent)
185 {
186 
187 	if (ent->fts_info == FTS_F  ||
188 	    ent->fts_info == FTS_D ||
189 	    ent->fts_info == FTS_SL ||
190 	    ent->fts_info == FTS_SLNONE)
191 		return 1;
192 
193 	if (ent->fts_info == FTS_DC) {
194 		WARNX(sess, "%s: directory cycle", ent->fts_path);
195 	} else if (ent->fts_info == FTS_DNR) {
196 		errno = ent->fts_errno;
197 		WARN(sess, "%s: unreadable directory", ent->fts_path);
198 	} else if (ent->fts_info == FTS_DOT) {
199 		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
200 	} else if (ent->fts_info == FTS_ERR) {
201 		errno = ent->fts_errno;
202 		WARN(sess, "%s", ent->fts_path);
203 	} else if (ent->fts_info == FTS_DEFAULT) {
204 		if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) ||
205 		    S_ISCHR(ent->fts_statp->st_mode))) ||
206 		    (sess->opts->specials &&
207 		    (S_ISFIFO(ent->fts_statp->st_mode) ||
208 		    S_ISSOCK(ent->fts_statp->st_mode)))) {
209 			return 1;
210 		}
211 		WARNX(sess, "%s: skipping special", ent->fts_path);
212 	} else if (ent->fts_info == FTS_NS) {
213 		errno = ent->fts_errno;
214 		WARN(sess, "%s: could not stat", ent->fts_path);
215 	}
216 
217 	return 0;
218 }
219 
220 /*
221  * Copy necessary elements in "st" into the fields of "f".
222  */
223 static void
224 flist_copy_stat(struct flist *f, const struct stat *st)
225 {
226 	f->st.mode = st->st_mode;
227 	f->st.uid = st->st_uid;
228 	f->st.gid = st->st_gid;
229 	f->st.size = st->st_size;
230 	f->st.mtime = st->st_mtime;
231 	f->st.rdev = st->st_rdev;
232 }
233 
234 void
235 flist_free(struct flist *f, size_t sz)
236 {
237 	size_t	 i;
238 
239 	if (f == NULL)
240 		return;
241 
242 	for (i = 0; i < sz; i++) {
243 		free(f[i].path);
244 		free(f[i].link);
245 	}
246 	free(f);
247 }
248 
/*
 * Serialise our file list (which may be zero-length) to the wire.
 * Makes sure that the receiver isn't going to block on sending us
 * return messages on the log channel: before each entry, pending input
 * on "fdin" is flushed if reads are multiplexed.
 * Each entry is written to "fdout" as: flag byte, name length, name,
 * size, mtime, mode, then the optional uid, gid, rdev and link target
 * depending on the session options and the file type.
 * A zero byte terminates the list and is followed, unless numeric ids
 * were requested, by the uid and/or gid identifier lists.
 * The size of every regular file is added to sess->total_size.
 * Return zero on failure, non-zero on success.
 */
int
flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
    size_t flsz)
{
	size_t		 i, sz, gidsz = 0, uidsz = 0;
	uint8_t		 flag;
	const struct flist *f;
	const char	*fn;
	struct ident	*gids = NULL, *uids = NULL;
	int		 rc = 0;

	/* Pending multiplexed data is checked per entry in the loop. */

	LOG2(sess, "sending file metadata list: %zu", flsz);

	for (i = 0; i < flsz; i++) {
		f = &fl[i];
		fn = f->wpath;
		sz = strlen(f->wpath);
		assert(sz > 0);
		assert(sz < INT32_MAX);

		/*
		 * If applicable, unclog the read buffer.
		 * This happens when the receiver has a lot of log
		 * messages and all we're doing is sending our file list
		 * without checking for messages.
		 */

		if (sess->mplex_reads &&
		    io_read_check(sess, fdin) &&
		     !io_read_flush(sess, fdin)) {
			ERRX1(sess, "io_read_flush");
			goto out;
		}

		/*
		 * For ease, make all of our filenames be "long"
		 * regardless their actual length.
		 * This also makes sure that we don't transmit a zero
		 * byte unintentionally, which would read as the
		 * end-of-list marker.
		 * None of the "SAME" flags are ever set here, so every
		 * field is always written in full.
		 */

		flag = FLIST_NAME_LONG;
		if ((FLSTAT_TOP_DIR & f->st.flags))
			flag |= FLIST_TOP_LEVEL;

		LOG3(sess, "%s: sending file metadata: "
			"size %jd, mtime %jd, mode %o",
			fn, (intmax_t)f->st.size,
			(intmax_t)f->st.mtime, f->st.mode);

		/* Now write to the wire. */
		/* FIXME: buffer this. */
		/* Note that the mtime is truncated to 32 bits by the cast. */

		if (!io_write_byte(sess, fdout, flag)) {
			ERRX1(sess, "io_write_byte");
			goto out;
		} else if (!io_write_int(sess, fdout, sz)) {
			ERRX1(sess, "io_write_int");
			goto out;
		} else if (!io_write_buf(sess, fdout, fn, sz)) {
			ERRX1(sess, "io_write_buf");
			goto out;
		} else if (!io_write_long(sess, fdout, f->st.size)) {
			ERRX1(sess, "io_write_long");
			goto out;
		} else if (!io_write_uint(sess, fdout, (uint32_t)f->st.mtime)) {
			ERRX1(sess, "io_write_uint");
			goto out;
		} else if (!io_write_uint(sess, fdout, f->st.mode)) {
			ERRX1(sess, "io_write_uint");
			goto out;
		}

		/*
		 * Conditional part: uid.
		 * The id is also collected into "uids" for the
		 * identifier list that may follow the file list.
		 * NOTE(review): the second idents_add() argument
		 * (0 here, 1 for gids) presumably selects user vs
		 * group; confirm against its definition.
		 */

		if (sess->opts->preserve_uids) {
			if (!io_write_uint(sess, fdout, f->st.uid)) {
				ERRX1(sess, "io_write_uint");
				goto out;
			}
			if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) {
				ERRX1(sess, "idents_add");
				goto out;
			}
		}

		/* Conditional part: gid (collected into "gids" likewise). */

		if (sess->opts->preserve_gids) {
			if (!io_write_uint(sess, fdout, f->st.gid)) {
				ERRX1(sess, "io_write_uint");
				goto out;
			}
			if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) {
				ERRX1(sess, "idents_add");
				goto out;
			}
		}

		/* Conditional part: devices & special files. */

		if ((sess->opts->devices && (S_ISBLK(f->st.mode) ||
		     S_ISCHR(f->st.mode))) ||
		    (sess->opts->specials && (S_ISFIFO(f->st.mode) ||
		    S_ISSOCK(f->st.mode)))) {
			if (!io_write_int(sess, fdout, f->st.rdev)) {
				ERRX1(sess, "io_write_int");
				goto out;
			}
		}

		/*
		 * Conditional part: link.
		 * The target is sent as a length followed by the bytes,
		 * without a terminating NUL.
		 */

		if (S_ISLNK(f->st.mode) &&
		    sess->opts->preserve_links) {
			fn = f->link;
			sz = strlen(f->link);
			assert(sz < INT32_MAX);
			if (!io_write_int(sess, fdout, sz)) {
				ERRX1(sess, "io_write_int");
				goto out;
			}
			if (!io_write_buf(sess, fdout, fn, sz)) {
				ERRX1(sess, "io_write_buf");
				goto out;
			}
		}

		/* Only regular files count toward the transfer total. */

		if (S_ISREG(f->st.mode))
			sess->total_size += f->st.size;
	}

	/* Signal end of file list. */

	if (!io_write_byte(sess, fdout, 0)) {
		ERRX1(sess, "io_write_byte");
		goto out;
	}

	/* Conditionally write identifier lists (not with numeric ids). */

	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
		LOG2(sess, "sending uid list: %zu", uidsz);
		if (!idents_send(sess, fdout, uids, uidsz)) {
			ERRX1(sess, "idents_send");
			goto out;
		}
	}

	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
		LOG2(sess, "sending gid list: %zu", gidsz);
		if (!idents_send(sess, fdout, gids, gidsz)) {
			ERRX1(sess, "idents_send");
			goto out;
		}
	}

	rc = 1;
out:
	/* The collected identifier lists are released on every path. */
	idents_free(gids, gidsz);
	idents_free(uids, uidsz);
	return rc;
}
420 
421 /*
422  * Read the filename of a file list.
423  * This is the most expensive part of the file list transfer, so a lot
424  * of attention has gone into transmitting as little as possible.
425  * Micro-optimisation, but whatever.
426  * Fills in "f" with the full path on success.
427  * Returns zero on failure, non-zero on success.
428  */
429 static int
430 flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
431     char last[MAXPATHLEN])
432 {
433 	uint8_t		 bval;
434 	size_t		 partial = 0;
435 	size_t		 pathlen = 0, len;
436 
437 	/*
438 	 * Read our filename.
439 	 * If we have FLIST_NAME_SAME, we inherit some of the last
440 	 * transmitted name.
441 	 * If we have FLIST_NAME_LONG, then the string length is greater
442 	 * than byte-size.
443 	 */
444 
445 	if (FLIST_NAME_SAME & flags) {
446 		if (!io_read_byte(sess, fd, &bval)) {
447 			ERRX1(sess, "io_read_byte");
448 			return 0;
449 		}
450 		partial = bval;
451 	}
452 
453 	/* Get the (possibly-remaining) filename length. */
454 
455 	if (FLIST_NAME_LONG & flags) {
456 		if (!io_read_size(sess, fd, &pathlen)) {
457 			ERRX1(sess, "io_read_size");
458 			return 0;
459 		}
460 	} else {
461 		if (!io_read_byte(sess, fd, &bval)) {
462 			ERRX1(sess, "io_read_byte");
463 			return 0;
464 		}
465 		pathlen = bval;
466 	}
467 
468 	/* Allocate our full filename length. */
469 	/* FIXME: maximum pathname length. */
470 
471 	if ((len = pathlen + partial) == 0) {
472 		ERRX(sess, "security violation: zero-length pathname");
473 		return 0;
474 	}
475 
476 	if ((f->path = malloc(len + 1)) == NULL) {
477 		ERR(sess, "malloc");
478 		return 0;
479 	}
480 	f->path[len] = '\0';
481 
482 	if (FLIST_NAME_SAME & flags)
483 		memcpy(f->path, last, partial);
484 
485 	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
486 		ERRX1(sess, "io_read_buf");
487 		return 0;
488 	}
489 
490 	if (f->path[0] == '/') {
491 		ERRX(sess, "security violation: absolute pathname: %s",
492 		    f->path);
493 		return 0;
494 	}
495 
496 	if (strstr(f->path, "/../") != NULL ||
497 	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
498 	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
499 	    strcmp(f->path, "..") == 0) {
500 		ERRX(sess, "%s: security violation: backtracking pathname",
501 		    f->path);
502 		return 0;
503 	}
504 
505 	/* Record our last path and construct our filename. */
506 
507 	strlcpy(last, f->path, MAXPATHLEN);
508 	f->wpath = f->path;
509 	return 1;
510 }
511 
512 /*
513  * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
514  * Returns zero on failure, non-zero on success.
515  */
516 static int
517 flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
518 {
519 	void	*pp;
520 
521 	if (*sz + 1 <= *max)  {
522 		(*sz)++;
523 		return 1;
524 	}
525 
526 	pp = recallocarray(*fl, *max,
527 		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
528 	if (pp == NULL) {
529 		ERR(sess, "recallocarray");
530 		return 0;
531 	}
532 	*fl = pp;
533 	*max += FLIST_CHUNK_SIZE;
534 	(*sz)++;
535 	return 1;
536 }
537 
538 /*
539  * Copy a regular or symbolic link file "path" into "f".
540  * This handles the correct path creation and symbolic linking.
541  * Returns zero on failure, non-zero on success.
542  */
543 static int
544 flist_append(struct sess *sess, struct flist *f, struct stat *st,
545     const char *path)
546 {
547 
548 	/*
549 	 * Copy the full path for local addressing and transmit
550 	 * only the filename part for the receiver.
551 	 */
552 
553 	if ((f->path = strdup(path)) == NULL) {
554 		ERR(sess, "strdup");
555 		return 0;
556 	}
557 
558 	if ((f->wpath = strrchr(f->path, '/')) == NULL)
559 		f->wpath = f->path;
560 	else
561 		f->wpath++;
562 
563 	/*
564 	 * On the receiving end, we'll strip out all bits on the
565 	 * mode except for the file permissions.
566 	 * No need to warn about it here.
567 	 */
568 
569 	flist_copy_stat(f, st);
570 
571 	/* Optionally copy link information. */
572 
573 	if (S_ISLNK(st->st_mode)) {
574 		f->link = symlink_read(sess, f->path);
575 		if (f->link == NULL) {
576 			ERRX1(sess, "symlink_read");
577 			return 0;
578 		}
579 	}
580 
581 	return 1;
582 }
583 
584 /*
585  * Receive a file list from the wire, filling in length "sz" (which may
586  * possibly be zero) and list "flp" on success.
587  * Return zero on failure, non-zero on success.
588  */
589 int
590 flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
591 {
592 	struct flist	*fl = NULL;
593 	struct flist	*ff;
594 	const struct flist *fflast = NULL;
595 	size_t		 flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0;
596 	uint8_t		 flag;
597 	char		 last[MAXPATHLEN];
598 	int64_t		 lval; /* temporary values... */
599 	int32_t		 ival;
600 	uint32_t	 uival;
601 	struct ident	*gids = NULL, *uids = NULL;
602 
603 	last[0] = '\0';
604 
605 	for (;;) {
606 		if (!io_read_byte(sess, fd, &flag)) {
607 			ERRX1(sess, "io_read_byte");
608 			goto out;
609 		} else if (flag == 0)
610 			break;
611 
612 		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
613 			ERRX1(sess, "flist_realloc");
614 			goto out;
615 		}
616 
617 		ff = &fl[flsz - 1];
618 		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
619 
620 		/* Filename first. */
621 
622 		if (!flist_recv_name(sess, fd, ff, flag, last)) {
623 			ERRX1(sess, "flist_recv_name");
624 			goto out;
625 		}
626 
627 		/* Read the file size. */
628 
629 		if (!io_read_long(sess, fd, &lval)) {
630 			ERRX1(sess, "io_read_long");
631 			goto out;
632 		}
633 		ff->st.size = lval;
634 
635 		/* Read the modification time. */
636 
637 		if (!(FLIST_TIME_SAME & flag)) {
638 			if (!io_read_uint(sess, fd, &uival)) {
639 				ERRX1(sess, "io_read_int");
640 				goto out;
641 			}
642 			ff->st.mtime = uival;	/* beyond 2038 */
643 		} else if (fflast == NULL) {
644 			ERRX(sess, "same time without last entry");
645 			goto out;
646 		}  else
647 			ff->st.mtime = fflast->st.mtime;
648 
649 		/* Read the file mode. */
650 
651 		if (!(FLIST_MODE_SAME & flag)) {
652 			if (!io_read_uint(sess, fd, &uival)) {
653 				ERRX1(sess, "io_read_int");
654 				goto out;
655 			}
656 			ff->st.mode = uival;
657 		} else if (fflast == NULL) {
658 			ERRX(sess, "same mode without last entry");
659 			goto out;
660 		} else
661 			ff->st.mode = fflast->st.mode;
662 
663 		/* Conditional part: uid. */
664 
665 		if (sess->opts->preserve_uids) {
666 			if (!(FLIST_UID_SAME & flag)) {
667 				if (!io_read_uint(sess, fd, &uival)) {
668 					ERRX1(sess, "io_read_int");
669 					goto out;
670 				}
671 				ff->st.uid = uival;
672 			} else if (fflast == NULL) {
673 				ERRX(sess, "same uid without last entry");
674 				goto out;
675 			} else
676 				ff->st.uid = fflast->st.uid;
677 		}
678 
679 		/* Conditional part: gid. */
680 
681 		if (sess->opts->preserve_gids) {
682 			if (!(FLIST_GID_SAME & flag)) {
683 				if (!io_read_uint(sess, fd, &uival)) {
684 					ERRX1(sess, "io_read_int");
685 					goto out;
686 				}
687 				ff->st.gid = uival;
688 			} else if (fflast == NULL) {
689 				ERRX(sess, "same gid without last entry");
690 				goto out;
691 			} else
692 				ff->st.gid = fflast->st.gid;
693 		}
694 
695 		/* Conditional part: devices & special files. */
696 
697 		if ((sess->opts->devices && (S_ISBLK(ff->st.mode) ||
698 		     S_ISCHR(ff->st.mode))) ||
699 		    (sess->opts->specials && (S_ISFIFO(ff->st.mode) ||
700 		    S_ISSOCK(ff->st.mode)))) {
701 			if (!(FLIST_RDEV_SAME & flag)) {
702 				if (!io_read_int(sess, fd, &ival)) {
703 					ERRX1(sess, "io_read_int");
704 					goto out;
705 				}
706 				ff->st.rdev = ival;
707 			} else if (fflast == NULL) {
708 				ERRX(sess, "same device without last entry");
709 				goto out;
710 			} else
711 				ff->st.rdev = fflast->st.rdev;
712 		}
713 
714 		/* Conditional part: link. */
715 
716 		if (S_ISLNK(ff->st.mode) &&
717 		    sess->opts->preserve_links) {
718 			if (!io_read_size(sess, fd, &lsz)) {
719 				ERRX1(sess, "io_read_size");
720 				goto out;
721 			} else if (lsz == 0) {
722 				ERRX(sess, "empty link name");
723 				goto out;
724 			}
725 			ff->link = calloc(lsz + 1, 1);
726 			if (ff->link == NULL) {
727 				ERR(sess, "calloc");
728 				goto out;
729 			}
730 			if (!io_read_buf(sess, fd, ff->link, lsz)) {
731 				ERRX1(sess, "io_read_buf");
732 				goto out;
733 			}
734 		}
735 
736 		LOG3(sess, "%s: received file metadata: "
737 			"size %jd, mtime %jd, mode %o, rdev (%d, %d)",
738 			ff->path, (intmax_t)ff->st.size,
739 			(intmax_t)ff->st.mtime, ff->st.mode,
740 			major(ff->st.rdev), minor(ff->st.rdev));
741 
742 		if (S_ISREG(ff->st.mode))
743 			sess->total_size += ff->st.size;
744 	}
745 
746 	/* Conditionally read the user/group list. */
747 
748 	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
749 		if (!idents_recv(sess, fd, &uids, &uidsz)) {
750 			ERRX1(sess, "idents_recv");
751 			goto out;
752 		}
753 		LOG2(sess, "received uid list: %zu", uidsz);
754 	}
755 
756 	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
757 		if (!idents_recv(sess, fd, &gids, &gidsz)) {
758 			ERRX1(sess, "idents_recv");
759 			goto out;
760 		}
761 		LOG2(sess, "received gid list: %zu", gidsz);
762 	}
763 
764 	/* Remember to order the received list. */
765 
766 	LOG2(sess, "received file metadata list: %zu", flsz);
767 	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
768 	flist_topdirs(sess, fl, flsz);
769 	*sz = flsz;
770 	*flp = fl;
771 
772 	/* Conditionally remap and reassign identifiers. */
773 
774 	if (sess->opts->preserve_uids && !sess->opts->numeric_ids) {
775 		idents_remap(sess, 0, uids, uidsz);
776 		idents_assign_uid(sess, fl, flsz, uids, uidsz);
777 	}
778 
779 	if (sess->opts->preserve_gids && !sess->opts->numeric_ids) {
780 		idents_remap(sess, 1, gids, gidsz);
781 		idents_assign_gid(sess, fl, flsz, gids, gidsz);
782 	}
783 
784 	idents_free(gids, gidsz);
785 	idents_free(uids, uidsz);
786 	return 1;
787 out:
788 	flist_free(fl, flsz);
789 	idents_free(gids, gidsz);
790 	idents_free(uids, uidsz);
791 	*sz = 0;
792 	*flp = NULL;
793 	return 0;
794 }
795 
796 /*
797  * Generate a flist possibly-recursively given a file root, which may
798  * also be a regular file or symlink.
799  * On success, augments the generated list in "flp" of length "sz".
800  * Returns zero on failure, non-zero on success.
801  */
802 static int
803 flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
804     size_t *max)
805 {
806 	char		*cargv[2], *cp;
807 	int		 rc = 0, nxdev = 0, flag, i;
808 	FTS		*fts;
809 	FTSENT		*ent;
810 	struct flist	*f;
811 	size_t		 flsz = 0, stripdir;
812 	dev_t		*xdev;
813 	struct stat	 st;
814 
815 	cargv[0] = root;
816 	cargv[1] = NULL;
817 
818 	/*
819 	 * If we're a file, then revert to the same actions we use for
820 	 * the non-recursive scan.
821 	 */
822 
823 	if (lstat(root, &st) == -1) {
824 		ERR(sess, "%s: lstat", root);
825 		return 0;
826 	} else if (S_ISREG(st.st_mode)) {
827 		if (!flist_realloc(sess, fl, sz, max)) {
828 			ERRX1(sess, "flist_realloc");
829 			return 0;
830 		}
831 		f = &(*fl)[(*sz) - 1];
832 		assert(f != NULL);
833 
834 		if (!flist_append(sess, f, &st, root)) {
835 			ERRX1(sess, "flist_append");
836 			return 0;
837 		}
838 		if (unveil(root, "r") == -1) {
839 			ERR(sess, "%s: unveil", root);
840 			return 0;
841 		}
842 		return 1;
843 	} else if (S_ISLNK(st.st_mode)) {
844 		if (!sess->opts->preserve_links) {
845 			WARNX(sess, "%s: skipping symlink", root);
846 			return 1;
847 		} else if (!flist_realloc(sess, fl, sz, max)) {
848 			ERRX1(sess, "flist_realloc");
849 			return 0;
850 		}
851 		f = &(*fl)[(*sz) - 1];
852 		assert(f != NULL);
853 
854 		if (!flist_append(sess, f, &st, root)) {
855 			ERRX1(sess, "flist_append");
856 			return 0;
857 		}
858 		if (unveil(root, "r") == -1) {
859 			ERR(sess, "%s: unveil", root);
860 			return 0;
861 		}
862 		return 1;
863 	} else if (!S_ISDIR(st.st_mode)) {
864 		WARNX(sess, "%s: skipping special", root);
865 		return 1;
866 	}
867 
868 	/*
869 	 * If we end with a slash, it means that we're not supposed to
870 	 * copy the directory part itself---only the contents.
871 	 * So set "stripdir" to be what we take out.
872 	 */
873 
874 	stripdir = strlen(root);
875 	assert(stripdir > 0);
876 	if (root[stripdir - 1] != '/')
877 		stripdir = 0;
878 
879 	/*
880 	 * If we're not stripping anything, then see if we need to strip
881 	 * out the leading material in the path up to and including the
882 	 * last directory component.
883 	 */
884 
885 	if (stripdir == 0)
886 		if ((cp = strrchr(root, '/')) != NULL)
887 			stripdir = cp - root + 1;
888 
889 	/*
890 	 * If we're recursive, then we need to take down all of the
891 	 * files and directory components, so use fts(3).
892 	 * Copying the information file-by-file into the flstat.
893 	 * We'll make sense of it in flist_send.
894 	 */
895 
896 	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
897 		ERR(sess, "fts_open");
898 		return 0;
899 	}
900 
901 	errno = 0;
902 	while ((ent = fts_read(fts)) != NULL) {
903 		if (!flist_fts_check(sess, ent)) {
904 			errno = 0;
905 			continue;
906 		}
907 
908 		/* We don't allow symlinks without -l. */
909 
910 		assert(ent->fts_statp != NULL);
911 		if (S_ISLNK(ent->fts_statp->st_mode) &&
912 		    !sess->opts->preserve_links) {
913 			WARNX(sess, "%s: skipping symlink", ent->fts_path);
914 			continue;
915 		}
916 
917 		/*
918 		 * If rsync is told to avoid crossing a filesystem
919 		 * boundary when recursing, then replace all mount point
920 		 * directories with empty directories.  The latter is
921 		 * prevented by telling rsync multiple times to avoid
922 		 * crossing a filesystem boundary when recursing.
923 		 * Replacing mount point directories is tricky. We need
924 		 * to sort out which directories to include.  As such,
925 		 * keep track of unique device inodes, and use these for
926 		 * comparison.
927 		 */
928 
929 		if (sess->opts->one_file_system &&
930 		    ent->fts_statp->st_dev != st.st_dev) {
931 			if (sess->opts->one_file_system > 1 ||
932 			    !S_ISDIR(ent->fts_statp->st_mode))
933 				continue;
934 
935 			if ((xdev = malloc(sizeof(dev_t))) == NULL) {
936 				ERRX1(sess, "malloc");
937 				goto out;
938 			}
939 
940 			flag = 0;
941 			for (i = 0; i < nxdev; i++)
942 				if (xdev[i] == ent->fts_statp->st_dev) {
943 					flag = 1;
944 					break;
945 				}
946 			if (flag)
947 				continue;
948 
949 			if (nxdev)
950 				if ((xdev = realloc(xdev, sizeof(dev_t))) ==
951 				    NULL) {
952 					ERRX1(sess, "realloc");
953 					goto out;
954 				}
955 			xdev[nxdev] = ent->fts_statp->st_dev;
956 			nxdev++;
957 		}
958 
959 		/* Allocate a new file entry. */
960 
961 		if (!flist_realloc(sess, fl, sz, max)) {
962 			ERRX1(sess, "flist_realloc");
963 			goto out;
964 		}
965 		flsz++;
966 		f = &(*fl)[*sz - 1];
967 
968 		/* Our path defaults to "." for the root. */
969 
970 		if (ent->fts_path[stripdir] == '\0') {
971 			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
972 				ERR(sess, "asprintf");
973 				f->path = NULL;
974 				goto out;
975 			}
976 		} else {
977 			if ((f->path = strdup(ent->fts_path)) == NULL) {
978 				ERR(sess, "strdup");
979 				goto out;
980 			}
981 		}
982 
983 		f->wpath = f->path + stripdir;
984 		flist_copy_stat(f, ent->fts_statp);
985 
986 		/* Optionally copy link information. */
987 
988 		if (S_ISLNK(ent->fts_statp->st_mode)) {
989 			f->link = symlink_read(sess, f->path);
990 			if (f->link == NULL) {
991 				ERRX1(sess, "symlink_read");
992 				goto out;
993 			}
994 		}
995 
996 		/* Reset errno for next fts_read() call. */
997 		errno = 0;
998 	}
999 	if (errno) {
1000 		ERR(sess, "fts_read");
1001 		goto out;
1002 	}
1003 	if (unveil(root, "r") == -1) {
1004 		ERR(sess, "%s: unveil", root);
1005 		goto out;
1006 	}
1007 
1008 	LOG3(sess, "generated %zu filenames: %s", flsz, root);
1009 	rc = 1;
1010 out:
1011 	fts_close(fts);
1012 	if (sess->opts->one_file_system)
1013 		free(xdev);
1014 	return rc;
1015 }
1016 
/*
 * Generate a flist recursively given the array of directories (or
 * files, symlinks, doesn't matter) specified in argv (argc >0).
 * Each argument is handed to flist_gen_dirent() in turn and the first
 * failure aborts the scan.
 * On success, stores the generated list in "flp" with length "sz",
 * which may be zero.
 * On failure, the list is released and "flp"/"sz" are set to
 * NULL/zero.
 * Returns zero on failure, non-zero on success.
 */
static int
flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
    size_t *sz)
{
	size_t		 idx, max = 0;

	for (idx = 0; idx < argc; idx++) {
		if (flist_gen_dirent(sess, argv[idx], flp, sz, &max))
			continue;

		/* Free all allocated slots, not just those in use. */

		ERRX1(sess, "flist_gen_dirent");
		flist_free(*flp, max);
		*flp = NULL;
		*sz = 0;
		return 0;
	}

	LOG2(sess, "recursively generated %zu filenames", *sz);
	return 1;
}
1045 
1046 /*
1047  * Generate list of files from the command-line argc (>0) and argv.
1048  * On success, stores the generated list in "flp" with length "sz",
1049  * which may be zero.
1050  * Returns zero on failure, non-zero on success.
1051  */
1052 static int
1053 flist_gen_files(struct sess *sess, size_t argc, char **argv,
1054     struct flist **flp, size_t *sz)
1055 {
1056 	struct flist	*fl = NULL, *f;
1057 	size_t		 i, flsz = 0;
1058 	struct stat	 st;
1059 
1060 	assert(argc);
1061 
1062 	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
1063 		ERR(sess, "calloc");
1064 		return 0;
1065 	}
1066 
1067 	for (i = 0; i < argc; i++) {
1068 		if (argv[i][0] == '\0')
1069 			continue;
1070 		if (lstat(argv[i], &st) == -1) {
1071 			ERR(sess, "%s: lstat", argv[i]);
1072 			goto out;
1073 		}
1074 
1075 		/*
1076 		 * File type checks.
1077 		 * In non-recursive mode, we don't accept directories.
1078 		 * We also skip symbolic links without -l.
1079 		 * Beyond that, we only accept regular files.
1080 		 */
1081 
1082 		if (S_ISDIR(st.st_mode)) {
1083 			WARNX(sess, "%s: skipping directory", argv[i]);
1084 			continue;
1085 		} else if (S_ISLNK(st.st_mode)) {
1086 			if (!sess->opts->preserve_links) {
1087 				WARNX(sess, "%s: skipping symlink", argv[i]);
1088 				continue;
1089 			}
1090 		} else if (!S_ISREG(st.st_mode)) {
1091 			WARNX(sess, "%s: skipping special", argv[i]);
1092 			continue;
1093 		}
1094 
1095 
1096 		f = &fl[flsz++];
1097 		assert(f != NULL);
1098 
1099 		/* Add this file to our file-system worldview. */
1100 
1101 		if (unveil(argv[i], "r") == -1) {
1102 			ERR(sess, "%s: unveil", argv[i]);
1103 			goto out;
1104 		}
1105 		if (!flist_append(sess, f, &st, argv[i])) {
1106 			ERRX1(sess, "flist_append");
1107 			goto out;
1108 		}
1109 	}
1110 
1111 	LOG2(sess, "non-recursively generated %zu filenames", flsz);
1112 	*sz = flsz;
1113 	*flp = fl;
1114 	return 1;
1115 out:
1116 	flist_free(fl, argc);
1117 	*sz = 0;
1118 	*flp = NULL;
1119 	return 0;
1120 }
1121 
1122 /*
1123  * Generate a sorted, de-duplicated list of file metadata.
1124  * In non-recursive mode (the default), we use only the files we're
1125  * given.
1126  * Otherwise, directories are recursively examined.
1127  * Returns zero on failure, non-zero on success.
1128  * On success, "fl" will need to be freed with flist_free().
1129  */
1130 int
1131 flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
1132     size_t *sz)
1133 {
1134 	int	 rc;
1135 
1136 	assert(argc > 0);
1137 	rc = sess->opts->recursive ?
1138 		flist_gen_dirs(sess, argc, argv, flp, sz) :
1139 		flist_gen_files(sess, argc, argv, flp, sz);
1140 
1141 	/* After scanning, lock our file-system view. */
1142 
1143 	if (unveil(NULL, NULL) == -1) {
1144 		ERR(sess, "unveil");
1145 		return 0;
1146 	}
1147 	if (!rc)
1148 		return 0;
1149 
1150 	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1151 
1152 	if (flist_dedupe(sess, flp, sz)) {
1153 		flist_topdirs(sess, *flp, *sz);
1154 		return 1;
1155 	}
1156 
1157 	ERRX1(sess, "flist_dedupe");
1158 	flist_free(*flp, *sz);
1159 	*flp = NULL;
1160 	*sz = 0;
1161 	return 0;
1162 }
1163 
1164 /*
1165  * Generate a list of files in root to delete that are within the
1166  * top-level directories stipulated by "wfl".
1167  * Only handles symbolic links, directories, and regular files.
1168  * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1169  * on success.
1170  * On success, "fl" will need to be freed with flist_free().
1171  */
1172 int
1173 flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1174     size_t *sz,	const struct flist *wfl, size_t wflsz)
1175 {
1176 	char		**cargv = NULL;
1177 	int		  rc = 0, c, flag;
1178 	FTS		 *fts = NULL;
1179 	FTSENT		 *ent;
1180 	struct flist	 *f;
1181 	struct stat	  st;
1182 	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1183 	ENTRY		  hent;
1184 	ENTRY		 *hentp;
1185 
1186 	*fl = NULL;
1187 	*sz = 0;
1188 
1189 	/* Only run this code when we're recursive. */
1190 
1191 	if (!sess->opts->recursive)
1192 		return 1;
1193 
1194 	/*
1195 	 * Gather up all top-level directories for scanning.
1196 	 * This is stipulated by rsync's --delete behaviour, where we
1197 	 * only delete things in the top-level directories given on the
1198 	 * command line.
1199 	 */
1200 
1201 	assert(wflsz > 0);
1202 	for (i = 0; i < wflsz; i++)
1203 		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1204 			cargvs++;
1205 	if (cargvs == 0)
1206 		return 1;
1207 
1208 	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1209 		ERR(sess, "calloc");
1210 		return 0;
1211 	}
1212 
1213 	/*
1214 	 * If we're given just a "." as the first entry, that means
1215 	 * we're doing a relative copy with a trailing slash.
1216 	 * Special-case this just for the sake of simplicity.
1217 	 * Otherwise, look through all top-levels.
1218 	 */
1219 
1220 	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1221 		assert(cargvs == 1);
1222 		assert(S_ISDIR(wfl[0].st.mode));
1223 		if (asprintf(&cargv[0], "%s/", root) < 0) {
1224 			ERR(sess, "asprintf");
1225 			cargv[0] = NULL;
1226 			goto out;
1227 		}
1228 		cargv[1] = NULL;
1229 	} else {
1230 		for (i = j = 0; i < wflsz; i++) {
1231 			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1232 				continue;
1233 			assert(S_ISDIR(wfl[i].st.mode));
1234 			assert(strcmp(wfl[i].wpath, "."));
1235 			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1236 			if (c < 0) {
1237 				ERR(sess, "asprintf");
1238 				cargv[j] = NULL;
1239 				goto out;
1240 			}
1241 			LOG4(sess, "%s: will scan for deletions", cargv[j]);
1242 			j++;
1243 		}
1244 		assert(j == cargvs);
1245 		cargv[j] = NULL;
1246 	}
1247 
1248 	LOG2(sess, "delete from %zu directories", cargvs);
1249 
1250 	/*
1251 	 * Next, use the standard hcreate(3) hashtable interface to hash
1252 	 * all of the files that we want to synchronise.
1253 	 * This way, we'll be able to determine which files we want to
1254 	 * delete in O(n) time instead of O(n * search) time.
1255 	 * Plus, we can do the scan in-band and only allocate the files
1256 	 * we want to delete.
1257 	 */
1258 
1259 	if (!hcreate(wflsz)) {
1260 		ERR(sess, "hcreate");
1261 		goto out;
1262 	}
1263 
1264 	for (i = 0; i < wflsz; i++) {
1265 		memset(&hent, 0, sizeof(ENTRY));
1266 		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1267 			ERR(sess, "strdup");
1268 			goto out;
1269 		}
1270 		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1271 			ERR(sess, "hsearch");
1272 			goto out;
1273 		} else if (hentp->key != hent.key) {
1274 			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1275 			free(hent.key);
1276 			goto out;
1277 		}
1278 	}
1279 
1280 	/*
1281 	 * Now we're going to try to descend into all of the top-level
1282 	 * directories stipulated by the file list.
1283 	 * If the directories don't exist, it's ok.
1284 	 */
1285 
1286 	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1287 		ERR(sess, "fts_open");
1288 		goto out;
1289 	}
1290 
1291 	stripdir = strlen(root) + 1;
1292 	errno = 0;
1293 	while ((ent = fts_read(fts)) != NULL) {
1294 		if (ent->fts_info == FTS_NS)
1295 			continue;
1296 		if (!flist_fts_check(sess, ent)) {
1297 			errno = 0;
1298 			continue;
1299 		} else if (stripdir >= ent->fts_pathlen)
1300 			continue;
1301 
1302 		assert(ent->fts_statp != NULL);
1303 
1304 		/*
1305 		 * If rsync is told to avoid crossing a filesystem
1306 		 * boundary when recursing, then exclude all entries
1307 		 * from the list with a device inode, which does not
1308 		 * match that of one of the top-level directories.
1309 		 */
1310 
1311 		if (sess->opts->one_file_system) {
1312 			flag = 0;
1313 			for (i = 0; i < wflsz; i++) {
1314 				if (stat(wfl[i].path, &st) == -1) {
1315 					ERR(sess, "%s: stat", wfl[i].path);
1316 					goto out;
1317 				}
1318 				if (ent->fts_statp->st_dev == st.st_dev) {
1319 					flag = 1;
1320 					break;
1321 				}
1322 			}
1323 			if (!flag)
1324 				continue;
1325 		}
1326 
1327 		/* Look up in hashtable. */
1328 
1329 		memset(&hent, 0, sizeof(ENTRY));
1330 		hent.key = ent->fts_path + stripdir;
1331 		if (hsearch(hent, FIND) != NULL)
1332 			continue;
1333 
1334 		/* Not found: we'll delete it. */
1335 
1336 		if (!flist_realloc(sess, fl, sz, &max)) {
1337 			ERRX1(sess, "flist_realloc");
1338 			goto out;
1339 		}
1340 		f = &(*fl)[*sz - 1];
1341 
1342 		if ((f->path = strdup(ent->fts_path)) == NULL) {
1343 			ERR(sess, "strdup");
1344 			goto out;
1345 		}
1346 		f->wpath = f->path + stripdir;
1347 		flist_copy_stat(f, ent->fts_statp);
1348 		errno = 0;
1349 	}
1350 
1351 	if (errno) {
1352 		ERR(sess, "fts_read");
1353 		goto out;
1354 	}
1355 
1356 	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1357 	rc = 1;
1358 out:
1359 	if (fts != NULL)
1360 		fts_close(fts);
1361 	for (i = 0; i < cargvs; i++)
1362 		free(cargv[i]);
1363 	free(cargv);
1364 	hdestroy();
1365 	return rc;
1366 }
1367 
1368 /*
1369  * Delete all files and directories in "fl".
1370  * If called with a zero-length "fl", does nothing.
1371  * If dry_run is specified, simply write what would be done.
1372  * Return zero on failure, non-zero on success.
1373  */
1374 int
1375 flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1376 {
1377 	ssize_t	 i;
1378 	int	 flag;
1379 
1380 	if (flsz == 0)
1381 		return 1;
1382 
1383 	assert(sess->opts->del);
1384 	assert(sess->opts->recursive);
1385 
1386 	for (i = flsz - 1; i >= 0; i--) {
1387 		LOG1(sess, "%s: deleting", fl[i].wpath);
1388 		if (sess->opts->dry_run)
1389 			continue;
1390 		assert(root != -1);
1391 		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1392 		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1393 		    errno != ENOENT) {
1394 			ERR(sess, "%s: unlinkat", fl[i].wpath);
1395 			return 0;
1396 		}
1397 	}
1398 
1399 	return 1;
1400 }
1401