xref: /openbsd-src/usr.bin/rsync/flist.c (revision 9f11ffb7133c203312a01e4b986886bc88c7d74b)
1 /*	$Id: flist.c,v 1.12 2019/02/12 19:39:57 benno Exp $ */
2 /*
3  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/param.h>
18 #include <sys/stat.h>
19 
20 #include <assert.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <fts.h>
24 #include <inttypes.h>
25 #include <search.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "extern.h"
32 
33 /*
34  * We allocate our file list in chunk sizes so as not to do it one by
35  * one.
36  * Preferrably we get one or two allocation.
37  */
38 #define	FLIST_CHUNK_SIZE (1024)
39 
40 /*
41  * These flags are part of the rsync protocol.
42  * They are sent as the first byte for a file transmission and encode
43  * information that affects subsequent transmissions.
44  */
45 #define FLIST_MODE_SAME  0x0002 /* mode is repeat */
46 #define	FLIST_GID_SAME	 0x0010 /* gid is repeat */
47 #define	FLIST_NAME_SAME  0x0020 /* name is repeat */
48 #define FLIST_NAME_LONG	 0x0040 /* name >255 bytes */
49 #define FLIST_TIME_SAME  0x0080 /* time is repeat */
50 
51 /*
52  * Requied way to sort a filename list.
53  */
54 static int
55 flist_cmp(const void *p1, const void *p2)
56 {
57 	const struct flist *f1 = p1, *f2 = p2;
58 
59 	return strcmp(f1->wpath, f2->wpath);
60 }
61 
62 /*
63  * Deduplicate our file list (which may be zero-length).
64  * Returns zero on failure, non-zero on success.
65  */
66 static int
67 flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz)
68 {
69 	size_t		 i, j;
70 	struct flist	*new;
71 	struct flist	*f, *fnext;
72 
73 	if (*sz == 0)
74 		return 1;
75 
76 	/* Create a new buffer, "new", and copy. */
77 
78 	new = calloc(*sz, sizeof(struct flist));
79 	if (new == NULL) {
80 		ERR(sess, "calloc");
81 		return 0;
82 	}
83 
84 	for (i = j = 0; i < *sz - 1; i++) {
85 		f = &(*fl)[i];
86 		fnext = &(*fl)[i + 1];
87 
88 		if (strcmp(f->wpath, fnext->wpath)) {
89 			new[j++] = *f;
90 			continue;
91 		}
92 
93 		/*
94 		 * Our working (destination) paths are the same.
95 		 * If the actual file is the same (as given on the
96 		 * command-line), then we can just discard the first.
97 		 * Otherwise, we need to bail out: it means we have two
98 		 * different files with the relative path on the
99 		 * destination side.
100 		 */
101 
102 		if (strcmp(f->path, fnext->path) == 0) {
103 			new[j++] = *f;
104 			i++;
105 			WARNX(sess, "%s: duplicate path: %s",
106 			    f->wpath, f->path);
107 			free(fnext->path);
108 			free(fnext->link);
109 			fnext->path = fnext->link = NULL;
110 			continue;
111 		}
112 
113 		ERRX(sess, "%s: duplicate working path for "
114 		    "possibly different file: %s, %s",
115 		    f->wpath, f->path, fnext->path);
116 		free(new);
117 		return 0;
118 	}
119 
120 	/* Don't forget the last entry. */
121 
122 	if (i == *sz - 1)
123 		new[j++] = (*fl)[i];
124 
125 	/*
126 	 * Reassign to the deduplicated array.
127 	 * If we started out with *sz > 0, which we check for at the
128 	 * beginning, then we'll always continue having *sz > 0.
129 	 */
130 
131 	free(*fl);
132 	*fl = new;
133 	*sz = j;
134 	assert(*sz);
135 	return 1;
136 }
137 
138 /*
139  * We're now going to find our top-level directories.
140  * This only applies to recursive mode.
141  * If we have the first element as the ".", then that's the "top
142  * directory" of our transfer.
143  * Otherwise, mark up all top-level directories in the set.
144  */
145 static void
146 flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz)
147 {
148 	size_t		 i;
149 	const char	*cp;
150 
151 	if (!sess->opts->recursive)
152 		return;
153 
154 	if (flsz && strcmp(fl[0].wpath, ".")) {
155 		for (i = 0; i < flsz; i++) {
156 			if (!S_ISDIR(fl[i].st.mode))
157 				continue;
158 			cp = strchr(fl[i].wpath, '/');
159 			if (cp != NULL && cp[1] != '\0')
160 				continue;
161 			fl[i].st.flags |= FLSTAT_TOP_DIR;
162 			LOG4(sess, "%s: top-level", fl[i].wpath);
163 		}
164 	} else if (flsz) {
165 		fl[0].st.flags |= FLSTAT_TOP_DIR;
166 		LOG4(sess, "%s: top-level", fl[0].wpath);
167 	}
168 }
169 
170 /*
171  * Filter through the fts() file information.
172  * We want directories (pre-order), regular files, and symlinks.
173  * Everything else is skipped and possibly warned about.
174  * Return zero to skip, non-zero to examine.
175  */
176 static int
177 flist_fts_check(struct sess *sess, FTSENT *ent)
178 {
179 
180 	if (ent->fts_info == FTS_F  ||
181 	    ent->fts_info == FTS_D ||
182 	    ent->fts_info == FTS_SL ||
183 	    ent->fts_info == FTS_SLNONE)
184 		return 1;
185 
186 	if (ent->fts_info == FTS_DC) {
187 		WARNX(sess, "%s: directory cycle", ent->fts_path);
188 	} else if (ent->fts_info == FTS_DNR) {
189 		errno = ent->fts_errno;
190 		WARN(sess, "%s: unreadable directory", ent->fts_path);
191 	} else if (ent->fts_info == FTS_DOT) {
192 		WARNX(sess, "%s: skipping dot-file", ent->fts_path);
193 	} else if (ent->fts_info == FTS_ERR) {
194 		errno = ent->fts_errno;
195 		WARN(sess, "%s", ent->fts_path);
196 	} else if (ent->fts_info == FTS_DEFAULT) {
197 		WARNX(sess, "%s: skipping special", ent->fts_path);
198 	} else if (ent->fts_info == FTS_NS) {
199 		errno = ent->fts_errno;
200 		WARN(sess, "%s: could not stat", ent->fts_path);
201 	}
202 
203 	return 0;
204 }
205 
206 /*
207  * Copy necessary elements in "st" into the fields of "f".
208  */
209 static void
210 flist_copy_stat(struct flist *f, const struct stat *st)
211 {
212 	f->st.mode = st->st_mode;
213 	f->st.uid = st->st_uid;
214 	f->st.gid = st->st_gid;
215 	f->st.size = st->st_size;
216 	f->st.mtime = st->st_mtime;
217 }
218 
219 void
220 flist_free(struct flist *f, size_t sz)
221 {
222 	size_t	 i;
223 
224 	if (f == NULL)
225 		return;
226 
227 	for (i = 0; i < sz; i++) {
228 		free(f[i].path);
229 		free(f[i].link);
230 	}
231 	free(f);
232 }
233 
234 /*
235  * Serialise our file list (which may be zero-length) to the wire.
236  * Makes sure that the receiver isn't going to block on sending us
237  * return messages on the log channel.
238  * Return zero on failure, non-zero on success.
239  */
240 int
241 flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl,
242     size_t flsz)
243 {
244 	size_t		 i, sz, gidsz = 0;
245 	uint8_t		 flag;
246 	const struct flist *f;
247 	const char	*fn;
248 	struct ident	*gids = NULL;
249 	int		 rc = 0;
250 
251 	/* Double-check that we've no pending multiplexed data. */
252 
253 	LOG2(sess, "sending file metadata list: %zu", flsz);
254 
255 	for (i = 0; i < flsz; i++) {
256 		f = &fl[i];
257 		fn = f->wpath;
258 		sz = strlen(f->wpath);
259 		assert(sz > 0);
260 
261 		/*
262 		 * If applicable, unclog the read buffer.
263 		 * This happens when the receiver has a lot of log
264 		 * messages and all we're doing is sending our file list
265 		 * without checking for messages.
266 		 */
267 
268 		if (sess->mplex_reads &&
269 		    io_read_check(sess, fdin) &&
270 		     !io_read_flush(sess, fdin)) {
271 			ERRX1(sess, "io_read_flush");
272 			goto out;
273 		}
274 
275 		/*
276 		 * For ease, make all of our filenames be "long"
277 		 * regardless their actual length.
278 		 * This also makes sure that we don't transmit a zero
279 		 * byte unintentionally.
280 		 */
281 
282 		flag = FLIST_NAME_LONG;
283 
284 		LOG3(sess, "%s: sending file metadata: "
285 			"size %jd, mtime %jd, mode %o",
286 			fn, (intmax_t)f->st.size,
287 			(intmax_t)f->st.mtime, f->st.mode);
288 
289 		/* Now write to the wire. */
290 		/* FIXME: buffer this. */
291 
292 		if (!io_write_byte(sess, fdout, flag)) {
293 			ERRX1(sess, "io_write_byte");
294 			goto out;
295 		} else if (!io_write_int(sess, fdout, sz)) {
296 			ERRX1(sess, "io_write_int");
297 			goto out;
298 		} else if (!io_write_buf(sess, fdout, fn, sz)) {
299 			ERRX1(sess, "io_write_buf");
300 			goto out;
301 		} else if (!io_write_long(sess, fdout, f->st.size)) {
302 			ERRX1(sess, "io_write_long");
303 			goto out;
304 		} else if (!io_write_int(sess, fdout, f->st.mtime)) {
305 			ERRX1(sess, "io_write_int");
306 			goto out;
307 		} else if (!io_write_int(sess, fdout, f->st.mode)) {
308 			ERRX1(sess, "io_write_int");
309 			goto out;
310 		}
311 
312 		/* Conditional part: gid. */
313 
314 		if (sess->opts->preserve_gids) {
315 			if (!io_write_int(sess, fdout, f->st.gid)) {
316 				ERRX1(sess, "io_write_int");
317 				goto out;
318 			}
319 			if (!idents_gid_add(sess, &gids, &gidsz, f->st.gid)) {
320 				ERRX1(sess, "idents_gid_add");
321 				goto out;
322 			}
323 		}
324 
325 		/* Conditional part: link. */
326 
327 		if (S_ISLNK(f->st.mode) &&
328 		    sess->opts->preserve_links) {
329 			fn = f->link;
330 			sz = strlen(f->link);
331 			if (!io_write_int(sess, fdout, sz)) {
332 				ERRX1(sess, "io_write_int");
333 				goto out;
334 			}
335 			if (!io_write_buf(sess, fdout, fn, sz)) {
336 				ERRX1(sess, "io_write_int");
337 				goto out;
338 			}
339 		}
340 
341 		if (S_ISREG(f->st.mode))
342 			sess->total_size += f->st.size;
343 	}
344 
345 	/* Signal end of file list. */
346 
347 	if (!io_write_byte(sess, fdout, 0)) {
348 		ERRX1(sess, "io_write_byte");
349 		goto out;
350 	}
351 
352 	/* Conditionally write gid list and terminator. */
353 
354 	if (sess->opts->preserve_gids) {
355 		LOG2(sess, "sending gid list: %zu", gidsz);
356 		if (!idents_send(sess, fdout, gids, gidsz)) {
357 			ERRX1(sess, "idents_send");
358 			goto out;
359 		}
360 	}
361 
362 	rc = 1;
363 out:
364 	idents_free(gids, gidsz);
365 	return rc;
366 }
367 
368 /*
369  * Read the filename of a file list.
370  * This is the most expensive part of the file list transfer, so a lot
371  * of attention has gone into transmitting as little as possible.
372  * Micro-optimisation, but whatever.
373  * Fills in "f" with the full path on success.
374  * Returns zero on failure, non-zero on success.
375  */
376 static int
377 flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags,
378     char last[MAXPATHLEN])
379 {
380 	uint8_t		 bval;
381 	size_t		 partial = 0;
382 	size_t		 pathlen = 0, len;
383 
384 	/*
385 	 * Read our filename.
386 	 * If we have FLIST_NAME_SAME, we inherit some of the last
387 	 * transmitted name.
388 	 * If we have FLIST_NAME_LONG, then the string length is greater
389 	 * than byte-size.
390 	 */
391 
392 	if (FLIST_NAME_SAME & flags) {
393 		if (!io_read_byte(sess, fd, &bval)) {
394 			ERRX1(sess, "io_read_byte");
395 			return 0;
396 		}
397 		partial = bval;
398 	}
399 
400 	/* Get the (possibly-remaining) filename length. */
401 
402 	if (FLIST_NAME_LONG & flags) {
403 		if (!io_read_size(sess, fd, &pathlen)) {
404 			ERRX1(sess, "io_read_size");
405 			return 0;
406 		}
407 	} else {
408 		if (!io_read_byte(sess, fd, &bval)) {
409 			ERRX1(sess, "io_read_byte");
410 			return 0;
411 		}
412 		pathlen = bval;
413 	}
414 
415 	/* Allocate our full filename length. */
416 	/* FIXME: maximum pathname length. */
417 
418 	if ((len = pathlen + partial) == 0) {
419 		ERRX(sess, "security violation: "
420 			"zero-length pathname");
421 		return 0;
422 	}
423 
424 	if ((f->path = malloc(len + 1)) == NULL) {
425 		ERR(sess, "malloc");
426 		return 0;
427 	}
428 	f->path[len] = '\0';
429 
430 	if (FLIST_NAME_SAME & flags)
431 		memcpy(f->path, last, partial);
432 
433 	if (!io_read_buf(sess, fd, f->path + partial, pathlen)) {
434 		ERRX1(sess, "io_read_buf");
435 		return 0;
436 	}
437 
438 	if (f->path[0] == '/') {
439 		ERRX(sess, "security violation: "
440 			"absolute pathname: %s", f->path);
441 		return 0;
442 	}
443 
444 	if (strstr(f->path, "/../") != NULL ||
445 	    (len > 2 && strcmp(f->path + len - 3, "/..") == 0) ||
446 	    (len > 2 && strncmp(f->path, "../", 3) == 0) ||
447 	    strcmp(f->path, "..") == 0) {
448 		ERRX(sess, "%s: security violation: "
449 			"backtracking pathname", f->path);
450 		return 0;
451 	}
452 
453 	/* Record our last path and construct our filename. */
454 
455 	strlcpy(last, f->path, MAXPATHLEN);
456 	f->wpath = f->path;
457 	return 1;
458 }
459 
460 /*
461  * Reallocate a file list in chunks of FLIST_CHUNK_SIZE;
462  * Returns zero on failure, non-zero on success.
463  */
464 static int
465 flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max)
466 {
467 	void	*pp;
468 
469 	if (*sz + 1 <= *max)  {
470 		(*sz)++;
471 		return 1;
472 	}
473 
474 	pp = recallocarray(*fl, *max,
475 		*max + FLIST_CHUNK_SIZE, sizeof(struct flist));
476 	if (pp == NULL) {
477 		ERR(sess, "recallocarray");
478 		return 0;
479 	}
480 	*fl = pp;
481 	*max += FLIST_CHUNK_SIZE;
482 	(*sz)++;
483 	return 1;
484 }
485 
486 /*
487  * Copy a regular or symbolic link file "path" into "f".
488  * This handles the correct path creation and symbolic linking.
489  * Returns zero on failure, non-zero on success.
490  */
491 static int
492 flist_append(struct sess *sess, struct flist *f, struct stat *st,
493     const char *path)
494 {
495 
496 	/*
497 	 * Copy the full path for local addressing and transmit
498 	 * only the filename part for the receiver.
499 	 */
500 
501 	if ((f->path = strdup(path)) == NULL) {
502 		ERR(sess, "strdup");
503 		return 0;
504 	}
505 
506 	if ((f->wpath = strrchr(f->path, '/')) == NULL)
507 		f->wpath = f->path;
508 	else
509 		f->wpath++;
510 
511 	/*
512 	 * On the receiving end, we'll strip out all bits on the
513 	 * mode except for the file permissions.
514 	 * No need to warn about it here.
515 	 */
516 
517 	flist_copy_stat(f, st);
518 
519 	/* Optionally copy link information. */
520 
521 	if (S_ISLNK(st->st_mode)) {
522 		f->link = symlink_read(sess, f->path);
523 		if (f->link == NULL) {
524 			ERRX1(sess, "symlink_read");
525 			return 0;
526 		}
527 	}
528 
529 	return 1;
530 }
531 
532 /*
533  * Receive a file list from the wire, filling in length "sz" (which may
534  * possibly be zero) and list "flp" on success.
535  * Return zero on failure, non-zero on success.
536  */
537 int
538 flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz)
539 {
540 	struct flist	*fl = NULL;
541 	struct flist	*ff;
542 	const struct flist *fflast = NULL;
543 	size_t		 flsz = 0, flmax = 0, lsz, gidsz = 0;
544 	uint8_t		 flag;
545 	char		 last[MAXPATHLEN];
546 	uint64_t	 lval; /* temporary values... */
547 	int32_t		 ival;
548 	struct ident	*gids = NULL;
549 
550 	last[0] = '\0';
551 
552 	for (;;) {
553 		if (!io_read_byte(sess, fd, &flag)) {
554 			ERRX1(sess, "io_read_byte");
555 			goto out;
556 		} else if (flag == 0)
557 			break;
558 
559 		if (!flist_realloc(sess, &fl, &flsz, &flmax)) {
560 			ERRX1(sess, "flist_realloc");
561 			goto out;
562 		}
563 
564 		ff = &fl[flsz - 1];
565 		fflast = flsz > 1 ? &fl[flsz - 2] : NULL;
566 
567 		/* Filename first. */
568 
569 		if (!flist_recv_name(sess, fd, ff, flag, last)) {
570 			ERRX1(sess, "flist_recv_name");
571 			goto out;
572 		}
573 
574 		/* Read the file size. */
575 
576 		if (!io_read_ulong(sess, fd, &lval)) {
577 			ERRX1(sess, "io_read_ulong");
578 			goto out;
579 		}
580 		ff->st.size = lval;
581 
582 		/* Read the modification time. */
583 
584 		if (!(FLIST_TIME_SAME & flag)) {
585 			if (!io_read_int(sess, fd, &ival)) {
586 				ERRX1(sess, "io_read_int");
587 				goto out;
588 			}
589 			ff->st.mtime = ival;
590 		} else if (fflast == NULL) {
591 			ERRX(sess, "same time without last entry");
592 			goto out;
593 		}  else
594 			ff->st.mtime = fflast->st.mtime;
595 
596 		/* Read the file mode. */
597 
598 		if (!(FLIST_MODE_SAME & flag)) {
599 			if (!io_read_int(sess, fd, &ival)) {
600 				ERRX1(sess, "io_read_int");
601 				goto out;
602 			}
603 			ff->st.mode = ival;
604 		} else if (fflast == NULL) {
605 			ERRX(sess, "same mode without last entry");
606 			goto out;
607 		} else
608 			ff->st.mode = fflast->st.mode;
609 
610 		/* Conditional part: gid. */
611 
612 		if (sess->opts->preserve_gids) {
613 			if (!(FLIST_GID_SAME & flag)) {
614 				if (!io_read_int(sess, fd, &ival)) {
615 					ERRX1(sess, "io_read_int");
616 					goto out;
617 				}
618 				ff->st.gid = ival;
619 			} else if (fflast == NULL) {
620 				ERRX(sess, "same gid "
621 					"without last entry");
622 				goto out;
623 			} else
624 				ff->st.gid = fflast->st.gid;
625 		}
626 
627 		/* Conditional part: link. */
628 
629 		if (S_ISLNK(ff->st.mode) &&
630 		    sess->opts->preserve_links) {
631 			if (!io_read_size(sess, fd, &lsz)) {
632 				ERRX1(sess, "io_read_size");
633 				goto out;
634 			} else if (lsz == 0) {
635 				ERRX(sess, "empty link name");
636 				goto out;
637 			}
638 			ff->link = calloc(lsz + 1, 1);
639 			if (ff->link == NULL) {
640 				ERR(sess, "calloc");
641 				goto out;
642 			}
643 			if (!io_read_buf(sess, fd, ff->link, lsz)) {
644 				ERRX1(sess, "io_read_buf");
645 				goto out;
646 			}
647 		}
648 
649 		LOG3(sess, "%s: received file metadata: "
650 			"size %jd, mtime %jd, mode %o",
651 			ff->path, (intmax_t)ff->st.size,
652 			(intmax_t)ff->st.mtime, ff->st.mode);
653 
654 		if (S_ISREG(ff->st.mode))
655 			sess->total_size += ff->st.size;
656 	}
657 
658 	/*
659 	 * Now conditionally read the group list.
660 	 * We then remap all group identifiers to the local ids.
661 	 */
662 
663 	if (sess->opts->preserve_gids) {
664 		if (!idents_recv(sess, fd, &gids, &gidsz)) {
665 			ERRX1(sess, "idents_recv");
666 			goto out;
667 		}
668 		LOG2(sess, "received gid list: %zu", gidsz);
669 	}
670 
671 	/* Remember to order the received list. */
672 
673 	LOG2(sess, "received file metadata list: %zu", flsz);
674 	qsort(fl, flsz, sizeof(struct flist), flist_cmp);
675 	flist_topdirs(sess, fl, flsz);
676 	*sz = flsz;
677 	*flp = fl;
678 
679 	/* Lastly, remap and reassign group identifiers. */
680 
681 	if (sess->opts->preserve_gids) {
682 		idents_gid_remap(sess, gids, gidsz);
683 		idents_gid_assign(sess, fl, flsz, gids, gidsz);
684 	}
685 
686 	idents_free(gids, gidsz);
687 	return 1;
688 out:
689 	flist_free(fl, flsz);
690 	idents_free(gids, gidsz);
691 	*sz = 0;
692 	*flp = NULL;
693 	return 0;
694 }
695 
696 /*
697  * Generate a flist possibly-recursively given a file root, which may
698  * also be a regular file or symlink.
699  * On success, augments the generated list in "flp" of length "sz".
700  * Returns zero on failure, non-zero on success.
701  */
702 static int
703 flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
704     size_t *max)
705 {
706 	char		*cargv[2], *cp;
707 	int		 rc = 0;
708 	FTS		*fts;
709 	FTSENT		*ent;
710 	struct flist	*f;
711 	size_t		 flsz = 0, stripdir;
712 	struct stat	 st;
713 
714 	cargv[0] = root;
715 	cargv[1] = NULL;
716 
717 	/*
718 	 * If we're a file, then revert to the same actions we use for
719 	 * the non-recursive scan.
720 	 */
721 
722 	if (lstat(root, &st) == -1) {
723 		ERR(sess, "%s: lstat", root);
724 		return 0;
725 	} else if (S_ISREG(st.st_mode)) {
726 		if (!flist_realloc(sess, fl, sz, max)) {
727 			ERRX1(sess, "flist_realloc");
728 			return 0;
729 		}
730 		f = &(*fl)[(*sz) - 1];
731 		assert(f != NULL);
732 
733 		if (!flist_append(sess, f, &st, root)) {
734 			ERRX1(sess, "flist_append");
735 			return 0;
736 		}
737 		if (unveil(root, "r") == -1) {
738 			ERR(sess, "%s: unveil", root);
739 			return 0;
740 		}
741 		return 1;
742 	} else if (S_ISLNK(st.st_mode)) {
743 		if (!sess->opts->preserve_links) {
744 			WARNX(sess, "%s: skipping symlink", root);
745 			return 1;
746 		} else if (!flist_realloc(sess, fl, sz, max)) {
747 			ERRX1(sess, "flist_realloc");
748 			return 0;
749 		}
750 		f = &(*fl)[(*sz) - 1];
751 		assert(f != NULL);
752 
753 		if (!flist_append(sess, f, &st, root)) {
754 			ERRX1(sess, "flist_append");
755 			return 0;
756 		}
757 		if (unveil(root, "r") == -1) {
758 			ERR(sess, "%s: unveil", root);
759 			return 0;
760 		}
761 		return 1;
762 	} else if (!S_ISDIR(st.st_mode)) {
763 		WARNX(sess, "%s: skipping special", root);
764 		return 1;
765 	}
766 
767 	/*
768 	 * If we end with a slash, it means that we're not supposed to
769 	 * copy the directory part itself---only the contents.
770 	 * So set "stripdir" to be what we take out.
771 	 */
772 
773 	stripdir = strlen(root);
774 	assert(stripdir > 0);
775 	if (root[stripdir - 1] != '/')
776 		stripdir = 0;
777 
778 	/*
779 	 * If we're not stripping anything, then see if we need to strip
780 	 * out the leading material in the path up to and including the
781 	 * last directory component.
782 	 */
783 
784 	if (stripdir == 0)
785 		if ((cp = strrchr(root, '/')) != NULL)
786 			stripdir = cp - root + 1;
787 
788 	/*
789 	 * If we're recursive, then we need to take down all of the
790 	 * files and directory components, so use fts(3).
791 	 * Copying the information file-by-file into the flstat.
792 	 * We'll make sense of it in flist_send.
793 	 */
794 
795 	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
796 		ERR(sess, "fts_open");
797 		return 0;
798 	}
799 
800 	errno = 0;
801 	while ((ent = fts_read(fts)) != NULL) {
802 		if (!flist_fts_check(sess, ent)) {
803 			errno = 0;
804 			continue;
805 		}
806 
807 		/* We don't allow symlinks without -l. */
808 
809 		assert(ent->fts_statp != NULL);
810 		if (S_ISLNK(ent->fts_statp->st_mode) &&
811 		    !sess->opts->preserve_links) {
812 			WARNX(sess, "%s: skipping "
813 				"symlink", ent->fts_path);
814 			continue;
815 		}
816 
817 		/* Allocate a new file entry. */
818 
819 		if (!flist_realloc(sess, fl, sz, max)) {
820 			ERRX1(sess, "flist_realloc");
821 			goto out;
822 		}
823 		flsz++;
824 		f = &(*fl)[*sz - 1];
825 
826 		/* Our path defaults to "." for the root. */
827 
828 		if ('\0' == ent->fts_path[stripdir]) {
829 			if (asprintf(&f->path, "%s.", ent->fts_path) < 0) {
830 				ERR(sess, "asprintf");
831 				f->path = NULL;
832 				goto out;
833 			}
834 		} else {
835 			if ((f->path = strdup(ent->fts_path)) == NULL) {
836 				ERR(sess, "strdup");
837 				goto out;
838 			}
839 		}
840 
841 		f->wpath = f->path + stripdir;
842 		flist_copy_stat(f, ent->fts_statp);
843 
844 		/* Optionally copy link information. */
845 
846 		if (S_ISLNK(ent->fts_statp->st_mode)) {
847 			f->link = symlink_read(sess, f->path);
848 			if (f->link == NULL) {
849 				ERRX1(sess, "symlink_read");
850 				goto out;
851 			}
852 		}
853 
854 		/* Reset errno for next fts_read() call. */
855 		errno = 0;
856 	}
857 	if (errno) {
858 		ERR(sess, "fts_read");
859 		goto out;
860 	}
861 	if (unveil(root, "r") == -1) {
862 		ERR(sess, "%s: unveil", root);
863 		goto out;
864 	}
865 
866 	LOG3(sess, "generated %zu filenames: %s", flsz, root);
867 	rc = 1;
868 out:
869 	fts_close(fts);
870 	return rc;
871 }
872 
873 /*
874  * Generate a flist recursively given the array of directories (or
875  * files, symlinks, doesn't matter) specified in argv (argc >0).
876  * On success, stores the generated list in "flp" with length "sz",
877  * which may be zero.
878  * Returns zero on failure, non-zero on success.
879  */
880 static int
881 flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp,
882     size_t *sz)
883 {
884 	size_t		 i, max = 0;
885 
886 	for (i = 0; i < argc; i++)
887 		if (!flist_gen_dirent(sess, argv[i], flp, sz, &max))
888 			break;
889 
890 	if (i == argc) {
891 		LOG2(sess, "recursively generated %zu filenames", *sz);
892 		return 1;
893 	}
894 
895 	ERRX1(sess, "flist_gen_dirent");
896 	flist_free(*flp, max);
897 	*flp = NULL;
898 	*sz = 0;
899 	return 0;
900 }
901 
902 /*
903  * Generate list of files from the command-line argc (>0) and argv.
904  * On success, stores the generated list in "flp" with length "sz",
905  * which may be zero.
906  * Returns zero on failure, non-zero on success.
907  */
908 static int
909 flist_gen_files(struct sess *sess, size_t argc, char **argv,
910     struct flist **flp, size_t *sz)
911 {
912 	struct flist	*fl = NULL, *f;
913 	size_t		 i, flsz = 0;
914 	struct stat	 st;
915 
916 	assert(argc);
917 
918 	if ((fl = calloc(argc, sizeof(struct flist))) == NULL) {
919 		ERR(sess, "calloc");
920 		return 0;
921 	}
922 
923 	for (i = 0; i < argc; i++) {
924 		if ('\0' == argv[i][0])
925 			continue;
926 		if (lstat(argv[i], &st) == -1) {
927 			ERR(sess, "%s: lstat", argv[i]);
928 			goto out;
929 		}
930 
931 		/*
932 		 * File type checks.
933 		 * In non-recursive mode, we don't accept directories.
934 		 * We also skip symbolic links without -l.
935 		 * Beyond that, we only accept regular files.
936 		 */
937 
938 		if (S_ISDIR(st.st_mode)) {
939 			WARNX(sess, "%s: skipping directory", argv[i]);
940 			continue;
941 		} else if (S_ISLNK(st.st_mode)) {
942 			if (!sess->opts->preserve_links) {
943 				WARNX(sess, "%s: skipping "
944 					"symlink", argv[i]);
945 				continue;
946 			}
947 		} else if (!S_ISREG(st.st_mode)) {
948 			WARNX(sess, "%s: skipping special", argv[i]);
949 			continue;
950 		}
951 
952 
953 		f = &fl[flsz++];
954 		assert(f != NULL);
955 
956 		/* Add this file to our file-system worldview. */
957 
958 		if (unveil(argv[i], "r") == -1) {
959 			ERR(sess, "%s: unveil", argv[i]);
960 			goto out;
961 		}
962 		if (!flist_append(sess, f, &st, argv[i])) {
963 			ERRX1(sess, "flist_append");
964 			goto out;
965 		}
966 	}
967 
968 	LOG2(sess, "non-recursively generated %zu filenames", flsz);
969 	*sz = flsz;
970 	*flp = fl;
971 	return 1;
972 out:
973 	flist_free(fl, argc);
974 	*sz = 0;
975 	*flp = NULL;
976 	return 0;
977 }
978 
979 /*
980  * Generate a sorted, de-duplicated list of file metadata.
981  * In non-recursive mode (the default), we use only the files we're
982  * given.
983  * Otherwise, directories are recursively examined.
984  * Returns zero on failure, non-zero on success.
985  * On success, "fl" will need to be freed with flist_free().
986  */
987 int
988 flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp,
989     size_t *sz)
990 {
991 	int	 rc;
992 
993 	assert(argc > 0);
994 	rc = sess->opts->recursive ?
995 		flist_gen_dirs(sess, argc, argv, flp, sz) :
996 		flist_gen_files(sess, argc, argv, flp, sz);
997 
998 	/* After scanning, lock our file-system view. */
999 
1000 	if (unveil(NULL, NULL) == -1) {
1001 		ERR(sess, "unveil");
1002 		return 0;
1003 	}
1004 	if (!rc)
1005 		return 0;
1006 
1007 	qsort(*flp, *sz, sizeof(struct flist), flist_cmp);
1008 
1009 	if (flist_dedupe(sess, flp, sz)) {
1010 		flist_topdirs(sess, *flp, *sz);
1011 		return 1;
1012 	}
1013 
1014 	ERRX1(sess, "flist_dedupe");
1015 	flist_free(*flp, *sz);
1016 	*flp = NULL;
1017 	*sz = 0;
1018 	return 0;
1019 }
1020 
1021 /*
1022  * Generate a list of files in root to delete that are within the
1023  * top-level directories stipulated by "wfl".
1024  * Only handles symbolic links, directories, and regular files.
1025  * Returns zero on failure (fl and flsz will be NULL and zero), non-zero
1026  * on success.
1027  * On success, "fl" will need to be freed with flist_free().
1028  */
1029 int
1030 flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
1031     size_t *sz,	const struct flist *wfl, size_t wflsz)
1032 {
1033 	char		**cargv = NULL;
1034 	int		  rc = 0, c;
1035 	FTS		 *fts = NULL;
1036 	FTSENT		 *ent;
1037 	struct flist	 *f;
1038 	size_t		  cargvs = 0, i, j, max = 0, stripdir;
1039 	ENTRY		  hent;
1040 	ENTRY		 *hentp;
1041 
1042 	*fl = NULL;
1043 	*sz = 0;
1044 
1045 	/* Only run this code when we're recursive. */
1046 
1047 	if (!sess->opts->recursive)
1048 		return 1;
1049 
1050 	/*
1051 	 * Gather up all top-level directories for scanning.
1052 	 * This is stipulated by rsync's --delete behaviour, where we
1053 	 * only delete things in the top-level directories given on the
1054 	 * command line.
1055 	 */
1056 
1057 	assert(wflsz > 0);
1058 	for (i = 0; i < wflsz; i++)
1059 		if (FLSTAT_TOP_DIR & wfl[i].st.flags)
1060 			cargvs++;
1061 	if (cargvs == 0)
1062 		return 1;
1063 
1064 	if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) {
1065 		ERR(sess, "calloc");
1066 		return 0;
1067 	}
1068 
1069 	/*
1070 	 * If we're given just a "." as the first entry, that means
1071 	 * we're doing a relative copy with a trailing slash.
1072 	 * Special-case this just for the sake of simplicity.
1073 	 * Otherwise, look through all top-levels.
1074 	 */
1075 
1076 	if (wflsz && strcmp(wfl[0].wpath, ".") == 0) {
1077 		assert(cargvs == 1);
1078 		assert(S_ISDIR(wfl[0].st.mode));
1079 		if (asprintf(&cargv[0], "%s/", root) < 0) {
1080 			ERR(sess, "asprintf");
1081 			cargv[0] = NULL;
1082 			goto out;
1083 		}
1084 		cargv[1] = NULL;
1085 	} else {
1086 		for (i = j = 0; i < wflsz; i++) {
1087 			if (!(FLSTAT_TOP_DIR & wfl[i].st.flags))
1088 				continue;
1089 			assert(S_ISDIR(wfl[i].st.mode));
1090 			assert(strcmp(wfl[i].wpath, "."));
1091 			c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath);
1092 			if (c < 0) {
1093 				ERR(sess, "asprintf");
1094 				cargv[j] = NULL;
1095 				goto out;
1096 			}
1097 			LOG4(sess, "%s: will scan "
1098 				"for deletions", cargv[j]);
1099 			j++;
1100 		}
1101 		assert(j == cargvs);
1102 		cargv[j] = NULL;
1103 	}
1104 
1105 	LOG2(sess, "delete from %zu directories", cargvs);
1106 
1107 	/*
1108 	 * Next, use the standard hcreate(3) hashtable interface to hash
1109 	 * all of the files that we want to synchronise.
1110 	 * This way, we'll be able to determine which files we want to
1111 	 * delete in O(n) time instead of O(n * search) time.
1112 	 * Plus, we can do the scan in-band and only allocate the files
1113 	 * we want to delete.
1114 	 */
1115 
1116 	if (!hcreate(wflsz)) {
1117 		ERR(sess, "hcreate");
1118 		goto out;
1119 	}
1120 
1121 	for (i = 0; i < wflsz; i++) {
1122 		memset(&hent, 0, sizeof(ENTRY));
1123 		if ((hent.key = strdup(wfl[i].wpath)) == NULL) {
1124 			ERR(sess, "strdup");
1125 			goto out;
1126 		}
1127 		if ((hentp = hsearch(hent, ENTER)) == NULL) {
1128 			ERR(sess, "hsearch");
1129 			goto out;
1130 		} else if (hentp->key != hent.key) {
1131 			ERRX(sess, "%s: duplicate", wfl[i].wpath);
1132 			free(hent.key);
1133 			goto out;
1134 		}
1135 	}
1136 
1137 	/*
1138 	 * Now we're going to try to descend into all of the top-level
1139 	 * directories stipulated by the file list.
1140 	 * If the directories don't exist, it's ok.
1141 	 */
1142 
1143 	if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) {
1144 		ERR(sess, "fts_open");
1145 		goto out;
1146 	}
1147 
1148 	stripdir = strlen(root) + 1;
1149 	errno = 0;
1150 	while ((ent = fts_read(fts)) != NULL) {
1151 		if (ent->fts_info == FTS_NS)
1152 			continue;
1153 		if (!flist_fts_check(sess, ent)) {
1154 			errno = 0;
1155 			continue;
1156 		} else if (stripdir >= ent->fts_pathlen)
1157 			continue;
1158 
1159 		/* Look up in hashtable. */
1160 
1161 		memset(&hent, 0, sizeof(ENTRY));
1162 		hent.key = ent->fts_path + stripdir;
1163 		if (hsearch(hent, FIND) != NULL)
1164 			continue;
1165 
1166 		/* Not found: we'll delete it. */
1167 
1168 		if (!flist_realloc(sess, fl, sz, &max)) {
1169 			ERRX1(sess, "flist_realloc");
1170 			goto out;
1171 		}
1172 		f = &(*fl)[*sz - 1];
1173 
1174 		if ((f->path = strdup(ent->fts_path)) == NULL) {
1175 			ERR(sess, "strdup");
1176 			goto out;
1177 		}
1178 		f->wpath = f->path + stripdir;
1179 		assert(ent->fts_statp != NULL);
1180 		flist_copy_stat(f, ent->fts_statp);
1181 		errno = 0;
1182 	}
1183 
1184 	if (errno) {
1185 		ERR(sess, "fts_read");
1186 		goto out;
1187 	}
1188 
1189 	qsort(*fl, *sz, sizeof(struct flist), flist_cmp);
1190 	rc = 1;
1191 out:
1192 	if (fts != NULL)
1193 		fts_close(fts);
1194 	for (i = 0; i < cargvs; i++)
1195 		free(cargv[i]);
1196 	free(cargv);
1197 	hdestroy();
1198 	return rc;
1199 }
1200 
1201 /*
1202  * Delete all files and directories in "fl".
1203  * If called with a zero-length "fl", does nothing.
1204  * If dry_run is specified, simply write what would be done.
1205  * Return zero on failure, non-zero on success.
1206  */
1207 int
1208 flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz)
1209 {
1210 	ssize_t	 i;
1211 	int	 flag;
1212 
1213 	if (flsz == 0)
1214 		return 1;
1215 
1216 	assert(sess->opts->del);
1217 	assert(sess->opts->recursive);
1218 
1219 	for (i = flsz - 1; i >= 0; i--) {
1220 		LOG1(sess, "%s: deleting", fl[i].wpath);
1221 		if (sess->opts->dry_run)
1222 			continue;
1223 		assert(root != -1);
1224 		flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0;
1225 		if (unlinkat(root, fl[i].wpath, flag) == -1 &&
1226 		    errno != ENOENT) {
1227 			ERR(sess, "%s: unlinkat", fl[i].wpath);
1228 			return 0;
1229 		}
1230 	}
1231 
1232 	return 1;
1233 }
1234