1 /* $Id: flist.c,v 1.24 2019/04/04 04:19:54 bket Exp $ */ 2 /* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2019 Florian Obser <florian@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/param.h> 19 #include <sys/stat.h> 20 21 #include <assert.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <fts.h> 25 #include <inttypes.h> 26 #include <search.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "extern.h" 33 34 /* 35 * We allocate our file list in chunk sizes so as not to do it one by 36 * one. 37 * Preferrably we get one or two allocation. 38 */ 39 #define FLIST_CHUNK_SIZE (1024) 40 41 /* 42 * These flags are part of the rsync protocol. 43 * They are sent as the first byte for a file transmission and encode 44 * information that affects subsequent transmissions. 45 */ 46 #define FLIST_TOP_LEVEL 0x0001 /* needed for remote --delete */ 47 #define FLIST_MODE_SAME 0x0002 /* mode is repeat */ 48 #define FLIST_RDEV_SAME 0x0004 /* rdev is repeat */ 49 #define FLIST_UID_SAME 0x0008 /* uid is repeat */ 50 #define FLIST_GID_SAME 0x0010 /* gid is repeat */ 51 #define FLIST_NAME_SAME 0x0020 /* name is repeat */ 52 #define FLIST_NAME_LONG 0x0040 /* name >255 bytes */ 53 #define FLIST_TIME_SAME 0x0080 /* time is repeat */ 54 55 /* 56 * Requied way to sort a filename list. 57 */ 58 static int 59 flist_cmp(const void *p1, const void *p2) 60 { 61 const struct flist *f1 = p1, *f2 = p2; 62 63 return strcmp(f1->wpath, f2->wpath); 64 } 65 66 /* 67 * Deduplicate our file list (which may be zero-length). 68 * Returns zero on failure, non-zero on success. 69 */ 70 static int 71 flist_dedupe(struct sess *sess, struct flist **fl, size_t *sz) 72 { 73 size_t i, j; 74 struct flist *new; 75 struct flist *f, *fnext; 76 77 if (*sz == 0) 78 return 1; 79 80 /* Create a new buffer, "new", and copy. */ 81 82 new = calloc(*sz, sizeof(struct flist)); 83 if (new == NULL) { 84 ERR(sess, "calloc"); 85 return 0; 86 } 87 88 for (i = j = 0; i < *sz - 1; i++) { 89 f = &(*fl)[i]; 90 fnext = &(*fl)[i + 1]; 91 92 if (strcmp(f->wpath, fnext->wpath)) { 93 new[j++] = *f; 94 continue; 95 } 96 97 /* 98 * Our working (destination) paths are the same. 99 * If the actual file is the same (as given on the 100 * command-line), then we can just discard the first. 101 * Otherwise, we need to bail out: it means we have two 102 * different files with the relative path on the 103 * destination side. 104 */ 105 106 if (strcmp(f->path, fnext->path) == 0) { 107 new[j++] = *f; 108 i++; 109 WARNX(sess, "%s: duplicate path: %s", 110 f->wpath, f->path); 111 free(fnext->path); 112 free(fnext->link); 113 fnext->path = fnext->link = NULL; 114 continue; 115 } 116 117 ERRX(sess, "%s: duplicate working path for " 118 "possibly different file: %s, %s", 119 f->wpath, f->path, fnext->path); 120 free(new); 121 return 0; 122 } 123 124 /* Don't forget the last entry. */ 125 126 if (i == *sz - 1) 127 new[j++] = (*fl)[i]; 128 129 /* 130 * Reassign to the deduplicated array. 131 * If we started out with *sz > 0, which we check for at the 132 * beginning, then we'll always continue having *sz > 0. 133 */ 134 135 free(*fl); 136 *fl = new; 137 *sz = j; 138 assert(*sz); 139 return 1; 140 } 141 142 /* 143 * We're now going to find our top-level directories. 144 * This only applies to recursive mode. 145 * If we have the first element as the ".", then that's the "top 146 * directory" of our transfer. 147 * Otherwise, mark up all top-level directories in the set. 148 * XXX: the FLIST_TOP_LEVEL flag should indicate what is and what isn't 149 * a top-level directory, but I'm not sure if GPL rsync(1) respects it 150 * the same way. 151 */ 152 static void 153 flist_topdirs(struct sess *sess, struct flist *fl, size_t flsz) 154 { 155 size_t i; 156 const char *cp; 157 158 if (!sess->opts->recursive) 159 return; 160 161 if (flsz && strcmp(fl[0].wpath, ".")) { 162 for (i = 0; i < flsz; i++) { 163 if (!S_ISDIR(fl[i].st.mode)) 164 continue; 165 cp = strchr(fl[i].wpath, '/'); 166 if (cp != NULL && cp[1] != '\0') 167 continue; 168 fl[i].st.flags |= FLSTAT_TOP_DIR; 169 LOG4(sess, "%s: top-level", fl[i].wpath); 170 } 171 } else if (flsz) { 172 fl[0].st.flags |= FLSTAT_TOP_DIR; 173 LOG4(sess, "%s: top-level", fl[0].wpath); 174 } 175 } 176 177 /* 178 * Filter through the fts() file information. 179 * We want directories (pre-order), regular files, and symlinks. 180 * Everything else is skipped and possibly warned about. 181 * Return zero to skip, non-zero to examine. 182 */ 183 static int 184 flist_fts_check(struct sess *sess, FTSENT *ent) 185 { 186 187 if (ent->fts_info == FTS_F || 188 ent->fts_info == FTS_D || 189 ent->fts_info == FTS_SL || 190 ent->fts_info == FTS_SLNONE) 191 return 1; 192 193 if (ent->fts_info == FTS_DC) { 194 WARNX(sess, "%s: directory cycle", ent->fts_path); 195 } else if (ent->fts_info == FTS_DNR) { 196 errno = ent->fts_errno; 197 WARN(sess, "%s: unreadable directory", ent->fts_path); 198 } else if (ent->fts_info == FTS_DOT) { 199 WARNX(sess, "%s: skipping dot-file", ent->fts_path); 200 } else if (ent->fts_info == FTS_ERR) { 201 errno = ent->fts_errno; 202 WARN(sess, "%s", ent->fts_path); 203 } else if (ent->fts_info == FTS_DEFAULT) { 204 if ((sess->opts->devices && (S_ISBLK(ent->fts_statp->st_mode) || 205 S_ISCHR(ent->fts_statp->st_mode))) || 206 (sess->opts->specials && 207 (S_ISFIFO(ent->fts_statp->st_mode) || 208 S_ISSOCK(ent->fts_statp->st_mode)))) { 209 return 1; 210 } 211 WARNX(sess, "%s: skipping special", ent->fts_path); 212 } else if (ent->fts_info == FTS_NS) { 213 errno = ent->fts_errno; 214 WARN(sess, "%s: could not stat", ent->fts_path); 215 } 216 217 return 0; 218 } 219 220 /* 221 * Copy necessary elements in "st" into the fields of "f". 222 */ 223 static void 224 flist_copy_stat(struct flist *f, const struct stat *st) 225 { 226 f->st.mode = st->st_mode; 227 f->st.uid = st->st_uid; 228 f->st.gid = st->st_gid; 229 f->st.size = st->st_size; 230 f->st.mtime = st->st_mtime; 231 f->st.rdev = st->st_rdev; 232 } 233 234 void 235 flist_free(struct flist *f, size_t sz) 236 { 237 size_t i; 238 239 if (f == NULL) 240 return; 241 242 for (i = 0; i < sz; i++) { 243 free(f[i].path); 244 free(f[i].link); 245 } 246 free(f); 247 } 248 249 /* 250 * Serialise our file list (which may be zero-length) to the wire. 251 * Makes sure that the receiver isn't going to block on sending us 252 * return messages on the log channel. 253 * Return zero on failure, non-zero on success. 254 */ 255 int 256 flist_send(struct sess *sess, int fdin, int fdout, const struct flist *fl, 257 size_t flsz) 258 { 259 size_t i, sz, gidsz = 0, uidsz = 0; 260 uint8_t flag; 261 const struct flist *f; 262 const char *fn; 263 struct ident *gids = NULL, *uids = NULL; 264 int rc = 0; 265 266 /* Double-check that we've no pending multiplexed data. */ 267 268 LOG2(sess, "sending file metadata list: %zu", flsz); 269 270 for (i = 0; i < flsz; i++) { 271 f = &fl[i]; 272 fn = f->wpath; 273 sz = strlen(f->wpath); 274 assert(sz > 0); 275 assert(sz < INT32_MAX); 276 277 /* 278 * If applicable, unclog the read buffer. 279 * This happens when the receiver has a lot of log 280 * messages and all we're doing is sending our file list 281 * without checking for messages. 282 */ 283 284 if (sess->mplex_reads && 285 io_read_check(sess, fdin) && 286 !io_read_flush(sess, fdin)) { 287 ERRX1(sess, "io_read_flush"); 288 goto out; 289 } 290 291 /* 292 * For ease, make all of our filenames be "long" 293 * regardless their actual length. 294 * This also makes sure that we don't transmit a zero 295 * byte unintentionally. 296 */ 297 298 flag = FLIST_NAME_LONG; 299 if ((FLSTAT_TOP_DIR & f->st.flags)) 300 flag |= FLIST_TOP_LEVEL; 301 302 LOG3(sess, "%s: sending file metadata: " 303 "size %jd, mtime %jd, mode %o", 304 fn, (intmax_t)f->st.size, 305 (intmax_t)f->st.mtime, f->st.mode); 306 307 /* Now write to the wire. */ 308 /* FIXME: buffer this. */ 309 310 if (!io_write_byte(sess, fdout, flag)) { 311 ERRX1(sess, "io_write_byte"); 312 goto out; 313 } else if (!io_write_int(sess, fdout, sz)) { 314 ERRX1(sess, "io_write_int"); 315 goto out; 316 } else if (!io_write_buf(sess, fdout, fn, sz)) { 317 ERRX1(sess, "io_write_buf"); 318 goto out; 319 } else if (!io_write_long(sess, fdout, f->st.size)) { 320 ERRX1(sess, "io_write_long"); 321 goto out; 322 } else if (!io_write_uint(sess, fdout, (uint32_t)f->st.mtime)) { 323 ERRX1(sess, "io_write_uint"); 324 goto out; 325 } else if (!io_write_uint(sess, fdout, f->st.mode)) { 326 ERRX1(sess, "io_write_uint"); 327 goto out; 328 } 329 330 /* Conditional part: uid. */ 331 332 if (sess->opts->preserve_uids) { 333 if (!io_write_uint(sess, fdout, f->st.uid)) { 334 ERRX1(sess, "io_write_uint"); 335 goto out; 336 } 337 if (!idents_add(sess, 0, &uids, &uidsz, f->st.uid)) { 338 ERRX1(sess, "idents_add"); 339 goto out; 340 } 341 } 342 343 /* Conditional part: gid. */ 344 345 if (sess->opts->preserve_gids) { 346 if (!io_write_uint(sess, fdout, f->st.gid)) { 347 ERRX1(sess, "io_write_uint"); 348 goto out; 349 } 350 if (!idents_add(sess, 1, &gids, &gidsz, f->st.gid)) { 351 ERRX1(sess, "idents_add"); 352 goto out; 353 } 354 } 355 356 /* Conditional part: devices & special files. */ 357 358 if ((sess->opts->devices && (S_ISBLK(f->st.mode) || 359 S_ISCHR(f->st.mode))) || 360 (sess->opts->specials && (S_ISFIFO(f->st.mode) || 361 S_ISSOCK(f->st.mode)))) { 362 if (!io_write_int(sess, fdout, f->st.rdev)) { 363 ERRX1(sess, "io_write_int"); 364 goto out; 365 } 366 } 367 368 /* Conditional part: link. */ 369 370 if (S_ISLNK(f->st.mode) && 371 sess->opts->preserve_links) { 372 fn = f->link; 373 sz = strlen(f->link); 374 assert(sz < INT32_MAX); 375 if (!io_write_int(sess, fdout, sz)) { 376 ERRX1(sess, "io_write_int"); 377 goto out; 378 } 379 if (!io_write_buf(sess, fdout, fn, sz)) { 380 ERRX1(sess, "io_write_buf"); 381 goto out; 382 } 383 } 384 385 if (S_ISREG(f->st.mode)) 386 sess->total_size += f->st.size; 387 } 388 389 /* Signal end of file list. */ 390 391 if (!io_write_byte(sess, fdout, 0)) { 392 ERRX1(sess, "io_write_byte"); 393 goto out; 394 } 395 396 /* Conditionally write identifier lists. */ 397 398 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 399 LOG2(sess, "sending uid list: %zu", uidsz); 400 if (!idents_send(sess, fdout, uids, uidsz)) { 401 ERRX1(sess, "idents_send"); 402 goto out; 403 } 404 } 405 406 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 407 LOG2(sess, "sending gid list: %zu", gidsz); 408 if (!idents_send(sess, fdout, gids, gidsz)) { 409 ERRX1(sess, "idents_send"); 410 goto out; 411 } 412 } 413 414 rc = 1; 415 out: 416 idents_free(gids, gidsz); 417 idents_free(uids, uidsz); 418 return rc; 419 } 420 421 /* 422 * Read the filename of a file list. 423 * This is the most expensive part of the file list transfer, so a lot 424 * of attention has gone into transmitting as little as possible. 425 * Micro-optimisation, but whatever. 426 * Fills in "f" with the full path on success. 427 * Returns zero on failure, non-zero on success. 428 */ 429 static int 430 flist_recv_name(struct sess *sess, int fd, struct flist *f, uint8_t flags, 431 char last[MAXPATHLEN]) 432 { 433 uint8_t bval; 434 size_t partial = 0; 435 size_t pathlen = 0, len; 436 437 /* 438 * Read our filename. 439 * If we have FLIST_NAME_SAME, we inherit some of the last 440 * transmitted name. 441 * If we have FLIST_NAME_LONG, then the string length is greater 442 * than byte-size. 443 */ 444 445 if (FLIST_NAME_SAME & flags) { 446 if (!io_read_byte(sess, fd, &bval)) { 447 ERRX1(sess, "io_read_byte"); 448 return 0; 449 } 450 partial = bval; 451 } 452 453 /* Get the (possibly-remaining) filename length. */ 454 455 if (FLIST_NAME_LONG & flags) { 456 if (!io_read_size(sess, fd, &pathlen)) { 457 ERRX1(sess, "io_read_size"); 458 return 0; 459 } 460 } else { 461 if (!io_read_byte(sess, fd, &bval)) { 462 ERRX1(sess, "io_read_byte"); 463 return 0; 464 } 465 pathlen = bval; 466 } 467 468 /* Allocate our full filename length. */ 469 /* FIXME: maximum pathname length. */ 470 471 if ((len = pathlen + partial) == 0) { 472 ERRX(sess, "security violation: zero-length pathname"); 473 return 0; 474 } 475 476 if ((f->path = malloc(len + 1)) == NULL) { 477 ERR(sess, "malloc"); 478 return 0; 479 } 480 f->path[len] = '\0'; 481 482 if (FLIST_NAME_SAME & flags) 483 memcpy(f->path, last, partial); 484 485 if (!io_read_buf(sess, fd, f->path + partial, pathlen)) { 486 ERRX1(sess, "io_read_buf"); 487 return 0; 488 } 489 490 if (f->path[0] == '/') { 491 ERRX(sess, "security violation: absolute pathname: %s", 492 f->path); 493 return 0; 494 } 495 496 if (strstr(f->path, "/../") != NULL || 497 (len > 2 && strcmp(f->path + len - 3, "/..") == 0) || 498 (len > 2 && strncmp(f->path, "../", 3) == 0) || 499 strcmp(f->path, "..") == 0) { 500 ERRX(sess, "%s: security violation: backtracking pathname", 501 f->path); 502 return 0; 503 } 504 505 /* Record our last path and construct our filename. */ 506 507 strlcpy(last, f->path, MAXPATHLEN); 508 f->wpath = f->path; 509 return 1; 510 } 511 512 /* 513 * Reallocate a file list in chunks of FLIST_CHUNK_SIZE; 514 * Returns zero on failure, non-zero on success. 515 */ 516 static int 517 flist_realloc(struct sess *sess, struct flist **fl, size_t *sz, size_t *max) 518 { 519 void *pp; 520 521 if (*sz + 1 <= *max) { 522 (*sz)++; 523 return 1; 524 } 525 526 pp = recallocarray(*fl, *max, 527 *max + FLIST_CHUNK_SIZE, sizeof(struct flist)); 528 if (pp == NULL) { 529 ERR(sess, "recallocarray"); 530 return 0; 531 } 532 *fl = pp; 533 *max += FLIST_CHUNK_SIZE; 534 (*sz)++; 535 return 1; 536 } 537 538 /* 539 * Copy a regular or symbolic link file "path" into "f". 540 * This handles the correct path creation and symbolic linking. 541 * Returns zero on failure, non-zero on success. 542 */ 543 static int 544 flist_append(struct sess *sess, struct flist *f, struct stat *st, 545 const char *path) 546 { 547 548 /* 549 * Copy the full path for local addressing and transmit 550 * only the filename part for the receiver. 551 */ 552 553 if ((f->path = strdup(path)) == NULL) { 554 ERR(sess, "strdup"); 555 return 0; 556 } 557 558 if ((f->wpath = strrchr(f->path, '/')) == NULL) 559 f->wpath = f->path; 560 else 561 f->wpath++; 562 563 /* 564 * On the receiving end, we'll strip out all bits on the 565 * mode except for the file permissions. 566 * No need to warn about it here. 567 */ 568 569 flist_copy_stat(f, st); 570 571 /* Optionally copy link information. */ 572 573 if (S_ISLNK(st->st_mode)) { 574 f->link = symlink_read(sess, f->path); 575 if (f->link == NULL) { 576 ERRX1(sess, "symlink_read"); 577 return 0; 578 } 579 } 580 581 return 1; 582 } 583 584 /* 585 * Receive a file list from the wire, filling in length "sz" (which may 586 * possibly be zero) and list "flp" on success. 587 * Return zero on failure, non-zero on success. 588 */ 589 int 590 flist_recv(struct sess *sess, int fd, struct flist **flp, size_t *sz) 591 { 592 struct flist *fl = NULL; 593 struct flist *ff; 594 const struct flist *fflast = NULL; 595 size_t flsz = 0, flmax = 0, lsz, gidsz = 0, uidsz = 0; 596 uint8_t flag; 597 char last[MAXPATHLEN]; 598 int64_t lval; /* temporary values... */ 599 int32_t ival; 600 uint32_t uival; 601 struct ident *gids = NULL, *uids = NULL; 602 603 last[0] = '\0'; 604 605 for (;;) { 606 if (!io_read_byte(sess, fd, &flag)) { 607 ERRX1(sess, "io_read_byte"); 608 goto out; 609 } else if (flag == 0) 610 break; 611 612 if (!flist_realloc(sess, &fl, &flsz, &flmax)) { 613 ERRX1(sess, "flist_realloc"); 614 goto out; 615 } 616 617 ff = &fl[flsz - 1]; 618 fflast = flsz > 1 ? &fl[flsz - 2] : NULL; 619 620 /* Filename first. */ 621 622 if (!flist_recv_name(sess, fd, ff, flag, last)) { 623 ERRX1(sess, "flist_recv_name"); 624 goto out; 625 } 626 627 /* Read the file size. */ 628 629 if (!io_read_long(sess, fd, &lval)) { 630 ERRX1(sess, "io_read_long"); 631 goto out; 632 } 633 ff->st.size = lval; 634 635 /* Read the modification time. */ 636 637 if (!(FLIST_TIME_SAME & flag)) { 638 if (!io_read_uint(sess, fd, &uival)) { 639 ERRX1(sess, "io_read_int"); 640 goto out; 641 } 642 ff->st.mtime = uival; /* beyond 2038 */ 643 } else if (fflast == NULL) { 644 ERRX(sess, "same time without last entry"); 645 goto out; 646 } else 647 ff->st.mtime = fflast->st.mtime; 648 649 /* Read the file mode. */ 650 651 if (!(FLIST_MODE_SAME & flag)) { 652 if (!io_read_uint(sess, fd, &uival)) { 653 ERRX1(sess, "io_read_int"); 654 goto out; 655 } 656 ff->st.mode = uival; 657 } else if (fflast == NULL) { 658 ERRX(sess, "same mode without last entry"); 659 goto out; 660 } else 661 ff->st.mode = fflast->st.mode; 662 663 /* Conditional part: uid. */ 664 665 if (sess->opts->preserve_uids) { 666 if (!(FLIST_UID_SAME & flag)) { 667 if (!io_read_uint(sess, fd, &uival)) { 668 ERRX1(sess, "io_read_int"); 669 goto out; 670 } 671 ff->st.uid = uival; 672 } else if (fflast == NULL) { 673 ERRX(sess, "same uid without last entry"); 674 goto out; 675 } else 676 ff->st.uid = fflast->st.uid; 677 } 678 679 /* Conditional part: gid. */ 680 681 if (sess->opts->preserve_gids) { 682 if (!(FLIST_GID_SAME & flag)) { 683 if (!io_read_uint(sess, fd, &uival)) { 684 ERRX1(sess, "io_read_int"); 685 goto out; 686 } 687 ff->st.gid = uival; 688 } else if (fflast == NULL) { 689 ERRX(sess, "same gid without last entry"); 690 goto out; 691 } else 692 ff->st.gid = fflast->st.gid; 693 } 694 695 /* Conditional part: devices & special files. */ 696 697 if ((sess->opts->devices && (S_ISBLK(ff->st.mode) || 698 S_ISCHR(ff->st.mode))) || 699 (sess->opts->specials && (S_ISFIFO(ff->st.mode) || 700 S_ISSOCK(ff->st.mode)))) { 701 if (!(FLIST_RDEV_SAME & flag)) { 702 if (!io_read_int(sess, fd, &ival)) { 703 ERRX1(sess, "io_read_int"); 704 goto out; 705 } 706 ff->st.rdev = ival; 707 } else if (fflast == NULL) { 708 ERRX(sess, "same device without last entry"); 709 goto out; 710 } else 711 ff->st.rdev = fflast->st.rdev; 712 } 713 714 /* Conditional part: link. */ 715 716 if (S_ISLNK(ff->st.mode) && 717 sess->opts->preserve_links) { 718 if (!io_read_size(sess, fd, &lsz)) { 719 ERRX1(sess, "io_read_size"); 720 goto out; 721 } else if (lsz == 0) { 722 ERRX(sess, "empty link name"); 723 goto out; 724 } 725 ff->link = calloc(lsz + 1, 1); 726 if (ff->link == NULL) { 727 ERR(sess, "calloc"); 728 goto out; 729 } 730 if (!io_read_buf(sess, fd, ff->link, lsz)) { 731 ERRX1(sess, "io_read_buf"); 732 goto out; 733 } 734 } 735 736 LOG3(sess, "%s: received file metadata: " 737 "size %jd, mtime %jd, mode %o, rdev (%d, %d)", 738 ff->path, (intmax_t)ff->st.size, 739 (intmax_t)ff->st.mtime, ff->st.mode, 740 major(ff->st.rdev), minor(ff->st.rdev)); 741 742 if (S_ISREG(ff->st.mode)) 743 sess->total_size += ff->st.size; 744 } 745 746 /* Conditionally read the user/group list. */ 747 748 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 749 if (!idents_recv(sess, fd, &uids, &uidsz)) { 750 ERRX1(sess, "idents_recv"); 751 goto out; 752 } 753 LOG2(sess, "received uid list: %zu", uidsz); 754 } 755 756 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 757 if (!idents_recv(sess, fd, &gids, &gidsz)) { 758 ERRX1(sess, "idents_recv"); 759 goto out; 760 } 761 LOG2(sess, "received gid list: %zu", gidsz); 762 } 763 764 /* Remember to order the received list. */ 765 766 LOG2(sess, "received file metadata list: %zu", flsz); 767 qsort(fl, flsz, sizeof(struct flist), flist_cmp); 768 flist_topdirs(sess, fl, flsz); 769 *sz = flsz; 770 *flp = fl; 771 772 /* Conditionally remap and reassign identifiers. */ 773 774 if (sess->opts->preserve_uids && !sess->opts->numeric_ids) { 775 idents_remap(sess, 0, uids, uidsz); 776 idents_assign_uid(sess, fl, flsz, uids, uidsz); 777 } 778 779 if (sess->opts->preserve_gids && !sess->opts->numeric_ids) { 780 idents_remap(sess, 1, gids, gidsz); 781 idents_assign_gid(sess, fl, flsz, gids, gidsz); 782 } 783 784 idents_free(gids, gidsz); 785 idents_free(uids, uidsz); 786 return 1; 787 out: 788 flist_free(fl, flsz); 789 idents_free(gids, gidsz); 790 idents_free(uids, uidsz); 791 *sz = 0; 792 *flp = NULL; 793 return 0; 794 } 795 796 /* 797 * Generate a flist possibly-recursively given a file root, which may 798 * also be a regular file or symlink. 799 * On success, augments the generated list in "flp" of length "sz". 800 * Returns zero on failure, non-zero on success. 801 */ 802 static int 803 flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz, 804 size_t *max) 805 { 806 char *cargv[2], *cp; 807 int rc = 0, nxdev = 0, flag, i; 808 FTS *fts; 809 FTSENT *ent; 810 struct flist *f; 811 size_t flsz = 0, stripdir; 812 dev_t *xdev; 813 struct stat st; 814 815 cargv[0] = root; 816 cargv[1] = NULL; 817 818 /* 819 * If we're a file, then revert to the same actions we use for 820 * the non-recursive scan. 821 */ 822 823 if (lstat(root, &st) == -1) { 824 ERR(sess, "%s: lstat", root); 825 return 0; 826 } else if (S_ISREG(st.st_mode)) { 827 if (!flist_realloc(sess, fl, sz, max)) { 828 ERRX1(sess, "flist_realloc"); 829 return 0; 830 } 831 f = &(*fl)[(*sz) - 1]; 832 assert(f != NULL); 833 834 if (!flist_append(sess, f, &st, root)) { 835 ERRX1(sess, "flist_append"); 836 return 0; 837 } 838 if (unveil(root, "r") == -1) { 839 ERR(sess, "%s: unveil", root); 840 return 0; 841 } 842 return 1; 843 } else if (S_ISLNK(st.st_mode)) { 844 if (!sess->opts->preserve_links) { 845 WARNX(sess, "%s: skipping symlink", root); 846 return 1; 847 } else if (!flist_realloc(sess, fl, sz, max)) { 848 ERRX1(sess, "flist_realloc"); 849 return 0; 850 } 851 f = &(*fl)[(*sz) - 1]; 852 assert(f != NULL); 853 854 if (!flist_append(sess, f, &st, root)) { 855 ERRX1(sess, "flist_append"); 856 return 0; 857 } 858 if (unveil(root, "r") == -1) { 859 ERR(sess, "%s: unveil", root); 860 return 0; 861 } 862 return 1; 863 } else if (!S_ISDIR(st.st_mode)) { 864 WARNX(sess, "%s: skipping special", root); 865 return 1; 866 } 867 868 /* 869 * If we end with a slash, it means that we're not supposed to 870 * copy the directory part itself---only the contents. 871 * So set "stripdir" to be what we take out. 872 */ 873 874 stripdir = strlen(root); 875 assert(stripdir > 0); 876 if (root[stripdir - 1] != '/') 877 stripdir = 0; 878 879 /* 880 * If we're not stripping anything, then see if we need to strip 881 * out the leading material in the path up to and including the 882 * last directory component. 883 */ 884 885 if (stripdir == 0) 886 if ((cp = strrchr(root, '/')) != NULL) 887 stripdir = cp - root + 1; 888 889 /* 890 * If we're recursive, then we need to take down all of the 891 * files and directory components, so use fts(3). 892 * Copying the information file-by-file into the flstat. 893 * We'll make sense of it in flist_send. 894 */ 895 896 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 897 ERR(sess, "fts_open"); 898 return 0; 899 } 900 901 errno = 0; 902 while ((ent = fts_read(fts)) != NULL) { 903 if (!flist_fts_check(sess, ent)) { 904 errno = 0; 905 continue; 906 } 907 908 /* We don't allow symlinks without -l. */ 909 910 assert(ent->fts_statp != NULL); 911 if (S_ISLNK(ent->fts_statp->st_mode) && 912 !sess->opts->preserve_links) { 913 WARNX(sess, "%s: skipping symlink", ent->fts_path); 914 continue; 915 } 916 917 /* 918 * If rsync is told to avoid crossing a filesystem 919 * boundary when recursing, then replace all mount point 920 * directories with empty directories. The latter is 921 * prevented by telling rsync multiple times to avoid 922 * crossing a filesystem boundary when recursing. 923 * Replacing mount point directories is tricky. We need 924 * to sort out which directories to include. As such, 925 * keep track of unique device inodes, and use these for 926 * comparison. 927 */ 928 929 if (sess->opts->one_file_system && 930 ent->fts_statp->st_dev != st.st_dev) { 931 if (sess->opts->one_file_system > 1 || 932 !S_ISDIR(ent->fts_statp->st_mode)) 933 continue; 934 935 if ((xdev = malloc(sizeof(dev_t))) == NULL) { 936 ERRX1(sess, "malloc"); 937 goto out; 938 } 939 940 flag = 0; 941 for (i = 0; i < nxdev; i++) 942 if (xdev[i] == ent->fts_statp->st_dev) { 943 flag = 1; 944 break; 945 } 946 if (flag) 947 continue; 948 949 if (nxdev) 950 if ((xdev = realloc(xdev, sizeof(dev_t))) == 951 NULL) { 952 ERRX1(sess, "realloc"); 953 goto out; 954 } 955 xdev[nxdev] = ent->fts_statp->st_dev; 956 nxdev++; 957 } 958 959 /* Allocate a new file entry. */ 960 961 if (!flist_realloc(sess, fl, sz, max)) { 962 ERRX1(sess, "flist_realloc"); 963 goto out; 964 } 965 flsz++; 966 f = &(*fl)[*sz - 1]; 967 968 /* Our path defaults to "." for the root. */ 969 970 if (ent->fts_path[stripdir] == '\0') { 971 if (asprintf(&f->path, "%s.", ent->fts_path) < 0) { 972 ERR(sess, "asprintf"); 973 f->path = NULL; 974 goto out; 975 } 976 } else { 977 if ((f->path = strdup(ent->fts_path)) == NULL) { 978 ERR(sess, "strdup"); 979 goto out; 980 } 981 } 982 983 f->wpath = f->path + stripdir; 984 flist_copy_stat(f, ent->fts_statp); 985 986 /* Optionally copy link information. */ 987 988 if (S_ISLNK(ent->fts_statp->st_mode)) { 989 f->link = symlink_read(sess, f->path); 990 if (f->link == NULL) { 991 ERRX1(sess, "symlink_read"); 992 goto out; 993 } 994 } 995 996 /* Reset errno for next fts_read() call. */ 997 errno = 0; 998 } 999 if (errno) { 1000 ERR(sess, "fts_read"); 1001 goto out; 1002 } 1003 if (unveil(root, "r") == -1) { 1004 ERR(sess, "%s: unveil", root); 1005 goto out; 1006 } 1007 1008 LOG3(sess, "generated %zu filenames: %s", flsz, root); 1009 rc = 1; 1010 out: 1011 fts_close(fts); 1012 if (sess->opts->one_file_system) 1013 free(xdev); 1014 return rc; 1015 } 1016 1017 /* 1018 * Generate a flist recursively given the array of directories (or 1019 * files, symlinks, doesn't matter) specified in argv (argc >0). 1020 * On success, stores the generated list in "flp" with length "sz", 1021 * which may be zero. 1022 * Returns zero on failure, non-zero on success. 1023 */ 1024 static int 1025 flist_gen_dirs(struct sess *sess, size_t argc, char **argv, struct flist **flp, 1026 size_t *sz) 1027 { 1028 size_t i, max = 0; 1029 1030 for (i = 0; i < argc; i++) 1031 if (!flist_gen_dirent(sess, argv[i], flp, sz, &max)) 1032 break; 1033 1034 if (i == argc) { 1035 LOG2(sess, "recursively generated %zu filenames", *sz); 1036 return 1; 1037 } 1038 1039 ERRX1(sess, "flist_gen_dirent"); 1040 flist_free(*flp, max); 1041 *flp = NULL; 1042 *sz = 0; 1043 return 0; 1044 } 1045 1046 /* 1047 * Generate list of files from the command-line argc (>0) and argv. 1048 * On success, stores the generated list in "flp" with length "sz", 1049 * which may be zero. 1050 * Returns zero on failure, non-zero on success. 1051 */ 1052 static int 1053 flist_gen_files(struct sess *sess, size_t argc, char **argv, 1054 struct flist **flp, size_t *sz) 1055 { 1056 struct flist *fl = NULL, *f; 1057 size_t i, flsz = 0; 1058 struct stat st; 1059 1060 assert(argc); 1061 1062 if ((fl = calloc(argc, sizeof(struct flist))) == NULL) { 1063 ERR(sess, "calloc"); 1064 return 0; 1065 } 1066 1067 for (i = 0; i < argc; i++) { 1068 if (argv[i][0] == '\0') 1069 continue; 1070 if (lstat(argv[i], &st) == -1) { 1071 ERR(sess, "%s: lstat", argv[i]); 1072 goto out; 1073 } 1074 1075 /* 1076 * File type checks. 1077 * In non-recursive mode, we don't accept directories. 1078 * We also skip symbolic links without -l. 1079 * Beyond that, we only accept regular files. 1080 */ 1081 1082 if (S_ISDIR(st.st_mode)) { 1083 WARNX(sess, "%s: skipping directory", argv[i]); 1084 continue; 1085 } else if (S_ISLNK(st.st_mode)) { 1086 if (!sess->opts->preserve_links) { 1087 WARNX(sess, "%s: skipping symlink", argv[i]); 1088 continue; 1089 } 1090 } else if (!S_ISREG(st.st_mode)) { 1091 WARNX(sess, "%s: skipping special", argv[i]); 1092 continue; 1093 } 1094 1095 1096 f = &fl[flsz++]; 1097 assert(f != NULL); 1098 1099 /* Add this file to our file-system worldview. */ 1100 1101 if (unveil(argv[i], "r") == -1) { 1102 ERR(sess, "%s: unveil", argv[i]); 1103 goto out; 1104 } 1105 if (!flist_append(sess, f, &st, argv[i])) { 1106 ERRX1(sess, "flist_append"); 1107 goto out; 1108 } 1109 } 1110 1111 LOG2(sess, "non-recursively generated %zu filenames", flsz); 1112 *sz = flsz; 1113 *flp = fl; 1114 return 1; 1115 out: 1116 flist_free(fl, argc); 1117 *sz = 0; 1118 *flp = NULL; 1119 return 0; 1120 } 1121 1122 /* 1123 * Generate a sorted, de-duplicated list of file metadata. 1124 * In non-recursive mode (the default), we use only the files we're 1125 * given. 1126 * Otherwise, directories are recursively examined. 1127 * Returns zero on failure, non-zero on success. 1128 * On success, "fl" will need to be freed with flist_free(). 1129 */ 1130 int 1131 flist_gen(struct sess *sess, size_t argc, char **argv, struct flist **flp, 1132 size_t *sz) 1133 { 1134 int rc; 1135 1136 assert(argc > 0); 1137 rc = sess->opts->recursive ? 1138 flist_gen_dirs(sess, argc, argv, flp, sz) : 1139 flist_gen_files(sess, argc, argv, flp, sz); 1140 1141 /* After scanning, lock our file-system view. */ 1142 1143 if (unveil(NULL, NULL) == -1) { 1144 ERR(sess, "unveil"); 1145 return 0; 1146 } 1147 if (!rc) 1148 return 0; 1149 1150 qsort(*flp, *sz, sizeof(struct flist), flist_cmp); 1151 1152 if (flist_dedupe(sess, flp, sz)) { 1153 flist_topdirs(sess, *flp, *sz); 1154 return 1; 1155 } 1156 1157 ERRX1(sess, "flist_dedupe"); 1158 flist_free(*flp, *sz); 1159 *flp = NULL; 1160 *sz = 0; 1161 return 0; 1162 } 1163 1164 /* 1165 * Generate a list of files in root to delete that are within the 1166 * top-level directories stipulated by "wfl". 1167 * Only handles symbolic links, directories, and regular files. 1168 * Returns zero on failure (fl and flsz will be NULL and zero), non-zero 1169 * on success. 1170 * On success, "fl" will need to be freed with flist_free(). 1171 */ 1172 int 1173 flist_gen_dels(struct sess *sess, const char *root, struct flist **fl, 1174 size_t *sz, const struct flist *wfl, size_t wflsz) 1175 { 1176 char **cargv = NULL; 1177 int rc = 0, c, flag; 1178 FTS *fts = NULL; 1179 FTSENT *ent; 1180 struct flist *f; 1181 struct stat st; 1182 size_t cargvs = 0, i, j, max = 0, stripdir; 1183 ENTRY hent; 1184 ENTRY *hentp; 1185 1186 *fl = NULL; 1187 *sz = 0; 1188 1189 /* Only run this code when we're recursive. */ 1190 1191 if (!sess->opts->recursive) 1192 return 1; 1193 1194 /* 1195 * Gather up all top-level directories for scanning. 1196 * This is stipulated by rsync's --delete behaviour, where we 1197 * only delete things in the top-level directories given on the 1198 * command line. 1199 */ 1200 1201 assert(wflsz > 0); 1202 for (i = 0; i < wflsz; i++) 1203 if (FLSTAT_TOP_DIR & wfl[i].st.flags) 1204 cargvs++; 1205 if (cargvs == 0) 1206 return 1; 1207 1208 if ((cargv = calloc(cargvs + 1, sizeof(char *))) == NULL) { 1209 ERR(sess, "calloc"); 1210 return 0; 1211 } 1212 1213 /* 1214 * If we're given just a "." as the first entry, that means 1215 * we're doing a relative copy with a trailing slash. 1216 * Special-case this just for the sake of simplicity. 1217 * Otherwise, look through all top-levels. 1218 */ 1219 1220 if (wflsz && strcmp(wfl[0].wpath, ".") == 0) { 1221 assert(cargvs == 1); 1222 assert(S_ISDIR(wfl[0].st.mode)); 1223 if (asprintf(&cargv[0], "%s/", root) < 0) { 1224 ERR(sess, "asprintf"); 1225 cargv[0] = NULL; 1226 goto out; 1227 } 1228 cargv[1] = NULL; 1229 } else { 1230 for (i = j = 0; i < wflsz; i++) { 1231 if (!(FLSTAT_TOP_DIR & wfl[i].st.flags)) 1232 continue; 1233 assert(S_ISDIR(wfl[i].st.mode)); 1234 assert(strcmp(wfl[i].wpath, ".")); 1235 c = asprintf(&cargv[j], "%s/%s", root, wfl[i].wpath); 1236 if (c < 0) { 1237 ERR(sess, "asprintf"); 1238 cargv[j] = NULL; 1239 goto out; 1240 } 1241 LOG4(sess, "%s: will scan for deletions", cargv[j]); 1242 j++; 1243 } 1244 assert(j == cargvs); 1245 cargv[j] = NULL; 1246 } 1247 1248 LOG2(sess, "delete from %zu directories", cargvs); 1249 1250 /* 1251 * Next, use the standard hcreate(3) hashtable interface to hash 1252 * all of the files that we want to synchronise. 1253 * This way, we'll be able to determine which files we want to 1254 * delete in O(n) time instead of O(n * search) time. 1255 * Plus, we can do the scan in-band and only allocate the files 1256 * we want to delete. 1257 */ 1258 1259 if (!hcreate(wflsz)) { 1260 ERR(sess, "hcreate"); 1261 goto out; 1262 } 1263 1264 for (i = 0; i < wflsz; i++) { 1265 memset(&hent, 0, sizeof(ENTRY)); 1266 if ((hent.key = strdup(wfl[i].wpath)) == NULL) { 1267 ERR(sess, "strdup"); 1268 goto out; 1269 } 1270 if ((hentp = hsearch(hent, ENTER)) == NULL) { 1271 ERR(sess, "hsearch"); 1272 goto out; 1273 } else if (hentp->key != hent.key) { 1274 ERRX(sess, "%s: duplicate", wfl[i].wpath); 1275 free(hent.key); 1276 goto out; 1277 } 1278 } 1279 1280 /* 1281 * Now we're going to try to descend into all of the top-level 1282 * directories stipulated by the file list. 1283 * If the directories don't exist, it's ok. 1284 */ 1285 1286 if ((fts = fts_open(cargv, FTS_PHYSICAL, NULL)) == NULL) { 1287 ERR(sess, "fts_open"); 1288 goto out; 1289 } 1290 1291 stripdir = strlen(root) + 1; 1292 errno = 0; 1293 while ((ent = fts_read(fts)) != NULL) { 1294 if (ent->fts_info == FTS_NS) 1295 continue; 1296 if (!flist_fts_check(sess, ent)) { 1297 errno = 0; 1298 continue; 1299 } else if (stripdir >= ent->fts_pathlen) 1300 continue; 1301 1302 assert(ent->fts_statp != NULL); 1303 1304 /* 1305 * If rsync is told to avoid crossing a filesystem 1306 * boundary when recursing, then exclude all entries 1307 * from the list with a device inode, which does not 1308 * match that of one of the top-level directories. 1309 */ 1310 1311 if (sess->opts->one_file_system) { 1312 flag = 0; 1313 for (i = 0; i < wflsz; i++) { 1314 if (stat(wfl[i].path, &st) == -1) { 1315 ERR(sess, "%s: stat", wfl[i].path); 1316 goto out; 1317 } 1318 if (ent->fts_statp->st_dev == st.st_dev) { 1319 flag = 1; 1320 break; 1321 } 1322 } 1323 if (!flag) 1324 continue; 1325 } 1326 1327 /* Look up in hashtable. */ 1328 1329 memset(&hent, 0, sizeof(ENTRY)); 1330 hent.key = ent->fts_path + stripdir; 1331 if (hsearch(hent, FIND) != NULL) 1332 continue; 1333 1334 /* Not found: we'll delete it. */ 1335 1336 if (!flist_realloc(sess, fl, sz, &max)) { 1337 ERRX1(sess, "flist_realloc"); 1338 goto out; 1339 } 1340 f = &(*fl)[*sz - 1]; 1341 1342 if ((f->path = strdup(ent->fts_path)) == NULL) { 1343 ERR(sess, "strdup"); 1344 goto out; 1345 } 1346 f->wpath = f->path + stripdir; 1347 flist_copy_stat(f, ent->fts_statp); 1348 errno = 0; 1349 } 1350 1351 if (errno) { 1352 ERR(sess, "fts_read"); 1353 goto out; 1354 } 1355 1356 qsort(*fl, *sz, sizeof(struct flist), flist_cmp); 1357 rc = 1; 1358 out: 1359 if (fts != NULL) 1360 fts_close(fts); 1361 for (i = 0; i < cargvs; i++) 1362 free(cargv[i]); 1363 free(cargv); 1364 hdestroy(); 1365 return rc; 1366 } 1367 1368 /* 1369 * Delete all files and directories in "fl". 1370 * If called with a zero-length "fl", does nothing. 1371 * If dry_run is specified, simply write what would be done. 1372 * Return zero on failure, non-zero on success. 1373 */ 1374 int 1375 flist_del(struct sess *sess, int root, const struct flist *fl, size_t flsz) 1376 { 1377 ssize_t i; 1378 int flag; 1379 1380 if (flsz == 0) 1381 return 1; 1382 1383 assert(sess->opts->del); 1384 assert(sess->opts->recursive); 1385 1386 for (i = flsz - 1; i >= 0; i--) { 1387 LOG1(sess, "%s: deleting", fl[i].wpath); 1388 if (sess->opts->dry_run) 1389 continue; 1390 assert(root != -1); 1391 flag = S_ISDIR(fl[i].st.mode) ? AT_REMOVEDIR : 0; 1392 if (unlinkat(root, fl[i].wpath, flag) == -1 && 1393 errno != ENOENT) { 1394 ERR(sess, "%s: unlinkat", fl[i].wpath); 1395 return 0; 1396 } 1397 } 1398 1399 return 1; 1400 } 1401