/*	$NetBSD: vndcompress.c,v 1.26 2017/01/10 21:15:54 christos Exp $	*/

/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: vndcompress.c,v 1.26 2017/01/10 21:15:54 christos Exp $");

#include <sys/endian.h>
#include <sys/stat.h>

#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>

#include "common.h"
#include "offtab.h"
#include "utils.h"

/*
 * XXX Switch to control bug-for-bug byte-for-byte compatibility with
 * NetBSD's vndcompress.
 */
#define	VNDCOMPRESS_COMPAT	0

__CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);

struct compress_state {
	uint64_t	size;			/* uncompressed size */
	uint64_t	offset;			/* output byte offset */
	uint32_t	blocksize;		/* bytes per block */
	uint32_t	blkno;			/* input block number */
	uint32_t	n_full_blocks;		/* floor(size/blocksize) */
	uint32_t	n_blocks;		/* ceiling(size/blocksize) */
	uint32_t	n_offsets;		/* n_blocks + 1 */
	uint32_t	end_block;		/* last block to transfer */
	uint32_t	checkpoint_blocks;	/* blocks before checkpoint */
	int		image_fd;
	int		cloop2_fd;
	struct offtab	offtab;
	uint32_t	n_checkpointed_blocks;
	volatile sig_atomic_t
			initialized;		/* everything above initialized? */
};
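
/*
 * Rough sketch of the cloop2 layout this tool produces, as implied by
 * the code below (common.h and offtab.c are authoritative for the
 * encoding details):
 *
 *	byte 0				struct cloop2_header (magic,
 *					  block size, number of blocks)
 *	CLOOP2_OFFSET_TABLE_OFFSET	n_blocks + 1 offsets of
 *					  sizeof(uint64_t) bytes each;
 *					  entry i is where compressed
 *					  block i starts, entry n_blocks
 *					  is where the last one ends
 *	first offset table entry	compressed blocks, back to back,
 *					  padded at the end to DEV_BSIZE
 */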

/* Global compression state for SIGINFO handler. */
static struct compress_state global_state;

struct sigdesc {
	int sd_signo;
	const char *sd_name;
};

static const struct sigdesc info_signals[] = {
	{ SIGINFO, "SIGINFO" },
	{ SIGUSR1, "SIGUSR1" },
};

static const struct sigdesc checkpoint_signals[] = {
	{ SIGUSR2, "SIGUSR2" },
};

static void	init_signals(void);
static void	init_signal_handler(int, const struct sigdesc *, size_t,
		    void (*)(int));
static void	info_signal_handler(int);
static void	checkpoint_signal_handler(int);
static void	compress_progress(struct compress_state *);
static void	compress_init(int, char **, const struct options *,
		    struct compress_state *);
static bool	compress_restart(struct compress_state *);
static uint32_t	compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
		    void *);
static void	compress_maybe_checkpoint(struct compress_state *);
static void	compress_checkpoint(struct compress_state *);
static void	compress_exit(struct compress_state *);

/*
 * Compression entry point.
 */
int
vndcompress(int argc, char **argv, const struct options *O)
{
	struct compress_state *const S = &global_state;

	/* Paranoia.  The other fields either have no sentinel or use zero. */
	S->image_fd = -1;
	S->cloop2_fd = -1;

	/* Set up signal handlers so we can handle SIGINFO ASAP. */
	init_signals();

	/*
	 * Parse the arguments to initialize our state.
	 */
	compress_init(argc, argv, O, S);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/*
	 * Allocate compression buffers.
	 *
	 * Compression may actually expand.  From an overabundance of
	 * caution, assume it can expand by at most double.
	 *
	 * XXX Check and consider tightening this assumption.
	 */
	__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
	void *const uncompbuf = malloc(S->blocksize);
	if (uncompbuf == NULL)
		err(1, "malloc uncompressed buffer");

	/* XXX compression ratio bound */
	__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
	void *const compbuf = malloc(2 * (size_t)S->blocksize);
	if (compbuf == NULL)
		err(1, "malloc compressed buffer");

	/*
	 * Compress the blocks.  S->blkno specifies the input block
	 * we're about to transfer.  S->offset is the current output
	 * offset.
	 */
	while (S->blkno < S->n_blocks) {
		/* Report any progress. */
		compress_progress(S);

		/* Stop if we've done the requested partial transfer. */
		if ((0 < S->end_block) && (S->end_block <= S->blkno))
			goto out;

		/* Checkpoint if appropriate. */
		compress_maybe_checkpoint(S);
		offtab_prepare_put(&S->offtab, (S->blkno + 1));

		/* Choose read size: partial if last block, full if not. */
		const uint32_t readsize = (S->blkno == S->n_full_blocks?
		    (S->size % S->blocksize) : S->blocksize);
		assert(readsize > 0);
		assert(readsize <= S->blocksize);

		/* Fail noisily if we might be about to overflow. */
		/* XXX compression ratio bound */
		__CTASSERT(MAX_BLOCKSIZE <= (UINTMAX_MAX / 2));
		assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
		if ((2 * (uintmax_t)readsize) >
		    (MIN(UINT64_MAX, OFF_MAX) - S->offset))
			errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
			    S->blkno, (uintmax_t)S->offset,
			    (uintmax_t)readsize);

		/* Process the block. */
		const uint32_t complen =
		    compress_block(S->image_fd, S->cloop2_fd, S->blkno,
			S->blocksize, readsize, uncompbuf, compbuf);

		/*
		 * Signal-atomically update the state to reflect
		 * (a) what block number we are now at,
		 * (b) how far we are now in the output file, and
		 * (c) where the last block ended.
		 */
		assert(S->blkno <= (UINT32_MAX - 1));
		assert(complen <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
		assert((S->blkno + 1) < S->n_offsets);
		{
			sigset_t old_sigmask;
			block_signals(&old_sigmask);
			S->blkno += 1;				/* (a) */
			S->offset += complen;			/* (b) */
			offtab_put(&S->offtab, S->blkno, S->offset); /* (c) */
			restore_sigmask(&old_sigmask);
		}
	}

	/* Make sure we're all done. */
	assert(S->blkno == S->n_blocks);
	assert((S->blkno + 1) == S->n_offsets);

	/* Pad to the disk block size. */
	const uint32_t n_extra = (S->offset % DEV_BSIZE);
	if (n_extra != 0) {
		const uint32_t n_padding = (DEV_BSIZE - n_extra);
		/* Reuse compbuf -- guaranteed to be large enough. */
		(void)memset(compbuf, 0, n_padding);
		const ssize_t n_written = write(S->cloop2_fd, compbuf,
		    n_padding);
		if (n_written == -1)
			err(1, "write final padding failed");
		assert(n_written >= 0);
		if ((size_t)n_written != n_padding)
			errx(1, "partial write of final padding bytes"
			    ": %zu != %"PRIu32,
			    (size_t)n_written, n_padding);

		/* Account for the extra bytes in the output file. */
		assert(n_padding <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
		{
			sigset_t old_sigmask;
			block_signals(&old_sigmask);
			S->offset += n_padding;
			restore_sigmask(&old_sigmask);
		}
	}

out:
	/* One last checkpoint to commit the offset table. */
	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
	compress_checkpoint(S);

	/*
	 * Free the compression buffers and finalize the compression.
	 */
	free(compbuf);
	free(uncompbuf);
	compress_exit(S);

	return 0;
}

/*
 * Signal cruft.
 */

static void
init_signals(void)
{

	init_signal_handler(SA_RESTART, info_signals,
	    __arraycount(info_signals), &info_signal_handler);
	init_signal_handler(SA_RESTART, checkpoint_signals,
	    __arraycount(checkpoint_signals), &checkpoint_signal_handler);
}

static void
init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
    void (*handler)(int))
{
	static const struct sigaction zero_sa;
	struct sigaction sa = zero_sa;
	size_t i;

	(void)sigemptyset(&sa.sa_mask);
	for (i = 0; i < n; i++)
		(void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
	sa.sa_flags = flags;
	sa.sa_handler = handler;
	for (i = 0; i < n; i++)
		if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
			err(1, "sigaction(%s)", signals[i].sd_name);
}

static void
info_signal_handler(int signo __unused)
{
	/* Save errno. */
	const int error = errno;
	struct compress_state *const S = &global_state;
	char buf[128];

	/* Bail if the state is not yet initialized. */
	if (!S->initialized) {
		warnx_ss("initializing");
		goto out;
	}

	/* Carefully calculate our I/O position. */
	assert(S->blocksize > 0);
	__CTASSERT(MAX_N_BLOCKS <= (UINT64_MAX / MAX_BLOCKSIZE));
	const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);

	assert(S->n_blocks > 0);
	__CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <=
	    (UINT64_MAX / sizeof(uint64_t)));
	__CTASSERT(MAX_N_BLOCKS <= ((UINT64_MAX / sizeof(uint64_t)) -
		CLOOP2_OFFSET_TABLE_OFFSET));
	const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
		((uint64_t)S->n_blocks * sizeof(uint64_t)))?
	    0 : S->offset);

	/* snprintf_ss can't do floating-point, so do fixed-point instead. */
	const uint64_t ratio_percent =
	    (nread > 0?
		((nwritten >= (UINT64_MAX / 100)) ?
		    ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
		: 0);

	/* Format the status. */
	assert(S->n_checkpointed_blocks <= (UINT64_MAX / S->blocksize));
	const int n = snprintf_ss(buf, sizeof(buf),
	    "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
	    "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
	    nread, nwritten, ratio_percent,
	    ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
	if (n < 0) {
		const char msg[] = "vndcompress: can't format info\n";
		(void)write(STDERR_FILENO, msg, __arraycount(msg));
	} else {
		__CTASSERT(INT_MAX <= SIZE_MAX);
		(void)write(STDERR_FILENO, buf, (size_t)n);
	}

out:
	/* Restore errno. */
	errno = error;
}

static void
checkpoint_signal_handler(int signo __unused)
{
	/* Save errno. */
	const int error = errno;
	struct compress_state *const S = &global_state;

	/* Bail if the state is not yet initialized. */
	if (!S->initialized) {
		warnx_ss("nothing to checkpoint yet");
		goto out;
	}

	assert(S->image_fd >= 0);
	assert(S->cloop2_fd >= 0);

	/* Take a checkpoint. */
	assert(S->blocksize > 0);
	assert(S->blkno <= (UINT64_MAX / S->blocksize));
	warnx_ss("checkpointing %"PRIu64" bytes",
	    ((uint64_t)S->blkno * (uint64_t)S->blocksize));
	compress_checkpoint(S);

out:
	/* Restore errno. */
	errno = error;
}

/*
 * Report progress.
 *
 * XXX Should do a progress bar here.
 */
static void
compress_progress(struct compress_state *S __unused)
{
}

/*
 * Parse arguments, open the files, and initialize the state.
 */
static void
compress_init(int argc, char **argv, const struct options *O,
    struct compress_state *S)
{

	if (!((argc == 2) || (argc == 3)))
		usage();

	const char *const image_pathname = argv[0];
	const char *const cloop2_pathname = argv[1];

	/* Grab the block size either from `-b' or from the last argument. */
	__CTASSERT(0 < DEV_BSIZE);
	__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
	__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
	__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
	if (ISSET(O->flags, FLAG_b)) {
		if (argc == 3) {
			warnx("use -b or the extra argument, not both");
			usage();
		}
		S->blocksize = O->blocksize;
	} else {
		S->blocksize = (argc == 2? DEF_BLOCKSIZE :
		    strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
			MAX_BLOCKSIZE));
	}

	/* Sanity-check the blocksize.  (strsuftoll guarantees bounds.) */
	__CTASSERT(DEV_BSIZE <= UINT32_MAX);
	if ((S->blocksize % DEV_BSIZE) != 0)
		errx(1, "bad blocksize: %"PRIu32
		    " (not a multiple of %"PRIu32")",
		    S->blocksize, (uint32_t)DEV_BSIZE);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert((S->blocksize % DEV_BSIZE) == 0);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/* Grab the end block number if we have one. */
	S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);

	/* Grab the checkpoint block count, if we have one. */
	S->checkpoint_blocks =
	    (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);

	/* Open the input image file and the output cloop2 file. */
	S->image_fd = open(image_pathname, O_RDONLY);
	if (S->image_fd == -1)
		err(1, "open(%s)", image_pathname);

	int oflags;
	if (!ISSET(O->flags, FLAG_r))
		oflags = (O_WRONLY | O_TRUNC | O_CREAT);	/* XXX O_EXCL? */
	else if (!ISSET(O->flags, FLAG_R))
		oflags = (O_RDWR | O_CREAT);
	else
		oflags = O_RDWR;
	S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
	if (S->cloop2_fd == -1)
		err(1, "open(%s)", cloop2_pathname);

	/* Find the size of the input image. */
	if (ISSET(O->flags, FLAG_l)) {
		S->size = O->length;
	} else {
		static const struct stat zero_st;
		struct stat st = zero_st;
		if (fstat(S->image_fd, &st) == -1)
			err(1, "stat(%s)", image_pathname);
		if (st.st_size <= 0)
			errx(1, "unknown image size");
		assert(st.st_size >= 0);
		__CTASSERT(OFF_MAX <= UINT64_MAX);
		assert(__type_fit(uint64_t, st.st_size));
		S->size = st.st_size;
	}
	assert(S->size <= OFF_MAX);

	/* Find number of full blocks and whether there's a partial block. */
	S->n_full_blocks = (S->size / S->blocksize);
	assert(S->n_full_blocks <=
	    (UINT32_MAX - ((S->size % S->blocksize) > 0)));
	S->n_blocks = (S->n_full_blocks + ((S->size % S->blocksize) > 0));
	assert(S->n_full_blocks <= S->n_blocks);

	if (S->n_blocks > MAX_N_BLOCKS)
		errx(1, "image too large for block size %"PRIu32": %"PRIu64,
		    S->blocksize, S->size);
	assert(S->n_blocks <= MAX_N_BLOCKS);

	/* Choose a window size. */
	const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size :
	    DEF_WINDOW_SIZE);

	/* Create an offset table for the blocks; one extra for the end. */
	__CTASSERT(MAX_N_BLOCKS <= (UINT32_MAX - 1));
	S->n_offsets = (S->n_blocks + 1);
	__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
	__CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
	offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd,
	    CLOOP2_OFFSET_TABLE_OFFSET);

	/* Attempt to restart a partial transfer if requested. */
	if (ISSET(O->flags, FLAG_r)) {
		if (compress_restart(S)) {
			/*
			 * Restart succeeded.  Truncate the output
			 * here, in case any garbage got appended.  We
			 * are committed to making progress at this
			 * point.  If the ftruncate fails, we don't
			 * lose anything valuable -- this is the last
			 * point at which we can restart anyway.
			 */
			if (ftruncate(S->cloop2_fd, S->offset) == -1)
				err(1, "ftruncate failed");

			/* All set!  No more initialization to do. */
			return;
		} else {
			/* Restart failed.  Barf now if requested. */
			if (ISSET(O->flags, FLAG_R))
				errx(1, "restart failed, aborting");

			/* Otherwise, truncate and start at the top. */
			if (ftruncate(S->cloop2_fd, 0) == -1)
				err(1, "truncate failed");
			if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
				err(1, "lseek to cloop2 beginning failed");

			/* If we seeked in the input, rewind. */
			if (S->blkno != 0) {
				if (lseek(S->image_fd, 0, SEEK_SET) == -1)
					err(1,
					    "lseek to image beginning failed");
			}
		}
	}

	/* Write a bogus (zero) header for now, until we checkpoint. */
	static const struct cloop2_header zero_header;
	const ssize_t h_written = write(S->cloop2_fd, &zero_header,
	    sizeof(zero_header));
	if (h_written == -1)
		err(1, "write header");
	assert(h_written >= 0);
	if ((size_t)h_written != sizeof(zero_header))
		errx(1, "partial write of header: %zu != %zu",
		    (size_t)h_written, sizeof(zero_header));

	/* Reset the offset table to be empty and write it. */
	offtab_reset_write(&S->offtab);

	/* Start at the beginning of the image. */
	S->blkno = 0;
	S->offset = (sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t)));
	S->n_checkpointed_blocks = 0;

	/* Good to go and ready for interruption by a signal. */
	S->initialized = 1;
}

/*
 * Try to recover state from an existing output file.
 *
 * On success, fill the offset table with what's in the file, set
 * S->blkno and S->offset to reflect our position, and seek to the
 * respective positions in the input and output files.
 *
 * On failure, return false.  May clobber the offset table, S->blkno,
 * S->offset, and the file pointers.
 */
static bool
compress_restart(struct compress_state *S)
{

	/* Read in the header. */
	static const struct cloop2_header zero_header;
	struct cloop2_header header = zero_header;

	const ssize_t h_read = read_block(S->cloop2_fd, &header,
	    sizeof(header));
	if (h_read == -1) {
		warn("failed to read header");
		return false;
	}
	assert(h_read >= 0);
	if ((size_t)h_read != sizeof(header)) {
		warnx("partial read of header");
		return false;
	}

	/* Check that the header looks like a header. */
	__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
	if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
	    != 0) {
		warnx("bad cloop2 shell script magic");
		return false;
	}

	/* Check the header parameters. */
	if (be32toh(header.cl2h_blocksize) != S->blocksize) {
		warnx("mismatched block size: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_blocksize), S->blocksize);
		return false;
	}
	if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
		warnx("mismatched number of blocks: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_n_blocks), S->n_blocks);
		return false;
	}

	/* Read in the partial offset table. */
	if (!offtab_reset_read(&S->offtab, &warn, &warnx))
		return false;
	if (!offtab_prepare_get(&S->offtab, 0))
		return false;
	const uint64_t first_offset = offtab_get(&S->offtab, 0);
	const uint64_t expected = sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t));
	if (first_offset != expected) {
		warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64,
		    expected, first_offset);
		return false;
	}

	/* Find where we left off. */
	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
	uint32_t blkno = 0;
	uint64_t last_offset = first_offset;
	for (blkno = 0; blkno < S->n_blocks; blkno++) {
		if (!offtab_prepare_get(&S->offtab, blkno))
			return false;
		const uint64_t offset = offtab_get(&S->offtab, blkno);
		if (offset == ~(uint64_t)0)
			break;

		if (0 < blkno) {
			const uint64_t start = last_offset;
			const uint64_t end = offset;
			if (end <= start) {
				warnx("bad offset table: 0x%"PRIx64
				    ", 0x%"PRIx64, start, end);
				return false;
			}
			/* XXX compression ratio bound */
			__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
			if ((2 * (size_t)S->blocksize) <= (end - start)) {
				warnx("block %"PRIu32" too large:"
				    " %"PRIu64" bytes"
				    " from 0x%"PRIx64" to 0x%"PRIx64,
				    blkno, (end - start), start, end);
				return false;
			}
		}

		last_offset = offset;
	}

	if (blkno == 0) {
		warnx("no blocks were written; nothing to restart");
		return false;
	}

	/* Make sure the rest of the offset table is all ones. */
	if (blkno < S->n_blocks) {
		uint32_t nblkno;

		for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
			if (!offtab_prepare_get(&S->offtab, nblkno))
				return false;
			const uint64_t offset = offtab_get(&S->offtab, nblkno);
			if (offset != ~(uint64_t)0) {
				warnx("bad partial offset table entry"
				    " at %"PRIu32": 0x%"PRIx64,
				    nblkno, offset);
				return false;
			}
		}
	}

	/*
	 * XXX Consider decompressing some number of blocks to make
	 * sure they match.
	 */

	/* Back up by one. */
	assert(1 <= blkno);
	blkno -= 1;

	/* Seek to the output position. */
	assert(last_offset <= OFF_MAX);
	if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) {
		warn("lseek output cloop2 to %"PRIx64" failed", last_offset);
		return false;
	}

	/* Switch from reading to writing the offset table. */
	if (!offtab_transmogrify_read_to_write(&S->offtab, blkno))
		return false;

	/*
	 * Seek to the input position last, after all other possible
	 * failures, because if the input is a pipe, we can't change
	 * our mind, rewind, and start at the beginning instead of
	 * restarting.
	 */
	assert(S->size <= OFF_MAX);
	assert(blkno <= (S->size / S->blocksize));
	const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
	assert(0 <= restart_position);
	assert(restart_position <= (off_t)S->size);
	if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
		if (errno != ESPIPE) {
			warn("lseek input image failed");
			return false;
		}

		/* Try read instead of lseek for a pipe/socket/fifo. */
		void *const buffer = malloc(0x10000);
		if (buffer == NULL)
			err(1, "malloc temporary buffer");
		off_t left = restart_position;
		while (left > 0) {
			const size_t size = MIN(0x10000, left);
			const ssize_t n_read = read_block(S->image_fd, buffer,
			    size);
			if (n_read == -1) {
				free(buffer);
				warn("read of input image failed");
				return false;
			}
			assert(n_read >= 0);
			if ((size_t)n_read != size) {
				free(buffer);
				warnx("partial read of input image");
				return false;
			}
			assert((off_t)size <= left);
			left -= size;
		}
		free(buffer);
	}

	/* Start where we left off. */
	S->blkno = blkno;
	S->offset = last_offset;
	S->n_checkpointed_blocks = blkno;

	/* Good to go and ready for interruption by a signal. */
	S->initialized = 1;

	/* Success! */
	return true;
}
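
/*
 * A note on the factor-of-two expansion bound assumed above and in
 * compress_block below: zlib's compressBound() gives the documented
 * worst case for compress2() output (the input length plus a small
 * overhead), which is well under twice the block size for any legal
 * block size here, so doubling is a deliberately generous static
 * bound; tightening it is what the `XXX compression ratio bound'
 * notes refer to.
 */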

/*
 * Read a single block, compress it, and write the compressed block.
 * Return the size of the compressed block.
 */
static uint32_t
compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
    uint32_t readsize, void *uncompbuf, void *compbuf)
{

	assert(readsize <= blocksize);
	assert(blocksize <= MAX_BLOCKSIZE);

	/* Read the uncompressed block. */
	const ssize_t n_read = read_block(in_fd, uncompbuf, readsize);
	if (n_read == -1)
		err(1, "read block %"PRIu32, blkno);
	assert(n_read >= 0);
	if ((size_t)n_read != readsize)
		errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32,
		    blkno, (size_t)n_read, readsize);

	/* Compress the block. */
	/* XXX compression ratio bound */
	__CTASSERT(MAX_BLOCKSIZE <= (ULONG_MAX / 2));
	const unsigned long uncomplen =
	    (VNDCOMPRESS_COMPAT? blocksize : readsize);	/* XXX */
	unsigned long complen = (uncomplen * 2);
	const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen,
	    Z_BEST_COMPRESSION);
	if (zerror != Z_OK)
		errx(1, "compression failed at block %"PRIu32" (%d): %s",
		    blkno, zerror, zError(zerror));
	assert(complen <= (uncomplen * 2));

	/* Write the compressed block. */
	const ssize_t n_written = write(out_fd, compbuf, complen);
	if (n_written == -1)
		err(1, "write block %"PRIu32, blkno);
	assert(n_written >= 0);
	if ((size_t)n_written != complen)
		errx(1, "partial write of block %"PRIu32": %zu != %lu",
		    blkno, (size_t)n_written, complen);

	return (size_t)n_written;
}

/*
 * Checkpoint if appropriate.
 */
static void
compress_maybe_checkpoint(struct compress_state *S)
{

	if ((0 < S->checkpoint_blocks) && (0 < S->blkno) &&
	    ((S->blkno % S->checkpoint_blocks) == 0)) {
		assert(S->offset <= OFF_MAX);
		assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
		compress_checkpoint(S);
	}
}

/*
 * Write the prefix of the offset table that we have filled so far.
 *
 * We fsync the data blocks we have written, and then write the offset
 * table, and then fsync the offset table and file metadata.  This
 * should help to avoid offset tables that point at garbage data.
 *
 * This may be called from a signal handler, so it must not use stdio,
 * malloc, &c. -- it may only (a) handle signal-safe state in S, and
 * (b) do file descriptor I/O / fsync.
 *
 * XXX This requires further thought and heavy testing to be sure.
 *
 * XXX Should have an option to suppress fsync.
 *
 * XXX Should have an option to fail on fsync failures.
 *
 * XXX Would be nice if we could just do a barrier rather than an
 * fsync.
 *
 * XXX How might we automatically test the fsyncs?
 */
static void
compress_checkpoint(struct compress_state *S)
{

	assert(S->blkno < S->n_offsets);
	const uint32_t n_offsets = (S->blkno + 1);
	assert(n_offsets <= S->n_offsets);

	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));

	/* Make sure the data hits the disk before we say it's ready. */
	if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
	    == -1)
		warn_ss("fsync of output failed");

	/* Say the data blocks are ready. */
	offtab_checkpoint(&S->offtab, n_offsets,
	    (S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0));

	/*
	 * If this is the first checkpoint, initialize the header.
	 * Signal handler can race with main code here, but it is
	 * harmless -- just an extra fsync and write of the header,
	 * which are both idempotent.
	 *
	 * Once we have synchronously checkpointed the offset table,
	 * subsequent writes will preserve a valid state.
	 */
	if (S->n_checkpointed_blocks == 0) {
		static const struct cloop2_header zero_header;
		struct cloop2_header header = zero_header;

		/* Format the header. */
		__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
		(void)memcpy(header.cl2h_magic, cloop2_magic,
		    sizeof(cloop2_magic));
		header.cl2h_blocksize = htobe32(S->blocksize);
		header.cl2h_n_blocks = htobe32(S->n_blocks);

		/* Write the header. */
		const ssize_t h_written = pwrite(S->cloop2_fd, &header,
		    sizeof(header), 0);
		if (h_written == -1)
			err_ss(1, "write header");
		assert(h_written >= 0);
		if ((size_t)h_written != sizeof(header))
			errx_ss(1, "partial write of header: %zu != %zu",
			    (size_t)h_written, sizeof(header));
	}

	/* Record how many blocks we've checkpointed. */
	{
		sigset_t old_sigmask;
		block_signals(&old_sigmask);
		S->n_checkpointed_blocks = S->blkno;
		restore_sigmask(&old_sigmask);
	}
}

/*
 * Release everything we allocated in compress_init.
 */
static void
compress_exit(struct compress_state *S)
{

	/* Done with the offset table.  Destroy it. */
	offtab_destroy(&S->offtab);

	/* Done with the files.  Close them. */
	if (close(S->cloop2_fd) == -1)
		warn("close(cloop2 fd)");
	if (close(S->image_fd) == -1)
		warn("close(image fd)");
}