1 /* $NetBSD: vndcompress.c,v 1.24 2014/01/25 15:31:06 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2013 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Taylor R. Campbell. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: vndcompress.c,v 1.24 2014/01/25 15:31:06 riastradh Exp $");

#include <sys/endian.h>

#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>

#include "common.h"
#include "offtab.h"
#include "utils.h"

/*
 * XXX Switch to control bug-for-bug byte-for-byte compatibility with
 * NetBSD's vndcompress.
 */
#define	VNDCOMPRESS_COMPAT	0

__CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);

/*
 * All state for one compression run.  Shared with the signal handlers
 * via the single static instance below, so fields read by handlers are
 * updated only inside block_signals/restore_sigmask sections.
 */
struct compress_state {
	uint64_t	size;		/* uncompressed size */
	uint64_t	offset;		/* output byte offset */
	uint32_t	blocksize;	/* bytes per block */
	uint32_t	blkno;		/* input block number */
	uint32_t	n_full_blocks;	/* floor(size/blocksize) */
	uint32_t	n_blocks;	/* ceiling(size/blocksize) */
	uint32_t	n_offsets;	/* n_blocks + 1 */
	uint32_t	end_block;	/* last block to transfer */
	uint32_t	checkpoint_blocks;	/* blocks before checkpoint */
	int		image_fd;	/* input image file descriptor */
	int		cloop2_fd;	/* output cloop2 file descriptor */
	struct offtab	offtab;		/* table of output block offsets */
	uint32_t	n_checkpointed_blocks;
	volatile sig_atomic_t
			initialized;	/* everything above initialized?  */
};

/* Global compression state for SIGINFO handler.  */
static struct compress_state	global_state;

/* Signal number paired with its name, for sigaction error reporting.  */
struct sigdesc {
	int sd_signo;
	const char *sd_name;
};

static const struct sigdesc info_signals[] = {
	{ SIGINFO, "SIGINFO" },
	{ SIGUSR1, "SIGUSR1" },
};

static const struct sigdesc checkpoint_signals[] = {
	{ SIGUSR2, "SIGUSR2" },
};

static void	init_signals(void);
static void	init_signal_handler(int, const struct sigdesc *, size_t,
		    void (*)(int));
static void	info_signal_handler(int);
static void	checkpoint_signal_handler(int);
static void	compress_progress(struct compress_state *);
static void	compress_init(int, char **, const struct options *,
		    struct compress_state *);
static bool	compress_restart(struct compress_state *);
static uint32_t	compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
		    void *);
static void	compress_maybe_checkpoint(struct compress_state *);
static void	compress_checkpoint(struct compress_state *);
static void	compress_exit(struct compress_state *);

/*
 * Compression entry point.
 *
 * argv[0] is the input image pathname and argv[1] the output cloop2
 * pathname (see compress_init); O carries the parsed command-line
 * options.  Returns 0 on success; failures exit via err/errx.
 */
int
vndcompress(int argc, char **argv, const struct options *O)
{
	struct compress_state *const S = &global_state;

	/* Paranoia.  The other fields either have no sentinel or use zero.  */
	S->image_fd = -1;
	S->cloop2_fd = -1;

	/* Set up signal handlers so we can handle SIGINFO ASAP.  */
	init_signals();

	/*
	 * Parse the arguments to initialize our state.
	 */
	compress_init(argc, argv, O, S);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/*
	 * Allocate compression buffers.
	 *
	 * Compression may actually expand.  From an overabundance of
	 * caution, assume it can expand by at most double.
	 *
	 * XXX Check and consider tightening this assumption.
	 */
	__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
	void *const uncompbuf = malloc(S->blocksize);
	if (uncompbuf == NULL)
		err(1, "malloc uncompressed buffer");

	/* XXX compression ratio bound */
	__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
	void *const compbuf = malloc(2 * (size_t)S->blocksize);
	if (compbuf == NULL)
		err(1, "malloc compressed buffer");

	/*
	 * Compress the blocks.  S->blkno specifies the input block
	 * we're about to transfer.  S->offset is the current output
	 * offset.
	 */
	while (S->blkno < S->n_blocks) {
		/* Report any progress.  */
		compress_progress(S);

		/* Stop if we've done the requested partial transfer.  */
		if ((0 < S->end_block) && (S->end_block <= S->blkno))
			goto out;

		/* Checkpoint if appropriate.  */
		compress_maybe_checkpoint(S);
		offtab_prepare_put(&S->offtab, (S->blkno + 1));

		/* Choose read size: partial if last block, full if not.  */
		const uint32_t readsize = (S->blkno == S->n_full_blocks?
		    (S->size % S->blocksize) : S->blocksize);
		assert(readsize > 0);
		assert(readsize <= S->blocksize);

		/* Fail noisily if we might be about to overflow.  */
		/* XXX compression ratio bound */
		__CTASSERT(MAX_BLOCKSIZE <= (UINTMAX_MAX / 2));
		assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
		if ((2 * (uintmax_t)readsize) >
		    (MIN(UINT64_MAX, OFF_MAX) - S->offset))
			errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
			    S->blkno, (uintmax_t)S->offset,
			    (uintmax_t)readsize);

		/* Process the block.  */
		const uint32_t complen =
		    compress_block(S->image_fd, S->cloop2_fd, S->blkno,
			S->blocksize, readsize, uncompbuf, compbuf);

		/*
		 * Signal-atomically update the state to reflect
		 * (a) what block number we are now at,
		 * (b) how far we are now in the output file, and
		 * (c) where the last block ended.
		 */
		assert(S->blkno <= (UINT32_MAX - 1));
		assert(complen <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
		assert((S->blkno + 1) < S->n_offsets);
		{
			sigset_t old_sigmask;
			block_signals(&old_sigmask);
			S->blkno += 1;			/* (a) */
			S->offset += complen;		/* (b) */
			offtab_put(&S->offtab, S->blkno, S->offset); /* (c) */
			restore_sigmask(&old_sigmask);
		}
	}

	/* Make sure we're all done.  */
	assert(S->blkno == S->n_blocks);
	assert((S->blkno + 1) == S->n_offsets);

	/* Pad to the disk block size.  */
	const uint32_t n_extra = (S->offset % DEV_BSIZE);
	if (n_extra != 0) {
		const uint32_t n_padding = (DEV_BSIZE - n_extra);
		/* Reuse compbuf -- guaranteed to be large enough.  */
		(void)memset(compbuf, 0, n_padding);
		const ssize_t n_written = write(S->cloop2_fd, compbuf,
		    n_padding);
		if (n_written == -1)
			err(1, "write final padding failed");
		assert(n_written >= 0);
		if ((size_t)n_written != n_padding)
			errx(1, "partial write of final padding bytes"
			    ": %zu != %"PRIu32,
			    (size_t)n_written, n_padding);

		/* Account for the extra bytes in the output file.  */
		assert(n_padding <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
		{
			sigset_t old_sigmask;
			block_signals(&old_sigmask);
			S->offset += n_padding;
			restore_sigmask(&old_sigmask);
		}
	}

out:
	/* One last checkpoint to commit the offset table.  */
	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
	compress_checkpoint(S);

	/*
	 * Free the compression buffers and finalize the compression.
	 */
	free(compbuf);
	free(uncompbuf);
	compress_exit(S);

	return 0;
}

/*
 * Signal cruft.
260 */ 261 262 static void 263 init_signals(void) 264 { 265 266 init_signal_handler(SA_RESTART, info_signals, 267 __arraycount(info_signals), &info_signal_handler); 268 init_signal_handler(SA_RESTART, checkpoint_signals, 269 __arraycount(checkpoint_signals), &checkpoint_signal_handler); 270 } 271 272 static void 273 init_signal_handler(int flags, const struct sigdesc *signals, size_t n, 274 void (*handler)(int)) 275 { 276 static const struct sigaction zero_sa; 277 struct sigaction sa = zero_sa; 278 size_t i; 279 280 (void)sigemptyset(&sa.sa_mask); 281 for (i = 0; i < n; i++) 282 (void)sigaddset(&sa.sa_mask, signals[i].sd_signo); 283 sa.sa_flags = flags; 284 sa.sa_handler = handler; 285 for (i = 0; i < n; i++) 286 if (sigaction(signals[i].sd_signo, &sa, NULL) == -1) 287 err(1, "sigaction(%s)", signals[i].sd_name); 288 } 289 290 static void 291 info_signal_handler(int signo __unused) 292 { 293 /* Save errno. */ 294 const int error = errno; 295 struct compress_state *const S = &global_state; 296 char buf[128]; 297 298 /* Bail if the state is not yet initialized. */ 299 if (!S->initialized) { 300 warnx_ss("initializing"); 301 goto out; 302 } 303 304 /* Carefully calculate our I/O position. */ 305 assert(S->blocksize > 0); 306 __CTASSERT(MAX_N_BLOCKS <= (UINT64_MAX / MAX_BLOCKSIZE)); 307 const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize); 308 309 assert(S->n_blocks > 0); 310 __CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <= 311 (UINT64_MAX / sizeof(uint64_t))); 312 __CTASSERT(MAX_N_BLOCKS <= ((UINT64_MAX / sizeof(uint64_t)) - 313 CLOOP2_OFFSET_TABLE_OFFSET)); 314 const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET + 315 ((uint64_t)S->n_blocks * sizeof(uint64_t)))? 316 0 : S->offset); 317 318 /* snprintf_ss can't do floating-point, so do fixed-point instead. */ 319 const uint64_t ratio_percent = 320 (nread > 0? 321 ((nwritten >= (UINT64_MAX / 100)) ? 
322 ((nwritten / nread) * 100) : ((nwritten * 100) / nread)) 323 : 0); 324 325 /* Format the status. */ 326 assert(S->n_checkpointed_blocks <= (UINT64_MAX / S->blocksize)); 327 const int n = snprintf_ss(buf, sizeof(buf), 328 "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, " 329 "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n", 330 nread, nwritten, ratio_percent, 331 ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize)); 332 if (n < 0) { 333 const char msg[] = "vndcompress: can't format info\n"; 334 (void)write(STDERR_FILENO, msg, __arraycount(msg)); 335 } else { 336 __CTASSERT(INT_MAX <= SIZE_MAX); 337 (void)write(STDERR_FILENO, buf, (size_t)n); 338 } 339 340 out: 341 /* Restore errno. */ 342 errno = error; 343 } 344 345 static void 346 checkpoint_signal_handler(int signo __unused) 347 { 348 /* Save errno. */ 349 const int error = errno; 350 struct compress_state *const S = &global_state; 351 352 /* Bail if the state is not yet initialized. */ 353 if (!S->initialized) { 354 warnx_ss("nothing to checkpoint yet"); 355 goto out; 356 } 357 358 assert(S->image_fd >= 0); 359 assert(S->cloop2_fd >= 0); 360 361 /* Take a checkpoint. */ 362 assert(S->blocksize > 0); 363 assert(S->blkno <= (UINT64_MAX / S->blocksize)); 364 warnx_ss("checkpointing %"PRIu64" bytes", 365 ((uint64_t)S->blkno * (uint64_t)S->blocksize)); 366 compress_checkpoint(S); 367 368 out: 369 /* Restore errno. */ 370 errno = error; 371 } 372 373 /* 374 * Report progress. 375 * 376 * XXX Should do a progress bar here. 377 */ 378 static void 379 compress_progress(struct compress_state *S __unused) 380 { 381 } 382 383 /* 384 * Parse arguments, open the files, and initialize the state. 
 */
static void
compress_init(int argc, char **argv, const struct options *O,
    struct compress_state *S)
{

	/* Exactly: <image> <cloop2> [<blocksize>].  */
	if (!((argc == 2) || (argc == 3)))
		usage();

	const char *const image_pathname = argv[0];
	const char *const cloop2_pathname = argv[1];

	/* Grab the block size either from `-b' or from the last argument.  */
	__CTASSERT(0 < DEV_BSIZE);
	__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
	__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
	__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
	if (ISSET(O->flags, FLAG_b)) {
		if (argc == 3) {
			warnx("use -b or the extra argument, not both");
			usage();
		}
		S->blocksize = O->blocksize;
	} else {
		S->blocksize = (argc == 2? DEF_BLOCKSIZE :
		    strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
			MAX_BLOCKSIZE));
	}

	/* Sanity-check the blocksize.  (strsuftoll guarantees bounds.)  */
	__CTASSERT(DEV_BSIZE <= UINT32_MAX);
	if ((S->blocksize % DEV_BSIZE) != 0)
		errx(1, "bad blocksize: %"PRIu32
		    " (not a multiple of %"PRIu32")",
		    S->blocksize, (uint32_t)DEV_BSIZE);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert((S->blocksize % DEV_BSIZE) == 0);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/* Grab the end block number if we have one.  */
	S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);

	/* Grab the checkpoint block count, if we have one.  */
	S->checkpoint_blocks =
	    (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);

	/* Open the input image file and the output cloop2 file.  */
	S->image_fd = open(image_pathname, O_RDONLY);
	if (S->image_fd == -1)
		err(1, "open(%s)", image_pathname);

	/*
	 * Without -r we overwrite from scratch; with -r we may append
	 * to an existing file; with -r -R the file must already exist.
	 */
	int oflags;
	if (!ISSET(O->flags, FLAG_r))
		oflags = (O_WRONLY | O_TRUNC | O_CREAT);	/* XXX O_EXCL?  */
	else if (!ISSET(O->flags, FLAG_R))
		oflags = (O_RDWR | O_CREAT);
	else
		oflags = O_RDWR;
	S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
	if (S->cloop2_fd == -1)
		err(1, "open(%s)", cloop2_pathname);

	/* Find the size of the input image.  */
	if (ISSET(O->flags, FLAG_l)) {
		S->size = O->length;
	} else {
		static const struct stat zero_st;
		struct stat st = zero_st;
		if (fstat(S->image_fd, &st) == -1)
			err(1, "stat(%s)", image_pathname);
		if (st.st_size <= 0)
			errx(1, "unknown image size");
		assert(st.st_size >= 0);
		__CTASSERT(OFF_MAX <= UINT64_MAX);
		assert(__type_fit(uint64_t, st.st_size));
		S->size = st.st_size;
	}
	assert(S->size <= OFF_MAX);

	/* Find number of full blocks and whether there's a partial block.  */
	S->n_full_blocks = (S->size / S->blocksize);
	assert(S->n_full_blocks <=
	    (UINT32_MAX - ((S->size % S->blocksize) > 0)));
	S->n_blocks = (S->n_full_blocks + ((S->size % S->blocksize) > 0));
	assert(S->n_full_blocks <= S->n_blocks);

	if (S->n_blocks > MAX_N_BLOCKS)
		errx(1, "image too large for block size %"PRIu32": %"PRIu64,
		    S->blocksize, S->size);
	assert(S->n_blocks <= MAX_N_BLOCKS);

	/* Choose a window size.  */
	const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size :
	    DEF_WINDOW_SIZE);

	/* Create an offset table for the blocks; one extra for the end.  */
	__CTASSERT(MAX_N_BLOCKS <= (UINT32_MAX - 1));
	S->n_offsets = (S->n_blocks + 1);
	__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
	__CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
	offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd,
	    CLOOP2_OFFSET_TABLE_OFFSET);

	/* Attempt to restart a partial transfer if requested.  */
	if (ISSET(O->flags, FLAG_r)) {
		if (compress_restart(S)) {
			/*
			 * Restart succeeded.  Truncate the output
			 * here, in case any garbage got appended.  We
			 * are committed to making progress at this
			 * point.  If the ftruncate fails, we don't
			 * lose anything valuable -- this is the last
			 * point at which we can restart anyway.
			 */
			if (ftruncate(S->cloop2_fd, S->offset) == -1)
				err(1, "ftruncate failed");

			/* All set!  No more initialization to do.  */
			return;
		} else {
			/* Restart failed.  Barf now if requested.  */
			if (ISSET(O->flags, FLAG_R))
				errx(1, "restart failed, aborting");

			/* Otherwise, truncate and start at the top.  */
			if (ftruncate(S->cloop2_fd, 0) == -1)
				err(1, "truncate failed");
			if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
				err(1, "lseek to cloop2 beginning failed");
			if (lseek(S->image_fd, 0, SEEK_SET) == -1)
				err(1, "lseek to image beginning failed");
		}
	}

	/* Write a bogus (zero) header for now, until we checkpoint.  */
	static const struct cloop2_header zero_header;
	const ssize_t h_written = write(S->cloop2_fd, &zero_header,
	    sizeof(zero_header));
	if (h_written == -1)
		err(1, "write header");
	assert(h_written >= 0);
	if ((size_t)h_written != sizeof(zero_header))
		errx(1, "partial write of header: %zu != %zu",
		    (size_t)h_written, sizeof(zero_header));

	/* Reset the offset table to be empty and write it.  */
	offtab_reset_write(&S->offtab);

	/* Start at the beginning of the image.  */
	S->blkno = 0;
	S->offset = (sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t)));
	S->n_checkpointed_blocks = 0;

	/* Good to go and ready for interruption by a signal.  */
	S->initialized = 1;
}

/*
 * Try to recover state from an existing output file.
 *
 * On success, fill the offset table with what's in the file, set
 * S->blkno and S->offset to reflect our position, and seek to the
 * respective positions in the input and output files.
 *
 * On failure, return false.
 * May clobber the offset table, S->blkno,
 * S->offset, and the file pointers.
 */
static bool
compress_restart(struct compress_state *S)
{

	/* Read in the header.  */
	static const struct cloop2_header zero_header;
	struct cloop2_header header = zero_header;

	const ssize_t h_read = read_block(S->cloop2_fd, &header,
	    sizeof(header));
	if (h_read == -1) {
		warn("failed to read header");
		return false;
	}
	assert(h_read >= 0);
	if ((size_t)h_read != sizeof(header)) {
		warnx("partial read of header");
		return false;
	}

	/* Check that the header looks like a header.  */
	__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
	if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
	    != 0) {
		warnx("bad cloop2 shell script magic");
		return false;
	}

	/* Check the header parameters.  */
	if (be32toh(header.cl2h_blocksize) != S->blocksize) {
		warnx("mismatched block size: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_blocksize), S->blocksize);
		return false;
	}
	if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
		warnx("mismatched number of blocks: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_n_blocks), S->n_blocks);
		return false;
	}

	/* Read in the partial offset table.  */
	if (!offtab_reset_read(&S->offtab, &warn, &warnx))
		return false;
	if (!offtab_prepare_get(&S->offtab, 0))
		return false;
	const uint64_t first_offset = offtab_get(&S->offtab, 0);
	/* Block 0 must start right after the header and offset table.  */
	const uint64_t expected = sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t));
	if (first_offset != expected) {
		warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64,
		    expected, first_offset);
		return false;
	}

	/*
	 * Find where we left off: scan forward until the first
	 * unfilled (all-ones) offset table entry, sanity-checking
	 * each filled entry against its predecessor on the way.
	 */
	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
	uint32_t blkno = 0;
	uint64_t last_offset = first_offset;
	for (blkno = 0; blkno < S->n_blocks; blkno++) {
		if (!offtab_prepare_get(&S->offtab, blkno))
			return false;
		const uint64_t offset = offtab_get(&S->offtab, blkno);
		if (offset == ~(uint64_t)0)
			break;

		if (0 < blkno) {
			/* Offsets must strictly increase.  */
			const uint64_t start = last_offset;
			const uint64_t end = offset;
			if (end <= start) {
				warnx("bad offset table: 0x%"PRIx64
				    ", 0x%"PRIx64, start, end);
				return false;
			}
			/* XXX compression ratio bound */
			__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
			if ((2 * (size_t)S->blocksize) <= (end - start)) {
				warnx("block %"PRIu32" too large:"
				    " %"PRIu64" bytes"
				    " from 0x%"PRIx64" to 0x%"PRIx64,
				    blkno, (end - start), start, end);
				return false;
			}
		}

		last_offset = offset;
	}

	if (blkno == 0) {
		warnx("no blocks were written; nothing to restart");
		return false;
	}

	/* Make sure the rest of the offset table is all ones.  */
	if (blkno < S->n_blocks) {
		uint32_t nblkno;

		for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
			if (!offtab_prepare_get(&S->offtab, nblkno))
				return false;
			const uint64_t offset = offtab_get(&S->offtab, nblkno);
			if (offset != ~(uint64_t)0) {
				warnx("bad partial offset table entry"
				    " at %"PRIu32": 0x%"PRIx64,
				    nblkno, offset);
				return false;
			}
		}
	}

	/*
	 * XXX Consider decompressing some number of blocks to make
	 * sure they match.
	 */

	/* Back up by one.  */
	assert(1 <= blkno);
	blkno -= 1;

	/* Seek to the input position.  */
	assert(S->size <= OFF_MAX);
	assert(blkno <= (S->size / S->blocksize));
	const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
	assert(0 <= restart_position);
	assert(restart_position <= (off_t)S->size);
	if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
		if (errno != ESPIPE) {
			warn("lseek input image failed");
			return false;
		}

		/* Try read instead of lseek for a pipe/socket/fifo.  */
		void *const buffer = malloc(0x10000);
		if (buffer == NULL)
			err(1, "malloc temporary buffer");
		off_t left = restart_position;
		while (left > 0) {
			const size_t size = MIN(0x10000, left);
			const ssize_t n_read = read_block(S->image_fd, buffer,
			    size);
			if (n_read == -1) {
				free(buffer);
				warn("read of input image failed");
				return false;
			}
			assert(n_read >= 0);
			if ((size_t)n_read != size) {
				free(buffer);
				warnx("partial read of input image");
				return false;
			}
			assert((off_t)size <= left);
			left -= size;
		}
		free(buffer);
	}

	/* Seek to the output position.  */
	assert(last_offset <= OFF_MAX);
	if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) {
		warn("lseek output cloop2 to %"PRIx64" failed", last_offset);
		return false;
	}

	/* Switch from reading to writing the offset table.  */
	if (!offtab_transmogrify_read_to_write(&S->offtab, blkno))
		return false;

	/* Start where we left off.  */
	S->blkno = blkno;
	S->offset = last_offset;
	S->n_checkpointed_blocks = blkno;

	/* Good to go and ready for interruption by a signal.  */
	S->initialized = 1;

	/* Success!  */
	return true;
}

/*
 * Read a single block, compress it, and write the compressed block.
 * Return the size of the compressed block.
739 */ 740 static uint32_t 741 compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize, 742 uint32_t readsize, void *uncompbuf, void *compbuf) 743 { 744 745 assert(readsize <= blocksize); 746 assert(blocksize <= MAX_BLOCKSIZE); 747 748 /* Read the uncompressed block. */ 749 const ssize_t n_read = read_block(in_fd, uncompbuf, readsize); 750 if (n_read == -1) 751 err(1, "read block %"PRIu32, blkno); 752 assert(n_read >= 0); 753 if ((size_t)n_read != readsize) 754 errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32, 755 blkno, (size_t)n_read, readsize); 756 757 /* Compress the block. */ 758 /* XXX compression ratio bound */ 759 __CTASSERT(MAX_BLOCKSIZE <= (ULONG_MAX / 2)); 760 const unsigned long uncomplen = 761 (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */ 762 unsigned long complen = (uncomplen * 2); 763 const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen, 764 Z_BEST_COMPRESSION); 765 if (zerror != Z_OK) 766 errx(1, "compressed failed at block %"PRIu32" (%d): %s", blkno, 767 zerror, zError(zerror)); 768 assert(complen <= (uncomplen * 2)); 769 770 /* Write the compressed block. */ 771 const ssize_t n_written = write(out_fd, compbuf, complen); 772 if (n_written == -1) 773 err(1, "write block %"PRIu32, blkno); 774 assert(n_written >= 0); 775 if ((size_t)n_written != complen) 776 errx(1, "partial write of block %"PRIu32": %zu != %lu", 777 blkno, (size_t)n_written, complen); 778 779 return (size_t)n_written; 780 } 781 782 /* 783 * Checkpoint if appropriate. 784 */ 785 static void 786 compress_maybe_checkpoint(struct compress_state *S) 787 { 788 789 if ((0 < S->checkpoint_blocks) && (0 < S->blkno) && 790 ((S->blkno % S->checkpoint_blocks) == 0)) { 791 assert(S->offset <= OFF_MAX); 792 assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR)); 793 compress_checkpoint(S); 794 } 795 } 796 797 /* 798 * Write the prefix of the offset table that we have filled so far. 
799 * 800 * We fsync the data blocks we have written, and then write the offset 801 * table, and then fsync the offset table and file metadata. This 802 * should help to avoid offset tables that point at garbage data. 803 * 804 * This may be called from a signal handler, so it must not use stdio, 805 * malloc, &c. -- it may only (a) handle signal-safe state in S, and 806 * (b) do file descriptor I/O / fsync. 807 * 808 * XXX This requires further thought and heavy testing to be sure. 809 * 810 * XXX Should have an option to suppress fsync. 811 * 812 * XXX Should have an option to fail on fsync failures. 813 * 814 * XXX Would be nice if we could just do a barrier rather than an 815 * fsync. 816 * 817 * XXX How might we automatically test the fsyncs? 818 */ 819 static void 820 compress_checkpoint(struct compress_state *S) 821 { 822 823 assert(S->blkno < S->n_offsets); 824 const uint32_t n_offsets = (S->blkno + 1); 825 assert(n_offsets <= S->n_offsets); 826 827 assert(S->offset <= OFF_MAX); 828 assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR)); 829 830 /* Make sure the data hits the disk before we say it's ready. */ 831 if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset) 832 == -1) 833 warn_ss("fsync of output failed"); 834 835 /* Say the data blocks are ready. */ 836 offtab_checkpoint(&S->offtab, n_offsets, 837 (S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0)); 838 839 /* 840 * If this is the first checkpoint, initialize the header. 841 * Signal handler can race with main code here, but it is 842 * harmless -- just an extra fsync and write of the header, 843 * which are both idempotent. 844 * 845 * Once we have synchronously checkpointed the offset table, 846 * subsequent writes will preserve a valid state. 847 */ 848 if (S->n_checkpointed_blocks == 0) { 849 static const struct cloop2_header zero_header; 850 struct cloop2_header header = zero_header; 851 852 /* Format the header. 
*/ 853 __CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic)); 854 (void)memcpy(header.cl2h_magic, cloop2_magic, 855 sizeof(cloop2_magic)); 856 header.cl2h_blocksize = htobe32(S->blocksize); 857 header.cl2h_n_blocks = htobe32(S->n_blocks); 858 859 /* Write the header. */ 860 const ssize_t h_written = pwrite(S->cloop2_fd, &header, 861 sizeof(header), 0); 862 if (h_written == -1) 863 err_ss(1, "write header"); 864 assert(h_written >= 0); 865 if ((size_t)h_written != sizeof(header)) 866 errx_ss(1, "partial write of header: %zu != %zu", 867 (size_t)h_written, sizeof(header)); 868 } 869 870 /* Record how many blocks we've checkpointed. */ 871 { 872 sigset_t old_sigmask; 873 block_signals(&old_sigmask); 874 S->n_checkpointed_blocks = S->blkno; 875 restore_sigmask(&old_sigmask); 876 } 877 } 878 879 /* 880 * Release everything we allocated in compress_init. 881 */ 882 static void 883 compress_exit(struct compress_state *S) 884 { 885 886 /* Done with the offset table. Destroy it. */ 887 offtab_destroy(&S->offtab); 888 889 /* Done with the files. Close them. */ 890 if (close(S->cloop2_fd) == -1) 891 warn("close(cloop2 fd)"); 892 if (close(S->image_fd) == -1) 893 warn("close(image fd)"); 894 } 895