xref: /netbsd-src/usr.bin/vndcompress/vndcompress.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: vndcompress.c,v 1.24 2014/01/25 15:31:06 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2013 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: vndcompress.c,v 1.24 2014/01/25 15:31:06 riastradh Exp $");
34 
35 #include <sys/endian.h>
36 
37 #include <assert.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <inttypes.h>
42 #include <limits.h>
43 #include <signal.h>
44 #include <stdbool.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <zlib.h>
51 
52 #include "common.h"
53 #include "offtab.h"
54 #include "utils.h"
55 
56 /*
57  * XXX Switch to control bug-for-bug byte-for-byte compatibility with
58  * NetBSD's vndcompress.
59  */
60 #define	VNDCOMPRESS_COMPAT	0
61 
62 __CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);
63 
64 struct compress_state {
65 	uint64_t	size;		/* uncompressed size */
66 	uint64_t	offset;		/* output byte offset */
67 	uint32_t	blocksize;	/* bytes per block */
68 	uint32_t	blkno;		/* input block number */
69 	uint32_t	n_full_blocks;	/* floor(size/blocksize) */
70 	uint32_t	n_blocks;	/* ceiling(size/blocksize) */
71 	uint32_t	n_offsets;	/* n_blocks + 1 */
72 	uint32_t	end_block;	/* last block to transfer */
73 	uint32_t	checkpoint_blocks;	/* blocks before checkpoint */
74 	int		image_fd;
75 	int		cloop2_fd;
76 	struct offtab	offtab;
77 	uint32_t	n_checkpointed_blocks;
78 	volatile sig_atomic_t
79 			initialized;	/* everything above initialized?  */
80 };
81 
82 /* Global compression state for SIGINFO handler.  */
83 static struct compress_state	global_state;
84 
85 struct sigdesc {
86 	int sd_signo;
87 	const char *sd_name;
88 };
89 
90 static const struct sigdesc info_signals[] = {
91 	{ SIGINFO, "SIGINFO" },
92 	{ SIGUSR1, "SIGUSR1" },
93 };
94 
95 static const struct sigdesc checkpoint_signals[] = {
96 	{ SIGUSR2, "SIGUSR2" },
97 };
98 
99 static void	init_signals(void);
100 static void	init_signal_handler(int, const struct sigdesc *, size_t,
101 		    void (*)(int));
102 static void	info_signal_handler(int);
103 static void	checkpoint_signal_handler(int);
104 static void	compress_progress(struct compress_state *);
105 static void	compress_init(int, char **, const struct options *,
106 		    struct compress_state *);
107 static bool	compress_restart(struct compress_state *);
108 static uint32_t	compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
109 		    void *);
110 static void	compress_maybe_checkpoint(struct compress_state *);
111 static void	compress_checkpoint(struct compress_state *);
112 static void	compress_exit(struct compress_state *);
113 
/*
 * Compression entry point.
 *
 * Parses the arguments into the global state, then compresses the
 * input image block by block into the cloop2 output.  All progress
 * counters live in global_state so the SIGINFO/SIGUSR1 and SIGUSR2
 * handlers can report and checkpoint at any time; every update of
 * that state is done with signals blocked.
 */
int
vndcompress(int argc, char **argv, const struct options *O)
{
	struct compress_state *const S = &global_state;

	/* Paranoia.  The other fields either have no sentinel or use zero.  */
	S->image_fd = -1;
	S->cloop2_fd = -1;

	/* Set up signal handlers so we can handle SIGINFO ASAP.  */
	init_signals();

	/*
	 * Parse the arguments to initialize our state.
	 */
	compress_init(argc, argv, O, S);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/*
	 * Allocate compression buffers.
	 *
	 * Compression may actually expand.  From an overabundance of
	 * caution, assume it can expand by at most double.
	 *
	 * XXX Check and consider tightening this assumption.
	 */
	__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
	void *const uncompbuf = malloc(S->blocksize);
	if (uncompbuf == NULL)
		err(1, "malloc uncompressed buffer");

	/* XXX compression ratio bound */
	__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
	void *const compbuf = malloc(2 * (size_t)S->blocksize);
	if (compbuf == NULL)
		err(1, "malloc compressed buffer");

	/*
	 * Compress the blocks.  S->blkno specifies the input block
	 * we're about to transfer.  S->offset is the current output
	 * offset.
	 */
	while (S->blkno < S->n_blocks) {
		/* Report any progress.  */
		compress_progress(S);

		/* Stop if we've done the requested partial transfer.  */
		if ((0 < S->end_block) && (S->end_block <= S->blkno))
			goto out;

		/* Checkpoint if appropriate.  */
		compress_maybe_checkpoint(S);
		offtab_prepare_put(&S->offtab, (S->blkno + 1));

		/* Choose read size: partial if last block, full if not.  */
		const uint32_t readsize = (S->blkno == S->n_full_blocks?
		    (S->size % S->blocksize) : S->blocksize);
		assert(readsize > 0);
		assert(readsize <= S->blocksize);

		/* Fail noisily if we might be about to overflow.  */
		/* XXX compression ratio bound */
		__CTASSERT(MAX_BLOCKSIZE <= (UINTMAX_MAX / 2));
		assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
		if ((2 * (uintmax_t)readsize) >
		    (MIN(UINT64_MAX, OFF_MAX) - S->offset))
			errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
			    S->blkno, (uintmax_t)S->offset,
			    (uintmax_t)readsize);

		/* Process the block.  */
		const uint32_t complen =
		    compress_block(S->image_fd, S->cloop2_fd, S->blkno,
			S->blocksize, readsize, uncompbuf, compbuf);

		/*
		 * Signal-atomically update the state to reflect
		 * (a) what block number we are now at,
		 * (b) how far we are now in the output file, and
		 * (c) where the last block ended.
		 */
		assert(S->blkno <= (UINT32_MAX - 1));
		assert(complen <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
		assert((S->blkno + 1) < S->n_offsets);
	    {
		sigset_t old_sigmask;
		block_signals(&old_sigmask);
		S->blkno += 1;					/* (a) */
		S->offset += complen;				/* (b) */
		offtab_put(&S->offtab, S->blkno, S->offset);	/* (c) */
		restore_sigmask(&old_sigmask);
	    }
	}

	/* Make sure we're all done. */
	assert(S->blkno == S->n_blocks);
	assert((S->blkno + 1) == S->n_offsets);

	/* Pad to the disk block size.  */
	const uint32_t n_extra = (S->offset % DEV_BSIZE);
	if (n_extra != 0) {
		const uint32_t n_padding = (DEV_BSIZE - n_extra);
		/* Reuse compbuf -- guaranteed to be large enough.  */
		(void)memset(compbuf, 0, n_padding);
		const ssize_t n_written = write(S->cloop2_fd, compbuf,
		    n_padding);
		if (n_written == -1)
			err(1, "write final padding failed");
		assert(n_written >= 0);
		if ((size_t)n_written != n_padding)
			errx(1, "partial write of final padding bytes"
			    ": %zu != %"PRIu32,
			    (size_t)n_written, n_padding);

		/* Account for the extra bytes in the output file.  */
		assert(n_padding <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
	    {
		sigset_t old_sigmask;
		block_signals(&old_sigmask);
		S->offset += n_padding;
		restore_sigmask(&old_sigmask);
	    }
	}

out:
	/* One last checkpoint to commit the offset table.  */
	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
	compress_checkpoint(S);

	/*
	 * Free the compression buffers and finalize the compression.
	 */
	free(compbuf);
	free(uncompbuf);
	compress_exit(S);

	return 0;
}
257 
258 /*
259  * Signal cruft.
260  */
261 
262 static void
263 init_signals(void)
264 {
265 
266 	init_signal_handler(SA_RESTART, info_signals,
267 	    __arraycount(info_signals), &info_signal_handler);
268 	init_signal_handler(SA_RESTART, checkpoint_signals,
269 	    __arraycount(checkpoint_signals), &checkpoint_signal_handler);
270 }
271 
272 static void
273 init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
274     void (*handler)(int))
275 {
276 	static const struct sigaction zero_sa;
277 	struct sigaction sa = zero_sa;
278 	size_t i;
279 
280 	(void)sigemptyset(&sa.sa_mask);
281 	for (i = 0; i < n; i++)
282 		(void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
283 	sa.sa_flags = flags;
284 	sa.sa_handler = handler;
285 	for (i = 0; i < n; i++)
286 		if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
287 			err(1, "sigaction(%s)", signals[i].sd_name);
288 }
289 
290 static void
291 info_signal_handler(int signo __unused)
292 {
293 	/* Save errno.  */
294 	const int error = errno;
295 	struct compress_state *const S = &global_state;
296 	char buf[128];
297 
298 	/* Bail if the state is not yet initialized.  */
299 	if (!S->initialized) {
300 		warnx_ss("initializing");
301 		goto out;
302 	}
303 
304 	/* Carefully calculate our I/O position.  */
305 	assert(S->blocksize > 0);
306 	__CTASSERT(MAX_N_BLOCKS <= (UINT64_MAX / MAX_BLOCKSIZE));
307 	const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);
308 
309 	assert(S->n_blocks > 0);
310 	__CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <=
311 	    (UINT64_MAX / sizeof(uint64_t)));
312 	__CTASSERT(MAX_N_BLOCKS <= ((UINT64_MAX / sizeof(uint64_t)) -
313 		CLOOP2_OFFSET_TABLE_OFFSET));
314 	const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
315 		((uint64_t)S->n_blocks * sizeof(uint64_t)))?
316 	    0 : S->offset);
317 
318 	/* snprintf_ss can't do floating-point, so do fixed-point instead.  */
319 	const uint64_t ratio_percent =
320 	    (nread > 0?
321 		((nwritten >= (UINT64_MAX / 100)) ?
322 		    ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
323 		: 0);
324 
325 	/* Format the status.  */
326 	assert(S->n_checkpointed_blocks <= (UINT64_MAX / S->blocksize));
327 	const int n = snprintf_ss(buf, sizeof(buf),
328 	    "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
329 	    "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
330 	    nread, nwritten, ratio_percent,
331 	    ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
332 	if (n < 0) {
333 		const char msg[] = "vndcompress: can't format info\n";
334 		(void)write(STDERR_FILENO, msg, __arraycount(msg));
335 	} else {
336 		__CTASSERT(INT_MAX <= SIZE_MAX);
337 		(void)write(STDERR_FILENO, buf, (size_t)n);
338 	}
339 
340 out:
341 	/* Restore errno.  */
342 	errno = error;
343 }
344 
345 static void
346 checkpoint_signal_handler(int signo __unused)
347 {
348 	/* Save errno.  */
349 	const int error = errno;
350 	struct compress_state *const S = &global_state;
351 
352 	/* Bail if the state is not yet initialized.  */
353 	if (!S->initialized) {
354 		warnx_ss("nothing to checkpoint yet");
355 		goto out;
356 	}
357 
358 	assert(S->image_fd >= 0);
359 	assert(S->cloop2_fd >= 0);
360 
361 	/* Take a checkpoint.  */
362 	assert(S->blocksize > 0);
363 	assert(S->blkno <= (UINT64_MAX / S->blocksize));
364 	warnx_ss("checkpointing %"PRIu64" bytes",
365 	    ((uint64_t)S->blkno * (uint64_t)S->blocksize));
366 	compress_checkpoint(S);
367 
368 out:
369 	/* Restore errno.  */
370 	errno = error;
371 }
372 
/*
 * Report progress.
 *
 * Intentionally a no-op placeholder: it is called once per block from
 * the main loop in vndcompress(), and on-demand reporting is handled
 * by the SIGINFO handler instead.
 *
 * XXX Should do a progress bar here.
 */
static void
compress_progress(struct compress_state *S __unused)
{
}
382 
/*
 * Parse arguments, open the files, and initialize the state.
 *
 * argv is { image, cloop2 [, blocksize] }.  On return, S is fully
 * initialized (S->initialized set last, so the signal handlers only
 * see consistent state), the output is positioned after the bogus
 * header and empty offset table -- unless a restart (-r) succeeded,
 * in which case we resume where the previous run left off.
 */
static void
compress_init(int argc, char **argv, const struct options *O,
    struct compress_state *S)
{

	if (!((argc == 2) || (argc == 3)))
		usage();

	const char *const image_pathname = argv[0];
	const char *const cloop2_pathname = argv[1];

	/* Grab the block size either from `-b' or from the last argument.  */
	__CTASSERT(0 < DEV_BSIZE);
	__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
	__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
	__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
	if (ISSET(O->flags, FLAG_b)) {
		if (argc == 3) {
			warnx("use -b or the extra argument, not both");
			usage();
		}
		S->blocksize = O->blocksize;
	} else {
		S->blocksize = (argc == 2? DEF_BLOCKSIZE :
		    strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
			MAX_BLOCKSIZE));
	}

	/* Sanity-check the blocksize.  (strsuftoll guarantees bounds.)  */
	__CTASSERT(DEV_BSIZE <= UINT32_MAX);
	if ((S->blocksize % DEV_BSIZE) != 0)
		errx(1, "bad blocksize: %"PRIu32
		    " (not a multiple of %"PRIu32")",
		    S->blocksize, (uint32_t)DEV_BSIZE);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert((S->blocksize % DEV_BSIZE) == 0);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/* Grab the end block number if we have one.  */
	S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);

	/* Grab the checkpoint block count, if we have one.  */
	S->checkpoint_blocks =
	    (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);

	/* Open the input image file and the output cloop2 file.  */
	S->image_fd = open(image_pathname, O_RDONLY);
	if (S->image_fd == -1)
		err(1, "open(%s)", image_pathname);

	/*
	 * Without -r, truncate and rewrite from scratch; with -r, keep
	 * existing contents for restart; with -r -R, additionally
	 * require the file to already exist (no O_CREAT).
	 */
	int oflags;
	if (!ISSET(O->flags, FLAG_r))
		oflags = (O_WRONLY | O_TRUNC | O_CREAT); /* XXX O_EXCL?  */
	else if (!ISSET(O->flags, FLAG_R))
		oflags = (O_RDWR | O_CREAT);
	else
		oflags = O_RDWR;
	S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
	if (S->cloop2_fd == -1)
		err(1, "open(%s)", cloop2_pathname);

	/* Find the size of the input image.  */
	if (ISSET(O->flags, FLAG_l)) {
		S->size = O->length;
	} else {
		static const struct stat zero_st;
		struct stat st = zero_st;
		if (fstat(S->image_fd, &st) == -1)
			err(1, "stat(%s)", image_pathname);
		if (st.st_size <= 0)
			errx(1, "unknown image size");
		assert(st.st_size >= 0);
		__CTASSERT(OFF_MAX <= UINT64_MAX);
		assert(__type_fit(uint64_t, st.st_size));
		S->size = st.st_size;
	}
	assert(S->size <= OFF_MAX);

	/* Find number of full blocks and whether there's a partial block.  */
	S->n_full_blocks = (S->size / S->blocksize);
	assert(S->n_full_blocks <=
	    (UINT32_MAX - ((S->size % S->blocksize) > 0)));
	S->n_blocks = (S->n_full_blocks + ((S->size % S->blocksize) > 0));
	assert(S->n_full_blocks <= S->n_blocks);

	if (S->n_blocks > MAX_N_BLOCKS)
		errx(1, "image too large for block size %"PRIu32": %"PRIu64,
		    S->blocksize, S->size);
	assert(S->n_blocks <= MAX_N_BLOCKS);

	/* Choose a window size.  */
	const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size :
	    DEF_WINDOW_SIZE);

	/* Create an offset table for the blocks; one extra for the end.  */
	__CTASSERT(MAX_N_BLOCKS <= (UINT32_MAX - 1));
	S->n_offsets = (S->n_blocks + 1);
	__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
	__CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
	offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd,
	    CLOOP2_OFFSET_TABLE_OFFSET);

	/* Attempt to restart a partial transfer if requested.  */
	if (ISSET(O->flags, FLAG_r)) {
		if (compress_restart(S)) {
			/*
			 * Restart succeeded.  Truncate the output
			 * here, in case any garbage got appended.  We
			 * are committed to making progress at this
			 * point.  If the ftruncate fails, we don't
			 * lose anything valuable -- this is the last
			 * point at which we can restart anyway.
			 */
			if (ftruncate(S->cloop2_fd, S->offset) == -1)
				err(1, "ftruncate failed");

			/* All set!  No more initialization to do.  */
			return;
		} else {
			/* Restart failed.  Barf now if requested.  */
			if (ISSET(O->flags, FLAG_R))
				errx(1, "restart failed, aborting");

			/* Otherwise, truncate and start at the top.  */
			if (ftruncate(S->cloop2_fd, 0) == -1)
				err(1, "truncate failed");
			if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
				err(1, "lseek to cloop2 beginning failed");
			if (lseek(S->image_fd, 0, SEEK_SET) == -1)
				err(1, "lseek to image beginning failed");
		}
	}

	/* Write a bogus (zero) header for now, until we checkpoint.  */
	static const struct cloop2_header zero_header;
	const ssize_t h_written = write(S->cloop2_fd, &zero_header,
	    sizeof(zero_header));
	if (h_written == -1)
		err(1, "write header");
	assert(h_written >= 0);
	if ((size_t)h_written != sizeof(zero_header))
		errx(1, "partial write of header: %zu != %zu",
		    (size_t)h_written, sizeof(zero_header));

	/* Reset the offset table to be empty and write it.  */
	offtab_reset_write(&S->offtab);

	/* Start at the beginning of the image.  */
	S->blkno = 0;
	S->offset = (sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t)));
	S->n_checkpointed_blocks = 0;

	/* Good to go and ready for interruption by a signal.  */
	S->initialized = 1;
}
544 
/*
 * Try to recover state from an existing output file.
 *
 * On success, fill the offset table with what's in the file, set
 * S->blkno and S->offset to reflect our position, and seek to the
 * respective positions in the input and output files.
 *
 * On failure, return false.  May clobber the offset table, S->blkno,
 * S->offset, and the file pointers.
 */
static bool
compress_restart(struct compress_state *S)
{

	/* Read in the header.  */
	static const struct cloop2_header zero_header;
	struct cloop2_header header = zero_header;

	const ssize_t h_read = read_block(S->cloop2_fd, &header,
	    sizeof(header));
	if (h_read == -1) {
		warn("failed to read header");
		return false;
	}
	assert(h_read >= 0);
	if ((size_t)h_read != sizeof(header)) {
		warnx("partial read of header");
		return false;
	}

	/* Check that the header looks like a header.  */
	__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
	if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
	    != 0) {
		warnx("bad cloop2 shell script magic");
		return false;
	}

	/* Check the header parameters.  */
	if (be32toh(header.cl2h_blocksize) != S->blocksize) {
		warnx("mismatched block size: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_blocksize), S->blocksize);
		return false;
	}
	if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
		warnx("mismatched number of blocks: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_n_blocks), S->n_blocks);
		return false;
	}

	/* Read in the partial offset table.  */
	if (!offtab_reset_read(&S->offtab, &warn, &warnx))
		return false;
	if (!offtab_prepare_get(&S->offtab, 0))
		return false;
	const uint64_t first_offset = offtab_get(&S->offtab, 0);
	/* Block 0 must start right after the header and offset table.  */
	const uint64_t expected = sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t));
	if (first_offset != expected) {
		warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64,
		    expected, first_offset);
		return false;
	}

	/*
	 * Find where we left off: scan forward until the first
	 * all-ones (unwritten) entry, sanity-checking that each
	 * recorded block is non-empty and within the 2x compression
	 * ratio bound along the way.
	 */
	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
	uint32_t blkno = 0;
	uint64_t last_offset = first_offset;
	for (blkno = 0; blkno < S->n_blocks; blkno++) {
		if (!offtab_prepare_get(&S->offtab, blkno))
			return false;
		const uint64_t offset = offtab_get(&S->offtab, blkno);
		if (offset == ~(uint64_t)0)
			break;

		if (0 < blkno) {
			const uint64_t start = last_offset;
			const uint64_t end = offset;
			if (end <= start) {
				warnx("bad offset table: 0x%"PRIx64
				    ", 0x%"PRIx64, start, end);
				return false;
			}
			/* XXX compression ratio bound */
			__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
			if ((2 * (size_t)S->blocksize) <= (end - start)) {
				warnx("block %"PRIu32" too large:"
				    " %"PRIu64" bytes"
				    " from 0x%"PRIx64" to 0x%"PRIx64,
				    blkno, (end - start), start, end);
				return false;
			}
		}

		last_offset = offset;
	}

	if (blkno == 0) {
		warnx("no blocks were written; nothing to restart");
		return false;
	}

	/* Make sure the rest of the offset table is all ones.  */
	if (blkno < S->n_blocks) {
		uint32_t nblkno;

		for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
			if (!offtab_prepare_get(&S->offtab, nblkno))
				return false;
			const uint64_t offset = offtab_get(&S->offtab, nblkno);
			if (offset != ~(uint64_t)0) {
				warnx("bad partial offset table entry"
				    " at %"PRIu32": 0x%"PRIx64,
				    nblkno, offset);
				return false;
			}
		}
	}

	/*
	 * XXX Consider decompressing some number of blocks to make
	 * sure they match.
	 */

	/*
	 * Back up by one.  The last recorded entry marks only where
	 * block (blkno - 1) begins, not where it ends, so that block
	 * may have been written incompletely -- recompress it.
	 */
	assert(1 <= blkno);
	blkno -= 1;

	/* Seek to the input position.  */
	assert(S->size <= OFF_MAX);
	assert(blkno <= (S->size / S->blocksize));
	const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
	assert(0 <= restart_position);
	assert(restart_position <= (off_t)S->size);
	if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
		if (errno != ESPIPE) {
			warn("lseek input image failed");
			return false;
		}

		/* Try read instead of lseek for a pipe/socket/fifo.  */
		void *const buffer = malloc(0x10000);
		if (buffer == NULL)
			err(1, "malloc temporary buffer");
		off_t left = restart_position;
		while (left > 0) {
			const size_t size = MIN(0x10000, left);
			const ssize_t n_read = read_block(S->image_fd, buffer,
			    size);
			if (n_read == -1) {
				free(buffer);
				warn("read of input image failed");
				return false;
			}
			assert(n_read >= 0);
			if ((size_t)n_read != size) {
				free(buffer);
				warnx("partial read of input image");
				return false;
			}
			assert((off_t)size <= left);
			left -= size;
		}
		free(buffer);
	}

	/* Seek to the output position.  */
	assert(last_offset <= OFF_MAX);
	if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) {
		warn("lseek output cloop2 to %"PRIx64" failed", last_offset);
		return false;
	}

	/* Switch from reading to writing the offset table.  */
	if (!offtab_transmogrify_read_to_write(&S->offtab, blkno))
		return false;

	/* Start where we left off.  */
	S->blkno = blkno;
	S->offset = last_offset;
	S->n_checkpointed_blocks = blkno;

	/* Good to go and ready for interruption by a signal.  */
	S->initialized = 1;

	/* Success!  */
	return true;
}
735 
/*
 * Read a single block, compress it, and write the compressed block.
 * Return the size of the compressed block.
 *
 * in_fd/out_fd are the image input and cloop2 output descriptors;
 * blkno is used only for diagnostics.  readsize is the number of
 * uncompressed bytes in this block (smaller than blocksize only for
 * a trailing partial block).  uncompbuf holds blocksize bytes and
 * compbuf 2*blocksize bytes.  Any I/O or zlib failure is fatal.
 */
static uint32_t
compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
    uint32_t readsize, void *uncompbuf, void *compbuf)
{

	assert(readsize <= blocksize);
	assert(blocksize <= MAX_BLOCKSIZE);

	/* Read the uncompressed block.  */
	const ssize_t n_read = read_block(in_fd, uncompbuf, readsize);
	if (n_read == -1)
		err(1, "read block %"PRIu32, blkno);
	assert(n_read >= 0);
	if ((size_t)n_read != readsize)
		errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32,
		    blkno, (size_t)n_read, readsize);

	/* Compress the block.  */
	/* XXX compression ratio bound */
	__CTASSERT(MAX_BLOCKSIZE <= (ULONG_MAX / 2));
	const unsigned long uncomplen =
	    (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */
	unsigned long complen = (uncomplen * 2);
	const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen,
	    Z_BEST_COMPRESSION);
	if (zerror != Z_OK)
		/* Fix garbled diagnostic: "compressed failed".  */
		errx(1, "compression failed at block %"PRIu32" (%d): %s",
		    blkno, zerror, zError(zerror));
	assert(complen <= (uncomplen * 2));

	/* Write the compressed block.  */
	const ssize_t n_written = write(out_fd, compbuf, complen);
	if (n_written == -1)
		err(1, "write block %"PRIu32, blkno);
	assert(n_written >= 0);
	if ((size_t)n_written != complen)
		errx(1, "partial write of block %"PRIu32": %zu != %lu",
		    blkno, (size_t)n_written, complen);

	return (size_t)n_written;
}
781 
782 /*
783  * Checkpoint if appropriate.
784  */
785 static void
786 compress_maybe_checkpoint(struct compress_state *S)
787 {
788 
789 	if ((0 < S->checkpoint_blocks) && (0 < S->blkno) &&
790 	    ((S->blkno % S->checkpoint_blocks) == 0)) {
791 		assert(S->offset <= OFF_MAX);
792 		assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
793 		compress_checkpoint(S);
794 	}
795 }
796 
/*
 * Write the prefix of the offset table that we have filled so far.
 *
 * We fsync the data blocks we have written, and then write the offset
 * table, and then fsync the offset table and file metadata.  This
 * should help to avoid offset tables that point at garbage data.
 *
 * This may be called from a signal handler, so it must not use stdio,
 * malloc, &c. -- it may only (a) handle signal-safe state in S, and
 * (b) do file descriptor I/O / fsync.
 *
 * XXX This requires further thought and heavy testing to be sure.
 *
 * XXX Should have an option to suppress fsync.
 *
 * XXX Should have an option to fail on fsync failures.
 *
 * XXX Would be nice if we could just do a barrier rather than an
 * fsync.
 *
 * XXX How might we automatically test the fsyncs?
 */
static void
compress_checkpoint(struct compress_state *S)
{

	assert(S->blkno < S->n_offsets);
	const uint32_t n_offsets = (S->blkno + 1);
	assert(n_offsets <= S->n_offsets);

	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));

	/* Make sure the data hits the disk before we say it's ready.  */
	if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
	    == -1)
		warn_ss("fsync of output failed");

	/* Say the data blocks are ready.  */
	offtab_checkpoint(&S->offtab, n_offsets,
	    (S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0));

	/*
	 * If this is the first checkpoint, initialize the header.
	 * Signal handler can race with main code here, but it is
	 * harmless -- just an extra fsync and write of the header,
	 * which are both idempotent.
	 *
	 * Once we have synchronously checkpointed the offset table,
	 * subsequent writes will preserve a valid state.
	 */
	if (S->n_checkpointed_blocks == 0) {
		static const struct cloop2_header zero_header;
		struct cloop2_header header = zero_header;

		/* Format the header.  */
		__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
		(void)memcpy(header.cl2h_magic, cloop2_magic,
		    sizeof(cloop2_magic));
		header.cl2h_blocksize = htobe32(S->blocksize);
		header.cl2h_n_blocks = htobe32(S->n_blocks);

		/* Write the header.  pwrite avoids moving the output
		 * file pointer, which the main loop is still using.  */
		const ssize_t h_written = pwrite(S->cloop2_fd, &header,
		    sizeof(header), 0);
		if (h_written == -1)
			err_ss(1, "write header");
		assert(h_written >= 0);
		if ((size_t)h_written != sizeof(header))
			errx_ss(1, "partial write of header: %zu != %zu",
			    (size_t)h_written, sizeof(header));
	}

	/* Record how many blocks we've checkpointed.  */
    {
	sigset_t old_sigmask;
	block_signals(&old_sigmask);
	S->n_checkpointed_blocks = S->blkno;
	restore_sigmask(&old_sigmask);
    }
}
878 
879 /*
880  * Release everything we allocated in compress_init.
881  */
882 static void
883 compress_exit(struct compress_state *S)
884 {
885 
886 	/* Done with the offset table.  Destroy it.  */
887 	offtab_destroy(&S->offtab);
888 
889 	/* Done with the files.  Close them.  */
890 	if (close(S->cloop2_fd) == -1)
891 		warn("close(cloop2 fd)");
892 	if (close(S->image_fd) == -1)
893 		warn("close(image fd)");
894 }
895