xref: /netbsd-src/usr.bin/vndcompress/vndcompress.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: vndcompress.c,v 1.13 2013/05/06 22:53:24 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2013 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: vndcompress.c,v 1.13 2013/05/06 22:53:24 riastradh Exp $");
34 
35 #include <sys/endian.h>
36 
37 #include <assert.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <inttypes.h>
42 #include <limits.h>
43 #include <signal.h>
44 #include <stdbool.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <zlib.h>
51 
52 /* XXX Seems to be missing from <stdio.h>...  */
53 int	snprintf_ss(char *restrict, size_t, const char *restrict, ...)
54 	    __printflike(3, 4);
55 int	vsnprintf_ss(char *restrict, size_t, const char *restrict, va_list)
56 	    __printflike(3, 0);
57 
58 #include "common.h"
59 
60 /*
61  * XXX Switch to control bug-for-bug byte-for-byte compatibility with
62  * NetBSD's vndcompress.
63  */
64 #define	VNDCOMPRESS_COMPAT	0
65 
66 __CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);
67 
/*
 * Everything the compressor needs to know about a transfer in
 * progress.  The signal handlers read this through global_state, so
 * the main loop updates blkno/offset/offset_table with signals blocked.
 */
struct compress_state {
	uint64_t	size;		/* bytes of uncompressed input */
	uint64_t	offset;		/* current byte offset in output */
	uint32_t	blocksize;	/* bytes per uncompressed block */
	uint32_t	blkno;		/* next input block to transfer */
	uint32_t	n_full_blocks;	/* floor(size/blocksize) */
	uint32_t	n_blocks;	/* ceiling(size/blocksize) */
	uint32_t	n_offsets;	/* n_blocks + 1 */
	uint32_t	end_block;	/* stop here; 0 means no limit */
	uint32_t	checkpoint_blocks;	/* checkpoint period; 0 = off */
	int		image_fd;	/* input image descriptor */
	int		cloop2_fd;	/* output cloop2 descriptor */
	uint64_t	*offset_table;	/* n_offsets big-endian offsets */
	uint32_t	n_checkpointed_blocks;	/* blkno at last checkpoint */
	volatile sig_atomic_t
			initialized;	/* nonzero once all above is set */
};
85 
86 /* Global compression state for SIGINFO handler.  */
87 static struct compress_state	global_state;
88 
/* A signal number paired with its name for error messages.  */
struct sigdesc {
	int sd_signo;		/* signal number, e.g. SIGUSR1 */
	const char *sd_name;	/* printable name, e.g. "SIGUSR1" */
};
93 
94 static const struct sigdesc info_signals[] = {
95 	{ SIGINFO, "SIGINFO" },
96 	{ SIGUSR1, "SIGUSR1" },
97 };
98 
99 static const struct sigdesc checkpoint_signals[] = {
100 	{ SIGUSR2, "SIGUSR2" },
101 };
102 
103 static void	init_signals(void);
104 static void	init_signal_handler(int, const struct sigdesc *, size_t,
105 		    void (*)(int));
106 static void	info_signal_handler(int);
107 static void	checkpoint_signal_handler(int);
108 static void	block_signals(sigset_t *);
109 static void	restore_sigmask(const sigset_t *);
110 static void	compress_progress(struct compress_state *);
111 static void	compress_init(int, char **, const struct options *,
112 		    struct compress_state *);
113 static bool	compress_restart(struct compress_state *);
114 static uint32_t	compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
115 		    void *);
116 static void	compress_maybe_checkpoint(struct compress_state *);
117 static void	compress_checkpoint(struct compress_state *);
118 static void	compress_exit(struct compress_state *);
119 static ssize_t	read_block(int, void *, size_t);
120 static void	err_ss(int, const char *) __dead;
121 static void	errx_ss(int, const char *, ...) __printflike(2, 3) __dead;
122 static void	warn_ss(const char *);
123 static void	warnx_ss(const char *, ...) __printflike(1, 2);
124 static void	vwarnx_ss(const char *, va_list) __printflike(1, 0);
125 
126 /*
127  * Compression entry point.
128  */
129 int
130 vndcompress(int argc, char **argv, const struct options *O)
131 {
132 	struct compress_state *const S = &global_state;
133 
134 	/* Paranoia.  The other fields either have no sentinel or use zero.  */
135 	S->image_fd = -1;
136 	S->cloop2_fd = -1;
137 
138 	/* Set up signal handlers so we can handle SIGINFO ASAP.  */
139 	init_signals();
140 
141 	/*
142 	 * Parse the arguments to initialize our state.
143 	 */
144 	compress_init(argc, argv, O, S);
145 	assert(MIN_BLOCKSIZE <= S->blocksize);
146 	assert(S->blocksize <= MAX_BLOCKSIZE);
147 	assert(S->offset_table != NULL);
148 	assert(S->n_offsets > 0);
149 	assert(S->offset_table[0] == htobe64(sizeof(struct cloop2_header) +
150 		(S->n_offsets * sizeof(uint64_t))));
151 
152 	/*
153 	 * Allocate compression buffers.
154 	 *
155 	 * Compression may actually expand.  From an overabundance of
156 	 * caution, assume it can expand by at most double.
157 	 *
158 	 * XXX Check and consider tightening this assumption.
159 	 */
160 	__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
161 	void *const uncompbuf = malloc(S->blocksize);
162 	if (uncompbuf == NULL)
163 		err(1, "malloc uncompressed buffer");
164 
165 	/* XXX compression ratio bound */
166 	__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
167 	void *const compbuf = malloc(2 * (size_t)S->blocksize);
168 	if (compbuf == NULL)
169 		err(1, "malloc compressed buffer");
170 
171 	/*
172 	 * Compress the blocks.  S->blkno specifies the input block
173 	 * we're about to transfer.  S->offset is the current output
174 	 * offset.
175 	 */
176 	while (S->blkno < S->n_blocks) {
177 		/* Report any progress.  */
178 		compress_progress(S);
179 
180 		/* Stop if we've done the requested partial transfer.  */
181 		if ((0 < S->end_block) && (S->end_block <= S->blkno))
182 			goto out;
183 
184 		/* Checkpoint if appropriate.  */
185 		compress_maybe_checkpoint(S);
186 
187 		/* Choose read size: partial if last block, full if not.  */
188 		const uint32_t readsize = (S->blkno == S->n_full_blocks?
189 		    (S->size % S->blocksize) : S->blocksize);
190 		assert(readsize > 0);
191 		assert(readsize <= S->blocksize);
192 
193 		/* Fail noisily if we might be about to overflow.  */
194 		/* XXX compression ratio bound */
195 		__CTASSERT(MAX_BLOCKSIZE <= (UINTMAX_MAX / 2));
196 		assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
197 		if ((2 * (uintmax_t)readsize) >
198 		    (MIN(UINT64_MAX, OFF_MAX) - S->offset))
199 			errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
200 			    S->blkno, (uintmax_t)S->offset,
201 			    (uintmax_t)readsize);
202 
203 		/* Process the block.  */
204 		const uint32_t complen =
205 		    compress_block(S->image_fd, S->cloop2_fd, S->blkno,
206 			S->blocksize, readsize, uncompbuf, compbuf);
207 
208 		/*
209 		 * Signal-atomically update the state to reflect
210 		 * (a) what block number we are now at,
211 		 * (b) how far we are now in the output file, and
212 		 * (c) where the last block ended.
213 		 */
214 		assert(S->blkno <= (UINT32_MAX - 1));
215 		assert(complen <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
216 		assert((S->blkno + 1) < S->n_offsets);
217 	    {
218 		sigset_t old_sigmask;
219 		block_signals(&old_sigmask);
220 		S->blkno += 1;					/* (a) */
221 		S->offset += complen;				/* (b) */
222 		S->offset_table[S->blkno] = htobe64(S->offset);	/* (c) */
223 		restore_sigmask(&old_sigmask);
224 	    }
225 	}
226 
227 	/* Make sure we're all done. */
228 	assert(S->blkno == S->n_blocks);
229 	assert((S->blkno + 1) == S->n_offsets);
230 
231 	/* Pad to the disk block size.  */
232 	const uint32_t n_extra = (S->offset % DEV_BSIZE);
233 	if (n_extra != 0) {
234 		const uint32_t n_padding = (DEV_BSIZE - n_extra);
235 		/* Reuse compbuf -- guaranteed to be large enough.  */
236 		(void)memset(compbuf, 0, n_padding);
237 		const ssize_t n_written = write(S->cloop2_fd, compbuf,
238 		    n_padding);
239 		if (n_written == -1)
240 			err(1, "write final padding failed");
241 		assert(n_written >= 0);
242 		if ((size_t)n_written != n_padding)
243 			errx(1, "partial write of final padding bytes"
244 			    ": %zu != %"PRIu32,
245 			    (size_t)n_written, n_padding);
246 
247 		/* Account for the extra bytes in the output file.  */
248 		assert(n_padding <= (MIN(UINT64_MAX, OFF_MAX) - S->offset));
249 	    {
250 		sigset_t old_sigmask;
251 		block_signals(&old_sigmask);
252 		S->offset += n_padding;
253 		restore_sigmask(&old_sigmask);
254 	    }
255 	}
256 
257 out:
258 	/* Commit the offset table.  */
259 	assert(S->offset <= OFF_MAX);
260 	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
261 	compress_checkpoint(S);
262 
263 	/*
264 	 * Free the compression buffers and finalize the compression.
265 	 */
266 	free(compbuf);
267 	free(uncompbuf);
268 	compress_exit(S);
269 
270 	return 0;
271 }
272 
273 /*
274  * Signal cruft.
275  */
276 
277 static void
278 init_signals(void)
279 {
280 
281 	init_signal_handler(SA_RESTART, info_signals,
282 	    __arraycount(info_signals), &info_signal_handler);
283 	init_signal_handler(SA_RESTART, checkpoint_signals,
284 	    __arraycount(checkpoint_signals), &checkpoint_signal_handler);
285 }
286 
287 static void
288 init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
289     void (*handler)(int))
290 {
291 	static const struct sigaction zero_sa;
292 	struct sigaction sa = zero_sa;
293 	size_t i;
294 
295 	(void)sigemptyset(&sa.sa_mask);
296 	for (i = 0; i < n; i++)
297 		(void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
298 	sa.sa_flags = flags;
299 	sa.sa_handler = handler;
300 	for (i = 0; i < n; i++)
301 		if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
302 			err(1, "sigaction(%s)", signals[i].sd_name);
303 }
304 
305 static void
306 info_signal_handler(int signo __unused)
307 {
308 	/* Save errno.  */
309 	const int error = errno;
310 	struct compress_state *const S = &global_state;
311 	char buf[128];
312 
313 	/* Bail if the state is not yet initialized.  */
314 	if (!S->initialized) {
315 		warnx_ss("initializing");
316 		goto out;
317 	}
318 
319 	/* Carefully calculate our I/O position.  */
320 	assert(S->blocksize > 0);
321 	__CTASSERT(MAX_N_BLOCKS <= (UINT64_MAX / MAX_BLOCKSIZE));
322 	const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);
323 
324 	assert(S->n_blocks > 0);
325 	__CTASSERT(MAX_N_BLOCKS <= ((UINT64_MAX / sizeof(uint64_t)) -
326 		CLOOP2_OFFSET_TABLE_OFFSET));
327 	const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
328 		(S->n_blocks * sizeof(uint64_t)))?
329 	    0 : S->offset);
330 
331 	/* snprintf_ss can't do floating-point, so do fixed-point instead.  */
332 	const uint64_t ratio_percent =
333 	    (nread > 0?
334 		((nwritten >= (UINT64_MAX / 100)) ?
335 		    ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
336 		: 0);
337 
338 	/* Format the status.  */
339 	assert(S->n_checkpointed_blocks <= (UINT64_MAX / S->blocksize));
340 	const int n = snprintf_ss(buf, sizeof(buf),
341 	    "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
342 	    "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
343 	    nread, nwritten, ratio_percent,
344 	    ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
345 	if (n < 0) {
346 		const char msg[] = "vndcompress: can't format info\n";
347 		(void)write(STDERR_FILENO, msg, __arraycount(msg));
348 	} else {
349 		__CTASSERT(INT_MAX <= SIZE_MAX);
350 		(void)write(STDERR_FILENO, buf, (size_t)n);
351 	}
352 
353 out:
354 	/* Restore errno.  */
355 	errno = error;
356 }
357 
358 static void
359 checkpoint_signal_handler(int signo __unused)
360 {
361 	/* Save errno.  */
362 	const int error = errno;
363 	struct compress_state *const S = &global_state;
364 
365 	/* Bail if the state is not yet initialized.  */
366 	if (!S->initialized) {
367 		warnx_ss("nothing to checkpoint yet");
368 		goto out;
369 	}
370 
371 	assert(S->image_fd >= 0);
372 	assert(S->cloop2_fd >= 0);
373 
374 	/* Take a checkpoint.  */
375 	assert(S->blocksize > 0);
376 	assert(S->blkno <= (UINT64_MAX / S->blocksize));
377 	warnx_ss("checkpointing %"PRIu64" bytes",
378 	    ((uint64_t)S->blkno * (uint64_t)S->blocksize));
379 	compress_checkpoint(S);
380 
381 out:
382 	/* Restore errno.  */
383 	errno = error;
384 }
385 
386 static void
387 block_signals(sigset_t *old_sigmask)
388 {
389 	sigset_t block;
390 
391 	(void)sigfillset(&block);
392 	(void)sigprocmask(SIG_BLOCK, &block, old_sigmask);
393 }
394 
395 static void
396 restore_sigmask(const sigset_t *sigmask)
397 {
398 
399 	(void)sigprocmask(SIG_SETMASK, sigmask, NULL);
400 }
401 
402 /*
403  * Report progress.
404  *
405  * XXX Should do a progress bar here.
406  */
407 static void
408 compress_progress(struct compress_state *S __unused)
409 {
410 }
411 
412 /*
413  * Parse arguments, open the files, and initialize the state.
414  */
415 static void
416 compress_init(int argc, char **argv, const struct options *O,
417     struct compress_state *S)
418 {
419 	uint32_t i;
420 
421 	if (!((argc == 2) || (argc == 3)))
422 		usage();
423 
424 	const char *const image_pathname = argv[0];
425 	const char *const cloop2_pathname = argv[1];
426 
427 	/* Grab the block size either from `-s' or from the last argument.  */
428 	__CTASSERT(0 < DEV_BSIZE);
429 	__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
430 	__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
431 	__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
432 	__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
433 	__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
434 	if (ISSET(O->flags, FLAG_s)) {
435 		if (argc == 3) {
436 			warnx("use -s or the extra argument, not both");
437 			usage();
438 		}
439 		S->blocksize = O->blocksize;
440 	} else {
441 		S->blocksize = (argc == 2? DEF_BLOCKSIZE :
442 		    strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
443 			MAX_BLOCKSIZE));
444 	}
445 
446 	/* Sanity-check the blocksize.  (strsuftoll guarantees bounds.)  */
447 	__CTASSERT(DEV_BSIZE <= UINT32_MAX);
448 	if ((S->blocksize % DEV_BSIZE) != 0)
449 		errx(1, "bad blocksize: %"PRIu32
450 		    " (not a multiple of %"PRIu32")",
451 		    S->blocksize, (uint32_t)DEV_BSIZE);
452 	assert(MIN_BLOCKSIZE <= S->blocksize);
453 	assert((S->blocksize % DEV_BSIZE) == 0);
454 	assert(S->blocksize <= MAX_BLOCKSIZE);
455 
456 	/* Grab the end block number if we have one.  */
457 	S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);
458 
459 	/* Grab the checkpoint block count, if we have one.  */
460 	S->checkpoint_blocks =
461 	    (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);
462 
463 	/* Open the input image file and the output cloop2 file.  */
464 	S->image_fd = open(image_pathname, O_RDONLY);
465 	if (S->image_fd == -1)
466 		err(1, "open(%s)", image_pathname);
467 
468 	int oflags;
469 	if (!ISSET(O->flags, FLAG_r))
470 		oflags = (O_WRONLY | O_TRUNC | O_CREAT); /* XXX O_EXCL?  */
471 	else if (!ISSET(O->flags, FLAG_R))
472 		oflags = (O_RDWR | O_CREAT);
473 	else
474 		oflags = O_RDWR;
475 	S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
476 	if (S->cloop2_fd == -1)
477 		err(1, "open(%s)", cloop2_pathname);
478 
479 	/* Find the size of the input image.  */
480 	if (ISSET(O->flags, FLAG_l)) {
481 		S->size = O->length;
482 	} else {
483 		static const struct stat zero_st;
484 		struct stat st = zero_st;
485 		if (fstat(S->image_fd, &st) == -1)
486 			err(1, "stat(%s)", image_pathname);
487 		if (st.st_size <= 0)
488 			errx(1, "unknown image size");
489 		assert(st.st_size >= 0);
490 		__CTASSERT(OFF_MAX <= UINT64_MAX);
491 		assert(__type_fit(uint64_t, st.st_size));
492 		S->size = st.st_size;
493 	}
494 	assert(S->size <= OFF_MAX);
495 
496 	/* Find number of full blocks and whether there's a partial block.  */
497 	S->n_full_blocks = (S->size / S->blocksize);
498 	assert(S->n_full_blocks <=
499 	    (UINT32_MAX - ((S->size % S->blocksize) > 0)));
500 	S->n_blocks = (S->n_full_blocks + ((S->size % S->blocksize) > 0));
501 	assert(S->n_full_blocks <= S->n_blocks);
502 
503 	if (S->n_blocks > MAX_N_BLOCKS)
504 		errx(1, "image too large for block size %"PRIu32": %"PRIu64,
505 		    S->blocksize, S->size);
506 	assert(S->n_blocks <= MAX_N_BLOCKS);
507 
508 	/* Allocate an offset table for the blocks; one extra for the end.  */
509 	__CTASSERT(MAX_N_BLOCKS <= (UINT32_MAX - 1));
510 	S->n_offsets = (S->n_blocks + 1);
511 	__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
512 	__CTASSERT(MAX_N_OFFSETS <= (SIZE_MAX / sizeof(uint64_t)));
513 	S->offset_table = malloc(S->n_offsets * sizeof(uint64_t));
514 	if (S->offset_table == NULL)
515 		err(1, "malloc offset table");
516 
517 	/* Attempt to restart a partial transfer if requested.  */
518 	if (ISSET(O->flags, FLAG_r)) {
519 		if (compress_restart(S)) {
520 			/*
521 			 * Restart succeeded.  Truncate the output
522 			 * here, in case any garbage got appended.  We
523 			 * are committed to making progress at this
524 			 * point.  If the ftruncate fails, we don't
525 			 * lose anything valuable -- this is the last
526 			 * point at which we can restart anyway.
527 			 */
528 			if (ftruncate(S->cloop2_fd, S->offset) == -1)
529 				err(1, "ftruncate failed");
530 
531 			/* All set!  No more initialization to do.  */
532 			return;
533 		} else {
534 			/* Restart failed.  Barf now if requested.  */
535 			if (ISSET(O->flags, FLAG_R))
536 				errx(1, "restart failed, aborting");
537 
538 			/* Otherwise, truncate and start at the top.  */
539 			if (ftruncate(S->cloop2_fd, 0) == -1)
540 				err(1, "truncate failed");
541 			if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
542 				err(1, "lseek to cloop2 beginning failed");
543 			if (lseek(S->image_fd, 0, SEEK_SET) == -1)
544 				err(1, "lseek to image beginning failed");
545 		}
546 	}
547 
548 	/*
549 	 * Initialize the offset table to all ones (except for the
550 	 * fixed first offset) so that we can easily detect where we
551 	 * were interrupted if we want to restart.
552 	 */
553 	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
554 	assert(S->n_offsets > 0);
555 	S->offset_table[0] = htobe64(sizeof(struct cloop2_header) +
556 	    (S->n_offsets * sizeof(uint64_t)));
557 	for (i = 1; i < S->n_offsets; i++)
558 		S->offset_table[i] = ~(uint64_t)0;
559 
560 	/* Write a bogus (zero) header for now, until we checkpoint.  */
561 	static const struct cloop2_header zero_header;
562 	const ssize_t h_written = write(S->cloop2_fd, &zero_header,
563 	    sizeof(zero_header));
564 	if (h_written == -1)
565 		err(1, "write header");
566 	assert(h_written >= 0);
567 	if ((size_t)h_written != sizeof(zero_header))
568 		errx(1, "partial write of header: %zu != %zu",
569 		    (size_t)h_written, sizeof(zero_header));
570 
571 	/* Write the initial (empty) offset table.  */
572 	const ssize_t ot_written = write(S->cloop2_fd, S->offset_table,
573 	    (S->n_offsets * sizeof(uint64_t)));
574 	if (ot_written == -1)
575 		err(1, "write initial offset table");
576 	assert(ot_written >= 0);
577 	if ((size_t)ot_written != (S->n_offsets * sizeof(uint64_t)))
578 		errx(1, "partial write of initial offset bytes: %zu <= %zu",
579 		    (size_t)ot_written,
580 		    (size_t)(S->n_offsets * sizeof(uint64_t)));
581 
582 	/* Start at the beginning of the image.  */
583 	S->blkno = 0;
584 	S->offset = (sizeof(struct cloop2_header) +
585 	    (S->n_offsets * sizeof(uint64_t)));
586 	S->n_checkpointed_blocks = 0;
587 
588 	/* Good to go and ready for interruption by a signal.  */
589 	S->initialized = 1;
590 }
591 
592 /*
593  * Try to recover state from an existing output file.
594  *
595  * On success, fill S->offset_table with what's in the file, set
596  * S->blkno and S->offset to reflect our position, and seek to the
597  * respective positions in the input and output files.
598  *
599  * On failure, return false.  May clobber S->offset_table, S->blkno,
600  * S->offset, and the file pointers.
601  */
602 static bool
603 compress_restart(struct compress_state *S)
604 {
605 
606 	/* Read in the header.  */
607 	static const struct cloop2_header zero_header;
608 	struct cloop2_header header = zero_header;
609 
610 	const ssize_t h_read = read_block(S->cloop2_fd, &header,
611 	    sizeof(header));
612 	if (h_read == -1) {
613 		warn("failed to read header");
614 		return false;
615 	}
616 	assert(h_read >= 0);
617 	if ((size_t)h_read != sizeof(header)) {
618 		warnx("partial read of header");
619 		return false;
620 	}
621 
622 	/* Check that the header looks like a header.  */
623 	__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
624 	if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
625 	    != 0) {
626 		warnx("bad cloop2 shell script magic");
627 		return false;
628 	}
629 
630 	/* Check the header parameters.  */
631 	if (be32toh(header.cl2h_blocksize) != S->blocksize) {
632 		warnx("mismatched block size: %"PRIu32
633 		    " (expected %"PRIu32")",
634 		    be32toh(header.cl2h_blocksize), S->blocksize);
635 		return false;
636 	}
637 	if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
638 		warnx("mismatched number of blocks: %"PRIu32
639 		    " (expected %"PRIu32")",
640 		    be32toh(header.cl2h_n_blocks), S->n_blocks);
641 		return false;
642 	}
643 
644 	/* Read in the partial offset table.  */
645 	const ssize_t ot_read = read_block(S->cloop2_fd, S->offset_table,
646 	    (S->n_offsets * sizeof(uint64_t)));
647 	if (ot_read == -1) {
648 		warn("failed to read offset table");
649 		return false;
650 	}
651 	assert(ot_read >= 0);
652 	if ((size_t)ot_read != (S->n_offsets * sizeof(uint64_t))) {
653 		warnx("partial read of offset table");
654 		return false;
655 	}
656 
657 	if (be64toh(S->offset_table[0]) != (sizeof(struct cloop2_header) +
658 		(S->n_offsets * sizeof(uint64_t)))) {
659 		warnx("first offset is not %"PRIu64": %"PRIu64,
660 		    ((uint64_t)S->n_offsets * sizeof(uint64_t)),
661 		    be64toh(S->offset_table[0]));
662 		return false;
663 	}
664 
665 	/* Find where we left off.  */
666 	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
667 	uint32_t blkno = 0;
668 	for (blkno = 0; blkno < S->n_blocks; blkno++) {
669 		if (S->offset_table[blkno] == ~(uint64_t)0)
670 			break;
671 		if (0 < blkno) {
672 			const uint64_t start =
673 			    be64toh(S->offset_table[blkno - 1]);
674 			const uint64_t end = be64toh(S->offset_table[blkno]);
675 			if (end <= start) {
676 				warnx("bad offset table: 0x%"PRIx64
677 				    ", 0x%"PRIx64, start, end);
678 				return false;
679 			}
680 			/* XXX compression ratio bound */
681 			__CTASSERT(MAX_BLOCKSIZE <= (SIZE_MAX / 2));
682 			if ((2 * (size_t)S->blocksize) <= (end - start)) {
683 				warnx("block %"PRIu32" too large:"
684 				    " %"PRIu64" bytes",
685 				    blkno, (end - start));
686 				return false;
687 			}
688 		}
689 	}
690 
691 	if (blkno == 0) {
692 		warnx("no blocks were written; nothing to restart");
693 		return false;
694 	}
695 
696 	/* Make sure the rest of the offset table is all ones.  */
697 	if (blkno < S->n_blocks) {
698 		uint32_t nblkno;
699 
700 		for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
701 			if (S->offset_table[nblkno] != ~(uint64_t)0) {
702 				warnx("bad partial offset table entry"
703 				    " at %"PRIu32": %"PRIu64,
704 				    nblkno,
705 				    be64toh(S->offset_table[nblkno]));
706 				return false;
707 			}
708 		}
709 	}
710 
711 	/*
712 	 * XXX Consider decompressing some number of blocks to make
713 	 * sure they match.
714 	 */
715 
716 	/* Back up by one.  */
717 	assert(1 <= blkno);
718 	blkno -= 1;
719 
720 	/* Seek to the input position.  */
721 	assert(S->size <= OFF_MAX);
722 	assert(blkno <= (S->size / S->blocksize));
723 	const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
724 	assert(0 <= restart_position);
725 	assert(restart_position <= (off_t)S->size);
726 	if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
727 		if (errno != ESPIPE) {
728 			warn("lseek input image failed");
729 			return false;
730 		}
731 
732 		/* Try read instead of lseek for a pipe/socket/fifo.  */
733 		void *const buffer = malloc(0x10000);
734 		if (buffer == NULL)
735 			err(1, "malloc temporary buffer");
736 		off_t left = restart_position;
737 		while (left > 0) {
738 			const size_t size = MIN(0x10000, left);
739 			const ssize_t n_read = read_block(S->image_fd, buffer,
740 			    size);
741 			if (n_read == -1) {
742 				free(buffer);
743 				warn("read of input image failed");
744 				return false;
745 			}
746 			assert(n_read >= 0);
747 			if ((size_t)n_read != size) {
748 				free(buffer);
749 				warnx("partial read of input image");
750 				return false;
751 			}
752 			assert((off_t)size <= left);
753 			left -= size;
754 		}
755 		free(buffer);
756 	}
757 
758 	/* Seek to the output position.  */
759 	const uint64_t offset = be64toh(S->offset_table[blkno]);
760 	assert(offset <= OFF_MAX);
761 	if (lseek(S->cloop2_fd, offset, SEEK_SET) == -1) {
762 		warn("lseek output cloop2 to %"PRIx64" failed",
763 		    S->offset);
764 		return false;
765 	}
766 
767 	/* Start where we left off.  */
768 	S->blkno = blkno;
769 	S->offset = offset;
770 	S->n_checkpointed_blocks = blkno;
771 
772 	/* Good to go and ready for interruption by a signal.  */
773 	S->initialized = 1;
774 
775 	/* Success!  */
776 	return true;
777 }
778 
779 /*
780  * Read a single block, compress it, and write the compressed block.
781  * Return the size of the compressed block.
782  */
783 static uint32_t
784 compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
785     uint32_t readsize, void *uncompbuf, void *compbuf)
786 {
787 
788 	assert(readsize <= blocksize);
789 	assert(blocksize <= MAX_BLOCKSIZE);
790 
791 	/* Read the uncompressed block.  */
792 	const ssize_t n_read = read_block(in_fd, uncompbuf, readsize);
793 	if (n_read == -1)
794 		err(1, "read block %"PRIu32, blkno);
795 	assert(n_read >= 0);
796 	if ((size_t)n_read != readsize)
797 		errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32,
798 		    blkno, (size_t)n_read, readsize);
799 
800 	/* Compress the block.  */
801 	/* XXX compression ratio bound */
802 	__CTASSERT(MAX_BLOCKSIZE <= (ULONG_MAX / 2));
803 	const unsigned long uncomplen =
804 	    (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */
805 	unsigned long complen = (uncomplen * 2);
806 	const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen,
807 	    Z_BEST_COMPRESSION);
808 	if (zerror != Z_OK)
809 		errx(1, "compressed failed at block %"PRIu32" (%d): %s", blkno,
810 		    zerror, zError(zerror));
811 	assert(complen <= (uncomplen * 2));
812 
813 	/* Write the compressed block.  */
814 	const ssize_t n_written = write(out_fd, compbuf, complen);
815 	if (n_written == -1)
816 		err(1, "write block %"PRIu32, blkno);
817 	assert(n_written >= 0);
818 	if ((size_t)n_written != complen)
819 		errx(1, "partial write of block %"PRIu32": %zu != %lu",
820 		    blkno, (size_t)n_written, complen);
821 
822 	return (size_t)n_written;
823 }
824 
825 /*
826  * Checkpoint if appropriate.
827  */
828 static void
829 compress_maybe_checkpoint(struct compress_state *S)
830 {
831 
832 	if ((0 < S->checkpoint_blocks) && (0 < S->blkno) &&
833 	    ((S->blkno % S->checkpoint_blocks) == 0)) {
834 		assert(S->offset <= OFF_MAX);
835 		assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
836 		compress_checkpoint(S);
837 	}
838 }
839 
840 /*
841  * Write the prefix of the offset table that we have filled so far.
842  *
843  * We fsync the data blocks we have written, and then write the offset
844  * table, and then fsync the offset table and file metadata.  This
845  * should help to avoid offset tables that point at garbage data.
846  *
847  * This may be called from a signal handler, so it must not use stdio,
848  * malloc, &c. -- it may only (a) handle signal-safe state in S, and
849  * (b) do file descriptor I/O / fsync.
850  *
851  * XXX This requires further thought and heavy testing to be sure.
852  *
853  * XXX Should have an option to suppress fsync.
854  *
855  * XXX Should have an option to fail on fsync failures.
856  *
857  * XXX Would be nice if we could just do a barrier rather than an
858  * fsync.
859  *
860  * XXX How might we automatically test the fsyncs?
861  */
862 static void
863 compress_checkpoint(struct compress_state *S)
864 {
865 
866 	assert(S->blkno < S->n_offsets);
867 	const uint32_t n_offsets = (S->blkno + 1);
868 	assert(n_offsets <= S->n_offsets);
869 
870 	assert(S->offset <= OFF_MAX);
871 	assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));
872 
873 	/* Make sure the data hits the disk before we say it's ready.  */
874 	if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
875 	    == -1)
876 		warn_ss("fsync of output failed");
877 
878 	/* Say the data blocks are ready.  */
879 	const ssize_t n_written = pwrite(S->cloop2_fd, S->offset_table,
880 	    (n_offsets * sizeof(uint64_t)), CLOOP2_OFFSET_TABLE_OFFSET);
881 	if (n_written == -1)
882 		err_ss(1, "write partial offset table");
883 	assert(n_written >= 0);
884 	if ((size_t)n_written != (n_offsets * sizeof(uint64_t)))
885 		errx_ss(1, "partial write of partial offset table: %zu != %zu",
886 		    (size_t)n_written,
887 		    (size_t)(n_offsets * sizeof(uint64_t)));
888 
889 	/*
890 	 * If this is the first checkpoint, initialize the header.
891 	 * Signal handler can race with main code here, but it is
892 	 * harmless -- just an extra fsync and write of the header,
893 	 * which are both idempotent.
894 	 */
895 	if (S->n_checkpointed_blocks == 0) {
896 		static const struct cloop2_header zero_header;
897 		struct cloop2_header header = zero_header;
898 
899 		/* Force the offset table to disk before we set the header.  */
900 		if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC),
901 			0,
902 			(CLOOP2_OFFSET_TABLE_OFFSET
903 			    + (n_offsets * (sizeof(uint64_t)))))
904 		    == -1)
905 			warn_ss("fsync of offset table failed");
906 
907 		/* Subsequent writes will preserve a valid state.  */
908 
909 		/* Format the header.  */
910 		__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
911 		(void)memcpy(header.cl2h_magic, cloop2_magic,
912 		    sizeof(cloop2_magic));
913 		header.cl2h_blocksize = htobe32(S->blocksize);
914 		header.cl2h_n_blocks = htobe32(S->n_blocks);
915 
916 		/* Write the header.  */
917 		const ssize_t h_written = pwrite(S->cloop2_fd, &header,
918 		    sizeof(header), 0);
919 		if (h_written == -1)
920 			err_ss(1, "write header");
921 		assert(h_written >= 0);
922 		if ((size_t)h_written != sizeof(header))
923 			errx_ss(1, "partial write of header: %zu != %zu",
924 			    (size_t)h_written, sizeof(header));
925 	}
926 
927 	/* Record how many blocks we've checkpointed.  */
928     {
929 	sigset_t old_sigmask;
930 	block_signals(&old_sigmask);
931 	S->n_checkpointed_blocks = S->blkno;
932 	restore_sigmask(&old_sigmask);
933     }
934 }
935 
936 /*
937  * Release everything we allocated in compress_init.
938  */
939 static void
940 compress_exit(struct compress_state *S)
941 {
942 
943 	/* Done with the offset table.  Free it.  */
944 	free(S->offset_table);
945 
946 	/* Done with the files.  Close them.  */
947 	if (close(S->cloop2_fd) == -1)
948 		warn("close(cloop2 fd)");
949 	if (close(S->image_fd) == -1)
950 		warn("close(image fd)");
951 }
952 
/*
 * Read, returning partial data only at end of file.
 *
 * Keeps calling read(2) until `n' bytes have been stored in `buffer'
 * or read reports end of file.  Returns the number of bytes stored,
 * or -1 as soon as any read fails (errno is left as read set it).
 */
static ssize_t
read_block(int fd, void *buffer, size_t n)
{
	char *const base = buffer;
	size_t filled = 0;

	while (filled < n) {
		const ssize_t got = read(fd, (base + filled), (n - filled));
		if (got == -1)
			return -1;
		assert(got >= 0);

		/* End of file: hand back whatever we have.  */
		if (got == 0)
			break;

		/* read must never return more than we asked for.  */
		assert((size_t)got <= (n - filled));
		filled += (size_t)got;
	}

	return filled;
}
983 
984 /*
985  * Signal-safe err/warn utilities.  The errno varieties are limited to
986  * having no format arguments for reasons of laziness.
987  */
988 
/*
 * Like err(3) without format arguments: report `msg' together with
 * the current errno via warn_ss, then terminate immediately with
 * _Exit(exit_value).
 */
static void
err_ss(int exit_value, const char *msg)
{

	warn_ss(msg);

	/* _Exit rather than exit: leave at once.  */
	_Exit(exit_value);
}
995 
/*
 * Like errx(3): report the formatted message via vwarnx_ss, then
 * terminate immediately with _Exit(exit_value).
 */
static void
errx_ss(int exit_value, const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vwarnx_ss(format, args);
	va_end(args);

	/* _Exit rather than exit: leave at once.  */
	_Exit(exit_value);
}
1006 
/*
 * Like warn(3) without format arguments: report `msg' followed by the
 * description of the current errno.  errno is the same on return as
 * on entry.
 *
 * NOTE(review): this goes through strerror, which the standards do
 * not promise is async-signal-safe -- confirm that is acceptable on
 * the target system.
 */
static void
warn_ss(const char *msg)
{
	const int saved_errno = errno;
	const char *const reason = strerror(saved_errno);

	warnx_ss("%s: %s", msg, reason);

	errno = saved_errno;
}
1016 
/*
 * Like warnx(3): variadic front end that hands its arguments to
 * vwarnx_ss.
 */
static void
warnx_ss(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vwarnx_ss(format, args);
	va_end(args);
}
1026 
/*
 * Like vwarnx(3): format "<progname>: <message>\n" into a fixed
 * on-stack buffer with vsnprintf_ss and write it to standard error
 * with a single write(2).  A message too long for the buffer is
 * truncated but still ends with a newline.  If formatting fails or
 * produces nothing, a fixed fallback message is written instead.
 */
static void
vwarnx_ss(const char *format, va_list va)
{
	char buf[128];

	/* Start with the program name prefix.  */
	(void)strlcpy(buf, getprogname(), sizeof(buf));
	(void)strlcat(buf, ": ", sizeof(buf));

	/* Append the formatted message after the prefix.  */
	const size_t prefix_len = strlen(buf);
	const int n = vsnprintf_ss(&buf[prefix_len],
	    (sizeof(buf) - prefix_len), format, va);
	if (n <= 0) {
		const char fallback[] =
		    "vndcompress: Help!  I'm trapped in a signal handler!\n";
		/* sizeof - 1: do not write the terminating NUL.  */
		(void)write(STDERR_FILENO, fallback, (sizeof(fallback) - 1));
	} else {
		/*
		 * Terminate the line.  If truncation filled the buffer
		 * there is no room to append, so the newline replaces
		 * the last character instead of being dropped.
		 */
		size_t len = strlen(buf);
		if (len < (sizeof(buf) - 1))
			buf[len++] = '\n';
		else
			buf[len - 1] = '\n';
		(void)write(STDERR_FILENO, buf, len);
	}
}
1046