xref: /netbsd-src/usr.bin/vndcompress/vndcompress.c (revision 76d4b812519cd793ec395025fc72068d22cb7c23)
1 /*	$NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2013 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $");
34 
35 #include <sys/endian.h>
36 #include <sys/stat.h>
37 
38 #include <assert.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <inttypes.h>
43 #include <limits.h>
44 #include <signal.h>
45 #include <stdbool.h>
46 #include <stdint.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <zlib.h>
52 
53 #include "common.h"
54 #include "offtab.h"
55 #include "utils.h"
56 
57 /*
58  * XXX Switch to control bug-for-bug byte-for-byte compatibility with
59  * NetBSD's vndcompress.
60  */
61 #define	VNDCOMPRESS_COMPAT	0
62 
63 __CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);
64 
65 struct compress_state {
66 	uint64_t	size;		/* uncompressed size */
67 	uint64_t	offset;		/* output byte offset */
68 	uint32_t	blocksize;	/* bytes per block */
69 	uint32_t	blkno;		/* input block number */
70 	uint32_t	n_full_blocks;	/* floor(size/blocksize) */
71 	uint32_t	n_blocks;	/* ceiling(size/blocksize) */
72 	uint32_t	n_offsets;	/* n_blocks + 1 */
73 	uint32_t	end_block;	/* last block to transfer */
74 	uint32_t	checkpoint_blocks;	/* blocks before checkpoint */
75 	int		image_fd;
76 	int		cloop2_fd;
77 	struct offtab	offtab;
78 	uint32_t	n_checkpointed_blocks;
79 	volatile sig_atomic_t
80 			initialized;	/* everything above initialized?  */
81 };
82 
83 /* Global compression state for SIGINFO handler.  */
84 static struct compress_state	global_state;
85 
86 struct sigdesc {
87 	int sd_signo;
88 	const char *sd_name;
89 };
90 
91 static const struct sigdesc info_signals[] = {
92 	{ SIGINFO, "SIGINFO" },
93 	{ SIGUSR1, "SIGUSR1" },
94 };
95 
96 static const struct sigdesc checkpoint_signals[] = {
97 	{ SIGUSR2, "SIGUSR2" },
98 };
99 
100 static void	init_signals(void);
101 static void	init_signal_handler(int, const struct sigdesc *, size_t,
102 		    void (*)(int));
103 static void	info_signal_handler(int);
104 static void	checkpoint_signal_handler(int);
105 static void	compress_progress(struct compress_state *);
106 static void	compress_init(int, char **, const struct options *,
107 		    struct compress_state *);
108 static bool	compress_restart(struct compress_state *);
109 static uint32_t	compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
110 		    void *);
111 static void	compress_maybe_checkpoint(struct compress_state *);
112 static void	compress_checkpoint(struct compress_state *);
113 static void	compress_exit(struct compress_state *);
114 
/*
 * Compression entry point.
 *
 * argv[0] is the input image pathname, argv[1] the output cloop2
 * pathname, and an optional argv[2] the block size; O carries the
 * parsed command-line options.  Returns 0 on success; exits nonzero
 * via err/errx on any failure.
 */
int
vndcompress(int argc, char **argv, const struct options *O)
{
	struct compress_state *const S = &global_state;

	/* Paranoia.  The other fields either have no sentinel or use zero.  */
	S->image_fd = -1;
	S->cloop2_fd = -1;

	/* Set up signal handlers so we can handle SIGINFO ASAP.  */
	init_signals();

	/*
	 * Parse the arguments to initialize our state.
	 */
	compress_init(argc, argv, O, S);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/*
	 * Allocate compression buffers.
	 *
	 * Compression may actually expand.  From an overabundance of
	 * caution, assume it can expand by at most double.
	 *
	 * XXX Check and consider tightening this assumption.
	 */
	__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
	void *const uncompbuf = malloc(S->blocksize);
	if (uncompbuf == NULL)
		err(1, "malloc uncompressed buffer");

	/* XXX compression ratio bound */
	__CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
	void *const compbuf = malloc(2 * (size_t)S->blocksize);
	if (compbuf == NULL)
		err(1, "malloc compressed buffer");

	/*
	 * Compress the blocks.  S->blkno specifies the input block
	 * we're about to transfer.  S->offset is the current output
	 * offset.
	 */
	while (S->blkno < S->n_blocks) {
		/* Report any progress.  */
		compress_progress(S);

		/* Stop if we've done the requested partial transfer.  */
		if ((0 < S->end_block) && (S->end_block <= S->blkno))
			goto out;

		/* Checkpoint if appropriate.  */
		compress_maybe_checkpoint(S);
		offtab_prepare_put(&S->offtab, (S->blkno + 1));

		/* Choose read size: partial if last block, full if not.  */
		const uint32_t readsize = (S->blkno == S->n_full_blocks?
		    (S->size % S->blocksize) : S->blocksize);
		assert(readsize > 0);
		assert(readsize <= S->blocksize);

		/* Fail noisily if we might be about to overflow.  */
		/* XXX compression ratio bound */
		__CTASSERT(MUL_OK(uint64_t, 2, MAX_BLOCKSIZE));
		__CTASSERT(MUL_OK(off_t, 2, MAX_BLOCKSIZE));
		assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
		if (!ADD_OK(uint64_t, S->offset, 2*(uintmax_t)readsize) ||
		    !ADD_OK(off_t, S->offset, 2*(uintmax_t)readsize))
			errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
			    S->blkno, (uintmax_t)S->offset,
			    (uintmax_t)readsize);

		/* Process the block.  */
		const uint32_t complen =
		    compress_block(S->image_fd, S->cloop2_fd, S->blkno,
			S->blocksize, readsize, uncompbuf, compbuf);

		/*
		 * Signal-atomically update the state to reflect
		 * (a) what block number we are now at,
		 * (b) how far we are now in the output file, and
		 * (c) where the last block ended.
		 */
		assert(ADD_OK(uint32_t, S->blkno, 1));
		assert(ADD_OK(uint64_t, S->offset, complen));
		assert(ADD_OK(off_t, (off_t)S->offset, (off_t)complen));
		assert((S->blkno + 1) < S->n_offsets);
	    {
		sigset_t old_sigmask;
		block_signals(&old_sigmask);
		S->blkno += 1;					/* (a) */
		S->offset += complen;				/* (b) */
		offtab_put(&S->offtab, S->blkno, S->offset);	/* (c) */
		restore_sigmask(&old_sigmask);
	    }
	}

	/* Make sure we're all done. */
	assert(S->blkno == S->n_blocks);
	assert((S->blkno + 1) == S->n_offsets);

	/* Pad to the disk block size.  */
	const uint32_t n_extra = (S->offset % DEV_BSIZE);
	if (n_extra != 0) {
		const uint32_t n_padding = (DEV_BSIZE - n_extra);
		/* Reuse compbuf -- guaranteed to be large enough.  */
		(void)memset(compbuf, 0, n_padding);
		const ssize_t n_written = write(S->cloop2_fd, compbuf,
		    n_padding);
		if (n_written == -1)
			err(1, "write final padding failed");
		assert(n_written >= 0);
		if ((size_t)n_written != n_padding)
			errx(1, "partial write of final padding bytes"
			    ": %zu != %"PRIu32,
			    (size_t)n_written, n_padding);

		/* Account for the extra bytes in the output file.  */
		assert(ADD_OK(uint64_t, S->offset, n_padding));
		assert(ADD_OK(off_t, (off_t)S->offset, (off_t)n_padding));
	    {
		sigset_t old_sigmask;
		block_signals(&old_sigmask);
		S->offset += n_padding;
		restore_sigmask(&old_sigmask);
	    }
	}

out:
	/* One last checkpoint to commit the offset table.  */
	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
	compress_checkpoint(S);

	/*
	 * Free the compression buffers and finalize the compression.
	 */
	free(compbuf);
	free(uncompbuf);
	compress_exit(S);

	return 0;
}
261 
262 /*
263  * Signal cruft.
264  */
265 
266 static void
init_signals(void)267 init_signals(void)
268 {
269 
270 	init_signal_handler(SA_RESTART, info_signals,
271 	    __arraycount(info_signals), &info_signal_handler);
272 	init_signal_handler(SA_RESTART, checkpoint_signals,
273 	    __arraycount(checkpoint_signals), &checkpoint_signal_handler);
274 }
275 
276 static void
init_signal_handler(int flags,const struct sigdesc * signals,size_t n,void (* handler)(int))277 init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
278     void (*handler)(int))
279 {
280 	static const struct sigaction zero_sa;
281 	struct sigaction sa = zero_sa;
282 	size_t i;
283 
284 	(void)sigemptyset(&sa.sa_mask);
285 	for (i = 0; i < n; i++)
286 		(void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
287 	sa.sa_flags = flags;
288 	sa.sa_handler = handler;
289 	for (i = 0; i < n; i++)
290 		if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
291 			err(1, "sigaction(%s)", signals[i].sd_name);
292 }
293 
294 static void
info_signal_handler(int signo __unused)295 info_signal_handler(int signo __unused)
296 {
297 	/* Save errno.  */
298 	const int error = errno;
299 	struct compress_state *const S = &global_state;
300 	char buf[128];
301 
302 	/* Bail if the state is not yet initialized.  */
303 	if (!S->initialized) {
304 		warnx_ss("initializing");
305 		goto out;
306 	}
307 
308 	/* Carefully calculate our I/O position.  */
309 	assert(S->blocksize > 0);
310 	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
311 	const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);
312 
313 	assert(S->n_blocks > 0);
314 	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, sizeof(uint64_t)));
315 	__CTASSERT(ADD_OK(uint64_t, CLOOP2_OFFSET_TABLE_OFFSET,
316 		MAX_N_BLOCKS*sizeof(uint64_t)));
317 	const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
318 		((uint64_t)S->n_blocks * sizeof(uint64_t)))?
319 	    0 : S->offset);
320 
321 	/* snprintf_ss can't do floating-point, so do fixed-point instead.  */
322 	const uint64_t ratio_percent =
323 	    (nread > 0?
324 		((nwritten >= (UINT64_MAX / 100)) ?
325 		    ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
326 		: 0);
327 
328 	/* Format the status.  */
329 	assert(S->n_checkpointed_blocks <= MAX_N_BLOCKS);
330 	assert(S->blocksize <= MAX_BLOCKSIZE);
331 	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
332 	const int n = snprintf_ss(buf, sizeof(buf),
333 	    "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
334 	    "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
335 	    nread, nwritten, ratio_percent,
336 	    ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
337 	if (n < 0) {
338 		const char msg[] = "vndcompress: can't format info\n";
339 		(void)write(STDERR_FILENO, msg, __arraycount(msg));
340 	} else {
341 		__CTASSERT(INT_MAX <= SIZE_MAX);
342 		(void)write(STDERR_FILENO, buf, (size_t)n);
343 	}
344 
345 out:
346 	/* Restore errno.  */
347 	errno = error;
348 }
349 
350 static void
checkpoint_signal_handler(int signo __unused)351 checkpoint_signal_handler(int signo __unused)
352 {
353 	/* Save errno.  */
354 	const int error = errno;
355 	struct compress_state *const S = &global_state;
356 
357 	/* Bail if the state is not yet initialized.  */
358 	if (!S->initialized) {
359 		warnx_ss("nothing to checkpoint yet");
360 		goto out;
361 	}
362 
363 	assert(S->image_fd >= 0);
364 	assert(S->cloop2_fd >= 0);
365 
366 	/* Take a checkpoint.  */
367 	assert(S->blkno <= MAX_N_BLOCKS);
368 	assert(S->blocksize <= MAX_BLOCKSIZE);
369 	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
370 	warnx_ss("checkpointing %"PRIu64" bytes",
371 	    ((uint64_t)S->blkno * (uint64_t)S->blocksize));
372 	compress_checkpoint(S);
373 
374 out:
375 	/* Restore errno.  */
376 	errno = error;
377 }
378 
/*
 * Report progress.
 *
 * Currently a no-op placeholder; on-demand progress reporting is
 * handled by the SIGINFO handler instead.
 *
 * XXX Should do a progress bar here.
 */
static void
compress_progress(struct compress_state *S __unused)
{
}
388 
/*
 * Parse arguments, open the files, and initialize the state.
 *
 * argv[0] is the image pathname, argv[1] the cloop2 pathname, and an
 * optional argv[2] the block size (mutually exclusive with -b).  On
 * return, either a restart succeeded and S reflects where the last
 * run left off, or the output was truncated and a placeholder header
 * and empty offset table were written.  Exits nonzero on any failure.
 */
static void
compress_init(int argc, char **argv, const struct options *O,
    struct compress_state *S)
{

	if (!((argc == 2) || (argc == 3)))
		usage();

	const char *const image_pathname = argv[0];
	const char *const cloop2_pathname = argv[1];

	/* Grab the block size either from `-b' or from the last argument.  */
	__CTASSERT(0 < DEV_BSIZE);
	__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
	__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
	__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
	if (ISSET(O->flags, FLAG_b)) {
		if (argc == 3) {
			warnx("use -b or the extra argument, not both");
			usage();
		}
		S->blocksize = O->blocksize;
	} else {
		S->blocksize = (argc == 2? DEF_BLOCKSIZE :
		    strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
			MAX_BLOCKSIZE));
	}

	/* Sanity-check the blocksize.  (strsuftoll guarantees bounds.)  */
	__CTASSERT(DEV_BSIZE <= UINT32_MAX);
	if ((S->blocksize % DEV_BSIZE) != 0)
		errx(1, "bad blocksize: %"PRIu32
		    " (not a multiple of %"PRIu32")",
		    S->blocksize, (uint32_t)DEV_BSIZE);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert((S->blocksize % DEV_BSIZE) == 0);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/* Grab the end block number if we have one.  */
	S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);

	/* Grab the checkpoint block count, if we have one.  */
	S->checkpoint_blocks =
	    (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);

	/* Open the input image file and the output cloop2 file.  */
	S->image_fd = open(image_pathname, O_RDONLY);
	if (S->image_fd == -1)
		err(1, "open(%s)", image_pathname);

	/*
	 * Without -r, start fresh (truncate/create).  With -r we need
	 * read access to recover the partial output; with -r and -R
	 * the output must already exist (no O_CREAT).
	 */
	int oflags;
	if (!ISSET(O->flags, FLAG_r))
		oflags = (O_WRONLY | O_TRUNC | O_CREAT);
	else if (!ISSET(O->flags, FLAG_R))
		oflags = (O_RDWR | O_CREAT);
	else
		oflags = O_RDWR;
	S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
	if (S->cloop2_fd == -1)
		err(1, "open(%s)", cloop2_pathname);

	/* Find the size of the input image.  */
	if (ISSET(O->flags, FLAG_l)) {
		S->size = O->length;
	} else {
		static const struct stat zero_st;
		struct stat st = zero_st;
		if (fstat(S->image_fd, &st) == -1)
			err(1, "stat(%s)", image_pathname);
		if (st.st_size <= 0)
			errx(1, "unknown image size");
		assert(st.st_size >= 0);
		__CTASSERT(OFF_MAX <= UINT64_MAX);
		assert(__type_fit(uint64_t, st.st_size));
		S->size = st.st_size;
	}
	assert(S->size <= OFF_MAX);

	/* Find number of full blocks and whether there's a partial block.  */
	__CTASSERT(0 < MIN_BLOCKSIZE);
	assert(0 < S->blocksize);
	if (TOOMANY(off_t, (off_t)S->size, (off_t)S->blocksize,
		(off_t)MAX_N_BLOCKS))
		errx(1, "image too large for block size %"PRIu32": %"PRIu64,
		    S->blocksize, S->size);
	__CTASSERT(MAX_N_BLOCKS <= UINT32_MAX);
	S->n_full_blocks = S->size/S->blocksize;
	S->n_blocks = HOWMANY(S->size, S->blocksize);
	assert(S->n_full_blocks <= S->n_blocks);
	assert(S->n_blocks <= MAX_N_BLOCKS);

	/* Choose a window size.  */
	const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size :
	    DEF_WINDOW_SIZE);

	/* Create an offset table for the blocks; one extra for the end.  */
	__CTASSERT(ADD_OK(uint32_t, MAX_N_BLOCKS, 1));
	S->n_offsets = (S->n_blocks + 1);
	__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
	__CTASSERT(MUL_OK(size_t, MAX_N_OFFSETS, sizeof(uint64_t)));
	__CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <= OFFTAB_MAX_FDPOS);
	offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd,
	    CLOOP2_OFFSET_TABLE_OFFSET);

	/* Attempt to restart a partial transfer if requested.  */
	if (ISSET(O->flags, FLAG_r)) {
		if (compress_restart(S)) {
			/*
			 * Restart succeeded.  Truncate the output
			 * here, in case any garbage got appended.  We
			 * are committed to making progress at this
			 * point.  If the ftruncate fails, we don't
			 * lose anything valuable -- this is the last
			 * point at which we can restart anyway.
			 */
			if (ftruncate(S->cloop2_fd, S->offset) == -1)
				err(1, "ftruncate failed");

			/* All set!  No more initialization to do.  */
			return;
		} else {
			/* Restart failed.  Barf now if requested.  */
			if (ISSET(O->flags, FLAG_R))
				errx(1, "restart failed, aborting");

			/* Otherwise, truncate and start at the top.  */
			if (ftruncate(S->cloop2_fd, 0) == -1)
				err(1, "truncate failed");
			if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
				err(1, "lseek to cloop2 beginning failed");

			/* If we seeked in the input, rewind.  */
			if (S->blkno != 0) {
				if (lseek(S->image_fd, 0, SEEK_SET) == -1)
					err(1,
					    "lseek to image beginning failed");
			}
		}
	}

	/* Write a bogus (zero) header for now, until we checkpoint.  */
	static const struct cloop2_header zero_header;
	const ssize_t h_written = write(S->cloop2_fd, &zero_header,
	    sizeof(zero_header));
	if (h_written == -1)
		err(1, "write header");
	assert(h_written >= 0);
	if ((size_t)h_written != sizeof(zero_header))
		errx(1, "partial write of header: %zu != %zu",
		    (size_t)h_written, sizeof(zero_header));

	/* Reset the offset table to be empty and write it.  */
	offtab_reset_write(&S->offtab);

	/* Start at the beginning of the image.  */
	S->blkno = 0;
	S->offset = (sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t)));
	S->n_checkpointed_blocks = 0;

	/* Good to go and ready for interruption by a signal.  */
	S->initialized = 1;
}
557 
/*
 * Try to recover state from an existing output file.
 *
 * On success, fill the offset table with what's in the file, set
 * S->blkno and S->offset to reflect our position, and seek to the
 * respective positions in the input and output files.
 *
 * On failure, return false.  May clobber the offset table, S->blkno,
 * S->offset, and the file pointers.
 */
static bool
compress_restart(struct compress_state *S)
{

	/* Read in the header.  */
	static const struct cloop2_header zero_header;
	struct cloop2_header header = zero_header;

	const ssize_t h_read = read_block(S->cloop2_fd, &header,
	    sizeof(header));
	if (h_read == -1) {
		warn("failed to read header");
		return false;
	}
	assert(h_read >= 0);
	if ((size_t)h_read != sizeof(header)) {
		warnx("partial read of header");
		return false;
	}

	/* Check that the header looks like a header.  */
	__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
	if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
	    != 0) {
		warnx("bad cloop2 shell script magic");
		return false;
	}

	/* Check the header parameters.  */
	if (be32toh(header.cl2h_blocksize) != S->blocksize) {
		warnx("mismatched block size: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_blocksize), S->blocksize);
		return false;
	}
	if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
		warnx("mismatched number of blocks: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_n_blocks), S->n_blocks);
		return false;
	}

	/* Read in the partial offset table.  */
	if (!offtab_reset_read(&S->offtab, &warn, &warnx))
		return false;
	if (!offtab_prepare_get(&S->offtab, 0))
		return false;
	const uint64_t first_offset = offtab_get(&S->offtab, 0);
	__CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t)));
	__CTASSERT(ADD_OK(uint64_t, sizeof(struct cloop2_header),
		MAX_N_OFFSETS*sizeof(uint64_t)));
	const uint64_t expected = sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t));
	if (first_offset != expected) {
		warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64,
		    expected, first_offset);
		return false;
	}

	/* Find where we left off.  */
	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
	uint32_t blkno = 0;
	uint64_t last_offset = first_offset;
	for (blkno = 0; blkno < S->n_blocks; blkno++) {
		if (!offtab_prepare_get(&S->offtab, blkno))
			return false;
		const uint64_t offset = offtab_get(&S->offtab, blkno);
		/* All-ones marks a not-yet-written table entry.  */
		if (offset == ~(uint64_t)0)
			break;

		/* Sanity-check each block's extent against its neighbor.  */
		if (0 < blkno) {
			const uint64_t start = last_offset;
			const uint64_t end = offset;
			if (end <= start) {
				warnx("bad offset table: 0x%"PRIx64
				    ", 0x%"PRIx64, start, end);
				return false;
			}
			/* XXX compression ratio bound */
			__CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
			if ((2 * (size_t)S->blocksize) <= (end - start)) {
				warnx("block %"PRIu32" too large:"
				    " %"PRIu64" bytes"
				    " from 0x%"PRIx64" to 0x%"PRIx64,
				    blkno, (end - start), start, end);
				return false;
			}
		}

		last_offset = offset;
	}

	if (blkno == 0) {
		warnx("no blocks were written; nothing to restart");
		return false;
	}

	/* Make sure the rest of the offset table is all ones.  */
	if (blkno < S->n_blocks) {
		uint32_t nblkno;

		for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
			if (!offtab_prepare_get(&S->offtab, nblkno))
				return false;
			const uint64_t offset = offtab_get(&S->offtab, nblkno);
			if (offset != ~(uint64_t)0) {
				warnx("bad partial offset table entry"
				    " at %"PRIu32": 0x%"PRIx64,
				    nblkno, offset);
				return false;
			}
		}
	}

	/*
	 * XXX Consider decompressing some number of blocks to make
	 * sure they match.
	 */

	/* Back up by one.  */
	assert(1 <= blkno);
	blkno -= 1;

	/* Seek to the output position.  */
	assert(last_offset <= OFF_MAX);
	if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) {
		warn("lseek output cloop2 to %"PRIx64" failed", last_offset);
		return false;
	}

	/* Switch from reading to writing the offset table.  */
	if (!offtab_transmogrify_read_to_write(&S->offtab, blkno))
		return false;

	/*
	 * Seek to the input position last, after all other possible
	 * failures, because if the input is a pipe, we can't change
	 * our mind, rewind, and start at the beginning instead of
	 * restarting.
	 */
	assert(S->size <= OFF_MAX);
	assert(blkno <= (S->size / S->blocksize));
	const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
	assert(0 <= restart_position);
	assert(restart_position <= (off_t)S->size);
	if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
		if (errno != ESPIPE) {
			warn("lseek input image failed");
			return false;
		}

		/* Try read instead of lseek for a pipe/socket/fifo.  */
		void *const buffer = malloc(0x10000);
		if (buffer == NULL)
			err(1, "malloc temporary buffer");
		off_t left = restart_position;
		while (left > 0) {
			const size_t size = MIN(0x10000, left);
			const ssize_t n_read = read_block(S->image_fd, buffer,
			    size);
			if (n_read == -1) {
				free(buffer);
				warn("read of input image failed");
				return false;
			}
			assert(n_read >= 0);
			if ((size_t)n_read != size) {
				free(buffer);
				warnx("partial read of input image");
				return false;
			}
			assert((off_t)size <= left);
			left -= size;
		}
		free(buffer);
	}

	/* Start where we left off.  */
	S->blkno = blkno;
	S->offset = last_offset;
	S->n_checkpointed_blocks = blkno;

	/* Good to go and ready for interruption by a signal.  */
	S->initialized = 1;

	/* Success!  */
	return true;
}
756 
/*
 * Read a single block, compress it, and write the compressed block.
 * Return the size of the compressed block.
 *
 * in_fd/out_fd are the input image and output cloop2 descriptors;
 * blkno is used only in diagnostics.  readsize is the number of bytes
 * to read (less than blocksize only for the final partial block).
 * uncompbuf must hold blocksize bytes and compbuf 2*blocksize bytes.
 * Exits nonzero on I/O or compression failure.
 */
static uint32_t
compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
    uint32_t readsize, void *uncompbuf, void *compbuf)
{

	assert(readsize <= blocksize);
	assert(blocksize <= MAX_BLOCKSIZE);

	/* Read the uncompressed block.  */
	const ssize_t n_read = read_block(in_fd, uncompbuf, readsize);
	if (n_read == -1)
		err(1, "read block %"PRIu32, blkno);
	assert(n_read >= 0);
	if ((size_t)n_read != readsize)
		errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32,
		    blkno, (size_t)n_read, readsize);

	/* Compress the block.  */
	/* XXX compression ratio bound */
	__CTASSERT(MUL_OK(unsigned long, 2, MAX_BLOCKSIZE));
	const unsigned long uncomplen =
	    (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */
	unsigned long complen = (uncomplen * 2);
	const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen,
	    Z_BEST_COMPRESSION);
	if (zerror != Z_OK)
		errx(1, "compression failed at block %"PRIu32" (%d): %s",
		    blkno, zerror, zError(zerror));
	assert(complen <= (uncomplen * 2));

	/* Write the compressed block.  */
	const ssize_t n_written = write(out_fd, compbuf, complen);
	if (n_written == -1)
		err(1, "write block %"PRIu32, blkno);
	assert(n_written >= 0);
	if ((size_t)n_written != complen)
		errx(1, "partial write of block %"PRIu32": %zu != %lu",
		    blkno, (size_t)n_written, complen);

	return (size_t)n_written;
}
802 
803 /*
804  * Checkpoint if appropriate.
805  */
806 static void
compress_maybe_checkpoint(struct compress_state * S)807 compress_maybe_checkpoint(struct compress_state *S)
808 {
809 
810 	if ((0 < S->checkpoint_blocks) && (0 < S->blkno) &&
811 	    ((S->blkno % S->checkpoint_blocks) == 0)) {
812 		assert(S->offset <= OFF_MAX);
813 		assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
814 		compress_checkpoint(S);
815 	}
816 }
817 
/*
 * Write the prefix of the offset table that we have filled so far.
 *
 * We fsync the data blocks we have written, and then write the offset
 * table, and then fsync the offset table and file metadata.  This
 * should help to avoid offset tables that point at garbage data.
 *
 * This may be called from a signal handler, so it must not use stdio,
 * malloc, &c. -- it may only (a) handle signal-safe state in S, and
 * (b) do file descriptor I/O / fsync.
 *
 * XXX This requires further thought and heavy testing to be sure.
 *
 * XXX Should have an option to suppress fsync.
 *
 * XXX Should have an option to fail on fsync failures.
 *
 * XXX Would be nice if we could just do a barrier rather than an
 * fsync.
 *
 * XXX How might we automatically test the fsyncs?
 */
static void
compress_checkpoint(struct compress_state *S)
{

	/* Commit offsets for blocks [0, S->blkno] inclusive.  */
	assert(S->blkno < S->n_offsets);
	const uint32_t n_offsets = (S->blkno + 1);
	assert(n_offsets <= S->n_offsets);

	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));

	/* Make sure the data hits the disk before we say it's ready.  */
	if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
	    == -1)
		warn_ss("fsync of output failed");

	/* Say the data blocks are ready.  */
	offtab_checkpoint(&S->offtab, n_offsets,
	    (S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0));

	/*
	 * If this is the first checkpoint, initialize the header.
	 * Signal handler can race with main code here, but it is
	 * harmless -- just an extra fsync and write of the header,
	 * which are both idempotent.
	 *
	 * Once we have synchronously checkpointed the offset table,
	 * subsequent writes will preserve a valid state.
	 */
	if (S->n_checkpointed_blocks == 0) {
		static const struct cloop2_header zero_header;
		struct cloop2_header header = zero_header;

		/* Format the header.  */
		__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
		(void)memcpy(header.cl2h_magic, cloop2_magic,
		    sizeof(cloop2_magic));
		header.cl2h_blocksize = htobe32(S->blocksize);
		header.cl2h_n_blocks = htobe32(S->n_blocks);

		/* Write the header.  */
		const ssize_t h_written = pwrite(S->cloop2_fd, &header,
		    sizeof(header), 0);
		if (h_written == -1)
			err_ss(1, "write header");
		assert(h_written >= 0);
		if ((size_t)h_written != sizeof(header))
			errx_ss(1, "partial write of header: %zu != %zu",
			    (size_t)h_written, sizeof(header));
	}

	/* Record how many blocks we've checkpointed.  */
    {
	sigset_t old_sigmask;
	block_signals(&old_sigmask);
	S->n_checkpointed_blocks = S->blkno;
	restore_sigmask(&old_sigmask);
    }
}
899 
900 /*
901  * Release everything we allocated in compress_init.
902  */
903 static void
compress_exit(struct compress_state * S)904 compress_exit(struct compress_state *S)
905 {
906 
907 	/* Done with the offset table.  Destroy it.  */
908 	offtab_destroy(&S->offtab);
909 
910 	/* Done with the files.  Close them.  */
911 	if (close(S->cloop2_fd) == -1)
912 		warn("close(cloop2 fd)");
913 	if (close(S->image_fd) == -1)
914 		warn("close(image fd)");
915 }
916