1 /* $NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2013 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Taylor R. Campbell.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $");
34
35 #include <sys/endian.h>
36 #include <sys/stat.h>
37
38 #include <assert.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <inttypes.h>
43 #include <limits.h>
44 #include <signal.h>
45 #include <stdbool.h>
46 #include <stdint.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <zlib.h>
52
53 #include "common.h"
54 #include "offtab.h"
55 #include "utils.h"
56
57 /*
58 * XXX Switch to control bug-for-bug byte-for-byte compatibility with
59 * NetBSD's vndcompress.
60 */
#define VNDCOMPRESS_COMPAT 0

/* The cloop2 offset table must start immediately after the header. */
__CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET);
64
/*
 * State of a compression run.  Shared with the signal handlers via
 * global_state below; the main loop updates the handler-visible
 * fields (blkno, offset, n_checkpointed_blocks) only with signals
 * blocked, and the handlers test `initialized' before touching
 * anything else.
 */
struct compress_state {
	uint64_t	size;		/* uncompressed size */
	uint64_t	offset;		/* output byte offset */
	uint32_t	blocksize;	/* bytes per block */
	uint32_t	blkno;		/* input block number */
	uint32_t	n_full_blocks;	/* floor(size/blocksize) */
	uint32_t	n_blocks;	/* ceiling(size/blocksize) */
	uint32_t	n_offsets;	/* n_blocks + 1 */
	uint32_t	end_block;	/* last block to transfer */
	uint32_t	checkpoint_blocks;	/* blocks before checkpoint */
	int		image_fd;	/* input image; -1 until opened */
	int		cloop2_fd;	/* output cloop2 file; -1 until opened */
	struct offtab	offtab;		/* block offset table */
	uint32_t	n_checkpointed_blocks;	/* blocks committed to disk */
	volatile sig_atomic_t
			initialized;	/* everything above initialized?  */
};

/* Global compression state for SIGINFO handler. */
static struct compress_state	global_state;
85
/* Signal number paired with its name, for sigaction error messages. */
struct sigdesc {
	int sd_signo;
	const char *sd_name;
};

/* Signals that trigger a progress report (info_signal_handler). */
static const struct sigdesc info_signals[] = {
	{ SIGINFO, "SIGINFO" },
	{ SIGUSR1, "SIGUSR1" },
};

/* Signals that trigger a checkpoint (checkpoint_signal_handler). */
static const struct sigdesc checkpoint_signals[] = {
	{ SIGUSR2, "SIGUSR2" },
};
99
/* Forward declarations of the internal routines, in definition order. */
static void	init_signals(void);
static void	init_signal_handler(int, const struct sigdesc *, size_t,
		    void (*)(int));
static void	info_signal_handler(int);
static void	checkpoint_signal_handler(int);
static void	compress_progress(struct compress_state *);
static void	compress_init(int, char **, const struct options *,
		    struct compress_state *);
static bool	compress_restart(struct compress_state *);
static uint32_t	compress_block(int, int, uint32_t, uint32_t, uint32_t, void *,
		    void *);
static void	compress_maybe_checkpoint(struct compress_state *);
static void	compress_checkpoint(struct compress_state *);
static void	compress_exit(struct compress_state *);
114
115 /*
116 * Compression entry point.
117 */
int
vndcompress(int argc, char **argv, const struct options *O)
{
	/*
	 * State lives in global_state so the SIGINFO/SIGUSR handlers
	 * can report and checkpoint; errors exit via err/errx.
	 */
	struct compress_state *const S = &global_state;

	/* Paranoia.  The other fields either have no sentinel or use zero. */
	S->image_fd = -1;
	S->cloop2_fd = -1;

	/* Set up signal handlers so we can handle SIGINFO ASAP. */
	init_signals();

	/*
	 * Parse the arguments to initialize our state.
	 */
	compress_init(argc, argv, O, S);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/*
	 * Allocate compression buffers.
	 *
	 * Compression may actually expand.  From an overabundance of
	 * caution, assume it can expand by at most double.
	 *
	 * XXX Check and consider tightening this assumption.
	 */
	__CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX);
	void *const uncompbuf = malloc(S->blocksize);
	if (uncompbuf == NULL)
		err(1, "malloc uncompressed buffer");

	/* XXX compression ratio bound */
	__CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
	void *const compbuf = malloc(2 * (size_t)S->blocksize);
	if (compbuf == NULL)
		err(1, "malloc compressed buffer");

	/*
	 * Compress the blocks.  S->blkno specifies the input block
	 * we're about to transfer.  S->offset is the current output
	 * offset.
	 */
	while (S->blkno < S->n_blocks) {
		/* Report any progress. */
		compress_progress(S);

		/* Stop if we've done the requested partial transfer. */
		if ((0 < S->end_block) && (S->end_block <= S->blkno))
			goto out;

		/* Checkpoint if appropriate. */
		compress_maybe_checkpoint(S);
		offtab_prepare_put(&S->offtab, (S->blkno + 1));

		/*
		 * Choose read size: partial if last block, full if not.
		 * blkno == n_full_blocks only when a trailing partial
		 * block exists, so (size % blocksize) is nonzero there.
		 */
		const uint32_t readsize = (S->blkno == S->n_full_blocks?
		    (S->size % S->blocksize) : S->blocksize);
		assert(readsize > 0);
		assert(readsize <= S->blocksize);

		/* Fail noisily if we might be about to overflow. */
		/* XXX compression ratio bound */
		__CTASSERT(MUL_OK(uint64_t, 2, MAX_BLOCKSIZE));
		__CTASSERT(MUL_OK(off_t, 2, MAX_BLOCKSIZE));
		assert(S->offset <= MIN(UINT64_MAX, OFF_MAX));
		if (!ADD_OK(uint64_t, S->offset, 2*(uintmax_t)readsize) ||
		    !ADD_OK(off_t, S->offset, 2*(uintmax_t)readsize))
			errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju",
			    S->blkno, (uintmax_t)S->offset,
			    (uintmax_t)readsize);

		/* Process the block. */
		const uint32_t complen =
		    compress_block(S->image_fd, S->cloop2_fd, S->blkno,
			S->blocksize, readsize, uncompbuf, compbuf);

		/*
		 * Signal-atomically update the state to reflect
		 * (a) what block number we are now at,
		 * (b) how far we are now in the output file, and
		 * (c) where the last block ended.
		 *
		 * Signals are blocked so the checkpoint handler never
		 * sees a blkno/offset/offtab triple that is out of
		 * sync.
		 */
		assert(ADD_OK(uint32_t, S->blkno, 1));
		assert(ADD_OK(uint64_t, S->offset, complen));
		assert(ADD_OK(off_t, (off_t)S->offset, (off_t)complen));
		assert((S->blkno + 1) < S->n_offsets);
		{
			sigset_t old_sigmask;
			block_signals(&old_sigmask);
			S->blkno += 1;			/* (a) */
			S->offset += complen;		/* (b) */
			offtab_put(&S->offtab, S->blkno, S->offset); /* (c) */
			restore_sigmask(&old_sigmask);
		}
	}

	/* Make sure we're all done. */
	assert(S->blkno == S->n_blocks);
	assert((S->blkno + 1) == S->n_offsets);

	/* Pad to the disk block size.  (Skipped for partial transfers.) */
	const uint32_t n_extra = (S->offset % DEV_BSIZE);
	if (n_extra != 0) {
		const uint32_t n_padding = (DEV_BSIZE - n_extra);
		/* Reuse compbuf -- guaranteed to be large enough. */
		(void)memset(compbuf, 0, n_padding);
		const ssize_t n_written = write(S->cloop2_fd, compbuf,
		    n_padding);
		if (n_written == -1)
			err(1, "write final padding failed");
		assert(n_written >= 0);
		if ((size_t)n_written != n_padding)
			errx(1, "partial write of final padding bytes"
			    ": %zu != %"PRIu32,
			    (size_t)n_written, n_padding);

		/* Account for the extra bytes in the output file. */
		assert(ADD_OK(uint64_t, S->offset, n_padding));
		assert(ADD_OK(off_t, (off_t)S->offset, (off_t)n_padding));
		{
			sigset_t old_sigmask;
			block_signals(&old_sigmask);
			S->offset += n_padding;
			restore_sigmask(&old_sigmask);
		}
	}

out:
	/* One last checkpoint to commit the offset table. */
	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
	compress_checkpoint(S);

	/*
	 * Free the compression buffers and finalize the compression.
	 */
	free(compbuf);
	free(uncompbuf);
	compress_exit(S);

	return 0;
}
261
262 /*
263 * Signal cruft.
264 */
265
static void
init_signals(void)
{

	/*
	 * Info handlers first so SIGINFO works as soon as possible;
	 * SA_RESTART so interrupted I/O in the main loop resumes.
	 */
	init_signal_handler(SA_RESTART, info_signals,
	    __arraycount(info_signals), &info_signal_handler);
	init_signal_handler(SA_RESTART, checkpoint_signals,
	    __arraycount(checkpoint_signals), &checkpoint_signal_handler);
}
275
276 static void
init_signal_handler(int flags,const struct sigdesc * signals,size_t n,void (* handler)(int))277 init_signal_handler(int flags, const struct sigdesc *signals, size_t n,
278 void (*handler)(int))
279 {
280 static const struct sigaction zero_sa;
281 struct sigaction sa = zero_sa;
282 size_t i;
283
284 (void)sigemptyset(&sa.sa_mask);
285 for (i = 0; i < n; i++)
286 (void)sigaddset(&sa.sa_mask, signals[i].sd_signo);
287 sa.sa_flags = flags;
288 sa.sa_handler = handler;
289 for (i = 0; i < n; i++)
290 if (sigaction(signals[i].sd_signo, &sa, NULL) == -1)
291 err(1, "sigaction(%s)", signals[i].sd_name);
292 }
293
294 static void
info_signal_handler(int signo __unused)295 info_signal_handler(int signo __unused)
296 {
297 /* Save errno. */
298 const int error = errno;
299 struct compress_state *const S = &global_state;
300 char buf[128];
301
302 /* Bail if the state is not yet initialized. */
303 if (!S->initialized) {
304 warnx_ss("initializing");
305 goto out;
306 }
307
308 /* Carefully calculate our I/O position. */
309 assert(S->blocksize > 0);
310 __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
311 const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize);
312
313 assert(S->n_blocks > 0);
314 __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, sizeof(uint64_t)));
315 __CTASSERT(ADD_OK(uint64_t, CLOOP2_OFFSET_TABLE_OFFSET,
316 MAX_N_BLOCKS*sizeof(uint64_t)));
317 const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET +
318 ((uint64_t)S->n_blocks * sizeof(uint64_t)))?
319 0 : S->offset);
320
321 /* snprintf_ss can't do floating-point, so do fixed-point instead. */
322 const uint64_t ratio_percent =
323 (nread > 0?
324 ((nwritten >= (UINT64_MAX / 100)) ?
325 ((nwritten / nread) * 100) : ((nwritten * 100) / nread))
326 : 0);
327
328 /* Format the status. */
329 assert(S->n_checkpointed_blocks <= MAX_N_BLOCKS);
330 assert(S->blocksize <= MAX_BLOCKSIZE);
331 __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
332 const int n = snprintf_ss(buf, sizeof(buf),
333 "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, "
334 "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n",
335 nread, nwritten, ratio_percent,
336 ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize));
337 if (n < 0) {
338 const char msg[] = "vndcompress: can't format info\n";
339 (void)write(STDERR_FILENO, msg, __arraycount(msg));
340 } else {
341 __CTASSERT(INT_MAX <= SIZE_MAX);
342 (void)write(STDERR_FILENO, buf, (size_t)n);
343 }
344
345 out:
346 /* Restore errno. */
347 errno = error;
348 }
349
/*
 * SIGUSR2 handler: force an immediate checkpoint of the offset table.
 * Async-signal-safe (warnx_ss and compress_checkpoint only), and
 * preserves errno for the interrupted code.
 */
static void
checkpoint_signal_handler(int signo __unused)
{
	/* Save errno. */
	const int error = errno;
	struct compress_state *const S = &global_state;

	/* Bail if the state is not yet initialized. */
	if (!S->initialized) {
		warnx_ss("nothing to checkpoint yet");
		goto out;
	}

	/* Both files must be open once `initialized' is set. */
	assert(S->image_fd >= 0);
	assert(S->cloop2_fd >= 0);

	/* Take a checkpoint. */
	assert(S->blkno <= MAX_N_BLOCKS);
	assert(S->blocksize <= MAX_BLOCKSIZE);
	__CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE));
	warnx_ss("checkpointing %"PRIu64" bytes",
	    ((uint64_t)S->blkno * (uint64_t)S->blocksize));
	compress_checkpoint(S);

out:
	/* Restore errno. */
	errno = error;
}
378
379 /*
380 * Report progress.
381 *
382 * XXX Should do a progress bar here.
383 */
static void
compress_progress(struct compress_state *S __unused)
{
	/* Intentionally empty: progress reporting is not implemented. */
}
388
389 /*
390 * Parse arguments, open the files, and initialize the state.
391 */
static void
compress_init(int argc, char **argv, const struct options *O,
    struct compress_state *S)
{

	/* argv: image path, cloop2 path, and optionally a block size. */
	if (!((argc == 2) || (argc == 3)))
		usage();

	const char *const image_pathname = argv[0];
	const char *const cloop2_pathname = argv[1];

	/* Grab the block size either from `-b' or from the last argument. */
	__CTASSERT(0 < DEV_BSIZE);
	__CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE);
	__CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0);
	__CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE);
	__CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0);
	if (ISSET(O->flags, FLAG_b)) {
		if (argc == 3) {
			warnx("use -b or the extra argument, not both");
			usage();
		}
		S->blocksize = O->blocksize;
	} else {
		S->blocksize = (argc == 2? DEF_BLOCKSIZE :
		    strsuftoll("block size", argv[2], MIN_BLOCKSIZE,
			MAX_BLOCKSIZE));
	}

	/* Sanity-check the blocksize.  (strsuftoll guarantees bounds.) */
	__CTASSERT(DEV_BSIZE <= UINT32_MAX);
	if ((S->blocksize % DEV_BSIZE) != 0)
		errx(1, "bad blocksize: %"PRIu32
		    " (not a multiple of %"PRIu32")",
		    S->blocksize, (uint32_t)DEV_BSIZE);
	assert(MIN_BLOCKSIZE <= S->blocksize);
	assert((S->blocksize % DEV_BSIZE) == 0);
	assert(S->blocksize <= MAX_BLOCKSIZE);

	/* Grab the end block number if we have one.  Zero means all. */
	S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0);

	/* Grab the checkpoint block count, if we have one.  Zero = never. */
	S->checkpoint_blocks =
	    (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0);

	/* Open the input image file and the output cloop2 file. */
	S->image_fd = open(image_pathname, O_RDONLY);
	if (S->image_fd == -1)
		err(1, "open(%s)", image_pathname);

	/*
	 * Output open mode: default truncates and creates; -r (restart)
	 * keeps existing contents; -r -R additionally refuses to create.
	 */
	int oflags;
	if (!ISSET(O->flags, FLAG_r))
		oflags = (O_WRONLY | O_TRUNC | O_CREAT);
	else if (!ISSET(O->flags, FLAG_R))
		oflags = (O_RDWR | O_CREAT);
	else
		oflags = O_RDWR;
	S->cloop2_fd = open(cloop2_pathname, oflags, 0777);
	if (S->cloop2_fd == -1)
		err(1, "open(%s)", cloop2_pathname);

	/* Find the size of the input image: -l overrides fstat. */
	if (ISSET(O->flags, FLAG_l)) {
		S->size = O->length;
	} else {
		static const struct stat zero_st;
		struct stat st = zero_st;
		if (fstat(S->image_fd, &st) == -1)
			err(1, "stat(%s)", image_pathname);
		if (st.st_size <= 0)
			errx(1, "unknown image size");
		assert(st.st_size >= 0);
		__CTASSERT(OFF_MAX <= UINT64_MAX);
		assert(__type_fit(uint64_t, st.st_size));
		S->size = st.st_size;
	}
	assert(S->size <= OFF_MAX);

	/* Find number of full blocks and whether there's a partial block. */
	__CTASSERT(0 < MIN_BLOCKSIZE);
	assert(0 < S->blocksize);
	if (TOOMANY(off_t, (off_t)S->size, (off_t)S->blocksize,
		(off_t)MAX_N_BLOCKS))
		errx(1, "image too large for block size %"PRIu32": %"PRIu64,
		    S->blocksize, S->size);
	__CTASSERT(MAX_N_BLOCKS <= UINT32_MAX);
	S->n_full_blocks = S->size/S->blocksize;
	S->n_blocks = HOWMANY(S->size, S->blocksize);
	assert(S->n_full_blocks <= S->n_blocks);
	assert(S->n_blocks <= MAX_N_BLOCKS);

	/* Choose a window size. */
	const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size :
	    DEF_WINDOW_SIZE);

	/* Create an offset table for the blocks; one extra for the end. */
	__CTASSERT(ADD_OK(uint32_t, MAX_N_BLOCKS, 1));
	S->n_offsets = (S->n_blocks + 1);
	__CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1));
	__CTASSERT(MUL_OK(size_t, MAX_N_OFFSETS, sizeof(uint64_t)));
	__CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <= OFFTAB_MAX_FDPOS);
	offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd,
	    CLOOP2_OFFSET_TABLE_OFFSET);

	/* Attempt to restart a partial transfer if requested. */
	if (ISSET(O->flags, FLAG_r)) {
		if (compress_restart(S)) {
			/*
			 * Restart succeeded.  Truncate the output
			 * here, in case any garbage got appended.  We
			 * are committed to making progress at this
			 * point.  If the ftruncate fails, we don't
			 * lose anything valuable -- this is the last
			 * point at which we can restart anyway.
			 */
			if (ftruncate(S->cloop2_fd, S->offset) == -1)
				err(1, "ftruncate failed");

			/* All set!  No more initialization to do. */
			return;
		} else {
			/* Restart failed.  Barf now if requested. */
			if (ISSET(O->flags, FLAG_R))
				errx(1, "restart failed, aborting");

			/* Otherwise, truncate and start at the top. */
			if (ftruncate(S->cloop2_fd, 0) == -1)
				err(1, "truncate failed");
			if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1)
				err(1, "lseek to cloop2 beginning failed");

			/* If we seeked in the input, rewind. */
			if (S->blkno != 0) {
				if (lseek(S->image_fd, 0, SEEK_SET) == -1)
					err(1,
					    "lseek to image beginning failed");
			}
		}
	}

	/* Write a bogus (zero) header for now, until we checkpoint. */
	static const struct cloop2_header zero_header;
	const ssize_t h_written = write(S->cloop2_fd, &zero_header,
	    sizeof(zero_header));
	if (h_written == -1)
		err(1, "write header");
	assert(h_written >= 0);
	if ((size_t)h_written != sizeof(zero_header))
		errx(1, "partial write of header: %zu != %zu",
		    (size_t)h_written, sizeof(zero_header));

	/* Reset the offset table to be empty and write it. */
	offtab_reset_write(&S->offtab);

	/* Start at the beginning of the image, past header and table. */
	S->blkno = 0;
	S->offset = (sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t)));
	S->n_checkpointed_blocks = 0;

	/* Good to go and ready for interruption by a signal. */
	S->initialized = 1;
}
557
558 /*
559 * Try to recover state from an existing output file.
560 *
561 * On success, fill the offset table with what's in the file, set
562 * S->blkno and S->offset to reflect our position, and seek to the
563 * respective positions in the input and output files.
564 *
565 * On failure, return false. May clobber the offset table, S->blkno,
566 * S->offset, and the file pointers.
567 */
static bool
compress_restart(struct compress_state *S)
{

	/* Read in the header. */
	static const struct cloop2_header zero_header;
	struct cloop2_header header = zero_header;

	const ssize_t h_read = read_block(S->cloop2_fd, &header,
	    sizeof(header));
	if (h_read == -1) {
		warn("failed to read header");
		return false;
	}
	assert(h_read >= 0);
	if ((size_t)h_read != sizeof(header)) {
		warnx("partial read of header");
		return false;
	}

	/* Check that the header looks like a header. */
	__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
	if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic))
	    != 0) {
		warnx("bad cloop2 shell script magic");
		return false;
	}

	/* Check the header parameters against our own. */
	if (be32toh(header.cl2h_blocksize) != S->blocksize) {
		warnx("mismatched block size: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_blocksize), S->blocksize);
		return false;
	}
	if (be32toh(header.cl2h_n_blocks) != S->n_blocks) {
		warnx("mismatched number of blocks: %"PRIu32
		    " (expected %"PRIu32")",
		    be32toh(header.cl2h_n_blocks), S->n_blocks);
		return false;
	}

	/* Read in the partial offset table. */
	if (!offtab_reset_read(&S->offtab, &warn, &warnx))
		return false;
	if (!offtab_prepare_get(&S->offtab, 0))
		return false;
	/* Entry 0 must point just past the header and offset table. */
	const uint64_t first_offset = offtab_get(&S->offtab, 0);
	__CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t)));
	__CTASSERT(ADD_OK(uint64_t, sizeof(struct cloop2_header),
	    MAX_N_OFFSETS*sizeof(uint64_t)));
	const uint64_t expected = sizeof(struct cloop2_header) +
	    ((uint64_t)S->n_offsets * sizeof(uint64_t));
	if (first_offset != expected) {
		warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64,
		    expected, first_offset);
		return false;
	}

	/*
	 * Find where we left off: scan forward until the first
	 * all-ones (unwritten) entry, validating that offsets
	 * strictly increase and no block exceeds the 2x bound.
	 */
	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
	uint32_t blkno = 0;
	uint64_t last_offset = first_offset;
	for (blkno = 0; blkno < S->n_blocks; blkno++) {
		if (!offtab_prepare_get(&S->offtab, blkno))
			return false;
		const uint64_t offset = offtab_get(&S->offtab, blkno);
		if (offset == ~(uint64_t)0)
			break;

		if (0 < blkno) {
			const uint64_t start = last_offset;
			const uint64_t end = offset;
			if (end <= start) {
				warnx("bad offset table: 0x%"PRIx64
				    ", 0x%"PRIx64, start, end);
				return false;
			}
			/* XXX compression ratio bound */
			__CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE));
			if ((2 * (size_t)S->blocksize) <= (end - start)) {
				warnx("block %"PRIu32" too large:"
				    " %"PRIu64" bytes"
				    " from 0x%"PRIx64" to 0x%"PRIx64,
				    blkno, (end - start), start, end);
				return false;
			}
		}

		last_offset = offset;
	}

	if (blkno == 0) {
		warnx("no blocks were written; nothing to restart");
		return false;
	}

	/* Make sure the rest of the offset table is all ones. */
	if (blkno < S->n_blocks) {
		uint32_t nblkno;

		for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) {
			if (!offtab_prepare_get(&S->offtab, nblkno))
				return false;
			const uint64_t offset = offtab_get(&S->offtab, nblkno);
			if (offset != ~(uint64_t)0) {
				warnx("bad partial offset table entry"
				    " at %"PRIu32": 0x%"PRIx64,
				    nblkno, offset);
				return false;
			}
		}
	}

	/*
	 * XXX Consider decompressing some number of blocks to make
	 * sure they match.
	 */

	/*
	 * Back up by one: the last valid entry marks where that block
	 * starts, but its end was never recorded, so redo that block.
	 */
	assert(1 <= blkno);
	blkno -= 1;

	/* Seek to the output position. */
	assert(last_offset <= OFF_MAX);
	if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) {
		warn("lseek output cloop2 to %"PRIx64" failed", last_offset);
		return false;
	}

	/* Switch from reading to writing the offset table. */
	if (!offtab_transmogrify_read_to_write(&S->offtab, blkno))
		return false;

	/*
	 * Seek to the input position last, after all other possible
	 * failures, because if the input is a pipe, we can't change
	 * our mind, rewind, and start at the beginning instead of
	 * restarting.
	 */
	assert(S->size <= OFF_MAX);
	assert(blkno <= (S->size / S->blocksize));
	const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize);
	assert(0 <= restart_position);
	assert(restart_position <= (off_t)S->size);
	if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) {
		if (errno != ESPIPE) {
			warn("lseek input image failed");
			return false;
		}

		/* Try read instead of lseek for a pipe/socket/fifo. */
		void *const buffer = malloc(0x10000);
		if (buffer == NULL)
			err(1, "malloc temporary buffer");
		off_t left = restart_position;
		while (left > 0) {
			const size_t size = MIN(0x10000, left);
			const ssize_t n_read = read_block(S->image_fd, buffer,
			    size);
			if (n_read == -1) {
				free(buffer);
				warn("read of input image failed");
				return false;
			}
			assert(n_read >= 0);
			if ((size_t)n_read != size) {
				free(buffer);
				warnx("partial read of input image");
				return false;
			}
			assert((off_t)size <= left);
			left -= size;
		}
		free(buffer);
	}

	/* Start where we left off. */
	S->blkno = blkno;
	S->offset = last_offset;
	S->n_checkpointed_blocks = blkno;

	/* Good to go and ready for interruption by a signal. */
	S->initialized = 1;

	/* Success! */
	return true;
}
756
757 /*
758 * Read a single block, compress it, and write the compressed block.
759 * Return the size of the compressed block.
760 */
761 static uint32_t
compress_block(int in_fd,int out_fd,uint32_t blkno,uint32_t blocksize,uint32_t readsize,void * uncompbuf,void * compbuf)762 compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize,
763 uint32_t readsize, void *uncompbuf, void *compbuf)
764 {
765
766 assert(readsize <= blocksize);
767 assert(blocksize <= MAX_BLOCKSIZE);
768
769 /* Read the uncompressed block. */
770 const ssize_t n_read = read_block(in_fd, uncompbuf, readsize);
771 if (n_read == -1)
772 err(1, "read block %"PRIu32, blkno);
773 assert(n_read >= 0);
774 if ((size_t)n_read != readsize)
775 errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32,
776 blkno, (size_t)n_read, readsize);
777
778 /* Compress the block. */
779 /* XXX compression ratio bound */
780 __CTASSERT(MUL_OK(unsigned long, 2, MAX_BLOCKSIZE));
781 const unsigned long uncomplen =
782 (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */
783 unsigned long complen = (uncomplen * 2);
784 const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen,
785 Z_BEST_COMPRESSION);
786 if (zerror != Z_OK)
787 errx(1, "compressed failed at block %"PRIu32" (%d): %s", blkno,
788 zerror, zError(zerror));
789 assert(complen <= (uncomplen * 2));
790
791 /* Write the compressed block. */
792 const ssize_t n_written = write(out_fd, compbuf, complen);
793 if (n_written == -1)
794 err(1, "write block %"PRIu32, blkno);
795 assert(n_written >= 0);
796 if ((size_t)n_written != complen)
797 errx(1, "partial write of block %"PRIu32": %zu != %lu",
798 blkno, (size_t)n_written, complen);
799
800 return (size_t)n_written;
801 }
802
803 /*
804 * Checkpoint if appropriate.
805 */
806 static void
compress_maybe_checkpoint(struct compress_state * S)807 compress_maybe_checkpoint(struct compress_state *S)
808 {
809
810 if ((0 < S->checkpoint_blocks) && (0 < S->blkno) &&
811 ((S->blkno % S->checkpoint_blocks) == 0)) {
812 assert(S->offset <= OFF_MAX);
813 assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR));
814 compress_checkpoint(S);
815 }
816 }
817
818 /*
819 * Write the prefix of the offset table that we have filled so far.
820 *
821 * We fsync the data blocks we have written, and then write the offset
822 * table, and then fsync the offset table and file metadata. This
823 * should help to avoid offset tables that point at garbage data.
824 *
825 * This may be called from a signal handler, so it must not use stdio,
826 * malloc, &c. -- it may only (a) handle signal-safe state in S, and
827 * (b) do file descriptor I/O / fsync.
828 *
829 * XXX This requires further thought and heavy testing to be sure.
830 *
831 * XXX Should have an option to suppress fsync.
832 *
833 * XXX Should have an option to fail on fsync failures.
834 *
835 * XXX Would be nice if we could just do a barrier rather than an
836 * fsync.
837 *
838 * XXX How might we automatically test the fsyncs?
839 */
static void
compress_checkpoint(struct compress_state *S)
{

	/* Commit every offset recorded so far, plus the initial one. */
	assert(S->blkno < S->n_offsets);
	const uint32_t n_offsets = (S->blkno + 1);
	assert(n_offsets <= S->n_offsets);

	assert(S->offset <= OFF_MAX);
	assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR));

	/* Make sure the data hits the disk before we say it's ready. */
	if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset)
	    == -1)
		warn_ss("fsync of output failed");

	/*
	 * Say the data blocks are ready.  Only the first checkpoint
	 * syncs the table; see the header comment below for why.
	 */
	offtab_checkpoint(&S->offtab, n_offsets,
	    (S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0));

	/*
	 * If this is the first checkpoint, initialize the header.
	 * Signal handler can race with main code here, but it is
	 * harmless -- just an extra fsync and write of the header,
	 * which are both idempotent.
	 *
	 * Once we have synchronously checkpointed the offset table,
	 * subsequent writes will preserve a valid state.
	 */
	if (S->n_checkpointed_blocks == 0) {
		static const struct cloop2_header zero_header;
		struct cloop2_header header = zero_header;

		/* Format the header. */
		__CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic));
		(void)memcpy(header.cl2h_magic, cloop2_magic,
		    sizeof(cloop2_magic));
		header.cl2h_blocksize = htobe32(S->blocksize);
		header.cl2h_n_blocks = htobe32(S->n_blocks);

		/*
		 * Write the header with pwrite so the current output
		 * file position (mid-data) is left untouched.
		 */
		const ssize_t h_written = pwrite(S->cloop2_fd, &header,
		    sizeof(header), 0);
		if (h_written == -1)
			err_ss(1, "write header");
		assert(h_written >= 0);
		if ((size_t)h_written != sizeof(header))
			errx_ss(1, "partial write of header: %zu != %zu",
			    (size_t)h_written, sizeof(header));
	}

	/* Record how many blocks we've checkpointed, signal-atomically. */
	{
		sigset_t old_sigmask;
		block_signals(&old_sigmask);
		S->n_checkpointed_blocks = S->blkno;
		restore_sigmask(&old_sigmask);
	}
}
899
900 /*
901 * Release everything we allocated in compress_init.
902 */
903 static void
compress_exit(struct compress_state * S)904 compress_exit(struct compress_state *S)
905 {
906
907 /* Done with the offset table. Destroy it. */
908 offtab_destroy(&S->offtab);
909
910 /* Done with the files. Close them. */
911 if (close(S->cloop2_fd) == -1)
912 warn("close(cloop2 fd)");
913 if (close(S->image_fd) == -1)
914 warn("close(image fd)");
915 }
916