xref: /netbsd-src/usr.bin/vndcompress/offtab.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: offtab.c,v 1.13 2014/01/25 16:38:15 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2014 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: offtab.c,v 1.13 2014/01/25 16:38:15 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/endian.h>
37 
38 #include <assert.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <inttypes.h>
42 #include <limits.h>
43 #include <stdbool.h>
44 #include <stdlib.h>
45 #include <unistd.h>
46 
47 #include "common.h"
48 #include "utils.h"
49 
50 #include "offtab.h"
51 
52 static void __printflike(1,2) __dead
53 offtab_bug(const char *fmt, ...)
54 {
55 
56 	errx(1, "bug in offtab, please report");
57 }
58 
59 static void __printflike(1,2) __dead
60 offtab_bugx(const char *fmt, ...)
61 {
62 
63 	errx(1, "bug in offtab, please report");
64 }
65 
66 static uint32_t
67 offtab_compute_window_size(struct offtab *offtab, uint32_t start)
68 {
69 
70 	assert(start < offtab->ot_n_offsets);
71 	return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
72 }
73 
74 static uint32_t
75 offtab_current_window_size(struct offtab *offtab)
76 {
77 
78 	return offtab_compute_window_size(offtab, offtab->ot_window_start);
79 }
80 
81 static uint32_t
82 offtab_current_window_end(struct offtab *offtab)
83 {
84 
85 	assert(offtab->ot_window_start < offtab->ot_n_offsets);
86 	assert(offtab_current_window_size(offtab) <=
87 	    (offtab->ot_n_offsets - offtab->ot_window_start));
88 	return (offtab->ot_window_start + offtab_current_window_size(offtab));
89 }
90 
91 static void
92 offtab_compute_window_position(struct offtab *offtab, uint32_t window_start,
93     size_t *bytes, off_t *pos)
94 {
95 	const uint32_t window_size = offtab_compute_window_size(offtab,
96 	    window_start);
97 
98 	__CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
99 	*bytes = (window_size * sizeof(uint64_t));
100 
101 	assert(window_start <= offtab->ot_n_offsets);
102 	__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
103 	const off_t window_offset = ((off_t)window_start *
104 	    (off_t)sizeof(uint64_t));
105 
106 	/* XXX This assertion is not justified.  */
107 	assert(offtab->ot_fdpos <= (OFF_MAX - window_offset));
108 	*pos = (offtab->ot_fdpos + window_offset);
109 }
110 
111 #define	OFFTAB_READ_SEEK	0x01
112 #define	OFFTAB_READ_NOSEEK	0x00
113 
114 static bool
115 offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
116 {
117 	const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
118 	size_t window_bytes;
119 	off_t window_pos;
120 
121 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
122 	assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
123 	    (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
124 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
125 
126 	offtab_compute_window_position(offtab, window_start,
127 	    &window_bytes, &window_pos);
128 	const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
129 	    ? pread_block(offtab->ot_fd, offtab->ot_window, window_bytes,
130 		window_pos)
131 	    : read_block(offtab->ot_fd, offtab->ot_window, window_bytes));
132 	if (n_read == -1) {
133 		(*offtab->ot_report)("read offset table at %"PRIuMAX,
134 		    (uintmax_t)window_pos);
135 		return false;
136 	}
137 	assert(n_read >= 0);
138 	if ((size_t)n_read != window_bytes) {
139 		(*offtab->ot_reportx)("partial read of offset table"
140 		    " at %"PRIuMAX": %zu != %zu",
141 		    (uintmax_t)window_pos, (size_t)n_read, window_bytes);
142 		return false;
143 	}
144 
145 	offtab->ot_window_start = window_start;
146 
147 	return true;
148 }
149 
150 static bool
151 offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
152 {
153 
154 	/* Don't bother if blkno is already in the window.  */
155 	if ((offtab->ot_window_start <= blkno) &&
156 	    (blkno < offtab_current_window_end(offtab)))
157 		return true;
158 
159 	if (!offtab_read_window(offtab, blkno, read_flags))
160 		return false;
161 
162 	return true;
163 }
164 
165 static void
166 offtab_write_window(struct offtab *offtab)
167 {
168 	size_t window_bytes;
169 	off_t window_pos;
170 
171 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
172 
173 	offtab_compute_window_position(offtab, offtab->ot_window_start,
174 	    &window_bytes, &window_pos);
175 	const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
176 	    window_bytes, window_pos);
177 	if (n_written == -1)
178 		err_ss(1, "write initial offset table");
179 	assert(n_written >= 0);
180 	if ((size_t)n_written != window_bytes)
181 		errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
182 		    (size_t)n_written,
183 		    window_bytes);
184 }
185 
186 static void
187 offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
188 {
189 
190 	/* Don't bother if [start, end) does not cover our window.  */
191 	if (end <= offtab->ot_window_start)
192 		return;
193 	if (offtab_current_window_end(offtab) < start)
194 		return;
195 
196 	offtab_write_window(offtab);
197 }
198 
199 /*
200  * Initialize an offtab to support the specified number of offsets read
201  * to or written from fd at byte position fdpos.
202  */
203 void
204 offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
205     int fd, off_t fdpos)
206 {
207 
208 	assert(offtab != NULL);
209 	assert(0 < n_offsets);
210 	assert(0 <= fd);
211 	assert(0 <= fdpos);
212 
213 	offtab->ot_n_offsets = n_offsets;
214 	if ((window_size == 0) || (n_offsets < window_size))
215 		offtab->ot_window_size = n_offsets;
216 	else
217 		offtab->ot_window_size = window_size;
218 	assert(offtab->ot_window_size <= offtab->ot_n_offsets);
219 	offtab->ot_window_start = (uint32_t)-1;
220 	__CTASSERT(MAX_WINDOW_SIZE <= (SIZE_MAX / sizeof(uint64_t)));
221 	offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
222 	if (offtab->ot_window == NULL)
223 		err(1, "malloc offset table");
224 	offtab->ot_blkno = (uint32_t)-1;
225 	offtab->ot_fd = fd;
226 	offtab->ot_fdpos = fdpos;
227 	offtab->ot_report = &offtab_bug;
228 	offtab->ot_reportx = &offtab_bugx;
229 	offtab->ot_mode = OFFTAB_MODE_NONE;
230 }
231 
232 /*
233  * Destroy an offtab.
234  */
235 void
236 offtab_destroy(struct offtab *offtab)
237 {
238 
239 	free(offtab->ot_window);
240 }
241 
242 /*
243  * For an offtab that has been used to read data from disk, convert it
244  * to an offtab that can be used to write subsequent data to disk.
245  * blkno is the last valid blkno read from disk.
246  */
247 bool
248 offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
249 {
250 
251 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
252 	assert(0 < blkno);
253 
254 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
255 		return false;
256 
257 	offtab->ot_mode = OFFTAB_MODE_WRITE;
258 	offtab->ot_blkno = blkno;
259 
260 	return true;
261 }
262 
263 /*
264  * Reset an offtab for reading an offset table from the beginning.
265  * Initializes in-memory state and may read data from offtab->ot_fd,
266  * which must currently be at byte position offtab->ot_fdpos.  Failure
267  * will be reported by the report/reportx routines, which are called
268  * like warn/warnx.  May fail; returns true on success, false on
269  * failure.
270  *
271  * This almost has copypasta of offtab_prepare_get, but this uses read,
272  * rather than pread, so that it will work on nonseekable input if the
273  * window is the whole offset table.
274  */
275 bool
276 offtab_reset_read(struct offtab *offtab,
277     void (*report)(const char *, ...) __printflike(1,2),
278     void (*reportx)(const char *, ...) __printflike(1,2))
279 {
280 
281 	assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
282 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
283 
284 	offtab->ot_report = report;
285 	offtab->ot_reportx = reportx;
286 	offtab->ot_mode = OFFTAB_MODE_READ;
287 	offtab->ot_blkno = (uint32_t)-1;
288 
289 	if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
290 		return false;
291 
292 	if (offtab->ot_window_size < offtab->ot_n_offsets) {
293 		__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
294 		const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
295 		    (off_t)sizeof(uint64_t));
296 		assert(offtab->ot_fdpos <= (OFF_MAX - offtab_bytes));
297 		const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
298 		if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
299 			(*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
300 			    first_offset);
301 			return false;
302 		}
303 	}
304 
305 	return true;
306 }
307 
308 /*
309  * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
310  * preparation for a call to offtab_get.  May fail; returns true on
311  * success, false on failure.
312  */
313 bool
314 offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
315 {
316 
317 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
318 	assert(blkno < offtab->ot_n_offsets);
319 
320 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
321 		return false;
322 
323 	assert(offtab->ot_window_start <= blkno);
324 	assert(blkno < offtab_current_window_end(offtab));
325 
326 	offtab->ot_blkno = blkno;
327 	return true;
328 }
329 
330 /*
331  * Return the offset for blkno.  Caller must have called
332  * offtab_prepare_get beforehand.
333  */
334 uint64_t
335 offtab_get(struct offtab *offtab, uint32_t blkno)
336 {
337 
338 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
339 	assert(blkno == offtab->ot_blkno);
340 	assert(offtab->ot_window_start <= blkno);
341 	assert(blkno < offtab_current_window_end(offtab));
342 
343 	return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
344 }
345 
346 /*
347  * Reset offtab for writing a fresh offset table.  Initializes
348  * in-memory state and writes an empty offset table to offtab->ot_fd,
349  * which must currently be at byte position offtab->ot_fdpos.  May
350  * fail; returns on success, aborts with err(3) on failure.
351  */
352 void
353 offtab_reset_write(struct offtab *offtab)
354 {
355 	uint32_t i;
356 
357 	assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
358 
359 	offtab->ot_mode = OFFTAB_MODE_WRITE;
360 	offtab->ot_blkno = (uint32_t)-1;
361 
362 	/*
363 	 * Initialize the offset table to all ones (except for the
364 	 * fixed first offset) so that we can easily detect where we
365 	 * were interrupted if we want to restart.
366 	 */
367 	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
368 	assert(offtab->ot_n_offsets > 0);
369 
370 	for (i = 0; i < offtab->ot_window_size; i++)
371 		offtab->ot_window[i] = ~(uint64_t)0;
372 
373 	const uint32_t n_windows =
374 	    howmany(offtab->ot_n_offsets, offtab->ot_window_size);
375 	for (i = 1; i < n_windows; i++) {
376 		/* Change the start but reuse the all-ones buffer.  */
377 		offtab->ot_window_start = (i * offtab->ot_window_size);
378 		offtab_write_window(offtab);
379 	}
380 
381 	offtab->ot_window_start = 0;
382 	__CTASSERT(MAX_N_OFFSETS <=
383 	    (MIN(OFF_MAX, UINT64_MAX) / sizeof(uint64_t)));
384 	const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
385 	    sizeof(uint64_t));
386 	assert(offtab->ot_fdpos <=
387 	    ((off_t)MIN(OFF_MAX, UINT64_MAX) - offtab_bytes));
388 	const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
389 	assert(first_offset <= (off_t)MIN(OFF_MAX, UINT64_MAX));
390 	offtab->ot_window[0] = htobe64((uint64_t)first_offset);
391 	offtab_write_window(offtab);
392 
393 	if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
394 		err(1, "lseek to first offset failed");
395 }
396 
397 /*
398  * Guarantee that the disk reflects block offsets [0, n_offsets).  If
399  * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
400  * offset table.  May fail; returns on success, aborts with err(3) on
401  * failure.  Fsync failure is considered success but is reported with a
402  * warning.
403  *
404  * This routine does not write state in memory, and does not read state
405  * that is not signal-safe.  The only state read is offtab->ot_window,
406  * offtab->ot_window_start, and quantities that are static for the
407  * signal-interruptable existence of the offset table.
408  */
409 void
410 offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
411 {
412 
413 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
414 	assert(n_offsets <= offtab->ot_n_offsets);
415 
416 	/*
417 	 * Write the window unless we just did that and were
418 	 * interrupted before we could move the window.
419 	 */
420 	if (offtab->ot_window != NULL)
421 		offtab_maybe_write_window(offtab, 0, n_offsets);
422 
423 	if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
424 		__CTASSERT(MAX_N_OFFSETS <= (OFF_MAX / sizeof(uint64_t)));
425 		const off_t sync_bytes = ((off_t)n_offsets *
426 		    (off_t)sizeof(uint64_t));
427 		assert(offtab->ot_fdpos <= (OFF_MAX - sync_bytes));
428 		if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
429 			offtab->ot_fdpos, (offtab->ot_fdpos + sync_bytes))
430 		    == -1)
431 			warn_ss("fsync of offset table failed");
432 	}
433 }
434 
435 /*
436  * Do any I/O or bookkeeping necessary to set an offset for blkno.  May
437  * fail; returns on success, aborts with err(3) on failure.
438  */
439 void
440 offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
441 {
442 	uint32_t i;
443 
444 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
445 	assert(blkno < offtab->ot_n_offsets);
446 
447 	/*
448 	 * Assume, for convenience, that we write blocks in order.
449 	 * Thus we need not do another read -- we can just clear the
450 	 * window.
451 	 */
452 	assert((offtab->ot_blkno == (uint32_t)-1) ||
453 	    ((offtab->ot_blkno + 1) == blkno));
454 
455 	/* If it's already in our window, we're good to go.  */
456 	if ((offtab->ot_window_start <= blkno) &&
457 	    (blkno < offtab_current_window_end(offtab)))
458 		goto win;
459 
460 	/* Otherwise, write out the current window and choose a new one.  */
461 	offtab_write_window(offtab);
462 
463 	assert(offtab->ot_window_size <= blkno);
464 	assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
465 	assert((offtab->ot_window_start + offtab->ot_window_size) ==
466 	    rounddown(blkno, offtab->ot_window_size));
467 
468     {
469 	uint64_t *window;
470 	sigset_t sigmask;
471 
472 	/*
473 	 * Mark the window as being updated so nobody tries to write it
474 	 * (since we just wrote it) while we fill it with ones.
475 	 */
476 	block_signals(&sigmask);
477 	window = offtab->ot_window;
478 	offtab->ot_window = NULL;
479 	restore_sigmask(&sigmask);
480 
481 	/* Fill the window with ones.  */
482 	for (i = 0; i < offtab_current_window_size(offtab); i++)
483 		window[i] = ~(uint64_t)0;
484 
485 	/* Restore the window as ready again.  */
486 	block_signals(&sigmask);
487 	offtab->ot_window = window;
488 	offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
489 	restore_sigmask(&sigmask);
490     }
491 
492 win:	assert(offtab->ot_window_start <= blkno);
493 	assert(blkno < offtab_current_window_end(offtab));
494 
495 	offtab->ot_blkno = blkno;
496 }
497 
498 /*
499  * Actually set the offset for blkno.
500  */
501 void
502 offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
503 {
504 
505 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
506 	assert(blkno == offtab->ot_blkno);
507 	assert(offtab->ot_window_start <= blkno);
508 	assert(blkno < offtab_current_window_end(offtab));
509 
510 	offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
511 }
512