xref: /netbsd-src/usr.bin/vndcompress/offtab.c (revision 76d4b812519cd793ec395025fc72068d22cb7c23)
1 /*	$NetBSD: offtab.c,v 1.15 2017/07/29 21:04:07 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2014 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: offtab.c,v 1.15 2017/07/29 21:04:07 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/endian.h>
37 
38 #include <assert.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <inttypes.h>
42 #include <limits.h>
43 #include <stdbool.h>
44 #include <stdlib.h>
45 #include <unistd.h>
46 
47 #include "common.h"
48 #include "utils.h"
49 
50 #include "offtab.h"
51 
52 static void __printflike(1,2) __dead
offtab_bug(const char * fmt,...)53 offtab_bug(const char *fmt, ...)
54 {
55 
56 	errx(1, "bug in offtab, please report");
57 }
58 
59 static void __printflike(1,2) __dead
offtab_bugx(const char * fmt,...)60 offtab_bugx(const char *fmt, ...)
61 {
62 
63 	errx(1, "bug in offtab, please report");
64 }
65 
66 static uint32_t
offtab_compute_window_size(struct offtab * offtab,uint32_t start)67 offtab_compute_window_size(struct offtab *offtab, uint32_t start)
68 {
69 
70 	assert(start < offtab->ot_n_offsets);
71 	return MIN(offtab->ot_window_size, (offtab->ot_n_offsets - start));
72 }
73 
74 static uint32_t
offtab_current_window_size(struct offtab * offtab)75 offtab_current_window_size(struct offtab *offtab)
76 {
77 
78 	return offtab_compute_window_size(offtab, offtab->ot_window_start);
79 }
80 
81 static uint32_t
offtab_current_window_end(struct offtab * offtab)82 offtab_current_window_end(struct offtab *offtab)
83 {
84 
85 	assert(offtab->ot_window_start < offtab->ot_n_offsets);
86 	assert(offtab_current_window_size(offtab) <=
87 	    (offtab->ot_n_offsets - offtab->ot_window_start));
88 	return (offtab->ot_window_start + offtab_current_window_size(offtab));
89 }
90 
91 static void
offtab_compute_window_position(struct offtab * offtab,uint32_t window_start,size_t * bytes,off_t * pos)92 offtab_compute_window_position(struct offtab *offtab, uint32_t window_start,
93     size_t *bytes, off_t *pos)
94 {
95 	const uint32_t window_size = offtab_compute_window_size(offtab,
96 	    window_start);
97 
98 	__CTASSERT(MUL_OK(size_t, MAX_WINDOW_SIZE, sizeof(uint64_t)));
99 	*bytes = (window_size * sizeof(uint64_t));
100 
101 	assert(window_start <= offtab->ot_n_offsets);
102 	__CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
103 	const off_t window_offset = ((off_t)window_start *
104 	    (off_t)sizeof(uint64_t));
105 
106 	assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS);
107 	__CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
108 		(off_t)MAX_N_OFFSETS*sizeof(uint64_t)));
109 	assert(ADD_OK(off_t, offtab->ot_fdpos, window_offset));
110 	*pos = (offtab->ot_fdpos + window_offset);
111 }
112 
113 #define	OFFTAB_READ_SEEK	0x01
114 #define	OFFTAB_READ_NOSEEK	0x00
115 
116 static bool
offtab_read_window(struct offtab * offtab,uint32_t blkno,int read_flags)117 offtab_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
118 {
119 	const uint32_t window_start = rounddown(blkno, offtab->ot_window_size);
120 	size_t window_bytes;
121 	off_t window_pos;
122 
123 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
124 	assert(ISSET(read_flags, OFFTAB_READ_SEEK) ||
125 	    (lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
126 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
127 
128 	offtab_compute_window_position(offtab, window_start,
129 	    &window_bytes, &window_pos);
130 	const ssize_t n_read = (ISSET(read_flags, OFFTAB_READ_SEEK)
131 	    ? pread_block(offtab->ot_fd, offtab->ot_window, window_bytes,
132 		window_pos)
133 	    : read_block(offtab->ot_fd, offtab->ot_window, window_bytes));
134 	if (n_read == -1) {
135 		(*offtab->ot_report)("read offset table at %"PRIuMAX,
136 		    (uintmax_t)window_pos);
137 		return false;
138 	}
139 	assert(n_read >= 0);
140 	if ((size_t)n_read != window_bytes) {
141 		(*offtab->ot_reportx)("partial read of offset table"
142 		    " at %"PRIuMAX": %zu != %zu",
143 		    (uintmax_t)window_pos, (size_t)n_read, window_bytes);
144 		return false;
145 	}
146 
147 	offtab->ot_window_start = window_start;
148 
149 	return true;
150 }
151 
152 static bool
offtab_maybe_read_window(struct offtab * offtab,uint32_t blkno,int read_flags)153 offtab_maybe_read_window(struct offtab *offtab, uint32_t blkno, int read_flags)
154 {
155 
156 	/* Don't bother if blkno is already in the window.  */
157 	if ((offtab->ot_window_start <= blkno) &&
158 	    (blkno < offtab_current_window_end(offtab)))
159 		return true;
160 
161 	if (!offtab_read_window(offtab, blkno, read_flags))
162 		return false;
163 
164 	return true;
165 }
166 
167 static void
offtab_write_window(struct offtab * offtab)168 offtab_write_window(struct offtab *offtab)
169 {
170 	size_t window_bytes;
171 	off_t window_pos;
172 
173 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
174 
175 	offtab_compute_window_position(offtab, offtab->ot_window_start,
176 	    &window_bytes, &window_pos);
177 	const ssize_t n_written = pwrite(offtab->ot_fd, offtab->ot_window,
178 	    window_bytes, window_pos);
179 	if (n_written == -1)
180 		err_ss(1, "write initial offset table");
181 	assert(n_written >= 0);
182 	if ((size_t)n_written != window_bytes)
183 		errx_ss(1, "partial write of initial offset bytes: %zu <= %zu",
184 		    (size_t)n_written,
185 		    window_bytes);
186 }
187 
188 static void
offtab_maybe_write_window(struct offtab * offtab,uint32_t start,uint32_t end)189 offtab_maybe_write_window(struct offtab *offtab, uint32_t start, uint32_t end)
190 {
191 
192 	/* Don't bother if [start, end) does not cover our window.  */
193 	if (end <= offtab->ot_window_start)
194 		return;
195 	if (offtab_current_window_end(offtab) < start)
196 		return;
197 
198 	offtab_write_window(offtab);
199 }
200 
201 /*
202  * Initialize an offtab to support the specified number of offsets read
203  * to or written from fd at byte position fdpos.
204  */
205 void
offtab_init(struct offtab * offtab,uint32_t n_offsets,uint32_t window_size,int fd,off_t fdpos)206 offtab_init(struct offtab *offtab, uint32_t n_offsets, uint32_t window_size,
207     int fd, off_t fdpos)
208 {
209 
210 	assert(offtab != NULL);
211 	assert(0 < n_offsets);
212 	assert(0 <= fd);
213 	assert(0 <= fdpos);
214 	assert(fdpos <= OFFTAB_MAX_FDPOS);
215 
216 	offtab->ot_n_offsets = n_offsets;
217 	if ((window_size == 0) || (n_offsets < window_size))
218 		offtab->ot_window_size = n_offsets;
219 	else
220 		offtab->ot_window_size = window_size;
221 	assert(offtab->ot_window_size <= offtab->ot_n_offsets);
222 	offtab->ot_window_start = (uint32_t)-1;
223 	__CTASSERT(MUL_OK(size_t, MAX_WINDOW_SIZE, sizeof(uint64_t)));
224 	offtab->ot_window = malloc(offtab->ot_window_size * sizeof(uint64_t));
225 	if (offtab->ot_window == NULL)
226 		err(1, "malloc offset table");
227 	offtab->ot_blkno = (uint32_t)-1;
228 	offtab->ot_fd = fd;
229 	offtab->ot_fdpos = fdpos;
230 	offtab->ot_report = &offtab_bug;
231 	offtab->ot_reportx = &offtab_bugx;
232 	offtab->ot_mode = OFFTAB_MODE_NONE;
233 }
234 
235 /*
236  * Destroy an offtab.
237  */
238 void
offtab_destroy(struct offtab * offtab)239 offtab_destroy(struct offtab *offtab)
240 {
241 
242 	free(offtab->ot_window);
243 }
244 
245 /*
246  * For an offtab that has been used to read data from disk, convert it
247  * to an offtab that can be used to write subsequent data to disk.
248  * blkno is the last valid blkno read from disk.
249  */
250 bool
offtab_transmogrify_read_to_write(struct offtab * offtab,uint32_t blkno)251 offtab_transmogrify_read_to_write(struct offtab *offtab, uint32_t blkno)
252 {
253 
254 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
255 	assert(0 < blkno);
256 
257 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
258 		return false;
259 
260 	offtab->ot_mode = OFFTAB_MODE_WRITE;
261 	offtab->ot_blkno = blkno;
262 
263 	return true;
264 }
265 
266 /*
267  * Reset an offtab for reading an offset table from the beginning.
268  * Initializes in-memory state and may read data from offtab->ot_fd,
269  * which must currently be at byte position offtab->ot_fdpos.  Failure
270  * will be reported by the report/reportx routines, which are called
271  * like warn/warnx.  May fail; returns true on success, false on
272  * failure.
273  *
274  * This almost has copypasta of offtab_prepare_get, but this uses read,
275  * rather than pread, so that it will work on nonseekable input if the
276  * window is the whole offset table.
277  */
278 bool
279 offtab_reset_read(struct offtab *offtab,
280     void (*report)(const char *, ...) __printflike(1,2),
281     void (*reportx)(const char *, ...) __printflike(1,2))
282 {
283 
284 	assert((lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos) ||
285 	    ((lseek(offtab->ot_fd, 0, SEEK_CUR) == -1) && (errno == ESPIPE)));
286 
287 	offtab->ot_report = report;
288 	offtab->ot_reportx = reportx;
289 	offtab->ot_mode = OFFTAB_MODE_READ;
290 	offtab->ot_blkno = (uint32_t)-1;
291 
292 	if (!offtab_read_window(offtab, 0, OFFTAB_READ_NOSEEK))
293 		return false;
294 
295 	if (offtab->ot_window_size < offtab->ot_n_offsets) {
296 		__CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
297 		const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
298 		    (off_t)sizeof(uint64_t));
299 		assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS);
300 		__CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
301 			(off_t)MAX_N_OFFSETS*sizeof(uint64_t)));
302 		assert(ADD_OK(off_t, offtab->ot_fdpos, offtab_bytes));
303 		const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
304 		if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1) {
305 			(*offtab->ot_report)("lseek to first offset 0x%"PRIx64,
306 			    first_offset);
307 			return false;
308 		}
309 	}
310 
311 	return true;
312 }
313 
314 /*
315  * Do any I/O or bookkeeping necessary to fetch the offset for blkno in
316  * preparation for a call to offtab_get.  May fail; returns true on
317  * success, false on failure.
318  */
319 bool
offtab_prepare_get(struct offtab * offtab,uint32_t blkno)320 offtab_prepare_get(struct offtab *offtab, uint32_t blkno)
321 {
322 
323 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
324 	assert(blkno < offtab->ot_n_offsets);
325 
326 	if (!offtab_maybe_read_window(offtab, blkno, OFFTAB_READ_SEEK))
327 		return false;
328 
329 	assert(offtab->ot_window_start <= blkno);
330 	assert(blkno < offtab_current_window_end(offtab));
331 
332 	offtab->ot_blkno = blkno;
333 	return true;
334 }
335 
336 /*
337  * Return the offset for blkno.  Caller must have called
338  * offtab_prepare_get beforehand.
339  */
340 uint64_t
offtab_get(struct offtab * offtab,uint32_t blkno)341 offtab_get(struct offtab *offtab, uint32_t blkno)
342 {
343 
344 	assert(offtab->ot_mode == OFFTAB_MODE_READ);
345 	assert(blkno == offtab->ot_blkno);
346 	assert(offtab->ot_window_start <= blkno);
347 	assert(blkno < offtab_current_window_end(offtab));
348 
349 	return be64toh(offtab->ot_window[blkno - offtab->ot_window_start]);
350 }
351 
352 /*
353  * Reset offtab for writing a fresh offset table.  Initializes
354  * in-memory state and writes an empty offset table to offtab->ot_fd,
355  * which must currently be at byte position offtab->ot_fdpos.  May
356  * fail; returns on success, aborts with err(3) on failure.
357  */
358 void
offtab_reset_write(struct offtab * offtab)359 offtab_reset_write(struct offtab *offtab)
360 {
361 	uint32_t i;
362 
363 	assert(lseek(offtab->ot_fd, 0, SEEK_CUR) == offtab->ot_fdpos);
364 
365 	offtab->ot_mode = OFFTAB_MODE_WRITE;
366 	offtab->ot_blkno = (uint32_t)-1;
367 
368 	/*
369 	 * Initialize the offset table to all ones (except for the
370 	 * fixed first offset) so that we can easily detect where we
371 	 * were interrupted if we want to restart.
372 	 */
373 	__CTASSERT(MAX_N_OFFSETS <= UINT32_MAX);
374 	assert(offtab->ot_n_offsets > 0);
375 
376 	/* Initialize window of all ones.  */
377 	for (i = 0; i < offtab->ot_window_size; i++)
378 		offtab->ot_window[i] = ~(uint64_t)0;
379 
380 	/* Write the window to every position in the table.  */
381 	const uint32_t n_windows =
382 	    howmany(offtab->ot_n_offsets, offtab->ot_window_size);
383 	for (i = 1; i < n_windows; i++) {
384 		/* Change the start but reuse the all-ones buffer.  */
385 		offtab->ot_window_start = (i * offtab->ot_window_size);
386 		offtab_write_window(offtab);
387 	}
388 
389 	/* Compute the number of bytes in the offset table.  */
390 	__CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
391 	const off_t offtab_bytes = ((off_t)offtab->ot_n_offsets *
392 	    sizeof(uint64_t));
393 
394 	/* Compute the offset of the first block.  */
395 	assert(offtab->ot_fdpos <= OFFTAB_MAX_FDPOS);
396 	__CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
397 		MAX_N_OFFSETS*sizeof(uint64_t)));
398 	assert(ADD_OK(off_t, offtab->ot_fdpos, offtab_bytes));
399 	const off_t first_offset = (offtab->ot_fdpos + offtab_bytes);
400 
401 	/* Assert that it fits in 64 bits.  */
402 	__CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t)));
403 	__CTASSERT(ADD_OK(uint64_t, OFFTAB_MAX_FDPOS,
404 		(uint64_t)MAX_N_OFFSETS*sizeof(uint64_t)));
405 
406 	/* Write out the first window with the first offset.  */
407 	offtab->ot_window_start = 0;
408 	offtab->ot_window[0] = htobe64((uint64_t)first_offset);
409 	offtab_write_window(offtab);
410 
411 	if (lseek(offtab->ot_fd, first_offset, SEEK_SET) == -1)
412 		err(1, "lseek to first offset failed");
413 }
414 
415 /*
416  * Guarantee that the disk reflects block offsets [0, n_offsets).  If
417  * OFFTAB_CHECKPOINT_SYNC is set in flags, will also fsync the entire
418  * offset table.  May fail; returns on success, aborts with err(3) on
419  * failure.  Fsync failure is considered success but is reported with a
420  * warning.
421  *
422  * This routine does not write state in memory, and does not read state
423  * that is not signal-safe.  The only state read is offtab->ot_window,
424  * offtab->ot_window_start, and quantities that are static for the
425  * signal-interruptable existence of the offset table.
426  */
427 void
offtab_checkpoint(struct offtab * offtab,uint32_t n_offsets,int flags)428 offtab_checkpoint(struct offtab *offtab, uint32_t n_offsets, int flags)
429 {
430 
431 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
432 	assert(n_offsets <= offtab->ot_n_offsets);
433 
434 	/*
435 	 * Write the window unless we just did that and were
436 	 * interrupted before we could move the window.
437 	 */
438 	if (offtab->ot_window != NULL)
439 		offtab_maybe_write_window(offtab, 0, n_offsets);
440 
441 	if (ISSET(flags, OFFTAB_CHECKPOINT_SYNC)) {
442 		__CTASSERT(MUL_OK(off_t, MAX_N_OFFSETS, sizeof(uint64_t)));
443 		const off_t sync_bytes = ((off_t)n_offsets *
444 		    (off_t)sizeof(uint64_t));
445 		__CTASSERT(ADD_OK(off_t, OFFTAB_MAX_FDPOS,
446 			MAX_N_OFFSETS*sizeof(uint64_t)));
447 		assert(ADD_OK(off_t, offtab->ot_fdpos, sync_bytes));
448 		if (fsync_range(offtab->ot_fd, (FFILESYNC | FDISKSYNC),
449 			offtab->ot_fdpos, (offtab->ot_fdpos + sync_bytes))
450 		    == -1)
451 			warn_ss("fsync of offset table failed");
452 	}
453 }
454 
455 /*
456  * Do any I/O or bookkeeping necessary to set an offset for blkno.  May
457  * fail; returns on success, aborts with err(3) on failure.
458  */
459 void
offtab_prepare_put(struct offtab * offtab,uint32_t blkno)460 offtab_prepare_put(struct offtab *offtab, uint32_t blkno)
461 {
462 	uint32_t i;
463 
464 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
465 	assert(blkno < offtab->ot_n_offsets);
466 
467 	/*
468 	 * Assume, for convenience, that we write blocks in order.
469 	 * Thus we need not do another read -- we can just clear the
470 	 * window.
471 	 */
472 	assert((offtab->ot_blkno == (uint32_t)-1) ||
473 	    ((offtab->ot_blkno + 1) == blkno));
474 
475 	/* If it's already in our window, we're good to go.  */
476 	if ((offtab->ot_window_start <= blkno) &&
477 	    (blkno < offtab_current_window_end(offtab)))
478 		goto win;
479 
480 	/* Otherwise, write out the current window and choose a new one.  */
481 	offtab_write_window(offtab);
482 
483 	assert(offtab->ot_window_size <= blkno);
484 	assert(offtab->ot_window_start == (blkno - offtab->ot_window_size));
485 	assert((offtab->ot_window_start + offtab->ot_window_size) ==
486 	    rounddown(blkno, offtab->ot_window_size));
487 
488     {
489 	uint64_t *window;
490 	sigset_t sigmask;
491 
492 	/*
493 	 * Mark the window as being updated so nobody tries to write it
494 	 * (since we just wrote it) while we fill it with ones.
495 	 */
496 	block_signals(&sigmask);
497 	window = offtab->ot_window;
498 	offtab->ot_window = NULL;
499 	restore_sigmask(&sigmask);
500 
501 	/* Fill the window with ones.  */
502 	for (i = 0; i < offtab_current_window_size(offtab); i++)
503 		window[i] = ~(uint64_t)0;
504 
505 	/* Restore the window as ready again.  */
506 	block_signals(&sigmask);
507 	offtab->ot_window = window;
508 	offtab->ot_window_start = rounddown(blkno, offtab->ot_window_size);
509 	restore_sigmask(&sigmask);
510     }
511 
512 win:	assert(offtab->ot_window_start <= blkno);
513 	assert(blkno < offtab_current_window_end(offtab));
514 
515 	offtab->ot_blkno = blkno;
516 }
517 
518 /*
519  * Actually set the offset for blkno.
520  */
521 void
offtab_put(struct offtab * offtab,uint32_t blkno,uint64_t offset)522 offtab_put(struct offtab *offtab, uint32_t blkno, uint64_t offset)
523 {
524 
525 	assert(offtab->ot_mode == OFFTAB_MODE_WRITE);
526 	assert(blkno == offtab->ot_blkno);
527 	assert(offtab->ot_window_start <= blkno);
528 	assert(blkno < offtab_current_window_end(offtab));
529 
530 	offtab->ot_window[blkno - offtab->ot_window_start] = htobe64(offset);
531 }
532