/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/types.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <stddef.h>
#include <err.h>
#include <fcntl.h>
#include "hammer_util.h"

static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
			struct buffer_info **bufferp);
static hammer_off_t alloc_bigblock(struct volume_info *volume, int zone);
static void get_buffer_readahead(struct buffer_info *base);
static __inline void *get_ondisk(hammer_off_t buf_offset,
			struct buffer_info **bufferp, int isnew);
#if 0
static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type);
static void readhammerbuf(struct volume_info *vol, void *data,
			int64_t offset);
#endif
static void writehammerbuf(struct volume_info *vol, const void *data,
			int64_t offset);

int DebugOpt;

uuid_t Hammer_FSType;
uuid_t Hammer_FSId;
int64_t BootAreaSize;
int64_t MemAreaSize;
int64_t UndoBufferSize;
int	UsingSuperClusters;
int	NumVolumes;
int	RootVolNo = -1;
int	UseReadBehind = -4;
int	UseReadAhead = 4;
int	AssertOnFailure = 1;
struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);

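/*
 * Buffers are tracked on per-volume hash lists with HAMMER_BUFLISTS
 * buckets, hashed on the buffer-aligned offset.  For example, assuming
 * the usual 16K HAMMER_BUFSIZE, offsets 0x10000 and 0x14000 land in
 * adjacent buckets while 0x10000 and 0x10800 (the same buffer) land in
 * the same one.
 */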
static __inline
int
buffer_hash(hammer_off_t buf_offset)
{
	int hi;

	hi = (int)(buf_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
	return(hi);
}

/*
 * Load or create the volume structure for the specified volume and link
 * it into VolList.  The volume header is read from disk, or initialized
 * from scratch when isnew is positive.
 */
struct volume_info *
setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
{
	struct volume_info *vol;
	struct volume_info *scan;
	struct hammer_volume_ondisk *ondisk;
	int i, n;

	/*
	 * Allocate the volume structure
	 */
	vol = malloc(sizeof(*vol));
	bzero(vol, sizeof(*vol));
	for (i = 0; i < HAMMER_BUFLISTS; ++i)
		TAILQ_INIT(&vol->buffer_lists[i]);
	vol->name = strdup(filename);
	vol->fd = open(filename, oflags);
	if (vol->fd < 0) {
		free(vol->name);
		free(vol);
		err(1, "setup_volume: %s: Open failed", filename);
	}

	/*
	 * Read or initialize the volume header
	 */
	vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
	} else {
		n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0);
		if (n != HAMMER_BUFSIZE) {
			err(1, "setup_volume: %s: Read failed at offset 0",
			    filename);
		}
		vol_no = ondisk->vol_no;
		if (RootVolNo < 0) {
			RootVolNo = ondisk->vol_rootvol;
		} else if (RootVolNo != (int)ondisk->vol_rootvol) {
			errx(1, "setup_volume: %s: root volume disagreement: "
				"%d vs %d",
				vol->name, RootVolNo, ondisk->vol_rootvol);
		}

		if (bcmp(&Hammer_FSType, &ondisk->vol_fstype,
			 sizeof(Hammer_FSType)) != 0) {
			errx(1, "setup_volume: %s: Header does not indicate "
				"that this is a hammer volume", vol->name);
		}
		if (TAILQ_EMPTY(&VolList)) {
			Hammer_FSId = vol->ondisk->vol_fsid;
		} else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid,
				sizeof(Hammer_FSId)) != 0) {
			errx(1, "setup_volume: %s: FSId does not match other "
				"volumes!", vol->name);
		}
	}
	vol->vol_no = vol_no;

	if (isnew > 0) {
		/*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/
		vol->cache.modified = 1;
	}

	/*
	 * Link the volume structure in
	 */
	TAILQ_FOREACH(scan, &VolList, entry) {
		if (scan->vol_no == vol_no) {
			errx(1, "setup_volume: %s: Duplicate volume number "
				"%d against %s", filename, vol_no, scan->name);
		}
	}
	TAILQ_INSERT_TAIL(&VolList, vol, entry);
	return(vol);
}

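/*
 * A typical caller (a sketch only; the device path and flags are
 * illustrative) sets up every volume first, passing -1 so the volume
 * number is taken from the on-disk header, then addresses volumes by
 * number:
 *
 *	setup_volume(-1, "/dev/da0s1a", 0, O_RDONLY);
 *	...
 *	root_vol = get_volume(RootVolNo);
 */
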
struct volume_info *
test_volume(int32_t vol_no)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry) {
		if (vol->vol_no == vol_no)
			break;
	}
	if (vol == NULL)
		return(NULL);
	++vol->cache.refs;
	/* not added to or removed from hammer cache */
	return(vol);
}

struct volume_info *
get_volume(int32_t vol_no)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry) {
		if (vol->vol_no == vol_no)
			break;
	}
	if (vol == NULL)
		errx(1, "get_volume: Volume %d does not exist!", vol_no);
	++vol->cache.refs;
	/* not added to or removed from hammer cache */
	return(vol);
}

void
rel_volume(struct volume_info *volume)
{
	/* not added to or removed from hammer cache */
	--volume->cache.refs;
}

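/*
 * A hammer_off_t encodes the zone in its top four bits and the volume
 * number in the next eight; the low 52 bits are the offset within that
 * volume (see hammer_disk.h).  For example, a zone-2 (raw buffer)
 * offset on volume 3 is built roughly as:
 *
 *	HAMMER_ZONE_RAW_BUFFER | HAMMER_VOL_ENCODE(3) | offset
 */
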
/*
 * Acquire the buffer covering buf_offset.
 *
 * isnew > 0 - the buffer is zero-filled and marked modified.
 * isnew = 0 - the buffer is read from the media and read-ahead may be
 *	       initiated.
 * isnew < 0 - the buffer is being acquired for read-ahead and is
 *	       flagged HAMMER_BUFINFO_READAHEAD.
 */
struct buffer_info *
get_buffer(hammer_off_t buf_offset, int isnew)
{
	void *ondisk;
	struct buffer_info *buf;
	struct volume_info *volume;
	hammer_off_t orig_offset = buf_offset;
	int vol_no;
	int zone;
	int hi, n;
	int dora = 0;

	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) {
		buf_offset = blockmap_lookup(buf_offset, NULL, NULL, NULL);
	}
	if (buf_offset == HAMMER_OFF_BAD)
		return(NULL);

	if (AssertOnFailure) {
		assert((buf_offset & HAMMER_OFF_ZONE_MASK) ==
		       HAMMER_ZONE_RAW_BUFFER);
	}
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = test_volume(vol_no);
	if (volume == NULL) {
		if (AssertOnFailure)
			errx(1, "get_buffer: Volume %d not found!", vol_no);
		return(NULL);
	}

	buf_offset &= ~HAMMER_BUFMASK64;

	hi = buffer_hash(buf_offset);

	TAILQ_FOREACH(buf, &volume->buffer_lists[hi], entry) {
		if (buf->buf_offset == buf_offset)
			break;
	}
	if (buf == NULL) {
		buf = malloc(sizeof(*buf));
		bzero(buf, sizeof(*buf));
		if (DebugOpt) {
			fprintf(stderr, "get_buffer %016llx %016llx\n",
				(long long)orig_offset, (long long)buf_offset);
		}
		buf->buf_offset = buf_offset;
		buf->raw_offset = volume->ondisk->vol_buf_beg +
				  (buf_offset & HAMMER_OFF_SHORT_MASK);
		buf->volume = volume;
		TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buf, entry);
		++volume->cache.refs;
		buf->cache.u.buffer = buf;
		hammer_cache_add(&buf->cache, ISBUFFER);
		dora = (isnew == 0);
		if (isnew < 0)
			buf->flags |= HAMMER_BUFINFO_READAHEAD;
	} else {
		if (isnew >= 0) {
			buf->flags &= ~HAMMER_BUFINFO_READAHEAD;
			hammer_cache_used(&buf->cache);
		}
		++buf->use_count;
	}
	++buf->cache.refs;
	hammer_cache_flush();
	if ((ondisk = buf->ondisk) == NULL) {
		buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		if (isnew <= 0) {
			n = pread(volume->fd, ondisk, HAMMER_BUFSIZE,
				  buf->raw_offset);
			if (n != HAMMER_BUFSIZE) {
				if (AssertOnFailure)
					err(1, "get_buffer: %s:%016llx "
					    "Read failed at offset %016llx",
					    volume->name,
					    (long long)buf->buf_offset,
					    (long long)buf->raw_offset);
				bzero(ondisk, HAMMER_BUFSIZE);
			}
		}
	}
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
		buf->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buf);
	return(buf);
}

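/*
 * Pre-cache the buffers surrounding base.  With the defaults the window
 * spans UseReadBehind..UseReadAhead (-4..+4) buffers relative to base's
 * raw offset, clipped to the volume's buffer area.  Buffers acquired
 * here are flagged HAMMER_BUFINFO_READAHEAD via get_buffer(..., -1).
 */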
static void
get_buffer_readahead(struct buffer_info *base)
{
	struct buffer_info *buf;
	struct volume_info *vol;
	hammer_off_t buf_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;
	int re = UseReadAhead;
	int hi;

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	vol = base->volume;

	while (ri < re) {
		if (raw_offset >= vol->ondisk->vol_buf_end)
			break;
		if (raw_offset < vol->ondisk->vol_buf_beg) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		buf_offset = HAMMER_VOL_ENCODE(vol->vol_no) |
			     HAMMER_ZONE_RAW_BUFFER |
			     (raw_offset - vol->ondisk->vol_buf_beg);
		hi = buffer_hash(raw_offset);
		TAILQ_FOREACH(buf, &vol->buffer_lists[hi], entry) {
			if (buf->raw_offset == raw_offset)
				break;
		}
		if (buf == NULL) {
			buf = get_buffer(buf_offset, -1);
			rel_buffer(buf);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}

void
rel_buffer(struct buffer_info *buffer)
{
	struct volume_info *volume;
	int hi;

	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->buf_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
			rel_volume(volume);
		}
	}
}

/*
 * Retrieve a pointer to buffer data given a buffer offset.  The
 * underlying *bufferp is released if isnew is positive or if the offset
 * lies outside the currently cached buffer, in which case a referenced
 * replacement buffer is loaded into it.
 */
void *
get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
		int isnew)
{
	if (*bufferp != NULL) {
		if (isnew > 0 ||
		    (((*bufferp)->buf_offset ^ buf_offset) & ~HAMMER_BUFMASK64)) {
			rel_buffer(*bufferp);
			*bufferp = NULL;
		}
	}
	return(get_ondisk(buf_offset, bufferp, isnew));
}

/*
 * Retrieve a pointer to a B-Tree node given its node offset.  The
 * underlying *bufferp is always released and a referenced buffer
 * covering the node is loaded into it.
 */
hammer_node_ondisk_t
get_node(hammer_off_t node_offset, struct buffer_info **bufferp)
{
	if (*bufferp != NULL) {
		rel_buffer(*bufferp);
		*bufferp = NULL;
	}
	return(get_ondisk(node_offset, bufferp, 0));
}

/*
 * Return a pointer to buffer data given a buffer offset.  If *bufferp
 * is NULL the buffer is acquired, otherwise the cached buffer is used.
 */
static __inline
void *
get_ondisk(hammer_off_t buf_offset, struct buffer_info **bufferp,
	int isnew)
{
	struct buffer_info *buffer;

	buffer = *bufferp;
	if (buffer == NULL) {
		buffer = *bufferp = get_buffer(buf_offset, isnew);
		if (buffer == NULL)
			return(NULL);
	}

	return((char *)buffer->ondisk +
		((int32_t)buf_offset & HAMMER_BUFMASK));
}

/*
 * Allocate HAMMER elements - B-Tree nodes, data storage
 */
void *
alloc_btree_element(hammer_off_t *offp)
{
	struct buffer_info *buffer = NULL;
	hammer_node_ondisk_t node;

	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
			      offp, &buffer);
	bzero(node, sizeof(*node));
	/* XXX buffer not released, pointer remains valid */
	return(node);
}

void *
alloc_data_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;

	if (data_len >= HAMMER_BUFSIZE) {
		assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
		data = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
				      offp, data_bufferp);
		bzero(data, data_len);
	} else if (data_len) {
		data = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
				      offp, data_bufferp);
		bzero(data, data_len);
	} else {
		data = NULL;
	}
	return (data);
}

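/*
 * A minimal usage sketch (hypothetical caller).  alloc_data_element()
 * returns a pointer into a tracking buffer which is left referenced
 * via *data_bufferp, so the caller releases it when done:
 *
 *	struct buffer_info *data_buffer = NULL;
 *	hammer_off_t data_off;
 *	void *data;
 *
 *	data = alloc_data_element(&data_off, 128, &data_buffer);
 *	... fill in the 128 bytes at data ...
 *	rel_buffer(data_buffer);
 */
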
/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL;
 * initialize_freemap() will load each volume's free space into the
 * freemap afterwards.
 */
void
format_freemap(struct volume_info *root_vol, hammer_blockmap_t blockmap)
{
	struct buffer_info *buffer = NULL;
	hammer_off_t layer1_offset;
	struct hammer_blockmap_layer1 *layer1;
	int i, isnew;

	layer1_offset = alloc_bigblock(root_vol, HAMMER_ZONE_FREEMAP_INDEX);
	for (i = 0; i < (int)HAMMER_BLOCKMAP_RADIX1; ++i) {
		isnew = ((i % HAMMER_BLOCKMAP_RADIX1_PERBUFFER) == 0);
		layer1 = get_buffer_data(layer1_offset + i * sizeof(*layer1),
					 &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
	}
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	blockmap->phys_offset = layer1_offset;
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->reserved01 = 0;
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	root_vol->cache.modified = 1;
}

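/*
 * The freemap is a two-layer radix tree.  With the sizes defined in
 * hammer_disk.h (8MB big-blocks, 32-byte layer1 and 16-byte layer2
 * entries) each layer2 big-block describes HAMMER_BLOCKMAP_RADIX2 x
 * 8MB = 4TB of storage, and the single layer1 big-block allocated by
 * format_freemap() describes HAMMER_BLOCKMAP_RADIX1 x 4TB = 1EB,
 * HAMMER's addressing limit.
 */
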
/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of bigblocks available.
 */
int64_t
initialize_freemap(struct volume_info *vol)
{
	struct volume_info *root_vol;
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_off_t layer1_base;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t aligned_vol_free_end;
	int64_t count = 0;
	int modified1 = 0;

	root_vol = get_volume(RootVolNo);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	printf("initialize freemap volume %d\n", vol->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the bigblocks required
	 * to implement layer2.  This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	layer1_base = root_vol->ondisk->vol0_blockmap[
					HAMMER_ZONE_FREEMAP_INDEX].phys_offset;
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = alloc_bigblock(vol,
						HAMMER_ZONE_FREEMAP_INDEX);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		modified1 = 0;
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);

		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
		layer2_offset = layer1->phys_offset +
				HAMMER_BLOCKMAP_LAYER2_OFFSET(phys_offset);

		layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
		bzero(layer2, sizeof(*layer2));
		if (phys_offset < vol->vol_free_off) {
			/*
			 * Fixups XXX - bigblocks already allocated as part
			 * of the freemap bootstrap.
			 */
			if (layer2->zone == 0) {
				layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
		} else if (phys_offset < vol->vol_free_end) {
			++layer1->blocks_free;
			buffer1->cache.modified = 1;
			layer2->zone = 0;
			layer2->append_off = 0;
			layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
			++count;
			modified1 = 1;
		} else {
			layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
			layer2->append_off = HAMMER_BIGBLOCK_SIZE;
			layer2->bytes_free = 0;
		}
		layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
		buffer2->cache.modified = 1;

		/*
		 * Finish-up layer 1
		 */
		if (modified1) {
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
			buffer1->cache.modified = 1;
		}
	}
	rel_buffer(buffer1);
	rel_buffer(buffer2);
	rel_volume(root_vol);
	return(count);
}

/*
 * Allocate big-blocks using our poor-man's volume->vol_free_off.
 *
 * If the zone is HAMMER_ZONE_FREEMAP_INDEX we are bootstrapping the freemap
 * itself and cannot update it yet.
 */
hammer_off_t
alloc_bigblock(struct volume_info *volume, int zone)
{
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct volume_info *root_vol;
	hammer_off_t result_offset;
	hammer_off_t layer_offset;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	int didget;

	if (volume == NULL) {
		volume = get_volume(RootVolNo);
		didget = 1;
	} else {
		didget = 0;
	}
	result_offset = volume->vol_free_off;
	if (result_offset >= volume->vol_free_end)
		panic("alloc_bigblock: Ran out of room, filesystem too small");
	volume->vol_free_off += HAMMER_BIGBLOCK_SIZE;

	/*
	 * Update the freemap.
	 */
	if (zone != HAMMER_ZONE_FREEMAP_INDEX) {
		root_vol = get_volume(RootVolNo);
		layer_offset = root_vol->ondisk->vol0_blockmap[
					HAMMER_ZONE_FREEMAP_INDEX].phys_offset;
		layer_offset += HAMMER_BLOCKMAP_LAYER1_OFFSET(result_offset);
		layer1 = get_buffer_data(layer_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		buffer1->cache.modified = 1;
		layer_offset = layer1->phys_offset +
			       HAMMER_BLOCKMAP_LAYER2_OFFSET(result_offset);
		layer2 = get_buffer_data(layer_offset, &buffer2, 0);
		assert(layer2->zone == 0);
		layer2->zone = zone;
		layer2->append_off = HAMMER_BIGBLOCK_SIZE;
		layer2->bytes_free = 0;
		layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
		buffer2->cache.modified = 1;

		--root_vol->ondisk->vol0_stat_freebigblocks;
		root_vol->cache.modified = 1;

		rel_buffer(buffer1);
		rel_buffer(buffer2);
		rel_volume(root_vol);
	}

	if (didget)
		rel_volume(volume);
	return(result_offset);
}

/*
 * Format the undo-map for the root volume.
 */
void
format_undomap(hammer_volume_ondisk_t ondisk)
{
	const int undo_zone = HAMMER_ZONE_UNDO_INDEX;
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	struct buffer_info *buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	u_int32_t seqno;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_UNDO_LAYER2 big blocks.  Size to approximately
	 * 0.1% of the disk.
	 *
	 * The minimum UNDO fifo size is 500MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
	undo_limit = UndoBufferSize;
	if (undo_limit == 0) {
		undo_limit = (ondisk->vol_buf_end - ondisk->vol_buf_beg) / 1000;
		if (undo_limit < 500*1024*1024)
			undo_limit = 500*1024*1024;
	}
	undo_limit = (undo_limit + HAMMER_BIGBLOCK_MASK64) &
		     ~HAMMER_BIGBLOCK_MASK64;
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2;
	UndoBufferSize = undo_limit;

	blockmap = &ondisk->vol0_blockmap[undo_zone];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ZONE_ENCODE(undo_zone, 0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ZONE_ENCODE(undo_zone, undo_limit);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);

	n = 0;
	scan = blockmap->next_offset;
	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;

	assert(limit_index <= HAMMER_UNDO_LAYER2);

	for (n = 0; n < limit_index; ++n) {
		ondisk->vol0_undo_array[n] = alloc_bigblock(NULL,
							HAMMER_ZONE_UNDO_INDEX);
		scan += HAMMER_BIGBLOCK_SIZE;
	}
	while (n < HAMMER_UNDO_LAYER2) {
		ondisk->vol0_undo_array[n] = HAMMER_BLOCKMAP_UNAVAIL;
		++n;
	}

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
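	/*
	 * Each pre-formatted record is one HAMMER_UNDO_ALIGN-sized dummy
	 * with a hammer_fifo_head at the front, a hammer_fifo_tail at the
	 * very end and zeroed filler in between; hdr_crc covers the head
	 * (up to the CRC field) plus the remainder of the record:
	 *
	 *	+-----------+------------------------+-----------+
	 *	| fifo_head |     zeroed filler      | fifo_tail |
	 *	+-----------+------------------------+-----------+
	 *	|<------------ HAMMER_UNDO_ALIGN ---------------->|
	 */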
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)(blockmap->alloc_offset & HAMMER_OFF_LONG_MASK) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int isnew;
		int bytes = HAMMER_UNDO_ALIGN;

		isnew = ((scan & HAMMER_BUFMASK64) == 0);
		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
				crc32(head + 1, bytes - sizeof(*head));

		scan += bytes;
	}
	if (buffer)
		rel_buffer(buffer);
}

/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 */
void
format_blockmap(hammer_blockmap_t blockmap, hammer_off_t zone_base)
{
	blockmap->phys_offset = 0;
	blockmap->alloc_offset = zone_base | HAMMER_VOL_ENCODE(255) |
				 HAMMER_SHORT_OFF_ENCODE(-1);
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

/*
 * Allocate a chunk of data out of a blockmap.  This is a simplified
 * version which uses next_offset as a simple allocation iterator.
 */
static
void *
alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
	       struct buffer_info **bufferp)
{
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct volume_info *volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t zone2_offset;
	void *ptr;

	volume = get_volume(RootVolNo);

	blockmap = &volume->ondisk->vol0_blockmap[zone];
	freemap = &volume->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Alignment and buffer-boundary issues.  If the allocation would
	 * cross a buffer boundary we have to skip to the next buffer.
	 */
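	/*
	 * For example, with 16K buffers a 64-byte allocation with
	 * next_offset at ...3ff0 would end at ...402f and cross a
	 * buffer boundary, so next_offset is first bumped to ...4000.
	 */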
	bytes = (bytes + 15) & ~15;

again:
	if ((blockmap->next_offset ^ (blockmap->next_offset + bytes - 1)) &
	    ~HAMMER_BUFMASK64) {
		volume->cache.modified = 1;
		blockmap->next_offset = (blockmap->next_offset + bytes) &
					~HAMMER_BUFMASK64;
	}

	/*
	 * Dive layer 1.  For now we can't allocate data outside of volume 0.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(blockmap->next_offset);

	layer1 = get_buffer_data(layer1_offset, &buffer1, 0);

	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
		fprintf(stderr, "alloc_blockmap: ran out of space!\n");
		exit(1);
	}

	/*
	 * Dive layer 2
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(blockmap->next_offset);

	layer2 = get_buffer_data(layer2_offset, &buffer2, 0);

	if (layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX) {
		fprintf(stderr, "alloc_blockmap: ran out of space!\n");
		exit(1);
	}

	/*
	 * If we are entering a new bigblock assign ownership to our
	 * zone.  If the bigblock is owned by another zone skip it.
	 */
	if (layer2->zone == 0) {
		--layer1->blocks_free;
		layer2->zone = zone;
		assert(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
		assert(layer2->append_off == 0);
	}
	if (layer2->zone != zone) {
		blockmap->next_offset = (blockmap->next_offset +
					 HAMMER_BIGBLOCK_SIZE) &
					~HAMMER_BIGBLOCK_MASK64;
		goto again;
	}

	buffer1->cache.modified = 1;
	buffer2->cache.modified = 1;
	volume->cache.modified = 1;
	assert(layer2->append_off ==
	       (blockmap->next_offset & HAMMER_BIGBLOCK_MASK));
	layer2->bytes_free -= bytes;
	*result_offp = blockmap->next_offset;
	blockmap->next_offset += bytes;
	layer2->append_off = (int)blockmap->next_offset &
			      HAMMER_BIGBLOCK_MASK;

	layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);

	zone2_offset = (*result_offp & ~HAMMER_OFF_ZONE_MASK) |
			HAMMER_ZONE_ENCODE(zone, 0);

	ptr = get_buffer_data(zone2_offset, bufferp, 0);
	(*bufferp)->cache.modified = 1;

	if (buffer1)
		rel_buffer(buffer1);
	if (buffer2)
		rel_buffer(buffer2);

	rel_volume(volume);
	return(ptr);
}

/*
 * Flush various tracking structures to disk
 */
void
flush_all_volumes(void)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry)
		flush_volume(vol);
}

void
flush_volume(struct volume_info *volume)
{
	struct buffer_info *buffer;
	int i;

	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
			flush_buffer(buffer);
	}
	writehammerbuf(volume, volume->ondisk, 0);
	volume->cache.modified = 0;
}

void
flush_buffer(struct buffer_info *buffer)
{
	writehammerbuf(buffer->volume, buffer->ondisk, buffer->raw_offset);
	buffer->cache.modified = 0;
}

#if 0
/*
 * Generic buffer initialization
 */
static void
init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type)
{
	head->hdr_signature = HAMMER_HEAD_SIGNATURE;
	head->hdr_type = hdr_type;
	head->hdr_size = 0;
	head->hdr_crc = 0;
	head->hdr_seq = 0;
}

#endif

#if 0
/*
 * Core I/O operations
 */
static void
readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
{
	ssize_t n;

	n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		err(1, "Read volume %d (%s)", vol->vol_no, vol->name);
}

#endif

static void
writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
{
	ssize_t n;

	n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
}

void
panic(const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	vfprintf(stderr, ctl, va);
	va_end(va);
	fprintf(stderr, "\n");
	exit(1);
}