xref: /dflybsd-src/sys/vfs/hammer/hammer_blockmap.c (revision 9ebbd47df7abd81e0803cf228d15b3c372ad85db)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include "hammer.h"
39 
40 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
41 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
42 				    hammer_off_t base_offset, int zone,
43 				    struct hammer_blockmap_layer2 *layer2);
44 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
45 static int update_bytes_free(hammer_reserve_t resv, int bytes);
46 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
47 
48 /*
49  * Reserved big-blocks red-black tree support
50  */
51 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
52 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
53 
54 static int
55 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
56 {
57 	if (res1->zone_offset < res2->zone_offset)
58 		return(-1);
59 	if (res1->zone_offset > res2->zone_offset)
60 		return(1);
61 	return(0);
62 }
63 
64 /*
65  * Allocate bytes from a zone
66  */
67 hammer_off_t
68 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
69 		      hammer_off_t hint, int *errorp)
70 {
71 	hammer_mount_t hmp;
72 	hammer_volume_t root_volume;
73 	hammer_blockmap_t blockmap;
74 	hammer_blockmap_t freemap;
75 	hammer_reserve_t resv;
76 	struct hammer_blockmap_layer1 *layer1;
77 	struct hammer_blockmap_layer2 *layer2;
78 	hammer_buffer_t buffer1 = NULL;
79 	hammer_buffer_t buffer2 = NULL;
80 	hammer_buffer_t buffer3 = NULL;
81 	hammer_off_t tmp_offset;
82 	hammer_off_t next_offset;
83 	hammer_off_t result_offset;
84 	hammer_off_t layer1_offset;
85 	hammer_off_t layer2_offset;
86 	hammer_off_t base_off;
87 	int loops = 0;
88 	int offset;		/* offset within big-block */
89 	int use_hint;
90 
91 	hmp = trans->hmp;
92 
93 	/*
94 	 * Deal with alignment and buffer-boundary issues.
95 	 *
96 	 * Be careful, certain primary alignments are used below to allocate
97 	 * new blockmap blocks.
98 	 */
99 	bytes = (bytes + 15) & ~15;
100 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
101 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
102 
103 	/*
104 	 * Setup
105 	 */
106 	root_volume = trans->rootvol;
107 	*errorp = 0;
108 	blockmap = &hmp->blockmap[zone];
109 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
110 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
111 
112 	/*
113 	 * Use the hint if we have one.
114 	 */
115 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
116 		next_offset = (hint + 15) & ~(hammer_off_t)15;
117 		use_hint = 1;
118 	} else {
119 		next_offset = blockmap->next_offset;
120 		use_hint = 0;
121 	}
122 again:
123 
124 	/*
125 	 * use_hint is turned off if we leave the hinted big-block.
126 	 */
127 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
128 		next_offset = blockmap->next_offset;
129 		use_hint = 0;
130 	}
131 
132 	/*
133 	 * Check for wrap
134 	 */
135 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
136 		if (++loops == 2) {
137 			result_offset = 0;
138 			*errorp = ENOSPC;
139 			goto failed;
140 		}
141 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
142 	}
143 
144 	/*
145 	 * The allocation request may not cross a buffer boundary.  Special
146 	 * large allocations must not cross a big-block boundary.
147 	 */
148 	tmp_offset = next_offset + bytes - 1;
149 	if (bytes <= HAMMER_BUFSIZE) {
150 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
151 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
152 			goto again;
153 		}
154 	} else {
155 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
156 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
157 			goto again;
158 		}
159 	}
160 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
161 
162 	/*
163 	 * Dive layer 1.
164 	 */
165 	layer1_offset = freemap->phys_offset +
166 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
167 
168 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
169 	if (*errorp) {
170 		result_offset = 0;
171 		goto failed;
172 	}
173 
174 	/*
175 	 * Check CRC.
176 	 */
177 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
178 		hammer_lock_ex(&hmp->blkmap_lock);
179 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
180 			panic("CRC FAILED: LAYER1");
181 		hammer_unlock(&hmp->blkmap_lock);
182 	}
183 
184 	/*
185 	 * If we are at a big-block boundary and layer1 indicates no
186 	 * free big-blocks, then we cannot allocate a new big-block in
187 	 * layer2, skip to the next layer1 entry.
188 	 */
189 	if (offset == 0 && layer1->blocks_free == 0) {
190 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
191 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
192 		if (hammer_check_volume(hmp, &next_offset)) {
193 			result_offset = 0;
194 			goto failed;
195 		}
196 		goto again;
197 	}
198 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
199 
200 	/*
201 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
202 	 * on a volume that we are currently trying to remove from the
203 	 * file-system. This is used by the volume-del code together with
204 	 * the reblocker to free up a volume.
205 	 */
206 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
207 	    hmp->volume_to_remove) {
208 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
209 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
210 		goto again;
211 	}
212 
213 	/*
214 	 * Dive layer 2, each entry represents a big-block.
215 	 */
216 	layer2_offset = layer1->phys_offset +
217 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
218 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
219 	if (*errorp) {
220 		result_offset = 0;
221 		goto failed;
222 	}
223 
224 	/*
225 	 * Check CRC.  This can race another thread holding the lock
226 	 * and in the middle of modifying layer2.
227 	 */
228 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
229 		hammer_lock_ex(&hmp->blkmap_lock);
230 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
231 			panic("CRC FAILED: LAYER2");
232 		hammer_unlock(&hmp->blkmap_lock);
233 	}
234 
235 	/*
236 	 * Skip the layer if the zone is owned by someone other then us.
237 	 */
238 	if (layer2->zone && layer2->zone != zone) {
239 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
240 		goto again;
241 	}
242 	if (offset < layer2->append_off) {
243 		next_offset += layer2->append_off - offset;
244 		goto again;
245 	}
246 
247 #if 0
248 	/*
249 	 * If operating in the current non-hint blockmap block, do not
250 	 * allow it to get over-full.  Also drop any active hinting so
251 	 * blockmap->next_offset is updated at the end.
252 	 *
253 	 * We do this for B-Tree and meta-data allocations to provide
254 	 * localization for updates.
255 	 */
256 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
257 	     zone == HAMMER_ZONE_META_INDEX) &&
258 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
259 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)
260 	) {
261 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
262 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
263 			use_hint = 0;
264 			goto again;
265 		}
266 	}
267 #endif
268 
269 	/*
270 	 * We need the lock from this point on.  We have to re-check zone
271 	 * ownership after acquiring the lock and also check for reservations.
272 	 */
273 	hammer_lock_ex(&hmp->blkmap_lock);
274 
275 	if (layer2->zone && layer2->zone != zone) {
276 		hammer_unlock(&hmp->blkmap_lock);
277 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
278 		goto again;
279 	}
280 	if (offset < layer2->append_off) {
281 		hammer_unlock(&hmp->blkmap_lock);
282 		next_offset += layer2->append_off - offset;
283 		goto again;
284 	}
285 
286 	/*
287 	 * The big-block might be reserved by another zone.  If it is reserved
288 	 * by our zone we may have to move next_offset past the append_off.
289 	 */
290 	base_off = hammer_xlate_to_zone2(next_offset &
291 					~HAMMER_BIGBLOCK_MASK64);
292 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
293 	if (resv) {
294 		if (resv->zone != zone) {
295 			hammer_unlock(&hmp->blkmap_lock);
296 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
297 				      ~HAMMER_BIGBLOCK_MASK64;
298 			goto again;
299 		}
300 		if (offset < resv->append_off) {
301 			hammer_unlock(&hmp->blkmap_lock);
302 			next_offset += resv->append_off - offset;
303 			goto again;
304 		}
305 		++resv->refs;
306 	}
307 
308 	/*
309 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
310 	 * of the layer for real.  At this point we've validated any
311 	 * reservation that might exist and can just ignore resv.
312 	 */
313 	if (layer2->zone == 0) {
314 		/*
315 		 * Assign the big-block to our zone
316 		 */
317 		hammer_modify_buffer(trans, buffer1,
318 				     layer1, sizeof(*layer1));
319 		--layer1->blocks_free;
320 		layer1->layer1_crc = crc32(layer1,
321 					   HAMMER_LAYER1_CRCSIZE);
322 		hammer_modify_buffer_done(buffer1);
323 		hammer_modify_buffer(trans, buffer2,
324 				     layer2, sizeof(*layer2));
325 		layer2->zone = zone;
326 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
327 		KKASSERT(layer2->append_off == 0);
328 		hammer_modify_volume_field(trans, trans->rootvol,
329 					   vol0_stat_freebigblocks);
330 		--root_volume->ondisk->vol0_stat_freebigblocks;
331 		hmp->copy_stat_freebigblocks =
332 			root_volume->ondisk->vol0_stat_freebigblocks;
333 		hammer_modify_volume_done(trans->rootvol);
334 	} else {
335 		hammer_modify_buffer(trans, buffer2,
336 				     layer2, sizeof(*layer2));
337 	}
338 	KKASSERT(layer2->zone == zone);
339 
340 	/*
341 	 * NOTE: bytes_free can legally go negative due to de-dup.
342 	 */
343 	layer2->bytes_free -= bytes;
344 	KKASSERT(layer2->append_off <= offset);
345 	layer2->append_off = offset + bytes;
346 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
347 	hammer_modify_buffer_done(buffer2);
348 
349 	/*
350 	 * We hold the blockmap lock and should be the only ones
351 	 * capable of modifying resv->append_off.  Track the allocation
352 	 * as appropriate.
353 	 */
354 	KKASSERT(bytes != 0);
355 	if (resv) {
356 		KKASSERT(resv->append_off <= offset);
357 		resv->append_off = offset + bytes;
358 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
359 		hammer_blockmap_reserve_complete(hmp, resv);
360 	}
361 
362 	/*
363 	 * If we are allocating from the base of a new buffer we can avoid
364 	 * a disk read by calling hammer_bnew_ext().
365 	 */
366 	if ((next_offset & HAMMER_BUFMASK) == 0) {
367 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
368 				errorp, &buffer3);
369 		if (*errorp) {
370 			result_offset = 0;
371 			goto failed;
372 		}
373 	}
374 	result_offset = next_offset;
375 
376 	/*
377 	 * If we weren't supplied with a hint or could not use the hint
378 	 * then we wound up using blockmap->next_offset as the hint and
379 	 * need to save it.
380 	 */
381 	if (use_hint == 0) {
382 		hammer_modify_volume_noundo(NULL, root_volume);
383 		blockmap->next_offset = next_offset + bytes;
384 		hammer_modify_volume_done(root_volume);
385 	}
386 	hammer_unlock(&hmp->blkmap_lock);
387 failed:
388 
389 	/*
390 	 * Cleanup
391 	 */
392 	if (buffer1)
393 		hammer_rel_buffer(buffer1, 0);
394 	if (buffer2)
395 		hammer_rel_buffer(buffer2, 0);
396 	if (buffer3)
397 		hammer_rel_buffer(buffer3, 0);
398 
399 	return(result_offset);
400 }
401 
402 /*
403  * Frontend function - Reserve bytes in a zone.
404  *
405  * This code reserves bytes out of a blockmap without committing to any
406  * meta-data modifications, allowing the front-end to directly issue disk
407  * write I/O for big-blocks of data
408  *
409  * The backend later finalizes the reservation with hammer_blockmap_finalize()
410  * upon committing the related record.
411  */
412 hammer_reserve_t
413 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
414 			hammer_off_t *zone_offp, int *errorp)
415 {
416 	hammer_volume_t root_volume;
417 	hammer_blockmap_t blockmap;
418 	hammer_blockmap_t freemap;
419 	struct hammer_blockmap_layer1 *layer1;
420 	struct hammer_blockmap_layer2 *layer2;
421 	hammer_buffer_t buffer1 = NULL;
422 	hammer_buffer_t buffer2 = NULL;
423 	hammer_buffer_t buffer3 = NULL;
424 	hammer_off_t tmp_offset;
425 	hammer_off_t next_offset;
426 	hammer_off_t layer1_offset;
427 	hammer_off_t layer2_offset;
428 	hammer_off_t base_off;
429 	hammer_reserve_t resv;
430 	hammer_reserve_t resx;
431 	int loops = 0;
432 	int offset;
433 
434 	/*
435 	 * Setup
436 	 */
437 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
438 	root_volume = hammer_get_root_volume(hmp, errorp);
439 	if (*errorp)
440 		return(NULL);
441 	blockmap = &hmp->blockmap[zone];
442 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
443 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
444 
445 	/*
446 	 * Deal with alignment and buffer-boundary issues.
447 	 *
448 	 * Be careful, certain primary alignments are used below to allocate
449 	 * new blockmap blocks.
450 	 */
451 	bytes = (bytes + 15) & ~15;
452 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
453 
454 	next_offset = blockmap->next_offset;
455 again:
456 	resv = NULL;
457 	/*
458 	 * Check for wrap
459 	 */
460 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
461 		if (++loops == 2) {
462 			*errorp = ENOSPC;
463 			goto failed;
464 		}
465 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
466 	}
467 
468 	/*
469 	 * The allocation request may not cross a buffer boundary.  Special
470 	 * large allocations must not cross a big-block boundary.
471 	 */
472 	tmp_offset = next_offset + bytes - 1;
473 	if (bytes <= HAMMER_BUFSIZE) {
474 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
475 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
476 			goto again;
477 		}
478 	} else {
479 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
480 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
481 			goto again;
482 		}
483 	}
484 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
485 
486 	/*
487 	 * Dive layer 1.
488 	 */
489 	layer1_offset = freemap->phys_offset +
490 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
491 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
492 	if (*errorp)
493 		goto failed;
494 
495 	/*
496 	 * Check CRC.
497 	 */
498 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
499 		hammer_lock_ex(&hmp->blkmap_lock);
500 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
501 			panic("CRC FAILED: LAYER1");
502 		hammer_unlock(&hmp->blkmap_lock);
503 	}
504 
505 	/*
506 	 * If we are at a big-block boundary and layer1 indicates no
507 	 * free big-blocks, then we cannot allocate a new big-block in
508 	 * layer2, skip to the next layer1 entry.
509 	 */
510 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
511 	    layer1->blocks_free == 0) {
512 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
513 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
514 		if (hammer_check_volume(hmp, &next_offset))
515 			goto failed;
516 		goto again;
517 	}
518 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
519 
520 	/*
521 	 * Dive layer 2, each entry represents a big-block.
522 	 */
523 	layer2_offset = layer1->phys_offset +
524 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
525 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
526 	if (*errorp)
527 		goto failed;
528 
529 	/*
530 	 * Check CRC if not allocating into uninitialized space (which we
531 	 * aren't when reserving space).
532 	 */
533 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
534 		hammer_lock_ex(&hmp->blkmap_lock);
535 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
536 			panic("CRC FAILED: LAYER2");
537 		hammer_unlock(&hmp->blkmap_lock);
538 	}
539 
540 	/*
541 	 * Skip the layer if the zone is owned by someone other then us.
542 	 */
543 	if (layer2->zone && layer2->zone != zone) {
544 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
545 		goto again;
546 	}
547 	if (offset < layer2->append_off) {
548 		next_offset += layer2->append_off - offset;
549 		goto again;
550 	}
551 
552 	/*
553 	 * We need the lock from this point on.  We have to re-check zone
554 	 * ownership after acquiring the lock and also check for reservations.
555 	 */
556 	hammer_lock_ex(&hmp->blkmap_lock);
557 
558 	if (layer2->zone && layer2->zone != zone) {
559 		hammer_unlock(&hmp->blkmap_lock);
560 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
561 		goto again;
562 	}
563 	if (offset < layer2->append_off) {
564 		hammer_unlock(&hmp->blkmap_lock);
565 		next_offset += layer2->append_off - offset;
566 		goto again;
567 	}
568 
569 	/*
570 	 * The big-block might be reserved by another zone.  If it is reserved
571 	 * by our zone we may have to move next_offset past the append_off.
572 	 */
573 	base_off = hammer_xlate_to_zone2(next_offset &
574 					~HAMMER_BIGBLOCK_MASK64);
575 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
576 	if (resv) {
577 		if (resv->zone != zone) {
578 			hammer_unlock(&hmp->blkmap_lock);
579 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
580 				      ~HAMMER_BIGBLOCK_MASK64;
581 			goto again;
582 		}
583 		if (offset < resv->append_off) {
584 			hammer_unlock(&hmp->blkmap_lock);
585 			next_offset += resv->append_off - offset;
586 			goto again;
587 		}
588 		++resv->refs;
589 		resx = NULL;
590 	} else {
591 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
592 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
593 		resx->refs = 1;
594 		resx->zone = zone;
595 		resx->zone_offset = base_off;
596 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
597 			resx->flags |= HAMMER_RESF_LAYER2FREE;
598 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
599 		KKASSERT(resv == NULL);
600 		resv = resx;
601 		++hammer_count_reservations;
602 	}
603 	resv->append_off = offset + bytes;
604 
605 	/*
606 	 * If we are not reserving a whole buffer but are at the start of
607 	 * a new block, call hammer_bnew() to avoid a disk read.
608 	 *
609 	 * If we are reserving a whole buffer (or more), the caller will
610 	 * probably use a direct read, so do nothing.
611 	 *
612 	 * If we do not have a whole lot of system memory we really can't
613 	 * afford to block while holding the blkmap_lock!
614 	 */
615 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
616 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
617 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
618 			if (*errorp)
619 				goto failed;
620 		}
621 	}
622 
623 	/*
624 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
625 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
626 	 * be big-block aligned.
627 	 */
628 	blockmap->next_offset = next_offset + bytes;
629 	hammer_unlock(&hmp->blkmap_lock);
630 
631 failed:
632 	if (buffer1)
633 		hammer_rel_buffer(buffer1, 0);
634 	if (buffer2)
635 		hammer_rel_buffer(buffer2, 0);
636 	if (buffer3)
637 		hammer_rel_buffer(buffer3, 0);
638 	hammer_rel_volume(root_volume, 0);
639 	*zone_offp = next_offset;
640 
641 	return(resv);
642 }
643 
644 /*
645  * Frontend function - Dedup bytes in a zone.
646  *
647  * Dedup reservations work exactly the same as normal write reservations
648  * except we only adjust bytes_free field and don't touch append offset.
649  * Finalization mechanic for dedup reservations is also the same as for
650  * normal write ones - the backend finalizes the reservation with
651  * hammer_blockmap_finalize().
652  */
653 hammer_reserve_t
654 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
655 			      hammer_off_t zone_offset, int *errorp)
656 {
657 	hammer_volume_t root_volume;
658 	hammer_blockmap_t freemap;
659 	struct hammer_blockmap_layer1 *layer1;
660 	struct hammer_blockmap_layer2 *layer2;
661 	hammer_buffer_t buffer1 = NULL;
662 	hammer_buffer_t buffer2 = NULL;
663 	hammer_off_t layer1_offset;
664 	hammer_off_t layer2_offset;
665 	hammer_off_t base_off;
666 	hammer_reserve_t resv = NULL;
667 	hammer_reserve_t resx = NULL;
668 
669 	/*
670 	 * Setup
671 	 */
672 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
673 	root_volume = hammer_get_root_volume(hmp, errorp);
674 	if (*errorp)
675 		return (NULL);
676 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
677 	KKASSERT(freemap->phys_offset != 0);
678 
679 	bytes = (bytes + 15) & ~15;
680 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
681 
682 	/*
683 	 * Dive layer 1.
684 	 */
685 	layer1_offset = freemap->phys_offset +
686 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
687 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
688 	if (*errorp)
689 		goto failed;
690 
691 	/*
692 	 * Check CRC.
693 	 */
694 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
695 		hammer_lock_ex(&hmp->blkmap_lock);
696 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
697 			panic("CRC FAILED: LAYER1");
698 		hammer_unlock(&hmp->blkmap_lock);
699 	}
700 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
701 
702 	/*
703 	 * Dive layer 2, each entry represents a big-block.
704 	 */
705 	layer2_offset = layer1->phys_offset +
706 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
707 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
708 	if (*errorp)
709 		goto failed;
710 
711 	/*
712 	 * Check CRC.
713 	 */
714 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
715 		hammer_lock_ex(&hmp->blkmap_lock);
716 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
717 			panic("CRC FAILED: LAYER2");
718 		hammer_unlock(&hmp->blkmap_lock);
719 	}
720 
721 	/*
722 	 * Fail if the zone is owned by someone other than us.
723 	 */
724 	if (layer2->zone && layer2->zone != zone)
725 		goto failed;
726 
727 	/*
728 	 * We need the lock from this point on.  We have to re-check zone
729 	 * ownership after acquiring the lock and also check for reservations.
730 	 */
731 	hammer_lock_ex(&hmp->blkmap_lock);
732 
733 	if (layer2->zone && layer2->zone != zone) {
734 		hammer_unlock(&hmp->blkmap_lock);
735 		goto failed;
736 	}
737 
738 	base_off = hammer_xlate_to_zone2(zone_offset &
739 					~HAMMER_BIGBLOCK_MASK64);
740 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
741 	if (resv) {
742 		if (resv->zone != zone) {
743 			hammer_unlock(&hmp->blkmap_lock);
744 			resv = NULL;
745 			goto failed;
746 		}
747 		/*
748 		 * Due to possible big-block underflow we can't simply
749 		 * subtract bytes from bytes_free.
750 		 */
751 		if (update_bytes_free(resv, bytes) == 0) {
752 			hammer_unlock(&hmp->blkmap_lock);
753 			resv = NULL;
754 			goto failed;
755 		}
756 		++resv->refs;
757 		resx = NULL;
758 	} else {
759 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
760 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
761 		resx->refs = 1;
762 		resx->zone = zone;
763 		resx->bytes_free = layer2->bytes_free;
764 		/*
765 		 * Due to possible big-block underflow we can't simply
766 		 * subtract bytes from bytes_free.
767 		 */
768 		if (update_bytes_free(resx, bytes) == 0) {
769 			hammer_unlock(&hmp->blkmap_lock);
770 			kfree(resx, hmp->m_misc);
771 			goto failed;
772 		}
773 		resx->zone_offset = base_off;
774 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
775 		KKASSERT(resv == NULL);
776 		resv = resx;
777 		++hammer_count_reservations;
778 	}
779 
780 	hammer_unlock(&hmp->blkmap_lock);
781 
782 failed:
783 	if (buffer1)
784 		hammer_rel_buffer(buffer1, 0);
785 	if (buffer2)
786 		hammer_rel_buffer(buffer2, 0);
787 	hammer_rel_volume(root_volume, 0);
788 
789 	return(resv);
790 }
791 
792 static int
793 update_bytes_free(hammer_reserve_t resv, int bytes)
794 {
795 	int32_t temp;
796 
797 	/*
798 	 * Big-block underflow check
799 	 */
800 	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
801 	cpu_ccfence(); /* XXX do we really need it ? */
802 	if (temp > resv->bytes_free) {
803 		kprintf("BIGBLOCK UNDERFLOW\n");
804 		return (0);
805 	}
806 
807 	resv->bytes_free -= bytes;
808 	return (1);
809 }
810 
811 /*
812  * Dereference a reservation structure.  Upon the final release the
813  * underlying big-block is checked and if it is entirely free we delete
814  * any related HAMMER buffers to avoid potential conflicts with future
815  * reuse of the big-block.
816  */
817 void
818 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
819 {
820 	hammer_off_t base_offset;
821 	int error;
822 
823 	KKASSERT(resv->refs > 0);
824 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
825 		 HAMMER_ZONE_RAW_BUFFER);
826 
827 	/*
828 	 * Setting append_off to the max prevents any new allocations
829 	 * from occuring while we are trying to dispose of the reservation,
830 	 * allowing us to safely delete any related HAMMER buffers.
831 	 *
832 	 * If we are unable to clean out all related HAMMER buffers we
833 	 * requeue the delay.
834 	 */
835 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
836 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
837 		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
838 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
839 			hammer_dedup_cache_inval(hmp, base_offset);
840 		error = hammer_del_buffers(hmp, base_offset,
841 					   resv->zone_offset,
842 					   HAMMER_BIGBLOCK_SIZE,
843 					   1);
844 		if (hammer_debug_general & 0x20000) {
845 			kprintf("hammer: delbgblk %016jx error %d\n",
846 				(intmax_t)base_offset, error);
847 		}
848 		if (error)
849 			hammer_reserve_setdelay(hmp, resv);
850 	}
851 	if (--resv->refs == 0) {
852 		if (hammer_debug_general & 0x20000) {
853 			kprintf("hammer: delresvr %016jx zone %02x\n",
854 				(intmax_t)resv->zone_offset, resv->zone);
855 		}
856 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
857 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
858 		kfree(resv, hmp->m_misc);
859 		--hammer_count_reservations;
860 	}
861 }
862 
863 /*
864  * Prevent a potentially free big-block from being reused until after
865  * the related flushes have completely cycled, otherwise crash recovery
866  * could resurrect a data block that was already reused and overwritten.
867  *
868  * The caller might reset the underlying layer2 entry's append_off to 0, so
869  * our covering append_off must be set to max to prevent any reallocation
870  * until after the flush delays complete, not to mention proper invalidation
871  * of any underlying cached blocks.
872  */
873 static void
874 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
875 			int zone, struct hammer_blockmap_layer2 *layer2)
876 {
877 	hammer_reserve_t resv;
878 
879 	/*
880 	 * Allocate the reservation if necessary.
881 	 *
882 	 * NOTE: need lock in future around resv lookup/allocation and
883 	 * the setdelay call, currently refs is not bumped until the call.
884 	 */
885 again:
886 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
887 	if (resv == NULL) {
888 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
889 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
890 		resv->zone = zone;
891 		resv->zone_offset = base_offset;
892 		resv->refs = 0;
893 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
894 
895 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
896 			resv->flags |= HAMMER_RESF_LAYER2FREE;
897 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
898 			kfree(resv, hmp->m_misc);
899 			goto again;
900 		}
901 		++hammer_count_reservations;
902 	} else {
903 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
904 			resv->flags |= HAMMER_RESF_LAYER2FREE;
905 	}
906 	hammer_reserve_setdelay(hmp, resv);
907 }
908 
909 /*
910  * Enter the reservation on the on-delay list, or move it if it
911  * is already on the list.
912  */
913 static void
914 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
915 {
916 	if (resv->flags & HAMMER_RESF_ONDELAY) {
917 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
918 		resv->flush_group = hmp->flusher.next + 1;
919 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
920 	} else {
921 		++resv->refs;
922 		++hmp->rsv_fromdelay;
923 		resv->flags |= HAMMER_RESF_ONDELAY;
924 		resv->flush_group = hmp->flusher.next + 1;
925 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
926 	}
927 }
928 
929 /*
930  * Reserve has reached its flush point, remove it from the delay list
931  * and finish it off.  hammer_blockmap_reserve_complete() inherits
932  * the ondelay reference.
933  */
934 void
935 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
936 {
937 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
938 	resv->flags &= ~HAMMER_RESF_ONDELAY;
939 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
940 	--hmp->rsv_fromdelay;
941 	hammer_blockmap_reserve_complete(hmp, resv);
942 }
943 
944 /*
945  * Backend function - free (offset, bytes) in a zone.
946  *
947  * XXX error return
948  */
949 void
950 hammer_blockmap_free(hammer_transaction_t trans,
951 		     hammer_off_t zone_offset, int bytes)
952 {
953 	hammer_mount_t hmp;
954 	hammer_volume_t root_volume;
955 	hammer_blockmap_t freemap;
956 	struct hammer_blockmap_layer1 *layer1;
957 	struct hammer_blockmap_layer2 *layer2;
958 	hammer_buffer_t buffer1 = NULL;
959 	hammer_buffer_t buffer2 = NULL;
960 	hammer_off_t layer1_offset;
961 	hammer_off_t layer2_offset;
962 	hammer_off_t base_off;
963 	int error;
964 	int zone;
965 
966 	if (bytes == 0)
967 		return;
968 	hmp = trans->hmp;
969 
970 	/*
971 	 * Alignment
972 	 */
973 	bytes = (bytes + 15) & ~15;
974 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
975 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
976 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
977 
978 	/*
979 	 * Basic zone validation & locking
980 	 */
981 	zone = HAMMER_ZONE_DECODE(zone_offset);
982 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
983 	root_volume = trans->rootvol;
984 	error = 0;
985 
986 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
987 
988 	/*
989 	 * Dive layer 1.
990 	 */
991 	layer1_offset = freemap->phys_offset +
992 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
993 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
994 	if (error)
995 		goto failed;
996 	KKASSERT(layer1->phys_offset &&
997 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
998 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
999 		hammer_lock_ex(&hmp->blkmap_lock);
1000 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1001 			panic("CRC FAILED: LAYER1");
1002 		hammer_unlock(&hmp->blkmap_lock);
1003 	}
1004 
1005 	/*
1006 	 * Dive layer 2, each entry represents a big-block.
1007 	 */
1008 	layer2_offset = layer1->phys_offset +
1009 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1010 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1011 	if (error)
1012 		goto failed;
1013 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1014 		hammer_lock_ex(&hmp->blkmap_lock);
1015 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1016 			panic("CRC FAILED: LAYER2");
1017 		hammer_unlock(&hmp->blkmap_lock);
1018 	}
1019 
1020 	hammer_lock_ex(&hmp->blkmap_lock);
1021 
1022 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1023 
1024 	/*
1025 	 * Free space previously allocated via blockmap_alloc().
1026 	 *
1027 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1028 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1029 	 */
1030 	KKASSERT(layer2->zone == zone);
1031 	layer2->bytes_free += bytes;
1032 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1033 
1034 	/*
1035 	 * If a big-block becomes entirely free we must create a covering
1036 	 * reservation to prevent premature reuse.  Note, however, that
1037 	 * the big-block and/or reservation may still have an append_off
1038 	 * that allows further (non-reused) allocations.
1039 	 *
1040 	 * Once the reservation has been made we re-check layer2 and if
1041 	 * the big-block is still entirely free we reset the layer2 entry.
1042 	 * The reservation will prevent premature reuse.
1043 	 *
1044 	 * NOTE: hammer_buffer's are only invalidated when the reservation
1045 	 * is completed, if the layer2 entry is still completely free at
1046 	 * that time.  Any allocations from the reservation that may have
1047 	 * occured in the mean time, or active references on the reservation
1048 	 * from new pending allocations, will prevent the invalidation from
1049 	 * occuring.
1050 	 */
1051 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1052 		base_off = hammer_xlate_to_zone2(zone_offset &
1053 						~HAMMER_BIGBLOCK_MASK64);
1054 
1055 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1056 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1057 			layer2->zone = 0;
1058 			layer2->append_off = 0;
1059 			hammer_modify_buffer(trans, buffer1,
1060 					     layer1, sizeof(*layer1));
1061 			++layer1->blocks_free;
1062 			layer1->layer1_crc = crc32(layer1,
1063 						   HAMMER_LAYER1_CRCSIZE);
1064 			hammer_modify_buffer_done(buffer1);
1065 			hammer_modify_volume_field(trans,
1066 					trans->rootvol,
1067 					vol0_stat_freebigblocks);
1068 			++root_volume->ondisk->vol0_stat_freebigblocks;
1069 			hmp->copy_stat_freebigblocks =
1070 			   root_volume->ondisk->vol0_stat_freebigblocks;
1071 			hammer_modify_volume_done(trans->rootvol);
1072 		}
1073 	}
1074 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1075 	hammer_modify_buffer_done(buffer2);
1076 	hammer_unlock(&hmp->blkmap_lock);
1077 
1078 failed:
1079 	if (buffer1)
1080 		hammer_rel_buffer(buffer1, 0);
1081 	if (buffer2)
1082 		hammer_rel_buffer(buffer2, 0);
1083 }
1084 
1085 int
1086 hammer_blockmap_dedup(hammer_transaction_t trans,
1087 		     hammer_off_t zone_offset, int bytes)
1088 {
1089 	hammer_mount_t hmp;
1090 	hammer_blockmap_t freemap;
1091 	struct hammer_blockmap_layer1 *layer1;
1092 	struct hammer_blockmap_layer2 *layer2;
1093 	hammer_buffer_t buffer1 = NULL;
1094 	hammer_buffer_t buffer2 = NULL;
1095 	hammer_off_t layer1_offset;
1096 	hammer_off_t layer2_offset;
1097 	int32_t temp;
1098 	int error;
1099 	int zone __debugvar;
1100 
1101 	if (bytes == 0)
1102 		return (0);
1103 	hmp = trans->hmp;
1104 
1105 	/*
1106 	 * Alignment
1107 	 */
1108 	bytes = (bytes + 15) & ~15;
1109 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1110 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1111 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
1112 
1113 	/*
1114 	 * Basic zone validation & locking
1115 	 */
1116 	zone = HAMMER_ZONE_DECODE(zone_offset);
1117 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1118 	error = 0;
1119 
1120 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1121 
1122 	/*
1123 	 * Dive layer 1.
1124 	 */
1125 	layer1_offset = freemap->phys_offset +
1126 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1127 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1128 	if (error)
1129 		goto failed;
1130 	KKASSERT(layer1->phys_offset &&
1131 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1132 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1133 		hammer_lock_ex(&hmp->blkmap_lock);
1134 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1135 			panic("CRC FAILED: LAYER1");
1136 		hammer_unlock(&hmp->blkmap_lock);
1137 	}
1138 
1139 	/*
1140 	 * Dive layer 2, each entry represents a big-block.
1141 	 */
1142 	layer2_offset = layer1->phys_offset +
1143 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1144 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1145 	if (error)
1146 		goto failed;
1147 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1148 		hammer_lock_ex(&hmp->blkmap_lock);
1149 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1150 			panic("CRC FAILED: LAYER2");
1151 		hammer_unlock(&hmp->blkmap_lock);
1152 	}
1153 
1154 	hammer_lock_ex(&hmp->blkmap_lock);
1155 
1156 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1157 
1158 	/*
1159 	 * Free space previously allocated via blockmap_alloc().
1160 	 *
1161 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1162 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1163 	 */
1164 	KKASSERT(layer2->zone == zone);
1165 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1166 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1167 	if (temp > layer2->bytes_free) {
1168 		error = ERANGE;
1169 		goto underflow;
1170 	}
1171 	layer2->bytes_free -= bytes;
1172 
1173 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1174 
1175 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1176 underflow:
1177 	hammer_modify_buffer_done(buffer2);
1178 	hammer_unlock(&hmp->blkmap_lock);
1179 
1180 failed:
1181 	if (buffer1)
1182 		hammer_rel_buffer(buffer1, 0);
1183 	if (buffer2)
1184 		hammer_rel_buffer(buffer2, 0);
1185 	return (error);
1186 }
1187 
1188 /*
1189  * Backend function - finalize (offset, bytes) in a zone.
1190  *
1191  * Allocate space that was previously reserved by the frontend.
1192  */
1193 int
1194 hammer_blockmap_finalize(hammer_transaction_t trans,
1195 			 hammer_reserve_t resv,
1196 			 hammer_off_t zone_offset, int bytes)
1197 {
1198 	hammer_mount_t hmp;
1199 	hammer_volume_t root_volume;
1200 	hammer_blockmap_t freemap;
1201 	struct hammer_blockmap_layer1 *layer1;
1202 	struct hammer_blockmap_layer2 *layer2;
1203 	hammer_buffer_t buffer1 = NULL;
1204 	hammer_buffer_t buffer2 = NULL;
1205 	hammer_off_t layer1_offset;
1206 	hammer_off_t layer2_offset;
1207 	int error;
1208 	int zone;
1209 	int offset;
1210 
1211 	if (bytes == 0)
1212 		return(0);
1213 	hmp = trans->hmp;
1214 
1215 	/*
1216 	 * Alignment
1217 	 */
1218 	bytes = (bytes + 15) & ~15;
1219 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1220 
1221 	/*
1222 	 * Basic zone validation & locking
1223 	 */
1224 	zone = HAMMER_ZONE_DECODE(zone_offset);
1225 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1226 	root_volume = trans->rootvol;
1227 	error = 0;
1228 
1229 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1230 
1231 	/*
1232 	 * Dive layer 1.
1233 	 */
1234 	layer1_offset = freemap->phys_offset +
1235 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1236 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1237 	if (error)
1238 		goto failed;
1239 	KKASSERT(layer1->phys_offset &&
1240 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1241 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1242 		hammer_lock_ex(&hmp->blkmap_lock);
1243 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1244 			panic("CRC FAILED: LAYER1");
1245 		hammer_unlock(&hmp->blkmap_lock);
1246 	}
1247 
1248 	/*
1249 	 * Dive layer 2, each entry represents a big-block.
1250 	 */
1251 	layer2_offset = layer1->phys_offset +
1252 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1253 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1254 	if (error)
1255 		goto failed;
1256 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1257 		hammer_lock_ex(&hmp->blkmap_lock);
1258 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1259 			panic("CRC FAILED: LAYER2");
1260 		hammer_unlock(&hmp->blkmap_lock);
1261 	}
1262 
1263 	hammer_lock_ex(&hmp->blkmap_lock);
1264 
1265 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1266 
1267 	/*
1268 	 * Finalize some or all of the space covered by a current
1269 	 * reservation.  An allocation in the same layer may have
1270 	 * already assigned ownership.
1271 	 */
1272 	if (layer2->zone == 0) {
1273 		hammer_modify_buffer(trans, buffer1,
1274 				     layer1, sizeof(*layer1));
1275 		--layer1->blocks_free;
1276 		layer1->layer1_crc = crc32(layer1,
1277 					   HAMMER_LAYER1_CRCSIZE);
1278 		hammer_modify_buffer_done(buffer1);
1279 		layer2->zone = zone;
1280 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1281 		KKASSERT(layer2->append_off == 0);
1282 		hammer_modify_volume_field(trans,
1283 				trans->rootvol,
1284 				vol0_stat_freebigblocks);
1285 		--root_volume->ondisk->vol0_stat_freebigblocks;
1286 		hmp->copy_stat_freebigblocks =
1287 		   root_volume->ondisk->vol0_stat_freebigblocks;
1288 		hammer_modify_volume_done(trans->rootvol);
1289 	}
1290 	if (layer2->zone != zone)
1291 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1292 	KKASSERT(layer2->zone == zone);
1293 	KKASSERT(bytes != 0);
1294 	layer2->bytes_free -= bytes;
1295 
1296 	if (resv) {
1297 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1298 	}
1299 
1300 	/*
1301 	 * Finalizations can occur out of order, or combined with allocations.
1302 	 * append_off must be set to the highest allocated offset.
1303 	 */
1304 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1305 	if (layer2->append_off < offset)
1306 		layer2->append_off = offset;
1307 
1308 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1309 	hammer_modify_buffer_done(buffer2);
1310 	hammer_unlock(&hmp->blkmap_lock);
1311 
1312 failed:
1313 	if (buffer1)
1314 		hammer_rel_buffer(buffer1, 0);
1315 	if (buffer2)
1316 		hammer_rel_buffer(buffer2, 0);
1317 	return(error);
1318 }
1319 
1320 /*
1321  * Return the approximate number of free bytes in the big-block
1322  * containing the specified blockmap offset.
1323  *
1324  * WARNING: A negative number can be returned if data de-dup exists,
1325  *	    and the result will also not represent he actual number
1326  *	    of free bytes in this case.
1327  *
1328  *	    This code is used only by the reblocker.
1329  */
1330 int
1331 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1332 			int *curp, int *errorp)
1333 {
1334 	hammer_volume_t root_volume;
1335 	hammer_blockmap_t blockmap;
1336 	hammer_blockmap_t freemap;
1337 	struct hammer_blockmap_layer1 *layer1;
1338 	struct hammer_blockmap_layer2 *layer2;
1339 	hammer_buffer_t buffer = NULL;
1340 	hammer_off_t layer1_offset;
1341 	hammer_off_t layer2_offset;
1342 	int32_t bytes;
1343 	int zone;
1344 
1345 	zone = HAMMER_ZONE_DECODE(zone_offset);
1346 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1347 	root_volume = hammer_get_root_volume(hmp, errorp);
1348 	if (*errorp) {
1349 		*curp = 0;
1350 		return(0);
1351 	}
1352 	blockmap = &hmp->blockmap[zone];
1353 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1354 
1355 	/*
1356 	 * Dive layer 1.
1357 	 */
1358 	layer1_offset = freemap->phys_offset +
1359 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1360 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1361 	if (*errorp) {
1362 		*curp = 0;
1363 		bytes = 0;
1364 		goto failed;
1365 	}
1366 	KKASSERT(layer1->phys_offset);
1367 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1368 		hammer_lock_ex(&hmp->blkmap_lock);
1369 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1370 			panic("CRC FAILED: LAYER1");
1371 		hammer_unlock(&hmp->blkmap_lock);
1372 	}
1373 
1374 	/*
1375 	 * Dive layer 2, each entry represents a big-block.
1376 	 *
1377 	 * (reuse buffer, layer1 pointer becomes invalid)
1378 	 */
1379 	layer2_offset = layer1->phys_offset +
1380 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1381 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1382 	if (*errorp) {
1383 		*curp = 0;
1384 		bytes = 0;
1385 		goto failed;
1386 	}
1387 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1388 		hammer_lock_ex(&hmp->blkmap_lock);
1389 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1390 			panic("CRC FAILED: LAYER2");
1391 		hammer_unlock(&hmp->blkmap_lock);
1392 	}
1393 	KKASSERT(layer2->zone == zone);
1394 
1395 	bytes = layer2->bytes_free;
1396 
1397 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1398 		*curp = 0;
1399 	else
1400 		*curp = 1;
1401 failed:
1402 	if (buffer)
1403 		hammer_rel_buffer(buffer, 0);
1404 	hammer_rel_volume(root_volume, 0);
1405 	if (hammer_debug_general & 0x0800) {
1406 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1407 			(long long)zone_offset, bytes);
1408 	}
1409 	return(bytes);
1410 }
1411 
1412 
1413 /*
1414  * Lookup a blockmap offset and verify blockmap layers.
1415  */
1416 hammer_off_t
1417 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1418 			int *errorp)
1419 {
1420 	hammer_volume_t root_volume;
1421 	hammer_blockmap_t freemap;
1422 	struct hammer_blockmap_layer1 *layer1;
1423 	struct hammer_blockmap_layer2 *layer2;
1424 	hammer_buffer_t buffer = NULL;
1425 	hammer_off_t layer1_offset;
1426 	hammer_off_t layer2_offset;
1427 	hammer_off_t result_offset;
1428 	hammer_off_t base_off;
1429 	hammer_reserve_t resv __debugvar;
1430 	int zone;
1431 
1432 	/*
1433 	 * Calculate the zone-2 offset.
1434 	 */
1435 	zone = HAMMER_ZONE_DECODE(zone_offset);
1436 	result_offset = hammer_xlate_to_zone2(zone_offset);
1437 
1438 	/*
1439 	 * Validate the allocation zone
1440 	 */
1441 	root_volume = hammer_get_root_volume(hmp, errorp);
1442 	if (*errorp)
1443 		return(0);
1444 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1445 	KKASSERT(freemap->phys_offset != 0);
1446 
1447 	/*
1448 	 * Dive layer 1.
1449 	 */
1450 	layer1_offset = freemap->phys_offset +
1451 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1452 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1453 	if (*errorp)
1454 		goto failed;
1455 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1456 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1457 		hammer_lock_ex(&hmp->blkmap_lock);
1458 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1459 			panic("CRC FAILED: LAYER1");
1460 		hammer_unlock(&hmp->blkmap_lock);
1461 	}
1462 
1463 	/*
1464 	 * Dive layer 2, each entry represents a big-block.
1465 	 */
1466 	layer2_offset = layer1->phys_offset +
1467 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1468 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1469 
1470 	if (*errorp)
1471 		goto failed;
1472 	if (layer2->zone == 0) {
1473 		base_off = hammer_xlate_to_zone2(zone_offset &
1474 						~HAMMER_BIGBLOCK_MASK64);
1475 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1476 				 base_off);
1477 		KKASSERT(resv && resv->zone == zone);
1478 
1479 	} else if (layer2->zone != zone) {
1480 		panic("hammer_blockmap_lookup_verify: bad zone %d/%d",
1481 			layer2->zone, zone);
1482 	}
1483 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1484 		hammer_lock_ex(&hmp->blkmap_lock);
1485 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1486 			panic("CRC FAILED: LAYER2");
1487 		hammer_unlock(&hmp->blkmap_lock);
1488 	}
1489 
1490 failed:
1491 	if (buffer)
1492 		hammer_rel_buffer(buffer, 0);
1493 	hammer_rel_volume(root_volume, 0);
1494 	if (hammer_debug_general & 0x0800) {
1495 		kprintf("hammer_blockmap_lookup_verify: %016llx -> %016llx\n",
1496 			(long long)zone_offset, (long long)result_offset);
1497 	}
1498 	return(result_offset);
1499 }
1500 
1501 
1502 /*
1503  * Check space availability
1504  *
1505  * MPSAFE - does not require fs_token
1506  */
1507 int
1508 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1509 {
1510 	const int in_size = sizeof(struct hammer_inode_data) +
1511 			    sizeof(union hammer_btree_elm);
1512 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1513 	int64_t usedbytes;
1514 
1515 	usedbytes = hmp->rsv_inodes * in_size +
1516 		    hmp->rsv_recs * rec_size +
1517 		    hmp->rsv_databytes +
1518 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1519 		    ((int64_t)hammer_limit_dirtybufspace) +
1520 		    (slop << HAMMER_BIGBLOCK_BITS);
1521 
1522 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1523 	if (resp)
1524 		*resp = usedbytes;
1525 
1526 	if (hmp->copy_stat_freebigblocks >=
1527 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1528 		return(0);
1529 	}
1530 	return (ENOSPC);
1531 }
1532 
1533 static int
1534 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1535 {
1536 	hammer_blockmap_t freemap;
1537 	struct hammer_blockmap_layer1 *layer1;
1538 	hammer_buffer_t buffer1 = NULL;
1539 	hammer_off_t layer1_offset, offset;
1540 	int zone, vol_no, error = 0;
1541 
1542 	offset = *offsetp;
1543 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1544 
1545 	layer1_offset = freemap->phys_offset +
1546 			HAMMER_BLOCKMAP_LAYER1_OFFSET(offset);
1547 
1548 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1549 	if (error)
1550 		goto end;
1551 
1552 	/*
1553 	 * No more available space in layer1s of this volume.
1554 	 */
1555 	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
1556 		zone = HAMMER_ZONE_DECODE(offset);
1557 		vol_no = HAMMER_VOL_DECODE(offset) + 1;
1558 		KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1559 		if (vol_no == HAMMER_MAX_VOLUMES) {
1560 			vol_no = 0;
1561 			++zone;
1562 		}
1563 		offset &= HAMMER_BLOCKMAP_LAYER2_MASK;
1564 		*offsetp = HAMMER_ENCODE(zone, vol_no, offset);
1565 	}
1566 end:
1567 	if (buffer1)
1568 		hammer_rel_buffer(buffer1, 0);
1569 	return(error);
1570 }
1571