xref: /dflybsd-src/sys/vfs/hammer/hammer_blockmap.c (revision 72d6a027d96b30c52d242ce162021efcecfe2bb9)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36 
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48 
49 /*
50  * Reserved big-blocks red-black tree support
51  */
52 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
53 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
54 
55 static int
56 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 {
58 	if (res1->zone_offset < res2->zone_offset)
59 		return(-1);
60 	if (res1->zone_offset > res2->zone_offset)
61 		return(1);
62 	return(0);
63 }
64 
65 /*
66  * Allocate bytes from a zone
67  */
68 hammer_off_t
69 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
70 		      hammer_off_t hint, int *errorp)
71 {
72 	hammer_mount_t hmp;
73 	hammer_volume_t root_volume;
74 	hammer_blockmap_t blockmap;
75 	hammer_blockmap_t freemap;
76 	hammer_reserve_t resv;
77 	struct hammer_blockmap_layer1 *layer1;
78 	struct hammer_blockmap_layer2 *layer2;
79 	hammer_buffer_t buffer1 = NULL;
80 	hammer_buffer_t buffer2 = NULL;
81 	hammer_buffer_t buffer3 = NULL;
82 	hammer_off_t tmp_offset;
83 	hammer_off_t next_offset;
84 	hammer_off_t result_offset;
85 	hammer_off_t layer1_offset;
86 	hammer_off_t layer2_offset;
87 	hammer_off_t base_off;
88 	int loops = 0;
89 	int offset;		/* offset within big-block */
90 	int use_hint;
91 
92 	hmp = trans->hmp;
93 
94 	/*
95 	 * Deal with alignment and buffer-boundary issues.
96 	 *
97 	 * Be careful, certain primary alignments are used below to allocate
98 	 * new blockmap blocks.
99 	 */
100 	bytes = (bytes + 15) & ~15;
101 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
102 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
103 
104 	/*
105 	 * Setup
106 	 */
107 	root_volume = trans->rootvol;
108 	*errorp = 0;
109 	blockmap = &hmp->blockmap[zone];
110 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
111 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
112 
113 	/*
114 	 * Use the hint if we have one.
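	 *
	 * The hint is only honored when it lies within the target zone;
	 * it is rounded up to the 16-byte allocation granularity before use.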
115 	 */
116 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
117 		next_offset = (hint + 15) & ~(hammer_off_t)15;
118 		use_hint = 1;
119 	} else {
120 		next_offset = blockmap->next_offset;
121 		use_hint = 0;
122 	}
123 again:
124 
125 	/*
126 	 * use_hint is turned off if we leave the hinted big-block.
127 	 */
128 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
129 		next_offset = blockmap->next_offset;
130 		use_hint = 0;
131 	}
132 
133 	/*
134 	 * Check for wrap
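	 *
	 * Wrapping past the end of the zone a second time (loops == 2)
	 * means the entire zone was scanned without finding space, so
	 * fail with ENOSPC.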
135 	 */
136 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
137 		if (++loops == 2) {
138 			result_offset = 0;
139 			*errorp = ENOSPC;
140 			goto failed;
141 		}
142 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
143 	}
144 
145 	/*
146 	 * The allocation request may not cross a buffer boundary.  Special
147 	 * large allocations must not cross a large-block boundary.
148 	 */
149 	tmp_offset = next_offset + bytes - 1;
150 	if (bytes <= HAMMER_BUFSIZE) {
151 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
152 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
153 			goto again;
154 		}
155 	} else {
156 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
157 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
158 			goto again;
159 		}
160 	}
161 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
162 
163 	/*
164 	 * Dive layer 1.
165 	 */
166 	layer1_offset = freemap->phys_offset +
167 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
168 
169 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
170 	if (*errorp) {
171 		result_offset = 0;
172 		goto failed;
173 	}
174 
175 	/*
176 	 * Check CRC.
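	 *
	 * A transient mismatch can occur if another thread holding the
	 * blockmap lock is in the middle of updating the entry, so
	 * re-check while holding the lock before declaring corruption.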
177 	 */
178 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
179 		hammer_lock_ex(&hmp->blkmap_lock);
180 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
181 			panic("CRC FAILED: LAYER1");
182 		hammer_unlock(&hmp->blkmap_lock);
183 	}
184 
185 	/*
186 	 * If we are at a big-block boundary and layer1 indicates no
187 	 * free big-blocks, then we cannot allocate a new big-block in
188 	 * layer2; skip to the next layer1 entry.
189 	 */
190 	if (offset == 0 && layer1->blocks_free == 0) {
191 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
192 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
193 		goto again;
194 	}
195 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
196 
197 	/*
198 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
199 	 * on a volume that we are currently trying to remove from the
200 	 * file-system. This is used by the volume-del code together with
201 	 * the reblocker to free up a volume.
202 	 */
203 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
204 	    hmp->volume_to_remove) {
205 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
206 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
207 		goto again;
208 	}
209 
210 	/*
211 	 * Dive layer 2, each entry represents a large-block.
212 	 */
213 	layer2_offset = layer1->phys_offset +
214 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
215 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
216 	if (*errorp) {
217 		result_offset = 0;
218 		goto failed;
219 	}
220 
221 	/*
222 	 * Check CRC.  This can race another thread holding the lock
223 	 * and in the middle of modifying layer2.
224 	 */
225 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
226 		hammer_lock_ex(&hmp->blkmap_lock);
227 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
228 			panic("CRC FAILED: LAYER2");
229 		hammer_unlock(&hmp->blkmap_lock);
230 	}
231 
232 	/*
233 	 * Skip the layer if the zone is owned by someone other than us.
234 	 */
235 	if (layer2->zone && layer2->zone != zone) {
236 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
237 		goto again;
238 	}
239 	if (offset < layer2->append_off) {
240 		next_offset += layer2->append_off - offset;
241 		goto again;
242 	}
243 
244 	/*
245 	 * If operating in the current non-hint blockmap block, do not
246 	 * allow it to get over-full.  Also drop any active hinting so
247 	 * blockmap->next_offset is updated at the end.
248 	 *
249 	 * We do this for B-Tree and meta-data allocations to provide
250 	 * localization for updates.
251 	 */
252 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
253 	     zone == HAMMER_ZONE_META_INDEX) &&
254 	    offset >= HAMMER_LARGEBLOCK_OVERFILL &&
255 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
256 	) {
257 		if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
258 			next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
259 			use_hint = 0;
260 			goto again;
261 		}
262 	}
263 
264 	/*
265 	 * We need the lock from this point on.  We have to re-check zone
266 	 * ownership after acquiring the lock and also check for reservations.
267 	 */
268 	hammer_lock_ex(&hmp->blkmap_lock);
269 
270 	if (layer2->zone && layer2->zone != zone) {
271 		hammer_unlock(&hmp->blkmap_lock);
272 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
273 		goto again;
274 	}
275 	if (offset < layer2->append_off) {
276 		hammer_unlock(&hmp->blkmap_lock);
277 		next_offset += layer2->append_off - offset;
278 		goto again;
279 	}
280 
281 	/*
282 	 * The bigblock might be reserved by another zone.  If it is reserved
283 	 * by our zone we may have to move next_offset past the append_off.
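	 *
	 * The reservation tree is keyed by the zone-2 (raw buffer) address
	 * of the big-block, so mask next_offset down to its big-block base
	 * and re-encode it as a zone-2 address for the lookup.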
284 	 */
285 	base_off = (next_offset &
286 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
287 		    HAMMER_ZONE_RAW_BUFFER;
288 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
289 	if (resv) {
290 		if (resv->zone != zone) {
291 			hammer_unlock(&hmp->blkmap_lock);
292 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
293 				      ~HAMMER_LARGEBLOCK_MASK64;
294 			goto again;
295 		}
296 		if (offset < resv->append_off) {
297 			hammer_unlock(&hmp->blkmap_lock);
298 			next_offset += resv->append_off - offset;
299 			goto again;
300 		}
301 		++resv->refs;
302 	}
303 
304 	/*
305 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
306 	 * of the layer for real.  At this point we've validated any
307 	 * reservation that might exist and can just ignore resv.
308 	 */
309 	if (layer2->zone == 0) {
310 		/*
311 		 * Assign the bigblock to our zone
312 		 */
313 		hammer_modify_buffer(trans, buffer1,
314 				     layer1, sizeof(*layer1));
315 		--layer1->blocks_free;
316 		layer1->layer1_crc = crc32(layer1,
317 					   HAMMER_LAYER1_CRCSIZE);
318 		hammer_modify_buffer_done(buffer1);
319 		hammer_modify_buffer(trans, buffer2,
320 				     layer2, sizeof(*layer2));
321 		layer2->zone = zone;
322 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
323 		KKASSERT(layer2->append_off == 0);
324 		hammer_modify_volume_field(trans, trans->rootvol,
325 					   vol0_stat_freebigblocks);
326 		--root_volume->ondisk->vol0_stat_freebigblocks;
327 		hmp->copy_stat_freebigblocks =
328 			root_volume->ondisk->vol0_stat_freebigblocks;
329 		hammer_modify_volume_done(trans->rootvol);
330 	} else {
331 		hammer_modify_buffer(trans, buffer2,
332 				     layer2, sizeof(*layer2));
333 	}
334 	KKASSERT(layer2->zone == zone);
335 
336 	/*
337 	 * NOTE: bytes_free can legally go negative due to de-dup.
338 	 */
339 	layer2->bytes_free -= bytes;
340 	KKASSERT(layer2->append_off <= offset);
341 	layer2->append_off = offset + bytes;
342 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
343 	hammer_modify_buffer_done(buffer2);
344 
345 	/*
346 	 * We hold the blockmap lock and should be the only ones
347 	 * capable of modifying resv->append_off.  Track the allocation
348 	 * as appropriate.
349 	 */
350 	KKASSERT(bytes != 0);
351 	if (resv) {
352 		KKASSERT(resv->append_off <= offset);
353 		resv->append_off = offset + bytes;
354 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
355 		hammer_blockmap_reserve_complete(hmp, resv);
356 	}
357 
358 	/*
359 	 * If we are allocating from the base of a new buffer we can avoid
360 	 * a disk read by calling hammer_bnew().
361 	 */
362 	if ((next_offset & HAMMER_BUFMASK) == 0) {
363 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
364 				errorp, &buffer3);
365 	}
366 	result_offset = next_offset;
367 
368 	/*
369 	 * If we weren't supplied with a hint or could not use the hint
370 	 * then we wound up using blockmap->next_offset as the hint and
371 	 * need to save it.
372 	 */
373 	if (use_hint == 0) {
374 		hammer_modify_volume(NULL, root_volume, NULL, 0);
375 		blockmap->next_offset = next_offset + bytes;
376 		hammer_modify_volume_done(root_volume);
377 	}
378 	hammer_unlock(&hmp->blkmap_lock);
379 failed:
380 
381 	/*
382 	 * Cleanup
383 	 */
384 	if (buffer1)
385 		hammer_rel_buffer(buffer1, 0);
386 	if (buffer2)
387 		hammer_rel_buffer(buffer2, 0);
388 	if (buffer3)
389 		hammer_rel_buffer(buffer3, 0);
390 
391 	return(result_offset);
392 }
393 
394 /*
395  * Frontend function - Reserve bytes in a zone.
396  *
397  * This code reserves bytes out of a blockmap without committing to any
398  * meta-data modifications, allowing the front-end to directly issue disk
399  * write I/O for large blocks of data.
400  *
401  * The backend later finalizes the reservation with hammer_blockmap_finalize()
402  * upon committing the related record.
403  */
404 hammer_reserve_t
405 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
406 			hammer_off_t *zone_offp, int *errorp)
407 {
408 	hammer_volume_t root_volume;
409 	hammer_blockmap_t blockmap;
410 	hammer_blockmap_t freemap;
411 	struct hammer_blockmap_layer1 *layer1;
412 	struct hammer_blockmap_layer2 *layer2;
413 	hammer_buffer_t buffer1 = NULL;
414 	hammer_buffer_t buffer2 = NULL;
415 	hammer_buffer_t buffer3 = NULL;
416 	hammer_off_t tmp_offset;
417 	hammer_off_t next_offset;
418 	hammer_off_t layer1_offset;
419 	hammer_off_t layer2_offset;
420 	hammer_off_t base_off;
421 	hammer_reserve_t resv;
422 	hammer_reserve_t resx;
423 	int loops = 0;
424 	int offset;
425 
426 	/*
427 	 * Setup
428 	 */
429 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
430 	root_volume = hammer_get_root_volume(hmp, errorp);
431 	if (*errorp)
432 		return(NULL);
433 	blockmap = &hmp->blockmap[zone];
434 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
435 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
436 
437 	/*
438 	 * Deal with alignment and buffer-boundary issues.
439 	 *
440 	 * Be careful, certain primary alignments are used below to allocate
441 	 * new blockmap blocks.
442 	 */
443 	bytes = (bytes + 15) & ~15;
444 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
445 
446 	next_offset = blockmap->next_offset;
447 again:
448 	resv = NULL;
449 	/*
450 	 * Check for wrap
451 	 */
452 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
453 		if (++loops == 2) {
454 			*errorp = ENOSPC;
455 			goto failed;
456 		}
457 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
458 	}
459 
460 	/*
461 	 * The allocation request may not cross a buffer boundary.  Special
462 	 * large allocations must not cross a large-block boundary.
463 	 */
464 	tmp_offset = next_offset + bytes - 1;
465 	if (bytes <= HAMMER_BUFSIZE) {
466 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
467 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
468 			goto again;
469 		}
470 	} else {
471 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
472 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
473 			goto again;
474 		}
475 	}
476 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
477 
478 	/*
479 	 * Dive layer 1.
480 	 */
481 	layer1_offset = freemap->phys_offset +
482 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
483 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
484 	if (*errorp)
485 		goto failed;
486 
487 	/*
488 	 * Check CRC.
489 	 */
490 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
491 		hammer_lock_ex(&hmp->blkmap_lock);
492 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
493 			panic("CRC FAILED: LAYER1");
494 		hammer_unlock(&hmp->blkmap_lock);
495 	}
496 
497 	/*
498 	 * If we are at a big-block boundary and layer1 indicates no
499 	 * free big-blocks, then we cannot allocate a new big-block in
500 	 * layer2; skip to the next layer1 entry.
501 	 */
502 	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
503 	    layer1->blocks_free == 0) {
504 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
505 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
506 		goto again;
507 	}
508 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
509 
510 	/*
511 	 * Dive layer 2, each entry represents a large-block.
512 	 */
513 	layer2_offset = layer1->phys_offset +
514 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
515 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
516 	if (*errorp)
517 		goto failed;
518 
519 	/*
520 	 * Check CRC if not allocating into uninitialized space (which we
521 	 * aren't when reserving space).
522 	 */
523 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
524 		hammer_lock_ex(&hmp->blkmap_lock);
525 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
526 			panic("CRC FAILED: LAYER2");
527 		hammer_unlock(&hmp->blkmap_lock);
528 	}
529 
530 	/*
531 	 * Skip the layer if the zone is owned by someone other than us.
532 	 */
533 	if (layer2->zone && layer2->zone != zone) {
534 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
535 		goto again;
536 	}
537 	if (offset < layer2->append_off) {
538 		next_offset += layer2->append_off - offset;
539 		goto again;
540 	}
541 
542 	/*
543 	 * We need the lock from this point on.  We have to re-check zone
544 	 * ownership after acquiring the lock and also check for reservations.
545 	 */
546 	hammer_lock_ex(&hmp->blkmap_lock);
547 
548 	if (layer2->zone && layer2->zone != zone) {
549 		hammer_unlock(&hmp->blkmap_lock);
550 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
551 		goto again;
552 	}
553 	if (offset < layer2->append_off) {
554 		hammer_unlock(&hmp->blkmap_lock);
555 		next_offset += layer2->append_off - offset;
556 		goto again;
557 	}
558 
559 	/*
560 	 * The bigblock might be reserved by another zone.  If it is reserved
561 	 * by our zone we may have to move next_offset past the append_off.
562 	 */
563 	base_off = (next_offset &
564 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
565 		    HAMMER_ZONE_RAW_BUFFER;
566 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
567 	if (resv) {
568 		if (resv->zone != zone) {
569 			hammer_unlock(&hmp->blkmap_lock);
570 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
571 				      ~HAMMER_LARGEBLOCK_MASK64;
572 			goto again;
573 		}
574 		if (offset < resv->append_off) {
575 			hammer_unlock(&hmp->blkmap_lock);
576 			next_offset += resv->append_off - offset;
577 			goto again;
578 		}
579 		++resv->refs;
580 		resx = NULL;
581 	} else {
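		/*
		 * No existing reservation covers this big-block; create one
		 * keyed on its zone-2 base offset.
		 */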
582 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
583 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
584 		resx->refs = 1;
585 		resx->zone = zone;
586 		resx->zone_offset = base_off;
587 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
588 			resx->flags |= HAMMER_RESF_LAYER2FREE;
589 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
590 		KKASSERT(resv == NULL);
591 		resv = resx;
592 		++hammer_count_reservations;
593 	}
594 	resv->append_off = offset + bytes;
595 
596 	/*
597 	 * If we are not reserving a whole buffer but are at the start of
598 	 * a new block, call hammer_bnew() to avoid a disk read.
599 	 *
600 	 * If we are reserving a whole buffer (or more), the caller will
601 	 * probably use a direct read, so do nothing.
602 	 */
603 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
604 		hammer_bnew(hmp, next_offset, errorp, &buffer3);
605 	}
606 
607 	/*
608 	 * Adjust our iterator.  Advance the zone's next_offset past the
609 	 * space we just reserved so subsequent reservations do not
610 	 * overlap it.
611 	 */
612 	blockmap->next_offset = next_offset + bytes;
613 	hammer_unlock(&hmp->blkmap_lock);
614 
615 failed:
616 	if (buffer1)
617 		hammer_rel_buffer(buffer1, 0);
618 	if (buffer2)
619 		hammer_rel_buffer(buffer2, 0);
620 	if (buffer3)
621 		hammer_rel_buffer(buffer3, 0);
622 	hammer_rel_volume(root_volume, 0);
623 	*zone_offp = next_offset;
624 
625 	return(resv);
626 }
627 
628 /*
629  * Frontend function - Dedup bytes in a zone.
630  *
631  * Dedup reservations work exactly the same as normal write reservations
632  * except that only the bytes_free field is adjusted and the append offset
633  * is left untouched.  Finalization for dedup reservations is the same as for
634  * normal write ones - the backend finalizes the reservation with
635  * hammer_blockmap_finalize().
636  */
637 hammer_reserve_t
638 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
639 			      hammer_off_t zone_offset, int *errorp)
640 {
641 	hammer_volume_t root_volume;
642 	hammer_blockmap_t freemap;
643 	struct hammer_blockmap_layer1 *layer1;
644 	struct hammer_blockmap_layer2 *layer2;
645 	hammer_buffer_t buffer1 = NULL;
646 	hammer_buffer_t buffer2 = NULL;
647 	hammer_off_t layer1_offset;
648 	hammer_off_t layer2_offset;
649 	hammer_off_t base_off;
650 	hammer_reserve_t resv = NULL;
651 	hammer_reserve_t resx = NULL;
652 
653 	/*
654 	 * Setup
655 	 */
656 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
657 	root_volume = hammer_get_root_volume(hmp, errorp);
658 	if (*errorp)
659 		return (NULL);
660 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
661 	KKASSERT(freemap->phys_offset != 0);
662 
663 	bytes = (bytes + 15) & ~15;
664 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
665 
666 	/*
667 	 * Dive layer 1.
668 	 */
669 	layer1_offset = freemap->phys_offset +
670 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
671 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
672 	if (*errorp)
673 		goto failed;
674 
675 	/*
676 	 * Check CRC.
677 	 */
678 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
679 		hammer_lock_ex(&hmp->blkmap_lock);
680 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
681 			panic("CRC FAILED: LAYER1");
682 		hammer_unlock(&hmp->blkmap_lock);
683 	}
684 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
685 
686 	/*
687 	 * Dive layer 2, each entry represents a large-block.
688 	 */
689 	layer2_offset = layer1->phys_offset +
690 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
691 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
692 	if (*errorp)
693 		goto failed;
694 
695 	/*
696 	 * Check CRC.
697 	 */
698 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
699 		hammer_lock_ex(&hmp->blkmap_lock);
700 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
701 			panic("CRC FAILED: LAYER2");
702 		hammer_unlock(&hmp->blkmap_lock);
703 	}
704 
705 	/*
706 	 * Fail if the zone is owned by someone other than us.
707 	 */
708 	if (layer2->zone && layer2->zone != zone)
709 		goto failed;
710 
711 	/*
712 	 * We need the lock from this point on.  We have to re-check zone
713 	 * ownership after acquiring the lock and also check for reservations.
714 	 */
715 	hammer_lock_ex(&hmp->blkmap_lock);
716 
717 	if (layer2->zone && layer2->zone != zone) {
718 		hammer_unlock(&hmp->blkmap_lock);
719 		goto failed;
720 	}
721 
722 	base_off = (zone_offset &
723 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
724 		    HAMMER_ZONE_RAW_BUFFER;
725 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
726 	if (resv) {
727 		if (resv->zone != zone) {
728 			hammer_unlock(&hmp->blkmap_lock);
729 			resv = NULL;
730 			goto failed;
731 		}
732 		/*
733 		 * Due to possible big block underflow we can't simply
734 		 * subtract bytes from bytes_free.
735 		 */
736 		if (update_bytes_free(resv, bytes) == 0) {
737 			hammer_unlock(&hmp->blkmap_lock);
738 			resv = NULL;
739 			goto failed;
740 		}
741 		++resv->refs;
742 		resx = NULL;
743 	} else {
744 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
745 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
746 		resx->refs = 1;
747 		resx->zone = zone;
748 		resx->bytes_free = layer2->bytes_free;
749 		/*
750 		 * Due to possible big block underflow we can't simply
751 		 * subtract bytes from bytes_free.
752 		 */
753 		if (update_bytes_free(resx, bytes) == 0) {
754 			hammer_unlock(&hmp->blkmap_lock);
755 			kfree(resx, hmp->m_misc);
756 			goto failed;
757 		}
758 		resx->zone_offset = base_off;
759 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
760 		KKASSERT(resv == NULL);
761 		resv = resx;
762 		++hammer_count_reservations;
763 	}
764 
765 	hammer_unlock(&hmp->blkmap_lock);
766 
767 failed:
768 	if (buffer1)
769 		hammer_rel_buffer(buffer1, 0);
770 	if (buffer2)
771 		hammer_rel_buffer(buffer2, 0);
772 	hammer_rel_volume(root_volume, 0);
773 
774 	return(resv);
775 }
776 
777 static int
778 update_bytes_free(hammer_reserve_t resv, int bytes)
779 {
780 	int32_t temp;
781 
782 	/*
783 	 * Big-block underflow check
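	 *
	 * bytes_free can legally go negative due to de-dup, but not without
	 * bound.  If subtracting two full big-blocks wraps the signed 32-bit
	 * value (temp compares greater than bytes_free) the counter is about
	 * to underflow and the update is rejected.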
784 	 */
785 	temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
786 	cpu_ccfence(); /* XXX do we really need it ? */
787 	if (temp > resv->bytes_free) {
788 		kprintf("BIGBLOCK UNDERFLOW\n");
789 		return (0);
790 	}
791 
792 	resv->bytes_free -= bytes;
793 	return (1);
794 }
795 
796 /*
797  * Dereference a reservation structure.  Upon the final release the
798  * underlying big-block is checked and if it is entirely free we delete
799  * any related HAMMER buffers to avoid potential conflicts with future
800  * reuse of the big-block.
801  */
802 void
803 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
804 {
805 	hammer_off_t base_offset;
806 	int error;
807 
808 	KKASSERT(resv->refs > 0);
809 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
810 		 HAMMER_ZONE_RAW_BUFFER);
811 
812 	/*
813 	 * Setting append_off to the max prevents any new allocations
814 	 * from occurring while we are trying to dispose of the reservation,
815 	 * allowing us to safely delete any related HAMMER buffers.
816 	 *
817 	 * If we are unable to clean out all related HAMMER buffers we
818 	 * requeue the delay.
819 	 */
820 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
821 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
822 		base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
823 		base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
824 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
825 			hammer_dedup_cache_inval(hmp, base_offset);
826 		error = hammer_del_buffers(hmp, base_offset,
827 					   resv->zone_offset,
828 					   HAMMER_LARGEBLOCK_SIZE,
829 					   1);
830 		if (hammer_debug_general & 0x20000) {
831 			kprintf("hammer: dellgblk %016jx error %d\n",
832 				(intmax_t)base_offset, error);
833 		}
834 		if (error)
835 			hammer_reserve_setdelay(hmp, resv);
836 	}
837 	if (--resv->refs == 0) {
838 		if (hammer_debug_general & 0x20000) {
839 			kprintf("hammer: delresvr %016jx zone %02x\n",
840 				(intmax_t)resv->zone_offset, resv->zone);
841 		}
842 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
843 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
844 		kfree(resv, hmp->m_misc);
845 		--hammer_count_reservations;
846 	}
847 }
848 
849 /*
850  * Prevent a potentially free big-block from being reused until after
851  * the related flushes have completely cycled, otherwise crash recovery
852  * could resurrect a data block that was already reused and overwritten.
853  *
854  * The caller might reset the underlying layer2 entry's append_off to 0, so
855  * our covering append_off must be set to max to prevent any reallocation
856  * until after the flush delays complete, not to mention proper invalidation
857  * of any underlying cached blocks.
858  */
859 static void
860 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
861 			int zone, struct hammer_blockmap_layer2 *layer2)
862 {
863 	hammer_reserve_t resv;
864 
865 	/*
866 	 * Allocate the reservation if necessary.
867 	 *
868 	 * NOTE: need lock in future around resv lookup/allocation and
869 	 * the setdelay call, currently refs is not bumped until the call.
870 	 */
871 again:
872 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
873 	if (resv == NULL) {
874 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
875 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
876 		resv->zone = zone;
877 		resv->zone_offset = base_offset;
878 		resv->refs = 0;
879 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
880 
881 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
882 			resv->flags |= HAMMER_RESF_LAYER2FREE;
883 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
884 			kfree(resv, hmp->m_misc);
885 			goto again;
886 		}
887 		++hammer_count_reservations;
888 	} else {
889 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
890 			resv->flags |= HAMMER_RESF_LAYER2FREE;
891 	}
892 	hammer_reserve_setdelay(hmp, resv);
893 }
894 
895 /*
896  * Enter the reservation on the on-delay list, or move it if it
897  * is already on the list.
898  */
899 static void
900 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
901 {
902 	if (resv->flags & HAMMER_RESF_ONDELAY) {
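		/*
		 * Already on the delay list: move it to the tail and restart
		 * the delay with a new flush group.
		 */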
903 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
904 		resv->flush_group = hmp->flusher.next + 1;
905 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
906 	} else {
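		/*
		 * Not yet delayed: take a reference which
		 * hammer_reserve_clrdelay() later hands off to
		 * hammer_blockmap_reserve_complete() when the flush point
		 * is reached.
		 */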
907 		++resv->refs;
908 		++hmp->rsv_fromdelay;
909 		resv->flags |= HAMMER_RESF_ONDELAY;
910 		resv->flush_group = hmp->flusher.next + 1;
911 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
912 	}
913 }
914 
915 /*
916  * Reserve has reached its flush point, remove it from the delay list
917  * and finish it off.  hammer_blockmap_reserve_complete() inherits
918  * the ondelay reference.
919  */
920 void
921 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
922 {
923 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
924 	resv->flags &= ~HAMMER_RESF_ONDELAY;
925 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
926 	--hmp->rsv_fromdelay;
927 	hammer_blockmap_reserve_complete(hmp, resv);
928 }
929 
930 /*
931  * Backend function - free (offset, bytes) in a zone.
932  *
933  * XXX error return
934  */
935 void
936 hammer_blockmap_free(hammer_transaction_t trans,
937 		     hammer_off_t zone_offset, int bytes)
938 {
939 	hammer_mount_t hmp;
940 	hammer_volume_t root_volume;
941 	hammer_blockmap_t freemap;
942 	struct hammer_blockmap_layer1 *layer1;
943 	struct hammer_blockmap_layer2 *layer2;
944 	hammer_buffer_t buffer1 = NULL;
945 	hammer_buffer_t buffer2 = NULL;
946 	hammer_off_t layer1_offset;
947 	hammer_off_t layer2_offset;
948 	hammer_off_t base_off;
949 	int error;
950 	int zone;
951 
952 	if (bytes == 0)
953 		return;
954 	hmp = trans->hmp;
955 
956 	/*
957 	 * Alignment
958 	 */
959 	bytes = (bytes + 15) & ~15;
960 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
961 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
962 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
963 
964 	/*
965 	 * Basic zone validation & locking
966 	 */
967 	zone = HAMMER_ZONE_DECODE(zone_offset);
968 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
969 	root_volume = trans->rootvol;
970 	error = 0;
971 
972 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
973 
974 	/*
975 	 * Dive layer 1.
976 	 */
977 	layer1_offset = freemap->phys_offset +
978 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
979 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
980 	if (error)
981 		goto failed;
982 	KKASSERT(layer1->phys_offset &&
983 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
984 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
985 		hammer_lock_ex(&hmp->blkmap_lock);
986 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
987 			panic("CRC FAILED: LAYER1");
988 		hammer_unlock(&hmp->blkmap_lock);
989 	}
990 
991 	/*
992 	 * Dive layer 2, each entry represents a large-block.
993 	 */
994 	layer2_offset = layer1->phys_offset +
995 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
996 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
997 	if (error)
998 		goto failed;
999 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1000 		hammer_lock_ex(&hmp->blkmap_lock);
1001 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1002 			panic("CRC FAILED: LAYER2");
1003 		hammer_unlock(&hmp->blkmap_lock);
1004 	}
1005 
1006 	hammer_lock_ex(&hmp->blkmap_lock);
1007 
1008 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1009 
1010 	/*
1011 	 * Free space previously allocated via blockmap_alloc().
1012 	 *
1013 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1014 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1015 	 */
1016 	KKASSERT(layer2->zone == zone);
1017 	layer2->bytes_free += bytes;
1018 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1019 
1020 	/*
1021 	 * If a big-block becomes entirely free we must create a covering
1022 	 * reservation to prevent premature reuse.  Note, however, that
1023 	 * the big-block and/or reservation may still have an append_off
1024 	 * that allows further (non-reused) allocations.
1025 	 *
1026 	 * Once the reservation has been made we re-check layer2 and if
1027 	 * the big-block is still entirely free we reset the layer2 entry.
1028 	 * The reservation will prevent premature reuse.
1029 	 *
1030 	 * NOTE: hammer_buffer's are only invalidated when the reservation
1031 	 * is completed, if the layer2 entry is still completely free at
1032 	 * that time.  Any allocations from the reservation that may have
1033 	 * occurred in the meantime, or active references on the reservation
1034 	 * from new pending allocations, will prevent the invalidation from
1035 	 * occurring.
1036 	 */
1037 	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1038 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1039 
1040 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1041 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1042 			layer2->zone = 0;
1043 			layer2->append_off = 0;
1044 			hammer_modify_buffer(trans, buffer1,
1045 					     layer1, sizeof(*layer1));
1046 			++layer1->blocks_free;
1047 			layer1->layer1_crc = crc32(layer1,
1048 						   HAMMER_LAYER1_CRCSIZE);
1049 			hammer_modify_buffer_done(buffer1);
1050 			hammer_modify_volume_field(trans,
1051 					trans->rootvol,
1052 					vol0_stat_freebigblocks);
1053 			++root_volume->ondisk->vol0_stat_freebigblocks;
1054 			hmp->copy_stat_freebigblocks =
1055 			   root_volume->ondisk->vol0_stat_freebigblocks;
1056 			hammer_modify_volume_done(trans->rootvol);
1057 		}
1058 	}
1059 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1060 	hammer_modify_buffer_done(buffer2);
1061 	hammer_unlock(&hmp->blkmap_lock);
1062 
1063 failed:
1064 	if (buffer1)
1065 		hammer_rel_buffer(buffer1, 0);
1066 	if (buffer2)
1067 		hammer_rel_buffer(buffer2, 0);
1068 }
1069 
1070 int
1071 hammer_blockmap_dedup(hammer_transaction_t trans,
1072 		     hammer_off_t zone_offset, int bytes)
1073 {
1074 	hammer_mount_t hmp;
1075 	hammer_volume_t root_volume;
1076 	hammer_blockmap_t freemap;
1077 	struct hammer_blockmap_layer1 *layer1;
1078 	struct hammer_blockmap_layer2 *layer2;
1079 	hammer_buffer_t buffer1 = NULL;
1080 	hammer_buffer_t buffer2 = NULL;
1081 	hammer_off_t layer1_offset;
1082 	hammer_off_t layer2_offset;
1083 	int32_t temp;
1084 	int error;
1085 	int zone;
1086 
1087 	if (bytes == 0)
1088 		return (0);
1089 	hmp = trans->hmp;
1090 
1091 	/*
1092 	 * Alignment
1093 	 */
1094 	bytes = (bytes + 15) & ~15;
1095 	KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1096 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1097 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
1098 
1099 	/*
1100 	 * Basic zone validation & locking
1101 	 */
1102 	zone = HAMMER_ZONE_DECODE(zone_offset);
1103 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1104 	root_volume = trans->rootvol;
1105 	error = 0;
1106 
1107 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1108 
1109 	/*
1110 	 * Dive layer 1.
1111 	 */
1112 	layer1_offset = freemap->phys_offset +
1113 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1114 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1115 	if (error)
1116 		goto failed;
1117 	KKASSERT(layer1->phys_offset &&
1118 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1119 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1120 		hammer_lock_ex(&hmp->blkmap_lock);
1121 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1122 			panic("CRC FAILED: LAYER1");
1123 		hammer_unlock(&hmp->blkmap_lock);
1124 	}
1125 
1126 	/*
1127 	 * Dive layer 2, each entry represents a large-block.
1128 	 */
1129 	layer2_offset = layer1->phys_offset +
1130 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1131 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1132 	if (error)
1133 		goto failed;
1134 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1135 		hammer_lock_ex(&hmp->blkmap_lock);
1136 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1137 			panic("CRC FAILED: LAYER2");
1138 		hammer_unlock(&hmp->blkmap_lock);
1139 	}
1140 
1141 	hammer_lock_ex(&hmp->blkmap_lock);
1142 
1143 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1144 
1145 	/*
1146 	 * De-dup accounting: subtract the de-duplicated bytes from bytes_free.
1147 	 *
1148 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1149 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1150 	 */
1151 	KKASSERT(layer2->zone == zone);
1152 	temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1153 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1154 	if (temp > layer2->bytes_free) {
1155 		error = ERANGE;
1156 		goto underflow;
1157 	}
1158 	layer2->bytes_free -= bytes;
1159 
1160 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1161 
1162 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1163 underflow:
1164 	hammer_modify_buffer_done(buffer2);
1165 	hammer_unlock(&hmp->blkmap_lock);
1166 
1167 failed:
1168 	if (buffer1)
1169 		hammer_rel_buffer(buffer1, 0);
1170 	if (buffer2)
1171 		hammer_rel_buffer(buffer2, 0);
1172 	return (error);
1173 }
1174 
1175 /*
1176  * Backend function - finalize (offset, bytes) in a zone.
1177  *
1178  * Allocate space that was previously reserved by the frontend.
1179  */
1180 int
1181 hammer_blockmap_finalize(hammer_transaction_t trans,
1182 			 hammer_reserve_t resv,
1183 			 hammer_off_t zone_offset, int bytes)
1184 {
1185 	hammer_mount_t hmp;
1186 	hammer_volume_t root_volume;
1187 	hammer_blockmap_t freemap;
1188 	struct hammer_blockmap_layer1 *layer1;
1189 	struct hammer_blockmap_layer2 *layer2;
1190 	hammer_buffer_t buffer1 = NULL;
1191 	hammer_buffer_t buffer2 = NULL;
1192 	hammer_off_t layer1_offset;
1193 	hammer_off_t layer2_offset;
1194 	int error;
1195 	int zone;
1196 	int offset;
1197 
1198 	if (bytes == 0)
1199 		return(0);
1200 	hmp = trans->hmp;
1201 
1202 	/*
1203 	 * Alignment
1204 	 */
1205 	bytes = (bytes + 15) & ~15;
1206 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1207 
1208 	/*
1209 	 * Basic zone validation & locking
1210 	 */
1211 	zone = HAMMER_ZONE_DECODE(zone_offset);
1212 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1213 	root_volume = trans->rootvol;
1214 	error = 0;
1215 
1216 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1217 
1218 	/*
1219 	 * Dive layer 1.
1220 	 */
1221 	layer1_offset = freemap->phys_offset +
1222 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1223 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1224 	if (error)
1225 		goto failed;
1226 	KKASSERT(layer1->phys_offset &&
1227 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1228 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1229 		hammer_lock_ex(&hmp->blkmap_lock);
1230 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1231 			panic("CRC FAILED: LAYER1");
1232 		hammer_unlock(&hmp->blkmap_lock);
1233 	}
1234 
1235 	/*
1236 	 * Dive layer 2, each entry represents a large-block.
1237 	 */
1238 	layer2_offset = layer1->phys_offset +
1239 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1240 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1241 	if (error)
1242 		goto failed;
1243 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1244 		hammer_lock_ex(&hmp->blkmap_lock);
1245 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1246 			panic("CRC FAILED: LAYER2");
1247 		hammer_unlock(&hmp->blkmap_lock);
1248 	}
1249 
1250 	hammer_lock_ex(&hmp->blkmap_lock);
1251 
1252 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1253 
1254 	/*
1255 	 * Finalize some or all of the space covered by a current
1256 	 * reservation.  An allocation in the same layer may have
1257 	 * already assigned ownership.
1258 	 */
1259 	if (layer2->zone == 0) {
1260 		hammer_modify_buffer(trans, buffer1,
1261 				     layer1, sizeof(*layer1));
1262 		--layer1->blocks_free;
1263 		layer1->layer1_crc = crc32(layer1,
1264 					   HAMMER_LAYER1_CRCSIZE);
1265 		hammer_modify_buffer_done(buffer1);
1266 		layer2->zone = zone;
1267 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1268 		KKASSERT(layer2->append_off == 0);
1269 		hammer_modify_volume_field(trans,
1270 				trans->rootvol,
1271 				vol0_stat_freebigblocks);
1272 		--root_volume->ondisk->vol0_stat_freebigblocks;
1273 		hmp->copy_stat_freebigblocks =
1274 		   root_volume->ondisk->vol0_stat_freebigblocks;
1275 		hammer_modify_volume_done(trans->rootvol);
1276 	}
1277 	if (layer2->zone != zone)
1278 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1279 	KKASSERT(layer2->zone == zone);
1280 	KKASSERT(bytes != 0);
1281 	layer2->bytes_free -= bytes;
1282 
1283 	if (resv) {
1284 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1285 	}
1286 
1287 	/*
1288 	 * Finalizations can occur out of order, or combined with allocations.
1289 	 * append_off must be set to the highest allocated offset.
1290 	 */
1291 	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1292 	if (layer2->append_off < offset)
1293 		layer2->append_off = offset;
1294 
1295 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1296 	hammer_modify_buffer_done(buffer2);
1297 	hammer_unlock(&hmp->blkmap_lock);
1298 
1299 failed:
1300 	if (buffer1)
1301 		hammer_rel_buffer(buffer1, 0);
1302 	if (buffer2)
1303 		hammer_rel_buffer(buffer2, 0);
1304 	return(error);
1305 }
1306 
1307 /*
1308  * Return the approximate number of free bytes in the big-block
1309  * containing the specified blockmap offset.
1310  *
1311  * WARNING: A negative number can be returned if data de-dup exists,
1312  *	    and the result will also not represent the actual number
1313  *	    of free bytes in this case.
1314  *
1315  *	    This code is used only by the reblocker.
1316  */
1317 int
1318 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1319 			int *curp, int *errorp)
1320 {
1321 	hammer_volume_t root_volume;
1322 	hammer_blockmap_t blockmap;
1323 	hammer_blockmap_t freemap;
1324 	struct hammer_blockmap_layer1 *layer1;
1325 	struct hammer_blockmap_layer2 *layer2;
1326 	hammer_buffer_t buffer = NULL;
1327 	hammer_off_t layer1_offset;
1328 	hammer_off_t layer2_offset;
1329 	int32_t bytes;
1330 	int zone;
1331 
1332 	zone = HAMMER_ZONE_DECODE(zone_offset);
1333 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1334 	root_volume = hammer_get_root_volume(hmp, errorp);
1335 	if (*errorp) {
1336 		*curp = 0;
1337 		return(0);
1338 	}
1339 	blockmap = &hmp->blockmap[zone];
1340 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1341 
1342 	/*
1343 	 * Dive layer 1.
1344 	 */
1345 	layer1_offset = freemap->phys_offset +
1346 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1347 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1348 	if (*errorp) {
1349 		bytes = 0;
1350 		goto failed;
1351 	}
1352 	KKASSERT(layer1->phys_offset);
1353 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1354 		hammer_lock_ex(&hmp->blkmap_lock);
1355 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1356 			panic("CRC FAILED: LAYER1");
1357 		hammer_unlock(&hmp->blkmap_lock);
1358 	}
1359 
1360 	/*
1361 	 * Dive layer 2, each entry represents a large-block.
1362 	 *
1363 	 * (reuse buffer, layer1 pointer becomes invalid)
1364 	 */
1365 	layer2_offset = layer1->phys_offset +
1366 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1367 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1368 	if (*errorp) {
1369 		bytes = 0;
1370 		goto failed;
1371 	}
1372 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1373 		hammer_lock_ex(&hmp->blkmap_lock);
1374 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1375 			panic("CRC FAILED: LAYER2");
1376 		hammer_unlock(&hmp->blkmap_lock);
1377 	}
1378 	KKASSERT(layer2->zone == zone);
1379 
1380 	bytes = layer2->bytes_free;
1381 
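	/*
	 * *curp indicates whether the zone's allocation iterator is
	 * currently inside the same big-block as zone_offset.
	 */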
1382 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1383 		*curp = 0;
1384 	else
1385 		*curp = 1;
1386 failed:
1387 	if (buffer)
1388 		hammer_rel_buffer(buffer, 0);
1389 	hammer_rel_volume(root_volume, 0);
1390 	if (hammer_debug_general & 0x0800) {
1391 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1392 			(long long)zone_offset, bytes);
1393 	}
1394 	return(bytes);
1395 }
1396 
1397 
1398 /*
1399  * Lookup a blockmap offset.
1400  */
1401 hammer_off_t
1402 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1403 		       int *errorp)
1404 {
1405 	hammer_volume_t root_volume;
1406 	hammer_blockmap_t freemap;
1407 	struct hammer_blockmap_layer1 *layer1;
1408 	struct hammer_blockmap_layer2 *layer2;
1409 	hammer_buffer_t buffer = NULL;
1410 	hammer_off_t layer1_offset;
1411 	hammer_off_t layer2_offset;
1412 	hammer_off_t result_offset;
1413 	hammer_off_t base_off;
1414 	hammer_reserve_t resv;
1415 	int zone;
1416 
1417 	/*
1418 	 * Calculate the zone-2 offset.
1419 	 */
1420 	zone = HAMMER_ZONE_DECODE(zone_offset);
1421 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1422 
1423 	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1424 			HAMMER_ZONE_RAW_BUFFER;
1425 
1426 	/*
1427 	 * We can actually stop here, normal blockmaps are now direct-mapped
1428 	 * onto the freemap and so represent zone-2 addresses.
1429 	 */
1430 	if (hammer_verify_zone == 0) {
1431 		*errorp = 0;
1432 		return(result_offset);
1433 	}
1434 
1435 	/*
1436 	 * Validate the allocation zone
1437 	 */
1438 	root_volume = hammer_get_root_volume(hmp, errorp);
1439 	if (*errorp)
1440 		return(0);
1441 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1442 	KKASSERT(freemap->phys_offset != 0);
1443 
1444 	/*
1445 	 * Dive layer 1.
1446 	 */
1447 	layer1_offset = freemap->phys_offset +
1448 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1449 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1450 	if (*errorp)
1451 		goto failed;
1452 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1453 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1454 		hammer_lock_ex(&hmp->blkmap_lock);
1455 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1456 			panic("CRC FAILED: LAYER1");
1457 		hammer_unlock(&hmp->blkmap_lock);
1458 	}
1459 
1460 	/*
1461 	 * Dive layer 2, each entry represents a large-block.
1462 	 */
1463 	layer2_offset = layer1->phys_offset +
1464 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1465 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1466 
1467 	if (*errorp)
1468 		goto failed;
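	/*
	 * A zero layer2 zone is only legal here if the big-block is covered
	 * by a frontend reservation belonging to the expected zone
	 * (asserted below).
	 */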
1469 	if (layer2->zone == 0) {
1470 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1471 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1472 				 base_off);
1473 		KKASSERT(resv && resv->zone == zone);
1474 
1475 	} else if (layer2->zone != zone) {
1476 		panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1477 			layer2->zone, zone);
1478 	}
1479 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1480 		hammer_lock_ex(&hmp->blkmap_lock);
1481 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1482 			panic("CRC FAILED: LAYER2");
1483 		hammer_unlock(&hmp->blkmap_lock);
1484 	}
1485 
1486 failed:
1487 	if (buffer)
1488 		hammer_rel_buffer(buffer, 0);
1489 	hammer_rel_volume(root_volume, 0);
1490 	if (hammer_debug_general & 0x0800) {
1491 		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1492 			(long long)zone_offset, (long long)result_offset);
1493 	}
1494 	return(result_offset);
1495 }
1496 
1497 
1498 /*
1499  * Check space availability
1500  *
1501  * MPSAFE - does not require fs_token
1502  */
1503 int
1504 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1505 {
1506 	const int in_size = sizeof(struct hammer_inode_data) +
1507 			    sizeof(union hammer_btree_elm);
1508 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1509 	int64_t usedbytes;
1510 
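	/*
	 * Estimate the worst-case space still needed by pending frontend
	 * operations: reserved inodes and records, reserved data bytes,
	 * big-blocks held on the delay list, four times hidirtybufspace,
	 * plus the caller-supplied slop in big-blocks.
	 */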
1511 	usedbytes = hmp->rsv_inodes * in_size +
1512 		    hmp->rsv_recs * rec_size +
1513 		    hmp->rsv_databytes +
1514 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1515 		    ((int64_t)hidirtybufspace << 2) +
1516 		    (slop << HAMMER_LARGEBLOCK_BITS);
1517 
1518 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1519 	if (resp)
1520 		*resp = usedbytes;
1521 
1522 	if (hmp->copy_stat_freebigblocks >=
1523 	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1524 		return(0);
1525 	}
1526 	return (ENOSPC);
1527 }
1528 
1529