xref: /netbsd-src/sys/fs/nilfs/nilfs_subr.c (revision ba65fde2d7fefa7d39838fa5fa855e62bd606b5e)
1 /* $NetBSD: nilfs_subr.c,v 1.9 2012/12/20 08:03:43 hannken Exp $ */
2 
3 /*
4  * Copyright (c) 2008, 2009 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: nilfs_subr.c,v 1.9 2012/12/20 08:03:43 hannken Exp $");
32 #endif /* not lint */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/namei.h>
37 #include <sys/resourcevar.h>	/* defines plimit structure in proc struct */
38 #include <sys/kernel.h>
39 #include <sys/file.h>		/* define FWRITE ... */
40 #include <sys/stat.h>
41 #include <sys/buf.h>
42 #include <sys/proc.h>
43 #include <sys/mount.h>
44 #include <sys/vnode.h>
45 #include <sys/signalvar.h>
46 #include <sys/malloc.h>
47 #include <sys/dirent.h>
48 #include <sys/lockf.h>
49 #include <sys/kauth.h>
50 #include <sys/dirhash.h>
51 
52 #include <miscfs/genfs/genfs.h>
53 #include <uvm/uvm_extern.h>
54 
55 #include <fs/nilfs/nilfs_mount.h>
56 #include "nilfs.h"
57 #include "nilfs_subr.h"
58 #include "nilfs_bswap.h"
59 
60 
/* Fetch the nilfs_node stored in a vnode's v_data pointer. */
#define VTOI(vnode) ((struct nilfs_node *) (vnode)->v_data)
62 
63 /* forwards */
64 static int nilfs_btree_lookup(struct nilfs_node *node, uint64_t lblocknr,
65 	uint64_t *vblocknr);
66 
67 /* basic calculators */
68 uint64_t nilfs_get_segnum_of_block(struct nilfs_device *nilfsdev,
69 	uint64_t blocknr)
70 {
71 	return blocknr / nilfs_rw32(nilfsdev->super.s_blocks_per_segment);
72 }
73 
74 
75 void
76 nilfs_get_segment_range(struct nilfs_device *nilfsdev, uint64_t segnum,
77         uint64_t *seg_start, uint64_t *seg_end)
78 {
79         uint64_t blks_per_seg;
80 
81         blks_per_seg = nilfs_rw64(nilfsdev->super.s_blocks_per_segment);
82         *seg_start = blks_per_seg * segnum;
83         *seg_end   = *seg_start + blks_per_seg -1;
84         if (segnum == 0)
85                 *seg_start = nilfs_rw64(nilfsdev->super.s_first_data_block);
86 }
87 
88 
89 void nilfs_calc_mdt_consts(struct nilfs_device *nilfsdev,
90 	struct nilfs_mdt *mdt, int entry_size)
91 {
92 	uint32_t blocksize = nilfsdev->blocksize;
93 
94 	mdt->entries_per_group = blocksize * 8;	   /* bits in sector */
95 	mdt->entries_per_block = blocksize / entry_size;
96 
97 	mdt->blocks_per_group  =
98 		(mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
99 	mdt->groups_per_desc_block =
100 		blocksize / sizeof(struct nilfs_block_group_desc);
101 	mdt->blocks_per_desc_block =
102 		mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
103 }
104 
105 
106 /* from NetBSD's src/sys/net/if_ethersubr.c */
/*
 * Update the running CRC `crc' with `len' bytes from `buf' and return the
 * new value.  Each byte is folded in as two 4 bit steps through a 16 entry
 * lookup table.  No initial or final inversion is applied here; the caller
 * provides the seed and post-processes the result if needed.
 */
uint32_t
crc32_le(uint32_t crc, const uint8_t *buf, size_t len)
{
        /* CRC contribution for every possible 4 bit value */
        static const uint32_t nibble_tab[16] = {
                0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
                0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
                0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
                0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
        };
        const uint8_t *p = buf;

        while (len-- > 0) {
                crc ^= *p++;
                /* low nibble first, then the high nibble */
                crc = (crc >> 4) ^ nibble_tab[crc & 0xf];
                crc = (crc >> 4) ^ nibble_tab[crc & 0xf];
        }

        return crc;
}
126 
127 
/*
 * Hash an inode number: the number itself, narrowed to int.  Callers mask
 * the result down to a hash line.
 */
static int
nilfs_calchash(uint64_t ino)
{
	int hash = (int) ino;

	return hash;
}
133 
134 
135 /* dev reading */
136 static int
137 nilfs_dev_bread(struct nilfs_device *nilfsdev, uint64_t blocknr,
138 	struct kauth_cred *cred, int flags, struct buf **bpp)
139 {
140 	int blk2dev = nilfsdev->blocksize / DEV_BSIZE;
141 
142 	return bread(nilfsdev->devvp, blocknr * blk2dev, nilfsdev->blocksize,
143 		NOCRED, 0, bpp);
144 }
145 
146 
147 /* read on a node */
148 int
149 nilfs_bread(struct nilfs_node *node, uint64_t blocknr,
150 	struct kauth_cred *cred, int flags, struct buf **bpp)
151 {
152 	uint64_t vblocknr;
153 	int error;
154 
155 	error = nilfs_btree_lookup(node, blocknr, &vblocknr);
156 	if (error)
157 		return error;
158 	return bread(node->vnode, vblocknr, node->nilfsdev->blocksize,
159 		cred, flags, bpp);
160 }
161 
162 
163 /* segment-log reading */
164 int
165 nilfs_get_segment_log(struct nilfs_device *nilfsdev, uint64_t *blocknr,
166 	uint64_t *offset, struct buf **bpp, int len, void *blob)
167 {
168 	int blocksize = nilfsdev->blocksize;
169 	int error;
170 
171 	KASSERT(len <= blocksize);
172 
173 	if (*offset + len > blocksize) {
174 		*blocknr = *blocknr + 1;
175 		*offset = 0;
176 	}
177 	if (*offset == 0) {
178 		if (*bpp)
179 			brelse(*bpp, BC_AGE);
180 		/* read in block */
181 		error = nilfs_dev_bread(nilfsdev, *blocknr, NOCRED, 0, bpp);
182 		if (error)
183 			return error;
184 	}
185 	memcpy(blob, ((uint8_t *) (*bpp)->b_data) + *offset, len);
186 	*offset += len;
187 
188 	return 0;
189 }
190 
191 /* -------------------------------------------------------------------------- */
192 
193 /* btree operations */
194 
195 static int
196 nilfs_btree_lookup_level(struct nilfs_node *node, uint64_t lblocknr,
197 		uint64_t btree_vblknr, int level, uint64_t *vblocknr)
198 {
199 	struct nilfs_device *nilfsdev = node->nilfsdev;
200 	struct nilfs_btree_node *btree_hdr;
201 	struct buf *bp;
202 	uint64_t btree_blknr;
203 	uint64_t *dkeys, *dptrs, child_btree_blk;
204 	uint8_t  *pos;
205 	int i, error, selected;
206 
207 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup_level ino %"PRIu64", "
208 		"lblocknr %"PRIu64", btree_vblknr %"PRIu64", level %d\n",
209 		node->ino, lblocknr, btree_vblknr, level));
210 
211 	/* translate btree_vblknr */
212 	error = nilfs_nvtop(node, 1, &btree_vblknr, &btree_blknr);
213 	if (error)
214 		return error;
215 
216 	/* get our block */
217 	error = nilfs_dev_bread(nilfsdev, btree_blknr, NOCRED, 0, &bp);
218 	if (error) {
219 		return error;
220 	}
221 
222 	btree_hdr = (struct nilfs_btree_node *) bp->b_data;
223 	pos =   (uint8_t *) bp->b_data +
224 		sizeof(struct nilfs_btree_node) +
225 		NILFS_BTREE_NODE_EXTRA_PAD_SIZE;
226 	dkeys = (uint64_t *) pos;
227 	dptrs = dkeys + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfsdev->blocksize);
228 
229 	assert((btree_hdr->bn_flags & NILFS_BTREE_NODE_ROOT) == 0);
230 
231 	/* select matching child XXX could use binary search */
232 	selected = 0;
233 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
234 		if (dkeys[i] > lblocknr)
235 			break;
236 		selected = i;
237 	}
238 
239 	if (level == 1) {
240 		/* if found it mapped */
241 		if (dkeys[selected] == lblocknr)
242 			*vblocknr = dptrs[selected];
243 		brelse(bp, BC_AGE);
244 		return 0;
245 	}
246 
247 	/* lookup in selected child */
248 	assert(dkeys[selected] <= lblocknr);
249 	child_btree_blk = dptrs[selected];
250 	brelse(bp, BC_AGE);
251 
252 	return nilfs_btree_lookup_level(node, lblocknr,
253 			child_btree_blk, btree_hdr->bn_level-1, vblocknr);
254 }
255 
256 
257 /* internal function */
258 static int
259 nilfs_btree_lookup(struct nilfs_node *node, uint64_t lblocknr,
260 		uint64_t *vblocknr)
261 {
262 	struct nilfs_inode  *inode    = &node->inode;
263 	struct nilfs_btree_node  *btree_hdr;
264 	uint64_t *dkeys, *dptrs, *dtrans;
265 	int i, selected;
266 	int error;
267 
268 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup ino %"PRIu64", "
269 		"lblocknr %"PRIu64"\n", node->ino, lblocknr));
270 
271 	btree_hdr  = (struct nilfs_btree_node *) &inode->i_bmap[0];
272 	dkeys  = &inode->i_bmap[1];
273 	dptrs  = dkeys + NILFS_BTREE_ROOT_NCHILDREN_MAX;
274 	dtrans = &inode->i_bmap[1];
275 
276 	/* SMALL, direct lookup */
277 	*vblocknr = 0;
278 	if ((btree_hdr->bn_flags & NILFS_BMAP_LARGE) == 0) {
279 		if (lblocknr < NILFS_DIRECT_NBLOCKS) {
280 			*vblocknr = dtrans[lblocknr];
281 			return 0;
282 		}
283 		/* not mapped XXX could be considered error here */
284 		return 0;
285 	}
286 
287 	/* LARGE, select matching child; XXX could use binary search */
288 	dtrans = NULL;
289 	error = 0;
290 	selected = 0;
291 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
292 		if (dkeys[i] > lblocknr)
293 			break;
294 		selected = i;
295 	}
296 
297 	/* if selected key > lblocknr, its not mapped */
298 	if (dkeys[selected] > lblocknr)
299 		return 0;
300 
301 	/* overshooting? then not mapped */
302 	if (selected == nilfs_rw16(btree_hdr->bn_nchildren))
303 		return 0;
304 
305 	/* level should be > 1 or otherwise it should be a direct one */
306 	assert(btree_hdr->bn_level > 1);
307 
308 	/* lookup in selected child */
309 	assert(dkeys[selected] <= lblocknr);
310 	error = nilfs_btree_lookup_level(node, lblocknr,
311 			dptrs[selected], btree_hdr->bn_level-1, vblocknr);
312 
313 	return error;
314 }
315 
316 
317 /* node should be locked on entry to prevent btree changes (unlikely) */
318 int
319 nilfs_btree_nlookup(struct nilfs_node *node, uint64_t from, uint64_t blks,
320 		uint64_t *l2vmap)
321 {
322 	uint64_t lblocknr, *vblocknr;
323 	int i, error;
324 
325 	/* TODO / OPTI multiple translations in one go possible */
326 	error = EINVAL;
327 	for (i = 0; i < blks; i++) {
328 		lblocknr  = from + i;
329 		vblocknr  = l2vmap + i;
330 		error = nilfs_btree_lookup(node, lblocknr, vblocknr);
331 
332 		DPRINTF(TRANSLATE, ("btree_nlookup ino %"PRIu64", "
333 			"lblocknr %"PRIu64" -> %"PRIu64"\n",
334 			node->ino, lblocknr, *vblocknr));
335 		if (error)
336 			break;
337 	}
338 
339 	return error;
340 }
341 
342 /* --------------------------------------------------------------------- */
343 
344 /* vtop operations */
345 
346 /* translate index to a file block number and an entry */
347 static void
348 nilfs_mdt_trans(struct nilfs_mdt *mdt, uint64_t index,
349 	uint64_t *blocknr, uint32_t *entry_in_block)
350 {
351 	uint64_t blknr;
352 	uint64_t group, group_offset, blocknr_in_group;
353 	uint64_t desc_block, desc_offset;
354 
355 	/* calculate our offset in the file */
356 	group             = index / mdt->entries_per_group;
357 	group_offset      = index % mdt->entries_per_group;
358 	desc_block        = group / mdt->groups_per_desc_block;
359 	desc_offset       = group % mdt->groups_per_desc_block;
360 	blocknr_in_group  = group_offset / mdt->entries_per_block;
361 
362 	/* to descgroup offset */
363 	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
364 
365 	/* to group offset */
366 	blknr += desc_offset * mdt->blocks_per_group;
367 
368 	/* to actual file block */
369 	blknr += 1 + blocknr_in_group;
370 
371 	*blocknr        = blknr;
372 	*entry_in_block = group_offset % mdt->entries_per_block;
373 }
374 
375 
376 static int
377 nilfs_vtop(struct nilfs_device *nilfsdev, uint64_t vblocknr, uint64_t *pblocknr)
378 {
379 	struct nilfs_dat_entry *entry;
380 	struct buf *bp;
381 	uint64_t  ldatblknr;
382 	uint32_t  entry_in_block;
383 	int error;
384 
385 	nilfs_mdt_trans(&nilfsdev->dat_mdt, vblocknr,
386 		&ldatblknr, &entry_in_block);
387 
388 	error = nilfs_bread(nilfsdev->dat_node, ldatblknr, NOCRED, 0, &bp);
389 	if (error) {
390 		printf("vtop: can't read in DAT block %"PRIu64"!\n", ldatblknr);
391 		return error;
392 	}
393 
394 	/* get our translation */
395 	entry = ((struct nilfs_dat_entry *) bp->b_data) + entry_in_block;
396 #if 0
397 	printf("\tvblk %4"PRIu64" -> %"PRIu64" for "
398 		"checkpoint %"PRIu64" to %"PRIu64"\n",
399 		vblocknr,
400 		nilfs_rw64(entry->de_blocknr),
401 		nilfs_rw64(entry->de_start),
402 		nilfs_rw64(entry->de_end));
403 #endif
404 
405 	*pblocknr = nilfs_rw64(entry->de_blocknr);
406 	brelse(bp, BC_AGE);
407 
408 	return 0;
409 }
410 
411 
412 int
413 nilfs_nvtop(struct nilfs_node *node, uint64_t blks, uint64_t *l2vmap,
414 		uint64_t *v2pmap)
415 {
416 	uint64_t vblocknr, *pblocknr;
417 	int i, error;
418 
419 	/* the DAT inode is the only one not mapped virtual */
420 	if (node->ino == NILFS_DAT_INO) {
421 		memcpy(v2pmap, l2vmap, blks * sizeof(uint64_t));
422 		return 0;
423 	}
424 
425 	/* TODO / OPTI more translations in one go */
426 	error = 0;
427 	for (i = 0; i < blks; i++) {
428 		vblocknr  = l2vmap[i];
429 		pblocknr  = v2pmap + i;
430 		*pblocknr = 0;
431 
432 		/* only translate valid vblocknrs */
433 		if (vblocknr == 0)
434 			continue;
435 		error = nilfs_vtop(node->nilfsdev, vblocknr, pblocknr);
436 		if (error)
437 			break;
438 	}
439 
440 	return error;
441 }
442 
443 /* --------------------------------------------------------------------- */
444 
/*
 * Bookkeeping for one partial segment visited while searching for the last
 * super root; entries are chained on a list by nilfs_search_super_root().
 */
struct nilfs_recover_info {
	uint64_t segnum;	/* full segment number that contains pseg */
	uint64_t pseg;		/* block number of the partial segment */

	struct nilfs_segment_summary segsum;	/* read by nilfs_load_segsum() */
	struct nilfs_super_root      super_root; /* not filled in by the code in this file section */
	STAILQ_ENTRY(nilfs_recover_info) next;	/* list linkage */
};
453 
454 
455 /*
456  * Helper functions of nilfs_mount() that actually mounts the disc.
457  */
458 static int
459 nilfs_load_segsum(struct nilfs_device *nilfsdev,
460 	struct nilfs_recover_info *ri)
461 {
462 	struct buf *bp;
463 	uint64_t blocknr, offset;
464 	uint32_t segsum_struct_size;
465 	uint32_t magic;
466 	int error;
467 
468 	segsum_struct_size = sizeof(struct nilfs_segment_summary);
469 
470 	/* read in segsum structure */
471 	bp      = NULL;
472 	blocknr = ri->pseg;
473 	offset  = 0;
474 	error = nilfs_get_segment_log(nilfsdev,
475 			&blocknr, &offset, &bp,
476 			segsum_struct_size, (void *) &ri->segsum);
477 	if (error)
478 		goto out;
479 
480 	/* sanity checks */
481 	magic = nilfs_rw32(ri->segsum.ss_magic);
482 	if (magic != NILFS_SEGSUM_MAGIC) {
483 		DPRINTF(VOLUMES, ("nilfs: bad magic in pseg %"PRIu64"\n",
484 			ri->pseg));
485 		error = EINVAL;
486 		goto out;
487 	}
488 
489 	/* TODO check segment summary checksum */
490 	/* TODO check data checksum */
491 
492 out:
493 	if (bp)
494 		brelse(bp, BC_AGE);
495 
496 	return error;
497 }
498 
499 
500 static int
501 nilfs_load_super_root(struct nilfs_device *nilfsdev,
502 	struct nilfs_recover_info *ri)
503 {
504 	struct nilfs_segment_summary *segsum = &ri->segsum;
505 	struct nilfs_super_root *super_root;
506 	struct buf *bp;
507 	uint64_t blocknr, offset;
508 	uint32_t segsum_size, size;
509 	uint32_t nsumblk, nfileblk;
510 	uint32_t super_root_crc, comp_crc;
511 	int off, error;
512 
513 	/* process segment summary */
514 	segsum_size = nilfs_rw32(segsum->ss_sumbytes);
515 	nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
516 	nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
517 
518 	/* check if there is a superroot */
519 	if ((nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) == 0) {
520 		DPRINTF(VOLUMES, ("nilfs: no super root in pseg %"PRIu64"\n",
521 			ri->pseg));
522 		return ENOENT;
523 	}
524 
525 	/* get our super root, located at the end of the pseg */
526 	blocknr = ri->pseg + nsumblk + nfileblk - 1;
527 	offset = 0;
528 	size = sizeof(struct nilfs_super_root);
529 	bp = NULL;
530 	error = nilfs_get_segment_log(nilfsdev,
531 			&blocknr, &offset, &bp,
532 			size, (void *) &nilfsdev->super_root);
533 	if (bp)
534 		brelse(bp, BC_AGE);
535 	if (error) {
536 		printf("read in of superroot failed\n");
537 		return EIO;
538 	}
539 
540 	/* check super root crc */
541 	super_root = &nilfsdev->super_root;
542 	super_root_crc = nilfs_rw32(super_root->sr_sum);
543 	off = sizeof(super_root->sr_sum);
544 	comp_crc = crc32_le(nilfs_rw32(nilfsdev->super.s_crc_seed),
545 		(uint8_t *) super_root + off,
546 		NILFS_SR_BYTES - off);
547 	if (super_root_crc != comp_crc) {
548 		DPRINTF(VOLUMES, ("    invalid superroot, likely from old format\n"));
549 		return EINVAL;
550 	}
551 
552 	DPRINTF(VOLUMES, ("    got valid superroot\n"));
553 
554 	return 0;
555 }
556 
557 /*
558  * Search for the last super root recorded.
559  */
560 void
561 nilfs_search_super_root(struct nilfs_device *nilfsdev)
562 {
563 	struct nilfs_super_block *super;
564 	struct nilfs_segment_summary *segsum;
565 	struct nilfs_recover_info *ri, *ori, *i_ri;
566 	STAILQ_HEAD(,nilfs_recover_info) ri_list;
567 	uint64_t seg_start, seg_end, cno;
568 	uint32_t segsum_size;
569 	uint32_t nsumblk, nfileblk;
570 	int error;
571 
572 	STAILQ_INIT(&ri_list);
573 
574 	/* search for last super root */
575 	ri = malloc(sizeof(struct nilfs_recover_info), M_NILFSTEMP, M_WAITOK);
576 	memset(ri, 0, sizeof(struct nilfs_recover_info));
577 
578 	/* if enabled, start from the specified position */
579 	if (0) {
580 		/* start from set start */
581 		nilfsdev->super.s_last_pseg = nilfsdev->super.s_first_data_block;
582 		nilfsdev->super.s_last_cno  = nilfs_rw64(1);
583 	}
584 
585 	ri->pseg   = nilfs_rw64(nilfsdev->super.s_last_pseg); /* blknr */
586 	ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
587 
588 	error = 0;
589 	cno = nilfs_rw64(nilfsdev->super.s_last_cno);
590 	DPRINTF(VOLUMES, ("nilfs: seach_super_root start in pseg %"PRIu64"\n",
591 			ri->pseg));
592 	for (;;) {
593 		DPRINTF(VOLUMES, (" at pseg %"PRIu64"\n", ri->pseg));
594 		error = nilfs_load_segsum(nilfsdev, ri);
595 		if (error)
596 			break;
597 
598 		segsum = &ri->segsum;
599 
600 		/* try to load super root */
601 		if (nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) {
602 			DPRINTF(VOLUMES, (" try super root\n"));
603 			error = nilfs_load_super_root(nilfsdev, ri);
604 			if (error)
605 				break;	/* confused */
606 			/* wipe current list of ri */
607 			while (!STAILQ_EMPTY(&ri_list)) {
608 				i_ri = STAILQ_FIRST(&ri_list);
609 				STAILQ_REMOVE_HEAD(&ri_list, next);
610 				free(i_ri, M_NILFSTEMP);
611 			}
612 			super = &nilfsdev->super;
613 
614 			super->s_last_pseg = nilfs_rw64(ri->pseg);
615 			super->s_last_cno  = cno++;
616 			super->s_last_seq  = segsum->ss_seq;
617 			super->s_state     = nilfs_rw16(NILFS_VALID_FS);
618 		} else {
619 			STAILQ_INSERT_TAIL(&ri_list, ri, next);
620 			ori = ri;
621 			ri = malloc(sizeof(struct nilfs_recover_info),
622 				M_NILFSTEMP, M_WAITOK);
623 			memset(ri, 0, sizeof(struct nilfs_recover_info));
624 			ri->segnum = ori->segnum;
625 			ri->pseg   = ori->pseg;
626 			/* segsum keeps pointing to the `old' ri */
627 		}
628 
629 		/* continue to the next pseg */
630 		segsum_size = nilfs_rw32(segsum->ss_sumbytes);
631 		nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
632 		nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
633 
634 		/* calculate next partial segment location */
635 		ri->pseg += nsumblk + nfileblk;
636 
637 		/* did we reach the end of the segment? if so, go to the next */
638 		nilfs_get_segment_range(nilfsdev, ri->segnum, &seg_start, &seg_end);
639 		if (ri->pseg >= seg_end)
640 			ri->pseg = nilfs_rw64(segsum->ss_next);
641 		ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
642 	}
643 
644 	/*
645 	 * XXX No roll-forward yet of the remaining partial segments.
646 	 */
647 
648 	/* wipe current list of ri */
649 	while (!STAILQ_EMPTY(&ri_list)) {
650 		i_ri = STAILQ_FIRST(&ri_list);
651 		STAILQ_REMOVE_HEAD(&ri_list, next);
652 		printf("nilfs: ignoring pseg at %"PRIu64"\n", i_ri->pseg);
653 		free(i_ri, M_NILFSTEMP);
654 	}
655 	free(ri, M_NILFSTEMP);
656 }
657 
658 /* --------------------------------------------------------------------- */
659 
660 /*
661  * Genfs interfacing
662  *
663  * static const struct genfs_ops nilfs_genfsops = {
664  * 	.gop_size = genfs_size,
665  * 		size of transfers
666  * 	.gop_alloc = nilfs_gop_alloc,
667  * 		allocate len bytes at offset
668  * 	.gop_write = genfs_gop_write,
669  * 		putpages interface code
670  * 	.gop_markupdate = nilfs_gop_markupdate,
671  * 		set update/modify flags etc.
672  * }
673  */
674 
675 /*
676  * Callback from genfs to allocate len bytes at offset off; only called when
677  * filling up gaps in the allocation.
678  */
679 static int
680 nilfs_gop_alloc(struct vnode *vp, off_t off,
681     off_t len, int flags, kauth_cred_t cred)
682 {
683 	DPRINTF(NOTIMPL, ("nilfs_gop_alloc not implemented\n"));
684 	DPRINTF(ALLOC, ("nilfs_gop_alloc called for %"PRIu64" bytes\n", len));
685 
686 	return 0;
687 }
688 
689 
690 /*
691  * callback from genfs to update our flags
692  */
693 static void
694 nilfs_gop_markupdate(struct vnode *vp, int flags)
695 {
696 	struct nilfs_node *nilfs_node = VTOI(vp);
697 	u_long mask = 0;
698 
699 	if ((flags & GOP_UPDATE_ACCESSED) != 0) {
700 		mask = IN_ACCESS;
701 	}
702 	if ((flags & GOP_UPDATE_MODIFIED) != 0) {
703 		if (vp->v_type == VREG) {
704 			mask |= IN_CHANGE | IN_UPDATE;
705 		} else {
706 			mask |= IN_MODIFY;
707 		}
708 	}
709 	if (mask) {
710 		nilfs_node->i_flags |= mask;
711 	}
712 }
713 
714 
/*
 * genfs operations vector for nilfs vnodes; installed on every new vnode by
 * genfs_node_init() in nilfs_get_node_raw().
 */
static const struct genfs_ops nilfs_genfsops = {
	.gop_size = genfs_size,			/* generic genfs helper */
	.gop_alloc = nilfs_gop_alloc,		/* logs only, allocates nothing */
	.gop_write = genfs_gop_write_rwmap,	/* generic genfs helper */
	.gop_markupdate = nilfs_gop_markupdate,	/* sets IN_* flags on the node */
};
721 
722 /* --------------------------------------------------------------------- */
723 
724 static void
725 nilfs_register_node(struct nilfs_node *node)
726 {
727 	struct nilfs_mount *ump;
728 	struct nilfs_node *chk;
729 	uint32_t hashline;
730 
731 	ump = node->ump;
732 	mutex_enter(&ump->ihash_lock);
733 
734 	/* add to our hash table */
735 	hashline = nilfs_calchash(node->ino) & NILFS_INODE_HASHMASK;
736 #ifdef DEBUG
737 	LIST_FOREACH(chk, &ump->nilfs_nodes[hashline], hashchain) {
738 		assert(chk);
739 		if (chk->ino == node->ino)
740 			panic("Double node entered\n");
741 	}
742 #else
743 	chk = NULL;
744 #endif
745 	LIST_INSERT_HEAD(&ump->nilfs_nodes[hashline], node, hashchain);
746 
747 	mutex_exit(&ump->ihash_lock);
748 }
749 
750 
751 static void
752 nilfs_deregister_node(struct nilfs_node *node)
753 {
754 	struct nilfs_mount *ump;
755 
756 	ump = node->ump;
757 	mutex_enter(&ump->ihash_lock);
758 
759 	/* remove from hash list */
760 	LIST_REMOVE(node, hashchain);
761 
762 	mutex_exit(&ump->ihash_lock);
763 }
764 
765 
766 static struct nilfs_node *
767 nilfs_hash_lookup(struct nilfs_mount *ump, ino_t ino)
768 {
769 	struct nilfs_node *node;
770 	struct vnode *vp;
771 	uint32_t hashline;
772 
773 loop:
774 	mutex_enter(&ump->ihash_lock);
775 
776 	/* search our hash table */
777 	hashline = nilfs_calchash(ino) & NILFS_INODE_HASHMASK;
778 	LIST_FOREACH(node, &ump->nilfs_nodes[hashline], hashchain) {
779 		assert(node);
780 		if (node->ino == ino) {
781 			vp = node->vnode;
782 			assert(vp);
783 			mutex_enter(vp->v_interlock);
784 			mutex_exit(&ump->ihash_lock);
785 			if (vget(vp, LK_EXCLUSIVE))
786 				goto loop;
787 			return node;
788 		}
789 	}
790 	mutex_exit(&ump->ihash_lock);
791 
792 	return NULL;
793 }
794 
795 
796 /* node action implementators */
797 extern int (**nilfs_vnodeop_p)(void *);
798 
799 int
800 nilfs_get_node_raw(struct nilfs_device *nilfsdev, struct nilfs_mount *ump,
801 	uint64_t ino, struct nilfs_inode *inode, struct nilfs_node **nodep)
802 {
803 	struct nilfs_node *node;
804 	struct vnode *nvp;
805 	struct mount *mp;
806 	int (**vnodeops)(void *);
807 	int error;
808 
809 	*nodep = NULL;
810 	vnodeops = nilfs_vnodeop_p;
811 
812 	/* associate with mountpoint if present*/
813 	mp = ump? ump->vfs_mountp : NULL;
814 	error = getnewvnode(VT_NILFS, mp, vnodeops, NULL, &nvp);
815 	if (error)
816 		return error;
817 
818 	/* lock node */
819 	error = vn_lock(nvp, LK_EXCLUSIVE | LK_RETRY);
820 	if (error) {
821 		nvp->v_data = NULL;
822 		ungetnewvnode(nvp);
823 		return error;
824 	}
825 
826 	node = pool_get(&nilfs_node_pool, PR_WAITOK);
827 	memset(node, 0, sizeof(struct nilfs_node));
828 
829 	/* crosslink */
830 	node->vnode    = nvp;
831 	node->ump      = ump;
832 	node->nilfsdev = nilfsdev;
833 	nvp->v_data    = node;
834 
835 	/* initiase nilfs node */
836 	node->ino   = ino;
837 	node->inode = *inode;
838 	node->lockf = NULL;
839 
840 	/* needed? */
841 	mutex_init(&node->node_mutex, MUTEX_DEFAULT, IPL_NONE);
842 	cv_init(&node->node_lock, "nilfsnlk");
843 
844 	/* initialise genfs */
845 	genfs_node_init(nvp, &nilfs_genfsops);
846 
847 	/* check if we're fetching the root */
848 	if (ino == NILFS_ROOT_INO)
849 		nvp->v_vflag |= VV_ROOT;
850 
851 	/* update vnode's file type XXX is there a function for this? */
852 	nvp->v_type = VREG;
853 	if (S_ISDIR(inode->i_mode))
854 		nvp->v_type = VDIR;
855 	if (S_ISLNK(inode->i_mode))
856 		nvp->v_type = VLNK;
857 #if 0
858 	if (S_ISCHR(inode->i_mode))
859 		nvp->v_type = VCHR;
860 	if (S_ISBLK(inode->i_mode))
861 		nvp->v_type = VBLK;
862 #endif
863 	/* XXX what else? */
864 
865 	/* fixup inode size for system nodes */
866 	if ((ino < NILFS_USER_INO) && (ino != NILFS_ROOT_INO)) {
867 		DPRINTF(VOLUMES, ("NEED TO GET my size for inode %"PRIu64"\n",
868 			ino));
869 		/* for now set it to maximum, -1 is illegal */
870 		inode->i_size = nilfs_rw64(((uint64_t) -2));
871 	}
872 
873 	uvm_vnp_setsize(nvp, nilfs_rw64(inode->i_size));
874 
875 	if (ump)
876 		nilfs_register_node(node);
877 
878 	/* return node */
879 	*nodep = node;
880 	return 0;
881 }
882 
883 
884 int
885 nilfs_get_node(struct nilfs_mount *ump, uint64_t ino, struct nilfs_node **nodep)
886 {
887 	struct nilfs_device *nilfsdev;
888 	struct nilfs_inode   inode, *entry;
889 	struct buf *bp;
890 	uint64_t ivblocknr;
891 	uint32_t entry_in_block;
892 	int error;
893 
894 	/* lookup node in hash table */
895 	*nodep = nilfs_hash_lookup(ump, ino);
896 	if (*nodep)
897 		return 0;
898 
899 	/* lock to disallow simultanious creation of same udf_node */
900 	mutex_enter(&ump->get_node_lock);
901 
902 	/* relookup since it could be created while waiting for the mutex */
903 	*nodep = nilfs_hash_lookup(ump, ino);
904 	if (*nodep) {
905 		mutex_exit(&ump->get_node_lock);
906 		return 0;
907 	}
908 
909 	/* create new inode; XXX check could be handier */
910 	if ((ino < NILFS_ATIME_INO) && (ino != NILFS_ROOT_INO)) {
911 		printf("nilfs_get_node: system ino %"PRIu64" not in mount "
912 			"point!\n", ino);
913 		mutex_exit(&ump->get_node_lock);
914 		return ENOENT;
915 	}
916 
917 	/* lookup inode in the ifile */
918 	DPRINTF(NODE, ("lookup ino %"PRIu64"\n", ino));
919 
920 	/* lookup inode structure in mountpoints ifile */
921 	nilfsdev = ump->nilfsdev;
922 	nilfs_mdt_trans(&nilfsdev->ifile_mdt, ino, &ivblocknr, &entry_in_block);
923 
924 	error = nilfs_bread(ump->ifile_node, ivblocknr, NOCRED, 0, &bp);
925 	if (error) {
926 		mutex_exit(&ump->get_node_lock);
927 		return ENOENT;
928 	}
929 
930 	/* get inode entry */
931 	entry =  (struct nilfs_inode *) bp->b_data + entry_in_block;
932 	inode = *entry;
933 	brelse(bp, BC_AGE);
934 
935 	/* get node */
936 	error = nilfs_get_node_raw(ump->nilfsdev, ump, ino, &inode, nodep);
937 	mutex_exit(&ump->get_node_lock);
938 
939 	return error;
940 }
941 
942 
943 void
944 nilfs_dispose_node(struct nilfs_node **nodep)
945 {
946 	struct vnode *vp;
947 	struct nilfs_node *node;
948 
949 	/* protect against rogue values */
950 	if (!*nodep)
951 		return;
952 
953 	node = *nodep;
954 	vp = node->vnode;
955 
956 	/* remove dirhash if present */
957 	dirhash_purge(&node->dir_hash);
958 
959 	/* remove from our hash lookup table */
960 	if (node->ump)
961 		nilfs_deregister_node(node);
962 
963 	/* destroy our locks */
964 	mutex_destroy(&node->node_mutex);
965 	cv_destroy(&node->node_lock);
966 
967 	/* dissociate from our vnode */
968 	genfs_node_destroy(node->vnode);
969 	vp->v_data = NULL;
970 
971 	/* free our associated memory */
972 	pool_put(&nilfs_node_pool, node);
973 
974 	*nodep = NULL;
975 }
976 
977 
/*
 * Timestamp update hook; intentionally a no-op.
 */
void
nilfs_itimes(struct nilfs_node *node, struct timespec *acc,
	struct timespec *mod, struct timespec *birth)
{
	(void) node;
	(void) acc;
	(void) mod;
	(void) birth;
}
983 
984 
/*
 * Node update is not supported: always fails with EROFS.
 */
int
nilfs_update(struct vnode *node, struct timespec *acc,
	struct timespec *mod, struct timespec *birth, int updflags)
{
	(void) node;
	(void) acc;
	(void) mod;
	(void) birth;
	(void) updflags;

	return EROFS;
}
991 
992 
993 int
994 nilfs_chsize(struct vnode *vp, u_quad_t newsize, kauth_cred_t cred)
995 {
996 	return EROFS;
997 }
998 
999 
1000 
/*
 * Growing a node is not supported: always fails with EROFS.
 */
int
nilfs_grow_node(struct nilfs_node *node, uint64_t new_size)
{
	(void) node;
	(void) new_size;

	return EROFS;
}
1006 
1007 
/*
 * Shrinking a node is not supported: always fails with EROFS.
 */
int
nilfs_shrink_node(struct nilfs_node *node, uint64_t new_size)
{
	(void) node;
	(void) new_size;

	return EROFS;
}
1013 
1014 
1015 static int
1016 dirhash_fill(struct nilfs_node *dir_node)
1017 {
1018 	struct vnode *dvp = dir_node->vnode;
1019 	struct dirhash *dirh;
1020 	struct nilfs_dir_entry *ndirent;
1021 	struct dirent dirent;
1022 	struct buf *bp;
1023 	uint64_t file_size, diroffset, blkoff;
1024 	uint64_t blocknr;
1025 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
1026 	uint8_t *pos, name_len;
1027 	int error;
1028 
1029 	DPRINTF(CALL, ("dirhash_fill called\n"));
1030 
1031 	if (dvp->v_type != VDIR)
1032 		return ENOTDIR;
1033 
1034 	/* make sure we have a dirhash to work on */
1035 	dirh = dir_node->dir_hash;
1036 	KASSERT(dirh);
1037 	KASSERT(dirh->refcnt > 0);
1038 
1039 	if (dirh->flags & DIRH_BROKEN)
1040 		return EIO;
1041 
1042 	if (dirh->flags & DIRH_COMPLETE)
1043 		return 0;
1044 
1045 	DPRINTF(DIRHASH, ("Filling directory hash\n"));
1046 
1047 	/* make sure we have a clean dirhash to add to */
1048 	dirhash_purge_entries(dirh);
1049 
1050 	/* get directory filesize */
1051 	file_size = nilfs_rw64(dir_node->inode.i_size);
1052 
1053 	/* walk the directory */
1054 	error = 0;
1055 	diroffset = 0;
1056 
1057 	blocknr = diroffset / blocksize;
1058 	blkoff  = diroffset % blocksize;
1059 	error = nilfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
1060 	if (error) {
1061 		dirh->flags |= DIRH_BROKEN;
1062 		dirhash_purge_entries(dirh);
1063 		return EIO;
1064 	}
1065 	while (diroffset < file_size) {
1066 		DPRINTF(READDIR, ("filldir : offset = %"PRIu64"\n",
1067 			diroffset));
1068 		if (blkoff >= blocksize) {
1069 			blkoff = 0; blocknr++;
1070 			brelse(bp, BC_AGE);
1071 			error = nilfs_bread(dir_node, blocknr, NOCRED, 0,
1072 					&bp);
1073 			if (error) {
1074 				dirh->flags |= DIRH_BROKEN;
1075 				dirhash_purge_entries(dirh);
1076 				return EIO;
1077 			}
1078 		}
1079 
1080 		/* read in one dirent */
1081 		pos = (uint8_t *) bp->b_data + blkoff;
1082 		ndirent = (struct nilfs_dir_entry *) pos;
1083 		name_len = ndirent->name_len;
1084 
1085 		memset(&dirent, 0, sizeof(struct dirent));
1086 		dirent.d_fileno = nilfs_rw64(ndirent->inode);
1087 		dirent.d_type   = ndirent->file_type;	/* 1:1 ? */
1088 		dirent.d_namlen = name_len;
1089 		strncpy(dirent.d_name, ndirent->name, name_len);
1090 		dirent.d_reclen = _DIRENT_SIZE(&dirent);
1091 		DPRINTF(DIRHASH, ("copying `%*.*s`\n", name_len,
1092 			name_len, dirent.d_name));
1093 
1094 		/* XXX is it deleted? extra free space? */
1095 		dirhash_enter(dirh, &dirent, diroffset,
1096 			nilfs_rw16(ndirent->rec_len), 0);
1097 
1098 		/* advance */
1099 		diroffset += nilfs_rw16(ndirent->rec_len);
1100 		blkoff    += nilfs_rw16(ndirent->rec_len);
1101 	}
1102 	brelse(bp, BC_AGE);
1103 
1104 	dirh->flags |= DIRH_COMPLETE;
1105 
1106 	return 0;
1107 }
1108 
1109 
1110 int
1111 nilfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
1112 		uint64_t *ino, int *found)
1113 {
1114 	struct nilfs_node	*dir_node = VTOI(dvp);
1115 	struct nilfs_dir_entry *ndirent;
1116 	struct dirhash		*dirh;
1117 	struct dirhash_entry	*dirh_ep;
1118 	struct buf *bp;
1119 	uint64_t diroffset, blkoff;
1120 	uint64_t blocknr;
1121 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
1122 	uint8_t *pos;
1123 	int hit, error;
1124 
1125 	/* set default return */
1126 	*found = 0;
1127 
1128 	/* get our dirhash and make sure its read in */
1129 	dirhash_get(&dir_node->dir_hash);
1130 	error = dirhash_fill(dir_node);
1131 	if (error) {
1132 		dirhash_put(dir_node->dir_hash);
1133 		return error;
1134 	}
1135 	dirh = dir_node->dir_hash;
1136 
1137 	/* allocate temporary space for fid */
1138 
1139 	DPRINTF(DIRHASH, ("dirhash_lookup looking for `%*.*s`\n",
1140 		namelen, namelen, name));
1141 
1142 	/* search our dirhash hits */
1143 	*ino = 0;
1144 	dirh_ep = NULL;
1145 	for (;;) {
1146 		hit = dirhash_lookup(dirh, name, namelen, &dirh_ep);
1147 		/* if no hit, abort the search */
1148 		if (!hit)
1149 			break;
1150 
1151 		/* check this hit */
1152 		diroffset = dirh_ep->offset;
1153 
1154 		blocknr = diroffset / blocksize;
1155 		blkoff  = diroffset % blocksize;
1156 		error = nilfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
1157 		if (error)
1158 			return EIO;
1159 
1160 		/* read in one dirent */
1161 		pos = (uint8_t *) bp->b_data + blkoff;
1162 		ndirent = (struct nilfs_dir_entry *) pos;
1163 
1164 		DPRINTF(DIRHASH, ("dirhash_lookup\tchecking `%*.*s`\n",
1165 			ndirent->name_len, ndirent->name_len, ndirent->name));
1166 
1167 		/* see if its our entry */
1168 		KASSERT(ndirent->name_len == namelen);
1169 		if (strncmp(ndirent->name, name, namelen) == 0) {
1170 			*found = 1;
1171 			*ino = nilfs_rw64(ndirent->inode);
1172 			brelse(bp, BC_AGE);
1173 			break;
1174 		}
1175 		brelse(bp, BC_AGE);
1176 	}
1177 
1178 	dirhash_put(dir_node->dir_hash);
1179 
1180 	return error;
1181 }
1182 
1183 
/*
 * Removing a directory entry is not supported: always fails with EROFS.
 */
int
nilfs_dir_detach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
	struct nilfs_node *node, struct componentname *cnp)
{
	(void) ump;
	(void) dir_node;
	(void) node;
	(void) cnp;

	return EROFS;
}
1189 
1190 
/*
 * Adding a directory entry is not supported: always fails with EROFS.
 */
int
nilfs_dir_attach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
	struct nilfs_node *node, struct vattr *vap, struct componentname *cnp)
{
	(void) ump;
	(void) dir_node;
	(void) node;
	(void) vap;
	(void) cnp;

	return EROFS;
}
1196 
1197 
/*
 * Creating a new node is not supported: always fails with EROFS and leaves
 * *vpp untouched.
 *
 * XXX return vnode?
 */
int
nilfs_create_node(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
	struct componentname *cnp)
{
	(void) dvp;
	(void) vpp;
	(void) vap;
	(void) cnp;

	return EROFS;
}
1204 
1205 
/*
 * Node deletion hook; intentionally a no-op.
 */
void
nilfs_delete_node(struct nilfs_node *node)
{
	(void) node;
}
1210 
1211 
1212