xref: /netbsd-src/sys/fs/nilfs/nilfs_subr.c (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
1 /* $NetBSD: nilfs_subr.c,v 1.14 2015/03/29 14:12:28 riastradh Exp $ */
2 
3 /*
4  * Copyright (c) 2008, 2009 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: nilfs_subr.c,v 1.14 2015/03/29 14:12:28 riastradh Exp $");
32 #endif /* not lint */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/namei.h>
37 #include <sys/resourcevar.h>	/* defines plimit structure in proc struct */
38 #include <sys/kernel.h>
39 #include <sys/file.h>		/* define FWRITE ... */
40 #include <sys/stat.h>
41 #include <sys/buf.h>
42 #include <sys/proc.h>
43 #include <sys/mount.h>
44 #include <sys/vnode.h>
45 #include <sys/signalvar.h>
46 #include <sys/malloc.h>
47 #include <sys/dirent.h>
48 #include <sys/lockf.h>
49 #include <sys/kauth.h>
50 #include <sys/dirhash.h>
51 
52 #include <miscfs/genfs/genfs.h>
53 #include <uvm/uvm_extern.h>
54 
55 #include <fs/nilfs/nilfs_mount.h>
56 #include "nilfs.h"
57 #include "nilfs_subr.h"
58 #include "nilfs_bswap.h"
59 
60 
/* Fetch the nilfs_node stored in a vnode's v_data pointer. */
#define VTOI(vp) ((struct nilfs_node *) (vp)->v_data)

/* forward declarations */
static int nilfs_btree_lookup(struct nilfs_node *node, uint64_t lblocknr,
	uint64_t *vblocknr);
66 
67 /* basic calculators */
68 uint64_t nilfs_get_segnum_of_block(struct nilfs_device *nilfsdev,
69 	uint64_t blocknr)
70 {
71 	return blocknr / nilfs_rw32(nilfsdev->super.s_blocks_per_segment);
72 }
73 
74 
75 void
76 nilfs_get_segment_range(struct nilfs_device *nilfsdev, uint64_t segnum,
77         uint64_t *seg_start, uint64_t *seg_end)
78 {
79         uint64_t blks_per_seg;
80 
81         blks_per_seg = nilfs_rw64(nilfsdev->super.s_blocks_per_segment);
82         *seg_start = blks_per_seg * segnum;
83         *seg_end   = *seg_start + blks_per_seg -1;
84         if (segnum == 0)
85                 *seg_start = nilfs_rw64(nilfsdev->super.s_first_data_block);
86 }
87 
88 
89 void nilfs_calc_mdt_consts(struct nilfs_device *nilfsdev,
90 	struct nilfs_mdt *mdt, int entry_size)
91 {
92 	uint32_t blocksize = nilfsdev->blocksize;
93 
94 	mdt->entries_per_group = blocksize * 8;	   /* bits in sector */
95 	mdt->entries_per_block = blocksize / entry_size;
96 
97 	mdt->blocks_per_group  =
98 		(mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
99 	mdt->groups_per_desc_block =
100 		blocksize / sizeof(struct nilfs_block_group_desc);
101 	mdt->blocks_per_desc_block =
102 		mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
103 }
104 
105 
106 /* from NetBSD's src/sys/net/if_ethersubr.c */
/*
 * Update a running little-endian CRC32 with `len' bytes from `buf'.
 *
 * Each input byte is folded in as two 4-bit steps through a 16 entry
 * lookup table.  `crc' is the starting value and the updated value is
 * returned; no initial or final inversion is applied here.
 */
uint32_t
crc32_le(uint32_t crc, const uint8_t *buf, size_t len)
{
	static const uint32_t nibble_tab[16] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};

	for (; len != 0; len--) {
		crc ^= *buf++;
		/* low nibble, then high nibble */
		crc = (crc >> 4) ^ nibble_tab[crc & 0xf];
		crc = (crc >> 4) ^ nibble_tab[crc & 0xf];
	}

	return crc;
}
126 
127 
128 /* dev reading */
129 static int
130 nilfs_dev_bread(struct nilfs_device *nilfsdev, uint64_t blocknr,
131 	int flags, struct buf **bpp)
132 {
133 	int blk2dev = nilfsdev->blocksize / DEV_BSIZE;
134 
135 	return bread(nilfsdev->devvp, blocknr * blk2dev, nilfsdev->blocksize,
136 		0, bpp);
137 }
138 
139 
140 /* read on a node */
141 int
142 nilfs_bread(struct nilfs_node *node, uint64_t blocknr,
143 	int flags, struct buf **bpp)
144 {
145 	struct nilfs_device *nilfsdev = node->nilfsdev;
146 	uint64_t vblocknr, pblockno;
147 	int error;
148 
149 	error = nilfs_btree_lookup(node, blocknr, &vblocknr);
150 	if (error)
151 		return error;
152 
153 	/* Read special files through devvp as they have no vnode attached. */
154 	if (node->ino < NILFS_USER_INO && node->ino != NILFS_ROOT_INO) {
155 		error = nilfs_nvtop(node, 1, &vblocknr, &pblockno);
156 		if (error)
157 			return error;
158 		return nilfs_dev_bread(nilfsdev, pblockno, flags, bpp);
159 	}
160 
161 	return bread(node->vnode, vblocknr, node->nilfsdev->blocksize,
162 		flags, bpp);
163 }
164 
165 
166 /* segment-log reading */
167 int
168 nilfs_get_segment_log(struct nilfs_device *nilfsdev, uint64_t *blocknr,
169 	uint64_t *offset, struct buf **bpp, int len, void *blob)
170 {
171 	int blocksize = nilfsdev->blocksize;
172 	int error;
173 
174 	KASSERT(len <= blocksize);
175 
176 	if (*offset + len > blocksize) {
177 		*blocknr = *blocknr + 1;
178 		*offset = 0;
179 	}
180 	if (*offset == 0) {
181 		if (*bpp)
182 			brelse(*bpp, BC_AGE);
183 		/* read in block */
184 		error = nilfs_dev_bread(nilfsdev, *blocknr, 0, bpp);
185 		if (error)
186 			return error;
187 	}
188 	memcpy(blob, ((uint8_t *) (*bpp)->b_data) + *offset, len);
189 	*offset += len;
190 
191 	return 0;
192 }
193 
194 /* -------------------------------------------------------------------------- */
195 
196 /* btree operations */
197 
198 static int
199 nilfs_btree_lookup_level(struct nilfs_node *node, uint64_t lblocknr,
200 		uint64_t btree_vblknr, int level, uint64_t *vblocknr)
201 {
202 	struct nilfs_device *nilfsdev = node->nilfsdev;
203 	struct nilfs_btree_node *btree_hdr;
204 	struct buf *bp;
205 	uint64_t btree_blknr;
206 	uint64_t *dkeys, *dptrs, child_btree_blk;
207 	uint8_t  *pos;
208 	int i, error, selected;
209 
210 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup_level ino %"PRIu64", "
211 		"lblocknr %"PRIu64", btree_vblknr %"PRIu64", level %d\n",
212 		node->ino, lblocknr, btree_vblknr, level));
213 
214 	/* translate btree_vblknr */
215 	error = nilfs_nvtop(node, 1, &btree_vblknr, &btree_blknr);
216 	if (error)
217 		return error;
218 
219 	/* get our block */
220 	error = nilfs_dev_bread(nilfsdev, btree_blknr, 0, &bp);
221 	if (error) {
222 		return error;
223 	}
224 
225 	btree_hdr = (struct nilfs_btree_node *) bp->b_data;
226 	pos =   (uint8_t *) bp->b_data +
227 		sizeof(struct nilfs_btree_node) +
228 		NILFS_BTREE_NODE_EXTRA_PAD_SIZE;
229 	dkeys = (uint64_t *) pos;
230 	dptrs = dkeys + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfsdev->blocksize);
231 
232 	assert((btree_hdr->bn_flags & NILFS_BTREE_NODE_ROOT) == 0);
233 
234 	/* select matching child XXX could use binary search */
235 	selected = 0;
236 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
237 		if (dkeys[i] > lblocknr)
238 			break;
239 		selected = i;
240 	}
241 
242 	if (level == 1) {
243 		/* if found it mapped */
244 		if (dkeys[selected] == lblocknr)
245 			*vblocknr = dptrs[selected];
246 		brelse(bp, BC_AGE);
247 		return 0;
248 	}
249 
250 	/* lookup in selected child */
251 	assert(dkeys[selected] <= lblocknr);
252 	child_btree_blk = dptrs[selected];
253 	brelse(bp, BC_AGE);
254 
255 	return nilfs_btree_lookup_level(node, lblocknr,
256 			child_btree_blk, btree_hdr->bn_level-1, vblocknr);
257 }
258 
259 
260 /* internal function */
261 static int
262 nilfs_btree_lookup(struct nilfs_node *node, uint64_t lblocknr,
263 		uint64_t *vblocknr)
264 {
265 	struct nilfs_inode  *inode    = &node->inode;
266 	struct nilfs_btree_node  *btree_hdr;
267 	uint64_t *dkeys, *dptrs, *dtrans;
268 	int i, selected;
269 	int error;
270 
271 	DPRINTF(TRANSLATE, ("nilfs_btree_lookup ino %"PRIu64", "
272 		"lblocknr %"PRIu64"\n", node->ino, lblocknr));
273 
274 	btree_hdr  = (struct nilfs_btree_node *) &inode->i_bmap[0];
275 	dkeys  = &inode->i_bmap[1];
276 	dptrs  = dkeys + NILFS_BTREE_ROOT_NCHILDREN_MAX;
277 	dtrans = &inode->i_bmap[1];
278 
279 	/* SMALL, direct lookup */
280 	*vblocknr = 0;
281 	if ((btree_hdr->bn_flags & NILFS_BMAP_LARGE) == 0) {
282 		if (lblocknr < NILFS_DIRECT_NBLOCKS) {
283 			*vblocknr = dtrans[lblocknr];
284 			return 0;
285 		}
286 		/* not mapped XXX could be considered error here */
287 		return 0;
288 	}
289 
290 	/* LARGE, select matching child; XXX could use binary search */
291 	dtrans = NULL;
292 	error = 0;
293 	selected = 0;
294 	for (i = 0; i < nilfs_rw16(btree_hdr->bn_nchildren); i++) {
295 		if (dkeys[i] > lblocknr)
296 			break;
297 		selected = i;
298 	}
299 
300 	/* if selected key > lblocknr, its not mapped */
301 	if (dkeys[selected] > lblocknr)
302 		return 0;
303 
304 	/* overshooting? then not mapped */
305 	if (selected == nilfs_rw16(btree_hdr->bn_nchildren))
306 		return 0;
307 
308 	/* level should be > 1 or otherwise it should be a direct one */
309 	assert(btree_hdr->bn_level > 1);
310 
311 	/* lookup in selected child */
312 	assert(dkeys[selected] <= lblocknr);
313 	error = nilfs_btree_lookup_level(node, lblocknr,
314 			dptrs[selected], btree_hdr->bn_level-1, vblocknr);
315 
316 	return error;
317 }
318 
319 
320 /* node should be locked on entry to prevent btree changes (unlikely) */
321 int
322 nilfs_btree_nlookup(struct nilfs_node *node, uint64_t from, uint64_t blks,
323 		uint64_t *l2vmap)
324 {
325 	uint64_t lblocknr, *vblocknr;
326 	int i, error;
327 
328 	/* TODO / OPTI multiple translations in one go possible */
329 	error = EINVAL;
330 	for (i = 0; i < blks; i++) {
331 		lblocknr  = from + i;
332 		vblocknr  = l2vmap + i;
333 		error = nilfs_btree_lookup(node, lblocknr, vblocknr);
334 
335 		DPRINTF(TRANSLATE, ("btree_nlookup ino %"PRIu64", "
336 			"lblocknr %"PRIu64" -> %"PRIu64"\n",
337 			node->ino, lblocknr, *vblocknr));
338 		if (error)
339 			break;
340 	}
341 
342 	return error;
343 }
344 
345 /* --------------------------------------------------------------------- */
346 
347 /* vtop operations */
348 
349 /* translate index to a file block number and an entry */
350 void
351 nilfs_mdt_trans(struct nilfs_mdt *mdt, uint64_t index,
352 	uint64_t *blocknr, uint32_t *entry_in_block)
353 {
354 	uint64_t blknr;
355 	uint64_t group, group_offset, blocknr_in_group;
356 	uint64_t desc_block, desc_offset;
357 
358 	/* calculate our offset in the file */
359 	group             = index / mdt->entries_per_group;
360 	group_offset      = index % mdt->entries_per_group;
361 	desc_block        = group / mdt->groups_per_desc_block;
362 	desc_offset       = group % mdt->groups_per_desc_block;
363 	blocknr_in_group  = group_offset / mdt->entries_per_block;
364 
365 	/* to descgroup offset */
366 	blknr = 1 + desc_block * mdt->blocks_per_desc_block;
367 
368 	/* to group offset */
369 	blknr += desc_offset * mdt->blocks_per_group;
370 
371 	/* to actual file block */
372 	blknr += 1 + blocknr_in_group;
373 
374 	*blocknr        = blknr;
375 	*entry_in_block = group_offset % mdt->entries_per_block;
376 }
377 
378 
379 static int
380 nilfs_vtop(struct nilfs_device *nilfsdev, uint64_t vblocknr, uint64_t *pblocknr)
381 {
382 	struct nilfs_dat_entry *entry;
383 	struct buf *bp;
384 	uint64_t  ldatblknr;
385 	uint32_t  entry_in_block;
386 	int error;
387 
388 	nilfs_mdt_trans(&nilfsdev->dat_mdt, vblocknr,
389 		&ldatblknr, &entry_in_block);
390 
391 	error = nilfs_bread(nilfsdev->dat_node, ldatblknr, 0, &bp);
392 	if (error) {
393 		printf("vtop: can't read in DAT block %"PRIu64"!\n", ldatblknr);
394 		return error;
395 	}
396 
397 	/* get our translation */
398 	entry = ((struct nilfs_dat_entry *) bp->b_data) + entry_in_block;
399 #if 0
400 	printf("\tvblk %4"PRIu64" -> %"PRIu64" for "
401 		"checkpoint %"PRIu64" to %"PRIu64"\n",
402 		vblocknr,
403 		nilfs_rw64(entry->de_blocknr),
404 		nilfs_rw64(entry->de_start),
405 		nilfs_rw64(entry->de_end));
406 #endif
407 
408 	*pblocknr = nilfs_rw64(entry->de_blocknr);
409 	brelse(bp, BC_AGE);
410 
411 	return 0;
412 }
413 
414 
415 int
416 nilfs_nvtop(struct nilfs_node *node, uint64_t blks, uint64_t *l2vmap,
417 		uint64_t *v2pmap)
418 {
419 	uint64_t vblocknr, *pblocknr;
420 	int i, error;
421 
422 	/* the DAT inode is the only one not mapped virtual */
423 	if (node->ino == NILFS_DAT_INO) {
424 		memcpy(v2pmap, l2vmap, blks * sizeof(uint64_t));
425 		return 0;
426 	}
427 
428 	/* TODO / OPTI more translations in one go */
429 	error = 0;
430 	for (i = 0; i < blks; i++) {
431 		vblocknr  = l2vmap[i];
432 		pblocknr  = v2pmap + i;
433 		*pblocknr = 0;
434 
435 		/* only translate valid vblocknrs */
436 		if (vblocknr == 0)
437 			continue;
438 		error = nilfs_vtop(node->nilfsdev, vblocknr, pblocknr);
439 		if (error)
440 			break;
441 	}
442 
443 	return error;
444 }
445 
446 /* --------------------------------------------------------------------- */
447 
448 struct nilfs_recover_info {
449 	uint64_t segnum;
450 	uint64_t pseg;
451 
452 	struct nilfs_segment_summary segsum;
453 	struct nilfs_super_root      super_root;
454 	STAILQ_ENTRY(nilfs_recover_info) next;
455 };
456 
457 
458 /*
459  * Helper functions of nilfs_mount() that actually mounts the disc.
460  */
461 static int
462 nilfs_load_segsum(struct nilfs_device *nilfsdev,
463 	struct nilfs_recover_info *ri)
464 {
465 	struct buf *bp;
466 	uint64_t blocknr, offset;
467 	uint32_t segsum_struct_size;
468 	uint32_t magic;
469 	int error;
470 
471 	segsum_struct_size = sizeof(struct nilfs_segment_summary);
472 
473 	/* read in segsum structure */
474 	bp      = NULL;
475 	blocknr = ri->pseg;
476 	offset  = 0;
477 	error = nilfs_get_segment_log(nilfsdev,
478 			&blocknr, &offset, &bp,
479 			segsum_struct_size, (void *) &ri->segsum);
480 	if (error)
481 		goto out;
482 
483 	/* sanity checks */
484 	magic = nilfs_rw32(ri->segsum.ss_magic);
485 	if (magic != NILFS_SEGSUM_MAGIC) {
486 		DPRINTF(VOLUMES, ("nilfs: bad magic in pseg %"PRIu64"\n",
487 			ri->pseg));
488 		error = EINVAL;
489 		goto out;
490 	}
491 
492 	/* TODO check segment summary checksum */
493 	/* TODO check data checksum */
494 
495 out:
496 	if (bp)
497 		brelse(bp, BC_AGE);
498 
499 	return error;
500 }
501 
502 
503 static int
504 nilfs_load_super_root(struct nilfs_device *nilfsdev,
505 	struct nilfs_recover_info *ri)
506 {
507 	struct nilfs_segment_summary *segsum = &ri->segsum;
508 	struct nilfs_super_root *super_root;
509 	struct buf *bp;
510 	uint64_t blocknr, offset;
511 	uint32_t segsum_size, size;
512 	uint32_t nsumblk, nfileblk;
513 	uint32_t super_root_crc, comp_crc;
514 	int off, error;
515 
516 	/* process segment summary */
517 	segsum_size = nilfs_rw32(segsum->ss_sumbytes);
518 	nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
519 	nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
520 
521 	/* check if there is a superroot */
522 	if ((nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) == 0) {
523 		DPRINTF(VOLUMES, ("nilfs: no super root in pseg %"PRIu64"\n",
524 			ri->pseg));
525 		return ENOENT;
526 	}
527 
528 	/* get our super root, located at the end of the pseg */
529 	blocknr = ri->pseg + nsumblk + nfileblk - 1;
530 	offset = 0;
531 	size = sizeof(struct nilfs_super_root);
532 	bp = NULL;
533 	error = nilfs_get_segment_log(nilfsdev,
534 			&blocknr, &offset, &bp,
535 			size, (void *) &nilfsdev->super_root);
536 	if (bp)
537 		brelse(bp, BC_AGE);
538 	if (error) {
539 		printf("read in of superroot failed\n");
540 		return EIO;
541 	}
542 
543 	/* check super root crc */
544 	super_root = &nilfsdev->super_root;
545 	super_root_crc = nilfs_rw32(super_root->sr_sum);
546 	off = sizeof(super_root->sr_sum);
547 	comp_crc = crc32_le(nilfs_rw32(nilfsdev->super.s_crc_seed),
548 		(uint8_t *) super_root + off,
549 		NILFS_SR_BYTES - off);
550 	if (super_root_crc != comp_crc) {
551 		DPRINTF(VOLUMES, ("    invalid superroot, likely from old format\n"));
552 		return EINVAL;
553 	}
554 
555 	DPRINTF(VOLUMES, ("    got valid superroot\n"));
556 
557 	return 0;
558 }
559 
560 /*
561  * Search for the last super root recorded.
562  */
563 void
564 nilfs_search_super_root(struct nilfs_device *nilfsdev)
565 {
566 	struct nilfs_super_block *super;
567 	struct nilfs_segment_summary *segsum;
568 	struct nilfs_recover_info *ri, *ori, *i_ri;
569 	STAILQ_HEAD(,nilfs_recover_info) ri_list;
570 	uint64_t seg_start, seg_end, cno;
571 	uint32_t segsum_size;
572 	uint32_t nsumblk, nfileblk;
573 	int error;
574 
575 	STAILQ_INIT(&ri_list);
576 
577 	/* search for last super root */
578 	ri = malloc(sizeof(struct nilfs_recover_info), M_NILFSTEMP, M_WAITOK);
579 	memset(ri, 0, sizeof(struct nilfs_recover_info));
580 
581 	/* if enabled, start from the specified position */
582 	if (0) {
583 		/* start from set start */
584 		nilfsdev->super.s_last_pseg = nilfsdev->super.s_first_data_block;
585 		nilfsdev->super.s_last_cno  = nilfs_rw64(1);
586 	}
587 
588 	ri->pseg   = nilfs_rw64(nilfsdev->super.s_last_pseg); /* blknr */
589 	ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
590 
591 	error = 0;
592 	cno = nilfs_rw64(nilfsdev->super.s_last_cno);
593 	DPRINTF(VOLUMES, ("nilfs: seach_super_root start in pseg %"PRIu64"\n",
594 			ri->pseg));
595 	for (;;) {
596 		DPRINTF(VOLUMES, (" at pseg %"PRIu64"\n", ri->pseg));
597 		error = nilfs_load_segsum(nilfsdev, ri);
598 		if (error)
599 			break;
600 
601 		segsum = &ri->segsum;
602 
603 		/* try to load super root */
604 		if (nilfs_rw16(segsum->ss_flags) & NILFS_SS_SR) {
605 			DPRINTF(VOLUMES, (" try super root\n"));
606 			error = nilfs_load_super_root(nilfsdev, ri);
607 			if (error)
608 				break;	/* confused */
609 			/* wipe current list of ri */
610 			while (!STAILQ_EMPTY(&ri_list)) {
611 				i_ri = STAILQ_FIRST(&ri_list);
612 				STAILQ_REMOVE_HEAD(&ri_list, next);
613 				free(i_ri, M_NILFSTEMP);
614 			}
615 			super = &nilfsdev->super;
616 
617 			super->s_last_pseg = nilfs_rw64(ri->pseg);
618 			super->s_last_cno  = cno++;
619 			super->s_last_seq  = segsum->ss_seq;
620 			super->s_state     = nilfs_rw16(NILFS_VALID_FS);
621 		} else {
622 			STAILQ_INSERT_TAIL(&ri_list, ri, next);
623 			ori = ri;
624 			ri = malloc(sizeof(struct nilfs_recover_info),
625 				M_NILFSTEMP, M_WAITOK);
626 			memset(ri, 0, sizeof(struct nilfs_recover_info));
627 			ri->segnum = ori->segnum;
628 			ri->pseg   = ori->pseg;
629 			/* segsum keeps pointing to the `old' ri */
630 		}
631 
632 		/* continue to the next pseg */
633 		segsum_size = nilfs_rw32(segsum->ss_sumbytes);
634 		nsumblk     = (segsum_size - 1) / nilfsdev->blocksize + 1;
635 		nfileblk    = nilfs_rw32(segsum->ss_nblocks) - nsumblk;
636 
637 		/* calculate next partial segment location */
638 		ri->pseg += nsumblk + nfileblk;
639 
640 		/* did we reach the end of the segment? if so, go to the next */
641 		nilfs_get_segment_range(nilfsdev, ri->segnum, &seg_start, &seg_end);
642 		if (ri->pseg >= seg_end)
643 			ri->pseg = nilfs_rw64(segsum->ss_next);
644 		ri->segnum = nilfs_get_segnum_of_block(nilfsdev, ri->pseg);
645 	}
646 
647 	/*
648 	 * XXX No roll-forward yet of the remaining partial segments.
649 	 */
650 
651 	/* wipe current list of ri */
652 	while (!STAILQ_EMPTY(&ri_list)) {
653 		i_ri = STAILQ_FIRST(&ri_list);
654 		STAILQ_REMOVE_HEAD(&ri_list, next);
655 		printf("nilfs: ignoring pseg at %"PRIu64"\n", i_ri->pseg);
656 		free(i_ri, M_NILFSTEMP);
657 	}
658 	free(ri, M_NILFSTEMP);
659 }
660 
661 /* --------------------------------------------------------------------- */
662 
663 int
664 nilfs_get_node_raw(struct nilfs_device *nilfsdev, struct nilfs_mount *ump,
665 	uint64_t ino, struct nilfs_inode *inode, struct nilfs_node **nodep)
666 {
667 	struct nilfs_node *node;
668 
669 	*nodep = NULL;
670 
671 	node = pool_get(&nilfs_node_pool, PR_WAITOK);
672 	memset(node, 0, sizeof(struct nilfs_node));
673 
674 	/* crosslink */
675 	node->ump      = ump;
676 	node->nilfsdev = nilfsdev;
677 
678 	/* initiase nilfs node */
679 	node->ino   = ino;
680 	node->inode = *inode;
681 	node->lockf = NULL;
682 
683 	/* initialise locks */
684 	mutex_init(&node->node_mutex, MUTEX_DEFAULT, IPL_NONE);
685 	cv_init(&node->node_lock, "nilfsnlk");
686 
687 	/* fixup inode size for system nodes */
688 	if ((ino < NILFS_USER_INO) && (ino != NILFS_ROOT_INO)) {
689 		DPRINTF(VOLUMES, ("NEED TO GET my size for inode %"PRIu64"\n",
690 			ino));
691 		/* for now set it to maximum, -1 is illegal */
692 		inode->i_size = nilfs_rw64(((uint64_t) -2));
693 	}
694 
695 	/* return node */
696 	*nodep = node;
697 	return 0;
698 }
699 
700 void
701 nilfs_dispose_node(struct nilfs_node **nodep)
702 {
703 	struct nilfs_node *node;
704 
705 	/* protect against rogue values */
706 	if (!*nodep)
707 		return;
708 
709 	node = *nodep;
710 
711 	/* remove dirhash if present */
712 	dirhash_purge(&node->dir_hash);
713 
714 	/* destroy our locks */
715 	mutex_destroy(&node->node_mutex);
716 	cv_destroy(&node->node_lock);
717 
718 	/* free our associated memory */
719 	pool_put(&nilfs_node_pool, node);
720 
721 	*nodep = NULL;
722 }
723 
724 
/* Timestamp hook; intentionally empty, none of the arguments are used. */
void
nilfs_itimes(struct nilfs_node *node, struct timespec *acc,
	struct timespec *mod, struct timespec *birth)
{
	/* nothing to do */
}
730 
731 
/* Node update request; always refused with EROFS. */
int
nilfs_update(struct vnode *node, struct timespec *acc,
	struct timespec *mod, struct timespec *birth, int updflags)
{
	/* no argument is looked at */
	return EROFS;
}
738 
739 
740 int
741 nilfs_chsize(struct vnode *vp, u_quad_t newsize, kauth_cred_t cred)
742 {
743 	return EROFS;
744 }
745 
746 
747 
/* Request to grow a node to `new_size'; always refused with EROFS. */
int
nilfs_grow_node(struct nilfs_node *node, uint64_t new_size)
{
	/* no argument is looked at */
	return EROFS;
}
753 
754 
/* Request to shrink a node to `new_size'; always refused with EROFS. */
int
nilfs_shrink_node(struct nilfs_node *node, uint64_t new_size)
{
	/* no argument is looked at */
	return EROFS;
}
760 
761 
762 static int
763 dirhash_fill(struct nilfs_node *dir_node)
764 {
765 	struct vnode *dvp = dir_node->vnode;
766 	struct dirhash *dirh;
767 	struct nilfs_dir_entry *ndirent;
768 	struct dirent dirent;
769 	struct buf *bp;
770 	uint64_t file_size, diroffset, blkoff;
771 	uint64_t blocknr;
772 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
773 	uint8_t *pos, name_len;
774 	int error;
775 
776 	DPRINTF(CALL, ("dirhash_fill called\n"));
777 
778 	if (dvp->v_type != VDIR)
779 		return ENOTDIR;
780 
781 	/* make sure we have a dirhash to work on */
782 	dirh = dir_node->dir_hash;
783 	KASSERT(dirh);
784 	KASSERT(dirh->refcnt > 0);
785 
786 	if (dirh->flags & DIRH_BROKEN)
787 		return EIO;
788 
789 	if (dirh->flags & DIRH_COMPLETE)
790 		return 0;
791 
792 	DPRINTF(DIRHASH, ("Filling directory hash\n"));
793 
794 	/* make sure we have a clean dirhash to add to */
795 	dirhash_purge_entries(dirh);
796 
797 	/* get directory filesize */
798 	file_size = nilfs_rw64(dir_node->inode.i_size);
799 
800 	/* walk the directory */
801 	error = 0;
802 	diroffset = 0;
803 
804 	blocknr = diroffset / blocksize;
805 	blkoff  = diroffset % blocksize;
806 	error = nilfs_bread(dir_node, blocknr, 0, &bp);
807 	if (error) {
808 		dirh->flags |= DIRH_BROKEN;
809 		dirhash_purge_entries(dirh);
810 		return EIO;
811 	}
812 	while (diroffset < file_size) {
813 		DPRINTF(READDIR, ("filldir : offset = %"PRIu64"\n",
814 			diroffset));
815 		if (blkoff >= blocksize) {
816 			blkoff = 0; blocknr++;
817 			brelse(bp, BC_AGE);
818 			error = nilfs_bread(dir_node, blocknr, 0, &bp);
819 			if (error) {
820 				dirh->flags |= DIRH_BROKEN;
821 				dirhash_purge_entries(dirh);
822 				return EIO;
823 			}
824 		}
825 
826 		/* read in one dirent */
827 		pos = (uint8_t *) bp->b_data + blkoff;
828 		ndirent = (struct nilfs_dir_entry *) pos;
829 		name_len = ndirent->name_len;
830 
831 		memset(&dirent, 0, sizeof(struct dirent));
832 		dirent.d_fileno = nilfs_rw64(ndirent->inode);
833 		dirent.d_type   = ndirent->file_type;	/* 1:1 ? */
834 		dirent.d_namlen = name_len;
835 		strncpy(dirent.d_name, ndirent->name, name_len);
836 		dirent.d_reclen = _DIRENT_SIZE(&dirent);
837 		DPRINTF(DIRHASH, ("copying `%*.*s`\n", name_len,
838 			name_len, dirent.d_name));
839 
840 		/* XXX is it deleted? extra free space? */
841 		dirhash_enter(dirh, &dirent, diroffset,
842 			nilfs_rw16(ndirent->rec_len), 0);
843 
844 		/* advance */
845 		diroffset += nilfs_rw16(ndirent->rec_len);
846 		blkoff    += nilfs_rw16(ndirent->rec_len);
847 	}
848 	brelse(bp, BC_AGE);
849 
850 	dirh->flags |= DIRH_COMPLETE;
851 
852 	return 0;
853 }
854 
855 
856 int
857 nilfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
858 		uint64_t *ino, int *found)
859 {
860 	struct nilfs_node	*dir_node = VTOI(dvp);
861 	struct nilfs_dir_entry *ndirent;
862 	struct dirhash		*dirh;
863 	struct dirhash_entry	*dirh_ep;
864 	struct buf *bp;
865 	uint64_t diroffset, blkoff;
866 	uint64_t blocknr;
867 	uint32_t blocksize = dir_node->nilfsdev->blocksize;
868 	uint8_t *pos;
869 	int hit, error;
870 
871 	/* set default return */
872 	*found = 0;
873 
874 	/* get our dirhash and make sure its read in */
875 	dirhash_get(&dir_node->dir_hash);
876 	error = dirhash_fill(dir_node);
877 	if (error) {
878 		dirhash_put(dir_node->dir_hash);
879 		return error;
880 	}
881 	dirh = dir_node->dir_hash;
882 
883 	/* allocate temporary space for fid */
884 
885 	DPRINTF(DIRHASH, ("dirhash_lookup looking for `%*.*s`\n",
886 		namelen, namelen, name));
887 
888 	/* search our dirhash hits */
889 	*ino = 0;
890 	dirh_ep = NULL;
891 	for (;;) {
892 		hit = dirhash_lookup(dirh, name, namelen, &dirh_ep);
893 		/* if no hit, abort the search */
894 		if (!hit)
895 			break;
896 
897 		/* check this hit */
898 		diroffset = dirh_ep->offset;
899 
900 		blocknr = diroffset / blocksize;
901 		blkoff  = diroffset % blocksize;
902 		error = nilfs_bread(dir_node, blocknr, 0, &bp);
903 		if (error)
904 			return EIO;
905 
906 		/* read in one dirent */
907 		pos = (uint8_t *) bp->b_data + blkoff;
908 		ndirent = (struct nilfs_dir_entry *) pos;
909 
910 		DPRINTF(DIRHASH, ("dirhash_lookup\tchecking `%*.*s`\n",
911 			ndirent->name_len, ndirent->name_len, ndirent->name));
912 
913 		/* see if its our entry */
914 		KASSERT(ndirent->name_len == namelen);
915 		if (strncmp(ndirent->name, name, namelen) == 0) {
916 			*found = 1;
917 			*ino = nilfs_rw64(ndirent->inode);
918 			brelse(bp, BC_AGE);
919 			break;
920 		}
921 		brelse(bp, BC_AGE);
922 	}
923 
924 	dirhash_put(dir_node->dir_hash);
925 
926 	return error;
927 }
928 
929 
/* Request to remove `node' from `dir_node'; always refused with EROFS. */
int
nilfs_dir_detach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
	struct nilfs_node *node, struct componentname *cnp)
{
	/* no argument is looked at */
	return EROFS;
}
935 
936 
/* Request to enter `node' into `dir_node'; always refused with EROFS. */
int
nilfs_dir_attach(struct nilfs_mount *ump, struct nilfs_node *dir_node,
	struct nilfs_node *node, struct vattr *vap, struct componentname *cnp)
{
	/* no argument is looked at */
	return EROFS;
}
942 
943 
944 /* XXX return vnode? */
/* Request to create a new node in `dvp'; always refused with EROFS. */
int
nilfs_create_node(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
	struct componentname *cnp)
{
	/* no argument is looked at, *vpp is not written */
	return EROFS;
}
950 
951 
/* Node deletion hook; intentionally empty, `node' is not touched. */
void
nilfs_delete_node(struct nilfs_node *node)
{
	/* nothing to do */
}
956 
957 
958