xref: /netbsd-src/sys/ufs/chfs/chfs_readinode.c (revision 7788a0781fe6ff2cce37368b4578a7ade0850cb1)
1 /*	$NetBSD: chfs_readinode.c,v 1.7 2012/12/01 11:31:01 mbalmer Exp $	*/
2 
3 /*-
4  * Copyright (c) 2010 Department of Software Engineering,
5  *		      University of Szeged, Hungary
6  * Copyright (C) 2010 David Tengeri <dtengeri@inf.u-szeged.hu>
7  * Copyright (C) 2010 Tamas Toth <ttoth@inf.u-szeged.hu>
8  * Copyright (C) 2010 Adam Hoka <ahoka@NetBSD.org>
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to The NetBSD Foundation
12  * by the Department of Software Engineering, University of Szeged, Hungary
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/buf.h>
37 
38 #include "chfs.h"
39 
40 /* tmp node operations */
41 int chfs_check_td_data(struct chfs_mount *,
42     struct chfs_tmp_dnode *);
43 int chfs_check_td_node(struct chfs_mount *,
44     struct chfs_tmp_dnode *);
45 struct chfs_node_ref *chfs_first_valid_data_ref(struct chfs_node_ref *);
46 int chfs_add_tmp_dnode_to_tree(struct chfs_mount *,
47     struct chfs_readinode_info *,
48     struct chfs_tmp_dnode *);
49 void chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *,
50 	struct chfs_tmp_dnode *);
51 void chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *,
52 	struct chfs_tmp_dnode *);
53 static void chfs_kill_td(struct chfs_mount *,
54     struct chfs_tmp_dnode *);
55 static void chfs_kill_tdi(struct chfs_mount *,
56     struct chfs_tmp_dnode_info *);
57 /* frag node operations */
58 struct chfs_node_frag *new_fragment(struct chfs_full_dnode *,
59     uint32_t,
60     uint32_t);
61 int no_overlapping_node(struct rb_tree *, struct chfs_node_frag *,
62     struct chfs_node_frag *, uint32_t);
63 int chfs_add_frag_to_fragtree(struct chfs_mount *,
64     struct rb_tree *,
65     struct chfs_node_frag *);
66 void chfs_obsolete_node_frag(struct chfs_mount *,
67     struct chfs_node_frag *);
68 /* general node operations */
69 int chfs_get_data_nodes(struct chfs_mount *,
70     struct chfs_inode *,
71     struct chfs_readinode_info *);
72 int chfs_build_fragtree(struct chfs_mount *,
73     struct chfs_inode *,
74     struct chfs_readinode_info *);
75 
76 
77 
78 /* tmp node rbtree operations */
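/*
 * The temporary dnode tree is keyed by the data node's file offset.  When two
 * nodes share an offset the comparator returns 0, rb_tree_insert_node()
 * yields the tdi already in the tree, and that tdi's chain collects every
 * version of the offset (see chfs_add_tmp_dnode_to_tree()).
 */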
79 static signed int
80 tmp_node_compare_nodes(void *ctx, const void *n1, const void *n2)
81 {
82 	const struct chfs_tmp_dnode_info *tdi1 = n1;
83 	const struct chfs_tmp_dnode_info *tdi2 = n2;
84 
85 	return (tdi1->tmpnode->node->ofs - tdi2->tmpnode->node->ofs);
86 }
87 
88 static signed int
89 tmp_node_compare_key(void *ctx, const void *n, const void *key)
90 {
91 	const struct chfs_tmp_dnode_info *tdi = n;
92 	uint64_t ofs = *(const uint64_t *)key;
93 
94 	return (tdi->tmpnode->node->ofs - ofs);
95 }
96 
97 const rb_tree_ops_t tmp_node_rbtree_ops = {
98 	.rbto_compare_nodes = tmp_node_compare_nodes,
99 	.rbto_compare_key = tmp_node_compare_key,
100 	.rbto_node_offset = offsetof(struct chfs_tmp_dnode_info, rb_node),
101 	.rbto_context = NULL
102 };
103 
104 
105 /* frag node rbtree operations */
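/*
 * The fragtree is keyed by each fragment's start offset.  Fragments never
 * overlap: chfs_add_frag_to_fragtree() trims and splits neighbours as needed,
 * and holes are represented by fragments whose node pointer is NULL.
 */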
106 static signed int
107 frag_compare_nodes(void *ctx, const void *n1, const void *n2)
108 {
109 	const struct chfs_node_frag *frag1 = n1;
110 	const struct chfs_node_frag *frag2 = n2;
111 
112 	return (frag1->ofs - frag2->ofs);
113 }
114 
115 static signed int
116 frag_compare_key(void *ctx, const void *n, const void *key)
117 {
118 	const struct chfs_node_frag *frag = n;
119 	uint64_t ofs = *(const uint64_t *)key;
120 
121 	return (frag->ofs - ofs);
122 }
123 
124 const rb_tree_ops_t frag_rbtree_ops = {
125 	.rbto_compare_nodes = frag_compare_nodes,
126 	.rbto_compare_key   = frag_compare_key,
127 	.rbto_node_offset = offsetof(struct chfs_node_frag, rb_node),
128 	.rbto_context = NULL
129 };
130 
131 
132 /*
133  * chfs_check_td_data - checks the data CRC of the node
134  *
135  * Returns: 0 - if everything is OK;
136  * 	    	1 - if the data CRC is incorrect;
137  * 	    	2 - on a read or allocation error;
138  *	    	otherwise an error code.
139  */
140 int
141 chfs_check_td_data(struct chfs_mount *chmp,
142     struct chfs_tmp_dnode *td)
143 {
144 	int err;
145 	size_t retlen, len, totlen;
146 	uint32_t crc;
147 	uint64_t ofs;
148 	char *buf;
149 	struct chfs_node_ref *nref = td->node->nref;
150 
151 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
152 	KASSERT(!mutex_owned(&chmp->chm_lock_sizes));
153 
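	/* The data payload is stored right after the on-flash data node header. */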
154 	ofs = CHFS_GET_OFS(nref->nref_offset) + sizeof(struct chfs_flash_data_node);
155 	len = td->node->size;
156 	if (!len)
157 		return 0;
158 
159 	/* Read data. */
160 	buf = kmem_alloc(len, KM_SLEEP);
161 	if (!buf) {
162 		dbg("allocating error\n");
163 		return 2;
164 	}
165 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, ofs, len, &retlen);
166 	if (err) {
167 		dbg("error while reading: %d\n", err);
168 		err = 2;
169 		goto out;
170 	}
171 
172 	/* Check crc. */
173 	if (len != retlen) {
174 		dbg("len:%zu, retlen:%zu\n", len, retlen);
175 		err = 2;
176 		goto out;
177 	}
178 	crc = crc32(0, (uint8_t *)buf, len);
179 
180 	if (crc != td->data_crc) {
181 		dbg("crc failed, calculated: 0x%x, orig: 0x%x\n", crc, td->data_crc);
182 		kmem_free(buf, len);
183 		return 1;
184 	}
185 
186 	/* Correct sizes. */
187 	CHFS_MARK_REF_NORMAL(nref);
188 	totlen = CHFS_PAD(sizeof(struct chfs_flash_data_node) + len);
189 
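	/* The node checked out: move its space accounting from unchecked to used. */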
190 	mutex_enter(&chmp->chm_lock_sizes);
191 	chfs_change_size_unchecked(chmp, &chmp->chm_blocks[nref->nref_lnr], -totlen);
192 	chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
193 	mutex_exit(&chmp->chm_lock_sizes);
194 	KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
195 
196 	err = 0;
197 out:
198 	kmem_free(buf, len);
199 	return err;
200 }
201 
202 /* chfs_check_td_node - checks a temporary node */
203 int
204 chfs_check_td_node(struct chfs_mount *chmp, struct chfs_tmp_dnode *td)
205 {
206 	int ret;
207 
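	/* Only nodes still flagged unchecked need their data CRC verified. */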
208 	if (CHFS_REF_FLAGS(td->node->nref) != CHFS_UNCHECKED_NODE_MASK)
209 		return 0;
210 
211 	ret = chfs_check_td_data(chmp, td);
212 	return ret;
213 }
214 
215 /*
216  * chfs_first_valid_data_ref -
217  * returns the first valid (non-obsolete) nref, starting from the given nref
218  */
219 struct chfs_node_ref *
220 chfs_first_valid_data_ref(struct chfs_node_ref *nref)
221 {
222 	while (nref) {
223 		if (!CHFS_REF_OBSOLETE(nref)) {
224 #ifdef DBG_MSG_GC
225 			if (nref->nref_lnr == REF_EMPTY_NODE) {
226 				dbg("FIRST VALID IS EMPTY!\n");
227 			}
228 #endif
229 			return nref;
230 		}
231 
232 		if (nref->nref_next) {
233 			nref = nref->nref_next;
234 		} else
235 			break;
236 	}
237 	return NULL;
238 }
239 
240 /*
241  * chfs_add_tmp_dnode_to_tdi -
242  * adds a temporary node to a temporary node descriptor
243  */
244 void
245 chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *tdi,
246 	struct chfs_tmp_dnode *td)
247 {
248 	if (!tdi->tmpnode) {
249 	/* The chain is empty. */
250 		tdi->tmpnode = td;
251 	} else {
252 	/* Insert into the chain. */
253 		struct chfs_tmp_dnode *tmp = tdi->tmpnode;
254 		while (tmp->next) {
255 			tmp = tmp->next;
256 		}
257 		tmp->next = td;
258 	}
259 }
260 
261 /*
262  * chfs_remove_tmp_dnode_from_tdi -
263  * removes a temporary node from its descriptor
264  */
265 void
266 chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *tdi,
267 	struct chfs_tmp_dnode *td)
268 {
269 	if (tdi->tmpnode == td) {
270 	/* It's the first in the chain. */
271 		tdi->tmpnode = tdi->tmpnode->next;
272 	} else {
273 	/* Remove from the middle of the chain. */
274 		struct chfs_tmp_dnode *tmp = tdi->tmpnode;
275 		while (tmp->next && tmp->next != td) {
276 			tmp = tmp->next;
277 		}
278 		if (tmp->next) {
279 			tmp->next = td->next;
280 		}
281 	}
282 }
283 
284 /* chfs_kill_td - obsoletes the node reference of a temporary node and frees it */
285 static void
286 chfs_kill_td(struct chfs_mount *chmp,
287     struct chfs_tmp_dnode *td)
288 {
289 	struct chfs_vnode_cache *vc;
290 	if (td->node) {
291 		mutex_enter(&chmp->chm_lock_vnocache);
292 		/* Remove the node from the vnode cache's data node chain. */
293 		vc = chfs_nref_to_vc(td->node->nref);
294 		chfs_remove_and_obsolete(chmp, vc, td->node->nref, &vc->dnode);
295 		mutex_exit(&chmp->chm_lock_vnocache);
296 	}
297 
298 	chfs_free_tmp_dnode(td);
299 }
300 
301 /* chfs_kill_tdi - removes a temporary node descriptor */
302 static void
303 chfs_kill_tdi(struct chfs_mount *chmp,
304     struct chfs_tmp_dnode_info *tdi)
305 {
306 	struct chfs_tmp_dnode *next, *tmp = tdi->tmpnode;
307 
308 	/* Iterate the chain and remove all temporary nodes from it. */
309 	while (tmp) {
310 		next = tmp->next;
311 		chfs_kill_td(chmp, tmp);
312 		tmp = next;
313 	}
314 
315 	chfs_free_tmp_dnode_info(tdi);
316 }
317 
318 /*
319  * chfs_add_tmp_dnode_to_tree -
320  * adds a temporary node to the temporary tree
321  */
322 int
323 chfs_add_tmp_dnode_to_tree(struct chfs_mount *chmp,
324     struct chfs_readinode_info *rii,
325     struct chfs_tmp_dnode *newtd)
326 {
327 	uint64_t end_ofs = newtd->node->ofs + newtd->node->size;
328 	struct chfs_tmp_dnode_info *this;
329 	struct rb_node *node, *prev_node;
330 	struct chfs_tmp_dnode_info *newtdi;
331 
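	/* Look up the tdi at the new node's offset and, if it is part of a run
	 * of overlapping nodes, walk back to the first node of that run so the
	 * scan below starts there. */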
332 	node = rb_tree_find_node(&rii->tdi_root, &newtd->node->ofs);
333 	if (node) {
334 		this = (struct chfs_tmp_dnode_info *)node;
335 		while (this->tmpnode->overlapped) {
336 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
337 			if (!prev_node) {
338 				this->tmpnode->overlapped = 0;
339 				break;
340 			}
341 			node = prev_node;
342 			this = (struct chfs_tmp_dnode_info *)node;
343 		}
344 	}
345 
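	/* Scan forward and resolve version conflicts with every tdi that may
	 * overlap the new node's range [ofs, ofs + size). */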
346 	while (node) {
347 		this = (struct chfs_tmp_dnode_info *)node;
348 		if (this->tmpnode->node->ofs > end_ofs)
349 			break;
350 
351 		struct chfs_tmp_dnode *tmp_td = this->tmpnode;
352 		while (tmp_td) {
353 			if (tmp_td->version == newtd->version) {
354 				/* Same version: a duplicate of an existing node; keep whichever copy checks out. */
355 				if (!chfs_check_td_node(chmp, tmp_td)) {
356 					dbg("calling kill td 0\n");
357 					chfs_kill_td(chmp, newtd);
358 					return 0;
359 				} else {
360 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
361 					chfs_kill_td(chmp, tmp_td);
362 					chfs_add_tmp_dnode_to_tdi(this, newtd);
363 					return 0;
364 				}
365 			}
366 			if (tmp_td->version < newtd->version &&
367 				tmp_td->node->ofs >= newtd->node->ofs &&
368 				tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
369 				/* New node entirely overlaps 'this' */
370 				if (chfs_check_td_node(chmp, newtd)) {
371 					dbg("calling kill td 2\n");
372 					chfs_kill_td(chmp, newtd);
373 					return 0;
374 				}
375 				/* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */
376 				while (tmp_td && tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
377 					struct rb_node *next = rb_tree_iterate(&rii->tdi_root, this, RB_DIR_RIGHT);
378 					struct chfs_tmp_dnode_info *next_tdi = (struct chfs_tmp_dnode_info *)next;
379 					struct chfs_tmp_dnode *next_td = NULL;
380 					if (tmp_td->next) {
381 						next_td = tmp_td->next;
382 					} else if (next_tdi) {
383 						next_td = next_tdi->tmpnode;
384 					}
385 					if (tmp_td->version < newtd->version) {
386 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
387 						chfs_kill_td(chmp, tmp_td);
388 						if (!this->tmpnode) {
389 							rb_tree_remove_node(&rii->tdi_root, this);
390 							chfs_kill_tdi(chmp, this);
391 							this = next_tdi;
392 						}
393 					}
394 					tmp_td = next_td;
395 				}
396 				continue;
397 			}
398 			if (tmp_td->version > newtd->version &&
399 				tmp_td->node->ofs <= newtd->node->ofs &&
400 				tmp_td->node->ofs + tmp_td->node->size >= end_ofs) {
401 				/* New node entirely overlapped by 'this' */
402 				if (!chfs_check_td_node(chmp, tmp_td)) {
403 					dbg("this version: %llu\n",
404 						(unsigned long long)tmp_td->version);
405 					dbg("this ofs: %llu, size: %u\n",
406 						(unsigned long long)tmp_td->node->ofs,
407 						tmp_td->node->size);
408 					dbg("calling kill td 4\n");
409 					chfs_kill_td(chmp, newtd);
410 					return 0;
411 				}
412 				/* ... but 'this' was bad. Replace it... */
413 				chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
414 				chfs_kill_td(chmp, tmp_td);
415 				if (!this->tmpnode) {
416 					rb_tree_remove_node(&rii->tdi_root, this);
417 					chfs_kill_tdi(chmp, this);
418 				}
419 				dbg("calling kill td 5\n");
420 				chfs_kill_td(chmp, newtd);
421 				break;
422 			}
423 			tmp_td = tmp_td->next;
424 		}
425 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
426 	}
427 
428 	newtdi = chfs_alloc_tmp_dnode_info();
429 	chfs_add_tmp_dnode_to_tdi(newtdi, newtd);
430 	/* We neither completely obsoleted an earlier node nor were
431 	   completely obsoleted by one. Insert into the tree. */
432 	struct chfs_tmp_dnode_info *tmp_tdi = rb_tree_insert_node(&rii->tdi_root, newtdi);
433 	if (tmp_tdi != newtdi) {
434 		chfs_remove_tmp_dnode_from_tdi(newtdi, newtd);
435 		chfs_add_tmp_dnode_to_tdi(tmp_tdi, newtd);
436 		chfs_kill_tdi(chmp, newtdi);
437 	}
438 
439 	/* If there's anything behind that overlaps us, note it */
440 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
441 	if (node) {
442 		while (1) {
443 			this = (struct chfs_tmp_dnode_info *)node;
444 			if (this->tmpnode->node->ofs + this->tmpnode->node->size > newtd->node->ofs) {
445 				newtd->overlapped = 1;
446 			}
447 			if (!this->tmpnode->overlapped)
448 				break;
449 
450 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
451 			if (!prev_node) {
452 				this->tmpnode->overlapped = 0;
453 				break;
454 			}
455 			node = prev_node;
456 		}
457 	}
458 
459 	/* If the new node overlaps anything ahead, note it */
460 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
461 	this = (struct chfs_tmp_dnode_info *)node;
462 	while (this && this->tmpnode->node->ofs < end_ofs) {
463 		this->tmpnode->overlapped = 1;
464 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
465 		this = (struct chfs_tmp_dnode_info *)node;
466 	}
467 	return 0;
468 }
469 
470 
471 /* new_fragment - creates a new fragment for a data node */
472 struct chfs_node_frag *
473 new_fragment(struct chfs_full_dnode *fdn, uint32_t ofs, uint32_t size)
474 {
475 	struct chfs_node_frag *newfrag;
476 	newfrag = chfs_alloc_node_frag();
477 	if (newfrag) {
478 		/* Initialize fragment. */
479 		newfrag->ofs = ofs;
480 		newfrag->size = size;
481 		newfrag->node = fdn;
482 		if (newfrag->node) {
483 			newfrag->node->frags++;
484 		}
485 	} else {
486 		chfs_err("cannot allocate a chfs_node_frag object\n");
487 	}
488 	return newfrag;
489 }
490 
491 /*
492  * no_overlapping_node - inserts a fragment that does not overlap its predecessor
493  * If there is a gap before the new fragment, a hole fragment is inserted first to cover it.
494  */
495 int
496 no_overlapping_node(struct rb_tree *fragtree,
497     struct chfs_node_frag *newfrag,
498     struct chfs_node_frag *this, uint32_t lastend)
499 {
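	/* There is a gap between the previous data and the new fragment:
	 * cover it with a hole fragment (one with a NULL node) first. */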
500 	if (lastend < newfrag->node->ofs) {
501 		struct chfs_node_frag *holefrag;
502 
503 		holefrag = new_fragment(NULL, lastend, newfrag->node->ofs - lastend);
504 		if (!holefrag) {
505 			chfs_free_node_frag(newfrag);
506 			return ENOMEM;
507 		}
508 
509 		rb_tree_insert_node(fragtree, holefrag);
510 		this = holefrag;
511 	}
512 
513 	rb_tree_insert_node(fragtree, newfrag);
514 
515 	return 0;
516 }
517 
518 /*
519  * chfs_add_frag_to_fragtree -
520  * adds a fragment to a data node's fragtree
521  */
522 int
523 chfs_add_frag_to_fragtree(struct chfs_mount *chmp,
524     struct rb_tree *fragtree,
525     struct chfs_node_frag *newfrag)
526 {
527 	struct chfs_node_frag *this;
528 	uint32_t lastend;
529 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
530 
531 	/* Find the offset of frag which is before the new one. */
532 	this = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &newfrag->ofs);
533 
534 	if (this) {
535 		lastend = this->ofs + this->size;
536 	} else {
537 		lastend = 0;
538 	}
539 
540 	/* The new fragment starts at or beyond the end of the existing data, so there is no overlap. */
541 	if (lastend <= newfrag->ofs) {
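		/* The previous fragment ends in the same page where the new one
		 * begins; neither node spans its page alone, so mark both
		 * node refs normal. */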
542 		if (lastend && (lastend - 1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) {
543 			if (this->node)
544 				CHFS_MARK_REF_NORMAL(this->node->nref);
545 			CHFS_MARK_REF_NORMAL(newfrag->node->nref);
546 		}
547 		return no_overlapping_node(fragtree, newfrag, this, lastend);
548 	}
549 
550 	if (newfrag->ofs > this->ofs) {
551 		CHFS_MARK_REF_NORMAL(newfrag->node->nref);
552 		if (this->node)
553 			CHFS_MARK_REF_NORMAL(this->node->nref);
554 
555 		if (this->ofs + this->size > newfrag->ofs + newfrag->size) {
556 			/* Newfrag is inside of this. */
557 			struct chfs_node_frag *newfrag2;
558 
559 			newfrag2 = new_fragment(this->node, newfrag->ofs + newfrag->size,
560 			    this->ofs + this->size - newfrag->ofs - newfrag->size);
561 			if (!newfrag2)
562 				return ENOMEM;
563 
564 			this->size = newfrag->ofs - this->ofs;
565 
566 			rb_tree_insert_node(fragtree, newfrag);
567 			rb_tree_insert_node(fragtree, newfrag2);
568 
569 			return 0;
570 		}
571 		/* Newfrag overlaps the tail of this: trim this to end where newfrag begins. */
572 		this->size = newfrag->ofs - this->ofs;
573 		rb_tree_insert_node(fragtree, newfrag);
574 	} else {
575 		/* Newfrag starts at the same offset as this. */
576 		//TODO replace instead of remove and insert
577 		rb_tree_remove_node(fragtree, this);
578 		rb_tree_insert_node(fragtree, newfrag);
579 
580 		if (newfrag->ofs + newfrag->size >= this->ofs+this->size) {
581 			chfs_obsolete_node_frag(chmp, this);
582 		} else {
583 			this->ofs += newfrag->size;
584 			this->size -= newfrag->size;
585 
586 			rb_tree_insert_node(fragtree, this);
587 			return 0;
588 		}
589 	}
590 	/* OK, now we have newfrag added in the correct place in the tree, but
591 	   frag_next(newfrag) may be a fragment which is overlapped by it
592 	*/
593 	while ((this = frag_next(fragtree, newfrag)) && newfrag->ofs + newfrag->size >= this->ofs + this->size) {
594 		rb_tree_remove_node(fragtree, this);
595 		chfs_obsolete_node_frag(chmp, this);
596 	}
597 
598 	if (!this || newfrag->ofs + newfrag->size == this->ofs)
599 		return 0;
600 
601 	this->size = (this->ofs + this->size) - (newfrag->ofs + newfrag->size);
602 	this->ofs = newfrag->ofs + newfrag->size;
603 
604 	if (this->node)
605 		CHFS_MARK_REF_NORMAL(this->node->nref);
606 	CHFS_MARK_REF_NORMAL(newfrag->node->nref);
607 
608 	return 0;
609 }
610 
611 /*
612  * chfs_remove_frags_of_node -
613  * removes all fragments from a fragtree and DOESN'T OBSOLETE them
614  */
615 void
616 chfs_remove_frags_of_node(struct chfs_mount *chmp, struct rb_tree *fragtree,
617 	struct chfs_node_ref *nref)
618 {
619 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
620 	struct chfs_node_frag *this, *next;
621 
622 	if (nref == NULL) {
623 		return;
624 	}
625 
626 	/* Iterate the tree and clean all elements. */
627 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
628 	while (this) {
629 		next = frag_next(fragtree, this);
630 		if (this->node->nref == nref) {
631 			rb_tree_remove_node(fragtree, this);
632 			chfs_free_node_frag(this);
633 		}
634 		this = next;
635 	}
636 }
637 
638 /*
639  * chfs_kill_fragtree -
640  * removes all fragments from a fragtree and OBSOLETES them
641  */
642 void
643 chfs_kill_fragtree(struct chfs_mount *chmp, struct rb_tree *fragtree)
644 {
645 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
646 	struct chfs_node_frag *this, *next;
647 
648 	/* Iterate the tree and clean all elements. */
649 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
650 	while (this) {
651 		next = frag_next(fragtree, this);
652 		rb_tree_remove_node(fragtree, this);
653 		chfs_obsolete_node_frag(chmp, this);
654 		this = next;
655 	}
656 }
657 
658 /* chfs_truncate_fragtree - truncates the tree to a specified size */
659 uint32_t
660 chfs_truncate_fragtree(struct chfs_mount *chmp,
661 	struct rb_tree *fragtree, uint32_t size)
662 {
663 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
664 	struct chfs_node_frag *frag;
665 
666 	dbg("truncate to size: %u\n", size);
667 
668 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &size);
669 
670 	/* Find the last frag before size and set its new size. */
671 	if (frag && frag->ofs != size) {
672 		if (frag->ofs + frag->size > size) {
673 			frag->size = size - frag->ofs;
674 		}
675 		frag = frag_next(fragtree, frag);
676 	}
677 
678 	/* Delete frags after new size. */
679 	while (frag && frag->ofs >= size) {
680 		struct chfs_node_frag *next = frag_next(fragtree, frag);
681 
682 		rb_tree_remove_node(fragtree, frag);
683 		chfs_obsolete_node_frag(chmp, frag);
684 		frag = next;
685 	}
686 
687 	if (size == 0) {
688 		return 0;
689 	}
690 
691 	frag = frag_last(fragtree);
692 
693 	if (!frag) {
694 		return 0;
695 	}
696 
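	/* The stored data ends before the requested size; report where it
	 * actually ends. */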
697 	if (frag->ofs + frag->size < size) {
698 		return frag->ofs + frag->size;
699 	}
700 
701 	/* FIXME Should we check the position of the last node? (PAGE_CACHE size, etc.) */
702 	if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) {
703 		frag->node->nref->nref_offset =
704 			CHFS_GET_OFS(frag->node->nref->nref_offset) | CHFS_PRISTINE_NODE_MASK;
705 	}
706 
707 	return size;
708 }
709 
710 /* chfs_obsolete_node_frag - obsoletes a fragment of a node */
711 void
712 chfs_obsolete_node_frag(struct chfs_mount *chmp,
713     struct chfs_node_frag *this)
714 {
715 	struct chfs_vnode_cache *vc;
716 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
717 	if (this->node) {
718 	/* The fragment is in a node. */
719 		KASSERT(this->node->frags != 0);
720 		this->node->frags--;
721 		if (this->node->frags == 0) {
722 		/* This was the node's last fragment. (There are no more.) */
723 			KASSERT(!CHFS_REF_OBSOLETE(this->node->nref));
724 			mutex_enter(&chmp->chm_lock_vnocache);
725 			vc = chfs_nref_to_vc(this->node->nref);
726 			dbg("[MARK] lnr: %u ofs: %u\n", this->node->nref->nref_lnr,
727 				this->node->nref->nref_offset);
728 
729 			chfs_remove_and_obsolete(chmp, vc, this->node->nref, &vc->dnode);
730 			mutex_exit(&chmp->chm_lock_vnocache);
731 
732 			chfs_free_full_dnode(this->node);
733 		} else {
734 		/* There are more frags in the node. */
735 			CHFS_MARK_REF_NORMAL(this->node->nref);
736 		}
737 	}
738 	chfs_free_node_frag(this);
739 }
740 
741 /* chfs_add_full_dnode_to_inode - adds a data node to an inode */
742 int
743 chfs_add_full_dnode_to_inode(struct chfs_mount *chmp,
744     struct chfs_inode *ip,
745     struct chfs_full_dnode *fd)
746 {
747 	int ret;
748 	struct chfs_node_frag *newfrag;
749 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
750 
751 	if (unlikely(!fd->size))
752 		return 0;
753 
754 	/* Create a new fragment from the data node and add it to the fragtree. */
755 	newfrag = new_fragment(fd, fd->ofs, fd->size);
756 	if (unlikely(!newfrag))
757 		return ENOMEM;
758 
759 	ret = chfs_add_frag_to_fragtree(chmp, &ip->fragtree, newfrag);
760 	if (ret)
761 		return ret;
762 
763 	/* Check previous fragment. */
764 	if (newfrag->ofs & (PAGE_SIZE - 1)) {
765 		struct chfs_node_frag *prev = frag_prev(&ip->fragtree, newfrag);
766 
767 		CHFS_MARK_REF_NORMAL(fd->nref);
768 		if (prev->node)
769 			CHFS_MARK_REF_NORMAL(prev->node->nref);
770 	}
771 
772 	/* Check next fragment. */
773 	if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE - 1)) {
774 		struct chfs_node_frag *next = frag_next(&ip->fragtree, newfrag);
775 
776 		if (next) {
777 			CHFS_MARK_REF_NORMAL(fd->nref);
778 			if (next->node)
779 				CHFS_MARK_REF_NORMAL(next->node->nref);
780 		}
781 	}
782 
783 	return 0;
784 }
785 
786 
787 /* chfs_get_data_nodes - reads the data nodes of an inode and collects them into a temporary tree */
788 int
789 chfs_get_data_nodes(struct chfs_mount *chmp,
790     struct chfs_inode *ip,
791     struct chfs_readinode_info *rii)
792 {
793 	uint32_t crc;
794 	int err;
795 	size_t len, retlen;
796 	struct chfs_node_ref *nref;
797 	struct chfs_flash_data_node *dnode;
798 	struct chfs_tmp_dnode *td;
799 	char* buf;
800 
801 	len = sizeof(struct chfs_flash_data_node);
802 	buf = kmem_alloc(len, KM_SLEEP);
803 
808 	nref = chfs_first_valid_data_ref(ip->chvc->dnode);
809 
810 	/* Update highest version. */
811 	rii->highest_version = ip->chvc->highest_version;
812 
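	/* Walk the inode's data node chain; the chain is terminated by a
	 * pointer back to the vnode cache itself. */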
813 	while (nref && (struct chfs_vnode_cache *)nref != ip->chvc) {
814 		err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), len, &retlen);
815 		if (err || len != retlen)
816 			goto out;
817 		dnode = (struct chfs_flash_data_node*)buf;
818 
819 		/* Check header crc. */
820 		crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
821 		if (crc != le32toh(dnode->hdr_crc)) {
822 			chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
823 			goto cont;
824 		}
825 
826 		/* Check header magic bitmask. */
827 		if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
828 			chfs_err("Wrong magic bitmask.\n");
829 			goto cont;
830 		}
831 
832 		/* Check node crc. */
833 		crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
834 		if (crc != le32toh(dnode->node_crc)) {
835 			chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
836 			goto cont;
837 		}
838 
839 		td = chfs_alloc_tmp_dnode();
840 		if (!td) {
841 			chfs_err("Can't allocate tmp dnode info.\n");
842 			err = ENOMEM;
843 			goto out;
844 		}
845 
846 		/* We don't check the data CRC here, just add nodes to the temporary dnode tree, because
847 		 * we don't want to check nodes which have been overlapped by a new node
848 		 * with a higher version number.
849 		 */
850 		td->node = chfs_alloc_full_dnode();
851 		if (!td->node) {
852 			chfs_err("Can't allocate full dnode info.\n");
853 			err = ENOMEM;
854 			goto out_tmp_dnode;
855 		}
856 		td->version = le64toh(dnode->version);
857 		td->node->ofs = le64toh(dnode->offset);
858 		td->data_crc = le32toh(dnode->data_crc);
859 		td->node->nref = nref;
860 		td->node->size = le32toh(dnode->data_length);
861 		td->node->frags = 1;
862 		td->overlapped = 0;
863 
864 		if (td->version > rii->highest_version) {
865 			rii->highest_version = td->version;
866 		}
867 
868 		/* Add node to the tree. */
869 		err = chfs_add_tmp_dnode_to_tree(chmp, rii, td);
870 		if (err)
871 			goto out_full_dnode;
872 
873 cont:
874 		nref = chfs_first_valid_data_ref(nref->nref_next);
875 	}
876 
877 	ip->chvc->highest_version = rii->highest_version;
	kmem_free(buf, len);
878 	return 0;
879 
880 out_full_dnode:
881 	chfs_free_full_dnode(td->node);
882 out_tmp_dnode:
883 	chfs_free_tmp_dnode(td);
884 out:
885 	kmem_free(buf, len);
887 	return err;
888 }
889 
890 
891 /* chfs_build_fragtree - builds fragtree from temporary tree */
892 int
893 chfs_build_fragtree(struct chfs_mount *chmp, struct chfs_inode *ip,
894     struct chfs_readinode_info *rii)
895 {
896 	struct chfs_tmp_dnode_info *pen, *last, *this;
897 	struct rb_tree ver_tree;    /* version tree, used only temporarily */
898 	uint64_t high_ver = 0;
899 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
900 
901 	rb_tree_init(&ver_tree, &tmp_node_rbtree_ops);
902 
903 	/* Update highest version and latest node reference. */
904 	if (rii->mdata_tn) {
905 		high_ver = rii->mdata_tn->tmpnode->version;
906 		rii->latest_ref = rii->mdata_tn->tmpnode->node->nref;
907 	}
908 
909 	/* Iterate the temporary tree in reverse order. */
910 	pen = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&rii->tdi_root);
911 
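	/* Overlapping nodes are collected from the offset-ordered tdi tree into
	 * ver_tree; once a non-overlapped (or leftmost) node is reached, the
	 * collected run is flushed into the fragtree from its highest offset
	 * downwards. */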
912 	while ((last = pen)) {
913 		pen = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&rii->tdi_root, last, RB_DIR_LEFT);
914 
915 		/* Here we build a version tree from the overlapping nodes. */
916 		rb_tree_remove_node(&rii->tdi_root, last);
917 		rb_tree_insert_node(&ver_tree, last);
918 
919 		if (last->tmpnode->overlapped) {
920 			if (pen)
921 				continue;
922 
923 			last->tmpnode->overlapped = 0;
924 		}
925 
926 		this = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&ver_tree);
927 
928 		/* Start to build the fragtree. */
929 		while (this) {
930 			struct chfs_tmp_dnode_info *vers_next;
931 			int ret;
932 
933 			vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
934 			rb_tree_remove_node(&ver_tree, this);
935 
936 			struct chfs_tmp_dnode *tmp_td = this->tmpnode;
937 			while (tmp_td) {
938 				struct chfs_tmp_dnode *next_td = tmp_td->next;
939 
940 				/* Check temporary node. */
941 				if (chfs_check_td_node(chmp, tmp_td)) {
942 					if (next_td) {
943 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
944 						chfs_kill_td(chmp, tmp_td);
945 					} else {
946 						break;
947 					}
948 				} else {
949 					if (tmp_td->version > high_ver) {
950 						high_ver = tmp_td->version;
951 						dbg("highver: %llu\n", (unsigned long long)high_ver);
952 						rii->latest_ref = tmp_td->node->nref;
953 					}
954 
955 					/* Add node to inode and its fragtree. */
956 					ret = chfs_add_full_dnode_to_inode(chmp, ip, tmp_td->node);
957 					if (ret) {
958 						/* On error, clean the whole version tree. */
959 						while (1) {
960 							vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
961 							while (tmp_td) {
962 								next_td = tmp_td->next;
963 
964 								chfs_free_full_dnode(tmp_td->node);
965 								chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
966 								chfs_kill_td(chmp, tmp_td);
967 								tmp_td = next_td;
968 							}
969 							chfs_free_tmp_dnode_info(this);
970 							this = vers_next;
971 							if (!this)
972 								break;
973 							rb_tree_remove_node(&ver_tree, vers_next);
974 							chfs_kill_tdi(chmp, vers_next);
975 						}
976 						return ret;
977 					}
978 
979 					/* Remove temporary node from temporary descriptor.
980 					 * Shouldn't obsolete tmp_td here, because tmp_td->node
981 					 * was added to the inode. */
982 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
983 					chfs_free_tmp_dnode(tmp_td);
984 				}
985 				tmp_td = next_td;
986 			}
987 			/* Continue with the previous element of version tree. */
988 			chfs_kill_tdi(chmp, this);
989 			this = vers_next;
990 		}
991 	}
992 
993 	return 0;
994 }
995 
996 /* chfs_read_inode - checks the state of the inode then reads and builds it */
997 int chfs_read_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
998 {
999 	struct chfs_vnode_cache *vc = ip->chvc;
1000 
1001 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
1002 
1003 retry:
1004 	mutex_enter(&chmp->chm_lock_vnocache);
1005 	switch (vc->state) {
1006 		case VNO_STATE_UNCHECKED:
1007 			/* FALLTHROUGH */
1008 		case VNO_STATE_CHECKEDABSENT:
1009 			vc->state = VNO_STATE_READING;
1010 			break;
1011 		case VNO_STATE_CHECKING:
1012 			/* FALLTHROUGH */
1013 		case VNO_STATE_GC:
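			/* The vnode is being checked or garbage collected by
			 * someone else; drop the lock and retry until done. */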
1014 			mutex_exit(&chmp->chm_lock_vnocache);
1015 			goto retry;
1016 			break;
1017 		case VNO_STATE_PRESENT:
1018 			/* FALLTHROUGH */
1019 		case VNO_STATE_READING:
1020 			chfs_err("Reading inode #%llu in state %d!\n",
1021 				(unsigned long long)vc->vno, vc->state);
1022 			chfs_err("wants to read a nonexistent ino %llu\n",
1023 				(unsigned long long)vc->vno);
1024 			return ENOENT;
1025 		default:
1026 			panic("BUG() Bad vno cache state.");
1027 	}
1028 	mutex_exit(&chmp->chm_lock_vnocache);
1029 
1030 	return chfs_read_inode_internal(chmp, ip);
1031 }
1032 
1033 /*
1034  * chfs_read_inode_internal - reads and builds an inode
1035  * First it collects the temporary data nodes, then builds the fragtree from them.
1036  */
1037 int
1038 chfs_read_inode_internal(struct chfs_mount *chmp, struct chfs_inode *ip)
1039 {
1040 	int err;
1041 	size_t len, retlen;
1042 	char* buf;
1043 	struct chfs_readinode_info rii;
1044 	struct chfs_flash_vnode *fvnode;
1045 
1046 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
1047 
1048 	len = sizeof(*fvnode);
1049 
1050 	memset(&rii, 0, sizeof(rii));
1051 
1052 	rb_tree_init(&rii.tdi_root, &tmp_node_rbtree_ops);
1053 
1054 	/* Build a temporary node tree. */
1055 	err = chfs_get_data_nodes(chmp, ip, &rii);
1056 	if (err) {
1057 		if (ip->chvc->state == VNO_STATE_READING)
1058 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
1059 		/* FIXME Should we kill fragtree or something here? */
1060 		return err;
1061 	}
1062 
1063 	/* Build fragtree from temp nodes. */
1064 	rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
1065 
1066 	err = chfs_build_fragtree(chmp, ip, &rii);
1067 	if (err) {
1068 		if (ip->chvc->state == VNO_STATE_READING)
1069 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
1070 		/* FIXME Should we kill fragtree or something here? */
1071 		return err;
1072 	}
1073 
1074 	if (!rii.latest_ref) {
1075 		return 0;
1076 	}
1077 
1078 	buf = kmem_alloc(len, KM_SLEEP);
1079 	if (!buf)
1080 		return ENOMEM;
1081 
1082 	/* Set inode size from its vnode information node. */
1083 	err = chfs_read_leb(chmp, ip->chvc->v->nref_lnr, buf, CHFS_GET_OFS(ip->chvc->v->nref_offset), len, &retlen);
1084 	if (err || retlen != len) {
1085 		kmem_free(buf, len);
1086 		return err?err:EIO;
1087 	}
1088 
1089 	fvnode = (struct chfs_flash_vnode*)buf;
1090 
1091 	dbg("set size from v: %u\n", fvnode->dn_size);
1092 	chfs_set_vnode_size(ITOV(ip), fvnode->dn_size);
1093 	uint32_t retsize = chfs_truncate_fragtree(chmp, &ip->fragtree, fvnode->dn_size);
1094 	if (retsize != fvnode->dn_size) {
1095 		dbg("Truncating failed. It is %u instead of %u\n", retsize, fvnode->dn_size);
1096 	}
1097 
1098 	kmem_free(buf, len);
1099 
1100 	if (ip->chvc->state == VNO_STATE_READING) {
1101 		ip->chvc->state = VNO_STATE_PRESENT;
1102 	}
1103 
1104 	return 0;
1105 }
1106 
1107 /* chfs_read_data - reads and checks data of a file */
1108 int
1109 chfs_read_data(struct chfs_mount* chmp, struct vnode *vp,
1110     struct buf *bp)
1111 {
1112 	off_t ofs;
1113 	struct chfs_node_frag *frag;
1114 	char * buf;
1115 	int err = 0;
1116 	size_t size, retlen;
1117 	uint32_t crc;
1118 	struct chfs_inode *ip = VTOI(vp);
1119 	struct chfs_flash_data_node *dnode;
1120 	struct chfs_node_ref *nref;
1121 
1122 	memset(bp->b_data, 0, bp->b_bcount);
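	/* Zero the buffer up front so holes in the fragtree read back as zeroes. */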
1123 
1124 	/* Find the fragment that covers the requested offset in the fragtree. */
1125 	ofs = bp->b_blkno * PAGE_SIZE;
1126 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(&ip->fragtree, &ofs);
1127 
1128 	if (!frag || frag->ofs > ofs || frag->ofs + frag->size <= ofs) {
1129 		bp->b_resid = 0;
1130 		dbg("not found in frag tree\n");
1131 		return 0;
1132 	}
1133 
1134 	if (!frag->node) {
1135 		dbg("no node in frag\n");
1136 		return 0;
1137 	}
1138 
1139 	nref = frag->node->nref;
1140 	size = sizeof(*dnode) + frag->size;
1141 
1142 	buf = kmem_alloc(size, KM_SLEEP);
1143 
1144 	/* Read node from flash. */
1145 	dbg("reading from lnr: %u, offset: %u, size: %zu\n", nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset), size);
1146 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), size, &retlen);
1147 	if (err) {
1148 		chfs_err("error after reading: %d\n", err);
1149 		goto out;
1150 	}
1151 	if (retlen != size) {
1152 		chfs_err("retlen: %zu != size: %zu\n", retlen, size);
1153 		err = EIO;
1154 		goto out;
1155 	}
1156 
1157 	/* Check header crc. */
1158 	dnode = (struct chfs_flash_data_node *)buf;
1159 	crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
1160 	if (crc != le32toh(dnode->hdr_crc)) {
1161 		chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
1162 		err = EIO;
1163 		goto out;
1164 	}
1165 
1166 	/* Check header magic bitmask. */
1167 	if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
1168 		chfs_err("Wrong magic bitmask.\n");
1169 		err = EIO;
1170 		goto out;
1171 	}
1172 
1173 	/* Check crc of node. */
1174 	crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
1175 	if (crc != le32toh(dnode->node_crc)) {
1176 		chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
1177 		err = EIO;
1178 		goto out;
1179 	}
1180 
1181 	/* Check crc of data. */
1182 	crc = crc32(0, (uint8_t *)dnode->data, dnode->data_length);
1183 	if (crc != le32toh(dnode->data_crc)) {
1184 		chfs_err("Data CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->data_crc));
1185 		err = EIO;
1186 		goto out;
1187 	}
1188 
1189 	memcpy(bp->b_data, dnode->data, dnode->data_length);
1190 	bp->b_resid = 0;
1191 
1192 out:
1193 	kmem_free(buf, size);
1194 	return err;
1195 }
1196