1 /*	$NetBSD: chfs_readinode.c,v 1.8 2013/10/20 17:18:38 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2010 Department of Software Engineering,
5  *		      University of Szeged, Hungary
6  * Copyright (C) 2010 David Tengeri <dtengeri@inf.u-szeged.hu>
7  * Copyright (C) 2010 Tamas Toth <ttoth@inf.u-szeged.hu>
8  * Copyright (C) 2010 Adam Hoka <ahoka@NetBSD.org>
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to The NetBSD Foundation
12  * by the Department of Software Engineering, University of Szeged, Hungary
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/buf.h>
37 
38 #include "chfs.h"
39 
40 /* tmp node operations */
41 int chfs_check_td_data(struct chfs_mount *,
42     struct chfs_tmp_dnode *);
43 int chfs_check_td_node(struct chfs_mount *,
44     struct chfs_tmp_dnode *);
45 struct chfs_node_ref *chfs_first_valid_data_ref(struct chfs_node_ref *);
46 int chfs_add_tmp_dnode_to_tree(struct chfs_mount *,
47     struct chfs_readinode_info *,
48     struct chfs_tmp_dnode *);
49 void chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *,
50 	struct chfs_tmp_dnode *);
51 void chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *,
52 	struct chfs_tmp_dnode *);
53 static void chfs_kill_td(struct chfs_mount *,
54     struct chfs_tmp_dnode *);
55 static void chfs_kill_tdi(struct chfs_mount *,
56     struct chfs_tmp_dnode_info *);
57 /* frag node operations */
58 struct chfs_node_frag *new_fragment(struct chfs_full_dnode *,
59     uint32_t,
60     uint32_t);
61 int no_overlapping_node(struct rb_tree *, struct chfs_node_frag *,
62     struct chfs_node_frag *, uint32_t);
63 int chfs_add_frag_to_fragtree(struct chfs_mount *,
64     struct rb_tree *,
65     struct chfs_node_frag *);
66 void chfs_obsolete_node_frag(struct chfs_mount *,
67     struct chfs_node_frag *);
68 /* general node operations */
69 int chfs_get_data_nodes(struct chfs_mount *,
70     struct chfs_inode *,
71     struct chfs_readinode_info *);
72 int chfs_build_fragtree(struct chfs_mount *,
73     struct chfs_inode *,
74     struct chfs_readinode_info *);
75 
76 
77 
78 /* tmp node rbtree operations */
79 static signed int
80 tmp_node_compare_nodes(void *ctx, const void *n1, const void *n2)
81 {
82 	const struct chfs_tmp_dnode_info *tdi1 = n1;
83 	const struct chfs_tmp_dnode_info *tdi2 = n2;
84 
85 	return (tdi1->tmpnode->node->ofs - tdi2->tmpnode->node->ofs);
86 }
87 
88 static signed int
89 tmp_node_compare_key(void *ctx, const void *n, const void *key)
90 {
91 	const struct chfs_tmp_dnode_info *tdi = n;
92 	uint64_t ofs =  *(const uint64_t *)key;
93 
94 	return (tdi->tmpnode->node->ofs - ofs);
95 }
96 
97 const rb_tree_ops_t tmp_node_rbtree_ops = {
98 	.rbto_compare_nodes = tmp_node_compare_nodes,
99 	.rbto_compare_key = tmp_node_compare_key,
100 	.rbto_node_offset = offsetof(struct chfs_tmp_dnode_info, rb_node),
101 	.rbto_context = NULL
102 };
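/*
 * The temporary-node trees (rii->tdi_root and the ver_tree built in
 * chfs_build_fragtree) are keyed by the data offset of the first temporary
 * node hanging off each descriptor.
 */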
103 
104 
105 /* frag node rbtree operations */
106 static signed int
107 frag_compare_nodes(void *ctx, const void *n1, const void *n2)
108 {
109 	const struct chfs_node_frag *frag1 = n1;
110 	const struct chfs_node_frag *frag2 = n2;
111 
112 	return (frag1->ofs - frag2->ofs);
113 }
114 
115 static signed int
116 frag_compare_key(void *ctx, const void *n, const void *key)
117 {
118 	const struct chfs_node_frag *frag = n;
119 	uint64_t ofs = *(const uint64_t *)key;
120 
121 	return (frag->ofs - ofs);
122 }
123 
124 const rb_tree_ops_t frag_rbtree_ops = {
125 	.rbto_compare_nodes = frag_compare_nodes,
126 	.rbto_compare_key   = frag_compare_key,
127 	.rbto_node_offset = offsetof(struct chfs_node_frag, rb_node),
128 	.rbto_context = NULL
129 };
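/*
 * The fragtree is keyed by fragment offset, so rb_tree_find_node_leq() with
 * a file offset as key returns the fragment that starts at or before that
 * offset.
 */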
130 
131 
132 /*
133  * chfs_check_td_data - checks the data CRC of the node
134  *
135  * Returns: 0 - if everything is OK;
136  *          1 - if the CRC is incorrect;
137  *          2 - if an allocation, read or length error
138  *              occurred while checking the data.
139  */
140 int
141 chfs_check_td_data(struct chfs_mount *chmp,
142     struct chfs_tmp_dnode *td)
143 {
144 	int err;
145 	size_t retlen, len, totlen;
146 	uint32_t crc;
147 	uint64_t ofs;
148 	char *buf;
149 	struct chfs_node_ref *nref = td->node->nref;
150 
151 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
152 	KASSERT(!mutex_owned(&chmp->chm_lock_sizes));
153 
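	/* The data payload starts right after the on-flash data node header. */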
154 	ofs = CHFS_GET_OFS(nref->nref_offset) + sizeof(struct chfs_flash_data_node);
155 	len = td->node->size;
156 	if (!len)
157 		return 0;
158 
159 	/* Read data. */
160 	buf = kmem_alloc(len, KM_SLEEP);
161 	if (!buf) {
162 		dbg("allocation error\n");
163 		return 2;
164 	}
165 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, ofs, len, &retlen);
166 	if (err) {
167 		dbg("error while reading: %d\n", err);
168 		err = 2;
169 		goto out;
170 	}
171 
172 	/* Check crc. */
173 	if (len != retlen) {
174 		dbg("len:%zu, retlen:%zu\n", len, retlen);
175 		err = 2;
176 		goto out;
177 	}
178 	crc = crc32(0, (uint8_t *)buf, len);
179 
180 	if (crc != td->data_crc) {
181 		dbg("crc failed, calculated: 0x%x, orig: 0x%x\n", crc, td->data_crc);
182 		kmem_free(buf, len);
183 		return 1;
184 	}
185 
186 	/* Correct sizes. */
187 	CHFS_MARK_REF_NORMAL(nref);
188 	totlen = CHFS_PAD(sizeof(struct chfs_flash_data_node) + len);
189 
190 	mutex_enter(&chmp->chm_lock_sizes);
191 	chfs_change_size_unchecked(chmp, &chmp->chm_blocks[nref->nref_lnr], -totlen);
192 	chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
193 	mutex_exit(&chmp->chm_lock_sizes);
194 	KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
195 
196 	err = 0;
197 out:
198 	kmem_free(buf, len);
199 	return err;
200 }
201 
202 /* chfs_check_td_node - checks a temporary node */
203 int
204 chfs_check_td_node(struct chfs_mount *chmp, struct chfs_tmp_dnode *td)
205 {
206 	int ret;
207 
208 	if (CHFS_REF_FLAGS(td->node->nref) != CHFS_UNCHECKED_NODE_MASK)
209 		return 0;
210 
211 	ret = chfs_check_td_data(chmp, td);
212 	return ret;
213 }
214 
215 /*
216  * chfs_first_valid_data_ref -
217  * returns the first non-obsolete nref, starting from the given nref
218  */
219 struct chfs_node_ref *
220 chfs_first_valid_data_ref(struct chfs_node_ref *nref)
221 {
222 	while (nref) {
223 		if (!CHFS_REF_OBSOLETE(nref)) {
224 #ifdef DGB_MSG_GC
225 			if (nref->nref_lnr == REF_EMPTY_NODE) {
226 				dbg("FIRST VALID IS EMPTY!\n");
227 			}
228 #endif
229 			return nref;
230 		}
231 
232 		if (nref->nref_next) {
233 			nref = nref->nref_next;
234 		} else
235 			break;
236 	}
237 	return NULL;
238 }
239 
240 /*
241  * chfs_add_tmp_dnode_to_tdi -
242  * adds a temporary node to a temporary node descriptor
243  */
244 void
245 chfs_add_tmp_dnode_to_tdi(struct chfs_tmp_dnode_info *tdi,
246 	struct chfs_tmp_dnode *td)
247 {
248 	if (!tdi->tmpnode) {
249 	/* The chain is empty. */
250 		tdi->tmpnode = td;
251 	} else {
252 	/* Insert into the chain. */
253 		struct chfs_tmp_dnode *tmp = tdi->tmpnode;
254 		while (tmp->next) {
255 			tmp = tmp->next;
256 		}
257 		tmp->next = td;
258 	}
259 }
260 
261 /*
262  * chfs_remove_tmp_dnode_from_tdi -
263  * removes a temporary node from its descriptor
264  */
265 void
266 chfs_remove_tmp_dnode_from_tdi(struct chfs_tmp_dnode_info *tdi,
267 	struct chfs_tmp_dnode *td)
268 {
269 	if (tdi->tmpnode == td) {
270 	/* It's the first in the chain. */
271 		tdi->tmpnode = tdi->tmpnode->next;
272 	} else {
273 	/* Search the chain from its head and unlink td. */
274 		struct chfs_tmp_dnode *tmp = tdi->tmpnode;
275 		while (tmp->next && tmp->next != td) {
276 			tmp = tmp->next;
277 		}
278 		if (tmp->next) {
279 			tmp->next = td->next;
280 		}
281 	}
282 }
283 
284 /* chfs_kill_td - removes all components of a temporary node */
285 static void
286 chfs_kill_td(struct chfs_mount *chmp,
287     struct chfs_tmp_dnode *td)
288 {
289 	struct chfs_vnode_cache *vc;
290 	if (td->node) {
291 		mutex_enter(&chmp->chm_lock_vnocache);
292 		/* Remove the node from the vnode cache's data node chain. */
293 		vc = chfs_nref_to_vc(td->node->nref);
294 		chfs_remove_and_obsolete(chmp, vc, td->node->nref, &vc->dnode);
295 		mutex_exit(&chmp->chm_lock_vnocache);
296 	}
297 
298 	chfs_free_tmp_dnode(td);
299 }
300 
301 /* chfs_kill_tdi - removes a temporary node descriptor */
302 static void
303 chfs_kill_tdi(struct chfs_mount *chmp,
304     struct chfs_tmp_dnode_info *tdi)
305 {
306 	struct chfs_tmp_dnode *next, *tmp = tdi->tmpnode;
307 
308 	/* Iterate the chain and remove all temporary nodes from it. */
309 	while (tmp) {
310 		next = tmp->next;
311 		chfs_kill_td(chmp, tmp);
312 		tmp = next;
313 	}
314 
315 	chfs_free_tmp_dnode_info(tdi);
316 }
317 
318 /*
319  * chfs_add_tmp_dnode_to_tree -
320  * adds a temporary node to the temporary tree
321  */
322 int
323 chfs_add_tmp_dnode_to_tree(struct chfs_mount *chmp,
324     struct chfs_readinode_info *rii,
325     struct chfs_tmp_dnode *newtd)
326 {
327 	uint64_t end_ofs = newtd->node->ofs + newtd->node->size;
328 	struct chfs_tmp_dnode_info *this;
329 	struct rb_node *node, *prev_node;
330 	struct chfs_tmp_dnode_info *newtdi;
331 
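	/*
	 * Find the first temporary node that may overlap the new one: look up
	 * the descriptor at the same offset (if any), then walk left while the
	 * current descriptor is itself overlapped by something before it.
	 */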
332 	node = rb_tree_find_node(&rii->tdi_root, &newtd->node->ofs);
333 	if (node) {
334 		this = (struct chfs_tmp_dnode_info *)node;
335 		while (this->tmpnode->overlapped) {
336 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
337 			if (!prev_node) {
338 				this->tmpnode->overlapped = 0;
339 				break;
340 			}
341 			node = prev_node;
342 			this = (struct chfs_tmp_dnode_info *)node;
343 		}
344 	}
345 
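	/*
	 * Walk right over every descriptor that starts before the end of the
	 * new node and resolve version and overlap conflicts with it.
	 */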
346 	while (node) {
347 		this = (struct chfs_tmp_dnode_info *)node;
348 		if (this->tmpnode->node->ofs > end_ofs)
349 			break;
350 
351 		struct chfs_tmp_dnode *tmp_td = this->tmpnode;
352 		while (tmp_td) {
353 			if (tmp_td->version == newtd->version) {
354 				/* This is a new version of an old node. */
355 				if (!chfs_check_td_node(chmp, tmp_td)) {
356 					dbg("calling kill td 0\n");
357 					chfs_kill_td(chmp, newtd);
358 					return 0;
359 				} else {
360 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
361 					chfs_kill_td(chmp, tmp_td);
362 					chfs_add_tmp_dnode_to_tdi(this, newtd);
363 					return 0;
364 				}
365 			}
366 			if (tmp_td->version < newtd->version &&
367 				tmp_td->node->ofs >= newtd->node->ofs &&
368 				tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
369 				/* New node entirely overlaps 'this' */
370 				if (chfs_check_td_node(chmp, newtd)) {
371 					dbg("calling kill td 2\n");
372 					chfs_kill_td(chmp, newtd);
373 					return 0;
374 				}
375 				/* ... and is good. Kill 'this' and any subsequent nodes which are also overlapped */
376 				while (tmp_td && tmp_td->node->ofs + tmp_td->node->size <= end_ofs) {
377 					struct rb_node *next = rb_tree_iterate(&rii->tdi_root, this, RB_DIR_RIGHT);
378 					struct chfs_tmp_dnode_info *next_tdi = (struct chfs_tmp_dnode_info *)next;
379 					struct chfs_tmp_dnode *next_td = NULL;
380 					if (tmp_td->next) {
381 						next_td = tmp_td->next;
382 					} else if (next_tdi) {
383 						next_td = next_tdi->tmpnode;
384 					}
385 					if (tmp_td->version < newtd->version) {
386 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
387 						chfs_kill_td(chmp, tmp_td);
388 						if (!this->tmpnode) {
389 							rb_tree_remove_node(&rii->tdi_root, this);
390 							chfs_kill_tdi(chmp, this);
391 							this = next_tdi;
392 						}
393 					}
394 					tmp_td = next_td;
395 				}
396 				continue;
397 			}
398 			if (tmp_td->version > newtd->version &&
399 				tmp_td->node->ofs <= newtd->node->ofs &&
400 				tmp_td->node->ofs + tmp_td->node->size >= end_ofs) {
401 				/* New node entirely overlapped by 'this' */
402 				if (!chfs_check_td_node(chmp, tmp_td)) {
403 					dbg("this version: %llu\n",
404 						(unsigned long long)tmp_td->version);
405 					dbg("this ofs: %llu, size: %u\n",
406 						(unsigned long long)tmp_td->node->ofs,
407 						tmp_td->node->size);
408 					dbg("calling kill td 4\n");
409 					chfs_kill_td(chmp, newtd);
410 					return 0;
411 				}
412 				/* ... but 'this' was bad. Replace it... */
413 				chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
414 				chfs_kill_td(chmp, tmp_td);
415 				if (!this->tmpnode) {
416 					rb_tree_remove_node(&rii->tdi_root, this);
417 					chfs_kill_tdi(chmp, this);
418 				}
419 				dbg("calling kill td 5\n");
420 				chfs_kill_td(chmp, newtd);
421 				break;
422 			}
423 			tmp_td = tmp_td->next;
424 		}
425 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
426 	}
427 
428 	newtdi = chfs_alloc_tmp_dnode_info();
429 	chfs_add_tmp_dnode_to_tdi(newtdi, newtd);
430 	/* The new node neither completely obsoletes an earlier node nor is
431 	   completely obsoleted by one. Insert it into the tree. */
432 	struct chfs_tmp_dnode_info *tmp_tdi = rb_tree_insert_node(&rii->tdi_root, newtdi);
433 	if (tmp_tdi != newtdi) {
434 		chfs_remove_tmp_dnode_from_tdi(newtdi, newtd);
435 		chfs_add_tmp_dnode_to_tdi(tmp_tdi, newtd);
436 		chfs_kill_tdi(chmp, newtdi);
437 	}
438 
439 	/* If there's anything behind that overlaps us, note it */
440 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
441 	if (node) {
442 		while (1) {
443 			this = (struct chfs_tmp_dnode_info *)node;
444 			if (this->tmpnode->node->ofs + this->tmpnode->node->size > newtd->node->ofs) {
445 				newtd->overlapped = 1;
446 			}
447 			if (!this->tmpnode->overlapped)
448 				break;
449 
450 			prev_node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_LEFT);
451 			if (!prev_node) {
452 				this->tmpnode->overlapped = 0;
453 				break;
454 			}
455 			node = prev_node;
456 		}
457 	}
458 
459 	/* If the new node overlaps anything ahead, note it */
460 	node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
461 	this = (struct chfs_tmp_dnode_info *)node;
462 	while (this && this->tmpnode->node->ofs < end_ofs) {
463 		this->tmpnode->overlapped = 1;
464 		node = rb_tree_iterate(&rii->tdi_root, node, RB_DIR_RIGHT);
465 		this = (struct chfs_tmp_dnode_info *)node;
466 	}
467 	return 0;
468 }
469 
470 
471 /* new_fragment - creates a new fragment for a data node */
472 struct chfs_node_frag *
473 new_fragment(struct chfs_full_dnode *fdn, uint32_t ofs, uint32_t size)
474 {
475 	struct chfs_node_frag *newfrag;
476 	newfrag = chfs_alloc_node_frag();
477 	if (newfrag) {
478 		/* Initialize fragment. */
479 		newfrag->ofs = ofs;
480 		newfrag->size = size;
481 		newfrag->node = fdn;
482 		if (newfrag->node) {
483 			newfrag->node->frags++;
484 		}
485 	} else {
486 		chfs_err("cannot allocate a chfs_node_frag object\n");
487 	}
488 	return newfrag;
489 }
490 
491 /*
492  * no_overlapping_node - inserts a non-overlapping fragment into the fragtree
493  * A hole fragment is inserted first if there is a gap before the new one.
494  */
495 int
496 no_overlapping_node(struct rb_tree *fragtree,
497     struct chfs_node_frag *newfrag,
498     struct chfs_node_frag *this, uint32_t lastend)
499 {
500 	if (lastend < newfrag->node->ofs) {
501 		struct chfs_node_frag *holefrag;
502 
503 		holefrag = new_fragment(NULL, lastend, newfrag->node->ofs - lastend);
504 		if (!holefrag) {
505 			chfs_free_node_frag(newfrag);
506 			return ENOMEM;
507 		}
508 
509 		rb_tree_insert_node(fragtree, holefrag);
510 	}
511 
512 	rb_tree_insert_node(fragtree, newfrag);
513 
514 	return 0;
515 }
516 
517 /*
518  * chfs_add_frag_to_fragtree -
519  * adds a fragment to a data node's fragtree
520  */
521 int
522 chfs_add_frag_to_fragtree(struct chfs_mount *chmp,
523     struct rb_tree *fragtree,
524     struct chfs_node_frag *newfrag)
525 {
526 	struct chfs_node_frag *this;
527 	uint32_t lastend;
528 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
529 
530 	/* Find the offset of frag which is before the new one. */
531 	this = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &newfrag->ofs);
532 
533 	if (this) {
534 		lastend = this->ofs + this->size;
535 	} else {
536 		lastend = 0;
537 	}
538 
539 	/* The new fragment is at the end of the file and there is no overlap. */
540 	if (lastend <= newfrag->ofs) {
541 		if (lastend && (lastend - 1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) {
542 			if (this->node)
543 				CHFS_MARK_REF_NORMAL(this->node->nref);
544 			CHFS_MARK_REF_NORMAL(newfrag->node->nref);
545 		}
546 		return no_overlapping_node(fragtree, newfrag, this, lastend);
547 	}
548 
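	/* From here on, the fragment found above overlaps the new one. */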
549 	if (newfrag->ofs > this->ofs) {
550 		CHFS_MARK_REF_NORMAL(newfrag->node->nref);
551 		if (this->node)
552 			CHFS_MARK_REF_NORMAL(this->node->nref);
553 
554 		if (this->ofs + this->size > newfrag->ofs + newfrag->size) {
555 			/* Newfrag is inside of this. */
556 			struct chfs_node_frag *newfrag2;
557 
558 			newfrag2 = new_fragment(this->node, newfrag->ofs + newfrag->size,
559 			    this->ofs + this->size - newfrag->ofs - newfrag->size);
560 			if (!newfrag2)
561 				return ENOMEM;
562 
563 			this->size = newfrag->ofs - this->ofs;
564 
565 			rb_tree_insert_node(fragtree, newfrag);
566 			rb_tree_insert_node(fragtree, newfrag2);
567 
568 			return 0;
569 		}
570 		/* Newfrag covers the tail of this; truncate this. */
571 		this->size = newfrag->ofs - this->ofs;
572 		rb_tree_insert_node(fragtree, newfrag);
573 	} else {
574 		/* Newfrag starts at the same offset as this. */
575 		//TODO replace instead of remove and insert
576 		rb_tree_remove_node(fragtree, this);
577 		rb_tree_insert_node(fragtree, newfrag);
578 
579 		if (newfrag->ofs + newfrag->size >= this->ofs+this->size) {
580 			chfs_obsolete_node_frag(chmp, this);
581 		} else {
582 			this->ofs += newfrag->size;
583 			this->size -= newfrag->size;
584 
585 			rb_tree_insert_node(fragtree, this);
586 			return 0;
587 		}
588 	}
589 	/* OK, now we have newfrag added in the correct place in the tree, but
590 	   frag_next(newfrag) may be a fragment which is overlapped by it
591 	*/
592 	while ((this = frag_next(fragtree, newfrag)) && newfrag->ofs + newfrag->size >= this->ofs + this->size) {
593 		rb_tree_remove_node(fragtree, this);
594 		chfs_obsolete_node_frag(chmp, this);
595 	}
596 
597 	if (!this || newfrag->ofs + newfrag->size == this->ofs)
598 		return 0;
599 
600 	this->size = (this->ofs + this->size) - (newfrag->ofs + newfrag->size);
601 	this->ofs = newfrag->ofs + newfrag->size;
602 
603 	if (this->node)
604 		CHFS_MARK_REF_NORMAL(this->node->nref);
605 	CHFS_MARK_REF_NORMAL(newfrag->node->nref);
606 
607 	return 0;
608 }
609 
610 /*
611  * chfs_remove_frags_of_node -
612  * removes all fragments from a fragtree and DOESN'T OBSOLETE them
613  */
614 void
615 chfs_remove_frags_of_node(struct chfs_mount *chmp, struct rb_tree *fragtree,
616 	struct chfs_node_ref *nref)
617 {
618 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
619 	struct chfs_node_frag *this, *next;
620 
621 	if (nref == NULL) {
622 		return;
623 	}
624 
625 	/* Iterate the tree and remove every fragment that belongs to the given node. */
626 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
627 	while (this) {
628 		next = frag_next(fragtree, this);
629 		if (this->node != NULL && this->node->nref == nref) {
630 			rb_tree_remove_node(fragtree, this);
631 			chfs_free_node_frag(this);
632 		}
633 		this = next;
634 	}
635 }
636 
637 /*
638  * chfs_kill_fragtree -
639  * removes all fragments from a fragtree and OBSOLETES them
640  */
641 void
642 chfs_kill_fragtree(struct chfs_mount *chmp, struct rb_tree *fragtree)
643 {
644 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
645 	struct chfs_node_frag *this, *next;
646 
647 	/* Iterate the tree and clean all elements. */
648 	this = (struct chfs_node_frag *)RB_TREE_MIN(fragtree);
649 	while (this) {
650 		next = frag_next(fragtree, this);
651 		rb_tree_remove_node(fragtree, this);
652 		chfs_obsolete_node_frag(chmp, this);
653 		this = next;
654 	}
655 }
656 
657 /* chfs_truncate_fragtree - truncates the tree to a specified size */
658 uint32_t
659 chfs_truncate_fragtree(struct chfs_mount *chmp,
660 	struct rb_tree *fragtree, uint32_t size)
661 {
662 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
663 	struct chfs_node_frag *frag;
664 
665 	dbg("truncate to size: %u\n", size);
666 
667 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(fragtree, &size);
668 
669 	/* Find the last frag before size and set its new size. */
670 	if (frag && frag->ofs != size) {
671 		if (frag->ofs + frag->size > size) {
672 			frag->size = size - frag->ofs;
673 		}
674 		frag = frag_next(fragtree, frag);
675 	}
676 
677 	/* Delete frags after new size. */
678 	while (frag && frag->ofs >= size) {
679 		struct chfs_node_frag *next = frag_next(fragtree, frag);
680 
681 		rb_tree_remove_node(fragtree, frag);
682 		chfs_obsolete_node_frag(chmp, frag);
683 		frag = next;
684 	}
685 
686 	if (size == 0) {
687 		return 0;
688 	}
689 
690 	frag = frag_last(fragtree);
691 
692 	if (!frag) {
693 		return 0;
694 	}
695 
696 	if (frag->ofs + frag->size < size) {
697 		return frag->ofs + frag->size;
698 	}
699 
700 	/* FIXME Should we check the position of the last node? (PAGE_CACHE size, etc.) */
701 	if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) {
702 		frag->node->nref->nref_offset =
703 			CHFS_GET_OFS(frag->node->nref->nref_offset) | CHFS_PRISTINE_NODE_MASK;
704 	}
705 
706 	return size;
707 }
708 
709 /* chfs_obsolete_node_frag - obsoletes a fragment of a node */
710 void
711 chfs_obsolete_node_frag(struct chfs_mount *chmp,
712     struct chfs_node_frag *this)
713 {
714 	struct chfs_vnode_cache *vc;
715 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
716 	if (this->node) {
717 	/* The fragment belongs to a data node (it is not a hole). */
718 		KASSERT(this->node->frags != 0);
719 		this->node->frags--;
720 		if (this->node->frags == 0) {
721 		/* This was the node's last fragment; the node itself can go. */
722 			KASSERT(!CHFS_REF_OBSOLETE(this->node->nref));
723 			mutex_enter(&chmp->chm_lock_vnocache);
724 			vc = chfs_nref_to_vc(this->node->nref);
725 			dbg("[MARK] lnr: %u ofs: %u\n", this->node->nref->nref_lnr,
726 				this->node->nref->nref_offset);
727 
728 			chfs_remove_and_obsolete(chmp, vc, this->node->nref, &vc->dnode);
729 			mutex_exit(&chmp->chm_lock_vnocache);
730 
731 			chfs_free_full_dnode(this->node);
732 		} else {
733 		/* There are more fragments referencing the node. */
734 			CHFS_MARK_REF_NORMAL(this->node->nref);
735 		}
736 	}
737 	chfs_free_node_frag(this);
738 }
739 
740 /* chfs_add_full_dnode_to_inode - adds a data node to an inode */
741 int
742 chfs_add_full_dnode_to_inode(struct chfs_mount *chmp,
743     struct chfs_inode *ip,
744     struct chfs_full_dnode *fd)
745 {
746 	int ret;
747 	struct chfs_node_frag *newfrag;
748 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
749 
750 	if (unlikely(!fd->size))
751 		return 0;
752 
753 	/* Create a new fragment from the data node and add it to the fragtree. */
754 	newfrag = new_fragment(fd, fd->ofs, fd->size);
755 	if (unlikely(!newfrag))
756 		return ENOMEM;
757 
758 	ret = chfs_add_frag_to_fragtree(chmp, &ip->fragtree, newfrag);
759 	if (ret)
760 		return ret;
761 
762 	/* Check previous fragment. */
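	/*
	 * If the new fragment does not start or end on a page boundary, it
	 * shares a page with its neighbours, so the affected node references
	 * are marked as normal (not pristine).
	 */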
763 	if (newfrag->ofs & (PAGE_SIZE - 1)) {
764 		struct chfs_node_frag *prev = frag_prev(&ip->fragtree, newfrag);
765 
766 		CHFS_MARK_REF_NORMAL(fd->nref);
767 		if (prev->node)
768 			CHFS_MARK_REF_NORMAL(prev->node->nref);
769 	}
770 
771 	/* Check next fragment. */
772 	if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE - 1)) {
773 		struct chfs_node_frag *next = frag_next(&ip->fragtree, newfrag);
774 
775 		if (next) {
776 			CHFS_MARK_REF_NORMAL(fd->nref);
777 			if (next->node)
778 				CHFS_MARK_REF_NORMAL(next->node->nref);
779 		}
780 	}
781 
782 	return 0;
783 }
784 
785 
786 /* chfs_get_data_nodes - get temporary nodes of an inode */
787 int
788 chfs_get_data_nodes(struct chfs_mount *chmp,
789     struct chfs_inode *ip,
790     struct chfs_readinode_info *rii)
791 {
792 	uint32_t crc;
793 	int err;
794 	size_t len, retlen;
795 	struct chfs_node_ref *nref;
796 	struct chfs_flash_data_node *dnode;
797 	struct chfs_tmp_dnode *td;
798 	char* buf;
799 
800 	len = sizeof(struct chfs_flash_data_node);
801 	buf = kmem_alloc(len, KM_SLEEP);
802 
803 	/* dnode is only ever used below as a view into buf once a node
804 	 * header has been read from flash, so no separate buffer is
805 	 * allocated for it. */
806 
807 	nref = chfs_first_valid_data_ref(ip->chvc->dnode);
808 
809 	/* Update highest version. */
810 	rii->highest_version = ip->chvc->highest_version;
811 
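	/*
	 * Walk the vnode cache's chain of data node references; the chain is
	 * terminated by a pointer back to the vnode cache itself.
	 */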
812 	while(nref && (struct chfs_vnode_cache *)nref != ip->chvc) {
813 		err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), len, &retlen);
814 		if (err || len != retlen)
815 			goto out;
816 		dnode = (struct chfs_flash_data_node*)buf;
817 
818 		/* Check header crc. */
819 		crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
820 		if (crc != le32toh(dnode->hdr_crc)) {
821 			chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
822 			goto cont;
823 		}
824 
825 		/* Check header magic bitmask. */
826 		if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
827 			chfs_err("Wrong magic bitmask.\n");
828 			goto cont;
829 		}
830 
831 		/* Check node crc. */
832 		crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
833 		if (crc != le32toh(dnode->node_crc)) {
834 			chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
835 			goto cont;
836 		}
837 
838 		td = chfs_alloc_tmp_dnode();
839 		if (!td) {
840 			chfs_err("Can't allocate tmp dnode info.\n");
841 			err = ENOMEM;
842 			goto out;
843 		}
844 
845 		/* The data CRC isn't checked here; the node is only added to the
846 		 * temporary node tree, because nodes that are later overlapped by
847 		 * a node with a higher version number never need to be checked.
848 		 */
849 		td->node = chfs_alloc_full_dnode();
850 		if (!td->node) {
851 			chfs_err("Can't allocate full dnode info.\n");
852 			err = ENOMEM;
853 			goto out_tmp_dnode;
854 		}
855 		td->version = le64toh(dnode->version);
856 		td->node->ofs = le64toh(dnode->offset);
857 		td->data_crc = le32toh(dnode->data_crc);
858 		td->node->nref = nref;
859 		td->node->size = le32toh(dnode->data_length);
860 		td->node->frags = 1;
861 		td->overlapped = 0;
862 
863 		if (td->version > rii->highest_version) {
864 			rii->highest_version = td->version;
865 		}
866 
867 		/* Add node to the tree. */
868 		err = chfs_add_tmp_dnode_to_tree(chmp, rii, td);
869 		if (err)
870 			goto out_full_dnode;
871 
872 cont:
873 		nref = chfs_first_valid_data_ref(nref->nref_next);
874 	}
875 
876 	ip->chvc->highest_version = rii->highest_version;
877 	kmem_free(buf, len);
878 	return 0;
879 out_full_dnode:
880 	chfs_free_full_dnode(td->node);
881 out_tmp_dnode:
882 	chfs_free_tmp_dnode(td);
883 out:
884 	/* dnode points into buf (or was never set), so only buf is freed. */
885 	kmem_free(buf, len);
886 	return err;
887 }
888 
889 
890 /* chfs_build_fragtree - builds fragtree from temporary tree */
891 int
892 chfs_build_fragtree(struct chfs_mount *chmp, struct chfs_inode *ip,
893     struct chfs_readinode_info *rii)
894 {
895 	struct chfs_tmp_dnode_info *pen, *last, *this;
896 	struct rb_tree ver_tree;    /* version tree, used only temporarily */
897 	uint64_t high_ver = 0;
898 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
899 
900 	rb_tree_init(&ver_tree, &tmp_node_rbtree_ops);
901 
902 	/* Update highest version and latest node reference. */
903 	if (rii->mdata_tn) {
904 		high_ver = rii->mdata_tn->tmpnode->version;
905 		rii->latest_ref = rii->mdata_tn->tmpnode->node->nref;
906 	}
907 
908 	/* Iterate the temporary tree in reverse order. */
909 	pen = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&rii->tdi_root);
910 
911 	while((last = pen)) {
912 		pen = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&rii->tdi_root, last, RB_DIR_LEFT);
913 
914 		/* We build here a version tree from overlapped nodes. */
915 		rb_tree_remove_node(&rii->tdi_root, last);
916 		rb_tree_insert_node(&ver_tree, last);
917 
918 		if (last->tmpnode->overlapped) {
919 			if (pen)
920 				continue;
921 
922 			last->tmpnode->overlapped = 0;
923 		}
924 
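		/*
		 * At this point ver_tree holds one complete group of possibly
		 * overlapping descriptors; process it from the highest data
		 * offset downwards.
		 */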
925 		this = (struct chfs_tmp_dnode_info *)RB_TREE_MAX(&ver_tree);
926 
927 		/* Start to build the fragtree. */
928 		while (this) {
929 			struct chfs_tmp_dnode_info *vers_next;
930 			int ret;
931 
932 			vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
933 			rb_tree_remove_node(&ver_tree, this);
934 
935 			struct chfs_tmp_dnode *tmp_td = this->tmpnode;
936 			while (tmp_td) {
937 				struct chfs_tmp_dnode *next_td = tmp_td->next;
938 
939 				/* Check temporary node. */
940 				if (chfs_check_td_node(chmp, tmp_td)) {
941 					if (next_td) {
942 						chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
943 						chfs_kill_td(chmp, tmp_td);
944 					} else {
945 						break;
946 					}
947 				} else {
948 					if (tmp_td->version > high_ver) {
949 						high_ver = tmp_td->version;
950 						dbg("highver: %llu\n", (unsigned long long)high_ver);
951 						rii->latest_ref = tmp_td->node->nref;
952 					}
953 
954 					/* Add node to inode and its fragtree. */
955 					ret = chfs_add_full_dnode_to_inode(chmp, ip, tmp_td->node);
956 					if (ret) {
957 						/* On error, clean the whole version tree. */
958 						while (1) {
959 							vers_next = (struct chfs_tmp_dnode_info *)rb_tree_iterate(&ver_tree, this, RB_DIR_LEFT);
960 							while (tmp_td) {
961 								next_td = tmp_td->next;
962 
963 								chfs_free_full_dnode(tmp_td->node);
964 								chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
965 								chfs_kill_td(chmp, tmp_td);
966 								tmp_td = next_td;
967 							}
968 							chfs_free_tmp_dnode_info(this);
969 							this = vers_next;
970 							if (!this)
971 								break;
972 							rb_tree_remove_node(&ver_tree, vers_next);
973 							chfs_kill_tdi(chmp, vers_next);
974 						}
975 						return ret;
976 					}
977 
978 					/* Remove temporary node from temporary descriptor.
979 					 * Shouldn't obsolete tmp_td here, because tmp_td->node
980 					 * was added to the inode. */
981 					chfs_remove_tmp_dnode_from_tdi(this, tmp_td);
982 					chfs_free_tmp_dnode(tmp_td);
983 				}
984 				tmp_td = next_td;
985 			}
986 			/* Continue with the previous element of version tree. */
987 			chfs_kill_tdi(chmp, this);
988 			this = vers_next;
989 		}
990 	}
991 
992 	return 0;
993 }
994 
995 /* chfs_read_inode - checks the state of the inode then reads and builds it */
996 int chfs_read_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
997 {
998 	struct chfs_vnode_cache *vc = ip->chvc;
999 
1000 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
1001 
1002 retry:
1003 	mutex_enter(&chmp->chm_lock_vnocache);
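	/*
	 * Act on the vnode cache state: start reading when the inode is
	 * unchecked or checked-absent, retry while it is being checked or
	 * garbage collected, and fail if it is already present or being read.
	 */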
1004 	switch (vc->state) {
1005 		case VNO_STATE_UNCHECKED:
1006 			/* FALLTHROUGH */
1007 		case VNO_STATE_CHECKEDABSENT:
1008 			vc->state = VNO_STATE_READING;
1009 			break;
1010 		case VNO_STATE_CHECKING:
1011 			/* FALLTHROUGH */
1012 		case VNO_STATE_GC:
1013 			mutex_exit(&chmp->chm_lock_vnocache);
1014 			goto retry;
1015 			break;
1016 		case VNO_STATE_PRESENT:
1017 			/* FALLTHROUGH */
1018 		case VNO_STATE_READING:
1019 			chfs_err("Reading inode #%llu in state %d!\n",
1020 				(unsigned long long)vc->vno, vc->state);
1021 			chfs_err("wants to read a nonexistent ino %llu\n",
1022 				(unsigned long long)vc->vno);
1023 			return ENOENT;
1024 		default:
1025 			panic("BUG() Bad vno cache state.");
1026 	}
1027 	mutex_exit(&chmp->chm_lock_vnocache);
1028 
1029 	return chfs_read_inode_internal(chmp, ip);
1030 }
1031 
1032 /*
1033  * chfs_read_inode_internal - reads and builds an inode
1034  * First it collects the temporary data nodes, then builds the fragtree from them.
1035  */
1036 int
1037 chfs_read_inode_internal(struct chfs_mount *chmp, struct chfs_inode *ip)
1038 {
1039 	int err;
1040 	size_t len, retlen;
1041 	char* buf;
1042 	struct chfs_readinode_info rii;
1043 	struct chfs_flash_vnode *fvnode;
1044 
1045 	KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
1046 
1047 	len = sizeof(*fvnode);
1048 
1049 	memset(&rii, 0, sizeof(rii));
1050 
1051 	rb_tree_init(&rii.tdi_root, &tmp_node_rbtree_ops);
1052 
1053 	/* Build a temporary node tree. */
1054 	err = chfs_get_data_nodes(chmp, ip, &rii);
1055 	if (err) {
1056 		if (ip->chvc->state == VNO_STATE_READING)
1057 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
1058 		/* FIXME Should we kill fragtree or something here? */
1059 		return err;
1060 	}
1061 
1062 	/* Build fragtree from temp nodes. */
1063 	rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
1064 
1065 	err = chfs_build_fragtree(chmp, ip, &rii);
1066 	if (err) {
1067 		if (ip->chvc->state == VNO_STATE_READING)
1068 			ip->chvc->state = VNO_STATE_CHECKEDABSENT;
1069 		/* FIXME Should we kill fragtree or something here? */
1070 		return err;
1071 	}
1072 
1073 	if (!rii.latest_ref) {
1074 		return 0;
1075 	}
1076 
1077 	buf = kmem_alloc(len, KM_SLEEP);
1078 	if (!buf)
1079 		return ENOMEM;
1080 
1081 	/* Set inode size from its vnode information node. */
1082 	err = chfs_read_leb(chmp, ip->chvc->v->nref_lnr, buf, CHFS_GET_OFS(ip->chvc->v->nref_offset), len, &retlen);
1083 	if (err || retlen != len) {
1084 		kmem_free(buf, len);
1085 		return err?err:EIO;
1086 	}
1087 
1088 	fvnode = (struct chfs_flash_vnode*)buf;
1089 
1090 	dbg("set size from v: %u\n", fvnode->dn_size);
1091 	chfs_set_vnode_size(ITOV(ip), fvnode->dn_size);
1092 	uint32_t retsize = chfs_truncate_fragtree(chmp, &ip->fragtree, fvnode->dn_size);
1093 	if (retsize != fvnode->dn_size) {
1094 		dbg("Truncating failed. It is %u instead of %u\n", retsize, fvnode->dn_size);
1095 	}
1096 
1097 	kmem_free(buf, len);
1098 
1099 	if (ip->chvc->state == VNO_STATE_READING) {
1100 		ip->chvc->state = VNO_STATE_PRESENT;
1101 	}
1102 
1103 	return 0;
1104 }
1105 
1106 /* chfs_read_data - reads and checks data of a file */
1107 int
1108 chfs_read_data(struct chfs_mount* chmp, struct vnode *vp,
1109     struct buf *bp)
1110 {
1111 	off_t ofs;
1112 	struct chfs_node_frag *frag;
1113 	char * buf;
1114 	int err = 0;
1115 	size_t size, retlen;
1116 	uint32_t crc;
1117 	struct chfs_inode *ip = VTOI(vp);
1118 	struct chfs_flash_data_node *dnode;
1119 	struct chfs_node_ref *nref;
1120 
1121 	memset(bp->b_data, 0, bp->b_bcount);
1122 
1123 	/* Find the fragment that covers the requested block's offset. */
1124 	ofs = bp->b_blkno * PAGE_SIZE;
1125 	frag = (struct chfs_node_frag *)rb_tree_find_node_leq(&ip->fragtree, &ofs);
1126 
1127 	if (!frag || frag->ofs > ofs || frag->ofs + frag->size <= ofs) {
1128 		bp->b_resid = 0;
1129 		dbg("not found in frag tree\n");
1130 		return 0;
1131 	}
1132 
1133 	if (!frag->node) {
1134 		dbg("no node in frag\n");
1135 		return 0;
1136 	}
1137 
1138 	nref = frag->node->nref;
1139 	size = sizeof(*dnode) + frag->size;
1140 
1141 	buf = kmem_alloc(size, KM_SLEEP);
1142 
1143 	/* Read node from flash. */
1144 	dbg("reading from lnr: %u, offset: %u, size: %zu\n", nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset), size);
1145 	err = chfs_read_leb(chmp, nref->nref_lnr, buf, CHFS_GET_OFS(nref->nref_offset), size, &retlen);
1146 	if (err) {
1147 		chfs_err("error after reading: %d\n", err);
1148 		goto out;
1149 	}
1150 	if (retlen != size) {
1151 		chfs_err("retlen: %zu != size: %zu\n", retlen, size);
1152 		err = EIO;
1153 		goto out;
1154 	}
1155 
1156 	/* Check header crc. */
1157 	dnode = (struct chfs_flash_data_node *)buf;
1158 	crc = crc32(0, (uint8_t *)dnode, CHFS_NODE_HDR_SIZE - 4);
1159 	if (crc != le32toh(dnode->hdr_crc)) {
1160 		chfs_err("CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->hdr_crc));
1161 		err = EIO;
1162 		goto out;
1163 	}
1164 
1165 	/* Check header magic bitmask. */
1166 	if (le16toh(dnode->magic) != CHFS_FS_MAGIC_BITMASK) {
1167 		chfs_err("Wrong magic bitmask.\n");
1168 		err = EIO;
1169 		goto out;
1170 	}
1171 
1172 	/* Check crc of node. */
1173 	crc = crc32(0, (uint8_t *)dnode, sizeof(*dnode) - 4);
1174 	if (crc != le32toh(dnode->node_crc)) {
1175 		chfs_err("Node CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->node_crc));
1176 		err = EIO;
1177 		goto out;
1178 	}
1179 
1180 	/* Check crc of data. */
1181 	crc = crc32(0, (uint8_t *)dnode->data, le32toh(dnode->data_length));
1182 	if (crc != le32toh(dnode->data_crc)) {
1183 		chfs_err("Data CRC check failed. calc: 0x%x orig: 0x%x\n", crc, le32toh(dnode->data_crc));
1184 		err = EIO;
1185 		goto out;
1186 	}
1187 
1188 	memcpy(bp->b_data, dnode->data, le32toh(dnode->data_length));
1189 	bp->b_resid = 0;
1190 
1191 out:
1192 	kmem_free(buf, size);
1193 	return err;
1194 }
1195