xref: /plan9-contrib/sys/src/cmd/fossil/source.c (revision 4dc626cd82534e522e4a6a32aec8b7d6f806e477)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
/* forward declarations for the static helpers defined below */
static int	sizeToDepth(uvlong s, int psize, int dsize);
static u32int 	tagGen(void);
static Block 	*sourceLoad(Source *r, Entry *e);
static Block	*sourceLoadUnlocked(Source *r, Entry *e);
static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
static int	sourceShrinkSize(Source*, Entry*, uvlong);
static int	sourceGrowDepth(Source*, Block*, Entry*, int);

/* a source is "locked" exactly when it holds its entry block in r->b */
#define sourceIsLocked(r)	((r)->b != nil)
15 
/*
 * Build an in-memory Source for the directory entry at slot
 * offset%epb of block b.  p is the parent source; it is nil only
 * for the file system root, which must then be at offset 0.
 * The packed entry is validated first; corrupt fields go to Bad,
 * which sets EBadEntry and returns nil.
 */
static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode, int issnapshot)
{
	Source *r;
	int epb;	/* entries per block in the parent directory */
	u32int epoch;
	Entry e;

	assert(p==nil || sourceIsLocked(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	if(b->l.type != BtDir)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(!entryUnpack(&e, b->data, offset % epb)){
		fprint(2, "entryUnpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	/* implausibly small pointer or data block size */
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}

	/* the stored depth must suffice to address e.size bytes */
	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
		goto Bad;
	}

	/* local entries always carry a non-zero tag */
	if((e.flags & VtEntryLocal) && e.tag == 0){
		fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
		goto Bad;
	}

	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
		goto Bad;
	}

	epoch = b->l.epoch;
	if(mode == OReadWrite){
		/* snapshot entries may not be opened for writing */
		if(e.snap != 0){
			vtSetError(ESnapRO);
			return nil;
		}
	}else if(e.snap != 0){
		/* snapshots below the low epoch have been reclaimed */
		if(e.snap < fs->elo){
			vtSetError(ESnapOld);
			return nil;
		}
		/* a snapshot at or above the current epoch is corrupt */
		if(e.snap >= fs->ehi)
			goto Bad;
		epoch = e.snap;
	}

	r = vtMemAllocZ(sizeof(Source));
	r->fs = fs;
	r->mode = mode;
	r->issnapshot = issnapshot;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.flags & VtEntryDir) != 0;
	r->lk = vtLockAlloc();
	r->ref = 1;
	r->parent = p;
	if(p){
		vtLock(p->lk);
		/* a writable child requires a writable parent */
		assert(mode == OReadOnly || p->mode == OReadWrite);
		p->ref++;
		vtUnlock(p->lk);
	}
	r->epoch = epoch;
//fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
	/* cache the entry block's location so it can be reloaded later */
	memmove(r->score, b->score, VtScoreSize);
	r->scoreEpoch = b->l.epoch;
	r->offset = offset;
	r->epb = epb;
	r->tag = b->l.tag;

//fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);

	return r;
Bad:
	vtSetError(EBadEntry);
	return nil;

}
116 
117 Source *
118 sourceRoot(Fs *fs, u32int addr, int mode)
119 {
120 	Source *r;
121 	Block *b;
122 
123 	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
124 	if(b == nil)
125 		return nil;
126 
127 	if(mode == OReadWrite && b->l.epoch != fs->ehi){
128 		fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
129 		blockPut(b);
130 		vtSetError(EBadRoot);
131 		return nil;
132 	}
133 
134 	r = sourceAlloc(fs, b, nil, 0, mode, 0);
135 	blockPut(b);
136 	return r;
137 }
138 
139 Source *
140 sourceOpen(Source *r, ulong offset, int mode, int issnapshot)
141 {
142 	ulong bn;
143 	Block *b;
144 
145 	assert(sourceIsLocked(r));
146 	if(r->mode == OReadWrite)
147 		assert(r->epoch == r->b->l.epoch);
148 	if(!r->dir){
149 		vtSetError(ENotDir);
150 		return nil;
151 	}
152 
153 	bn = offset/(r->dsize/VtEntrySize);
154 
155 	b = sourceBlock(r, bn, mode);
156 	if(b == nil)
157 		return nil;
158 	r = sourceAlloc(r->fs, b, r, offset, mode, issnapshot);
159 	blockPut(b);
160 	return r;
161 }
162 
163 Source *
164 sourceCreate(Source *r, int dsize, int dir, u32int offset)
165 {
166 	int i;
167 	Block *b;
168 	u32int bn, size;
169 	Entry e;
170 	int epb;
171 	int psize;
172 	Source *rr;
173 
174 	assert(sourceIsLocked(r));
175 
176 	if(!r->dir){
177 		vtSetError(ENotDir);
178 		return nil;
179 	}
180 
181 	epb = r->dsize/VtEntrySize;
182 	psize = (dsize/VtScoreSize)*VtScoreSize;
183 
184 	size = sourceGetDirSize(r);
185 	if(offset == 0){
186 		/*
187 		 * look at a random block to see if we can find an empty entry
188 		 */
189 		offset = lnrand(size+1);
190 		offset -= offset % epb;
191 	}
192 
193 	/* try the given block and then try the last block */
194 	for(;;){
195 		bn = offset/epb;
196 		b = sourceBlock(r, bn, OReadWrite);
197 		if(b == nil)
198 			return nil;
199 		for(i=offset%r->epb; i<epb; i++){
200 			entryUnpack(&e, b->data, i);
201 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
202 				goto Found;
203 		}
204 		blockPut(b);
205 		if(offset == size){
206 			fprint(2, "sourceCreate: cannot happen\n");
207 			vtSetError("sourceCreate: cannot happen");
208 			return nil;
209 		}
210 		offset = size;
211 	}
212 
213 Found:
214 	/* found an entry - gen already set */
215 	e.psize = psize;
216 	e.dsize = dsize;
217 	assert(psize && dsize);
218 	e.flags = VtEntryActive;
219 	if(dir)
220 		e.flags |= VtEntryDir;
221 	e.depth = 0;
222 	e.size = 0;
223 	memmove(e.score, vtZeroScore, VtScoreSize);
224 	e.tag = 0;
225 	e.snap = 0;
226 	e.archive = 0;
227 	entryPack(&e, b->data, i);
228 	blockDirty(b);
229 
230 	offset = bn*epb + i;
231 	if(offset+1 > size){
232 		if(!sourceSetDirSize(r, offset+1)){
233 			blockPut(b);
234 			return nil;
235 		}
236 	}
237 
238 	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
239 	blockPut(b);
240 	return rr;
241 }
242 
/*
 * Truncate (doremove==0) or remove (doremove==1) the object behind r.
 * Both clear the entry's pointer tree; remove additionally bumps the
 * generation so outstanding Sources get ERemoved, deactivates the
 * entry, and closes r.
 */
static int
sourceKill(Source *r, int doremove)
{
	Entry e;
	Block *b;
	u32int addr;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* modifying the entry requires its block to be in the current epoch */
	assert(b->l.epoch == r->fs->ehi);

	if(doremove==0 && e.size == 0){
		/* already truncated */
		blockPut(b);
		return 1;
	}

	/* remember info on link we are removing */
	addr = globalToLocal(e.score);
	type = entryType(&e);
	tag = e.tag;

	if(doremove){
		/* gen of ~0 marks the slot as permanently retired */
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else{
		e.flags &= ~VtEntryLocal;
	}
	e.depth = 0;
	e.size = 0;
	e.tag = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	/* release the old tree, if it lived on local disk */
	if(addr != NilBlock)
		blockRemoveLink(b, addr, type, tag, 1);
	blockPut(b);

	if(doremove){
		sourceUnlock(r);
		sourceClose(r);
	}

	return 1;
}
296 
/* remove the object behind r entirely and release r */
int
sourceRemove(Source *r)
{
	return sourceKill(r, 1);
}
302 
/* truncate the object behind r to zero length; r stays open */
int
sourceTruncate(Source *r)
{
	return sourceKill(r, 0);
}
308 
309 uvlong
310 sourceGetSize(Source *r)
311 {
312 	Entry e;
313 	Block *b;
314 
315 	assert(sourceIsLocked(r));
316 	b = sourceLoad(r, &e);
317 	if(b == nil)
318 		return 0;
319 	blockPut(b);
320 
321 	return e.size;
322 }
323 
/*
 * Zero the part of the pointer tree described by *e that lies beyond
 * the new size: walk down the spine that still covers live data,
 * clearing pointers to wholly unnecessary subtrees at each level,
 * and finally zero the tail of the last data block.  Returns 1 on
 * success, 0 if some block could not be obtained for writing.
 */
static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
	int i, type, ppb;
	uvlong ptrsz;	/* bytes addressed by one pointer at the current level */
	u32int addr;
	uchar score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	/* ptrsz = dsize * ppb^(depth-1): bytes under one top-level pointer */
	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	while(type&BtLevelMask){
		if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag, 1);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}
395 
/*
 * Set the length of the file behind r to size, adjusting the depth
 * of its pointer tree and zeroing now-unused blocks as needed.
 * A size of zero is handled as a truncate.  Returns 1 on success.
 */
int
sourceSetSize(Source *r, uvlong size)
{
	int depth;
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	if(size == 0)
		return sourceTruncate(r);

	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* quick out */
	if(e.size == size){
		blockPut(b);
		return 1;
	}

	depth = sizeToDepth(size, e.psize, e.dsize);

	/* grow or shrink the tree depth to match the new size */
	if(depth < e.depth){
		if(!sourceShrinkDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}else if(depth > e.depth){
		if(!sourceGrowDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}

	/* shrinking content is best effort; the entry size is set regardless */
	if(size < e.size)
		sourceShrinkSize(r, &e, size);

	e.size = size;
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}
446 
447 int
448 sourceSetDirSize(Source *r, ulong ds)
449 {
450 	uvlong size;
451 	int epb;
452 
453 	assert(sourceIsLocked(r));
454 	epb = r->dsize/VtEntrySize;
455 
456 	size = (uvlong)r->dsize*(ds/epb);
457 	size += VtEntrySize*(ds%epb);
458 	return sourceSetSize(r, size);
459 }
460 
461 ulong
462 sourceGetDirSize(Source *r)
463 {
464 	ulong ds;
465 	uvlong size;
466 	int epb;
467 
468 	assert(sourceIsLocked(r));
469 	epb = r->dsize/VtEntrySize;
470 
471 	size = sourceGetSize(r);
472 	ds = epb*(size/r->dsize);
473 	ds += (size%r->dsize)/VtEntrySize;
474 	return ds;
475 }
476 
477 int
478 sourceGetEntry(Source *r, Entry *e)
479 {
480 	Block *b;
481 
482 	assert(sourceIsLocked(r));
483 	b = sourceLoad(r, e);
484 	if(b == nil)
485 		return 0;
486 	blockPut(b);
487 
488 	return 1;
489 }
490 
491 /*
492  * Must be careful with this.  Doesn't record
493  * dependencies, so don't introduce any!
494  */
495 int
496 sourceSetEntry(Source *r, Entry *e)
497 {
498 	Block *b;
499 	Entry oe;
500 
501 	assert(sourceIsLocked(r));
502 	b = sourceLoad(r, &oe);
503 	if(b == nil)
504 		return 0;
505 	entryPack(e, b->data, r->offset%r->epb);
506 	blockDirty(b);
507 	blockPut(b);
508 
509 	return 1;
510 }
511 
/*
 * Get the child block of p at the given index (a score slot for
 * pointer blocks; for BtDir blocks the child is named by the entry e).
 * For writes, copy-on-write the child into the current epoch, rewrite
 * the pointer in p, and record the write-ordering dependency between
 * the two blocks.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	u32int addr;
	int type;
	uchar oscore[VtScoreSize], score[VtScoreSize];
	Entry oe;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		/* directory block: the child is named by the entry's score */
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		/* pointer block: the child is named by the indexed score */
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b)
		b->pc = getcallerpc(&p);

	if(b == nil || mode == OReadOnly)
		return b;

	if(p->l.epoch != fs->ehi){
		fprint(2, "blockWalk: parent not writable\n");
		abort();
	}
	/* already in the current epoch: writable as is */
	if(b->l.epoch == fs->ehi)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		/* first local block for this tree: invent a tag for it */
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	b->pc = getcallerpc(&p);
	assert(b->l.epoch == fs->ehi);

	blockDirty(b);
	memmove(score, b->score, VtScoreSize);
	if(p->l.type == BtDir){
		/* rewrite the entry to name the copy */
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, b, index, nil, &oe);
	}else{
		/* rewrite the pointer slot to name the copy */
		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
		blockDependency(p, b, index, oscore, nil);
	}
	blockDirty(p);

	/* drop the reference to the old copy, if it was local */
	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag, 0);

	return b;
}
583 
/*
 * Change the depth of the source r.
 * The entry e for r is contained in block p.
 *
 * Grows by stacking new local pointer blocks on top of the current
 * root until e->depth reaches depth; each new root's first pointer
 * names the previous root.  Returns 1 only if the full depth was
 * reached.
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *bb;
	u32int tag;
	int type;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(e->depth < depth){
		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(bb == nil)
			break;
//fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
		/* first pointer of the new root names the old root */
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;
		/* record write ordering between the new root and the old */
		blockDependency(bb, b, 0, vtZeroScore, nil);
		blockPut(b);
		b = bb;
		blockDirty(b);
	}

	/* publish the updated entry; p depends on the final root b */
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockPut(b);
	blockDirty(p);

	return e->depth == depth;
}
638 
/*
 * Reduce the depth of the tree in *e toward depth, promoting the
 * subtree that still covers the data to be the new root.  Returns 1
 * only if the target depth was reached.
 */
static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *nb, *ob, *rb;
	u32int tag;
	int type, d;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(rb == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	oe = *e;

	ob = nil;
	b = rb;
/* BUG: explain type++.  i think it is a real bug */
	for(d=e->depth; d > depth; d--, type++){
		/* each block's first pointer leads down the left spine */
		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
		if(nb == nil)
			break;
		if(ob!=nil && ob!=rb)
			blockPut(ob);
		ob = b;
		b = nb;
	}

	if(b == rb){
		/* could not descend even one level */
		blockPut(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b.  To update:
	 *
	 *	(i) change e to point at b
	 *	(ii) zero the pointer ob -> b
	 *	(iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
 	 */

	/* (i) */
	e->depth = d;
	/* might have been local and now global; reverse cannot happen */
	if(globalToLocal(b->score) == NilBlock)
		e->flags &= ~VtEntryLocal;
	memmove(e->score, b->score, VtScoreSize);
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockDirty(p);

	/* (ii) */
	memmove(ob->data, vtZeroScore, VtScoreSize);
	blockDependency(ob, p, 0, b->score, nil);
	blockDirty(ob);

	/* (iii) */
	if(rb->addr != NilBlock)
		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);

	blockPut(rb);
	if(ob!=nil && ob!=rb)
		blockPut(ob);
	blockPut(b);

	return d == depth;
}
721 
/*
 * Normally we return the block at the given number.
 * If early is set, we stop earlier in the tree.  Setting early
 * to 1 gives us the block that contains the pointer to bn.
 */
Block *
_sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
{
	Block *b, *bb;
	int index[VtPointerDepth+1];	/* per-level child indices for bn */
	Entry e;
	int i, np;
	int m;

	assert(sourceIsLocked(r));
	assert(bn != NilBlock);

	/* mode for intermediate block */
	m = mode;
	if(m == OOverWrite)
		m = OReadWrite;

	b = sourceLoad(r, &e);
	if(b == nil)
		return nil;
	/* snapshots may not see entries flagged as never-archived */
	if(r->issnapshot && (e.flags & VtEntryNoArchive)){
		blockPut(b);
		vtSetError(ENotArchived);
		return nil;
	}

	if(tag){
		if(e.tag == 0)
			e.tag = tag;
		else if(e.tag != tag){
			fprint(2, "tag mismatch\n");
			vtSetError("tag mismatch");
			goto Err;
		}
	}

	/* decompose bn into per-level indices, least significant first */
	np = e.psize/VtScoreSize;
	memset(index, 0, sizeof(index));
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			vtSetError(EBadAddr);
			goto Err;
		}
		index[i] = bn % np;
		bn /= np;
	}

	/* bn lies beyond the current depth; deepen the tree if writing */
	if(i > e.depth){
		if(mode == OReadOnly){
			vtSetError(EBadAddr);
			goto Err;
		}
		if(!sourceGrowDepth(r, b, &e, i))
			goto Err;
	}

	index[e.depth] = r->offset % r->epb;

	/* walk from the entry block down to the requested block */
	for(i=e.depth; i>=early; i--){
		bb = blockWalk(b, index[i], m, r->fs, &e);
		if(bb == nil)
			goto Err;
		blockPut(b);
		b = bb;
	}
	b->pc = getcallerpc(&r);
	return b;
Err:
	blockPut(b);
	return nil;
}
798 
799 Block*
800 sourceBlock(Source *r, ulong bn, int mode)
801 {
802 	Block *b;
803 
804 	b = _sourceBlock(r, bn, mode, 0, 0);
805 	if(b)
806 		b->pc = getcallerpc(&r);
807 	return b;
808 }
809 
810 void
811 sourceClose(Source *r)
812 {
813 	if(r == nil)
814 		return;
815 	vtLock(r->lk);
816 	r->ref--;
817 	if(r->ref){
818 		vtUnlock(r->lk);
819 		return;
820 	}
821 	assert(r->ref == 0);
822 	vtUnlock(r->lk);
823 	if(r->parent)
824 		sourceClose(r->parent);
825 	vtLockFree(r->lk);
826 	memset(r, ~0, sizeof(*r));
827 	vtMemFree(r);
828 }
829 
/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block.  We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system sources (OReadWrite) and sources for the
 * snapshot file system (OReadOnly).
 */
static Block*
sourceLoadBlock(Source *r, int mode)
{
	u32int addr;
	Block *b;

	switch(r->mode){
	default:
		assert(0);
	case OReadWrite:
		assert(r->mode == OReadWrite);
		/*
		 * This needn't be true -- we might bump the low epoch
		 * to reclaim some old blocks, but since this score is
		 * OReadWrite, the blocks must all still be open, so none
		 * are reclaimed.  Thus it's okay that the epoch is so low.
		 * Proceed.
		assert(r->epoch >= r->fs->elo);
		 */
		if(r->epoch == r->fs->ehi){
			/* cached score is still current; load it directly */
			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
			if(b == nil)
				return nil;
			assert(r->epoch == b->l.epoch);
			return b;
		}
		/* epoch changed: rewalk via the parent for a fresh copy */
		assert(r->parent != nil);
		if(!sourceLock(r->parent, OReadWrite))
			return nil;
		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
		sourceUnlock(r->parent);
		if(b == nil)
			return nil;
		assert(b->l.epoch == r->fs->ehi);
	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
		/* refresh the cached location of the entry block */
		memmove(r->score, b->score, VtScoreSize);
		r->scoreEpoch = b->l.epoch;
		r->tag = b->l.tag;
		r->epoch = r->fs->ehi;
		return b;

	case OReadOnly:
		addr = globalToLocal(r->score);
		if(addr == NilBlock)
			/* not a local address; fetch through the cache by score */
			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);

		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
		if(b)
			return b;

		/*
		 * If it failed because the epochs don't match, the block has been
		 * archived and reclaimed.  Rewalk from the parent and get the
		 * new pointer.  This can't happen in the OReadWrite case
		 * above because blocks in the current epoch don't get
		 * reclaimed.  The fact that we're OReadOnly means we're
		 * a snapshot.  (Or else the file system is read-only, but then
		 * the archiver isn't going around deleting blocks.)
		 */
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			if(!sourceLock(r->parent, OReadOnly))
				return nil;
			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
			sourceUnlock(r->parent);
			if(b){
				fprint(2, "sourceAlloc: lost %V found %V\n",
					r->score, b->score);
				memmove(r->score, b->score, VtScoreSize);
				r->scoreEpoch = b->l.epoch;
				return b;
			}
		}
		return nil;
	}
}
916 
917 int
918 sourceLock(Source *r, int mode)
919 {
920 	Block *b;
921 
922 	if(mode == -1)
923 		mode = r->mode;
924 
925 	b = sourceLoadBlock(r, mode);
926 	if(b == nil)
927 		return 0;
928 	/*
929 	 * The fact that we are holding b serves as the
930 	 * lock entitling us to write to r->b.
931 	 */
932 	assert(r->b == nil);
933 	r->b = b;
934 	if(r->mode == OReadWrite)
935 		assert(r->epoch == r->b->l.epoch);
936 	return 1;
937 }
938 
/*
 * Lock two (usually sibling) sources.  This needs special care
 * because the Entries for both sources might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
sourceLock2(Source *r, Source *rr, int mode)
{
	Block *b, *bb;

	if(rr == nil)
		return sourceLock(r, mode);

	if(mode == -1)
		mode = r->mode;

	/* same entry block: load it once and hold it twice */
	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		b = sourceLoadBlock(r, mode);
		if(b == nil)
			return 0;
		/* r's load may have refreshed the block; keep rr in sync */
		if(memcmp(r->score, rr->score, VtScoreSize) != 0){
			memmove(rr->score, b->score, VtScoreSize);
			rr->scoreEpoch = b->l.epoch;
			rr->tag = b->l.tag;
			rr->epoch = rr->fs->ehi;
		}
		blockDupLock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		/* lock rr's block first to keep a consistent order */
		bb = sourceLoadBlock(rr, mode);
		b = sourceLoadBlock(r, mode);
	}else{
		b = sourceLoadBlock(r, mode);
		bb = sourceLoadBlock(rr, mode);
	}
	if(b == nil || bb == nil){
		if(b)
			blockPut(b);
		if(bb)
			blockPut(bb);
		return 0;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	return 1;
}
990 
991 void
992 sourceUnlock(Source *r)
993 {
994 	Block *b;
995 
996 	if(r->b == nil){
997 		fprint(2, "sourceUnlock: already unlocked\n");
998 		abort();
999 	}
1000 	b = r->b;
1001 	r->b = nil;
1002 	blockPut(b);
1003 }
1004 
1005 static Block*
1006 sourceLoad(Source *r, Entry *e)
1007 {
1008 	Block *b;
1009 
1010 	assert(sourceIsLocked(r));
1011 	b = r->b;
1012 	if(!entryUnpack(e, b->data, r->offset % r->epb))
1013 		return nil;
1014 	if(e->gen != r->gen){
1015 		vtSetError(ERemoved);
1016 		return nil;
1017 	}
1018 	blockDupLock(b);
1019 	return b;
1020 }
1021 
1022 static int
1023 sizeToDepth(uvlong s, int psize, int dsize)
1024 {
1025 	int np;
1026 	int d;
1027 
1028 	/* determine pointer depth */
1029 	np = psize/VtScoreSize;
1030 	s = (s + dsize - 1)/dsize;
1031 	for(d = 0; s > 1; d++)
1032 		s = (s + np - 1)/np;
1033 	return d;
1034 }
1035 
1036 static u32int
1037 tagGen(void)
1038 {
1039 	u32int tag;
1040 
1041 	for(;;){
1042 		tag = lrand();
1043 		if(tag >= UserTag)
1044 			break;
1045 	}
1046 	return tag;
1047 }
1048