1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 #include "9.h"
6
7 static int sizeToDepth(uvlong s, int psize, int dsize);
8 static u32int tagGen(void);
9 static Block *sourceLoad(Source *r, Entry *e);
10 static int sourceShrinkDepth(Source*, Block*, Entry*, int);
11 static int sourceShrinkSize(Source*, Entry*, uvlong);
12 static int sourceGrowDepth(Source*, Block*, Entry*, int);
13
14 #define sourceIsLocked(r) ((r)->b != nil)
15
16 static Source *
sourceAlloc(Fs * fs,Block * b,Source * p,u32int offset,int mode,int issnapshot)17 sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode, int issnapshot)
18 {
19 int epb;
20 u32int epoch;
21 char *pname = nil;
22 Source *r;
23 Entry e;
24
25 assert(p==nil || sourceIsLocked(p));
26
27 if(p == nil){
28 assert(offset == 0);
29 epb = 1;
30 }else
31 epb = p->dsize / VtEntrySize;
32
33 if(b->l.type != BtDir)
34 goto Bad;
35
36 /*
37 * a non-active entry is the only thing that
38 * can legitimately happen here. all the others
39 * get prints.
40 */
41 if(!entryUnpack(&e, b->data, offset % epb)){
42 pname = sourceName(p);
43 consPrint("%s: %s %V: sourceAlloc: entryUnpack failed\n",
44 fs->name, pname, b->score);
45 goto Bad;
46 }
47 if(!(e.flags & VtEntryActive)){
48 pname = sourceName(p);
49 if(0) consPrint("%s: %s %V: sourceAlloc: not active\n",
50 fs->name, pname, e.score);
51 goto Bad;
52 }
53 if(e.psize < 256 || e.dsize < 256){
54 pname = sourceName(p);
55 consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud < 256\n",
56 fs->name, pname, e.score, e.psize, e.dsize);
57 goto Bad;
58 }
59
60 if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
61 pname = sourceName(p);
62 consPrint("%s: %s %V: sourceAlloc: depth %ud size %llud "
63 "psize %ud dsize %ud\n", fs->name, pname,
64 e.score, e.depth, e.size, e.psize, e.dsize);
65 goto Bad;
66 }
67
68 if((e.flags & VtEntryLocal) && e.tag == 0){
69 pname = sourceName(p);
70 consPrint("%s: %s %V: sourceAlloc: flags %#ux tag %#ux\n",
71 fs->name, pname, e.score, e.flags, e.tag);
72 goto Bad;
73 }
74
75 if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
76 pname = sourceName(p);
77 consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud "
78 "> blocksize %ud\n", fs->name, pname, e.score,
79 e.psize, e.dsize, fs->blockSize);
80 goto Bad;
81 }
82
83 epoch = b->l.epoch;
84 if(mode == OReadWrite){
85 if(e.snap != 0){
86 vtSetError(ESnapRO);
87 return nil;
88 }
89 }else if(e.snap != 0){
90 if(e.snap < fs->elo){
91 vtSetError(ESnapOld);
92 return nil;
93 }
94 if(e.snap >= fs->ehi)
95 goto Bad;
96 epoch = e.snap;
97 }
98
99 r = vtMemAllocZ(sizeof(Source));
100 r->fs = fs;
101 r->mode = mode;
102 r->issnapshot = issnapshot;
103 r->dsize = e.dsize;
104 r->gen = e.gen;
105 r->dir = (e.flags & VtEntryDir) != 0;
106 r->lk = vtLockAlloc();
107 r->ref = 1;
108 r->parent = p;
109 if(p){
110 vtLock(p->lk);
111 assert(mode == OReadOnly || p->mode == OReadWrite);
112 p->ref++;
113 vtUnlock(p->lk);
114 }
115 r->epoch = epoch;
116 // consPrint("sourceAlloc: have %V be.%d fse.%d %s\n", b->score,
117 // b->l.epoch, r->fs->ehi, mode == OReadWrite? "rw": "ro");
118 memmove(r->score, b->score, VtScoreSize);
119 r->scoreEpoch = b->l.epoch;
120 r->offset = offset;
121 r->epb = epb;
122 r->tag = b->l.tag;
123
124 // consPrint("%s: sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);
125
126 return r;
127 Bad:
128 free(pname);
129 vtSetError(EBadEntry);
130 return nil;
131 }
132
133 Source *
sourceRoot(Fs * fs,u32int addr,int mode)134 sourceRoot(Fs *fs, u32int addr, int mode)
135 {
136 Source *r;
137 Block *b;
138
139 b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
140 if(b == nil)
141 return nil;
142
143 if(mode == OReadWrite && b->l.epoch != fs->ehi){
144 consPrint("sourceRoot: fs->ehi = %ud, b->l = %L\n",
145 fs->ehi, &b->l);
146 blockPut(b);
147 vtSetError(EBadRoot);
148 return nil;
149 }
150
151 r = sourceAlloc(fs, b, nil, 0, mode, 0);
152 blockPut(b);
153 return r;
154 }
155
156 Source *
sourceOpen(Source * r,ulong offset,int mode,int issnapshot)157 sourceOpen(Source *r, ulong offset, int mode, int issnapshot)
158 {
159 ulong bn;
160 Block *b;
161
162 assert(sourceIsLocked(r));
163 if(r->mode == OReadWrite)
164 assert(r->epoch == r->b->l.epoch);
165 if(!r->dir){
166 vtSetError(ENotDir);
167 return nil;
168 }
169
170 bn = offset/(r->dsize/VtEntrySize);
171
172 b = sourceBlock(r, bn, mode);
173 if(b == nil)
174 return nil;
175 r = sourceAlloc(r->fs, b, r, offset, mode, issnapshot);
176 blockPut(b);
177 return r;
178 }
179
180 Source *
sourceCreate(Source * r,int dsize,int dir,u32int offset)181 sourceCreate(Source *r, int dsize, int dir, u32int offset)
182 {
183 int i, epb, psize;
184 u32int bn, size;
185 Block *b;
186 Entry e;
187 Source *rr;
188
189 assert(sourceIsLocked(r));
190
191 if(!r->dir){
192 vtSetError(ENotDir);
193 return nil;
194 }
195
196 epb = r->dsize/VtEntrySize;
197 psize = (dsize/VtScoreSize)*VtScoreSize;
198
199 size = sourceGetDirSize(r);
200 if(offset == 0){
201 /*
202 * look at a random block to see if we can find an empty entry
203 */
204 offset = lnrand(size+1);
205 offset -= offset % epb;
206 }
207
208 /* try the given block and then try the last block */
209 for(;;){
210 bn = offset/epb;
211 b = sourceBlock(r, bn, OReadWrite);
212 if(b == nil)
213 return nil;
214 for(i=offset%r->epb; i<epb; i++){
215 entryUnpack(&e, b->data, i);
216 if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
217 goto Found;
218 }
219 blockPut(b);
220 if(offset == size){
221 fprint(2, "sourceCreate: cannot happen\n");
222 vtSetError("sourceCreate: cannot happen");
223 return nil;
224 }
225 offset = size;
226 }
227
228 Found:
229 /* found an entry - gen already set */
230 e.psize = psize;
231 e.dsize = dsize;
232 assert(psize && dsize);
233 e.flags = VtEntryActive;
234 if(dir)
235 e.flags |= VtEntryDir;
236 e.depth = 0;
237 e.size = 0;
238 memmove(e.score, vtZeroScore, VtScoreSize);
239 e.tag = 0;
240 e.snap = 0;
241 e.archive = 0;
242 entryPack(&e, b->data, i);
243 blockDirty(b);
244
245 offset = bn*epb + i;
246 if(offset+1 > size){
247 if(!sourceSetDirSize(r, offset+1)){
248 blockPut(b);
249 return nil;
250 }
251 }
252
253 rr = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
254 blockPut(b);
255 return rr;
256 }
257
258 static int
sourceKill(Source * r,int doremove)259 sourceKill(Source *r, int doremove)
260 {
261 Entry e;
262 Block *b;
263 u32int addr;
264 u32int tag;
265 int type;
266
267 assert(sourceIsLocked(r));
268 b = sourceLoad(r, &e);
269 if(b == nil)
270 return 0;
271
272 assert(b->l.epoch == r->fs->ehi);
273
274 if(doremove==0 && e.size == 0){
275 /* already truncated */
276 blockPut(b);
277 return 1;
278 }
279
280 /* remember info on link we are removing */
281 addr = globalToLocal(e.score);
282 type = entryType(&e);
283 tag = e.tag;
284
285 if(doremove){
286 if(e.gen != ~0)
287 e.gen++;
288 e.dsize = 0;
289 e.psize = 0;
290 e.flags = 0;
291 }else{
292 e.flags &= ~VtEntryLocal;
293 }
294 e.depth = 0;
295 e.size = 0;
296 e.tag = 0;
297 memmove(e.score, vtZeroScore, VtScoreSize);
298 entryPack(&e, b->data, r->offset % r->epb);
299 blockDirty(b);
300 if(addr != NilBlock)
301 blockRemoveLink(b, addr, type, tag, 1);
302 blockPut(b);
303
304 if(doremove){
305 sourceUnlock(r);
306 sourceClose(r);
307 }
308
309 return 1;
310 }
311
312 int
sourceRemove(Source * r)313 sourceRemove(Source *r)
314 {
315 return sourceKill(r, 1);
316 }
317
318 int
sourceTruncate(Source * r)319 sourceTruncate(Source *r)
320 {
321 return sourceKill(r, 0);
322 }
323
324 uvlong
sourceGetSize(Source * r)325 sourceGetSize(Source *r)
326 {
327 Entry e;
328 Block *b;
329
330 assert(sourceIsLocked(r));
331 b = sourceLoad(r, &e);
332 if(b == nil)
333 return 0;
334 blockPut(b);
335
336 return e.size;
337 }
338
339 static int
sourceShrinkSize(Source * r,Entry * e,uvlong size)340 sourceShrinkSize(Source *r, Entry *e, uvlong size)
341 {
342 int i, type, ppb;
343 uvlong ptrsz;
344 u32int addr;
345 uchar score[VtScoreSize];
346 Block *b;
347
348 type = entryType(e);
349 b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
350 if(b == nil)
351 return 0;
352
353 ptrsz = e->dsize;
354 ppb = e->psize/VtScoreSize;
355 for(i=0; i+1<e->depth; i++)
356 ptrsz *= ppb;
357
358 while(type&BtLevelMask){
359 if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
360 /* not worth copying the block just so we can zero some of it */
361 blockPut(b);
362 return 0;
363 }
364
365 /*
366 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
367 */
368
369 /* zero the pointers to unnecessary blocks */
370 i = (size+ptrsz-1)/ptrsz;
371 for(; i<ppb; i++){
372 addr = globalToLocal(b->data+i*VtScoreSize);
373 memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
374 blockDirty(b);
375 if(addr != NilBlock)
376 blockRemoveLink(b, addr, type-1, e->tag, 1);
377 }
378
379 /* recurse (go around again) on the partially necessary block */
380 i = size/ptrsz;
381 size = size%ptrsz;
382 if(size == 0){
383 blockPut(b);
384 return 1;
385 }
386 ptrsz /= ppb;
387 type--;
388 memmove(score, b->data+i*VtScoreSize, VtScoreSize);
389 blockPut(b);
390 b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
391 if(b == nil)
392 return 0;
393 }
394
395 if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
396 blockPut(b);
397 return 0;
398 }
399
400 /*
401 * No one ever truncates BtDir blocks.
402 */
403 if(type == BtData && e->dsize > size){
404 memset(b->data+size, 0, e->dsize-size);
405 blockDirty(b);
406 }
407 blockPut(b);
408 return 1;
409 }
410
411 int
sourceSetSize(Source * r,uvlong size)412 sourceSetSize(Source *r, uvlong size)
413 {
414 int depth;
415 Entry e;
416 Block *b;
417
418 assert(sourceIsLocked(r));
419 if(size == 0)
420 return sourceTruncate(r);
421
422 if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
423 vtSetError(ETooBig);
424 return 0;
425 }
426
427 b = sourceLoad(r, &e);
428 if(b == nil)
429 return 0;
430
431 /* quick out */
432 if(e.size == size){
433 blockPut(b);
434 return 1;
435 }
436
437 depth = sizeToDepth(size, e.psize, e.dsize);
438
439 if(depth < e.depth){
440 if(!sourceShrinkDepth(r, b, &e, depth)){
441 blockPut(b);
442 return 0;
443 }
444 }else if(depth > e.depth){
445 if(!sourceGrowDepth(r, b, &e, depth)){
446 blockPut(b);
447 return 0;
448 }
449 }
450
451 if(size < e.size)
452 sourceShrinkSize(r, &e, size);
453
454 e.size = size;
455 entryPack(&e, b->data, r->offset % r->epb);
456 blockDirty(b);
457 blockPut(b);
458
459 return 1;
460 }
461
462 int
sourceSetDirSize(Source * r,ulong ds)463 sourceSetDirSize(Source *r, ulong ds)
464 {
465 uvlong size;
466 int epb;
467
468 assert(sourceIsLocked(r));
469 epb = r->dsize/VtEntrySize;
470
471 size = (uvlong)r->dsize*(ds/epb);
472 size += VtEntrySize*(ds%epb);
473 return sourceSetSize(r, size);
474 }
475
476 ulong
sourceGetDirSize(Source * r)477 sourceGetDirSize(Source *r)
478 {
479 ulong ds;
480 uvlong size;
481 int epb;
482
483 assert(sourceIsLocked(r));
484 epb = r->dsize/VtEntrySize;
485
486 size = sourceGetSize(r);
487 ds = epb*(size/r->dsize);
488 ds += (size%r->dsize)/VtEntrySize;
489 return ds;
490 }
491
492 int
sourceGetEntry(Source * r,Entry * e)493 sourceGetEntry(Source *r, Entry *e)
494 {
495 Block *b;
496
497 assert(sourceIsLocked(r));
498 b = sourceLoad(r, e);
499 if(b == nil)
500 return 0;
501 blockPut(b);
502
503 return 1;
504 }
505
506 /*
507 * Must be careful with this. Doesn't record
508 * dependencies, so don't introduce any!
509 */
510 int
sourceSetEntry(Source * r,Entry * e)511 sourceSetEntry(Source *r, Entry *e)
512 {
513 Block *b;
514 Entry oe;
515
516 assert(sourceIsLocked(r));
517 b = sourceLoad(r, &oe);
518 if(b == nil)
519 return 0;
520 entryPack(e, b->data, r->offset%r->epb);
521 blockDirty(b);
522 blockPut(b);
523
524 return 1;
525 }
526
527 static Block *
blockWalk(Block * p,int index,int mode,Fs * fs,Entry * e)528 blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
529 {
530 Block *b;
531 Cache *c;
532 u32int addr;
533 int type;
534 uchar oscore[VtScoreSize], score[VtScoreSize];
535 Entry oe;
536
537 c = fs->cache;
538
539 if((p->l.type & BtLevelMask) == 0){
540 assert(p->l.type == BtDir);
541 type = entryType(e);
542 b = cacheGlobal(c, e->score, type, e->tag, mode);
543 }else{
544 type = p->l.type - 1;
545 b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
546 }
547
548 if(b)
549 b->pc = getcallerpc(&p);
550
551 if(b == nil || mode == OReadOnly)
552 return b;
553
554 if(p->l.epoch != fs->ehi){
555 fprint(2, "blockWalk: parent not writable\n");
556 abort();
557 }
558 if(b->l.epoch == fs->ehi)
559 return b;
560
561 oe = *e;
562
563 /*
564 * Copy on write.
565 */
566 if(e->tag == 0){
567 assert(p->l.type == BtDir);
568 e->tag = tagGen();
569 e->flags |= VtEntryLocal;
570 }
571
572 addr = b->addr;
573 b = blockCopy(b, e->tag, fs->ehi, fs->elo);
574 if(b == nil)
575 return nil;
576
577 b->pc = getcallerpc(&p);
578 assert(b->l.epoch == fs->ehi);
579
580 blockDirty(b);
581 memmove(score, b->score, VtScoreSize);
582 if(p->l.type == BtDir){
583 memmove(e->score, b->score, VtScoreSize);
584 entryPack(e, p->data, index);
585 blockDependency(p, b, index, nil, &oe);
586 }else{
587 memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
588 memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
589 blockDependency(p, b, index, oscore, nil);
590 }
591 blockDirty(p);
592
593 if(addr != NilBlock)
594 blockRemoveLink(p, addr, type, e->tag, 0);
595
596 return b;
597 }
598
599 /*
600 * Change the depth of the source r.
601 * The entry e for r is contained in block p.
602 */
603 static int
sourceGrowDepth(Source * r,Block * p,Entry * e,int depth)604 sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
605 {
606 Block *b, *bb;
607 u32int tag;
608 int type;
609 Entry oe;
610
611 assert(sourceIsLocked(r));
612 assert(depth <= VtPointerDepth);
613
614 type = entryType(e);
615 b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
616 if(b == nil)
617 return 0;
618
619 tag = e->tag;
620 if(tag == 0)
621 tag = tagGen();
622
623 oe = *e;
624
625 /*
626 * Keep adding layers until we get to the right depth
627 * or an error occurs.
628 */
629 while(e->depth < depth){
630 bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
631 if(bb == nil)
632 break;
633 //fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
634 memmove(bb->data, b->score, VtScoreSize);
635 memmove(e->score, bb->score, VtScoreSize);
636 e->depth++;
637 type++;
638 e->tag = tag;
639 e->flags |= VtEntryLocal;
640 blockDependency(bb, b, 0, vtZeroScore, nil);
641 blockPut(b);
642 b = bb;
643 blockDirty(b);
644 }
645
646 entryPack(e, p->data, r->offset % r->epb);
647 blockDependency(p, b, r->offset % r->epb, nil, &oe);
648 blockPut(b);
649 blockDirty(p);
650
651 return e->depth == depth;
652 }
653
654 static int
sourceShrinkDepth(Source * r,Block * p,Entry * e,int depth)655 sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
656 {
657 Block *b, *nb, *ob, *rb;
658 u32int tag;
659 int type, d;
660 Entry oe;
661
662 assert(sourceIsLocked(r));
663 assert(depth <= VtPointerDepth);
664
665 type = entryType(e);
666 rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
667 if(rb == nil)
668 return 0;
669
670 tag = e->tag;
671 if(tag == 0)
672 tag = tagGen();
673
674 /*
675 * Walk down to the new root block.
676 * We may stop early, but something is better than nothing.
677 */
678 oe = *e;
679
680 ob = nil;
681 b = rb;
682 /* BUG: explain type++. i think it is a real bug */
683 for(d=e->depth; d > depth; d--, type++){
684 nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
685 if(nb == nil)
686 break;
687 if(ob!=nil && ob!=rb)
688 blockPut(ob);
689 ob = b;
690 b = nb;
691 }
692
693 if(b == rb){
694 blockPut(rb);
695 return 0;
696 }
697
698 /*
699 * Right now, e points at the root block rb, b is the new root block,
700 * and ob points at b. To update:
701 *
702 * (i) change e to point at b
703 * (ii) zero the pointer ob -> b
704 * (iii) free the root block
705 *
706 * p (the block containing e) must be written before
707 * anything else.
708 */
709
710 /* (i) */
711 e->depth = d;
712 /* might have been local and now global; reverse cannot happen */
713 if(globalToLocal(b->score) == NilBlock)
714 e->flags &= ~VtEntryLocal;
715 memmove(e->score, b->score, VtScoreSize);
716 entryPack(e, p->data, r->offset % r->epb);
717 blockDependency(p, b, r->offset % r->epb, nil, &oe);
718 blockDirty(p);
719
720 /* (ii) */
721 memmove(ob->data, vtZeroScore, VtScoreSize);
722 blockDependency(ob, p, 0, b->score, nil);
723 blockDirty(ob);
724
725 /* (iii) */
726 if(rb->addr != NilBlock)
727 blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
728
729 blockPut(rb);
730 if(ob!=nil && ob!=rb)
731 blockPut(ob);
732 blockPut(b);
733
734 return d == depth;
735 }
736
737 /*
738 * Normally we return the block at the given number.
739 * If early is set, we stop earlier in the tree. Setting early
740 * to 1 gives us the block that contains the pointer to bn.
741 */
742 Block *
_sourceBlock(Source * r,ulong bn,int mode,int early,ulong tag)743 _sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
744 {
745 Block *b, *bb;
746 int index[VtPointerDepth+1];
747 Entry e;
748 int i, np;
749 int m;
750
751 assert(sourceIsLocked(r));
752 assert(bn != NilBlock);
753
754 /* mode for intermediate block */
755 m = mode;
756 if(m == OOverWrite)
757 m = OReadWrite;
758
759 b = sourceLoad(r, &e);
760 if(b == nil)
761 return nil;
762 if(r->issnapshot && (e.flags & VtEntryNoArchive)){
763 blockPut(b);
764 vtSetError(ENotArchived);
765 return nil;
766 }
767
768 if(tag){
769 if(e.tag == 0)
770 e.tag = tag;
771 else if(e.tag != tag){
772 fprint(2, "tag mismatch\n");
773 vtSetError("tag mismatch");
774 goto Err;
775 }
776 }
777
778 np = e.psize/VtScoreSize;
779 memset(index, 0, sizeof(index));
780 for(i=0; bn > 0; i++){
781 if(i >= VtPointerDepth){
782 vtSetError(EBadAddr);
783 goto Err;
784 }
785 index[i] = bn % np;
786 bn /= np;
787 }
788
789 if(i > e.depth){
790 if(mode == OReadOnly){
791 vtSetError(EBadAddr);
792 goto Err;
793 }
794 if(!sourceGrowDepth(r, b, &e, i))
795 goto Err;
796 }
797
798 index[e.depth] = r->offset % r->epb;
799
800 for(i=e.depth; i>=early; i--){
801 bb = blockWalk(b, index[i], m, r->fs, &e);
802 if(bb == nil)
803 goto Err;
804 blockPut(b);
805 b = bb;
806 }
807 b->pc = getcallerpc(&r);
808 return b;
809 Err:
810 blockPut(b);
811 return nil;
812 }
813
814 Block*
sourceBlock(Source * r,ulong bn,int mode)815 sourceBlock(Source *r, ulong bn, int mode)
816 {
817 Block *b;
818
819 b = _sourceBlock(r, bn, mode, 0, 0);
820 if(b)
821 b->pc = getcallerpc(&r);
822 return b;
823 }
824
825 void
sourceClose(Source * r)826 sourceClose(Source *r)
827 {
828 if(r == nil)
829 return;
830 vtLock(r->lk);
831 r->ref--;
832 if(r->ref){
833 vtUnlock(r->lk);
834 return;
835 }
836 assert(r->ref == 0);
837 vtUnlock(r->lk);
838 if(r->parent)
839 sourceClose(r->parent);
840 vtLockFree(r->lk);
841 memset(r, ~0, sizeof(*r));
842 vtMemFree(r);
843 }
844
845 /*
846 * Retrieve the block containing the entry for r.
847 * If a snapshot has happened, we might need
848 * to get a new copy of the block. We avoid this
849 * in the common case by caching the score for
850 * the block and the last epoch in which it was valid.
851 *
852 * We use r->mode to tell the difference between active
853 * file system sources (OReadWrite) and sources for the
854 * snapshot file system (OReadOnly).
855 */
856 static Block*
sourceLoadBlock(Source * r,int mode)857 sourceLoadBlock(Source *r, int mode)
858 {
859 u32int addr;
860 Block *b;
861
862 switch(r->mode){
863 default:
864 assert(0);
865 case OReadWrite:
866 assert(r->mode == OReadWrite);
867 /*
868 * This needn't be true -- we might bump the low epoch
869 * to reclaim some old blocks, but since this score is
870 * OReadWrite, the blocks must all still be open, so none
871 * are reclaimed. Thus it's okay that the epoch is so low.
872 * Proceed.
873 assert(r->epoch >= r->fs->elo);
874 */
875 if(r->epoch == r->fs->ehi){
876 b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
877 if(b == nil)
878 return nil;
879 assert(r->epoch == b->l.epoch);
880 return b;
881 }
882 assert(r->parent != nil);
883 if(!sourceLock(r->parent, OReadWrite))
884 return nil;
885 b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
886 sourceUnlock(r->parent);
887 if(b == nil)
888 return nil;
889 assert(b->l.epoch == r->fs->ehi);
890 // fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
891 memmove(r->score, b->score, VtScoreSize);
892 r->scoreEpoch = b->l.epoch;
893 r->tag = b->l.tag;
894 r->epoch = r->fs->ehi;
895 return b;
896
897 case OReadOnly:
898 addr = globalToLocal(r->score);
899 if(addr == NilBlock)
900 return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
901
902 b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
903 if(b)
904 return b;
905
906 /*
907 * If it failed because the epochs don't match, the block has been
908 * archived and reclaimed. Rewalk from the parent and get the
909 * new pointer. This can't happen in the OReadWrite case
910 * above because blocks in the current epoch don't get
911 * reclaimed. The fact that we're OReadOnly means we're
912 * a snapshot. (Or else the file system is read-only, but then
913 * the archiver isn't going around deleting blocks.)
914 */
915 if(strcmp(vtGetError(), ELabelMismatch) == 0){
916 if(!sourceLock(r->parent, OReadOnly))
917 return nil;
918 b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
919 sourceUnlock(r->parent);
920 if(b){
921 fprint(2, "sourceAlloc: lost %V found %V\n",
922 r->score, b->score);
923 memmove(r->score, b->score, VtScoreSize);
924 r->scoreEpoch = b->l.epoch;
925 return b;
926 }
927 }
928 return nil;
929 }
930 }
931
932 int
sourceLock(Source * r,int mode)933 sourceLock(Source *r, int mode)
934 {
935 Block *b;
936
937 if(mode == -1)
938 mode = r->mode;
939
940 b = sourceLoadBlock(r, mode);
941 if(b == nil)
942 return 0;
943 /*
944 * The fact that we are holding b serves as the
945 * lock entitling us to write to r->b.
946 */
947 assert(r->b == nil);
948 r->b = b;
949 if(r->mode == OReadWrite)
950 assert(r->epoch == r->b->l.epoch);
951 return 1;
952 }
953
954 /*
955 * Lock two (usually sibling) sources. This needs special care
956 * because the Entries for both sources might be in the same block.
957 * We also try to lock blocks in left-to-right order within the tree.
958 */
959 int
sourceLock2(Source * r,Source * rr,int mode)960 sourceLock2(Source *r, Source *rr, int mode)
961 {
962 Block *b, *bb;
963
964 if(rr == nil)
965 return sourceLock(r, mode);
966
967 if(mode == -1)
968 mode = r->mode;
969
970 if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
971 b = sourceLoadBlock(r, mode);
972 if(b == nil)
973 return 0;
974 if(memcmp(r->score, rr->score, VtScoreSize) != 0){
975 memmove(rr->score, b->score, VtScoreSize);
976 rr->scoreEpoch = b->l.epoch;
977 rr->tag = b->l.tag;
978 rr->epoch = rr->fs->ehi;
979 }
980 blockDupLock(b);
981 bb = b;
982 }else if(r->parent==rr->parent || r->offset > rr->offset){
983 bb = sourceLoadBlock(rr, mode);
984 b = sourceLoadBlock(r, mode);
985 }else{
986 b = sourceLoadBlock(r, mode);
987 bb = sourceLoadBlock(rr, mode);
988 }
989 if(b == nil || bb == nil){
990 if(b)
991 blockPut(b);
992 if(bb)
993 blockPut(bb);
994 return 0;
995 }
996
997 /*
998 * The fact that we are holding b and bb serves
999 * as the lock entitling us to write to r->b and rr->b.
1000 */
1001 r->b = b;
1002 rr->b = bb;
1003 return 1;
1004 }
1005
1006 void
sourceUnlock(Source * r)1007 sourceUnlock(Source *r)
1008 {
1009 Block *b;
1010
1011 if(r->b == nil){
1012 fprint(2, "sourceUnlock: already unlocked\n");
1013 abort();
1014 }
1015 b = r->b;
1016 r->b = nil;
1017 blockPut(b);
1018 }
1019
1020 static Block*
sourceLoad(Source * r,Entry * e)1021 sourceLoad(Source *r, Entry *e)
1022 {
1023 Block *b;
1024
1025 assert(sourceIsLocked(r));
1026 b = r->b;
1027 if(!entryUnpack(e, b->data, r->offset % r->epb))
1028 return nil;
1029 if(e->gen != r->gen){
1030 vtSetError(ERemoved);
1031 return nil;
1032 }
1033 blockDupLock(b);
1034 return b;
1035 }
1036
1037 static int
sizeToDepth(uvlong s,int psize,int dsize)1038 sizeToDepth(uvlong s, int psize, int dsize)
1039 {
1040 int np;
1041 int d;
1042
1043 /* determine pointer depth */
1044 np = psize/VtScoreSize;
1045 s = (s + dsize - 1)/dsize;
1046 for(d = 0; s > 1; d++)
1047 s = (s + np - 1)/np;
1048 return d;
1049 }
1050
1051 static u32int
tagGen(void)1052 tagGen(void)
1053 {
1054 u32int tag;
1055
1056 for(;;){
1057 tag = lrand();
1058 if(tag >= UserTag)
1059 break;
1060 }
1061 return tag;
1062 }
1063
1064 char *
sourceName(Source * s)1065 sourceName(Source *s)
1066 {
1067 return fileName(s->file);
1068 }
1069