xref: /plan9/sys/src/libventi/file.c (revision 9ba92a1a1d8a653f6050241dbdbd039f2ecb1dc2)
1 /*
2  * Manage tree of VtFiles stored in the block cache.
3  *
4  * The single point of truth for the info about the VtFiles themselves
5  * is the block data.  Because of this, there is no explicit locking of
6  * VtFile structures, and indeed there may be more than one VtFile
7  * structure for a given Venti file.  They synchronize through the
8  * block cache.
9  *
10  * This is a bit simpler than fossil because there are no epochs
11  * or tags or anything else.  Just mutable local blocks and immutable
12  * Venti blocks.
13  */
14 
15 #include <u.h>
16 #include <libc.h>
17 #include <venti.h>
18 
19 #define MaxBlock (1UL<<31)
20 
21 static char ENotDir[] = "walk in non-directory";
22 static char ETooBig[] = "file too big";
23 /* static char EBadAddr[] = "bad address"; */
24 static char ELabelMismatch[] = "label mismatch";
25 
26 static int	sizetodepth(uvlong s, int psize, int dsize);
27 static VtBlock 	*fileload(VtFile *r, VtEntry *e);
28 static int	shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
29 static int	shrinksize(VtFile*, VtEntry*, uvlong);
30 static int	growdepth(VtFile*, VtBlock*, VtEntry*, int);
31 
32 #define ISLOCKED(r)	((r)->b != nil)
33 #define DEPTH(t)	((t)&VtTypeDepthMask)
34 
35 static VtFile *
vtfilealloc(VtCache * c,VtBlock * b,VtFile * p,u32int offset,int mode)36 vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, u32int offset, int mode)
37 {
38 	int epb;
39 	u32int size;
40 	VtEntry e;
41 	VtFile *r;
42 
43 	assert(p==nil || ISLOCKED(p));
44 
45 	if(p == nil){
46 		assert(offset == 0);
47 		epb = 1;
48 	}else
49 		epb = p->dsize / VtEntrySize;
50 
51 	if(b->type != VtDirType){
52 		werrstr("bad block type %#uo", b->type);
53 		return nil;
54 	}
55 
56 	/*
57 	 * a non-active entry is the only thing that
58 	 * can legitimately happen here. all the others
59 	 * get prints.
60 	 */
61 	if(vtentryunpack(&e, b->data, offset % epb) < 0){
62 		fprint(2, "vtentryunpack failed: %r (%.*H)\n", VtEntrySize, b->data+VtEntrySize*(offset%epb));
63 		return nil;
64 	}
65 	if(!(e.flags & VtEntryActive)){
66 		werrstr("entry not active");
67 		return nil;
68 	}
69 
70 	if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
71 		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n",
72 			DEPTH(e.type), e.size, e.psize, e.dsize);
73 		werrstr("bad depth");
74 		return nil;
75 	}
76 
77 	size = vtcacheblocksize(c);
78 	if(e.dsize > size || e.psize > size){
79 		werrstr("block sizes %ud, %ud bigger than cache block size %ud",
80 			e.psize, e.dsize, size);
81 		return nil;
82 	}
83 
84 	r = vtmallocz(sizeof(VtFile));
85 	r->c = c;
86 	r->mode = mode;
87 	r->dsize = e.dsize;
88 	r->psize = e.psize;
89 	r->gen = e.gen;
90 	r->dir = (e.type & VtTypeBaseMask) == VtDirType;
91 	r->ref = 1;
92 	r->parent = p;
93 	if(p){
94 		qlock(&p->lk);
95 		assert(mode == VtOREAD || p->mode == VtORDWR);
96 		p->ref++;
97 		qunlock(&p->lk);
98 	}else{
99 		assert(b->addr != NilBlock);
100 		r->local = 1;
101 	}
102 	memmove(r->score, b->score, VtScoreSize);
103 	r->offset = offset;
104 	r->epb = epb;
105 
106 	return r;
107 }
108 
109 VtFile *
vtfileroot(VtCache * c,u32int addr,int mode)110 vtfileroot(VtCache *c, u32int addr, int mode)
111 {
112 	VtFile *r;
113 	VtBlock *b;
114 
115 	b = vtcachelocal(c, addr, VtDirType);
116 	if(b == nil)
117 		return nil;
118 	r = vtfilealloc(c, b, nil, 0, mode);
119 	vtblockput(b);
120 	return r;
121 }
122 
123 VtFile*
vtfileopenroot(VtCache * c,VtEntry * e)124 vtfileopenroot(VtCache *c, VtEntry *e)
125 {
126 	VtBlock *b;
127 	VtFile *f;
128 
129 	b = vtcacheallocblock(c, VtDirType);
130 	if(b == nil)
131 		return nil;
132 
133 	vtentrypack(e, b->data, 0);
134 	f = vtfilealloc(c, b, nil, 0, VtORDWR);
135 	vtblockput(b);
136 	return f;
137 }
138 
139 VtFile *
vtfilecreateroot(VtCache * c,int psize,int dsize,int type)140 vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
141 {
142 	VtEntry e;
143 
144 	memset(&e, 0, sizeof e);
145 	e.flags = VtEntryActive;
146 	e.psize = psize;
147 	e.dsize = dsize;
148 	e.type = type;
149 	memmove(e.score, vtzeroscore, VtScoreSize);
150 
151 	return vtfileopenroot(c, &e);
152 }
153 
154 VtFile *
vtfileopen(VtFile * r,u32int offset,int mode)155 vtfileopen(VtFile *r, u32int offset, int mode)
156 {
157 	ulong bn;
158 	VtBlock *b;
159 
160 	assert(ISLOCKED(r));
161 	if(!r->dir){
162 		werrstr(ENotDir);
163 		return nil;
164 	}
165 
166 	bn = offset/(r->dsize/VtEntrySize);
167 
168 	b = vtfileblock(r, bn, mode);
169 	if(b == nil)
170 		return nil;
171 	r = vtfilealloc(r->c, b, r, offset, mode);
172 	vtblockput(b);
173 	return r;
174 }
175 
176 VtFile*
vtfilecreate(VtFile * r,int psize,int dsize,int type)177 vtfilecreate(VtFile *r, int psize, int dsize, int type)
178 {
179 	return _vtfilecreate(r, -1, psize, dsize, type);
180 }
181 
182 VtFile*
_vtfilecreate(VtFile * r,int o,int psize,int dsize,int type)183 _vtfilecreate(VtFile *r, int o, int psize, int dsize, int type)
184 {
185 	int i;
186 	VtBlock *b;
187 	u32int bn, size;
188 	VtEntry e;
189 	int epb;
190 	VtFile *rr;
191 	u32int offset;
192 
193 	assert(ISLOCKED(r));
194 	assert(psize <= VtMaxLumpSize);
195 	assert(dsize <= VtMaxLumpSize);
196 	assert(type == VtDirType || type == VtDataType);
197 
198 	if(!r->dir){
199 		werrstr(ENotDir);
200 		return nil;
201 	}
202 
203 	epb = r->dsize/VtEntrySize;
204 
205 	size = vtfilegetdirsize(r);
206 	/*
207 	 * look at a random block to see if we can find an empty entry
208 	 */
209 	if(o == -1){
210 		offset = lnrand(size+1);
211 		offset -= offset % epb;
212 	}else
213 		offset = o;
214 
215 	/* try the given block and then try the last block */
216 	for(;;){
217 		bn = offset/epb;
218 		b = vtfileblock(r, bn, VtORDWR);
219 		if(b == nil)
220 			return nil;
221 		for(i=offset%r->epb; i<epb; i++){
222 			if(vtentryunpack(&e, b->data, i) < 0)
223 				continue;
224 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
225 				goto Found;
226 		}
227 		vtblockput(b);
228 		if(offset == size){
229 			fprint(2, "vtfilecreate: cannot happen\n");
230 			werrstr("vtfilecreate: cannot happen");
231 			return nil;
232 		}
233 		offset = size;
234 	}
235 
236 Found:
237 	/* found an entry - gen already set */
238 	e.psize = psize;
239 	e.dsize = dsize;
240 	e.flags = VtEntryActive;
241 	e.type = type;
242 	e.size = 0;
243 	memmove(e.score, vtzeroscore, VtScoreSize);
244 	vtentrypack(&e, b->data, i);
245 
246 	offset = bn*epb + i;
247 	if(offset+1 > size){
248 		if(vtfilesetdirsize(r, offset+1) < 0){
249 			vtblockput(b);
250 			return nil;
251 		}
252 	}
253 
254 	rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
255 	vtblockput(b);
256 	return rr;
257 }
258 
259 static int
vtfilekill(VtFile * r,int doremove)260 vtfilekill(VtFile *r, int doremove)
261 {
262 	VtEntry e;
263 	VtBlock *b;
264 
265 	assert(ISLOCKED(r));
266 	b = fileload(r, &e);
267 	if(b == nil)
268 		return -1;
269 
270 	if(doremove==0 && e.size == 0){
271 		/* already truncated */
272 		vtblockput(b);
273 		return 0;
274 	}
275 
276 	if(doremove){
277 		if(e.gen != ~0)
278 			e.gen++;
279 		e.dsize = 0;
280 		e.psize = 0;
281 		e.flags = 0;
282 	}else
283 		e.flags &= ~VtEntryLocal;
284 	e.type = 0;
285 	e.size = 0;
286 	memmove(e.score, vtzeroscore, VtScoreSize);
287 	vtentrypack(&e, b->data, r->offset % r->epb);
288 	vtblockput(b);
289 
290 	if(doremove){
291 		vtfileunlock(r);
292 		vtfileclose(r);
293 	}
294 
295 	return 0;
296 }
297 
298 int
vtfileremove(VtFile * r)299 vtfileremove(VtFile *r)
300 {
301 	return vtfilekill(r, 1);
302 }
303 
304 int
vtfiletruncate(VtFile * r)305 vtfiletruncate(VtFile *r)
306 {
307 	return vtfilekill(r, 0);
308 }
309 
310 uvlong
vtfilegetsize(VtFile * r)311 vtfilegetsize(VtFile *r)
312 {
313 	VtEntry e;
314 	VtBlock *b;
315 
316 	assert(ISLOCKED(r));
317 	b = fileload(r, &e);
318 	if(b == nil)
319 		return ~(uvlong)0;
320 	vtblockput(b);
321 
322 	return e.size;
323 }
324 
325 static int
shrinksize(VtFile * r,VtEntry * e,uvlong size)326 shrinksize(VtFile *r, VtEntry *e, uvlong size)
327 {
328 	int i, depth, type, isdir, ppb;
329 	uvlong ptrsz;
330 	uchar score[VtScoreSize];
331 	VtBlock *b;
332 
333 	b = vtcacheglobal(r->c, e->score, e->type);
334 	if(b == nil)
335 		return -1;
336 
337 	ptrsz = e->dsize;
338 	ppb = e->psize/VtScoreSize;
339 	type = e->type;
340 	depth = DEPTH(type);
341 	for(i=0; i+1<depth; i++)
342 		ptrsz *= ppb;
343 
344 	isdir = r->dir;
345 	while(depth > 0){
346 		if(b->addr == NilBlock){
347 			/* not worth copying the block just so we can zero some of it */
348 			vtblockput(b);
349 			return -1;
350 		}
351 
352 		/*
353 		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
354 		 */
355 
356 		/* zero the pointers to unnecessary blocks */
357 		i = (size+ptrsz-1)/ptrsz;
358 		for(; i<ppb; i++)
359 			memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);
360 
361 		/* recurse (go around again) on the partially necessary block */
362 		i = size/ptrsz;
363 		size = size%ptrsz;
364 		if(size == 0){
365 			vtblockput(b);
366 			return 0;
367 		}
368 		ptrsz /= ppb;
369 		type--;
370 		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
371 		vtblockput(b);
372 		b = vtcacheglobal(r->c, score, type);
373 		if(b == nil)
374 			return -1;
375 	}
376 
377 	if(b->addr == NilBlock){
378 		vtblockput(b);
379 		return -1;
380 	}
381 
382 	/*
383 	 * No one ever truncates BtDir blocks.
384 	 */
385 	if(depth==0 && !isdir && e->dsize > size)
386 		memset(b->data+size, 0, e->dsize-size);
387 	vtblockput(b);
388 	return 0;
389 }
390 
391 int
vtfilesetsize(VtFile * r,u64int size)392 vtfilesetsize(VtFile *r, u64int size)
393 {
394 	int depth, edepth;
395 	VtEntry e;
396 	VtBlock *b;
397 
398 	assert(ISLOCKED(r));
399 	if(size == 0)
400 		return vtfiletruncate(r);
401 
402 	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
403 		werrstr(ETooBig);
404 		return -1;
405 	}
406 
407 	b = fileload(r, &e);
408 	if(b == nil)
409 		return -1;
410 
411 	/* quick out */
412 	if(e.size == size){
413 		vtblockput(b);
414 		return 0;
415 	}
416 
417 	depth = sizetodepth(size, e.psize, e.dsize);
418 	edepth = DEPTH(e.type);
419 	if(depth < edepth){
420 		if(shrinkdepth(r, b, &e, depth) < 0){
421 			vtblockput(b);
422 			return -1;
423 		}
424 	}else if(depth > edepth){
425 		if(growdepth(r, b, &e, depth) < 0){
426 			vtblockput(b);
427 			return -1;
428 		}
429 	}
430 
431 	if(size < e.size)
432 		shrinksize(r, &e, size);
433 
434 	e.size = size;
435 	vtentrypack(&e, b->data, r->offset % r->epb);
436 	vtblockput(b);
437 
438 	return 0;
439 }
440 
441 int
vtfilesetdirsize(VtFile * r,u32int ds)442 vtfilesetdirsize(VtFile *r, u32int ds)
443 {
444 	uvlong size;
445 	int epb;
446 
447 	assert(ISLOCKED(r));
448 	epb = r->dsize/VtEntrySize;
449 
450 	size = (uvlong)r->dsize*(ds/epb);
451 	size += VtEntrySize*(ds%epb);
452 	return vtfilesetsize(r, size);
453 }
454 
455 u32int
vtfilegetdirsize(VtFile * r)456 vtfilegetdirsize(VtFile *r)
457 {
458 	ulong ds;
459 	uvlong size;
460 	int epb;
461 
462 	assert(ISLOCKED(r));
463 	epb = r->dsize/VtEntrySize;
464 
465 	size = vtfilegetsize(r);
466 	ds = epb*(size/r->dsize);
467 	ds += (size%r->dsize)/VtEntrySize;
468 	return ds;
469 }
470 
471 int
vtfilegetentry(VtFile * r,VtEntry * e)472 vtfilegetentry(VtFile *r, VtEntry *e)
473 {
474 	VtBlock *b;
475 
476 	assert(ISLOCKED(r));
477 	b = fileload(r, e);
478 	if(b == nil)
479 		return -1;
480 	vtblockput(b);
481 
482 	return 0;
483 }
484 
485 int
vtfilesetentry(VtFile * r,VtEntry * e)486 vtfilesetentry(VtFile *r, VtEntry *e)
487 {
488 	VtBlock *b;
489 	VtEntry ee;
490 
491 	assert(ISLOCKED(r));
492 	b = fileload(r, &ee);
493 	if(b == nil)
494 		return -1;
495 	vtentrypack(e, b->data, r->offset % r->epb);
496 	vtblockput(b);
497 	return 0;
498 }
499 
500 static VtBlock *
blockwalk(VtBlock * p,int index,VtCache * c,int mode,VtEntry * e)501 blockwalk(VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
502 {
503 	VtBlock *b;
504 	int type;
505 	uchar *score;
506 	VtEntry oe;
507 
508 	switch(p->type){
509 	case VtDataType:
510 		assert(0);
511 	case VtDirType:
512 		type = e->type;
513 		score = e->score;
514 		break;
515 	default:
516 		type = p->type - 1;
517 		score = p->data+index*VtScoreSize;
518 		break;
519 	}
520 /*print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type); */
521 
522 	if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
523 		b = vtcacheallocblock(c, type);
524 		if(b)
525 			goto HaveCopy;
526 	}else
527 		b = vtcacheglobal(c, score, type);
528 
529 	if(b == nil || mode == VtOREAD)
530 		return b;
531 
532 	if(vtglobaltolocal(b->score) != NilBlock)
533 		return b;
534 
535 	oe = *e;
536 
537 	/*
538 	 * Copy on write.
539 	 */
540 	e->flags |= VtEntryLocal;
541 
542 	b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
543 	if(b == nil)
544 		return nil;
545 
546 HaveCopy:
547 	if(p->type == VtDirType){
548 		memmove(e->score, b->score, VtScoreSize);
549 		vtentrypack(e, p->data, index);
550 	}else{
551 		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
552 	}
553 	return b;
554 }
555 
556 /*
557  * Change the depth of the VtFile r.
558  * The entry e for r is contained in block p.
559  */
560 static int
growdepth(VtFile * r,VtBlock * p,VtEntry * e,int depth)561 growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
562 {
563 	VtBlock *b, *bb;
564 	VtEntry oe;
565 
566 	assert(ISLOCKED(r));
567 	assert(depth <= VtPointerDepth);
568 
569 	b = vtcacheglobal(r->c, e->score, e->type);
570 	if(b == nil)
571 		return -1;
572 
573 	oe = *e;
574 
575 	/*
576 	 * Keep adding layers until we get to the right depth
577 	 * or an error occurs.
578 	 */
579 	while(DEPTH(e->type) < depth){
580 		bb = vtcacheallocblock(r->c, e->type+1);
581 		if(bb == nil)
582 			break;
583 		memmove(bb->data, b->score, VtScoreSize);
584 		memmove(e->score, bb->score, VtScoreSize);
585 		e->type++;
586 		e->flags |= VtEntryLocal;
587 		vtblockput(b);
588 		b = bb;
589 	}
590 
591 	vtentrypack(e, p->data, r->offset % r->epb);
592 	vtblockput(b);
593 
594 	if(DEPTH(e->type) == depth)
595 		return 0;
596 	return -1;
597 }
598 
599 static int
shrinkdepth(VtFile * r,VtBlock * p,VtEntry * e,int depth)600 shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
601 {
602 	VtBlock *b, *nb, *ob, *rb;
603 	VtEntry oe;
604 
605 	assert(ISLOCKED(r));
606 	assert(depth <= VtPointerDepth);
607 
608 	rb = vtcacheglobal(r->c, e->score, e->type);
609 	if(rb == nil)
610 		return -1;
611 
612 	/*
613 	 * Walk down to the new root block.
614 	 * We may stop early, but something is better than nothing.
615 	 */
616 	oe = *e;
617 
618 	ob = nil;
619 	b = rb;
620 	for(; DEPTH(e->type) > depth; e->type--){
621 		nb = vtcacheglobal(r->c, b->data, e->type-1);
622 		if(nb == nil)
623 			break;
624 		if(ob!=nil && ob!=rb)
625 			vtblockput(ob);
626 		ob = b;
627 		b = nb;
628 	}
629 
630 	if(b == rb){
631 		vtblockput(rb);
632 		return 0;
633 	}
634 
635 	/*
636 	 * Right now, e points at the root block rb, b is the new root block,
637 	 * and ob points at b.  To update:
638 	 *
639 	 *	(i) change e to point at b
640 	 *	(ii) zero the pointer ob -> b
641 	 *	(iii) free the root block
642 	 *
643 	 * p (the block containing e) must be written before
644 	 * anything else.
645  	 */
646 
647 	/* (i) */
648 	memmove(e->score, b->score, VtScoreSize);
649 	vtentrypack(e, p->data, r->offset % r->epb);
650 
651 	/* (ii) */
652 	memmove(ob->data, vtzeroscore, VtScoreSize);
653 
654 	/* (iii) */
655 	vtblockput(rb);
656 	if(ob!=nil && ob!=rb)
657 		vtblockput(ob);
658 	vtblockput(b);
659 
660 	if(DEPTH(e->type) == depth)
661 		return 0;
662 	return -1;
663 }
664 
665 static int
mkindices(VtEntry * e,u32int bn,int * index)666 mkindices(VtEntry *e, u32int bn, int *index)
667 {
668 	int i, np;
669 
670 	memset(index, 0, (VtPointerDepth+1)*sizeof(int));
671 
672 	np = e->psize/VtScoreSize;
673 	for(i=0; bn > 0; i++){
674 		if(i >= VtPointerDepth){
675 			werrstr("bad address 0x%lux", (ulong)bn);
676 			return -1;
677 		}
678 		index[i] = bn % np;
679 		bn /= np;
680 	}
681 	return i;
682 }
683 
684 VtBlock *
vtfileblock(VtFile * r,u32int bn,int mode)685 vtfileblock(VtFile *r, u32int bn, int mode)
686 {
687 	VtBlock *b, *bb;
688 	int index[VtPointerDepth+1];
689 	VtEntry e;
690 	int i;
691 	int m;
692 
693 	assert(ISLOCKED(r));
694 	assert(bn != NilBlock);
695 
696 	b = fileload(r, &e);
697 	if(b == nil)
698 		return nil;
699 
700 	i = mkindices(&e, bn, index);
701 	if(i < 0)
702 		goto Err;
703 	if(i > DEPTH(e.type)){
704 		if(mode == VtOREAD){
705 			werrstr("bad address 0x%lux", (ulong)bn);
706 			goto Err;
707 		}
708 		index[i] = 0;
709 		if(growdepth(r, b, &e, i) < 0)
710 			goto Err;
711 	}
712 
713 assert(b->type == VtDirType);
714 
715 	index[DEPTH(e.type)] = r->offset % r->epb;
716 
717 	/* mode for intermediate block */
718 	m = mode;
719 	if(m == VtOWRITE)
720 		m = VtORDWR;
721 
722 	for(i=DEPTH(e.type); i>=0; i--){
723 		bb = blockwalk(b, index[i], r->c, i==0 ? mode : m, &e);
724 		if(bb == nil)
725 			goto Err;
726 		vtblockput(b);
727 		b = bb;
728 	}
729 	b->pc = getcallerpc(&r);
730 	return b;
731 Err:
732 	vtblockput(b);
733 	return nil;
734 }
735 
736 int
vtfileblockscore(VtFile * r,u32int bn,uchar score[VtScoreSize])737 vtfileblockscore(VtFile *r, u32int bn, uchar score[VtScoreSize])
738 {
739 	VtBlock *b, *bb;
740 	int index[VtPointerDepth+1];
741 	VtEntry e;
742 	int i;
743 
744 	assert(ISLOCKED(r));
745 	assert(bn != NilBlock);
746 
747 	b = fileload(r, &e);
748 	if(b == nil)
749 		return -1;
750 
751 	if(DEPTH(e.type) == 0){
752 		memmove(score, e.score, VtScoreSize);
753 		vtblockput(b);
754 		return 0;
755 	}
756 
757 	i = mkindices(&e, bn, index);
758 	if(i < 0){
759 		vtblockput(b);
760 		return -1;
761 	}
762 	if(i > DEPTH(e.type)){
763 		memmove(score, vtzeroscore, VtScoreSize);
764 		vtblockput(b);
765 		return 0;
766 	}
767 
768 	index[DEPTH(e.type)] = r->offset % r->epb;
769 
770 	for(i=DEPTH(e.type); i>=1; i--){
771 		bb = blockwalk(b, index[i], r->c, VtOREAD, &e);
772 		if(bb == nil)
773 			goto Err;
774 		vtblockput(b);
775 		b = bb;
776 		if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
777 			break;
778 	}
779 
780 	memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
781 	vtblockput(b);
782 	return 0;
783 
784 Err:
785 	vtblockput(b);
786 	return -1;
787 }
788 
789 void
vtfileincref(VtFile * r)790 vtfileincref(VtFile *r)
791 {
792 	qlock(&r->lk);
793 	r->ref++;
794 	qunlock(&r->lk);
795 }
796 
797 void
vtfileclose(VtFile * r)798 vtfileclose(VtFile *r)
799 {
800 	if(r == nil)
801 		return;
802 	qlock(&r->lk);
803 	r->ref--;
804 	if(r->ref){
805 		qunlock(&r->lk);
806 		return;
807 	}
808 	assert(r->ref == 0);
809 	qunlock(&r->lk);
810 	if(r->parent)
811 		vtfileclose(r->parent);
812 	memset(r, ~0, sizeof(*r));
813 	vtfree(r);
814 }
815 
816 /*
817  * Retrieve the block containing the entry for r.
818  * If a snapshot has happened, we might need
819  * to get a new copy of the block.  We avoid this
820  * in the common case by caching the score for
821  * the block and the last epoch in which it was valid.
822  *
823  * We use r->mode to tell the difference between active
824  * file system VtFiles (VtORDWR) and VtFiles for the
825  * snapshot file system (VtOREAD).
826  */
827 static VtBlock*
fileloadblock(VtFile * r,int mode)828 fileloadblock(VtFile *r, int mode)
829 {
830 	char e[ERRMAX];
831 	u32int addr;
832 	VtBlock *b;
833 
834 	switch(r->mode){
835 	default:
836 		assert(0);
837 	case VtORDWR:
838 		assert(r->mode == VtORDWR);
839 		if(r->local == 1){
840 			b = vtcacheglobal(r->c, r->score, VtDirType);
841 			if(b == nil)
842 				return nil;
843 			b->pc = getcallerpc(&r);
844 			return b;
845 		}
846 		assert(r->parent != nil);
847 		if(vtfilelock(r->parent, VtORDWR) < 0)
848 			return nil;
849 		b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
850 		vtfileunlock(r->parent);
851 		if(b == nil)
852 			return nil;
853 		memmove(r->score, b->score, VtScoreSize);
854 		r->local = 1;
855 		return b;
856 
857 	case VtOREAD:
858 		if(mode == VtORDWR){
859 			werrstr("read/write lock of read-only file");
860 			return nil;
861 		}
862 		addr = vtglobaltolocal(r->score);
863 		if(addr == NilBlock)
864 			return vtcacheglobal(r->c, r->score, VtDirType);
865 
866 		b = vtcachelocal(r->c, addr, VtDirType);
867 		if(b)
868 			return b;
869 
870 		/*
871 		 * If it failed because the epochs don't match, the block has been
872 		 * archived and reclaimed.  Rewalk from the parent and get the
873 		 * new pointer.  This can't happen in the VtORDWR case
874 		 * above because blocks in the current epoch don't get
875 		 * reclaimed.  The fact that we're VtOREAD means we're
876 		 * a snapshot.  (Or else the file system is read-only, but then
877 		 * the archiver isn't going around deleting blocks.)
878 		 */
879 		rerrstr(e, sizeof e);
880 		if(strcmp(e, ELabelMismatch) == 0){
881 			if(vtfilelock(r->parent, VtOREAD) < 0)
882 				return nil;
883 			b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
884 			vtfileunlock(r->parent);
885 			if(b){
886 				fprint(2, "vtfilealloc: lost %V found %V\n",
887 					r->score, b->score);
888 				memmove(r->score, b->score, VtScoreSize);
889 				return b;
890 			}
891 		}
892 		return nil;
893 	}
894 }
895 
896 int
vtfilelock(VtFile * r,int mode)897 vtfilelock(VtFile *r, int mode)
898 {
899 	VtBlock *b;
900 
901 	if(mode == -1)
902 		mode = r->mode;
903 
904 	b = fileloadblock(r, mode);
905 	if(b == nil)
906 		return -1;
907 	/*
908 	 * The fact that we are holding b serves as the
909 	 * lock entitling us to write to r->b.
910 	 */
911 	assert(r->b == nil);
912 	r->b = b;
913 	b->pc = getcallerpc(&r);
914 	return 0;
915 }
916 
917 /*
918  * Lock two (usually sibling) VtFiles.  This needs special care
919  * because the Entries for both vtFiles might be in the same block.
920  * We also try to lock blocks in left-to-right order within the tree.
921  */
922 int
vtfilelock2(VtFile * r,VtFile * rr,int mode)923 vtfilelock2(VtFile *r, VtFile *rr, int mode)
924 {
925 	VtBlock *b, *bb;
926 
927 	if(rr == nil)
928 		return vtfilelock(r, mode);
929 
930 	if(mode == -1)
931 		mode = r->mode;
932 
933 	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
934 		b = fileloadblock(r, mode);
935 		if(b == nil)
936 			return -1;
937 		vtblockduplock(b);
938 		bb = b;
939 	}else if(r->parent==rr->parent || r->offset > rr->offset){
940 		bb = fileloadblock(rr, mode);
941 		b = fileloadblock(r, mode);
942 	}else{
943 		b = fileloadblock(r, mode);
944 		bb = fileloadblock(rr, mode);
945 	}
946 	if(b == nil || bb == nil){
947 		if(b)
948 			vtblockput(b);
949 		if(bb)
950 			vtblockput(bb);
951 		return -1;
952 	}
953 
954 	/*
955 	 * The fact that we are holding b and bb serves
956 	 * as the lock entitling us to write to r->b and rr->b.
957 	 */
958 	r->b = b;
959 	rr->b = bb;
960 	b->pc = getcallerpc(&r);
961 	bb->pc = getcallerpc(&r);
962 	return 0;
963 }
964 
965 void
vtfileunlock(VtFile * r)966 vtfileunlock(VtFile *r)
967 {
968 	VtBlock *b;
969 
970 	if(r->b == nil){
971 		fprint(2, "vtfileunlock: already unlocked\n");
972 		abort();
973 	}
974 	b = r->b;
975 	r->b = nil;
976 	vtblockput(b);
977 }
978 
979 static VtBlock*
fileload(VtFile * r,VtEntry * e)980 fileload(VtFile *r, VtEntry *e)
981 {
982 	VtBlock *b;
983 
984 	assert(ISLOCKED(r));
985 	b = r->b;
986 	if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
987 		return nil;
988 	vtblockduplock(b);
989 	return b;
990 }
991 
992 static int
sizetodepth(uvlong s,int psize,int dsize)993 sizetodepth(uvlong s, int psize, int dsize)
994 {
995 	int np;
996 	int d;
997 
998 	/* determine pointer depth */
999 	np = psize/VtScoreSize;
1000 	s = (s + dsize - 1)/dsize;
1001 	for(d = 0; s > 1; d++)
1002 		s = (s + np - 1)/np;
1003 	return d;
1004 }
1005 
1006 long
vtfileread(VtFile * f,void * data,long count,vlong offset)1007 vtfileread(VtFile *f, void *data, long count, vlong offset)
1008 {
1009 	int frag;
1010 	VtBlock *b;
1011 	VtEntry e;
1012 
1013 	assert(ISLOCKED(f));
1014 
1015 	vtfilegetentry(f, &e);
1016 	if(count == 0)
1017 		return 0;
1018 	if(count < 0 || offset < 0){
1019 		werrstr("vtfileread: bad offset or count");
1020 		return -1;
1021 	}
1022 	if(offset >= e.size)
1023 		return 0;
1024 
1025 	if(offset+count > e.size)
1026 		count = e.size - offset;
1027 
1028 	frag = offset % e.dsize;
1029 	if(frag+count > e.dsize)
1030 		count = e.dsize - frag;
1031 
1032 	b = vtfileblock(f, offset/e.dsize, VtOREAD);
1033 	if(b == nil)
1034 		return -1;
1035 
1036 	memmove(data, b->data+frag, count);
1037 	vtblockput(b);
1038 	return count;
1039 }
1040 
1041 static long
filewrite1(VtFile * f,void * data,long count,vlong offset)1042 filewrite1(VtFile *f, void *data, long count, vlong offset)
1043 {
1044 	int frag, m;
1045 	VtBlock *b;
1046 	VtEntry e;
1047 
1048 	vtfilegetentry(f, &e);
1049 	if(count < 0 || offset < 0){
1050 		werrstr("vtfilewrite: bad offset or count");
1051 		return -1;
1052 	}
1053 
1054 	frag = offset % e.dsize;
1055 	if(frag+count > e.dsize)
1056 		count = e.dsize - frag;
1057 
1058 	m = VtORDWR;
1059 	if(frag == 0 && count == e.dsize)
1060 		m = VtOWRITE;
1061 
1062 	b = vtfileblock(f, offset/e.dsize, m);
1063 	if(b == nil)
1064 		return -1;
1065 
1066 	memmove(b->data+frag, data, count);
1067 	if(m == VtOWRITE && frag+count < e.dsize)
1068 		memset(b->data+frag+count, 0, e.dsize-frag-count);
1069 
1070 	if(offset+count > e.size){
1071 		vtfilegetentry(f, &e);
1072 		e.size = offset+count;
1073 		vtfilesetentry(f, &e);
1074 	}
1075 
1076 	vtblockput(b);
1077 	return count;
1078 }
1079 
1080 long
vtfilewrite(VtFile * f,void * data,long count,vlong offset)1081 vtfilewrite(VtFile *f, void *data, long count, vlong offset)
1082 {
1083 	long tot, m;
1084 
1085 	assert(ISLOCKED(f));
1086 
1087 	tot = 0;
1088 	m = 0;
1089 	while(tot < count){
1090 		m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
1091 		if(m <= 0)
1092 			break;
1093 		tot += m;
1094 	}
1095 	if(tot==0)
1096 		return m;
1097 	return tot;
1098 }
1099 
1100 static int
flushblock(VtCache * c,VtBlock * bb,uchar score[VtScoreSize],int ppb,int epb,int type)1101 flushblock(VtCache *c, VtBlock *bb, uchar score[VtScoreSize], int ppb, int epb,
1102 	int type)
1103 {
1104 	u32int addr;
1105 	VtBlock *b;
1106 	VtEntry e;
1107 	int i;
1108 
1109 	addr = vtglobaltolocal(score);
1110 	if(addr == NilBlock)
1111 		return 0;
1112 
1113 	if(bb){
1114 		b = bb;
1115 		if(memcmp(b->score, score, VtScoreSize) != 0)
1116 			abort();
1117 	}else
1118 		if((b = vtcachelocal(c, addr, type)) == nil)
1119 			return -1;
1120 
1121 	switch(type){
1122 	case VtDataType:
1123 		break;
1124 
1125 	case VtDirType:
1126 		for(i=0; i<epb; i++){
1127 			if(vtentryunpack(&e, b->data, i) < 0)
1128 				goto Err;
1129 			if(!(e.flags&VtEntryActive))
1130 				continue;
1131 			if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
1132 				e.type) < 0)
1133 				goto Err;
1134 			vtentrypack(&e, b->data, i);
1135 		}
1136 		break;
1137 
1138 	default:	/* VtPointerTypeX */
1139 		for(i=0; i<ppb; i++){
1140 			if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
1141 				goto Err;
1142 		}
1143 		break;
1144 	}
1145 
1146 	if(vtblockwrite(b) < 0)
1147 		goto Err;
1148 	memmove(score, b->score, VtScoreSize);
1149 	if(b != bb)
1150 		vtblockput(b);
1151 	return 0;
1152 
1153 Err:
1154 	if(b != bb)
1155 		vtblockput(b);
1156 	return -1;
1157 }
1158 
1159 int
vtfileflush(VtFile * f)1160 vtfileflush(VtFile *f)
1161 {
1162 	int ret;
1163 	VtBlock *b;
1164 	VtEntry e;
1165 
1166 	assert(ISLOCKED(f));
1167 	b = fileload(f, &e);
1168 	if(!(e.flags&VtEntryLocal)){
1169 		vtblockput(b);
1170 		return 0;
1171 	}
1172 
1173 	ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
1174 		e.type);
1175 	if(ret < 0){
1176 		vtblockput(b);
1177 		return -1;
1178 	}
1179 
1180 	vtentrypack(&e, b->data, f->offset % f->epb);
1181 	vtblockput(b);
1182 	return 0;
1183 }
1184 
1185 int
vtfileflushbefore(VtFile * r,u64int offset)1186 vtfileflushbefore(VtFile *r, u64int offset)
1187 {
1188 	VtBlock *b, *bb;
1189 	VtEntry e;
1190 	int i, base, depth, ppb, epb, doflush;
1191 	int index[VtPointerDepth+1], j, ret;
1192 	VtBlock *bi[VtPointerDepth+2];
1193 	uchar *score;
1194 
1195 	assert(ISLOCKED(r));
1196 	if(offset == 0)
1197 		return 0;
1198 
1199 	b = fileload(r, &e);
1200 	if(b == nil)
1201 		return -1;
1202 
1203 	/*
1204 	 * compute path through tree for the last written byte and the next one.
1205 	 */
1206 	ret = -1;
1207 	memset(bi, 0, sizeof bi);
1208 	depth = DEPTH(e.type);
1209 	bi[depth+1] = b;
1210 	i = mkindices(&e, (offset-1)/e.dsize, index);
1211 	if(i < 0)
1212 		goto Err;
1213 	if(i > depth)
1214 		goto Err;
1215 	ppb = e.psize / VtScoreSize;
1216 	epb = e.dsize / VtEntrySize;
1217 
1218 	/*
1219 	 * load the blocks along the last written byte
1220 	 */
1221 	index[depth] = r->offset % r->epb;
1222 	for(i=depth; i>=0; i--){
1223 		bb = blockwalk(b, index[i], r->c, VtORDWR, &e);
1224 		if(bb == nil)
1225 			goto Err;
1226 		bi[i] = bb;
1227 		b = bb;
1228 	}
1229 	ret = 0;
1230 
1231 	/*
1232 	 * walk up the path from leaf to root, flushing anything that
1233 	 * has been finished.
1234 	 */
1235 	base = e.type&~VtTypeDepthMask;
1236 	for(i=0; i<=depth; i++){
1237 		doflush = 0;
1238 		if(i == 0){
1239 			/* leaf: data or dir block */
1240 			if(offset%e.dsize == 0)
1241 				doflush = 1;
1242 		}else{
1243 			/*
1244 			 * interior node: pointer blocks.
1245 			 * specifically, b = bi[i] is a block whose index[i-1]'th entry
1246 			 * points at bi[i-1].
1247 			 */
1248 			b = bi[i];
1249 
1250 			/*
1251 			 * the index entries up to but not including index[i-1] point at
1252 			 * finished blocks, so flush them for sure.
1253 			 */
1254 			for(j=0; j<index[i-1]; j++)
1255 				if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
1256 					goto Err;
1257 
1258 			/*
1259 			 * if index[i-1] is the last entry in the block and is global
1260 			 * (i.e. the kid is flushed), then we can flush this block.
1261 			 */
1262 			if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock)
1263 				doflush = 1;
1264 		}
1265 		if(doflush){
1266 			if(i == depth)
1267 				score = e.score;
1268 			else
1269 				score = bi[i+1]->data+index[i]*VtScoreSize;
1270 			if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
1271 				goto Err;
1272 		}
1273 	}
1274 
1275 Err:
1276 	/* top: entry.  do this always so that the score is up-to-date */
1277 	vtentrypack(&e, bi[depth+1]->data, index[depth]);
1278 	for(i=0; i<nelem(bi); i++)
1279 		if(bi[i])
1280 			vtblockput(bi[i]);
1281 	return ret;
1282 }
1283