xref: /plan9/sys/src/cmd/fossil/fs.c (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static void fsMetaFlush(void *a);
7 static Snap *snapInit(Fs*);
8 static void snapClose(Snap*);
9 
10 Fs *
11 fsOpen(char *file, VtSession *z, long ncache, int mode)
12 {
13 	Fs *fs;
14 	Disk *disk;
15 	int fd;
16 	Block *b, *bs;
17 	Super super;
18 	int m;
19 	uchar oscore[VtScoreSize];
20 
21 	switch(mode){
22 	default:
23 		vtSetError(EBadMode);
24 		return nil;
25 	case OReadOnly:
26 		m = OREAD;
27 		break;
28 	case OReadWrite:
29 		m = ORDWR;
30 		break;
31 	}
32 	fd = open(file, m);
33 	if(fd < 0){
34 		vtSetError("open %s: %r", file);
35 		return nil;
36 	}
37 
38 	bwatchInit();
39 	disk = diskAlloc(fd);
40 	if(disk == nil){
41 		vtSetError("diskAlloc: %R");
42 		close(fd);
43 		return nil;
44 	}
45 
46 	fs = vtMemAllocZ(sizeof(Fs));
47 	fs->mode = mode;
48 	fs->blockSize = diskBlockSize(disk);
49 	fs->elk = vtLockAlloc();
50 	fs->cache = cacheAlloc(disk, z, ncache, mode);
51 	if(mode == OReadWrite && z)
52 		fs->arch = archInit(fs->cache, disk, fs, z);
53 	fs->z = z;
54 
55 	b = cacheLocal(fs->cache, PartSuper, 0, mode);
56 	if(b == nil)
57 		goto Err;
58 	if(!superUnpack(&super, b->data)){
59 		blockPut(b);
60 		vtSetError("bad super block");
61 		goto Err;
62 	}
63 	blockPut(b);
64 
65 	fs->ehi = super.epochHigh;
66 	fs->elo = super.epochLow;
67 
68 //fprint(2, "fs->ehi %d fs->elo %d active=%d\n", fs->ehi, fs->elo, super.active);
69 
70 	fs->source = sourceRoot(fs, super.active, mode);
71 	if(fs->source == nil){
72 		/*
73 		 * Perhaps it failed because the block is copy-on-write.
74 		 * Do the copy and try again.
75 		 */
76 		if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0)
77 			goto Err;
78 		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag, OReadWrite, 0);
79 		if(b == nil){
80 			vtSetError("cacheLocalData: %R");
81 			goto Err;
82 		}
83 		if(b->l.epoch == fs->ehi){
84 			blockPut(b);
85 			vtSetError("bad root source block");
86 			goto Err;
87 		}
88 		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
89 		if(b == nil)
90 			goto Err;
91 		localToGlobal(super.active, oscore);
92 		super.active = b->addr;
93 		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
94 		if(bs == nil){
95 			blockPut(b);
96 			vtSetError("cacheLocal: %R");
97 			goto Err;
98 		}
99 		superPack(&super, bs->data);
100 		blockDependency(bs, b, 0, oscore, nil);
101 		blockPut(b);
102 		blockDirty(bs);
103 		blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
104 		blockPut(bs);
105 		fs->source = sourceRoot(fs, super.active, mode);
106 		if(fs->source == nil){
107 			vtSetError("sourceRoot: %R");
108 			goto Err;
109 		}
110 	}
111 
112 //fprint(2, "got fs source\n");
113 
114 	vtRLock(fs->elk);
115 	fs->file = fileRoot(fs->source);
116 	vtRUnlock(fs->elk);
117 	if(fs->file == nil){
118 		vtSetError("fileRoot: %R");
119 		goto Err;
120 	}
121 
122 //fprint(2, "got file root\n");
123 
124 	if(mode == OReadWrite){
125 		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
126 		fs->snap = snapInit(fs);
127 	}
128 	return fs;
129 
130 Err:
131 fprint(2, "fsOpen error\n");
132 	fsClose(fs);
133 	return nil;
134 }
135 
136 void
137 fsClose(Fs *fs)
138 {
139 	vtRLock(fs->elk);
140 	periodicKill(fs->metaFlush);
141 	snapClose(fs->snap);
142 	if(fs->file){
143 		fileMetaFlush(fs->file, 0);
144 		if(!fileDecRef(fs->file))
145 			vtFatal("fsClose: files still in use: %r\n");
146 	}
147 	fs->file = nil;
148 	sourceClose(fs->source);
149 	cacheFree(fs->cache);
150 	if(fs->arch)
151 		archFree(fs->arch);
152 	vtRUnlock(fs->elk);
153 	vtLockFree(fs->elk);
154 	memset(fs, ~0, sizeof(Fs));
155 	vtMemFree(fs);
156 }
157 
158 int
159 fsRedial(Fs *fs, char *host)
160 {
161 	if(!vtRedial(fs->z, host))
162 		return 0;
163 	if(!vtConnect(fs->z, 0))
164 		return 0;
165 	return 1;
166 }
167 
168 File *
169 fsGetRoot(Fs *fs)
170 {
171 	return fileIncRef(fs->file);
172 }
173 
174 int
175 fsGetBlockSize(Fs *fs)
176 {
177 	return fs->blockSize;
178 }
179 
180 Block*
181 superGet(Cache *c, Super* super)
182 {
183 	Block *b;
184 
185 	if((b = cacheLocal(c, PartSuper, 0, OReadWrite)) == nil){
186 		fprint(2, "superGet: cacheLocal failed: %R");
187 		return nil;
188 	}
189 	if(!superUnpack(super, b->data)){
190 		fprint(2, "superGet: superUnpack failed: %R");
191 		blockPut(b);
192 		return nil;
193 	}
194 
195 	return b;
196 }
197 
198 void
199 superWrite(Block* b, Super* super, int forceWrite)
200 {
201 	superPack(super, b->data);
202 	blockDirty(b);
203 	if(forceWrite){
204 		while(!blockWrite(b)){
205 			/* BUG: what should really happen here? */
206 			fprint(2, "could not write super block; waiting 10 seconds\n");
207 			sleep(10*1000);
208 		}
209 		while(b->iostate != BioClean && b->iostate != BioDirty){
210 			assert(b->iostate == BioWriting);
211 			vtSleep(b->ioready);
212 		}
213 		/*
214 		 * it's okay that b might still be dirty.
215 		 * that means it got written out but with an old root pointer,
216 		 * but the other fields went out, and those are the ones
217 		 * we really care about.  (specifically, epochHigh; see fsSnapshot).
218 		 */
219 	}
220 }
221 
222 /*
223  * Prepare the directory to store a snapshot.
224  * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
225  * Archival snapshots go into /archive/yyyy/mmdd[.#].
226  *
227  * TODO This should be rewritten to eliminate most of the duplication.
228  */
229 static File*
230 fileOpenSnapshot(Fs *fs, char *dstpath, int doarchive)
231 {
232 	int n;
233 	char buf[30], *s, *p, *elem;
234 	File *dir, *f;
235 	Tm now;
236 
237 	if(dstpath){
238 		if((p = strrchr(dstpath, '/')) != nil){
239 			*p++ = '\0';
240 			elem = p;
241 			p = dstpath;
242 			if(*p == '\0')
243 				p = "/";
244 		}else{
245 			p = "/";
246 			elem = dstpath;
247 		}
248 		if((dir = fileOpen(fs, p)) == nil)
249 			return nil;
250 		f = fileCreate(dir, elem, ModeDir|ModeSnapshot|0555, "adm");
251 		fileDecRef(dir);
252 		return f;
253 	}else if(doarchive){
254 		/*
255 		 * a snapshot intended to be archived to venti.
256 		 */
257 		dir = fileOpen(fs, "/archive");
258 		if(dir == nil)
259 			return nil;
260 		now = *localtime(time(0));
261 
262 		/* yyyy */
263 		snprint(buf, sizeof(buf), "%d", now.year+1900);
264 		f = fileWalk(dir, buf);
265 		if(f == nil)
266 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
267 		fileDecRef(dir);
268 		if(f == nil)
269 			return nil;
270 		dir = f;
271 
272 		/* mmdd[#] */
273 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
274 		s = buf+strlen(buf);
275 		for(n=0;; n++){
276 			if(n)
277 				seprint(s, buf+sizeof(buf), ".%d", n);
278 			f = fileWalk(dir, buf);
279 			if(f != nil){
280 				fileDecRef(f);
281 				continue;
282 			}
283 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
284 			break;
285 		}
286 		fileDecRef(dir);
287 		return f;
288 	}else{
289 		/*
290 		 * Just a temporary snapshot
291 		 * We'll use /snapshot/yyyy/mmdd/hhmm.
292 		 * There may well be a better naming scheme.
293 		 * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
294 		 */
295 		dir = fileOpen(fs, "/snapshot");
296 		if(dir == nil)
297 			return nil;
298 
299 		now = *localtime(time(0));
300 
301 		/* yyyy */
302 		snprint(buf, sizeof(buf), "%d", now.year+1900);
303 		f = fileWalk(dir, buf);
304 		if(f == nil)
305 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
306 		fileDecRef(dir);
307 		if(f == nil)
308 			return nil;
309 		dir = f;
310 
311 		/* mmdd */
312 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
313 		f = fileWalk(dir, buf);
314 		if(f == nil)
315 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
316 		fileDecRef(dir);
317 		if(f == nil)
318 			return nil;
319 		dir = f;
320 
321 		/* hhmm[.#] */
322 		snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
323 		s = buf+strlen(buf);
324 		for(n=0;; n++){
325 			if(n)
326 				seprint(s, buf+sizeof(buf), ".%d", n);
327 			f = fileWalk(dir, buf);
328 			if(f != nil){
329 				fileDecRef(f);
330 				continue;
331 			}
332 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
333 			break;
334 		}
335 		fileDecRef(dir);
336 		return f;
337 	}
338 }
339 
340 static int
341 fsNeedArch(Fs *fs, uint archMinute)
342 {
343 	int need;
344 	File *f;
345 	char buf[100];
346 	Tm now;
347 	ulong then;
348 
349 	then = time(0);
350 	now = *localtime(then);
351 
352 	/* back up to yesterday if necessary */
353 	if(now.hour < archMinute/60
354 	|| now.hour == archMinute/60 && now.min < archMinute%60)
355 		now = *localtime(then-86400);
356 
357 	snprint(buf, sizeof buf, "/archive/%d/%02d%02d",
358 		now.year+1900, now.mon+1, now.mday);
359 	need = 1;
360 	vtRLock(fs->elk);
361 	f = fileOpen(fs, buf);
362 	if(f){
363 		need = 0;
364 		fileDecRef(f);
365 	}
366 	vtRUnlock(fs->elk);
367 	return need;
368 }
369 
370 int
371 fsEpochLow(Fs *fs, u32int low)
372 {
373 	Block *bs;
374 	Super super;
375 
376 	vtLock(fs->elk);
377 	if(low > fs->ehi){
378 		vtSetError("bad low epoch (must be <= %ud)", fs->ehi);
379 		vtUnlock(fs->elk);
380 		return 0;
381 	}
382 
383 	if((bs = superGet(fs->cache, &super)) == nil){
384 		vtUnlock(fs->elk);
385 		return 0;
386 	}
387 
388 	super.epochLow = low;
389 	fs->elo = low;
390 	superWrite(bs, &super, 1);
391 	blockPut(bs);
392 	vtUnlock(fs->elk);
393 
394 	return 1;
395 }
396 
397 static int
398 bumpEpoch(Fs *fs, int doarchive)
399 {
400 	uchar oscore[VtScoreSize];
401 	u32int oldaddr;
402 	Block *b, *bs;
403 	Entry e;
404 	Source *r;
405 	Super super;
406 
407 	/*
408 	 * Duplicate the root block.
409 	 *
410 	 * As a hint to flchk, the garbage collector,
411 	 * and any (human) debuggers, store a pointer
412 	 * to the old root block in entry 1 of the new root block.
413 	 */
414 	r = fs->source;
415 	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
416 	if(b == nil)
417 		return 0;
418 
419 	memset(&e, 0, sizeof e);
420 	e.flags = VtEntryActive | VtEntryLocal | VtEntryDir;
421 	memmove(e.score, b->score, VtScoreSize);
422 	e.tag = RootTag;
423 	e.snap = b->l.epoch;
424 
425 	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
426 	if(b == nil){
427 		fprint(2, "bumpEpoch: blockCopy: %R\n");
428 		return 0;
429 	}
430 
431 	if(0) fprint(2, "snapshot root from %d to %d\n", oldaddr, b->addr);
432 	entryPack(&e, b->data, 1);
433 	blockDirty(b);
434 
435 	/*
436 	 * Update the superblock with the new root and epoch.
437 	 */
438 	if((bs = superGet(fs->cache, &super)) == nil)
439 		return 0;
440 
441 	fs->ehi++;
442 	memmove(r->score, b->score, VtScoreSize);
443 	r->epoch = fs->ehi;
444 
445 	super.epochHigh = fs->ehi;
446 	oldaddr = super.active;
447 	super.active = b->addr;
448 	if(doarchive)
449 		super.next = oldaddr;
450 
451 	/*
452 	 * Record that the new super.active can't get written out until
453 	 * the new b gets written out.  Until then, use the old value.
454 	 */
455 	localToGlobal(oldaddr, oscore);
456 	blockDependency(bs, b, 0, oscore, nil);
457 	blockPut(b);
458 
459 	/*
460 	 * We force the super block to disk so that super.epochHigh gets updated.
461 	 * Otherwise, if we crash and come back, we might incorrectly treat as active
462 	 * some of the blocks that making up the snapshot we just created.
463 	 * Basically every block in the active file system and all the blocks in
464 	 * the recently-created snapshot depend on the super block now.
465 	 * Rather than record all those dependencies, we just force the block to disk.
466 	 *
467 	 * Note that blockWrite might actually (will probably) send a slightly outdated
468 	 * super.active to disk.  It will be the address of the most recent root that has
469 	 * gone to disk.
470 	 */
471 	superWrite(bs, &super, 1);
472 	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
473 	blockPut(bs);
474 
475 	return 1;
476 }
477 
478 int
479 saveQid(Fs *fs)
480 {
481 	Block *b;
482 	Super super;
483 	u64int qidMax;
484 
485 	if((b = superGet(fs->cache, &super)) == nil)
486 		return 0;
487 	qidMax = super.qid;
488 	blockPut(b);
489 
490 	if(!fileSetQidSpace(fs->file, 0, qidMax))
491 		return 0;
492 
493 	return 1;
494 }
495 
496 int
497 fsSnapshot(Fs *fs, char *srcpath, char *dstpath, int doarchive)
498 {
499 	File *src, *dst;
500 
501 	assert(fs->mode == OReadWrite);
502 
503 	dst = nil;
504 
505 	if(fs->halted){
506 		vtSetError("file system is halted");
507 		return 0;
508 	}
509 
510 	/*
511 	 * Freeze file system activity.
512 	 */
513 	vtLock(fs->elk);
514 
515 	/*
516 	 * Get the root of the directory we're going to save.
517 	 */
518 	if(srcpath == nil)
519 		srcpath = "/active";
520 	src = fileOpen(fs, srcpath);
521 	if(src == nil)
522 		goto Err;
523 
524 	/*
525 	 * It is important that we maintain the invariant that:
526 	 *	if both b and bb are marked as Active with start epoch e
527 	 *	and b points at bb, then no other pointers to bb exist.
528 	 *
529 	 * When bb is unlinked from b, its close epoch is set to b's epoch.
530 	 * A block with epoch == close epoch is
531 	 * treated as free by cacheAllocBlock; this aggressively
532 	 * reclaims blocks after they have been stored to Venti.
533 	 *
534 	 * Let's say src->source is block sb, and src->msource is block
535 	 * mb.  Let's also say that block b holds the Entry structures for
536 	 * both src->source and src->msource (their Entry structures might
537 	 * be in different blocks, but the argument is the same).
538 	 * That is, right now we have:
539 	 *
540 	 *	b	Active w/ epoch e, holds ptrs to sb and mb.
541 	 *	sb	Active w/ epoch e.
542 	 *	mb	Active w/ epoch e.
543 	 *
544 	 * With things as they are now, the invariant requires that
545 	 * b holds the only pointers to sb and mb.  We want to record
546 	 * pointers to sb and mb in new Entries corresponding to dst,
547 	 * which breaks the invariant.  Thus we need to do something
548 	 * about b.  Specifically, we bump the file system's epoch and
549 	 * then rewalk the path from the root down to and including b.
550 	 * This will copy-on-write as we walk, so now the state will be:
551 	 *
552 	 *	b	Snap w/ epoch e, holds ptrs to sb and mb.
553 	 *	new-b	Active w/ epoch e+1, holds ptrs to sb and mb.
554 	 *	sb	Active w/ epoch e.
555 	 *	mb	Active w/ epoch e.
556 	 *
557 	 * In this state, it's perfectly okay to make more pointers to sb and mb.
558 	 */
559 	if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
560 		goto Err;
561 
562 	/*
563 	 * Sync to disk.  I'm not sure this is necessary, but better safe than sorry.
564 	 */
565 	cacheFlush(fs->cache, 1);
566 
567 	/*
568 	 * Create the directory where we will store the copy of src.
569 	 */
570 	dst = fileOpenSnapshot(fs, dstpath, doarchive);
571 	if(dst == nil)
572 		goto Err;
573 
574 	/*
575 	 * Actually make the copy by setting dst's source and msource
576 	 * to be src's.
577 	 */
578 	if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
579 		goto Err;
580 
581 	fileDecRef(src);
582 	fileDecRef(dst);
583 	src = nil;
584 	dst = nil;
585 
586 	/*
587 	 * Make another copy of the file system.  This one is for the
588 	 * archiver, so that the file system we archive has the recently
589 	 * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
590 	 */
591 	if(doarchive){
592 		if(!saveQid(fs))
593 			goto Err;
594 		if(!bumpEpoch(fs, 1))
595 			goto Err;
596 	}
597 
598 	vtUnlock(fs->elk);
599 
600 	/* BUG? can fs->arch fall out from under us here? */
601 	if(doarchive && fs->arch)
602 		archKick(fs->arch);
603 
604 	return 1;
605 
606 Err:
607 	fprint(2, "fsSnapshot: %R\n");
608 	if(src)
609 		fileDecRef(src);
610 	if(dst)
611 		fileDecRef(dst);
612 	vtUnlock(fs->elk);
613 	return 0;
614 }
615 
616 int
617 fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
618 {
619 	int r;
620 	DirEntry de;
621 	Entry e, ee;
622 	File *f;
623 
624 	vtRLock(fs->elk);
625 	f = fileOpen(fs, name);
626 	if(f == nil){
627 		vtRUnlock(fs->elk);
628 		return 0;
629 	}
630 
631 	if(!fileGetSources(f, &e, &ee) || !fileGetDir(f, &de)){
632 		fileDecRef(f);
633 		vtRUnlock(fs->elk);
634 		return 0;
635 	}
636 	fileDecRef(f);
637 
638 	r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
639 	vtRUnlock(fs->elk);
640 	return r;
641 }
642 
643 static int
644 vtWriteBlock(VtSession *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
645 {
646 	if(!vtWrite(z, score, type, buf, n))
647 		return 0;
648 	if(!vtSha1Check(score, buf, n))
649 		return 0;
650 	return 1;
651 }
652 
653 int
654 mkVac(VtSession *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
655 {
656 	uchar buf[8192];
657 	int i;
658 	uchar *p;
659 	uint n;
660 	DirEntry de;
661 	Entry e, ee, eee;
662 	MetaBlock mb;
663 	MetaEntry me;
664 	VtRoot root;
665 
666 	e = *pe;
667 	ee = *pee;
668 	de = *pde;
669 
670 	if(globalToLocal(e.score) != NilBlock
671 	|| (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
672 		vtSetError("can only vac paths already stored on venti");
673 		return 0;
674 	}
675 
676 	/*
677 	 * Build metadata source for root.
678 	 */
679 	n = deSize(&de);
680 	if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
681 		vtSetError("DirEntry too big");
682 		return 0;
683 	}
684 	memset(buf, 0, sizeof buf);
685 	mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
686 	p = mbAlloc(&mb, n);
687 	if(p == nil)
688 		abort();
689 	mbSearch(&mb, de.elem, &i, &me);
690 	assert(me.p == nil);
691 	me.p = p;
692 	me.size = n;
693 	dePack(&de, &me);
694 	mbInsert(&mb, i, &me);
695 	mbPack(&mb);
696 
697 	eee.size = n+MetaHeaderSize+MetaIndexSize;
698 	if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
699 		return 0;
700 	eee.psize = 8192;
701 	eee.dsize = 8192;
702 	eee.depth = 0;
703 	eee.flags = VtEntryActive;
704 
705 	/*
706 	 * Build root source with three entries in it.
707 	 */
708 	entryPack(&e, buf, 0);
709 	entryPack(&ee, buf, 1);
710 	entryPack(&eee, buf, 2);
711 
712 	n = VtEntrySize*3;
713 	memset(&root, 0, sizeof root);
714 	if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
715 		return 0;
716 
717 	/*
718 	 * Save root.
719 	 */
720 	root.version = VtRootVersion;
721 	strecpy(root.type, root.type+sizeof root.type, "vac");
722 	strecpy(root.name, root.name+sizeof root.name, de.elem);
723 	root.blockSize = blockSize;
724 	vtRootPack(&root, buf);
725 	if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
726 		return 0;
727 
728 	return 1;
729 }
730 
731 int
732 fsSync(Fs *fs)
733 {
734 	vtLock(fs->elk);
735 	fileMetaFlush(fs->file, 1);
736 	cacheFlush(fs->cache, 1);
737 	vtUnlock(fs->elk);
738 	return 1;
739 }
740 
741 int
742 fsHalt(Fs *fs)
743 {
744 	vtLock(fs->elk);
745 	fs->halted = 1;
746 	fileMetaFlush(fs->file, 1);
747 	cacheFlush(fs->cache, 1);
748 	return 1;
749 }
750 
751 int
752 fsUnhalt(Fs *fs)
753 {
754 	if(!fs->halted)
755 		return 0;
756 	fs->halted = 0;
757 	vtUnlock(fs->elk);
758 	return 1;
759 }
760 
761 int
762 fsNextQid(Fs *fs, u64int *qid)
763 {
764 	Block *b;
765 	Super super;
766 
767 	if((b = superGet(fs->cache, &super)) == nil)
768 		return 0;
769 
770 	*qid = super.qid++;
771 
772 	/*
773 	 * It's okay if the super block doesn't go to disk immediately,
774 	 * since fileMetaAlloc will record a dependency between the
775 	 * block holding this qid and the super block.  See file.c:/^fileMetaAlloc.
776 	 */
777 	superWrite(b, &super, 0);
778 	blockPut(b);
779 	return 1;
780 }
781 
782 static void
783 fsMetaFlush(void *a)
784 {
785 	int rv;
786 	Fs *fs = a;
787 
788 	vtRLock(fs->elk);
789 	rv = fileMetaFlush(fs->file, 1);
790 	vtRUnlock(fs->elk);
791 	if(rv > 0)
792 		cacheFlush(fs->cache, 0);
793 }
794 
795 static int
796 fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
797 {
798 	int n, r;
799 	DirEntry de;
800 	DirEntryEnum *dee;
801 	File *ff;
802 	Entry e, ee;
803 	char *t;
804 
805 	dee = deeOpen(f);
806 	if(dee == nil)
807 		return 0;
808 
809 	n = 0;
810 	for(;;){
811 		r = deeRead(dee, &de);
812 		if(r <= 0)
813 			break;
814 		if(de.mode & ModeSnapshot){
815 			if((ff = fileWalk(f, de.elem)) != nil){
816 				if(fileGetSources(ff, &e, &ee))
817 					if(de.mtime >= savetime && e.snap != 0)
818 						if(e.snap < *plo)
819 							*plo = e.snap;
820 				fileDecRef(ff);
821 			}
822 		}
823 		else if(de.mode & ModeDir){
824 			if((ff = fileWalk(f, de.elem)) != nil){
825 				t = smprint("%s/%s", path, de.elem);
826 				n += fsEsearch1(ff, t, savetime, plo);
827 				vtMemFree(t);
828 				fileDecRef(ff);
829 			}
830 		}
831 		deCleanup(&de);
832 		if(r < 0)
833 			break;
834 	}
835 	deeClose(dee);
836 
837 	return n;
838 }
839 
840 static int
841 fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
842 {
843 	int n;
844 	File *f;
845 	DirEntry de;
846 
847 	f = fileOpen(fs, path);
848 	if(f == nil)
849 		return 0;
850 	if(!fileGetDir(f, &de)){
851 		fileDecRef(f);
852 		return 0;
853 	}
854 	if((de.mode & ModeDir) == 0){
855 		fileDecRef(f);
856 		deCleanup(&de);
857 		return 0;
858 	}
859 	deCleanup(&de);
860 	n = fsEsearch1(f, path, savetime, plo);
861 	fileDecRef(f);
862 	return n;
863 }
864 
865 void
866 fsSnapshotCleanup(Fs *fs, u32int age)
867 {
868 	u32int lo;
869 
870 	/*
871 	 * Find the best low epoch we can use,
872 	 * given that we need to save all the unventied archives
873 	 * and all the snapshots younger than age.
874 	 */
875 	vtRLock(fs->elk);
876 	lo = fs->ehi;
877 	fsEsearch(fs, "/archive", 0, &lo);
878 	fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
879 	vtRUnlock(fs->elk);
880 
881 	fsEpochLow(fs, lo);
882 	fsSnapshotRemove(fs);
883 }
884 
885 /* remove all snapshots that have expired */
886 /* return number of directory entries remaining */
887 static int
888 fsRsearch1(File *f, char *s)
889 {
890 	int n, r;
891 	DirEntry de;
892 	DirEntryEnum *dee;
893 	File *ff;
894 	char *t;
895 
896 	dee = deeOpen(f);
897 	if(dee == nil)
898 		return 0;
899 
900 	n = 0;
901 	for(;;){
902 		r = deeRead(dee, &de);
903 		if(r <= 0)
904 			break;
905 		n++;
906 		if(de.mode & ModeSnapshot){
907 			if((ff = fileWalk(f, de.elem)) != nil)
908 				fileDecRef(ff);
909 			else if(strcmp(vtGetError(), ESnapOld) == 0){
910 				if(fileClri(f, de.elem, "adm"))
911 					n--;
912 			}
913 		}
914 		else if(de.mode & ModeDir){
915 			if((ff = fileWalk(f, de.elem)) != nil){
916 				t = smprint("%s/%s", s, de.elem);
917 				if(fsRsearch1(ff, t) == 0)
918 					if(fileRemove(ff, "adm"))
919 						n--;
920 				vtMemFree(t);
921 				fileDecRef(ff);
922 			}
923 		}
924 		deCleanup(&de);
925 		if(r < 0)
926 			break;
927 	}
928 	deeClose(dee);
929 
930 	return n;
931 }
932 
933 static int
934 fsRsearch(Fs *fs, char *path)
935 {
936 	File *f;
937 	DirEntry de;
938 
939 	f = fileOpen(fs, path);
940 	if(f == nil)
941 		return 0;
942 	if(!fileGetDir(f, &de)){
943 		fileDecRef(f);
944 		return 0;
945 	}
946 	if((de.mode & ModeDir) == 0){
947 		fileDecRef(f);
948 		deCleanup(&de);
949 		return 0;
950 	}
951 	deCleanup(&de);
952 	fsRsearch1(f, path);
953 	fileDecRef(f);
954 	return 1;
955 }
956 
957 void
958 fsSnapshotRemove(Fs *fs)
959 {
960 	vtRLock(fs->elk);
961 	fsRsearch(fs, "/snapshot");
962 	vtRUnlock(fs->elk);
963 }
964 
965 struct Snap
966 {
967 	Fs *fs;
968 	Periodic *tick;
969 	VtLock *lk;
970 	uint snapMinutes;
971 	uint archMinute;
972 	uint snapLife;
973 	u32int lastSnap;
974 	u32int lastArch;
975 	u32int lastCleanup;
976 	uint ignore;
977 };
978 
979 static void
980 snapEvent(void *v)
981 {
982 	Snap *s;
983 	u32int now, min;
984 	Tm tm;
985 	int need;
986 
987 	s = v;
988 
989 	now = time(0)/60;
990 	vtLock(s->lk);
991 
992 	/*
993 	 * Snapshots happen every snapMinutes minutes.
994 	 * If we miss a snapshot (for example, because we
995 	 * were down), we wait for the next one.
996 	 */
997 	if(s->snapMinutes != ~0 && s->snapMinutes != 0
998 	&& now%s->snapMinutes==0 && now != s->lastSnap){
999 		if(!fsSnapshot(s->fs, nil, nil, 0))
1000 			fprint(2, "fsSnapshot snap: %R\n");
1001 		s->lastSnap = now;
1002 	}
1003 
1004 	/*
1005 	 * Archival snapshots happen at archMinute.
1006 	 * If we miss an archive (for example, because we
1007 	 * were down), we do it as soon as possible.
1008 	 */
1009 	tm = *localtime(now*60);
1010 	min = tm.hour*60+tm.min;
1011 	if(s->archMinute != ~0){
1012 		need = 0;
1013 		if(min == s->archMinute && now != s->lastArch)
1014 			need = 1;
1015 		if(s->lastArch == 0){
1016 			s->lastArch = 1;
1017 			if(fsNeedArch(s->fs, s->archMinute))
1018 				need = 1;
1019 		}
1020 		if(need){
1021 			fsSnapshot(s->fs, nil, nil, 1);
1022 			s->lastArch = now;
1023 		}
1024 	}
1025 
1026 	/*
1027 	 * Snapshot cleanup happens every snaplife or every day.
1028 	 */
1029 	if(s->snapLife != ~0
1030 	&& (s->lastCleanup+s->snapLife < now || s->lastCleanup+24*60 < now)){
1031 		fsSnapshotCleanup(s->fs, s->snapLife);
1032 		s->lastCleanup = now;
1033 	}
1034 	vtUnlock(s->lk);
1035 }
1036 
1037 static Snap*
1038 snapInit(Fs *fs)
1039 {
1040 	Snap *s;
1041 
1042 	s = vtMemAllocZ(sizeof(Snap));
1043 	s->fs = fs;
1044 	s->tick = periodicAlloc(snapEvent, s, 10*1000);
1045 	s->lk = vtLockAlloc();
1046 	s->snapMinutes = -1;
1047 	s->archMinute = -1;
1048 	s->snapLife = -1;
1049 	s->ignore = 5*2;	/* wait five minutes for clock to stabilize */
1050 	return s;
1051 }
1052 
1053 void
1054 snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
1055 {
1056 	if(s == nil){
1057 		*snap = -1;
1058 		*arch = -1;
1059 		*snaplen = -1;
1060 		return;
1061 	}
1062 
1063 	vtLock(s->lk);
1064 	*snap = s->snapMinutes;
1065 	*arch = s->archMinute;
1066 	*snaplen = s->snapLife;
1067 	vtUnlock(s->lk);
1068 }
1069 
1070 void
1071 snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
1072 {
1073 	if(s == nil)
1074 		return;
1075 
1076 	vtLock(s->lk);
1077 	s->snapMinutes = snap;
1078 	s->archMinute = arch;
1079 	s->snapLife = snaplen;
1080 	vtUnlock(s->lk);
1081 }
1082 
1083 static void
1084 snapClose(Snap *s)
1085 {
1086 	if(s == nil)
1087 		return;
1088 
1089 	periodicKill(s->tick);
1090 	vtMemFree(s);
1091 }
1092 
1093