xref: /plan9/sys/src/cmd/fossil/fs.c (revision e0d6d19cdffb15d5c5f1e7337cee05064ead1fd0)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static void fsMetaFlush(void *a);
7 static Snap *snapInit(Fs*);
8 static void snapClose(Snap*);
9 
10 Fs *
11 fsOpen(char *file, VtSession *z, long ncache, int mode)
12 {
13 	Fs *fs;
14 	Disk *disk;
15 	int fd;
16 	Block *b, *bs;
17 	Super super;
18 	int m;
19 	uchar oscore[VtScoreSize];
20 
21 	switch(mode){
22 	default:
23 		vtSetError(EBadMode);
24 		return nil;
25 	case OReadOnly:
26 		m = OREAD;
27 		break;
28 	case OReadWrite:
29 		m = ORDWR;
30 		break;
31 	}
32 	fd = open(file, m);
33 	if(fd < 0){
34 		vtSetError("open %s: %r", file);
35 		return nil;
36 	}
37 
38 	bwatchInit();
39 	disk = diskAlloc(fd);
40 	if(disk == nil){
41 		vtSetError("diskAlloc: %R");
42 		close(fd);
43 		return nil;
44 	}
45 
46 	fs = vtMemAllocZ(sizeof(Fs));
47 	fs->mode = mode;
48 	fs->blockSize = diskBlockSize(disk);
49 	fs->elk = vtLockAlloc();
50 	fs->cache = cacheAlloc(disk, z, ncache, mode);
51 	if(mode == OReadWrite && z)
52 		fs->arch = archInit(fs->cache, disk, fs, z);
53 	fs->z = z;
54 
55 	b = cacheLocal(fs->cache, PartSuper, 0, mode);
56 	if(b == nil)
57 		goto Err;
58 	if(!superUnpack(&super, b->data)){
59 		blockPut(b);
60 		vtSetError("bad super block");
61 		goto Err;
62 	}
63 	blockPut(b);
64 
65 	fs->ehi = super.epochHigh;
66 	fs->elo = super.epochLow;
67 
68 //fprint(2, "fs->ehi %d fs->elo %d active=%d\n", fs->ehi, fs->elo, super.active);
69 
70 	fs->source = sourceRoot(fs, super.active, mode);
71 	if(fs->source == nil){
72 		/*
73 		 * Perhaps it failed because the block is copy-on-write.
74 		 * Do the copy and try again.
75 		 */
76 		if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0)
77 			goto Err;
78 		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag, OReadWrite, 0);
79 		if(b == nil){
80 			vtSetError("cacheLocalData: %R");
81 			goto Err;
82 		}
83 		if(!(b->l.state&BsClosed) && b->l.epoch == fs->ehi){
84 			blockPut(b);
85 			vtSetError("bad root source block");
86 			goto Err;
87 		}
88 		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
89 		if(b == nil)
90 			goto Err;
91 		localToGlobal(super.active, oscore);
92 		super.active = b->addr;
93 		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
94 		if(bs == nil){
95 			blockPut(b);
96 			vtSetError("cacheLocal: %R");
97 			goto Err;
98 		}
99 		superPack(&super, bs->data);
100 		blockDependency(bs, b, 0, oscore, nil);
101 		blockPut(b);
102 		blockDirty(bs);
103 		blockPut(bs);
104 		fs->source = sourceRoot(fs, super.active, mode);
105 		if(fs->source == nil){
106 			vtSetError("sourceRoot: %R");
107 			goto Err;
108 		}
109 	}
110 
111 //fprint(2, "got fs source\n");
112 
113 	vtRLock(fs->elk);
114 	fs->file = fileRoot(fs->source);
115 	vtRUnlock(fs->elk);
116 	if(fs->file == nil){
117 		vtSetError("fileRoot: %R");
118 		goto Err;
119 	}
120 
121 //fprint(2, "got file root\n");
122 
123 	if(mode == OReadWrite){
124 		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
125 		fs->snap = snapInit(fs);
126 	}
127 	return fs;
128 
129 Err:
130 fprint(2, "fsOpen error\n");
131 	fsClose(fs);
132 	return nil;
133 }
134 
135 void
136 fsClose(Fs *fs)
137 {
138 	vtRLock(fs->elk);
139 	periodicKill(fs->metaFlush);
140 	snapClose(fs->snap);
141 	if(fs->file){
142 		fileMetaFlush(fs->file, 0);
143 		if(!fileDecRef(fs->file))
144 			vtFatal("fsClose: files still in use: %r\n");
145 	}
146 	fs->file = nil;
147 	sourceClose(fs->source);
148 	cacheFree(fs->cache);
149 	if(fs->arch)
150 		archFree(fs->arch);
151 	vtRUnlock(fs->elk);
152 	vtLockFree(fs->elk);
153 	memset(fs, ~0, sizeof(Fs));
154 	vtMemFree(fs);
155 }
156 
157 int
158 fsRedial(Fs *fs, char *host)
159 {
160 	if(!vtRedial(fs->z, host))
161 		return 0;
162 	if(!vtConnect(fs->z, 0))
163 		return 0;
164 	return 1;
165 }
166 
167 File *
168 fsGetRoot(Fs *fs)
169 {
170 	return fileIncRef(fs->file);
171 }
172 
173 int
174 fsGetBlockSize(Fs *fs)
175 {
176 	return fs->blockSize;
177 }
178 
179 Block*
180 superGet(Cache *c, Super* super)
181 {
182 	Block *b;
183 
184 	if((b = cacheLocal(c, PartSuper, 0, OReadWrite)) == nil){
185 		fprint(2, "superGet: cacheLocal failed: %R");
186 		return nil;
187 	}
188 	if(!superUnpack(super, b->data)){
189 		fprint(2, "superGet: superUnpack failed: %R");
190 		blockPut(b);
191 		return nil;
192 	}
193 
194 	return b;
195 }
196 
197 void
198 superPut(Block* b, Super* super, int forceWrite)
199 {
200 	superPack(super, b->data);
201 	blockDirty(b);
202 	if(forceWrite){
203 		while(!blockWrite(b)){
204 			/* BUG: what should really happen here? */
205 			fprint(2, "could not write super block; waiting 10 seconds\n");
206 			sleep(10*000);
207 		}
208 		while(b->iostate != BioClean && b->iostate != BioDirty){
209 			assert(b->iostate == BioWriting);
210 			vtSleep(b->ioready);
211 		}
212 		/*
213 		 * it's okay that b might still be dirty.
214 		 * that means it got written out but with an old root pointer,
215 		 * but the other fields went out, and those are the ones
216 		 * we really care about.  (specifically, epochHigh; see fsSnapshot).
217 		 */
218 	}
219 	blockPut(b);
220 }
221 
222 /*
223  * Prepare the directory to store a snapshot.
224  * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
225  * Archival snapshots go into /archive/yyyy/mmdd[.#].
226  *
227  * TODO This should be rewritten to eliminate most of the duplication.
228  */
229 static File*
230 fileOpenSnapshot(Fs *fs, char *dstpath, int doarchive)
231 {
232 	int n;
233 	char buf[30], *s, *p, *elem;
234 	File *dir, *f;
235 	Tm now;
236 
237 	if(dstpath){
238 		if((p = strrchr(dstpath, '/')) != nil){
239 			*p++ = '\0';
240 			elem = p;
241 			p = dstpath;
242 			if(*p == '\0')
243 				p = "/";
244 		}else{
245 			p = "/";
246 			elem = dstpath;
247 		}
248 		if((dir = fileOpen(fs, p)) == nil)
249 			return nil;
250 		f = fileCreate(dir, elem, ModeDir|ModeSnapshot|0555, "adm");
251 		fileDecRef(dir);
252 		return f;
253 	}else if(doarchive){
254 		/*
255 		 * a snapshot intended to be archived to venti.
256 		 */
257 		dir = fileOpen(fs, "/archive");
258 		if(dir == nil)
259 			return nil;
260 		now = *localtime(time(0));
261 
262 		/* yyyy */
263 		snprint(buf, sizeof(buf), "%d", now.year+1900);
264 		f = fileWalk(dir, buf);
265 		if(f == nil)
266 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
267 		fileDecRef(dir);
268 		if(f == nil)
269 			return nil;
270 		dir = f;
271 
272 		/* mmdd[#] */
273 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
274 		s = buf+strlen(buf);
275 		for(n=0;; n++){
276 			if(n)
277 				seprint(s, buf+sizeof(buf), ".%d", n);
278 			f = fileWalk(dir, buf);
279 			if(f != nil){
280 				fileDecRef(f);
281 				continue;
282 			}
283 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
284 			break;
285 		}
286 		fileDecRef(dir);
287 		return f;
288 	}else{
289 		/*
290 		 * Just a temporary snapshot
291 		 * We'll use /snapshot/yyyy/mmdd/hhmm.
292 		 * There may well be a better naming scheme.
293 		 * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
294 		 */
295 		dir = fileOpen(fs, "/snapshot");
296 		if(dir == nil)
297 			return nil;
298 
299 		now = *localtime(time(0));
300 
301 		/* yyyy */
302 		snprint(buf, sizeof(buf), "%d", now.year+1900);
303 		f = fileWalk(dir, buf);
304 		if(f == nil)
305 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
306 		fileDecRef(dir);
307 		if(f == nil)
308 			return nil;
309 		dir = f;
310 
311 		/* mmdd */
312 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
313 		f = fileWalk(dir, buf);
314 		if(f == nil)
315 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
316 		fileDecRef(dir);
317 		if(f == nil)
318 			return nil;
319 		dir = f;
320 
321 		/* hhmm[.#] */
322 		snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
323 		s = buf+strlen(buf);
324 		for(n=0;; n++){
325 			if(n)
326 				seprint(s, buf+sizeof(buf), ".%d", n);
327 			f = fileWalk(dir, buf);
328 			if(f != nil){
329 				fileDecRef(f);
330 				continue;
331 			}
332 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
333 			break;
334 		}
335 		fileDecRef(dir);
336 		return f;
337 	}
338 }
339 
340 static int
341 fsNeedArch(Fs *fs, uint archMinute)
342 {
343 	int need;
344 	File *f;
345 	char buf[100];
346 	Tm now;
347 	ulong then;
348 
349 	then = time(0);
350 	now = *localtime(then);
351 
352 	/* back up to yesterday if necessary */
353 	if(now.hour < archMinute/60
354 	|| now.hour == archMinute/60 && now.min < archMinute%60)
355 		now = *localtime(then-86400);
356 
357 	snprint(buf, sizeof buf, "/archive/%d/%02d%02d",
358 		now.year+1900, now.mon+1, now.mday);
359 	need = 1;
360 	vtRLock(fs->elk);
361 	f = fileOpen(fs, buf);
362 	if(f){
363 		need = 0;
364 		fileDecRef(f);
365 	}
366 	vtRUnlock(fs->elk);
367 	return need;
368 }
369 
370 int
371 fsEpochLow(Fs *fs, u32int low)
372 {
373 	Block *bs;
374 	Super super;
375 
376 	vtLock(fs->elk);
377 	if(low > fs->ehi){
378 		vtSetError("bad low epoch (must be <= %ud)", fs->ehi);
379 		vtUnlock(fs->elk);
380 		return 0;
381 	}
382 
383 	if((bs = superGet(fs->cache, &super)) == nil){
384 		vtUnlock(fs->elk);
385 		return 0;
386 	}
387 
388 	super.epochLow = low;
389 	fs->elo = low;
390 	superPut(bs, &super, 1);
391 	vtUnlock(fs->elk);
392 
393 	return 1;
394 }
395 
396 static int
397 bumpEpoch(Fs *fs, int doarchive)
398 {
399 	uchar oscore[VtScoreSize];
400 	u32int oldaddr;
401 	Block *b, *bs;
402 	Entry e;
403 	Source *r;
404 	Super super;
405 
406 	/*
407 	 * Duplicate the root block.
408 	 *
409 	 * As a hint to flchk, the garbage collector,
410 	 * and any (human) debuggers, store a pointer
411 	 * to the old root block in entry 1 of the new root block.
412 	 */
413 	r = fs->source;
414 	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
415 	if(b == nil)
416 		return 0;
417 
418 	memset(&e, 0, sizeof e);
419 	e.flags = VtEntryActive | VtEntryLocal | VtEntryDir;
420 	memmove(e.score, b->score, VtScoreSize);
421 	e.tag = RootTag;
422 	e.snap = b->l.epoch;
423 
424 	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
425 	if(b == nil){
426 		fprint(2, "bumpEpoch: blockCopy: %R\n");
427 		return 0;
428 	}
429 
430 	if(0) fprint(2, "snapshot root from %d to %d\n", oldaddr, b->addr);
431 	entryPack(&e, b->data, 1);
432 	blockDirty(b);
433 
434 	/*
435 	 * Update the superblock with the new root and epoch.
436 	 */
437 	if((bs = superGet(fs->cache, &super)) == nil)
438 		return 0;
439 
440 	fs->ehi++;
441 	memmove(r->score, b->score, VtScoreSize);
442 	r->epoch = fs->ehi;
443 
444 	super.epochHigh = fs->ehi;
445 	oldaddr = super.active;
446 	super.active = b->addr;
447 	if(doarchive)
448 		super.next = oldaddr;
449 
450 	/*
451 	 * Record that the new super.active can't get written out until
452 	 * the new b gets written out.  Until then, use the old value.
453 	 */
454 	localToGlobal(oldaddr, oscore);
455 	blockDependency(bs, b, 0, oscore, nil);
456 	blockPut(b);
457 
458 	/*
459 	 * We force the super block to disk so that super.epochHigh gets updated.
460 	 * Otherwise, if we crash and come back, we might incorrectly treat as active
461 	 * some of the blocks that making up the snapshot we just created.
462 	 * Basically every block in the active file system and all the blocks in
463 	 * the recently-created snapshot depend on the super block now.
464 	 * Rather than record all those dependencies, we just force the block to disk.
465 	 *
466 	 * Note that blockWrite might actually (will probably) send a slightly outdated
467 	 * super.active to disk.  It will be the address of the most recent root that has
468 	 * gone to disk.
469 	 */
470 	superPut(bs, &super, 1);
471 
472 	return 1;
473 }
474 
475 int
476 saveQid(Fs *fs)
477 {
478 	Block *b;
479 	Super super;
480 	u64int qidMax;
481 
482 	if((b = superGet(fs->cache, &super)) == nil)
483 		return 0;
484 	qidMax = super.qid;
485 	blockPut(b);
486 
487 	if(!fileSetQidSpace(fs->file, 0, qidMax))
488 		return 0;
489 
490 	return 1;
491 }
492 
493 int
494 fsSnapshot(Fs *fs, char *srcpath, char *dstpath, int doarchive)
495 {
496 	File *src, *dst;
497 
498 	assert(fs->mode == OReadWrite);
499 
500 	dst = nil;
501 
502 	if(fs->halted){
503 		vtSetError("file system is halted");
504 		return 0;
505 	}
506 
507 	/*
508 	 * Freeze file system activity.
509 	 */
510 	vtLock(fs->elk);
511 
512 	/*
513 	 * Get the root of the directory we're going to save.
514 	 */
515 	if(srcpath == nil)
516 		srcpath = "/active";
517 	src = fileOpen(fs, srcpath);
518 	if(src == nil)
519 		goto Err;
520 
521 	/*
522 	 * It is important that we maintain the invariant that:
523 	 *	if both b and bb are marked as Active with epoch e
524 	 *	and b points at bb, then no other pointers to bb exist.
525 	 *
526 	 * The archiver uses this property to aggressively reclaim
527 	 * such blocks once they have been stored on Venti, and
528 	 * blockCleanup knows about this property as well.
529 	 *
530 	 * Let's say src->source is block sb, and src->msource is block
531 	 * mb.  Let's also say that block b holds the Entry structures for
532 	 * both src->source and src->msource (their Entry structures might
533 	 * be in different blocks, but the argument is the same).
534 	 * That is, right now we have:
535 	 *
536 	 *	b	Active w/ epoch e, holds ptrs to sb and mb.
537 	 *	sb	Active w/ epoch e.
538 	 *	mb	Active w/ epoch e.
539 	 *
540 	 * With things as they are now, the invariant requires that
541 	 * b holds the only pointers to sb and mb.  We want to record
542 	 * pointers to sb and mb in new Entries corresponding to dst,
543 	 * which breaks the invariant.  Thus we need to do something
544 	 * about b.  Specifically, we bump the file system's epoch and
545 	 * then rewalk the path from the root down to and including b.
546 	 * This will copy-on-write as we walk, so now the state will be:
547 	 *
548 	 *	b	Snap w/ epoch e, holds ptrs to sb and mb.
549 	 *	new-b	Active w/ epoch e+1, holds ptrs to sb and mb.
550 	 *	sb	Active w/ epoch e.
551 	 *	mb	Active w/ epoch e.
552 	 *
553 	 * In this state, it's perfectly okay to add pointers to dst, which
554 	 * will live in a block marked Active with epoch e+1.
555 	 *
556 	 * Of course, we need to make sure that the copied path makes
557 	 * it out to disk before the new dst block; if the dst block goes out
558 	 * first and then we crash, the invariant is violated.  Rather than
559 	 * deal with the dependencies, we just sync the file system to disk
560 	 * right now.
561 	 */
562 	if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
563 		goto Err;
564 
565 	/*
566 	 * Sync to disk.
567 	 */
568 	cacheFlush(fs->cache, 1);
569 
570 	/*
571 	 * Create the directory where we will store the copy of src.
572 	 */
573 	dst = fileOpenSnapshot(fs, dstpath, doarchive);
574 	if(dst == nil)
575 		goto Err;
576 
577 	/*
578 	 * Actually make the copy by setting dst's source and msource
579 	 * to be src's.
580 	 */
581 	if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
582 		goto Err;
583 
584 	fileDecRef(src);
585 	fileDecRef(dst);
586 	src = nil;
587 	dst = nil;
588 
589 	/*
590 	 * Make another copy of the file system.  This one is for the
591 	 * archiver, so that the file system we archive has the recently
592 	 * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
593 	 */
594 	if(doarchive){
595 		if(!saveQid(fs))
596 			goto Err;
597 		if(!bumpEpoch(fs, 1))
598 			goto Err;
599 	}
600 
601 	vtUnlock(fs->elk);
602 
603 	/* BUG? can fs->arch fall out from under us here? */
604 	if(doarchive && fs->arch)
605 		archKick(fs->arch);
606 
607 	return 1;
608 
609 Err:
610 	fprint(2, "fsSnapshot: %R\n");
611 	if(src)
612 		fileDecRef(src);
613 	if(dst)
614 		fileDecRef(dst);
615 	vtUnlock(fs->elk);
616 	return 0;
617 }
618 
619 int
620 fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
621 {
622 	int r;
623 	DirEntry de;
624 	Entry e, ee;
625 	File *f;
626 
627 	vtRLock(fs->elk);
628 	f = fileOpen(fs, name);
629 	if(f == nil){
630 		vtRUnlock(fs->elk);
631 		return 0;
632 	}
633 
634 	if(!fileGetSources(f, &e, &ee, 0) || !fileGetDir(f, &de)){
635 		fileDecRef(f);
636 		vtRUnlock(fs->elk);
637 		return 0;
638 	}
639 	fileDecRef(f);
640 
641 	r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
642 	vtRUnlock(fs->elk);
643 	return r;
644 }
645 
646 static int
647 vtWriteBlock(VtSession *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
648 {
649 	if(!vtWrite(z, score, type, buf, n))
650 		return 0;
651 	if(!vtSha1Check(score, buf, n))
652 		return 0;
653 	return 1;
654 }
655 
656 int
657 mkVac(VtSession *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
658 {
659 	uchar buf[8192];
660 	int i;
661 	uchar *p;
662 	uint n;
663 	DirEntry de;
664 	Entry e, ee, eee;
665 	MetaBlock mb;
666 	MetaEntry me;
667 	VtRoot root;
668 
669 	e = *pe;
670 	ee = *pee;
671 	de = *pde;
672 
673 	if(globalToLocal(e.score) != NilBlock
674 	|| (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
675 		vtSetError("can only vac paths already stored on venti");
676 		return 0;
677 	}
678 
679 	/*
680 	 * Build metadata source for root.
681 	 */
682 	n = deSize(&de);
683 	if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
684 		vtSetError("DirEntry too big");
685 		return 0;
686 	}
687 	memset(buf, 0, sizeof buf);
688 	mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
689 	p = mbAlloc(&mb, n);
690 	if(p == nil)
691 		abort();
692 	mbSearch(&mb, de.elem, &i, &me);
693 	assert(me.p == nil);
694 	me.p = p;
695 	me.size = n;
696 	dePack(&de, &me);
697 	mbInsert(&mb, i, &me);
698 	mbPack(&mb);
699 
700 	eee.size = n+MetaHeaderSize+MetaIndexSize;
701 	if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
702 		return 0;
703 	eee.psize = 8192;
704 	eee.dsize = 8192;
705 	eee.depth = 0;
706 	eee.flags = VtEntryActive;
707 
708 	/*
709 	 * Build root source with three entries in it.
710 	 */
711 	entryPack(&e, buf, 0);
712 	entryPack(&ee, buf, 1);
713 	entryPack(&eee, buf, 2);
714 
715 	n = VtEntrySize*3;
716 	memset(&root, 0, sizeof root);
717 	if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
718 		return 0;
719 
720 	/*
721 	 * Save root.
722 	 */
723 	root.version = VtRootVersion;
724 	strecpy(root.type, root.type+sizeof root.type, "vac");
725 	strecpy(root.name, root.name+sizeof root.name, de.elem);
726 	root.blockSize = blockSize;
727 	vtRootPack(&root, buf);
728 	if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
729 		return 0;
730 
731 	return 1;
732 }
733 
734 int
735 fsSync(Fs *fs)
736 {
737 	vtLock(fs->elk);
738 	fileMetaFlush(fs->file, 1);
739 	cacheFlush(fs->cache, 1);
740 	vtUnlock(fs->elk);
741 	return 1;
742 }
743 
744 int
745 fsHalt(Fs *fs)
746 {
747 	vtLock(fs->elk);
748 	fs->halted = 1;
749 	fileMetaFlush(fs->file, 1);
750 	cacheFlush(fs->cache, 1);
751 	return 1;
752 }
753 
754 int
755 fsUnhalt(Fs *fs)
756 {
757 	if(!fs->halted)
758 		return 0;
759 	fs->halted = 0;
760 	vtUnlock(fs->elk);
761 	return 1;
762 }
763 
764 int
765 fsNextQid(Fs *fs, u64int *qid)
766 {
767 	Block *b;
768 	Super super;
769 
770 	if((b = superGet(fs->cache, &super)) == nil)
771 		return 0;
772 
773 	*qid = super.qid++;
774 
775 	/*
776 	 * It's okay if the super block doesn't go to disk immediately,
777 	 * since fileMetaAlloc will record a dependency between the
778 	 * block holding this qid and the super block.  See file.c:/^fileMetaAlloc.
779 	 */
780 	superPut(b, &super, 0);
781 	return 1;
782 }
783 
784 static void
785 fsMetaFlush(void *a)
786 {
787 	Fs *fs = a;
788 
789 	vtRLock(fs->elk);
790 	fileMetaFlush(fs->file, 1);
791 	vtRUnlock(fs->elk);
792 	cacheFlush(fs->cache, 0);
793 }
794 
795 static int
796 fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
797 {
798 	int n, r;
799 	DirEntry de;
800 	DirEntryEnum *dee;
801 	File *ff;
802 	Entry e, ee;
803 	char *t;
804 
805 	dee = deeOpen(f);
806 	if(dee == nil)
807 		return 0;
808 
809 	n = 0;
810 	for(;;){
811 		r = deeRead(dee, &de);
812 		if(r <= 0)
813 			break;
814 		if(de.mode & ModeSnapshot){
815 			if((ff = fileWalk(f, de.elem)) != nil){
816 				if(fileGetSources(ff, &e, &ee, 0))
817 					if(de.mtime >= savetime && e.snap != 0)
818 						if(e.snap < *plo)
819 							*plo = e.snap;
820 				fileDecRef(ff);
821 			}
822 		}
823 		else if(de.mode & ModeDir){
824 			if((ff = fileWalk(f, de.elem)) != nil){
825 				t = smprint("%s/%s", path, de.elem);
826 				n += fsEsearch1(ff, t, savetime, plo);
827 				vtMemFree(t);
828 				fileDecRef(ff);
829 			}
830 		}
831 		deCleanup(&de);
832 		if(r < 0)
833 			break;
834 	}
835 	deeClose(dee);
836 
837 	return n;
838 }
839 
840 static int
841 fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
842 {
843 	int n;
844 	File *f;
845 	DirEntry de;
846 
847 	f = fileOpen(fs, path);
848 	if(f == nil)
849 		return 0;
850 	if(!fileGetDir(f, &de)){
851 		fileDecRef(f);
852 		return 0;
853 	}
854 	if((de.mode & ModeDir) == 0){
855 		fileDecRef(f);
856 		deCleanup(&de);
857 		return 0;
858 	}
859 	deCleanup(&de);
860 	n = fsEsearch1(f, path, savetime, plo);
861 	fileDecRef(f);
862 	return n;
863 }
864 
865 void
866 fsSnapshotCleanup(Fs *fs, u32int age)
867 {
868 	u32int lo;
869 
870 	/*
871 	 * Find the best low epoch we can use,
872 	 * given that we need to save all the unventied archives
873 	 * and all the snapshots younger than age.
874 	 */
875 	vtRLock(fs->elk);
876 	lo = fs->ehi;
877 	fsEsearch(fs, "/archive", 0, &lo);
878 	fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
879 	vtRUnlock(fs->elk);
880 
881 	fsEpochLow(fs, lo);
882 	fsSnapshotRemove(fs);
883 }
884 
885 /* remove all snapshots that have expired */
886 /* return number of directory entries remaining */
887 static int
888 fsRsearch1(File *f, char *s)
889 {
890 	int n, r;
891 	DirEntry de;
892 	DirEntryEnum *dee;
893 	File *ff;
894 	char *t;
895 
896 	dee = deeOpen(f);
897 	if(dee == nil)
898 		return 0;
899 
900 	n = 0;
901 	for(;;){
902 		r = deeRead(dee, &de);
903 		if(r <= 0)
904 			break;
905 		n++;
906 		if(de.mode & ModeSnapshot){
907 			if((ff = fileWalk(f, de.elem)) != nil)
908 				fileDecRef(ff);
909 			else if(strcmp(vtGetError(), ESnapOld) == 0){
910 				if(fileClri(f, de.elem, "adm"))
911 					n--;
912 			}
913 		}
914 		else if(de.mode & ModeDir){
915 			if((ff = fileWalk(f, de.elem)) != nil){
916 				t = smprint("%s/%s", s, de.elem);
917 				if(fsRsearch1(ff, t) == 0)
918 					if(fileRemove(ff, "adm"))
919 						n--;
920 				vtMemFree(t);
921 				fileDecRef(ff);
922 			}
923 		}
924 		deCleanup(&de);
925 		if(r < 0)
926 			break;
927 	}
928 	deeClose(dee);
929 
930 	return n;
931 }
932 
933 static int
934 fsRsearch(Fs *fs, char *path)
935 {
936 	File *f;
937 	DirEntry de;
938 
939 	f = fileOpen(fs, path);
940 	if(f == nil)
941 		return 0;
942 	if(!fileGetDir(f, &de)){
943 		fileDecRef(f);
944 		return 0;
945 	}
946 	if((de.mode & ModeDir) == 0){
947 		fileDecRef(f);
948 		deCleanup(&de);
949 		return 0;
950 	}
951 	deCleanup(&de);
952 	fsRsearch1(f, path);
953 	fileDecRef(f);
954 	return 1;
955 }
956 
957 void
958 fsSnapshotRemove(Fs *fs)
959 {
960 	vtRLock(fs->elk);
961 	fsRsearch(fs, "/snapshot");
962 	vtRUnlock(fs->elk);
963 }
964 
965 struct Snap
966 {
967 	Fs *fs;
968 	Periodic *tick;
969 	VtLock *lk;
970 	uint snapMinutes;
971 	uint archMinute;
972 	uint snapLife;
973 	u32int lastSnap;
974 	u32int lastArch;
975 	u32int lastCleanup;
976 	uint ignore;
977 };
978 
979 static void
980 snapEvent(void *v)
981 {
982 	Snap *s;
983 	u32int now, min;
984 	Tm tm;
985 	int need;
986 
987 	s = v;
988 
989 	now = time(0)/60;
990 	vtLock(s->lk);
991 
992 	/*
993 	 * Snapshots happen every snapMinutes minutes.
994 	 * If we miss a snapshot (for example, because we
995 	 * were down), we wait for the next one.
996 	 */
997 	if(s->snapMinutes != ~0 && s->snapMinutes != 0
998 	&& now%s->snapMinutes==0 && now != s->lastSnap){
999 		if(!fsSnapshot(s->fs, nil, nil, 0))
1000 			fprint(2, "fsSnapshot snap: %R\n");
1001 		s->lastSnap = now;
1002 	}
1003 
1004 	/*
1005 	 * Archival snapshots happen at archMinute.
1006 	 * If we miss an archive (for example, because we
1007 	 * were down), we do it as soon as possible.
1008 	 */
1009 	tm = *localtime(now*60);
1010 	min = tm.hour*60+tm.min;
1011 	if(s->archMinute != ~0){
1012 		need = 0;
1013 		if(min == s->archMinute && now != s->lastArch)
1014 			need = 1;
1015 		if(s->lastArch == 0){
1016 			s->lastArch = 1;
1017 			if(fsNeedArch(s->fs, s->archMinute))
1018 				need = 1;
1019 		}
1020 		if(need){
1021 			fsSnapshot(s->fs, nil, nil, 1);
1022 			s->lastArch = now;
1023 		}
1024 	}
1025 
1026 	/*
1027 	 * Snapshot cleanup happens every snaplife or every day.
1028 	 */
1029 	if(s->snapLife != ~0
1030 	&& (s->lastCleanup+s->snapLife < now || s->lastCleanup+24*60 < now)){
1031 		fsSnapshotCleanup(s->fs, s->snapLife);
1032 		s->lastCleanup = now;
1033 	}
1034 	vtUnlock(s->lk);
1035 }
1036 
1037 static Snap*
1038 snapInit(Fs *fs)
1039 {
1040 	Snap *s;
1041 
1042 	s = vtMemAllocZ(sizeof(Snap));
1043 	s->fs = fs;
1044 	s->tick = periodicAlloc(snapEvent, s, 10*1000);
1045 	s->lk = vtLockAlloc();
1046 	s->snapMinutes = -1;
1047 	s->archMinute = -1;
1048 	s->snapLife = -1;
1049 	s->ignore = 5*2;	/* wait five minutes for clock to stabilize */
1050 	return s;
1051 }
1052 
1053 void
1054 snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
1055 {
1056 	vtLock(s->lk);
1057 	*snap = s->snapMinutes;
1058 	*arch = s->archMinute;
1059 	*snaplen = s->snapLife;
1060 	vtUnlock(s->lk);
1061 }
1062 
1063 void
1064 snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
1065 {
1066 	vtLock(s->lk);
1067 	s->snapMinutes = snap;
1068 	s->archMinute = arch;
1069 	s->snapLife = snaplen;
1070 	vtUnlock(s->lk);
1071 }
1072 
1073 static void
1074 snapClose(Snap *s)
1075 {
1076 	if(s == nil)
1077 		return;
1078 
1079 	periodicKill(s->tick);
1080 	vtMemFree(s);
1081 }
1082 
1083