xref: /plan9/sys/src/cmd/venti/srv/arena.c (revision 7ab27030036b6c877a6f81728daeda263d1ca3cf)
1368c31abSDavid du Colombier #include "stdinc.h"
2368c31abSDavid du Colombier #include "dat.h"
3368c31abSDavid du Colombier #include "fns.h"
4368c31abSDavid du Colombier 
5368c31abSDavid du Colombier typedef struct ASum ASum;
6368c31abSDavid du Colombier 
7368c31abSDavid du Colombier struct ASum
8368c31abSDavid du Colombier {
9368c31abSDavid du Colombier 	Arena	*arena;
10368c31abSDavid du Colombier 	ASum	*next;
11368c31abSDavid du Colombier };
12368c31abSDavid du Colombier 
13368c31abSDavid du Colombier static void	sealarena(Arena *arena);
14368c31abSDavid du Colombier static int	okarena(Arena *arena);
15368c31abSDavid du Colombier static int	loadarena(Arena *arena);
16368c31abSDavid du Colombier static CIBlock	*getcib(Arena *arena, int clump, int writing, CIBlock *rock);
17368c31abSDavid du Colombier static void	putcib(Arena *arena, CIBlock *cib);
18368c31abSDavid du Colombier static void	sumproc(void *);
19f9e1cf08SDavid du Colombier static void loadcig(Arena *arena);
20368c31abSDavid du Colombier 
21368c31abSDavid du Colombier static QLock	sumlock;
22368c31abSDavid du Colombier static Rendez	sumwait;
23368c31abSDavid du Colombier static ASum	*sumq;
24368c31abSDavid du Colombier static ASum	*sumqtail;
25368c31abSDavid du Colombier static uchar zero[8192];
26368c31abSDavid du Colombier 
27368c31abSDavid du Colombier int	arenasumsleeptime;
28368c31abSDavid du Colombier 
29368c31abSDavid du Colombier int
initarenasum(void)30368c31abSDavid du Colombier initarenasum(void)
31368c31abSDavid du Colombier {
32368c31abSDavid du Colombier 	needzeroscore();  /* OS X */
33368c31abSDavid du Colombier 
34368c31abSDavid du Colombier 	sumwait.l = &sumlock;
35368c31abSDavid du Colombier 
36368c31abSDavid du Colombier 	if(vtproc(sumproc, nil) < 0){
37368c31abSDavid du Colombier 		seterr(EOk, "can't start arena checksum slave: %r");
38368c31abSDavid du Colombier 		return -1;
39368c31abSDavid du Colombier 	}
40368c31abSDavid du Colombier 	return 0;
41368c31abSDavid du Colombier }
42368c31abSDavid du Colombier 
43368c31abSDavid du Colombier /*
44368c31abSDavid du Colombier  * make an Arena, and initialize it based upon the disk header and trailer.
45368c31abSDavid du Colombier  */
46368c31abSDavid du Colombier Arena*
initarena(Part * part,u64int base,u64int size,u32int blocksize)47368c31abSDavid du Colombier initarena(Part *part, u64int base, u64int size, u32int blocksize)
48368c31abSDavid du Colombier {
49368c31abSDavid du Colombier 	Arena *arena;
50368c31abSDavid du Colombier 
51368c31abSDavid du Colombier 	arena = MKZ(Arena);
52368c31abSDavid du Colombier 	arena->part = part;
53368c31abSDavid du Colombier 	arena->blocksize = blocksize;
54368c31abSDavid du Colombier 	arena->clumpmax = arena->blocksize / ClumpInfoSize;
55368c31abSDavid du Colombier 	arena->base = base + blocksize;
56368c31abSDavid du Colombier 	arena->size = size - 2 * blocksize;
57368c31abSDavid du Colombier 
58368c31abSDavid du Colombier 	if(loadarena(arena) < 0){
59368c31abSDavid du Colombier 		seterr(ECorrupt, "arena header or trailer corrupted");
60368c31abSDavid du Colombier 		freearena(arena);
61368c31abSDavid du Colombier 		return nil;
62368c31abSDavid du Colombier 	}
63368c31abSDavid du Colombier 	if(okarena(arena) < 0){
64368c31abSDavid du Colombier 		freearena(arena);
65368c31abSDavid du Colombier 		return nil;
66368c31abSDavid du Colombier 	}
67368c31abSDavid du Colombier 
68368c31abSDavid du Colombier 	if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
69f9e1cf08SDavid du Colombier 		sealarena(arena);
70368c31abSDavid du Colombier 
71368c31abSDavid du Colombier 	return arena;
72368c31abSDavid du Colombier }
73368c31abSDavid du Colombier 
74368c31abSDavid du Colombier void
freearena(Arena * arena)75368c31abSDavid du Colombier freearena(Arena *arena)
76368c31abSDavid du Colombier {
77368c31abSDavid du Colombier 	if(arena == nil)
78368c31abSDavid du Colombier 		return;
79368c31abSDavid du Colombier 	free(arena);
80368c31abSDavid du Colombier }
81368c31abSDavid du Colombier 
82368c31abSDavid du Colombier Arena*
newarena(Part * part,u32int vers,char * name,u64int base,u64int size,u32int blocksize)83368c31abSDavid du Colombier newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
84368c31abSDavid du Colombier {
85368c31abSDavid du Colombier 	int bsize;
86368c31abSDavid du Colombier 	Arena *arena;
87368c31abSDavid du Colombier 
88368c31abSDavid du Colombier 	if(nameok(name) < 0){
89368c31abSDavid du Colombier 		seterr(EOk, "illegal arena name", name);
90368c31abSDavid du Colombier 		return nil;
91368c31abSDavid du Colombier 	}
92368c31abSDavid du Colombier 	arena = MKZ(Arena);
93368c31abSDavid du Colombier 	arena->part = part;
94368c31abSDavid du Colombier 	arena->version = vers;
95368c31abSDavid du Colombier 	if(vers == ArenaVersion4)
96368c31abSDavid du Colombier 		arena->clumpmagic = _ClumpMagic;
97368c31abSDavid du Colombier 	else{
98368c31abSDavid du Colombier 		do
99368c31abSDavid du Colombier 			arena->clumpmagic = fastrand();
100368c31abSDavid du Colombier 		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
101368c31abSDavid du Colombier 	}
102368c31abSDavid du Colombier 	arena->blocksize = blocksize;
103368c31abSDavid du Colombier 	arena->clumpmax = arena->blocksize / ClumpInfoSize;
104368c31abSDavid du Colombier 	arena->base = base + blocksize;
105368c31abSDavid du Colombier 	arena->size = size - 2 * blocksize;
106368c31abSDavid du Colombier 
107368c31abSDavid du Colombier 	namecp(arena->name, name);
108368c31abSDavid du Colombier 
109368c31abSDavid du Colombier 	bsize = sizeof zero;
110368c31abSDavid du Colombier 	if(bsize > arena->blocksize)
111368c31abSDavid du Colombier 		bsize = arena->blocksize;
112368c31abSDavid du Colombier 
113368c31abSDavid du Colombier 	if(wbarena(arena)<0 || wbarenahead(arena)<0
114368c31abSDavid du Colombier 	|| writepart(arena->part, arena->base, zero, bsize)<0){
115368c31abSDavid du Colombier 		freearena(arena);
116368c31abSDavid du Colombier 		return nil;
117368c31abSDavid du Colombier 	}
118368c31abSDavid du Colombier 
119368c31abSDavid du Colombier 	return arena;
120368c31abSDavid du Colombier }
121368c31abSDavid du Colombier 
122368c31abSDavid du Colombier int
readclumpinfo(Arena * arena,int clump,ClumpInfo * ci)123368c31abSDavid du Colombier readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
124368c31abSDavid du Colombier {
125368c31abSDavid du Colombier 	CIBlock *cib, r;
126368c31abSDavid du Colombier 
127368c31abSDavid du Colombier 	cib = getcib(arena, clump, 0, &r);
128368c31abSDavid du Colombier 	if(cib == nil)
129368c31abSDavid du Colombier 		return -1;
130368c31abSDavid du Colombier 	unpackclumpinfo(ci, &cib->data->data[cib->offset]);
131368c31abSDavid du Colombier 	putcib(arena, cib);
132368c31abSDavid du Colombier 	return 0;
133368c31abSDavid du Colombier }
134368c31abSDavid du Colombier 
135368c31abSDavid du Colombier int
readclumpinfos(Arena * arena,int clump,ClumpInfo * cis,int n)136368c31abSDavid du Colombier readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
137368c31abSDavid du Colombier {
138368c31abSDavid du Colombier 	CIBlock *cib, r;
139368c31abSDavid du Colombier 	int i;
140368c31abSDavid du Colombier 
141f9e1cf08SDavid du Colombier 	/*
142f9e1cf08SDavid du Colombier 	 * because the clump blocks are laid out
143f9e1cf08SDavid du Colombier 	 * in reverse order at the end of the arena,
144f9e1cf08SDavid du Colombier 	 * it can be a few percent faster to read
145f9e1cf08SDavid du Colombier 	 * the clumps backwards, which reads the
146f9e1cf08SDavid du Colombier 	 * disk blocks forwards.
147f9e1cf08SDavid du Colombier 	 */
148f9e1cf08SDavid du Colombier 	for(i = n-1; i >= 0; i--){
149368c31abSDavid du Colombier 		cib = getcib(arena, clump + i, 0, &r);
150f9e1cf08SDavid du Colombier 		if(cib == nil){
151f9e1cf08SDavid du Colombier 			n = i;
152f9e1cf08SDavid du Colombier 			continue;
153f9e1cf08SDavid du Colombier 		}
154368c31abSDavid du Colombier 		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
155368c31abSDavid du Colombier 		putcib(arena, cib);
156368c31abSDavid du Colombier 	}
157f9e1cf08SDavid du Colombier 	return n;
158368c31abSDavid du Colombier }
159368c31abSDavid du Colombier 
160368c31abSDavid du Colombier /*
161368c31abSDavid du Colombier  * write directory information for one clump
162368c31abSDavid du Colombier  * must be called the arena locked
163368c31abSDavid du Colombier  */
164368c31abSDavid du Colombier int
writeclumpinfo(Arena * arena,int clump,ClumpInfo * ci)165368c31abSDavid du Colombier writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
166368c31abSDavid du Colombier {
167368c31abSDavid du Colombier 	CIBlock *cib, r;
168368c31abSDavid du Colombier 
169368c31abSDavid du Colombier 	cib = getcib(arena, clump, 1, &r);
170368c31abSDavid du Colombier 	if(cib == nil)
171368c31abSDavid du Colombier 		return -1;
172368c31abSDavid du Colombier 	dirtydblock(cib->data, DirtyArenaCib);
173368c31abSDavid du Colombier 	packclumpinfo(ci, &cib->data->data[cib->offset]);
174368c31abSDavid du Colombier 	putcib(arena, cib);
175368c31abSDavid du Colombier 	return 0;
176368c31abSDavid du Colombier }
177368c31abSDavid du Colombier 
178368c31abSDavid du Colombier u64int
arenadirsize(Arena * arena,u32int clumps)179368c31abSDavid du Colombier arenadirsize(Arena *arena, u32int clumps)
180368c31abSDavid du Colombier {
181368c31abSDavid du Colombier 	return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
182368c31abSDavid du Colombier }
183368c31abSDavid du Colombier 
184368c31abSDavid du Colombier /*
185368c31abSDavid du Colombier  * read a clump of data
186368c31abSDavid du Colombier  * n is a hint of the size of the data, not including the header
187368c31abSDavid du Colombier  * make sure it won't run off the end, then return the number of bytes actually read
188368c31abSDavid du Colombier  */
189368c31abSDavid du Colombier u32int
readarena(Arena * arena,u64int aa,u8int * buf,long n)190368c31abSDavid du Colombier readarena(Arena *arena, u64int aa, u8int *buf, long n)
191368c31abSDavid du Colombier {
192368c31abSDavid du Colombier 	DBlock *b;
193368c31abSDavid du Colombier 	u64int a;
194368c31abSDavid du Colombier 	u32int blocksize, off, m;
195368c31abSDavid du Colombier 	long nn;
196368c31abSDavid du Colombier 
197368c31abSDavid du Colombier 	if(n == 0)
198368c31abSDavid du Colombier 		return -1;
199368c31abSDavid du Colombier 
200368c31abSDavid du Colombier 	qlock(&arena->lock);
201368c31abSDavid du Colombier 	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
202368c31abSDavid du Colombier 	qunlock(&arena->lock);
203368c31abSDavid du Colombier 	if(aa >= a){
204368c31abSDavid du Colombier 		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
205368c31abSDavid du Colombier 		return -1;
206368c31abSDavid du Colombier 	}
207368c31abSDavid du Colombier 	if(aa + n > a)
208368c31abSDavid du Colombier 		n = a - aa;
209368c31abSDavid du Colombier 
210368c31abSDavid du Colombier 	blocksize = arena->blocksize;
211368c31abSDavid du Colombier 	a = arena->base + aa;
212368c31abSDavid du Colombier 	off = a & (blocksize - 1);
213368c31abSDavid du Colombier 	a -= off;
214368c31abSDavid du Colombier 	nn = 0;
215368c31abSDavid du Colombier 	for(;;){
216368c31abSDavid du Colombier 		b = getdblock(arena->part, a, OREAD);
217368c31abSDavid du Colombier 		if(b == nil)
218368c31abSDavid du Colombier 			return -1;
219368c31abSDavid du Colombier 		m = blocksize - off;
220368c31abSDavid du Colombier 		if(m > n - nn)
221368c31abSDavid du Colombier 			m = n - nn;
222368c31abSDavid du Colombier 		memmove(&buf[nn], &b->data[off], m);
223368c31abSDavid du Colombier 		putdblock(b);
224368c31abSDavid du Colombier 		nn += m;
225368c31abSDavid du Colombier 		if(nn == n)
226368c31abSDavid du Colombier 			break;
227368c31abSDavid du Colombier 		off = 0;
228368c31abSDavid du Colombier 		a += blocksize;
229368c31abSDavid du Colombier 	}
230368c31abSDavid du Colombier 	return n;
231368c31abSDavid du Colombier }
232368c31abSDavid du Colombier 
233368c31abSDavid du Colombier /*
234368c31abSDavid du Colombier  * write some data to the clump section at a given offset
235368c31abSDavid du Colombier  * used to fix up corrupted arenas.
236368c31abSDavid du Colombier  */
237368c31abSDavid du Colombier u32int
writearena(Arena * arena,u64int aa,u8int * clbuf,u32int n)238368c31abSDavid du Colombier writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
239368c31abSDavid du Colombier {
240368c31abSDavid du Colombier 	DBlock *b;
241368c31abSDavid du Colombier 	u64int a;
242368c31abSDavid du Colombier 	u32int blocksize, off, m;
243368c31abSDavid du Colombier 	long nn;
244368c31abSDavid du Colombier 	int ok;
245368c31abSDavid du Colombier 
246368c31abSDavid du Colombier 	if(n == 0)
247368c31abSDavid du Colombier 		return -1;
248368c31abSDavid du Colombier 
249368c31abSDavid du Colombier 	qlock(&arena->lock);
250368c31abSDavid du Colombier 	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
251368c31abSDavid du Colombier 	if(aa >= a || aa + n > a){
252368c31abSDavid du Colombier 		qunlock(&arena->lock);
253368c31abSDavid du Colombier 		seterr(EOk, "writing beyond arena clump storage");
254368c31abSDavid du Colombier 		return -1;
255368c31abSDavid du Colombier 	}
256368c31abSDavid du Colombier 
257368c31abSDavid du Colombier 	blocksize = arena->blocksize;
258368c31abSDavid du Colombier 	a = arena->base + aa;
259368c31abSDavid du Colombier 	off = a & (blocksize - 1);
260368c31abSDavid du Colombier 	a -= off;
261368c31abSDavid du Colombier 	nn = 0;
262368c31abSDavid du Colombier 	for(;;){
263368c31abSDavid du Colombier 		b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
264368c31abSDavid du Colombier 		if(b == nil){
265368c31abSDavid du Colombier 			qunlock(&arena->lock);
266368c31abSDavid du Colombier 			return -1;
267368c31abSDavid du Colombier 		}
268368c31abSDavid du Colombier 		dirtydblock(b, DirtyArena);
269368c31abSDavid du Colombier 		m = blocksize - off;
270368c31abSDavid du Colombier 		if(m > n - nn)
271368c31abSDavid du Colombier 			m = n - nn;
272368c31abSDavid du Colombier 		memmove(&b->data[off], &clbuf[nn], m);
273368c31abSDavid du Colombier 		ok = 0;
274368c31abSDavid du Colombier 		putdblock(b);
275368c31abSDavid du Colombier 		if(ok < 0){
276368c31abSDavid du Colombier 			qunlock(&arena->lock);
277368c31abSDavid du Colombier 			return -1;
278368c31abSDavid du Colombier 		}
279368c31abSDavid du Colombier 		nn += m;
280368c31abSDavid du Colombier 		if(nn == n)
281368c31abSDavid du Colombier 			break;
282368c31abSDavid du Colombier 		off = 0;
283368c31abSDavid du Colombier 		a += blocksize;
284368c31abSDavid du Colombier 	}
285368c31abSDavid du Colombier 	qunlock(&arena->lock);
286368c31abSDavid du Colombier 	return n;
287368c31abSDavid du Colombier }
288368c31abSDavid du Colombier 
289368c31abSDavid du Colombier /*
290368c31abSDavid du Colombier  * allocate space for the clump and write it,
291368c31abSDavid du Colombier  * updating the arena directory
292368c31abSDavid du Colombier ZZZ question: should this distinguish between an arena
293368c31abSDavid du Colombier filling up and real errors writing the clump?
294368c31abSDavid du Colombier  */
295368c31abSDavid du Colombier u64int
writeaclump(Arena * arena,Clump * c,u8int * clbuf)296f9e1cf08SDavid du Colombier writeaclump(Arena *arena, Clump *c, u8int *clbuf)
297368c31abSDavid du Colombier {
298368c31abSDavid du Colombier 	DBlock *b;
299368c31abSDavid du Colombier 	u64int a, aa;
300368c31abSDavid du Colombier 	u32int clump, n, nn, m, off, blocksize;
301368c31abSDavid du Colombier 	int ok;
302368c31abSDavid du Colombier 
303368c31abSDavid du Colombier 	n = c->info.size + ClumpSize + U32Size;
304368c31abSDavid du Colombier 	qlock(&arena->lock);
305368c31abSDavid du Colombier 	aa = arena->memstats.used;
306368c31abSDavid du Colombier 	if(arena->memstats.sealed
307368c31abSDavid du Colombier 	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
308368c31abSDavid du Colombier 		if(!arena->memstats.sealed){
309368c31abSDavid du Colombier 			logerr(EOk, "seal memstats %s", arena->name);
310368c31abSDavid du Colombier 			arena->memstats.sealed = 1;
31161f42feeSDavid du Colombier 			wbarena(arena);
312368c31abSDavid du Colombier 		}
313368c31abSDavid du Colombier 		qunlock(&arena->lock);
314368c31abSDavid du Colombier 		return TWID64;
315368c31abSDavid du Colombier 	}
316368c31abSDavid du Colombier 	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
317368c31abSDavid du Colombier 		qunlock(&arena->lock);
318368c31abSDavid du Colombier 		return TWID64;
319368c31abSDavid du Colombier 	}
320368c31abSDavid du Colombier 
321368c31abSDavid du Colombier 	/*
322368c31abSDavid du Colombier 	 * write the data out one block at a time
323368c31abSDavid du Colombier 	 */
324368c31abSDavid du Colombier 	blocksize = arena->blocksize;
325368c31abSDavid du Colombier 	a = arena->base + aa;
326368c31abSDavid du Colombier 	off = a & (blocksize - 1);
327368c31abSDavid du Colombier 	a -= off;
328368c31abSDavid du Colombier 	nn = 0;
329368c31abSDavid du Colombier 	for(;;){
330368c31abSDavid du Colombier 		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
331368c31abSDavid du Colombier 		if(b == nil){
332368c31abSDavid du Colombier 			qunlock(&arena->lock);
333368c31abSDavid du Colombier 			return TWID64;
334368c31abSDavid du Colombier 		}
335368c31abSDavid du Colombier 		dirtydblock(b, DirtyArena);
336368c31abSDavid du Colombier 		m = blocksize - off;
337368c31abSDavid du Colombier 		if(m > n - nn)
338368c31abSDavid du Colombier 			m = n - nn;
339368c31abSDavid du Colombier 		memmove(&b->data[off], &clbuf[nn], m);
340368c31abSDavid du Colombier 		ok = 0;
341368c31abSDavid du Colombier 		putdblock(b);
342368c31abSDavid du Colombier 		if(ok < 0){
343368c31abSDavid du Colombier 			qunlock(&arena->lock);
344368c31abSDavid du Colombier 			return TWID64;
345368c31abSDavid du Colombier 		}
346368c31abSDavid du Colombier 		nn += m;
347368c31abSDavid du Colombier 		if(nn == n)
348368c31abSDavid du Colombier 			break;
349368c31abSDavid du Colombier 		off = 0;
350368c31abSDavid du Colombier 		a += blocksize;
351368c31abSDavid du Colombier 	}
352368c31abSDavid du Colombier 
353368c31abSDavid du Colombier 	arena->memstats.used += c->info.size + ClumpSize;
354368c31abSDavid du Colombier 	arena->memstats.uncsize += c->info.uncsize;
355368c31abSDavid du Colombier 	if(c->info.size < c->info.uncsize)
356368c31abSDavid du Colombier 		arena->memstats.cclumps++;
357368c31abSDavid du Colombier 
358f9e1cf08SDavid du Colombier 	clump = arena->memstats.clumps;
359f9e1cf08SDavid du Colombier 	if(clump % ArenaCIGSize == 0){
360f9e1cf08SDavid du Colombier 		if(arena->cig == nil){
361f9e1cf08SDavid du Colombier 			loadcig(arena);
362f9e1cf08SDavid du Colombier 			if(arena->cig == nil)
363f9e1cf08SDavid du Colombier 				goto NoCIG;
364f9e1cf08SDavid du Colombier 		}
365f9e1cf08SDavid du Colombier 		/* add aa as start of next cig */
366f9e1cf08SDavid du Colombier 		if(clump/ArenaCIGSize != arena->ncig){
367f9e1cf08SDavid du Colombier 			fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
368f9e1cf08SDavid du Colombier 				arena->name, clump, arena->ncig);
369f9e1cf08SDavid du Colombier 			arena->ncig = -1;
370f9e1cf08SDavid du Colombier 			vtfree(arena->cig);
371f9e1cf08SDavid du Colombier 			arena->cig = nil;
372f9e1cf08SDavid du Colombier 			goto NoCIG;
373f9e1cf08SDavid du Colombier 		}
374f9e1cf08SDavid du Colombier 		arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
375f9e1cf08SDavid du Colombier 		arena->cig[arena->ncig++].offset = aa;
376f9e1cf08SDavid du Colombier 	}
377f9e1cf08SDavid du Colombier NoCIG:
378f9e1cf08SDavid du Colombier 	arena->memstats.clumps++;
379f9e1cf08SDavid du Colombier 
380368c31abSDavid du Colombier 	if(arena->memstats.clumps == 0)
381368c31abSDavid du Colombier 		sysfatal("clumps wrapped");
382368c31abSDavid du Colombier 	arena->wtime = now();
383368c31abSDavid du Colombier 	if(arena->ctime == 0)
384368c31abSDavid du Colombier 		arena->ctime = arena->wtime;
385368c31abSDavid du Colombier 
386368c31abSDavid du Colombier 	writeclumpinfo(arena, clump, &c->info);
387368c31abSDavid du Colombier 	wbarena(arena);
388368c31abSDavid du Colombier 
389368c31abSDavid du Colombier 	qunlock(&arena->lock);
390368c31abSDavid du Colombier 
391368c31abSDavid du Colombier 	return aa;
392368c31abSDavid du Colombier }
393368c31abSDavid du Colombier 
394368c31abSDavid du Colombier int
atailcmp(ATailStats * a,ATailStats * b)395368c31abSDavid du Colombier atailcmp(ATailStats *a, ATailStats *b)
396368c31abSDavid du Colombier {
397368c31abSDavid du Colombier 	/* good test */
398368c31abSDavid du Colombier 	if(a->used < b->used)
399368c31abSDavid du Colombier 		return -1;
400368c31abSDavid du Colombier 	if(a->used > b->used)
401368c31abSDavid du Colombier 		return 1;
402368c31abSDavid du Colombier 
403368c31abSDavid du Colombier 	/* suspect tests - why order this way? (no one cares) */
404368c31abSDavid du Colombier 	if(a->clumps < b->clumps)
405368c31abSDavid du Colombier 		return -1;
406368c31abSDavid du Colombier 	if(a->clumps > b->clumps)
407368c31abSDavid du Colombier 		return 1;
408368c31abSDavid du Colombier 	if(a->cclumps < b->cclumps)
409368c31abSDavid du Colombier 		return -1;
410368c31abSDavid du Colombier 	if(a->cclumps > b->cclumps)
411368c31abSDavid du Colombier 		return 1;
412368c31abSDavid du Colombier 	if(a->uncsize < b->uncsize)
413368c31abSDavid du Colombier 		return -1;
414368c31abSDavid du Colombier 	if(a->uncsize > b->uncsize)
415368c31abSDavid du Colombier 		return 1;
416368c31abSDavid du Colombier 	if(a->sealed < b->sealed)
417368c31abSDavid du Colombier 		return -1;
418368c31abSDavid du Colombier 	if(a->sealed > b->sealed)
419368c31abSDavid du Colombier 		return 1;
420368c31abSDavid du Colombier 
421368c31abSDavid du Colombier 	/* everything matches */
422368c31abSDavid du Colombier 	return 0;
423368c31abSDavid du Colombier }
424368c31abSDavid du Colombier 
425368c31abSDavid du Colombier void
setatailstate(AState * as)426368c31abSDavid du Colombier setatailstate(AState *as)
427368c31abSDavid du Colombier {
428368c31abSDavid du Colombier 	int i, j, osealed;
429368c31abSDavid du Colombier 	Arena *a;
430368c31abSDavid du Colombier 	Index *ix;
431368c31abSDavid du Colombier 
432368c31abSDavid du Colombier 	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);
433368c31abSDavid du Colombier 
434368c31abSDavid du Colombier 	/*
435368c31abSDavid du Colombier 	 * Look up as->arena to find index.
436368c31abSDavid du Colombier 	 */
437f9e1cf08SDavid du Colombier 	needmainindex();	/* OS X linker */
438368c31abSDavid du Colombier 	ix = mainindex;
439368c31abSDavid du Colombier 	for(i=0; i<ix->narenas; i++)
440368c31abSDavid du Colombier 		if(ix->arenas[i] == as->arena)
441368c31abSDavid du Colombier 			break;
442368c31abSDavid du Colombier 	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
443368c31abSDavid du Colombier 		fprint(2, "funny settailstate 0x%llux\n", as->aa);
444368c31abSDavid du Colombier 		return;
445368c31abSDavid du Colombier 	}
446368c31abSDavid du Colombier 
447368c31abSDavid du Colombier 	for(j=0; j<=i; j++){
448368c31abSDavid du Colombier 		a = ix->arenas[j];
449368c31abSDavid du Colombier 		if(atailcmp(&a->diskstats, &a->memstats) == 0)
450368c31abSDavid du Colombier 			continue;
451368c31abSDavid du Colombier 		qlock(&a->lock);
452368c31abSDavid du Colombier 		osealed = a->diskstats.sealed;
453368c31abSDavid du Colombier 		if(j == i)
454368c31abSDavid du Colombier 			a->diskstats = as->stats;
455368c31abSDavid du Colombier 		else
456368c31abSDavid du Colombier 			a->diskstats = a->memstats;
457368c31abSDavid du Colombier 		wbarena(a);
458368c31abSDavid du Colombier 		if(a->diskstats.sealed != osealed && !a->inqueue)
459368c31abSDavid du Colombier 			sealarena(a);
460368c31abSDavid du Colombier 		qunlock(&a->lock);
461368c31abSDavid du Colombier 	}
462368c31abSDavid du Colombier }
463368c31abSDavid du Colombier 
464368c31abSDavid du Colombier /*
465368c31abSDavid du Colombier  * once sealed, an arena never has any data added to it.
466368c31abSDavid du Colombier  * it should only be changed to fix errors.
467368c31abSDavid du Colombier  * this also syncs the clump directory.
468368c31abSDavid du Colombier  */
469368c31abSDavid du Colombier static void
sealarena(Arena * arena)470368c31abSDavid du Colombier sealarena(Arena *arena)
471368c31abSDavid du Colombier {
472368c31abSDavid du Colombier 	arena->inqueue = 1;
473368c31abSDavid du Colombier 	backsumarena(arena);
474368c31abSDavid du Colombier }
475368c31abSDavid du Colombier 
476368c31abSDavid du Colombier void
backsumarena(Arena * arena)477368c31abSDavid du Colombier backsumarena(Arena *arena)
478368c31abSDavid du Colombier {
479368c31abSDavid du Colombier 	ASum *as;
480368c31abSDavid du Colombier 
481368c31abSDavid du Colombier 	if(sumwait.l == nil)
482368c31abSDavid du Colombier 		return;
483368c31abSDavid du Colombier 
484368c31abSDavid du Colombier 	as = MK(ASum);
485368c31abSDavid du Colombier 	if(as == nil)
486368c31abSDavid du Colombier 		return;
487368c31abSDavid du Colombier 	qlock(&sumlock);
488368c31abSDavid du Colombier 	as->arena = arena;
489368c31abSDavid du Colombier 	as->next = nil;
490368c31abSDavid du Colombier 	if(sumq)
491368c31abSDavid du Colombier 		sumqtail->next = as;
492368c31abSDavid du Colombier 	else
493368c31abSDavid du Colombier 		sumq = as;
494368c31abSDavid du Colombier 	sumqtail = as;
495368c31abSDavid du Colombier 	rwakeup(&sumwait);
496368c31abSDavid du Colombier 	qunlock(&sumlock);
497368c31abSDavid du Colombier }
498368c31abSDavid du Colombier 
499368c31abSDavid du Colombier static void
sumproc(void * unused)500368c31abSDavid du Colombier sumproc(void *unused)
501368c31abSDavid du Colombier {
502368c31abSDavid du Colombier 	ASum *as;
503368c31abSDavid du Colombier 	Arena *arena;
504368c31abSDavid du Colombier 
505368c31abSDavid du Colombier 	USED(unused);
506368c31abSDavid du Colombier 
507368c31abSDavid du Colombier 	for(;;){
508368c31abSDavid du Colombier 		qlock(&sumlock);
509368c31abSDavid du Colombier 		while(sumq == nil)
510368c31abSDavid du Colombier 			rsleep(&sumwait);
511368c31abSDavid du Colombier 		as = sumq;
512368c31abSDavid du Colombier 		sumq = as->next;
513368c31abSDavid du Colombier 		qunlock(&sumlock);
514368c31abSDavid du Colombier 		arena = as->arena;
515368c31abSDavid du Colombier 		free(as);
516368c31abSDavid du Colombier 
517368c31abSDavid du Colombier 		sumarena(arena);
518368c31abSDavid du Colombier 	}
519368c31abSDavid du Colombier }
520368c31abSDavid du Colombier 
521368c31abSDavid du Colombier void
sumarena(Arena * arena)522368c31abSDavid du Colombier sumarena(Arena *arena)
523368c31abSDavid du Colombier {
524368c31abSDavid du Colombier 	ZBlock *b;
525368c31abSDavid du Colombier 	DigestState s;
526368c31abSDavid du Colombier 	u64int a, e;
527368c31abSDavid du Colombier 	u32int bs;
528368c31abSDavid du Colombier 	int t;
529368c31abSDavid du Colombier 	u8int score[VtScoreSize];
530368c31abSDavid du Colombier 
531368c31abSDavid du Colombier 	bs = MaxIoSize;
532368c31abSDavid du Colombier 	if(bs < arena->blocksize)
533368c31abSDavid du Colombier 		bs = arena->blocksize;
534368c31abSDavid du Colombier 
535368c31abSDavid du Colombier 	/*
536368c31abSDavid du Colombier 	 * read & sum all blocks except the last one
537368c31abSDavid du Colombier 	 */
538f9e1cf08SDavid du Colombier 	flushdcache();
539368c31abSDavid du Colombier 	memset(&s, 0, sizeof s);
540368c31abSDavid du Colombier 	b = alloczblock(bs, 0, arena->part->blocksize);
541368c31abSDavid du Colombier 	e = arena->base + arena->size;
542368c31abSDavid du Colombier 	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
543368c31abSDavid du Colombier 		disksched();
544368c31abSDavid du Colombier 		while((t=arenasumsleeptime) == SleepForever){
545368c31abSDavid du Colombier 			sleep(1000);
546368c31abSDavid du Colombier 			disksched();
547368c31abSDavid du Colombier 		}
548368c31abSDavid du Colombier 		sleep(t);
549368c31abSDavid du Colombier 		if(a + bs > e)
550368c31abSDavid du Colombier 			bs = arena->blocksize;
551368c31abSDavid du Colombier 		if(readpart(arena->part, a, b->data, bs) < 0)
552368c31abSDavid du Colombier 			goto ReadErr;
553368c31abSDavid du Colombier 		addstat(StatSumRead, 1);
554368c31abSDavid du Colombier 		addstat(StatSumReadBytes, bs);
555368c31abSDavid du Colombier 		sha1(b->data, bs, nil, &s);
556368c31abSDavid du Colombier 	}
557368c31abSDavid du Colombier 
558368c31abSDavid du Colombier 	/*
559368c31abSDavid du Colombier 	 * the last one is special, since it may already have the checksum included
560368c31abSDavid du Colombier 	 */
561368c31abSDavid du Colombier 	bs = arena->blocksize;
562368c31abSDavid du Colombier 	if(readpart(arena->part, e, b->data, bs) < 0){
563368c31abSDavid du Colombier ReadErr:
564368c31abSDavid du Colombier 		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
565368c31abSDavid du Colombier 		freezblock(b);
566368c31abSDavid du Colombier 		return;
567368c31abSDavid du Colombier 	}
568368c31abSDavid du Colombier 	addstat(StatSumRead, 1);
569368c31abSDavid du Colombier 	addstat(StatSumReadBytes, bs);
570368c31abSDavid du Colombier 
571368c31abSDavid du Colombier 	sha1(b->data, bs-VtScoreSize, nil, &s);
572368c31abSDavid du Colombier 	sha1(zeroscore, VtScoreSize, nil, &s);
573368c31abSDavid du Colombier 	sha1(nil, 0, score, &s);
574368c31abSDavid du Colombier 
575368c31abSDavid du Colombier 	/*
576368c31abSDavid du Colombier 	 * check for no checksum or the same
577368c31abSDavid du Colombier 	 */
578f9e1cf08SDavid du Colombier 	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
579f9e1cf08SDavid du Colombier 	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
580368c31abSDavid du Colombier 		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
581368c31abSDavid du Colombier 			arena->name, &b->data[bs - VtScoreSize], score);
582368c31abSDavid du Colombier 	freezblock(b);
583368c31abSDavid du Colombier 
584368c31abSDavid du Colombier 	qlock(&arena->lock);
585368c31abSDavid du Colombier 	scorecp(arena->score, score);
586f9e1cf08SDavid du Colombier 	wbarena(arena);
587368c31abSDavid du Colombier 	qunlock(&arena->lock);
588368c31abSDavid du Colombier }
589368c31abSDavid du Colombier 
590368c31abSDavid du Colombier /*
591368c31abSDavid du Colombier  * write the arena trailer block to the partition
592368c31abSDavid du Colombier  */
593368c31abSDavid du Colombier int
wbarena(Arena * arena)594368c31abSDavid du Colombier wbarena(Arena *arena)
595368c31abSDavid du Colombier {
596368c31abSDavid du Colombier 	DBlock *b;
597368c31abSDavid du Colombier 	int bad;
598368c31abSDavid du Colombier 
599368c31abSDavid du Colombier 	if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
600368c31abSDavid du Colombier 		logerr(EAdmin, "can't write arena trailer: %r");
601368c31abSDavid du Colombier 		return -1;
602368c31abSDavid du Colombier 	}
603368c31abSDavid du Colombier 	dirtydblock(b, DirtyArenaTrailer);
604368c31abSDavid du Colombier 	bad = okarena(arena)<0 || packarena(arena, b->data)<0;
605f9e1cf08SDavid du Colombier 	scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
606368c31abSDavid du Colombier 	putdblock(b);
607368c31abSDavid du Colombier 	if(bad)
608368c31abSDavid du Colombier 		return -1;
609368c31abSDavid du Colombier 	return 0;
610368c31abSDavid du Colombier }
611368c31abSDavid du Colombier 
612368c31abSDavid du Colombier int
wbarenahead(Arena * arena)613368c31abSDavid du Colombier wbarenahead(Arena *arena)
614368c31abSDavid du Colombier {
615368c31abSDavid du Colombier 	ZBlock *b;
616368c31abSDavid du Colombier 	ArenaHead head;
617368c31abSDavid du Colombier 	int bad;
618368c31abSDavid du Colombier 
619368c31abSDavid du Colombier 	namecp(head.name, arena->name);
620368c31abSDavid du Colombier 	head.version = arena->version;
621368c31abSDavid du Colombier 	head.size = arena->size + 2 * arena->blocksize;
622368c31abSDavid du Colombier 	head.blocksize = arena->blocksize;
623368c31abSDavid du Colombier 	head.clumpmagic = arena->clumpmagic;
624368c31abSDavid du Colombier 	b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
625368c31abSDavid du Colombier 	if(b == nil){
626368c31abSDavid du Colombier 		logerr(EAdmin, "can't write arena header: %r");
627368c31abSDavid du Colombier /* ZZZ add error message? */
628368c31abSDavid du Colombier 		return -1;
629368c31abSDavid du Colombier 	}
630368c31abSDavid du Colombier 	/*
631368c31abSDavid du Colombier 	 * this writepart is okay because it only happens
632368c31abSDavid du Colombier 	 * during initialization.
633368c31abSDavid du Colombier 	 */
634368c31abSDavid du Colombier 	bad = packarenahead(&head, b->data)<0 ||
635368c31abSDavid du Colombier 	      writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
636368c31abSDavid du Colombier 	      flushpart(arena->part)<0;
637368c31abSDavid du Colombier 	freezblock(b);
638368c31abSDavid du Colombier 	if(bad)
639368c31abSDavid du Colombier 		return -1;
640368c31abSDavid du Colombier 	return 0;
641368c31abSDavid du Colombier }
642368c31abSDavid du Colombier 
643368c31abSDavid du Colombier /*
644368c31abSDavid du Colombier  * read the arena header and trailer blocks from disk
645368c31abSDavid du Colombier  */
646368c31abSDavid du Colombier static int
loadarena(Arena * arena)647368c31abSDavid du Colombier loadarena(Arena *arena)
648368c31abSDavid du Colombier {
649368c31abSDavid du Colombier 	ArenaHead head;
650368c31abSDavid du Colombier 	ZBlock *b;
651368c31abSDavid du Colombier 
652368c31abSDavid du Colombier 	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
653368c31abSDavid du Colombier 	if(b == nil)
654368c31abSDavid du Colombier 		return -1;
655368c31abSDavid du Colombier 	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
656368c31abSDavid du Colombier 		freezblock(b);
657368c31abSDavid du Colombier 		return -1;
658368c31abSDavid du Colombier 	}
659368c31abSDavid du Colombier 	if(unpackarena(arena, b->data) < 0){
660368c31abSDavid du Colombier 		freezblock(b);
661368c31abSDavid du Colombier 		return -1;
662368c31abSDavid du Colombier 	}
663368c31abSDavid du Colombier 	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
664368c31abSDavid du Colombier 		seterr(EAdmin, "unknown arena version %d", arena->version);
665368c31abSDavid du Colombier 		freezblock(b);
666368c31abSDavid du Colombier 		return -1;
667368c31abSDavid du Colombier 	}
668368c31abSDavid du Colombier 	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);
669368c31abSDavid du Colombier 
670368c31abSDavid du Colombier 	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
671368c31abSDavid du Colombier 		logerr(EAdmin, "can't read arena header: %r");
672368c31abSDavid du Colombier 		freezblock(b);
673368c31abSDavid du Colombier 		return 0;
674368c31abSDavid du Colombier 	}
675368c31abSDavid du Colombier 	if(unpackarenahead(&head, b->data) < 0)
676368c31abSDavid du Colombier 		logerr(ECorrupt, "corrupted arena header: %r");
677368c31abSDavid du Colombier 	else if(namecmp(arena->name, head.name)!=0
678368c31abSDavid du Colombier 	     || arena->clumpmagic != head.clumpmagic
679368c31abSDavid du Colombier 	     || arena->version != head.version
680368c31abSDavid du Colombier 	     || arena->blocksize != head.blocksize
681368c31abSDavid du Colombier 	     || arena->size + 2 * arena->blocksize != head.size){
682368c31abSDavid du Colombier 		if(namecmp(arena->name, head.name)!=0)
683368c31abSDavid du Colombier 			logerr(ECorrupt, "arena tail name %s head %s",
684368c31abSDavid du Colombier 				arena->name, head.name);
685368c31abSDavid du Colombier 		else if(arena->clumpmagic != head.clumpmagic)
68692b836f4SDavid du Colombier 			logerr(ECorrupt, "arena %d tail clumpmagic 0x%lux head 0x%lux",
68792b836f4SDavid du Colombier 				debugarena, (ulong)arena->clumpmagic,
68892b836f4SDavid du Colombier 				(ulong)head.clumpmagic);
689368c31abSDavid du Colombier 		else if(arena->version != head.version)
690368c31abSDavid du Colombier 			logerr(ECorrupt, "arena tail version %d head version %d",
691368c31abSDavid du Colombier 				arena->version, head.version);
692368c31abSDavid du Colombier 		else if(arena->blocksize != head.blocksize)
693368c31abSDavid du Colombier 			logerr(ECorrupt, "arena tail block size %d head %d",
694368c31abSDavid du Colombier 				arena->blocksize, head.blocksize);
695368c31abSDavid du Colombier 		else if(arena->size+2*arena->blocksize != head.size)
696368c31abSDavid du Colombier 			logerr(ECorrupt, "arena tail size %lud head %lud",
697368c31abSDavid du Colombier 				(ulong)arena->size+2*arena->blocksize, head.size);
698368c31abSDavid du Colombier 		else
699368c31abSDavid du Colombier 			logerr(ECorrupt, "arena header inconsistent with arena data");
700368c31abSDavid du Colombier 	}
701368c31abSDavid du Colombier 	freezblock(b);
702368c31abSDavid du Colombier 
703368c31abSDavid du Colombier 	return 0;
704368c31abSDavid du Colombier }
705368c31abSDavid du Colombier 
706368c31abSDavid du Colombier static int
okarena(Arena * arena)707368c31abSDavid du Colombier okarena(Arena *arena)
708368c31abSDavid du Colombier {
709368c31abSDavid du Colombier 	u64int dsize;
710368c31abSDavid du Colombier 	int ok;
711368c31abSDavid du Colombier 
712368c31abSDavid du Colombier 	ok = 0;
713368c31abSDavid du Colombier 	dsize = arenadirsize(arena, arena->diskstats.clumps);
714368c31abSDavid du Colombier 	if(arena->diskstats.used + dsize > arena->size){
715368c31abSDavid du Colombier 		seterr(ECorrupt, "arena %s used > size", arena->name);
716368c31abSDavid du Colombier 		ok = -1;
717368c31abSDavid du Colombier 	}
718368c31abSDavid du Colombier 
719368c31abSDavid du Colombier 	if(arena->diskstats.cclumps > arena->diskstats.clumps)
720368c31abSDavid du Colombier 		logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);
721368c31abSDavid du Colombier 
722368c31abSDavid du Colombier 	/*
723368c31abSDavid du Colombier 	 * This need not be true if some of the disk is corrupted.
724368c31abSDavid du Colombier 	 *
725368c31abSDavid du Colombier 	if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
726368c31abSDavid du Colombier 		logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
727368c31abSDavid du Colombier 	 */
728368c31abSDavid du Colombier 
729*7ab27030SDavid du Colombier 	/*
730*7ab27030SDavid du Colombier 	 * this happens; it's harmless.
731*7ab27030SDavid du Colombier 	 *
732368c31abSDavid du Colombier 	if(arena->ctime > arena->wtime)
733368c31abSDavid du Colombier 		logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
734*7ab27030SDavid du Colombier 	 */
735368c31abSDavid du Colombier 	return ok;
736368c31abSDavid du Colombier }
737368c31abSDavid du Colombier 
738368c31abSDavid du Colombier static CIBlock*
getcib(Arena * arena,int clump,int writing,CIBlock * rock)739368c31abSDavid du Colombier getcib(Arena *arena, int clump, int writing, CIBlock *rock)
740368c31abSDavid du Colombier {
741368c31abSDavid du Colombier 	int mode;
742368c31abSDavid du Colombier 	CIBlock *cib;
743368c31abSDavid du Colombier 	u32int block, off;
744368c31abSDavid du Colombier 
745368c31abSDavid du Colombier 	if(clump >= arena->memstats.clumps){
746368c31abSDavid du Colombier 		seterr(EOk, "clump directory access out of range");
747368c31abSDavid du Colombier 		return nil;
748368c31abSDavid du Colombier 	}
749368c31abSDavid du Colombier 	block = clump / arena->clumpmax;
750368c31abSDavid du Colombier 	off = (clump - block * arena->clumpmax) * ClumpInfoSize;
751368c31abSDavid du Colombier 	cib = rock;
752368c31abSDavid du Colombier 	cib->block = block;
753368c31abSDavid du Colombier 	cib->offset = off;
754368c31abSDavid du Colombier 
755368c31abSDavid du Colombier 	if(writing){
756368c31abSDavid du Colombier 		if(off == 0 && clump == arena->memstats.clumps-1)
757368c31abSDavid du Colombier 			mode = OWRITE;
758368c31abSDavid du Colombier 		else
759368c31abSDavid du Colombier 			mode = ORDWR;
760368c31abSDavid du Colombier 	}else
761368c31abSDavid du Colombier 		mode = OREAD;
762368c31abSDavid du Colombier 
763368c31abSDavid du Colombier 	cib->data = getdblock(arena->part,
764368c31abSDavid du Colombier 		arena->base + arena->size - (block + 1) * arena->blocksize, mode);
765368c31abSDavid du Colombier 	if(cib->data == nil)
766368c31abSDavid du Colombier 		return nil;
767368c31abSDavid du Colombier 	return cib;
768368c31abSDavid du Colombier }
769368c31abSDavid du Colombier 
770368c31abSDavid du Colombier static void
putcib(Arena * arena,CIBlock * cib)771368c31abSDavid du Colombier putcib(Arena *arena, CIBlock *cib)
772368c31abSDavid du Colombier {
773368c31abSDavid du Colombier 	USED(arena);
774368c31abSDavid du Colombier 
775368c31abSDavid du Colombier 	putdblock(cib->data);
776368c31abSDavid du Colombier 	cib->data = nil;
777368c31abSDavid du Colombier }
778f9e1cf08SDavid du Colombier 
779f9e1cf08SDavid du Colombier 
780f9e1cf08SDavid du Colombier /*
781f9e1cf08SDavid du Colombier  * For index entry readahead purposes, the arenas are
782f9e1cf08SDavid du Colombier  * broken into smaller subpieces, called clump info groups
783f9e1cf08SDavid du Colombier  * or cigs.  Each cig has ArenaCIGSize clumps (ArenaCIGSize
784f9e1cf08SDavid du Colombier  * is chosen to make the index entries take up about half
785f9e1cf08SDavid du Colombier  * a megabyte).  The index entries do not contain enough
786f9e1cf08SDavid du Colombier  * information to determine what the clump index is for
787f9e1cf08SDavid du Colombier  * a given address in an arena.  That info is needed both for
788f9e1cf08SDavid du Colombier  * figuring out which clump group an address belongs to
789f9e1cf08SDavid du Colombier  * and for prefetching a clump group's index entries from
790f9e1cf08SDavid du Colombier  * the arena table of contents.  The first time clump groups
791f9e1cf08SDavid du Colombier  * are accessed, we scan the entire arena table of contents
792f9e1cf08SDavid du Colombier  * (which might be 10s of megabytes), recording the data
793f9e1cf08SDavid du Colombier  * offset of each clump group.
794f9e1cf08SDavid du Colombier  */
795f9e1cf08SDavid du Colombier 
796f9e1cf08SDavid du Colombier /*
797f9e1cf08SDavid du Colombier  * load clump info group information by scanning entire toc.
798f9e1cf08SDavid du Colombier  */
799f9e1cf08SDavid du Colombier static void
loadcig(Arena * arena)800f9e1cf08SDavid du Colombier loadcig(Arena *arena)
801f9e1cf08SDavid du Colombier {
802f9e1cf08SDavid du Colombier 	u32int i, j, ncig, nci;
803f9e1cf08SDavid du Colombier 	ArenaCIG *cig;
804f9e1cf08SDavid du Colombier 	ClumpInfo *ci;
805f9e1cf08SDavid du Colombier 	u64int offset;
806f9e1cf08SDavid du Colombier 	int ms;
807f9e1cf08SDavid du Colombier 
808f9e1cf08SDavid du Colombier 	if(arena->cig || arena->ncig < 0)
809f9e1cf08SDavid du Colombier 		return;
810f9e1cf08SDavid du Colombier 
811f9e1cf08SDavid du Colombier //	fprint(2, "loadcig %s\n", arena->name);
812f9e1cf08SDavid du Colombier 
813f9e1cf08SDavid du Colombier 	ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
814f9e1cf08SDavid du Colombier 	if(ncig == 0){
815f9e1cf08SDavid du Colombier 		arena->cig = vtmalloc(1);
816f9e1cf08SDavid du Colombier 		arena->ncig = 0;
817f9e1cf08SDavid du Colombier 		return;
818f9e1cf08SDavid du Colombier 	}
819f9e1cf08SDavid du Colombier 
820f9e1cf08SDavid du Colombier 	ms = msec();
821f9e1cf08SDavid du Colombier 	cig = vtmalloc(ncig*sizeof cig[0]);
822f9e1cf08SDavid du Colombier 	ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
823f9e1cf08SDavid du Colombier 	offset = 0;
824f9e1cf08SDavid du Colombier 	for(i=0; i<ncig; i++){
825f9e1cf08SDavid du Colombier 		nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
826f9e1cf08SDavid du Colombier 		cig[i].offset = offset;
827f9e1cf08SDavid du Colombier 		for(j=0; j<nci; j++)
828f9e1cf08SDavid du Colombier 			offset += ClumpSize + ci[j].size;
829f9e1cf08SDavid du Colombier 		if(nci < ArenaCIGSize){
830f9e1cf08SDavid du Colombier 			if(i != ncig-1){
831f9e1cf08SDavid du Colombier 				vtfree(ci);
832f9e1cf08SDavid du Colombier 				vtfree(cig);
833f9e1cf08SDavid du Colombier 				arena->ncig = -1;
834f9e1cf08SDavid du Colombier 				fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
835f9e1cf08SDavid du Colombier 				goto out;
836f9e1cf08SDavid du Colombier 			}
837f9e1cf08SDavid du Colombier 		}
838f9e1cf08SDavid du Colombier 	}
839f9e1cf08SDavid du Colombier 	vtfree(ci);
840f9e1cf08SDavid du Colombier 
841f9e1cf08SDavid du Colombier 	arena->ncig = ncig;
842f9e1cf08SDavid du Colombier 	arena->cig = cig;
843f9e1cf08SDavid du Colombier 
844f9e1cf08SDavid du Colombier out:
845f9e1cf08SDavid du Colombier 	ms = msec() - ms;
846f9e1cf08SDavid du Colombier 	addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
847f9e1cf08SDavid du Colombier }
848f9e1cf08SDavid du Colombier 
849f9e1cf08SDavid du Colombier /*
850f9e1cf08SDavid du Colombier  * convert arena address into arena group + data boundaries.
851f9e1cf08SDavid du Colombier  */
852f9e1cf08SDavid du Colombier int
arenatog(Arena * arena,u64int addr,u64int * gstart,u64int * glimit,int * g)853f9e1cf08SDavid du Colombier arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
854f9e1cf08SDavid du Colombier {
855f9e1cf08SDavid du Colombier 	int r, l, m;
856f9e1cf08SDavid du Colombier 
857f9e1cf08SDavid du Colombier 	qlock(&arena->lock);
858f9e1cf08SDavid du Colombier 	if(arena->cig == nil)
859f9e1cf08SDavid du Colombier 		loadcig(arena);
860f9e1cf08SDavid du Colombier 	if(arena->cig == nil || arena->ncig == 0){
861f9e1cf08SDavid du Colombier 		qunlock(&arena->lock);
862f9e1cf08SDavid du Colombier 		return -1;
863f9e1cf08SDavid du Colombier 	}
864f9e1cf08SDavid du Colombier 
865f9e1cf08SDavid du Colombier 	l = 1;
866f9e1cf08SDavid du Colombier 	r = arena->ncig - 1;
867f9e1cf08SDavid du Colombier 	while(l <= r){
868f9e1cf08SDavid du Colombier 		m = (r + l) / 2;
869f9e1cf08SDavid du Colombier 		if(arena->cig[m].offset <= addr)
870f9e1cf08SDavid du Colombier 			l = m + 1;
871f9e1cf08SDavid du Colombier 		else
872f9e1cf08SDavid du Colombier 			r = m - 1;
873f9e1cf08SDavid du Colombier 	}
874f9e1cf08SDavid du Colombier 	l--;
875f9e1cf08SDavid du Colombier 
876f9e1cf08SDavid du Colombier 	*g = l;
877f9e1cf08SDavid du Colombier 	*gstart = arena->cig[l].offset;
878f9e1cf08SDavid du Colombier 	if(l+1 < arena->ncig)
879f9e1cf08SDavid du Colombier 		*glimit = arena->cig[l+1].offset;
880f9e1cf08SDavid du Colombier 	else
881f9e1cf08SDavid du Colombier 		*glimit = arena->memstats.used;
882f9e1cf08SDavid du Colombier 	qunlock(&arena->lock);
883f9e1cf08SDavid du Colombier 	return 0;
884f9e1cf08SDavid du Colombier }
885f9e1cf08SDavid du Colombier 
886f9e1cf08SDavid du Colombier /*
887f9e1cf08SDavid du Colombier  * load the clump info for group g into the index entries.
888f9e1cf08SDavid du Colombier  */
889f9e1cf08SDavid du Colombier int
asumload(Arena * arena,int g,IEntry * entries,int nentries)890f9e1cf08SDavid du Colombier asumload(Arena *arena, int g, IEntry *entries, int nentries)
891f9e1cf08SDavid du Colombier {
892f9e1cf08SDavid du Colombier 	int i, base, limit;
893f9e1cf08SDavid du Colombier 	u64int addr;
894f9e1cf08SDavid du Colombier 	ClumpInfo ci;
895f9e1cf08SDavid du Colombier 	IEntry *ie;
896f9e1cf08SDavid du Colombier 
897f9e1cf08SDavid du Colombier 	if(nentries < ArenaCIGSize){
898f9e1cf08SDavid du Colombier 		fprint(2, "asking for too few entries\n");
899f9e1cf08SDavid du Colombier 		return -1;
900f9e1cf08SDavid du Colombier 	}
901f9e1cf08SDavid du Colombier 
902f9e1cf08SDavid du Colombier 	qlock(&arena->lock);
903f9e1cf08SDavid du Colombier 	if(arena->cig == nil)
904f9e1cf08SDavid du Colombier 		loadcig(arena);
905f9e1cf08SDavid du Colombier 	if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
906f9e1cf08SDavid du Colombier 		qunlock(&arena->lock);
907f9e1cf08SDavid du Colombier 		return -1;
908f9e1cf08SDavid du Colombier 	}
909f9e1cf08SDavid du Colombier 
910f9e1cf08SDavid du Colombier 	addr = 0;
911f9e1cf08SDavid du Colombier 	base = g*ArenaCIGSize;
912f9e1cf08SDavid du Colombier 	limit = base + ArenaCIGSize;
913f9e1cf08SDavid du Colombier 	if(base > arena->memstats.clumps)
914f9e1cf08SDavid du Colombier 		base = arena->memstats.clumps;
915f9e1cf08SDavid du Colombier 	ie = entries;
916f9e1cf08SDavid du Colombier 	for(i=base; i<limit; i++){
917f9e1cf08SDavid du Colombier 		if(readclumpinfo(arena, i, &ci) < 0)
918f9e1cf08SDavid du Colombier 			break;
919f9e1cf08SDavid du Colombier 		if(ci.type != VtCorruptType){
920f9e1cf08SDavid du Colombier 			scorecp(ie->score, ci.score);
921f9e1cf08SDavid du Colombier 			ie->ia.type = ci.type;
922f9e1cf08SDavid du Colombier 			ie->ia.size = ci.uncsize;
923f9e1cf08SDavid du Colombier 			ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
924f9e1cf08SDavid du Colombier 			ie->ia.addr = addr;
925f9e1cf08SDavid du Colombier 			ie++;
926f9e1cf08SDavid du Colombier 		}
927f9e1cf08SDavid du Colombier 		addr += ClumpSize + ci.size;
928f9e1cf08SDavid du Colombier 	}
929f9e1cf08SDavid du Colombier 	qunlock(&arena->lock);
930f9e1cf08SDavid du Colombier 	return ie - entries;
931f9e1cf08SDavid du Colombier }
932