xref: /plan9/sys/src/cmd/venti/srv/dcache.c (revision 23566e0c4b9b5ce80121325d240a2f6e83a70a8f)
1368c31abSDavid du Colombier /*
2368c31abSDavid du Colombier  * Disk cache.
3368c31abSDavid du Colombier  *
4368c31abSDavid du Colombier  * Caches raw disk blocks.  Getdblock() gets a block, putdblock puts it back.
5368c31abSDavid du Colombier  * Getdblock has a mode parameter that determines i/o and access to a block:
6368c31abSDavid du Colombier  * if mode is OREAD or ORDWR, it is read from disk if not already in memory.
7368c31abSDavid du Colombier  * If mode is ORDWR or OWRITE, it is locked for exclusive use before being returned.
8368c31abSDavid du Colombier  * It is *not* marked dirty -- once changes have been made, they should be noted
9368c31abSDavid du Colombier  * by using dirtydblock() before putdblock().
10368c31abSDavid du Colombier  *
11368c31abSDavid du Colombier  * There is a global cache lock as well as a lock on each block.
12368c31abSDavid du Colombier  * Within a thread, the cache lock can be acquired while holding a block lock,
13368c31abSDavid du Colombier  * but not vice versa; and a block cannot be locked if you already hold the lock
14368c31abSDavid du Colombier  * on another block.
15368c31abSDavid du Colombier  *
16368c31abSDavid du Colombier  * The flush proc writes out dirty blocks in batches, one batch per dirty tag.
17368c31abSDavid du Colombier  * For example, the DirtyArena blocks are all written to disk before any of the
18368c31abSDavid du Colombier  * DirtyArenaCib blocks.
19368c31abSDavid du Colombier  *
20368c31abSDavid du Colombier  * This code used to be in charge of flushing the dirty index blocks out to
21368c31abSDavid du Colombier  * disk, but updating the index turned out to benefit from extra care.
22368c31abSDavid du Colombier  * Now cached index blocks are never marked dirty.  The index.c code takes
23368c31abSDavid du Colombier  * care of updating them behind our back, and uses _getdblock to update any
24368c31abSDavid du Colombier  * cached copies of the blocks as it changes them on disk.
25368c31abSDavid du Colombier  */
26368c31abSDavid du Colombier 
27368c31abSDavid du Colombier #include "stdinc.h"
28368c31abSDavid du Colombier #include "dat.h"
29368c31abSDavid du Colombier #include "fns.h"
30368c31abSDavid du Colombier 
31368c31abSDavid du Colombier typedef struct DCache	DCache;
32368c31abSDavid du Colombier 
33368c31abSDavid du Colombier enum
34368c31abSDavid du Colombier {
35368c31abSDavid du Colombier 	HashLog		= 9,
36368c31abSDavid du Colombier 	HashSize	= 1<<HashLog,
37368c31abSDavid du Colombier 	HashMask	= HashSize - 1,
38368c31abSDavid du Colombier };
39368c31abSDavid du Colombier 
40368c31abSDavid du Colombier struct DCache
41368c31abSDavid du Colombier {
42368c31abSDavid du Colombier 	QLock		lock;
43368c31abSDavid du Colombier 	RWLock		dirtylock;		/* must be held to inspect or set b->dirty */
44368c31abSDavid du Colombier 	Rendez		full;
45368c31abSDavid du Colombier 	Round		round;
46368c31abSDavid du Colombier 	DBlock		*free;			/* list of available lumps */
47368c31abSDavid du Colombier 	u32int		now;			/* ticks for usage timestamps */
48368c31abSDavid du Colombier 	int		size;			/* max. size of any block; allocated to each block */
49368c31abSDavid du Colombier 	DBlock		**heads;		/* hash table for finding address */
50368c31abSDavid du Colombier 	int		nheap;			/* number of available victims */
51368c31abSDavid du Colombier 	DBlock		**heap;			/* heap for locating victims */
52368c31abSDavid du Colombier 	int		nblocks;		/* number of blocks allocated */
53368c31abSDavid du Colombier 	DBlock		*blocks;		/* array of block descriptors */
54368c31abSDavid du Colombier 	DBlock		**write;		/* array of block pointers to be written */
55368c31abSDavid du Colombier 	u8int		*mem;			/* memory for all block descriptors */
56368c31abSDavid du Colombier 	int		ndirty;			/* number of dirty blocks */
57368c31abSDavid du Colombier 	int		maxdirty;		/* max. number of dirty blocks */
58368c31abSDavid du Colombier };
59368c31abSDavid du Colombier 
60368c31abSDavid du Colombier typedef struct Ra Ra;
61368c31abSDavid du Colombier struct Ra
62368c31abSDavid du Colombier {
63368c31abSDavid du Colombier 	Part *part;
64368c31abSDavid du Colombier 	u64int addr;
65368c31abSDavid du Colombier };
66368c31abSDavid du Colombier 
67368c31abSDavid du Colombier static DCache	dcache;
68368c31abSDavid du Colombier 
69368c31abSDavid du Colombier static int	downheap(int i, DBlock *b);
70368c31abSDavid du Colombier static int	upheap(int i, DBlock *b);
71368c31abSDavid du Colombier static DBlock	*bumpdblock(void);
72368c31abSDavid du Colombier static void	delheap(DBlock *db);
73368c31abSDavid du Colombier static void	fixheap(int i, DBlock *b);
74368c31abSDavid du Colombier static void	flushproc(void*);
75368c31abSDavid du Colombier static void	writeproc(void*);
76368c31abSDavid du Colombier 
77368c31abSDavid du Colombier void
initdcache(u32int mem)78368c31abSDavid du Colombier initdcache(u32int mem)
79368c31abSDavid du Colombier {
80368c31abSDavid du Colombier 	DBlock *b, *last;
81368c31abSDavid du Colombier 	u32int nblocks, blocksize;
82368c31abSDavid du Colombier 	int i;
83368c31abSDavid du Colombier 	u8int *p;
84368c31abSDavid du Colombier 
85368c31abSDavid du Colombier 	if(mem < maxblocksize * 2)
86368c31abSDavid du Colombier 		sysfatal("need at least %d bytes for the disk cache", maxblocksize * 2);
87368c31abSDavid du Colombier 	if(maxblocksize == 0)
88368c31abSDavid du Colombier 		sysfatal("no max. block size given for disk cache");
89368c31abSDavid du Colombier 	blocksize = maxblocksize;
90368c31abSDavid du Colombier 	nblocks = mem / blocksize;
91368c31abSDavid du Colombier 	dcache.full.l = &dcache.lock;
92368c31abSDavid du Colombier 	dcache.nblocks = nblocks;
93368c31abSDavid du Colombier 	dcache.maxdirty = (nblocks * 2) / 3;
94368c31abSDavid du Colombier 	trace(TraceProc, "initialize disk cache with %d blocks of %d bytes, maximum %d dirty blocks\n",
95368c31abSDavid du Colombier 			nblocks, blocksize, dcache.maxdirty);
96368c31abSDavid du Colombier 	dcache.size = blocksize;
97368c31abSDavid du Colombier 	dcache.heads = MKNZ(DBlock*, HashSize);
98368c31abSDavid du Colombier 	dcache.heap = MKNZ(DBlock*, nblocks);
99368c31abSDavid du Colombier 	dcache.blocks = MKNZ(DBlock, nblocks);
100368c31abSDavid du Colombier 	dcache.write = MKNZ(DBlock*, nblocks);
101368c31abSDavid du Colombier 	dcache.mem = MKNZ(u8int, (nblocks+1+128) * blocksize);
102368c31abSDavid du Colombier 
103368c31abSDavid du Colombier 	last = nil;
1041f9fb570SDavid du Colombier 	p = (u8int*)(((uintptr)dcache.mem+blocksize-1)&~(uintptr)(blocksize-1));
105368c31abSDavid du Colombier 	for(i = 0; i < nblocks; i++){
106368c31abSDavid du Colombier 		b = &dcache.blocks[i];
107368c31abSDavid du Colombier 		b->data = &p[i * blocksize];
108368c31abSDavid du Colombier 		b->heap = TWID32;
109368c31abSDavid du Colombier 		b->writedonechan = chancreate(sizeof(void*), 1);
110368c31abSDavid du Colombier 		b->next = last;
111368c31abSDavid du Colombier 		last = b;
112368c31abSDavid du Colombier 	}
113368c31abSDavid du Colombier 
114368c31abSDavid du Colombier 	dcache.free = last;
115368c31abSDavid du Colombier 	dcache.nheap = 0;
116368c31abSDavid du Colombier 	setstat(StatDcacheSize, nblocks);
117368c31abSDavid du Colombier 	initround(&dcache.round, "dcache", 120*1000);
118368c31abSDavid du Colombier 
119368c31abSDavid du Colombier 	vtproc(flushproc, nil);
120368c31abSDavid du Colombier 	vtproc(delaykickroundproc, &dcache.round);
121368c31abSDavid du Colombier }
122368c31abSDavid du Colombier 
123368c31abSDavid du Colombier static u32int
pbhash(u64int addr)124368c31abSDavid du Colombier pbhash(u64int addr)
125368c31abSDavid du Colombier {
126368c31abSDavid du Colombier 	u32int h;
127368c31abSDavid du Colombier 
128368c31abSDavid du Colombier #define hashit(c)	((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
129368c31abSDavid du Colombier 	h = (addr >> 32) ^ addr;
130368c31abSDavid du Colombier 	return hashit(h);
131368c31abSDavid du Colombier }
132368c31abSDavid du Colombier 
133368c31abSDavid du Colombier DBlock*
getdblock(Part * part,u64int addr,int mode)134368c31abSDavid du Colombier getdblock(Part *part, u64int addr, int mode)
135368c31abSDavid du Colombier {
136368c31abSDavid du Colombier 	DBlock *b;
137368c31abSDavid du Colombier 
138368c31abSDavid du Colombier 	b = _getdblock(part, addr, mode, 1);
139368c31abSDavid du Colombier 	if(mode == OREAD || mode == ORDWR)
140368c31abSDavid du Colombier 		addstat(StatDcacheRead, 1);
141368c31abSDavid du Colombier 	if(mode == OWRITE || mode == ORDWR)
142368c31abSDavid du Colombier 		addstat(StatDcacheWrite, 1);
143368c31abSDavid du Colombier 	return b;
144368c31abSDavid du Colombier }
145368c31abSDavid du Colombier 
146368c31abSDavid du Colombier DBlock*
_getdblock(Part * part,u64int addr,int mode,int load)147368c31abSDavid du Colombier _getdblock(Part *part, u64int addr, int mode, int load)
148368c31abSDavid du Colombier {
149368c31abSDavid du Colombier 	DBlock *b;
150*23566e0cSDavid du Colombier 	u32int h, size, ms;
151368c31abSDavid du Colombier 
152*23566e0cSDavid du Colombier 	ms = 0;
153368c31abSDavid du Colombier 	trace(TraceBlock, "getdblock enter %s 0x%llux", part->name, addr);
154368c31abSDavid du Colombier 	size = part->blocksize;
155368c31abSDavid du Colombier 	if(size > dcache.size){
156368c31abSDavid du Colombier 		seterr(EAdmin, "block size %d too big for cache with size %d", size, dcache.size);
157*23566e0cSDavid du Colombier 		if(load)
158*23566e0cSDavid du Colombier 			addstat(StatDcacheLookup, 1);
159368c31abSDavid du Colombier 		return nil;
160368c31abSDavid du Colombier 	}
161368c31abSDavid du Colombier 	h = pbhash(addr);
162368c31abSDavid du Colombier 
163368c31abSDavid du Colombier 	/*
164368c31abSDavid du Colombier 	 * look for the block in the cache
165368c31abSDavid du Colombier 	 */
166368c31abSDavid du Colombier 	qlock(&dcache.lock);
167368c31abSDavid du Colombier again:
168368c31abSDavid du Colombier 	for(b = dcache.heads[h]; b != nil; b = b->next){
169368c31abSDavid du Colombier 		if(b->part == part && b->addr == addr){
170f9e1cf08SDavid du Colombier 			if(load)
171*23566e0cSDavid du Colombier 				addstat2(StatDcacheHit, 1, StatDcacheLookup, 1);
172368c31abSDavid du Colombier 			goto found;
173368c31abSDavid du Colombier 		}
174368c31abSDavid du Colombier 	}
175368c31abSDavid du Colombier 
176368c31abSDavid du Colombier 	/*
177368c31abSDavid du Colombier 	 * missed: locate the block with the oldest second to last use.
178368c31abSDavid du Colombier 	 * remove it from the heap, and fix up the heap.
179368c31abSDavid du Colombier 	 */
180368c31abSDavid du Colombier 	if(!load){
181368c31abSDavid du Colombier 		qunlock(&dcache.lock);
182368c31abSDavid du Colombier 		return nil;
183368c31abSDavid du Colombier 	}
184368c31abSDavid du Colombier 
185*23566e0cSDavid du Colombier 	/*
186*23566e0cSDavid du Colombier 	 * Only start timer here, on cache miss - calling msec() on plain cache hits
187*23566e0cSDavid du Colombier 	 * makes cache hits system-call bound.
188*23566e0cSDavid du Colombier 	 */
189*23566e0cSDavid du Colombier 	ms = msec();
190*23566e0cSDavid du Colombier 	addstat2(StatDcacheLookup, 1, StatDcacheMiss, 1);
191368c31abSDavid du Colombier 
192368c31abSDavid du Colombier 	b = bumpdblock();
193368c31abSDavid du Colombier 	if(b == nil){
194368c31abSDavid du Colombier 		trace(TraceBlock, "all disk cache blocks in use");
195368c31abSDavid du Colombier 		addstat(StatDcacheStall, 1);
196368c31abSDavid du Colombier 		rsleep(&dcache.full);
197368c31abSDavid du Colombier 		addstat(StatDcacheStall, -1);
198368c31abSDavid du Colombier 		goto again;
199368c31abSDavid du Colombier 	}
200368c31abSDavid du Colombier 
201368c31abSDavid du Colombier 	assert(!b->dirty);
202368c31abSDavid du Colombier 
203368c31abSDavid du Colombier 	/*
204368c31abSDavid du Colombier 	 * the new block has no last use, so assume it happens sometime in the middle
205368c31abSDavid du Colombier ZZZ this is not reasonable
206368c31abSDavid du Colombier 	 */
207368c31abSDavid du Colombier 	b->used = (b->used2 + dcache.now) / 2;
208368c31abSDavid du Colombier 
209368c31abSDavid du Colombier 	/*
210368c31abSDavid du Colombier 	 * rechain the block on the correct hash chain
211368c31abSDavid du Colombier 	 */
212368c31abSDavid du Colombier 	b->next = dcache.heads[h];
213368c31abSDavid du Colombier 	dcache.heads[h] = b;
214368c31abSDavid du Colombier 	if(b->next != nil)
215368c31abSDavid du Colombier 		b->next->prev = b;
216368c31abSDavid du Colombier 	b->prev = nil;
217368c31abSDavid du Colombier 
218368c31abSDavid du Colombier 	b->addr = addr;
219368c31abSDavid du Colombier 	b->part = part;
220368c31abSDavid du Colombier 	b->size = 0;
221368c31abSDavid du Colombier 
222368c31abSDavid du Colombier found:
223368c31abSDavid du Colombier 	b->ref++;
224368c31abSDavid du Colombier 	b->used2 = b->used;
225368c31abSDavid du Colombier 	b->used = dcache.now++;
226368c31abSDavid du Colombier 	if(b->heap != TWID32)
227368c31abSDavid du Colombier 		fixheap(b->heap, b);
228368c31abSDavid du Colombier 
229493edcedSDavid du Colombier 	if((mode == ORDWR || mode == OWRITE) && part->writechan == nil){
230493edcedSDavid du Colombier 		trace(TraceBlock, "getdblock allocwriteproc %s", part->name);
231493edcedSDavid du Colombier 		part->writechan = chancreate(sizeof(DBlock*), dcache.nblocks);
232493edcedSDavid du Colombier 		vtproc(writeproc, part);
233493edcedSDavid du Colombier 	}
234368c31abSDavid du Colombier 	qunlock(&dcache.lock);
235368c31abSDavid du Colombier 
236368c31abSDavid du Colombier 	trace(TraceBlock, "getdblock lock");
237368c31abSDavid du Colombier 	addstat(StatDblockStall, 1);
238368c31abSDavid du Colombier 	if(mode == OREAD)
239368c31abSDavid du Colombier 		rlock(&b->lock);
240368c31abSDavid du Colombier 	else
241368c31abSDavid du Colombier 		wlock(&b->lock);
242368c31abSDavid du Colombier 	addstat(StatDblockStall, -1);
243368c31abSDavid du Colombier 	trace(TraceBlock, "getdblock locked");
244368c31abSDavid du Colombier 
245368c31abSDavid du Colombier 	if(b->size != size){
246368c31abSDavid du Colombier 		if(mode == OREAD){
247368c31abSDavid du Colombier 			addstat(StatDblockStall, 1);
248368c31abSDavid du Colombier 			runlock(&b->lock);
249368c31abSDavid du Colombier 			wlock(&b->lock);
250368c31abSDavid du Colombier 			addstat(StatDblockStall, -1);
251368c31abSDavid du Colombier 		}
252368c31abSDavid du Colombier 		if(b->size < size){
253368c31abSDavid du Colombier 			if(mode == OWRITE)
254368c31abSDavid du Colombier 				memset(&b->data[b->size], 0, size - b->size);
255368c31abSDavid du Colombier 			else{
256368c31abSDavid du Colombier 				trace(TraceBlock, "getdblock readpart %s 0x%llux", part->name, addr);
257f9e1cf08SDavid du Colombier 				diskaccess(0);
258f9e1cf08SDavid du Colombier 				if(readpart(part, addr + b->size, &b->data[b->size], size - b->size) < 0){
259368c31abSDavid du Colombier 					b->mode = ORDWR;	/* so putdblock wunlocks */
260368c31abSDavid du Colombier 					putdblock(b);
261368c31abSDavid du Colombier 					return nil;
262368c31abSDavid du Colombier 				}
263368c31abSDavid du Colombier 				trace(TraceBlock, "getdblock readpartdone");
264368c31abSDavid du Colombier 				addstat(StatApartRead, 1);
265368c31abSDavid du Colombier 				addstat(StatApartReadBytes, size-b->size);
266368c31abSDavid du Colombier 			}
267368c31abSDavid du Colombier 		}
268368c31abSDavid du Colombier 		b->size = size;
269368c31abSDavid du Colombier 		if(mode == OREAD){
270368c31abSDavid du Colombier 			addstat(StatDblockStall, 1);
271368c31abSDavid du Colombier 			wunlock(&b->lock);
272368c31abSDavid du Colombier 			rlock(&b->lock);
273368c31abSDavid du Colombier 			addstat(StatDblockStall, -1);
274368c31abSDavid du Colombier 		}
275368c31abSDavid du Colombier 	}
276368c31abSDavid du Colombier 
277368c31abSDavid du Colombier 	b->mode = mode;
278368c31abSDavid du Colombier 	trace(TraceBlock, "getdblock exit");
279*23566e0cSDavid du Colombier 	if(ms)
280*23566e0cSDavid du Colombier 		addstat(StatDcacheLookupTime, msec() - ms);
281368c31abSDavid du Colombier 	return b;
282368c31abSDavid du Colombier }
283368c31abSDavid du Colombier 
284368c31abSDavid du Colombier void
putdblock(DBlock * b)285368c31abSDavid du Colombier putdblock(DBlock *b)
286368c31abSDavid du Colombier {
287368c31abSDavid du Colombier 	if(b == nil)
288368c31abSDavid du Colombier 		return;
289368c31abSDavid du Colombier 
290368c31abSDavid du Colombier 	trace(TraceBlock, "putdblock %s 0x%llux", b->part->name, b->addr);
291368c31abSDavid du Colombier 
292368c31abSDavid du Colombier 	if(b->mode == OREAD)
293368c31abSDavid du Colombier 		runlock(&b->lock);
294368c31abSDavid du Colombier 	else
295368c31abSDavid du Colombier 		wunlock(&b->lock);
296368c31abSDavid du Colombier 
297368c31abSDavid du Colombier 	qlock(&dcache.lock);
298368c31abSDavid du Colombier 	if(--b->ref == 0 && !b->dirty){
299368c31abSDavid du Colombier 		if(b->heap == TWID32)
300368c31abSDavid du Colombier 			upheap(dcache.nheap++, b);
301368c31abSDavid du Colombier 		rwakeupall(&dcache.full);
302368c31abSDavid du Colombier 	}
303368c31abSDavid du Colombier 	qunlock(&dcache.lock);
304368c31abSDavid du Colombier }
305368c31abSDavid du Colombier 
306368c31abSDavid du Colombier void
dirtydblock(DBlock * b,int dirty)307368c31abSDavid du Colombier dirtydblock(DBlock *b, int dirty)
308368c31abSDavid du Colombier {
309368c31abSDavid du Colombier 	int odirty;
310368c31abSDavid du Colombier 
31129e26a97SDavid du Colombier 	trace(TraceBlock, "dirtydblock enter %s 0x%llux %d from 0x%lux",
31229e26a97SDavid du Colombier 		b->part->name, b->addr, dirty, getcallerpc(&b));
313368c31abSDavid du Colombier 	assert(b->ref != 0);
314368c31abSDavid du Colombier 	assert(b->mode==ORDWR || b->mode==OWRITE);
315368c31abSDavid du Colombier 
316368c31abSDavid du Colombier 	odirty = b->dirty;
317368c31abSDavid du Colombier 	if(b->dirty)
318368c31abSDavid du Colombier 		assert(b->dirty == dirty);
319368c31abSDavid du Colombier 	else
320368c31abSDavid du Colombier 		b->dirty = dirty;
321368c31abSDavid du Colombier 
322368c31abSDavid du Colombier 	qlock(&dcache.lock);
323368c31abSDavid du Colombier 	if(!odirty){
324368c31abSDavid du Colombier 		dcache.ndirty++;
325368c31abSDavid du Colombier 		setstat(StatDcacheDirty, dcache.ndirty);
326368c31abSDavid du Colombier 		if(dcache.ndirty >= dcache.maxdirty)
327368c31abSDavid du Colombier 			kickround(&dcache.round, 0);
328368c31abSDavid du Colombier 		else
329368c31abSDavid du Colombier 			delaykickround(&dcache.round);
330368c31abSDavid du Colombier 	}
331368c31abSDavid du Colombier 	qunlock(&dcache.lock);
332368c31abSDavid du Colombier }
333368c31abSDavid du Colombier 
334368c31abSDavid du Colombier static void
unchain(DBlock * b)335368c31abSDavid du Colombier unchain(DBlock *b)
336368c31abSDavid du Colombier {
337368c31abSDavid du Colombier 	ulong h;
338368c31abSDavid du Colombier 
339368c31abSDavid du Colombier 	/*
340368c31abSDavid du Colombier 	 * unchain the block
341368c31abSDavid du Colombier 	 */
342368c31abSDavid du Colombier 	if(b->prev == nil){
343368c31abSDavid du Colombier 		h = pbhash(b->addr);
344368c31abSDavid du Colombier 		if(dcache.heads[h] != b)
345368c31abSDavid du Colombier 			sysfatal("bad hash chains in disk cache");
346368c31abSDavid du Colombier 		dcache.heads[h] = b->next;
347368c31abSDavid du Colombier 	}else
348368c31abSDavid du Colombier 		b->prev->next = b->next;
349368c31abSDavid du Colombier 	if(b->next != nil)
350368c31abSDavid du Colombier 		b->next->prev = b->prev;
351368c31abSDavid du Colombier }
352368c31abSDavid du Colombier 
353368c31abSDavid du Colombier /*
354368c31abSDavid du Colombier  * remove some block from use and update the free list and counters
355368c31abSDavid du Colombier  */
356368c31abSDavid du Colombier static DBlock*
bumpdblock(void)357368c31abSDavid du Colombier bumpdblock(void)
358368c31abSDavid du Colombier {
359368c31abSDavid du Colombier 	DBlock *b;
360368c31abSDavid du Colombier 
361368c31abSDavid du Colombier 	trace(TraceBlock, "bumpdblock enter");
362368c31abSDavid du Colombier 	b = dcache.free;
363368c31abSDavid du Colombier 	if(b != nil){
364368c31abSDavid du Colombier 		dcache.free = b->next;
365368c31abSDavid du Colombier 		return b;
366368c31abSDavid du Colombier 	}
367368c31abSDavid du Colombier 
368368c31abSDavid du Colombier 	if(dcache.ndirty >= dcache.maxdirty)
369368c31abSDavid du Colombier 		kickdcache();
370368c31abSDavid du Colombier 
371368c31abSDavid du Colombier 	/*
372368c31abSDavid du Colombier 	 * remove blocks until we find one that is unused
373368c31abSDavid du Colombier 	 * referenced blocks are left in the heap even though
374368c31abSDavid du Colombier 	 * they can't be scavenged; this is simple a speed optimization
375368c31abSDavid du Colombier 	 */
376368c31abSDavid du Colombier 	for(;;){
377368c31abSDavid du Colombier 		if(dcache.nheap == 0){
378368c31abSDavid du Colombier 			kickdcache();
379368c31abSDavid du Colombier 			trace(TraceBlock, "bumpdblock gotnothing");
380368c31abSDavid du Colombier 			return nil;
381368c31abSDavid du Colombier 		}
382368c31abSDavid du Colombier 		b = dcache.heap[0];
383368c31abSDavid du Colombier 		delheap(b);
384368c31abSDavid du Colombier 		if(!b->ref && !b->dirty)
385368c31abSDavid du Colombier 			break;
386368c31abSDavid du Colombier 	}
387368c31abSDavid du Colombier 
388368c31abSDavid du Colombier 	trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->addr);
389368c31abSDavid du Colombier 
390368c31abSDavid du Colombier 	unchain(b);
391368c31abSDavid du Colombier 	return b;
392368c31abSDavid du Colombier }
393368c31abSDavid du Colombier 
394368c31abSDavid du Colombier void
emptydcache(void)395368c31abSDavid du Colombier emptydcache(void)
396368c31abSDavid du Colombier {
397368c31abSDavid du Colombier 	DBlock *b;
398368c31abSDavid du Colombier 
399368c31abSDavid du Colombier 	qlock(&dcache.lock);
400368c31abSDavid du Colombier 	while(dcache.nheap > 0){
401368c31abSDavid du Colombier 		b = dcache.heap[0];
402368c31abSDavid du Colombier 		delheap(b);
403368c31abSDavid du Colombier 		if(!b->ref && !b->dirty){
404368c31abSDavid du Colombier 			unchain(b);
405368c31abSDavid du Colombier 			b->next = dcache.free;
406368c31abSDavid du Colombier 			dcache.free = b;
407368c31abSDavid du Colombier 		}
408368c31abSDavid du Colombier 	}
409368c31abSDavid du Colombier 	qunlock(&dcache.lock);
410368c31abSDavid du Colombier }
411368c31abSDavid du Colombier 
412368c31abSDavid du Colombier /*
413368c31abSDavid du Colombier  * delete an arbitrary block from the heap
414368c31abSDavid du Colombier  */
415368c31abSDavid du Colombier static void
delheap(DBlock * db)416368c31abSDavid du Colombier delheap(DBlock *db)
417368c31abSDavid du Colombier {
418368c31abSDavid du Colombier 	if(db->heap == TWID32)
419368c31abSDavid du Colombier 		return;
420368c31abSDavid du Colombier 	fixheap(db->heap, dcache.heap[--dcache.nheap]);
421368c31abSDavid du Colombier 	db->heap = TWID32;
422368c31abSDavid du Colombier }
423368c31abSDavid du Colombier 
424368c31abSDavid du Colombier /*
425368c31abSDavid du Colombier  * push an element up or down to it's correct new location
426368c31abSDavid du Colombier  */
427368c31abSDavid du Colombier static void
fixheap(int i,DBlock * b)428368c31abSDavid du Colombier fixheap(int i, DBlock *b)
429368c31abSDavid du Colombier {
430368c31abSDavid du Colombier 	if(upheap(i, b) == i)
431368c31abSDavid du Colombier 		downheap(i, b);
432368c31abSDavid du Colombier }
433368c31abSDavid du Colombier 
434368c31abSDavid du Colombier static int
upheap(int i,DBlock * b)435368c31abSDavid du Colombier upheap(int i, DBlock *b)
436368c31abSDavid du Colombier {
437368c31abSDavid du Colombier 	DBlock *bb;
438368c31abSDavid du Colombier 	u32int now;
439368c31abSDavid du Colombier 	int p;
440368c31abSDavid du Colombier 
441368c31abSDavid du Colombier 	now = dcache.now;
442368c31abSDavid du Colombier 	for(; i != 0; i = p){
443368c31abSDavid du Colombier 		p = (i - 1) >> 1;
444368c31abSDavid du Colombier 		bb = dcache.heap[p];
445368c31abSDavid du Colombier 		if(b->used2 - now >= bb->used2 - now)
446368c31abSDavid du Colombier 			break;
447368c31abSDavid du Colombier 		dcache.heap[i] = bb;
448368c31abSDavid du Colombier 		bb->heap = i;
449368c31abSDavid du Colombier 	}
450368c31abSDavid du Colombier 
451368c31abSDavid du Colombier 	dcache.heap[i] = b;
452368c31abSDavid du Colombier 	b->heap = i;
453368c31abSDavid du Colombier 	return i;
454368c31abSDavid du Colombier }
455368c31abSDavid du Colombier 
456368c31abSDavid du Colombier static int
downheap(int i,DBlock * b)457368c31abSDavid du Colombier downheap(int i, DBlock *b)
458368c31abSDavid du Colombier {
459368c31abSDavid du Colombier 	DBlock *bb;
460368c31abSDavid du Colombier 	u32int now;
461368c31abSDavid du Colombier 	int k;
462368c31abSDavid du Colombier 
463368c31abSDavid du Colombier 	now = dcache.now;
464368c31abSDavid du Colombier 	for(; ; i = k){
465368c31abSDavid du Colombier 		k = (i << 1) + 1;
466368c31abSDavid du Colombier 		if(k >= dcache.nheap)
467368c31abSDavid du Colombier 			break;
468368c31abSDavid du Colombier 		if(k + 1 < dcache.nheap && dcache.heap[k]->used2 - now > dcache.heap[k + 1]->used2 - now)
469368c31abSDavid du Colombier 			k++;
470368c31abSDavid du Colombier 		bb = dcache.heap[k];
471368c31abSDavid du Colombier 		if(b->used2 - now <= bb->used2 - now)
472368c31abSDavid du Colombier 			break;
473368c31abSDavid du Colombier 		dcache.heap[i] = bb;
474368c31abSDavid du Colombier 		bb->heap = i;
475368c31abSDavid du Colombier 	}
476368c31abSDavid du Colombier 
477368c31abSDavid du Colombier 	dcache.heap[i] = b;
478368c31abSDavid du Colombier 	b->heap = i;
479368c31abSDavid du Colombier 	return i;
480368c31abSDavid du Colombier }
481368c31abSDavid du Colombier 
482368c31abSDavid du Colombier static void
findblock(DBlock * bb)483368c31abSDavid du Colombier findblock(DBlock *bb)
484368c31abSDavid du Colombier {
485368c31abSDavid du Colombier 	DBlock *b, *last;
486368c31abSDavid du Colombier 	int h;
487368c31abSDavid du Colombier 
488368c31abSDavid du Colombier 	last = nil;
489368c31abSDavid du Colombier 	h = pbhash(bb->addr);
490368c31abSDavid du Colombier 	for(b = dcache.heads[h]; b != nil; b = b->next){
491368c31abSDavid du Colombier 		if(last != b->prev)
492368c31abSDavid du Colombier 			sysfatal("bad prev link");
493368c31abSDavid du Colombier 		if(b == bb)
494368c31abSDavid du Colombier 			return;
495368c31abSDavid du Colombier 		last = b;
496368c31abSDavid du Colombier 	}
497368c31abSDavid du Colombier 	sysfatal("block missing from hash table");
498368c31abSDavid du Colombier }
499368c31abSDavid du Colombier 
500368c31abSDavid du Colombier void
checkdcache(void)501368c31abSDavid du Colombier checkdcache(void)
502368c31abSDavid du Colombier {
503368c31abSDavid du Colombier 	DBlock *b;
504368c31abSDavid du Colombier 	u32int size, now;
505368c31abSDavid du Colombier 	int i, k, refed, nfree;
506368c31abSDavid du Colombier 
507368c31abSDavid du Colombier 	qlock(&dcache.lock);
508368c31abSDavid du Colombier 	size = dcache.size;
509368c31abSDavid du Colombier 	now = dcache.now;
510368c31abSDavid du Colombier 	for(i = 0; i < dcache.nheap; i++){
511368c31abSDavid du Colombier 		if(dcache.heap[i]->heap != i)
512368c31abSDavid du Colombier 			sysfatal("dc: mis-heaped at %d: %d", i, dcache.heap[i]->heap);
513368c31abSDavid du Colombier 		if(i > 0 && dcache.heap[(i - 1) >> 1]->used2 - now > dcache.heap[i]->used2 - now)
514368c31abSDavid du Colombier 			sysfatal("dc: bad heap ordering");
515368c31abSDavid du Colombier 		k = (i << 1) + 1;
516368c31abSDavid du Colombier 		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
517368c31abSDavid du Colombier 			sysfatal("dc: bad heap ordering");
518368c31abSDavid du Colombier 		k++;
519368c31abSDavid du Colombier 		if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
520368c31abSDavid du Colombier 			sysfatal("dc: bad heap ordering");
521368c31abSDavid du Colombier 	}
522368c31abSDavid du Colombier 
523368c31abSDavid du Colombier 	refed = 0;
524368c31abSDavid du Colombier 	for(i = 0; i < dcache.nblocks; i++){
525368c31abSDavid du Colombier 		b = &dcache.blocks[i];
526368c31abSDavid du Colombier 		if(b->data != &dcache.mem[i * size])
527368c31abSDavid du Colombier 			sysfatal("dc: mis-blocked at %d", i);
528368c31abSDavid du Colombier 		if(b->ref && b->heap == TWID32)
529368c31abSDavid du Colombier 			refed++;
530368c31abSDavid du Colombier 		if(b->addr)
531368c31abSDavid du Colombier 			findblock(b);
532368c31abSDavid du Colombier 		if(b->heap != TWID32
533368c31abSDavid du Colombier 		&& dcache.heap[b->heap] != b)
534368c31abSDavid du Colombier 			sysfatal("dc: spurious heap value");
535368c31abSDavid du Colombier 	}
536368c31abSDavid du Colombier 
537368c31abSDavid du Colombier 	nfree = 0;
538368c31abSDavid du Colombier 	for(b = dcache.free; b != nil; b = b->next){
539368c31abSDavid du Colombier 		if(b->addr != 0 || b->heap != TWID32)
540368c31abSDavid du Colombier 			sysfatal("dc: bad free list");
541368c31abSDavid du Colombier 		nfree++;
542368c31abSDavid du Colombier 	}
543368c31abSDavid du Colombier 
544368c31abSDavid du Colombier 	if(dcache.nheap + nfree + refed != dcache.nblocks)
545368c31abSDavid du Colombier 		sysfatal("dc: missing blocks: %d %d %d", dcache.nheap, refed, dcache.nblocks);
546368c31abSDavid du Colombier 	qunlock(&dcache.lock);
547368c31abSDavid du Colombier }
548368c31abSDavid du Colombier 
549368c31abSDavid du Colombier void
flushdcache(void)550368c31abSDavid du Colombier flushdcache(void)
551368c31abSDavid du Colombier {
552368c31abSDavid du Colombier 	trace(TraceProc, "flushdcache enter");
553368c31abSDavid du Colombier 	kickround(&dcache.round, 1);
554368c31abSDavid du Colombier 	trace(TraceProc, "flushdcache exit");
555368c31abSDavid du Colombier }
556368c31abSDavid du Colombier 
557368c31abSDavid du Colombier void
kickdcache(void)558368c31abSDavid du Colombier kickdcache(void)
559368c31abSDavid du Colombier {
560368c31abSDavid du Colombier 	kickround(&dcache.round, 0);
561368c31abSDavid du Colombier }
562368c31abSDavid du Colombier 
563368c31abSDavid du Colombier static int
parallelwrites(DBlock ** b,DBlock ** eb,int dirty)564368c31abSDavid du Colombier parallelwrites(DBlock **b, DBlock **eb, int dirty)
565368c31abSDavid du Colombier {
566368c31abSDavid du Colombier 	DBlock **p, **q;
567368c31abSDavid du Colombier 	Part *part;
568368c31abSDavid du Colombier 
569368c31abSDavid du Colombier 	for(p=b; p<eb && (*p)->dirty == dirty; p++){
570368c31abSDavid du Colombier 		assert(b<=p && p<eb);
571368c31abSDavid du Colombier 		sendp((*p)->part->writechan, *p);
572368c31abSDavid du Colombier 	}
573368c31abSDavid du Colombier 	q = p;
574368c31abSDavid du Colombier 	for(p=b; p<q; p++){
575368c31abSDavid du Colombier 		assert(b<=p && p<eb);
576368c31abSDavid du Colombier 		recvp((*p)->writedonechan);
577368c31abSDavid du Colombier 	}
578368c31abSDavid du Colombier 
579368c31abSDavid du Colombier 	/*
580368c31abSDavid du Colombier 	 * Flush the partitions that have been written to.
581368c31abSDavid du Colombier 	 */
582368c31abSDavid du Colombier 	part = nil;
583368c31abSDavid du Colombier 	for(p=b; p<q; p++){
584368c31abSDavid du Colombier 		if(part != (*p)->part){
585368c31abSDavid du Colombier 			part = (*p)->part;
586368c31abSDavid du Colombier 			flushpart(part);	/* what if it fails? */
587368c31abSDavid du Colombier 		}
588368c31abSDavid du Colombier 	}
589368c31abSDavid du Colombier 
590368c31abSDavid du Colombier 	return p-b;
591368c31abSDavid du Colombier }
592368c31abSDavid du Colombier 
593368c31abSDavid du Colombier /*
594368c31abSDavid du Colombier  * Sort first by dirty flag, then by partition, then by address in partition.
595368c31abSDavid du Colombier  */
596368c31abSDavid du Colombier static int
writeblockcmp(const void * va,const void * vb)597368c31abSDavid du Colombier writeblockcmp(const void *va, const void *vb)
598368c31abSDavid du Colombier {
599368c31abSDavid du Colombier 	DBlock *a, *b;
600368c31abSDavid du Colombier 
601368c31abSDavid du Colombier 	a = *(DBlock**)va;
602368c31abSDavid du Colombier 	b = *(DBlock**)vb;
603368c31abSDavid du Colombier 
604368c31abSDavid du Colombier 	if(a->dirty != b->dirty)
605368c31abSDavid du Colombier 		return a->dirty - b->dirty;
606368c31abSDavid du Colombier 	if(a->part != b->part){
607368c31abSDavid du Colombier 		if(a->part < b->part)
608368c31abSDavid du Colombier 			return -1;
609368c31abSDavid du Colombier 		if(a->part > b->part)
610368c31abSDavid du Colombier 			return 1;
611368c31abSDavid du Colombier 	}
612368c31abSDavid du Colombier 	if(a->addr < b->addr)
613368c31abSDavid du Colombier 		return -1;
614368c31abSDavid du Colombier 	return 1;
615368c31abSDavid du Colombier }
616368c31abSDavid du Colombier 
617368c31abSDavid du Colombier static void
flushproc(void * v)618368c31abSDavid du Colombier flushproc(void *v)
619368c31abSDavid du Colombier {
620368c31abSDavid du Colombier 	int i, j, n;
621368c31abSDavid du Colombier 	ulong t0;
622368c31abSDavid du Colombier 	DBlock *b, **write;
623368c31abSDavid du Colombier 
624368c31abSDavid du Colombier 	USED(v);
625368c31abSDavid du Colombier 	threadsetname("flushproc");
626368c31abSDavid du Colombier 	for(;;){
627368c31abSDavid du Colombier 		waitforkick(&dcache.round);
628368c31abSDavid du Colombier 
629368c31abSDavid du Colombier 		trace(TraceWork, "start");
630493edcedSDavid du Colombier 		t0 = nsec()/1000;
631493edcedSDavid du Colombier 		trace(TraceProc, "build t=%lud", (ulong)(nsec()/1000)-t0);
632493edcedSDavid du Colombier 
633368c31abSDavid du Colombier 		write = dcache.write;
634368c31abSDavid du Colombier 		n = 0;
635368c31abSDavid du Colombier 		for(i=0; i<dcache.nblocks; i++){
636368c31abSDavid du Colombier 			b = &dcache.blocks[i];
637368c31abSDavid du Colombier 			if(b->dirty)
638368c31abSDavid du Colombier 				write[n++] = b;
639368c31abSDavid du Colombier 		}
640368c31abSDavid du Colombier 
641368c31abSDavid du Colombier 		qsort(write, n, sizeof(write[0]), writeblockcmp);
642368c31abSDavid du Colombier 
643368c31abSDavid du Colombier 		/* Write each stage of blocks out. */
644368c31abSDavid du Colombier 		trace(TraceProc, "writeblocks t=%lud", (ulong)(nsec()/1000)-t0);
645368c31abSDavid du Colombier 		i = 0;
646368c31abSDavid du Colombier 		for(j=1; j<DirtyMax; j++){
64729e26a97SDavid du Colombier 			trace(TraceProc, "writeblocks.%d t=%lud",
64829e26a97SDavid du Colombier 				j, (ulong)(nsec()/1000)-t0);
649368c31abSDavid du Colombier 			i += parallelwrites(write+i, write+n, j);
650368c31abSDavid du Colombier 		}
651368c31abSDavid du Colombier 		if(i != n){
652368c31abSDavid du Colombier 			fprint(2, "in flushproc i=%d n=%d\n", i, n);
653368c31abSDavid du Colombier 			for(i=0; i<n; i++)
65429e26a97SDavid du Colombier 				fprint(2, "\tblock %d: dirty=%d\n",
65529e26a97SDavid du Colombier 					i, write[i]->dirty);
656368c31abSDavid du Colombier 			abort();
657368c31abSDavid du Colombier 		}
658368c31abSDavid du Colombier 
65929e26a97SDavid du Colombier 		/*
660493edcedSDavid du Colombier 		 * b->dirty is protected by b->lock while ndirty is protected
661493edcedSDavid du Colombier 		 * by dcache.lock, so the --ndirty below is the delayed one
662493edcedSDavid du Colombier 		 * from clearing b->dirty in the write proc.  It may happen
663493edcedSDavid du Colombier 		 * that some other proc has come along and redirtied b since
664493edcedSDavid du Colombier 		 * the write.  That's okay, it just means that ndirty may be
665493edcedSDavid du Colombier 		 * one too high until we catch up and do the decrement.
666368c31abSDavid du Colombier 		 */
667368c31abSDavid du Colombier 		trace(TraceProc, "undirty.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
668368c31abSDavid du Colombier 		qlock(&dcache.lock);
669368c31abSDavid du Colombier 		for(i=0; i<n; i++){
670368c31abSDavid du Colombier 			b = write[i];
671368c31abSDavid du Colombier 			--dcache.ndirty;
672368c31abSDavid du Colombier 			if(b->ref == 0 && b->heap == TWID32){
673368c31abSDavid du Colombier 				upheap(dcache.nheap++, b);
674368c31abSDavid du Colombier 				rwakeupall(&dcache.full);
675368c31abSDavid du Colombier 			}
676368c31abSDavid du Colombier 		}
677368c31abSDavid du Colombier 		setstat(StatDcacheDirty, dcache.ndirty);
678368c31abSDavid du Colombier 		qunlock(&dcache.lock);
679368c31abSDavid du Colombier 		addstat(StatDcacheFlush, 1);
680368c31abSDavid du Colombier 		trace(TraceWork, "finish");
681368c31abSDavid du Colombier 	}
682368c31abSDavid du Colombier }
683368c31abSDavid du Colombier 
684368c31abSDavid du Colombier static void
writeproc(void * v)685368c31abSDavid du Colombier writeproc(void *v)
686368c31abSDavid du Colombier {
687368c31abSDavid du Colombier 	DBlock *b;
688368c31abSDavid du Colombier 	Part *p;
689368c31abSDavid du Colombier 
690368c31abSDavid du Colombier 	p = v;
691368c31abSDavid du Colombier 
692368c31abSDavid du Colombier 	threadsetname("writeproc:%s", p->name);
693368c31abSDavid du Colombier 	for(;;){
694368c31abSDavid du Colombier 		b = recvp(p->writechan);
695368c31abSDavid du Colombier 		trace(TraceWork, "start");
696368c31abSDavid du Colombier 		assert(b->part == p);
697368c31abSDavid du Colombier 		trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr);
698368c31abSDavid du Colombier 		wlock(&b->lock);
699368c31abSDavid du Colombier 		trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr);
700368c31abSDavid du Colombier 		diskaccess(0);
701368c31abSDavid du Colombier 		if(writepart(p, b->addr, b->data, b->size) < 0)
70229e26a97SDavid du Colombier 			fprint(2, "%s: writeproc: part %s addr 0x%llux: write error: %r\n",
70329e26a97SDavid du Colombier 				argv0, p->name, b->addr);
704368c31abSDavid du Colombier 		addstat(StatApartWrite, 1);
705368c31abSDavid du Colombier 		addstat(StatApartWriteBytes, b->size);
706368c31abSDavid du Colombier 		b->dirty = 0;
707368c31abSDavid du Colombier 		wunlock(&b->lock);
708368c31abSDavid du Colombier 		trace(TraceProc, "finish %s 0x%llux", p->name, b->addr);
709368c31abSDavid du Colombier 		trace(TraceWork, "finish");
710368c31abSDavid du Colombier 		sendp(b->writedonechan, b);
711368c31abSDavid du Colombier 	}
712368c31abSDavid du Colombier }
713