xref: /inferno-os/liblogfs/write.c (revision 46439007cf417cbd9ac8049bb4122c890097a0fa)
1 #include "lib9.h"
2 #include "logfs.h"
3 #include "local.h"
4 
5 typedef struct AllocState {
6 	long oldblock;
7 	int markbad;
8 } AllocState;
9 
10 u32int
11 logfsdatapagemask(int pages, int base)
12 {
13 	if(pages == 32)
14 		return 0xffffffff;
15 	return (((u32int)1 << pages) - 1) << (32 - base - pages);
16 }
17 
18 static u32int
19 fastgap(u32int w, u32int n)
20 {
21 	u32int s;
22 //print("fastgap(0x%.8ux, %d)\n", w, n);
23 	if(w == 0 || n < 1 || n > 32)
24 		return 0;
25 /*
26 #	unroll the following loop 5 times:
27 #		while(n > 1){
28 #			s := n >> 1;
29 #			w &= w<<s;
30 #			n -= s;
31 #		}
32 */
33 	s = n >> 1;
34 	w &= w << s;
35 	n -= s;
36 	s = n >> 1;
37 	w &= w << s;
38 	n -= s;
39 	s = n >> 1;
40 	w &= w << s;
41 	n -= s;
42 	s = n >> 1;
43 	w &= w << s;
44 	n -= s;
45 	s = n >> 1;
46 	return w & (w << s);
47 }
48 
49 static u32int
50 page0gap(u32int w, u32int n)
51 {
52 	int p;
53 	for(p = 1; p <= n; p++) {
54 		u32int m = logfsdatapagemask(p, 0);
55 		if((w & m) != m)
56 			return logfsdatapagemask(p - 1, 0);
57 	}
58 	return 0;
59 }
60 
61 int
62 nlz(u32int x)
63 {
64 	int n, c;
65 	if(x == 0)
66 		return 32;
67 	if(x & 0x80000000)
68 		return (~x >> 26) & 0x20;
69 	n = 32;
70 	c = 16;
71 	do {
72 		u32int y;
73 		y = x >> c;
74 		if(y != 0) {
75 			n -= c;
76 			x = y;
77 		}
78 	} while((c >>= 1) != 0);
79 	return n - x;
80 }
81 
82 static u32int
83 findgap(u32int w, u32int n)
84 {
85 	u32int m;
86 	do {
87 		m  = fastgap(w, n);
88 		if(m)
89 			break;
90 		n--;
91 	} while(n);
92 	if(n == 0)
93 		return 0;
94 	return logfsdatapagemask(n, nlz(m));
95 }
96 
97 static int
98 bitcount(ulong mask)
99 {
100 	ulong m;
101 	int rv;
102 	for(rv = 0, m = 0x80000000; m; m >>= 1)
103 		if(mask & m)
104 			rv++;
105 	return rv;
106 }
107 
108 static char *
109 allocdatapages(LogfsServer *server, u32int count, int *countp, long *blockindexp, int *pagep, u32int *flashaddr, AllocState *state)
110 {
111 	LogfsLowLevel *ll = server->ll;
112 	long b, blockindex;
113 	DataBlock *db;
114 	int pagebase;
115 	u32int pages = (count + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
116 	u32int gapmask;
117 	long bestfreeblockindex;
118 	int bestfree;
119 	int pagesperblock = 1 << ll->l2pagesperblock;
120 	int apages;
121 	char *errmsg;
122 	int didsomething;
123 
124 	state->oldblock = -1;
125 	state->markbad = 0;
126 	if(pages > pagesperblock)
127 		pages = pagesperblock;
128 	/*
129 	 * fill in gaps first
130 	 */
131 	bestfreeblockindex = -1;
132 	bestfree = 0;
133 	for(blockindex = 0; blockindex < server->ndatablocks; blockindex++) {
134 		db = server->datablock + blockindex;
135 		if(db->block < 0)
136 			continue;
137 		gapmask = findgap(db->free & ~db->dirty, pages);
138 //print("blockindex %ld free 0x%.8ux dirty 0x%.8ux gapmask %.8ux\n", blockindex, db->free, db->dirty, gapmask);
139 		if(gapmask != 0) {
140 			/*
141 			 * this is free and !dirty
142 			 */
143 			b = db->block;
144 			USED(b);
145 			goto done;
146 		}
147 		else {
148 			int free = bitcount(db->free & logfsdatapagemask(pagesperblock, 0));
149 			if(free > 0 && (bestfreeblockindex < 0 || free > bestfree)) {
150 				bestfreeblockindex = blockindex;
151 				bestfree = free;
152 			}
153 		}
154 	}
155 //print("out of space - need to clean up a data block\n");
156 	if(bestfreeblockindex >= 0) {
157 //print("best block index %ld (%ld) %d bits\n", bestfreeblockindex, server->datablock[bestfreeblockindex].block, bestfree);
158 		/*
159 		 * clean up data block
160 		 */
161 		b = logfsfindfreeblock(ll, AllocReasonTransfer);
162 		while(b >= 0) {
163 			char *errmsg;
164 			LogfsLowLevelReadResult llrr;
165 			long oldblock;
166 			int markedbad;
167 
168 			db = server->datablock + bestfreeblockindex;
169 			oldblock = db->block;
170 			errmsg = logfsservercopyactivedata(server, b, bestfreeblockindex, 0, &llrr, &markedbad);
171 			if(errmsg) {
172 				if(!markedbad)
173 					return errmsg;
174 				b = logfsfindfreeblock(ll, AllocReasonTransfer);
175 			}
176 			else {
177 				u32int available;
178 				/*
179 				 * if page0 is free, then we must ensure that we use it otherwise
180 				 * in tagged storage such as nand, the block tag is not written
181 				 * in all cases, it is safer to erase the block afterwards to
182 				 * preserve the data for as long as possible (we could choose
183 				 * to erase the old block now if page0 has already been copied)
184 				 */
185 				blockindex = bestfreeblockindex;
186 				state->oldblock = oldblock;
187 				state->markbad = llrr != LogfsLowLevelReadResultOk;
188 				available = db->free & ~db->dirty;
189 				gapmask = findgap(available, pages);
190 				goto done;
191 			}
192 		}
193 	}
194 	/*
195 	 * use already erased blocks, so long as there are a few free
196 	 */
197 	b = logfsfindfreeblock(ll, AllocReasonDataExtend);
198 	if(b >= 0) {
199 useerased:
200 		for(blockindex = 0, db = server->datablock; blockindex < server->ndatablocks; blockindex++, db++)
201 			if(db->block < 0)
202 				break;
203 		if(blockindex == server->ndatablocks)
204 			server->ndatablocks++;
205 		db->path = mkdatapath(blockindex, 0);
206 		db->block = b;
207 		(*ll->setblocktag)(ll, b, LogfsTdata);
208 		(*ll->setblockpath)(ll, b, db->path);
209 		db->free = logfsdatapagemask(pagesperblock, 0);
210 		db->dirty = 0;
211 		gapmask = db->free;
212 		goto done;
213 	}
214 	/*
215 	 * last resort; try to steal from log
216 	 */
217 //print("last resort\n");
218 	errmsg = logfsserverlogsweep(server, 0, &didsomething);
219 	if(errmsg)
220 		return errmsg;
221 	if(didsomething) {
222 		/*
223 		 * this can only create whole free blocks, so...
224 		 */
225 //print("findfree after last resort\n");
226 		b = logfsfindfreeblock(ll, AllocReasonDataExtend);
227 		if(b >= 0) {
228 //print("*********************************************************\n");
229 			goto useerased;
230 		}
231 	}
232 	*countp = 0;
233 	return nil;
234 done:
235 	/*
236 	 * common finish - needs gapmask, blockindex, db
237 	 */
238 	apages = bitcount(gapmask);
239 	pagebase = nlz(gapmask);
240 	if(apages > pages)
241 		apages = pages;
242 	gapmask = logfsdatapagemask(apages, pagebase);
243 	if(server->trace > 1)
244 		print("allocdatapages: block %ld(%ld) pages %d mask 0x%.8ux pagebase %d apages %d\n",
245 			blockindex, db->block, pages, gapmask, pagebase, apages);
246 //	db->free &= ~gapmask;
247 //	db->dirty |= gapmask;
248 	*pagep = pagebase;
249 	*blockindexp = blockindex;
250 	*flashaddr = logfsspo2flashaddr(server, blockindex, pagebase, 0);
251 	*countp = apages << ll->l2pagesize;
252 	if(*countp > count)
253 		*countp = count;
254 	return nil;
255 }
256 
257 typedef struct Page {
258 	u32int pageaddr;
259 	int ref;
260 } Page;
261 
262 typedef struct DataStructure {
263 	LogfsServer *server;
264 	int nentries;
265 	int maxentries;
266 	Page *array;
267 } DataStructure;
268 
269 static int
270 deltapage(DataStructure *ds, u32int pageaddr, int add, int delta)
271 {
272 	int i;
273 	for(i = 0; i < ds->nentries; i++)
274 		if(ds->array[i].pageaddr == pageaddr) {
275 			ds->array[i].ref += delta;
276 			return 1;
277 		}
278 	if(!add)
279 		return 1;
280 	if(ds->maxentries == 0) {
281 		ds->array = logfsrealloc(nil, sizeof(Page) * 100);
282 		if(ds->array == nil)
283 			return 0;
284 		ds->maxentries = 100;
285 	}
286 	else if(ds->nentries >= ds->maxentries) {
287 		void *a = logfsrealloc(ds->array, ds->maxentries * 2 * sizeof(Page));
288 		if(a == nil)
289 			return 0;
290 		ds->array = a;
291 		ds->maxentries *= 2;
292 	}
293 	ds->array[ds->nentries].pageaddr = pageaddr;
294 	ds->array[ds->nentries++].ref = delta;
295 	return 1;
296 }
297 
298 /*
299  * only called for data addresses
300  */
301 static int
302 deltapages(DataStructure *ds, LogfsLowLevel *ll, u32int baseflashaddr, int range, int add, int delta)
303 {
304 	long seq;
305 	int page, offset;
306 	int pages;
307 	u32int pageaddr;
308 	int x;
309 
310 //print("deltapages(%ud, %ud, %d, %d)\n", baseflashaddr, limitflashaddr, add, delta);
311 	logfsflashaddr2spo(ds->server, baseflashaddr, &seq, &page, &offset);
312 	pages = (range + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
313 	pageaddr = (seq << ll->l2pagesperblock) + page;
314  	for(x = 0; x < pages; x++, pageaddr++)
315 		if(!deltapage(ds, pageaddr, add, delta))
316 			return 0;
317 	return 1;
318 }
319 
320 static int
321 findpageset(void *magic, u32int baseoffset, u32int limitoffset, Extent *e, u32int extentoffset)
322 {
323 	DataStructure *ds = magic;
324 	LogfsLowLevel *ll;
325 	u32int flashaddr;
326 	u32int range;
327 	u32int residue;
328 
329 	if(e == nil || (e->flashaddr & LogAddr) != 0)
330 		return 1;
331 	ll = ds->server->ll;
332 //print("baseoffset %ud limitoffset %ud min %ud max %ud\n", baseoffset, limitoffset, e->min, e->max);
333 	flashaddr = e->flashaddr;
334 	if(extentoffset)
335 		if(!deltapages(ds, ll, flashaddr, extentoffset, 1, 1))
336 			return -1;
337 	flashaddr += extentoffset;
338 	range = limitoffset - baseoffset;
339 	if(!deltapages(ds, ll, flashaddr, range, 1, -1))
340 		return -1;
341 	flashaddr += range;
342 	residue = e->max - e->min - (extentoffset + range);
343 	if(residue)
344 		if(!deltapages(ds, ll, flashaddr, residue, 1, 1))
345 			return -1;
346 	return 1;
347 }
348 
349 static int
350 addpagereferences(void *magic, Extent *e, int hole)
351 {
352 	DataStructure *ds = magic;
353 
354 	if(hole || (e->flashaddr & LogAddr) != 0)
355 		return 1;
356 	return deltapages(ds, ds->server->ll, e->flashaddr, e->max - e->min, 0, 1) ? 1 : -1;
357 }
358 
359 static char *
360 zappages(LogfsServer *server, Entry *e, u32int min, u32int max)
361 {
362 	DataStructure ds;
363 	int x, rv;
364 
365 	if(min >= e->u.file.length)
366 		/* no checks necessary */
367 		return nil;
368 	if(min == 0 && max >= e->u.file.length) {
369 		/* replacing entire file */
370 		logfsextentlistwalk(e->u.file.extent, logfsunconditionallymarkfreeanddirty, server);
371 		return nil;
372 	}
373 	/* hard after that - this will need to be improved */
374 	/*
375 	 * current algorithm
376 	 * build a list of all pages referenced by the extents being removed, and count the
377  	 * number of references
378 	 * then subtract the number of references to each page in entire file
379 	 * any pages with a reference count == 0 can be removed
380 	 */
381 	ds.server = server;
382 	ds.nentries = 0;
383 	ds.maxentries = 0;
384 	ds.array = nil;
385 	rv = logfsextentlistwalkrange(e->u.file.extent, findpageset, &ds, min, max);
386 /*
387 	print("pass 1\n");
388 	for(x = 0; x < ds.nentries; x++)
389 		print("block %ud page %ud ref %d\n", ds.array[x].pageaddr / server->ll->pagesperblock,
390 			ds.array[x].pageaddr % server->ll->pagesperblock, ds.array[x].ref);
391 */
392 	if(rv >= 0) {
393 		Page *p;
394 		if(ds.nentries == 0)
395 			print("pass 2 cancelled\n");
396 		else {
397 			rv = logfsextentlistwalk(e->u.file.extent, addpagereferences, &ds);
398 //			print("pass 2\n");
399 			for(x = 0, p = ds.array; x < ds.nentries; x++, p++) {
400 //				print("block %ud page %ud ref %d\n", p->pageaddr / server->ll->pagesperblock,
401 //					p->pageaddr % server->ll->pagesperblock, p->ref);
402 				if(rv >= 0 && p->ref == 0) {
403 					long seq = p->pageaddr >> server->ll->l2pagesperblock;
404 					int page = p->pageaddr & ((1 << server->ll->l2pagesperblock) - 1);
405 					logfsfreedatapages(server, seq, 1 << (31 - page));
406 				}
407 			}
408 		}
409 	}
410 	logfsfreemem(ds.array);
411 	return rv < 0 ? Enomem : nil;
412 }
413 
414 static void
415 disposeofoldblock(LogfsServer *server, AllocState *state)
416 {
417 	if(state->oldblock >= 0) {
418 		if(server->testflags & LogfsTestDontFettleDataBlock) {
419 			/* take the block out of commission */
420 			(*server->ll->setblocktag)(server->ll, state->oldblock, LogfsTworse);
421 			server->testflags &= ~LogfsTestDontFettleDataBlock;
422 		}
423 		else {
424 			if(state->markbad)
425 				(*server->ll->markblockbad)(server->ll, state->oldblock);
426 			else
427 				logfsbootfettleblock(server->lb, state->oldblock, LogfsTnone, ~0, nil);
428 		}
429 		state->oldblock = -1;
430 	}
431 }
432 
433 char *
434 logfsserverwrite(LogfsServer *server, u32int fid, u32int offset, u32int count, uchar *buf, u32int *rcount)
435 {
436 	Fid *f;
437 	Entry *e;
438 	u32int now;
439 	char *muid;
440 	int muidlen;
441 	LogfsLowLevel *ll = server->ll;
442 
443 	if(server->trace > 1)
444 		print("logfsserverwrite(%ud, %ud, %ud)\n", fid, offset, count);
445 	f = logfsfidmapfindentry(server->fidmap, fid);
446 	if(f == nil)
447 		return logfsebadfid;
448 	if(f->openmode < 0)
449 		return logfsefidnotopen;
450 	if((f->openmode & 3) == OREAD)
451 		return logfseaccess;
452 	e = f->entry;
453 	if(e->deadandgone)
454 		return Eio;
455 	if(e->qid.type & QTDIR)
456 		return Eperm;
457 	if(e->perm & DMAPPEND)
458 		offset = e->u.file.length;
459 	now = logfsnow();
460 	*rcount = count;
461 	muid = logfsisfindidfromname(server->is, f->uname);
462 	muidlen = strlen(muid);
463 	while(count) {
464 		Extent extent;
465 		int thistime;
466 		char *errmsg;
467 		thistime = lognicesizeforwrite(server, 1, count, muidlen);
468 		if(thistime == 0) {
469 			int p;
470 			u32int n;
471 			long blockindex;
472 			int pagebase;
473 			AllocState state;
474 			int pagesize = 1 << ll->l2pagesize;
475 		reallocate:
476 			errmsg = allocdatapages(server, count, &thistime, &blockindex, &pagebase, &extent.flashaddr, &state);
477 			if(errmsg)
478 				return errmsg;
479 			if(thistime == 0)
480 				return logfselogfull;
481 			for(p = pagebase, n = 0; n < thistime; p++, n += pagesize) {
482 				u32int mask;
483 				DataBlock *db = server->datablock + blockindex;
484 				errmsg = (*ll->writepage)(ll, buf + n, db->block, p);
485 				if(errmsg) {
486 					if(strcmp(errmsg, Eio) != 0) {
487 						/*
488 						 * something horrid happened down below
489 						 * recover without writing any more than we have to
490 						 */
491 						if(p != 0) {
492 							/*
493 							 * page 0 was either written already, or has been written in this loop
494 							 * thus the block referenced is valid on the media. all we need to do
495 							 * is lose the old block, mark the written pages as free (so they can
496 							 * be scavenged), and don't bother with the log message
497 							 */
498 							disposeofoldblock(server, &state);
499 							mask = logfsdatapagemask(p - pagebase - 1, pagebase);
500 							db->free |= mask;
501 							db->dirty |= mask;
502 							return errmsg;
503 						}
504 						/*
505 						 * page 0 failed to write (so nothing written at all)
506 						 * this is either an entirely free block (no erased block in savestate),
507 						 * or a copy of a scavenged block (erased block in savestate)
508 						 */
509 						if(state.oldblock < 0) {
510 							/*
511 							 * newly selected erased block (blockindex == server->ndatablocks - 1)
512 							 * mark it bad, lose it from the datablock table
513 							 */
514 							(*ll->markblockbad)(ll, db->block);
515 							db->block = -1;
516 							if(blockindex == server->ndatablocks - 1)
517 								server->ndatablocks--;
518 							return errmsg;
519 						}
520 						/*
521 						 * page 0 of a data scavenge copy
522 						 * mark it bad, restore state (old block)
523 						 */
524 						(*ll->markblockbad)(ll, db->block);
525 						db->block = state.oldblock;
526 						return errmsg;
527 					}
528 					/*
529 					 * write error on target block
530 					 *
531 					 * if it is a replacement (state saved)
532 					 *	mark the new block bad, restore state and try again
533 					 *
534 					 * if it is not replaced (no state saved)
535 					 *	replace block, and try again
536 					 */
537 					if(state.oldblock >= 0) {
538 						(*ll->markblockbad)(ll, db->block);
539 						db->block = state.oldblock;
540 					}
541 					else {
542 						errmsg = logfsserverreplacedatablock(server, blockindex);
543 						if(errmsg)
544 							return errmsg;
545 					}
546 					goto reallocate;
547 				}
548 				mask = logfsdatapagemask(1, p);
549 				db->free &= ~mask;
550 				db->dirty |= mask;
551 			}
552 			/* well, we managed to write the data out */
553 			errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
554 				muid, nil, &extent.flashaddr);
555 			/*
556 			 * now we can dispose of the original data block, if any
557 			 * this is regardless of whether we succeeded in writing a log message, as
558 			 * if this block is not erased, there will be a duplicate
559 			 */
560 			disposeofoldblock(server, &state);
561 		}
562 		else {
563 			if(thistime > count)
564 				thistime = count;
565 			errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
566 				muid, buf, &extent.flashaddr);
567 		}
568 		/*
569 		 * here if we failed to write the log message
570 		 */
571 		if(errmsg)
572 			return errmsg;
573 		if(server->trace > 1)
574 			print("logfsserverwrite: %d bytes at flashaddr 0x%.8ux\n", thistime, extent.flashaddr);
575 		extent.min = offset;
576 		extent.max = offset + thistime;
577 		errmsg = zappages(server, e, extent.min, extent.max);
578 		if(errmsg)
579 			return errmsg;
580 		errmsg = logfsextentlistinsert(e->u.file.extent, &extent, nil);
581 		if(errmsg)
582 			return errmsg;
583 		e->muid = muid;
584 		e->mtime = now;
585 		offset += thistime;
586 		if(e->u.file.length < offset)
587 			e->u.file.length = offset;
588 		count -= thistime;
589 		buf += thistime;
590 		e->qid.vers++;
591 	}
592 	return nil;
593 }
594