15e96a66cSDavid du Colombier #include "stdinc.h"
25e96a66cSDavid du Colombier #include "dat.h"
35e96a66cSDavid du Colombier #include "fns.h"
45e96a66cSDavid du Colombier #include "error.h"
55e96a66cSDavid du Colombier
65e96a66cSDavid du Colombier static void diskThread(void *a);
75e96a66cSDavid du Colombier
enum {
	/*
	 * Timing gates the I/O latency measurement in diskThread.
	 * It is disabled since it gets alignment faults on BG
	 * and the guts used to be commented out.
	 */
	Timing		= 0,	/* flag */
	QueueSize	= 100,	/* maximum blocks to queue */
};
165e96a66cSDavid du Colombier
175e96a66cSDavid du Colombier struct Disk {
18*d7aba6c3SDavid du Colombier QLock lk;
195e96a66cSDavid du Colombier int ref;
205e96a66cSDavid du Colombier
215e96a66cSDavid du Colombier int fd;
225e96a66cSDavid du Colombier Header h;
235e96a66cSDavid du Colombier
24*d7aba6c3SDavid du Colombier Rendez flow;
25*d7aba6c3SDavid du Colombier Rendez starve;
26*d7aba6c3SDavid du Colombier Rendez flush;
27*d7aba6c3SDavid du Colombier Rendez die;
285e96a66cSDavid du Colombier
295e96a66cSDavid du Colombier int nqueue;
305e96a66cSDavid du Colombier
315e96a66cSDavid du Colombier Block *cur; /* block to do on current scan */
325e96a66cSDavid du Colombier Block *next; /* blocks to do next scan */
335e96a66cSDavid du Colombier };
345e96a66cSDavid du Colombier
35f6333ca0SDavid du Colombier /* keep in sync with Part* enum in dat.h */
36f6333ca0SDavid du Colombier static char *partname[] = {
37f6333ca0SDavid du Colombier [PartError] "error",
38f6333ca0SDavid du Colombier [PartSuper] "super",
39f6333ca0SDavid du Colombier [PartLabel] "label",
40f6333ca0SDavid du Colombier [PartData] "data",
41f6333ca0SDavid du Colombier [PartVenti] "venti",
42f6333ca0SDavid du Colombier };
435e96a66cSDavid du Colombier
445e96a66cSDavid du Colombier Disk *
diskAlloc(int fd)455e96a66cSDavid du Colombier diskAlloc(int fd)
465e96a66cSDavid du Colombier {
475e96a66cSDavid du Colombier u8int buf[HeaderSize];
485e96a66cSDavid du Colombier Header h;
495e96a66cSDavid du Colombier Disk *disk;
505e96a66cSDavid du Colombier
515e96a66cSDavid du Colombier if(pread(fd, buf, HeaderSize, HeaderOffset) < HeaderSize){
52*d7aba6c3SDavid du Colombier werrstr("short read: %r");
535e96a66cSDavid du Colombier return nil;
545e96a66cSDavid du Colombier }
555e96a66cSDavid du Colombier
56dc5a79c1SDavid du Colombier if(!headerUnpack(&h, buf)){
57*d7aba6c3SDavid du Colombier werrstr("bad disk header");
585e96a66cSDavid du Colombier return nil;
59dc5a79c1SDavid du Colombier }
60*d7aba6c3SDavid du Colombier disk = vtmallocz(sizeof(Disk));
61*d7aba6c3SDavid du Colombier disk->starve.l = &disk->lk;
62*d7aba6c3SDavid du Colombier disk->flow.l = &disk->lk;
63*d7aba6c3SDavid du Colombier disk->flush.l = &disk->lk;
645e96a66cSDavid du Colombier disk->fd = fd;
655e96a66cSDavid du Colombier disk->h = h;
665e96a66cSDavid du Colombier
675e96a66cSDavid du Colombier disk->ref = 2;
68*d7aba6c3SDavid du Colombier proccreate(diskThread, disk, STACK);
695e96a66cSDavid du Colombier
705e96a66cSDavid du Colombier return disk;
715e96a66cSDavid du Colombier }
725e96a66cSDavid du Colombier
735e96a66cSDavid du Colombier void
diskFree(Disk * disk)745e96a66cSDavid du Colombier diskFree(Disk *disk)
755e96a66cSDavid du Colombier {
765e96a66cSDavid du Colombier diskFlush(disk);
775e96a66cSDavid du Colombier
785e96a66cSDavid du Colombier /* kill slave */
79*d7aba6c3SDavid du Colombier qlock(&disk->lk);
80*d7aba6c3SDavid du Colombier disk->die.l = &disk->lk;
81*d7aba6c3SDavid du Colombier rwakeup(&disk->starve);
825e96a66cSDavid du Colombier while(disk->ref > 1)
83*d7aba6c3SDavid du Colombier rsleep(&disk->die);
84*d7aba6c3SDavid du Colombier qunlock(&disk->lk);
855e96a66cSDavid du Colombier close(disk->fd);
86*d7aba6c3SDavid du Colombier vtfree(disk);
875e96a66cSDavid du Colombier }
885e96a66cSDavid du Colombier
895e96a66cSDavid du Colombier static u32int
partStart(Disk * disk,int part)905e96a66cSDavid du Colombier partStart(Disk *disk, int part)
915e96a66cSDavid du Colombier {
925e96a66cSDavid du Colombier switch(part){
935e96a66cSDavid du Colombier default:
945e96a66cSDavid du Colombier assert(0);
955e96a66cSDavid du Colombier case PartSuper:
965e96a66cSDavid du Colombier return disk->h.super;
975e96a66cSDavid du Colombier case PartLabel:
985e96a66cSDavid du Colombier return disk->h.label;
995e96a66cSDavid du Colombier case PartData:
1005e96a66cSDavid du Colombier return disk->h.data;
1015e96a66cSDavid du Colombier }
1025e96a66cSDavid du Colombier }
1035e96a66cSDavid du Colombier
1045e96a66cSDavid du Colombier
1055e96a66cSDavid du Colombier static u32int
partEnd(Disk * disk,int part)1065e96a66cSDavid du Colombier partEnd(Disk *disk, int part)
1075e96a66cSDavid du Colombier {
1085e96a66cSDavid du Colombier switch(part){
1095e96a66cSDavid du Colombier default:
1105e96a66cSDavid du Colombier assert(0);
1115e96a66cSDavid du Colombier case PartSuper:
1125e96a66cSDavid du Colombier return disk->h.super+1;
1135e96a66cSDavid du Colombier case PartLabel:
1145e96a66cSDavid du Colombier return disk->h.data;
1155e96a66cSDavid du Colombier case PartData:
1165e96a66cSDavid du Colombier return disk->h.end;
1175e96a66cSDavid du Colombier }
1185e96a66cSDavid du Colombier }
1195e96a66cSDavid du Colombier
1205e96a66cSDavid du Colombier int
diskReadRaw(Disk * disk,int part,u32int addr,uchar * buf)1215e96a66cSDavid du Colombier diskReadRaw(Disk *disk, int part, u32int addr, uchar *buf)
1225e96a66cSDavid du Colombier {
1235e96a66cSDavid du Colombier ulong start, end;
1245e96a66cSDavid du Colombier u64int offset;
1255e96a66cSDavid du Colombier int n, nn;
1265e96a66cSDavid du Colombier
1275e96a66cSDavid du Colombier start = partStart(disk, part);
1285e96a66cSDavid du Colombier end = partEnd(disk, part);
1295e96a66cSDavid du Colombier
1305e96a66cSDavid du Colombier if(addr >= end-start){
131*d7aba6c3SDavid du Colombier werrstr(EBadAddr);
1325e96a66cSDavid du Colombier return 0;
1335e96a66cSDavid du Colombier }
1345e96a66cSDavid du Colombier
1355e96a66cSDavid du Colombier offset = ((u64int)(addr + start))*disk->h.blockSize;
1365e96a66cSDavid du Colombier n = disk->h.blockSize;
1375e96a66cSDavid du Colombier while(n > 0){
1385e96a66cSDavid du Colombier nn = pread(disk->fd, buf, n, offset);
1395e96a66cSDavid du Colombier if(nn < 0){
140*d7aba6c3SDavid du Colombier werrstr("%r");
1415e96a66cSDavid du Colombier return 0;
1425e96a66cSDavid du Colombier }
1435e96a66cSDavid du Colombier if(nn == 0){
144*d7aba6c3SDavid du Colombier werrstr("eof reading disk");
1455e96a66cSDavid du Colombier return 0;
1465e96a66cSDavid du Colombier }
1475e96a66cSDavid du Colombier n -= nn;
1485e96a66cSDavid du Colombier offset += nn;
1495e96a66cSDavid du Colombier buf += nn;
1505e96a66cSDavid du Colombier }
1515e96a66cSDavid du Colombier return 1;
1525e96a66cSDavid du Colombier }
1535e96a66cSDavid du Colombier
1545e96a66cSDavid du Colombier int
diskWriteRaw(Disk * disk,int part,u32int addr,uchar * buf)1555e96a66cSDavid du Colombier diskWriteRaw(Disk *disk, int part, u32int addr, uchar *buf)
1565e96a66cSDavid du Colombier {
1575e96a66cSDavid du Colombier ulong start, end;
1585e96a66cSDavid du Colombier u64int offset;
1595e96a66cSDavid du Colombier int n;
1605e96a66cSDavid du Colombier
1615e96a66cSDavid du Colombier start = partStart(disk, part);
1625e96a66cSDavid du Colombier end = partEnd(disk, part);
1635e96a66cSDavid du Colombier
1645e96a66cSDavid du Colombier if(addr >= end - start){
165*d7aba6c3SDavid du Colombier werrstr(EBadAddr);
1665e96a66cSDavid du Colombier return 0;
1675e96a66cSDavid du Colombier }
1685e96a66cSDavid du Colombier
1695e96a66cSDavid du Colombier offset = ((u64int)(addr + start))*disk->h.blockSize;
1706de6ce84SDavid du Colombier n = pwrite(disk->fd, buf, disk->h.blockSize, offset);
1716de6ce84SDavid du Colombier if(n < 0){
172*d7aba6c3SDavid du Colombier werrstr("%r");
1735e96a66cSDavid du Colombier return 0;
1745e96a66cSDavid du Colombier }
1756de6ce84SDavid du Colombier if(n < disk->h.blockSize) {
176*d7aba6c3SDavid du Colombier werrstr("short write");
1776de6ce84SDavid du Colombier return 0;
1786de6ce84SDavid du Colombier }
1795e96a66cSDavid du Colombier
1805e96a66cSDavid du Colombier return 1;
1815e96a66cSDavid du Colombier }
1825e96a66cSDavid du Colombier
1835e96a66cSDavid du Colombier static void
diskQueue(Disk * disk,Block * b)1845e96a66cSDavid du Colombier diskQueue(Disk *disk, Block *b)
1855e96a66cSDavid du Colombier {
1865e96a66cSDavid du Colombier Block **bp, *bb;
1875e96a66cSDavid du Colombier
188*d7aba6c3SDavid du Colombier qlock(&disk->lk);
1895e96a66cSDavid du Colombier while(disk->nqueue >= QueueSize)
190*d7aba6c3SDavid du Colombier rsleep(&disk->flow);
1915e96a66cSDavid du Colombier if(disk->cur == nil || b->addr > disk->cur->addr)
1925e96a66cSDavid du Colombier bp = &disk->cur;
1935e96a66cSDavid du Colombier else
1945e96a66cSDavid du Colombier bp = &disk->next;
1955e96a66cSDavid du Colombier
1965e96a66cSDavid du Colombier for(bb=*bp; bb; bb=*bp){
1975e96a66cSDavid du Colombier if(b->addr < bb->addr)
1985e96a66cSDavid du Colombier break;
1995e96a66cSDavid du Colombier bp = &bb->ionext;
2005e96a66cSDavid du Colombier }
2015e96a66cSDavid du Colombier b->ionext = bb;
2025e96a66cSDavid du Colombier *bp = b;
2035e96a66cSDavid du Colombier if(disk->nqueue == 0)
204*d7aba6c3SDavid du Colombier rwakeup(&disk->starve);
2055e96a66cSDavid du Colombier disk->nqueue++;
206*d7aba6c3SDavid du Colombier qunlock(&disk->lk);
2075e96a66cSDavid du Colombier }
2085e96a66cSDavid du Colombier
2095e96a66cSDavid du Colombier
2105e96a66cSDavid du Colombier void
diskRead(Disk * disk,Block * b)2115e96a66cSDavid du Colombier diskRead(Disk *disk, Block *b)
2125e96a66cSDavid du Colombier {
2135e96a66cSDavid du Colombier assert(b->iostate == BioEmpty || b->iostate == BioLabel);
2145e96a66cSDavid du Colombier blockSetIOState(b, BioReading);
2155e96a66cSDavid du Colombier diskQueue(disk, b);
2165e96a66cSDavid du Colombier }
2175e96a66cSDavid du Colombier
2185e96a66cSDavid du Colombier void
diskWrite(Disk * disk,Block * b)2195e96a66cSDavid du Colombier diskWrite(Disk *disk, Block *b)
2205e96a66cSDavid du Colombier {
221867bfcc6SDavid du Colombier assert(b->nlock == 1);
2225e96a66cSDavid du Colombier assert(b->iostate == BioDirty);
2235e96a66cSDavid du Colombier blockSetIOState(b, BioWriting);
2245e96a66cSDavid du Colombier diskQueue(disk, b);
2255e96a66cSDavid du Colombier }
2265e96a66cSDavid du Colombier
227867bfcc6SDavid du Colombier void
diskWriteAndWait(Disk * disk,Block * b)228867bfcc6SDavid du Colombier diskWriteAndWait(Disk *disk, Block *b)
229867bfcc6SDavid du Colombier {
230867bfcc6SDavid du Colombier int nlock;
231867bfcc6SDavid du Colombier
232867bfcc6SDavid du Colombier /*
233867bfcc6SDavid du Colombier * If b->nlock > 1, the block is aliased within
234867bfcc6SDavid du Colombier * a single thread. That thread is us.
235*d7aba6c3SDavid du Colombier * DiskWrite does some funny stuff with QLock
236867bfcc6SDavid du Colombier * and blockPut that basically assumes b->nlock==1.
237867bfcc6SDavid du Colombier * We humor diskWrite by temporarily setting
238867bfcc6SDavid du Colombier * nlock to 1. This needs to be revisited.
239867bfcc6SDavid du Colombier */
240867bfcc6SDavid du Colombier nlock = b->nlock;
241867bfcc6SDavid du Colombier if(nlock > 1)
242867bfcc6SDavid du Colombier b->nlock = 1;
243867bfcc6SDavid du Colombier diskWrite(disk, b);
244867bfcc6SDavid du Colombier while(b->iostate != BioClean)
245*d7aba6c3SDavid du Colombier rsleep(&b->ioready);
246867bfcc6SDavid du Colombier b->nlock = nlock;
247867bfcc6SDavid du Colombier }
248867bfcc6SDavid du Colombier
2495e96a66cSDavid du Colombier int
diskBlockSize(Disk * disk)2505e96a66cSDavid du Colombier diskBlockSize(Disk *disk)
2515e96a66cSDavid du Colombier {
2525e96a66cSDavid du Colombier return disk->h.blockSize; /* immuttable */
2535e96a66cSDavid du Colombier }
2545e96a66cSDavid du Colombier
2555e96a66cSDavid du Colombier int
diskFlush(Disk * disk)2565e96a66cSDavid du Colombier diskFlush(Disk *disk)
2575e96a66cSDavid du Colombier {
2585e96a66cSDavid du Colombier Dir dir;
2595e96a66cSDavid du Colombier
260*d7aba6c3SDavid du Colombier qlock(&disk->lk);
2615e96a66cSDavid du Colombier while(disk->nqueue > 0)
262*d7aba6c3SDavid du Colombier rsleep(&disk->flush);
263*d7aba6c3SDavid du Colombier qunlock(&disk->lk);
2645e96a66cSDavid du Colombier
2655e96a66cSDavid du Colombier /* there really should be a cleaner interface to flush an fd */
2665e96a66cSDavid du Colombier nulldir(&dir);
2675e96a66cSDavid du Colombier if(dirfwstat(disk->fd, &dir) < 0){
268*d7aba6c3SDavid du Colombier werrstr("%r");
2695e96a66cSDavid du Colombier return 0;
2705e96a66cSDavid du Colombier }
2715e96a66cSDavid du Colombier return 1;
2725e96a66cSDavid du Colombier }
2735e96a66cSDavid du Colombier
2745e96a66cSDavid du Colombier u32int
diskSize(Disk * disk,int part)2755e96a66cSDavid du Colombier diskSize(Disk *disk, int part)
2765e96a66cSDavid du Colombier {
2775e96a66cSDavid du Colombier return partEnd(disk, part) - partStart(disk, part);
2785e96a66cSDavid du Colombier }
2795e96a66cSDavid du Colombier
28074f16c81SDavid du Colombier static uintptr
mypc(int x)281fe853e23SDavid du Colombier mypc(int x)
282fe853e23SDavid du Colombier {
283fe853e23SDavid du Colombier return getcallerpc(&x);
284fe853e23SDavid du Colombier }
285fe853e23SDavid du Colombier
286f6333ca0SDavid du Colombier static char *
disk2file(Disk * disk)287f6333ca0SDavid du Colombier disk2file(Disk *disk)
288f6333ca0SDavid du Colombier {
289f6333ca0SDavid du Colombier static char buf[256];
290f6333ca0SDavid du Colombier
291f6333ca0SDavid du Colombier if (fd2path(disk->fd, buf, sizeof buf) < 0)
292f6333ca0SDavid du Colombier strncpy(buf, "GOK", sizeof buf);
293f6333ca0SDavid du Colombier return buf;
294f6333ca0SDavid du Colombier }
295f6333ca0SDavid du Colombier
2965e96a66cSDavid du Colombier static void
diskThread(void * a)2975e96a66cSDavid du Colombier diskThread(void *a)
2985e96a66cSDavid du Colombier {
2995e96a66cSDavid du Colombier Disk *disk = a;
3005e96a66cSDavid du Colombier Block *b;
3015e96a66cSDavid du Colombier uchar *buf, *p;
3025e96a66cSDavid du Colombier double t;
3035e96a66cSDavid du Colombier int nio;
3045e96a66cSDavid du Colombier
305*d7aba6c3SDavid du Colombier threadsetname("disk");
3065e96a66cSDavid du Colombier
307dc5a79c1SDavid du Colombier //fprint(2, "diskThread %d\n", getpid());
3085e96a66cSDavid du Colombier
309*d7aba6c3SDavid du Colombier buf = vtmalloc(disk->h.blockSize);
3105e96a66cSDavid du Colombier
311*d7aba6c3SDavid du Colombier qlock(&disk->lk);
312c39c2eb3SDavid du Colombier if (Timing) {
3135e96a66cSDavid du Colombier nio = 0;
3145e96a66cSDavid du Colombier t = -nsec();
315c39c2eb3SDavid du Colombier }
3165e96a66cSDavid du Colombier for(;;){
3175e96a66cSDavid du Colombier while(disk->nqueue == 0){
318c39c2eb3SDavid du Colombier if (Timing) {
3195e96a66cSDavid du Colombier t += nsec();
320c39c2eb3SDavid du Colombier if(nio >= 10000){
321c39c2eb3SDavid du Colombier fprint(2, "disk: io=%d at %.3fms\n",
322c39c2eb3SDavid du Colombier nio, t*1e-6/nio);
323c39c2eb3SDavid du Colombier nio = 0;
324c39c2eb3SDavid du Colombier t = 0;
325c39c2eb3SDavid du Colombier }
326c39c2eb3SDavid du Colombier }
327*d7aba6c3SDavid du Colombier if(disk->die.l != nil)
3285e96a66cSDavid du Colombier goto Done;
329*d7aba6c3SDavid du Colombier rsleep(&disk->starve);
330c39c2eb3SDavid du Colombier if (Timing)
3315e96a66cSDavid du Colombier t -= nsec();
3325e96a66cSDavid du Colombier }
3335e96a66cSDavid du Colombier assert(disk->cur != nil || disk->next != nil);
3345e96a66cSDavid du Colombier
3355e96a66cSDavid du Colombier if(disk->cur == nil){
3365e96a66cSDavid du Colombier disk->cur = disk->next;
3375e96a66cSDavid du Colombier disk->next = nil;
3385e96a66cSDavid du Colombier }
3395e96a66cSDavid du Colombier b = disk->cur;
3405e96a66cSDavid du Colombier disk->cur = b->ionext;
341*d7aba6c3SDavid du Colombier qunlock(&disk->lk);
3425e96a66cSDavid du Colombier
3435e96a66cSDavid du Colombier /*
3445e96a66cSDavid du Colombier * no one should hold onto blocking in the
3455e96a66cSDavid du Colombier * reading or writing state, so this lock should
3465e96a66cSDavid du Colombier * not cause deadlock.
3475e96a66cSDavid du Colombier */
348f6333ca0SDavid du Colombier if(0)fprint(2, "fossil: diskThread: %d:%d %x\n", getpid(), b->part, b->addr);
3495e96a66cSDavid du Colombier bwatchLock(b);
350*d7aba6c3SDavid du Colombier qlock(&b->lk);
351fe853e23SDavid du Colombier b->pc = mypc(0);
3525e96a66cSDavid du Colombier assert(b->nlock == 1);
3535e96a66cSDavid du Colombier switch(b->iostate){
3545e96a66cSDavid du Colombier default:
3555e96a66cSDavid du Colombier abort();
3565e96a66cSDavid du Colombier case BioReading:
3575e96a66cSDavid du Colombier if(!diskReadRaw(disk, b->part, b->addr, b->data)){
358f366f900SDavid du Colombier fprint(2, "fossil: diskReadRaw failed: %s: "
359f366f900SDavid du Colombier "score %V: part=%s block %ud: %r\n",
360f366f900SDavid du Colombier disk2file(disk), b->score,
361f366f900SDavid du Colombier partname[b->part], b->addr);
3625e96a66cSDavid du Colombier blockSetIOState(b, BioReadError);
3635e96a66cSDavid du Colombier }else
3645e96a66cSDavid du Colombier blockSetIOState(b, BioClean);
3655e96a66cSDavid du Colombier break;
3665e96a66cSDavid du Colombier case BioWriting:
3675e96a66cSDavid du Colombier p = blockRollback(b, buf);
368f366f900SDavid du Colombier /* NB: ctime result ends with a newline */
3695e96a66cSDavid du Colombier if(!diskWriteRaw(disk, b->part, b->addr, p)){
370f366f900SDavid du Colombier fprint(2, "fossil: diskWriteRaw failed: %s: "
371f366f900SDavid du Colombier "score %V: date %s part=%s block %ud: %r\n",
372f366f900SDavid du Colombier disk2file(disk), b->score,
373f366f900SDavid du Colombier ctime(time(0)),
374f366f900SDavid du Colombier partname[b->part], b->addr);
3755e96a66cSDavid du Colombier break;
3765e96a66cSDavid du Colombier }
3775e96a66cSDavid du Colombier if(p != buf)
3785e96a66cSDavid du Colombier blockSetIOState(b, BioClean);
3795e96a66cSDavid du Colombier else
3805e96a66cSDavid du Colombier blockSetIOState(b, BioDirty);
3815e96a66cSDavid du Colombier break;
3825e96a66cSDavid du Colombier }
3835e96a66cSDavid du Colombier
3845e96a66cSDavid du Colombier blockPut(b); /* remove extra reference, unlock */
385*d7aba6c3SDavid du Colombier qlock(&disk->lk);
3865e96a66cSDavid du Colombier disk->nqueue--;
3875e96a66cSDavid du Colombier if(disk->nqueue == QueueSize-1)
388*d7aba6c3SDavid du Colombier rwakeup(&disk->flow);
3895e96a66cSDavid du Colombier if(disk->nqueue == 0)
390*d7aba6c3SDavid du Colombier rwakeup(&disk->flush);
391c39c2eb3SDavid du Colombier if(Timing)
3925e96a66cSDavid du Colombier nio++;
3935e96a66cSDavid du Colombier }
3945e96a66cSDavid du Colombier Done:
395dc5a79c1SDavid du Colombier //fprint(2, "diskThread done\n");
3965e96a66cSDavid du Colombier disk->ref--;
397*d7aba6c3SDavid du Colombier rwakeup(&disk->die);
398*d7aba6c3SDavid du Colombier qunlock(&disk->lk);
399*d7aba6c3SDavid du Colombier vtfree(buf);
4005e96a66cSDavid du Colombier }
401