1368c31abSDavid du Colombier /* 2368c31abSDavid du Colombier * Write the dirty icache entries to disk. Random seeks are 3368c31abSDavid du Colombier * so expensive that it makes sense to wait until we have 4368c31abSDavid du Colombier * a lot and then just make a sequential pass over the disk. 5368c31abSDavid du Colombier */ 6368c31abSDavid du Colombier #include "stdinc.h" 7368c31abSDavid du Colombier #include "dat.h" 8368c31abSDavid du Colombier #include "fns.h" 9368c31abSDavid du Colombier 10368c31abSDavid du Colombier static void icachewriteproc(void*); 11368c31abSDavid du Colombier static void icachewritecoord(void*); 12368c31abSDavid du Colombier static IEntry *iesort(IEntry*); 13368c31abSDavid du Colombier 14368c31abSDavid du Colombier int icachesleeptime = 1000; /* milliseconds */ 15368c31abSDavid du Colombier int minicachesleeptime = 50; 16368c31abSDavid du Colombier 17368c31abSDavid du Colombier enum 18368c31abSDavid du Colombier { 19368c31abSDavid du Colombier Bufsize = 8*1024*1024 20368c31abSDavid du Colombier }; 21368c31abSDavid du Colombier 22368c31abSDavid du Colombier typedef struct IWrite IWrite; 23368c31abSDavid du Colombier struct IWrite 24368c31abSDavid du Colombier { 25368c31abSDavid du Colombier Round round; 26368c31abSDavid du Colombier AState as; 27368c31abSDavid du Colombier }; 28368c31abSDavid du Colombier 29368c31abSDavid du Colombier static IWrite iwrite; 30368c31abSDavid du Colombier 31368c31abSDavid du Colombier void 32368c31abSDavid du Colombier initicachewrite(void) 33368c31abSDavid du Colombier { 34368c31abSDavid du Colombier int i; 35368c31abSDavid du Colombier Index *ix; 36368c31abSDavid du Colombier 37368c31abSDavid du Colombier initround(&iwrite.round, "icache", 120*60*1000); 38368c31abSDavid du Colombier ix = mainindex; 39368c31abSDavid du Colombier for(i=0; i<ix->nsects; i++){ 40368c31abSDavid du Colombier ix->sects[i]->writechan = chancreate(sizeof(ulong), 1); 41368c31abSDavid du Colombier ix->sects[i]->writedonechan = chancreate(sizeof(ulong), 1); 42368c31abSDavid du Colombier vtproc(icachewriteproc, ix->sects[i]); 43368c31abSDavid du Colombier } 44368c31abSDavid du Colombier vtproc(icachewritecoord, nil); 45368c31abSDavid du Colombier vtproc(delaykickroundproc, &iwrite.round); 46368c31abSDavid du Colombier } 47368c31abSDavid du Colombier 48368c31abSDavid du Colombier static IEntry* 49368c31abSDavid du Colombier nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf) 50368c31abSDavid du Colombier { 51368c31abSDavid du Colombier u64int addr, naddr; 52368c31abSDavid du Colombier uint nbuf; 53368c31abSDavid du Colombier int bsize; 54368c31abSDavid du Colombier IEntry *iefirst, *ie, **l; 55368c31abSDavid du Colombier 56368c31abSDavid du Colombier bsize = 1<<is->blocklog; 57368c31abSDavid du Colombier iefirst = *pie; 58*29e26a97SDavid du Colombier addr = is->blockbase + ((u64int)(hashbits(iefirst->score, 32) / 59*29e26a97SDavid du Colombier ix->div - is->start) << is->blocklog); 60368c31abSDavid du Colombier nbuf = 0; 61368c31abSDavid du Colombier for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){ 62*29e26a97SDavid du Colombier naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / 63*29e26a97SDavid du Colombier ix->div - is->start) << is->blocklog); 64368c31abSDavid du Colombier if(naddr - addr >= Bufsize) 65368c31abSDavid du Colombier break; 66368c31abSDavid du Colombier nbuf = naddr - addr; 67368c31abSDavid du Colombier } 68368c31abSDavid du Colombier nbuf += bsize; 69368c31abSDavid du Colombier 70368c31abSDavid du Colombier *l = nil; 71368c31abSDavid du Colombier *pie = ie; 72368c31abSDavid du Colombier *paddr = addr; 73368c31abSDavid du Colombier *pnbuf = nbuf; 74368c31abSDavid du Colombier return iefirst; 75368c31abSDavid du Colombier } 76368c31abSDavid du Colombier 77368c31abSDavid du Colombier static int 78368c31abSDavid du Colombier icachewritesect(Index *ix, ISect *is, u8int *buf) 79368c31abSDavid du Colombier { 80368c31abSDavid du Colombier int err, h, bsize, t; 81368c31abSDavid du Colombier u32int lo, hi; 82368c31abSDavid du Colombier u64int addr, naddr; 83368c31abSDavid du Colombier uint nbuf, off; 84368c31abSDavid du Colombier DBlock *b; 85368c31abSDavid du Colombier IBucket ib; 86368c31abSDavid du Colombier IEntry *ie, *iedirty, **l, *chunk; 87368c31abSDavid du Colombier 88368c31abSDavid du Colombier lo = is->start * ix->div; 89368c31abSDavid du Colombier if(TWID32/ix->div < is->stop) 90368c31abSDavid du Colombier hi = TWID32; 91368c31abSDavid du Colombier else 92368c31abSDavid du Colombier hi = is->stop * ix->div - 1; 93368c31abSDavid du Colombier 94*29e26a97SDavid du Colombier trace(TraceProc, "icachewritesect enter %ud %ud %llud", 95*29e26a97SDavid du Colombier lo, hi, iwrite.as.aa); 96368c31abSDavid du Colombier 97368c31abSDavid du Colombier iedirty = icachedirty(lo, hi, iwrite.as.aa); 98368c31abSDavid du Colombier iedirty = iesort(iedirty); 99368c31abSDavid du Colombier bsize = 1 << is->blocklog; 100368c31abSDavid du Colombier err = 0; 101368c31abSDavid du Colombier 102368c31abSDavid du Colombier while(iedirty){ 103368c31abSDavid du Colombier disksched(); 104368c31abSDavid du Colombier while((t = icachesleeptime) == SleepForever){ 105368c31abSDavid du Colombier sleep(1000); 106368c31abSDavid du Colombier disksched(); 107368c31abSDavid du Colombier } 108368c31abSDavid du Colombier if(t < minicachesleeptime) 109368c31abSDavid du Colombier t = minicachesleeptime; 110368c31abSDavid du Colombier sleep(t); 111368c31abSDavid du Colombier trace(TraceProc, "icachewritesect nextchunk"); 112368c31abSDavid du Colombier chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf); 113368c31abSDavid du Colombier 114*29e26a97SDavid du Colombier trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", 115*29e26a97SDavid du Colombier addr, nbuf); 116368c31abSDavid du Colombier if(readpart(is->part, addr, buf, nbuf) < 0){ 117*29e26a97SDavid du Colombier fprint(2, "%s: part %s addr 0x%llux: icachewritesect " 118*29e26a97SDavid du Colombier "readpart: %r\n", argv0, is->part->name, addr); 119368c31abSDavid du Colombier err = -1; 120368c31abSDavid du Colombier continue; 121368c31abSDavid du Colombier } 122368c31abSDavid du Colombier trace(TraceProc, "icachewritesect updatebuf"); 123368c31abSDavid du Colombier addstat(StatIsectReadBytes, nbuf); 124368c31abSDavid du Colombier addstat(StatIsectRead, 1); 125368c31abSDavid du Colombier 126368c31abSDavid du Colombier for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){ 127368c31abSDavid du Colombier again: 128*29e26a97SDavid du Colombier naddr = is->blockbase + ((u64int)(hashbits(ie->score, 129*29e26a97SDavid du Colombier 32) / ix->div - is->start) << is->blocklog); 130368c31abSDavid du Colombier off = naddr - addr; 131368c31abSDavid du Colombier if(off+bsize > nbuf){ 132*29e26a97SDavid du Colombier fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud " 133*29e26a97SDavid du Colombier "addr+nbuf=0x%llux naddr=0x%llux\n", 134*29e26a97SDavid du Colombier argv0, addr, nbuf, addr+nbuf, naddr); 135368c31abSDavid du Colombier assert(off+bsize <= nbuf); 136368c31abSDavid du Colombier } 137368c31abSDavid du Colombier unpackibucket(&ib, buf+off, is->bucketmagic); 138368c31abSDavid du Colombier if(okibucket(&ib, is) < 0){ 139*29e26a97SDavid du Colombier fprint(2, "%s: bad bucket XXX\n", argv0); 140368c31abSDavid du Colombier goto skipit; 141368c31abSDavid du Colombier } 142*29e26a97SDavid du Colombier trace(TraceProc, "icachewritesect add %V at 0x%llux", 143*29e26a97SDavid du Colombier ie->score, naddr); 144368c31abSDavid du Colombier h = bucklook(ie->score, ie->ia.type, ib.data, ib.n); 145368c31abSDavid du Colombier if(h & 1){ 146368c31abSDavid du Colombier h ^= 1; 147368c31abSDavid du Colombier packientry(ie, &ib.data[h]); 148368c31abSDavid du Colombier }else if(ib.n < is->buckmax){ 149*29e26a97SDavid du Colombier memmove(&ib.data[h + IEntrySize], &ib.data[h], 150*29e26a97SDavid du Colombier ib.n*IEntrySize - h); 151368c31abSDavid du Colombier ib.n++; 152368c31abSDavid du Colombier packientry(ie, &ib.data[h]); 153368c31abSDavid du Colombier }else{ 154*29e26a97SDavid du Colombier fprint(2, "%s: bucket overflow XXX\n", argv0); 155368c31abSDavid du Colombier skipit: 156368c31abSDavid du Colombier err = -1; 157368c31abSDavid du Colombier *l = ie->nextdirty; 158368c31abSDavid du Colombier ie = *l; 159368c31abSDavid du Colombier if(ie) 160368c31abSDavid du Colombier goto again; 161368c31abSDavid du Colombier else 162368c31abSDavid du Colombier break; 163368c31abSDavid du Colombier } 164368c31abSDavid du Colombier packibucket(&ib, buf+off, is->bucketmagic); 165*29e26a97SDavid du Colombier /* 166*29e26a97SDavid du Colombier * XXX This is not quite right - it's good that we 167368c31abSDavid du Colombier * update the cached block (if any) here, but 168368c31abSDavid du Colombier * since the block doesn't get written until writepart 169368c31abSDavid du Colombier * below, we also need to make sure that the cache 170368c31abSDavid du Colombier * doesn't load the stale block before we write it to 171368c31abSDavid du Colombier * disk below. We could lock the disk cache during 172368c31abSDavid du Colombier * the writepart, but that's pretty annoying. 173368c31abSDavid du Colombier * Another possibility would be never to cache 174368c31abSDavid du Colombier * index partition blocks. The hit rate on those is 175368c31abSDavid du Colombier * miniscule anyway. 176368c31abSDavid du Colombier */ 177368c31abSDavid du Colombier if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){ 178368c31abSDavid du Colombier memmove(b->data, buf+off, bsize); 179368c31abSDavid du Colombier putdblock(b); 180368c31abSDavid du Colombier } 181368c31abSDavid du Colombier } 182368c31abSDavid du Colombier 183368c31abSDavid du Colombier diskaccess(1); 184368c31abSDavid du Colombier 185368c31abSDavid du Colombier trace(TraceProc, "icachewritesect writepart", addr, nbuf); 186*29e26a97SDavid du Colombier if(writepart(is->part, addr, buf, nbuf) < 0 || 187*29e26a97SDavid du Colombier flushpart(is->part) < 0){ 188*29e26a97SDavid du Colombier fprint(2, "%s: part %s addr 0x%llux: icachewritesect " 189*29e26a97SDavid du Colombier "writepart: %r\n", argv0, is->part->name, addr); 190368c31abSDavid du Colombier continue; 191368c31abSDavid du Colombier } 192368c31abSDavid du Colombier addstat(StatIsectWriteBytes, nbuf); 193368c31abSDavid du Colombier addstat(StatIsectWrite, 1); 194368c31abSDavid du Colombier icacheclean(chunk); 195368c31abSDavid du Colombier } 196368c31abSDavid du Colombier 197368c31abSDavid du Colombier trace(TraceProc, "icachewritesect done"); 198368c31abSDavid du Colombier return err; 199368c31abSDavid du Colombier } 200368c31abSDavid du Colombier 201368c31abSDavid du Colombier static void 202368c31abSDavid du Colombier icachewriteproc(void *v) 203368c31abSDavid du Colombier { 204368c31abSDavid du Colombier int ret; 205368c31abSDavid du Colombier uint bsize; 206368c31abSDavid du Colombier ISect *is; 207368c31abSDavid du Colombier Index *ix; 208368c31abSDavid du Colombier u8int *buf; 209368c31abSDavid du Colombier 210368c31abSDavid du Colombier ix = mainindex; 211368c31abSDavid du Colombier is = v; 212368c31abSDavid du Colombier threadsetname("icachewriteproc:%s", is->part->name); 213368c31abSDavid du Colombier 214368c31abSDavid du Colombier bsize = 1<<is->blocklog; 215368c31abSDavid du Colombier buf = emalloc(Bufsize+bsize); 216368c31abSDavid du Colombier buf = (u8int*)(((ulong)buf+bsize-1)&~(ulong)(bsize-1)); 217368c31abSDavid du Colombier 218368c31abSDavid du Colombier for(;;){ 219368c31abSDavid du Colombier trace(TraceProc, "icachewriteproc recv"); 220368c31abSDavid du Colombier recv(is->writechan, 0); 221368c31abSDavid du Colombier trace(TraceWork, "start"); 222368c31abSDavid du Colombier ret = icachewritesect(ix, is, buf); 223368c31abSDavid du Colombier trace(TraceProc, "icachewriteproc send"); 224368c31abSDavid du Colombier trace(TraceWork, "finish"); 225368c31abSDavid du Colombier sendul(is->writedonechan, ret); 226368c31abSDavid du Colombier } 227368c31abSDavid du Colombier } 228368c31abSDavid du Colombier 229368c31abSDavid du Colombier static void 230368c31abSDavid du Colombier icachewritecoord(void *v) 231368c31abSDavid du Colombier { 232368c31abSDavid du Colombier int i, err; 233368c31abSDavid du Colombier Index *ix; 234368c31abSDavid du Colombier AState as; 235368c31abSDavid du Colombier 236368c31abSDavid du Colombier USED(v); 237368c31abSDavid du Colombier 238368c31abSDavid du Colombier threadsetname("icachewritecoord"); 239368c31abSDavid du Colombier 240368c31abSDavid du Colombier ix = mainindex; 241368c31abSDavid du Colombier iwrite.as = diskstate(); 242368c31abSDavid du Colombier 243368c31abSDavid du Colombier for(;;){ 244368c31abSDavid du Colombier trace(TraceProc, "icachewritecoord sleep"); 245368c31abSDavid du Colombier waitforkick(&iwrite.round); 246368c31abSDavid du Colombier trace(TraceWork, "start"); 247368c31abSDavid du Colombier as = diskstate(); 248368c31abSDavid du Colombier if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){ 249368c31abSDavid du Colombier /* will not be able to do anything more than last flush - kick disk */ 250368c31abSDavid du Colombier trace(TraceProc, "icachewritecoord kick dcache"); 251368c31abSDavid du Colombier kickdcache(); 252368c31abSDavid du Colombier trace(TraceProc, "icachewritecoord kicked dcache"); 253368c31abSDavid du Colombier } 254368c31abSDavid du Colombier iwrite.as = as; 255368c31abSDavid du Colombier 256368c31abSDavid du Colombier trace(TraceProc, "icachewritecoord start flush"); 257368c31abSDavid du Colombier if(iwrite.as.arena){ 258368c31abSDavid du Colombier for(i=0; i<ix->nsects; i++) 259368c31abSDavid du Colombier send(ix->sects[i]->writechan, 0); 260368c31abSDavid du Colombier if(ix->bloom) 261368c31abSDavid du Colombier send(ix->bloom->writechan, 0); 262368c31abSDavid du Colombier 263368c31abSDavid du Colombier err = 0; 264368c31abSDavid du Colombier for(i=0; i<ix->nsects; i++) 265368c31abSDavid du Colombier err |= recvul(ix->sects[i]->writedonechan); 266368c31abSDavid du Colombier if(ix->bloom) 267368c31abSDavid du Colombier err |= recvul(ix->bloom->writedonechan); 268368c31abSDavid du Colombier 269368c31abSDavid du Colombier trace(TraceProc, "icachewritecoord donewrite err=%d", err); 270368c31abSDavid du Colombier if(err == 0) 271368c31abSDavid du Colombier setatailstate(&iwrite.as); 272368c31abSDavid du Colombier } 273368c31abSDavid du Colombier icacheclean(nil); /* wake up anyone waiting */ 274368c31abSDavid du Colombier trace(TraceWork, "finish"); 275368c31abSDavid du Colombier addstat(StatIcacheFlush, 1); 276368c31abSDavid du Colombier } 277368c31abSDavid du Colombier } 278368c31abSDavid du Colombier 279368c31abSDavid du Colombier void 280368c31abSDavid du Colombier flushicache(void) 281368c31abSDavid du Colombier { 282368c31abSDavid du Colombier trace(TraceProc, "flushicache enter"); 283368c31abSDavid du Colombier kickround(&iwrite.round, 1); 284368c31abSDavid du Colombier trace(TraceProc, "flushicache exit"); 285368c31abSDavid du Colombier } 286368c31abSDavid du Colombier 287368c31abSDavid du Colombier void 288368c31abSDavid du Colombier kickicache(void) 289368c31abSDavid du Colombier { 290368c31abSDavid du Colombier kickround(&iwrite.round, 0); 291368c31abSDavid du Colombier } 292368c31abSDavid du Colombier 293368c31abSDavid du Colombier void 294368c31abSDavid du Colombier delaykickicache(void) 295368c31abSDavid du Colombier { 296368c31abSDavid du Colombier delaykickround(&iwrite.round); 297368c31abSDavid du Colombier } 298368c31abSDavid du Colombier 299368c31abSDavid du Colombier static IEntry* 300368c31abSDavid du Colombier iesort(IEntry *ie) 301368c31abSDavid du Colombier { 302368c31abSDavid du Colombier int cmp; 303368c31abSDavid du Colombier IEntry **l; 304368c31abSDavid du Colombier IEntry *ie1, *ie2, *sorted; 305368c31abSDavid du Colombier 306368c31abSDavid du Colombier if(ie == nil || ie->nextdirty == nil) 307368c31abSDavid du Colombier return ie; 308368c31abSDavid du Colombier 309368c31abSDavid du Colombier /* split the lists */ 310368c31abSDavid du Colombier ie1 = ie; 311368c31abSDavid du Colombier ie2 = ie; 312368c31abSDavid du Colombier if(ie2) 313368c31abSDavid du Colombier ie2 = ie2->nextdirty; 314368c31abSDavid du Colombier if(ie2) 315368c31abSDavid du Colombier ie2 = ie2->nextdirty; 316368c31abSDavid du Colombier while(ie1 && ie2){ 317368c31abSDavid du Colombier ie1 = ie1->nextdirty; 318368c31abSDavid du Colombier ie2 = ie2->nextdirty; 319368c31abSDavid du Colombier if(ie2) 320368c31abSDavid du Colombier ie2 = ie2->nextdirty; 321368c31abSDavid du Colombier } 322368c31abSDavid du Colombier if(ie1){ 323368c31abSDavid du Colombier ie2 = ie1->nextdirty; 324368c31abSDavid du Colombier ie1->nextdirty = nil; 325368c31abSDavid du Colombier } 326368c31abSDavid du Colombier 327368c31abSDavid du Colombier /* sort the lists */ 328368c31abSDavid du Colombier ie1 = iesort(ie); 329368c31abSDavid du Colombier ie2 = iesort(ie2); 330368c31abSDavid du Colombier 331368c31abSDavid du Colombier /* merge the lists */ 332368c31abSDavid du Colombier sorted = nil; 333368c31abSDavid du Colombier l = &sorted; 334368c31abSDavid du Colombier cmp = 0; 335368c31abSDavid du Colombier while(ie1 || ie2){ 336368c31abSDavid du Colombier if(ie1 && ie2) 337368c31abSDavid du Colombier cmp = scorecmp(ie1->score, ie2->score); 338368c31abSDavid du Colombier if(ie1==nil || (ie2 && cmp > 0)){ 339368c31abSDavid du Colombier *l = ie2; 340368c31abSDavid du Colombier l = &ie2->nextdirty; 341368c31abSDavid du Colombier ie2 = ie2->nextdirty; 342368c31abSDavid du Colombier }else{ 343368c31abSDavid du Colombier *l = ie1; 344368c31abSDavid du Colombier l = &ie1->nextdirty; 345368c31abSDavid du Colombier ie1 = ie1->nextdirty; 346368c31abSDavid du Colombier } 347368c31abSDavid du Colombier } 348368c31abSDavid du Colombier *l = nil; 349368c31abSDavid du Colombier return sorted; 350368c31abSDavid du Colombier } 351368c31abSDavid du Colombier 352