1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4
typedef struct ASum ASum;

/*
 * Queue entry for the background checksum worker (sumproc):
 * a singly linked list of arenas waiting to be summed and sealed.
 */
struct ASum
{
	Arena	*arena;
	ASum	*next;
};

static void sealarena(Arena *arena);
static int okarena(Arena *arena);
static int loadarena(Arena *arena);
static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void putcib(Arena *arena, CIBlock *cib);
static void sumproc(void *);
static void loadcig(Arena *arena);

static QLock sumlock;		/* guards sumq/sumqtail; also backs sumwait */
static Rendez sumwait;		/* signaled by backsumarena when work is queued */
static ASum *sumq;		/* head of the pending-checksum queue */
static ASum *sumqtail;		/* tail of the pending-checksum queue */
static uchar zero[8192];	/* zero block written at clump-storage start by newarena */

int arenasumsleeptime;		/* delay between sumarena reads; SleepForever pauses summing */
28
29 int
initarenasum(void)30 initarenasum(void)
31 {
32 needzeroscore(); /* OS X */
33
34 sumwait.l = &sumlock;
35
36 if(vtproc(sumproc, nil) < 0){
37 seterr(EOk, "can't start arena checksum slave: %r");
38 return -1;
39 }
40 return 0;
41 }
42
43 /*
44 * make an Arena, and initialize it based upon the disk header and trailer.
45 */
46 Arena*
initarena(Part * part,u64int base,u64int size,u32int blocksize)47 initarena(Part *part, u64int base, u64int size, u32int blocksize)
48 {
49 Arena *arena;
50
51 arena = MKZ(Arena);
52 arena->part = part;
53 arena->blocksize = blocksize;
54 arena->clumpmax = arena->blocksize / ClumpInfoSize;
55 arena->base = base + blocksize;
56 arena->size = size - 2 * blocksize;
57
58 if(loadarena(arena) < 0){
59 seterr(ECorrupt, "arena header or trailer corrupted");
60 freearena(arena);
61 return nil;
62 }
63 if(okarena(arena) < 0){
64 freearena(arena);
65 return nil;
66 }
67
68 if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
69 sealarena(arena);
70
71 return arena;
72 }
73
74 void
freearena(Arena * arena)75 freearena(Arena *arena)
76 {
77 if(arena == nil)
78 return;
79 free(arena);
80 }
81
82 Arena*
newarena(Part * part,u32int vers,char * name,u64int base,u64int size,u32int blocksize)83 newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
84 {
85 int bsize;
86 Arena *arena;
87
88 if(nameok(name) < 0){
89 seterr(EOk, "illegal arena name", name);
90 return nil;
91 }
92 arena = MKZ(Arena);
93 arena->part = part;
94 arena->version = vers;
95 if(vers == ArenaVersion4)
96 arena->clumpmagic = _ClumpMagic;
97 else{
98 do
99 arena->clumpmagic = fastrand();
100 while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
101 }
102 arena->blocksize = blocksize;
103 arena->clumpmax = arena->blocksize / ClumpInfoSize;
104 arena->base = base + blocksize;
105 arena->size = size - 2 * blocksize;
106
107 namecp(arena->name, name);
108
109 bsize = sizeof zero;
110 if(bsize > arena->blocksize)
111 bsize = arena->blocksize;
112
113 if(wbarena(arena)<0 || wbarenahead(arena)<0
114 || writepart(arena->part, arena->base, zero, bsize)<0){
115 freearena(arena);
116 return nil;
117 }
118
119 return arena;
120 }
121
122 int
readclumpinfo(Arena * arena,int clump,ClumpInfo * ci)123 readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
124 {
125 CIBlock *cib, r;
126
127 cib = getcib(arena, clump, 0, &r);
128 if(cib == nil)
129 return -1;
130 unpackclumpinfo(ci, &cib->data->data[cib->offset]);
131 putcib(arena, cib);
132 return 0;
133 }
134
/*
 * Read up to n consecutive toc entries starting at clump into cis.
 * Returns how many leading entries of cis are valid, which may be
 * fewer than n if a directory block fails to load.
 */
int
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
{
	CIBlock *cib, r;
	int i;

	/*
	 * because the clump blocks are laid out
	 * in reverse order at the end of the arena,
	 * it can be a few percent faster to read
	 * the clumps backwards, which reads the
	 * disk blocks forwards.
	 */
	for(i = n-1; i >= 0; i--){
		cib = getcib(arena, clump + i, 0, &r);
		if(cib == nil){
			/* entry i failed: shrink the result to i, keep filling lower entries */
			n = i;
			continue;
		}
		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
		putcib(arena, cib);
	}
	return n;
}
159
160 /*
161 * write directory information for one clump
162 * must be called the arena locked
163 */
164 int
writeclumpinfo(Arena * arena,int clump,ClumpInfo * ci)165 writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
166 {
167 CIBlock *cib, r;
168
169 cib = getcib(arena, clump, 1, &r);
170 if(cib == nil)
171 return -1;
172 dirtydblock(cib->data, DirtyArenaCib);
173 packclumpinfo(ci, &cib->data->data[cib->offset]);
174 putcib(arena, cib);
175 return 0;
176 }
177
178 u64int
arenadirsize(Arena * arena,u32int clumps)179 arenadirsize(Arena *arena, u32int clumps)
180 {
181 return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
182 }
183
184 /*
185 * read a clump of data
186 * n is a hint of the size of the data, not including the header
187 * make sure it won't run off the end, then return the number of bytes actually read
188 */
u32int
readarena(Arena *arena, u64int aa, u8int *buf, long n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;

	if(n == 0)
		return -1;	/* NOTE(review): -1 in a u32int return is effectively TWID32 */

	/* a: end of clump storage; the toc occupies everything beyond it */
	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	qunlock(&arena->lock);
	if(aa >= a){
		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
		return -1;
	}
	if(aa + n > a)
		n = a - aa;	/* clip the read at the end of clump storage */

	/* copy block by block; the first block may start at an interior offset */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);	/* assumes blocksize is a power of two — TODO confirm */
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, OREAD);
		if(b == nil)
			return -1;
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&buf[nn], &b->data[off], m);
		putdblock(b);
		nn += m;
		if(nn == n)
			break;
		off = 0;	/* subsequent blocks are read from their start */
		a += blocksize;
	}
	return n;
}
232
233 /*
234 * write some data to the clump section at a given offset
235 * used to fix up corrupted arenas.
236 */
237 u32int
writearena(Arena * arena,u64int aa,u8int * clbuf,u32int n)238 writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
239 {
240 DBlock *b;
241 u64int a;
242 u32int blocksize, off, m;
243 long nn;
244 int ok;
245
246 if(n == 0)
247 return -1;
248
249 qlock(&arena->lock);
250 a = arena->size - arenadirsize(arena, arena->memstats.clumps);
251 if(aa >= a || aa + n > a){
252 qunlock(&arena->lock);
253 seterr(EOk, "writing beyond arena clump storage");
254 return -1;
255 }
256
257 blocksize = arena->blocksize;
258 a = arena->base + aa;
259 off = a & (blocksize - 1);
260 a -= off;
261 nn = 0;
262 for(;;){
263 b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
264 if(b == nil){
265 qunlock(&arena->lock);
266 return -1;
267 }
268 dirtydblock(b, DirtyArena);
269 m = blocksize - off;
270 if(m > n - nn)
271 m = n - nn;
272 memmove(&b->data[off], &clbuf[nn], m);
273 ok = 0;
274 putdblock(b);
275 if(ok < 0){
276 qunlock(&arena->lock);
277 return -1;
278 }
279 nn += m;
280 if(nn == n)
281 break;
282 off = 0;
283 a += blocksize;
284 }
285 qunlock(&arena->lock);
286 return n;
287 }
288
289 /*
290 * allocate space for the clump and write it,
291 * updating the arena directory
292 ZZZ question: should this distinguish between an arena
293 filling up and real errors writing the clump?
294 */
u64int
writeaclump(Arena *arena, Clump *c, u8int *clbuf)
{
	DBlock *b;
	u64int a, aa;
	u32int clump, n, nn, m, off, blocksize;
	int ok;

	/* total on-disk size: header + data + trailing magic word */
	n = c->info.size + ClumpSize + U32Size;
	qlock(&arena->lock);
	aa = arena->memstats.used;	/* arena-relative address of the new clump */
	/* out of room? must also leave space for the next magic and a grown toc */
	if(arena->memstats.sealed
	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
		if(!arena->memstats.sealed){
			logerr(EOk, "seal memstats %s", arena->name);
			arena->memstats.sealed = 1;
			wbarena(arena);
		}
		qunlock(&arena->lock);
		return TWID64;
	}
	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
		qunlock(&arena->lock);
		return TWID64;
	}

	/*
	 * write the data out one block at a time
	 */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;
	nn = 0;
	for(;;){
		/* a partial first block must be read so existing bytes survive */
		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return TWID64;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		/* NOTE(review): ok is never set to anything but 0; the check below is dead code */
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return TWID64;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}

	/* used excludes the trailing magic; the next clump overwrites it */
	arena->memstats.used += c->info.size + ClumpSize;
	arena->memstats.uncsize += c->info.uncsize;
	if(c->info.size < c->info.uncsize)
		arena->memstats.cclumps++;	/* stored smaller than raw: compressed */

	/* every ArenaCIGSize-th clump begins a new clump info group */
	clump = arena->memstats.clumps;
	if(clump % ArenaCIGSize == 0){
		if(arena->cig == nil){
			loadcig(arena);
			if(arena->cig == nil)
				goto NoCIG;
		}
		/* add aa as start of next cig */
		if(clump/ArenaCIGSize != arena->ncig){
			fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
				arena->name, clump, arena->ncig);
			arena->ncig = -1;	/* poison: loadcig will not rescan */
			vtfree(arena->cig);
			arena->cig = nil;
			goto NoCIG;
		}
		arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
		arena->cig[arena->ncig++].offset = aa;
	}
NoCIG:
	arena->memstats.clumps++;

	if(arena->memstats.clumps == 0)
		sysfatal("clumps wrapped");
	arena->wtime = now();
	if(arena->ctime == 0)
		arena->ctime = arena->wtime;

	writeclumpinfo(arena, clump, &c->info);
	wbarena(arena);

	qunlock(&arena->lock);

	return aa;
}
393
394 int
atailcmp(ATailStats * a,ATailStats * b)395 atailcmp(ATailStats *a, ATailStats *b)
396 {
397 /* good test */
398 if(a->used < b->used)
399 return -1;
400 if(a->used > b->used)
401 return 1;
402
403 /* suspect tests - why order this way? (no one cares) */
404 if(a->clumps < b->clumps)
405 return -1;
406 if(a->clumps > b->clumps)
407 return 1;
408 if(a->cclumps < b->cclumps)
409 return -1;
410 if(a->cclumps > b->cclumps)
411 return 1;
412 if(a->uncsize < b->uncsize)
413 return -1;
414 if(a->uncsize > b->uncsize)
415 return 1;
416 if(a->sealed < b->sealed)
417 return -1;
418 if(a->sealed > b->sealed)
419 return 1;
420
421 /* everything matches */
422 return 0;
423 }
424
/*
 * Push the in-memory tail state in as out to disk: update the
 * on-disk stats of as->arena and of every earlier arena in the
 * index whose disk stats lag behind memory.
 */
void
setatailstate(AState *as)
{
	int i, j, osealed;
	Arena *a;
	Index *ix;

	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);

	/*
	 * Look up as->arena to find index.
	 */
	needmainindex(); /* OS X linker */
	ix = mainindex;
	for(i=0; i<ix->narenas; i++)
		if(ix->arenas[i] == as->arena)
			break;
	/* sanity: the address must lie inside the arena's mapped range */
	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
		fprint(2, "funny settailstate 0x%llux\n", as->aa);
		return;
	}

	for(j=0; j<=i; j++){
		a = ix->arenas[j];
		if(atailcmp(&a->diskstats, &a->memstats) == 0)
			continue;	/* disk already current */
		qlock(&a->lock);
		osealed = a->diskstats.sealed;
		if(j == i)
			a->diskstats = as->stats;	/* target arena: caller's snapshot */
		else
			a->diskstats = a->memstats;	/* earlier arenas: memory is authoritative */
		wbarena(a);
		/* newly sealed and not yet queued: schedule the seal checksum */
		if(a->diskstats.sealed != osealed && !a->inqueue)
			sealarena(a);
		qunlock(&a->lock);
	}
}
463
464 /*
465 * once sealed, an arena never has any data added to it.
466 * it should only be changed to fix errors.
467 * this also syncs the clump directory.
468 */
static void
sealarena(Arena *arena)
{
	/* mark queued before handing off, so setatailstate won't queue it again */
	arena->inqueue = 1;
	backsumarena(arena);
}
475
476 void
backsumarena(Arena * arena)477 backsumarena(Arena *arena)
478 {
479 ASum *as;
480
481 if(sumwait.l == nil)
482 return;
483
484 as = MK(ASum);
485 if(as == nil)
486 return;
487 qlock(&sumlock);
488 as->arena = arena;
489 as->next = nil;
490 if(sumq)
491 sumqtail->next = as;
492 else
493 sumq = as;
494 sumqtail = as;
495 rwakeup(&sumwait);
496 qunlock(&sumlock);
497 }
498
499 static void
sumproc(void * unused)500 sumproc(void *unused)
501 {
502 ASum *as;
503 Arena *arena;
504
505 USED(unused);
506
507 for(;;){
508 qlock(&sumlock);
509 while(sumq == nil)
510 rsleep(&sumwait);
511 as = sumq;
512 sumq = as->next;
513 qunlock(&sumlock);
514 arena = as->arena;
515 free(as);
516
517 sumarena(arena);
518 }
519 }
520
/*
 * Compute the seal checksum of an entire arena — header, data,
 * and trailer with the stored score treated as zero — and record
 * it in arena->score, rewriting the trailer.
 */
void
sumarena(Arena *arena)
{
	ZBlock *b;
	DigestState s;
	u64int a, e;
	u32int bs;
	int t;
	u8int score[VtScoreSize];

	bs = MaxIoSize;
	if(bs < arena->blocksize)
		bs = arena->blocksize;

	/*
	 * read & sum all blocks except the last one
	 */
	flushdcache();
	memset(&s, 0, sizeof s);
	b = alloczblock(bs, 0, arena->part->blocksize);
	e = arena->base + arena->size;	/* address of the trailer block */
	/* start one block early so the arena header is included in the sum */
	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
		disksched();
		/* SleepForever pauses summing until the setting changes */
		while((t=arenasumsleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		sleep(t);	/* throttle to limit disk load */
		if(a + bs > e)
			bs = arena->blocksize;	/* shrink final reads to stay inside the arena */
		if(readpart(arena->part, a, b->data, bs) < 0)
			goto ReadErr;
		addstat(StatSumRead, 1);
		addstat(StatSumReadBytes, bs);
		sha1(b->data, bs, nil, &s);
	}

	/*
	 * the last one is special, since it may already have the checksum included
	 */
	bs = arena->blocksize;
	if(readpart(arena->part, e, b->data, bs) < 0){
ReadErr:
		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
		freezblock(b);
		return;
	}
	addstat(StatSumRead, 1);
	addstat(StatSumReadBytes, bs);

	/* sum the trailer with a zero score in place of the stored one */
	sha1(b->data, bs-VtScoreSize, nil, &s);
	sha1(zeroscore, VtScoreSize, nil, &s);
	sha1(nil, 0, score, &s);

	/*
	 * check for no checksum or the same
	 */
	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
			arena->name, &b->data[bs - VtScoreSize], score);
	freezblock(b);

	qlock(&arena->lock);
	scorecp(arena->score, score);
	wbarena(arena);
	qunlock(&arena->lock);
}
589
590 /*
591 * write the arena trailer block to the partition
592 */
593 int
wbarena(Arena * arena)594 wbarena(Arena *arena)
595 {
596 DBlock *b;
597 int bad;
598
599 if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
600 logerr(EAdmin, "can't write arena trailer: %r");
601 return -1;
602 }
603 dirtydblock(b, DirtyArenaTrailer);
604 bad = okarena(arena)<0 || packarena(arena, b->data)<0;
605 scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
606 putdblock(b);
607 if(bad)
608 return -1;
609 return 0;
610 }
611
612 int
wbarenahead(Arena * arena)613 wbarenahead(Arena *arena)
614 {
615 ZBlock *b;
616 ArenaHead head;
617 int bad;
618
619 namecp(head.name, arena->name);
620 head.version = arena->version;
621 head.size = arena->size + 2 * arena->blocksize;
622 head.blocksize = arena->blocksize;
623 head.clumpmagic = arena->clumpmagic;
624 b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
625 if(b == nil){
626 logerr(EAdmin, "can't write arena header: %r");
627 /* ZZZ add error message? */
628 return -1;
629 }
630 /*
631 * this writepart is okay because it only happens
632 * during initialization.
633 */
634 bad = packarenahead(&head, b->data)<0 ||
635 writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
636 flushpart(arena->part)<0;
637 freezblock(b);
638 if(bad)
639 return -1;
640 return 0;
641 }
642
643 /*
644 * read the arena header and trailer blocks from disk
645 */
static int
loadarena(Arena *arena)
{
	ArenaHead head;
	ZBlock *b;

	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
	if(b == nil)
		return -1;
	/* the trailer is the authoritative copy of the arena state; read it first */
	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
		freezblock(b);
		return -1;
	}
	if(unpackarena(arena, b->data) < 0){
		freezblock(b);
		return -1;
	}
	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
		seterr(EAdmin, "unknown arena version %d", arena->version);
		freezblock(b);
		return -1;
	}
	/* the seal score occupies the last VtScoreSize bytes of the trailer */
	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);

	/* header problems are logged but non-fatal: the trailer already loaded */
	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
		logerr(EAdmin, "can't read arena header: %r");
		freezblock(b);
		return 0;
	}
	if(unpackarenahead(&head, b->data) < 0)
		logerr(ECorrupt, "corrupted arena header: %r");
	else if(namecmp(arena->name, head.name)!=0
	|| arena->clumpmagic != head.clumpmagic
	|| arena->version != head.version
	|| arena->blocksize != head.blocksize
	|| arena->size + 2 * arena->blocksize != head.size){
		/* report the first mismatching field to aid diagnosis */
		if(namecmp(arena->name, head.name)!=0)
			logerr(ECorrupt, "arena tail name %s head %s",
				arena->name, head.name);
		else if(arena->clumpmagic != head.clumpmagic)
			logerr(ECorrupt, "arena %d tail clumpmagic 0x%lux head 0x%lux",
				debugarena, (ulong)arena->clumpmagic,
				(ulong)head.clumpmagic);
		else if(arena->version != head.version)
			logerr(ECorrupt, "arena tail version %d head version %d",
				arena->version, head.version);
		else if(arena->blocksize != head.blocksize)
			logerr(ECorrupt, "arena tail block size %d head %d",
				arena->blocksize, head.blocksize);
		else if(arena->size+2*arena->blocksize != head.size)
			logerr(ECorrupt, "arena tail size %lud head %lud",
				(ulong)arena->size+2*arena->blocksize, head.size);
		else
			logerr(ECorrupt, "arena header inconsistent with arena data");
	}
	freezblock(b);

	return 0;
}
705
706 static int
okarena(Arena * arena)707 okarena(Arena *arena)
708 {
709 u64int dsize;
710 int ok;
711
712 ok = 0;
713 dsize = arenadirsize(arena, arena->diskstats.clumps);
714 if(arena->diskstats.used + dsize > arena->size){
715 seterr(ECorrupt, "arena %s used > size", arena->name);
716 ok = -1;
717 }
718
719 if(arena->diskstats.cclumps > arena->diskstats.clumps)
720 logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);
721
722 /*
723 * This need not be true if some of the disk is corrupted.
724 *
725 if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
726 logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
727 */
728
729 /*
730 * this happens; it's harmless.
731 *
732 if(arena->ctime > arena->wtime)
733 logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
734 */
735 return ok;
736 }
737
/*
 * Map a clump index to its toc block and offset within that block,
 * and pin the block in the cache; rock supplies the CIBlock storage.
 * Returns nil (with error set) on a bad index or a cache failure.
 */
static CIBlock*
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
{
	int mode;
	CIBlock *cib;
	u32int block, off;

	if(clump >= arena->memstats.clumps){
		seterr(EOk, "clump directory access out of range");
		return nil;
	}
	/* toc entries are packed arena->clumpmax per block */
	block = clump / arena->clumpmax;
	off = (clump - block * arena->clumpmax) * ClumpInfoSize;
	cib = rock;
	cib->block = block;
	cib->offset = off;

	if(writing){
		/* first entry of the newest block: nothing to preserve, skip the read */
		if(off == 0 && clump == arena->memstats.clumps-1)
			mode = OWRITE;
		else
			mode = ORDWR;
	}else
		mode = OREAD;

	/* toc blocks grow backwards from the end of the arena */
	cib->data = getdblock(arena->part,
		arena->base + arena->size - (block + 1) * arena->blocksize, mode);
	if(cib->data == nil)
		return nil;
	return cib;
}
769
/*
 * Release the toc block held by a CIBlock obtained from getcib.
 */
static void
putcib(Arena *arena, CIBlock *cib)
{
	USED(arena);

	putdblock(cib->data);
	cib->data = nil;	/* guard against reuse after release */
}
778
779
780 /*
781 * For index entry readahead purposes, the arenas are
782 * broken into smaller subpieces, called clump info groups
783 * or cigs. Each cig has ArenaCIGSize clumps (ArenaCIGSize
784 * is chosen to make the index entries take up about half
785 * a megabyte). The index entries do not contain enough
786 * information to determine what the clump index is for
787 * a given address in an arena. That info is needed both for
788 * figuring out which clump group an address belongs to
789 * and for prefetching a clump group's index entries from
790 * the arena table of contents. The first time clump groups
791 * are accessed, we scan the entire arena table of contents
792 * (which might be 10s of megabytes), recording the data
793 * offset of each clump group.
794 */
795
796 /*
797 * load clump info group information by scanning entire toc.
798 */
static void
loadcig(Arena *arena)
{
	u32int i, j, ncig, nci;
	ArenaCIG *cig;
	ClumpInfo *ci;
	u64int offset;
	int ms;

	/* already loaded, or a previous scan failed (ncig == -1 is a poison value) */
	if(arena->cig || arena->ncig < 0)
		return;

//	fprint(2, "loadcig %s\n", arena->name);

	ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
	if(ncig == 0){
		/* empty arena: token allocation so arena->cig != nil marks "loaded" */
		arena->cig = vtmalloc(1);
		arena->ncig = 0;
		return;
	}

	ms = msec();
	cig = vtmalloc(ncig*sizeof cig[0]);
	ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
	offset = 0;
	/* walk the toc one group at a time, accumulating each group's data offset */
	for(i=0; i<ncig; i++){
		nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
		cig[i].offset = offset;
		for(j=0; j<nci; j++)
			offset += ClumpSize + ci[j].size;
		if(nci < ArenaCIGSize){
			/* a short group is only legal as the final group */
			if(i != ncig-1){
				vtfree(ci);
				vtfree(cig);
				arena->ncig = -1;	/* poison: do not rescan */
				fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
				goto out;
			}
		}
	}
	vtfree(ci);

	arena->ncig = ncig;
	arena->cig = cig;

out:
	ms = msec() - ms;
	addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
}
848
849 /*
850 * convert arena address into arena group + data boundaries.
851 */
int
arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
{
	int r, l, m;

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0){
		qunlock(&arena->lock);
		return -1;
	}

	/*
	 * binary search for the last group whose offset is <= addr;
	 * starting at l = 1 guarantees the answer l-1 is at least 0.
	 */
	l = 1;
	r = arena->ncig - 1;
	while(l <= r){
		m = (r + l) / 2;
		if(arena->cig[m].offset <= addr)
			l = m + 1;
		else
			r = m - 1;
	}
	l--;

	*g = l;
	*gstart = arena->cig[l].offset;
	/* the final group extends to the end of the used data */
	if(l+1 < arena->ncig)
		*glimit = arena->cig[l+1].offset;
	else
		*glimit = arena->memstats.used;
	qunlock(&arena->lock);
	return 0;
}
885
886 /*
887 * load the clump info for group g into the index entries.
888 */
int
asumload(Arena *arena, int g, IEntry *entries, int nentries)
{
	int i, base, limit;
	u64int addr;
	ClumpInfo ci;
	IEntry *ie;

	if(nentries < ArenaCIGSize){
		fprint(2, "asking for too few entries\n");
		return -1;
	}

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
		qunlock(&arena->lock);
		return -1;
	}

	addr = 0;	/* NOTE(review): group-relative address; presumably the caller adds the group start — confirm */
	base = g*ArenaCIGSize;
	limit = base + ArenaCIGSize;
	if(base > arena->memstats.clumps)
		base = arena->memstats.clumps;
	ie = entries;
	for(i=base; i<limit; i++){
		if(readclumpinfo(arena, i, &ci) < 0)
			break;
		/* corrupt clumps still advance addr but produce no index entry */
		if(ci.type != VtCorruptType){
			scorecp(ie->score, ci.score);
			ie->ia.type = ci.type;
			ie->ia.size = ci.uncsize;
			/* round the stored size up to whole arena blocks */
			ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
			ie->ia.addr = addr;
			ie++;
		}
		addr += ClumpSize + ci.size;
	}
	qunlock(&arena->lock);
	return ie - entries;	/* number of entries actually filled */
}
932