1*368c31abSDavid du Colombier #include "stdinc.h"
2*368c31abSDavid du Colombier #include "dat.h"
3*368c31abSDavid du Colombier #include "fns.h"
4*368c31abSDavid du Colombier #include <bio.h>
5*368c31abSDavid du Colombier
6*368c31abSDavid du Colombier typedef struct IEBuck IEBuck;
7*368c31abSDavid du Colombier typedef struct IEBucks IEBucks;
8*368c31abSDavid du Colombier
9*368c31abSDavid du Colombier enum
10*368c31abSDavid du Colombier {
11*368c31abSDavid du Colombier ClumpChunks = 32*1024
12*368c31abSDavid du Colombier };
13*368c31abSDavid du Colombier
14*368c31abSDavid du Colombier struct IEBuck
15*368c31abSDavid du Colombier {
16*368c31abSDavid du Colombier u32int head; /* head of chain of chunks on the disk */
17*368c31abSDavid du Colombier u32int used; /* usage of the last chunk */
18*368c31abSDavid du Colombier u64int total; /* total number of bytes in this bucket */
19*368c31abSDavid du Colombier u8int *buf; /* chunk of entries for this bucket */
20*368c31abSDavid du Colombier };
21*368c31abSDavid du Colombier
22*368c31abSDavid du Colombier struct IEBucks
23*368c31abSDavid du Colombier {
24*368c31abSDavid du Colombier Part *part;
25*368c31abSDavid du Colombier u64int off; /* offset for writing data in the partition */
26*368c31abSDavid du Colombier u32int chunks; /* total chunks written to fd */
27*368c31abSDavid du Colombier u64int max; /* max bytes entered in any one bucket */
28*368c31abSDavid du Colombier int bits; /* number of bits in initial bucket sort */
29*368c31abSDavid du Colombier int nbucks; /* 1 << bits, the number of buckets */
30*368c31abSDavid du Colombier u32int size; /* bytes in each of the buckets chunks */
31*368c31abSDavid du Colombier u32int usable; /* amount usable for IEntry data */
32*368c31abSDavid du Colombier u8int *buf; /* buffer for all chunks */
33*368c31abSDavid du Colombier u8int *xbuf;
34*368c31abSDavid du Colombier IEBuck *bucks;
35*368c31abSDavid du Colombier };
36*368c31abSDavid du Colombier
37*368c31abSDavid du Colombier #define U32GET(p) (((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
38*368c31abSDavid du Colombier #define U32PUT(p,v) (p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)
39*368c31abSDavid du Colombier
40*368c31abSDavid du Colombier static IEBucks *initiebucks(Part *part, int bits, u32int size);
41*368c31abSDavid du Colombier static int flushiebuck(IEBucks *ib, int b, int reset);
42*368c31abSDavid du Colombier static int flushiebucks(IEBucks *ib);
43*368c31abSDavid du Colombier static u32int sortiebuck(IEBucks *ib, int b);
44*368c31abSDavid du Colombier static u64int sortiebucks(IEBucks *ib);
45*368c31abSDavid du Colombier static int sprayientry(IEBucks *ib, IEntry *ie);
46*368c31abSDavid du Colombier static u32int readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b);
47*368c31abSDavid du Colombier static u32int readiebuck(IEBucks *ib, int b);
48*368c31abSDavid du Colombier static void freeiebucks(IEBucks *ib);
49*368c31abSDavid du Colombier
50*368c31abSDavid du Colombier /*
51*368c31abSDavid du Colombier * build a sorted file with all IEntries which should be in ix.
52*368c31abSDavid du Colombier * assumes the arenas' directories are up to date.
53*368c31abSDavid du Colombier * reads each, converts the entries to index entries,
54*368c31abSDavid du Colombier * and sorts them.
55*368c31abSDavid du Colombier */
56*368c31abSDavid du Colombier u64int
sortrawientries(Index * ix,Part * tmp,u64int * base,Bloom * bloom)57*368c31abSDavid du Colombier sortrawientries(Index *ix, Part *tmp, u64int *base, Bloom *bloom)
58*368c31abSDavid du Colombier {
59*368c31abSDavid du Colombier IEBucks *ib;
60*368c31abSDavid du Colombier u64int clumps, sorted;
61*368c31abSDavid du Colombier u32int n;
62*368c31abSDavid du Colombier int i, ok;
63*368c31abSDavid du Colombier
64*368c31abSDavid du Colombier /* ZZZ should allow configuration of bits, bucket size */
65*368c31abSDavid du Colombier ib = initiebucks(tmp, 8, 64*1024);
66*368c31abSDavid du Colombier if(ib == nil){
67*368c31abSDavid du Colombier seterr(EOk, "can't create sorting buckets: %r");
68*368c31abSDavid du Colombier return TWID64;
69*368c31abSDavid du Colombier }
70*368c31abSDavid du Colombier ok = 0;
71*368c31abSDavid du Colombier clumps = 0;
72*368c31abSDavid du Colombier fprint(2, "constructing entry list\n");
73*368c31abSDavid du Colombier for(i = 0; i < ix->narenas; i++){
74*368c31abSDavid du Colombier n = readarenainfo(ib, ix->arenas[i], ix->amap[i].start, bloom);
75*368c31abSDavid du Colombier if(n == TWID32){
76*368c31abSDavid du Colombier ok = -1;
77*368c31abSDavid du Colombier break;
78*368c31abSDavid du Colombier }
79*368c31abSDavid du Colombier clumps += n;
80*368c31abSDavid du Colombier }
81*368c31abSDavid du Colombier fprint(2, "sorting %lld entries\n", clumps);
82*368c31abSDavid du Colombier if(ok == 0){
83*368c31abSDavid du Colombier sorted = sortiebucks(ib);
84*368c31abSDavid du Colombier *base = (u64int)ib->chunks * ib->size;
85*368c31abSDavid du Colombier if(sorted != clumps){
86*368c31abSDavid du Colombier fprint(2, "sorting messed up: clumps=%lld sorted=%lld\n", clumps, sorted);
87*368c31abSDavid du Colombier ok = -1;
88*368c31abSDavid du Colombier }
89*368c31abSDavid du Colombier }
90*368c31abSDavid du Colombier freeiebucks(ib);
91*368c31abSDavid du Colombier if(ok < 0)
92*368c31abSDavid du Colombier return TWID64;
93*368c31abSDavid du Colombier return clumps;
94*368c31abSDavid du Colombier }
95*368c31abSDavid du Colombier
96*368c31abSDavid du Colombier #define CHECK(cis) if(((ulong*)cis)[-4] != 0xA110C09) xabort();
97*368c31abSDavid du Colombier
98*368c31abSDavid du Colombier void
xabort(void)99*368c31abSDavid du Colombier xabort(void)
100*368c31abSDavid du Colombier {
101*368c31abSDavid du Colombier int *x;
102*368c31abSDavid du Colombier
103*368c31abSDavid du Colombier x = 0;
104*368c31abSDavid du Colombier *x = 0;
105*368c31abSDavid du Colombier }
106*368c31abSDavid du Colombier
107*368c31abSDavid du Colombier /*
108*368c31abSDavid du Colombier * read in all of the arena's clump directory,
109*368c31abSDavid du Colombier * convert to IEntry format, and bucket sort based
110*368c31abSDavid du Colombier * on the first few bits.
111*368c31abSDavid du Colombier */
112*368c31abSDavid du Colombier static u32int
readarenainfo(IEBucks * ib,Arena * arena,u64int a,Bloom * b)113*368c31abSDavid du Colombier readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b)
114*368c31abSDavid du Colombier {
115*368c31abSDavid du Colombier IEntry ie;
116*368c31abSDavid du Colombier ClumpInfo *ci, *cis;
117*368c31abSDavid du Colombier u32int clump;
118*368c31abSDavid du Colombier int i, n, ok, nskip;
119*368c31abSDavid du Colombier
120*368c31abSDavid du Colombier if(arena->memstats.clumps)
121*368c31abSDavid du Colombier fprint(2, "\tarena %s: %d entries\n", arena->name, arena->memstats.clumps);
122*368c31abSDavid du Colombier else
123*368c31abSDavid du Colombier fprint(2, "[%s] ", arena->name);
124*368c31abSDavid du Colombier
125*368c31abSDavid du Colombier cis = MKN(ClumpInfo, ClumpChunks);
126*368c31abSDavid du Colombier ok = 0;
127*368c31abSDavid du Colombier nskip = 0;
128*368c31abSDavid du Colombier memset(&ie, 0, sizeof(IEntry));
129*368c31abSDavid du Colombier for(clump = 0; clump < arena->memstats.clumps; clump += n){
130*368c31abSDavid du Colombier n = ClumpChunks;
131*368c31abSDavid du Colombier if(n > arena->memstats.clumps - clump)
132*368c31abSDavid du Colombier n = arena->memstats.clumps - clump;
133*368c31abSDavid du Colombier if(readclumpinfos(arena, clump, cis, n) != n){
134*368c31abSDavid du Colombier seterr(EOk, "arena directory read failed: %r");
135*368c31abSDavid du Colombier ok = -1;
136*368c31abSDavid du Colombier break;
137*368c31abSDavid du Colombier }
138*368c31abSDavid du Colombier
139*368c31abSDavid du Colombier for(i = 0; i < n; i++){
140*368c31abSDavid du Colombier ci = &cis[i];
141*368c31abSDavid du Colombier ie.ia.type = ci->type;
142*368c31abSDavid du Colombier ie.ia.size = ci->uncsize;
143*368c31abSDavid du Colombier ie.ia.addr = a;
144*368c31abSDavid du Colombier a += ci->size + ClumpSize;
145*368c31abSDavid du Colombier ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
146*368c31abSDavid du Colombier scorecp(ie.score, ci->score);
147*368c31abSDavid du Colombier if(ci->type == VtCorruptType){
148*368c31abSDavid du Colombier if(0) print("! %V %22lld %3d %5d %3d\n",
149*368c31abSDavid du Colombier ie.score, ie.ia.addr, ie.ia.type, ie.ia.size, ie.ia.blocks);
150*368c31abSDavid du Colombier nskip++;
151*368c31abSDavid du Colombier }else
152*368c31abSDavid du Colombier sprayientry(ib, &ie);
153*368c31abSDavid du Colombier markbloomfilter(b, ie.score);
154*368c31abSDavid du Colombier }
155*368c31abSDavid du Colombier }
156*368c31abSDavid du Colombier free(cis);
157*368c31abSDavid du Colombier if(ok < 0)
158*368c31abSDavid du Colombier return TWID32;
159*368c31abSDavid du Colombier return clump - nskip;
160*368c31abSDavid du Colombier }
161*368c31abSDavid du Colombier
162*368c31abSDavid du Colombier /*
163*368c31abSDavid du Colombier * initialize the external bucket sorting data structures
164*368c31abSDavid du Colombier */
165*368c31abSDavid du Colombier static IEBucks*
initiebucks(Part * part,int bits,u32int size)166*368c31abSDavid du Colombier initiebucks(Part *part, int bits, u32int size)
167*368c31abSDavid du Colombier {
168*368c31abSDavid du Colombier IEBucks *ib;
169*368c31abSDavid du Colombier int i;
170*368c31abSDavid du Colombier
171*368c31abSDavid du Colombier ib = MKZ(IEBucks);
172*368c31abSDavid du Colombier if(ib == nil){
173*368c31abSDavid du Colombier seterr(EOk, "out of memory");
174*368c31abSDavid du Colombier return nil;
175*368c31abSDavid du Colombier }
176*368c31abSDavid du Colombier ib->bits = bits;
177*368c31abSDavid du Colombier ib->nbucks = 1 << bits;
178*368c31abSDavid du Colombier ib->size = size;
179*368c31abSDavid du Colombier ib->usable = (size - U32Size) / IEntrySize * IEntrySize;
180*368c31abSDavid du Colombier ib->bucks = MKNZ(IEBuck, ib->nbucks);
181*368c31abSDavid du Colombier if(ib->bucks == nil){
182*368c31abSDavid du Colombier seterr(EOk, "out of memory allocation sorting buckets");
183*368c31abSDavid du Colombier freeiebucks(ib);
184*368c31abSDavid du Colombier return nil;
185*368c31abSDavid du Colombier }
186*368c31abSDavid du Colombier ib->xbuf = MKN(u8int, size * ((1 << bits)+1));
187*368c31abSDavid du Colombier ib->buf = (u8int*)(((uintptr)ib->xbuf+size-1)&~(uintptr)(size-1));
188*368c31abSDavid du Colombier if(ib->buf == nil){
189*368c31abSDavid du Colombier seterr(EOk, "out of memory allocating sorting buckets' buffers");
190*368c31abSDavid du Colombier freeiebucks(ib);
191*368c31abSDavid du Colombier return nil;
192*368c31abSDavid du Colombier }
193*368c31abSDavid du Colombier for(i = 0; i < ib->nbucks; i++){
194*368c31abSDavid du Colombier ib->bucks[i].head = TWID32;
195*368c31abSDavid du Colombier ib->bucks[i].buf = &ib->buf[i * size];
196*368c31abSDavid du Colombier }
197*368c31abSDavid du Colombier ib->part = part;
198*368c31abSDavid du Colombier return ib;
199*368c31abSDavid du Colombier }
200*368c31abSDavid du Colombier
201*368c31abSDavid du Colombier static void
freeiebucks(IEBucks * ib)202*368c31abSDavid du Colombier freeiebucks(IEBucks *ib)
203*368c31abSDavid du Colombier {
204*368c31abSDavid du Colombier if(ib == nil)
205*368c31abSDavid du Colombier return;
206*368c31abSDavid du Colombier free(ib->bucks);
207*368c31abSDavid du Colombier free(ib->buf);
208*368c31abSDavid du Colombier free(ib);
209*368c31abSDavid du Colombier }
210*368c31abSDavid du Colombier
211*368c31abSDavid du Colombier /*
212*368c31abSDavid du Colombier * initial sort: put the entry into the correct bucket
213*368c31abSDavid du Colombier */
214*368c31abSDavid du Colombier static int
sprayientry(IEBucks * ib,IEntry * ie)215*368c31abSDavid du Colombier sprayientry(IEBucks *ib, IEntry *ie)
216*368c31abSDavid du Colombier {
217*368c31abSDavid du Colombier u32int n;
218*368c31abSDavid du Colombier int b;
219*368c31abSDavid du Colombier
220*368c31abSDavid du Colombier b = hashbits(ie->score, ib->bits);
221*368c31abSDavid du Colombier n = ib->bucks[b].used;
222*368c31abSDavid du Colombier if(n + IEntrySize > ib->usable){
223*368c31abSDavid du Colombier /* should be flushed below, but if flush fails, this can happen */
224*368c31abSDavid du Colombier seterr(EOk, "out of space in bucket");
225*368c31abSDavid du Colombier return -1;
226*368c31abSDavid du Colombier }
227*368c31abSDavid du Colombier packientry(ie, &ib->bucks[b].buf[n]);
228*368c31abSDavid du Colombier n += IEntrySize;
229*368c31abSDavid du Colombier ib->bucks[b].used = n;
230*368c31abSDavid du Colombier if(n + IEntrySize <= ib->usable)
231*368c31abSDavid du Colombier return 0;
232*368c31abSDavid du Colombier return flushiebuck(ib, b, 1);
233*368c31abSDavid du Colombier }
234*368c31abSDavid du Colombier
235*368c31abSDavid du Colombier /*
236*368c31abSDavid du Colombier * finish sorting:
237*368c31abSDavid du Colombier * for each bucket, read it in and sort it
238*368c31abSDavid du Colombier * write out the the final file
239*368c31abSDavid du Colombier */
240*368c31abSDavid du Colombier static u64int
sortiebucks(IEBucks * ib)241*368c31abSDavid du Colombier sortiebucks(IEBucks *ib)
242*368c31abSDavid du Colombier {
243*368c31abSDavid du Colombier u64int tot;
244*368c31abSDavid du Colombier u32int n;
245*368c31abSDavid du Colombier int i;
246*368c31abSDavid du Colombier
247*368c31abSDavid du Colombier if(flushiebucks(ib) < 0)
248*368c31abSDavid du Colombier return TWID64;
249*368c31abSDavid du Colombier for(i = 0; i < ib->nbucks; i++)
250*368c31abSDavid du Colombier ib->bucks[i].buf = nil;
251*368c31abSDavid du Colombier ib->off = (u64int)ib->chunks * ib->size;
252*368c31abSDavid du Colombier free(ib->xbuf);
253*368c31abSDavid du Colombier
254*368c31abSDavid du Colombier ib->buf = MKN(u8int, ib->max + U32Size);
255*368c31abSDavid du Colombier if(ib->buf == nil){
256*368c31abSDavid du Colombier seterr(EOk, "out of memory allocating final sorting buffer; try more buckets");
257*368c31abSDavid du Colombier return TWID64;
258*368c31abSDavid du Colombier }
259*368c31abSDavid du Colombier tot = 0;
260*368c31abSDavid du Colombier for(i = 0; i < ib->nbucks; i++){
261*368c31abSDavid du Colombier n = sortiebuck(ib, i);
262*368c31abSDavid du Colombier if(n == TWID32)
263*368c31abSDavid du Colombier return TWID64;
264*368c31abSDavid du Colombier if(n != ib->bucks[i].total/IEntrySize)
265*368c31abSDavid du Colombier fprint(2, "bucket %d changed count %d => %d\n",
266*368c31abSDavid du Colombier i, (int)(ib->bucks[i].total/IEntrySize), n);
267*368c31abSDavid du Colombier tot += n;
268*368c31abSDavid du Colombier }
269*368c31abSDavid du Colombier return tot;
270*368c31abSDavid du Colombier }
271*368c31abSDavid du Colombier
272*368c31abSDavid du Colombier /*
273*368c31abSDavid du Colombier * sort from bucket b of ib into the output file to
274*368c31abSDavid du Colombier */
275*368c31abSDavid du Colombier static u32int
sortiebuck(IEBucks * ib,int b)276*368c31abSDavid du Colombier sortiebuck(IEBucks *ib, int b)
277*368c31abSDavid du Colombier {
278*368c31abSDavid du Colombier u32int n;
279*368c31abSDavid du Colombier
280*368c31abSDavid du Colombier n = readiebuck(ib, b);
281*368c31abSDavid du Colombier if(n == TWID32)
282*368c31abSDavid du Colombier return TWID32;
283*368c31abSDavid du Colombier qsort(ib->buf, n, IEntrySize, ientrycmp);
284*368c31abSDavid du Colombier if(writepart(ib->part, ib->off, ib->buf, n*IEntrySize) < 0){
285*368c31abSDavid du Colombier seterr(EOk, "can't write sorted bucket: %r");
286*368c31abSDavid du Colombier return TWID32;
287*368c31abSDavid du Colombier }
288*368c31abSDavid du Colombier ib->off += n * IEntrySize;
289*368c31abSDavid du Colombier return n;
290*368c31abSDavid du Colombier }
291*368c31abSDavid du Colombier
292*368c31abSDavid du Colombier /*
293*368c31abSDavid du Colombier * write out a single bucket
294*368c31abSDavid du Colombier */
295*368c31abSDavid du Colombier static int
flushiebuck(IEBucks * ib,int b,int reset)296*368c31abSDavid du Colombier flushiebuck(IEBucks *ib, int b, int reset)
297*368c31abSDavid du Colombier {
298*368c31abSDavid du Colombier u32int n;
299*368c31abSDavid du Colombier
300*368c31abSDavid du Colombier if(ib->bucks[b].used == 0)
301*368c31abSDavid du Colombier return 0;
302*368c31abSDavid du Colombier n = ib->bucks[b].used;
303*368c31abSDavid du Colombier U32PUT(&ib->bucks[b].buf[n], ib->bucks[b].head);
304*368c31abSDavid du Colombier n += U32Size;
305*368c31abSDavid du Colombier USED(n);
306*368c31abSDavid du Colombier if(writepart(ib->part, (u64int)ib->chunks * ib->size, ib->bucks[b].buf, ib->size) < 0){
307*368c31abSDavid du Colombier seterr(EOk, "can't write sorting bucket to file: %r");
308*368c31abSDavid du Colombier xabort();
309*368c31abSDavid du Colombier return -1;
310*368c31abSDavid du Colombier }
311*368c31abSDavid du Colombier ib->bucks[b].head = ib->chunks++;
312*368c31abSDavid du Colombier ib->bucks[b].total += ib->bucks[b].used;
313*368c31abSDavid du Colombier if(reset)
314*368c31abSDavid du Colombier ib->bucks[b].used = 0;
315*368c31abSDavid du Colombier return 0;
316*368c31abSDavid du Colombier }
317*368c31abSDavid du Colombier
318*368c31abSDavid du Colombier /*
319*368c31abSDavid du Colombier * write out all of the buckets, and compute
320*368c31abSDavid du Colombier * the maximum size of any bucket
321*368c31abSDavid du Colombier */
322*368c31abSDavid du Colombier static int
flushiebucks(IEBucks * ib)323*368c31abSDavid du Colombier flushiebucks(IEBucks *ib)
324*368c31abSDavid du Colombier {
325*368c31abSDavid du Colombier int i;
326*368c31abSDavid du Colombier
327*368c31abSDavid du Colombier for(i = 0; i < ib->nbucks; i++){
328*368c31abSDavid du Colombier if(flushiebuck(ib, i, 0) < 0)
329*368c31abSDavid du Colombier return -1;
330*368c31abSDavid du Colombier if(ib->bucks[i].total > ib->max)
331*368c31abSDavid du Colombier ib->max = ib->bucks[i].total;
332*368c31abSDavid du Colombier }
333*368c31abSDavid du Colombier return 0;
334*368c31abSDavid du Colombier }
335*368c31abSDavid du Colombier
336*368c31abSDavid du Colombier /*
337*368c31abSDavid du Colombier * read in the chained buffers for bucket b,
338*368c31abSDavid du Colombier * and return it's total number of IEntries
339*368c31abSDavid du Colombier */
340*368c31abSDavid du Colombier static u32int
readiebuck(IEBucks * ib,int b)341*368c31abSDavid du Colombier readiebuck(IEBucks *ib, int b)
342*368c31abSDavid du Colombier {
343*368c31abSDavid du Colombier u32int head, m, n;
344*368c31abSDavid du Colombier
345*368c31abSDavid du Colombier head = ib->bucks[b].head;
346*368c31abSDavid du Colombier n = 0;
347*368c31abSDavid du Colombier m = ib->bucks[b].used;
348*368c31abSDavid du Colombier if(m == 0)
349*368c31abSDavid du Colombier m = ib->usable;
350*368c31abSDavid du Colombier if(0) if(ib->bucks[b].total)
351*368c31abSDavid du Colombier fprint(2, "\tbucket %d: %lld entries\n", b, ib->bucks[b].total/IEntrySize);
352*368c31abSDavid du Colombier while(head != TWID32){
353*368c31abSDavid du Colombier if(readpart(ib->part, (u64int)head * ib->size, &ib->buf[n], m+U32Size) < 0){
354*368c31abSDavid du Colombier seterr(EOk, "can't read index sort bucket: %r");
355*368c31abSDavid du Colombier return TWID32;
356*368c31abSDavid du Colombier }
357*368c31abSDavid du Colombier n += m;
358*368c31abSDavid du Colombier head = U32GET(&ib->buf[n]);
359*368c31abSDavid du Colombier m = ib->usable;
360*368c31abSDavid du Colombier }
361*368c31abSDavid du Colombier if(n != ib->bucks[b].total)
362*368c31abSDavid du Colombier fprint(2, "\tbucket %d: expected %d entries, got %d\n",
363*368c31abSDavid du Colombier b, (int)ib->bucks[b].total/IEntrySize, n/IEntrySize);
364*368c31abSDavid du Colombier return n / IEntrySize;
365*368c31abSDavid du Colombier }
366