1*368c31abSDavid du Colombier #include "stdinc.h"
2*368c31abSDavid du Colombier #include "dat.h"
3*368c31abSDavid du Colombier #include "fns.h"
4*368c31abSDavid du Colombier
5*368c31abSDavid du Colombier Bloom b;
6*368c31abSDavid du Colombier
7*368c31abSDavid du Colombier void
usage(void)8*368c31abSDavid du Colombier usage(void)
9*368c31abSDavid du Colombier {
10*368c31abSDavid du Colombier fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
11*368c31abSDavid du Colombier threadexitsall(0);
12*368c31abSDavid du Colombier }
13*368c31abSDavid du Colombier
14*368c31abSDavid du Colombier void
threadmain(int argc,char * argv[])15*368c31abSDavid du Colombier threadmain(int argc, char *argv[])
16*368c31abSDavid du Colombier {
17*368c31abSDavid du Colombier Part *part;
18*368c31abSDavid du Colombier char *file;
19*368c31abSDavid du Colombier vlong bits, size, size2;
20*368c31abSDavid du Colombier int nhash;
21*368c31abSDavid du Colombier vlong nblocks;
22*368c31abSDavid du Colombier
23*368c31abSDavid du Colombier ventifmtinstall();
24*368c31abSDavid du Colombier statsinit();
25*368c31abSDavid du Colombier
26*368c31abSDavid du Colombier size = 0;
27*368c31abSDavid du Colombier nhash = 0;
28*368c31abSDavid du Colombier nblocks = 0;
29*368c31abSDavid du Colombier ARGBEGIN{
30*368c31abSDavid du Colombier case 'n':
31*368c31abSDavid du Colombier if(nhash || nblocks)
32*368c31abSDavid du Colombier usage();
33*368c31abSDavid du Colombier nblocks = unittoull(EARGF(usage()));
34*368c31abSDavid du Colombier break;
35*368c31abSDavid du Colombier case 'N':
36*368c31abSDavid du Colombier if(nhash || nblocks)
37*368c31abSDavid du Colombier usage();
38*368c31abSDavid du Colombier nhash = unittoull(EARGF(usage()));
39*368c31abSDavid du Colombier if(nhash > BloomMaxHash){
40*368c31abSDavid du Colombier fprint(2, "maximum possible is -N %d", BloomMaxHash);
41*368c31abSDavid du Colombier usage();
42*368c31abSDavid du Colombier }
43*368c31abSDavid du Colombier break;
44*368c31abSDavid du Colombier case 's':
45*368c31abSDavid du Colombier size = unittoull(ARGF());
46*368c31abSDavid du Colombier if(size == ~0)
47*368c31abSDavid du Colombier usage();
48*368c31abSDavid du Colombier break;
49*368c31abSDavid du Colombier default:
50*368c31abSDavid du Colombier usage();
51*368c31abSDavid du Colombier break;
52*368c31abSDavid du Colombier }ARGEND
53*368c31abSDavid du Colombier
54*368c31abSDavid du Colombier if(argc != 1)
55*368c31abSDavid du Colombier usage();
56*368c31abSDavid du Colombier
57*368c31abSDavid du Colombier file = argv[0];
58*368c31abSDavid du Colombier
59*368c31abSDavid du Colombier part = initpart(file, ORDWR|ODIRECT);
60*368c31abSDavid du Colombier if(part == nil)
61*368c31abSDavid du Colombier sysfatal("can't open partition %s: %r", file);
62*368c31abSDavid du Colombier
63*368c31abSDavid du Colombier if(size == 0)
64*368c31abSDavid du Colombier size = part->size;
65*368c31abSDavid du Colombier
66*368c31abSDavid du Colombier if(size < 1024*1024)
67*368c31abSDavid du Colombier sysfatal("bloom filter too small");
68*368c31abSDavid du Colombier
69*368c31abSDavid du Colombier if(size > MaxBloomSize){
70*368c31abSDavid du Colombier fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
71*368c31abSDavid du Colombier size, (vlong)MaxBloomSize);
72*368c31abSDavid du Colombier size = MaxBloomSize;
73*368c31abSDavid du Colombier }
74*368c31abSDavid du Colombier if(size&(size-1)){
75*368c31abSDavid du Colombier for(size2=1; size2<size; size2*=2)
76*368c31abSDavid du Colombier ;
77*368c31abSDavid du Colombier size = size2/2;
78*368c31abSDavid du Colombier fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
79*368c31abSDavid du Colombier }
80*368c31abSDavid du Colombier
81*368c31abSDavid du Colombier if(nblocks){
82*368c31abSDavid du Colombier /*
83*368c31abSDavid du Colombier * no use for more than 32 bits per block
84*368c31abSDavid du Colombier * shoot for less than 64 bits per block
85*368c31abSDavid du Colombier */
86*368c31abSDavid du Colombier size2 = size;
87*368c31abSDavid du Colombier while(size2*8 >= nblocks*64)
88*368c31abSDavid du Colombier size2 >>= 1;
89*368c31abSDavid du Colombier if(size2 != size){
90*368c31abSDavid du Colombier size = size2;
91*368c31abSDavid du Colombier fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
92*368c31abSDavid du Colombier size/1024/1024);
93*368c31abSDavid du Colombier }
94*368c31abSDavid du Colombier
95*368c31abSDavid du Colombier /*
96*368c31abSDavid du Colombier * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
97*368c31abSDavid du Colombier */
98*368c31abSDavid du Colombier bits = (8*size)/nblocks;
99*368c31abSDavid du Colombier nhash = bits*7/10;
100*368c31abSDavid du Colombier if(nhash > BloomMaxHash)
101*368c31abSDavid du Colombier nhash = BloomMaxHash;
102*368c31abSDavid du Colombier }
103*368c31abSDavid du Colombier if(!nhash)
104*368c31abSDavid du Colombier nhash = BloomMaxHash;
105*368c31abSDavid du Colombier if(bloominit(&b, size, nil) < 0)
106*368c31abSDavid du Colombier sysfatal("bloominit: %r");
107*368c31abSDavid du Colombier b.nhash = nhash;
108*368c31abSDavid du Colombier bits = nhash*10/7;
109*368c31abSDavid du Colombier nblocks = (8*size)/bits;
110*368c31abSDavid du Colombier fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
111*368c31abSDavid du Colombier b.data = vtmallocz(size);
112*368c31abSDavid du Colombier b.part = part;
113*368c31abSDavid du Colombier if(writebloom(&b) < 0)
114*368c31abSDavid du Colombier sysfatal("writing %s: %r", file);
115*368c31abSDavid du Colombier threadexitsall(0);
116*368c31abSDavid du Colombier }
117