xref: /plan9/sys/src/cmd/venti/srv/fmtbloom.c (revision 368c31ab13393dea083228fdd1c3445076f83a4b)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 
5 Bloom b;
6 
7 void
usage(void)8 usage(void)
9 {
10 	fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
11 	threadexitsall(0);
12 }
13 
14 void
threadmain(int argc,char * argv[])15 threadmain(int argc, char *argv[])
16 {
17 	Part *part;
18 	char *file;
19 	vlong bits, size, size2;
20 	int nhash;
21 	vlong nblocks;
22 
23 	ventifmtinstall();
24 	statsinit();
25 
26 	size = 0;
27 	nhash = 0;
28 	nblocks = 0;
29 	ARGBEGIN{
30 	case 'n':
31 		if(nhash || nblocks)
32 			usage();
33 		nblocks = unittoull(EARGF(usage()));
34 		break;
35 	case 'N':
36 		if(nhash || nblocks)
37 			usage();
38 		nhash = unittoull(EARGF(usage()));
39 		if(nhash > BloomMaxHash){
40 			fprint(2, "maximum possible is -N %d", BloomMaxHash);
41 			usage();
42 		}
43 		break;
44 	case 's':
45 		size = unittoull(ARGF());
46 		if(size == ~0)
47 			usage();
48 		break;
49 	default:
50 		usage();
51 		break;
52 	}ARGEND
53 
54 	if(argc != 1)
55 		usage();
56 
57 	file = argv[0];
58 
59 	part = initpart(file, ORDWR|ODIRECT);
60 	if(part == nil)
61 		sysfatal("can't open partition %s: %r", file);
62 
63 	if(size == 0)
64 		size = part->size;
65 
66 	if(size < 1024*1024)
67 		sysfatal("bloom filter too small");
68 
69 	if(size > MaxBloomSize){
70 		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
71 			size, (vlong)MaxBloomSize);
72 		size = MaxBloomSize;
73 	}
74 	if(size&(size-1)){
75 		for(size2=1; size2<size; size2*=2)
76 			;
77 		size = size2/2;
78 		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
79 	}
80 
81 	if(nblocks){
82 		/*
83 		 * no use for more than 32 bits per block
84 		 * shoot for less than 64 bits per block
85 		 */
86 		size2 = size;
87 		while(size2*8 >= nblocks*64)
88 			size2 >>= 1;
89 		if(size2 != size){
90 			size = size2;
91 			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
92 				size/1024/1024);
93 		}
94 
95 		/*
96 		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
97 		 */
98 		bits = (8*size)/nblocks;
99 		nhash = bits*7/10;
100 		if(nhash > BloomMaxHash)
101 			nhash = BloomMaxHash;
102 	}
103 	if(!nhash)
104 		nhash = BloomMaxHash;
105 	if(bloominit(&b, size, nil) < 0)
106 		sysfatal("bloominit: %r");
107 	b.nhash = nhash;
108 	bits = nhash*10/7;
109 	nblocks = (8*size)/bits;
110 	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
111 	b.data = vtmallocz(size);
112 	b.part = part;
113 	if(writebloom(&b) < 0)
114 		sysfatal("writing %s: %r", file);
115 	threadexitsall(0);
116 }
117