xref: /inferno-os/appl/lib/bloomfilter.b (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
1*37da2899SCharles.Forsythimplement Bloomfilter;
2*37da2899SCharles.Forsyth
3*37da2899SCharles.Forsythinclude "sys.m";
4*37da2899SCharles.Forsyth	sys: Sys;
5*37da2899SCharles.Forsythinclude "sets.m";
6*37da2899SCharles.Forsyth	sets: Sets;
7*37da2899SCharles.Forsyth	Set: import sets;
8*37da2899SCharles.Forsythinclude "keyring.m";
9*37da2899SCharles.Forsyth	keyring: Keyring;
10*37da2899SCharles.Forsythinclude "bloomfilter.m";
11*37da2899SCharles.Forsyth
12*37da2899SCharles.Forsythinit()
13*37da2899SCharles.Forsyth{
14*37da2899SCharles.Forsyth	sys = load Sys Sys->PATH;
15*37da2899SCharles.Forsyth	sets = load Sets Sets->PATH;
16*37da2899SCharles.Forsyth	sets->init();
17*37da2899SCharles.Forsyth	keyring = load Keyring Keyring->PATH;
18*37da2899SCharles.Forsyth}
19*37da2899SCharles.Forsyth
20*37da2899SCharles.ForsythBits: adt {
21*37da2899SCharles.Forsyth	d: array of byte;
22*37da2899SCharles.Forsyth	n: int;			# bits used
23*37da2899SCharles.Forsyth	get:	fn(bits: self ref Bits, n: int): int;
24*37da2899SCharles.Forsyth};
25*37da2899SCharles.Forsyth
26*37da2899SCharles.Forsythfilter(d: array of byte, logm, k: int): Set
27*37da2899SCharles.Forsyth{
28*37da2899SCharles.Forsyth	if(logm < 3 || logm > 30)
29*37da2899SCharles.Forsyth		raise "invalid bloom filter size";
30*37da2899SCharles.Forsyth	nb := 1 << logm;
31*37da2899SCharles.Forsyth	f := array[nb / 8 + 1] of {* => byte 0};	# one extra zero to make sure set's not inverted.
32*37da2899SCharles.Forsyth	bits := hashbits(d, logm * k);
33*37da2899SCharles.Forsyth	while(k--){
34*37da2899SCharles.Forsyth		v := bits.get(logm);
35*37da2899SCharles.Forsyth		f[v >> 3] |= byte 1 << (v & 7);
36*37da2899SCharles.Forsyth	}
37*37da2899SCharles.Forsyth	return sets->bytes2set(f);
38*37da2899SCharles.Forsyth}
39*37da2899SCharles.Forsyth
40*37da2899SCharles.Forsythhashbits(data: array of byte, n: int): ref Bits
41*37da2899SCharles.Forsyth{
42*37da2899SCharles.Forsyth	d := array[((n + 7) / 8)] of byte;
43*37da2899SCharles.Forsyth	digest := array[Keyring->SHA1dlen] of byte;
44*37da2899SCharles.Forsyth	state := keyring->sha1(data, len data, nil, nil);
45*37da2899SCharles.Forsyth	extra := array[2] of byte;
46*37da2899SCharles.Forsyth	e := 0;
47*37da2899SCharles.Forsyth	for(i := 0; i < len d; i += Keyring->SHA1dlen){
48*37da2899SCharles.Forsyth		extra[0] = byte e;
49*37da2899SCharles.Forsyth		extra[1] = byte (e>>8);
50*37da2899SCharles.Forsyth		e++;
51*37da2899SCharles.Forsyth		state = keyring->sha1(extra, len extra, digest, state);
52*37da2899SCharles.Forsyth		if(i + Keyring->SHA1dlen > len d)
53*37da2899SCharles.Forsyth			digest = digest[0:len d - i];
54*37da2899SCharles.Forsyth		d[i:] = digest;
55*37da2899SCharles.Forsyth	}
56*37da2899SCharles.Forsyth	return ref Bits(d, 0);
57*37da2899SCharles.Forsyth}
58*37da2899SCharles.Forsyth
59*37da2899SCharles.Forsyth# XXX could be more efficient.
60*37da2899SCharles.ForsythBits.get(bits: self ref Bits, n: int): int
61*37da2899SCharles.Forsyth{
62*37da2899SCharles.Forsyth	d := bits.d;
63*37da2899SCharles.Forsyth	v := 0;
64*37da2899SCharles.Forsyth	nb := bits.n;
65*37da2899SCharles.Forsyth	for(i := 0; i < n; i++){
66*37da2899SCharles.Forsyth		j := nb + i;
67*37da2899SCharles.Forsyth		if(int d[j >> 3] & (1 << (j & 7)))
68*37da2899SCharles.Forsyth			v |= (1 << i);
69*37da2899SCharles.Forsyth	}
70*37da2899SCharles.Forsyth	bits.n += n;
71*37da2899SCharles.Forsyth	return v;
72*37da2899SCharles.Forsyth}
73