1*37da2899SCharles.Forsythimplement Bloomfilter; 2*37da2899SCharles.Forsyth 3*37da2899SCharles.Forsythinclude "sys.m"; 4*37da2899SCharles.Forsyth sys: Sys; 5*37da2899SCharles.Forsythinclude "sets.m"; 6*37da2899SCharles.Forsyth sets: Sets; 7*37da2899SCharles.Forsyth Set: import sets; 8*37da2899SCharles.Forsythinclude "keyring.m"; 9*37da2899SCharles.Forsyth keyring: Keyring; 10*37da2899SCharles.Forsythinclude "bloomfilter.m"; 11*37da2899SCharles.Forsyth 12*37da2899SCharles.Forsythinit() 13*37da2899SCharles.Forsyth{ 14*37da2899SCharles.Forsyth sys = load Sys Sys->PATH; 15*37da2899SCharles.Forsyth sets = load Sets Sets->PATH; 16*37da2899SCharles.Forsyth sets->init(); 17*37da2899SCharles.Forsyth keyring = load Keyring Keyring->PATH; 18*37da2899SCharles.Forsyth} 19*37da2899SCharles.Forsyth 20*37da2899SCharles.ForsythBits: adt { 21*37da2899SCharles.Forsyth d: array of byte; 22*37da2899SCharles.Forsyth n: int; # bits used 23*37da2899SCharles.Forsyth get: fn(bits: self ref Bits, n: int): int; 24*37da2899SCharles.Forsyth}; 25*37da2899SCharles.Forsyth 26*37da2899SCharles.Forsythfilter(d: array of byte, logm, k: int): Set 27*37da2899SCharles.Forsyth{ 28*37da2899SCharles.Forsyth if(logm < 3 || logm > 30) 29*37da2899SCharles.Forsyth raise "invalid bloom filter size"; 30*37da2899SCharles.Forsyth nb := 1 << logm; 31*37da2899SCharles.Forsyth f := array[nb / 8 + 1] of {* => byte 0}; # one extra zero to make sure set's not inverted. 32*37da2899SCharles.Forsyth bits := hashbits(d, logm * k); 33*37da2899SCharles.Forsyth while(k--){ 34*37da2899SCharles.Forsyth v := bits.get(logm); 35*37da2899SCharles.Forsyth f[v >> 3] |= byte 1 << (v & 7); 36*37da2899SCharles.Forsyth } 37*37da2899SCharles.Forsyth return sets->bytes2set(f); 38*37da2899SCharles.Forsyth} 39*37da2899SCharles.Forsyth 40*37da2899SCharles.Forsythhashbits(data: array of byte, n: int): ref Bits 41*37da2899SCharles.Forsyth{ 42*37da2899SCharles.Forsyth d := array[((n + 7) / 8)] of byte; 43*37da2899SCharles.Forsyth digest := array[Keyring->SHA1dlen] of byte; 44*37da2899SCharles.Forsyth state := keyring->sha1(data, len data, nil, nil); 45*37da2899SCharles.Forsyth extra := array[2] of byte; 46*37da2899SCharles.Forsyth e := 0; 47*37da2899SCharles.Forsyth for(i := 0; i < len d; i += Keyring->SHA1dlen){ 48*37da2899SCharles.Forsyth extra[0] = byte e; 49*37da2899SCharles.Forsyth extra[1] = byte (e>>8); 50*37da2899SCharles.Forsyth e++; 51*37da2899SCharles.Forsyth state = keyring->sha1(extra, len extra, digest, state); 52*37da2899SCharles.Forsyth if(i + Keyring->SHA1dlen > len d) 53*37da2899SCharles.Forsyth digest = digest[0:len d - i]; 54*37da2899SCharles.Forsyth d[i:] = digest; 55*37da2899SCharles.Forsyth } 56*37da2899SCharles.Forsyth return ref Bits(d, 0); 57*37da2899SCharles.Forsyth} 58*37da2899SCharles.Forsyth 59*37da2899SCharles.Forsyth# XXX could be more efficient. 60*37da2899SCharles.ForsythBits.get(bits: self ref Bits, n: int): int 61*37da2899SCharles.Forsyth{ 62*37da2899SCharles.Forsyth d := bits.d; 63*37da2899SCharles.Forsyth v := 0; 64*37da2899SCharles.Forsyth nb := bits.n; 65*37da2899SCharles.Forsyth for(i := 0; i < n; i++){ 66*37da2899SCharles.Forsyth j := nb + i; 67*37da2899SCharles.Forsyth if(int d[j >> 3] & (1 << (j & 7))) 68*37da2899SCharles.Forsyth v |= (1 << i); 69*37da2899SCharles.Forsyth } 70*37da2899SCharles.Forsyth bits.n += n; 71*37da2899SCharles.Forsyth return v; 72*37da2899SCharles.Forsyth} 73