1 /* 2 * Bloom filter tracking which scores are present in our arenas 3 * and (more importantly) which are not. 4 */ 5 6 #include "stdinc.h" 7 #include "dat.h" 8 #include "fns.h" 9 10 int ignorebloom; 11 12 int 13 bloominit(Bloom *b, vlong vsize, u8int *data) 14 { 15 ulong size; 16 17 size = vsize; 18 if(size != vsize){ /* truncation */ 19 werrstr("bloom data too big"); 20 return -1; 21 } 22 23 b->size = size; 24 b->nhash = 32; /* will be fixed by caller on initialization */ 25 if(data != nil) 26 if(unpackbloomhead(b, data) < 0) 27 return -1; 28 29 b->bitmask = (b->size<<3) - 1; 30 b->data = data; 31 return 0; 32 } 33 34 void 35 wbbloomhead(Bloom *b) 36 { 37 packbloomhead(b, b->data); 38 } 39 40 Bloom* 41 readbloom(Part *p) 42 { 43 uchar buf[512]; 44 Bloom *b; 45 46 b = vtmallocz(sizeof *b); 47 if(readpart(p, 0, buf, sizeof buf) < 0) 48 return nil; 49 /* 50 * pass buf as b->data so that bloominit 51 * can parse header. won't be used for 52 * accessing bits (cleared below). 53 */ 54 if(bloominit(b, 0, buf) < 0){ 55 vtfree(b); 56 freepart(p); 57 return nil; 58 }else{ 59 /* 60 * default block size is system page size. 61 * the bloom filter is usually very big. 62 * bump the block size up to speed i/o. 63 */ 64 if(p->blocksize < (1<<20)){ 65 p->blocksize = 1<<20; 66 if(p->blocksize > p->size) 67 p->blocksize = p->size; 68 } 69 } 70 b->part = p; 71 b->data = nil; 72 return b; 73 } 74 75 int 76 resetbloom(Bloom *b) 77 { 78 uchar *data; 79 80 data = vtmallocz(b->size); 81 b->data = data; 82 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */ 83 addstat(StatBloomBits, b->size*8-1); 84 else 85 addstat(StatBloomBits, b->size*8); 86 return 0; 87 } 88 89 int 90 loadbloom(Bloom *b) 91 { 92 int i, n; 93 uint ones; 94 uchar *data; 95 u32int *a; 96 97 data = vtmallocz(b->size); 98 if(readpart(b->part, 0, data, b->size) < 0){ 99 vtfree(b); 100 vtfree(data); 101 return -1; 102 } 103 b->data = data; 104 105 a = (u32int*)b->data; 106 n = b->size/4; 107 ones = 0; 108 for(i=0; i<n; i++) 109 ones += countbits(a[i]); 110 addstat(StatBloomOnes, ones); 111 112 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */ 113 addstat(StatBloomBits, b->size*8-1); 114 else 115 addstat(StatBloomBits, b->size*8); 116 117 return 0; 118 } 119 120 int 121 writebloom(Bloom *b) 122 { 123 wbbloomhead(b); 124 if(writepart(b->part, 0, b->data, b->size) < 0) 125 return -1; 126 if(flushpart(b->part) < 0) 127 return -1; 128 return 0; 129 } 130 131 /* 132 * Derive two random 32-bit quantities a, b from the score 133 * and then use a+b*i as a sequence of bloom filter indices. 134 * Michael Mitzenmacher has a recent (2005) paper saying this is okay. 135 * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header. 136 */ 137 static void 138 gethashes(u8int *score, ulong *h) 139 { 140 int i; 141 u32int a, b; 142 143 a = 0; 144 b = 0; 145 for(i=4; i+8<=VtScoreSize; i+=8){ 146 a ^= *(u32int*)(score+i); 147 b ^= *(u32int*)(score+i+4); 148 } 149 if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */ 150 a ^= *(u32int*)(score+i); 151 for(i=0; i<BloomMaxHash; i++, a+=b) 152 h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a; 153 } 154 155 static void 156 _markbloomfilter(Bloom *b, u8int *score) 157 { 158 int i, nnew; 159 ulong h[BloomMaxHash]; 160 u32int x, *y, z, *tab; 161 162 trace("markbloomfilter", "markbloomfilter %V", score); 163 gethashes(score, h); 164 nnew = 0; 165 tab = (u32int*)b->data; 166 for(i=0; i<b->nhash; i++){ 167 x = h[i]; 168 y = &tab[(x&b->bitmask)>>5]; 169 z = 1<<(x&31); 170 if(!(*y&z)){ 171 nnew++; 172 *y |= z; 173 } 174 } 175 if(nnew) 176 addstat(StatBloomOnes, nnew); 177 178 trace("markbloomfilter", "markbloomfilter exit"); 179 } 180 181 static int 182 _inbloomfilter(Bloom *b, u8int *score) 183 { 184 int i; 185 ulong h[BloomMaxHash], x; 186 u32int *tab; 187 188 gethashes(score, h); 189 tab = (u32int*)b->data; 190 for(i=0; i<b->nhash; i++){ 191 x = h[i]; 192 if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31)))) 193 return 0; 194 } 195 return 1; 196 } 197 198 int 199 inbloomfilter(Bloom *b, u8int *score) 200 { 201 int r; 202 uint ms; 203 204 if(b == nil || b->data == nil) 205 return 1; 206 207 if(ignorebloom) 208 return 1; 209 210 ms = msec(); 211 rlock(&b->lk); 212 r = _inbloomfilter(b, score); 213 runlock(&b->lk); 214 ms = ms - msec(); 215 addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms); 216 if(r) 217 addstat(StatBloomMiss, 1); 218 else 219 addstat(StatBloomHit, 1); 220 return r; 221 } 222 223 void 224 markbloomfilter(Bloom *b, u8int *score) 225 { 226 if(b == nil || b->data == nil) 227 return; 228 229 rlock(&b->lk); 230 qlock(&b->mod); 231 _markbloomfilter(b, score); 232 qunlock(&b->mod); 233 runlock(&b->lk); 234 } 235 236 static void 237 bloomwriteproc(void *v) 238 { 239 int ret; 240 Bloom *b; 241 242 threadsetname("bloomwriteproc"); 243 b = v; 244 for(;;){ 245 recv(b->writechan, 0); 246 if((ret=writebloom(b)) < 0) 247 fprint(2, "oops! writing bloom: %r\n"); 248 else 249 ret = 0; 250 sendul(b->writedonechan, ret); 251 } 252 } 253 254 void 255 startbloomproc(Bloom *b) 256 { 257 b->writechan = chancreate(sizeof(void*), 0); 258 b->writedonechan = chancreate(sizeof(void*), 0); 259 vtproc(bloomwriteproc, b); 260 } 261