xref: /netbsd-src/external/bsd/openldap/dist/servers/slapd/zn_malloc.c (revision 274254cdae52594c1aa480a736aef78313d15c9c)
1 /* zn_malloc.c - zone-based malloc routines */
2 /* $OpenLDAP: pkg/ldap/servers/slapd/zn_malloc.c,v 1.11.2.3 2008/02/11 23:26:45 kurt Exp $*/
3 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4  *
5  * Copyright 2003-2008 The OpenLDAP Foundation.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted only as authorized by the OpenLDAP
10  * Public License.
11  *
12  * A copy of this license is available in the file LICENSE in the
13  * top-level directory of the distribution or, alternatively, at
14  * <http://www.OpenLDAP.org/license.html>.
15  */
16 /* Portions Copyright 2004 IBM Corporation
17  * All rights reserved.
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted only as authorized by the OpenLDAP
20  * Public License.
21  */
22 /* ACKNOWLEDGEMENTS
23  * This work originally developed by Jong-Hyuk Choi for inclusion in
24  * OpenLDAP Software.
25  */
26 
#include "portable.h"

#include <stdio.h>
#include <limits.h>
#include <ac/string.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>

#include "slap.h"
35 
36 #ifdef SLAP_ZONE_ALLOC
37 
38 #include <sys/mman.h>
39 
40 static int slap_zone_cmp(const void *v1, const void *v2);
41 void * slap_replenish_zopool(void *ctx);
42 
/*
 * Free callback with the signature avl_free() expects: releases the
 * object a tree node carries by passing it to ch_free().
 * NOTE(review): zone objects appear to be carved out of blocks obtained
 * by slap_replenish_zopool(); confirm that only individually allocated
 * objects are ever handed to this function.
 */
static void
slap_zo_release(void *data)
{
	ch_free( data );
}
49 
50 void
51 slap_zn_mem_destroy(
52 	void *ctx
53 )
54 {
55 	struct zone_heap *zh = ctx;
56 	int pad = 2*sizeof(int)-1, pad_shift;
57 	int order_start = -1, i, j;
58 	struct zone_object *zo;
59 
60 	pad_shift = pad - 1;
61 	do {
62 		order_start++;
63 	} while (pad_shift >>= 1);
64 
65 	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
66 	for (i = 0; i < zh->zh_zoneorder - order_start + 1; i++) {
67 		zo = LDAP_LIST_FIRST(&zh->zh_free[i]);
68 		while (zo) {
69 			struct zone_object *zo_tmp = zo;
70 			zo = LDAP_LIST_NEXT(zo, zo_link);
71 			LDAP_LIST_REMOVE(zo_tmp, zo_link);
72 			LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_tmp, zo_link);
73 		}
74 	}
75 	ch_free(zh->zh_free);
76 
77 	for (i = 0; i < zh->zh_numzones; i++) {
78 		for (j = 0; j < zh->zh_zoneorder - order_start + 1; j++) {
79 			ch_free(zh->zh_maps[i][j]);
80 		}
81 		ch_free(zh->zh_maps[i]);
82 		munmap(zh->zh_zones[i], zh->zh_zonesize);
83 		ldap_pvt_thread_rdwr_destroy(&zh->zh_znlock[i]);
84 	}
85 	ch_free(zh->zh_maps);
86 	ch_free(zh->zh_zones);
87 	ch_free(zh->zh_seqno);
88 	ch_free(zh->zh_znlock);
89 
90 	avl_free(zh->zh_zonetree, slap_zo_release);
91 
92 	zo = LDAP_LIST_FIRST(&zh->zh_zopool);
93 	while (zo) {
94 		struct zone_object *zo_tmp = zo;
95 		zo = LDAP_LIST_NEXT(zo, zo_link);
96 		if (!zo_tmp->zo_blockhead) {
97 			LDAP_LIST_REMOVE(zo_tmp, zo_link);
98 		}
99 	}
100 	zo = LDAP_LIST_FIRST(&zh->zh_zopool);
101 	while (zo) {
102 		struct zone_object *zo_tmp = zo;
103 		zo = LDAP_LIST_NEXT(zo, zo_link);
104 		ch_free(zo_tmp);
105 	}
106 	ldap_pvt_thread_mutex_unlock(&zh->zh_mutex);
107 	ldap_pvt_thread_rdwr_destroy(&zh->zh_lock);
108 	ldap_pvt_thread_mutex_destroy(&zh->zh_mutex);
109 	ch_free(zh);
110 }
111 
112 void *
113 slap_zn_mem_create(
114 	ber_len_t initsize,
115 	ber_len_t maxsize,
116 	ber_len_t deltasize,
117 	ber_len_t zonesize
118 )
119 {
120 	struct zone_heap *zh = NULL;
121 	ber_len_t zpad;
122 	int pad = 2*sizeof(int)-1, pad_shift;
123 	int size_shift;
124 	int order = -1, order_start = -1, order_end = -1;
125 	int i, j;
126 	struct zone_object *zo;
127 
128 	Debug(LDAP_DEBUG_NONE,
129 		"--> slap_zn_mem_create: initsize=%d, maxsize=%d\n",
130 		initsize, maxsize, 0);
131 	Debug(LDAP_DEBUG_NONE,
132 		"++> slap_zn_mem_create: deltasize=%d, zonesize=%d\n",
133 		deltasize, zonesize, 0);
134 
135 	zh = (struct zone_heap *)ch_calloc(1, sizeof(struct zone_heap));
136 
137 	zh->zh_fd = open("/dev/zero", O_RDWR);
138 
139 	if ( zonesize ) {
140 		zh->zh_zonesize = zonesize;
141 	} else {
142 		zh->zh_zonesize = SLAP_ZONE_SIZE;
143 	}
144 
145 	zpad = zh->zh_zonesize - 1;
146 	zh->zh_numzones = ((initsize + zpad) & ~zpad) / zh->zh_zonesize;
147 
148 	if ( maxsize && maxsize >= initsize ) {
149 		zh->zh_maxzones = ((maxsize + zpad) & ~zpad) / zh->zh_zonesize;
150 	} else {
151 		zh->zh_maxzones = ((initsize + zpad) & ~zpad) / zh->zh_zonesize;
152 	}
153 
154 	if ( deltasize ) {
155 		zh->zh_deltazones = ((deltasize + zpad) & ~zpad) / zh->zh_zonesize;
156 	} else {
157 		zh->zh_deltazones = ((SLAP_ZONE_DELTA+zpad) & ~zpad) / zh->zh_zonesize;
158 	}
159 
160 	size_shift = zh->zh_zonesize - 1;
161 	do {
162 		order_end++;
163 	} while (size_shift >>= 1);
164 
165 	pad_shift = pad - 1;
166 	do {
167 		order_start++;
168 	} while (pad_shift >>= 1);
169 
170 	order = order_end - order_start + 1;
171 
172 	zh->zh_zones = (void **)ch_malloc(zh->zh_maxzones * sizeof(void*));
173 	zh->zh_znlock = (ldap_pvt_thread_rdwr_t *)ch_malloc(
174 						zh->zh_maxzones * sizeof(ldap_pvt_thread_rdwr_t *));
175 	zh->zh_maps = (unsigned char ***)ch_malloc(
176 					zh->zh_maxzones * sizeof(unsigned char**));
177 
178 	zh->zh_zoneorder = order_end;
179 	zh->zh_free = (struct zh_freelist *)
180 					ch_malloc(order * sizeof(struct zh_freelist));
181 	zh->zh_seqno = (unsigned long *)ch_calloc(zh->zh_maxzones,
182 											sizeof(unsigned long));
183 	for (i = 0; i < order; i++) {
184 		LDAP_LIST_INIT(&zh->zh_free[i]);
185 	}
186 	LDAP_LIST_INIT(&zh->zh_zopool);
187 
188 	for (i = 0; i < zh->zh_numzones; i++) {
189 		zh->zh_zones[i] = mmap(0, zh->zh_zonesize, PROT_READ | PROT_WRITE,
190 							MAP_PRIVATE, zh->zh_fd, 0);
191 		zh->zh_maps[i] = (unsigned char **)
192 					ch_malloc(order * sizeof(unsigned char *));
193 		for (j = 0; j < order; j++) {
194 			int shiftamt = order_start + 1 + j;
195 			int nummaps = zh->zh_zonesize >> shiftamt;
196 			assert(nummaps);
197 			nummaps >>= 3;
198 			if (!nummaps) nummaps = 1;
199 			zh->zh_maps[i][j] = (unsigned char *)ch_malloc(nummaps);
200 			memset(zh->zh_maps[i][j], 0, nummaps);
201 		}
202 
203 		if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
204 			slap_replenish_zopool(zh);
205 		}
206 		zo = LDAP_LIST_FIRST(&zh->zh_zopool);
207 		LDAP_LIST_REMOVE(zo, zo_link);
208 		zo->zo_ptr = zh->zh_zones[i];
209 		zo->zo_idx = i;
210 		LDAP_LIST_INSERT_HEAD(&zh->zh_free[order-1], zo, zo_link);
211 
212 		if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
213 			slap_replenish_zopool(zh);
214 		}
215 		zo = LDAP_LIST_FIRST(&zh->zh_zopool);
216 		LDAP_LIST_REMOVE(zo, zo_link);
217 		zo->zo_ptr = zh->zh_zones[i];
218 		zo->zo_siz = zh->zh_zonesize;
219 		zo->zo_idx = i;
220 		avl_insert(&zh->zh_zonetree, zo, slap_zone_cmp, avl_dup_error);
221 		ldap_pvt_thread_rdwr_init(&zh->zh_znlock[i]);
222 	}
223 
224 	LDAP_STAILQ_INIT(&zh->zh_latency_history_queue);
225 	ldap_pvt_thread_mutex_init(&zh->zh_mutex);
226 	ldap_pvt_thread_rdwr_init(&zh->zh_lock);
227 
228 	return zh;
229 }
230 
231 void *
232 slap_zn_malloc(
233     ber_len_t	size,
234 	void *ctx
235 )
236 {
237 	struct zone_heap *zh = ctx;
238 	ber_len_t size_shift;
239 	int pad = 2*sizeof(int)-1, pad_shift;
240 	int order = -1, order_start = -1;
241 	struct zone_object *zo, *zo_new, *zo_left, *zo_right;
242 	ber_len_t *ptr, *new;
243 	int idx;
244 	unsigned long diff;
245 	int i, j, k;
246 
247 	Debug(LDAP_DEBUG_NONE,
248 		"--> slap_zn_malloc: size=%d\n", size, 0, 0);
249 
250 	if (!zh) return ber_memalloc_x(size, NULL);
251 
252 	/* round up to doubleword boundary */
253 	size += 2*sizeof(ber_len_t) + pad;
254 	size &= ~pad;
255 
256 	size_shift = size - 1;
257 	do {
258 		order++;
259 	} while (size_shift >>= 1);
260 
261 	pad_shift = pad - 1;
262 	do {
263 		order_start++;
264 	} while (pad_shift >>= 1);
265 
266 retry:
267 
268 	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
269 	for (i = order; i <= zh->zh_zoneorder &&
270 			LDAP_LIST_EMPTY(&zh->zh_free[i-order_start]); i++);
271 
272 	if (i == order) {
273 		zo_new = LDAP_LIST_FIRST(&zh->zh_free[i-order_start]);
274 		LDAP_LIST_REMOVE(zo_new, zo_link);
275 		ptr = zo_new->zo_ptr;
276 		idx = zo_new->zo_idx;
277 		diff = (unsigned long)((char*)ptr -
278 				(char*)zh->zh_zones[idx]) >> (order + 1);
279 		zh->zh_maps[idx][order-order_start][diff>>3] |= (1 << (diff & 0x7));
280 		*ptr++ = zh->zh_seqno[idx];
281 		*ptr++ = size - 2*sizeof(ber_len_t);
282 		zo_new->zo_ptr = NULL;
283 		zo_new->zo_idx = -1;
284 		LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_new, zo_link);
285 		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
286 		Debug(LDAP_DEBUG_NONE, "slap_zn_malloc: returning 0x%x, 0x%x\n",
287 				ptr, (int)ptr>>(zh->zh_zoneorder+1), 0);
288 		return((void*)ptr);
289 	} else if (i <= zh->zh_zoneorder) {
290 		for (j = i; j > order; j--) {
291 			zo_left = LDAP_LIST_FIRST(&zh->zh_free[j-order_start]);
292 			LDAP_LIST_REMOVE(zo_left, zo_link);
293 			if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
294 				slap_replenish_zopool(zh);
295 			}
296 			zo_right = LDAP_LIST_FIRST(&zh->zh_zopool);
297 			LDAP_LIST_REMOVE(zo_right, zo_link);
298 			zo_right->zo_ptr = zo_left->zo_ptr + (1 << j);
299 			zo_right->zo_idx = zo_left->zo_idx;
300 			Debug(LDAP_DEBUG_NONE,
301 				"slap_zn_malloc: split (left=0x%x, right=0x%x)\n",
302 				zo_left->zo_ptr, zo_right->zo_ptr, 0);
303 			if (j == order + 1) {
304 				ptr = zo_left->zo_ptr;
305 				diff = (unsigned long)((char*)ptr -
306 						(char*)zh->zh_zones[zo_left->zo_idx]) >> (order+1);
307 				zh->zh_maps[zo_left->zo_idx][order-order_start][diff>>3] |=
308 						(1 << (diff & 0x7));
309 				*ptr++ = zh->zh_seqno[zo_left->zo_idx];
310 				*ptr++ = size - 2*sizeof(ber_len_t);
311 				LDAP_LIST_INSERT_HEAD(
312 						&zh->zh_free[j-1-order_start], zo_right, zo_link);
313 				LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_left, zo_link);
314 				ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
315 				Debug(LDAP_DEBUG_NONE,
316 					"slap_zn_malloc: returning 0x%x, 0x%x\n",
317 					ptr, (int)ptr>>(zh->zh_zoneorder+1), 0);
318 				return((void*)ptr);
319 			} else {
320 				LDAP_LIST_INSERT_HEAD(
321 						&zh->zh_free[j-1-order_start], zo_right, zo_link);
322 				LDAP_LIST_INSERT_HEAD(
323 						&zh->zh_free[j-1-order_start], zo_left, zo_link);
324 			}
325 		}
326 		assert(0);
327 	} else {
328 
329 		if ( zh->zh_maxzones < zh->zh_numzones + zh->zh_deltazones ) {
330 			ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
331 			Debug( LDAP_DEBUG_TRACE,
332 				"slap_zn_malloc of %lu bytes failed, using ch_malloc\n",
333 				(long)size, 0, 0);
334 			Debug(LDAP_DEBUG_NONE,
335 				"slap_zn_malloc: returning 0x%x, 0x%x\n",
336 				ptr, (int)ptr>>(zh->zh_zoneorder+1), 0);
337 			return (void*)ch_malloc(size);
338 		}
339 
340 		for (i = zh->zh_numzones; i < zh->zh_numzones+zh->zh_deltazones; i++) {
341 			zh->zh_zones[i] = mmap(0, zh->zh_zonesize, PROT_READ | PROT_WRITE,
342 								MAP_PRIVATE, zh->zh_fd, 0);
343 			zh->zh_maps[i] = (unsigned char **)
344 						ch_malloc((zh->zh_zoneorder - order_start + 1) *
345 						sizeof(unsigned char *));
346 			for (j = 0; j < zh->zh_zoneorder-order_start+1; j++) {
347 				int shiftamt = order_start + 1 + j;
348 				int nummaps = zh->zh_zonesize >> shiftamt;
349 				assert(nummaps);
350 				nummaps >>= 3;
351 				if (!nummaps) nummaps = 1;
352 				zh->zh_maps[i][j] = (unsigned char *)ch_malloc(nummaps);
353 				memset(zh->zh_maps[i][j], 0, nummaps);
354 			}
355 
356 			if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
357 				slap_replenish_zopool(zh);
358 			}
359 			zo = LDAP_LIST_FIRST(&zh->zh_zopool);
360 			LDAP_LIST_REMOVE(zo, zo_link);
361 			zo->zo_ptr = zh->zh_zones[i];
362 			zo->zo_idx = i;
363 			LDAP_LIST_INSERT_HEAD(&zh->
364 						zh_free[zh->zh_zoneorder-order_start],zo,zo_link);
365 
366 			if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
367 				slap_replenish_zopool(zh);
368 			}
369 			zo = LDAP_LIST_FIRST(&zh->zh_zopool);
370 			LDAP_LIST_REMOVE(zo, zo_link);
371 			zo->zo_ptr = zh->zh_zones[i];
372 			zo->zo_siz = zh->zh_zonesize;
373 			zo->zo_idx = i;
374 			avl_insert(&zh->zh_zonetree, zo, slap_zone_cmp, avl_dup_error);
375 			ldap_pvt_thread_rdwr_init(&zh->zh_znlock[i]);
376 		}
377 		zh->zh_numzones += zh->zh_deltazones;
378 		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
379 		goto retry;
380 	}
381 }
382 
383 void *
384 slap_zn_calloc( ber_len_t n, ber_len_t size, void *ctx )
385 {
386 	void *new;
387 
388 	new = slap_zn_malloc( n*size, ctx );
389 	if ( new ) {
390 		memset( new, 0, n*size );
391 	}
392 	return new;
393 }
394 
395 void *
396 slap_zn_realloc(void *ptr, ber_len_t size, void *ctx)
397 {
398 	struct zone_heap *zh = ctx;
399 	int pad = 2*sizeof(int)-1, pad_shift;
400 	int order_start = -1, order = -1;
401 	struct zone_object zoi, *zoo;
402 	ber_len_t *p = (ber_len_t *)ptr, *new;
403 	unsigned long diff;
404 	int i;
405 	void *newptr = NULL;
406 	struct zone_heap *zone = NULL;
407 
408 	Debug(LDAP_DEBUG_NONE,
409 		"--> slap_zn_realloc: ptr=0x%x, size=%d\n", ptr, size, 0);
410 
411 	if (ptr == NULL)
412 		return slap_zn_malloc(size, zh);
413 
414 	zoi.zo_ptr = p;
415 	zoi.zo_idx = -1;
416 
417 	if (zh) {
418 		ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
419 		zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
420 		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
421 	}
422 
423 	/* Not our memory? */
424 	if (!zoo) {
425 		/* duplicate of realloc behavior, oh well */
426 		new = ber_memrealloc_x(ptr, size, NULL);
427 		if (new) {
428 			return new;
429 		}
430 		Debug(LDAP_DEBUG_ANY, "ch_realloc of %lu bytes failed\n",
431 				(long) size, 0, 0);
432 		assert(0);
433 		exit( EXIT_FAILURE );
434 	}
435 
436 	assert(zoo->zo_idx != -1);
437 
438 	zone = zh->zh_zones[zoo->zo_idx];
439 
440 	if (size == 0) {
441 		slap_zn_free(ptr, zh);
442 		return NULL;
443 	}
444 
445 	newptr = slap_zn_malloc(size, zh);
446 	if (size < p[-1]) {
447 		AC_MEMCPY(newptr, ptr, size);
448 	} else {
449 		AC_MEMCPY(newptr, ptr, p[-1]);
450 	}
451 	slap_zn_free(ptr, zh);
452 	return newptr;
453 }
454 
455 void
456 slap_zn_free(void *ptr, void *ctx)
457 {
458 	struct zone_heap *zh = ctx;
459 	int size, size_shift, order_size;
460 	int pad = 2*sizeof(int)-1, pad_shift;
461 	ber_len_t *p = (ber_len_t *)ptr, *tmpp;
462 	int order_start = -1, order = -1;
463 	struct zone_object zoi, *zoo, *zo;
464 	unsigned long diff;
465 	int i, k, inserted = 0, idx;
466 	struct zone_heap *zone = NULL;
467 
468 	zoi.zo_ptr = p;
469 	zoi.zo_idx = -1;
470 
471 	Debug(LDAP_DEBUG_NONE, "--> slap_zn_free: ptr=0x%x\n", ptr, 0, 0);
472 
473 	if (zh) {
474 		ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
475 		zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
476 		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
477 	}
478 
479 	if (!zoo) {
480 		ber_memfree_x(ptr, NULL);
481 	} else {
482 		idx = zoo->zo_idx;
483 		assert(idx != -1);
484 		zone = zh->zh_zones[idx];
485 
486 		size = *(--p);
487 		size_shift = size + 2*sizeof(ber_len_t) - 1;
488 		do {
489 			order++;
490 		} while (size_shift >>= 1);
491 
492 		pad_shift = pad - 1;
493 		do {
494 			order_start++;
495 		} while (pad_shift >>= 1);
496 
497 		ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
498 		for (i = order, tmpp = p; i <= zh->zh_zoneorder; i++) {
499 			order_size = 1 << (i+1);
500 			diff = (unsigned long)((char*)tmpp - (char*)zone) >> (i+1);
501 			zh->zh_maps[idx][i-order_start][diff>>3] &= (~(1 << (diff & 0x7)));
502 			if (diff == ((diff>>1)<<1)) {
503 				if (!(zh->zh_maps[idx][i-order_start][(diff+1)>>3] &
504 						(1<<((diff+1)&0x7)))) {
505 					zo = LDAP_LIST_FIRST(&zh->zh_free[i-order_start]);
506 					while (zo) {
507 						if ((char*)zo->zo_ptr == (char*)tmpp) {
508 							LDAP_LIST_REMOVE( zo, zo_link );
509 						} else if ((char*)zo->zo_ptr ==
510 								(char*)tmpp + order_size) {
511 							LDAP_LIST_REMOVE(zo, zo_link);
512 							break;
513 						}
514 						zo = LDAP_LIST_NEXT(zo, zo_link);
515 					}
516 					if (zo) {
517 						if (i < zh->zh_zoneorder) {
518 							inserted = 1;
519 							zo->zo_ptr = tmpp;
520 							Debug(LDAP_DEBUG_NONE,
521 								"slap_zn_free: merging 0x%x\n",
522 								zo->zo_ptr, 0, 0);
523 							LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start+1],
524 									zo, zo_link);
525 						}
526 						continue;
527 					} else {
528 						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
529 							slap_replenish_zopool(zh);
530 						}
531 						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
532 						LDAP_LIST_REMOVE(zo, zo_link);
533 						zo->zo_ptr = tmpp;
534 						zo->zo_idx = idx;
535 						Debug(LDAP_DEBUG_NONE,
536 							"slap_zn_free: merging 0x%x\n",
537 							zo->zo_ptr, 0, 0);
538 						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
539 								zo, zo_link);
540 						break;
541 
542 						Debug(LDAP_DEBUG_ANY, "slap_zn_free: "
543 							"free object not found while bit is clear.\n",
544 							0, 0, 0);
545 						assert(zo != NULL);
546 
547 					}
548 				} else {
549 					if (!inserted) {
550 						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
551 							slap_replenish_zopool(zh);
552 						}
553 						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
554 						LDAP_LIST_REMOVE(zo, zo_link);
555 						zo->zo_ptr = tmpp;
556 						zo->zo_idx = idx;
557 						Debug(LDAP_DEBUG_NONE,
558 							"slap_zn_free: merging 0x%x\n",
559 							zo->zo_ptr, 0, 0);
560 						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
561 								zo, zo_link);
562 					}
563 					break;
564 				}
565 			} else {
566 				if (!(zh->zh_maps[idx][i-order_start][(diff-1)>>3] &
567 						(1<<((diff-1)&0x7)))) {
568 					zo = LDAP_LIST_FIRST(&zh->zh_free[i-order_start]);
569 					while (zo) {
570 						if ((char*)zo->zo_ptr == (char*)tmpp) {
571 							LDAP_LIST_REMOVE(zo, zo_link);
572 						} else if ((char*)tmpp == zo->zo_ptr + order_size) {
573 							LDAP_LIST_REMOVE(zo, zo_link);
574 							tmpp = zo->zo_ptr;
575 							break;
576 						}
577 						zo = LDAP_LIST_NEXT(zo, zo_link);
578 					}
579 					if (zo) {
580 						if (i < zh->zh_zoneorder) {
581 							inserted = 1;
582 							Debug(LDAP_DEBUG_NONE,
583 								"slap_zn_free: merging 0x%x\n",
584 								zo->zo_ptr, 0, 0);
585 							LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start+1],
586 									zo, zo_link);
587 							continue;
588 						}
589 					} else {
590 						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
591 							slap_replenish_zopool(zh);
592 						}
593 						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
594 						LDAP_LIST_REMOVE(zo, zo_link);
595 						zo->zo_ptr = tmpp;
596 						zo->zo_idx = idx;
597 						Debug(LDAP_DEBUG_NONE,
598 							"slap_zn_free: merging 0x%x\n",
599 							zo->zo_ptr, 0, 0);
600 						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
601 								zo, zo_link);
602 						break;
603 
604 						Debug(LDAP_DEBUG_ANY, "slap_zn_free: "
605 							"free object not found while bit is clear.\n",
606 							0, 0, 0 );
607 						assert(zo != NULL);
608 
609 					}
610 				} else {
611 					if ( !inserted ) {
612 						if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
613 							slap_replenish_zopool(zh);
614 						}
615 						zo = LDAP_LIST_FIRST(&zh->zh_zopool);
616 						LDAP_LIST_REMOVE(zo, zo_link);
617 						zo->zo_ptr = tmpp;
618 						zo->zo_idx = idx;
619 						Debug(LDAP_DEBUG_NONE,
620 							"slap_zn_free: merging 0x%x\n",
621 							zo->zo_ptr, 0, 0);
622 						LDAP_LIST_INSERT_HEAD(&zh->zh_free[i-order_start],
623 								zo, zo_link);
624 					}
625 					break;
626 				}
627 			}
628 		}
629 		ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
630 	}
631 }
632 
633 static int
634 slap_zone_cmp(const void *v1, const void *v2)
635 {
636 	const struct zone_object *zo1 = v1;
637 	const struct zone_object *zo2 = v2;
638 	char *ptr1;
639 	char *ptr2;
640 	ber_len_t zpad;
641 
642 	zpad = zo2->zo_siz - 1;
643 	ptr1 = (char*)(((unsigned long)zo1->zo_ptr + zpad) & ~zpad);
644 	ptr2 = (char*)zo2->zo_ptr + ((char*)ptr1 - (char*)zo1->zo_ptr);
645 	ptr2 = (char*)(((unsigned long)ptr2 + zpad) & ~zpad);
646 	return (int)((char*)ptr1 - (char*)ptr2);
647 }
648 
649 void *
650 slap_replenish_zopool(
651 	void *ctx
652 )
653 {
654 	struct zone_heap* zh = ctx;
655 	struct zone_object *zo_block;
656 	int i;
657 
658 	zo_block = (struct zone_object *)ch_malloc(
659 					SLAP_ZONE_ZOBLOCK * sizeof(struct zone_object));
660 
661 	if ( zo_block == NULL ) {
662 		return NULL;
663 	}
664 
665 	zo_block[0].zo_blockhead = 1;
666 	LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, &zo_block[0], zo_link);
667 	for (i = 1; i < SLAP_ZONE_ZOBLOCK; i++) {
668 		zo_block[i].zo_blockhead = 0;
669 		LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, &zo_block[i], zo_link );
670 	}
671 
672 	return zo_block;
673 }
674 
675 int
676 slap_zn_invalidate(
677 	void *ctx,
678 	void *ptr
679 )
680 {
681 	struct zone_heap* zh = ctx;
682 	struct zone_object zoi, *zoo;
683 	struct zone_heap *zone = NULL;
684 	int seqno = *((ber_len_t*)ptr - 2);
685 	int idx = -1, rc = 0;
686 	int pad = 2*sizeof(int)-1, pad_shift;
687 	int order_start = -1, i;
688 	struct zone_object *zo;
689 
690 	pad_shift = pad - 1;
691 	do {
692 		order_start++;
693 	} while (pad_shift >>= 1);
694 
695 	zoi.zo_ptr = ptr;
696 	zoi.zo_idx = -1;
697 
698 	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
699 	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
700 
701 	if (zoo) {
702 		idx = zoo->zo_idx;
703 		assert(idx != -1);
704 		madvise(zh->zh_zones[idx], zh->zh_zonesize, MADV_DONTNEED);
705 		for (i = 0; i < zh->zh_zoneorder - order_start + 1; i++) {
706 			int shiftamt = order_start + 1 + i;
707 			int nummaps = zh->zh_zonesize >> shiftamt;
708 			assert(nummaps);
709 			nummaps >>= 3;
710 			if (!nummaps) nummaps = 1;
711 			memset(zh->zh_maps[idx][i], 0, nummaps);
712 			zo = LDAP_LIST_FIRST(&zh->zh_free[i]);
713 			while (zo) {
714 				struct zone_object *zo_tmp = zo;
715 				zo = LDAP_LIST_NEXT(zo, zo_link);
716 				if (zo_tmp && zo_tmp->zo_idx == idx) {
717 					LDAP_LIST_REMOVE(zo_tmp, zo_link);
718 					LDAP_LIST_INSERT_HEAD(&zh->zh_zopool, zo_tmp, zo_link);
719 				}
720 			}
721 		}
722 		if (LDAP_LIST_EMPTY(&zh->zh_zopool)) {
723 			slap_replenish_zopool(zh);
724 		}
725 		zo = LDAP_LIST_FIRST(&zh->zh_zopool);
726 		LDAP_LIST_REMOVE(zo, zo_link);
727 		zo->zo_ptr = zh->zh_zones[idx];
728 		zo->zo_idx = idx;
729 		LDAP_LIST_INSERT_HEAD(&zh->zh_free[zh->zh_zoneorder-order_start],
730 								zo, zo_link);
731 		zh->zh_seqno[idx]++;
732 	} else {
733 		Debug(LDAP_DEBUG_NONE, "zone not found for (ctx=0x%x, ptr=0x%x) !\n",
734 				ctx, ptr, 0);
735 	}
736 
737 	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
738 	Debug(LDAP_DEBUG_NONE, "zone %d invalidate\n", idx, 0, 0);
739 	return rc;
740 }
741 
742 int
743 slap_zn_validate(
744 	void *ctx,
745 	void *ptr,
746 	int seqno
747 )
748 {
749 	struct zone_heap* zh = ctx;
750 	struct zone_object zoi, *zoo;
751 	struct zone_heap *zone = NULL;
752 	int idx, rc = 0;
753 
754 	zoi.zo_ptr = ptr;
755 	zoi.zo_idx = -1;
756 
757 	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
758 
759 	if (zoo) {
760 		idx = zoo->zo_idx;
761 		assert(idx != -1);
762 		assert(seqno <= zh->zh_seqno[idx]);
763 		rc = (seqno == zh->zh_seqno[idx]);
764 	}
765 
766 	return rc;
767 }
768 
769 int slap_zh_rlock(
770 	void *ctx
771 )
772 {
773 	struct zone_heap* zh = ctx;
774 	ldap_pvt_thread_rdwr_rlock(&zh->zh_lock);
775 }
776 
777 int slap_zh_runlock(
778 	void *ctx
779 )
780 {
781 	struct zone_heap* zh = ctx;
782 	ldap_pvt_thread_rdwr_runlock(&zh->zh_lock);
783 }
784 
785 int slap_zh_wlock(
786 	void *ctx
787 )
788 {
789 	struct zone_heap* zh = ctx;
790 	ldap_pvt_thread_rdwr_wlock(&zh->zh_lock);
791 }
792 
793 int slap_zh_wunlock(
794 	void *ctx
795 )
796 {
797 	struct zone_heap* zh = ctx;
798 	ldap_pvt_thread_rdwr_wunlock(&zh->zh_lock);
799 }
800 
801 int slap_zn_rlock(
802 	void *ctx,
803 	void *ptr
804 )
805 {
806 	struct zone_heap* zh = ctx;
807 	struct zone_object zoi, *zoo;
808 	struct zone_heap *zone = NULL;
809 	int idx;
810 
811 	zoi.zo_ptr = ptr;
812 	zoi.zo_idx = -1;
813 
814 	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
815 	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
816 	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
817 
818 	if (zoo) {
819 		idx = zoo->zo_idx;
820 		assert(idx != -1);
821 		ldap_pvt_thread_rdwr_rlock(&zh->zh_znlock[idx]);
822 	}
823 }
824 
825 int slap_zn_runlock(
826 	void *ctx,
827 	void *ptr
828 )
829 {
830 	struct zone_heap* zh = ctx;
831 	struct zone_object zoi, *zoo;
832 	struct zone_heap *zone = NULL;
833 	int idx;
834 
835 	zoi.zo_ptr = ptr;
836 	zoi.zo_idx = -1;
837 
838 	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
839 	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
840 	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
841 
842 	if (zoo) {
843 		idx = zoo->zo_idx;
844 		assert(idx != -1);
845 		ldap_pvt_thread_rdwr_runlock(&zh->zh_znlock[idx]);
846 	}
847 }
848 
849 int slap_zn_wlock(
850 	void *ctx,
851 	void *ptr
852 )
853 {
854 	struct zone_heap* zh = ctx;
855 	struct zone_object zoi, *zoo;
856 	struct zone_heap *zone = NULL;
857 	int idx;
858 
859 	zoi.zo_ptr = ptr;
860 	zoi.zo_idx = -1;
861 
862 	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
863 	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
864 	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
865 
866 	if (zoo) {
867 		idx = zoo->zo_idx;
868 		assert(idx != -1);
869 		ldap_pvt_thread_rdwr_wlock(&zh->zh_znlock[idx]);
870 	}
871 }
872 
873 int slap_zn_wunlock(
874 	void *ctx,
875 	void *ptr
876 )
877 {
878 	struct zone_heap* zh = ctx;
879 	struct zone_object zoi, *zoo;
880 	struct zone_heap *zone = NULL;
881 	int idx;
882 
883 	zoi.zo_ptr = ptr;
884 	zoi.zo_idx = -1;
885 
886 	ldap_pvt_thread_mutex_lock( &zh->zh_mutex );
887 	zoo = avl_find(zh->zh_zonetree, &zoi, slap_zone_cmp);
888 	ldap_pvt_thread_mutex_unlock( &zh->zh_mutex );
889 
890 	if (zoo) {
891 		idx = zoo->zo_idx;
892 		assert(idx != -1);
893 		ldap_pvt_thread_rdwr_wunlock(&zh->zh_znlock[idx]);
894 	}
895 }
896 
#define T_SEC_IN_USEC 1000000

/*
 * Elapsed time from *tv_begin to *tv_end in microseconds.
 * The result is clamped to the range [0, INT_MAX]: an end time that lies
 * before the begin time yields 0, and a span too long for an int yields
 * INT_MAX.
 */
static int
slap_timediff(struct timeval *tv_begin, struct timeval *tv_end)
{
	int64_t t_begin, t_end, t_diff;

	/* widen before multiplying and keep the sign for the clamp below */
	t_begin = (int64_t)T_SEC_IN_USEC * tv_begin->tv_sec + tv_begin->tv_usec;
	t_end  = (int64_t)T_SEC_IN_USEC * tv_end->tv_sec  + tv_end->tv_usec;
	t_diff  = t_end - t_begin;

	if ( t_diff < 0 )
		return 0;
	if ( t_diff > INT_MAX )
		return INT_MAX;

	return (int)t_diff;
}
913 
/*
 * Store the current time of day, as reported by gettimeofday(), in
 * *tv_set.  No time zone information is requested.
 */
void
slap_set_timing(struct timeval *tv_set)
{
	struct timezone *no_tz = NULL;

	gettimeofday(tv_set, no_tz);
}
919 
/*
 * Store the current time of day in *tv_measure and return what
 * slap_timediff() reports for the span from *tv_set to that moment.
 */
int
slap_measure_timing(struct timeval *tv_set, struct timeval *tv_measure)
{
	int elapsed;

	gettimeofday(tv_measure, (struct timezone *)NULL);
	elapsed = slap_timediff(tv_set, tv_measure);

	return elapsed;
}
926 
927 #define EMA_WEIGHT 0.999000
928 #define SLAP_ZN_LATENCY_HISTORY_QLEN 500
929 int
930 slap_zn_latency_history(void* ctx, int ea_latency)
931 {
932 /* TODO: monitor /proc/stat (swap) as well */
933 	struct zone_heap* zh = ctx;
934 	double t_diff = 0.0;
935 
936 	zh->zh_ema_latency = (double)ea_latency * (1.0 - EMA_WEIGHT)
937 					+ zh->zh_ema_latency * EMA_WEIGHT;
938 	if (!zh->zh_swapping && zh->zh_ema_samples++ % 100 == 99) {
939 		struct zone_latency_history *zlh_entry;
940 		zlh_entry = ch_calloc(1, sizeof(struct zone_latency_history));
941 		zlh_entry->zlh_latency = zh->zh_ema_latency;
942 		LDAP_STAILQ_INSERT_TAIL(
943 				&zh->zh_latency_history_queue, zlh_entry, zlh_next);
944 		zh->zh_latency_history_qlen++;
945 		while (zh->zh_latency_history_qlen > SLAP_ZN_LATENCY_HISTORY_QLEN) {
946 			struct zone_latency_history *zlh;
947 			zlh = LDAP_STAILQ_FIRST(&zh->zh_latency_history_queue);
948 			LDAP_STAILQ_REMOVE_HEAD(
949 					&zh->zh_latency_history_queue, zlh_next);
950 			zh->zh_latency_history_qlen--;
951 			ch_free(zlh);
952 		}
953 		if (zh->zh_latency_history_qlen == SLAP_ZN_LATENCY_HISTORY_QLEN) {
954 			struct zone_latency_history *zlh_first, *zlh_last;
955 			zlh_first = LDAP_STAILQ_FIRST(&zh->zh_latency_history_queue);
956 			zlh_last = LDAP_STAILQ_LAST(&zh->zh_latency_history_queue,
957 						zone_latency_history, zlh_next);
958 			t_diff = zlh_last->zlh_latency - zlh_first->zlh_latency;
959 		}
960 		if (t_diff >= 2000) {
961 			zh->zh_latency_jump++;
962 		} else {
963 			zh->zh_latency_jump = 0;
964 		}
965 		if (zh->zh_latency_jump > 3) {
966 			zh->zh_latency_jump = 0;
967 			zh->zh_swapping = 1;
968 		}
969 	}
970 	return zh->zh_swapping;
971 }
972 #endif /* SLAP_ZONE_ALLOC */
973