xref: /openbsd-src/usr.sbin/nsd/udb.c (revision 4564029f9cb3eebe0367746e84e30a977eed3985)
1d3fecca9Ssthen /* udb.c - u(micro) data base.
2d3fecca9Ssthen  * By W.C.A. Wijngaards
3d3fecca9Ssthen  * Copyright 2010, NLnet Labs.
4d3fecca9Ssthen  * BSD, see LICENSE.
5d3fecca9Ssthen  */
6d3fecca9Ssthen #include "config.h"
7d3fecca9Ssthen #include "udb.h"
8d3fecca9Ssthen #include <string.h>
9d3fecca9Ssthen #include <errno.h>
10d3fecca9Ssthen #include <stdio.h>
11d3fecca9Ssthen #include <unistd.h>
12d3fecca9Ssthen #include <assert.h>
13d3fecca9Ssthen #include "lookup3.h"
14d3fecca9Ssthen #include "util.h"
15d3fecca9Ssthen 
16d3fecca9Ssthen /* mmap and friends */
17d3fecca9Ssthen #include <sys/types.h>
18d3fecca9Ssthen #include <sys/stat.h>
19d3fecca9Ssthen #include <fcntl.h>
20d3fecca9Ssthen #include <sys/mman.h>
21d3fecca9Ssthen 
22d3fecca9Ssthen /* for systems without, portable definition, failed-1 and async is a flag */
23d3fecca9Ssthen #ifndef MAP_FAILED
24d3fecca9Ssthen #define MAP_FAILED ((void*)-1)
25d3fecca9Ssthen #endif
26d3fecca9Ssthen #ifndef MS_SYNC
27d3fecca9Ssthen #define MS_SYNC 0
28d3fecca9Ssthen #endif
29d3fecca9Ssthen 
30d3fecca9Ssthen /** move and fixup xl segment */
31d3fecca9Ssthen static void move_xl_segment(void* base, udb_base* udb, udb_void xl,
32d3fecca9Ssthen 	udb_void n, uint64_t sz, uint64_t startseg);
33d3fecca9Ssthen /** attempt to compact the data and move free space to the end */
34d3fecca9Ssthen static int udb_alloc_compact(void* base, udb_alloc* alloc);
35d3fecca9Ssthen 
/** convert pointer to the data part to a pointer to the base of the chunk */
static udb_void
chunk_from_dataptr(udb_void data)
{
	/* we use that sizeof(udb_chunk_d) != sizeof(udb_xl_chunk_d) and
	 * that xl_chunk_d is aligned on x**1024 boundaries. */
	/* step back by the XL header size: if that lands exactly on a
	 * chunk-size boundary the data must live in an XL chunk (XL chunks
	 * are always chunk-size aligned), otherwise it is a normal chunk
	 * and we step back by the normal header size instead. */
	udb_void xl = data - sizeof(udb_xl_chunk_d);
	if( (xl & (UDB_ALLOC_CHUNK_SIZE-1)) == 0)
		return xl;
	return data - sizeof(udb_chunk_d);
}
47d3fecca9Ssthen 
/** externally linkable wrapper around the static chunk_from_dataptr() */
udb_void
chunk_from_dataptr_ext(udb_void data)
{
	return chunk_from_dataptr(data);
}
51d3fecca9Ssthen 
52d3fecca9Ssthen #ifndef NDEBUG
53d3fecca9Ssthen /** read last octet from a chunk */
54d3fecca9Ssthen static uint8_t
chunk_get_last(void * base,udb_void chunk,int exp)55d3fecca9Ssthen chunk_get_last(void* base, udb_void chunk, int exp)
56d3fecca9Ssthen {
57d3fecca9Ssthen 	return *((uint8_t*)UDB_REL(base, chunk+(1<<exp)-1));
58d3fecca9Ssthen }
59d3fecca9Ssthen #endif
60d3fecca9Ssthen 
/** write last octet of a chunk */
/* The last byte of every chunk stores its exp (or a marker); used by
 * the allocator to walk chunks backwards during compaction. */
static void
chunk_set_last(void* base, udb_void chunk, int exp, uint8_t value)
{
	/* 64bit shift: exp may be up to 63, int 1<<exp would overflow */
	assert(exp >= 0 && exp <= 63);
	*((uint8_t*)UDB_REL(base, chunk+((uint64_t)1<<exp)-1)) = value;
}
68d3fecca9Ssthen 
/** create udb_base from a file descriptor (must be at start of file) */
/* Validates magic and global header, checks the file was cleanly
 * closed and its size matches the header, then mmaps it read-write
 * and initializes the allocator.  On any failure the fd is closed
 * and NULL is returned.  On success, clean_close is reset to 0 so a
 * crash before udb_base_close() is detectable at the next open. */
udb_base*
udb_base_create_fd(const char* fname, int fd, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m, fsz;
	udb_glob_d g;
	ssize_t r;
	udb_base* udb = (udb_base*)xalloc_zero(sizeof(*udb));
	if(!udb) {
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}
	udb->fname = strdup(fname);
	if(!udb->fname) {
		log_msg(LOG_ERR, "out of memory");
		free(udb);
		close(fd);
		return NULL;
	}
	udb->walkfunc = walkfunc;
	udb->walkarg = arg;
	udb->fd = fd;
	/* in-memory hash of udb_ptrs starts at 1024 buckets (power of 2,
	 * so ram_mask can be size-1) */
	udb->ram_size = 1024;
	udb->ram_mask = (int)udb->ram_size - 1;
	udb->ram_hash = (udb_ptr**)xalloc_array_zero(sizeof(udb_ptr*),
		udb->ram_size);
	if(!udb->ram_hash) {
		free(udb->fname);
		free(udb);
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}

	/* read magic */
	if((r=read(fd, &m, sizeof(m))) == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(m)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	/* TODO : what if bigendian and littleendian file, see magic */
	if(m != UDB_MAGIC) {
		log_msg(LOG_ERR, "%s: wrong type of file", fname);
		goto fail;
	}
	/* read header */
	if((r=read(fd, &g, sizeof(g))) == -1) {
		log_msg(LOG_ERR, "%s: %s\n", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(g)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	if(g.version != 0) {
		log_msg(LOG_ERR, "%s: unknown file version %d", fname,
			(int)g.version);
		goto fail;
	}
	if(g.hsize < UDB_HEADER_SIZE) {
		log_msg(LOG_ERR, "%s: header size too small %d", fname,
			(int)g.hsize);
		goto fail;
	}
	if(g.hsize > UDB_HEADER_SIZE) {
		log_msg(LOG_WARNING, "%s: header size too large %d", fname,
			(int)g.hsize);
		goto fail;
	}
	/* a file that was not closed via udb_base_close() (or whose
	 * allocator was mid-operation) is rejected rather than repaired */
	if(g.clean_close != 1) {
		log_msg(LOG_WARNING, "%s: not cleanly closed %d", fname,
			(int)g.clean_close);
		goto fail;
	}
	if(g.dirty_alloc != 0) {
		log_msg(LOG_WARNING, "%s: not cleanly closed (alloc:%d)", fname,
			(int)g.dirty_alloc);
		goto fail;
	}

	/* check file size correctly written, for 4.0.2 nsd.db failure */
	fsz = (uint64_t)lseek(fd, (off_t)0, SEEK_END);
	(void)lseek(fd, (off_t)0, SEEK_SET);
	if(g.fsize != fsz) {
		log_msg(LOG_WARNING, "%s: file size %llu but mmap header "
			"has size %llu", fname, (unsigned long long)fsz,
			(unsigned long long)g.fsize);
		goto fail;
	}

	/* mmap it */
	if(g.fsize < UDB_HEADER_SIZE || g.fsize < g.hsize) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	/* sanity cap against a corrupt fsize field */
	if(g.fsize > (uint64_t)400*1024*1024*1024*1024) /* 400 Tb */ {
		log_msg(LOG_WARNING, "%s: file size too large %llu",
			fname, (unsigned long long)g.fsize);
		goto fail;
	}
	udb->base_size = (size_t)g.fsize;
#ifdef HAVE_MMAP
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	udb->base = mmap(NULL, (size_t)udb->base_size,
		(int)PROT_READ|PROT_WRITE, (int)MAP_SHARED,
		(int)udb->fd, (off_t)0);
#else
	udb->base = MAP_FAILED; errno = ENOSYS;
#endif
	if(udb->base == MAP_FAILED) {
		udb->base = NULL;
		log_msg(LOG_ERR, "mmap(size %u) error: %s",
			(unsigned)udb->base_size, strerror(errno));
	/* shared cleanup for all error paths above; note the label sits
	 * inside this if-block so the mmap failure falls through into it */
	fail:
		close(fd);
		free(udb->fname);
		free(udb->ram_hash);
		free(udb);
		return NULL;
	}

	/* init completion */
	/* the global header lives just after the 8-byte magic */
	udb->glob_data = (udb_glob_d*)((char*)udb->base+sizeof(uint64_t));
	r = 0;
	/* cannot be dirty because that is goto fail above */
	if(udb->glob_data->dirty_alloc != udb_dirty_clean)
		r = 1;
	udb->alloc = udb_alloc_create(udb, (udb_alloc_d*)(
		(char*)udb->glob_data+sizeof(*udb->glob_data)));
	if(!udb->alloc) {
		log_msg(LOG_ERR, "out of memory");
		udb_base_free(udb);
		return NULL;
	}
	if(r) {
		/* and compact now, or resume compacting */
		udb_alloc_compact(udb, udb->alloc);
		udb_base_sync(udb, 1);
	}
	/* mark in-use; set back to 1 by udb_base_close() */
	udb->glob_data->clean_close = 0;

	return udb;
}
216d3fecca9Ssthen 
udb_base_create_read(const char * fname,udb_walk_relptr_func walkfunc,void * arg)217d3fecca9Ssthen udb_base* udb_base_create_read(const char* fname, udb_walk_relptr_func walkfunc,
218d3fecca9Ssthen 	void* arg)
219d3fecca9Ssthen {
220d3fecca9Ssthen 	int fd = open(fname, O_RDWR);
221d3fecca9Ssthen 	if(fd == -1) {
222d3fecca9Ssthen 		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
223d3fecca9Ssthen 		return NULL;
224d3fecca9Ssthen 	}
225d3fecca9Ssthen 	return udb_base_create_fd(fname, fd, walkfunc, arg);
226d3fecca9Ssthen }
227d3fecca9Ssthen 
228d3fecca9Ssthen /** init new udb_global structure */
udb_glob_init_new(udb_glob_d * g)229d3fecca9Ssthen static void udb_glob_init_new(udb_glob_d* g)
230d3fecca9Ssthen {
231d3fecca9Ssthen 	memset(g, 0, sizeof(*g));
232d3fecca9Ssthen 	g->hsize = UDB_HEADER_SIZE;
233d3fecca9Ssthen 	g->fsize = UDB_HEADER_SIZE;
234d3fecca9Ssthen }
235d3fecca9Ssthen 
236d3fecca9Ssthen /** write data to file and check result */
237d3fecca9Ssthen static int
write_fdata(const char * fname,int fd,void * data,size_t len)238d3fecca9Ssthen write_fdata(const char* fname, int fd, void* data, size_t len)
239d3fecca9Ssthen {
240d3fecca9Ssthen 	ssize_t w;
241d3fecca9Ssthen 	if((w=write(fd, data, len)) == -1) {
242d3fecca9Ssthen 		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
243d3fecca9Ssthen 		close(fd);
244d3fecca9Ssthen 		return 0;
245d3fecca9Ssthen 	} else if(w != (ssize_t)len) {
246d3fecca9Ssthen 		log_msg(LOG_ERR, "%s: short write (disk full?)", fname);
247d3fecca9Ssthen 		close(fd);
248d3fecca9Ssthen 		return 0;
249d3fecca9Ssthen 	}
250d3fecca9Ssthen 	return 1;
251d3fecca9Ssthen }
252d3fecca9Ssthen 
/** create a new empty udb file on disk, then open it via
 * udb_base_create_fd.  Writes magic, global header, allocator header
 * and two trailing 64bit words, truncates to the header size. */
udb_base* udb_base_create_new(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m;
	udb_glob_d g;
	udb_alloc_d a;
	/* trailing size/exp words written after the headers; presumably
	 * the end-of-allocation marker — confirm against udb.h layout */
	uint64_t endsize = UDB_HEADER_SIZE;
	uint64_t endexp = 0;
	int fd = open(fname, O_CREAT|O_RDWR, 0600);
	if(fd == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		return NULL;
	}
	m = UDB_MAGIC;
	udb_glob_init_new(&g);
	udb_alloc_init_new(&a);
	/* mark clean so udb_base_create_fd accepts the new file */
	g.clean_close = 1;

	/* write new data to file (closes fd on error) */
	if(!write_fdata(fname, fd, &m, sizeof(m)))
		return NULL;
	if(!write_fdata(fname, fd, &g, sizeof(g)))
		return NULL;
	if(!write_fdata(fname, fd, &a, sizeof(a)))
		return NULL;
	if(!write_fdata(fname, fd, &endsize, sizeof(endsize)))
		return NULL;
	if(!write_fdata(fname, fd, &endexp, sizeof(endexp)))
		return NULL;
	/* rewind to start */
	if(lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
		log_msg(LOG_ERR, "%s: lseek %s", fname, strerror(errno));
		close(fd);
		return NULL;
	}
	/* truncate to the right size */
	if(ftruncate(fd, (off_t)g.fsize) < 0) {
		log_msg(LOG_ERR, "%s: ftruncate(%d): %s", fname,
			(int)g.fsize, strerror(errno));
		close(fd);
		return NULL;
	}
	return udb_base_create_fd(fname, fd, walkfunc, arg);
}
297d3fecca9Ssthen 
/** shrink the udb base if it has unused space at the end */
/* Truncates the file to nsize and records the new size in the mmapped
 * header, bracketed by the dirty_alloc flag so a crash between header
 * update and truncate can be detected at the next open. */
static void
udb_base_shrink(udb_base* udb, uint64_t nsize)
{
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	udb->glob_data->fsize = nsize;
	/* sync, does not *seem* to be required on Linux, but it is
	   certainly required on OpenBSD.  Otherwise changed data is lost. */
#ifdef HAVE_MMAP
	msync(udb->base, udb->base_size, MS_ASYNC);
#endif
	if(ftruncate(udb->fd, (off_t)nsize) != 0) {
		/* best effort: log but continue, the header is consistent */
		log_msg(LOG_ERR, "%s: ftruncate(%u) %s", udb->fname,
			(unsigned)nsize, strerror(errno));
	}
	udb->glob_data->dirty_alloc = udb_dirty_clean;
}
315d3fecca9Ssthen 
/** close the udb file: give back unused tail space, mark the header
 * cleanly closed, close the fd and unmap the memory.  Safe on NULL. */
void udb_base_close(udb_base* udb)
{
	if(!udb)
		return;
	/* shrink the file to the allocation high-water mark so it takes
	 * no more disk space than needed */
	if(udb->fd != -1 && udb->base && udb->alloc) {
		uint64_t nsize = udb->alloc->disk->nextgrow;
		if(nsize < udb->base_size)
			udb_base_shrink(udb, nsize);
	}
	if(udb->fd != -1) {
		/* record clean shutdown; udb_base_create_fd rejects files
		 * without this flag set */
		udb->glob_data->clean_close = 1;
		close(udb->fd);
		udb->fd = -1;
	}
	if(udb->base) {
#ifdef HAVE_MMAP
		if(munmap(udb->base, udb->base_size) == -1) {
			log_msg(LOG_ERR, "munmap: %s", strerror(errno));
		}
#endif
		udb->base = NULL;
	}
}
339d3fecca9Ssthen 
/** release a udb_base and everything it owns; closes the file first */
void udb_base_free(udb_base* udb)
{
	if(udb == NULL)
		return;
	/* close flushes, shrinks and unmaps before the RAM state goes */
	udb_base_close(udb);
	udb_alloc_delete(udb->alloc);
	free(udb->fname);
	free(udb->ram_hash);
	free(udb);
}
350d3fecca9Ssthen 
/** free the udb_base state but deliberately keep the mmap alive
 * (the caller retains the mapping; no munmap is done here) */
void udb_base_free_keep_mmap(udb_base* udb)
{
	if(udb == NULL)
		return;
	if(udb->fd != -1) {
		close(udb->fd);
		udb->fd = -1;
	}
	/* drop our reference without unmapping */
	udb->base = NULL;
	udb_alloc_delete(udb->alloc);
	free(udb->fname);
	free(udb->ram_hash);
	free(udb);
}
364d3fecca9Ssthen 
/** flush the memory-mapped region to disk.
 * wait!=0 blocks until written (MS_SYNC), else asynchronous (MS_ASYNC). */
void udb_base_sync(udb_base* udb, int wait)
{
	/* robustness: also nothing to do without a mapping; previously a
	 * NULL base was passed to msync, which can only fail with EINVAL */
	if(!udb || !udb->base) return;
#ifdef HAVE_MMAP
	if(msync(udb->base, udb->base_size, wait?MS_SYNC:MS_ASYNC) != 0) {
		log_msg(LOG_ERR, "msync(%s) error %s",
			udb->fname, strerror(errno));
	}
#else
	(void)wait;
#endif
}
377d3fecca9Ssthen 
/** hash a chunk pointer */
/* Hashes the 64bit file offset with lookup3's hashword over its two
 * 32bit halves; the result selects a ram_hash bucket. */
static uint32_t
chunk_hash_ptr(udb_void p)
{
	/* put p into an array of uint32 */
	uint32_t h[sizeof(p)/sizeof(uint32_t)];
	memcpy(&h, &p, sizeof(h));
	/* 0x8763 is an arbitrary fixed seed */
	return hashword(h, sizeof(p)/sizeof(uint32_t), 0x8763);
}
387d3fecca9Ssthen 
388d3fecca9Ssthen /** check that the given pointer is on the bucket for the given offset */
udb_ptr_is_on_bucket(udb_base * udb,udb_ptr * ptr,udb_void to)389d3fecca9Ssthen int udb_ptr_is_on_bucket(udb_base* udb, udb_ptr* ptr, udb_void to)
390d3fecca9Ssthen {
391d3fecca9Ssthen 	uint32_t i = chunk_hash_ptr(to) & udb->ram_mask;
392d3fecca9Ssthen 	udb_ptr* p;
393d3fecca9Ssthen 	assert((size_t)i < udb->ram_size);
394d3fecca9Ssthen 	for(p = udb->ram_hash[i]; p; p=p->next) {
395d3fecca9Ssthen 		if(p == ptr)
396d3fecca9Ssthen 			return 1;
397d3fecca9Ssthen 	}
398d3fecca9Ssthen 	return 0;
399d3fecca9Ssthen }
400d3fecca9Ssthen 
401d3fecca9Ssthen /** grow the ram array */
402d3fecca9Ssthen static void
grow_ram_hash(udb_base * udb,udb_ptr ** newhash)403d3fecca9Ssthen grow_ram_hash(udb_base* udb, udb_ptr** newhash)
404d3fecca9Ssthen {
405d3fecca9Ssthen 	size_t i;
406d3fecca9Ssthen 	size_t osize= udb->ram_size;
407d3fecca9Ssthen 	udb_ptr* p, *np;
408d3fecca9Ssthen 	udb_ptr** oldhash = udb->ram_hash;
409d3fecca9Ssthen 	udb->ram_size *= 2;
410d3fecca9Ssthen 	udb->ram_mask <<= 1;
411d3fecca9Ssthen 	udb->ram_mask |= 1;
412d3fecca9Ssthen 	udb->ram_hash = newhash;
413d3fecca9Ssthen 	/* have to link in every element in the old list into the new list*/
414d3fecca9Ssthen 	for(i=0; i<osize; i++) {
415d3fecca9Ssthen 		p = oldhash[i];
416d3fecca9Ssthen 		while(p) {
417d3fecca9Ssthen 			np = p->next;
418d3fecca9Ssthen 			/* link into newhash */
419d3fecca9Ssthen 			p->prev=NULL;
420d3fecca9Ssthen 			p->next=newhash[chunk_hash_ptr(p->data)&udb->ram_mask];
421d3fecca9Ssthen 			if(p->next) p->next->prev = p;
422d3fecca9Ssthen 			/* go to next element of oldhash */
423d3fecca9Ssthen 			p = np;
424d3fecca9Ssthen 		}
425d3fecca9Ssthen 	}
426d3fecca9Ssthen 	free(oldhash);
427d3fecca9Ssthen }
428d3fecca9Ssthen 
/** link a udb_ptr into the in-memory hash table, so it can be fixed
 * up when the chunk it points to is moved on disk */
void udb_base_link_ptr(udb_base* udb, udb_ptr* ptr)
{
	uint32_t i;
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* must be to whole chunk*/
#endif
	udb->ram_num++;
	/* grow when full; the cap keeps ram_size within a signed 32bit
	 * range so ram_mask stays valid */
	if(udb->ram_num == udb->ram_size && udb->ram_size<(size_t)0x7fffffff) {
		/* grow the array, if allocation succeeds */
		udb_ptr** newram = (udb_ptr**)xalloc_array_zero(
			sizeof(udb_ptr*), udb->ram_size*2);
		if(newram) {
			grow_ram_hash(udb, newram);
		}
	}
	/* bucket computed after a possible grow (mask may have changed) */
	i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
	assert((size_t)i < udb->ram_size);

	/* push at the head of the doubly-linked bucket list */
	ptr->prev = NULL;
	ptr->next = udb->ram_hash[i];
	udb->ram_hash[i] = ptr;
	if(ptr->next)
		ptr->next->prev = ptr;
}
453d3fecca9Ssthen 
/** unlink a udb_ptr from the in-memory hash bucket it is linked on */
void udb_base_unlink_ptr(udb_base* udb, udb_ptr* ptr)
{
	assert(ptr->data);
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* ptr must be inited */
	assert(udb_ptr_is_on_bucket(udb, ptr, ptr->data));
#endif
	udb->ram_num--;
	if(ptr->next)
		ptr->next->prev = ptr->prev;
	if(ptr->prev)
		ptr->prev->next = ptr->next;
	else	{
		/* no prev: ptr was the bucket head, advance the head */
		uint32_t i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
		assert((size_t)i < udb->ram_size);
		udb->ram_hash[i] = ptr->next;
	}
}
472d3fecca9Ssthen 
/** change a set of ram ptrs to a new value */
/* Every udb_ptr currently pointing at offset 'old' is re-pointed to
 * 'newd' and moved to the hash bucket of the new offset; used when a
 * chunk is relocated on disk. */
static void
udb_base_ram_ptr_edit(udb_base* udb, udb_void old, udb_void newd)
{
	uint32_t io = chunk_hash_ptr(old) & udb->ram_mask;
	udb_ptr* p, *np;
	/* edit them and move them into the new position */
	p = udb->ram_hash[io];
	while(p) {
		/* capture next first: relinking p rewires p->next */
		np = p->next;
		if(p->data == old) {
			udb_base_unlink_ptr(udb, p);
			p->data = newd;
			udb_base_link_ptr(udb, p);
		}
		p = np;
	}
}
491d3fecca9Ssthen 
udb_base_get_userdata(udb_base * udb)492d3fecca9Ssthen udb_rel_ptr* udb_base_get_userdata(udb_base* udb)
493d3fecca9Ssthen {
494d3fecca9Ssthen 	return &udb->glob_data->user_global;
495d3fecca9Ssthen }
496d3fecca9Ssthen 
/** set the global user-data relptr in the header to point at 'user'
 * (0 clears it); udb_rel_ptr_set maintains the relptr bookkeeping */
void udb_base_set_userdata(udb_base* udb, udb_void user)
{
#ifdef UDB_CHECK
	if(user) { assert(udb_valid_dataptr(udb, user)); }
#endif
	udb_rel_ptr_set(udb->base, &udb->glob_data->user_global, user);
}
504d3fecca9Ssthen 
/** store the user flags byte in the mmapped global header */
void udb_base_set_userflags(udb_base* udb, uint8_t v)
{
	udb->glob_data->userflags = v;
}
509d3fecca9Ssthen 
/** fetch the user flags byte from the mmapped global header */
uint8_t udb_base_get_userflags(udb_base* udb)
{
	return udb->glob_data->userflags;
}
514d3fecca9Ssthen 
/** re-mmap the udb to specified size */
/* Returns the new base pointer, or NULL on failure.  The mapping may
 * move; when it does, udb->glob_data and alloc->disk (pointers into
 * the mapping) are fixed up.  The alloc parameter is passed in
 * because udb->alloc may not be initialized yet at creation time. */
static void*
udb_base_remap(udb_base* udb, udb_alloc* alloc, uint64_t nsize)
{
#ifdef HAVE_MMAP
	void* nb;
	/* for use with valgrind, do not use mremap, but the other version */
#ifdef MREMAP_MAYMOVE
	nb = mremap(udb->base, udb->base_size, nsize, MREMAP_MAYMOVE);
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	}
#else /* !HAVE MREMAP */
	/* use munmap-mmap to simulate mremap */
	if(munmap(udb->base, udb->base_size) != 0) {
		log_msg(LOG_ERR, "munmap(%s) error %s",
			udb->fname, strerror(errno));
	}
	/* provide hint for new location */
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	nb = mmap(udb->base, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
		(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	/* retry the mmap without basept in case of ENOMEM (FreeBSD8),
	 * the kernel can then try to mmap it at a different location
	 * where more memory is available */
	if(nb == MAP_FAILED && errno == ENOMEM) {
		nb = mmap(NULL, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
			(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	}
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		udb->base = NULL;
		return 0;
	}
#endif /* HAVE MREMAP */
	if(nb != udb->base) {
		/* fix up realpointers in udb and alloc */
		/* but mremap may have been nice and not move the base */
		udb->base = nb;
		udb->glob_data = (udb_glob_d*)((char*)nb+sizeof(uint64_t));
		/* use passed alloc pointer because the udb->alloc may not
		 * be initialized yet */
		alloc->disk = (udb_alloc_d*)((char*)udb->glob_data
			+sizeof(*udb->glob_data));
	}
	udb->base_size = nsize;
	return nb;
#else /* HAVE_MMAP */
	(void)udb; (void)alloc; (void)nsize;
	return NULL;
#endif /* HAVE_MMAP */
}
571d3fecca9Ssthen 
572d3fecca9Ssthen void
udb_base_remap_process(udb_base * udb)573d3fecca9Ssthen udb_base_remap_process(udb_base* udb)
574d3fecca9Ssthen {
575d3fecca9Ssthen 	/* assume that fsize is still accessible */
576d3fecca9Ssthen 	udb_base_remap(udb, udb->alloc, udb->glob_data->fsize);
577d3fecca9Ssthen }
578d3fecca9Ssthen 
/** grow file to specified size and re-mmap, return new base */
static void*
udb_base_grow_and_remap(udb_base* udb, uint64_t nsize)
{
	/* grow file by writing a single zero at that spot, the
	 * rest is filled in with zeroes. */
	uint8_t z = 0;
	ssize_t w;

	assert(nsize > 0);
	/* flag the size change so a crash mid-grow is detected on reopen */
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	/* note: the error-handling body below the #endif is shared by
	 * both the pwrite and the lseek+write variants of the if() */
#ifdef HAVE_PWRITE
	if((w=pwrite(udb->fd, &z, sizeof(z), (off_t)(nsize-1))) == -1) {
#else
	if(lseek(udb->fd, (off_t)(nsize-1), SEEK_SET) == -1) {
		log_msg(LOG_ERR, "fseek %s: %s", udb->fname, strerror(errno));
		return 0;
	}
	if((w=write(udb->fd, &z, sizeof(z))) == -1) {
#endif
		log_msg(LOG_ERR, "grow(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	} else if(w != (ssize_t)sizeof(z)) {
		log_msg(LOG_ERR, "grow(%s, size %u) failed (disk full?)",
			udb->fname, (unsigned)nsize);
		return 0;
	}
	udb->glob_data->fsize = nsize;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	return udb_base_remap(udb, udb->alloc, nsize);
}
611d3fecca9Ssthen 
/** return the smallest x such that 2**x >= a; a must not be 0 */
int udb_exp_size(uint64_t a)
{
	int x = 0;
	uint64_t v;
	assert(a != 0);
	/* ceil(log2(a)): count the bits needed to represent a-1 */
	for(v = a - 1; v != 0; v >>= 1)
		x++;
	assert( x>=0 && x<=63);
	assert( ((uint64_t)1<<x) >= a);
	assert( x==0 || /* <<x-1 without negative number analyzer complaints: */ (((uint64_t)1<<x)>>1) < a);
	return x;
}
636d3fecca9Ssthen 
/** number of trailing zero bits in o, i.e. the largest x such that
 * o is a multiple of 2**x; o must not be 0 */
int udb_exp_offset(uint64_t o)
{
	int x = 0;
	uint64_t v = o;
	assert(o != 0);
	/* strip trailing zero bits one at a time */
	while((v & (uint64_t)0x1) == 0) {
		v >>= 1;
		x++;
	}
	assert( o % ((uint64_t)1<<x) == 0);
	assert( o % ((uint64_t)1<<(x+1)) != 0);
	return x;
}
658d3fecca9Ssthen 
659d3fecca9Ssthen void udb_alloc_init_new(udb_alloc_d* a)
660d3fecca9Ssthen {
661d3fecca9Ssthen 	assert(UDB_HEADER_SIZE % UDB_ALLOC_CHUNK_MINSIZE == 0);
662d3fecca9Ssthen 	memset(a, 0, sizeof(*a));
663d3fecca9Ssthen 	/* set new allocations after header, as if allocated in a sequence
664d3fecca9Ssthen 	 * of minsize allocations */
665d3fecca9Ssthen 	a->nextgrow = UDB_HEADER_SIZE;
666d3fecca9Ssthen }
667d3fecca9Ssthen 
/** fsck the file size, false if failed and file is useless */
/* The on-disk size is taken as authoritative: the header's fsize is
 * overwritten with the real size and the file is remapped to it. */
static int
fsck_fsize(udb_base* udb, udb_alloc* alloc)
{
	off_t realsize;
	log_msg(LOG_WARNING, "udb-fsck %s: file size wrong", udb->fname);
	/* measure the true file size by seeking to the end */
	realsize = lseek(udb->fd, (off_t)0, SEEK_END);
	if(realsize == (off_t)-1) {
		log_msg(LOG_ERR, "lseek(%s): %s", udb->fname, strerror(errno));
		return 0;
	}
	udb->glob_data->fsize = (uint64_t)realsize;
	if(!udb_base_remap(udb, alloc, (uint64_t)realsize))
		return 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	log_msg(LOG_WARNING, "udb-fsck %s: file size fixed (sync)", udb->fname);
	/* blocking sync so the repaired header is durably on disk */
	udb_base_sync(udb, 1);
	return 1;
}
687d3fecca9Ssthen 
688d3fecca9Ssthen /** regenerate freelist add a new free chunk, return next todo */
689d3fecca9Ssthen static udb_void
690d3fecca9Ssthen regen_free(void* base, udb_void c, int exp, udb_alloc_d* regen)
691d3fecca9Ssthen {
692d3fecca9Ssthen 	udb_free_chunk_d* cp = UDB_FREE_CHUNK(c);
693d3fecca9Ssthen 	uint64_t esz = (uint64_t)1<<exp;
694d3fecca9Ssthen 	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
695d3fecca9Ssthen 		return 0;
696d3fecca9Ssthen 	}
697d3fecca9Ssthen 	cp->type = udb_chunk_type_free;
698d3fecca9Ssthen 	cp->flags = 0;
699d3fecca9Ssthen 	chunk_set_last(base, c, exp, (uint8_t)exp);
700d3fecca9Ssthen 	cp->prev = 0;
701d3fecca9Ssthen 	cp->next = regen->free[exp-UDB_ALLOC_CHUNK_MINEXP];
702d3fecca9Ssthen 	if(cp->next)
703d3fecca9Ssthen 		UDB_FREE_CHUNK(cp->next)->prev = c;
704d3fecca9Ssthen 	regen->stat_free += esz;
705d3fecca9Ssthen 	return c + esz;
706d3fecca9Ssthen }
707d3fecca9Ssthen 
708d3fecca9Ssthen /** regenerate xl chunk, return next todo */
709d3fecca9Ssthen static udb_void
710d3fecca9Ssthen regen_xl(void* base, udb_void c, udb_alloc_d* regen)
711d3fecca9Ssthen {
712d3fecca9Ssthen 	udb_xl_chunk_d* cp = UDB_XL_CHUNK(c);
713d3fecca9Ssthen 	uint64_t xlsz = cp->size;
714d3fecca9Ssthen 	if( (xlsz&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
715d3fecca9Ssthen 		return 0;
716d3fecca9Ssthen 	}
717d3fecca9Ssthen 	if( (c&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
718d3fecca9Ssthen 		return 0;
719d3fecca9Ssthen 	}
720d3fecca9Ssthen 	/* fixup end-size and end-expmarker */
721d3fecca9Ssthen 	regen->stat_alloc += xlsz;
722d3fecca9Ssthen 	return c + xlsz;
723d3fecca9Ssthen }
724d3fecca9Ssthen 
725d3fecca9Ssthen /** regenerate data chunk, return next todo */
726d3fecca9Ssthen static udb_void
727d3fecca9Ssthen regen_data(void* base, udb_void c, int exp, udb_alloc_d* regen)
728d3fecca9Ssthen {
729d3fecca9Ssthen 	uint64_t esz = (uint64_t)1<<exp;
730d3fecca9Ssthen 	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
731d3fecca9Ssthen 		return 0;
732d3fecca9Ssthen 	}
733d3fecca9Ssthen 	chunk_set_last(base, c, exp, (uint8_t)exp);
734d3fecca9Ssthen 	regen->stat_alloc += esz;
735d3fecca9Ssthen 	return c + esz;
736d3fecca9Ssthen }
737d3fecca9Ssthen 
738d3fecca9Ssthen /** regenerate a relptr structure inside a data segment */
739d3fecca9Ssthen static void
740d3fecca9Ssthen regen_relptr_func(void* base, udb_rel_ptr* rp, void* arg)
741d3fecca9Ssthen {
742d3fecca9Ssthen 	udb_void* a = (udb_void*)arg;
743d3fecca9Ssthen 	/* ignore 0 pointers */
744d3fecca9Ssthen 	if(!rp->data)
745d3fecca9Ssthen 		return;
746d3fecca9Ssthen 
747d3fecca9Ssthen 	/* edit relptrs that point to oldmoved to point to newmoved. */
748d3fecca9Ssthen 	if(rp->data == a[0])
749d3fecca9Ssthen 		rp->data = a[1];
750d3fecca9Ssthen 
751d3fecca9Ssthen 	/* regenerate relptr lists, add this item to the relptr list for
752d3fecca9Ssthen 	 * the data that it points to */
753d3fecca9Ssthen 	udb_rel_ptr_link(base, rp, rp->data);
754d3fecca9Ssthen }
755d3fecca9Ssthen 
756d3fecca9Ssthen /** regenerate the relptrs store in this data segment */
757d3fecca9Ssthen static void
758d3fecca9Ssthen regen_its_ptrs(void* base, udb_base* udb, udb_chunk_d* atp,
759d3fecca9Ssthen 	void* data, uint64_t dsz, udb_void rb_old, udb_void rb_new)
760d3fecca9Ssthen {
761d3fecca9Ssthen 	udb_void arg[2];
762d3fecca9Ssthen 	arg[0] = rb_old; arg[1] = rb_new;
763d3fecca9Ssthen 	/* walk through the structs here and put them on their respective
764d3fecca9Ssthen 	 * relptr lists */
765d3fecca9Ssthen 	(*udb->walkfunc)(base, udb->walkarg, atp->type, data, dsz,
766d3fecca9Ssthen 		&regen_relptr_func, arg);
767d3fecca9Ssthen 
768d3fecca9Ssthen }
769d3fecca9Ssthen 
/** regenerate relptrlists in the file.
 * Pass 1 clears every allocated chunk's ptrlist head; pass 2 asks the
 * application walk callback to re-link every relptr found in allocated
 * chunks. rb_old/rb_new describe a moved chunk so stale pointers to the
 * old location get redirected while relinking. */
static void
regen_ptrlist(void* base, udb_base* udb, udb_alloc* alloc,
	udb_void rb_old, udb_void rb_new)
{
	udb_void at = alloc->udb->glob_data->hsize;
	/* clear all ptrlist start pointers in the file. */
	while(at < alloc->disk->nextgrow) {
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		if(exp == UDB_EXP_XL) {
			UDB_XL_CHUNK(at)->ptrlist = 0;
			at += UDB_XL_CHUNK(at)->size;
		} else if(tp == udb_chunk_type_free) {
			/* free chunks carry no ptrlist; just skip */
			at += (uint64_t)1<<exp;
		} else { /* data chunk */
			UDB_CHUNK(at)->ptrlist = 0;
			at += (uint64_t)1<<exp;
		}
	}
	/* walk through all relptr structs and put on the right list. */
	at = alloc->udb->glob_data->hsize;
	while(at < alloc->disk->nextgrow) {
		udb_chunk_d* atp = UDB_CHUNK(at);
		int exp = (int)atp->exp;
		udb_chunk_type tp = (udb_chunk_type)atp->type;
		uint64_t sz = ((exp == UDB_EXP_XL)?UDB_XL_CHUNK(at)->size:
			(uint64_t)1<<exp);
		if(exp == UDB_EXP_XL) {
			assert(at != rb_old); /* should have been freed */
			/* XL payload: skip the XL header and the trailing
			 * size and exp-marker words at the chunk end */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_xl_chunk_d),
				sz-sizeof(udb_xl_chunk_d) - sizeof(uint64_t)*2,
				rb_old, rb_new);
			at += sz;
		} else if(tp == udb_chunk_type_free) {
			at += sz;
		} else { /* data chunk */
			assert(at != rb_old); /* should have been freed */
			/* payload: skip the header and trailing exp octet */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_chunk_d),
				sz-sizeof(udb_chunk_d)-1, rb_old, rb_new);
			at += sz;
		}
	}
}
816d3fecca9Ssthen 
817d3fecca9Ssthen 
818d3fecca9Ssthen /** mark free elements from ex XL chunk space and later fixups pick that up */
819d3fecca9Ssthen static void
820d3fecca9Ssthen rb_mark_free_segs(void* base, udb_void s, uint64_t m)
821d3fecca9Ssthen {
822d3fecca9Ssthen 	udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
823d3fecca9Ssthen 	/* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
824d3fecca9Ssthen 	assert(s >= UDB_ALLOC_CHUNK_SIZE);
825d3fecca9Ssthen 	while(q >= s) {
826d3fecca9Ssthen 		UDB_CHUNK(q)->exp = UDB_ALLOC_CHUNKS_MAX;
827d3fecca9Ssthen 		UDB_CHUNK(q)->type = udb_chunk_type_free;
828d3fecca9Ssthen 		q -= UDB_ALLOC_CHUNK_SIZE;
829d3fecca9Ssthen 	}
830d3fecca9Ssthen }
831d3fecca9Ssthen 
832d3fecca9Ssthen 
/** fsck rollback or rollforward XL move results.
 * Validates the recorded XL move (rb_old -> rb_new, rb_size bytes,
 * rb_seg = segment offset where the interrupted move stopped), finishes
 * the move, and marks the vacated space as free segments.
 * Returns 0 when the recorded values are inconsistent. */
static int
fsck_rb_xl(void* base, udb_base* udb, udb_void rb_old, udb_void rb_new,
	uint64_t rb_size, uint64_t rb_seg)
{

	if(rb_old <= rb_new)
		return 0; /* XL move one way */
	if( (rb_size&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if( (rb_old&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if( (rb_new&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if(rb_new + rb_size <= rb_old) {
		/* not overlapping: resume copy */
		memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
		/* and free up old piece(s) */
		rb_mark_free_segs(base, rb_old, rb_size);
	} else {
		/* overlapping, see what segment we stopped at
		 * and continue there. */
		move_xl_segment(base, udb, rb_old, rb_new, rb_size, rb_seg);
		/* free up old piece(s); from the end of the moved segment,
		 * until the end of the old segment */
		rb_mark_free_segs(base, rb_new+rb_size, (rb_old+rb_size)-
			(rb_new+rb_size));
	}
	/* do not call fix_ptrs, regenptrs does the job */
	return 1;
}
864d3fecca9Ssthen 
865d3fecca9Ssthen /** fsck rollback or rollforward move results */
866d3fecca9Ssthen static int
867d3fecca9Ssthen fsck_rb(void* base, udb_void rb_old, udb_void rb_new, uint64_t rb_size,
868d3fecca9Ssthen 	udb_void* make_free)
869d3fecca9Ssthen {
870d3fecca9Ssthen 	if( (rb_size&(rb_size-1)) != 0)
871d3fecca9Ssthen 		return 0; /* not powerof2 */
872d3fecca9Ssthen 	if( (rb_old&(rb_size-1)) != 0)
873d3fecca9Ssthen 		return 0; /* not aligned */
874d3fecca9Ssthen 	if( (rb_new&(rb_size-1)) != 0)
875d3fecca9Ssthen 		return 0; /* not aligned */
876d3fecca9Ssthen 	/* resume copy */
877d3fecca9Ssthen 	memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
878d3fecca9Ssthen 	/* do not call fix_ptrs, regenptrs does the job */
879d3fecca9Ssthen 	/* make sure udb_old is freed */
880d3fecca9Ssthen 	*make_free = rb_old;
881d3fecca9Ssthen 	return 1;
882d3fecca9Ssthen }
883d3fecca9Ssthen 
/** fsck the file and salvage, false if failed and file is useless.
 * The rb_* fields in glob_data record a chunk move that may have been
 * interrupted; 'moved' is nonzero for the compact case, where that move
 * must be rolled forward/back before rebuilding the freelists. */
static int
fsck_file(udb_base* udb, udb_alloc* alloc, int moved)
{
	void* base = udb->base;
	udb_alloc_d regen;	/* rebuilt allocation bookkeeping */
	udb_void at = udb->glob_data->hsize;	/* walk starts after header */
	udb_void rb_old = udb->glob_data->rb_old;
	udb_void rb_new = udb->glob_data->rb_new;
	udb_void rb_seg = udb->glob_data->rb_seg;
	udb_void make_free = 0;	/* old copy of a move, to be force-freed */
	uint64_t rb_size = udb->glob_data->rb_size;
	log_msg(LOG_WARNING, "udb-fsck %s: salvaging", udb->fname);
	/* walk through the file, use the exp values to see what can be
	 * salvaged */
	if(moved && rb_old && rb_new && rb_size) {
		if(rb_old+rb_size <= alloc->disk->nextgrow
			&& rb_new+rb_size <= alloc->disk->nextgrow) {
			/* we can use the move information to fix up the
			 * duplicate element (or partially moved element) */
			if(rb_size > 1024*1024) {
				/* XL chunk */
				if(!fsck_rb_xl(base, udb, rb_old, rb_new,
					rb_size, rb_seg))
					return 0;
			} else {
				if(!fsck_rb(base, rb_old, rb_new, rb_size,
					&make_free))
					return 0;
			}
		}
	}

	/* rebuild freelists */
	/* recalculate stats in alloc (except 'stat_data') */
	/* possibly new end 'nextgrow' value */
	memset(&regen, 0, sizeof(regen));
	regen.nextgrow = alloc->disk->nextgrow;
	while(at < regen.nextgrow) {
		/* figure out this chunk */
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		/* consistency check possible here with end-exp */
		if(tp == udb_chunk_type_free || at == make_free) {
			at = regen_free(base, at, exp, &regen);
			if(!at) return 0;
		} else if(exp == UDB_EXP_XL) {
			/* allocated data of XL size */
			at = regen_xl(base, at, &regen);
			if(!at) return 0;
		} else if(exp >= UDB_ALLOC_CHUNK_MINEXP
			&& exp <= UDB_ALLOC_CHUNKS_MAX) {
			/* allocated data */
			at = regen_data(base, at, exp, &regen);
			if(!at) return 0;
		} else {
			/* garbage; this must be EOF then */
			regen.nextgrow = at;
			break;
		}
	}
	/* install the rebuilt bookkeeping in the mapped file */
	*alloc->disk = regen;

	/* rebuild relptr lists */
	regen_ptrlist(base, udb, alloc, rb_old, rb_new);

	/* clear the rollback record and persist the salvage */
	log_msg(LOG_WARNING, "udb-fsck %s: salvaged successfully (sync)",
		udb->fname);
	udb->glob_data->rb_old = 0;
	udb->glob_data->rb_new = 0;
	udb->glob_data->rb_size = 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	udb_base_sync(udb, 1);
	return 1;
}
959d3fecca9Ssthen 
960d3fecca9Ssthen 
961d3fecca9Ssthen udb_alloc* udb_alloc_create(udb_base* udb, udb_alloc_d* disk)
962d3fecca9Ssthen {
963d3fecca9Ssthen 	udb_alloc* alloc = (udb_alloc*)xalloc_zero(sizeof(*alloc));
964d3fecca9Ssthen 	if(!alloc)
965d3fecca9Ssthen 		return NULL;
966d3fecca9Ssthen 	alloc->udb = udb;
967d3fecca9Ssthen 	alloc->disk = disk;
968d3fecca9Ssthen 	/* see if committed but uncompleted actions need to be done */
969d3fecca9Ssthen 	/* preserves the alloc state */
970d3fecca9Ssthen 	if(udb->glob_data->dirty_alloc != udb_dirty_clean) {
971d3fecca9Ssthen 		if(udb->glob_data->dirty_alloc == udb_dirty_fsize) {
972d3fecca9Ssthen 			if(fsck_fsize(udb, alloc))
973d3fecca9Ssthen 				return alloc;
974d3fecca9Ssthen 		} else if(udb->glob_data->dirty_alloc == udb_dirty_fl) {
975d3fecca9Ssthen 			if(fsck_file(udb, alloc, 0))
976d3fecca9Ssthen 				return alloc;
977d3fecca9Ssthen 		} else if(udb->glob_data->dirty_alloc == udb_dirty_compact) {
978d3fecca9Ssthen 			if(fsck_file(udb, alloc, 1))
979d3fecca9Ssthen 				return alloc;
980d3fecca9Ssthen 		}
981d3fecca9Ssthen 		log_msg(LOG_ERR, "error: file allocation dirty (%d)",
982d3fecca9Ssthen 			(int)udb->glob_data->dirty_alloc);
983d3fecca9Ssthen 		free(alloc);
984d3fecca9Ssthen 		return NULL;
985d3fecca9Ssthen 	}
986d3fecca9Ssthen 	return alloc;
987d3fecca9Ssthen }
988d3fecca9Ssthen 
989d3fecca9Ssthen void udb_alloc_delete(udb_alloc* alloc)
990d3fecca9Ssthen {
991d3fecca9Ssthen 	if(!alloc) return;
992d3fecca9Ssthen 	free(alloc);
993d3fecca9Ssthen }
994d3fecca9Ssthen 
995d3fecca9Ssthen /** unlink this element from its freelist */
996d3fecca9Ssthen static void
997d3fecca9Ssthen udb_alloc_unlink_fl(void* base, udb_alloc* alloc, udb_void chunk, int exp)
998d3fecca9Ssthen {
999d3fecca9Ssthen 	udb_free_chunk_d* fp = UDB_FREE_CHUNK(chunk);
1000d3fecca9Ssthen 	assert(chunk);
1001d3fecca9Ssthen 	/* chunk is a free chunk */
1002d3fecca9Ssthen 	assert(fp->exp == (uint8_t)exp);
1003d3fecca9Ssthen 	assert(fp->type == udb_chunk_type_free);
1004d3fecca9Ssthen 	assert(chunk_get_last(base, chunk, exp) == (uint8_t)exp);
1005d3fecca9Ssthen 	/* and thus freelist not empty */
1006d3fecca9Ssthen 	assert(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]);
1007d3fecca9Ssthen 	/* unlink */
1008d3fecca9Ssthen 	if(fp->prev)
1009d3fecca9Ssthen 		UDB_FREE_CHUNK(fp->prev)->next = fp->next;
1010d3fecca9Ssthen 	else	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
1011d3fecca9Ssthen 	if(fp->next)
1012d3fecca9Ssthen 		UDB_FREE_CHUNK(fp->next)->prev = fp->prev;
1013d3fecca9Ssthen }
1014d3fecca9Ssthen 
1015d3fecca9Ssthen /** pop first element off freelist, list may not be empty */
1016d3fecca9Ssthen static udb_void
1017d3fecca9Ssthen udb_alloc_pop_fl(void* base, udb_alloc* alloc, int exp)
1018d3fecca9Ssthen {
1019d3fecca9Ssthen 	udb_void f = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
1020d3fecca9Ssthen 	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
1021d3fecca9Ssthen 	assert(f);
1022d3fecca9Ssthen 	assert(fp->exp == (uint8_t)exp);
1023d3fecca9Ssthen 	assert(fp->type == udb_chunk_type_free);
1024d3fecca9Ssthen 	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
1025d3fecca9Ssthen 	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
1026d3fecca9Ssthen 	if(fp->next) {
1027d3fecca9Ssthen 		UDB_FREE_CHUNK(fp->next)->prev = 0;
1028d3fecca9Ssthen 	}
1029d3fecca9Ssthen 	return f;
1030d3fecca9Ssthen }
1031d3fecca9Ssthen 
1032d3fecca9Ssthen /** push new element onto freelist */
1033d3fecca9Ssthen static void
1034d3fecca9Ssthen udb_alloc_push_fl(void* base, udb_alloc* alloc, udb_void f, int exp)
1035d3fecca9Ssthen {
1036d3fecca9Ssthen 	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
1037d3fecca9Ssthen 	assert(f);
1038d3fecca9Ssthen 	fp->exp = (uint8_t)exp;
1039d3fecca9Ssthen 	fp->type = udb_chunk_type_free;
1040d3fecca9Ssthen 	fp->flags = 0;
1041d3fecca9Ssthen 	fp->prev = 0;
1042d3fecca9Ssthen 	fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
1043d3fecca9Ssthen 	if(fp->next)
1044d3fecca9Ssthen 		UDB_FREE_CHUNK(fp->next)->prev = f;
1045d3fecca9Ssthen 	chunk_set_last(base, f, exp, (uint8_t)exp);
1046d3fecca9Ssthen 	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
1047d3fecca9Ssthen }
1048d3fecca9Ssthen 
1049d3fecca9Ssthen /** push new element onto freelist - do not initialize the elt */
1050d3fecca9Ssthen static void
1051d3fecca9Ssthen udb_alloc_push_fl_noinit(void* base, udb_alloc* alloc, udb_void f, int exp)
1052d3fecca9Ssthen {
1053d3fecca9Ssthen 	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
1054d3fecca9Ssthen 	assert(f);
1055d3fecca9Ssthen 	assert(fp->exp == (uint8_t)exp);
1056d3fecca9Ssthen 	assert(fp->type == udb_chunk_type_free);
1057d3fecca9Ssthen 	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
1058d3fecca9Ssthen 	fp->prev = 0;
1059d3fecca9Ssthen 	fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
1060d3fecca9Ssthen 	if(fp->next)
1061d3fecca9Ssthen 		UDB_FREE_CHUNK(fp->next)->prev = f;
1062d3fecca9Ssthen 	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
1063d3fecca9Ssthen }
1064d3fecca9Ssthen 
1065d3fecca9Ssthen /** add free chunks at end until specified alignment occurs */
1066d3fecca9Ssthen static void
1067d3fecca9Ssthen grow_align(void* base, udb_alloc* alloc, uint64_t esz)
1068d3fecca9Ssthen {
1069d3fecca9Ssthen 	while( (alloc->disk->nextgrow & (esz-1)) != 0) {
1070d3fecca9Ssthen 		/* the nextgrow is not a whole multiple of esz. */
1071d3fecca9Ssthen 		/* grow a free chunk of max allowed size */
1072d3fecca9Ssthen 		int fexp = udb_exp_offset(alloc->disk->nextgrow);
1073d3fecca9Ssthen 		uint64_t fsz = (uint64_t)1<<fexp;
1074d3fecca9Ssthen 		udb_void f = alloc->disk->nextgrow;
1075d3fecca9Ssthen 		udb_void fn = alloc->disk->nextgrow+fsz;
1076d3fecca9Ssthen 		assert(fn <= alloc->udb->base_size);
1077d3fecca9Ssthen 		alloc->disk->stat_free += fsz;
1078d3fecca9Ssthen 		udb_alloc_push_fl(base, alloc, f, fexp);
1079d3fecca9Ssthen 		/* now increase nextgrow to commit that free chunk */
1080d3fecca9Ssthen 		alloc->disk->nextgrow = fn;
1081d3fecca9Ssthen 	}
1082d3fecca9Ssthen }
1083d3fecca9Ssthen 
/** append chunks at end of memory space to get size exp, return dataptr.
 * sz: user data size (for statistics), exp: chunk exponent to allocate.
 * The caller must have ensured base_size is large enough.
 * dirty_alloc brackets the update so an interrupted grow is detected
 * and salvaged at next open. */
static udb_void
grow_chunks(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	uint64_t esz = (uint64_t)1<<exp;
	udb_void ret;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	/* pad with free chunks until nextgrow is aligned to esz */
	grow_align(base, alloc, esz);
	/* free chunks are grown, grow the one we want to use */
	ret = alloc->disk->nextgrow;
	/* take a new alloced chunk into use */
	UDB_CHUNK(ret)->exp = (uint8_t)exp;
	UDB_CHUNK(ret)->flags = 0;
	UDB_CHUNK(ret)->ptrlist = 0;
	UDB_CHUNK(ret)->type = udb_chunk_type_data;
	/* store last octet */
	chunk_set_last(base, ret, exp, (uint8_t)exp);
	/* update stats */
	alloc->disk->stat_alloc += esz;
	alloc->disk->stat_data += sz;
	/* now increase nextgrow to commit this newly allocated chunk */
	alloc->disk->nextgrow += esz;
	assert(alloc->disk->nextgrow <= alloc->udb->base_size);
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	return ret + sizeof(udb_chunk_d); /* ptr to data */
}
1110d3fecca9Ssthen 
1111d3fecca9Ssthen /** calculate how much space is necessary to grow for this exp */
1112d3fecca9Ssthen static uint64_t
1113d3fecca9Ssthen grow_end_calc(udb_alloc* alloc, int exp)
1114d3fecca9Ssthen {
1115d3fecca9Ssthen 	uint64_t sz = (uint64_t)1<<exp;
1116d3fecca9Ssthen 	uint64_t ng = alloc->disk->nextgrow;
1117d3fecca9Ssthen 	uint64_t res;
1118d3fecca9Ssthen 	/* if nextgrow is 2**expness, no extra growth needed, only size */
1119d3fecca9Ssthen 	if( (ng & (sz-1)) == 0) {
1120d3fecca9Ssthen 		/* sz-1 is like 0xfff, and checks if ng is whole 2**exp */
1121d3fecca9Ssthen 		return ng+sz; /* must grow exactly 2**exp */
1122d3fecca9Ssthen 	}
1123d3fecca9Ssthen 	/* grow until 2**expness and then we need 2**exp as well */
1124d3fecca9Ssthen 	/* so, round ng down to whole sz (basically  ng-ng%sz, or ng/sz*sz)
1125d3fecca9Ssthen 	 * and then add the sz twice (go up to whole sz, and to allocate) */
1126d3fecca9Ssthen 	res = (ng & ~(sz-1)) + 2*sz;
1127d3fecca9Ssthen 	return res;
1128d3fecca9Ssthen }
1129d3fecca9Ssthen 
1130d3fecca9Ssthen /** see if we need to grow more than specified to enable sustained growth */
1131d3fecca9Ssthen static uint64_t
1132d3fecca9Ssthen grow_extra_check(udb_alloc* alloc, uint64_t ge)
1133d3fecca9Ssthen {
1134d3fecca9Ssthen 	const uint64_t mb = 1024*1024;
1135d3fecca9Ssthen 	uint64_t bsz = alloc->udb->base_size;
1136d3fecca9Ssthen 	if(bsz <= mb) {
1137d3fecca9Ssthen 		/* below 1 Mb, double sizes for exponential growth */
1138d3fecca9Ssthen 		/* takes about 15 times to grow to 1Mb */
1139d3fecca9Ssthen 		if(ge < bsz*2)
1140d3fecca9Ssthen 			return bsz*2;
1141d3fecca9Ssthen 	} else {
1142d3fecca9Ssthen 		uint64_t gnow = ge - bsz;
1143d3fecca9Ssthen 		/* above 1Mb, grow at least 1 Mb, or 12.5% of current size,
1144d3fecca9Ssthen 		 * in whole megabytes rounded up. */
1145d3fecca9Ssthen 		uint64_t want = ((bsz / 8) & ~(mb-1)) + mb;
1146d3fecca9Ssthen 		if(gnow < want)
1147d3fecca9Ssthen 			return bsz + want;
1148d3fecca9Ssthen 	}
1149d3fecca9Ssthen 	return ge;
1150d3fecca9Ssthen }
1151d3fecca9Ssthen 
1152bc6311d7Sflorian /** see if free space is enough to warrant shrink (while file is open) */
1153d3fecca9Ssthen static int
1154d3fecca9Ssthen enough_free(udb_alloc* alloc)
1155d3fecca9Ssthen {
1156d3fecca9Ssthen 	if(alloc->udb->base_size <= 2*1024*1024) {
1157d3fecca9Ssthen 		/* below 1 Mb, grown by double size, (so up to 2 mb),
1158d3fecca9Ssthen 		 * do not shrink unless we can 1/3 in size */
1159d3fecca9Ssthen 		if(((size_t)alloc->disk->nextgrow)*3 <= alloc->udb->base_size)
1160d3fecca9Ssthen 			return 1;
1161d3fecca9Ssthen 	} else {
1162d3fecca9Ssthen 		/* grown 12.5%, shrink 25% if possible, at least one mb */
1163d3fecca9Ssthen 		/* between 1mb and 4mb size, it shrinks by 1mb if possible */
1164d3fecca9Ssthen 		uint64_t space = alloc->udb->base_size - alloc->disk->nextgrow;
1165d3fecca9Ssthen 		if(space >= 1024*1024 && (space*4 >= alloc->udb->base_size
1166d3fecca9Ssthen 			|| alloc->udb->base_size < 4*1024*1024))
1167d3fecca9Ssthen 			return 1;
1168d3fecca9Ssthen 	}
1169d3fecca9Ssthen 	return 0;
1170d3fecca9Ssthen }
1171d3fecca9Ssthen 
/** grow space for a chunk of 2**exp and return dataptr.
 * sz: user data size (for statistics). Returns 0 when the file cannot
 * be grown (disk or memory full). */
static udb_void
udb_alloc_grow_space(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	/* commit the grow action
	 * - the file grow only changes filesize, but not the nextgrow.
	 * - taking space after nextgrow into use (as free space),
	 *   is like free-ing a chunk (one at a time).
	 * - and the last chunk taken into use is like alloc.
	 */
	/* predict how much free space is needed for this */
	uint64_t grow_end = grow_end_calc(alloc, exp);
	assert(alloc->udb->base_size >= alloc->disk->nextgrow);
	if(grow_end <= alloc->udb->base_size) {
		/* we can do this with the available space */
		return grow_chunks(base, alloc, sz, exp);
	}
	/* we have to grow the file, re-mmap */
	/* see if we need to grow a little more, to avoid endless grow
	 * efforts on adding data */
	grow_end = grow_extra_check(alloc, grow_end);
	if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
		return 0; /* mmap or write failed (disk or mem full) */
	}
	/* we have enough space now */
	assert(grow_end <= alloc->udb->base_size);
	assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
	return grow_chunks(base, alloc, sz, exp);
}
1201d3fecca9Ssthen 
/** take XL allocation into use at end of file, return dataptr.
 * xlsz: whole aligned size of the XL chunk, sz: user data size (stats).
 * Caller must have ensured base_size is large enough. */
static udb_void
grow_xl(void* base, udb_alloc* alloc, uint64_t xlsz, uint64_t sz)
{
	udb_void ret;
	udb_xl_chunk_d* p;
	/* mark dirty so an interrupted grow is salvaged at next open */
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;

	/* align growth to whole mbs */
	grow_align(base, alloc, UDB_ALLOC_CHUNK_SIZE);

	/* grow XL segment */
	ret = alloc->disk->nextgrow;
	p = UDB_XL_CHUNK(ret);
	p->exp = UDB_EXP_XL;
	p->size = xlsz;
	p->flags = 0;
	p->ptrlist = 0;
	p->type = udb_chunk_type_data;

	/* also put size and marker at end for compaction */
	*((uint64_t*)(UDB_REL(base, ret+xlsz-sizeof(uint64_t)*2))) = xlsz;
	*((uint8_t*)(UDB_REL(base, ret+xlsz-1))) = UDB_EXP_XL;

	/* stats */
	alloc->disk->stat_data += sz;
	alloc->disk->stat_alloc += xlsz;
	/* now increase the nextgrow to commit this xl chunk */
	alloc->disk->nextgrow += xlsz;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	return ret + sizeof(udb_xl_chunk_d); /* data ptr */
}
1234d3fecca9Ssthen 
/** make space for XL allocation; returns dataptr or 0 on failure.
 * sz: user data size; the total allocation is rounded up to whole
 * UDB_ALLOC_CHUNK_SIZE segments including the XL header and the
 * trailing size and exp-marker words. */
static udb_void
udb_alloc_xl_space(void* base, udb_alloc* alloc, size_t sz)
{
	/* allocate whole mbs of space, at end of space */
	uint64_t asz = sz + sizeof(udb_xl_chunk_d) + sizeof(uint64_t)*2;
	uint64_t need=(asz+UDB_ALLOC_CHUNK_SIZE-1)&(~(UDB_ALLOC_CHUNK_SIZE-1));
	uint64_t grow_end = grow_end_calc(alloc, UDB_ALLOC_CHUNKS_MAX) + need;
	assert(need >= asz);
	if(grow_end <= alloc->udb->base_size) {
		/* can do this in available space */
		return grow_xl(base, alloc, need, sz);
	}
	/* have to grow file and re-mmap */
	grow_end = grow_extra_check(alloc, grow_end);
	if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
		return 0; /* mmap or write failed (disk or mem full) */
	}
	/* we have enough space now */
	assert(grow_end <= alloc->udb->base_size);
	assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
	return grow_xl(base, alloc, need, sz);
}
1258d3fecca9Ssthen 
1259d3fecca9Ssthen /** divide big(2**e2) into pieces so 2**exp fits */
1260d3fecca9Ssthen static udb_void
1261d3fecca9Ssthen udb_alloc_subdivide(void* base, udb_alloc* alloc, udb_void big, int e2,
1262d3fecca9Ssthen 	int exp)
1263d3fecca9Ssthen {
1264d3fecca9Ssthen 	int e = e2;
1265d3fecca9Ssthen 	uint64_t sz = (uint64_t)1<<e2;
1266d3fecca9Ssthen 	assert(big && e2 > exp);
1267d3fecca9Ssthen 	/* so the returned piece to use is the first piece,
1268d3fecca9Ssthen 	 * offload the later half until it fits */
1269d3fecca9Ssthen 	do {
1270d3fecca9Ssthen 		sz >>= 1; /* divide size of big by two */
1271d3fecca9Ssthen 		e--;      /* that means its exp is one smaller */
1272d3fecca9Ssthen 		udb_alloc_push_fl(base, alloc, big+sz, e);
1273d3fecca9Ssthen 	} while(e != exp);
1274d3fecca9Ssthen 	/* exit loop when last pushed is same size as what we want */
1275d3fecca9Ssthen 	return big;
1276d3fecca9Ssthen }
1277d3fecca9Ssthen 
1278d3fecca9Ssthen /** returns the exponent size of the chunk needed for data sz */
1279d3fecca9Ssthen static int
1280d3fecca9Ssthen udb_alloc_exp_needed(size_t sz)
1281d3fecca9Ssthen {
1282d3fecca9Ssthen 	uint64_t asz = sz + sizeof(udb_chunk_d) + 1;
1283*4564029fSflorian 	int exp;
1284d3fecca9Ssthen 	if(asz > UDB_ALLOC_CHUNK_SIZE) {
1285d3fecca9Ssthen 		return UDB_EXP_XL;
1286d3fecca9Ssthen 	} else if(asz <= UDB_ALLOC_CHUNK_MINSIZE) {
1287d3fecca9Ssthen 		return UDB_ALLOC_CHUNK_MINEXP;
1288d3fecca9Ssthen 	}
1289*4564029fSflorian 	exp = udb_exp_size(asz);
1290*4564029fSflorian 	assert(exp <= UDB_ALLOC_CHUNKS_MAX);
1291*4564029fSflorian 	return exp;
1292d3fecca9Ssthen }
1293d3fecca9Ssthen 
/** allocate space for sz octets of data; returns dataptr, or 0 when the
 * file could not be grown. Tries, in order: the XL path for oversize
 * requests, an exact-size freelist entry, subdividing a larger free
 * chunk, and finally growing the file. */
udb_void udb_alloc_space(udb_alloc* alloc, size_t sz)
{
	void* base = alloc->udb->base;
	/* calculate actual allocation size */
	int e2, exp = udb_alloc_exp_needed(sz);
	if(exp == UDB_EXP_XL)
		return udb_alloc_xl_space(base, alloc, sz);
	/* see if there is a free chunk of that size exactly */
	if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]) {
		/* snip from freelist, udb_chunk_d */
		udb_void ret;
		alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
		ret = udb_alloc_pop_fl(base, alloc, exp);
		/* use it - size octets already OK */
		UDB_CHUNK(ret)->flags = 0;
		UDB_CHUNK(ret)->ptrlist = 0;
		UDB_CHUNK(ret)->type = udb_chunk_type_data;
		/* update stats */
		alloc->disk->stat_data += sz;
		alloc->disk->stat_alloc += (1<<exp);
		assert(alloc->disk->stat_free >= (1u<<exp));
		alloc->disk->stat_free -= (1<<exp);
		alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
		return ret + sizeof(udb_chunk_d); /* ptr to data */
	}
	/* see if we can subdivide a larger chunk */
	for(e2 = exp+1; e2 <= UDB_ALLOC_CHUNKS_MAX; e2++)
		if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
			udb_void big, ret; /* udb_chunk_d */
			alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
			big = udb_alloc_pop_fl(base, alloc, e2);
			/* push other parts onto freelists (needs inited) */
			ret = udb_alloc_subdivide(base, alloc, big, e2, exp);
			/* use final part (needs inited) */
			UDB_CHUNK(ret)->exp = (uint8_t)exp;
			/* if stop here; the new exp makes smaller free chunk*/
			UDB_CHUNK(ret)->flags = 0;
			UDB_CHUNK(ret)->ptrlist = 0;
			/* set type to commit data chunk */
			UDB_CHUNK(ret)->type = udb_chunk_type_data;
			/* store last octet */
			chunk_set_last(base, ret, exp, (uint8_t)exp);
			/* update stats */
			alloc->disk->stat_data += sz;
			alloc->disk->stat_alloc += (1<<exp);
			assert(alloc->disk->stat_free >= (1u<<exp));
			alloc->disk->stat_free -= (1<<exp);
			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
			return ret + sizeof(udb_chunk_d); /* ptr to data */
		}
	/* we need to grow an extra chunk */
	return udb_alloc_grow_space(base, alloc, sz, exp);
}
1347d3fecca9Ssthen 
1348d3fecca9Ssthen /** see if there is free space to allocate a chunk into */
1349d3fecca9Ssthen static int
1350d3fecca9Ssthen have_free_for(udb_alloc* alloc, int exp)
1351d3fecca9Ssthen {
1352d3fecca9Ssthen 	int e2;
1353d3fecca9Ssthen 	if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP])
1354d3fecca9Ssthen 		return exp;
1355595fd91cSbrad 	for(e2 = exp+1; e2 <= UDB_ALLOC_CHUNKS_MAX; e2++)
1356d3fecca9Ssthen 		if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
1357d3fecca9Ssthen 			return e2;
1358d3fecca9Ssthen 		}
1359d3fecca9Ssthen 	return 0;
1360d3fecca9Ssthen }
1361d3fecca9Ssthen 
1362d3fecca9Ssthen /** fix relptr prev and next for moved relptr structures */
1363d3fecca9Ssthen static void
1364d3fecca9Ssthen chunk_fix_ptr_each(void* base, udb_rel_ptr* rp, void* arg)
1365d3fecca9Ssthen {
1366d3fecca9Ssthen 	udb_void* data = (udb_void*)arg;
1367d3fecca9Ssthen 	udb_void r;
1368d3fecca9Ssthen 	if(!rp->data)
1369d3fecca9Ssthen 		return;
1370d3fecca9Ssthen 	r = UDB_SYSTOREL(base, rp);
1371d3fecca9Ssthen 	if(rp->next)
1372d3fecca9Ssthen 		UDB_REL_PTR(rp->next)->prev = r;
1373d3fecca9Ssthen 	if(rp->prev)
1374d3fecca9Ssthen 		UDB_REL_PTR(rp->prev)->next = r;
1375d3fecca9Ssthen 	else	{
1376d3fecca9Ssthen 		/* if this is a pointer to its own chunk, fix it up;
1377d3fecca9Ssthen 		 * the data ptr gets set by relptr_edit later. */
1378d3fecca9Ssthen 		if(rp->data == data[0])
1379d3fecca9Ssthen 			UDB_CHUNK(data[1])->ptrlist = r;
1380d3fecca9Ssthen 		else	UDB_CHUNK(chunk_from_dataptr(rp->data))->ptrlist = r;
1381d3fecca9Ssthen 	}
1382d3fecca9Ssthen }
1383d3fecca9Ssthen 
1384d3fecca9Ssthen /** fix pointers from and to a moved chunk */
1385d3fecca9Ssthen static void
1386d3fecca9Ssthen chunk_fix_ptrs(void* base, udb_base* udb, udb_chunk_d* cp, udb_void data,
1387d3fecca9Ssthen 	uint64_t dsz, udb_void olddata)
1388d3fecca9Ssthen {
1389d3fecca9Ssthen 	udb_void d[2];
1390d3fecca9Ssthen 	d[0] = olddata;
1391d3fecca9Ssthen 	d[1] = data;
1392d3fecca9Ssthen 	(*udb->walkfunc)(base, udb->walkarg, cp->type, UDB_REL(base, data),
1393d3fecca9Ssthen 		dsz, &chunk_fix_ptr_each, d);
1394d3fecca9Ssthen 	udb_rel_ptr_edit(base, cp->ptrlist, data);
1395d3fecca9Ssthen 	udb_base_ram_ptr_edit(udb, olddata, data);
1396d3fecca9Ssthen }
1397d3fecca9Ssthen 
1398d3fecca9Ssthen /** move an allocated chunk to use a free chunk */
1399d3fecca9Ssthen static void
1400d3fecca9Ssthen move_chunk(void* base, udb_alloc* alloc, udb_void f, int exp, uint64_t esz,
1401d3fecca9Ssthen 	int e2)
1402d3fecca9Ssthen {
1403d3fecca9Ssthen 	udb_void res = udb_alloc_pop_fl(base, alloc, e2);
1404d3fecca9Ssthen 	udb_chunk_d* rp;
1405d3fecca9Ssthen 	udb_chunk_d* fp;
1406d3fecca9Ssthen 	if(exp != e2) {
1407d3fecca9Ssthen 		/* it is bigger, subdivide it */
1408d3fecca9Ssthen 		res = udb_alloc_subdivide(base, alloc, res, e2, exp);
1409d3fecca9Ssthen 	}
1410d3fecca9Ssthen 	assert(res != f);
1411d3fecca9Ssthen 	/* setup rollback information */
1412d3fecca9Ssthen 	alloc->udb->glob_data->rb_old = f;
1413d3fecca9Ssthen 	alloc->udb->glob_data->rb_new = res;
1414d3fecca9Ssthen 	alloc->udb->glob_data->rb_size = esz;
1415d3fecca9Ssthen 	/* take the res, exp into use */
1416d3fecca9Ssthen 	rp = UDB_CHUNK(res);
1417d3fecca9Ssthen 	fp = UDB_CHUNK(f);
1418d3fecca9Ssthen 	/* copy over the data */
1419d3fecca9Ssthen 	memcpy(rp, fp, esz);
1420d3fecca9Ssthen 	/* adjust rel ptrs */
1421d3fecca9Ssthen 	chunk_fix_ptrs(base, alloc->udb, rp, res+sizeof(udb_chunk_d),
1422d3fecca9Ssthen 		esz-sizeof(udb_chunk_d)-1, f+sizeof(udb_chunk_d));
1423d3fecca9Ssthen 
1424d3fecca9Ssthen 	/* do not freeup the fp; caller does that */
1425d3fecca9Ssthen }
1426d3fecca9Ssthen 
1427d3fecca9Ssthen /** unlink several free elements to overwrite with xl chunk */
1428d3fecca9Ssthen static void
1429d3fecca9Ssthen free_xl_space(void* base, udb_alloc* alloc, udb_void s, uint64_t m)
1430d3fecca9Ssthen {
1431d3fecca9Ssthen 	udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
1432d3fecca9Ssthen 	/* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
1433d3fecca9Ssthen 	assert(s >= UDB_ALLOC_CHUNK_SIZE);
1434d3fecca9Ssthen 	while(q >= s) {
1435d3fecca9Ssthen 		assert(UDB_CHUNK(q)->exp == UDB_ALLOC_CHUNKS_MAX);
1436d3fecca9Ssthen 		assert(UDB_CHUNK(q)->type == udb_chunk_type_free);
1437d3fecca9Ssthen 		udb_alloc_unlink_fl(base, alloc, q, UDB_ALLOC_CHUNKS_MAX);
1438d3fecca9Ssthen 		q -= UDB_ALLOC_CHUNK_SIZE;
1439d3fecca9Ssthen 	}
1440d3fecca9Ssthen }
1441d3fecca9Ssthen 
1442d3fecca9Ssthen /** move an XL chunk, and keep track of segments for rollback */
1443d3fecca9Ssthen static void
1444d3fecca9Ssthen move_xl_segment(void* base, udb_base* udb, udb_void xl, udb_void n,
1445d3fecca9Ssthen 	uint64_t sz, uint64_t startseg)
1446d3fecca9Ssthen {
1447d3fecca9Ssthen 	udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
1448d3fecca9Ssthen 	udb_xl_chunk_d* np = UDB_XL_CHUNK(n);
1449d3fecca9Ssthen 	uint64_t amount = xl - n;
1450d3fecca9Ssthen 	assert(n < xl); /* move to compact */
1451d3fecca9Ssthen 
1452d3fecca9Ssthen 	/* setup move rollback */
1453d3fecca9Ssthen 	udb->glob_data->rb_old = xl;
1454d3fecca9Ssthen 	udb->glob_data->rb_new = n;
1455d3fecca9Ssthen 	udb->glob_data->rb_size = sz;
1456d3fecca9Ssthen 
1457d3fecca9Ssthen 	/* is it overlapping? */
1458d3fecca9Ssthen 	if(sz <= amount) {
1459d3fecca9Ssthen 		memcpy(np, xlp, sz);
1460d3fecca9Ssthen 	} else {
1461d3fecca9Ssthen 		/* move and commit per 1M segment to avoid data loss */
1462d3fecca9Ssthen 		uint64_t seg, maxseg = amount/UDB_ALLOC_CHUNK_SIZE;
1463d3fecca9Ssthen 		for(seg = startseg; seg<maxseg; seg++) {
1464d3fecca9Ssthen 			udb->glob_data->rb_seg = seg;
1465d3fecca9Ssthen 			memcpy(np+seg*UDB_ALLOC_CHUNK_SIZE,
1466d3fecca9Ssthen 				xlp+seg*UDB_ALLOC_CHUNK_SIZE,
1467d3fecca9Ssthen 				UDB_ALLOC_CHUNK_SIZE);
1468d3fecca9Ssthen 		}
1469d3fecca9Ssthen 
1470d3fecca9Ssthen 	}
1471d3fecca9Ssthen }
1472d3fecca9Ssthen 
1473d3fecca9Ssthen /** move list of XL chunks to the front by the shift amount */
1474d3fecca9Ssthen static void
1475d3fecca9Ssthen move_xl_list(void* base, udb_alloc* alloc, udb_void xl_start, uint64_t xl_sz,
1476d3fecca9Ssthen 	uint64_t amount)
1477d3fecca9Ssthen {
1478d3fecca9Ssthen 	udb_void xl = xl_start;
1479d3fecca9Ssthen 	assert( (xl_start&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
1480d3fecca9Ssthen 	assert( (amount&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
1481d3fecca9Ssthen 	assert( (xl_sz&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
1482d3fecca9Ssthen 	while(xl < xl_start+xl_sz) {
1483d3fecca9Ssthen 		udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
1484d3fecca9Ssthen 		udb_void n = xl-amount;
1485d3fecca9Ssthen 		uint64_t sz = xlp->size;
1486d3fecca9Ssthen 		assert(xlp->exp == UDB_EXP_XL);
1487d3fecca9Ssthen 		move_xl_segment(base, alloc->udb, xl, n, sz, 0);
1488d3fecca9Ssthen 		chunk_fix_ptrs(base, alloc->udb, UDB_CHUNK(n),
1489d3fecca9Ssthen 			n+sizeof(udb_xl_chunk_d),
1490d3fecca9Ssthen 			sz-sizeof(udb_xl_chunk_d)-sizeof(uint64_t)*2,
1491d3fecca9Ssthen 			xl+sizeof(udb_xl_chunk_d));
1492d3fecca9Ssthen 	}
1493d3fecca9Ssthen 	alloc->disk->stat_free -= amount;
1494d3fecca9Ssthen 	alloc->disk->nextgrow -= amount;
1495d3fecca9Ssthen 	alloc->udb->glob_data->rb_old = 0;
1496d3fecca9Ssthen 	alloc->udb->glob_data->rb_new = 0;
1497d3fecca9Ssthen 	alloc->udb->glob_data->rb_size = 0;
1498d3fecca9Ssthen }
1499d3fecca9Ssthen 
1500d3fecca9Ssthen /** see if free chunk can coagulate with another chunk, return other chunk */
1501d3fecca9Ssthen static udb_void
1502d3fecca9Ssthen coagulate_possible(void* base, udb_alloc* alloc, udb_void f, int exp,
1503d3fecca9Ssthen 	uint64_t esz)
1504d3fecca9Ssthen {
1505d3fecca9Ssthen 	udb_void other = f^esz;
1506d3fecca9Ssthen 	if(exp == UDB_ALLOC_CHUNKS_MAX)
1507d3fecca9Ssthen 		return 0; /* no further merges */
1508d3fecca9Ssthen 	if(other >= alloc->udb->base_size)
1509d3fecca9Ssthen 		return 0; /* not allocated */
1510d3fecca9Ssthen 	if(other >= alloc->disk->nextgrow)
1511d3fecca9Ssthen 		return 0; /* not in use */
1512d3fecca9Ssthen 	if(other < alloc->udb->glob_data->hsize)
1513d3fecca9Ssthen 		return 0; /* cannot merge with header */
1514d3fecca9Ssthen 		/* the header is also protected by the special exp marker */
1515d3fecca9Ssthen 	/* see if the other chunk is a free chunk */
1516d3fecca9Ssthen 
1517d3fecca9Ssthen 	/* check closest marker to avoid large memory churn */
1518d3fecca9Ssthen 	/* and also it makes XL allocations and header special markers work */
1519d3fecca9Ssthen 	if(f > other) {
1520d3fecca9Ssthen 		assert(f > 1); /* this is certain because of header */
1521d3fecca9Ssthen 		if(*((uint8_t*)UDB_REL(base, f-1)) == (uint8_t)exp) {
1522d3fecca9Ssthen 			/* can do it if the other part is a free chunk */
1523d3fecca9Ssthen 			assert(UDB_FREE_CHUNK(other)->exp == (uint8_t)exp);
1524d3fecca9Ssthen 			if(UDB_CHUNK(other)->type == udb_chunk_type_free)
1525d3fecca9Ssthen 				return other;
1526d3fecca9Ssthen 		}
1527d3fecca9Ssthen 	} else {
1528d3fecca9Ssthen 		if(UDB_CHUNK(other)->exp == (uint8_t)exp) {
1529d3fecca9Ssthen 			/* can do it if the other part is a free chunk */
1530d3fecca9Ssthen 			assert(chunk_get_last(base, other, exp)==(uint8_t)exp);
1531d3fecca9Ssthen 			if(UDB_CHUNK(other)->type == udb_chunk_type_free)
1532d3fecca9Ssthen 				return other;
1533d3fecca9Ssthen 		}
1534d3fecca9Ssthen 	}
1535d3fecca9Ssthen 	return 0;
1536d3fecca9Ssthen }
1537d3fecca9Ssthen 
1538d3fecca9Ssthen /** coagulate and then add new free segment, return final free segment */
1539d3fecca9Ssthen static udb_void
1540d3fecca9Ssthen coagulate_and_push(void* base, udb_alloc* alloc, udb_void last, int exp,
1541d3fecca9Ssthen 	uint64_t esz)
1542d3fecca9Ssthen {
1543d3fecca9Ssthen 	/* new free chunk here, attempt coagulate */
1544d3fecca9Ssthen 	udb_void other;
1545d3fecca9Ssthen 	while( (other=coagulate_possible(base, alloc, last, exp, esz)) ) {
1546d3fecca9Ssthen 		/* unlink that other chunk */
1547d3fecca9Ssthen 		udb_alloc_unlink_fl(base, alloc, other, exp);
1548d3fecca9Ssthen 		/* merge up */
1549d3fecca9Ssthen 		if(other < last)
1550d3fecca9Ssthen 			last = other;
1551d3fecca9Ssthen 		exp++;
1552d3fecca9Ssthen 		esz <<= 1;
1553d3fecca9Ssthen 	}
1554d3fecca9Ssthen 	/* free the final segment */
1555d3fecca9Ssthen 	udb_alloc_push_fl(base, alloc, last, exp);
1556d3fecca9Ssthen 	return last;
1557d3fecca9Ssthen }
1558d3fecca9Ssthen 
1559d3fecca9Ssthen /** attempt to compact the data and move free space to the end */
156015ed76cbSbrad int
1561d3fecca9Ssthen udb_alloc_compact(void* base, udb_alloc* alloc)
1562d3fecca9Ssthen {
1563d3fecca9Ssthen 	udb_void last;
1564d3fecca9Ssthen 	int exp, e2;
1565d3fecca9Ssthen 	uint64_t esz;
1566d3fecca9Ssthen 	uint64_t at = alloc->disk->nextgrow;
1567d3fecca9Ssthen 	udb_void xl_start = 0;
1568d3fecca9Ssthen 	uint64_t xl_sz = 0;
156915ed76cbSbrad 	if(alloc->udb->inhibit_compact)
157015ed76cbSbrad 		return 1;
157115ed76cbSbrad 	alloc->udb->useful_compact = 0;
1572d3fecca9Ssthen 	while(at > alloc->udb->glob_data->hsize) {
1573d3fecca9Ssthen 		/* grab last entry */
1574d3fecca9Ssthen 		exp = (int)*((uint8_t*)UDB_REL(base, at-1));
1575d3fecca9Ssthen 		if(exp == UDB_EXP_XL) {
1576d3fecca9Ssthen 			/* for XL chunks:
1577d3fecca9Ssthen 			 * - inspect the size of the XLchunklist at end
1578d3fecca9Ssthen 			 * - attempt to compact in front of of XLchunklist
1579d3fecca9Ssthen 			 */
1580d3fecca9Ssthen 			uint64_t xlsz = *((uint64_t*)UDB_REL(base,
1581d3fecca9Ssthen 				at-sizeof(uint64_t)*2));
1582d3fecca9Ssthen 			udb_void xl = at-xlsz;
1583d3fecca9Ssthen #ifndef NDEBUG
1584d3fecca9Ssthen 			udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
1585d3fecca9Ssthen 			assert(xlp->exp == UDB_EXP_XL);
1586d3fecca9Ssthen 			assert(xlp->type != udb_chunk_type_free);
1587d3fecca9Ssthen #endif
1588d3fecca9Ssthen 			/* got thesegment add to the xl chunk list */
1589d3fecca9Ssthen 			if(xl_start != 0 && xl+xlsz != xl_start) {
1590d3fecca9Ssthen 				/* nonadjoining XL part, but they are aligned,
1591d3fecca9Ssthen 				 * so the space in between is whole Mbs,
1592d3fecca9Ssthen 				 * shift the later part(s) and continue */
1593d3fecca9Ssthen 				uint64_t m = xl_start - (xl+xlsz);
1594d3fecca9Ssthen 				assert(xl_start > xl+xlsz);
1595d3fecca9Ssthen 				alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
1596d3fecca9Ssthen 				free_xl_space(base, alloc, xl+xlsz, m);
1597d3fecca9Ssthen 				move_xl_list(base, alloc, xl_start, xl_sz, m);
1598d3fecca9Ssthen 				alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1599d3fecca9Ssthen 			}
1600d3fecca9Ssthen 			xl_start = xl;
1601d3fecca9Ssthen 			xl_sz += xlsz;
1602d3fecca9Ssthen 			at = xl;
1603d3fecca9Ssthen 			continue;
1604d3fecca9Ssthen 			/* end of XL if */
1605d3fecca9Ssthen 		} else if(exp < UDB_ALLOC_CHUNK_MINEXP
1606d3fecca9Ssthen 			|| exp > UDB_ALLOC_CHUNKS_MAX)
1607d3fecca9Ssthen 			break; /* special chunk or garbage */
1608d3fecca9Ssthen 		esz = (uint64_t)1<<exp;
1609d3fecca9Ssthen 		last = at - esz;
1610d3fecca9Ssthen 		assert(UDB_CHUNK(last)->exp == (uint8_t)exp);
1611d3fecca9Ssthen 		if(UDB_CHUNK(last)->type == udb_chunk_type_free) {
1612d3fecca9Ssthen 			/* if xlstart continue looking to move stuff, but do
1613d3fecca9Ssthen 			 * not unlink this free segment */
1614d3fecca9Ssthen 			if(!xl_start) {
1615d3fecca9Ssthen 				/* it is a free chunk, remove it */
1616d3fecca9Ssthen 				alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1617d3fecca9Ssthen 				udb_alloc_unlink_fl(base, alloc, last, exp);
1618d3fecca9Ssthen 				alloc->disk->stat_free -= esz;
1619d3fecca9Ssthen 				alloc->disk->nextgrow = last;
1620d3fecca9Ssthen 				alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1621d3fecca9Ssthen 				/* and continue at this point */
1622d3fecca9Ssthen 			}
1623d3fecca9Ssthen 			at = last;
1624d3fecca9Ssthen 		} else if( (e2=have_free_for(alloc, exp)) ) {
1625d3fecca9Ssthen 			/* last entry can be allocated in free chunks
1626d3fecca9Ssthen 			 * move it to its new position, adjust rel_ptrs */
1627d3fecca9Ssthen 			alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
1628d3fecca9Ssthen 			move_chunk(base, alloc, last, exp, esz, e2);
1629d3fecca9Ssthen 			if(xl_start) {
1630d3fecca9Ssthen 				last = coagulate_and_push(base, alloc,
1631d3fecca9Ssthen 					last, exp, esz);
1632d3fecca9Ssthen 			} else {
1633d3fecca9Ssthen 				/* shorten usage */
1634d3fecca9Ssthen 				alloc->disk->stat_free -= esz;
1635d3fecca9Ssthen 				alloc->disk->nextgrow = last;
1636d3fecca9Ssthen 			}
1637d3fecca9Ssthen 			alloc->udb->glob_data->rb_old = 0;
1638d3fecca9Ssthen 			alloc->udb->glob_data->rb_new = 0;
1639d3fecca9Ssthen 			alloc->udb->glob_data->rb_size = 0;
1640d3fecca9Ssthen 			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1641d3fecca9Ssthen 			/* and continue in front of it */
1642d3fecca9Ssthen 			at = last;
1643d3fecca9Ssthen 		} else {
1644d3fecca9Ssthen 			/* cannot compact this block, stop compacting */
1645d3fecca9Ssthen 			break;
1646d3fecca9Ssthen 		}
1647d3fecca9Ssthen 		/* if that worked, repeat it */
1648d3fecca9Ssthen 	}
1649d3fecca9Ssthen 	/* if we passed xl chunks, see if XL-chunklist can move */
1650d3fecca9Ssthen 	if(xl_start) {
1651d3fecca9Ssthen 		/* calculate free space in front of the XLchunklist. */
1652d3fecca9Ssthen 		/* has to be whole mbs of free space */
1653d3fecca9Ssthen 		/* if so, we can move the XL chunks.  Move them all back
1654d3fecca9Ssthen 		 * by the new free space. */
1655d3fecca9Ssthen 		/* this compacts very well, but the XL chunks can be moved
1656d3fecca9Ssthen 		 * multiple times; worst case for every mb freed a huge sized
1657d3fecca9Ssthen 		 * xlchunklist gets moved. */
1658d3fecca9Ssthen 		/* free space must be, since aligned and coagulated, in
1659d3fecca9Ssthen 		 * chunks of a whole MB */
1660d3fecca9Ssthen 		udb_void at = xl_start;
1661d3fecca9Ssthen 		uint64_t m = 0;
1662d3fecca9Ssthen 		while(*((uint8_t*)UDB_REL(base, at-1))==UDB_ALLOC_CHUNKS_MAX){
1663d3fecca9Ssthen 			udb_void chunk = at - UDB_ALLOC_CHUNK_SIZE;
1664d3fecca9Ssthen 			if(UDB_CHUNK(chunk)->type != udb_chunk_type_free)
1665d3fecca9Ssthen 				break;
1666d3fecca9Ssthen 			assert(UDB_CHUNK(chunk)->exp==UDB_ALLOC_CHUNKS_MAX);
1667d3fecca9Ssthen 			m += UDB_ALLOC_CHUNK_SIZE;
1668d3fecca9Ssthen 			at = chunk;
1669d3fecca9Ssthen 		}
1670d3fecca9Ssthen 		if(m != 0) {
1671d3fecca9Ssthen 			assert(at+m == xl_start);
1672d3fecca9Ssthen 			alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
1673d3fecca9Ssthen 			free_xl_space(base, alloc, at, m);
1674d3fecca9Ssthen 			move_xl_list(base, alloc, xl_start, xl_sz, m);
1675d3fecca9Ssthen 			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1676d3fecca9Ssthen 		}
1677d3fecca9Ssthen 	}
1678d3fecca9Ssthen 
1679d3fecca9Ssthen 	/* if enough free, shrink the file; re-mmap */
1680d3fecca9Ssthen 	if(enough_free(alloc)) {
1681d3fecca9Ssthen 		uint64_t nsize = alloc->disk->nextgrow;
1682d3fecca9Ssthen 		udb_base_shrink(alloc->udb, nsize);
1683d3fecca9Ssthen 		if(!udb_base_remap(alloc->udb, alloc, nsize))
1684d3fecca9Ssthen 			return 0;
1685d3fecca9Ssthen 	}
1686d3fecca9Ssthen 	return 1;
1687d3fecca9Ssthen }
1688d3fecca9Ssthen 
168915ed76cbSbrad int
169015ed76cbSbrad udb_compact(udb_base* udb)
169115ed76cbSbrad {
169215ed76cbSbrad 	if(!udb) return 1;
169315ed76cbSbrad 	if(!udb->useful_compact) return 1;
169415ed76cbSbrad 	DEBUG(DEBUG_DBACCESS, 1, (LOG_INFO, "Compacting database..."));
169515ed76cbSbrad 	return udb_alloc_compact(udb->base, udb->alloc);
169615ed76cbSbrad }
169715ed76cbSbrad 
169815ed76cbSbrad void udb_compact_inhibited(udb_base* udb, int inhibit)
169915ed76cbSbrad {
170015ed76cbSbrad 	if(!udb) return;
170115ed76cbSbrad 	udb->inhibit_compact = inhibit;
170215ed76cbSbrad }
170315ed76cbSbrad 
1704d3fecca9Ssthen #ifdef UDB_CHECK
1705d3fecca9Ssthen /** check that rptrs are really zero before free */
1706d3fecca9Ssthen void udb_check_rptr_zero(void* base, udb_rel_ptr* p, void* arg)
1707d3fecca9Ssthen {
1708d3fecca9Ssthen 	(void)base;
1709d3fecca9Ssthen 	(void)arg;
1710d3fecca9Ssthen 	assert(p->data == 0);
1711d3fecca9Ssthen }
1712d3fecca9Ssthen #endif /* UDB_CHECK */
1713d3fecca9Ssthen 
1714d3fecca9Ssthen /** free XL chunk as multiples of CHUNK_SIZE free segments */
1715d3fecca9Ssthen static void
1716d3fecca9Ssthen udb_free_xl(void* base, udb_alloc* alloc, udb_void f, udb_xl_chunk_d* fp,
1717d3fecca9Ssthen 	size_t sz)
1718d3fecca9Ssthen {
1719d3fecca9Ssthen 	uint64_t xlsz = fp->size;
1720d3fecca9Ssthen 	uint64_t c;
1721d3fecca9Ssthen 	/* lightweight check for buffer overflow in xl data */
1722d3fecca9Ssthen 	assert(*((uint64_t*)(UDB_REL(base, f+xlsz-sizeof(uint64_t)*2)))==xlsz);
1723d3fecca9Ssthen 	assert(*((uint8_t*)(UDB_REL(base, f+xlsz-1))) == UDB_EXP_XL);
1724d3fecca9Ssthen 	assert( (xlsz & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* whole mbs */
1725d3fecca9Ssthen 	assert( (f & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
1726d3fecca9Ssthen #ifdef UDB_CHECK
1727d3fecca9Ssthen 	/* check that relptrs in this chunk have been zeroed */
1728d3fecca9Ssthen 	(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
1729d3fecca9Ssthen 		UDB_REL(base, f+sizeof(udb_xl_chunk_d)), xlsz,
1730d3fecca9Ssthen 		&udb_check_rptr_zero, NULL);
1731d3fecca9Ssthen #endif
1732d3fecca9Ssthen 	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1733d3fecca9Ssthen 	/* update stats */
1734d3fecca9Ssthen 	alloc->disk->stat_data -= sz;
1735d3fecca9Ssthen 	alloc->disk->stat_alloc -= xlsz;
1736d3fecca9Ssthen 	alloc->disk->stat_free += xlsz;
1737d3fecca9Ssthen 	/* walk in reverse, so the front blocks go first on the list */
1738d3fecca9Ssthen 	c = f + xlsz - UDB_ALLOC_CHUNK_SIZE;
1739d3fecca9Ssthen 	/* because of header and alignment we know f >= UDB_ALLOC_CHUNK_SIZE*/
1740d3fecca9Ssthen 	assert(f >= UDB_ALLOC_CHUNK_SIZE);
1741d3fecca9Ssthen 	while(c >= f) {
1742d3fecca9Ssthen 		/* free a block of CHUNK_SIZE (1 Mb) */
1743d3fecca9Ssthen 		udb_alloc_push_fl(base, alloc, c, UDB_ALLOC_CHUNKS_MAX);
1744d3fecca9Ssthen 		c -= UDB_ALLOC_CHUNK_SIZE;
1745d3fecca9Ssthen 	}
1746d3fecca9Ssthen 	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1747d3fecca9Ssthen }
1748d3fecca9Ssthen 
1749d3fecca9Ssthen int udb_alloc_free(udb_alloc* alloc, udb_void r, size_t sz)
1750d3fecca9Ssthen {
1751d3fecca9Ssthen 	void* base;
1752d3fecca9Ssthen 	/* lookup chunk ptr */
1753d3fecca9Ssthen 	udb_void f;
1754d3fecca9Ssthen 	udb_chunk_d* fp;
1755d3fecca9Ssthen 	uint64_t esz;
1756d3fecca9Ssthen 	int exp;
1757d3fecca9Ssthen 	udb_void other;
1758d3fecca9Ssthen 	int coagulated = 0;
1759d3fecca9Ssthen 	if(!r)
1760d3fecca9Ssthen 		return 1; /* free(NULL) does nothing */
1761d3fecca9Ssthen 
1762d3fecca9Ssthen 	/* lookup size of chunk */
1763d3fecca9Ssthen 	base = alloc->udb->base;
1764d3fecca9Ssthen 	/* fails for XL blocks */
1765d3fecca9Ssthen 	f = chunk_from_dataptr(r);
1766d3fecca9Ssthen 	fp = UDB_CHUNK(f);
1767d3fecca9Ssthen 	assert(fp->type != udb_chunk_type_free);
1768d3fecca9Ssthen 
1769d3fecca9Ssthen 	/* see if it has a ptrlist, if so: trouble, the list is not properly
1770d3fecca9Ssthen 	 * cleaned up. (although you can imagine a wholesale delete where
1771d3fecca9Ssthen 	 * it does not matter) */
1772d3fecca9Ssthen 	assert(fp->ptrlist == 0);
1773d3fecca9Ssthen 
1774d3fecca9Ssthen 	/* set ptrlist to 0 to stop relptr from using it, robustness. */
1775d3fecca9Ssthen 	fp->ptrlist = 0;
1776d3fecca9Ssthen 
1777d3fecca9Ssthen 	if(fp->exp == UDB_EXP_XL) {
1778d3fecca9Ssthen 		udb_free_xl(base, alloc, f, (udb_xl_chunk_d*)fp, sz);
1779d3fecca9Ssthen 		/* compact */
178015ed76cbSbrad 		if(alloc->udb->inhibit_compact) {
178115ed76cbSbrad 			alloc->udb->useful_compact = 1;
178215ed76cbSbrad 			return 1;
178315ed76cbSbrad 		}
1784d3fecca9Ssthen 		return udb_alloc_compact(base, alloc);
1785d3fecca9Ssthen 	}
1786d3fecca9Ssthen 	/* it is a regular chunk of 2**exp size */
1787d3fecca9Ssthen 	exp = (int)fp->exp;
1788d3fecca9Ssthen 	esz = (uint64_t)1<<exp;
1789d3fecca9Ssthen 	/* light check for e.g. buffer overflow of the data */
1790d3fecca9Ssthen 	assert(sz < esz);
1791d3fecca9Ssthen 	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
1792d3fecca9Ssthen #ifdef UDB_CHECK
1793d3fecca9Ssthen 	/* check that relptrs in this chunk have been zeroed */
1794d3fecca9Ssthen 	(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
1795d3fecca9Ssthen 		UDB_REL(base, r), esz, &udb_check_rptr_zero, NULL);
1796d3fecca9Ssthen #endif
1797d3fecca9Ssthen 
1798d3fecca9Ssthen 	/* update the stats */
1799d3fecca9Ssthen 	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1800d3fecca9Ssthen 	alloc->disk->stat_data -= sz;
1801d3fecca9Ssthen 	alloc->disk->stat_free += esz;
1802d3fecca9Ssthen 	alloc->disk->stat_alloc -= esz;
1803d3fecca9Ssthen 
1804d3fecca9Ssthen 	/* if it can be merged with other free chunks, do so */
1805d3fecca9Ssthen 	while( (other=coagulate_possible(base, alloc, f, exp, esz)) ) {
1806d3fecca9Ssthen 		coagulated = 1;
1807d3fecca9Ssthen 		/* unlink that other chunk and expand it (it has same size) */
1808d3fecca9Ssthen 		udb_alloc_unlink_fl(base, alloc, other, exp);
1809d3fecca9Ssthen 		/* merge up */
1810d3fecca9Ssthen 		if(other < f)
1811d3fecca9Ssthen 			f = other;
1812d3fecca9Ssthen 		exp++;
1813d3fecca9Ssthen 		esz <<= 1;
1814d3fecca9Ssthen 	}
1815d3fecca9Ssthen 	if(coagulated) {
1816d3fecca9Ssthen 		/* put big free chunk into freelist, and init it */
1817d3fecca9Ssthen 		udb_alloc_push_fl(base, alloc, f, exp);
1818d3fecca9Ssthen 	} else {
1819d3fecca9Ssthen 		/* we do not need to touch the last-exp-byte, which may save
1820d3fecca9Ssthen 		 * a reference to that page of memory */
1821d3fecca9Ssthen 		fp->type = udb_chunk_type_free;
1822d3fecca9Ssthen 		fp->flags = 0;
1823d3fecca9Ssthen 		udb_alloc_push_fl_noinit(base, alloc, f, exp);
1824d3fecca9Ssthen 	}
1825d3fecca9Ssthen 	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1826d3fecca9Ssthen 	/* compact */
182715ed76cbSbrad 	if(alloc->udb->inhibit_compact) {
182815ed76cbSbrad 		alloc->udb->useful_compact = 1;
182915ed76cbSbrad 		return 1;
183015ed76cbSbrad 	}
1831d3fecca9Ssthen 	return udb_alloc_compact(base, alloc);
1832d3fecca9Ssthen }
1833d3fecca9Ssthen 
1834d3fecca9Ssthen udb_void udb_alloc_init(udb_alloc* alloc, void* d, size_t sz)
1835d3fecca9Ssthen {
1836d3fecca9Ssthen 	/* could be faster maybe, if grown? */
1837d3fecca9Ssthen 	udb_void r = udb_alloc_space(alloc, sz);
1838d3fecca9Ssthen 	if(!r) return r;
1839d3fecca9Ssthen 	memcpy(UDB_REL(alloc->udb->base, r), d, sz);
1840d3fecca9Ssthen 	return r;
1841d3fecca9Ssthen }
1842d3fecca9Ssthen 
1843d3fecca9Ssthen udb_void udb_alloc_realloc(udb_alloc* alloc, udb_void r, size_t osz, size_t sz)
1844d3fecca9Ssthen {
1845d3fecca9Ssthen 	void* base = alloc->udb->base;
1846d3fecca9Ssthen 	udb_void c, n, newd;
1847d3fecca9Ssthen 	udb_chunk_d* cp, *np;
1848d3fecca9Ssthen 	uint64_t avail;
1849d3fecca9Ssthen 	uint8_t cp_type;
1850d3fecca9Ssthen 	/* emulate some posix realloc stuff */
1851d3fecca9Ssthen 	if(r == 0)
1852d3fecca9Ssthen 		return udb_alloc_space(alloc, sz);
1853d3fecca9Ssthen 	if(sz == 0) {
1854d3fecca9Ssthen 		if(!udb_alloc_free(alloc, r, osz))
1855d3fecca9Ssthen 			log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
1856d3fecca9Ssthen 		return 0;
1857d3fecca9Ssthen 	}
1858d3fecca9Ssthen 	c = chunk_from_dataptr(r);
1859d3fecca9Ssthen 	cp = UDB_CHUNK(c);
1860d3fecca9Ssthen 	cp_type = cp->type;
1861d3fecca9Ssthen 	if(cp->exp == UDB_EXP_XL) {
1862d3fecca9Ssthen 		avail = UDB_XL_CHUNK(c)->size - sizeof(udb_xl_chunk_d)
1863d3fecca9Ssthen 			- sizeof(uint64_t)*2;
1864d3fecca9Ssthen 	} else {
1865d3fecca9Ssthen 		avail = ((uint64_t)1<<cp->exp) - sizeof(udb_chunk_d) - 1;
1866d3fecca9Ssthen 	}
1867d3fecca9Ssthen 	if(sz <= avail)
1868d3fecca9Ssthen 		return r;
1869d3fecca9Ssthen 	/* reallocate it, and copy */
1870d3fecca9Ssthen 	newd = udb_alloc_space(alloc, sz);
1871d3fecca9Ssthen 	if(!newd) return 0;
1872d3fecca9Ssthen 	/* re-base after alloc, since re-mmap may have happened */
1873d3fecca9Ssthen 	base = alloc->udb->base;
1874d3fecca9Ssthen 	cp = NULL; /* may be invalid now, robustness */
1875d3fecca9Ssthen 	n = chunk_from_dataptr(newd);
1876d3fecca9Ssthen 	np = UDB_CHUNK(n);
1877d3fecca9Ssthen 	np->type = cp_type;
1878d3fecca9Ssthen 	memcpy(UDB_REL(base, newd), UDB_REL(base, r), osz);
1879d3fecca9Ssthen 	/* fixup ptrs */
1880d3fecca9Ssthen 	chunk_fix_ptrs(base, alloc->udb, np, newd, osz, r);
1881d3fecca9Ssthen 
1882d3fecca9Ssthen 	if(!udb_alloc_free(alloc, r, osz))
1883d3fecca9Ssthen 		log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
1884d3fecca9Ssthen 	return newd;
1885d3fecca9Ssthen }
1886d3fecca9Ssthen 
1887d3fecca9Ssthen int udb_alloc_grow(udb_alloc* alloc, size_t sz, size_t num)
1888d3fecca9Ssthen {
1889d3fecca9Ssthen 	const uint64_t mb = 1024*1024;
1890d3fecca9Ssthen 	int exp = udb_alloc_exp_needed(sz);
1891d3fecca9Ssthen 	uint64_t esz;
1892d3fecca9Ssthen 	uint64_t want;
1893d3fecca9Ssthen 	if(exp == UDB_EXP_XL)
1894d3fecca9Ssthen 		esz = (sz&(mb-1))+mb;
1895d3fecca9Ssthen 	else	esz = (uint64_t)1<<exp;
1896d3fecca9Ssthen 	/* we need grow_end_calc to take into account alignment */
1897d3fecca9Ssthen 	want = grow_end_calc(alloc, exp) + esz*(num-1);
1898d3fecca9Ssthen 	assert(want >= alloc->udb->base_size);
1899d3fecca9Ssthen 	if(!udb_base_grow_and_remap(alloc->udb, want)) {
1900d3fecca9Ssthen 		log_msg(LOG_ERR, "failed to grow the specified amount");
1901d3fecca9Ssthen 		return 0;
1902d3fecca9Ssthen 	}
1903d3fecca9Ssthen 	return 1;
1904d3fecca9Ssthen }
1905d3fecca9Ssthen 
1906d3fecca9Ssthen void udb_alloc_set_type(udb_alloc* alloc, udb_void r, udb_chunk_type tp)
1907d3fecca9Ssthen {
1908d3fecca9Ssthen 	void* base = alloc->udb->base;
1909d3fecca9Ssthen 	udb_void f = chunk_from_dataptr(r);
1910d3fecca9Ssthen 	udb_chunk_d* fp = UDB_CHUNK(f);
1911d3fecca9Ssthen 	/* not the 'free' type, that must be set by allocation routines */
1912d3fecca9Ssthen 	assert(fp->type != udb_chunk_type_free);
1913d3fecca9Ssthen 	assert(tp != udb_chunk_type_free);
1914d3fecca9Ssthen 	fp->type = tp;
1915d3fecca9Ssthen }
1916d3fecca9Ssthen 
1917d3fecca9Ssthen int udb_valid_offset(udb_base* udb, udb_void to, size_t destsize)
1918d3fecca9Ssthen {
1919d3fecca9Ssthen 	/* pointers are not valid before the header-size or after the
1920d3fecca9Ssthen 	 * used-region of the mmap */
1921d3fecca9Ssthen 	return ( (to+destsize) <= udb->base_size &&
1922d3fecca9Ssthen 		to >= (udb->glob_data->hsize-2*sizeof(udb_rel_ptr)) &&
1923d3fecca9Ssthen 		(to+destsize) <= udb->alloc->disk->nextgrow);
1924d3fecca9Ssthen }
1925d3fecca9Ssthen 
1926d3fecca9Ssthen int udb_valid_dataptr(udb_base* udb, udb_void to)
1927d3fecca9Ssthen {
1928d3fecca9Ssthen 	void* base = udb->base;
1929d3fecca9Ssthen 	udb_void ch;
1930d3fecca9Ssthen 	int exp;
1931d3fecca9Ssthen 	uint64_t esz;
1932d3fecca9Ssthen 	/* our data chunks are aligned and at least 8 bytes */
1933d3fecca9Ssthen 	if(!udb_valid_offset(udb, to, sizeof(uint64_t)))
1934d3fecca9Ssthen 		return 0;
1935d3fecca9Ssthen 	/* get the chunk pointer */
1936d3fecca9Ssthen 	ch = chunk_from_dataptr(to);
1937d3fecca9Ssthen 	if(!udb_valid_offset(udb, ch, sizeof(udb_chunk_d)))
1938d3fecca9Ssthen 		return 0;
1939d3fecca9Ssthen 	/* check its size */
1940d3fecca9Ssthen 	exp = UDB_CHUNK(ch)->exp;
1941d3fecca9Ssthen 	if(exp == UDB_EXP_XL) {
1942d3fecca9Ssthen 		/* check XL chunk */
1943d3fecca9Ssthen 		uint64_t xlsz;
1944d3fecca9Ssthen 		if(!udb_valid_offset(udb, ch, sizeof(udb_xl_chunk_d)))
1945d3fecca9Ssthen 			return 0;
1946d3fecca9Ssthen 		xlsz = UDB_XL_CHUNK(ch)->size;
1947d3fecca9Ssthen 		if(!udb_valid_offset(udb, ch+xlsz-1, 1))
1948d3fecca9Ssthen 			return 0;
1949d3fecca9Ssthen 		if(*((uint8_t*)UDB_REL(base, ch+xlsz-1)) != UDB_EXP_XL)
1950d3fecca9Ssthen 			return 0;
1951d3fecca9Ssthen 		if(*((uint64_t*)UDB_REL(base, ch+xlsz-sizeof(uint64_t)*2))
1952d3fecca9Ssthen 			!= xlsz)
1953d3fecca9Ssthen 			return 0;
1954d3fecca9Ssthen 		return 1;
1955d3fecca9Ssthen 	}
1956d3fecca9Ssthen 	/* check if regular chunk has matching end byte */
1957d3fecca9Ssthen 	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX)
1958d3fecca9Ssthen 		return 0; /* cannot be a valid chunk */
1959d3fecca9Ssthen 	esz = 1<<exp;
1960d3fecca9Ssthen 	if(!udb_valid_offset(udb, ch+esz-1, 1))
1961d3fecca9Ssthen 		return 0;
1962d3fecca9Ssthen 	if(*((uint8_t*)UDB_REL(base, ch+esz-1)) != exp)
1963d3fecca9Ssthen 		return 0;
1964d3fecca9Ssthen 	return 1;
1965d3fecca9Ssthen }
1966d3fecca9Ssthen 
1967d3fecca9Ssthen int udb_valid_rptr(udb_base* udb, udb_void rptr, udb_void to)
1968d3fecca9Ssthen {
1969d3fecca9Ssthen 	void* base = udb->base;
1970d3fecca9Ssthen 	udb_void p;
1971d3fecca9Ssthen 	if(!udb_valid_offset(udb, rptr, sizeof(udb_rel_ptr)))
1972d3fecca9Ssthen 		return 0;
1973d3fecca9Ssthen 	if(!udb_valid_dataptr(udb, to))
1974d3fecca9Ssthen 		return 0;
1975d3fecca9Ssthen 	p = UDB_CHUNK(chunk_from_dataptr(to))->ptrlist;
1976d3fecca9Ssthen 	while(p) {
1977d3fecca9Ssthen 		if(!udb_valid_offset(udb, p, sizeof(udb_rel_ptr)))
1978d3fecca9Ssthen 			return 0;
1979d3fecca9Ssthen 		if(p == rptr)
1980d3fecca9Ssthen 			return 1;
1981d3fecca9Ssthen 		p = UDB_REL_PTR(p)->next;
1982d3fecca9Ssthen 	}
1983d3fecca9Ssthen 	return 0;
1984d3fecca9Ssthen }
1985d3fecca9Ssthen 
1986d3fecca9Ssthen void udb_rel_ptr_init(udb_rel_ptr* ptr)
1987d3fecca9Ssthen {
1988d3fecca9Ssthen 	memset(ptr, 0, sizeof(*ptr));
1989d3fecca9Ssthen }
1990d3fecca9Ssthen 
1991d3fecca9Ssthen void udb_rel_ptr_unlink(void* base, udb_rel_ptr* ptr)
1992d3fecca9Ssthen {
1993d3fecca9Ssthen 	if(!ptr->data)
1994d3fecca9Ssthen 		return;
1995d3fecca9Ssthen 	if(ptr->prev) {
1996d3fecca9Ssthen 		UDB_REL_PTR(ptr->prev)->next = ptr->next;
1997d3fecca9Ssthen 	} else {
1998d3fecca9Ssthen 		UDB_CHUNK(chunk_from_dataptr(ptr->data))->ptrlist = ptr->next;
1999d3fecca9Ssthen 	}
2000d3fecca9Ssthen 	if(ptr->next) {
2001d3fecca9Ssthen 		UDB_REL_PTR(ptr->next)->prev = ptr->prev;
2002d3fecca9Ssthen 	}
2003d3fecca9Ssthen }
2004d3fecca9Ssthen 
2005d3fecca9Ssthen void udb_rel_ptr_link(void* base, udb_rel_ptr* ptr, udb_void to)
2006d3fecca9Ssthen {
2007d3fecca9Ssthen 	udb_chunk_d* chunk = UDB_CHUNK(chunk_from_dataptr(to));
2008d3fecca9Ssthen 	ptr->prev = 0;
2009d3fecca9Ssthen 	ptr->next = chunk->ptrlist;
2010d3fecca9Ssthen 	if(ptr->next)
2011d3fecca9Ssthen 		UDB_REL_PTR(ptr->next)->prev = UDB_SYSTOREL(base, ptr);
2012d3fecca9Ssthen 	chunk->ptrlist = UDB_SYSTOREL(base, ptr);
2013d3fecca9Ssthen 	ptr->data = to;
2014d3fecca9Ssthen }
2015d3fecca9Ssthen 
2016d3fecca9Ssthen void udb_rel_ptr_set(void* base, udb_rel_ptr* ptr, udb_void to)
2017d3fecca9Ssthen {
2018d3fecca9Ssthen 	assert(to == 0 || to > 64);
2019d3fecca9Ssthen 	udb_rel_ptr_unlink(base, ptr);
2020d3fecca9Ssthen 	if(to)
2021d3fecca9Ssthen 		udb_rel_ptr_link(base, ptr, to);
2022d3fecca9Ssthen 	else	ptr->data = to;
2023d3fecca9Ssthen }
2024d3fecca9Ssthen 
2025d3fecca9Ssthen void udb_rel_ptr_edit(void* base, udb_void list, udb_void to)
2026d3fecca9Ssthen {
2027d3fecca9Ssthen 	udb_void p = list;
2028d3fecca9Ssthen 	while(p) {
2029d3fecca9Ssthen 		UDB_REL_PTR(p)->data = to;
2030d3fecca9Ssthen 		p = UDB_REL_PTR(p)->next;
2031d3fecca9Ssthen 	}
2032d3fecca9Ssthen }
2033d3fecca9Ssthen 
#ifdef UDB_CHECK
/** debug walk of the in-RAM ptr hash table: every bucket's doubly
 * linked list must be consistent, every entry must hash to its bucket
 * and belong to this udb. */
static void
udb_check_ptrs_valid(udb_base* udb)
{
	size_t bucket;
	udb_ptr* p;
	udb_ptr* before;
	for(bucket=0; bucket<udb->ram_size; bucket++) {
		before = NULL;
		for(p=udb->ram_hash[bucket]; p; p=p->next) {
			/* back-link must match the element we came from */
			assert(p->prev == before);
			/* entry must be stored in the bucket it hashes to */
			assert((size_t)(chunk_hash_ptr(p->data)&udb->ram_mask)
				== bucket);
			/* entry must reference this udb's base */
			assert(p->base == &udb->base);
			before = p;
		}
	}
}
#endif /* UDB_CHECK */
2053d3fecca9Ssthen 
2054d3fecca9Ssthen void udb_ptr_init(udb_ptr* ptr, udb_base* udb)
2055d3fecca9Ssthen {
2056d3fecca9Ssthen #ifdef UDB_CHECK
2057d3fecca9Ssthen 	udb_check_ptrs_valid(udb); /* previous ptrs have been unlinked */
2058d3fecca9Ssthen #endif
2059d3fecca9Ssthen 	memset(ptr, 0, sizeof(*ptr));
2060d3fecca9Ssthen 	ptr->base = &udb->base;
2061d3fecca9Ssthen }
2062d3fecca9Ssthen 
2063d3fecca9Ssthen void udb_ptr_set(udb_ptr* ptr, udb_base* udb, udb_void newval)
2064d3fecca9Ssthen {
2065d3fecca9Ssthen 	assert(newval == 0 || newval > 64);
2066d3fecca9Ssthen 	if(ptr->data)
2067d3fecca9Ssthen 		udb_base_unlink_ptr(udb, ptr);
2068d3fecca9Ssthen 	ptr->data = newval;
2069d3fecca9Ssthen 	if(newval)
2070d3fecca9Ssthen 		udb_base_link_ptr(udb, ptr);
2071d3fecca9Ssthen }
2072d3fecca9Ssthen 
2073d3fecca9Ssthen int udb_ptr_alloc_space(udb_ptr* ptr, udb_base* udb, udb_chunk_type type,
2074d3fecca9Ssthen 	size_t sz)
2075d3fecca9Ssthen {
2076d3fecca9Ssthen 	udb_void r;
2077d3fecca9Ssthen 	r = udb_alloc_space(udb->alloc, sz);
2078d3fecca9Ssthen 	if(!r) return 0;
2079d3fecca9Ssthen 	udb_alloc_set_type(udb->alloc, r, type);
2080d3fecca9Ssthen 	udb_ptr_init(ptr, udb);
2081d3fecca9Ssthen 	udb_ptr_set(ptr, udb, r);
2082d3fecca9Ssthen 	return 1;
2083d3fecca9Ssthen }
2084d3fecca9Ssthen 
2085d3fecca9Ssthen void udb_ptr_free_space(udb_ptr* ptr, udb_base* udb, size_t sz)
2086d3fecca9Ssthen {
2087d3fecca9Ssthen 	if(ptr->data) {
2088d3fecca9Ssthen 		udb_void d = ptr->data;
2089d3fecca9Ssthen 		udb_ptr_set(ptr, udb, 0);
2090d3fecca9Ssthen 		udb_alloc_free(udb->alloc, d, sz);
2091d3fecca9Ssthen 	}
2092d3fecca9Ssthen }
2093d3fecca9Ssthen 
2094d3fecca9Ssthen udb_chunk_type udb_ptr_get_type(udb_ptr* ptr)
2095d3fecca9Ssthen {
2096d3fecca9Ssthen 	udb_void f;
2097d3fecca9Ssthen 	if(!ptr || ptr->data == 0) return udb_chunk_type_internal; /* something bad*/
2098d3fecca9Ssthen 	f = chunk_from_dataptr(ptr->data);
2099d3fecca9Ssthen 	return ((udb_chunk_d*)UDB_REL(*ptr->base, f))->type;
2100d3fecca9Ssthen }
2101