1d3fecca9Ssthen /* udb.c - u(micro) data base.
2d3fecca9Ssthen * By W.C.A. Wijngaards
3d3fecca9Ssthen * Copyright 2010, NLnet Labs.
4d3fecca9Ssthen * BSD, see LICENSE.
5d3fecca9Ssthen */
6d3fecca9Ssthen #include "config.h"
7d3fecca9Ssthen #include "udb.h"
8d3fecca9Ssthen #include <string.h>
9d3fecca9Ssthen #include <errno.h>
10d3fecca9Ssthen #include <stdio.h>
11d3fecca9Ssthen #include <unistd.h>
12d3fecca9Ssthen #include <assert.h>
13d3fecca9Ssthen #include "lookup3.h"
14d3fecca9Ssthen #include "util.h"
15d3fecca9Ssthen
16d3fecca9Ssthen /* mmap and friends */
17d3fecca9Ssthen #include <sys/types.h>
18d3fecca9Ssthen #include <sys/stat.h>
19d3fecca9Ssthen #include <fcntl.h>
20d3fecca9Ssthen #include <sys/mman.h>
21d3fecca9Ssthen
22d3fecca9Ssthen /* for systems without, portable definition, failed-1 and async is a flag */
23d3fecca9Ssthen #ifndef MAP_FAILED
24d3fecca9Ssthen #define MAP_FAILED ((void*)-1)
25d3fecca9Ssthen #endif
26d3fecca9Ssthen #ifndef MS_SYNC
27d3fecca9Ssthen #define MS_SYNC 0
28d3fecca9Ssthen #endif
29d3fecca9Ssthen
30d3fecca9Ssthen /** move and fixup xl segment */
31d3fecca9Ssthen static void move_xl_segment(void* base, udb_base* udb, udb_void xl,
32d3fecca9Ssthen udb_void n, uint64_t sz, uint64_t startseg);
33d3fecca9Ssthen /** attempt to compact the data and move free space to the end */
34d3fecca9Ssthen static int udb_alloc_compact(void* base, udb_alloc* alloc);
35d3fecca9Ssthen
36d3fecca9Ssthen /** convert pointer to the data part to a pointer to the base of the chunk */
37d3fecca9Ssthen static udb_void
chunk_from_dataptr(udb_void data)38d3fecca9Ssthen chunk_from_dataptr(udb_void data)
39d3fecca9Ssthen {
40d3fecca9Ssthen /* we use that sizeof(udb_chunk_d) != sizeof(udb_xl_chunk_d) and
41d3fecca9Ssthen * that xl_chunk_d is aligned on x**1024 boundaries. */
42d3fecca9Ssthen udb_void xl = data - sizeof(udb_xl_chunk_d);
43d3fecca9Ssthen if( (xl & (UDB_ALLOC_CHUNK_SIZE-1)) == 0)
44d3fecca9Ssthen return xl;
45d3fecca9Ssthen return data - sizeof(udb_chunk_d);
46d3fecca9Ssthen }
47d3fecca9Ssthen
/** exported wrapper around chunk_from_dataptr, for use outside this file
 * (e.g. by udb checking tools) */
udb_void chunk_from_dataptr_ext(udb_void data) {
	return chunk_from_dataptr(data);
}
51d3fecca9Ssthen
52d3fecca9Ssthen #ifndef NDEBUG
53d3fecca9Ssthen /** read last octet from a chunk */
54d3fecca9Ssthen static uint8_t
chunk_get_last(void * base,udb_void chunk,int exp)55d3fecca9Ssthen chunk_get_last(void* base, udb_void chunk, int exp)
56d3fecca9Ssthen {
57d3fecca9Ssthen return *((uint8_t*)UDB_REL(base, chunk+(1<<exp)-1));
58d3fecca9Ssthen }
59d3fecca9Ssthen #endif
60d3fecca9Ssthen
61d3fecca9Ssthen /** write last octet of a chunk */
62d3fecca9Ssthen static void
chunk_set_last(void * base,udb_void chunk,int exp,uint8_t value)63d3fecca9Ssthen chunk_set_last(void* base, udb_void chunk, int exp, uint8_t value)
64d3fecca9Ssthen {
65bfd0b123Sflorian assert(exp >= 0 && exp <= 63);
66bfd0b123Sflorian *((uint8_t*)UDB_REL(base, chunk+((uint64_t)1<<exp)-1)) = value;
67d3fecca9Ssthen }
68d3fecca9Ssthen
/** create udb_base from a file descriptor (must be at start of file).
 * Reads and validates magic and header, mmaps the whole file and sets up
 * the in-memory administration.  Takes ownership of fd: it is closed on
 * every error path.  walkfunc/arg are stored for later relptr traversal.
 * Returns NULL on failure. */
udb_base*
udb_base_create_fd(const char* fname, int fd, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m, fsz;
	udb_glob_d g;
	ssize_t r;
	udb_base* udb = (udb_base*)xalloc_zero(sizeof(*udb));
	if(!udb) {
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}
	udb->fname = strdup(fname);
	if(!udb->fname) {
		log_msg(LOG_ERR, "out of memory");
		free(udb);
		close(fd);
		return NULL;
	}
	udb->walkfunc = walkfunc;
	udb->walkarg = arg;
	udb->fd = fd;
	/* small initial hash of tracked udb_ptrs; grown on demand in
	 * udb_base_link_ptr */
	udb->ram_size = 1024;
	udb->ram_mask = (int)udb->ram_size - 1;
	udb->ram_hash = (udb_ptr**)xalloc_array_zero(sizeof(udb_ptr*),
		udb->ram_size);
	if(!udb->ram_hash) {
		free(udb->fname);
		free(udb);
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}

	/* read magic */
	if((r=read(fd, &m, sizeof(m))) == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(m)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	/* TODO : what if bigendian and littleendian file, see magic */
	if(m != UDB_MAGIC) {
		log_msg(LOG_ERR, "%s: wrong type of file", fname);
		goto fail;
	}
	/* read header */
	if((r=read(fd, &g, sizeof(g))) == -1) {
		log_msg(LOG_ERR, "%s: %s\n", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(g)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	if(g.version != 0) {
		log_msg(LOG_ERR, "%s: unknown file version %d", fname,
			(int)g.version);
		goto fail;
	}
	if(g.hsize < UDB_HEADER_SIZE) {
		log_msg(LOG_ERR, "%s: header size too small %d", fname,
			(int)g.hsize);
		goto fail;
	}
	if(g.hsize > UDB_HEADER_SIZE) {
		log_msg(LOG_WARNING, "%s: header size too large %d", fname,
			(int)g.hsize);
		goto fail;
	}
	/* a file not marked cleanly-closed may be inconsistent; refuse it
	 * (caller typically recreates the database) */
	if(g.clean_close != 1) {
		log_msg(LOG_WARNING, "%s: not cleanly closed %d", fname,
			(int)g.clean_close);
		goto fail;
	}
	if(g.dirty_alloc != 0) {
		log_msg(LOG_WARNING, "%s: not cleanly closed (alloc:%d)", fname,
			(int)g.dirty_alloc);
		goto fail;
	}

	/* check file size correctly written, for 4.0.2 nsd.db failure */
	fsz = (uint64_t)lseek(fd, (off_t)0, SEEK_END);
	(void)lseek(fd, (off_t)0, SEEK_SET);
	if(g.fsize != fsz) {
		log_msg(LOG_WARNING, "%s: file size %llu but mmap header "
			"has size %llu", fname, (unsigned long long)fsz,
			(unsigned long long)g.fsize);
		goto fail;
	}

	/* mmap it */
	if(g.fsize < UDB_HEADER_SIZE || g.fsize < g.hsize) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	/* sanity bound on the mapping size */
	if(g.fsize > (uint64_t)400*1024*1024*1024*1024) /* 400 Tb */ {
		log_msg(LOG_WARNING, "%s: file size too large %llu",
			fname, (unsigned long long)g.fsize);
		goto fail;
	}
	udb->base_size = (size_t)g.fsize;
#ifdef HAVE_MMAP
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	udb->base = mmap(NULL, (size_t)udb->base_size,
		(int)PROT_READ|PROT_WRITE, (int)MAP_SHARED,
		(int)udb->fd, (off_t)0);
#else
	udb->base = MAP_FAILED; errno = ENOSYS;
#endif
	if(udb->base == MAP_FAILED) {
		udb->base = NULL;
		log_msg(LOG_ERR, "mmap(size %u) error: %s",
			(unsigned)udb->base_size, strerror(errno));
	fail:
		close(fd);
		free(udb->fname);
		free(udb->ram_hash);
		free(udb);
		return NULL;
	}

	/* init completion */
	udb->glob_data = (udb_glob_d*)((char*)udb->base+sizeof(uint64_t));
	r = 0;
	/* cannot be dirty because that is goto fail above */
	if(udb->glob_data->dirty_alloc != udb_dirty_clean)
		r = 1;
	udb->alloc = udb_alloc_create(udb, (udb_alloc_d*)(
		(char*)udb->glob_data+sizeof(*udb->glob_data)));
	if(!udb->alloc) {
		log_msg(LOG_ERR, "out of memory");
		udb_base_free(udb);
		return NULL;
	}
	if(r) {
		/* and compact now, or resume compacting */
		udb_alloc_compact(udb, udb->alloc);
		udb_base_sync(udb, 1);
	}
	/* mark in-use; set back to 1 by udb_base_close on clean shutdown */
	udb->glob_data->clean_close = 0;

	return udb;
}
216d3fecca9Ssthen
udb_base_create_read(const char * fname,udb_walk_relptr_func walkfunc,void * arg)217d3fecca9Ssthen udb_base* udb_base_create_read(const char* fname, udb_walk_relptr_func walkfunc,
218d3fecca9Ssthen void* arg)
219d3fecca9Ssthen {
220d3fecca9Ssthen int fd = open(fname, O_RDWR);
221d3fecca9Ssthen if(fd == -1) {
222d3fecca9Ssthen log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
223d3fecca9Ssthen return NULL;
224d3fecca9Ssthen }
225d3fecca9Ssthen return udb_base_create_fd(fname, fd, walkfunc, arg);
226d3fecca9Ssthen }
227d3fecca9Ssthen
228d3fecca9Ssthen /** init new udb_global structure */
udb_glob_init_new(udb_glob_d * g)229d3fecca9Ssthen static void udb_glob_init_new(udb_glob_d* g)
230d3fecca9Ssthen {
231d3fecca9Ssthen memset(g, 0, sizeof(*g));
232d3fecca9Ssthen g->hsize = UDB_HEADER_SIZE;
233d3fecca9Ssthen g->fsize = UDB_HEADER_SIZE;
234d3fecca9Ssthen }
235d3fecca9Ssthen
236d3fecca9Ssthen /** write data to file and check result */
237d3fecca9Ssthen static int
write_fdata(const char * fname,int fd,void * data,size_t len)238d3fecca9Ssthen write_fdata(const char* fname, int fd, void* data, size_t len)
239d3fecca9Ssthen {
240d3fecca9Ssthen ssize_t w;
241d3fecca9Ssthen if((w=write(fd, data, len)) == -1) {
242d3fecca9Ssthen log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
243d3fecca9Ssthen close(fd);
244d3fecca9Ssthen return 0;
245d3fecca9Ssthen } else if(w != (ssize_t)len) {
246d3fecca9Ssthen log_msg(LOG_ERR, "%s: short write (disk full?)", fname);
247d3fecca9Ssthen close(fd);
248d3fecca9Ssthen return 0;
249d3fecca9Ssthen }
250d3fecca9Ssthen return 1;
251d3fecca9Ssthen }
252d3fecca9Ssthen
/** create a brand new udb file: write magic, global header, allocator
 * header and the initial end-marker pair, size the file, then open it
 * through udb_base_create_fd.  Returns NULL on failure. */
udb_base* udb_base_create_new(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m;
	udb_glob_d g;
	udb_alloc_d a;
	/* the end-marker: current end offset and exp, appended after the
	 * allocator header */
	uint64_t endsize = UDB_HEADER_SIZE;
	uint64_t endexp = 0;
	int fd = open(fname, O_CREAT|O_RDWR, 0600);
	if(fd == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		return NULL;
	}
	m = UDB_MAGIC;
	udb_glob_init_new(&g);
	udb_alloc_init_new(&a);
	/* mark cleanly-closed so create_fd accepts the fresh file */
	g.clean_close = 1;

	/* write new data to file (closes fd on error) */
	if(!write_fdata(fname, fd, &m, sizeof(m)))
		return NULL;
	if(!write_fdata(fname, fd, &g, sizeof(g)))
		return NULL;
	if(!write_fdata(fname, fd, &a, sizeof(a)))
		return NULL;
	if(!write_fdata(fname, fd, &endsize, sizeof(endsize)))
		return NULL;
	if(!write_fdata(fname, fd, &endexp, sizeof(endexp)))
		return NULL;
	/* rewind to start */
	if(lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
		log_msg(LOG_ERR, "%s: lseek %s", fname, strerror(errno));
		close(fd);
		return NULL;
	}
	/* truncate to the right size */
	if(ftruncate(fd, (off_t)g.fsize) < 0) {
		log_msg(LOG_ERR, "%s: ftruncate(%d): %s", fname,
			(int)g.fsize, strerror(errno));
		close(fd);
		return NULL;
	}
	return udb_base_create_fd(fname, fd, walkfunc, arg);
}
297d3fecca9Ssthen
/** shrink the udb base if it has unused space at the end: truncate the
 * file to nsize and record the new size in the header.  The dirty_alloc
 * flag brackets the operation so a crash midway is detectable on the
 * next open. */
static void
udb_base_shrink(udb_base* udb, uint64_t nsize)
{
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	udb->glob_data->fsize = nsize;
	/* sync, does not *seem* to be required on Linux, but it is
	   certainly required on OpenBSD.  Otherwise changed data is lost. */
#ifdef HAVE_MMAP
	msync(udb->base, udb->base_size, MS_ASYNC);
#endif
	if(ftruncate(udb->fd, (off_t)nsize) != 0) {
		/* log only; the header already records the intended size */
		log_msg(LOG_ERR, "%s: ftruncate(%u) %s", udb->fname,
			(unsigned)nsize, strerror(errno));
	}
	udb->glob_data->dirty_alloc = udb_dirty_clean;
}
315d3fecca9Ssthen
/** close the udb file: shrink off trailing free space, mark the file
 * cleanly closed, close the fd and unmap the region.  Safe to call on
 * NULL and idempotent with respect to fd/base. */
void udb_base_close(udb_base* udb)
{
	if(!udb)
		return;
	if(udb->fd != -1 && udb->base && udb->alloc) {
		/* everything past nextgrow is unused; give it back */
		uint64_t nsize = udb->alloc->disk->nextgrow;
		if(nsize < udb->base_size)
			udb_base_shrink(udb, nsize);
	}
	if(udb->fd != -1) {
		/* set the clean-close marker (written via the mmap) so the
		 * next open accepts the file */
		udb->glob_data->clean_close = 1;
		close(udb->fd);
		udb->fd = -1;
	}
	if(udb->base) {
#ifdef HAVE_MMAP
		if(munmap(udb->base, udb->base_size) == -1) {
			log_msg(LOG_ERR, "munmap: %s", strerror(errno));
		}
#endif
		udb->base = NULL;
	}
}
339d3fecca9Ssthen
/** close the file and release all in-memory administration */
void udb_base_free(udb_base* udb)
{
	if(udb == NULL)
		return;
	udb_base_close(udb);
	udb_alloc_delete(udb->alloc);
	free(udb->fname);
	free(udb->ram_hash);
	free(udb);
}
350d3fecca9Ssthen
/** like udb_base_free, but deliberately leaves the mmap region mapped
 * (ownership of the mapping stays with the caller); only the fd and the
 * heap administration are released */
void udb_base_free_keep_mmap(udb_base* udb)
{
	if(udb == NULL)
		return;
	if(udb->fd != -1) {
		close(udb->fd);
		udb->fd = -1;
	}
	udb->base = NULL;
	udb_alloc_delete(udb->alloc);
	free(udb->fname);
	free(udb->ram_hash);
	free(udb);
}
364d3fecca9Ssthen
/** sync the mmapped file to disk; wait!=0 blocks until the data is
 * written (MS_SYNC), otherwise the sync is scheduled (MS_ASYNC) */
void udb_base_sync(udb_base* udb, int wait)
{
	if(!udb) return;
#ifdef HAVE_MMAP
	if(msync(udb->base, udb->base_size, wait?MS_SYNC:MS_ASYNC) != 0) {
		log_msg(LOG_ERR, "msync(%s) error %s",
			udb->fname, strerror(errno));
	}
#else
	(void)wait;
#endif
}
377d3fecca9Ssthen
378d3fecca9Ssthen /** hash a chunk pointer */
379d3fecca9Ssthen static uint32_t
chunk_hash_ptr(udb_void p)380d3fecca9Ssthen chunk_hash_ptr(udb_void p)
381d3fecca9Ssthen {
382d3fecca9Ssthen /* put p into an array of uint32 */
383d3fecca9Ssthen uint32_t h[sizeof(p)/sizeof(uint32_t)];
384d3fecca9Ssthen memcpy(&h, &p, sizeof(h));
385d3fecca9Ssthen return hashword(h, sizeof(p)/sizeof(uint32_t), 0x8763);
386d3fecca9Ssthen }
387d3fecca9Ssthen
388d3fecca9Ssthen /** check that the given pointer is on the bucket for the given offset */
udb_ptr_is_on_bucket(udb_base * udb,udb_ptr * ptr,udb_void to)389d3fecca9Ssthen int udb_ptr_is_on_bucket(udb_base* udb, udb_ptr* ptr, udb_void to)
390d3fecca9Ssthen {
391d3fecca9Ssthen uint32_t i = chunk_hash_ptr(to) & udb->ram_mask;
392d3fecca9Ssthen udb_ptr* p;
393d3fecca9Ssthen assert((size_t)i < udb->ram_size);
394d3fecca9Ssthen for(p = udb->ram_hash[i]; p; p=p->next) {
395d3fecca9Ssthen if(p == ptr)
396d3fecca9Ssthen return 1;
397d3fecca9Ssthen }
398d3fecca9Ssthen return 0;
399d3fecca9Ssthen }
400d3fecca9Ssthen
401d3fecca9Ssthen /** grow the ram array */
402d3fecca9Ssthen static void
grow_ram_hash(udb_base * udb,udb_ptr ** newhash)403d3fecca9Ssthen grow_ram_hash(udb_base* udb, udb_ptr** newhash)
404d3fecca9Ssthen {
405d3fecca9Ssthen size_t i;
406d3fecca9Ssthen size_t osize= udb->ram_size;
407d3fecca9Ssthen udb_ptr* p, *np;
408d3fecca9Ssthen udb_ptr** oldhash = udb->ram_hash;
409d3fecca9Ssthen udb->ram_size *= 2;
410d3fecca9Ssthen udb->ram_mask <<= 1;
411d3fecca9Ssthen udb->ram_mask |= 1;
412d3fecca9Ssthen udb->ram_hash = newhash;
413d3fecca9Ssthen /* have to link in every element in the old list into the new list*/
414d3fecca9Ssthen for(i=0; i<osize; i++) {
415d3fecca9Ssthen p = oldhash[i];
416d3fecca9Ssthen while(p) {
417d3fecca9Ssthen np = p->next;
418d3fecca9Ssthen /* link into newhash */
419d3fecca9Ssthen p->prev=NULL;
420d3fecca9Ssthen p->next=newhash[chunk_hash_ptr(p->data)&udb->ram_mask];
421d3fecca9Ssthen if(p->next) p->next->prev = p;
422d3fecca9Ssthen /* go to next element of oldhash */
423d3fecca9Ssthen p = np;
424d3fecca9Ssthen }
425d3fecca9Ssthen }
426d3fecca9Ssthen free(oldhash);
427d3fecca9Ssthen }
428d3fecca9Ssthen
/** start tracking a udb_ptr: insert it at the head of its hash bucket.
 * Grows the hash table (best effort) when it fills up. */
void udb_base_link_ptr(udb_base* udb, udb_ptr* ptr)
{
	uint32_t i;
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* must be to whole chunk*/
#endif
	udb->ram_num++;
	if(udb->ram_num == udb->ram_size && udb->ram_size<(size_t)0x7fffffff) {
		/* grow the array, if allocation succeeds */
		udb_ptr** newram = (udb_ptr**)xalloc_array_zero(
			sizeof(udb_ptr*), udb->ram_size*2);
		if(newram) {
			grow_ram_hash(udb, newram);
		}
	}
	/* compute the bucket after a possible grow, since the mask may
	 * have changed */
	i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
	assert((size_t)i < udb->ram_size);

	ptr->prev = NULL;
	ptr->next = udb->ram_hash[i];
	udb->ram_hash[i] = ptr;
	if(ptr->next)
		ptr->next->prev = ptr;
}
453d3fecca9Ssthen
/** stop tracking a udb_ptr: remove it from its hash bucket's doubly
 * linked list.  ptr must currently be linked. */
void udb_base_unlink_ptr(udb_base* udb, udb_ptr* ptr)
{
	assert(ptr->data);
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* ptr must be inited */
	assert(udb_ptr_is_on_bucket(udb, ptr, ptr->data));
#endif
	udb->ram_num--;
	if(ptr->next)
		ptr->next->prev = ptr->prev;
	if(ptr->prev)
		ptr->prev->next = ptr->next;
	else {
		/* ptr was the bucket head; point the bucket at its next */
		uint32_t i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
		assert((size_t)i < udb->ram_size);
		udb->ram_hash[i] = ptr->next;
	}
}
472d3fecca9Ssthen
/** change a set of ram ptrs to a new value: every tracked udb_ptr whose
 * data equals old is repointed to newd (used when a chunk moves) */
static void
udb_base_ram_ptr_edit(udb_base* udb, udb_void old, udb_void newd)
{
	uint32_t io = chunk_hash_ptr(old) & udb->ram_mask;
	udb_ptr* p, *np;
	/* edit them and move them into the new position */
	p = udb->ram_hash[io];
	while(p) {
		/* capture next now: relinking p moves it to a different
		 * bucket and rewrites its next pointer */
		np = p->next;
		if(p->data == old) {
			udb_base_unlink_ptr(udb, p);
			p->data = newd;
			udb_base_link_ptr(udb, p);
		}
		p = np;
	}
}
491d3fecca9Ssthen
/** return the user_global relptr in the file header, the root of the
 * application's data in the udb */
udb_rel_ptr* udb_base_get_userdata(udb_base* udb)
{
	return &udb->glob_data->user_global;
}
496d3fecca9Ssthen
/** set the user_global root relptr; user may be 0 to clear it */
void udb_base_set_userdata(udb_base* udb, udb_void user)
{
#ifdef UDB_CHECK
	if(user) { assert(udb_valid_dataptr(udb, user)); }
#endif
	/* relptr set also maintains the pointed-to chunk's relptr list */
	udb_rel_ptr_set(udb->base, &udb->glob_data->user_global, user);
}
504d3fecca9Ssthen
/** store the single octet of application-defined flags in the header */
void udb_base_set_userflags(udb_base* udb, uint8_t v)
{
	udb->glob_data->userflags = v;
}
509d3fecca9Ssthen
/** read back the application-defined flags octet from the header */
uint8_t udb_base_get_userflags(udb_base* udb)
{
	return udb->glob_data->userflags;
}
514d3fecca9Ssthen
/** re-mmap the udb to specified size.  May move the mapping; when it
 * does, udb->base, udb->glob_data and alloc->disk are fixed up to point
 * into the new mapping.  Returns the new base, or NULL/0 on failure. */
static void*
udb_base_remap(udb_base* udb, udb_alloc* alloc, uint64_t nsize)
{
#ifdef HAVE_MMAP
	void* nb;
	/* for use with valgrind, do not use mremap, but the other version */
#ifdef MREMAP_MAYMOVE
	nb = mremap(udb->base, udb->base_size, nsize, MREMAP_MAYMOVE);
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	}
#else /* !HAVE MREMAP */
	/* use munmap-mmap to simulate mremap */
	if(munmap(udb->base, udb->base_size) != 0) {
		log_msg(LOG_ERR, "munmap(%s) error %s",
			udb->fname, strerror(errno));
	}
	/* provide hint for new location */
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	nb = mmap(udb->base, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
		(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	/* retry the mmap without basept in case of ENOMEM (FreeBSD8),
	 * the kernel can then try to mmap it at a different location
	 * where more memory is available */
	if(nb == MAP_FAILED && errno == ENOMEM) {
		nb = mmap(NULL, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
			(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	}
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		udb->base = NULL;
		return 0;
	}
#endif /* HAVE MREMAP */
	if(nb != udb->base) {
		/* fix up realpointers in udb and alloc */
		/* but mremap may have been nice and not move the base */
		udb->base = nb;
		udb->glob_data = (udb_glob_d*)((char*)nb+sizeof(uint64_t));
		/* use passed alloc pointer because the udb->alloc may not
		 * be initialized yet */
		alloc->disk = (udb_alloc_d*)((char*)udb->glob_data
			+sizeof(*udb->glob_data));
	}
	udb->base_size = nsize;
	return nb;
#else /* HAVE_MMAP */
	(void)udb; (void)alloc; (void)nsize;
	return NULL;
#endif /* HAVE_MMAP */
}
571d3fecca9Ssthen
/** re-mmap in a (forked) process to the current file size recorded in
 * the header */
void
udb_base_remap_process(udb_base* udb)
{
	/* assume that fsize is still accessible */
	udb_base_remap(udb, udb->alloc, udb->glob_data->fsize);
}
578d3fecca9Ssthen
/** grow file to specified size and re-mmap, return new base (or 0 on
 * failure).  The dirty_alloc flag brackets the size change for crash
 * recovery. */
static void*
udb_base_grow_and_remap(udb_base* udb, uint64_t nsize)
{
	/* grow file by writing a single zero at that spot, the
	 * rest is filled in with zeroes. */
	uint8_t z = 0;
	ssize_t w;

	assert(nsize > 0);
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	/* note: the two branches share the error handling after the write;
	 * the braces pair up across the #ifdef */
#ifdef HAVE_PWRITE
	if((w=pwrite(udb->fd, &z, sizeof(z), (off_t)(nsize-1))) == -1) {
#else
	if(lseek(udb->fd, (off_t)(nsize-1), SEEK_SET) == -1) {
		log_msg(LOG_ERR, "fseek %s: %s", udb->fname, strerror(errno));
		return 0;
	}
	if((w=write(udb->fd, &z, sizeof(z))) == -1) {
#endif
		log_msg(LOG_ERR, "grow(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	} else if(w != (ssize_t)sizeof(z)) {
		log_msg(LOG_ERR, "grow(%s, size %u) failed (disk full?)",
			udb->fname, (unsigned)nsize);
		return 0;
	}
	udb->glob_data->fsize = nsize;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	return udb_base_remap(udb, udb->alloc, nsize);
}
611d3fecca9Ssthen
/** return smallest x such that 2**x >= a (a must be nonzero) */
int udb_exp_size(uint64_t a)
{
	int x = 0;
	uint64_t v;
	assert(a != 0);
	v = a - 1;
	/* strip whole bytes first, then the remaining bits one at a time */
	for(; v > (uint64_t)0xff; v >>= 8)
		x += 8;
	for(; v != 0; v >>= 1)
		x++;
	assert( x>=0 && x<=63);
	assert( ((uint64_t)1<<x) >= a);
	assert( x==0 || /* <<x-1 without negative number analyzer complaints: */ (((uint64_t)1<<x)>>1) < a);
	return x;
}
636d3fecca9Ssthen
/** return the number of zero bits on the right of o (the largest x such
 * that (o & (2**x - 1)) == 0); o must be nonzero */
int udb_exp_offset(uint64_t o)
{
	int x = 0;
	uint64_t v = o;
	assert(o != 0);
	/* skip whole zero bytes, then zero bits */
	for(; (v & (uint64_t)0xff) == 0; v >>= 8)
		x += 8;
	for(; (v & (uint64_t)0x1) == 0; v >>= 1)
		x++;
	assert( o % ((uint64_t)1<<x) == 0);
	assert( o % ((uint64_t)1<<(x+1)) != 0);
	return x;
}
658d3fecca9Ssthen
/** init a fresh allocator header: empty freelists, zero statistics */
void udb_alloc_init_new(udb_alloc_d* a)
{
	assert(UDB_HEADER_SIZE % UDB_ALLOC_CHUNK_MINSIZE == 0);
	memset(a, 0, sizeof(*a));
	/* set new allocations after header, as if allocated in a sequence
	 * of minsize allocations */
	a->nextgrow = UDB_HEADER_SIZE;
}
667d3fecca9Ssthen
/** fsck the file size, false if failed and file is useless.  The actual
 * on-disk size is authoritative: adopt it into the header, remap and
 * sync. */
static int
fsck_fsize(udb_base* udb, udb_alloc* alloc)
{
	off_t realsize;
	log_msg(LOG_WARNING, "udb-fsck %s: file size wrong", udb->fname);
	realsize = lseek(udb->fd, (off_t)0, SEEK_END);
	if(realsize == (off_t)-1) {
		log_msg(LOG_ERR, "lseek(%s): %s", udb->fname, strerror(errno));
		return 0;
	}
	udb->glob_data->fsize = (uint64_t)realsize;
	if(!udb_base_remap(udb, alloc, (uint64_t)realsize))
		return 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	log_msg(LOG_WARNING, "udb-fsck %s: file size fixed (sync)", udb->fname);
	udb_base_sync(udb, 1);
	return 1;
}
687d3fecca9Ssthen
688d3fecca9Ssthen /** regenerate freelist add a new free chunk, return next todo */
689d3fecca9Ssthen static udb_void
690d3fecca9Ssthen regen_free(void* base, udb_void c, int exp, udb_alloc_d* regen)
691d3fecca9Ssthen {
692d3fecca9Ssthen udb_free_chunk_d* cp = UDB_FREE_CHUNK(c);
693d3fecca9Ssthen uint64_t esz = (uint64_t)1<<exp;
694d3fecca9Ssthen if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
695d3fecca9Ssthen return 0;
696d3fecca9Ssthen }
697d3fecca9Ssthen cp->type = udb_chunk_type_free;
698d3fecca9Ssthen cp->flags = 0;
699d3fecca9Ssthen chunk_set_last(base, c, exp, (uint8_t)exp);
700d3fecca9Ssthen cp->prev = 0;
701d3fecca9Ssthen cp->next = regen->free[exp-UDB_ALLOC_CHUNK_MINEXP];
702d3fecca9Ssthen if(cp->next)
703d3fecca9Ssthen UDB_FREE_CHUNK(cp->next)->prev = c;
704d3fecca9Ssthen regen->stat_free += esz;
705d3fecca9Ssthen return c + esz;
706d3fecca9Ssthen }
707d3fecca9Ssthen
/** regenerate xl chunk, return next todo (offset just past the chunk),
 * or 0 when the chunk looks corrupt */
static udb_void
regen_xl(void* base, udb_void c, udb_alloc_d* regen)
{
	udb_xl_chunk_d* cp = UDB_XL_CHUNK(c);
	uint64_t xlsz = cp->size;
	/* xl chunks must be a whole multiple of the chunk size ... */
	if( (xlsz&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
		return 0;
	}
	/* ... and must start on a chunk-size boundary */
	if( (c&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
		return 0;
	}
	/* fixup end-size and end-expmarker */
	regen->stat_alloc += xlsz;
	return c + xlsz;
}
724d3fecca9Ssthen
/** regenerate data chunk, return next todo (offset just past the chunk),
 * or 0 when the exponent is corrupt */
static udb_void
regen_data(void* base, udb_void c, int exp, udb_alloc_d* regen)
{
	uint64_t esz = (uint64_t)1<<exp;
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
		return 0;
	}
	/* restore the exp marker in the chunk's last octet */
	chunk_set_last(base, c, exp, (uint8_t)exp);
	regen->stat_alloc += esz;
	return c + esz;
}
737d3fecca9Ssthen
738d3fecca9Ssthen /** regenerate a relptr structure inside a data segment */
739d3fecca9Ssthen static void
740d3fecca9Ssthen regen_relptr_func(void* base, udb_rel_ptr* rp, void* arg)
741d3fecca9Ssthen {
742d3fecca9Ssthen udb_void* a = (udb_void*)arg;
743d3fecca9Ssthen /* ignore 0 pointers */
744d3fecca9Ssthen if(!rp->data)
745d3fecca9Ssthen return;
746d3fecca9Ssthen
747d3fecca9Ssthen /* edit relptrs that point to oldmoved to point to newmoved. */
748d3fecca9Ssthen if(rp->data == a[0])
749d3fecca9Ssthen rp->data = a[1];
750d3fecca9Ssthen
751d3fecca9Ssthen /* regenerate relptr lists, add this item to the relptr list for
752d3fecca9Ssthen * the data that it points to */
753d3fecca9Ssthen udb_rel_ptr_link(base, rp, rp->data);
754d3fecca9Ssthen }
755d3fecca9Ssthen
756d3fecca9Ssthen /** regenerate the relptrs store in this data segment */
757d3fecca9Ssthen static void
758d3fecca9Ssthen regen_its_ptrs(void* base, udb_base* udb, udb_chunk_d* atp,
759d3fecca9Ssthen void* data, uint64_t dsz, udb_void rb_old, udb_void rb_new)
760d3fecca9Ssthen {
761d3fecca9Ssthen udb_void arg[2];
762d3fecca9Ssthen arg[0] = rb_old; arg[1] = rb_new;
763d3fecca9Ssthen /* walk through the structs here and put them on their respective
764d3fecca9Ssthen * relptr lists */
765d3fecca9Ssthen (*udb->walkfunc)(base, udb->walkarg, atp->type, data, dsz,
766d3fecca9Ssthen ®en_relptr_func, arg);
767d3fecca9Ssthen
768d3fecca9Ssthen }
769d3fecca9Ssthen
/** regenerate relptrlists in the file: two passes over all chunks between
 * the header (hsize) and nextgrow.  Pass one clears every chunk's ptrlist
 * head; pass two walks the user structs in each data chunk and relinks
 * every relptr onto the ptrlist of its target chunk.  rb_old/rb_new are
 * the old and new offsets of a chunk moved before the crash, so stale
 * pointers to rb_old get redirected to rb_new. */
static void
regen_ptrlist(void* base, udb_base* udb, udb_alloc* alloc,
	udb_void rb_old, udb_void rb_new)
{
	udb_void at = alloc->udb->glob_data->hsize;
	/* clear all ptrlist start pointers in the file. */
	while(at < alloc->disk->nextgrow) {
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		if(exp == UDB_EXP_XL) {
			UDB_XL_CHUNK(at)->ptrlist = 0;
			at += UDB_XL_CHUNK(at)->size;
		} else if(tp == udb_chunk_type_free) {
			/* free chunks keep no ptrlist; just skip over */
			at += (uint64_t)1<<exp;
		} else { /* data chunk */
			UDB_CHUNK(at)->ptrlist = 0;
			at += (uint64_t)1<<exp;
		}
	}
	/* walk through all relptr structs and put on the right list. */
	at = alloc->udb->glob_data->hsize;
	while(at < alloc->disk->nextgrow) {
		udb_chunk_d* atp = UDB_CHUNK(at);
		int exp = (int)atp->exp;
		udb_chunk_type tp = (udb_chunk_type)atp->type;
		/* chunk size: XL chunks carry it in the header, regular
		 * chunks are 2**exp bytes */
		uint64_t sz = ((exp == UDB_EXP_XL)?UDB_XL_CHUNK(at)->size:
			(uint64_t)1<<exp);
		if(exp == UDB_EXP_XL) {
			assert(at != rb_old); /* should have been freed */
			/* payload starts after the XL header; the chunk
			 * ends with a size word and a marker word */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_xl_chunk_d),
				sz-sizeof(udb_xl_chunk_d) - sizeof(uint64_t)*2,
				rb_old, rb_new);
			at += sz;
		} else if(tp == udb_chunk_type_free) {
			at += sz;
		} else { /* data chunk */
			assert(at != rb_old); /* should have been freed */
			/* payload after header; the last octet is the
			 * end-exp marker */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_chunk_d),
				sz-sizeof(udb_chunk_d)-1, rb_old, rb_new);
			at += sz;
		}
	}
}
816d3fecca9Ssthen
817d3fecca9Ssthen
818d3fecca9Ssthen /** mark free elements from ex XL chunk space and later fixups pick that up */
819d3fecca9Ssthen static void
820d3fecca9Ssthen rb_mark_free_segs(void* base, udb_void s, uint64_t m)
821d3fecca9Ssthen {
822d3fecca9Ssthen udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
823d3fecca9Ssthen /* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
824d3fecca9Ssthen assert(s >= UDB_ALLOC_CHUNK_SIZE);
825d3fecca9Ssthen while(q >= s) {
826d3fecca9Ssthen UDB_CHUNK(q)->exp = UDB_ALLOC_CHUNKS_MAX;
827d3fecca9Ssthen UDB_CHUNK(q)->type = udb_chunk_type_free;
828d3fecca9Ssthen q -= UDB_ALLOC_CHUNK_SIZE;
829d3fecca9Ssthen }
830d3fecca9Ssthen }
831d3fecca9Ssthen
832d3fecca9Ssthen
833d3fecca9Ssthen /** fsck rollback or rollforward XL move results */
834d3fecca9Ssthen static int
835d3fecca9Ssthen fsck_rb_xl(void* base, udb_base* udb, udb_void rb_old, udb_void rb_new,
836d3fecca9Ssthen uint64_t rb_size, uint64_t rb_seg)
837d3fecca9Ssthen {
838d3fecca9Ssthen
839d3fecca9Ssthen if(rb_old <= rb_new)
840d3fecca9Ssthen return 0; /* XL move one way */
841d3fecca9Ssthen if( (rb_size&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
842d3fecca9Ssthen return 0; /* not aligned */
843d3fecca9Ssthen if( (rb_old&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
844d3fecca9Ssthen return 0; /* not aligned */
845d3fecca9Ssthen if( (rb_new&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
846d3fecca9Ssthen return 0; /* not aligned */
847d3fecca9Ssthen if(rb_new + rb_size <= rb_old) {
848d3fecca9Ssthen /* not overlapping: resume copy */
849d3fecca9Ssthen memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
850d3fecca9Ssthen /* and free up old piece(s) */
851d3fecca9Ssthen rb_mark_free_segs(base, rb_old, rb_size);
852d3fecca9Ssthen } else {
853d3fecca9Ssthen /* overlapping, see what segment we stopped at
854d3fecca9Ssthen * and continue there. */
855d3fecca9Ssthen move_xl_segment(base, udb, rb_old, rb_new, rb_size, rb_seg);
856d3fecca9Ssthen /* free up old piece(s); from the end of the moved segment,
857d3fecca9Ssthen * until the end of the old segment */
858d3fecca9Ssthen rb_mark_free_segs(base, rb_new+rb_size, (rb_old+rb_size)-
859d3fecca9Ssthen (rb_new+rb_size));
860d3fecca9Ssthen }
861d3fecca9Ssthen /* do not call fix_ptrs, regenptrs does the job */
862d3fecca9Ssthen return 1;
863d3fecca9Ssthen }
864d3fecca9Ssthen
865d3fecca9Ssthen /** fsck rollback or rollforward move results */
866d3fecca9Ssthen static int
867d3fecca9Ssthen fsck_rb(void* base, udb_void rb_old, udb_void rb_new, uint64_t rb_size,
868d3fecca9Ssthen udb_void* make_free)
869d3fecca9Ssthen {
870d3fecca9Ssthen if( (rb_size&(rb_size-1)) != 0)
871d3fecca9Ssthen return 0; /* not powerof2 */
872d3fecca9Ssthen if( (rb_old&(rb_size-1)) != 0)
873d3fecca9Ssthen return 0; /* not aligned */
874d3fecca9Ssthen if( (rb_new&(rb_size-1)) != 0)
875d3fecca9Ssthen return 0; /* not aligned */
876d3fecca9Ssthen /* resume copy */
877d3fecca9Ssthen memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
878d3fecca9Ssthen /* do not call fix_ptrs, regenptrs does the job */
879d3fecca9Ssthen /* make sure udb_old is freed */
880d3fecca9Ssthen *make_free = rb_old;
881d3fecca9Ssthen return 1;
882d3fecca9Ssthen }
883d3fecca9Ssthen
884d3fecca9Ssthen /** fsck the file and salvage, false if failed and file is useless */
885d3fecca9Ssthen static int
886d3fecca9Ssthen fsck_file(udb_base* udb, udb_alloc* alloc, int moved)
887d3fecca9Ssthen {
888d3fecca9Ssthen void* base = udb->base;
889d3fecca9Ssthen udb_alloc_d regen;
890d3fecca9Ssthen udb_void at = udb->glob_data->hsize;
891d3fecca9Ssthen udb_void rb_old = udb->glob_data->rb_old;
892d3fecca9Ssthen udb_void rb_new = udb->glob_data->rb_new;
893d3fecca9Ssthen udb_void rb_seg = udb->glob_data->rb_seg;
894d3fecca9Ssthen udb_void make_free = 0;
895d3fecca9Ssthen uint64_t rb_size = udb->glob_data->rb_size;
896d3fecca9Ssthen log_msg(LOG_WARNING, "udb-fsck %s: salvaging", udb->fname);
897d3fecca9Ssthen /* walk through the file, use the exp values to see what can be
898d3fecca9Ssthen * salvaged */
899d3fecca9Ssthen if(moved && rb_old && rb_new && rb_size) {
900d3fecca9Ssthen if(rb_old+rb_size <= alloc->disk->nextgrow
901d3fecca9Ssthen && rb_new+rb_size <= alloc->disk->nextgrow) {
902d3fecca9Ssthen /* we can use the move information to fix up the
903d3fecca9Ssthen * duplicate element (or partially moved element) */
904d3fecca9Ssthen if(rb_size > 1024*1024) {
905d3fecca9Ssthen /* XL chunk */
906d3fecca9Ssthen if(!fsck_rb_xl(base, udb, rb_old, rb_new,
907d3fecca9Ssthen rb_size, rb_seg))
908d3fecca9Ssthen return 0;
909d3fecca9Ssthen } else {
910d3fecca9Ssthen if(!fsck_rb(base, rb_old, rb_new, rb_size,
911d3fecca9Ssthen &make_free))
912d3fecca9Ssthen return 0;
913d3fecca9Ssthen }
914d3fecca9Ssthen }
915d3fecca9Ssthen }
916d3fecca9Ssthen
917d3fecca9Ssthen /* rebuild freelists */
918d3fecca9Ssthen /* recalculate stats in alloc (except 'stat_data') */
919d3fecca9Ssthen /* possibly new end 'nextgrow' value */
920d3fecca9Ssthen memset(®en, 0, sizeof(regen));
921d3fecca9Ssthen regen.nextgrow = alloc->disk->nextgrow;
922d3fecca9Ssthen while(at < regen.nextgrow) {
923d3fecca9Ssthen /* figure out this chunk */
924d3fecca9Ssthen int exp = (int)UDB_CHUNK(at)->exp;
925d3fecca9Ssthen udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
926d3fecca9Ssthen /* consistency check possible here with end-exp */
927d3fecca9Ssthen if(tp == udb_chunk_type_free || at == make_free) {
928d3fecca9Ssthen at = regen_free(base, at, exp, ®en);
929d3fecca9Ssthen if(!at) return 0;
930d3fecca9Ssthen } else if(exp == UDB_EXP_XL) {
931d3fecca9Ssthen /* allocated data of XL size */
932d3fecca9Ssthen at = regen_xl(base, at, ®en);
933d3fecca9Ssthen if(!at) return 0;
934d3fecca9Ssthen } else if(exp >= UDB_ALLOC_CHUNK_MINEXP
935d3fecca9Ssthen && exp <= UDB_ALLOC_CHUNKS_MAX) {
936d3fecca9Ssthen /* allocated data */
937d3fecca9Ssthen at = regen_data(base, at, exp, ®en);
938d3fecca9Ssthen if(!at) return 0;
939d3fecca9Ssthen } else {
940d3fecca9Ssthen /* garbage; this must be EOF then */
941d3fecca9Ssthen regen.nextgrow = at;
942d3fecca9Ssthen break;
943d3fecca9Ssthen }
944d3fecca9Ssthen }
945d3fecca9Ssthen *alloc->disk = regen;
946d3fecca9Ssthen
947d3fecca9Ssthen /* rebuild relptr lists */
948d3fecca9Ssthen regen_ptrlist(base, udb, alloc, rb_old, rb_new);
949d3fecca9Ssthen
950d3fecca9Ssthen log_msg(LOG_WARNING, "udb-fsck %s: salvaged successfully (sync)",
951d3fecca9Ssthen udb->fname);
952d3fecca9Ssthen udb->glob_data->rb_old = 0;
953d3fecca9Ssthen udb->glob_data->rb_new = 0;
954d3fecca9Ssthen udb->glob_data->rb_size = 0;
955d3fecca9Ssthen udb->glob_data->dirty_alloc = udb_dirty_clean;
956d3fecca9Ssthen udb_base_sync(udb, 1);
957d3fecca9Ssthen return 1;
958d3fecca9Ssthen }
959d3fecca9Ssthen
960d3fecca9Ssthen
961d3fecca9Ssthen udb_alloc* udb_alloc_create(udb_base* udb, udb_alloc_d* disk)
962d3fecca9Ssthen {
963d3fecca9Ssthen udb_alloc* alloc = (udb_alloc*)xalloc_zero(sizeof(*alloc));
964d3fecca9Ssthen if(!alloc)
965d3fecca9Ssthen return NULL;
966d3fecca9Ssthen alloc->udb = udb;
967d3fecca9Ssthen alloc->disk = disk;
968d3fecca9Ssthen /* see if committed but uncompleted actions need to be done */
969d3fecca9Ssthen /* preserves the alloc state */
970d3fecca9Ssthen if(udb->glob_data->dirty_alloc != udb_dirty_clean) {
971d3fecca9Ssthen if(udb->glob_data->dirty_alloc == udb_dirty_fsize) {
972d3fecca9Ssthen if(fsck_fsize(udb, alloc))
973d3fecca9Ssthen return alloc;
974d3fecca9Ssthen } else if(udb->glob_data->dirty_alloc == udb_dirty_fl) {
975d3fecca9Ssthen if(fsck_file(udb, alloc, 0))
976d3fecca9Ssthen return alloc;
977d3fecca9Ssthen } else if(udb->glob_data->dirty_alloc == udb_dirty_compact) {
978d3fecca9Ssthen if(fsck_file(udb, alloc, 1))
979d3fecca9Ssthen return alloc;
980d3fecca9Ssthen }
981d3fecca9Ssthen log_msg(LOG_ERR, "error: file allocation dirty (%d)",
982d3fecca9Ssthen (int)udb->glob_data->dirty_alloc);
983d3fecca9Ssthen free(alloc);
984d3fecca9Ssthen return NULL;
985d3fecca9Ssthen }
986d3fecca9Ssthen return alloc;
987d3fecca9Ssthen }
988d3fecca9Ssthen
989d3fecca9Ssthen void udb_alloc_delete(udb_alloc* alloc)
990d3fecca9Ssthen {
991d3fecca9Ssthen if(!alloc) return;
992d3fecca9Ssthen free(alloc);
993d3fecca9Ssthen }
994d3fecca9Ssthen
995d3fecca9Ssthen /** unlink this element from its freelist */
996d3fecca9Ssthen static void
997d3fecca9Ssthen udb_alloc_unlink_fl(void* base, udb_alloc* alloc, udb_void chunk, int exp)
998d3fecca9Ssthen {
999d3fecca9Ssthen udb_free_chunk_d* fp = UDB_FREE_CHUNK(chunk);
1000d3fecca9Ssthen assert(chunk);
1001d3fecca9Ssthen /* chunk is a free chunk */
1002d3fecca9Ssthen assert(fp->exp == (uint8_t)exp);
1003d3fecca9Ssthen assert(fp->type == udb_chunk_type_free);
1004d3fecca9Ssthen assert(chunk_get_last(base, chunk, exp) == (uint8_t)exp);
1005d3fecca9Ssthen /* and thus freelist not empty */
1006d3fecca9Ssthen assert(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]);
1007d3fecca9Ssthen /* unlink */
1008d3fecca9Ssthen if(fp->prev)
1009d3fecca9Ssthen UDB_FREE_CHUNK(fp->prev)->next = fp->next;
1010d3fecca9Ssthen else alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
1011d3fecca9Ssthen if(fp->next)
1012d3fecca9Ssthen UDB_FREE_CHUNK(fp->next)->prev = fp->prev;
1013d3fecca9Ssthen }
1014d3fecca9Ssthen
1015d3fecca9Ssthen /** pop first element off freelist, list may not be empty */
1016d3fecca9Ssthen static udb_void
1017d3fecca9Ssthen udb_alloc_pop_fl(void* base, udb_alloc* alloc, int exp)
1018d3fecca9Ssthen {
1019d3fecca9Ssthen udb_void f = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
1020d3fecca9Ssthen udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
1021d3fecca9Ssthen assert(f);
1022d3fecca9Ssthen assert(fp->exp == (uint8_t)exp);
1023d3fecca9Ssthen assert(fp->type == udb_chunk_type_free);
1024d3fecca9Ssthen assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
1025d3fecca9Ssthen alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
1026d3fecca9Ssthen if(fp->next) {
1027d3fecca9Ssthen UDB_FREE_CHUNK(fp->next)->prev = 0;
1028d3fecca9Ssthen }
1029d3fecca9Ssthen return f;
1030d3fecca9Ssthen }
1031d3fecca9Ssthen
1032d3fecca9Ssthen /** push new element onto freelist */
1033d3fecca9Ssthen static void
1034d3fecca9Ssthen udb_alloc_push_fl(void* base, udb_alloc* alloc, udb_void f, int exp)
1035d3fecca9Ssthen {
1036d3fecca9Ssthen udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
1037d3fecca9Ssthen assert(f);
1038d3fecca9Ssthen fp->exp = (uint8_t)exp;
1039d3fecca9Ssthen fp->type = udb_chunk_type_free;
1040d3fecca9Ssthen fp->flags = 0;
1041d3fecca9Ssthen fp->prev = 0;
1042d3fecca9Ssthen fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
1043d3fecca9Ssthen if(fp->next)
1044d3fecca9Ssthen UDB_FREE_CHUNK(fp->next)->prev = f;
1045d3fecca9Ssthen chunk_set_last(base, f, exp, (uint8_t)exp);
1046d3fecca9Ssthen alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
1047d3fecca9Ssthen }
1048d3fecca9Ssthen
1049d3fecca9Ssthen /** push new element onto freelist - do not initialize the elt */
1050d3fecca9Ssthen static void
1051d3fecca9Ssthen udb_alloc_push_fl_noinit(void* base, udb_alloc* alloc, udb_void f, int exp)
1052d3fecca9Ssthen {
1053d3fecca9Ssthen udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
1054d3fecca9Ssthen assert(f);
1055d3fecca9Ssthen assert(fp->exp == (uint8_t)exp);
1056d3fecca9Ssthen assert(fp->type == udb_chunk_type_free);
1057d3fecca9Ssthen assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
1058d3fecca9Ssthen fp->prev = 0;
1059d3fecca9Ssthen fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
1060d3fecca9Ssthen if(fp->next)
1061d3fecca9Ssthen UDB_FREE_CHUNK(fp->next)->prev = f;
1062d3fecca9Ssthen alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
1063d3fecca9Ssthen }
1064d3fecca9Ssthen
1065d3fecca9Ssthen /** add free chunks at end until specified alignment occurs */
1066d3fecca9Ssthen static void
1067d3fecca9Ssthen grow_align(void* base, udb_alloc* alloc, uint64_t esz)
1068d3fecca9Ssthen {
1069d3fecca9Ssthen while( (alloc->disk->nextgrow & (esz-1)) != 0) {
1070d3fecca9Ssthen /* the nextgrow is not a whole multiple of esz. */
1071d3fecca9Ssthen /* grow a free chunk of max allowed size */
1072d3fecca9Ssthen int fexp = udb_exp_offset(alloc->disk->nextgrow);
1073d3fecca9Ssthen uint64_t fsz = (uint64_t)1<<fexp;
1074d3fecca9Ssthen udb_void f = alloc->disk->nextgrow;
1075d3fecca9Ssthen udb_void fn = alloc->disk->nextgrow+fsz;
1076d3fecca9Ssthen assert(fn <= alloc->udb->base_size);
1077d3fecca9Ssthen alloc->disk->stat_free += fsz;
1078d3fecca9Ssthen udb_alloc_push_fl(base, alloc, f, fexp);
1079d3fecca9Ssthen /* now increase nextgrow to commit that free chunk */
1080d3fecca9Ssthen alloc->disk->nextgrow = fn;
1081d3fecca9Ssthen }
1082d3fecca9Ssthen }
1083d3fecca9Ssthen
1084d3fecca9Ssthen /** append chunks at end of memory space to get size exp, return dataptr */
1085d3fecca9Ssthen static udb_void
1086d3fecca9Ssthen grow_chunks(void* base, udb_alloc* alloc, size_t sz, int exp)
1087d3fecca9Ssthen {
1088d3fecca9Ssthen uint64_t esz = (uint64_t)1<<exp;
1089d3fecca9Ssthen udb_void ret;
1090d3fecca9Ssthen alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1091d3fecca9Ssthen grow_align(base, alloc, esz);
1092d3fecca9Ssthen /* free chunks are grown, grow the one we want to use */
1093d3fecca9Ssthen ret = alloc->disk->nextgrow;
1094d3fecca9Ssthen /* take a new alloced chunk into use */
1095d3fecca9Ssthen UDB_CHUNK(ret)->exp = (uint8_t)exp;
1096d3fecca9Ssthen UDB_CHUNK(ret)->flags = 0;
1097d3fecca9Ssthen UDB_CHUNK(ret)->ptrlist = 0;
1098d3fecca9Ssthen UDB_CHUNK(ret)->type = udb_chunk_type_data;
1099d3fecca9Ssthen /* store last octet */
1100d3fecca9Ssthen chunk_set_last(base, ret, exp, (uint8_t)exp);
1101d3fecca9Ssthen /* update stats */
1102d3fecca9Ssthen alloc->disk->stat_alloc += esz;
1103d3fecca9Ssthen alloc->disk->stat_data += sz;
1104d3fecca9Ssthen /* now increase nextgrow to commit this newly allocated chunk */
1105d3fecca9Ssthen alloc->disk->nextgrow += esz;
1106d3fecca9Ssthen assert(alloc->disk->nextgrow <= alloc->udb->base_size);
1107d3fecca9Ssthen alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1108d3fecca9Ssthen return ret + sizeof(udb_chunk_d); /* ptr to data */
1109d3fecca9Ssthen }
1110d3fecca9Ssthen
1111d3fecca9Ssthen /** calculate how much space is necessary to grow for this exp */
1112d3fecca9Ssthen static uint64_t
1113d3fecca9Ssthen grow_end_calc(udb_alloc* alloc, int exp)
1114d3fecca9Ssthen {
1115d3fecca9Ssthen uint64_t sz = (uint64_t)1<<exp;
1116d3fecca9Ssthen uint64_t ng = alloc->disk->nextgrow;
1117d3fecca9Ssthen uint64_t res;
1118d3fecca9Ssthen /* if nextgrow is 2**expness, no extra growth needed, only size */
1119d3fecca9Ssthen if( (ng & (sz-1)) == 0) {
1120d3fecca9Ssthen /* sz-1 is like 0xfff, and checks if ng is whole 2**exp */
1121d3fecca9Ssthen return ng+sz; /* must grow exactly 2**exp */
1122d3fecca9Ssthen }
1123d3fecca9Ssthen /* grow until 2**expness and then we need 2**exp as well */
1124d3fecca9Ssthen /* so, round ng down to whole sz (basically ng-ng%sz, or ng/sz*sz)
1125d3fecca9Ssthen * and then add the sz twice (go up to whole sz, and to allocate) */
1126d3fecca9Ssthen res = (ng & ~(sz-1)) + 2*sz;
1127d3fecca9Ssthen return res;
1128d3fecca9Ssthen }
1129d3fecca9Ssthen
1130d3fecca9Ssthen /** see if we need to grow more than specified to enable sustained growth */
1131d3fecca9Ssthen static uint64_t
1132d3fecca9Ssthen grow_extra_check(udb_alloc* alloc, uint64_t ge)
1133d3fecca9Ssthen {
1134d3fecca9Ssthen const uint64_t mb = 1024*1024;
1135d3fecca9Ssthen uint64_t bsz = alloc->udb->base_size;
1136d3fecca9Ssthen if(bsz <= mb) {
1137d3fecca9Ssthen /* below 1 Mb, double sizes for exponential growth */
1138d3fecca9Ssthen /* takes about 15 times to grow to 1Mb */
1139d3fecca9Ssthen if(ge < bsz*2)
1140d3fecca9Ssthen return bsz*2;
1141d3fecca9Ssthen } else {
1142d3fecca9Ssthen uint64_t gnow = ge - bsz;
1143d3fecca9Ssthen /* above 1Mb, grow at least 1 Mb, or 12.5% of current size,
1144d3fecca9Ssthen * in whole megabytes rounded up. */
1145d3fecca9Ssthen uint64_t want = ((bsz / 8) & ~(mb-1)) + mb;
1146d3fecca9Ssthen if(gnow < want)
1147d3fecca9Ssthen return bsz + want;
1148d3fecca9Ssthen }
1149d3fecca9Ssthen return ge;
1150d3fecca9Ssthen }
1151d3fecca9Ssthen
1152bc6311d7Sflorian /** see if free space is enough to warrant shrink (while file is open) */
1153d3fecca9Ssthen static int
1154d3fecca9Ssthen enough_free(udb_alloc* alloc)
1155d3fecca9Ssthen {
1156d3fecca9Ssthen if(alloc->udb->base_size <= 2*1024*1024) {
1157d3fecca9Ssthen /* below 1 Mb, grown by double size, (so up to 2 mb),
1158d3fecca9Ssthen * do not shrink unless we can 1/3 in size */
1159d3fecca9Ssthen if(((size_t)alloc->disk->nextgrow)*3 <= alloc->udb->base_size)
1160d3fecca9Ssthen return 1;
1161d3fecca9Ssthen } else {
1162d3fecca9Ssthen /* grown 12.5%, shrink 25% if possible, at least one mb */
1163d3fecca9Ssthen /* between 1mb and 4mb size, it shrinks by 1mb if possible */
1164d3fecca9Ssthen uint64_t space = alloc->udb->base_size - alloc->disk->nextgrow;
1165d3fecca9Ssthen if(space >= 1024*1024 && (space*4 >= alloc->udb->base_size
1166d3fecca9Ssthen || alloc->udb->base_size < 4*1024*1024))
1167d3fecca9Ssthen return 1;
1168d3fecca9Ssthen }
1169d3fecca9Ssthen return 0;
1170d3fecca9Ssthen }
1171d3fecca9Ssthen
1172d3fecca9Ssthen /** grow space for a chunk of 2**exp and return dataptr */
1173d3fecca9Ssthen static udb_void
1174d3fecca9Ssthen udb_alloc_grow_space(void* base, udb_alloc* alloc, size_t sz, int exp)
1175d3fecca9Ssthen {
1176d3fecca9Ssthen /* commit the grow action
1177d3fecca9Ssthen * - the file grow only changes filesize, but not the nextgrow.
1178d3fecca9Ssthen * - taking space after nextgrow into use (as free space),
1179d3fecca9Ssthen * is like free-ing a chunk (one at a time).
1180d3fecca9Ssthen * - and the last chunk taken into use is like alloc.
1181d3fecca9Ssthen */
1182d3fecca9Ssthen /* predict how much free space is needed for this */
1183d3fecca9Ssthen uint64_t grow_end = grow_end_calc(alloc, exp);
1184d3fecca9Ssthen assert(alloc->udb->base_size >= alloc->disk->nextgrow);
1185d3fecca9Ssthen if(grow_end <= alloc->udb->base_size) {
1186d3fecca9Ssthen /* we can do this with the available space */
1187d3fecca9Ssthen return grow_chunks(base, alloc, sz, exp);
1188d3fecca9Ssthen }
1189d3fecca9Ssthen /* we have to grow the file, re-mmap */
1190d3fecca9Ssthen /* see if we need to grow a little more, to avoid endless grow
1191d3fecca9Ssthen * efforts on adding data */
1192d3fecca9Ssthen grow_end = grow_extra_check(alloc, grow_end);
1193d3fecca9Ssthen if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
1194d3fecca9Ssthen return 0; /* mmap or write failed (disk or mem full) */
1195d3fecca9Ssthen }
1196d3fecca9Ssthen /* we have enough space now */
1197d3fecca9Ssthen assert(grow_end <= alloc->udb->base_size);
1198d3fecca9Ssthen assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
1199d3fecca9Ssthen return grow_chunks(base, alloc, sz, exp);
1200d3fecca9Ssthen }
1201d3fecca9Ssthen
1202d3fecca9Ssthen /** take XL allocation into use at end of file, return dataptr */
1203d3fecca9Ssthen static udb_void
1204d3fecca9Ssthen grow_xl(void* base, udb_alloc* alloc, uint64_t xlsz, uint64_t sz)
1205d3fecca9Ssthen {
1206d3fecca9Ssthen udb_void ret;
1207d3fecca9Ssthen udb_xl_chunk_d* p;
1208d3fecca9Ssthen alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1209d3fecca9Ssthen
1210d3fecca9Ssthen /* align growth to whole mbs */
1211d3fecca9Ssthen grow_align(base, alloc, UDB_ALLOC_CHUNK_SIZE);
1212d3fecca9Ssthen
1213d3fecca9Ssthen /* grow XL segment */
1214d3fecca9Ssthen ret = alloc->disk->nextgrow;
1215d3fecca9Ssthen p = UDB_XL_CHUNK(ret);
1216d3fecca9Ssthen p->exp = UDB_EXP_XL;
1217d3fecca9Ssthen p->size = xlsz;
1218d3fecca9Ssthen p->flags = 0;
1219d3fecca9Ssthen p->ptrlist = 0;
1220d3fecca9Ssthen p->type = udb_chunk_type_data;
1221d3fecca9Ssthen
1222d3fecca9Ssthen /* also put size and marker at end for compaction */
1223d3fecca9Ssthen *((uint64_t*)(UDB_REL(base, ret+xlsz-sizeof(uint64_t)*2))) = xlsz;
1224d3fecca9Ssthen *((uint8_t*)(UDB_REL(base, ret+xlsz-1))) = UDB_EXP_XL;
1225d3fecca9Ssthen
1226d3fecca9Ssthen /* stats */
1227d3fecca9Ssthen alloc->disk->stat_data += sz;
1228d3fecca9Ssthen alloc->disk->stat_alloc += xlsz;
1229d3fecca9Ssthen /* now increase the nextgrow to commit this xl chunk */
1230d3fecca9Ssthen alloc->disk->nextgrow += xlsz;
1231d3fecca9Ssthen alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1232d3fecca9Ssthen return ret + sizeof(udb_xl_chunk_d); /* data ptr */
1233d3fecca9Ssthen }
1234d3fecca9Ssthen
1235d3fecca9Ssthen /** make space for XL allocation */
1236d3fecca9Ssthen static udb_void
1237d3fecca9Ssthen udb_alloc_xl_space(void* base, udb_alloc* alloc, size_t sz)
1238d3fecca9Ssthen {
1239d3fecca9Ssthen /* allocate whole mbs of space, at end of space */
1240d3fecca9Ssthen uint64_t asz = sz + sizeof(udb_xl_chunk_d) + sizeof(uint64_t)*2;
1241d3fecca9Ssthen uint64_t need=(asz+UDB_ALLOC_CHUNK_SIZE-1)&(~(UDB_ALLOC_CHUNK_SIZE-1));
1242d3fecca9Ssthen uint64_t grow_end = grow_end_calc(alloc, UDB_ALLOC_CHUNKS_MAX) + need;
1243d3fecca9Ssthen assert(need >= asz);
1244d3fecca9Ssthen if(grow_end <= alloc->udb->base_size) {
1245d3fecca9Ssthen /* can do this in available space */
1246d3fecca9Ssthen return grow_xl(base, alloc, need, sz);
1247d3fecca9Ssthen }
1248d3fecca9Ssthen /* have to grow file and re-mmap */
1249d3fecca9Ssthen grow_end = grow_extra_check(alloc, grow_end);
1250d3fecca9Ssthen if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
1251d3fecca9Ssthen return 0; /* mmap or write failed (disk or mem full) */
1252d3fecca9Ssthen }
1253d3fecca9Ssthen /* we have enough space now */
1254d3fecca9Ssthen assert(grow_end <= alloc->udb->base_size);
1255d3fecca9Ssthen assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
1256d3fecca9Ssthen return grow_xl(base, alloc, need, sz);
1257d3fecca9Ssthen }
1258d3fecca9Ssthen
1259d3fecca9Ssthen /** divide big(2**e2) into pieces so 2**exp fits */
1260d3fecca9Ssthen static udb_void
1261d3fecca9Ssthen udb_alloc_subdivide(void* base, udb_alloc* alloc, udb_void big, int e2,
1262d3fecca9Ssthen int exp)
1263d3fecca9Ssthen {
1264d3fecca9Ssthen int e = e2;
1265d3fecca9Ssthen uint64_t sz = (uint64_t)1<<e2;
1266d3fecca9Ssthen assert(big && e2 > exp);
1267d3fecca9Ssthen /* so the returned piece to use is the first piece,
1268d3fecca9Ssthen * offload the later half until it fits */
1269d3fecca9Ssthen do {
1270d3fecca9Ssthen sz >>= 1; /* divide size of big by two */
1271d3fecca9Ssthen e--; /* that means its exp is one smaller */
1272d3fecca9Ssthen udb_alloc_push_fl(base, alloc, big+sz, e);
1273d3fecca9Ssthen } while(e != exp);
1274d3fecca9Ssthen /* exit loop when last pushed is same size as what we want */
1275d3fecca9Ssthen return big;
1276d3fecca9Ssthen }
1277d3fecca9Ssthen
1278d3fecca9Ssthen /** returns the exponent size of the chunk needed for data sz */
1279d3fecca9Ssthen static int
1280d3fecca9Ssthen udb_alloc_exp_needed(size_t sz)
1281d3fecca9Ssthen {
1282d3fecca9Ssthen uint64_t asz = sz + sizeof(udb_chunk_d) + 1;
1283*4564029fSflorian int exp;
1284d3fecca9Ssthen if(asz > UDB_ALLOC_CHUNK_SIZE) {
1285d3fecca9Ssthen return UDB_EXP_XL;
1286d3fecca9Ssthen } else if(asz <= UDB_ALLOC_CHUNK_MINSIZE) {
1287d3fecca9Ssthen return UDB_ALLOC_CHUNK_MINEXP;
1288d3fecca9Ssthen }
1289*4564029fSflorian exp = udb_exp_size(asz);
1290*4564029fSflorian assert(exp <= UDB_ALLOC_CHUNKS_MAX);
1291*4564029fSflorian return exp;
1292d3fecca9Ssthen }
1293d3fecca9Ssthen
/** allocate sz bytes of data space; returns the (relative) data pointer,
 * or 0 on failure.  Strategy: XL sizes go to the XL allocator; otherwise
 * take an exact-size free chunk, else subdivide a larger free chunk,
 * else grow the file.  The dirty_alloc flag brackets every freelist
 * mutation so an interrupted allocation can be repaired by fsck. */
udb_void udb_alloc_space(udb_alloc* alloc, size_t sz)
{
	void* base = alloc->udb->base;
	/* calculate actual allocation size */
	int e2, exp = udb_alloc_exp_needed(sz);
	if(exp == UDB_EXP_XL)
		return udb_alloc_xl_space(base, alloc, sz);
	/* see if there is a free chunk of that size exactly */
	if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]) {
		/* snip from freelist, udb_chunk_d */
		udb_void ret;
		alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
		ret = udb_alloc_pop_fl(base, alloc, exp);
		/* use it - size octets already OK */
		UDB_CHUNK(ret)->flags = 0;
		UDB_CHUNK(ret)->ptrlist = 0;
		/* setting the type commits the chunk as data */
		UDB_CHUNK(ret)->type = udb_chunk_type_data;
		/* update stats */
		alloc->disk->stat_data += sz;
		alloc->disk->stat_alloc += (1<<exp);
		assert(alloc->disk->stat_free >= (1u<<exp));
		alloc->disk->stat_free -= (1<<exp);
		alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
		return ret + sizeof(udb_chunk_d); /* ptr to data */
	}
	/* see if we can subdivide a larger chunk */
	for(e2 = exp+1; e2 <= UDB_ALLOC_CHUNKS_MAX; e2++)
	if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
		udb_void big, ret; /* udb_chunk_d */
		alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
		big = udb_alloc_pop_fl(base, alloc, e2);
		/* push other parts onto freelists (needs inited) */
		ret = udb_alloc_subdivide(base, alloc, big, e2, exp);
		/* use final part (needs inited) */
		UDB_CHUNK(ret)->exp = (uint8_t)exp;
		/* if stop here; the new exp makes smaller free chunk*/
		UDB_CHUNK(ret)->flags = 0;
		UDB_CHUNK(ret)->ptrlist = 0;
		/* set type to commit data chunk */
		UDB_CHUNK(ret)->type = udb_chunk_type_data;
		/* store last octet */
		chunk_set_last(base, ret, exp, (uint8_t)exp);
		/* update stats */
		alloc->disk->stat_data += sz;
		alloc->disk->stat_alloc += (1<<exp);
		assert(alloc->disk->stat_free >= (1u<<exp));
		alloc->disk->stat_free -= (1<<exp);
		alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
		return ret + sizeof(udb_chunk_d); /* ptr to data */
	}
	/* we need to grow an extra chunk */
	return udb_alloc_grow_space(base, alloc, sz, exp);
}
1347d3fecca9Ssthen
1348d3fecca9Ssthen /** see if there is free space to allocate a chunk into */
1349d3fecca9Ssthen static int
1350d3fecca9Ssthen have_free_for(udb_alloc* alloc, int exp)
1351d3fecca9Ssthen {
1352d3fecca9Ssthen int e2;
1353d3fecca9Ssthen if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP])
1354d3fecca9Ssthen return exp;
1355595fd91cSbrad for(e2 = exp+1; e2 <= UDB_ALLOC_CHUNKS_MAX; e2++)
1356d3fecca9Ssthen if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
1357d3fecca9Ssthen return e2;
1358d3fecca9Ssthen }
1359d3fecca9Ssthen return 0;
1360d3fecca9Ssthen }
1361d3fecca9Ssthen
1362d3fecca9Ssthen /** fix relptr prev and next for moved relptr structures */
1363d3fecca9Ssthen static void
1364d3fecca9Ssthen chunk_fix_ptr_each(void* base, udb_rel_ptr* rp, void* arg)
1365d3fecca9Ssthen {
1366d3fecca9Ssthen udb_void* data = (udb_void*)arg;
1367d3fecca9Ssthen udb_void r;
1368d3fecca9Ssthen if(!rp->data)
1369d3fecca9Ssthen return;
1370d3fecca9Ssthen r = UDB_SYSTOREL(base, rp);
1371d3fecca9Ssthen if(rp->next)
1372d3fecca9Ssthen UDB_REL_PTR(rp->next)->prev = r;
1373d3fecca9Ssthen if(rp->prev)
1374d3fecca9Ssthen UDB_REL_PTR(rp->prev)->next = r;
1375d3fecca9Ssthen else {
1376d3fecca9Ssthen /* if this is a pointer to its own chunk, fix it up;
1377d3fecca9Ssthen * the data ptr gets set by relptr_edit later. */
1378d3fecca9Ssthen if(rp->data == data[0])
1379d3fecca9Ssthen UDB_CHUNK(data[1])->ptrlist = r;
1380d3fecca9Ssthen else UDB_CHUNK(chunk_from_dataptr(rp->data))->ptrlist = r;
1381d3fecca9Ssthen }
1382d3fecca9Ssthen }
1383d3fecca9Ssthen
1384d3fecca9Ssthen /** fix pointers from and to a moved chunk */
1385d3fecca9Ssthen static void
1386d3fecca9Ssthen chunk_fix_ptrs(void* base, udb_base* udb, udb_chunk_d* cp, udb_void data,
1387d3fecca9Ssthen uint64_t dsz, udb_void olddata)
1388d3fecca9Ssthen {
1389d3fecca9Ssthen udb_void d[2];
1390d3fecca9Ssthen d[0] = olddata;
1391d3fecca9Ssthen d[1] = data;
1392d3fecca9Ssthen (*udb->walkfunc)(base, udb->walkarg, cp->type, UDB_REL(base, data),
1393d3fecca9Ssthen dsz, &chunk_fix_ptr_each, d);
1394d3fecca9Ssthen udb_rel_ptr_edit(base, cp->ptrlist, data);
1395d3fecca9Ssthen udb_base_ram_ptr_edit(udb, olddata, data);
1396d3fecca9Ssthen }
1397d3fecca9Ssthen
/** move an allocated chunk to use a free chunk.
 * f: the chunk to move (exp, esz its size class and byte size),
 * e2: size class of an available free chunk (e2 >= exp).
 * The old chunk f is left in place; the caller frees it. */
static void
move_chunk(void* base, udb_alloc* alloc, udb_void f, int exp, uint64_t esz,
	int e2)
{
	/* take a free chunk of class e2 off the freelist */
	udb_void res = udb_alloc_pop_fl(base, alloc, e2);
	udb_chunk_d* rp;
	udb_chunk_d* fp;
	if(exp != e2) {
		/* it is bigger, subdivide it */
		res = udb_alloc_subdivide(base, alloc, res, e2, exp);
	}
	assert(res != f);
	/* setup rollback information; written before the copy so the
	 * move can be undone after a crash mid-copy */
	alloc->udb->glob_data->rb_old = f;
	alloc->udb->glob_data->rb_new = res;
	alloc->udb->glob_data->rb_size = esz;
	/* take the res, exp into use */
	rp = UDB_CHUNK(res);
	fp = UDB_CHUNK(f);
	/* copy over the data (whole chunk, header included) */
	memcpy(rp, fp, esz);
	/* adjust rel ptrs; the data part is the chunk minus its header
	 * and minus the trailing exp byte */
	chunk_fix_ptrs(base, alloc->udb, rp, res+sizeof(udb_chunk_d),
		esz-sizeof(udb_chunk_d)-1, f+sizeof(udb_chunk_d));

	/* do not freeup the fp; caller does that */
}
1426d3fecca9Ssthen
1427d3fecca9Ssthen /** unlink several free elements to overwrite with xl chunk */
1428d3fecca9Ssthen static void
1429d3fecca9Ssthen free_xl_space(void* base, udb_alloc* alloc, udb_void s, uint64_t m)
1430d3fecca9Ssthen {
1431d3fecca9Ssthen udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
1432d3fecca9Ssthen /* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
1433d3fecca9Ssthen assert(s >= UDB_ALLOC_CHUNK_SIZE);
1434d3fecca9Ssthen while(q >= s) {
1435d3fecca9Ssthen assert(UDB_CHUNK(q)->exp == UDB_ALLOC_CHUNKS_MAX);
1436d3fecca9Ssthen assert(UDB_CHUNK(q)->type == udb_chunk_type_free);
1437d3fecca9Ssthen udb_alloc_unlink_fl(base, alloc, q, UDB_ALLOC_CHUNKS_MAX);
1438d3fecca9Ssthen q -= UDB_ALLOC_CHUNK_SIZE;
1439d3fecca9Ssthen }
1440d3fecca9Ssthen }
1441d3fecca9Ssthen
1442d3fecca9Ssthen /** move an XL chunk, and keep track of segments for rollback */
1443d3fecca9Ssthen static void
1444d3fecca9Ssthen move_xl_segment(void* base, udb_base* udb, udb_void xl, udb_void n,
1445d3fecca9Ssthen uint64_t sz, uint64_t startseg)
1446d3fecca9Ssthen {
1447d3fecca9Ssthen udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
1448d3fecca9Ssthen udb_xl_chunk_d* np = UDB_XL_CHUNK(n);
1449d3fecca9Ssthen uint64_t amount = xl - n;
1450d3fecca9Ssthen assert(n < xl); /* move to compact */
1451d3fecca9Ssthen
1452d3fecca9Ssthen /* setup move rollback */
1453d3fecca9Ssthen udb->glob_data->rb_old = xl;
1454d3fecca9Ssthen udb->glob_data->rb_new = n;
1455d3fecca9Ssthen udb->glob_data->rb_size = sz;
1456d3fecca9Ssthen
1457d3fecca9Ssthen /* is it overlapping? */
1458d3fecca9Ssthen if(sz <= amount) {
1459d3fecca9Ssthen memcpy(np, xlp, sz);
1460d3fecca9Ssthen } else {
1461d3fecca9Ssthen /* move and commit per 1M segment to avoid data loss */
1462d3fecca9Ssthen uint64_t seg, maxseg = amount/UDB_ALLOC_CHUNK_SIZE;
1463d3fecca9Ssthen for(seg = startseg; seg<maxseg; seg++) {
1464d3fecca9Ssthen udb->glob_data->rb_seg = seg;
1465d3fecca9Ssthen memcpy(np+seg*UDB_ALLOC_CHUNK_SIZE,
1466d3fecca9Ssthen xlp+seg*UDB_ALLOC_CHUNK_SIZE,
1467d3fecca9Ssthen UDB_ALLOC_CHUNK_SIZE);
1468d3fecca9Ssthen }
1469d3fecca9Ssthen
1470d3fecca9Ssthen }
1471d3fecca9Ssthen }
1472d3fecca9Ssthen
/** move list of XL chunks to the front by the shift amount.
 * xl_start: first chunk of the adjoining XL run, xl_sz: total bytes of
 * the run, amount: bytes to shift down (a multiple of the chunk size).
 * Updates the allocator stats and clears the rollback record when done. */
static void
move_xl_list(void* base, udb_alloc* alloc, udb_void xl_start, uint64_t xl_sz,
	uint64_t amount)
{
	udb_void xl = xl_start;
	assert( (xl_start&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
	assert( (amount&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
	assert( (xl_sz&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
	/* move each chunk of the run, front first, and fix its pointers */
	while(xl < xl_start+xl_sz) {
		udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
		udb_void n = xl-amount;
		uint64_t sz = xlp->size;
		assert(xlp->exp == UDB_EXP_XL);
		move_xl_segment(base, alloc->udb, xl, n, sz, 0);
		/* data part excludes the XL header and the trailing
		 * size+exp marker (2 uint64s) at the chunk end */
		chunk_fix_ptrs(base, alloc->udb, UDB_CHUNK(n),
			n+sizeof(udb_xl_chunk_d),
			sz-sizeof(udb_xl_chunk_d)-sizeof(uint64_t)*2,
			xl+sizeof(udb_xl_chunk_d));
	}
	/* the freed space at the end is no longer inside the used region */
	alloc->disk->stat_free -= amount;
	alloc->disk->nextgrow -= amount;
	/* clear the move rollback record; the move is complete */
	alloc->udb->glob_data->rb_old = 0;
	alloc->udb->glob_data->rb_new = 0;
	alloc->udb->glob_data->rb_size = 0;
}
1499d3fecca9Ssthen
1500d3fecca9Ssthen /** see if free chunk can coagulate with another chunk, return other chunk */
1501d3fecca9Ssthen static udb_void
1502d3fecca9Ssthen coagulate_possible(void* base, udb_alloc* alloc, udb_void f, int exp,
1503d3fecca9Ssthen uint64_t esz)
1504d3fecca9Ssthen {
1505d3fecca9Ssthen udb_void other = f^esz;
1506d3fecca9Ssthen if(exp == UDB_ALLOC_CHUNKS_MAX)
1507d3fecca9Ssthen return 0; /* no further merges */
1508d3fecca9Ssthen if(other >= alloc->udb->base_size)
1509d3fecca9Ssthen return 0; /* not allocated */
1510d3fecca9Ssthen if(other >= alloc->disk->nextgrow)
1511d3fecca9Ssthen return 0; /* not in use */
1512d3fecca9Ssthen if(other < alloc->udb->glob_data->hsize)
1513d3fecca9Ssthen return 0; /* cannot merge with header */
1514d3fecca9Ssthen /* the header is also protected by the special exp marker */
1515d3fecca9Ssthen /* see if the other chunk is a free chunk */
1516d3fecca9Ssthen
1517d3fecca9Ssthen /* check closest marker to avoid large memory churn */
1518d3fecca9Ssthen /* and also it makes XL allocations and header special markers work */
1519d3fecca9Ssthen if(f > other) {
1520d3fecca9Ssthen assert(f > 1); /* this is certain because of header */
1521d3fecca9Ssthen if(*((uint8_t*)UDB_REL(base, f-1)) == (uint8_t)exp) {
1522d3fecca9Ssthen /* can do it if the other part is a free chunk */
1523d3fecca9Ssthen assert(UDB_FREE_CHUNK(other)->exp == (uint8_t)exp);
1524d3fecca9Ssthen if(UDB_CHUNK(other)->type == udb_chunk_type_free)
1525d3fecca9Ssthen return other;
1526d3fecca9Ssthen }
1527d3fecca9Ssthen } else {
1528d3fecca9Ssthen if(UDB_CHUNK(other)->exp == (uint8_t)exp) {
1529d3fecca9Ssthen /* can do it if the other part is a free chunk */
1530d3fecca9Ssthen assert(chunk_get_last(base, other, exp)==(uint8_t)exp);
1531d3fecca9Ssthen if(UDB_CHUNK(other)->type == udb_chunk_type_free)
1532d3fecca9Ssthen return other;
1533d3fecca9Ssthen }
1534d3fecca9Ssthen }
1535d3fecca9Ssthen return 0;
1536d3fecca9Ssthen }
1537d3fecca9Ssthen
1538d3fecca9Ssthen /** coagulate and then add new free segment, return final free segment */
1539d3fecca9Ssthen static udb_void
1540d3fecca9Ssthen coagulate_and_push(void* base, udb_alloc* alloc, udb_void last, int exp,
1541d3fecca9Ssthen uint64_t esz)
1542d3fecca9Ssthen {
1543d3fecca9Ssthen /* new free chunk here, attempt coagulate */
1544d3fecca9Ssthen udb_void other;
1545d3fecca9Ssthen while( (other=coagulate_possible(base, alloc, last, exp, esz)) ) {
1546d3fecca9Ssthen /* unlink that other chunk */
1547d3fecca9Ssthen udb_alloc_unlink_fl(base, alloc, other, exp);
1548d3fecca9Ssthen /* merge up */
1549d3fecca9Ssthen if(other < last)
1550d3fecca9Ssthen last = other;
1551d3fecca9Ssthen exp++;
1552d3fecca9Ssthen esz <<= 1;
1553d3fecca9Ssthen }
1554d3fecca9Ssthen /* free the final segment */
1555d3fecca9Ssthen udb_alloc_push_fl(base, alloc, last, exp);
1556d3fecca9Ssthen return last;
1557d3fecca9Ssthen }
1558d3fecca9Ssthen
/** attempt to compact the data and move free space to the end.
 * Scans backwards from the end of the used region; free chunks at the
 * end are unlinked, used chunks are moved into earlier free space, and
 * trailing XL chunk runs are shifted down over whole-Mb free space.
 * Finally the file is shrunk and re-mmapped if enough space is free.
 * returns 0 on (remap) failure, 1 otherwise. */
int
udb_alloc_compact(void* base, udb_alloc* alloc)
{
	udb_void last;
	int exp, e2;
	uint64_t esz;
	/* scan starts at the end of the used region */
	uint64_t at = alloc->disk->nextgrow;
	udb_void xl_start = 0;	/* start of trailing XL run seen so far */
	uint64_t xl_sz = 0;	/* total size of that XL run */
	if(alloc->udb->inhibit_compact)
		return 1; /* compaction disabled; not a failure */
	alloc->udb->useful_compact = 0;
	while(at > alloc->udb->glob_data->hsize) {
		/* grab last entry; every chunk stores its exp in its
		 * last byte */
		exp = (int)*((uint8_t*)UDB_REL(base, at-1));
		if(exp == UDB_EXP_XL) {
			/* for XL chunks:
			 * - inspect the size of the XLchunklist at end
			 * - attempt to compact in front of of XLchunklist
			 */
			uint64_t xlsz = *((uint64_t*)UDB_REL(base,
				at-sizeof(uint64_t)*2));
			udb_void xl = at-xlsz;
#ifndef NDEBUG
			udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
			assert(xlp->exp == UDB_EXP_XL);
			assert(xlp->type != udb_chunk_type_free);
#endif
			/* got thesegment add to the xl chunk list */
			if(xl_start != 0 && xl+xlsz != xl_start) {
				/* nonadjoining XL part, but they are aligned,
				 * so the space in between is whole Mbs,
				 * shift the later part(s) and continue */
				uint64_t m = xl_start - (xl+xlsz);
				assert(xl_start > xl+xlsz);
				/* mark dirty over the unlink+move so a
				 * crash mid-way can be recovered */
				alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
				free_xl_space(base, alloc, xl+xlsz, m);
				move_xl_list(base, alloc, xl_start, xl_sz, m);
				alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
			}
			xl_start = xl;
			xl_sz += xlsz;
			at = xl;
			continue;
			/* end of XL if */
		} else if(exp < UDB_ALLOC_CHUNK_MINEXP
			|| exp > UDB_ALLOC_CHUNKS_MAX)
			break; /* special chunk or garbage */
		esz = (uint64_t)1<<exp;
		last = at - esz;
		assert(UDB_CHUNK(last)->exp == (uint8_t)exp);
		if(UDB_CHUNK(last)->type == udb_chunk_type_free) {
			/* if xlstart continue looking to move stuff, but do
			 * not unlink this free segment */
			if(!xl_start) {
				/* it is a free chunk, remove it */
				alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
				udb_alloc_unlink_fl(base, alloc, last, exp);
				alloc->disk->stat_free -= esz;
				alloc->disk->nextgrow = last;
				alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
				/* and continue at this point */
			}
			at = last;
		} else if( (e2=have_free_for(alloc, exp)) ) {
			/* last entry can be allocated in free chunks
			 * move it to its new position, adjust rel_ptrs */
			alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
			move_chunk(base, alloc, last, exp, esz, e2);
			if(xl_start) {
				/* cannot shorten the region (XL run is
				 * behind us); leave the hole as free space */
				last = coagulate_and_push(base, alloc,
					last, exp, esz);
			} else {
				/* shorten usage */
				alloc->disk->stat_free -= esz;
				alloc->disk->nextgrow = last;
			}
			/* move complete; clear the rollback record */
			alloc->udb->glob_data->rb_old = 0;
			alloc->udb->glob_data->rb_new = 0;
			alloc->udb->glob_data->rb_size = 0;
			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
			/* and continue in front of it */
			at = last;
		} else {
			/* cannot compact this block, stop compacting */
			break;
		}
		/* if that worked, repeat it */
	}
	/* if we passed xl chunks, see if XL-chunklist can move */
	if(xl_start) {
		/* calculate free space in front of the XLchunklist. */
		/* has to be whole mbs of free space */
		/* if so, we can move the XL chunks. Move them all back
		 * by the new free space. */
		/* this compacts very well, but the XL chunks can be moved
		 * multiple times; worst case for every mb freed a huge sized
		 * xlchunklist gets moved. */
		/* free space must be, since aligned and coagulated, in
		 * chunks of a whole MB */
		udb_void at = xl_start;
		uint64_t m = 0;
		while(*((uint8_t*)UDB_REL(base, at-1))==UDB_ALLOC_CHUNKS_MAX){
			udb_void chunk = at - UDB_ALLOC_CHUNK_SIZE;
			if(UDB_CHUNK(chunk)->type != udb_chunk_type_free)
				break;
			assert(UDB_CHUNK(chunk)->exp==UDB_ALLOC_CHUNKS_MAX);
			m += UDB_ALLOC_CHUNK_SIZE;
			at = chunk;
		}
		if(m != 0) {
			assert(at+m == xl_start);
			alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
			free_xl_space(base, alloc, at, m);
			move_xl_list(base, alloc, xl_start, xl_sz, m);
			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
		}
	}

	/* if enough free, shrink the file; re-mmap */
	if(enough_free(alloc)) {
		uint64_t nsize = alloc->disk->nextgrow;
		udb_base_shrink(alloc->udb, nsize);
		if(!udb_base_remap(alloc->udb, alloc, nsize))
			return 0;
	}
	return 1;
}
1688d3fecca9Ssthen
168915ed76cbSbrad int
169015ed76cbSbrad udb_compact(udb_base* udb)
169115ed76cbSbrad {
169215ed76cbSbrad if(!udb) return 1;
169315ed76cbSbrad if(!udb->useful_compact) return 1;
169415ed76cbSbrad DEBUG(DEBUG_DBACCESS, 1, (LOG_INFO, "Compacting database..."));
169515ed76cbSbrad return udb_alloc_compact(udb->base, udb->alloc);
169615ed76cbSbrad }
169715ed76cbSbrad
169815ed76cbSbrad void udb_compact_inhibited(udb_base* udb, int inhibit)
169915ed76cbSbrad {
170015ed76cbSbrad if(!udb) return;
170115ed76cbSbrad udb->inhibit_compact = inhibit;
170215ed76cbSbrad }
170315ed76cbSbrad
#ifdef UDB_CHECK
/** check that rptrs are really zero before free.
 * walk callback; base and arg are unused, only the assert matters. */
void udb_check_rptr_zero(void* base, udb_rel_ptr* p, void* arg)
{
	(void)base;
	(void)arg;
	assert(p->data == 0);
}
#endif /* UDB_CHECK */
1713d3fecca9Ssthen
/** free XL chunk as multiples of CHUNK_SIZE free segments.
 * f: the XL chunk, fp: its descriptor, sz: the user data size
 * (for the stat_data bookkeeping only). */
static void
udb_free_xl(void* base, udb_alloc* alloc, udb_void f, udb_xl_chunk_d* fp,
	size_t sz)
{
	uint64_t xlsz = fp->size;
	uint64_t c;
	/* lightweight check for buffer overflow in xl data: the size and
	 * exp markers duplicated at the chunk end must still match */
	assert(*((uint64_t*)(UDB_REL(base, f+xlsz-sizeof(uint64_t)*2)))==xlsz);
	assert(*((uint8_t*)(UDB_REL(base, f+xlsz-1))) == UDB_EXP_XL);
	assert( (xlsz & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* whole mbs */
	assert( (f & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
#ifdef UDB_CHECK
	/* check that relptrs in this chunk have been zeroed */
	(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
		UDB_REL(base, f+sizeof(udb_xl_chunk_d)), xlsz,
		&udb_check_rptr_zero, NULL);
#endif
	/* mark dirty while the freelists are inconsistent */
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	/* update stats */
	alloc->disk->stat_data -= sz;
	alloc->disk->stat_alloc -= xlsz;
	alloc->disk->stat_free += xlsz;
	/* walk in reverse, so the front blocks go first on the list */
	c = f + xlsz - UDB_ALLOC_CHUNK_SIZE;
	/* because of header and alignment we know f >= UDB_ALLOC_CHUNK_SIZE*/
	assert(f >= UDB_ALLOC_CHUNK_SIZE);
	while(c >= f) {
		/* free a block of CHUNK_SIZE (1 Mb) */
		udb_alloc_push_fl(base, alloc, c, UDB_ALLOC_CHUNKS_MAX);
		c -= UDB_ALLOC_CHUNK_SIZE;
	}
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
}
1748d3fecca9Ssthen
/** free the data chunk at r (sz: user data size, for stats);
 * coagulates with free buddies, then attempts compaction (or defers it
 * when compaction is inhibited).  returns 0 only if the compaction's
 * remap fails. */
int udb_alloc_free(udb_alloc* alloc, udb_void r, size_t sz)
{
	void* base;
	/* lookup chunk ptr */
	udb_void f;
	udb_chunk_d* fp;
	uint64_t esz;
	int exp;
	udb_void other;
	int coagulated = 0;
	if(!r)
		return 1; /* free(NULL) does nothing */

	/* lookup size of chunk */
	base = alloc->udb->base;
	/* fails for XL blocks */
	f = chunk_from_dataptr(r);
	fp = UDB_CHUNK(f);
	assert(fp->type != udb_chunk_type_free);

	/* see if it has a ptrlist, if so: trouble, the list is not properly
	 * cleaned up. (although you can imagine a wholesale delete where
	 * it does not matter) */
	assert(fp->ptrlist == 0);

	/* set ptrlist to 0 to stop relptr from using it, robustness. */
	fp->ptrlist = 0;

	if(fp->exp == UDB_EXP_XL) {
		/* XL chunks are freed as whole-Mb segments */
		udb_free_xl(base, alloc, f, (udb_xl_chunk_d*)fp, sz);
		/* compact */
		if(alloc->udb->inhibit_compact) {
			/* remember that compaction would be useful now */
			alloc->udb->useful_compact = 1;
			return 1;
		}
		return udb_alloc_compact(base, alloc);
	}
	/* it is a regular chunk of 2**exp size */
	exp = (int)fp->exp;
	esz = (uint64_t)1<<exp;
	/* light check for e.g. buffer overflow of the data */
	assert(sz < esz);
	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
#ifdef UDB_CHECK
	/* check that relptrs in this chunk have been zeroed */
	(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
		UDB_REL(base, r), esz, &udb_check_rptr_zero, NULL);
#endif

	/* update the stats; dirty while the freelists are inconsistent */
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	alloc->disk->stat_data -= sz;
	alloc->disk->stat_free += esz;
	alloc->disk->stat_alloc -= esz;

	/* if it can be merged with other free chunks, do so */
	while( (other=coagulate_possible(base, alloc, f, exp, esz)) ) {
		coagulated = 1;
		/* unlink that other chunk and expand it (it has same size) */
		udb_alloc_unlink_fl(base, alloc, other, exp);
		/* merge up */
		if(other < f)
			f = other;
		exp++;
		esz <<= 1;
	}
	if(coagulated) {
		/* put big free chunk into freelist, and init it */
		udb_alloc_push_fl(base, alloc, f, exp);
	} else {
		/* we do not need to touch the last-exp-byte, which may save
		 * a reference to that page of memory */
		fp->type = udb_chunk_type_free;
		fp->flags = 0;
		udb_alloc_push_fl_noinit(base, alloc, f, exp);
	}
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	/* compact */
	if(alloc->udb->inhibit_compact) {
		/* remember that compaction would be useful now */
		alloc->udb->useful_compact = 1;
		return 1;
	}
	return udb_alloc_compact(base, alloc);
}
1833d3fecca9Ssthen
1834d3fecca9Ssthen udb_void udb_alloc_init(udb_alloc* alloc, void* d, size_t sz)
1835d3fecca9Ssthen {
1836d3fecca9Ssthen /* could be faster maybe, if grown? */
1837d3fecca9Ssthen udb_void r = udb_alloc_space(alloc, sz);
1838d3fecca9Ssthen if(!r) return r;
1839d3fecca9Ssthen memcpy(UDB_REL(alloc->udb->base, r), d, sz);
1840d3fecca9Ssthen return r;
1841d3fecca9Ssthen }
1842d3fecca9Ssthen
/** realloc the chunk at r from osz to sz bytes of user data.
 * posix-like: r==0 allocates, sz==0 frees and returns 0.
 * returns the (possibly moved) data offset, or 0 on failure/free. */
udb_void udb_alloc_realloc(udb_alloc* alloc, udb_void r, size_t osz, size_t sz)
{
	void* base = alloc->udb->base;
	udb_void c, n, newd;
	udb_chunk_d* cp, *np;
	uint64_t avail;
	uint8_t cp_type;
	/* emulate some posix realloc stuff */
	if(r == 0)
		return udb_alloc_space(alloc, sz);
	if(sz == 0) {
		if(!udb_alloc_free(alloc, r, osz))
			log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
		return 0;
	}
	c = chunk_from_dataptr(r);
	cp = UDB_CHUNK(c);
	cp_type = cp->type;
	/* does the new size still fit in the current chunk? */
	if(cp->exp == UDB_EXP_XL) {
		/* XL data area: chunk minus XL header minus the trailing
		 * size+exp marker (2 uint64s) */
		avail = UDB_XL_CHUNK(c)->size - sizeof(udb_xl_chunk_d)
			- sizeof(uint64_t)*2;
	} else {
		/* regular data area: chunk minus header minus exp byte */
		avail = ((uint64_t)1<<cp->exp) - sizeof(udb_chunk_d) - 1;
	}
	if(sz <= avail)
		return r;
	/* reallocate it, and copy */
	newd = udb_alloc_space(alloc, sz);
	if(!newd) return 0;
	/* re-base after alloc, since re-mmap may have happened */
	base = alloc->udb->base;
	cp = NULL; /* may be invalid now, robustness */
	n = chunk_from_dataptr(newd);
	np = UDB_CHUNK(n);
	np->type = cp_type;
	memcpy(UDB_REL(base, newd), UDB_REL(base, r), osz);
	/* fixup ptrs */
	chunk_fix_ptrs(base, alloc->udb, np, newd, osz, r);

	if(!udb_alloc_free(alloc, r, osz))
		log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
	return newd;
}
1886d3fecca9Ssthen
/** grow the file so that num chunks of sz bytes can be allocated.
 * returns 0 on grow/remap failure, 1 on success. */
int udb_alloc_grow(udb_alloc* alloc, size_t sz, size_t num)
{
	const uint64_t mb = 1024*1024;
	int exp = udb_alloc_exp_needed(sz);
	uint64_t esz;
	uint64_t want;
	if(exp == UDB_EXP_XL)
		/* NOTE(review): (sz&(mb-1))+mb keeps only the low Mb-bits
		 * of sz plus one Mb; for sz > 1Mb this is less than sz
		 * rounded up to whole Mbs.  presumably a rough reservation
		 * estimate only -- confirm against grow_end_calc and the
		 * XL allocation path before relying on it. */
		esz = (sz&(mb-1))+mb;
	else esz = (uint64_t)1<<exp;
	/* we need grow_end_calc to take into account alignment */
	want = grow_end_calc(alloc, exp) + esz*(num-1);
	assert(want >= alloc->udb->base_size);
	if(!udb_base_grow_and_remap(alloc->udb, want)) {
		log_msg(LOG_ERR, "failed to grow the specified amount");
		return 0;
	}
	return 1;
}
1905d3fecca9Ssthen
1906d3fecca9Ssthen void udb_alloc_set_type(udb_alloc* alloc, udb_void r, udb_chunk_type tp)
1907d3fecca9Ssthen {
1908d3fecca9Ssthen void* base = alloc->udb->base;
1909d3fecca9Ssthen udb_void f = chunk_from_dataptr(r);
1910d3fecca9Ssthen udb_chunk_d* fp = UDB_CHUNK(f);
1911d3fecca9Ssthen /* not the 'free' type, that must be set by allocation routines */
1912d3fecca9Ssthen assert(fp->type != udb_chunk_type_free);
1913d3fecca9Ssthen assert(tp != udb_chunk_type_free);
1914d3fecca9Ssthen fp->type = tp;
1915d3fecca9Ssthen }
1916d3fecca9Ssthen
1917d3fecca9Ssthen int udb_valid_offset(udb_base* udb, udb_void to, size_t destsize)
1918d3fecca9Ssthen {
1919d3fecca9Ssthen /* pointers are not valid before the header-size or after the
1920d3fecca9Ssthen * used-region of the mmap */
1921d3fecca9Ssthen return ( (to+destsize) <= udb->base_size &&
1922d3fecca9Ssthen to >= (udb->glob_data->hsize-2*sizeof(udb_rel_ptr)) &&
1923d3fecca9Ssthen (to+destsize) <= udb->alloc->disk->nextgrow);
1924d3fecca9Ssthen }
1925d3fecca9Ssthen
1926d3fecca9Ssthen int udb_valid_dataptr(udb_base* udb, udb_void to)
1927d3fecca9Ssthen {
1928d3fecca9Ssthen void* base = udb->base;
1929d3fecca9Ssthen udb_void ch;
1930d3fecca9Ssthen int exp;
1931d3fecca9Ssthen uint64_t esz;
1932d3fecca9Ssthen /* our data chunks are aligned and at least 8 bytes */
1933d3fecca9Ssthen if(!udb_valid_offset(udb, to, sizeof(uint64_t)))
1934d3fecca9Ssthen return 0;
1935d3fecca9Ssthen /* get the chunk pointer */
1936d3fecca9Ssthen ch = chunk_from_dataptr(to);
1937d3fecca9Ssthen if(!udb_valid_offset(udb, ch, sizeof(udb_chunk_d)))
1938d3fecca9Ssthen return 0;
1939d3fecca9Ssthen /* check its size */
1940d3fecca9Ssthen exp = UDB_CHUNK(ch)->exp;
1941d3fecca9Ssthen if(exp == UDB_EXP_XL) {
1942d3fecca9Ssthen /* check XL chunk */
1943d3fecca9Ssthen uint64_t xlsz;
1944d3fecca9Ssthen if(!udb_valid_offset(udb, ch, sizeof(udb_xl_chunk_d)))
1945d3fecca9Ssthen return 0;
1946d3fecca9Ssthen xlsz = UDB_XL_CHUNK(ch)->size;
1947d3fecca9Ssthen if(!udb_valid_offset(udb, ch+xlsz-1, 1))
1948d3fecca9Ssthen return 0;
1949d3fecca9Ssthen if(*((uint8_t*)UDB_REL(base, ch+xlsz-1)) != UDB_EXP_XL)
1950d3fecca9Ssthen return 0;
1951d3fecca9Ssthen if(*((uint64_t*)UDB_REL(base, ch+xlsz-sizeof(uint64_t)*2))
1952d3fecca9Ssthen != xlsz)
1953d3fecca9Ssthen return 0;
1954d3fecca9Ssthen return 1;
1955d3fecca9Ssthen }
1956d3fecca9Ssthen /* check if regular chunk has matching end byte */
1957d3fecca9Ssthen if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX)
1958d3fecca9Ssthen return 0; /* cannot be a valid chunk */
1959d3fecca9Ssthen esz = 1<<exp;
1960d3fecca9Ssthen if(!udb_valid_offset(udb, ch+esz-1, 1))
1961d3fecca9Ssthen return 0;
1962d3fecca9Ssthen if(*((uint8_t*)UDB_REL(base, ch+esz-1)) != exp)
1963d3fecca9Ssthen return 0;
1964d3fecca9Ssthen return 1;
1965d3fecca9Ssthen }
1966d3fecca9Ssthen
1967d3fecca9Ssthen int udb_valid_rptr(udb_base* udb, udb_void rptr, udb_void to)
1968d3fecca9Ssthen {
1969d3fecca9Ssthen void* base = udb->base;
1970d3fecca9Ssthen udb_void p;
1971d3fecca9Ssthen if(!udb_valid_offset(udb, rptr, sizeof(udb_rel_ptr)))
1972d3fecca9Ssthen return 0;
1973d3fecca9Ssthen if(!udb_valid_dataptr(udb, to))
1974d3fecca9Ssthen return 0;
1975d3fecca9Ssthen p = UDB_CHUNK(chunk_from_dataptr(to))->ptrlist;
1976d3fecca9Ssthen while(p) {
1977d3fecca9Ssthen if(!udb_valid_offset(udb, p, sizeof(udb_rel_ptr)))
1978d3fecca9Ssthen return 0;
1979d3fecca9Ssthen if(p == rptr)
1980d3fecca9Ssthen return 1;
1981d3fecca9Ssthen p = UDB_REL_PTR(p)->next;
1982d3fecca9Ssthen }
1983d3fecca9Ssthen return 0;
1984d3fecca9Ssthen }
1985d3fecca9Ssthen
1986d3fecca9Ssthen void udb_rel_ptr_init(udb_rel_ptr* ptr)
1987d3fecca9Ssthen {
1988d3fecca9Ssthen memset(ptr, 0, sizeof(*ptr));
1989d3fecca9Ssthen }
1990d3fecca9Ssthen
1991d3fecca9Ssthen void udb_rel_ptr_unlink(void* base, udb_rel_ptr* ptr)
1992d3fecca9Ssthen {
1993d3fecca9Ssthen if(!ptr->data)
1994d3fecca9Ssthen return;
1995d3fecca9Ssthen if(ptr->prev) {
1996d3fecca9Ssthen UDB_REL_PTR(ptr->prev)->next = ptr->next;
1997d3fecca9Ssthen } else {
1998d3fecca9Ssthen UDB_CHUNK(chunk_from_dataptr(ptr->data))->ptrlist = ptr->next;
1999d3fecca9Ssthen }
2000d3fecca9Ssthen if(ptr->next) {
2001d3fecca9Ssthen UDB_REL_PTR(ptr->next)->prev = ptr->prev;
2002d3fecca9Ssthen }
2003d3fecca9Ssthen }
2004d3fecca9Ssthen
2005d3fecca9Ssthen void udb_rel_ptr_link(void* base, udb_rel_ptr* ptr, udb_void to)
2006d3fecca9Ssthen {
2007d3fecca9Ssthen udb_chunk_d* chunk = UDB_CHUNK(chunk_from_dataptr(to));
2008d3fecca9Ssthen ptr->prev = 0;
2009d3fecca9Ssthen ptr->next = chunk->ptrlist;
2010d3fecca9Ssthen if(ptr->next)
2011d3fecca9Ssthen UDB_REL_PTR(ptr->next)->prev = UDB_SYSTOREL(base, ptr);
2012d3fecca9Ssthen chunk->ptrlist = UDB_SYSTOREL(base, ptr);
2013d3fecca9Ssthen ptr->data = to;
2014d3fecca9Ssthen }
2015d3fecca9Ssthen
2016d3fecca9Ssthen void udb_rel_ptr_set(void* base, udb_rel_ptr* ptr, udb_void to)
2017d3fecca9Ssthen {
2018d3fecca9Ssthen assert(to == 0 || to > 64);
2019d3fecca9Ssthen udb_rel_ptr_unlink(base, ptr);
2020d3fecca9Ssthen if(to)
2021d3fecca9Ssthen udb_rel_ptr_link(base, ptr, to);
2022d3fecca9Ssthen else ptr->data = to;
2023d3fecca9Ssthen }
2024d3fecca9Ssthen
2025d3fecca9Ssthen void udb_rel_ptr_edit(void* base, udb_void list, udb_void to)
2026d3fecca9Ssthen {
2027d3fecca9Ssthen udb_void p = list;
2028d3fecca9Ssthen while(p) {
2029d3fecca9Ssthen UDB_REL_PTR(p)->data = to;
2030d3fecca9Ssthen p = UDB_REL_PTR(p)->next;
2031d3fecca9Ssthen }
2032d3fecca9Ssthen }
2033d3fecca9Ssthen
#ifdef UDB_CHECK
/** check that all pointers are validly chained: every udb_ptr in the
 * ram hash must be in the right bucket, correctly back-linked, and
 * based on this udb's base pointer. */
static void
udb_check_ptrs_valid(udb_base* udb)
{
	size_t i;
	udb_ptr* p, *prev;
	for(i=0; i<udb->ram_size; i++) {
		prev = NULL;
		for(p=udb->ram_hash[i]; p; p=p->next) {
			assert(p->prev == prev);
			assert((size_t)(chunk_hash_ptr(p->data)&udb->ram_mask)
				== i);
			assert(p->base == &udb->base);
			prev = p;
		}
	}
}
#endif /* UDB_CHECK */
2053d3fecca9Ssthen
/** initialize ptr to NULL, bound to this udb's base pointer. */
void udb_ptr_init(udb_ptr* ptr, udb_base* udb)
{
#ifdef UDB_CHECK
	udb_check_ptrs_valid(udb); /* previous ptrs have been unlinked */
#endif
	/* zero all fields, then bind to the (re-mappable) base */
	memset(ptr, 0, sizeof(*ptr));
	ptr->base = &udb->base;
}
2062d3fecca9Ssthen
2063d3fecca9Ssthen void udb_ptr_set(udb_ptr* ptr, udb_base* udb, udb_void newval)
2064d3fecca9Ssthen {
2065d3fecca9Ssthen assert(newval == 0 || newval > 64);
2066d3fecca9Ssthen if(ptr->data)
2067d3fecca9Ssthen udb_base_unlink_ptr(udb, ptr);
2068d3fecca9Ssthen ptr->data = newval;
2069d3fecca9Ssthen if(newval)
2070d3fecca9Ssthen udb_base_link_ptr(udb, ptr);
2071d3fecca9Ssthen }
2072d3fecca9Ssthen
2073d3fecca9Ssthen int udb_ptr_alloc_space(udb_ptr* ptr, udb_base* udb, udb_chunk_type type,
2074d3fecca9Ssthen size_t sz)
2075d3fecca9Ssthen {
2076d3fecca9Ssthen udb_void r;
2077d3fecca9Ssthen r = udb_alloc_space(udb->alloc, sz);
2078d3fecca9Ssthen if(!r) return 0;
2079d3fecca9Ssthen udb_alloc_set_type(udb->alloc, r, type);
2080d3fecca9Ssthen udb_ptr_init(ptr, udb);
2081d3fecca9Ssthen udb_ptr_set(ptr, udb, r);
2082d3fecca9Ssthen return 1;
2083d3fecca9Ssthen }
2084d3fecca9Ssthen
2085d3fecca9Ssthen void udb_ptr_free_space(udb_ptr* ptr, udb_base* udb, size_t sz)
2086d3fecca9Ssthen {
2087d3fecca9Ssthen if(ptr->data) {
2088d3fecca9Ssthen udb_void d = ptr->data;
2089d3fecca9Ssthen udb_ptr_set(ptr, udb, 0);
2090d3fecca9Ssthen udb_alloc_free(udb->alloc, d, sz);
2091d3fecca9Ssthen }
2092d3fecca9Ssthen }
2093d3fecca9Ssthen
2094d3fecca9Ssthen udb_chunk_type udb_ptr_get_type(udb_ptr* ptr)
2095d3fecca9Ssthen {
2096d3fecca9Ssthen udb_void f;
2097d3fecca9Ssthen if(!ptr || ptr->data == 0) return udb_chunk_type_internal; /* something bad*/
2098d3fecca9Ssthen f = chunk_from_dataptr(ptr->data);
2099d3fecca9Ssthen return ((udb_chunk_d*)UDB_REL(*ptr->base, f))->type;
2100d3fecca9Ssthen }
2101