/* udb.c - u(micro) data base.
 * By W.C.A. Wijngaards
 * Copyright 2010, NLnet Labs.
 * BSD, see LICENSE.
 */
#include "config.h"
#include "udb.h"
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
#include "lookup3.h"
#include "util.h"

/* mmap and friends */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>

/* for systems without, portable definition, failed-1 and async is a flag */
#ifndef MAP_FAILED
#define MAP_FAILED ((void*)-1)
#endif
#ifndef MS_SYNC
#define MS_SYNC 0
#endif

/** move and fixup xl segment */
static void move_xl_segment(void* base, udb_base* udb, udb_void xl,
	udb_void n, uint64_t sz, uint64_t startseg);
/** attempt to compact the data and move free space to the end */
static int udb_alloc_compact(void* base, udb_alloc* alloc);

/** convert pointer to the data part to a pointer to the base of the chunk */
static udb_void
chunk_from_dataptr(udb_void data)
{
	/* we use that sizeof(udb_chunk_d) != sizeof(udb_xl_chunk_d) and
	 * that xl_chunk_d is aligned on x**1024 boundaries. */
	udb_void xl = data - sizeof(udb_xl_chunk_d);
	if( (xl & (UDB_ALLOC_CHUNK_SIZE-1)) == 0)
		return xl;
	return data - sizeof(udb_chunk_d);
}

udb_void chunk_from_dataptr_ext(udb_void data) {
	return chunk_from_dataptr(data);
}

#ifndef NDEBUG
/** read last octet from a chunk */
static uint8_t
chunk_get_last(void* base, udb_void chunk, int exp)
{
	return *((uint8_t*)UDB_REL(base, chunk+(1<<exp)-1));
}
#endif

/** write last octet of a chunk */
static void
chunk_set_last(void* base, udb_void chunk, int exp, uint8_t value)
{
	assert(exp >= 0 && exp <= 63);
	*((uint8_t*)UDB_REL(base, chunk+((uint64_t)1<<exp)-1)) = value;
}

/** create udb_base from a file descriptor (must be at start of file) */
udb_base*
udb_base_create_fd(const char* fname, int fd, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m, fsz;
	udb_glob_d g;
	ssize_t r;
	udb_base* udb = (udb_base*)xalloc_zero(sizeof(*udb));
	if(!udb) {
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}
	udb->fname = strdup(fname);
	if(!udb->fname) {
		log_msg(LOG_ERR, "out of memory");
		free(udb);
		close(fd);
		return NULL;
	}
	udb->walkfunc = walkfunc;
	udb->walkarg = arg;
	udb->fd = fd;
	udb->ram_size = 1024;
	udb->ram_mask = (int)udb->ram_size - 1;
	udb->ram_hash = (udb_ptr**)xalloc_array_zero(sizeof(udb_ptr*),
		udb->ram_size);
	if(!udb->ram_hash) {
		free(udb->fname);
		free(udb);
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}

	/* read magic */
	if((r=read(fd, &m, sizeof(m))) == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(m)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	/* TODO : what if bigendian and littleendian file, see magic */
	if(m != UDB_MAGIC) {
		log_msg(LOG_ERR, "%s: wrong type of file", fname);
		goto fail;
	}
	/* read header */
	if((r=read(fd, &g, sizeof(g))) == -1) {
		log_msg(LOG_ERR, "%s: %s\n", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(g)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	if(g.version != 0) {
		log_msg(LOG_ERR, "%s: unknown file version %d", fname,
			(int)g.version);
		goto fail;
	}
	if(g.hsize < UDB_HEADER_SIZE) {
		log_msg(LOG_ERR, "%s: header size too small %d", fname,
			(int)g.hsize);
		goto fail;
	}
	if(g.hsize > UDB_HEADER_SIZE) {
		log_msg(LOG_WARNING, "%s: header size too large %d", fname,
			(int)g.hsize);
		goto fail;
	}
	if(g.clean_close != 1) {
		log_msg(LOG_WARNING, "%s: not cleanly closed %d", fname,
			(int)g.clean_close);
		goto fail;
	}
	if(g.dirty_alloc != 0) {
		log_msg(LOG_WARNING, "%s: not cleanly closed (alloc:%d)", fname,
			(int)g.dirty_alloc);
		goto fail;
	}

	/* check file size correctly written, for 4.0.2 nsd.db failure */
	fsz = (uint64_t)lseek(fd, (off_t)0, SEEK_END);
	(void)lseek(fd, (off_t)0, SEEK_SET);
	if(g.fsize != fsz) {
		log_msg(LOG_WARNING, "%s: file size %llu but mmap header "
			"has size %llu", fname, (unsigned long long)fsz,
			(unsigned long long)g.fsize);
		goto fail;
	}

	/* mmap it */
	if(g.fsize < UDB_HEADER_SIZE || g.fsize < g.hsize) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	if(g.fsize > (uint64_t)400*1024*1024*1024*1024) /* 400 Tb */ {
		log_msg(LOG_WARNING, "%s: file size too large %llu",
			fname, (unsigned long long)g.fsize);
		goto fail;
	}
	udb->base_size = (size_t)g.fsize;
#ifdef HAVE_MMAP
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	udb->base = mmap(NULL, (size_t)udb->base_size,
		(int)PROT_READ|PROT_WRITE, (int)MAP_SHARED,
		(int)udb->fd, (off_t)0);
#else
	udb->base = MAP_FAILED; errno = ENOSYS;
#endif
	if(udb->base == MAP_FAILED) {
		udb->base = NULL;
		log_msg(LOG_ERR, "mmap(size %u) error: %s",
			(unsigned)udb->base_size, strerror(errno));
	fail:
		close(fd);
		free(udb->fname);
		free(udb->ram_hash);
		free(udb);
		return NULL;
	}

	/* init completion */
	udb->glob_data = (udb_glob_d*)((char*)udb->base+sizeof(uint64_t));
	r = 0;
	/* cannot be dirty because that is goto fail above */
	if(udb->glob_data->dirty_alloc != udb_dirty_clean)
		r = 1;
	udb->alloc = udb_alloc_create(udb, (udb_alloc_d*)(
		(char*)udb->glob_data+sizeof(*udb->glob_data)));
	if(!udb->alloc) {
		log_msg(LOG_ERR, "out of memory");
		udb_base_free(udb);
		return NULL;
	}
	if(r) {
		/* and compact now, or resume compacting */
		udb_alloc_compact(udb, udb->alloc);
		udb_base_sync(udb, 1);
	}
	udb->glob_data->clean_close = 0;

	return udb;
}

udb_base* udb_base_create_read(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	int fd = open(fname, O_RDWR);
	if(fd == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		return NULL;
	}
	return udb_base_create_fd(fname, fd, walkfunc, arg);
}

/** init new udb_global structure */
static void udb_glob_init_new(udb_glob_d* g)
{
	memset(g, 0, sizeof(*g));
	g->hsize = UDB_HEADER_SIZE;
	g->fsize = UDB_HEADER_SIZE;
}

/** write data to file and check result */
static int
write_fdata(const char* fname, int fd, void* data, size_t len)
{
	ssize_t w;
	if((w=write(fd, data, len)) == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		close(fd);
		return 0;
	} else if(w != (ssize_t)len) {
		log_msg(LOG_ERR, "%s: short write (disk full?)", fname);
		close(fd);
		return 0;
	}
	return 1;
}

udb_base* udb_base_create_new(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m;
	udb_glob_d g;
	udb_alloc_d a;
	uint64_t endsize = UDB_HEADER_SIZE;
	uint64_t endexp = 0;
	int fd = open(fname, O_CREAT|O_RDWR, 0600);
	if(fd == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		return NULL;
	}
	m = UDB_MAGIC;
	udb_glob_init_new(&g);
	udb_alloc_init_new(&a);
	g.clean_close = 1;

	/* write new data to file (closes fd on error) */
	if(!write_fdata(fname, fd, &m, sizeof(m)))
		return NULL;
	if(!write_fdata(fname, fd, &g, sizeof(g)))
		return NULL;
	if(!write_fdata(fname, fd, &a, sizeof(a)))
		return NULL;
	if(!write_fdata(fname, fd, &endsize, sizeof(endsize)))
		return NULL;
	if(!write_fdata(fname, fd, &endexp, sizeof(endexp)))
		return NULL;
	/* rewind to start */
	if(lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
		log_msg(LOG_ERR, "%s: lseek %s", fname, strerror(errno));
		close(fd);
		return NULL;
	}
	/* truncate to the right size */
	if(ftruncate(fd, (off_t)g.fsize) < 0) {
		log_msg(LOG_ERR, "%s: ftruncate(%d): %s", fname,
			(int)g.fsize, strerror(errno));
		close(fd);
		return NULL;
	}
	return udb_base_create_fd(fname, fd, walkfunc, arg);
}

/** shrink the udb base if it has unused space at the end */
static void
udb_base_shrink(udb_base* udb, uint64_t nsize)
{
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	udb->glob_data->fsize = nsize;
	/* sync, does not *seem* to be required on Linux, but it is
	   certainly required on OpenBSD.  Otherwise changed data is lost. */
#ifdef HAVE_MMAP
	msync(udb->base, udb->base_size, MS_ASYNC);
#endif
	if(ftruncate(udb->fd, (off_t)nsize) != 0) {
		log_msg(LOG_ERR, "%s: ftruncate(%u) %s", udb->fname,
			(unsigned)nsize, strerror(errno));
	}
	udb->glob_data->dirty_alloc = udb_dirty_clean;
}

void udb_base_close(udb_base* udb)
{
	if(!udb)
		return;
	if(udb->fd != -1 && udb->base && udb->alloc) {
		uint64_t nsize = udb->alloc->disk->nextgrow;
		if(nsize < udb->base_size)
			udb_base_shrink(udb, nsize);
	}
	if(udb->fd != -1) {
		udb->glob_data->clean_close = 1;
		close(udb->fd);
		udb->fd = -1;
	}
	if(udb->base) {
#ifdef HAVE_MMAP
		if(munmap(udb->base, udb->base_size) == -1) {
			log_msg(LOG_ERR, "munmap: %s", strerror(errno));
		}
#endif
		udb->base = NULL;
	}
}

void udb_base_free(udb_base* udb)
{
	if(!udb)
		return;
	udb_base_close(udb);
	udb_alloc_delete(udb->alloc);
	free(udb->ram_hash);
	free(udb->fname);
	free(udb);
}

void udb_base_free_keep_mmap(udb_base* udb)
{
	if(!udb) return;
	if(udb->fd != -1) {
		close(udb->fd);
		udb->fd = -1;
	}
	udb->base = NULL;
	udb_alloc_delete(udb->alloc);
	free(udb->ram_hash);
	free(udb->fname);
	free(udb);
}

void udb_base_sync(udb_base* udb, int wait)
{
	if(!udb) return;
#ifdef HAVE_MMAP
	if(msync(udb->base, udb->base_size, wait?MS_SYNC:MS_ASYNC) != 0) {
		log_msg(LOG_ERR, "msync(%s) error %s",
			udb->fname, strerror(errno));
	}
#else
	(void)wait;
#endif
}

/** hash a chunk pointer */
static uint32_t
chunk_hash_ptr(udb_void p)
{
	/* put p into an array of uint32 */
	uint32_t h[sizeof(p)/sizeof(uint32_t)];
	memcpy(&h, &p, sizeof(h));
	return hashword(h, sizeof(p)/sizeof(uint32_t), 0x8763);
}

/** check that the given pointer is on the bucket for the given offset */
int udb_ptr_is_on_bucket(udb_base* udb, udb_ptr* ptr, udb_void to)
{
	uint32_t i = chunk_hash_ptr(to) & udb->ram_mask;
	udb_ptr* p;
	assert((size_t)i < udb->ram_size);
	for(p = udb->ram_hash[i]; p; p=p->next) {
		if(p == ptr)
			return 1;
	}
	return 0;
}

/** grow the ram array */
static void
grow_ram_hash(udb_base* udb, udb_ptr** newhash)
{
	size_t i;
	size_t osize= udb->ram_size;
	udb_ptr* p, *np;
	udb_ptr** oldhash = udb->ram_hash;
	udb->ram_size *= 2;
	udb->ram_mask <<= 1;
	udb->ram_mask |= 1;
	udb->ram_hash = newhash;
	/* have to link in every element in the old list into the new list*/
	for(i=0; i<osize; i++) {
		p = oldhash[i];
		while(p) {
			np = p->next;
			/* link into newhash */
			p->prev=NULL;
			p->next=newhash[chunk_hash_ptr(p->data)&udb->ram_mask];
			if(p->next) p->next->prev = p;
			/* go to next element of oldhash */
			p = np;
		}
	}
	free(oldhash);
}

void udb_base_link_ptr(udb_base* udb, udb_ptr* ptr)
{
	uint32_t i;
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* must be to whole chunk*/
#endif
	udb->ram_num++;
	if(udb->ram_num == udb->ram_size && udb->ram_size<(size_t)0x7fffffff) {
		/* grow the array, if allocation succeeds */
		udb_ptr** newram = (udb_ptr**)xalloc_array_zero(
			sizeof(udb_ptr*), udb->ram_size*2);
		if(newram) {
			grow_ram_hash(udb, newram);
		}
	}
	i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
	assert((size_t)i < udb->ram_size);

	ptr->prev = NULL;
	ptr->next = udb->ram_hash[i];
	udb->ram_hash[i] = ptr;
	if(ptr->next)
		ptr->next->prev = ptr;
}

void udb_base_unlink_ptr(udb_base* udb, udb_ptr* ptr)
{
	assert(ptr->data);
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data)); /* ptr must be inited */
	assert(udb_ptr_is_on_bucket(udb, ptr, ptr->data));
#endif
	udb->ram_num--;
	if(ptr->next)
		ptr->next->prev = ptr->prev;
	if(ptr->prev)
		ptr->prev->next = ptr->next;
	else {
		uint32_t i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
		assert((size_t)i < udb->ram_size);
		udb->ram_hash[i] = ptr->next;
	}
}

/** change a set of ram ptrs to a new value */
static void
udb_base_ram_ptr_edit(udb_base* udb, udb_void old, udb_void newd)
{
	uint32_t io = chunk_hash_ptr(old) & udb->ram_mask;
	udb_ptr* p, *np;
	/* edit them and move them into the new position */
	p = udb->ram_hash[io];
	while(p) {
		np = p->next;
		if(p->data == old) {
			udb_base_unlink_ptr(udb, p);
			p->data = newd;
			udb_base_link_ptr(udb, p);
		}
		p = np;
	}
}

udb_rel_ptr* udb_base_get_userdata(udb_base* udb)
{
	return &udb->glob_data->user_global;
}

void udb_base_set_userdata(udb_base* udb, udb_void user)
{
#ifdef UDB_CHECK
	if(user) { assert(udb_valid_dataptr(udb, user)); }
#endif
	udb_rel_ptr_set(udb->base, &udb->glob_data->user_global, user);
}

void udb_base_set_userflags(udb_base* udb, uint8_t v)
{
	udb->glob_data->userflags = v;
}

uint8_t udb_base_get_userflags(udb_base* udb)
{
	return udb->glob_data->userflags;
}

/** re-mmap the udb to specified size */
static void*
udb_base_remap(udb_base* udb, udb_alloc* alloc, uint64_t nsize)
{
#ifdef HAVE_MMAP
	void* nb;
	/* for use with valgrind, do not use mremap, but the other version */
#ifdef MREMAP_MAYMOVE
	nb = mremap(udb->base, udb->base_size, nsize, MREMAP_MAYMOVE);
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	}
#else /* !HAVE MREMAP */
	/* use munmap-mmap to simulate mremap */
	if(munmap(udb->base, udb->base_size) != 0) {
		log_msg(LOG_ERR, "munmap(%s) error %s",
			udb->fname, strerror(errno));
	}
	/* provide hint for new location */
	/* note the size_t casts must be there for portability, on some
	 * systems the layout of memory is otherwise broken. */
	nb = mmap(udb->base, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
		(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	/* retry the mmap without basept in case of ENOMEM (FreeBSD8),
	 * the kernel can then try to mmap it at a different location
	 * where more memory is available */
	if(nb == MAP_FAILED && errno == ENOMEM) {
		nb = mmap(NULL, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
			(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	}
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		udb->base = NULL;
		return 0;
	}
#endif /* HAVE MREMAP */
	if(nb != udb->base) {
		/* fix up realpointers in udb and alloc */
		/* but mremap may have been nice and not move the base */
		udb->base = nb;
		udb->glob_data = (udb_glob_d*)((char*)nb+sizeof(uint64_t));
		/* use passed alloc pointer because the udb->alloc may not
		 * be initialized yet */
		alloc->disk = (udb_alloc_d*)((char*)udb->glob_data
			+sizeof(*udb->glob_data));
	}
	udb->base_size = nsize;
	return nb;
#else /* HAVE_MMAP */
	(void)udb; (void)alloc; (void)nsize;
	return NULL;
#endif /* HAVE_MMAP */
}

void
udb_base_remap_process(udb_base* udb)
{
	/* assume that fsize is still accessible */
	udb_base_remap(udb, udb->alloc, udb->glob_data->fsize);
}

/** grow file to specified size and re-mmap, return new base */
static void*
udb_base_grow_and_remap(udb_base* udb, uint64_t nsize)
{
	/* grow file by writing a single zero at that spot, the
	 * rest is filled in with zeroes. */
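	/* (writing one byte at offset nsize-1 is enough to extend the file:
	 * the skipped-over range reads back as zeroes, so no large write is
	 * needed here) */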
	uint8_t z = 0;
	ssize_t w;

	assert(nsize > 0);
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
#ifdef HAVE_PWRITE
	if((w=pwrite(udb->fd, &z, sizeof(z), (off_t)(nsize-1))) == -1) {
#else
	if(lseek(udb->fd, (off_t)(nsize-1), SEEK_SET) == -1) {
		log_msg(LOG_ERR, "fseek %s: %s", udb->fname, strerror(errno));
		return 0;
	}
	if((w=write(udb->fd, &z, sizeof(z))) == -1) {
#endif
		log_msg(LOG_ERR, "grow(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	} else if(w != (ssize_t)sizeof(z)) {
		log_msg(LOG_ERR, "grow(%s, size %u) failed (disk full?)",
			udb->fname, (unsigned)nsize);
		return 0;
	}
	udb->glob_data->fsize = nsize;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	return udb_base_remap(udb, udb->alloc, nsize);
}

int udb_exp_size(uint64_t a)
{
	/* find enclosing value such that 2**x >= a */
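	/* for example: udb_exp_size(4096) = 12, since 2**12 = 4096, and
	 * udb_exp_size(4097) = 13, since 2**12 < 4097 <= 2**13. */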
	int x = 0;
	uint64_t i = a;
	assert(a != 0);

	i --;
	/* could optimise this with uint8* access, depends on endianness */
	/* first whole bytes */
	while( (i&(~(uint64_t)0xff)) ) {
		i >>= 8;
		x += 8;
	}
	/* now details */
	while(i) {
		i >>= 1;
		x ++;
	}
	assert( x>=0 && x<=63);
	assert( ((uint64_t)1<<x) >= a);
	assert( x==0 || /* <<x-1 without negative number analyzer complaints: */ (((uint64_t)1<<x)>>1) < a);
	return x;
}

int udb_exp_offset(uint64_t o)
{
	/* this means measuring the number of 0 bits on the right */
	/* so, if exp zero bits then (o&(2**x-1))==0 */
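	/* for example: udb_exp_offset(24) = 3, because 24 has three zero
	 * bits on the right; 2**3 divides 24 but 2**4 does not. */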
	int x = 0;
	uint64_t i = o;
	assert(o != 0);
	/* first whole bytes */
	while( (i&(uint64_t)0xff) == 0) {
		i >>= 8;
		x += 8;
	}
	/* now details */
	while( (i&(uint64_t)0x1) == 0) {
		i >>= 1;
		x ++;
	}
	assert( o % ((uint64_t)1<<x) == 0);
	assert( o % ((uint64_t)1<<(x+1)) != 0);
	return x;
}

void udb_alloc_init_new(udb_alloc_d* a)
{
	assert(UDB_HEADER_SIZE % UDB_ALLOC_CHUNK_MINSIZE == 0);
	memset(a, 0, sizeof(*a));
	/* set new allocations after header, as if allocated in a sequence
	 * of minsize allocations */
	a->nextgrow = UDB_HEADER_SIZE;
}

/** fsck the file size, false if failed and file is useless */
static int
fsck_fsize(udb_base* udb, udb_alloc* alloc)
{
	off_t realsize;
	log_msg(LOG_WARNING, "udb-fsck %s: file size wrong", udb->fname);
	realsize = lseek(udb->fd, (off_t)0, SEEK_END);
	if(realsize == (off_t)-1) {
		log_msg(LOG_ERR, "lseek(%s): %s", udb->fname, strerror(errno));
		return 0;
	}
	udb->glob_data->fsize = (uint64_t)realsize;
	if(!udb_base_remap(udb, alloc, (uint64_t)realsize))
		return 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	log_msg(LOG_WARNING, "udb-fsck %s: file size fixed (sync)", udb->fname);
	udb_base_sync(udb, 1);
	return 1;
}

/** regenerate freelist add a new free chunk, return next todo */
static udb_void
regen_free(void* base, udb_void c, int exp, udb_alloc_d* regen)
{
	udb_free_chunk_d* cp = UDB_FREE_CHUNK(c);
	uint64_t esz = (uint64_t)1<<exp;
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
		return 0;
	}
	cp->type = udb_chunk_type_free;
	cp->flags = 0;
	chunk_set_last(base, c, exp, (uint8_t)exp);
	cp->prev = 0;
	cp->next = regen->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(cp->next)
		UDB_FREE_CHUNK(cp->next)->prev = c;
	regen->stat_free += esz;
	return c + esz;
}

/** regenerate xl chunk, return next todo */
static udb_void
regen_xl(void* base, udb_void c, udb_alloc_d* regen)
{
	udb_xl_chunk_d* cp = UDB_XL_CHUNK(c);
	uint64_t xlsz = cp->size;
	if( (xlsz&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
		return 0;
	}
	if( (c&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
		return 0;
	}
	/* fixup end-size and end-expmarker */
	regen->stat_alloc += xlsz;
	return c + xlsz;
}

/** regenerate data chunk, return next todo */
static udb_void
regen_data(void* base, udb_void c, int exp, udb_alloc_d* regen)
{
	uint64_t esz = (uint64_t)1<<exp;
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
		return 0;
	}
	chunk_set_last(base, c, exp, (uint8_t)exp);
	regen->stat_alloc += esz;
	return c + esz;
}

/** regenerate a relptr structure inside a data segment */
static void
regen_relptr_func(void* base, udb_rel_ptr* rp, void* arg)
{
	udb_void* a = (udb_void*)arg;
	/* ignore 0 pointers */
	if(!rp->data)
		return;

	/* edit relptrs that point to oldmoved to point to newmoved. */
	if(rp->data == a[0])
		rp->data = a[1];

	/* regenerate relptr lists, add this item to the relptr list for
	 * the data that it points to */
	udb_rel_ptr_link(base, rp, rp->data);
}

/** regenerate the relptrs store in this data segment */
static void
regen_its_ptrs(void* base, udb_base* udb, udb_chunk_d* atp,
	void* data, uint64_t dsz, udb_void rb_old, udb_void rb_new)
{
	udb_void arg[2];
	arg[0] = rb_old; arg[1] = rb_new;
	/* walk through the structs here and put them on their respective
	 * relptr lists */
	(*udb->walkfunc)(base, udb->walkarg, atp->type, data, dsz,
		&regen_relptr_func, arg);

}

/** regenerate relptrlists in the file */
static void
regen_ptrlist(void* base, udb_base* udb, udb_alloc* alloc,
	udb_void rb_old, udb_void rb_new)
{
	udb_void at = alloc->udb->glob_data->hsize;
	/* clear all ptrlist start pointers in the file. */
	while(at < alloc->disk->nextgrow) {
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		if(exp == UDB_EXP_XL) {
			UDB_XL_CHUNK(at)->ptrlist = 0;
			at += UDB_XL_CHUNK(at)->size;
		} else if(tp == udb_chunk_type_free) {
			at += (uint64_t)1<<exp;
		} else { /* data chunk */
			UDB_CHUNK(at)->ptrlist = 0;
			at += (uint64_t)1<<exp;
		}
	}
	/* walk through all relptr structs and put on the right list. */
	at = alloc->udb->glob_data->hsize;
	while(at < alloc->disk->nextgrow) {
		udb_chunk_d* atp = UDB_CHUNK(at);
		int exp = (int)atp->exp;
		udb_chunk_type tp = (udb_chunk_type)atp->type;
		uint64_t sz = ((exp == UDB_EXP_XL)?UDB_XL_CHUNK(at)->size:
			(uint64_t)1<<exp);
		if(exp == UDB_EXP_XL) {
			assert(at != rb_old); /* should have been freed */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_xl_chunk_d),
				sz-sizeof(udb_xl_chunk_d) - sizeof(uint64_t)*2,
				rb_old, rb_new);
			at += sz;
		} else if(tp == udb_chunk_type_free) {
			at += sz;
		} else { /* data chunk */
			assert(at != rb_old); /* should have been freed */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_chunk_d),
				sz-sizeof(udb_chunk_d)-1, rb_old, rb_new);
			at += sz;
		}
	}
}


/** mark free elements from ex XL chunk space and later fixups pick that up */
static void
rb_mark_free_segs(void* base, udb_void s, uint64_t m)
{
	udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
	/* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
	assert(s >= UDB_ALLOC_CHUNK_SIZE);
	while(q >= s) {
		UDB_CHUNK(q)->exp = UDB_ALLOC_CHUNKS_MAX;
		UDB_CHUNK(q)->type = udb_chunk_type_free;
		q -= UDB_ALLOC_CHUNK_SIZE;
	}
}


/** fsck rollback or rollforward XL move results */
static int
fsck_rb_xl(void* base, udb_base* udb, udb_void rb_old, udb_void rb_new,
	uint64_t rb_size, uint64_t rb_seg)
{

	if(rb_old <= rb_new)
		return 0; /* XL move one way */
	if( (rb_size&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if( (rb_old&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if( (rb_new&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0; /* not aligned */
	if(rb_new + rb_size <= rb_old) {
		/* not overlapping: resume copy */
		memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
		/* and free up old piece(s) */
		rb_mark_free_segs(base, rb_old, rb_size);
	} else {
		/* overlapping, see what segment we stopped at
		 * and continue there. */
		move_xl_segment(base, udb, rb_old, rb_new, rb_size, rb_seg);
		/* free up old piece(s); from the end of the moved segment,
		 * until the end of the old segment */
		rb_mark_free_segs(base, rb_new+rb_size, (rb_old+rb_size)-
			(rb_new+rb_size));
	}
	/* do not call fix_ptrs, regenptrs does the job */
	return 1;
}

/** fsck rollback or rollforward move results */
static int
fsck_rb(void* base, udb_void rb_old, udb_void rb_new, uint64_t rb_size,
	udb_void* make_free)
{
	if( (rb_size&(rb_size-1)) != 0)
		return 0; /* not powerof2 */
	if( (rb_old&(rb_size-1)) != 0)
		return 0; /* not aligned */
	if( (rb_new&(rb_size-1)) != 0)
		return 0; /* not aligned */
	/* resume copy */
	memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
	/* do not call fix_ptrs, regenptrs does the job */
	/* make sure udb_old is freed */
	*make_free = rb_old;
	return 1;
}

/** fsck the file and salvage, false if failed and file is useless */
static int
fsck_file(udb_base* udb, udb_alloc* alloc, int moved)
{
	void* base = udb->base;
	udb_alloc_d regen;
	udb_void at = udb->glob_data->hsize;
	udb_void rb_old = udb->glob_data->rb_old;
	udb_void rb_new = udb->glob_data->rb_new;
	udb_void rb_seg = udb->glob_data->rb_seg;
	udb_void make_free = 0;
	uint64_t rb_size = udb->glob_data->rb_size;
	log_msg(LOG_WARNING, "udb-fsck %s: salvaging", udb->fname);
	/* walk through the file, use the exp values to see what can be
	 * salvaged */
	if(moved && rb_old && rb_new && rb_size) {
		if(rb_old+rb_size <= alloc->disk->nextgrow
			&& rb_new+rb_size <= alloc->disk->nextgrow) {
			/* we can use the move information to fix up the
			 * duplicate element (or partially moved element) */
			if(rb_size > 1024*1024) {
				/* XL chunk */
				if(!fsck_rb_xl(base, udb, rb_old, rb_new,
					rb_size, rb_seg))
					return 0;
			} else {
				if(!fsck_rb(base, rb_old, rb_new, rb_size,
					&make_free))
					return 0;
			}
		}
	}

	/* rebuild freelists */
	/* recalculate stats in alloc (except 'stat_data') */
	/* possibly new end 'nextgrow' value */
	memset(&regen, 0, sizeof(regen));
	regen.nextgrow = alloc->disk->nextgrow;
	while(at < regen.nextgrow) {
		/* figure out this chunk */
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		/* consistency check possible here with end-exp */
		if(tp == udb_chunk_type_free || at == make_free) {
			at = regen_free(base, at, exp, &regen);
			if(!at) return 0;
		} else if(exp == UDB_EXP_XL) {
			/* allocated data of XL size */
			at = regen_xl(base, at, &regen);
			if(!at) return 0;
		} else if(exp >= UDB_ALLOC_CHUNK_MINEXP
			&& exp <= UDB_ALLOC_CHUNKS_MAX) {
			/* allocated data */
			at = regen_data(base, at, exp, &regen);
			if(!at) return 0;
		} else {
			/* garbage; this must be EOF then */
			regen.nextgrow = at;
			break;
		}
	}
	*alloc->disk = regen;

	/* rebuild relptr lists */
	regen_ptrlist(base, udb, alloc, rb_old, rb_new);

	log_msg(LOG_WARNING, "udb-fsck %s: salvaged successfully (sync)",
		udb->fname);
	udb->glob_data->rb_old = 0;
	udb->glob_data->rb_new = 0;
	udb->glob_data->rb_size = 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	udb_base_sync(udb, 1);
	return 1;
}


udb_alloc* udb_alloc_create(udb_base* udb, udb_alloc_d* disk)
{
	udb_alloc* alloc = (udb_alloc*)xalloc_zero(sizeof(*alloc));
	if(!alloc)
		return NULL;
	alloc->udb = udb;
	alloc->disk = disk;
	/* see if committed but uncompleted actions need to be done */
	/* preserves the alloc state */
	if(udb->glob_data->dirty_alloc != udb_dirty_clean) {
		if(udb->glob_data->dirty_alloc == udb_dirty_fsize) {
			if(fsck_fsize(udb, alloc))
				return alloc;
		} else if(udb->glob_data->dirty_alloc == udb_dirty_fl) {
			if(fsck_file(udb, alloc, 0))
				return alloc;
		} else if(udb->glob_data->dirty_alloc == udb_dirty_compact) {
			if(fsck_file(udb, alloc, 1))
				return alloc;
		}
		log_msg(LOG_ERR, "error: file allocation dirty (%d)",
			(int)udb->glob_data->dirty_alloc);
		free(alloc);
		return NULL;
	}
	return alloc;
}

void udb_alloc_delete(udb_alloc* alloc)
{
	if(!alloc) return;
	free(alloc);
}

/** unlink this element from its freelist */
static void
udb_alloc_unlink_fl(void* base, udb_alloc* alloc, udb_void chunk, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(chunk);
	assert(chunk);
	/* chunk is a free chunk */
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, chunk, exp) == (uint8_t)exp);
	/* and thus freelist not empty */
	assert(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]);
	/* unlink */
	if(fp->prev)
		UDB_FREE_CHUNK(fp->prev)->next = fp->next;
	else alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = fp->prev;
}

/** pop first element off freelist, list may not be empty */
static udb_void
udb_alloc_pop_fl(void* base, udb_alloc* alloc, int exp)
{
	udb_void f = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	assert(f);
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
	if(fp->next) {
		UDB_FREE_CHUNK(fp->next)->prev = 0;
	}
	return f;
}

/** push new element onto freelist */
static void
udb_alloc_push_fl(void* base, udb_alloc* alloc, udb_void f, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	assert(f);
	fp->exp = (uint8_t)exp;
	fp->type = udb_chunk_type_free;
	fp->flags = 0;
	fp->prev = 0;
	fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = f;
	chunk_set_last(base, f, exp, (uint8_t)exp);
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
}

/** push new element onto freelist - do not initialize the elt */
static void
udb_alloc_push_fl_noinit(void* base, udb_alloc* alloc, udb_void f, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	assert(f);
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
	fp->prev = 0;
	fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = f;
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
}

/** add free chunks at end until specified alignment occurs */
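/* for example, with nextgrow at 0x9000 and an esz of 1 Mb, this pushes free
 * chunks of 4k, 8k, 16k, 64k, 128k, 256k and 512k onto the freelists and
 * nextgrow ends up at the 1 Mb boundary 0x100000. */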
static void
grow_align(void* base, udb_alloc* alloc, uint64_t esz)
{
	while( (alloc->disk->nextgrow & (esz-1)) != 0) {
		/* the nextgrow is not a whole multiple of esz. */
		/* grow a free chunk of max allowed size */
		int fexp = udb_exp_offset(alloc->disk->nextgrow);
		uint64_t fsz = (uint64_t)1<<fexp;
		udb_void f = alloc->disk->nextgrow;
		udb_void fn = alloc->disk->nextgrow+fsz;
		assert(fn <= alloc->udb->base_size);
		alloc->disk->stat_free += fsz;
		udb_alloc_push_fl(base, alloc, f, fexp);
		/* now increase nextgrow to commit that free chunk */
		alloc->disk->nextgrow = fn;
	}
}

/** append chunks at end of memory space to get size exp, return dataptr */
static udb_void
grow_chunks(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	uint64_t esz = (uint64_t)1<<exp;
	udb_void ret;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	grow_align(base, alloc, esz);
	/* free chunks are grown, grow the one we want to use */
	ret = alloc->disk->nextgrow;
	/* take a new alloced chunk into use */
	UDB_CHUNK(ret)->exp = (uint8_t)exp;
	UDB_CHUNK(ret)->flags = 0;
	UDB_CHUNK(ret)->ptrlist = 0;
	UDB_CHUNK(ret)->type = udb_chunk_type_data;
	/* store last octet */
	chunk_set_last(base, ret, exp, (uint8_t)exp);
	/* update stats */
	alloc->disk->stat_alloc += esz;
	alloc->disk->stat_data += sz;
	/* now increase nextgrow to commit this newly allocated chunk */
	alloc->disk->nextgrow += esz;
	assert(alloc->disk->nextgrow <= alloc->udb->base_size);
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	return ret + sizeof(udb_chunk_d); /* ptr to data */
}

/** calculate how much space is necessary to grow for this exp */
static uint64_t
grow_end_calc(udb_alloc* alloc, int exp)
{
	uint64_t sz = (uint64_t)1<<exp;
	uint64_t ng = alloc->disk->nextgrow;
	uint64_t res;
	/* if nextgrow is 2**expness, no extra growth needed, only size */
	if( (ng & (sz-1)) == 0) {
		/* sz-1 is like 0xfff, and checks if ng is whole 2**exp */
		return ng+sz; /* must grow exactly 2**exp */
	}
	/* grow until 2**expness and then we need 2**exp as well */
	/* so, round ng down to whole sz (basically ng-ng%sz, or ng/sz*sz)
	 * and then add the sz twice (go up to whole sz, and to allocate) */
	res = (ng & ~(sz-1)) + 2*sz;
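	/* for example, ng = 0x1800 and exp = 12 (sz = 0x1000): 0x1800 is not
	 * 0x1000-aligned, so res = 0x1000 + 2*0x1000 = 0x3000. */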
	return res;
}

/** see if we need to grow more than specified to enable sustained growth */
static uint64_t
grow_extra_check(udb_alloc* alloc, uint64_t ge)
{
	const uint64_t mb = 1024*1024;
	uint64_t bsz = alloc->udb->base_size;
	if(bsz <= mb) {
		/* below 1 Mb, double sizes for exponential growth */
		/* takes about 15 times to grow to 1Mb */
		if(ge < bsz*2)
			return bsz*2;
	} else {
		uint64_t gnow = ge - bsz;
		/* above 1Mb, grow at least 1 Mb, or 12.5% of current size,
		 * in whole megabytes rounded up. */
		uint64_t want = ((bsz / 8) & ~(mb-1)) + mb;
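		/* for example, bsz = 10 Mb: bsz/8 = 1.25 Mb, rounded down to
		 * 1 Mb, plus 1 Mb gives want = 2 Mb, so the file grows to at
		 * least 12 Mb. */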
		if(gnow < want)
			return bsz + want;
	}
	return ge;
}

/** see if free space is enough to warrant shrink (while file is open) */
static int
enough_free(udb_alloc* alloc)
{
	if(alloc->udb->base_size <= 2*1024*1024) {
		/* below 1 Mb, grown by double size, (so up to 2 mb),
		 * do not shrink unless we can 1/3 in size */
		if(((size_t)alloc->disk->nextgrow)*3 <= alloc->udb->base_size)
			return 1;
	} else {
		/* grown 12.5%, shrink 25% if possible, at least one mb */
		/* between 1mb and 4mb size, it shrinks by 1mb if possible */
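		/* for example, an 8 Mb file with nextgrow at 5 Mb has 3 Mb
		 * free: 3*4 >= 8, so it shrinks; with nextgrow at 6.5 Mb only
		 * 1.5 Mb is free and 1.5*4 < 8, so it does not. */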
		uint64_t space = alloc->udb->base_size - alloc->disk->nextgrow;
		if(space >= 1024*1024 && (space*4 >= alloc->udb->base_size
			|| alloc->udb->base_size < 4*1024*1024))
			return 1;
	}
	return 0;
}

/** grow space for a chunk of 2**exp and return dataptr */
static udb_void
udb_alloc_grow_space(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	/* commit the grow action
	 * - the file grow only changes filesize, but not the nextgrow.
	 * - taking space after nextgrow into use (as free space),
	 *   is like free-ing a chunk (one at a time).
	 * - and the last chunk taken into use is like alloc.
	 */
	/* predict how much free space is needed for this */
	uint64_t grow_end = grow_end_calc(alloc, exp);
	assert(alloc->udb->base_size >= alloc->disk->nextgrow);
	if(grow_end <= alloc->udb->base_size) {
		/* we can do this with the available space */
		return grow_chunks(base, alloc, sz, exp);
	}
	/* we have to grow the file, re-mmap */
	/* see if we need to grow a little more, to avoid endless grow
	 * efforts on adding data */
	grow_end = grow_extra_check(alloc, grow_end);
	if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
		return 0; /* mmap or write failed (disk or mem full) */
	}
	/* we have enough space now */
	assert(grow_end <= alloc->udb->base_size);
	assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
	return grow_chunks(base, alloc, sz, exp);
}

/** take XL allocation into use at end of file, return dataptr */
static udb_void
grow_xl(void* base, udb_alloc* alloc, uint64_t xlsz, uint64_t sz)
{
	udb_void ret;
	udb_xl_chunk_d* p;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;

	/* align growth to whole mbs */
	grow_align(base, alloc, UDB_ALLOC_CHUNK_SIZE);

	/* grow XL segment */
	ret = alloc->disk->nextgrow;
	p = UDB_XL_CHUNK(ret);
	p->exp = UDB_EXP_XL;
	p->size = xlsz;
	p->flags = 0;
	p->ptrlist = 0;
	p->type = udb_chunk_type_data;

	/* also put size and marker at end for compaction */
1223d83a80eeSchristos *((uint64_t*)(UDB_REL(base, ret+xlsz-sizeof(uint64_t)*2))) = xlsz;
1224d83a80eeSchristos *((uint8_t*)(UDB_REL(base, ret+xlsz-1))) = UDB_EXP_XL;
1225d83a80eeSchristos
1226d83a80eeSchristos /* stats */
1227d83a80eeSchristos alloc->disk->stat_data += sz;
1228d83a80eeSchristos alloc->disk->stat_alloc += xlsz;
1229d83a80eeSchristos /* now increase the nextgrow to commit this xl chunk */
1230d83a80eeSchristos alloc->disk->nextgrow += xlsz;
1231d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1232d83a80eeSchristos return ret + sizeof(udb_xl_chunk_d); /* data ptr */
1233d83a80eeSchristos }
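
/*
 * Sketch of the XL chunk that grow_xl creates (xlsz is a whole number
 * of UDB_ALLOC_CHUNK_SIZE chunks, and ret is chunk aligned):
 *
 *   ret                                            ret+xlsz
 *   | udb_xl_chunk_d | data ...      | size (8) | pad | UDB_EXP_XL |
 *                                      xlsz-16          xlsz-1
 *
 * The trailing size and exp marker let udb_alloc_compact walk chunks
 * backwards from the end of the used region.
 */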
1234d83a80eeSchristos
1235d83a80eeSchristos /** make space for XL allocation */
1236d83a80eeSchristos static udb_void
1237d83a80eeSchristos udb_alloc_xl_space(void* base, udb_alloc* alloc, size_t sz)
1238d83a80eeSchristos {
1239d83a80eeSchristos /* allocate whole mbs of space, at end of space */
1240d83a80eeSchristos uint64_t asz = sz + sizeof(udb_xl_chunk_d) + sizeof(uint64_t)*2;
1241d83a80eeSchristos uint64_t need=(asz+UDB_ALLOC_CHUNK_SIZE-1)&(~(UDB_ALLOC_CHUNK_SIZE-1));
1242d83a80eeSchristos uint64_t grow_end = grow_end_calc(alloc, UDB_ALLOC_CHUNKS_MAX) + need;
1243d83a80eeSchristos assert(need >= asz);
1244d83a80eeSchristos if(grow_end <= alloc->udb->base_size) {
1245d83a80eeSchristos /* can do this in available space */
1246d83a80eeSchristos return grow_xl(base, alloc, need, sz);
1247d83a80eeSchristos }
1248d83a80eeSchristos /* have to grow file and re-mmap */
1249d83a80eeSchristos grow_end = grow_extra_check(alloc, grow_end);
1250d83a80eeSchristos if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
1251d83a80eeSchristos return 0; /* mmap or write failed (disk or mem full) */
1252d83a80eeSchristos }
1253d83a80eeSchristos /* we have enough space now */
1254d83a80eeSchristos assert(grow_end <= alloc->udb->base_size);
1255d83a80eeSchristos assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
1256d83a80eeSchristos return grow_xl(base, alloc, need, sz);
1257d83a80eeSchristos }
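
/*
 * Worked example: for sz of 3 Mb of data, asz adds the XL header and two
 * trailing uint64_t's and so exceeds 3 Mb; need is then rounded up to
 * 4 whole chunks (4 Mb, assuming UDB_ALLOC_CHUNK_SIZE is 1 Mb). The
 * grow_end_calc call reserves room to align nextgrow to a chunk boundary
 * first, so the XL chunk itself starts chunk aligned (see grow_align in
 * grow_xl).
 */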
1258d83a80eeSchristos
1259d83a80eeSchristos /** divide big(2**e2) into pieces so 2**exp fits */
1260d83a80eeSchristos static udb_void
1261d83a80eeSchristos udb_alloc_subdivide(void* base, udb_alloc* alloc, udb_void big, int e2,
1262d83a80eeSchristos int exp)
1263d83a80eeSchristos {
1264d83a80eeSchristos int e = e2;
1265d83a80eeSchristos uint64_t sz = (uint64_t)1<<e2;
1266d83a80eeSchristos assert(big && e2 > exp);
1267d83a80eeSchristos /* the returned piece to use is the first piece; keep splitting
1268d83a80eeSchristos * and push the upper half onto the free list until it fits */
1269d83a80eeSchristos do {
1270d83a80eeSchristos sz >>= 1; /* divide size of big by two */
1271d83a80eeSchristos e--; /* that means its exp is one smaller */
1272d83a80eeSchristos udb_alloc_push_fl(base, alloc, big+sz, e);
1273d83a80eeSchristos } while(e != exp);
1274d83a80eeSchristos /* exit loop when last pushed is same size as what we want */
1275d83a80eeSchristos return big;
1276d83a80eeSchristos }
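
/*
 * Worked example: subdividing a free 16 Kb chunk (e2 = 14) to obtain a
 * 2 Kb piece (exp = 11) pushes the upper halves onto the free lists:
 * big+8K as exp 13, big+4K as exp 12 and big+2K as exp 11, and returns
 * big itself as the 2 Kb piece to use.
 */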
1277d83a80eeSchristos
1278d83a80eeSchristos /** returns the exponent size of the chunk needed for data sz */
1279d83a80eeSchristos static int
1280d83a80eeSchristos udb_alloc_exp_needed(size_t sz)
1281d83a80eeSchristos {
1282d83a80eeSchristos uint64_t asz = sz + sizeof(udb_chunk_d) + 1;
1283*ee758998Schristos int exp;
1284d83a80eeSchristos if(asz > UDB_ALLOC_CHUNK_SIZE) {
1285d83a80eeSchristos return UDB_EXP_XL;
1286d83a80eeSchristos } else if(asz <= UDB_ALLOC_CHUNK_MINSIZE) {
1287d83a80eeSchristos return UDB_ALLOC_CHUNK_MINEXP;
1288d83a80eeSchristos }
1289*ee758998Schristos exp = udb_exp_size(asz);
1290*ee758998Schristos assert(exp <= UDB_ALLOC_CHUNKS_MAX);
1291*ee758998Schristos return exp;
1292d83a80eeSchristos }
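
/*
 * Example: a request for sz bytes also needs the udb_chunk_d header in
 * front and the one-octet exp marker at the end. If that total (asz)
 * comes to, say, 100 bytes, the smallest fitting power of two is 128,
 * so a chunk with exp 7 is used; a total larger than
 * UDB_ALLOC_CHUNK_SIZE becomes an XL allocation instead.
 */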
1293d83a80eeSchristos
1294d83a80eeSchristos udb_void udb_alloc_space(udb_alloc* alloc, size_t sz)
1295d83a80eeSchristos {
1296d83a80eeSchristos void* base = alloc->udb->base;
1297d83a80eeSchristos /* calculate actual allocation size */
1298d83a80eeSchristos int e2, exp = udb_alloc_exp_needed(sz);
1299d83a80eeSchristos if(exp == UDB_EXP_XL)
1300d83a80eeSchristos return udb_alloc_xl_space(base, alloc, sz);
1301d83a80eeSchristos /* see if there is a free chunk of that size exactly */
1302d83a80eeSchristos if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]) {
1303d83a80eeSchristos /* snip from freelist, udb_chunk_d */
1304d83a80eeSchristos udb_void ret;
1305d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1306d83a80eeSchristos ret = udb_alloc_pop_fl(base, alloc, exp);
1307d83a80eeSchristos /* use it - the exp byte and last-octet marker are already correct for this size */
1308d83a80eeSchristos UDB_CHUNK(ret)->flags = 0;
1309d83a80eeSchristos UDB_CHUNK(ret)->ptrlist = 0;
1310d83a80eeSchristos UDB_CHUNK(ret)->type = udb_chunk_type_data;
1311d83a80eeSchristos /* update stats */
1312d83a80eeSchristos alloc->disk->stat_data += sz;
1313d83a80eeSchristos alloc->disk->stat_alloc += (1<<exp);
1314d83a80eeSchristos assert(alloc->disk->stat_free >= (1u<<exp));
1315d83a80eeSchristos alloc->disk->stat_free -= (1<<exp);
1316d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1317d83a80eeSchristos return ret + sizeof(udb_chunk_d); /* ptr to data */
1318d83a80eeSchristos }
1319d83a80eeSchristos /* see if we can subdivide a larger chunk */
1320d83a80eeSchristos for(e2 = exp+1; e2 <= UDB_ALLOC_CHUNKS_MAX; e2++)
1321d83a80eeSchristos if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
1322d83a80eeSchristos udb_void big, ret; /* udb_chunk_d */
1323d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1324d83a80eeSchristos big = udb_alloc_pop_fl(base, alloc, e2);
1325d83a80eeSchristos /* push other parts onto freelists (needs inited) */
1326d83a80eeSchristos ret = udb_alloc_subdivide(base, alloc, big, e2, exp);
1327d83a80eeSchristos /* use final part (needs inited) */
1328d83a80eeSchristos UDB_CHUNK(ret)->exp = (uint8_t)exp;
1329d83a80eeSchristos /* if we crash here, the new exp makes this read as a smaller free chunk */
1330d83a80eeSchristos UDB_CHUNK(ret)->flags = 0;
1331d83a80eeSchristos UDB_CHUNK(ret)->ptrlist = 0;
1332d83a80eeSchristos /* set type to commit data chunk */
1333d83a80eeSchristos UDB_CHUNK(ret)->type = udb_chunk_type_data;
1334d83a80eeSchristos /* store last octet */
1335d83a80eeSchristos chunk_set_last(base, ret, exp, (uint8_t)exp);
1336d83a80eeSchristos /* update stats */
1337d83a80eeSchristos alloc->disk->stat_data += sz;
1338d83a80eeSchristos alloc->disk->stat_alloc += (1<<exp);
1339d83a80eeSchristos assert(alloc->disk->stat_free >= (1u<<exp));
1340d83a80eeSchristos alloc->disk->stat_free -= (1<<exp);
1341d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1342d83a80eeSchristos return ret + sizeof(udb_chunk_d); /* ptr to data */
1343d83a80eeSchristos }
1344d83a80eeSchristos /* we need to grow an extra chunk */
1345d83a80eeSchristos return udb_alloc_grow_space(base, alloc, sz, exp);
1346d83a80eeSchristos }
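
/*
 * Minimal usage sketch (illustrative only): allocate, use and free a
 * data chunk. The base pointer must be re-fetched after every allocation
 * because the file may have been re-mmapped, the same size must be
 * passed back to udb_alloc_free, and any relptrs inside the chunk must
 * be zeroed before the free.
 *
 *	udb_void d = udb_alloc_space(alloc, 100);
 *	if(d) {
 *		void* base = alloc->udb->base;
 *		memset(UDB_REL(base, d), 0, 100);
 *		...
 *		(void)udb_alloc_free(alloc, d, 100);
 *	}
 */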
1347d83a80eeSchristos
1348d83a80eeSchristos /** see if there is free space to allocate a chunk into */
1349d83a80eeSchristos static int
1350d83a80eeSchristos have_free_for(udb_alloc* alloc, int exp)
1351d83a80eeSchristos {
1352d83a80eeSchristos int e2;
1353d83a80eeSchristos if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP])
1354d83a80eeSchristos return exp;
1355d83a80eeSchristos for(e2 = exp+1; e2 <= UDB_ALLOC_CHUNKS_MAX; e2++)
1356d83a80eeSchristos if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
1357d83a80eeSchristos return e2;
1358d83a80eeSchristos }
1359d83a80eeSchristos return 0;
1360d83a80eeSchristos }
1361d83a80eeSchristos
1362d83a80eeSchristos /** fix relptr prev and next for moved relptr structures */
1363d83a80eeSchristos static void
1364d83a80eeSchristos chunk_fix_ptr_each(void* base, udb_rel_ptr* rp, void* arg)
1365d83a80eeSchristos {
1366d83a80eeSchristos udb_void* data = (udb_void*)arg;
1367d83a80eeSchristos udb_void r;
1368d83a80eeSchristos if(!rp->data)
1369d83a80eeSchristos return;
1370d83a80eeSchristos r = UDB_SYSTOREL(base, rp);
1371d83a80eeSchristos if(rp->next)
1372d83a80eeSchristos UDB_REL_PTR(rp->next)->prev = r;
1373d83a80eeSchristos if(rp->prev)
1374d83a80eeSchristos UDB_REL_PTR(rp->prev)->next = r;
1375d83a80eeSchristos else {
1376d83a80eeSchristos /* if this is a pointer to its own chunk, fix it up;
1377d83a80eeSchristos * the data ptr gets set by relptr_edit later. */
1378d83a80eeSchristos if(rp->data == data[0])
1379d83a80eeSchristos UDB_CHUNK(data[1])->ptrlist = r;
1380d83a80eeSchristos else UDB_CHUNK(chunk_from_dataptr(rp->data))->ptrlist = r;
1381d83a80eeSchristos }
1382d83a80eeSchristos }
1383d83a80eeSchristos
1384d83a80eeSchristos /** fix pointers from and to a moved chunk */
1385d83a80eeSchristos static void
1386d83a80eeSchristos chunk_fix_ptrs(void* base, udb_base* udb, udb_chunk_d* cp, udb_void data,
1387d83a80eeSchristos uint64_t dsz, udb_void olddata)
1388d83a80eeSchristos {
1389d83a80eeSchristos udb_void d[2];
1390d83a80eeSchristos d[0] = olddata;
1391d83a80eeSchristos d[1] = data;
1392d83a80eeSchristos (*udb->walkfunc)(base, udb->walkarg, cp->type, UDB_REL(base, data),
1393d83a80eeSchristos dsz, &chunk_fix_ptr_each, d);
1394d83a80eeSchristos udb_rel_ptr_edit(base, cp->ptrlist, data);
1395d83a80eeSchristos udb_base_ram_ptr_edit(udb, olddata, data);
1396d83a80eeSchristos }
1397d83a80eeSchristos
1398d83a80eeSchristos /** move an allocated chunk to use a free chunk */
1399d83a80eeSchristos static void
1400d83a80eeSchristos move_chunk(void* base, udb_alloc* alloc, udb_void f, int exp, uint64_t esz,
1401d83a80eeSchristos int e2)
1402d83a80eeSchristos {
1403d83a80eeSchristos udb_void res = udb_alloc_pop_fl(base, alloc, e2);
1404d83a80eeSchristos udb_chunk_d* rp;
1405d83a80eeSchristos udb_chunk_d* fp;
1406d83a80eeSchristos if(exp != e2) {
1407d83a80eeSchristos /* it is bigger, subdivide it */
1408d83a80eeSchristos res = udb_alloc_subdivide(base, alloc, res, e2, exp);
1409d83a80eeSchristos }
1410d83a80eeSchristos assert(res != f);
1411d83a80eeSchristos /* setup rollback information */
1412d83a80eeSchristos alloc->udb->glob_data->rb_old = f;
1413d83a80eeSchristos alloc->udb->glob_data->rb_new = res;
1414d83a80eeSchristos alloc->udb->glob_data->rb_size = esz;
1415d83a80eeSchristos /* take the res, exp into use */
1416d83a80eeSchristos rp = UDB_CHUNK(res);
1417d83a80eeSchristos fp = UDB_CHUNK(f);
1418d83a80eeSchristos /* copy over the data */
1419d83a80eeSchristos memcpy(rp, fp, esz);
1420d83a80eeSchristos /* adjust rel ptrs */
1421d83a80eeSchristos chunk_fix_ptrs(base, alloc->udb, rp, res+sizeof(udb_chunk_d),
1422d83a80eeSchristos esz-sizeof(udb_chunk_d)-1, f+sizeof(udb_chunk_d));
1423d83a80eeSchristos
1424d83a80eeSchristos /* do not freeup the fp; caller does that */
1425d83a80eeSchristos }
1426d83a80eeSchristos
1427d83a80eeSchristos /** unlink several free elements to overwrite with xl chunk */
1428d83a80eeSchristos static void
1429d83a80eeSchristos free_xl_space(void* base, udb_alloc* alloc, udb_void s, uint64_t m)
1430d83a80eeSchristos {
1431d83a80eeSchristos udb_void q = s + m - UDB_ALLOC_CHUNK_SIZE;
1432d83a80eeSchristos /* because of header and alignment we know s >= UDB_ALLOC_CHUNK_SIZE*/
1433d83a80eeSchristos assert(s >= UDB_ALLOC_CHUNK_SIZE);
1434d83a80eeSchristos while(q >= s) {
1435d83a80eeSchristos assert(UDB_CHUNK(q)->exp == UDB_ALLOC_CHUNKS_MAX);
1436d83a80eeSchristos assert(UDB_CHUNK(q)->type == udb_chunk_type_free);
1437d83a80eeSchristos udb_alloc_unlink_fl(base, alloc, q, UDB_ALLOC_CHUNKS_MAX);
1438d83a80eeSchristos q -= UDB_ALLOC_CHUNK_SIZE;
1439d83a80eeSchristos }
1440d83a80eeSchristos }
1441d83a80eeSchristos
1442d83a80eeSchristos /** move an XL chunk, and keep track of segments for rollback */
1443d83a80eeSchristos static void
1444d83a80eeSchristos move_xl_segment(void* base, udb_base* udb, udb_void xl, udb_void n,
1445d83a80eeSchristos uint64_t sz, uint64_t startseg)
1446d83a80eeSchristos {
1447d83a80eeSchristos udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
1448d83a80eeSchristos udb_xl_chunk_d* np = UDB_XL_CHUNK(n);
1449d83a80eeSchristos uint64_t amount = xl - n;
1450d83a80eeSchristos assert(n < xl); /* move to compact */
1451d83a80eeSchristos
1452d83a80eeSchristos /* setup move rollback */
1453d83a80eeSchristos udb->glob_data->rb_old = xl;
1454d83a80eeSchristos udb->glob_data->rb_new = n;
1455d83a80eeSchristos udb->glob_data->rb_size = sz;
1456d83a80eeSchristos
1457d83a80eeSchristos /* is it overlapping? */
1458d83a80eeSchristos if(sz <= amount) {
1459d83a80eeSchristos memcpy(np, xlp, sz);
1460d83a80eeSchristos } else {
1461d83a80eeSchristos /* move and commit per 1M segment to avoid data loss */
1462d83a80eeSchristos uint64_t seg, maxseg = amount/UDB_ALLOC_CHUNK_SIZE;
1463d83a80eeSchristos for(seg = startseg; seg<maxseg; seg++) {
1464d83a80eeSchristos udb->glob_data->rb_seg = seg;
1465d83a80eeSchristos memcpy(UDB_REL(base, n+seg*UDB_ALLOC_CHUNK_SIZE),
1466d83a80eeSchristos UDB_REL(base, xl+seg*UDB_ALLOC_CHUNK_SIZE),
1467d83a80eeSchristos UDB_ALLOC_CHUNK_SIZE);
1468d83a80eeSchristos }
1469d83a80eeSchristos
1470d83a80eeSchristos }
1471d83a80eeSchristos }
1472d83a80eeSchristos
1473d83a80eeSchristos /** move list of XL chunks to the front by the shift amount */
1474d83a80eeSchristos static void
1475d83a80eeSchristos move_xl_list(void* base, udb_alloc* alloc, udb_void xl_start, uint64_t xl_sz,
1476d83a80eeSchristos uint64_t amount)
1477d83a80eeSchristos {
1478d83a80eeSchristos udb_void xl = xl_start;
1479d83a80eeSchristos assert( (xl_start&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
1480d83a80eeSchristos assert( (amount&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
1481d83a80eeSchristos assert( (xl_sz&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* multiples */
1482d83a80eeSchristos while(xl < xl_start+xl_sz) {
1483d83a80eeSchristos udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
1484d83a80eeSchristos udb_void n = xl-amount;
1485d83a80eeSchristos uint64_t sz = xlp->size;
1486d83a80eeSchristos assert(xlp->exp == UDB_EXP_XL);
1487d83a80eeSchristos move_xl_segment(base, alloc->udb, xl, n, sz, 0);
1488d83a80eeSchristos chunk_fix_ptrs(base, alloc->udb, UDB_CHUNK(n),
1489d83a80eeSchristos n+sizeof(udb_xl_chunk_d),
1490d83a80eeSchristos sz-sizeof(udb_xl_chunk_d)-sizeof(uint64_t)*2,
1491d83a80eeSchristos xl+sizeof(udb_xl_chunk_d));
1492d83a80eeSchristos }
1493d83a80eeSchristos alloc->disk->stat_free -= amount;
1494d83a80eeSchristos alloc->disk->nextgrow -= amount;
1495d83a80eeSchristos alloc->udb->glob_data->rb_old = 0;
1496d83a80eeSchristos alloc->udb->glob_data->rb_new = 0;
1497d83a80eeSchristos alloc->udb->glob_data->rb_size = 0;
1498d83a80eeSchristos }
1499d83a80eeSchristos
1500d83a80eeSchristos /** see if free chunk can coagulate with another chunk, return other chunk */
1501d83a80eeSchristos static udb_void
1502d83a80eeSchristos coagulate_possible(void* base, udb_alloc* alloc, udb_void f, int exp,
1503d83a80eeSchristos uint64_t esz)
1504d83a80eeSchristos {
1505d83a80eeSchristos udb_void other = f^esz;
1506d83a80eeSchristos if(exp == UDB_ALLOC_CHUNKS_MAX)
1507d83a80eeSchristos return 0; /* no further merges */
1508d83a80eeSchristos if(other >= alloc->udb->base_size)
1509d83a80eeSchristos return 0; /* not allocated */
1510d83a80eeSchristos if(other >= alloc->disk->nextgrow)
1511d83a80eeSchristos return 0; /* not in use */
1512d83a80eeSchristos if(other < alloc->udb->glob_data->hsize)
1513d83a80eeSchristos return 0; /* cannot merge with header */
1514d83a80eeSchristos /* the header is also protected by the special exp marker */
1515d83a80eeSchristos /* see if the other chunk is a free chunk */
1516d83a80eeSchristos
1517d83a80eeSchristos /* check closest marker to avoid large memory churn */
1518d83a80eeSchristos /* and also it makes XL allocations and header special markers work */
1519d83a80eeSchristos if(f > other) {
1520d83a80eeSchristos assert(f > 1); /* this is certain because of header */
1521d83a80eeSchristos if(*((uint8_t*)UDB_REL(base, f-1)) == (uint8_t)exp) {
1522d83a80eeSchristos /* can do it if the other part is a free chunk */
1523d83a80eeSchristos assert(UDB_FREE_CHUNK(other)->exp == (uint8_t)exp);
1524d83a80eeSchristos if(UDB_CHUNK(other)->type == udb_chunk_type_free)
1525d83a80eeSchristos return other;
1526d83a80eeSchristos }
1527d83a80eeSchristos } else {
1528d83a80eeSchristos if(UDB_CHUNK(other)->exp == (uint8_t)exp) {
1529d83a80eeSchristos /* can do it if the other part is a free chunk */
1530d83a80eeSchristos assert(chunk_get_last(base, other, exp)==(uint8_t)exp);
1531d83a80eeSchristos if(UDB_CHUNK(other)->type == udb_chunk_type_free)
1532d83a80eeSchristos return other;
1533d83a80eeSchristos }
1534d83a80eeSchristos }
1535d83a80eeSchristos return 0;
1536d83a80eeSchristos }
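
/*
 * The buddy of a chunk is found with an xor: for f 0x43000 and esz
 * 0x1000 (exp 12), other = f ^ esz = 0x42000; for f 0x42000 the same
 * xor gives 0x43000. The two are the halves of the enclosing chunk of
 * size 0x2000 at 0x42000, so merging them recreates that larger chunk.
 */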
1537d83a80eeSchristos
1538d83a80eeSchristos /** coagulate and then add new free segment, return final free segment */
1539d83a80eeSchristos static udb_void
1540d83a80eeSchristos coagulate_and_push(void* base, udb_alloc* alloc, udb_void last, int exp,
1541d83a80eeSchristos uint64_t esz)
1542d83a80eeSchristos {
1543d83a80eeSchristos /* new free chunk here, attempt coagulate */
1544d83a80eeSchristos udb_void other;
1545d83a80eeSchristos while( (other=coagulate_possible(base, alloc, last, exp, esz)) ) {
1546d83a80eeSchristos /* unlink that other chunk */
1547d83a80eeSchristos udb_alloc_unlink_fl(base, alloc, other, exp);
1548d83a80eeSchristos /* merge up */
1549d83a80eeSchristos if(other < last)
1550d83a80eeSchristos last = other;
1551d83a80eeSchristos exp++;
1552d83a80eeSchristos esz <<= 1;
1553d83a80eeSchristos }
1554d83a80eeSchristos /* free the final segment */
1555d83a80eeSchristos udb_alloc_push_fl(base, alloc, last, exp);
1556d83a80eeSchristos return last;
1557d83a80eeSchristos }
1558d83a80eeSchristos
1559d83a80eeSchristos /** attempt to compact the data and move free space to the end */
1560d83a80eeSchristos int
1561d83a80eeSchristos udb_alloc_compact(void* base, udb_alloc* alloc)
1562d83a80eeSchristos {
1563d83a80eeSchristos udb_void last;
1564d83a80eeSchristos int exp, e2;
1565d83a80eeSchristos uint64_t esz;
1566d83a80eeSchristos uint64_t at = alloc->disk->nextgrow;
1567d83a80eeSchristos udb_void xl_start = 0;
1568d83a80eeSchristos uint64_t xl_sz = 0;
1569d83a80eeSchristos if(alloc->udb->inhibit_compact)
1570d83a80eeSchristos return 1;
1571d83a80eeSchristos alloc->udb->useful_compact = 0;
1572d83a80eeSchristos while(at > alloc->udb->glob_data->hsize) {
1573d83a80eeSchristos /* grab last entry */
1574d83a80eeSchristos exp = (int)*((uint8_t*)UDB_REL(base, at-1));
1575d83a80eeSchristos if(exp == UDB_EXP_XL) {
1576d83a80eeSchristos /* for XL chunks:
1577d83a80eeSchristos * - inspect the size of the XLchunklist at end
1578d83a80eeSchristos * - attempt to compact in front of the XL chunk list
1579d83a80eeSchristos */
1580d83a80eeSchristos uint64_t xlsz = *((uint64_t*)UDB_REL(base,
1581d83a80eeSchristos at-sizeof(uint64_t)*2));
1582d83a80eeSchristos udb_void xl = at-xlsz;
1583d83a80eeSchristos #ifndef NDEBUG
1584d83a80eeSchristos udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
1585d83a80eeSchristos assert(xlp->exp == UDB_EXP_XL);
1586d83a80eeSchristos assert(xlp->type != udb_chunk_type_free);
1587d83a80eeSchristos #endif
1588d83a80eeSchristos /* got the segment, add it to the xl chunk list */
1589d83a80eeSchristos if(xl_start != 0 && xl+xlsz != xl_start) {
1590d83a80eeSchristos /* nonadjoining XL part, but they are aligned,
1591d83a80eeSchristos * so the space in between is whole Mbs,
1592d83a80eeSchristos * shift the later part(s) and continue */
1593d83a80eeSchristos uint64_t m = xl_start - (xl+xlsz);
1594d83a80eeSchristos assert(xl_start > xl+xlsz);
1595d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
1596d83a80eeSchristos free_xl_space(base, alloc, xl+xlsz, m);
1597d83a80eeSchristos move_xl_list(base, alloc, xl_start, xl_sz, m);
1598d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1599d83a80eeSchristos }
1600d83a80eeSchristos xl_start = xl;
1601d83a80eeSchristos xl_sz += xlsz;
1602d83a80eeSchristos at = xl;
1603d83a80eeSchristos continue;
1604d83a80eeSchristos /* end of XL if */
1605d83a80eeSchristos } else if(exp < UDB_ALLOC_CHUNK_MINEXP
1606d83a80eeSchristos || exp > UDB_ALLOC_CHUNKS_MAX)
1607d83a80eeSchristos break; /* special chunk or garbage */
1608d83a80eeSchristos esz = (uint64_t)1<<exp;
1609d83a80eeSchristos last = at - esz;
1610d83a80eeSchristos assert(UDB_CHUNK(last)->exp == (uint8_t)exp);
1611d83a80eeSchristos if(UDB_CHUNK(last)->type == udb_chunk_type_free) {
1612d83a80eeSchristos /* if xl_start is set, continue looking for stuff to
1613d83a80eeSchristos * move, but do not unlink this free segment */
1614d83a80eeSchristos if(!xl_start) {
1615d83a80eeSchristos /* it is a free chunk, remove it */
1616d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1617d83a80eeSchristos udb_alloc_unlink_fl(base, alloc, last, exp);
1618d83a80eeSchristos alloc->disk->stat_free -= esz;
1619d83a80eeSchristos alloc->disk->nextgrow = last;
1620d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1621d83a80eeSchristos /* and continue at this point */
1622d83a80eeSchristos }
1623d83a80eeSchristos at = last;
1624d83a80eeSchristos } else if( (e2=have_free_for(alloc, exp)) ) {
1625d83a80eeSchristos /* last entry can be allocated in free chunks
1626d83a80eeSchristos * move it to its new position, adjust rel_ptrs */
1627d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
1628d83a80eeSchristos move_chunk(base, alloc, last, exp, esz, e2);
1629d83a80eeSchristos if(xl_start) {
1630d83a80eeSchristos last = coagulate_and_push(base, alloc,
1631d83a80eeSchristos last, exp, esz);
1632d83a80eeSchristos } else {
1633d83a80eeSchristos /* shorten usage */
1634d83a80eeSchristos alloc->disk->stat_free -= esz;
1635d83a80eeSchristos alloc->disk->nextgrow = last;
1636d83a80eeSchristos }
1637d83a80eeSchristos alloc->udb->glob_data->rb_old = 0;
1638d83a80eeSchristos alloc->udb->glob_data->rb_new = 0;
1639d83a80eeSchristos alloc->udb->glob_data->rb_size = 0;
1640d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1641d83a80eeSchristos /* and continue in front of it */
1642d83a80eeSchristos at = last;
1643d83a80eeSchristos } else {
1644d83a80eeSchristos /* cannot compact this block, stop compacting */
1645d83a80eeSchristos break;
1646d83a80eeSchristos }
1647d83a80eeSchristos /* if that worked, repeat it */
1648d83a80eeSchristos }
1649d83a80eeSchristos /* if we passed xl chunks, see if XL-chunklist can move */
1650d83a80eeSchristos if(xl_start) {
1651d83a80eeSchristos /* calculate free space in front of the XLchunklist. */
1652d83a80eeSchristos /* has to be whole mbs of free space */
1653d83a80eeSchristos /* if so, we can move the XL chunks. Move them all back
1654d83a80eeSchristos * by the new free space. */
1655d83a80eeSchristos /* this compacts very well, but the XL chunks can be moved
1656d83a80eeSchristos * multiple times; worst case for every mb freed a huge sized
1657d83a80eeSchristos * xlchunklist gets moved. */
1658d83a80eeSchristos /* free space must be, since aligned and coagulated, in
1659d83a80eeSchristos * chunks of a whole MB */
1660d83a80eeSchristos udb_void at = xl_start;
1661d83a80eeSchristos uint64_t m = 0;
1662d83a80eeSchristos while(*((uint8_t*)UDB_REL(base, at-1))==UDB_ALLOC_CHUNKS_MAX){
1663d83a80eeSchristos udb_void chunk = at - UDB_ALLOC_CHUNK_SIZE;
1664d83a80eeSchristos if(UDB_CHUNK(chunk)->type != udb_chunk_type_free)
1665d83a80eeSchristos break;
1666d83a80eeSchristos assert(UDB_CHUNK(chunk)->exp==UDB_ALLOC_CHUNKS_MAX);
1667d83a80eeSchristos m += UDB_ALLOC_CHUNK_SIZE;
1668d83a80eeSchristos at = chunk;
1669d83a80eeSchristos }
1670d83a80eeSchristos if(m != 0) {
1671d83a80eeSchristos assert(at+m == xl_start);
1672d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
1673d83a80eeSchristos free_xl_space(base, alloc, at, m);
1674d83a80eeSchristos move_xl_list(base, alloc, xl_start, xl_sz, m);
1675d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1676d83a80eeSchristos }
1677d83a80eeSchristos }
1678d83a80eeSchristos
1679d83a80eeSchristos /* if enough free, shrink the file; re-mmap */
1680d83a80eeSchristos if(enough_free(alloc)) {
1681d83a80eeSchristos uint64_t nsize = alloc->disk->nextgrow;
1682d83a80eeSchristos udb_base_shrink(alloc->udb, nsize);
1683d83a80eeSchristos if(!udb_base_remap(alloc->udb, alloc, nsize))
1684d83a80eeSchristos return 0;
1685d83a80eeSchristos }
1686d83a80eeSchristos return 1;
1687d83a80eeSchristos }
1688d83a80eeSchristos
1689d83a80eeSchristos int
1690d83a80eeSchristos udb_compact(udb_base* udb)
1691d83a80eeSchristos {
1692d83a80eeSchristos if(!udb) return 1;
1693d83a80eeSchristos if(!udb->useful_compact) return 1;
1694d83a80eeSchristos DEBUG(DEBUG_DBACCESS, 1, (LOG_INFO, "Compacting database..."));
1695d83a80eeSchristos return udb_alloc_compact(udb->base, udb->alloc);
1696d83a80eeSchristos }
1697d83a80eeSchristos
1698d83a80eeSchristos void udb_compact_inhibited(udb_base* udb, int inhibit)
1699d83a80eeSchristos {
1700d83a80eeSchristos if(!udb) return;
1701d83a80eeSchristos udb->inhibit_compact = inhibit;
1702d83a80eeSchristos }
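
/*
 * Typical use (illustrative): suppress compaction while doing a batch
 * of frees, then do a single pass afterwards if it became useful.
 *
 *	udb_compact_inhibited(udb, 1);
 *	... many udb_alloc_free() calls ...
 *	udb_compact_inhibited(udb, 0);
 *	(void)udb_compact(udb);
 */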
1703d83a80eeSchristos
1704d83a80eeSchristos #ifdef UDB_CHECK
1705d83a80eeSchristos /** check that rptrs are really zero before free */
1706d83a80eeSchristos void udb_check_rptr_zero(void* base, udb_rel_ptr* p, void* arg)
1707d83a80eeSchristos {
1708d83a80eeSchristos (void)base;
1709d83a80eeSchristos (void)arg;
1710d83a80eeSchristos assert(p->data == 0);
1711d83a80eeSchristos }
1712d83a80eeSchristos #endif /* UDB_CHECK */
1713d83a80eeSchristos
1714d83a80eeSchristos /** free XL chunk as multiples of CHUNK_SIZE free segments */
1715d83a80eeSchristos static void
1716d83a80eeSchristos udb_free_xl(void* base, udb_alloc* alloc, udb_void f, udb_xl_chunk_d* fp,
1717d83a80eeSchristos size_t sz)
1718d83a80eeSchristos {
1719d83a80eeSchristos uint64_t xlsz = fp->size;
1720d83a80eeSchristos uint64_t c;
1721d83a80eeSchristos /* lightweight check for buffer overflow in xl data */
1722d83a80eeSchristos assert(*((uint64_t*)(UDB_REL(base, f+xlsz-sizeof(uint64_t)*2)))==xlsz);
1723d83a80eeSchristos assert(*((uint8_t*)(UDB_REL(base, f+xlsz-1))) == UDB_EXP_XL);
1724d83a80eeSchristos assert( (xlsz & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* whole mbs */
1725d83a80eeSchristos assert( (f & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 ); /* aligned */
1726d83a80eeSchristos #ifdef UDB_CHECK
1727d83a80eeSchristos /* check that relptrs in this chunk have been zeroed */
1728d83a80eeSchristos (*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
1729d83a80eeSchristos UDB_REL(base, f+sizeof(udb_xl_chunk_d)), xlsz,
1730d83a80eeSchristos &udb_check_rptr_zero, NULL);
1731d83a80eeSchristos #endif
1732d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1733d83a80eeSchristos /* update stats */
1734d83a80eeSchristos alloc->disk->stat_data -= sz;
1735d83a80eeSchristos alloc->disk->stat_alloc -= xlsz;
1736d83a80eeSchristos alloc->disk->stat_free += xlsz;
1737d83a80eeSchristos /* walk in reverse, so the front blocks go first on the list */
1738d83a80eeSchristos c = f + xlsz - UDB_ALLOC_CHUNK_SIZE;
1739d83a80eeSchristos /* because of header and alignment we know f >= UDB_ALLOC_CHUNK_SIZE*/
1740d83a80eeSchristos assert(f >= UDB_ALLOC_CHUNK_SIZE);
1741d83a80eeSchristos while(c >= f) {
1742d83a80eeSchristos /* free a block of CHUNK_SIZE (1 Mb) */
1743d83a80eeSchristos udb_alloc_push_fl(base, alloc, c, UDB_ALLOC_CHUNKS_MAX);
1744d83a80eeSchristos c -= UDB_ALLOC_CHUNK_SIZE;
1745d83a80eeSchristos }
1746d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1747d83a80eeSchristos }
1748d83a80eeSchristos
1749d83a80eeSchristos int udb_alloc_free(udb_alloc* alloc, udb_void r, size_t sz)
1750d83a80eeSchristos {
1751d83a80eeSchristos void* base;
1752d83a80eeSchristos /* lookup chunk ptr */
1753d83a80eeSchristos udb_void f;
1754d83a80eeSchristos udb_chunk_d* fp;
1755d83a80eeSchristos uint64_t esz;
1756d83a80eeSchristos int exp;
1757d83a80eeSchristos udb_void other;
1758d83a80eeSchristos int coagulated = 0;
1759d83a80eeSchristos if(!r)
1760d83a80eeSchristos return 1; /* free(NULL) does nothing */
1761d83a80eeSchristos
1762d83a80eeSchristos /* lookup size of chunk */
1763d83a80eeSchristos base = alloc->udb->base;
1764d83a80eeSchristos /* fails for XL blocks */
1765d83a80eeSchristos f = chunk_from_dataptr(r);
1766d83a80eeSchristos fp = UDB_CHUNK(f);
1767d83a80eeSchristos assert(fp->type != udb_chunk_type_free);
1768d83a80eeSchristos
1769d83a80eeSchristos /* see if it has a ptrlist, if so: trouble, the list is not properly
1770d83a80eeSchristos * cleaned up. (although you can imagine a wholesale delete where
1771d83a80eeSchristos * it does not matter) */
1772d83a80eeSchristos assert(fp->ptrlist == 0);
1773d83a80eeSchristos
1774d83a80eeSchristos /* set ptrlist to 0 to stop relptr from using it, robustness. */
1775d83a80eeSchristos fp->ptrlist = 0;
1776d83a80eeSchristos
1777d83a80eeSchristos if(fp->exp == UDB_EXP_XL) {
1778d83a80eeSchristos udb_free_xl(base, alloc, f, (udb_xl_chunk_d*)fp, sz);
1779d83a80eeSchristos /* compact */
1780d83a80eeSchristos if(alloc->udb->inhibit_compact) {
1781d83a80eeSchristos alloc->udb->useful_compact = 1;
1782d83a80eeSchristos return 1;
1783d83a80eeSchristos }
1784d83a80eeSchristos return udb_alloc_compact(base, alloc);
1785d83a80eeSchristos }
1786d83a80eeSchristos /* it is a regular chunk of 2**exp size */
1787d83a80eeSchristos exp = (int)fp->exp;
1788d83a80eeSchristos esz = (uint64_t)1<<exp;
1789d83a80eeSchristos /* light check for e.g. buffer overflow of the data */
1790d83a80eeSchristos assert(sz < esz);
1791d83a80eeSchristos assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
1792d83a80eeSchristos #ifdef UDB_CHECK
1793d83a80eeSchristos /* check that relptrs in this chunk have been zeroed */
1794d83a80eeSchristos (*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
1795d83a80eeSchristos UDB_REL(base, r), esz, &udb_check_rptr_zero, NULL);
1796d83a80eeSchristos #endif
1797d83a80eeSchristos
1798d83a80eeSchristos /* update the stats */
1799d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
1800d83a80eeSchristos alloc->disk->stat_data -= sz;
1801d83a80eeSchristos alloc->disk->stat_free += esz;
1802d83a80eeSchristos alloc->disk->stat_alloc -= esz;
1803d83a80eeSchristos
1804d83a80eeSchristos /* if it can be merged with other free chunks, do so */
1805d83a80eeSchristos while( (other=coagulate_possible(base, alloc, f, exp, esz)) ) {
1806d83a80eeSchristos coagulated = 1;
1807d83a80eeSchristos /* unlink that other chunk and expand it (it has same size) */
1808d83a80eeSchristos udb_alloc_unlink_fl(base, alloc, other, exp);
1809d83a80eeSchristos /* merge up */
1810d83a80eeSchristos if(other < f)
1811d83a80eeSchristos f = other;
1812d83a80eeSchristos exp++;
1813d83a80eeSchristos esz <<= 1;
1814d83a80eeSchristos }
1815d83a80eeSchristos if(coagulated) {
1816d83a80eeSchristos /* put big free chunk into freelist, and init it */
1817d83a80eeSchristos udb_alloc_push_fl(base, alloc, f, exp);
1818d83a80eeSchristos } else {
1819d83a80eeSchristos /* we do not need to touch the last-exp-byte, which may save
1820d83a80eeSchristos * a reference to that page of memory */
1821d83a80eeSchristos fp->type = udb_chunk_type_free;
1822d83a80eeSchristos fp->flags = 0;
1823d83a80eeSchristos udb_alloc_push_fl_noinit(base, alloc, f, exp);
1824d83a80eeSchristos }
1825d83a80eeSchristos alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
1826d83a80eeSchristos /* compact */
1827d83a80eeSchristos if(alloc->udb->inhibit_compact) {
1828d83a80eeSchristos alloc->udb->useful_compact = 1;
1829d83a80eeSchristos return 1;
1830d83a80eeSchristos }
1831d83a80eeSchristos return udb_alloc_compact(base, alloc);
1832d83a80eeSchristos }
1833d83a80eeSchristos
1834d83a80eeSchristos udb_void udb_alloc_init(udb_alloc* alloc, void* d, size_t sz)
1835d83a80eeSchristos {
1836d83a80eeSchristos /* could be faster maybe, if grown? */
1837d83a80eeSchristos udb_void r = udb_alloc_space(alloc, sz);
1838d83a80eeSchristos if(!r) return r;
1839d83a80eeSchristos memcpy(UDB_REL(alloc->udb->base, r), d, sz);
1840d83a80eeSchristos return r;
1841d83a80eeSchristos }
1842d83a80eeSchristos
1843d83a80eeSchristos udb_void udb_alloc_realloc(udb_alloc* alloc, udb_void r, size_t osz, size_t sz)
1844d83a80eeSchristos {
1845d83a80eeSchristos void* base = alloc->udb->base;
1846d83a80eeSchristos udb_void c, n, newd;
1847d83a80eeSchristos udb_chunk_d* cp, *np;
1848d83a80eeSchristos uint64_t avail;
1849d83a80eeSchristos uint8_t cp_type;
1850d83a80eeSchristos /* emulate some posix realloc stuff */
1851d83a80eeSchristos if(r == 0)
1852d83a80eeSchristos return udb_alloc_space(alloc, sz);
1853d83a80eeSchristos if(sz == 0) {
1854d83a80eeSchristos if(!udb_alloc_free(alloc, r, osz))
1855d83a80eeSchristos log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
1856d83a80eeSchristos return 0;
1857d83a80eeSchristos }
1858d83a80eeSchristos c = chunk_from_dataptr(r);
1859d83a80eeSchristos cp = UDB_CHUNK(c);
1860d83a80eeSchristos cp_type = cp->type;
1861d83a80eeSchristos if(cp->exp == UDB_EXP_XL) {
1862d83a80eeSchristos avail = UDB_XL_CHUNK(c)->size - sizeof(udb_xl_chunk_d)
1863d83a80eeSchristos - sizeof(uint64_t)*2;
1864d83a80eeSchristos } else {
1865d83a80eeSchristos avail = ((uint64_t)1<<cp->exp) - sizeof(udb_chunk_d) - 1;
1866d83a80eeSchristos }
1867d83a80eeSchristos if(sz <= avail)
1868d83a80eeSchristos return r;
1869d83a80eeSchristos /* reallocate it, and copy */
1870d83a80eeSchristos newd = udb_alloc_space(alloc, sz);
1871d83a80eeSchristos if(!newd) return 0;
1872d83a80eeSchristos /* re-base after alloc, since re-mmap may have happened */
1873d83a80eeSchristos base = alloc->udb->base;
1874d83a80eeSchristos cp = NULL; /* may be invalid now, robustness */
1875d83a80eeSchristos n = chunk_from_dataptr(newd);
1876d83a80eeSchristos np = UDB_CHUNK(n);
1877d83a80eeSchristos np->type = cp_type;
1878d83a80eeSchristos memcpy(UDB_REL(base, newd), UDB_REL(base, r), osz);
1879d83a80eeSchristos /* fixup ptrs */
1880d83a80eeSchristos chunk_fix_ptrs(base, alloc->udb, np, newd, osz, r);
1881d83a80eeSchristos
1882d83a80eeSchristos if(!udb_alloc_free(alloc, r, osz))
1883d83a80eeSchristos log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
1884d83a80eeSchristos return newd;
1885d83a80eeSchristos }
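
/*
 * Usage sketch (illustrative): like realloc(), both the old and the new
 * payload sizes are passed in, and the returned offset replaces the old
 * one (0 means the reallocation failed and the old chunk is untouched).
 * Relptrs inside the chunk are fixed up by chunk_fix_ptrs; plain
 * pointers into the mmap must be re-derived afterwards because the file
 * may have been re-mmapped.
 *
 *	udb_void d = udb_alloc_space(alloc, 100);
 *	...
 *	d = udb_alloc_realloc(alloc, d, 100, 200);
 */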
1886d83a80eeSchristos
1887d83a80eeSchristos int udb_alloc_grow(udb_alloc* alloc, size_t sz, size_t num)
1888d83a80eeSchristos {
1889d83a80eeSchristos const uint64_t mb = 1024*1024;
1890d83a80eeSchristos int exp = udb_alloc_exp_needed(sz);
1891d83a80eeSchristos uint64_t esz;
1892d83a80eeSchristos uint64_t want;
1893d83a80eeSchristos if(exp == UDB_EXP_XL)
1894d83a80eeSchristos esz = (sz&(mb-1))+mb;
1895d83a80eeSchristos else esz = (uint64_t)1<<exp;
1896d83a80eeSchristos /* we need grow_end_calc to take into account alignment */
1897d83a80eeSchristos want = grow_end_calc(alloc, exp) + esz*(num-1);
1898d83a80eeSchristos assert(want >= alloc->udb->base_size);
1899d83a80eeSchristos if(!udb_base_grow_and_remap(alloc->udb, want)) {
1900d83a80eeSchristos log_msg(LOG_ERR, "failed to grow the specified amount");
1901d83a80eeSchristos return 0;
1902d83a80eeSchristos }
1903d83a80eeSchristos return 1;
1904d83a80eeSchristos }
1905d83a80eeSchristos
1906d83a80eeSchristos void udb_alloc_set_type(udb_alloc* alloc, udb_void r, udb_chunk_type tp)
1907d83a80eeSchristos {
1908d83a80eeSchristos void* base = alloc->udb->base;
1909d83a80eeSchristos udb_void f = chunk_from_dataptr(r);
1910d83a80eeSchristos udb_chunk_d* fp = UDB_CHUNK(f);
1911d83a80eeSchristos /* not the 'free' type, that must be set by allocation routines */
1912d83a80eeSchristos assert(fp->type != udb_chunk_type_free);
1913d83a80eeSchristos assert(tp != udb_chunk_type_free);
1914d83a80eeSchristos fp->type = tp;
1915d83a80eeSchristos }
1916d83a80eeSchristos
1917d83a80eeSchristos int udb_valid_offset(udb_base* udb, udb_void to, size_t destsize)
1918d83a80eeSchristos {
1919d83a80eeSchristos /* pointers are not valid before the header-size or after the
1920d83a80eeSchristos * used-region of the mmap */
1921d83a80eeSchristos return ( (to+destsize) <= udb->base_size &&
1922d83a80eeSchristos to >= (udb->glob_data->hsize-2*sizeof(udb_rel_ptr)) &&
1923d83a80eeSchristos (to+destsize) <= udb->alloc->disk->nextgrow);
1924d83a80eeSchristos }
1925d83a80eeSchristos
1926d83a80eeSchristos int udb_valid_dataptr(udb_base* udb, udb_void to)
1927d83a80eeSchristos {
1928d83a80eeSchristos void* base = udb->base;
1929d83a80eeSchristos udb_void ch;
1930d83a80eeSchristos int exp;
1931d83a80eeSchristos uint64_t esz;
1932d83a80eeSchristos /* our data chunks are aligned and at least 8 bytes */
1933d83a80eeSchristos if(!udb_valid_offset(udb, to, sizeof(uint64_t)))
1934d83a80eeSchristos return 0;
1935d83a80eeSchristos /* get the chunk pointer */
1936d83a80eeSchristos ch = chunk_from_dataptr(to);
1937d83a80eeSchristos if(!udb_valid_offset(udb, ch, sizeof(udb_chunk_d)))
1938d83a80eeSchristos return 0;
1939d83a80eeSchristos /* check its size */
1940d83a80eeSchristos exp = UDB_CHUNK(ch)->exp;
1941d83a80eeSchristos if(exp == UDB_EXP_XL) {
1942d83a80eeSchristos /* check XL chunk */
1943d83a80eeSchristos uint64_t xlsz;
1944d83a80eeSchristos if(!udb_valid_offset(udb, ch, sizeof(udb_xl_chunk_d)))
1945d83a80eeSchristos return 0;
1946d83a80eeSchristos xlsz = UDB_XL_CHUNK(ch)->size;
1947d83a80eeSchristos if(!udb_valid_offset(udb, ch+xlsz-1, 1))
1948d83a80eeSchristos return 0;
1949d83a80eeSchristos if(*((uint8_t*)UDB_REL(base, ch+xlsz-1)) != UDB_EXP_XL)
1950d83a80eeSchristos return 0;
1951d83a80eeSchristos if(*((uint64_t*)UDB_REL(base, ch+xlsz-sizeof(uint64_t)*2))
1952d83a80eeSchristos != xlsz)
1953d83a80eeSchristos return 0;
1954d83a80eeSchristos return 1;
1955d83a80eeSchristos }
1956d83a80eeSchristos /* check if regular chunk has matching end byte */
1957d83a80eeSchristos if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX)
1958d83a80eeSchristos return 0; /* cannot be a valid chunk */
1959d83a80eeSchristos esz = 1<<exp;
1960d83a80eeSchristos if(!udb_valid_offset(udb, ch+esz-1, 1))
1961d83a80eeSchristos return 0;
1962d83a80eeSchristos if(*((uint8_t*)UDB_REL(base, ch+esz-1)) != exp)
1963d83a80eeSchristos return 0;
1964d83a80eeSchristos return 1;
1965d83a80eeSchristos }
1966d83a80eeSchristos
1967d83a80eeSchristos int udb_valid_rptr(udb_base* udb, udb_void rptr, udb_void to)
1968d83a80eeSchristos {
1969d83a80eeSchristos void* base = udb->base;
1970d83a80eeSchristos udb_void p;
1971d83a80eeSchristos if(!udb_valid_offset(udb, rptr, sizeof(udb_rel_ptr)))
1972d83a80eeSchristos return 0;
1973d83a80eeSchristos if(!udb_valid_dataptr(udb, to))
1974d83a80eeSchristos return 0;
1975d83a80eeSchristos p = UDB_CHUNK(chunk_from_dataptr(to))->ptrlist;
1976d83a80eeSchristos while(p) {
1977d83a80eeSchristos if(!udb_valid_offset(udb, p, sizeof(udb_rel_ptr)))
1978d83a80eeSchristos return 0;
1979d83a80eeSchristos if(p == rptr)
1980d83a80eeSchristos return 1;
1981d83a80eeSchristos p = UDB_REL_PTR(p)->next;
1982d83a80eeSchristos }
1983d83a80eeSchristos return 0;
1984d83a80eeSchristos }
1985d83a80eeSchristos
1986d83a80eeSchristos void udb_rel_ptr_init(udb_rel_ptr* ptr)
1987d83a80eeSchristos {
1988d83a80eeSchristos memset(ptr, 0, sizeof(*ptr));
1989d83a80eeSchristos }
1990d83a80eeSchristos
1991d83a80eeSchristos void udb_rel_ptr_unlink(void* base, udb_rel_ptr* ptr)
1992d83a80eeSchristos {
1993d83a80eeSchristos if(!ptr->data)
1994d83a80eeSchristos return;
1995d83a80eeSchristos if(ptr->prev) {
1996d83a80eeSchristos UDB_REL_PTR(ptr->prev)->next = ptr->next;
1997d83a80eeSchristos } else {
1998d83a80eeSchristos UDB_CHUNK(chunk_from_dataptr(ptr->data))->ptrlist = ptr->next;
1999d83a80eeSchristos }
2000d83a80eeSchristos if(ptr->next) {
2001d83a80eeSchristos UDB_REL_PTR(ptr->next)->prev = ptr->prev;
2002d83a80eeSchristos }
2003d83a80eeSchristos }
2004d83a80eeSchristos
2005d83a80eeSchristos void udb_rel_ptr_link(void* base, udb_rel_ptr* ptr, udb_void to)
2006d83a80eeSchristos {
2007d83a80eeSchristos udb_chunk_d* chunk = UDB_CHUNK(chunk_from_dataptr(to));
2008d83a80eeSchristos ptr->prev = 0;
2009d83a80eeSchristos ptr->next = chunk->ptrlist;
2010d83a80eeSchristos if(ptr->next)
2011d83a80eeSchristos UDB_REL_PTR(ptr->next)->prev = UDB_SYSTOREL(base, ptr);
2012d83a80eeSchristos chunk->ptrlist = UDB_SYSTOREL(base, ptr);
2013d83a80eeSchristos ptr->data = to;
2014d83a80eeSchristos }
2015d83a80eeSchristos
2016d83a80eeSchristos void udb_rel_ptr_set(void* base, udb_rel_ptr* ptr, udb_void to)
2017d83a80eeSchristos {
2018d83a80eeSchristos assert(to == 0 || to > 64);
2019d83a80eeSchristos udb_rel_ptr_unlink(base, ptr);
2020d83a80eeSchristos if(to)
2021d83a80eeSchristos udb_rel_ptr_link(base, ptr, to);
2022d83a80eeSchristos else ptr->data = to;
2023d83a80eeSchristos }
2024d83a80eeSchristos
2025d83a80eeSchristos void udb_rel_ptr_edit(void* base, udb_void list, udb_void to)
2026d83a80eeSchristos {
2027d83a80eeSchristos udb_void p = list;
2028d83a80eeSchristos while(p) {
2029d83a80eeSchristos UDB_REL_PTR(p)->data = to;
2030d83a80eeSchristos p = UDB_REL_PTR(p)->next;
2031d83a80eeSchristos }
2032d83a80eeSchristos }
2033d83a80eeSchristos
2034d83a80eeSchristos #ifdef UDB_CHECK
2035d83a80eeSchristos /** check that all pointers are validly chained */
2036d83a80eeSchristos static void
2037d83a80eeSchristos udb_check_ptrs_valid(udb_base* udb)
2038d83a80eeSchristos {
2039d83a80eeSchristos size_t i;
2040d83a80eeSchristos udb_ptr* p, *prev;
2041d83a80eeSchristos for(i=0; i<udb->ram_size; i++) {
2042d83a80eeSchristos prev = NULL;
2043d83a80eeSchristos for(p=udb->ram_hash[i]; p; p=p->next) {
2044d83a80eeSchristos assert(p->prev == prev);
2045d83a80eeSchristos assert((size_t)(chunk_hash_ptr(p->data)&udb->ram_mask)
2046d83a80eeSchristos == i);
2047d83a80eeSchristos assert(p->base == &udb->base);
2048d83a80eeSchristos prev = p;
2049d83a80eeSchristos }
2050d83a80eeSchristos }
2051d83a80eeSchristos }
2052d83a80eeSchristos #endif /* UDB_CHECK */
2053d83a80eeSchristos
2054d83a80eeSchristos void udb_ptr_init(udb_ptr* ptr, udb_base* udb)
2055d83a80eeSchristos {
2056d83a80eeSchristos #ifdef UDB_CHECK
2057d83a80eeSchristos udb_check_ptrs_valid(udb); /* previous ptrs have been unlinked */
2058d83a80eeSchristos #endif
2059d83a80eeSchristos memset(ptr, 0, sizeof(*ptr));
2060d83a80eeSchristos ptr->base = &udb->base;
2061d83a80eeSchristos }
2062d83a80eeSchristos
2063d83a80eeSchristos void udb_ptr_set(udb_ptr* ptr, udb_base* udb, udb_void newval)
2064d83a80eeSchristos {
2065d83a80eeSchristos assert(newval == 0 || newval > 64);
2066d83a80eeSchristos if(ptr->data)
2067d83a80eeSchristos udb_base_unlink_ptr(udb, ptr);
2068d83a80eeSchristos ptr->data = newval;
2069d83a80eeSchristos if(newval)
2070d83a80eeSchristos udb_base_link_ptr(udb, ptr);
2071d83a80eeSchristos }
2072d83a80eeSchristos
2073d83a80eeSchristos int udb_ptr_alloc_space(udb_ptr* ptr, udb_base* udb, udb_chunk_type type,
2074d83a80eeSchristos size_t sz)
2075d83a80eeSchristos {
2076d83a80eeSchristos udb_void r;
2077d83a80eeSchristos r = udb_alloc_space(udb->alloc, sz);
2078d83a80eeSchristos if(!r) return 0;
2079d83a80eeSchristos udb_alloc_set_type(udb->alloc, r, type);
2080d83a80eeSchristos udb_ptr_init(ptr, udb);
2081d83a80eeSchristos udb_ptr_set(ptr, udb, r);
2082d83a80eeSchristos return 1;
2083d83a80eeSchristos }
2084d83a80eeSchristos
2085d83a80eeSchristos void udb_ptr_free_space(udb_ptr* ptr, udb_base* udb, size_t sz)
2086d83a80eeSchristos {
2087d83a80eeSchristos if(ptr->data) {
2088d83a80eeSchristos udb_void d = ptr->data;
2089d83a80eeSchristos udb_ptr_set(ptr, udb, 0);
2090d83a80eeSchristos udb_alloc_free(udb->alloc, d, sz);
2091d83a80eeSchristos }
2092d83a80eeSchristos }
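
/*
 * Usage sketch (illustrative; struct mydata is a hypothetical payload):
 * a linked udb_ptr keeps following its chunk across re-mmaps and
 * compaction, but a plain C pointer derived from it must not be kept
 * across further allocations.
 *
 *	udb_ptr p;
 *	if(udb_ptr_alloc_space(&p, udb, udb_chunk_type_data,
 *		sizeof(struct mydata))) {
 *		struct mydata* m = (struct mydata*)UDB_REL(*p.base, p.data);
 *		... fill in m ...
 *		udb_ptr_free_space(&p, udb, sizeof(struct mydata));
 *	}
 */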
2093d83a80eeSchristos
2094d83a80eeSchristos udb_chunk_type udb_ptr_get_type(udb_ptr* ptr)
2095d83a80eeSchristos {
2096d83a80eeSchristos udb_void f;
2097d83a80eeSchristos if(!ptr || ptr->data == 0) return udb_chunk_type_internal; /* something bad*/
2098d83a80eeSchristos f = chunk_from_dataptr(ptr->data);
2099d83a80eeSchristos return ((udb_chunk_d*)UDB_REL(*ptr->base, f))->type;
2100d83a80eeSchristos }
2101