// Written in the D programming language.
/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
and preserving a low degree of fragmentation by means of aligned allocations.

Source: $(PHOBOSSRC std/experimental/allocator/building_blocks/aligned_block_list.d)
*/
module std.experimental.allocator.building_blocks.aligned_block_list;

import std.experimental.allocator.common;
import std.experimental.allocator.building_blocks.null_allocator;

// Common function implementation for thread-local and shared AlignedBlockList
private mixin template AlignedBlockListImpl(bool isShared)
{
    import std.traits : hasMember;
    import std.typecons : Ternary;

    static if (isShared)
        import core.internal.spinlock : SpinLock;

private:
    // Doubly linked list of 'AlignedBlockNode'
    // Each node contains an `Allocator` followed by its payload
    static struct AlignedBlockNode
    {
        AlignedBlockNode* next, prev;
        Allocator bAlloc;

        static if (isShared)
        {
            shared(size_t) bytesUsed;
            // Since the lock is not taken when allocating, this acts like a refcount
            // keeping the node alive
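            // `keepAlive` is only ever modified while holding the list lock
            // (see `allocate`), so a plain `uint` suffices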
            uint keepAlive;
        }
        else
        {
            size_t bytesUsed;
        }
    }

    // Root of the internal doubly linked list
    AlignedBlockNode* root;

    // Number of active nodes
    uint numNodes;

    // If `numNodes` exceeds this limit, we start deallocating empty nodes
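    // e.g. with the default `theAlignment` of 2^^21 bytes (2 MB), empty nodes
    // start being reclaimed once more than 64 * 2 MB = 128 MB of parent
    // memory is held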
    enum uint maxNodes = 64;

    // This lock is always taken when changing the list
    // To improve performance, the lock is not taken when the allocation logic is called
    static if (isShared)
        SpinLock lock = SpinLock(SpinLock.Contention.brief);

    // Moves a node to the front of the list, allowing for quick allocations
    void moveToFront(AlignedBlockNode* tmp)
    {
        auto localRoot = cast(AlignedBlockNode*) root;
        if (tmp == localRoot)
            return;

        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        if (localRoot) localRoot.prev = tmp;
        tmp.next = localRoot;
        tmp.prev = null;

        root = cast(typeof(root)) tmp;
    }

    // Removes a node from the list, including its payload
    // The payload is deallocated by calling 'parent.deallocate'
    void removeNode(AlignedBlockNode* tmp)
    {
        auto next = tmp.next;
        if (tmp.prev) tmp.prev.next = tmp.next;
        if (tmp.next) tmp.next.prev = tmp.prev;
        parent.deallocate((cast(void*) tmp)[0 .. theAlignment]);

        if (tmp == cast(AlignedBlockNode*) root)
            root = cast(typeof(root)) next;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"-="(numNodes, 1);
        }
        else
        {
            numNodes--;
        }
    }

    // If the nodes do not have available space, a new node is created
    // by drawing memory from the parent allocator with aligned allocations.
    // The new node is inserted at the front of the list
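    //
    // Layout of one node's aligned allocation (`theAlignment` bytes in total,
    // starting at a `theAlignment`-aligned address):
    //
    //   +-------------------+-----------------------------------------+
    //   | AlignedBlockNode  | payload managed by `Allocator` (bAlloc) |
    //   +-------------------+-----------------------------------------+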
    bool insertNewNode()
    {
        void[] buf = parent.alignedAllocate(theAlignment, theAlignment);
        if (buf is null)
            return false;

        auto localRoot = cast(AlignedBlockNode*) root;
        auto newNode = cast(AlignedBlockNode*) buf;

        // The first part of the allocation represents the node contents,
        // followed by the actual payload
        ubyte[] payload = cast(ubyte[]) buf[AlignedBlockNode.sizeof .. $];
        newNode.bAlloc = Allocator(payload);

        newNode.next = localRoot;
        newNode.prev = null;
        if (localRoot)
            localRoot.prev = newNode;
        root = cast(typeof(root)) newNode;

        static if (isShared)
        {
            import core.atomic : atomicOp;
            atomicOp!"+="(numNodes, 1);
        }
        else
        {
            numNodes++;
        }

        return true;
    }

public:
    static if (stateSize!ParentAllocator) ParentAllocator parent;
    else alias parent = ParentAllocator.instance;

    enum ulong alignment = Allocator.alignment;

    // Since all memory is drawn from ParentAllocator, we can
    // forward this to the parent
    static if (hasMember!(ParentAllocator, "owns"))
    Ternary owns(void[] b)
    {
        return parent.owns(b);
    }

    // Use `theAlignment` to find the node which allocated this block
    bool deallocate(void[] b)
    {
        if (b is null)
            return true;

        // Round the buffer down to the nearest `theAlignment` multiple to
        // quickly find the owning `AlignedBlockNode`
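        // e.g. with `theAlignment == 2^^21`, a block at address
        // 0x7f32_1034_5678 maps to the node at 0x7f32_1020_0000
        // (the low 21 bits are cleared)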
        enum ulong mask = ~(theAlignment - 1);
        ulong ptr = ((cast(ulong) b.ptr) & mask);
        AlignedBlockNode *node = cast(AlignedBlockNode*) ptr;
        if (node.bAlloc.deallocate(b))
        {
            static if (isShared)
            {
                import core.atomic : atomicOp;
                atomicOp!"-="(node.bytesUsed, b.length);
            }
            else
            {
                node.bytesUsed -= b.length;
            }
            return true;
        }
        return false;
    }

    // Allocate works only if memory can be provided via `alignedAllocate` from the parent
    static if (hasMember!(ParentAllocator, "alignedAllocate"))
    void[] allocate(size_t n)
    {
        static if (isShared)
            import core.atomic : atomicOp, atomicLoad;

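        // Requests larger than a node can never succeed; note that a node's
        // usable payload is in fact `theAlignment - AlignedBlockNode.sizeof`
        // bytes, so sizes close to `theAlignment` may still fail below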
        if (n == 0 || n > theAlignment)
            return null;

        static if (isShared)
        {
            lock.lock();
            scope(exit) lock.unlock();
        }

        auto tmp = cast(AlignedBlockNode*) root;

        // Iterate through the list and find the first node which has memory available
        while (tmp)
        {
            auto next = tmp.next;
            static if (isShared)
            {
                // Allocations can happen outside the lock
                // Make sure nobody deletes this node while using it
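                // `keepAlive` itself is only mutated under the lock, both
                // here and where it is re-acquired below, so plain
                // increments are safe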
                tmp.keepAlive++;
                if (next) next.keepAlive++;
                lock.unlock();
            }

            auto result = tmp.bAlloc.allocate(n);
            if (result.length == n)
            {
                // Success
                static if (isShared)
                {
                    atomicOp!"+="(tmp.bytesUsed, n);
                    lock.lock();
                }
                else
                {
                    tmp.bytesUsed += n;
                }

                // Most likely this node has memory for more allocations
                // Move it to the front
                moveToFront(tmp);

                static if (isShared)
                {
                    tmp.keepAlive--;
                    if (next) next.keepAlive--;
                }

                return result;
            }

            // This node can now be removed if necessary
            static if (isShared)
            {
                lock.lock();
                tmp.keepAlive--;
                if (next) next.keepAlive--;
            }

            if (!next)
                break;

            tmp = next;
            next = tmp.next;

            // If there are too many nodes, free memory by removing empty nodes
            static if (isShared)
            {
                if (atomicLoad(numNodes) > maxNodes &&
                    atomicLoad(tmp.bytesUsed) == 0 &&
                    tmp.keepAlive == 0)
                {
                    removeNode(tmp);
                }
            }
            else
            {
                if (numNodes > maxNodes && tmp.bytesUsed == 0)
                {
                    removeNode(tmp);
                }
            }

            tmp = next;
        }

        // Cannot create new AlignedBlockNode. Most likely the ParentAllocator ran out of resources
        if (!insertNewNode())
            return null;

        tmp = cast(typeof(tmp)) root;
        void[] result = tmp.bAlloc.allocate(n);

        static if (isShared)
        {
            atomicOp!"+="(root.bytesUsed, result.length);
        }
        else
        {
            root.bytesUsed += result.length;
        }

        return result;
    }

    // goodAllocSize should not use state
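    // `Allocator.goodAllocSize` must be computable without per-instance
    // state, hence calling it through a null-initialized instance is fine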
    size_t goodAllocSize(const size_t n)
    {
        Allocator a = null;
        return a.goodAllocSize(n);
    }
}

/**
`AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
and preserving a low degree of fragmentation.
The allocator internally holds a doubly linked list of `Allocator` objects, which serve allocations
in a most-recently-used fashion: allocators recently used for `allocate` calls are moved to
the front of the list.

Although allocations are in theory served in linear search time, `deallocate` calls take
$(BIGOH 1) time by using aligned allocations. `ParentAllocator` must implement `alignedAllocate`
and it must be able to allocate `theAlignment` bytes at the same alignment. Each aligned allocation
done by `ParentAllocator` contains the metadata for an `Allocator`, followed by its payload.

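A minimal instantiation, mirroring the example below (names and sizes are
illustrative; any `Allocator` constructible from `ubyte[]` works), might be:

-------
AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12) list;
-------
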
Params:
Allocator = the allocator which is used to manage each node; it must have a constructor which receives
`ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
ParentAllocator = each node draws memory from the parent allocator; it must support `alignedAllocate`
theAlignment = alignment of each block, and at the same time the length of each node
*/
struct AlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has available memory,
        and moves it to the front of the list.

        All empty nodes which cannot return new memory are removed from the list.

        Params:
        n = bytes to allocate
        Returns:
        A chunk of memory of the required length, or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take place in constant
        time, regardless of the number of nodes in the list. `b.ptr` is rounded down
        to the nearest multiple of the `alignment` to quickly find the corresponding
        `AlignedBlockNode`.

        Params:
        b = buffer candidate for deallocation
        Returns:
        `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator and
        `Ternary.no` otherwise.

        Params:
        b = buffer tested if owned by this allocator
        Returns:
        `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!false;
    }
}

///
@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.typecons : Ternary;

    /*
    In this example we use 'AlignedBlockList' in conjunction with other allocators
    in order to create a more complex allocator.

    The 'SuperAllocator' uses a 'Segregator' to distribute allocations to sub-allocators,
    based on the requested size.

    Each sub-allocator is represented by an 'AlignedBlockList' of 'BitmappedBlocks'.
    Each 'AlignedBlockList' draws memory from a root allocator, in this case an 'AscendingPageAllocator'.

    Such an allocator not only provides good performance, but also a low degree of memory fragmentation.
    */
    alias SuperAllocator = Segregator!(
        32,
        AlignedBlockList!(BitmappedBlock!32, AscendingPageAllocator*, 1 << 12),
        Segregator!(

        64,
        AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12),
        Segregator!(

        128,
        AlignedBlockList!(BitmappedBlock!128, AscendingPageAllocator*, 1 << 12),
        AscendingPageAllocator*
        )));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(128 * 4096);

    // Set the parent allocator for all the sub-allocators
    a.allocatorForSize!256 = &pageAlloc;
    a.allocatorForSize!128.parent = &pageAlloc;
    a.allocatorForSize!64.parent = &pageAlloc;
    a.allocatorForSize!32.parent = &pageAlloc;

    enum testNum = 10;
    void[][testNum] buf;

    // Allocations of size 32 will go to the first 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(32);
        assert(buf[j].length == 32);

        // This is owned by the first 'AlignedBlockList'
        assert(a.allocatorForSize!32.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations of size 64 will go to the second 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(64);
        assert(buf[j].length == 64);

        // This is owned by the second 'AlignedBlockList'
        assert(a.allocatorForSize!64.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations of size 128 will go to the third 'AlignedBlockList'
    foreach (j; 0 .. testNum)
    {
        buf[j] = a.allocate(128);
        assert(buf[j].length == 128);

        // This is owned by the third 'AlignedBlockList'
        assert(a.allocatorForSize!128.owns(buf[j]) == Ternary.yes);
    }

    // Free the memory
    foreach (j; 0 .. testNum)
        assert(a.deallocate(buf[j]));

    // Allocations which exceed 128 bytes will go to the 'AscendingPageAllocator*'
    void[] b = a.allocate(256);
    assert(b.length == 256);
    a.deallocate(b);
}

/**
`SharedAlignedBlockList` is the thread-safe version of `AlignedBlockList`.
The `Allocator` template parameter must refer to a shared allocator.
Also, `ParentAllocator` must be a shared allocator, supporting `alignedAllocate`.

Params:
Allocator = the shared allocator which is used to manage each node; it must have a constructor which receives
`ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
ParentAllocator = each node draws memory from the parent allocator; it must be shared and support `alignedAllocate`
theAlignment = alignment of each block, and at the same time the length of each node
*/
shared struct SharedAlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
{
    version (StdDdoc)
    {
        import std.typecons : Ternary;
        import std.traits : hasMember;

        /**
        Returns a chunk of memory of size `n`.
        It finds the first node in the `AlignedBlockNode` list which has available memory,
        and moves it to the front of the list.

        All empty nodes which cannot return new memory are removed from the list.

        Params:
        n = bytes to allocate
        Returns:
        A chunk of memory of the required length, or `null` on failure
        */
        static if (hasMember!(ParentAllocator, "alignedAllocate"))
        void[] allocate(size_t n);

        /**
        Deallocates the buffer `b` given as parameter. Deallocations take place in constant
        time, regardless of the number of nodes in the list. `b.ptr` is rounded down
        to the nearest multiple of the `alignment` to quickly find the corresponding
        `AlignedBlockNode`.

        Params:
        b = buffer candidate for deallocation
        Returns:
        `true` on success and `false` on failure
        */
        bool deallocate(void[] b);

        /**
        Returns `Ternary.yes` if the buffer belongs to the parent allocator and
        `Ternary.no` otherwise.

        Params:
        b = buffer tested if owned by this allocator
        Returns:
        `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
        */
        static if (hasMember!(ParentAllocator, "owns"))
        Ternary owns(void[] b);
    }
    else
    {
        import std.math.traits : isPowerOf2;
        static assert(isPowerOf2(alignment));
        mixin AlignedBlockListImpl!true;
    }
}

///
@system unittest
{
    import std.experimental.allocator.building_blocks.region : SharedRegion;
    import std.experimental.allocator.building_blocks.ascending_page_allocator : SharedAscendingPageAllocator;
    import std.experimental.allocator.building_blocks.null_allocator : NullAllocator;
    import core.thread : ThreadGroup;

    enum numThreads = 8;
    enum size = 2048;
    enum maxIter = 10;

    /*
    In this example we use 'SharedAlignedBlockList' together with 'SharedRegion',
    in order to create a fast, thread-safe allocator.
    */
    alias SuperAllocator = SharedAlignedBlockList!(
        SharedRegion!(NullAllocator, 1),
        SharedAscendingPageAllocator,
        4096);

    SuperAllocator a;
    // The 'SuperAllocator' will draw memory from a 'SharedAscendingPageAllocator'
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    // Launch 'numThreads' threads, each performing allocations
    void fun()
    {
        foreach (i; 0 .. maxIter)
        {
            void[] b = a.allocate(size);
            assert(b.length == size);
        }
    }

    auto tg = new ThreadGroup;
    foreach (i; 0 .. numThreads)
    {
        tg.create(&fun);
    }
    tg.joinAll();
}

version (StdUnittest)
{
    static void testrw(void[] b)
    {
        ubyte* buf = cast(ubyte*) b.ptr;
        size_t len = (b.length).roundUpToMultipleOf(4096);
        for (int i = 0; i < len; i += 4096)
        {
            buf[i] = (cast(ubyte) i % 256);
            assert(buf[i] == (cast(ubyte) i % 256));
        }
    }
}

@system unittest
{
    import std.experimental.allocator.building_blocks.region;
    import std.experimental.allocator.building_blocks.ascending_page_allocator;
    import std.random;
    import std.algorithm.sorting : sort;
    import core.thread : ThreadGroup;
    import core.internal.spinlock : SpinLock;

    enum pageSize = 4096;
    enum numThreads = 10;
    enum maxIter = 20;
    enum totalAllocs = maxIter * numThreads;
    size_t count = 0;
    SpinLock lock = SpinLock(SpinLock.Contention.brief);

    alias SuperAllocator = SharedAlignedBlockList!(
        SharedRegion!(NullAllocator, 1),
        SharedAscendingPageAllocator,
        1 << 16);
    void[][totalAllocs] buf;

    SuperAllocator a;
    a.parent = SharedAscendingPageAllocator(4096 * 1024);

    void fun()
    {
        auto rnd = Random(1000);

        foreach (i; 0 .. maxIter)
        {
            auto size = uniform(1, pageSize + 1, rnd);
            void[] b = a.allocate(size);
            assert(b.length == size);
            testrw(b);

            lock.lock();
            buf[count++] = b;
            lock.unlock();
        }
    }
    auto tg = new ThreadGroup;
    foreach (i; 0 .. numThreads)
    {
        tg.create(&fun);
    }
    tg.joinAll();

    sort!((a, b) => a.ptr < b.ptr)(buf[0 .. totalAllocs]);
    foreach (i; 0 .. totalAllocs - 1)
    {
        assert(buf[i].ptr + a.goodAllocSize(buf[i].length) <= buf[i + 1].ptr);
    }

    foreach (i; 0 .. totalAllocs)
    {
        assert(a.deallocate(buf[totalAllocs - 1 - i]));
    }
}

@system unittest
{
    import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
    import std.experimental.allocator.building_blocks.segregator : Segregator;
    import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
    import std.random;

    alias SuperAllocator = Segregator!(
        256,
        AlignedBlockList!(BitmappedBlock!256, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        512,
        AlignedBlockList!(BitmappedBlock!512, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        1024,
        AlignedBlockList!(BitmappedBlock!1024, AscendingPageAllocator*, 1 << 16),
        Segregator!(

        2048,
        AlignedBlockList!(BitmappedBlock!2048, AscendingPageAllocator*, 1 << 16),
        AscendingPageAllocator*
        ))));

    SuperAllocator a;
    auto pageAlloc = AscendingPageAllocator(4096 * 4096);
    a.allocatorForSize!4096 = &pageAlloc;
    a.allocatorForSize!2048.parent = &pageAlloc;
    a.allocatorForSize!1024.parent = &pageAlloc;
    a.allocatorForSize!512.parent = &pageAlloc;
    a.allocatorForSize!256.parent = &pageAlloc;

    auto rnd = Random(1000);

    size_t maxIter = 10;
    enum testNum = 10;
    void[][testNum] buf;
    int maxSize = 8192;
    foreach (i; 0 .. maxIter)
    {
        foreach (j; 0 .. testNum)
        {
            auto size = uniform(1, maxSize + 1, rnd);
            buf[j] = a.allocate(size);
            assert(buf[j].length == size);
            testrw(buf[j]);
        }

        randomShuffle(buf[]);

        foreach (j; 0 .. testNum)
        {
            assert(a.deallocate(buf[j]));
        }
    }
}
700