/**
 * The core.internal.atomic module contains the low-level atomic features available in hardware.
 * This module may be a routing layer for compiler intrinsics.
 *
 * Copyright: Copyright Manu Evans 2019.
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors: Sean Kelly, Alex Rønne Petersen, Manu Evans
 * Source: $(DRUNTIMESRC core/internal/_atomic.d)
 */

module core.internal.atomic;

import core.atomic : MemoryOrder, has128BitCAS;

version (DigitalMars)
{
    private
    {
        enum : int
        {
            AX, BX, CX, DX, DI, SI, R8, R9
        }

        immutable string[4][8] registerNames = [
            [ "AL", "AX", "EAX", "RAX" ],
            [ "BL", "BX", "EBX", "RBX" ],
            [ "CL", "CX", "ECX", "RCX" ],
            [ "DL", "DX", "EDX", "RDX" ],
            [ "DIL", "DI", "EDI", "RDI" ],
            [ "SIL", "SI", "ESI", "RSI" ],
            [ "R8B", "R8W", "R8D", "R8" ],
            [ "R9B", "R9W", "R9D", "R9" ],
        ];

        template RegIndex(T)
        {
            static if (T.sizeof == 1)
                enum RegIndex = 0;
            else static if (T.sizeof == 2)
                enum RegIndex = 1;
            else static if (T.sizeof == 4)
                enum RegIndex = 2;
            else static if (T.sizeof == 8)
                enum RegIndex = 3;
            else
                static assert(false, "Invalid type");
        }

        enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T];
    }
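
    // A minimal illustrative check (an addition for exposition, not from the upstream
    // module): SizedReg resolves to the register name matching the operand width.
    version (unittest)
    {
        static assert(SizedReg!(AX, ubyte) == "AL");
        static assert(SizedReg!(DX, uint) == "EDX");
        static assert(SizedReg!(CX, ulong) == "RCX");
    }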
51
52 inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted
53 if (CanCAS!T)
54 {
55 static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()");
56
57 static if (T.sizeof == size_t.sizeof * 2)
58 {
59 version (D_InlineAsm_X86)
60 {
61 asm pure nothrow @nogc @trusted
62 {
63 push EDI;
64 push EBX;
65 mov EBX, 0;
66 mov ECX, 0;
67 mov EAX, 0;
68 mov EDX, 0;
69 mov EDI, src;
70 lock; cmpxchg8b [EDI];
71 pop EBX;
72 pop EDI;
73 }
74 }
75 else version (D_InlineAsm_X86_64)
76 {
77 version (Windows)
78 {
79 static if (RegisterReturn!T)
80 {
81 enum SrcPtr = SizedReg!CX;
82 enum RetPtr = null;
83 }
84 else
85 {
86 enum SrcPtr = SizedReg!DX;
87 enum RetPtr = SizedReg!CX;
88 }
89
90 mixin (simpleFormat(q{
91 asm pure nothrow @nogc @trusted
92 {
93 naked;
94 push RBX;
95 mov R8, %0;
96 ?1 mov R9, %1;
97 mov RBX, 0;
98 mov RCX, 0;
99 mov RAX, 0;
100 mov RDX, 0;
101 lock; cmpxchg16b [R8];
102 ?1 mov [R9], RAX;
103 ?1 mov 8[R9], RDX;
104 pop RBX;
105 ret;
106 }
107 }, [SrcPtr, RetPtr]));
108 }
109 else
110 {
111 asm pure nothrow @nogc @trusted
112 {
113 naked;
114 push RBX;
115 mov RBX, 0;
116 mov RCX, 0;
117 mov RAX, 0;
118 mov RDX, 0;
119 lock; cmpxchg16b [RDI];
120 pop RBX;
121 ret;
122 }
123 }
124 }
125 }
126 else static if (needsLoadBarrier!order)
127 {
128 version (D_InlineAsm_X86)
129 {
130 enum SrcReg = SizedReg!CX;
131 enum ZeroReg = SizedReg!(DX, T);
132 enum ResReg = SizedReg!(AX, T);
133
134 mixin (simpleFormat(q{
135 asm pure nothrow @nogc @trusted
136 {
137 mov %1, 0;
138 mov %2, 0;
139 mov %0, src;
140 lock; cmpxchg [%0], %1;
141 }
142 }, [SrcReg, ZeroReg, ResReg]));
143 }
144 else version (D_InlineAsm_X86_64)
145 {
146 version (Windows)
147 enum SrcReg = SizedReg!CX;
148 else
149 enum SrcReg = SizedReg!DI;
150 enum ZeroReg = SizedReg!(DX, T);
151 enum ResReg = SizedReg!(AX, T);
152
153 mixin (simpleFormat(q{
154 asm pure nothrow @nogc @trusted
155 {
156 naked;
157 mov %1, 0;
158 mov %2, 0;
159 lock; cmpxchg [%0], %1;
160 ret;
161 }
162 }, [SrcReg, ZeroReg, ResReg]));
163 }
164 }
165 else
166 return *src;
167 }
168
169 void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
170 if (CanCAS!T)
171 {
172 static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()");
173
174 static if (T.sizeof == size_t.sizeof * 2)
175 {
176 version (D_InlineAsm_X86)
177 {
178 asm pure nothrow @nogc @trusted
179 {
180 push EDI;
181 push EBX;
182 lea EDI, value;
183 mov EBX, [EDI];
184 mov ECX, 4[EDI];
185 mov EDI, dest;
186 mov EAX, [EDI];
187 mov EDX, 4[EDI];
188 L1: lock; cmpxchg8b [EDI];
189 jne L1;
190 pop EBX;
191 pop EDI;
192 }
193 }
194 else version (D_InlineAsm_X86_64)
195 {
196 version (Windows)
197 {
198 asm pure nothrow @nogc @trusted
199 {
200 naked;
201 push RBX;
202 mov R8, RDX;
203 mov RAX, [RDX];
204 mov RDX, 8[RDX];
205 mov RBX, [RCX];
206 mov RCX, 8[RCX];
207 L1: lock; cmpxchg16b [R8];
208 jne L1;
209 pop RBX;
210 ret;
211 }
212 }
213 else
214 {
215 asm pure nothrow @nogc @trusted
216 {
217 naked;
218 push RBX;
219 mov RBX, RDI;
220 mov RCX, RSI;
221 mov RDI, RDX;
222 mov RAX, [RDX];
223 mov RDX, 8[RDX];
224 L1: lock; cmpxchg16b [RDI];
225 jne L1;
226 pop RBX;
227 ret;
228 }
229 }
230 }
231 }
232 else static if (needsStoreBarrier!order)
233 atomicExchange!(order, false)(dest, value);
234 else
235 *dest = value;
236 }
237
238 T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
239 if (is(T : ulong))
240 {
241 version (D_InlineAsm_X86)
242 {
243 static assert(T.sizeof <= 4, "64bit atomicFetchAdd not supported on 32bit target." );
244
245 enum DestReg = SizedReg!DX;
246 enum ValReg = SizedReg!(AX, T);
247
248 mixin (simpleFormat(q{
249 asm pure nothrow @nogc @trusted
250 {
251 mov %1, value;
252 mov %0, dest;
253 lock; xadd[%0], %1;
254 }
255 }, [DestReg, ValReg]));
256 }
257 else version (D_InlineAsm_X86_64)
258 {
259 version (Windows)
260 {
261 enum DestReg = SizedReg!DX;
262 enum ValReg = SizedReg!(CX, T);
263 }
264 else
265 {
266 enum DestReg = SizedReg!SI;
267 enum ValReg = SizedReg!(DI, T);
268 }
269 enum ResReg = result ? SizedReg!(AX, T) : null;
270
271 mixin (simpleFormat(q{
272 asm pure nothrow @nogc @trusted
273 {
274 naked;
275 lock; xadd[%0], %1;
276 ?2 mov %2, %1;
277 ret;
278 }
279 }, [DestReg, ValReg, ResReg]));
280 }
281 else
282 static assert (false, "Unsupported architecture.");
283 }
284
285 T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
286 if (is(T : ulong))
287 {
288 return atomicFetchAdd(dest, cast(T)-cast(IntOrLong!T)value);
289 }
290
291 T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
292 if (CanCAS!T)
293 {
294 version (D_InlineAsm_X86)
295 {
296 static assert(T.sizeof <= 4, "64bit atomicExchange not supported on 32bit target." );
297
298 enum DestReg = SizedReg!CX;
299 enum ValReg = SizedReg!(AX, T);
300
301 mixin (simpleFormat(q{
302 asm pure nothrow @nogc @trusted
303 {
304 mov %1, value;
305 mov %0, dest;
306 xchg [%0], %1;
307 }
308 }, [DestReg, ValReg]));
309 }
310 else version (D_InlineAsm_X86_64)
311 {
312 version (Windows)
313 {
314 enum DestReg = SizedReg!DX;
315 enum ValReg = SizedReg!(CX, T);
316 }
317 else
318 {
319 enum DestReg = SizedReg!SI;
320 enum ValReg = SizedReg!(DI, T);
321 }
322 enum ResReg = result ? SizedReg!(AX, T) : null;
323
324 mixin (simpleFormat(q{
325 asm pure nothrow @nogc @trusted
326 {
327 naked;
328 xchg [%0], %1;
329 ?2 mov %2, %1;
330 ret;
331 }
332 }, [DestReg, ValReg, ResReg]));
333 }
334 else
335 static assert (false, "Unsupported architecture.");
336 }
337
338 alias atomicCompareExchangeWeak = atomicCompareExchangeStrong;
339
340 bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
341 if (CanCAS!T)
342 {
343 version (D_InlineAsm_X86)
344 {
345 static if (T.sizeof <= 4)
346 {
347 enum DestAddr = SizedReg!CX;
348 enum CmpAddr = SizedReg!DI;
349 enum Val = SizedReg!(DX, T);
350 enum Cmp = SizedReg!(AX, T);
351
352 mixin (simpleFormat(q{
353 asm pure nothrow @nogc @trusted
354 {
355 push %1;
356 mov %2, value;
357 mov %1, compare;
358 mov %3, [%1];
359 mov %0, dest;
360 lock; cmpxchg [%0], %2;
361 mov [%1], %3;
362 setz AL;
363 pop %1;
364 }
365 }, [DestAddr, CmpAddr, Val, Cmp]));
366 }
367 else static if (T.sizeof == 8)
368 {
369 asm pure nothrow @nogc @trusted
370 {
371 push EDI;
372 push EBX;
373 lea EDI, value;
374 mov EBX, [EDI];
375 mov ECX, 4[EDI];
376 mov EDI, compare;
377 mov EAX, [EDI];
378 mov EDX, 4[EDI];
379 mov EDI, dest;
380 lock; cmpxchg8b [EDI];
381 mov EDI, compare;
382 mov [EDI], EAX;
383 mov 4[EDI], EDX;
384 setz AL;
385 pop EBX;
386 pop EDI;
387 }
388 }
389 else
390 static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." );
391 }
392 else version (D_InlineAsm_X86_64)
393 {
394 static if (T.sizeof <= 8)
395 {
396 version (Windows)
397 {
398 enum DestAddr = SizedReg!R8;
399 enum CmpAddr = SizedReg!DX;
400 enum Val = SizedReg!(CX, T);
401 }
402 else
403 {
404 enum DestAddr = SizedReg!DX;
405 enum CmpAddr = SizedReg!SI;
406 enum Val = SizedReg!(DI, T);
407 }
408 enum Res = SizedReg!(AX, T);
409
410 mixin (simpleFormat(q{
411 asm pure nothrow @nogc @trusted
412 {
413 naked;
414 mov %3, [%1];
415 lock; cmpxchg [%0], %2;
416 jne compare_fail;
417 mov AL, 1;
418 ret;
419 compare_fail:
420 mov [%1], %3;
421 xor AL, AL;
422 ret;
423 }
424 }, [DestAddr, CmpAddr, Val, Res]));
425 }
426 else
427 {
428 version (Windows)
429 {
430 asm pure nothrow @nogc @trusted
431 {
432 naked;
433 push RBX;
434 mov R9, RDX;
435 mov RAX, [RDX];
436 mov RDX, 8[RDX];
437 mov RBX, [RCX];
438 mov RCX, 8[RCX];
439 lock; cmpxchg16b [R8];
440 pop RBX;
441 jne compare_fail;
442 mov AL, 1;
443 ret;
444 compare_fail:
445 mov [R9], RAX;
446 mov 8[R9], RDX;
447 xor AL, AL;
448 ret;
449 }
450 }
451 else
452 {
453 asm pure nothrow @nogc @trusted
454 {
455 naked;
456 push RBX;
457 mov R8, RCX;
458 mov R9, RDX;
459 mov RAX, [RDX];
460 mov RDX, 8[RDX];
461 mov RBX, RDI;
462 mov RCX, RSI;
463 lock; cmpxchg16b [R8];
464 pop RBX;
465 jne compare_fail;
466 mov AL, 1;
467 ret;
468 compare_fail:
469 mov [R9], RAX;
470 mov 8[R9], RDX;
471 xor AL, AL;
472 ret;
473 }
474 }
475 }
476 }
477 else
478 static assert (false, "Unsupported architecture.");
479 }
480
481 alias atomicCompareExchangeWeakNoResult = atomicCompareExchangeStrongNoResult;
482
483 bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
484 if (CanCAS!T)
485 {
486 version (D_InlineAsm_X86)
487 {
488 static if (T.sizeof <= 4)
489 {
490 enum DestAddr = SizedReg!CX;
491 enum Cmp = SizedReg!(AX, T);
492 enum Val = SizedReg!(DX, T);
493
494 mixin (simpleFormat(q{
495 asm pure nothrow @nogc @trusted
496 {
497 mov %2, value;
498 mov %1, compare;
499 mov %0, dest;
500 lock; cmpxchg [%0], %2;
501 setz AL;
502 }
503 }, [DestAddr, Cmp, Val]));
504 }
505 else static if (T.sizeof == 8)
506 {
507 asm pure nothrow @nogc @trusted
508 {
509 push EDI;
510 push EBX;
511 lea EDI, value;
512 mov EBX, [EDI];
513 mov ECX, 4[EDI];
514 lea EDI, compare;
515 mov EAX, [EDI];
516 mov EDX, 4[EDI];
517 mov EDI, dest;
518 lock; cmpxchg8b [EDI];
519 setz AL;
520 pop EBX;
521 pop EDI;
522 }
523 }
524 else
525 static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." );
526 }
527 else version (D_InlineAsm_X86_64)
528 {
529 static if (T.sizeof <= 8)
530 {
531 version (Windows)
532 {
533 enum DestAddr = SizedReg!R8;
534 enum Cmp = SizedReg!(DX, T);
535 enum Val = SizedReg!(CX, T);
536 }
537 else
538 {
539 enum DestAddr = SizedReg!DX;
540 enum Cmp = SizedReg!(SI, T);
541 enum Val = SizedReg!(DI, T);
542 }
543 enum AXReg = SizedReg!(AX, T);
544
545 mixin (simpleFormat(q{
546 asm pure nothrow @nogc @trusted
547 {
548 naked;
549 mov %3, %1;
550 lock; cmpxchg [%0], %2;
551 setz AL;
552 ret;
553 }
554 }, [DestAddr, Cmp, Val, AXReg]));
555 }
556 else
557 {
558 version (Windows)
559 {
560 asm pure nothrow @nogc @trusted
561 {
562 naked;
563 push RBX;
564 mov RAX, [RDX];
565 mov RDX, 8[RDX];
566 mov RBX, [RCX];
567 mov RCX, 8[RCX];
568 lock; cmpxchg16b [R8];
569 setz AL;
570 pop RBX;
571 ret;
572 }
573 }
574 else
575 {
576 asm pure nothrow @nogc @trusted
577 {
578 naked;
579 push RBX;
580 mov RAX, RDX;
581 mov RDX, RCX;
582 mov RBX, RDI;
583 mov RCX, RSI;
584 lock; cmpxchg16b [R8];
585 setz AL;
586 pop RBX;
587 ret;
588 }
589 }
590 }
591 }
592 else
593 static assert (false, "Unsupported architecture.");
594 }
595
596 void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted
597 {
598 // TODO: `mfence` should only be required for seq_cst operations, but this depends on
599 // the compiler's backend knowledge to not reorder code inappropriately,
600 // so we'll apply it conservatively.
601 static if (order != MemoryOrder.raw)
602 {
603 version (D_InlineAsm_X86)
604 {
605 import core.cpuid;
606
607 // TODO: review this implementation; it seems way overly complicated
608 asm pure nothrow @nogc @trusted
609 {
610 naked;
611
612 call sse2;
613 test AL, AL;
614 jne Lcpuid;
615
616 // Fast path: We have SSE2, so just use mfence.
617 mfence;
618 jmp Lend;
619
620 Lcpuid:
621
622 // Slow path: We use cpuid to serialize. This is
623 // significantly slower than mfence, but is the
624 // only serialization facility we have available
625 // on older non-SSE2 chips.
626 push EBX;
627
628 mov EAX, 0;
629 cpuid;
630
631 pop EBX;
632
633 Lend:
634
635 ret;
636 }
637 }
638 else version (D_InlineAsm_X86_64)
639 {
640 asm pure nothrow @nogc @trusted
641 {
642 naked;
643 mfence;
644 ret;
645 }
646 }
647 else
648 static assert (false, "Unsupported architecture.");
649 }
650 }
651
652 void pause() pure nothrow @nogc @trusted
653 {
654 version (D_InlineAsm_X86)
655 {
656 asm pure nothrow @nogc @trusted
657 {
658 naked;
659 rep; nop;
660 ret;
661 }
662 }
663 else version (D_InlineAsm_X86_64)
664 {
665 asm pure nothrow @nogc @trusted
666 {
667 naked;
668 // pause; // TODO: DMD should add this opcode to its inline asm
669 rep; nop;
670 ret;
671 }
672 }
673 else
674 {
675 // ARM should `yield`
676 // other architectures? otherwise some sort of nop...
677 }
678 }
679 }
else version (GNU)
{
    import gcc.builtins;
    import gcc.config;

    inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
            {
                ubyte value = __atomic_load_1(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == ushort.sizeof)
            {
                ushort value = __atomic_load_2(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == uint.sizeof)
            {
                uint value = __atomic_load_4(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong value = __atomic_load_8(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T value;
                __atomic_load(T.sizeof, cast(shared)src, &value, order);
                return *cast(typeof(return)*)&value;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            return *cast(typeof(return)*)src;
        }
    }

    void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                __atomic_store_1(cast(shared)dest, *cast(ubyte*)&value, order);
            else static if (T.sizeof == ushort.sizeof)
                __atomic_store_2(cast(shared)dest, *cast(ushort*)&value, order);
            else static if (T.sizeof == uint.sizeof)
                __atomic_store_4(cast(shared)dest, *cast(uint*)&value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                __atomic_store_8(cast(shared)dest, *cast(ulong*)&value, order);
            else static if (GNU_Have_LibAtomic)
                __atomic_store(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            *dest = value;
            getAtomicMutex.unlock();
        }
    }

    T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                return __atomic_fetch_add_1(cast(shared)dest, value, order);
            else static if (T.sizeof == ushort.sizeof)
                return __atomic_fetch_add_2(cast(shared)dest, value, order);
            else static if (T.sizeof == uint.sizeof)
                return __atomic_fetch_add_4(cast(shared)dest, value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                return __atomic_fetch_add_8(cast(shared)dest, value, order);
            else static if (GNU_Have_LibAtomic)
                return __atomic_fetch_add(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            T tmp = *dest;
            *dest += value;
            return tmp;
        }
    }

    T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                return __atomic_fetch_sub_1(cast(shared)dest, value, order);
            else static if (T.sizeof == ushort.sizeof)
                return __atomic_fetch_sub_2(cast(shared)dest, value, order);
            else static if (T.sizeof == uint.sizeof)
                return __atomic_fetch_sub_4(cast(shared)dest, value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                return __atomic_fetch_sub_8(cast(shared)dest, value, order);
            else static if (GNU_Have_LibAtomic)
                return __atomic_fetch_sub(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            T tmp = *dest;
            *dest -= value;
            return tmp;
        }
    }

    T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong) || is(T == class) || is(T == interface) || is(T U : U*))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == byte.sizeof)
            {
                ubyte res = __atomic_exchange_1(cast(shared)dest, *cast(ubyte*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == short.sizeof)
            {
                ushort res = __atomic_exchange_2(cast(shared)dest, *cast(ushort*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == int.sizeof)
            {
                uint res = __atomic_exchange_4(cast(shared)dest, *cast(uint*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong res = __atomic_exchange_8(cast(shared)dest, *cast(ulong*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T res = void;
                __atomic_exchange(T.sizeof, cast(shared)dest, cast(void*)&value, &res, order);
                return res;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();

            T res = *dest;
            *dest = value;
            return res;
        }
    }

    bool atomicCompareExchangeWeak(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, true)(dest, compare, value);
    }

    bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, false)(dest, compare, value);
    }

    bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, false)(dest, cast(T*)&compare, value);
    }

    bool atomicCompareExchangeWeakNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, true)(dest, cast(T*)&compare, value);
    }

    private bool atomicCompareExchangeImpl(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, bool weak, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        bool res = void;

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == byte.sizeof)
                res = __atomic_compare_exchange_1(cast(shared)dest, compare, *cast(ubyte*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == short.sizeof)
                res = __atomic_compare_exchange_2(cast(shared)dest, compare, *cast(ushort*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == int.sizeof)
                res = __atomic_compare_exchange_4(cast(shared)dest, compare, *cast(uint*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics)
                res = __atomic_compare_exchange_8(cast(shared)dest, compare, *cast(ulong*)&value,
                                                  weak, succ, fail);
            else static if (GNU_Have_LibAtomic)
                res = __atomic_compare_exchange(T.sizeof, cast(shared)dest, compare, cast(void*)&value,
                                                succ, fail);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            static if (T.sizeof == byte.sizeof)
                alias U = byte;
            else static if (T.sizeof == short.sizeof)
                alias U = short;
            else static if (T.sizeof == int.sizeof)
                alias U = int;
            else static if (T.sizeof == long.sizeof)
                alias U = long;
            else
                static assert(0, "Invalid template type specified.");

            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();

            if (*cast(U*)dest == *cast(U*)compare)
            {
                *dest = value;
                res = true;
            }
            else
            {
                *compare = *dest;
                res = false;
            }
        }

        return res;
    }

    void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
            __atomic_thread_fence(order);
        else
        {
            getAtomicMutex.lock();
            getAtomicMutex.unlock();
        }
    }

    void pause() pure nothrow @nogc @trusted
    {
        version (X86)
        {
            __builtin_ia32_pause();
        }
        else version (X86_64)
        {
            __builtin_ia32_pause();
        }
        else
        {
            // Other architectures? Some sort of nop or barrier.
        }
    }

    static if (!GNU_Have_Atomics && !GNU_Have_LibAtomic)
    {
        // Use system mutex for atomics, faking the purity of the functions so
        // that they can be used in pure/nothrow/@safe code.
        extern (C) private pure @trusted @nogc nothrow
        {
            static if (GNU_Thread_Model == ThreadModel.Posix)
            {
                import core.sys.posix.pthread;
                alias atomicMutexHandle = pthread_mutex_t;

                pragma(mangle, "pthread_mutex_init") int fakePureMutexInit(pthread_mutex_t*, pthread_mutexattr_t*);
                pragma(mangle, "pthread_mutex_lock") int fakePureMutexLock(pthread_mutex_t*);
                pragma(mangle, "pthread_mutex_unlock") int fakePureMutexUnlock(pthread_mutex_t*);
            }
            else static if (GNU_Thread_Model == ThreadModel.Win32)
            {
                import core.sys.windows.winbase;
                alias atomicMutexHandle = CRITICAL_SECTION;

                pragma(mangle, "InitializeCriticalSection") int fakePureMutexInit(CRITICAL_SECTION*);
                pragma(mangle, "EnterCriticalSection") void fakePureMutexLock(CRITICAL_SECTION*);
                pragma(mangle, "LeaveCriticalSection") int fakePureMutexUnlock(CRITICAL_SECTION*);
            }
            else
            {
                alias atomicMutexHandle = int;
            }
        }

        // Implements lock/unlock operations.
        private struct AtomicMutex
        {
            int lock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                {
                    if (!_inited)
                    {
                        fakePureMutexInit(&_handle, null);
                        _inited = true;
                    }
                    return fakePureMutexLock(&_handle);
                }
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                    {
                        if (!_inited)
                        {
                            fakePureMutexInit(&_handle);
                            _inited = true;
                        }
                        fakePureMutexLock(&_handle);
                    }
                    return 0;
                }
            }

            int unlock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                    return fakePureMutexUnlock(&_handle);
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                        fakePureMutexUnlock(&_handle);
                    return 0;
                }
            }

        private:
            atomicMutexHandle _handle;
            bool _inited;
        }

        // Internal static mutex reference.
        private AtomicMutex* _getAtomicMutex() @trusted @nogc nothrow
        {
            __gshared static AtomicMutex mutex;
            return &mutex;
        }

        // Pure alias for _getAtomicMutex.
        pragma(mangle, _getAtomicMutex.mangleof)
        private AtomicMutex* getAtomicMutex() pure @trusted @nogc nothrow @property;
    }
}

private:

version (Windows)
{
    enum RegisterReturn(T) = is(T : U[], U) || is(T : R delegate(A), R, A...);
}

enum CanCAS(T) = is(T : ulong) ||
                 is(T == class) ||
                 is(T == interface) ||
                 is(T : U*, U) ||
                 is(T : U[], U) ||
                 is(T : R delegate(A), R, A...) ||
                 (is(T == struct) && __traits(isPOD, T) &&
                  (T.sizeof <= size_t.sizeof*2 || // no more than 2 words
                   (T.sizeof == 16 && has128BitCAS)) && // or supports 128-bit CAS
                  (T.sizeof & (T.sizeof - 1)) == 0 // is power of 2
                 );
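
// A minimal illustrative sketch (an addition for exposition, not from the upstream module):
// CanCAS admits word-sized scalars, pointers, delegates, and small power-of-two POD structs,
// and rejects larger or non-power-of-two aggregates.
version (unittest)
{
    private struct TwoWords { size_t a, b; }
    private struct ThreeWords { size_t a, b, c; }
    static assert( CanCAS!ubyte && CanCAS!size_t && CanCAS!(void*) && CanCAS!TwoWords);
    static assert(!CanCAS!ThreeWords);
}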

template IntOrLong(T)
{
    static if (T.sizeof > 4)
        alias IntOrLong = long;
    else
        alias IntOrLong = int;
}

// NOTE: x86 loads implicitly have acquire semantics so a memory
// barrier is only necessary on releases.
template needsLoadBarrier( MemoryOrder ms )
{
    enum bool needsLoadBarrier = ms == MemoryOrder.seq;
}


// NOTE: x86 stores implicitly have release semantics so a memory
// barrier is only necessary on acquires.
template needsStoreBarrier( MemoryOrder ms )
{
    enum bool needsStoreBarrier = ms == MemoryOrder.seq;
}
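
// Illustrative check (an addition for exposition, not from the upstream module): with the
// x86 rules noted above, only seq_cst operations require an explicit barrier.
version (unittest)
{
    static assert(!needsLoadBarrier!(MemoryOrder.acq) && needsLoadBarrier!(MemoryOrder.seq));
    static assert(!needsStoreBarrier!(MemoryOrder.rel) && needsStoreBarrier!(MemoryOrder.seq));
}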

// this is a helper to build asm blocks
string simpleFormat(string format, scope string[] args)
{
    string result;
    outer: while (format.length)
    {
        foreach (i; 0 .. format.length)
        {
            if (format[i] == '%' || format[i] == '?')
            {
                bool isQ = format[i] == '?';
                result ~= format[0 .. i++];
                assert (i < format.length, "Invalid format string");
                if (format[i] == '%' || format[i] == '?')
                {
                    assert(!isQ, "Invalid format string");
                    result ~= format[i++];
                }
                else
                {
                    int index = 0;
                    assert (format[i] >= '0' && format[i] <= '9', "Invalid format string");
                    while (i < format.length && format[i] >= '0' && format[i] <= '9')
                        index = index * 10 + (ubyte(format[i++]) - ubyte('0'));
                    if (!isQ)
                        result ~= args[index];
                    else if (!args[index])
                    {
                        size_t j = i;
                        for (; j < format.length;)
                        {
                            if (format[j++] == '\n')
                                break;
                        }
                        i = j;
                    }
                }
                format = format[i .. $];
                continue outer;
            }
        }
        result ~= format;
        break;
    }
    return result;
}
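
// Illustrative unittest (an addition for exposition, not from the upstream module):
// %N substitutes args[N], %% emits a literal '%', and a leading ?N drops the rest of
// the line when args[N] is null.
unittest
{
    assert(simpleFormat("mov %0, %1;", ["EAX", "EBX"]) == "mov EAX, EBX;");
    assert(simpleFormat("100%% done", null) == "100% done");
    assert(simpleFormat("?1 mov %1, %0;\nret;", ["EAX", null]) == "ret;");
    assert(simpleFormat("?1 mov %1, %0;\nret;", ["EAX", "RCX"]) == " mov RCX, EAX;\nret;");
}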