/*	$NetBSD: atomic.S,v 1.31 2024/07/16 22:45:10 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

#ifdef _HARDKERNEL
#include <machine/frameasm.h>
#define	LOCK		HOTPATCH(HP_NAME_NOLOCK, 1); lock
#else
#define	LOCK		lock
#endif
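
/*
 * In the kernel proper (_HARDKERNEL), the HOTPATCH marker records the
 * location of the one-byte LOCK prefix so the running kernel can patch
 * it out -- e.g. when only one CPU is present -- and skip the cost of
 * bus locking where it is not needed.
 */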

	.text

/* 32-bit */
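
/*
 * Calling convention (SysV AMD64 ABI): the pointer argument arrives in
 * %rdi and the 32-bit operand in %esi; 32-bit results are returned in
 * %eax.
 */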

ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

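/*
 * The *_nv ("new value") variants return the updated value.  XADD
 * leaves the previous contents of the memory operand in the source
 * register, so adding the delta once more yields the new value without
 * touching memory again.
 */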
ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)
	addl	%esi, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

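/*
 * There is no fetch-and-and instruction, so the *_nv form is a LOCK
 * CMPXCHG loop: recompute the result from the freshly observed old
 * value and retry until the compare-exchange succeeds.
 */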
ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

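/*
 * XCHG with a memory operand is implicitly locked by the processor, so
 * the swap operations need no explicit LOCK prefix.
 */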
ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)
	ret
END(_atomic_swap_32)

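/*
 * Compare-and-swap: the expected old value arrives in %esi and the new
 * value in %edx.  CMPXCHG compares %eax with the memory operand,
 * stores the new value on a match, and in either case leaves the value
 * found in memory in %eax, which is what the caller gets back.  A
 * typical caller (illustrative C only):
 *
 *	do {
 *		old = *ptr;
 *		new = old + 1;
 *	} while (atomic_cas_32(ptr, old, new) != old);
 *
 * The _ni ("not interlocked") variants omit the LOCK prefix: still a
 * single instruction, hence atomic with respect to interrupts on the
 * issuing CPU, but not interlocked against other CPUs.
 */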
ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

/* 64-bit */
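
/*
 * The 64-bit variants mirror the 32-bit ones above, using %rsi/%rax/
 * %rcx and the quadword forms of the same instructions.
 */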

ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S.
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)

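/*
 * Exported names.  ALIAS makes these strong aliases in the kernel and
 * weak aliases in userland (see the definition above).  The
 * membar_consumer/producer/enter/exit names are the older membar
 * interfaces, mapped onto the acquire/release/sync primitives.
 */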
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

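/*
 * The underscored type-specific entry points (int/long/ptr) are
 * unconditionally strong aliases of the 32- and 64-bit implementations
 * above, so internal references always resolve to them.
 */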
STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)