/*	$NetBSD: atomic.S,v 1.37 2024/07/16 22:44:38 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>
/*
 * __HAVE_ constants should not be in <machine/types.h>,
 * because we can't use that header from assembly; the only
 * one we need here is __HAVE_ATOMIC64_OPS, which we get by
 * including <sys/param.h>.
 */
#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

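/*
 * In the kernel proper, each LOCK prefix is emitted as a one-byte
 * hotpatch point (HP_NAME_NOLOCK), so that the boot-time patching
 * machinery can replace the prefix with a NOP where locked operation
 * is unnecessary, e.g. on uniprocessor boots.  Userland always gets
 * a plain LOCK.
 */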
#ifdef _HARDKERNEL
#include "opt_xen.h"
#include <machine/frameasm.h>
#define LOCK			HOTPATCH(HP_NAME_NOLOCK, 1); lock
#define HOTPATCH_CAS_64		HOTPATCH(HP_NAME_CAS_64, 49);
#else
#define LOCK			lock
#define HOTPATCH_CAS_64		/* nothing */
#endif

	.text

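/*
 * atomic_add_32(volatile uint32_t *p, int32_t v)
 *
 *	Atomically add v to *p; no return value.
 */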
ENTRY(_atomic_add_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	addl	%eax, (%edx)
	ret
END(_atomic_add_32)

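/*
 * atomic_add_32_nv(volatile uint32_t *p, int32_t v)
 *
 *	Atomically add v to *p and return the new value.  XADD
 *	leaves the old value in %eax, so add the saved delta once
 *	more to produce the new value.
 */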
ENTRY(_atomic_add_32_nv)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	%eax, %ecx
	LOCK
	xaddl	%eax, (%edx)
	addl	%ecx, %eax
	ret
END(_atomic_add_32_nv)

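/*
 * atomic_and_32(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p to *p & mask; no return value.
 */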
ENTRY(_atomic_and_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	andl	%eax, (%edx)
	ret
END(_atomic_and_32)

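/*
 * atomic_and_32_nv(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p to *p & mask and return the new value.
 *	x86 has no fetch-and-and instruction, so this is a CMPXCHG
 *	loop: compute old & mask and retry until the CAS succeeds.
 */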
ENTRY(_atomic_and_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	andl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_and_32_nv)

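/*
 * atomic_dec_32(volatile uint32_t *p)
 *
 *	Atomically decrement *p; no return value.
 */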
ENTRY(_atomic_dec_32)
	movl	4(%esp), %edx
	LOCK
	decl	(%edx)
	ret
END(_atomic_dec_32)

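/*
 * atomic_dec_32_nv(volatile uint32_t *p)
 *
 *	Atomically decrement *p and return the new value: XADD
 *	with -1 leaves the old value in %eax, minus one gives the
 *	new value.
 */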
ENTRY(_atomic_dec_32_nv)
	movl	4(%esp), %edx
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%edx)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

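/*
 * atomic_inc_32(volatile uint32_t *p)
 *
 *	Atomically increment *p; no return value.
 */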
ENTRY(_atomic_inc_32)
	movl	4(%esp), %edx
	LOCK
	incl	(%edx)
	ret
END(_atomic_inc_32)

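/*
 * atomic_inc_32_nv(volatile uint32_t *p)
 *
 *	Atomically increment *p and return the new value: XADD
 *	with 1 leaves the old value in %eax, plus one gives the
 *	new value.
 */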
ENTRY(_atomic_inc_32_nv)
	movl	4(%esp), %edx
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%edx)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

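/*
 * atomic_or_32(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p to *p | mask; no return value.
 */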
ENTRY(_atomic_or_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	orl	%eax, (%edx)
	ret
END(_atomic_or_32)

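/*
 * atomic_or_32_nv(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically set *p to *p | mask and return the new value.
 *	Like atomic_and_32_nv, implemented as a CMPXCHG loop.
 */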
ENTRY(_atomic_or_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	orl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_or_32_nv)

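/*
 * atomic_swap_32(volatile uint32_t *p, uint32_t v)
 *
 *	Atomically store v in *p and return the old value.  No
 *	LOCK prefix needed: XCHG with a memory operand asserts the
 *	bus lock implicitly.
 */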
ENTRY(_atomic_swap_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	xchgl	%eax, (%edx)
	ret
END(_atomic_swap_32)

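/*
 * atomic_cas_32(volatile uint32_t *p, uint32_t expected, uint32_t new)
 *
 *	If *p equals expected, atomically replace it with new.
 *	Return the previous contents of *p either way.
 */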
ENTRY(_atomic_cas_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

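/*
 * atomic_cas_32_ni(volatile uint32_t *p, uint32_t expected, uint32_t new)
 *
 *	Same as atomic_cas_32, but without the LOCK prefix: the
 *	`_ni' variants need only be atomic on the issuing CPU,
 *	e.g. against interrupts, not against other processors.
 */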
ENTRY(_atomic_cas_32_ni)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDL,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDL is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with xen_mb in sys/arch/i386/i386/cpufunc.S.
	 */
	LOCK
	addl	$0, -4(%esp)
	ret
END(_membar_sync)

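/*
 * 64-bit compare-and-swap.  CMPXCHG8B is missing on the oldest
 * CPUs this code may run on, so _atomic_cas_64 is a fallback that
 * blocks interrupts and compares and stores the two 32-bit halves
 * by hand; it is atomic only against the current CPU, which
 * suffices because CPUs lacking CMPXCHG8B are uniprocessor-only.
 * In the hard kernel, the HOTPATCH_CAS_64 marker lets boot-time
 * patching overwrite this 49-byte routine with _atomic_cas_cx8
 * below once CMPXCHG8B support is detected.  Under Xen PV,
 * CMPXCHG8B can be assumed, so _atomic_cas_64 simply aliases
 * _atomic_cas_cx8.
 */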
#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
#ifdef XENPV
STRONG_ALIAS(_atomic_cas_64,_atomic_cas_cx8)
#else
ENTRY(_atomic_cas_64)
	HOTPATCH_CAS_64
	/* 49 bytes of instructions */
#ifdef _HARDKERNEL
	pushf
	cli
#endif
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
	cmpl	0(%edi), %eax
	jne	2f
	cmpl	4(%edi), %edx
	jne	2f
	movl	%ebx, 0(%edi)
	movl	%ecx, 4(%edi)
1:
	popl	%ebx
	popl	%edi
#ifdef _HARDKERNEL
	popf
#endif
	ret
2:
	movl	0(%edi), %eax
	movl	4(%edi), %edx
	jmp	1b
END(_atomic_cas_64)
#endif /* !XENPV */

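/*
 * _atomic_cas_64 via CMPXCHG8B, which compares %edx:%eax with the
 * 64-bit memory operand and, if equal, stores %ecx:%ebx; either
 * way %edx:%eax ends up holding the old value.  In the hard
 * kernel, the INT3 (0xCC) padding below rounds the routine up to
 * the 49-byte hotpatch region ending at _atomic_cas_cx8_end.
 */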
ENTRY(_atomic_cas_cx8)
	/* 29 bytes of instructions */
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
	LOCK
	cmpxchg8b (%edi)
	popl	%ebx
	popl	%edi
	ret
#ifdef _HARDKERNEL
	.space	20, 0xCC
#endif
END(_atomic_cas_cx8)
LABEL(_atomic_cas_cx8_end)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_32)
ALIAS(atomic_add_ptr,_atomic_add_32)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_32_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_32_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_32)
ALIAS(atomic_and_ptr,_atomic_and_32)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_32_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_32)
ALIAS(atomic_dec_ptr,_atomic_dec_32)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_32_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_32)
ALIAS(atomic_inc_ptr,_atomic_inc_32)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_32_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_32)
ALIAS(atomic_or_ptr,_atomic_or_32)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_32_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_32)
ALIAS(atomic_swap_ptr,_atomic_swap_32)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_32)
ALIAS(atomic_cas_ptr,_atomic_cas_32)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_32_ni)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_64_ni,_atomic_cas_64)
ALIAS(__sync_val_compare_and_swap_8,_atomic_cas_64)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_32)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_32)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_32_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_32)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_32_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_32)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_32_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_32)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_32_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_32)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_32_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_32)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_32)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_32_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)