/*	$NetBSD: lock_stubs.s,v 1.10 2021/08/25 13:28:51 thorpej Exp $	*/

/*-
 * Copyright (c) 2007, 2021 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran, and by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <machine/asm.h>

__KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.10 2021/08/25 13:28:51 thorpej Exp $");

#include "assym.h"

#if defined(MULTIPROCESSOR)
/*
 * These 'unop' insns will be patched with 'mb' insns at run-time if
 * the system has more than one processor.
 */
#define	MB(label)	label: unop
#else
#define	MB(label)	/* nothing */
#endif
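/*
 * For example, on a MULTIPROCESSOR kernel MB(.L_foo) expands to
 * ".L_foo: unop", and the (hypothetical) label .L_foo is listed in
 * lock_stub_patch_table at the bottom of this file so that the unop
 * can be overwritten with an "mb" once we know we are running on more
 * than one CPU; on a uniprocessor kernel it expands to nothing.
 */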

#if !defined(LOCKDEBUG)

/*
 * void mutex_enter(kmutex_t *mtx);
 */
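/*
 * Fast-path sketch in C (illustrative only; the authoritative
 * definition lives in sys/kern/kern_mutex.c).  The LDQ_L/STQ_C pair
 * below makes the owner test-and-set atomic, and the STQ_C is retried
 * if it fails:
 *
 *	if (mtx->mtx_owner == 0) {
 *		mtx->mtx_owner = curlwp;
 *		MB on MULTIPROCESSOR;
 *		return;
 *	}
 *	mutex_vector_enter(mtx);
 */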
LEAF(mutex_enter, 1)
	LDGP(pv)
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
1:
	mov	v0, t1
	ldq_l	t2, 0(a0)
	bne	t2, 2f
	stq_c	t1, 0(a0)
	beq	t1, 3f
	MB(.L_mutex_enter_mb_1)
	RET
2:
	lda	t12, mutex_vector_enter
	jmp	(t12)
3:
	br	1b
	END(mutex_enter)

/*
 * void mutex_exit(kmutex_t *mtx);
 */
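/*
 * Fast-path sketch in C (illustrative only; see sys/kern/kern_mutex.c).
 * The release barrier comes first, then the owner field is cleared with
 * an atomic LDQ_L/STQ_C sequence:
 *
 *	MB on MULTIPROCESSOR;
 *	if (mtx->mtx_owner == curlwp) {
 *		mtx->mtx_owner = 0;
 *		return;
 *	}
 *	mutex_vector_exit(mtx);
 */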
LEAF(mutex_exit, 1)
	LDGP(pv)
	MB(.L_mutex_exit_mb_1)
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	mov	zero, t3
1:
	ldq_l	t2, 0(a0)
	cmpeq	v0, t2, t2
	beq	t2, 2f
	stq_c	t3, 0(a0)
	beq	t3, 3f
	RET
2:
	lda	t12, mutex_vector_exit
	jmp	(t12)
3:
	br	1b
	END(mutex_exit)

#if 0 /* XXX disabled for now XXX */
/*
 * void mutex_spin_enter(kmutex_t *mtx);
 */
LEAF(mutex_spin_enter, 1);
	LDGP(pv)

	/*
	 * STEP 1: Perform the MUTEX_SPIN_SPLRAISE() function.
	 * (see sys/kern/kern_mutex.c)
	 *
	 *	s = splraise(mtx->mtx_ipl);
	 *	if (curcpu->ci_mtx_count-- == 0)
	 *		curcpu->ci_mtx_oldspl = s;
	 */

	call_pal PAL_OSF1_rdps		/* clobbers v0, t0, t8..t11 */
					/* v0 = cur_ipl */
#ifdef __BWX__
	mov	a0, a1			/* a1 = mtx */
	ldbu	a0, MUTEX_IPL(a0)	/* a0 = new_ipl */
	mov	v0, a4			/* save cur_ipl in a4 */
#else
	mov	a0, a1			/* a1 = mtx */
	ldq_u	a2, MUTEX_IPL(a0)
	mov	v0, a4			/* save cur_ipl in a4 */
	extbl	a2, MUTEX_IPL, a0	/* a0 = new_ipl */
#endif /* __BWX__ */
	cmplt	v0, a0, a3		/* a3 = (cur_ipl < new_ipl) */
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	mov	v0, a5			/* save curlwp in a5 */
	/*
	 * The forward-branch over the SWPIPL call is correctly predicted
	 * not-taken by the CPU because it's rare for a code path to acquire
	 * 2 spin mutexes.
	 */
	beq	a3, 1f			/*      no? -> skip... */
	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
	/*
	 * v0 returns the old_ipl, which will be the same as the
	 * cur_ipl we squirreled away in a4 earlier.
	 */
1:
	/*
	 * curlwp->l_cpu is now stable.  Update the counter and
	 * stash the old_ipl.  Just in case it's not clear what's
	 * going on, we:
	 *
	 *	- Load previous value of mtx_oldspl into t1.
	 *	- Conditionally move old_ipl into t1 if mtx_count == 0.
	 *	- Store t1 back to mtx_oldspl; if mtx_count != 0,
	 *	  the store is redundant, but it's faster than a forward
	 *	  branch.
	 */
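	/*
	 * In C terms the branch-free sequence below is roughly:
	 *
	 *	ci = curlwp->l_cpu;
	 *	oldspl = ci->ci_mtx_oldspl;
	 *	if (ci->ci_mtx_count == 0)
	 *		oldspl = old_ipl;
	 *	ci->ci_mtx_oldspl = oldspl;
	 *	ci->ci_mtx_count--;
	 *
	 * where the store of ci_mtx_oldspl always happens (see above).
	 */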
	ldq	a3, L_CPU(a5)		/* a3 = curlwp->l_cpu (curcpu) */
	ldl	t0, CPU_INFO_MTX_COUNT(a3)
	ldl	t1, CPU_INFO_MTX_OLDSPL(a3)
	cmoveq	t0, a4, t1		/* mtx_count == 0? -> t1 = old_ipl */
	subl	t0, 1, t2		/* mtx_count-- */
	stl	t1, CPU_INFO_MTX_OLDSPL(a3)
	stl	t2, CPU_INFO_MTX_COUNT(a3)

	/*
	 * STEP 2: __cpu_simple_lock_try(&mtx->mtx_lock)
	 */
	ldl_l	t0, MUTEX_SIMPLELOCK(a1)
	ldiq	t1, __SIMPLELOCK_LOCKED
	bne	t0, 2f			/* contended */
	stl_c	t1, MUTEX_SIMPLELOCK(a1)
	beq	t1, 2f			/* STL_C failed; consider contended */
	MB(.L_mutex_spin_enter_mb_1)
	RET
2:
	mov	a1, a0			/* restore first argument */
	lda	pv, mutex_spin_retry
	jmp	(pv)
	END(mutex_spin_enter)

/*
 * void mutex_spin_exit(kmutex_t *mtx);
 */
LEAF(mutex_spin_exit, 1)
	LDGP(pv);
	MB(.L_mutex_spin_exit_mb_1)

	/*
	 * STEP 1: __cpu_simple_unlock(&mtx->mtx_lock)
	 */
	stl	zero, MUTEX_SIMPLELOCK(a0)

	/*
	 * STEP 2: Perform the MUTEX_SPIN_SPLRESTORE() function.
	 * (see sys/kern/kern_mutex.c)
	 *
	 *	s = curcpu->ci_mtx_oldspl;
	 *	if (++curcpu->ci_mtx_count == 0)
	 *		splx(s);
	 */
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq	a3, L_CPU(v0)		/* a3 = curlwp->l_cpu (curcpu) */
	ldl	t0, CPU_INFO_MTX_COUNT(a3)
	ldl	a0, CPU_INFO_MTX_OLDSPL(a3)
	addl	t0, 1, t2		/* mtx_count++ */
	stl	t2, CPU_INFO_MTX_COUNT(a3)
	/*
	 * The forward-branch over the SWPIPL call is correctly predicted
	 * not-taken by the CPU because it's rare for a code path to acquire
	 * 2 spin mutexes.
	 */
	bne	t2, 1f			/* t2 != 0? Skip... */
	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
1:
	RET
	END(mutex_spin_exit)
#endif /* XXX disabled for now XXX */

/*
 * void rw_enter(krwlock_t *rwl, krw_t op);
 *
 * Acquire one hold on a RW lock.
 */
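/*
 * Fast-path sketch in C (illustrative only; the authoritative
 * definition lives in sys/kern/kern_rwlock.c).  Both paths update the
 * owner word with an atomic LDQ_L/STQ_C sequence:
 *
 *	if (op == RW_READER) {
 *		if ((rwl->rw_owner & (RW_WRITE_LOCKED|RW_WRITE_WANTED)) == 0) {
 *			rwl->rw_owner += RW_READ_INCR;
 *			MB on MULTIPROCESSOR;
 *			return;
 *		}
 *	} else {
 *		if (rwl->rw_owner == 0) {
 *			rwl->rw_owner = (uintptr_t)curlwp | RW_WRITE_LOCKED;
 *			MB on MULTIPROCESSOR;
 *			return;
 *		}
 *	}
 *	rw_vector_enter(rwl, op);
 */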
LEAF(rw_enter, 2)
	LDGP(pv)

	/*
	 * RW_READER == 0 (we have a compile-time assert in machdep.c
	 * to ensure this).
	 *
	 * Acquire for read is the most common case.
	 */
	bne	a1, 3f

	/* Acquiring for read. */
1:	ldq_l	t0, 0(a0)
	and	t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
	addq	t0, RW_READ_INCR, t2
	bne	t1, 4f		/* contended */
	stq_c	t2, 0(a0)
	beq	t2, 2f		/* STQ_C failed; retry */
	MB(.L_rw_enter_mb_1)
	RET

2:	br	1b

3:	/* Acquiring for write. */
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq_l	t0, 0(a0)
	or	v0, RW_WRITE_LOCKED, t2
	bne	t0, 4f		/* contended */
	stq_c	t2, 0(a0)
	beq	t2, 4f		/* STQ_C failed; consider it contended */
	MB(.L_rw_enter_mb_2)
	RET

4:	lda	pv, rw_vector_enter
	jmp	(pv)
	END(rw_enter)

/*
 * int rw_tryenter(krwlock_t *rwl, krw_t op);
 *
 * Try to acquire one hold on a RW lock.
 */
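/*
 * Same fast paths as rw_enter() above, except that contention (or a
 * failed write-side STQ_C) returns 0 instead of entering the slow
 * path, and on success the non-zero STQ_C lock-flag left in v0 doubles
 * as the "true" return value.
 */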
LEAF(rw_tryenter, 2)
	LDGP(pv)

	/* See above. */
	bne	a1, 3f

	/* Acquiring for read. */
1:	ldq_l	t0, 0(a0)
	and	t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
	addq	t0, RW_READ_INCR, v0
	bne	t1, 4f		/* contended */
	stq_c	v0, 0(a0)
	beq	v0, 2f		/* STQ_C failed; retry */
	MB(.L_rw_tryenter_mb_1)
	RET			/* v0 contains non-zero LOCK_FLAG from STQ_C */

2:	br	1b

	/* Acquiring for write. */
3:	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq_l	t0, 0(a0)
	or	v0, RW_WRITE_LOCKED, v0
	bne	t0, 4f		/* contended */
	stq_c	v0, 0(a0)
	/*
	 * v0 now contains the LOCK_FLAG value from STQ_C, which is either
	 * 0 for failure, or non-zero for success.  In either case, v0's
	 * value is correct.  Go ahead and perform the memory barrier even
	 * in the failure case because we expect it to be rare and it saves
	 * a branch-not-taken instruction in the success case.
	 */
	MB(.L_rw_tryenter_mb_2)
	RET

4:	mov	zero, v0	/* return 0 (failure) */
	RET
	END(rw_tryenter)

/*
 * void rw_exit(krwlock_t *rwl);
 *
 * Release one hold on a RW lock.
 */
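/*
 * Fast-path sketch in C (illustrative only; see sys/kern/kern_rwlock.c).
 * Anything that needs the diagnostic checks or has waiters is punted to
 * the slow path:
 *
 *	MB on MULTIPROCESSOR;
 *	if (rwl->rw_owner & RW_WRITE_LOCKED) {
 *		if (owner == curlwp && !(rw_owner & RW_HAS_WAITERS))
 *			rwl->rw_owner = 0;
 *		else
 *			rw_vector_exit(rwl);
 *	} else {
 *		if (read count != 0 && !(RW_HAS_WAITERS && last reader))
 *			rwl->rw_owner -= RW_READ_INCR;
 *		else
 *			rw_vector_exit(rwl);
 *	}
 */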
LEAF(rw_exit, 1)
	LDGP(pv)
	MB(.L_rw_exit_mb_1)

	/*
	 * Check for write-lock release, and get the owner/count field
	 * on its own for sanity-checking against expected values.
	 */
	ldq	a1, 0(a0)
	and	a1, RW_WRITE_LOCKED, t1
	srl	a1, RW_READ_COUNT_SHIFT, a2
	bne	t1, 3f

	/*
	 * Releasing a read-lock.  Make sure the count is non-zero.
	 * If it is zero, take the slow path where the juicy diagnostic
	 * checks are located.
	 */
	beq	a2, 4f

	/*
	 * We do the following trick to check to see if we're releasing
	 * the last read-count and there are waiters:
	 *
	 *	1. Set v0 to 1.
	 *	2. Shift the new read count into t1.
	 *	3. Conditionally move t1 to v0 based on low-bit-set of t0
	 *	   (RW_HAS_WAITERS).  If RW_HAS_WAITERS is not set, then
	 *	   the move will not take place, and v0 will remain 1.
	 *	   Otherwise, v0 will contain the updated read count.
	 *	4. Jump to slow path if v0 == 0.
	 */
1:	ldq_l	t0, 0(a0)
	ldiq	v0, 1
	subq	t0, RW_READ_INCR, t2
	srl	t2, RW_READ_COUNT_SHIFT, t1
	cmovlbs	t0, t1, v0
	beq	v0, 4f
	stq_c	t2, 0(a0)
	beq	t2, 2f		/* STQ_C failed; try again */
	RET

2:	br	1b

	/*
	 * Releasing a write-lock.  Make sure the owner field points
	 * to our LWP.  If it does not, take the slow path where the
	 * juicy diagnostic checks are located.  a2 contains the owner
	 * field shifted down.  Shift it back up to compare to curlwp;
	 * this conveniently discards the bits we don't want to compare.
	 */
3:	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	sll	a2, RW_READ_COUNT_SHIFT, a2
	mov	zero, t2	/* fast-path write-unlock stores NULL */
	cmpeq	v0, a2, v0	/* v0 = (owner == curlwp) */
	ldq_l	t0, 0(a0)
	beq	v0, 4f		/* owner field mismatch; need slow path */
	blbs	t0, 4f		/* RW_HAS_WAITERS set; need slow-path */
	stq_c	t2, 0(a0)
	beq	t2, 4f		/* STQ_C failed; need slow-path */
	RET

4:	lda	pv, rw_vector_exit
	jmp	(pv)
	END(rw_exit)

#endif	/* !LOCKDEBUG */

#if defined(MULTIPROCESSOR)
/*
 * Table of locations to patch with MB instructions on multiprocessor
 * systems.
 */
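/*
 * Each entry is the address of an 'unop' emitted by the MB() macro at
 * the top of this file; the run-time patching code walks this
 * NULL-terminated list and rewrites each location with an 'mb' when
 * running on a multiprocessor system.
 */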
	.section ".rodata"
	.globl	lock_stub_patch_table
lock_stub_patch_table:
#if !defined(LOCKDEBUG)
	.quad	.L_mutex_enter_mb_1
	.quad	.L_mutex_exit_mb_1
#if 0 /* XXX disabled for now XXX */
	.quad	.L_mutex_spin_enter_mb_1
	.quad	.L_mutex_spin_exit_mb_1
#endif /* XXX disabled for now XXX */
	.quad	.L_rw_enter_mb_1
	.quad	.L_rw_enter_mb_2
	.quad	.L_rw_tryenter_mb_1
	.quad	.L_rw_tryenter_mb_2
	.quad	.L_rw_exit_mb_1
#endif /* ! LOCKDEBUG */
	.quad	0		/* NULL terminator */
#endif /* MULTIPROCESSOR */