/*	$NetBSD: lock_stubs_ras.S,v 1.12 2024/09/08 09:36:49 rillig Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cputype.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/errno.h>

#include <machine/asm.h>

RCSID("$NetBSD: lock_stubs_ras.S,v 1.12 2024/09/08 09:36:49 rillig Exp $")

#include "assym.h"

/*
 * We rely on mips_vector_init to choose not to use these routines if we
 * are on a system with multiple CPUs.  We can still use them on a single
 * CPU with a MULTIPROCESSOR kernel, where it might still be useful to use
 * preemption.
 */

/*
 * Lock stubs for non-MP kernels.  These are implemented using restartable
 * sequences, since LL/SC are either not available (MIPS1 and a couple of
 * oddball MIPS3 CPUs) or not desirable (overhead).
 *
 * The order of the generated code is particularly important here.  Some
 * assumptions:
 *
 * o All of the critical sections are 20 bytes in size, and the second
 *   instruction in each critical section is aligned on a 16 byte boundary
 *   (see top of _restart_lock_ras() for why).  The entry is defined here as
 *   the point where a restart occurs if we trap within the section.
 *
 * o The entire code block is aligned on a 256 byte boundary, and is
 *   256 bytes in size.  This is to allow us to do a pessimistic check
 *   after taking a trap with:
 *
 *	if ((addr & ~255) == _lock_ras_start)
 *		addr = _restart_lock_ras(addr);
 *
 *   See definition of MIPS_LOCK_RAS_SIZE in asm.h.
 *
 * o In order to keep the size of the block down, the routines are run
 *   into each other.  Use objdump -d to check alignment after making
 *   changes.
 */
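/*
 * In rough terms (a sketch of the mechanism, not extra code): if a trap
 * or interrupt lands while the PC is inside one of the critical sections
 * below, the trap handler calls _restart_lock_ras() to rewind the PC to
 * the load that begins that section, so the load/store pair re-executes
 * from scratch and is atomic with respect to interruption on a single
 * CPU.
 */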
#ifndef __mips_o32
	.set	mips3
#else
	.set	mips1
#endif
	.set	noreorder
	.set	noat

/*
 * To work around the branch prediction engine misbehavior of
 * Loongson 2F processors we need to clear the branch target buffer before
 * a j ra.  This requires extra instructions which don't fit in the RAS blocks,
 * so do a PC-relative jump to a block of code (this is the same size as
 * a j ra) where we can let the assembler install the workaround.
 */
#ifdef MIPS3_LOONGSON2F
#define J_RA	j loongson_return
#else
#define J_RA	j ra
#endif


/*
 * unsigned long ras_atomic_cas_ulong(volatile unsigned long *val,
 *     unsigned long old, unsigned long new);
 */
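/*
 * Both CAS stubs below behave, roughly, like this C sketch (the atomicity
 * comes from the RAS restart, not from any hardware primitive):
 *
 *	ret = *val;
 *	if (ret != old)
 *		return ret;
 *	*val = new;
 *	return old;
 */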
	.text
	.p2align LOG2_MIPS_LOCK_RAS_SIZE

EXPORT(_lock_ras_start)
STATIC_LEAF_NOPROFILE(ras_atomic_cas_noupdate)
	J_RA
	 move	v0, t0
END(ras_atomic_cas_noupdate)

	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error	"bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_atomic_cas_ulong)
	PTR_L	t0, (a0)	/* <- critical section start */
_atomic_cas_ulong_ras_start:
	 nop
	bne	t0, a1, ras_atomic_cas_noupdate
	 nop
	PTR_S	a2, (a0)	/* <- critical section end */
_atomic_cas_ulong_ras_end:
	J_RA
	 move	v0, a1
END(ras_atomic_cas_ulong)

/*
 * unsigned int ras_atomic_cas_uint(volatile unsigned int *val,
 *     unsigned int old, unsigned int new);
 */
	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error	"bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_atomic_cas_uint)
	INT_L	t0, (a0)	/* <- critical section start */
_atomic_cas_uint_ras_start:
	 nop
	bne	t0, a1, ras_atomic_cas_noupdate
	 nop
	INT_S	a2, (a0)	/* <- critical section end */
_atomic_cas_uint_ras_end:
	J_RA
	 move	v0, a1
END(ras_atomic_cas_uint)

/*
 * int _ucas_32_ras(volatile uint32_t *val, uint32_t old, uint32_t new,
 *     uint32_t *retp);
 */
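/*
 * Note: this is reached from ras_ucas_32() below, which has already loaded
 * v1 with the current lwp's PCB and installed ras_ucaserr as the onfault
 * handler, so a fault on the user-space access is caught and turned into
 * EFAULT.
 */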
	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error	"bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(_ucas_32_ras)
	lw	t0, (a0)	/* <- critical section start */
_ucas_32_ras_start:
	 nop
	bne	t0, a1, _ucas_32_ras_end
	 nop
	sw	a2, (a0)	/* <- critical section end */
_ucas_32_ras_end:
	PTR_S	zero, PCB_ONFAULT(v1)
	J_RA
	 sw	t0, 0(a3)
END(_ucas_32_ras)

#ifdef _LP64
/*
 * int _ucas_64_ras(volatile uint64_t *val, uint64_t old, uint64_t new,
 *     uint64_t *retp);
 */
	.if ((. - _lock_ras_start) & 15) != 12
	.error	"bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(_ucas_64_ras)
	ld	t0, (a0)	/* <- critical section start */
_ucas_64_ras_start:
	 nop
	bne	t0, a1, _ucas_64_ras_end
	 nop
	sd	a2, (a0)	/* <- critical section end */
_ucas_64_ras_end:
	PTR_S	zero, PCB_ONFAULT(v1)
	J_RA
	 sd	t0, 0(a3)
END(_ucas_64_ras)
#endif /* _LP64 */

#ifndef LOCKDEBUG
/*
 * void ras_mutex_enter(kmutex_t *mtx);
 */
	.if ((. - _lock_ras_start) & 15) != 12
	.error	"bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_mutex_enter)
	PTR_L	t0, (a0)	/* <- critical section start */
_mutex_enter_ras_start:
	 nop
	bnez	t0, ras_mutex_vector_enter
	 nop
	PTR_S	MIPS_CURLWP, (a0)/* <- critical section end */
_mutex_enter_ras_end:
	J_RA
	 nop
END(ras_mutex_enter)

/*
 * int ras_mutex_exit(kmutex_t *mtx);
 */
	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error	"bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_mutex_exit)
	PTR_L	t0, (a0)	/* <- critical section start */
_mutex_exit_ras_start:
	 nop
	bne	t0, MIPS_CURLWP, ras_mutex_vector_exit
	 nop
	PTR_S	zero, (a0)	/* <- critical section end */
_mutex_exit_ras_exit:
	J_RA
	 nop
END(ras_mutex_exit)

/*
 * These could be moved out to fit in more RAS sequences.
 */
STATIC_LEAF_NOPROFILE(ras_mutex_vector_enter)
	j	_C_LABEL(mutex_vector_enter)
	 nop
END(ras_mutex_vector_enter)

STATIC_LEAF_NOPROFILE(ras_mutex_vector_exit)
	j	_C_LABEL(mutex_vector_exit)
	 nop
END(ras_mutex_vector_exit)
#endif	/* !LOCKDEBUG */

	.p2align LOG2_MIPS_LOCK_RAS_SIZE	/* Get out of the RAS block */

	.set at
#ifdef MIPS3_LOONGSON2F
loongson_return:
	j	ra
	 nop
#endif

/*
 * Patch up the given address.  We arrive here if we might have trapped
 * within one of the critical sections above.  Do:
 *
 *	if ((addr & ~15) == ras)
 *		return ras - 4;
 *	... check next ...
 *	return addr;
 *
 * Registers on entry:
 *
 *	k1	fault PC
 *	ra	return address
 *
 * On exit:
 *
 *	k1	adjusted fault PC
 *	ra	return address
 *	t0	clobbered
 *	t1	clobbered
 */

#define	RAS_MKMASK(a)	(1 << (((a)-_lock_ras_start) >> 4))

/*
 * Since each RAS is aligned on a 16 byte boundary, we can use its offset
 * from _lock_ras_start to construct a bitmask of the valid RAS within.
 */
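/*
 * For example, if _atomic_cas_ulong_ras_start sits 16 bytes past
 * _lock_ras_start, RAS_MKMASK() evaluates to (1 << 1), i.e. bit 1 of the
 * mask.
 */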
#ifndef LOCKDEBUG
#define	MUTEX_RAS_MASK	(RAS_MKMASK(_mutex_enter_ras_start) \
			|RAS_MKMASK(_mutex_exit_ras_start))
#else
#define	MUTEX_RAS_MASK	0
#endif

#ifdef _LP64
#define	UCAS_64_MASK	RAS_MKMASK(_ucas_64_ras_start)
#else
#define	UCAS_64_MASK	0
#endif

#define	RAS_MASK	(RAS_MKMASK(_atomic_cas_ulong_ras_start) \
			|RAS_MKMASK(_atomic_cas_uint_ras_start) \
			|RAS_MKMASK(_ucas_32_ras_start) \
			|UCAS_64_MASK \
			|MUTEX_RAS_MASK)

/*
 * The caller has already determined that
 * _lock_ras_start == (k1 & -MIPS_LOCK_RAS_SIZE)
 */
LEAF_NOPROFILE(_restart_lock_ras)
	and	t0, k1, MIPS_LOCK_RAS_SIZE - 1
				/* look at addr bits in ras region */
	srl	t0, 4		/* focus on each set of 16 bytes */
	li	t1, 1		/* need this to make a bitmask */
	sllv	t1, t1, t0	/* now we have a bitmask of the PC */
	andi	t1, RAS_MASK	/* was the PC in a RAS? */
	bnez	t1, 1f		/* yes, adjust PC */
	 and	t0, k1, 15	/* get offset in RAS */

	j	ra
	 nop
1:
	addu	t0, 4		/* bias offset by one more instruction */
	j	ra
	 PTR_SUBU k1, t0	/* and subtract that from the PC */
END(_restart_lock_ras)
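/*
 * Worked example, assuming the layout enforced by the .if checks above: a
 * trap with the PC on the store at offset 12 within a RAS gives t0 = 12,
 * biased to 16, so the PC is rewound 16 bytes to the load 4 bytes before
 * the RAS entry point, and the whole critical section re-executes.
 */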

/*
 * int ras_ucas_32(volatile uint32_t *ptr, uint32_t old, uint32_t new,
 *     uint32_t *retp);
 */
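/*
 * Wrapper for _ucas_32_ras above: install ras_ucaserr as the fault
 * handler, reject addresses with the sign bit set (kernel space), assume
 * success by pre-loading v0 with zero, then run the RAS proper.
 */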
STATIC_LEAF(ras_ucas_32)
	PTR_L	v1, L_PCB(MIPS_CURLWP)
	PTR_LA	v0, _C_LABEL(ras_ucaserr)
	PTR_S	v0, PCB_ONFAULT(v1)
	bltz	a0, _C_LABEL(ras_ucaserr)
	 nop
	b	_C_LABEL(_ucas_32_ras)
	 move	v0, zero			# assume success
END(ras_ucas_32)

#ifdef _LP64
/*
 * int ras_ucas_64(volatile uint64_t *ptr, uint64_t old, uint64_t new,
 *     uint64_t *retp);
 */
STATIC_LEAF(ras_ucas_64)
	PTR_L	v1, L_PCB(MIPS_CURLWP)
	PTR_LA	v0, _C_LABEL(ras_ucaserr)
	PTR_S	v0, PCB_ONFAULT(v1)
	bltz	a0, _C_LABEL(ras_ucaserr)
	 nop
	b	_C_LABEL(_ucas_64_ras)
	 move	v0, zero			# assume success
END(ras_ucas_64)
#endif /* _LP64 */

/*
 * Fault handler for the ras_ucas_32/ras_ucas_64 wrappers above.
 */
STATIC_LEAF(ras_ucaserr)
	PTR_S	zero, PCB_ONFAULT(v1)		# reset fault handler
	j	ra
	 li	v0, EFAULT			# return EFAULT on error
END(ras_ucaserr)

#ifndef LOCKDEBUG
/*
 * void	mutex_spin_enter(kmutex_t *mtx);
 */
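/*
 * A rough C sketch of what the stub below does (field names are the C
 * equivalents of the assym.h offsets used here, so treat them as
 * approximate):
 *
 *	s = splraise(mtx->mtx_ipl);
 *	if (ci->ci_mtx_count-- == 0)
 *		ci->ci_mtx_oldspl = s;
 *	acquire the lock, or call mutex_spin_retry() if it is already held
 *	(DIAGNOSTIC kernels only; otherwise the lock word is not touched).
 */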
STATIC_NESTED(ras_mutex_spin_enter, CALLFRAME_SIZ, ra)
	move	t0, a0
	PTR_L	t2, L_CPU(MIPS_CURLWP)
	INT_L	a0, MTX_IPL(t0)
#ifdef PARANOIA
	INT_L	ta1, CPU_INFO_CPL(t2)		# get current cpl
#endif

	/*
	 * We need to raise our IPL.
	 * call splraise (only uses a0-a3, v0-v1, and ra)
	 */
	move	t3, ra
	jal	_C_LABEL(splraise)
	 nop
	move	ra, t3

	/*
	 * If this is the first spin mutex taken on this CPU, store the
	 * previous IPL for exit.
	 */
1:
	INT_L	ta2, CPU_INFO_MTX_COUNT(t2)
	nop
	INT_ADDU ta3, ta2, -1
	INT_S	ta3, CPU_INFO_MTX_COUNT(t2)

	bnez	ta2, 2f
	 nop
	INT_S	v0, CPU_INFO_MTX_OLDSPL(t2)	/* returned by splraise */
2:
#if defined(DIAGNOSTIC)
	INT_L	t3, MTX_LOCK(t0)
	li	t1, 1
	bnez	t3, 3f
	 nop
	j	ra
	 INT_S	t1, MTX_LOCK(t0)
3:
	j	_C_LABEL(mutex_spin_retry)
	 nop
#else	/* DIAGNOSTIC */
	j	ra
	 nop
#endif	/* DIAGNOSTIC */
END(ras_mutex_spin_enter)

/*
 * void	mutex_spin_exit(kmutex_t *mtx);
 */
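/*
 * A rough C sketch of the exit path below (same caveat about field names
 * as for the enter path):
 *
 *	release the lock (DIAGNOSTIC kernels only);
 *	s = ci->ci_mtx_oldspl;
 *	if (++ci->ci_mtx_count == 0 && s != ci->ci_cpl)
 *		splx(s);
 */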
LEAF(ras_mutex_spin_exit)
	PTR_L	t2, L_CPU(MIPS_CURLWP)
	nop
#if defined(DIAGNOSTIC)
	INT_L	t0, MTX_LOCK(a0)
	nop
	beqz	t0, 2f
	 nop
	INT_S	zero, MTX_LOCK(a0)
#endif

	/*
	 * We need to grab this before the mutex count is incremented
	 * because if we get an interrupt, it may see the count as zero
	 * and overwrite the oldspl value with a bogus value.
	 */
#ifdef PARANOIA
	INT_L	a2, MTX_IPL(a0)
#endif
	INT_L	a0, CPU_INFO_MTX_OLDSPL(t2)

	/*
	 * Increment the mutex count
	 */
	INT_L	t0, CPU_INFO_MTX_COUNT(t2)
	nop
	INT_ADDU t0, t0, 1
	INT_S	t0, CPU_INFO_MTX_COUNT(t2)

	/*
	 * If the IPL doesn't change, nothing to do
	 */
	INT_L	a1, CPU_INFO_CPL(t2)
	nop

#ifdef PARANOIA
	sltu	v0, a1, a2		# v0 = cpl < mtx_ipl
	sltu	v1, a1, a0		# v1 = cpl < oldspl
	sll	v0, 1
	or	v0, v1
12:	bnez	v0, 12b			# loop forever if either is true
	 nop
#endif /* PARANOIA */

	beq	a0, a1, 1f		# if oldspl == cpl
	 nop				#   no reason to drop ipl

	bltz	t0, 1f			# there are still holders
	 nop				# so don't drop IPL

	/*
	 * Mutex count is zero so we need to restore the old IPL
	 */
#ifdef PARANOIA
	sltiu	v0, a0, IPL_HIGH+1
13:	beqz	v0, 13b			# loop forever if ipl > IPL_HIGH
	 nop
#endif
	j	_C_LABEL(splx)
	 nop
1:
	j	ra
	 nop
#if defined(DIAGNOSTIC)
2:
	j	_C_LABEL(mutex_vector_exit)
	 nop
#endif
END(ras_mutex_spin_exit)
#endif	/* !LOCKDEBUG */

	.data
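/*
 * Table of lock/atomic entry points.  As noted at the top of this file,
 * mips_vector_init() uses these RAS-based versions only when running on
 * a single CPU; LOCKDEBUG kernels fall back to the regular mutex routines.
 */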
EXPORT_OBJECT(mips_locore_atomicvec)
	PTR_WORD	ras_atomic_cas_uint
	PTR_WORD	ras_atomic_cas_ulong
	PTR_WORD	ras_ucas_32
#ifdef _LP64
	PTR_WORD	ras_ucas_64
#else
	PTR_WORD	0
#endif /* _LP64 */
#ifdef LOCKDEBUG
	PTR_WORD	mutex_enter
	PTR_WORD	mutex_exit
	PTR_WORD	mutex_spin_enter
	PTR_WORD	mutex_spin_exit
#else
	PTR_WORD	ras_mutex_enter
	PTR_WORD	ras_mutex_exit
	PTR_WORD	ras_mutex_spin_enter
	PTR_WORD	ras_mutex_spin_exit
#endif	/* !LOCKDEBUG */