/* $NetBSD: lock_stubs.s,v 1.10 2021/08/25 13:28:51 thorpej Exp $ */

/*-
 * Copyright (c) 2007, 2021 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran, and by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <machine/asm.h>

__KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.10 2021/08/25 13:28:51 thorpej Exp $");

#include "assym.h"

#if defined(MULTIPROCESSOR)
/*
 * These 'unop' insns will be patched with 'mb' insns at run-time if
 * the system has more than one processor.
 */
#define	MB(label)	label: unop
#else
#define	MB(label)	/* nothing */
#endif

#if !defined(LOCKDEBUG)

/*
 * void mutex_enter(kmutex_t *mtx);
 */
LEAF(mutex_enter, 1)
	LDGP(pv)
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
1:
	mov	v0, t1
	ldq_l	t2, 0(a0)
	bne	t2, 2f
	stq_c	t1, 0(a0)
	beq	t1, 3f
	MB(.L_mutex_enter_mb_1)
	RET
2:
	lda	t12, mutex_vector_enter
	jmp	(t12)
3:
	br	1b
	END(mutex_enter)

/*
 * void mutex_exit(kmutex_t *mtx);
 */
LEAF(mutex_exit, 1)
	LDGP(pv)
	MB(.L_mutex_exit_mb_1)
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	mov	zero, t3
1:
	ldq_l	t2, 0(a0)
	cmpeq	v0, t2, t2
	beq	t2, 2f
	stq_c	t3, 0(a0)
	beq	t3, 3f
	RET
2:
	lda	t12, mutex_vector_exit
	jmp	(t12)
3:
	br	1b
	END(mutex_exit)
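/*
 * For reference, a rough C-level sketch of the fast paths the two stubs
 * above implement.  This is illustrative only; the authoritative C
 * versions and the slow paths live in sys/kern/kern_mutex.c:
 *
 *	// mutex_enter(): CAS curlwp into an owner-free lock word, else
 *	// take the slow path.
 *	if (atomic_cas_ptr(&mtx->mtx_owner, NULL, curlwp) != NULL)
 *		mutex_vector_enter(mtx);
 *
 *	// mutex_exit(): CAS the owner word back to NULL iff it still
 *	// holds exactly curlwp (i.e. no waiters/debug bits are set),
 *	// else take the slow path.
 *	if (atomic_cas_ptr(&mtx->mtx_owner, curlwp, NULL) != curlwp)
 *		mutex_vector_exit(mtx);
 */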
#if 0	/* XXX disabled for now XXX */
/*
 * void mutex_spin_enter(kmutex_t *mtx);
 */
LEAF(mutex_spin_enter, 1);
	LDGP(pv)

	/*
	 * STEP 1: Perform the MUTEX_SPIN_SPLRAISE() function.
	 * (see sys/kern/kern_mutex.c)
	 *
	 * s = splraise(mtx->mtx_ipl);
	 * if (curcpu->ci_mtx_count-- == 0)
	 *	curcpu->ci_mtx_oldspl = s;
	 */

	call_pal PAL_OSF1_rdps		/* clobbers v0, t0, t8..t11 */
					/* v0 = cur_ipl */
#ifdef __BWX__
	mov	a0, a1			/* a1 = mtx */
	ldbu	a0, MUTEX_IPL(a0)	/* a0 = new_ipl */
	mov	v0, a4			/* save cur_ipl in a4 */
#else
	mov	a0, a1			/* a1 = mtx */
	ldq_u	a2, MUTEX_IPL(a0)
	mov	v0, a4			/* save cur_ipl in a4 */
	extbl	a2, MUTEX_IPL, a0	/* a0 = new_ipl */
#endif /* __BWX__ */
	cmplt	v0, a0, a3		/* a3 = (cur_ipl < new_ipl) */
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	mov	v0, a5			/* save curlwp in a5 */
	/*
	 * The forward-branch over the SWPIPL call is correctly predicted
	 * not-taken by the CPU because it's rare for a code path to acquire
	 * 2 spin mutexes.
	 */
	beq	a3, 1f			/* no? -> skip... */
	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
	/*
	 * v0 returns the old_ipl, which will be the same as the
	 * cur_ipl we squirreled away in a4 earlier.
	 */
1:
	/*
	 * curlwp->l_cpu is now stable.  Update the counter and
	 * stash the old_ipl.  Just in case it's not clear what's
	 * going on, we:
	 *
	 *	- Load previous value of mtx_oldspl into t1.
	 *	- Conditionally move old_ipl into t1 if mtx_count == 0.
	 *	- Store t1 back to mtx_oldspl; if mtx_count != 0,
	 *	  the store is redundant, but it's faster than a forward
	 *	  branch.
	 */
	ldq	a3, L_CPU(a5)		/* a3 = curlwp->l_cpu (curcpu) */
	ldl	t0, CPU_INFO_MTX_COUNT(a3)
	ldl	t1, CPU_INFO_MTX_OLDSPL(a3)
	cmoveq	t0, a4, t1		/* mtx_count == 0? -> t1 = old_ipl */
	subl	t0, 1, t2		/* mtx_count-- */
	stl	t1, CPU_INFO_MTX_OLDSPL(a3)
	stl	t2, CPU_INFO_MTX_COUNT(a3)

	/*
	 * STEP 2: __cpu_simple_lock_try(&mtx->mtx_lock)
	 */
	ldl_l	t0, MUTEX_SIMPLELOCK(a1)
	ldiq	t1, __SIMPLELOCK_LOCKED
	bne	t0, 2f			/* contended */
	stl_c	t1, MUTEX_SIMPLELOCK(a1)
	beq	t1, 2f			/* STL_C failed; consider contended */
	MB(.L_mutex_spin_enter_mb_1)
	RET
2:
	mov	a1, a0			/* restore first argument */
	lda	pv, mutex_spin_retry
	jmp	(pv)
	END(mutex_spin_enter)

/*
 * void mutex_spin_exit(kmutex_t *mtx);
 */
LEAF(mutex_spin_exit, 1)
	LDGP(pv);
	MB(.L_mutex_spin_exit_mb_1)

	/*
	 * STEP 1: __cpu_simple_unlock(&mtx->mtx_lock)
	 */
	stl	zero, MUTEX_SIMPLELOCK(a0)

	/*
	 * STEP 2: Perform the MUTEX_SPIN_SPLRESTORE() function.
	 * (see sys/kern/kern_mutex.c)
	 *
	 * s = curcpu->ci_mtx_oldspl;
	 * if (++curcpu->ci_mtx_count == 0)
	 *	splx(s);
	 */
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq	a3, L_CPU(v0)		/* a3 = curlwp->l_cpu (curcpu) */
	ldl	t0, CPU_INFO_MTX_COUNT(a3)
	ldl	a0, CPU_INFO_MTX_OLDSPL(a3)
	addl	t0, 1, t2		/* mtx_count++ */
	stl	t2, CPU_INFO_MTX_COUNT(a3)
	/*
	 * The forward-branch over the SWPIPL call is correctly predicted
	 * not-taken by the CPU because it's rare for a code path to acquire
	 * 2 spin mutexes.
	 */
	bne	t2, 1f			/* t2 != 0?  Skip... */
	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
1:
	RET
	END(mutex_spin_exit)
#endif	/* XXX disabled for now XXX */
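/*
 * Similarly, a rough C-level sketch of the fast path the (currently
 * disabled) spin mutex stubs above correspond to, stitched together from
 * the STEP 1/STEP 2 comments; illustrative only, the real code is in
 * sys/kern/kern_mutex.c:
 *
 *	// mutex_spin_enter()
 *	s = splraise(mtx->mtx_ipl);
 *	if (curcpu()->ci_mtx_count-- == 0)
 *		curcpu()->ci_mtx_oldspl = s;
 *	if (!__cpu_simple_lock_try(&mtx->mtx_lock))
 *		mutex_spin_retry(mtx);
 *
 *	// mutex_spin_exit()
 *	__cpu_simple_unlock(&mtx->mtx_lock);
 *	s = curcpu()->ci_mtx_oldspl;
 *	if (++curcpu()->ci_mtx_count == 0)
 *		splx(s);
 */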
/*
 * void rw_enter(krwlock_t *rwl, krw_t op);
 *
 * Acquire one hold on a RW lock.
 */
LEAF(rw_enter, 2)
	LDGP(pv)

	/*
	 * RW_READER == 0 (we have a compile-time assert in machdep.c
	 * to ensure this).
	 *
	 * Acquire for read is the most common case.
	 */
	bne	a1, 3f

	/* Acquiring for read. */
1:	ldq_l	t0, 0(a0)
	and	t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
	addq	t0, RW_READ_INCR, t2
	bne	t1, 4f			/* contended */
	stq_c	t2, 0(a0)
	beq	t2, 2f			/* STQ_C failed; retry */
	MB(.L_rw_enter_mb_1)
	RET

2:	br	1b

3:	/* Acquiring for write. */
	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq_l	t0, 0(a0)
	or	v0, RW_WRITE_LOCKED, t2
	bne	t0, 4f			/* contended */
	stq_c	t2, 0(a0)
	beq	t2, 4f			/* STQ_C failed; consider it contended */
	MB(.L_rw_enter_mb_2)
	RET

4:	lda	pv, rw_vector_enter
	jmp	(pv)
	END(rw_enter)
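/*
 * All of the RW stubs operate on the single owner word of a krwlock_t:
 * it holds either a reader count shifted left by RW_READ_COUNT_SHIFT, or
 * the owning LWP with RW_WRITE_LOCKED set, with the low bits reserved for
 * flags such as RW_HAS_WAITERS and RW_WRITE_WANTED.  As a rough,
 * illustrative C sketch of the read-acquire fast path above (RW_CAS()
 * stands for an atomic compare-and-swap on the owner word; the real code
 * is in sys/kern/kern_rwlock.c):
 *
 *	for (;;) {
 *		uintptr_t old = rwl->rw_owner;
 *		if ((old & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) != 0) {
 *			rw_vector_enter(rwl, op);	// contended
 *			break;
 *		}
 *		if (RW_CAS(&rwl->rw_owner, old, old + RW_READ_INCR))
 *			break;				// got a read hold
 *		// CAS failed; retry
 *	}
 *
 * The write-acquire path instead requires a completely free (zero) owner
 * word and stores (curlwp | RW_WRITE_LOCKED) into it.
 */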
/*
 * int rw_tryenter(krwlock_t *rwl, krw_t op);
 *
 * Try to acquire one hold on a RW lock.
 */
LEAF(rw_tryenter, 2)
	LDGP(pv)

	/* See above. */
	bne	a1, 3f

	/* Acquiring for read. */
1:	ldq_l	t0, 0(a0)
	and	t0, (RW_WRITE_LOCKED|RW_WRITE_WANTED), t1
	addq	t0, RW_READ_INCR, v0
	bne	t1, 4f			/* contended */
	stq_c	v0, 0(a0)
	beq	v0, 2f			/* STQ_C failed; retry */
	MB(.L_rw_tryenter_mb_1)
	RET			/* v0 contains non-zero LOCK_FLAG from STQ_C */

2:	br	1b

	/* Acquiring for write. */
3:	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	ldq_l	t0, 0(a0)
	or	v0, RW_WRITE_LOCKED, v0
	bne	t0, 4f			/* contended */
	stq_c	v0, 0(a0)
	/*
	 * v0 now contains the LOCK_FLAG value from STQ_C, which is either
	 * 0 for failure, or non-zero for success.  In either case, v0's
	 * value is correct.  Go ahead and perform the memory barrier even
	 * in the failure case because we expect it to be rare and it saves
	 * a branch-not-taken instruction in the success case.
	 */
	MB(.L_rw_tryenter_mb_2)
	RET

4:	mov	zero, v0		/* return 0 (failure) */
	RET
	END(rw_tryenter)

/*
 * void rw_exit(krwlock_t *rwl);
 *
 * Release one hold on a RW lock.
 */
LEAF(rw_exit, 1)
	LDGP(pv)
	MB(.L_rw_exit_mb_1)

	/*
	 * Check for write-lock release, and get the owner/count field
	 * on its own for sanity-checking against expected values.
	 */
	ldq	a1, 0(a0)
	and	a1, RW_WRITE_LOCKED, t1
	srl	a1, RW_READ_COUNT_SHIFT, a2
	bne	t1, 3f

	/*
	 * Releasing a read-lock.  Make sure the count is non-zero.
	 * If it is zero, take the slow path where the juicy diagnostic
	 * checks are located.
	 */
	beq	a2, 4f

	/*
	 * We do the following trick to check to see if we're releasing
	 * the last read-count and there are waiters:
	 *
	 *	1. Set v0 to 1.
	 *	2. Shift the new read count into t1.
	 *	3. Conditionally move t1 to v0 based on low-bit-set of t0
	 *	   (RW_HAS_WAITERS).  If RW_HAS_WAITERS is not set, then
	 *	   the move will not take place, and v0 will remain 1.
	 *	   Otherwise, v0 will contain the updated read count.
	 *	4. Jump to slow path if v0 == 0.
	 */
1:	ldq_l	t0, 0(a0)
	ldiq	v0, 1
	subq	t0, RW_READ_INCR, t2
	srl	t2, RW_READ_COUNT_SHIFT, t1
	cmovlbs	t0, t1, v0
	beq	v0, 4f
	stq_c	t2, 0(a0)
	beq	t2, 2f			/* STQ_C failed; try again */
	RET

2:	br	1b

	/*
	 * Releasing a write-lock.  Make sure the owner field points
	 * to our LWP.  If it does not, take the slow path where the
	 * juicy diagnostic checks are located.  a2 contains the owner
	 * field shifted down.  Shift it back up to compare to curlwp;
	 * this conveniently discards the bits we don't want to compare.
	 */
3:	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
	sll	a2, RW_READ_COUNT_SHIFT, a2
	mov	zero, t2		/* fast-path write-unlock stores NULL */
	cmpeq	v0, a2, v0		/* v0 = (owner == curlwp) */
	ldq_l	t0, 0(a0)
	beq	v0, 4f			/* owner field mismatch; need slow path */
	blbs	t0, 4f			/* RW_HAS_WAITERS set; need slow path */
	stq_c	t2, 0(a0)
	beq	t2, 4f			/* STQ_C failed; need slow path */
	RET

4:	lda	pv, rw_vector_exit
	jmp	(pv)
	END(rw_exit)

#endif	/* !LOCKDEBUG */

#if defined(MULTIPROCESSOR)
/*
 * Table of locations to patch with MB instructions on multiprocessor
 * systems.
 */
	.section ".rodata"
	.globl	lock_stub_patch_table
lock_stub_patch_table:
#if !defined(LOCKDEBUG)
	.quad	.L_mutex_enter_mb_1
	.quad	.L_mutex_exit_mb_1
#if 0 /* XXX disabled for now XXX */
	.quad	.L_mutex_spin_enter_mb_1
	.quad	.L_mutex_spin_exit_mb_1
#endif /* XXX disabled for now XXX */
	.quad	.L_rw_enter_mb_1
	.quad	.L_rw_enter_mb_2
	.quad	.L_rw_tryenter_mb_1
	.quad	.L_rw_tryenter_mb_2
	.quad	.L_rw_exit_mb_1
#endif /* ! LOCKDEBUG */
	.quad	0		/* NULL terminator */
#endif /* MULTIPROCESSOR */
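
/*
 * For reference, a rough sketch of how this NULL-terminated table is
 * consumed at boot when more than one CPU is present.  This is
 * illustrative only; the actual patch loop lives in the Alpha
 * machine-dependent startup code, and INSN_MB below is a stand-in for
 * the encoding of the 'mb' instruction:
 *
 *	extern uint32_t *lock_stub_patch_table[];
 *	uint32_t **p;
 *
 *	for (p = lock_stub_patch_table; *p != NULL; p++)
 *		**p = INSN_MB;		// overwrite the 'unop' placeholder
 *	alpha_pal_imb();		// flush the I-stream after patching
 */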