/* $NetBSD: lock_stubs_ras.S,v 1.12 2024/09/08 09:36:49 rillig Exp $ */

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cputype.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/errno.h>

#include <machine/asm.h>

RCSID("$NetBSD: lock_stubs_ras.S,v 1.12 2024/09/08 09:36:49 rillig Exp $")

#include "assym.h"

/*
 * We rely on mips_vector_init not to choose these routines if we are
 * running on a system with multiple CPUs.  A MULTIPROCESSOR kernel can
 * still use them on a single-CPU system, where preemption may make them
 * worthwhile.
 */

/*
 * Lock stubs for non-MP kernels.  These are implemented using restartable
 * sequences, since LL/SC are either not available (MIPS1 and a couple of
 * oddball MIPS3 CPUs) or not desirable (overhead).
 *
 * The order of the generated code is particularly important here.  Some
 * assumptions:
 *
 * o All of the critical sections are 20 bytes in size, and the second
 *   instruction in each critical section is aligned on a 16 byte boundary
 *   (see top of _restart_lock_ras() for why).  The entry is defined here as
 *   the point where a restart occurs if we trap within the section.
 *
 * o The entire code block is aligned on a 256 byte boundary, and is
 *   256 bytes in size.  This is to allow us to do a pessimistic check
 *   after taking a trap with:
 *
 *	if ((addr & ~255) == _lock_ras_start)
 *		addr = _restart_lock_ras(addr);
 *
 *   See definition of MIPS_LOCK_RAS_SIZE in asm.h.
 *
 * o In order to keep the size of the block down, the routines are run
 *   into each other.  Use objdump -d to check alignment after making
 *   changes.
 */
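
/*
 * For illustration only: each CAS stub below behaves like the following
 * C (a sketch, not the generated code).  Atomicity comes not from the
 * hardware but from the trap handler restarting the sequence at the
 * initial load whenever a trap hits between the marked points:
 *
 *	unsigned long
 *	ras_atomic_cas_ulong(volatile unsigned long *val,
 *	    unsigned long old, unsigned long new)
 *	{
 *		unsigned long cur = *val;	<- critical section start
 *		if (cur != old)
 *			return cur;
 *		*val = new;			<- critical section end
 *		return old;
 *	}
 */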
#ifndef __mips_o32
	.set	mips3
#else
	.set	mips1
#endif
	.set	noreorder
	.set	noat

/*
 * To work around the branch prediction engine misbehavior of
 * Loongson 2F processors we need to clear the branch target buffer before
 * a j ra.  This requires extra instructions which don't fit in the RAS
 * blocks, so do a PC-relative jump to a block of code (this is the same
 * size as a j ra) where we can let the assembler install the workaround.
 */
#ifdef MIPS3_LOONGSON2F
#define	J_RA	j loongson_return
#else
#define	J_RA	j ra
#endif


/*
 * unsigned long ras_atomic_cas_ulong(volatile unsigned long *val,
 *     unsigned long old, unsigned long new);
 */
	.text
	.p2align LOG2_MIPS_LOCK_RAS_SIZE

EXPORT(_lock_ras_start)
STATIC_LEAF_NOPROFILE(ras_atomic_cas_noupdate)
	J_RA
	 move	v0, t0
END(ras_atomic_cas_noupdate)

	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error "bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_atomic_cas_ulong)
	PTR_L	t0, (a0)	/* <- critical section start */
_atomic_cas_ulong_ras_start:
	 nop
	bne	t0, a1, ras_atomic_cas_noupdate
	 nop
	PTR_S	a2, (a0)	/* <- critical section end */
_atomic_cas_ulong_ras_end:
	J_RA
	 move	v0, a1
END(ras_atomic_cas_ulong)

/*
 * unsigned int ras_atomic_cas_uint(volatile unsigned int *val,
 *     unsigned int old, unsigned int new);
 */
	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error "bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_atomic_cas_uint)
	INT_L	t0, (a0)	/* <- critical section start */
_atomic_cas_uint_ras_start:
	 nop
	bne	t0, a1, ras_atomic_cas_noupdate
	 nop
	INT_S	a2, (a0)	/* <- critical section end */
_atomic_cas_uint_ras_end:
	J_RA
	 move	v0, a1
END(ras_atomic_cas_uint)

/*
 * int _ucas_32_ras(volatile uint32_t *val, uint32_t old, uint32_t new,
 *     uint32_t *retp);
 */
	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error "bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(_ucas_32_ras)
	lw	t0, (a0)	/* <- critical section start */
_ucas_32_ras_start:
	 nop
	bne	t0, a1, _ucas_32_ras_end
	 nop
	sw	a2, (a0)	/* <- critical section end */
_ucas_32_ras_end:
	PTR_S	zero, PCB_ONFAULT(v1)
	J_RA
	 sw	t0, 0(a3)
END(_ucas_32_ras)

#ifdef _LP64
/*
 * int _ucas_64_ras(volatile uint64_t *val, uint64_t old, uint64_t new,
 *     uint64_t *retp);
 */
	.if ((. - _lock_ras_start) & 15) != 12
	.error "bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(_ucas_64_ras)
	ld	t0, (a0)	/* <- critical section start */
_ucas_64_ras_start:
	 nop
	bne	t0, a1, _ucas_64_ras_end
	 nop
	sd	a2, (a0)	/* <- critical section end */
_ucas_64_ras_end:
	PTR_S	zero, PCB_ONFAULT(v1)
	J_RA
	 sd	t0, 0(a3)
END(_ucas_64_ras)
#endif /* _LP64 */

#ifndef LOCKDEBUG
/*
 * void ras_mutex_enter(kmutex_t *mtx);
 */
	.if ((. - _lock_ras_start) & 15) != 12
	.error "bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_mutex_enter)
	PTR_L	t0, (a0)	/* <- critical section start */
_mutex_enter_ras_start:
	 nop
	bnez	t0, ras_mutex_vector_enter
	 nop
	PTR_S	MIPS_CURLWP, (a0) /* <- critical section end */
_mutex_enter_ras_end:
	J_RA
	 nop
END(ras_mutex_enter)

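/*
 * Illustrative C for the adaptive mutex fast paths above and below
 * (a sketch only; mutex_vector_enter/exit handle the contended cases,
 * and the owner field is simply the first word of the kmutex):
 *
 *	enter:	if (mtx->mtx_owner != NULL)	<- critical section start
 *			mutex_vector_enter(mtx);
 *		else
 *			mtx->mtx_owner = curlwp;	<- critical section end
 *
 *	exit:	if (mtx->mtx_owner != curlwp)	<- critical section start
 *			mutex_vector_exit(mtx);
 *		else
 *			mtx->mtx_owner = NULL;	<- critical section end
 */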
/*
 * int ras_mutex_exit(kmutex_t *mtx);
 */
	nop
	.if ((. - _lock_ras_start) & 15) != 12
	.error "bad ras offset"
	.endif
STATIC_LEAF_NOPROFILE(ras_mutex_exit)
	PTR_L	t0, (a0)	/* <- critical section start */
_mutex_exit_ras_start:
	 nop
	bne	t0, MIPS_CURLWP, ras_mutex_vector_exit
	 nop
	PTR_S	zero, (a0)	/* <- critical section end */
_mutex_exit_ras_end:
	J_RA
	 nop
END(ras_mutex_exit)

/*
 * These could be moved out to make room for more RAS sequences.
 */
STATIC_LEAF_NOPROFILE(ras_mutex_vector_enter)
	j	_C_LABEL(mutex_vector_enter)
	 nop
END(ras_mutex_vector_enter)

STATIC_LEAF_NOPROFILE(ras_mutex_vector_exit)
	j	_C_LABEL(mutex_vector_exit)
	 nop
END(ras_mutex_vector_exit)
#endif /* !LOCKDEBUG */

	.p2align LOG2_MIPS_LOCK_RAS_SIZE	/* Get out of the RAS block */

	.set	at
#ifdef MIPS3_LOONGSON2F
loongson_return:
	j	ra
	 nop
#endif

/*
 * Patch up the given address.  We arrive here if we might have trapped
 * within one of the critical sections above.  Do:
 *
 *	if ((addr & ~15) == ras)
 *		return ras - 4;
 *	... check next ...
 *	return addr;
 *
 * Registers on entry:
 *
 *	k1	fault PC
 *	ra	return address
 *
 * On exit:
 *
 *	k1	adjusted fault PC
 *	ra	return address
 *	t0	clobbered
 *	t1	clobbered
 */

#define	RAS_MKMASK(a)	(1 << (((a)-_lock_ras_start) >> 4))

/*
 * Since each RAS is aligned on a 16 byte boundary, we can use its offset
 * from _lock_ras_start to construct a bitmask of the valid RASes within
 * the block.
 */
#ifndef LOCKDEBUG
#define	MUTEX_RAS_MASK	(RAS_MKMASK(_mutex_enter_ras_start) \
			|RAS_MKMASK(_mutex_exit_ras_start))
#else
#define	MUTEX_RAS_MASK	0
#endif

#ifdef _LP64
#define	UCAS_64_MASK	RAS_MKMASK(_ucas_64_ras_start)
#else
#define	UCAS_64_MASK	0
#endif

#define	RAS_MASK	(RAS_MKMASK(_atomic_cas_ulong_ras_start) \
			|RAS_MKMASK(_atomic_cas_uint_ras_start) \
			|RAS_MKMASK(_ucas_32_ras_start) \
			|UCAS_64_MASK \
			|MUTEX_RAS_MASK)

/*
 * The caller has already determined that
 * _lock_ras_start == (k1 & -MIPS_LOCK_RAS_SIZE)
 */
LEAF_NOPROFILE(_restart_lock_ras)
	and	t0, k1, MIPS_LOCK_RAS_SIZE - 1
				/* look at addr bits in ras region */
	srl	t0, 4		/* focus on each set of 16 bytes */
	li	t1, 1		/* need this to make a bitmask */
	sllv	t1, t1, t0	/* now we have a bitmask of the PC */
	andi	t1, RAS_MASK	/* was the PC in a RAS? */
	bnez	t1, 1f		/* yes, adjust PC */
	 and	t0, k1, 15	/* get offset in RAS */

	j	ra
	 nop
1:
	addu	t0, 4		/* bias offset by one more instruction */
	j	ra
	 PTR_SUBU k1, t0	/* and subtract that from the PC */
END(_restart_lock_ras)
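
/*
 * The same patch-up in rough C, for reference (a sketch; "pc" is the
 * fault PC that arrives in k1).  If the PC lies in one of the 16-byte
 * RAS slots, back it up to the initial load of that sequence:
 *
 *	slot = (pc & (MIPS_LOCK_RAS_SIZE - 1)) >> 4;
 *	if ((1 << slot) & RAS_MASK)
 *		pc -= (pc & 15) + 4;
 *	return pc;
 */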

/*
 * int ras_ucas_32(volatile uint32_t *ptr, uint32_t old, uint32_t new,
 *     uint32_t *retp);
 */
STATIC_LEAF(ras_ucas_32)
	PTR_L	v1, L_PCB(MIPS_CURLWP)
	PTR_LA	v0, _C_LABEL(ras_ucaserr)
	PTR_S	v0, PCB_ONFAULT(v1)
	bltz	a0, _C_LABEL(ras_ucaserr)	# reject kernel addresses
	 nop
	b	_C_LABEL(_ucas_32_ras)
	 move	v0, zero		# assume success
END(ras_ucas_32)

#ifdef _LP64
/*
 * int ras_ucas_64(volatile uint64_t *ptr, uint64_t old, uint64_t new,
 *     uint64_t *retp);
 */
STATIC_LEAF(ras_ucas_64)
	PTR_L	v1, L_PCB(MIPS_CURLWP)
	PTR_LA	v0, _C_LABEL(ras_ucaserr)
	PTR_S	v0, PCB_ONFAULT(v1)
	bltz	a0, _C_LABEL(ras_ucaserr)	# reject kernel addresses
	 nop
	b	_C_LABEL(_ucas_64_ras)
	 move	v0, zero		# assume success
END(ras_ucas_64)
#endif /* _LP64 */

/*
 * Fault handler for the ucas routines: clear the onfault handler and
 * return EFAULT.
 */
STATIC_LEAF(ras_ucaserr)
	PTR_S	zero, PCB_ONFAULT(v1)	# reset fault handler
	j	ra
	 li	v0, EFAULT		# return EFAULT on error
END(ras_ucaserr)

#ifndef LOCKDEBUG
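/*
 * The spin mutex stubs below keep a per-CPU count of spin mutexes held
 * (biased so that "none held" is count == 0, going to -1 on the first
 * acquisition) plus the IPL to restore once the last one is released.
 * Roughly, in C (a sketch of the protocol only; the field names are
 * illustrative, not the real assym.h offsets):
 *
 *	enter:	s = splraise(mtx->mtx_ipl);
 *		if (ci->ci_mtx_count-- == 0)
 *			ci->ci_mtx_oldspl = s;
 *
 *	exit:	s = ci->ci_mtx_oldspl;
 *		if (++ci->ci_mtx_count == 0 && s != ci->ci_cpl)
 *			splx(s);
 */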
/*
 * void mutex_spin_enter(kmutex_t *mtx);
 */
STATIC_NESTED(ras_mutex_spin_enter, CALLFRAME_SIZ, ra)
	move	t0, a0
	PTR_L	t2, L_CPU(MIPS_CURLWP)
	INT_L	a0, MTX_IPL(t0)
#ifdef PARANOIA
	INT_L	ta1, CPU_INFO_CPL(t2)	# get current cpl
#endif

	/*
	 * We need to raise our IPL.
	 * Call splraise (it only uses a0-a3, v0-v1, and ra).
	 */
	move	t3, ra
	jal	_C_LABEL(splraise)
	 nop
	move	ra, t3

	/*
	 * If this is the first lock of the mutex, store the previous
	 * IPL for exit.
	 */
1:
	INT_L	ta2, CPU_INFO_MTX_COUNT(t2)
	nop
	INT_ADDU ta3, ta2, -1
	INT_S	ta3, CPU_INFO_MTX_COUNT(t2)

	bnez	ta2, 2f
	 nop
	INT_S	v0, CPU_INFO_MTX_OLDSPL(t2)	/* returned by splraise */
2:
#if defined(DIAGNOSTIC)
	INT_L	t3, MTX_LOCK(t0)
	li	t1, 1
	bnez	t3, 3f
	 nop
	j	ra
	 INT_S	t1, MTX_LOCK(t0)
3:
	j	_C_LABEL(mutex_spin_retry)
	 nop
#else /* DIAGNOSTIC */
	j	ra
	 nop
#endif /* DIAGNOSTIC */
END(ras_mutex_spin_enter)

/*
 * void mutex_spin_exit(kmutex_t *mtx);
 */
LEAF(ras_mutex_spin_exit)
	PTR_L	t2, L_CPU(MIPS_CURLWP)
	nop
#if defined(DIAGNOSTIC)
	INT_L	t0, MTX_LOCK(a0)
	nop
	beqz	t0, 2f
	 nop
	INT_S	zero, MTX_LOCK(a0)
#endif

	/*
	 * We need to grab the old IPL before the mutex count is
	 * incremented: if we are interrupted in between, the interrupt
	 * handler may see the count as zero and overwrite the oldspl
	 * value with a bogus one.
	 */
#ifdef PARANOIA
	INT_L	a2, MTX_IPL(a0)
#endif
	INT_L	a0, CPU_INFO_MTX_OLDSPL(t2)

	/*
	 * Increment the mutex count.
	 */
	INT_L	t0, CPU_INFO_MTX_COUNT(t2)
	nop
	INT_ADDU t0, t0, 1
	INT_S	t0, CPU_INFO_MTX_COUNT(t2)

	/*
	 * If the IPL doesn't change, there is nothing to do.
	 */
	INT_L	a1, CPU_INFO_CPL(t2)
	nop

#ifdef PARANOIA
	sltu	v0, a1, a2		# v0 = cpl < mtx_ipl
	sltu	v1, a1, a0		# v1 = cpl < oldspl
	sll	v0, 1
	or	v0, v1
12:	bnez	v0, 12b			# loop forever if either is true
	 nop
#endif /* PARANOIA */

	beq	a0, a1, 1f		# if oldspl == cpl
	 nop				#   no reason to drop ipl

	bltz	t0, 1f			# there are still holders
	 nop				#   so don't drop IPL

	/*
	 * Mutex count is zero so we need to restore the old IPL.
	 */
#ifdef PARANOIA
	sltiu	v0, a0, IPL_HIGH+1
13:	beqz	v0, 13b			# loop forever if ipl > IPL_HIGH
	 nop
#endif
	j	_C_LABEL(splx)
	 nop
1:
	j	ra
	 nop
#if defined(DIAGNOSTIC)
2:
	j	_C_LABEL(mutex_vector_exit)
	 nop
#endif
END(ras_mutex_spin_exit)
#endif /* !LOCKDEBUG */

	.data
EXPORT_OBJECT(mips_locore_atomicvec)
	PTR_WORD ras_atomic_cas_uint
	PTR_WORD ras_atomic_cas_ulong
	PTR_WORD ras_ucas_32
#ifdef _LP64
	PTR_WORD ras_ucas_64
#else
	PTR_WORD 0
#endif /* _LP64 */
#ifdef LOCKDEBUG
	PTR_WORD mutex_enter
	PTR_WORD mutex_exit
	PTR_WORD mutex_spin_enter
	PTR_WORD mutex_spin_exit
#else
	PTR_WORD ras_mutex_enter
	PTR_WORD ras_mutex_exit
	PTR_WORD ras_mutex_spin_enter
	PTR_WORD ras_mutex_spin_exit
#endif /* !LOCKDEBUG */