/*	$NetBSD: atomic.S,v 1.31 2024/07/16 22:45:10 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

#ifdef _HARDKERNEL
#include <machine/frameasm.h>
#define	LOCK		HOTPATCH(HP_NAME_NOLOCK, 1); lock
#else
#define	LOCK		lock
#endif

	.text

/* 32-bit */

ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)
	addl	%esi, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)
	ret
END(_atomic_swap_32)

ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)
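/*
 * Conventions used by the routines in this file (and by the 64-bit
 * versions below, which follow the same pattern with %rsi/%rdx and a
 * result in %rax): arguments arrive in the standard SysV AMD64
 * registers -- the target address in %rdi, the operand (if any) in
 * %esi, and the replacement value for the CAS routines in %edx -- and
 * the "_nv" (new value) and CAS routines return their result in %eax.
 * The add/inc/dec "_nv" variants use LOCK XADD, which leaves the old
 * value in the source register, and then re-apply the delta to form
 * the new value; AND and OR have no fetch-and-op instruction, so their
 * "_nv" variants loop on LOCK CMPXCHG instead.
 */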
/* 64-bit */

ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S.
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)
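/*
 * For reference only, a sketch of the MFENCE-based alternative
 * mentioned in the comment above; it is functionally equivalent to
 * _membar_sync but empirically slower than the locked ADDQ actually
 * used:
 *
 *	ENTRY(_membar_sync)
 *		mfence
 *		ret
 *	END(_membar_sync)
 */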
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)
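/*
 * The ALIAS() entries above export the public atomic_ops(3) and
 * membar_ops(3) names -- strong in the kernel, weak in userland, per
 * the ALIAS definition at the top of this file.  The STRONG_ALIAS()
 * entries below bind the corresponding underscore-prefixed internal
 * names to the same implementations.
 */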
STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)