18f0e9130SKonstantin Belousov /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 38a36da99SPedro F. Giffuni * 48f0e9130SKonstantin Belousov * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org> 5d8a16b6aSKonstantin Belousov * Copyright (c) 2023 The FreeBSD Foundation 6d8a16b6aSKonstantin Belousov * 7d8a16b6aSKonstantin Belousov * Portions of this software were developed by 8d8a16b6aSKonstantin Belousov * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from 9d8a16b6aSKonstantin Belousov * the FreeBSD Foundation. 108f0e9130SKonstantin Belousov * 118f0e9130SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 128f0e9130SKonstantin Belousov * modification, are permitted provided that the following conditions 138f0e9130SKonstantin Belousov * are met: 148f0e9130SKonstantin Belousov * 1. Redistributions of source code must retain the above copyright 158f0e9130SKonstantin Belousov * notice unmodified, this list of conditions, and the following 168f0e9130SKonstantin Belousov * disclaimer. 178f0e9130SKonstantin Belousov * 2. Redistributions in binary form must reproduce the above copyright 188f0e9130SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 198f0e9130SKonstantin Belousov * documentation and/or other materials provided with the distribution. 208f0e9130SKonstantin Belousov * 218f0e9130SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 228f0e9130SKonstantin Belousov * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 238f0e9130SKonstantin Belousov * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
248f0e9130SKonstantin Belousov * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 258f0e9130SKonstantin Belousov * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 268f0e9130SKonstantin Belousov * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 278f0e9130SKonstantin Belousov * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 288f0e9130SKonstantin Belousov * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 298f0e9130SKonstantin Belousov * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 308f0e9130SKonstantin Belousov * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 318f0e9130SKonstantin Belousov */ 328f0e9130SKonstantin Belousov 338f0e9130SKonstantin Belousov #include <sys/param.h> 34c3d8a931SKonstantin Belousov #include <sys/kassert.h> 358f0e9130SKonstantin Belousov #include <sys/kernel.h> 368f0e9130SKonstantin Belousov #include <sys/lock.h> 378f0e9130SKonstantin Belousov #include <sys/mutex.h> 388f0e9130SKonstantin Belousov #include <sys/proc.h> 398f0e9130SKonstantin Belousov #include <sys/rangelock.h> 40c3d8a931SKonstantin Belousov #include <sys/sleepqueue.h> 41c3d8a931SKonstantin Belousov #include <sys/smr.h> 429ef425e5SKonstantin Belousov #include <sys/sysctl.h> 438f0e9130SKonstantin Belousov 448f0e9130SKonstantin Belousov #include <vm/uma.h> 458f0e9130SKonstantin Belousov 46c3d8a931SKonstantin Belousov /* 479ef425e5SKonstantin Belousov * Immediately after initialization (subject to 'rangelock_cheat' 489ef425e5SKonstantin Belousov * below), and until a request comes that conflicts with granted ones 499ef425e5SKonstantin Belousov * based on type, rangelocks serve requests in the "cheating" mode. 509ef425e5SKonstantin Belousov * In this mode, a rangelock behaves like a sxlock, as if each request 519ef425e5SKonstantin Belousov * covered the whole range of the protected object. 
On receiving a 529ef425e5SKonstantin Belousov * conflicting request (any request while a write request is 539ef425e5SKonstantin Belousov * effective, or any write request while some read ones are 549ef425e5SKonstantin Belousov * effective), all requests granted in "cheating" mode are drained, 559ef425e5SKonstantin Belousov * and the rangelock then switches to effectively keeping track of the 569ef425e5SKonstantin Belousov * precise range of each new request. 579ef425e5SKonstantin Belousov * 589ef425e5SKonstantin Belousov * Normal sx implementation is not used to not bloat structures (most 599ef425e5SKonstantin Belousov * important, vnodes) which embeds rangelocks. 609ef425e5SKonstantin Belousov * 619ef425e5SKonstantin Belousov * The cheating greatly helps very common pattern where file is first 629ef425e5SKonstantin Belousov * written single-threaded, and then read by many processes. 639ef425e5SKonstantin Belousov * 649ef425e5SKonstantin Belousov * Lock is in cheat mode when RL_CHEAT_CHEATING bit is set in the 659ef425e5SKonstantin Belousov * lock->head. Special cookies are returned in this mode, and 669ef425e5SKonstantin Belousov * trylocks are same as normal locks but do not drain. 
 */

static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

/*
 * Low bits of lock->head encode the cheat-mode state; the remaining
 * bits count granted readers (in RL_CHEAT_READER units).
 */
#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

/* Distinguished cookie values returned for cheat-mode grants. */
#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200

/*
 * Wait until the RL_CHEAT_DRAINING bit is cleared from lock->head.
 * Entered with the sleepqueue chain lock for &lock->head held (the
 * caller did sleepq_lock()); the lock is released before return.
 * Giant is dropped around the sleep and re-acquired afterwards.
 */
static void
rangelock_cheat_drain(struct rangelock *lock)
{
	uintptr_t v;

	DROP_GIANT();
	for (;;) {
		v = atomic_load_ptr(&lock->head);
		if ((v & RL_CHEAT_DRAINING) == 0)
			break;
		sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
		sleepq_wait(&lock->head, PRI_USER);
		sleepq_lock(&lock->head);
	}
	sleepq_release(&lock->head);
	PICKUP_GIANT();
}

/*
 * Try to serve a lock request in cheat mode.
 *
 * Returns false when the rangelock is not (or no longer) cheating, or
 * when this request triggered draining; the caller must then retry in
 * the precise-range mode.  Returns true when the request was handled:
 * *cookie is set to RL_RET_CHEAT_{R,W}LOCKED on success, or to NULL
 * when trylock failed due to a conflict or draining.
 */
static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		rangelock_cheat_drain(lock);
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				/*
				 * Conflict with a cheat-mode writer: set
				 * DRAINING and wait for it to go away.
				 */
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				/* Add one reader, keeping the CHEATING bit. */
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			/* fcmpset failed; v was reloaded, re-examine it. */
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				/*
				 * Conflict with cheat-mode readers or a
				 * writer: start draining.
				 */
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			/* fcmpset failed; v was reloaded, re-examine it. */
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

/*
 * Release a cheat-mode grant identified by its cookie.  Returns false
 * when the lock is not in cheat mode (the cookie then belongs to the
 * precise-range implementation).  When DRAINING is set and the last
 * grant goes away, waiters sleeping on &lock->head are woken up.
 */
static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					/* Other readers remain; keep bits. */
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					/*
					 * Last reader while draining: clear
					 * the word and wake the drainers.
					 */
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				/*
				 * Draining writer unlock: leave cheat mode
				 * entirely (head becomes 0) and wake waiters.
				 */
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

/*
 * Cheat-mode part of rangelock_destroy(): returns true when the lock
 * is in cheat mode (and asserts it is idle), false when the caller
 * must tear down the precise-range list instead.
 */
static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock. We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse rl_q_next
 * linkage since other threads might follow it even after CAS removed
 * the range. Use rl_q_free for local list of ranges to remove after
 * the smr section is dropped.
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;	/* next granted range; low bit = removed mark */
	struct rl_q_entry *rl_q_free;	/* thread-local deferred-free linkage */
	off_t		rl_q_start, rl_q_end;	/* [start, end) of the locked range */
	int		rl_q_flags;	/* RL_LOCK_READ / RL_LOCK_WRITE */
#ifdef INVARIANTS
	struct thread	*rl_q_owner;	/* lock owner, for assertions only */
#endif
};

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

/* Create the SMR-managed UMA zone backing all rl_q_entry allocations. */
static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

/* Allocate and initialize a queue entry for the range [start, end). */
static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;

	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

/*
 * Initialize a rangelock.  The lock starts in cheat mode unless the
 * debug.rangelock_cheat tunable is cleared.
 */
void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
}

/*
 * Destroy a rangelock.  Any leftover (marked) entries on the
 * precise-range list are reclaimed; granted ranges must already have
 * been unlocked by their owners.
 */
void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

/* An entry pointer with the low bit set denotes a logically removed range. */
static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

/* Strip the mark bit without asserting that it was set. */
static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

/* Strip the mark bit; the pointer must be marked. */
static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

/*
 * Logically remove e by atomically setting the mark bit in its
 * rl_q_next pointer; physical unlinking happens later, by whichever
 * thread's CAS succeeds.
 */
static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS)
	int r = atomic_testandset_ptr((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

/* Acquire-load of a link in the entry list. */
static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

/*
 * Free a thread-local chain of unlinked entries (linked via
 * rl_q_free).  Must be called outside the SMR section.
 */
static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;

	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		uma_zfree_smr(rl_entry_zone, x);
	}
}

/*
 * Mark entry e as removed and wake up all threads sleeping on the
 * rangelock.  Called with the sleepqueue chain lock for
 * &lock->sleepers held.
 */
static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

/*
 * Release a granted range.  The cookie is either a cheat-mode token
 * or the rl_q_entry returned by the corresponding lock operation.
 */
void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *	      starts before or at e2
 *	    1 if e1 after e2, or both locks are readers and e1
 *	      starts after e2
 *	    0 if e1 and e2 overlap and at least one lock is writer
 */
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}

/*
 * Sleep until some range is unlocked.  Called inside the SMR section
 * with the sleepqueue chain lock for &lock->sleepers held; the SMR
 * section is left for the duration of the sleep and re-entered before
 * return.
 */
static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

/*
 * Release-CAS of a list link.  'old' must be an unmarked pointer;
 * returns true when the swap from old to new succeeded.
 */
static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

/*
 * Tear down the precise-range list on rangelock_destroy(): unlink and
 * collect all marked (already unlocked) entries, waiting for any
 * still-granted ranges to be released.
 */
static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		/*
		 * NOTE(review): this jumps back to 'again', which does
		 * smr_enter() again without an intervening smr_exit() —
		 * confirm SMR sections may nest here or this path is
		 * unreachable.
		 */
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			/* Unlink the removed entry and defer its free. */
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		/* Entry still granted: wait for its owner to unlock. */
		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,	/* conflict; trylock request gives up */
	RL_LOCK_SUCCESS,	/* range granted */
	RL_LOCK_RETRY,		/* slept; caller must retry from scratch */
};

/*
 * Validate an inserted read request e: scan its successors for an
 * overlapping writer.  Marked successors are unlinked and queued on
 * *free along the way.  On conflict, e is withdrawn and we either
 * fail (trylock) or sleep and ask the caller to retry.
 */
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			/* Readers do not conflict; skip over. */
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			/* Overlapping writer; re-check under the sleepq lock. */
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}

/*
 * Validate an inserted write request e: e is granted only if it is
 * the first live entry reachable from the list head that overlaps its
 * range.  Marked entries encountered are unlinked and queued on
 * *free.  On conflict, e is withdrawn and we fail (trylock) or sleep
 * and request a retry.
 */
static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			/* Entirely before our range; skip over. */
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		sleepq_lock(&lock->sleepers);
		/* Reload after sleepq is locked */
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			sleepq_release(&lock->sleepers);
			goto again;
		}
		rangelock_unlock_int(lock, e);
		if (trylock) {
			sleepq_release(&lock->sleepers);
			return (RL_TRYLOCK_FAILED);
		}
		rl_insert_sleep(lock);
		return (RL_LOCK_RETRY);
	}
}

/*
 * Insert request e into the sorted list of granted ranges, then (in
 * the continuation below) validate it against conflicting neighbors.
 */
static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
Belousov cur = rl_q_load(prev); 6265badbeeaSKonstantin Belousov if (cur == NULL && rl_q_cas(prev, NULL, e)) 6275badbeeaSKonstantin Belousov return (RL_LOCK_SUCCESS); 6288f0e9130SKonstantin Belousov 6295badbeeaSKonstantin Belousov for (;;) { 6305badbeeaSKonstantin Belousov if (cur != NULL) { 631c3d8a931SKonstantin Belousov if (rl_e_is_marked(cur)) 632c3d8a931SKonstantin Belousov goto again; 633c3d8a931SKonstantin Belousov 634c3d8a931SKonstantin Belousov next = rl_q_load(&cur->rl_q_next); 635c3d8a931SKonstantin Belousov if (rl_e_is_marked(next)) { 636c3d8a931SKonstantin Belousov next = rl_e_unmark(next); 637c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, next)) { 638c3d8a931SKonstantin Belousov #ifdef INVARIANTS 639c3d8a931SKonstantin Belousov cur->rl_q_owner = NULL; 640c3d8a931SKonstantin Belousov #endif 641c3d8a931SKonstantin Belousov cur->rl_q_free = *free; 642c3d8a931SKonstantin Belousov *free = cur; 643c3d8a931SKonstantin Belousov cur = next; 644c3d8a931SKonstantin Belousov continue; 645c3d8a931SKonstantin Belousov } 646a725d618SKonstantin Belousov goto again; 647a725d618SKonstantin Belousov } 648c3d8a931SKonstantin Belousov } 649c3d8a931SKonstantin Belousov 6509467c1a6SKonstantin Belousov MPASS(!rl_e_is_marked(cur)); 651c3d8a931SKonstantin Belousov r = rl_e_compare(cur, e); 652c3d8a931SKonstantin Belousov if (r == -1) { 653c3d8a931SKonstantin Belousov prev = &cur->rl_q_next; 654c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 655c3d8a931SKonstantin Belousov } else if (r == 0) { 656c3d8a931SKonstantin Belousov sleepq_lock(&lock->sleepers); 657c3d8a931SKonstantin Belousov if (__predict_false(rl_e_is_marked(rl_q_load( 658c3d8a931SKonstantin Belousov &cur->rl_q_next)))) { 659c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 660c3d8a931SKonstantin Belousov continue; 661c3d8a931SKonstantin Belousov } 662e3680954SRick Macklem if (trylock) { 663c3d8a931SKonstantin Belousov sleepq_release(&lock->sleepers); 6645badbeeaSKonstantin Belousov return 
(RL_TRYLOCK_FAILED); 665e3680954SRick Macklem } 666c3d8a931SKonstantin Belousov rl_insert_sleep(lock); 667c3d8a931SKonstantin Belousov /* e is still valid */ 668c3d8a931SKonstantin Belousov goto again; 669c3d8a931SKonstantin Belousov } else /* r == 1 */ { 670c3d8a931SKonstantin Belousov e->rl_q_next = cur; 671c3d8a931SKonstantin Belousov if (rl_q_cas(prev, cur, e)) { 672c3d8a931SKonstantin Belousov atomic_thread_fence_acq(); 6735badbeeaSKonstantin Belousov return (rl_e_is_rlock(e) ? 6745badbeeaSKonstantin Belousov rl_r_validate(lock, e, trylock, free) : 6755badbeeaSKonstantin Belousov rl_w_validate(lock, e, trylock, free)); 676e3680954SRick Macklem } 677c3d8a931SKonstantin Belousov /* Reset rl_q_next in case we hit fast path. */ 678c3d8a931SKonstantin Belousov e->rl_q_next = NULL; 679c3d8a931SKonstantin Belousov cur = rl_q_load(prev); 680c3d8a931SKonstantin Belousov } 681c3d8a931SKonstantin Belousov } 682c3d8a931SKonstantin Belousov } 683c3d8a931SKonstantin Belousov 684c3d8a931SKonstantin Belousov static struct rl_q_entry * 6855badbeeaSKonstantin Belousov rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start, 6865badbeeaSKonstantin Belousov vm_ooffset_t end, int locktype) 687c3d8a931SKonstantin Belousov { 688a3f10d08SKonstantin Belousov struct rl_q_entry *e, *free; 6899ef425e5SKonstantin Belousov void *cookie; 6905badbeeaSKonstantin Belousov enum RL_INSERT_RES res; 691c3d8a931SKonstantin Belousov 6929ef425e5SKonstantin Belousov if (rangelock_cheat_lock(lock, locktype, trylock, &cookie)) 6939ef425e5SKonstantin Belousov return (cookie); 6945badbeeaSKonstantin Belousov for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) { 695c3d8a931SKonstantin Belousov free = NULL; 6965badbeeaSKonstantin Belousov e = rlqentry_alloc(start, end, locktype); 697c3d8a931SKonstantin Belousov smr_enter(rl_smr); 698c3d8a931SKonstantin Belousov res = rl_insert(lock, e, trylock, &free); 699c3d8a931SKonstantin Belousov smr_exit(rl_smr); 7005badbeeaSKonstantin Belousov if (res 
== RL_TRYLOCK_FAILED) { 7015badbeeaSKonstantin Belousov MPASS(trylock); 702c3d8a931SKonstantin Belousov e->rl_q_free = free; 703c3d8a931SKonstantin Belousov free = e; 704c3d8a931SKonstantin Belousov e = NULL; 705c3d8a931SKonstantin Belousov } 706a3f10d08SKonstantin Belousov rangelock_free_free(free); 707ff1ae3b3SKonstantin Belousov } 708c3d8a931SKonstantin Belousov return (e); 7098f0e9130SKonstantin Belousov } 7108f0e9130SKonstantin Belousov 7118f0e9130SKonstantin Belousov void * 712c3d8a931SKonstantin Belousov rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 7138f0e9130SKonstantin Belousov { 7145badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ)); 715e3680954SRick Macklem } 716e3680954SRick Macklem 717e3680954SRick Macklem void * 718c3d8a931SKonstantin Belousov rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 719e3680954SRick Macklem { 7205badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ)); 7218f0e9130SKonstantin Belousov } 7228f0e9130SKonstantin Belousov 7238f0e9130SKonstantin Belousov void * 724c3d8a931SKonstantin Belousov rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 7258f0e9130SKonstantin Belousov { 7269ef425e5SKonstantin Belousov return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE)); 727e3680954SRick Macklem } 728e3680954SRick Macklem 729e3680954SRick Macklem void * 730c3d8a931SKonstantin Belousov rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end) 731e3680954SRick Macklem { 7325badbeeaSKonstantin Belousov return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE)); 7338f0e9130SKonstantin Belousov } 7343155f2f0SKyle Evans 7350b6b1c28SKonstantin Belousov /* 7360b6b1c28SKonstantin Belousov * If the caller asserts that it can obtain the range locks on the 7370b6b1c28SKonstantin Belousov * same lock simultaneously, switch to the non-cheat 
mode. Cheat mode 7380b6b1c28SKonstantin Belousov * cannot handle it, hanging in drain or trylock retries. 7390b6b1c28SKonstantin Belousov */ 7400b6b1c28SKonstantin Belousov void 7410b6b1c28SKonstantin Belousov rangelock_may_recurse(struct rangelock *lock) 7420b6b1c28SKonstantin Belousov { 7430b6b1c28SKonstantin Belousov uintptr_t v, x; 7440b6b1c28SKonstantin Belousov 7450b6b1c28SKonstantin Belousov v = atomic_load_ptr(&lock->head); 7460b6b1c28SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) 7470b6b1c28SKonstantin Belousov return; 7480b6b1c28SKonstantin Belousov 7490b6b1c28SKonstantin Belousov sleepq_lock(&lock->head); 7500b6b1c28SKonstantin Belousov for (;;) { 7510b6b1c28SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) == 0) { 7520b6b1c28SKonstantin Belousov sleepq_release(&lock->head); 7530b6b1c28SKonstantin Belousov return; 7540b6b1c28SKonstantin Belousov } 7550b6b1c28SKonstantin Belousov 7560b6b1c28SKonstantin Belousov /* Cheating and locked, drain. */ 7570b6b1c28SKonstantin Belousov if ((v & RL_CHEAT_WLOCKED) != 0 || 7580b6b1c28SKonstantin Belousov (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) { 7590b6b1c28SKonstantin Belousov x = v | RL_CHEAT_DRAINING; 7600b6b1c28SKonstantin Belousov if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) { 7610b6b1c28SKonstantin Belousov rangelock_cheat_drain(lock); 7620b6b1c28SKonstantin Belousov return; 7630b6b1c28SKonstantin Belousov } 7640b6b1c28SKonstantin Belousov continue; 7650b6b1c28SKonstantin Belousov } 7660b6b1c28SKonstantin Belousov 7670b6b1c28SKonstantin Belousov /* Cheating and unlocked, clear RL_CHEAT_CHEATING. 
*/ 7680b6b1c28SKonstantin Belousov x = 0; 7690b6b1c28SKonstantin Belousov if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) { 7700b6b1c28SKonstantin Belousov sleepq_release(&lock->head); 7710b6b1c28SKonstantin Belousov return; 7720b6b1c28SKonstantin Belousov } 7730b6b1c28SKonstantin Belousov } 7740b6b1c28SKonstantin Belousov } 7750b6b1c28SKonstantin Belousov 7763155f2f0SKyle Evans #ifdef INVARIANT_SUPPORT 7773155f2f0SKyle Evans void 7783155f2f0SKyle Evans _rangelock_cookie_assert(void *cookie, int what, const char *file, int line) 7793155f2f0SKyle Evans { 7803155f2f0SKyle Evans } 7813155f2f0SKyle Evans #endif /* INVARIANT_SUPPORT */ 782c3d8a931SKonstantin Belousov 783c3d8a931SKonstantin Belousov #include "opt_ddb.h" 784c3d8a931SKonstantin Belousov #ifdef DDB 785c3d8a931SKonstantin Belousov #include <ddb/ddb.h> 786c3d8a931SKonstantin Belousov 787c3d8a931SKonstantin Belousov DB_SHOW_COMMAND(rangelock, db_show_rangelock) 788c3d8a931SKonstantin Belousov { 789c3d8a931SKonstantin Belousov struct rangelock *lock; 790c3d8a931SKonstantin Belousov struct rl_q_entry *e, *x; 7919ef425e5SKonstantin Belousov uintptr_t v; 792c3d8a931SKonstantin Belousov 793c3d8a931SKonstantin Belousov if (!have_addr) { 794c3d8a931SKonstantin Belousov db_printf("show rangelock addr\n"); 795c3d8a931SKonstantin Belousov return; 796c3d8a931SKonstantin Belousov } 797c3d8a931SKonstantin Belousov 798c3d8a931SKonstantin Belousov lock = (struct rangelock *)addr; 799c3d8a931SKonstantin Belousov db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers); 8009ef425e5SKonstantin Belousov v = lock->head; 8019ef425e5SKonstantin Belousov if ((v & RL_CHEAT_CHEATING) != 0) { 8029ef425e5SKonstantin Belousov db_printf(" cheating head %#jx\n", (uintmax_t)v); 8039ef425e5SKonstantin Belousov return; 8049ef425e5SKonstantin Belousov } 8059ef425e5SKonstantin Belousov for (e = (struct rl_q_entry *)(lock->head);;) { 806c3d8a931SKonstantin Belousov x = rl_e_is_marked(e) ? 
rl_e_unmark(e) : e; 807c3d8a931SKonstantin Belousov if (x == NULL) 808c3d8a931SKonstantin Belousov break; 809c3d8a931SKonstantin Belousov db_printf(" entry %p marked %d %d start %#jx end %#jx " 810c3d8a931SKonstantin Belousov "flags %x next %p", 811c3d8a931SKonstantin Belousov e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next), 812c3d8a931SKonstantin Belousov x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next); 813c3d8a931SKonstantin Belousov #ifdef INVARIANTS 814c3d8a931SKonstantin Belousov db_printf(" owner %p (%d)", x->rl_q_owner, 815c3d8a931SKonstantin Belousov x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1); 816c3d8a931SKonstantin Belousov #endif 817c3d8a931SKonstantin Belousov db_printf("\n"); 818c3d8a931SKonstantin Belousov e = x->rl_q_next; 819c3d8a931SKonstantin Belousov } 820c3d8a931SKonstantin Belousov } 821c3d8a931SKonstantin Belousov 822c3d8a931SKonstantin Belousov #endif /* DDB */ 823