/*	$OpenBSD: subr_witness.c,v 1.54 2024/09/25 18:24:13 bluhm Exp $	*/

/*-
 * Copyright (c) 2008 Isilon Systems, Inc.
 * Copyright (c) 2008 Ilya Maykov <ivmaykov@gmail.com>
 * Copyright (c) 1998 Berkeley Software Design, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp
 *	and BSDI Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp
 */

/*
 * Implementation of the `witness' lock verifier.  Originally implemented for
 * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
 * classes in FreeBSD.
 */

/*
 * Main Entry: witness
 * Pronunciation: 'wit-n&s
 * Function: noun
 * Etymology: Middle English witnesse, from Old English witnes knowledge,
 *    testimony, witness, from 2wit
 * Date: before 12th century
 * 1 : attestation of a fact or event : TESTIMONY
 * 2 : one that gives evidence; specifically : one who testifies in
 *    a cause or before a judicial tribunal
 * 3 : one asked to be present at a transaction so as to be able to
 *    testify to its having taken place
 * 4 : one who has personal knowledge of something
 * 5 a : something serving as evidence or proof : SIGN
 *   b : public affirmation by word or example of usually
 *      religious faith or conviction <the heroic witness to divine
 *      life -- Pilot>
 * 6 capitalized : a member of the Jehovah's Witnesses
 */

/*
 * Special rules concerning Giant and lock orders:
 *
 * 1) Giant must be acquired before any other mutexes.  Stated another way,
 *    no other mutex may be held when Giant is acquired.
 *
 * 2) Giant must be released when blocking on a sleepable lock.
 *
 * This rule is less obvious, but is a result of Giant providing the same
 * semantics as spl().  Basically, when a thread sleeps, it must release
 * Giant.  When a thread blocks on a sleepable lock, it sleeps.  Hence rule
 * 2).
 *
 * 3) Giant may be acquired before or after sleepable locks.
 *
 * This rule is also not quite as obvious.  Giant may be acquired after
 * a sleepable lock because it is a non-sleepable lock and non-sleepable
 * locks may always be acquired while holding a sleepable lock.  The second
 * case, Giant before a sleepable lock, follows from rule 2) above.  Suppose
 * you have two threads T1 and T2 and a sleepable lock X.  Suppose that T1
 * acquires X and blocks on Giant.  Then suppose that T2 acquires Giant and
 * blocks on X.  When T2 blocks on X, T2 will release Giant allowing T1 to
 * execute.  Thus, acquiring Giant both before and after a sleepable lock
 * will not result in a lock order reversal.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#ifdef MULTIPROCESSOR
#include <sys/mplock.h>
#endif
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/stacktrace.h>
#include <sys/stdint.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#include <uvm/uvm_extern.h>	/* uvm_pageboot_alloc */

#ifndef DDB
#error "DDB is required for WITNESS"
#endif

#include <machine/db_machdep.h>

#include <ddb/db_access.h>
#include <ddb/db_var.h>
#include <ddb/db_output.h>

#define	LI_RECURSEMASK	0x0000ffff	/* Recursion depth of lock instance. */
#define	LI_EXCLUSIVE	0x00010000	/* Exclusive lock instance. */
#define	LI_NORELEASE	0x00020000	/* Lock not allowed to be released. */

#ifndef WITNESS_COUNT
#define	WITNESS_COUNT	1536
#endif
#define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
#define	WITNESS_PENDLIST	(1024 + MAXCPUS)

/* Allocate 256 KB of stack data space */
#define	WITNESS_LO_DATA_COUNT	2048

/* Prime, gives load factor of ~2 at full load */
#define	WITNESS_LO_HASH_SIZE	1021

/*
 * XXX: This is somewhat bogus, as we assume here that at most 2048 threads
 * will hold LOCK_NCHILDREN locks.  We handle failure ok, and we should
 * probably be safe for the most part, but it's still a SWAG.
 */
#define	LOCK_NCHILDREN	5
#define	LOCK_CHILDCOUNT	2048

#define	FULLGRAPH_SBUF_SIZE	512

/*
 * These flags go in the witness relationship matrix and describe the
 * relationship between any two struct witness objects.
 */
#define	WITNESS_UNRELATED	0x00	/* No lock order relation. */
#define	WITNESS_PARENT		0x01	/* Parent, aka direct ancestor. */
#define	WITNESS_ANCESTOR	0x02	/* Direct or indirect ancestor. */
#define	WITNESS_CHILD		0x04	/* Child, aka direct descendant. */
#define	WITNESS_DESCENDANT	0x08	/* Direct or indirect descendant. */
#define	WITNESS_ANCESTOR_MASK	(WITNESS_PARENT | WITNESS_ANCESTOR)
#define	WITNESS_DESCENDANT_MASK	(WITNESS_CHILD | WITNESS_DESCENDANT)
#define	WITNESS_RELATED_MASK	\
	(WITNESS_ANCESTOR_MASK | WITNESS_DESCENDANT_MASK)
#define	WITNESS_REVERSAL	0x10	/* A lock order reversal has been
					 * observed. */
#define	WITNESS_RESERVED1	0x20	/* Unused flag, reserved. */
#define	WITNESS_RESERVED2	0x40	/* Unused flag, reserved. */
#define	WITNESS_LOCK_ORDER_KNOWN 0x80	/* This lock order is known. */

/* Descendant to ancestor flags */
#define	WITNESS_DTOA(x)	(((x) & WITNESS_RELATED_MASK) >> 2)

/* Ancestor to descendant flags */
#define	WITNESS_ATOD(x)	(((x) & WITNESS_RELATED_MASK) << 2)
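
/*
 * The relation bits are laid out so that shifting by two converts
 * between the two viewpoints of an edge: WITNESS_ATOD(WITNESS_PARENT)
 * == WITNESS_CHILD and WITNESS_DTOA(WITNESS_CHILD) == WITNESS_PARENT.
 * A consistent matrix therefore keeps w_rmatrix[i][j] and
 * w_rmatrix[j][i] as mirror images, an invariant _isitmyx() checks.
 */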

#define	WITNESS_INDEX_ASSERT(i)						\
	KASSERT((i) > 0 && (i) <= w_max_used_index && (i) < witness_count)

/*
 * Lock classes.  Each lock has a class which describes characteristics
 * common to all types of locks of a given class.
 *
 * Spin locks in general must always protect against preemption, as it is
 * an error to perform any type of context switch while holding a spin lock.
 * Also, for an individual lock to be recursable, its class must allow
 * recursion and the lock itself must explicitly allow recursion.
 */

struct lock_class {
	const char	*lc_name;
	u_int		lc_flags;
};

union lock_stack {
	union lock_stack	*ls_next;
	struct stacktrace	ls_stack;
};

#define	LC_SLEEPLOCK	0x00000001	/* Sleep lock. */
#define	LC_SPINLOCK	0x00000002	/* Spin lock. */
#define	LC_SLEEPABLE	0x00000004	/* Sleeping allowed with this lock. */
#define	LC_RECURSABLE	0x00000008	/* Locks of this type may recurse. */
#define	LC_UPGRADABLE	0x00000010	/* Upgrades and downgrades permitted. */

/*
 * Lock instances.  A lock instance is the data associated with a lock while
 * it is held by witness.  For example, a lock instance will hold the
 * recursion count of a lock.  Lock instances are held in lists.  Spin locks
 * are held in a per-cpu list while sleep locks are held in a per-thread
 * list.
 */
struct lock_instance {
	struct lock_object	*li_lock;
	union lock_stack	*li_stack;
	u_int			li_flags;
};

/*
 * A simple list type used to build the list of locks held by a thread
 * or CPU.  We can't simply embed the list in struct lock_object since a
 * lock may be held by more than one thread if it is a shared lock.  Locks
 * are added to the head of the list, so we fill up each list entry from
 * "the back" logically.  To ease some of the arithmetic, we actually fill
 * in each list entry the normal way (children[0] then children[1], etc.) but
 * when we traverse the list we read children[count-1] as the first entry
 * down to children[0] as the final entry.
 */
struct lock_list_entry {
	struct lock_list_entry	*ll_next;
	struct lock_instance	ll_children[LOCK_NCHILDREN];
	int			ll_count;
};
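
/*
 * For instance, with ll_count == 3 the most recently acquired lock is
 * ll_children[2] and the oldest is ll_children[0]; traversals such as
 * find_instance() below scan from index ll_count - 1 down to 0 and
 * then follow ll_next to the next, older entry.
 */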

/*
 * The main witness structure. One of these per named lock type in the system
 * (for example, "vnode interlock").
 */
struct witness {
	const struct lock_type	*w_type;
	const char		*w_subtype;
	uint32_t		w_index;  /* Index in the relationship matrix */
	struct lock_class	*w_class;
	SLIST_ENTRY(witness)	w_list;		/* List of all witnesses. */
	SLIST_ENTRY(witness)	w_typelist;	/* Witnesses of a type. */
	SLIST_ENTRY(witness)	w_hash_next;	/* Linked list in
						 * hash buckets. */
	uint16_t		w_num_ancestors; /* direct/indirect
						  * ancestor count */
	uint16_t		w_num_descendants; /* direct/indirect
						    * descendant count */
	int16_t			w_ddb_level;
	unsigned		w_acquired:1;
	unsigned		w_displayed:1;
	unsigned		w_reversed:1;
};

SLIST_HEAD(witness_list, witness);

/*
 * The witness hash table. Keys are witness names (const char *), elements are
 * witness objects (struct witness *).
 */
struct witness_hash {
	struct witness_list	wh_array[WITNESS_HASH_SIZE];
	uint32_t		wh_size;
	uint32_t		wh_count;
};

/*
 * Key type for the lock order data hash table.
 */
struct witness_lock_order_key {
	uint16_t	from;
	uint16_t	to;
};

struct witness_lock_order_data {
	struct stacktrace		wlod_stack;
	struct witness_lock_order_key	wlod_key;
	struct witness_lock_order_data	*wlod_next;
};
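
/*
 * A key is the directed pair of witness indices (from = earlier lock,
 * to = later lock).  The stack trace stored alongside it records where
 * that ordering was first established, so a later reversal report can
 * show both ends of the conflict (see witness_print_cycle_edge()).
 */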

/*
 * The witness lock order data hash table. Keys are witness index tuples
 * (struct witness_lock_order_key), elements are lock order data objects
 * (struct witness_lock_order_data).
 */
struct witness_lock_order_hash {
	struct witness_lock_order_data	*wloh_array[WITNESS_LO_HASH_SIZE];
	u_int	wloh_size;
	u_int	wloh_count;
};

struct witness_pendhelp {
	const struct lock_type	*wh_type;
	struct lock_object	*wh_lock;
};

struct witness_cpu {
	struct lock_list_entry	*wc_spinlocks;
	struct lock_list_entry	*wc_lle_cache;
	union lock_stack	*wc_stk_cache;
	unsigned int		 wc_lle_count;
	unsigned int		 wc_stk_count;
} __aligned(CACHELINESIZE);

#define	WITNESS_LLE_CACHE_MAX	8
#define	WITNESS_STK_CACHE_MAX	(WITNESS_LLE_CACHE_MAX * LOCK_NCHILDREN)

struct witness_cpu witness_cpu[MAXCPUS];

/*
 * Returns 0 if one of the locks is a spin lock and the other is not.
 * Returns 1 otherwise.
 */
static __inline int
witness_lock_type_equal(struct witness *w1, struct witness *w2)
{

	return ((w1->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) ==
	    (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)));
}

static __inline int
witness_lock_order_key_equal(const struct witness_lock_order_key *a,
    const struct witness_lock_order_key *b)
{

	return (a->from == b->from && a->to == b->to);
}

static int	_isitmyx(struct witness *w1, struct witness *w2, int rmask,
		    const char *fname);
static void	adopt(struct witness *parent, struct witness *child);
static struct witness	*enroll(const struct lock_type *, const char *,
			    struct lock_class *);
static struct lock_instance	*find_instance(struct lock_list_entry *list,
				    const struct lock_object *lock);
static int	isitmychild(struct witness *parent, struct witness *child);
static int	isitmydescendant(struct witness *parent, struct witness *child);
static void	itismychild(struct witness *parent, struct witness *child);
#ifdef DDB
static void	db_witness_add_fullgraph(struct witness *parent);
static void	witness_ddb_compute_levels(void);
static void	witness_ddb_display(int(*)(const char *fmt, ...));
static void	witness_ddb_display_descendants(int(*)(const char *fmt, ...),
		    struct witness *, int indent);
static void	witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
		    struct witness_list *list);
static void	witness_ddb_level_descendants(struct witness *parent, int l);
static void	witness_ddb_list(struct proc *td);
#endif
static int	witness_alloc_stacks(void);
static void	witness_debugger(int dump);
static void	witness_free(struct witness *m);
static struct witness	*witness_get(void);
static uint32_t	witness_hash_djb2(const uint8_t *key, uint32_t size);
static struct witness	*witness_hash_get(const struct lock_type *,
			    const char *);
static void	witness_hash_put(struct witness *w);
static void	witness_init_hash_tables(void);
static void	witness_increment_graph_generation(void);
static int	witness_list_locks(struct lock_list_entry **,
		    int (*)(const char *, ...));
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct lock_list_entry	*witness_lock_list_get(void);
static void	witness_lock_stack_free(union lock_stack *stack);
static union lock_stack	*witness_lock_stack_get(void);
static int	witness_lock_order_add(struct witness *parent,
		    struct witness *child);
static int	witness_lock_order_check(struct witness *parent,
		    struct witness *child);
static struct witness_lock_order_data	*witness_lock_order_get(
					    struct witness *parent,
					    struct witness *child);
static void	witness_list_lock(struct lock_instance *instance,
		    int (*prnt)(const char *fmt, ...));
static void	witness_print_cycle(int (*prnt)(const char *fmt, ...),
		    struct witness *parent, struct witness *child);
static void	witness_print_cycle_edge(int (*prnt)(const char *fmt, ...),
		    struct witness *parent, struct witness *child,
		    int step, int last);
static int	witness_search(struct witness *w, struct witness *target,
		    struct witness **path, int depth, int *remaining);
static void	witness_setflag(struct lock_object *lock, int flag, int set);

/*
 * If set to 0, lock order checking is disabled.  If set to -1,
 * witness is completely disabled.  Otherwise witness performs full
 * lock order checking for all locks.  At runtime, lock order checking
 * may be toggled.  However, witness cannot be reenabled once it is
 * completely disabled.
 */
#ifdef WITNESS_WATCH
static int witness_watch = 3;
#else
static int witness_watch = 2;
#endif

#ifdef WITNESS_LOCKTRACE
static int witness_locktrace = 1;
#else
static int witness_locktrace = 0;
#endif
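
/*
 * As the checks below use it: a negative value disables witness
 * entirely, 0 turns off order checking, 1 and up enables full
 * checking, and values above 1 additionally print the offending lock
 * cycle and the recorded lock order stack traces on a reversal.
 */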

int witness_count = WITNESS_COUNT;
int witness_uninitialized_report = 5;

static struct mutex w_mtx;
static struct rwlock w_ctlock = RWLOCK_INITIALIZER("w_ctlock");

/* w_list */
static struct witness_list w_free = SLIST_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = SLIST_HEAD_INITIALIZER(w_all);

/* w_typelist */
static struct witness_list w_spin = SLIST_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = SLIST_HEAD_INITIALIZER(w_sleep);

/* lock list */
static struct lock_list_entry *w_lock_list_free = NULL;
static struct witness_pendhelp pending_locks[WITNESS_PENDLIST];
static u_int pending_cnt;

static int w_free_cnt, w_spin_cnt, w_sleep_cnt;

static struct witness *w_data;
static uint8_t **w_rmatrix;
static struct lock_list_entry *w_locklistdata;
static struct witness_hash w_hash;	/* The witness hash table. */

/* The lock order data hash */
static struct witness_lock_order_data *w_lodata;
static struct witness_lock_order_data *w_lofree = NULL;
static struct witness_lock_order_hash w_lohash;
static int w_max_used_index = 0;
static unsigned int w_generation = 0;

static union lock_stack *w_lock_stack_free;
static unsigned int w_lock_stack_num;

static struct lock_class lock_class_kernel_lock = {
	.lc_name = "kernel_lock",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE
};

static struct lock_class lock_class_mutex = {
	.lc_name = "mutex",
	.lc_flags = LC_SPINLOCK
};

static struct lock_class lock_class_rwlock = {
	.lc_name = "rwlock",
	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_UPGRADABLE
};

static struct lock_class lock_class_rrwlock = {
	.lc_name = "rrwlock",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_SLEEPABLE |
	    LC_UPGRADABLE
};

static struct lock_class *lock_classes[] = {
	&lock_class_kernel_lock,
	&lock_class_mutex,
	&lock_class_rwlock,
	&lock_class_rrwlock,
};

/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * This global is set to 1 once the static lock orders have been enrolled
 * so that a warning can be issued for any spin locks enrolled later.
 */
static int witness_spin_warn = 0;

/*
 * The WITNESS-enabled diagnostic code.  Note that the witness code does
 * assume that the early boot is single-threaded at least until after this
 * routine is completed.
 */
void
witness_initialize(void)
{
	struct lock_object *lock;
	union lock_stack *stacks;
	struct witness *w;
	int i, s;

	w_data = (void *)uvm_pageboot_alloc(sizeof(struct witness) *
	    witness_count);
	memset(w_data, 0, sizeof(struct witness) * witness_count);

	w_rmatrix = (void *)uvm_pageboot_alloc(sizeof(*w_rmatrix) *
	    (witness_count + 1));

	for (i = 0; i < witness_count + 1; i++) {
		w_rmatrix[i] = (void *)uvm_pageboot_alloc(
		    sizeof(*w_rmatrix[i]) * (witness_count + 1));
		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
		    (witness_count + 1));
	}

	mtx_init_flags(&w_mtx, IPL_HIGH, "witness lock", MTX_NOWITNESS);
	for (i = witness_count - 1; i >= 0; i--) {
		w = &w_data[i];
		memset(w, 0, sizeof(*w));
		w_data[i].w_index = i;	/* Witness index never changes. */
		witness_free(w);
	}
	KASSERTMSG(SLIST_FIRST(&w_free)->w_index == 0,
	    "%s: Invalid list of free witness objects", __func__);

	/* The witness with index 0 is not used; this aids debugging. */
	SLIST_REMOVE_HEAD(&w_free, w_list);
	w_free_cnt--;

	for (i = 0; i < witness_count; i++) {
		memset(w_rmatrix[i], 0, sizeof(*w_rmatrix[i]) *
		    (witness_count + 1));
	}

	if (witness_locktrace) {
		w_lock_stack_num = LOCK_CHILDCOUNT * LOCK_NCHILDREN;
		stacks = (void *)uvm_pageboot_alloc(sizeof(*stacks) *
		    w_lock_stack_num);
	}

	w_locklistdata = (void *)uvm_pageboot_alloc(
	    sizeof(struct lock_list_entry) * LOCK_CHILDCOUNT);
	memset(w_locklistdata, 0, sizeof(struct lock_list_entry) *
	    LOCK_CHILDCOUNT);

	s = splhigh();
	for (i = 0; i < w_lock_stack_num; i++)
		witness_lock_stack_free(&stacks[i]);
	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);
	splx(s);
	witness_init_hash_tables();
	witness_spin_warn = 1;

	/* Iterate through all locks and add them to witness. */
	for (i = 0; pending_locks[i].wh_lock != NULL; i++) {
		lock = pending_locks[i].wh_lock;
		KASSERTMSG(lock->lo_flags & LO_WITNESS,
		    "%s: lock %s is on pending list but not LO_WITNESS",
		    __func__, lock->lo_name);
		lock->lo_witness = enroll(pending_locks[i].wh_type,
		    lock->lo_name, LOCK_CLASS(lock));
	}

	/* Mark the witness code as being ready for use. */
	witness_cold = 0;
}
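
/*
 * For scale: with the default WITNESS_COUNT of 1536, the relationship
 * matrix allocated above occupies (1536 + 1) * (1536 + 1) bytes, about
 * 2.3 MB, reserved permanently at boot via uvm_pageboot_alloc().
 */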

void
witness_init(struct lock_object *lock, const struct lock_type *type)
{
	struct lock_class *class;

	/* Various sanity checks. */
	class = LOCK_CLASS(lock);
	if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
	    (class->lc_flags & LC_RECURSABLE) == 0)
		panic("%s: lock (%s) %s can not be recursable",
		    __func__, class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
	    (class->lc_flags & LC_SLEEPABLE) == 0)
		panic("%s: lock (%s) %s can not be sleepable",
		    __func__, class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
	    (class->lc_flags & LC_UPGRADABLE) == 0)
		panic("%s: lock (%s) %s can not be upgradable",
		    __func__, class->lc_name, lock->lo_name);

	/*
	 * If we shouldn't watch this lock, then just clear lo_witness.
	 * Record the type in case the lock becomes watched later.
	 * Otherwise, if witness_cold is set, then it is too early to
	 * enroll this lock, so defer it to witness_initialize() by adding
	 * it to the pending_locks list.  If it is not too early, then
	 * enroll the lock now.
	 */
	if (witness_watch < 1 || panicstr != NULL || db_active ||
	    (lock->lo_flags & LO_WITNESS) == 0) {
		lock->lo_witness = NULL;
		lock->lo_type = type;
	} else if (witness_cold) {
		pending_locks[pending_cnt].wh_lock = lock;
		pending_locks[pending_cnt++].wh_type = type;
		if (pending_cnt > WITNESS_PENDLIST)
			panic("%s: pending locks list is too small, "
			    "increase WITNESS_PENDLIST",
			    __func__);
	} else
		lock->lo_witness = enroll(type, lock->lo_name, class);
}

static inline int
is_kernel_lock(const struct lock_object *lock)
{
#ifdef MULTIPROCESSOR
	return (lock == &kernel_lock.mpl_lock_obj);
#else
	return (0);
#endif
}

#ifdef DDB
static void
witness_ddb_compute_levels(void)
{
	struct witness *w;

	/*
	 * First clear all levels.
	 */
	SLIST_FOREACH(w, &w_all, w_list)
		w->w_ddb_level = -1;

	/*
	 * Look for locks with no parents and level all their descendants.
	 */
	SLIST_FOREACH(w, &w_all, w_list) {
		/* If the witness has ancestors (is not a root), skip it. */
		if (w->w_num_ancestors > 0)
			continue;
		witness_ddb_level_descendants(w, 0);
	}
}

static void
witness_ddb_level_descendants(struct witness *w, int l)
{
	int i;

	if (w->w_ddb_level >= l)
		return;

	w->w_ddb_level = l;
	l++;

	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
			witness_ddb_level_descendants(&w_data[i], l);
	}
}
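
/*
 * After these two passes, w_ddb_level holds the length of the longest
 * known lock-order chain from any root down to the witness; the early
 * return above keeps a node already reached through a longer chain
 * from being relabeled with a shorter one.
 */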

static void
witness_ddb_display_descendants(int(*prnt)(const char *fmt, ...),
    struct witness *w, int indent)
{
	int i;

	for (i = 0; i < indent; i++)
		prnt(" ");
	prnt("%s (%s) (type: %s, depth: %d)",
	    w->w_subtype, w->w_type->lt_name,
	    w->w_class->lc_name, w->w_ddb_level);
	if (w->w_displayed) {
		prnt(" -- (already displayed)\n");
		return;
	}
	w->w_displayed = 1;
	if (!w->w_acquired)
		prnt(" -- never acquired\n");
	else
		prnt("\n");
	indent++;
	WITNESS_INDEX_ASSERT(w->w_index);
	for (i = 1; i <= w_max_used_index; i++) {
		if (w_rmatrix[w->w_index][i] & WITNESS_PARENT)
			witness_ddb_display_descendants(prnt, &w_data[i],
			    indent);
	}
}

static void
witness_ddb_display_list(int(*prnt)(const char *fmt, ...),
    struct witness_list *list)
{
	struct witness *w;

	SLIST_FOREACH(w, list, w_typelist) {
		if (!w->w_acquired || w->w_ddb_level > 0)
			continue;

		/* This lock has no ancestors - display its descendants. */
		witness_ddb_display_descendants(prnt, w, 0);
	}
}

static void
witness_ddb_display(int(*prnt)(const char *fmt, ...))
{
	struct witness *w;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	witness_ddb_compute_levels();

	/* Clear all the displayed flags. */
	SLIST_FOREACH(w, &w_all, w_list)
		w->w_displayed = 0;

	/*
	 * First, handle sleep locks which have been acquired at least
	 * once.
	 */
	prnt("Sleep locks:\n");
	witness_ddb_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_ddb_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	SLIST_FOREACH(w, &w_all, w_list) {
		if (w->w_acquired)
			continue;
		prnt("%s (%s) (type: %s, depth: %d)\n",
		    w->w_subtype, w->w_type->lt_name,
		    w->w_class->lc_name, w->w_ddb_level);
	}
}
#endif /* DDB */

int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{

	if (witness_watch < 0 || panicstr != NULL || db_active)
		return (0);

	/* Require locks that witness knows about. */
	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
	    lock2->lo_witness == NULL)
		return (EINVAL);

	MUTEX_ASSERT_UNLOCKED(&w_mtx);
	mtx_enter(&w_mtx);

	/*
	 * If we already have either an explicit or implied lock order that
	 * is the other way around, then return an error.
	 */
	if (witness_watch &&
	    isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
		mtx_leave(&w_mtx);
		return (EINVAL);
	}

	/* Try to add the new order. */
	itismychild(lock1->lo_witness, lock2->lo_witness);
	mtx_leave(&w_mtx);
	return (0);
}
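
/*
 * A subsystem that wants "lock1 before lock2" on record up front,
 * without waiting for witness to observe that order at run time, could
 * do something like this (illustrative only; assumes two initialized
 * locks whose embedded struct lock_object is named lo_obj):
 *
 *	if (witness_defineorder(&a.lo_obj, &b.lo_obj) == EINVAL)
 *		panic("conflicting lock order already recorded");
 */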

void
witness_checkorder(struct lock_object *lock, int flags,
    struct lock_object *interlock)
{
	struct lock_list_entry *lock_list, *lle;
	struct lock_instance *lock1, *lock2, *plock;
	struct lock_class *class, *iclass;
	struct witness *w, *w1;
	int i, j, s;

	if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active)
		return;

	if ((lock->lo_flags & LO_INITIALIZED) == 0) {
		if (witness_uninitialized_report > 0) {
			witness_uninitialized_report--;
			printf("witness: lock_object uninitialized: %p\n",
			    lock);
			witness_debugger(1);
		}
		lock->lo_flags |= LO_INITIALIZED;
	}

	if ((lock->lo_flags & LO_WITNESS) == 0)
		return;

	w = lock->lo_witness;
	class = LOCK_CLASS(lock);

	if (w == NULL)
		w = lock->lo_witness =
		    enroll(lock->lo_type, lock->lo_name, class);

	if (class->lc_flags & LC_SLEEPLOCK) {
		struct proc *p;

		/*
		 * Since spin locks include a critical section, this check
		 * implicitly enforces a lock order of all sleep locks before
		 * all spin locks.
		 */
		lock_list = witness_cpu[cpu_number()].wc_spinlocks;
		if (lock_list != NULL && lock_list->ll_count > 0) {
			panic("acquiring blockable sleep lock with "
			    "spinlock or critical section held (%s) %s",
			    class->lc_name, lock->lo_name);
		}

		/*
		 * If this is the first lock acquired then just return as
		 * no order checking is needed.
		 */
		p = curproc;
		if (p == NULL)
			return;
		lock_list = p->p_sleeplocks;
		if (lock_list == NULL || lock_list->ll_count == 0)
			return;
	} else {

		/*
		 * If this is the first lock, just return as no order
		 * checking is needed.
		 */
		lock_list = witness_cpu[cpu_number()].wc_spinlocks;
		if (lock_list == NULL || lock_list->ll_count == 0)
			return;
	}

	s = splhigh();

	/*
	 * Check to see if we are recursing on a lock we already own.  If
	 * so, make sure that we don't mismatch exclusive and shared lock
	 * acquires.
	 */
	lock1 = find_instance(lock_list, lock);
	if (lock1 != NULL) {
		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
		    (flags & LOP_EXCLUSIVE) == 0) {
			printf("witness: shared lock of (%s) %s "
			    "while exclusively locked\n",
			    class->lc_name, lock->lo_name);
			panic("excl->share");
		}
		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
		    (flags & LOP_EXCLUSIVE) != 0) {
			printf("witness: exclusive lock of (%s) %s "
			    "while share locked\n",
			    class->lc_name, lock->lo_name);
			panic("share->excl");
		}
		goto out_splx;
	}

	/* Warn if the interlock is not locked exactly once. */
	if (interlock != NULL) {
		iclass = LOCK_CLASS(interlock);
		lock1 = find_instance(lock_list, interlock);
		if (lock1 == NULL)
			panic("interlock (%s) %s not locked",
			    iclass->lc_name, interlock->lo_name);
		else if ((lock1->li_flags & LI_RECURSEMASK) != 0)
			panic("interlock (%s) %s recursed",
			    iclass->lc_name, interlock->lo_name);
	}

	/*
	 * Find the previously acquired lock, but ignore interlocks.
	 */
	plock = &lock_list->ll_children[lock_list->ll_count - 1];
	if (interlock != NULL && plock->li_lock == interlock) {
		if (lock_list->ll_count > 1)
			plock =
			    &lock_list->ll_children[lock_list->ll_count - 2];
		else {
			lle = lock_list->ll_next;

			/*
			 * The interlock is the only lock we hold, so
			 * simply return.
			 */
			if (lle == NULL)
				goto out_splx;
			plock = &lle->ll_children[lle->ll_count - 1];
		}
	}

	/*
	 * Try to perform most checks without a lock.  If this succeeds we
	 * can skip acquiring the lock and return success.  Otherwise we redo
	 * the check with the lock held to handle races with concurrent
	 * updates.
	 */
	w1 = plock->li_lock->lo_witness;
	if (witness_lock_order_check(w1, w))
		goto out_splx;

	mtx_enter(&w_mtx);
	if (witness_lock_order_check(w1, w))
		goto out;

	witness_lock_order_add(w1, w);

	/*
	 * Check for duplicate locks of the same type.  Note that we only
	 * have to check for this on the last lock we just acquired.  Any
	 * other cases will be caught as lock order violations.
	 */
	if (w1 == w) {
		i = w->w_index;
		if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) &&
		    !(w_rmatrix[i][i] & WITNESS_REVERSAL)) {
			w_rmatrix[i][i] |= WITNESS_REVERSAL;
			w->w_reversed = 1;
			mtx_leave(&w_mtx);
			printf("witness: acquiring duplicate lock of "
			    "same type: \"%s\"\n", w->w_type->lt_name);
			printf(" 1st %s\n", plock->li_lock->lo_name);
			printf(" 2nd %s\n", lock->lo_name);
			witness_debugger(1);
		} else
			mtx_leave(&w_mtx);
		goto out_splx;
	}
	MUTEX_ASSERT_LOCKED(&w_mtx);

	/*
	 * If we know that the lock we are acquiring comes after
	 * the lock we most recently acquired in the lock order tree,
	 * then there is no need for any further checks.
	 */
	if (isitmychild(w1, w))
		goto out;

	for (j = 0, lle = lock_list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--, j++) {

			KASSERT(j < LOCK_CHILDCOUNT * LOCK_NCHILDREN);
			lock1 = &lle->ll_children[i];

			/*
			 * Ignore the interlock.
			 */
			if (interlock == lock1->li_lock)
				continue;

			/*
			 * If this lock doesn't undergo witness checking,
			 * then skip it.
			 */
			w1 = lock1->li_lock->lo_witness;
			if (w1 == NULL) {
				KASSERTMSG((lock1->li_lock->lo_flags &
				    LO_WITNESS) == 0,
				    "lock missing witness structure");
				continue;
			}

			/*
			 * If we are locking Giant and this is a sleepable
			 * lock, then skip it.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    is_kernel_lock(lock))
				continue;

			/*
			 * If we are locking a sleepable lock and this lock
			 * is Giant, then skip it.
			 */
			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    is_kernel_lock(lock1->li_lock))
				continue;

			/*
			 * If we are locking a sleepable lock and this lock
			 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
			 * sleepable locks before non-sleepable locks.
			 */
			if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				goto reversal;

			/*
			 * If we are locking Giant and this is a non-sleepable
			 * lock, then treat it as a reversal.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 &&
			    is_kernel_lock(lock))
				goto reversal;

			/*
			 * Check the lock order hierarchy for a reversal.
			 */
			if (!isitmydescendant(w, w1))
				continue;
		reversal:

			/*
			 * We have a lock order violation, check to see if it
			 * is allowed or has already been yelled about.
			 */

			/* Bail if this violation is known */
			if (w_rmatrix[w1->w_index][w->w_index] &
			    WITNESS_REVERSAL)
				goto out;

			/* Record this as a violation */
			w_rmatrix[w1->w_index][w->w_index] |= WITNESS_REVERSAL;
			w_rmatrix[w->w_index][w1->w_index] |= WITNESS_REVERSAL;
			w->w_reversed = w1->w_reversed = 1;
			witness_increment_graph_generation();
			mtx_leave(&w_mtx);

			/*
			 * There are known LORs between VNODE locks.  They are
			 * not an indication of a bug.  VNODE locks are flagged
			 * as such (LO_IS_VNODE) and we don't yell if the LOR
			 * is between 2 VNODE locks.
			 */
			if ((lock->lo_flags & LO_IS_VNODE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_IS_VNODE) != 0)
				goto out_splx;

			/*
			 * Ok, yell about it.
			 */
			printf("witness: ");
			if (((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				printf("lock order reversal: "
				    "(sleepable after non-sleepable)\n");
			else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE)
			    == 0 && is_kernel_lock(lock))
				printf("lock order reversal: "
				    "(Giant after non-sleepable)\n");
			else
				printf("lock order reversal:\n");

			/*
			 * Try to locate an earlier lock with
			 * witness w in our list.
			 */
			do {
				lock2 = &lle->ll_children[i];
				KASSERT(lock2->li_lock != NULL);
				if (lock2->li_lock->lo_witness == w)
					break;
				if (i == 0 && lle->ll_next != NULL) {
					lle = lle->ll_next;
					i = lle->ll_count - 1;
					KASSERT(i >= 0 && i < LOCK_NCHILDREN);
				} else
					i--;
			} while (i >= 0);
			if (i < 0) {
				printf(" 1st %p %s (%s)\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    w1->w_type->lt_name);
				printf(" 2nd %p %s (%s)\n",
				    lock, lock->lo_name, w->w_type->lt_name);
			} else {
				printf(" 1st %p %s (%s)\n",
				    lock2->li_lock, lock2->li_lock->lo_name,
				    lock2->li_lock->lo_witness->w_type->
				    lt_name);
				printf(" 2nd %p %s (%s)\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    w1->w_type->lt_name);
				printf(" 3rd %p %s (%s)\n", lock,
				    lock->lo_name, w->w_type->lt_name);
			}
			if (witness_watch > 1)
				witness_print_cycle(printf, w1, w);
			witness_debugger(0);
			goto out_splx;
		}
	}

	/*
	 * If requested, build a new lock order.  However, don't build a new
	 * relationship between a sleepable lock and Giant if it is in the
	 * wrong direction.  The correct lock order is that sleepable locks
	 * always come before Giant.
	 */
	if (flags & LOP_NEWORDER &&
	    !(is_kernel_lock(plock->li_lock) &&
	    (lock->lo_flags & LO_SLEEPABLE) != 0))
		itismychild(plock->li_lock->lo_witness, w);
out:
	mtx_leave(&w_mtx);
out_splx:
	splx(s);
}

void
witness_lock(struct lock_object *lock, int flags)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct witness *w;
	int s;

	if (witness_cold || witness_watch < 0 || panicstr != NULL ||
	    db_active || (lock->lo_flags & LO_WITNESS) == 0)
		return;

	w = lock->lo_witness;
	if (w == NULL)
		w = lock->lo_witness =
		    enroll(lock->lo_type, lock->lo_name, LOCK_CLASS(lock));

	/* Determine lock list for this lock. */
	if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK) {
		struct proc *p;

		p = curproc;
		if (p == NULL)
			return;
		lock_list = &p->p_sleeplocks;
	} else
		lock_list = &witness_cpu[cpu_number()].wc_spinlocks;

	s = splhigh();

	/* Check to see if we are recursing on a lock we already own. */
	instance = find_instance(*lock_list, lock);
	if (instance != NULL) {
		instance->li_flags++;
		goto out;
	}

	w->w_acquired = 1;

	/* Find the next open lock instance in the list and fill it. */
	lle = *lock_list;
	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
		lle = witness_lock_list_get();
		if (lle == NULL)
			goto out;
		lle->ll_next = *lock_list;
		*lock_list = lle;
	}
	instance = &lle->ll_children[lle->ll_count++];
	instance->li_lock = lock;
	if ((flags & LOP_EXCLUSIVE) != 0)
		instance->li_flags = LI_EXCLUSIVE;
	else
		instance->li_flags = 0;
	instance->li_stack = NULL;
	if (witness_locktrace) {
		instance->li_stack = witness_lock_stack_get();
		if (instance->li_stack != NULL)
			stacktrace_save(&instance->li_stack->ls_stack);
	}
out:
	splx(s);
}
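
/*
 * The recursion count lives in the low 16 bits of li_flags
 * (LI_RECURSEMASK), so the bare li_flags++ above on a recursive
 * acquisition only bumps that count; witness_unlock() decrements it
 * the same way until the count reaches zero.
 */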

void
witness_upgrade(struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;
	int s;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	if (lock->lo_witness == NULL || witness_watch < 0 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic("upgrade of non-upgradable lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("upgrade of non-sleep lock (%s) %s",
			    class->lc_name, lock->lo_name);
	}
	s = splhigh();
	instance = find_instance(curproc->p_sleeplocks, lock);
	if (instance == NULL) {
		panic("upgrade of unlocked lock (%s) %s",
		    class->lc_name, lock->lo_name);
		goto out;
	}
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) != 0)
			panic("upgrade of exclusive lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("upgrade of recursed lock (%s) %s r=%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK);
	}
	instance->li_flags |= LI_EXCLUSIVE;
out:
	splx(s);
}

void
witness_downgrade(struct lock_object *lock, int flags)
{
	struct lock_instance *instance;
	struct lock_class *class;
	int s;

	KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
	if (lock->lo_witness == NULL || witness_watch < 0 ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);
	if (witness_watch) {
		if ((lock->lo_flags & LO_UPGRADABLE) == 0)
			panic("downgrade of non-upgradable lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((class->lc_flags & LC_SLEEPLOCK) == 0)
			panic("downgrade of non-sleep lock (%s) %s",
			    class->lc_name, lock->lo_name);
	}
	s = splhigh();
	instance = find_instance(curproc->p_sleeplocks, lock);
	if (instance == NULL) {
		panic("downgrade of unlocked lock (%s) %s",
		    class->lc_name, lock->lo_name);
		goto out;
	}
	if (witness_watch) {
		if ((instance->li_flags & LI_EXCLUSIVE) == 0)
			panic("downgrade of shared lock (%s) %s",
			    class->lc_name, lock->lo_name);
		if ((instance->li_flags & LI_RECURSEMASK) != 0)
			panic("downgrade of recursed lock (%s) %s r=%d",
			    class->lc_name, lock->lo_name,
			    instance->li_flags & LI_RECURSEMASK);
	}
	instance->li_flags &= ~LI_EXCLUSIVE;
out:
	splx(s);
}
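
/*
 * Both routines above only flip LI_EXCLUSIVE on an existing instance.
 * They apply to LC_UPGRADABLE sleep locks, e.g. an rwlock taken with
 * RW_READ and later promoted to the exclusive state (and back), and
 * they refuse to operate on a recursed hold.
 */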

void
witness_unlock(struct lock_object *lock, int flags)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct lock_class *class;
	int i, j;
	int s;

	if (witness_cold || lock->lo_witness == NULL ||
	    panicstr != NULL || db_active)
		return;
	class = LOCK_CLASS(lock);

	/* Find lock instance associated with this lock. */
	if (class->lc_flags & LC_SLEEPLOCK) {
		struct proc *p;

		p = curproc;
		if (p == NULL)
			return;
		lock_list = &p->p_sleeplocks;
	} else
		lock_list = &witness_cpu[cpu_number()].wc_spinlocks;

	s = splhigh();

	lle = *lock_list;
	for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
		for (i = 0; i < (*lock_list)->ll_count; i++) {
			instance = &(*lock_list)->ll_children[i];
			if (instance->li_lock == lock)
				goto found;
		}

	/*
	 * If witness has been disabled at runtime via witness_watch,
	 * acquisitions are no longer recorded, so an unlock may not find
	 * its instance in the lists.  Only treat a missing instance as an
	 * error while witness is still watching.
	 */
	if (witness_watch > 0) {
		panic("lock (%s) %s not locked", class->lc_name,
		    lock->lo_name);
	}
	goto out;

found:

	/* First, check for shared/exclusive mismatches. */
	if ((instance->li_flags & LI_EXCLUSIVE) != 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) == 0) {
		printf("witness: shared unlock of (%s) %s "
		    "while exclusively locked\n",
		    class->lc_name, lock->lo_name);
		panic("excl->ushare");
	}
	if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 &&
	    (flags & LOP_EXCLUSIVE) != 0) {
		printf("witness: exclusive unlock of (%s) %s "
		    "while share locked\n", class->lc_name, lock->lo_name);
		panic("share->uexcl");
	}
	/* If we are recursed, unrecurse. */
	if ((instance->li_flags & LI_RECURSEMASK) > 0) {
		instance->li_flags--;
		goto out;
	}
	/* The lock is now being dropped, check for NORELEASE flag */
	if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) {
		printf("witness: forbidden unlock of (%s) %s\n",
		    class->lc_name, lock->lo_name);
		panic("lock marked norelease");
	}

	/* Release the stack buffer, if any. */
	if (instance->li_stack != NULL) {
		witness_lock_stack_free(instance->li_stack);
		instance->li_stack = NULL;
	}

	/* Remove this item from the list. */
	for (j = i; j < (*lock_list)->ll_count - 1; j++)
		(*lock_list)->ll_children[j] =
		    (*lock_list)->ll_children[j + 1];
	(*lock_list)->ll_count--;

	/*
	 * To reduce contention on w_mtx, we try to keep a head entry on
	 * each list so that frequent allocation from the free pool (and
	 * the locking that goes with it) is avoided.  To keep the code
	 * simple, an empty head entry also implies there are no further
	 * entries in the list, so when the head must be freed, list
	 * ownership is handed over to the next entry.
	 */
	if ((*lock_list)->ll_count == 0) {
		if (*lock_list == lle) {
			if (lle->ll_next == NULL)
				goto out;
		} else
			lle = *lock_list;
		*lock_list = lle->ll_next;
		witness_lock_list_free(lle);
	}
out:
	splx(s);
}

void
witness_thread_exit(struct proc *p)
{
	struct lock_list_entry *lle;
	int i, n, s;

	lle = p->p_sleeplocks;
	if (lle == NULL || panicstr != NULL || db_active)
		return;
	if (lle->ll_count != 0) {
		for (n = 0; lle != NULL; lle = lle->ll_next)
			for (i = lle->ll_count - 1; i >= 0; i--) {
				if (n == 0)
					printf("witness: thread %p exiting "
					    "with the following locks held:\n",
					    p);
				n++;
				witness_list_lock(&lle->ll_children[i],
				    printf);
			}
		panic("thread %p cannot exit while holding sleeplocks", p);
	}
	KASSERT(lle->ll_next == NULL);
	s = splhigh();
	witness_lock_list_free(lle);
	splx(s);
}

/*
 * Warn if any locks other than 'lock' are held.  Flags can be passed in to
 * exempt Giant and sleepable locks from the checks as well.  If any
 * non-exempt locks are held, then a supplied message is printed to the
 * output channel along with a list of the offending locks.  If indicated in
 * the flags then a failure results in a panic as well.
 */
int
witness_warn(int flags, struct lock_object *lock, const char *fmt, ...)
{
	struct lock_list_entry *lock_list, *lle;
	struct lock_instance *lock1;
	struct proc *p;
	va_list ap;
	int i, n;

	if (witness_cold || witness_watch < 1 || panicstr != NULL || db_active)
		return (0);
	n = 0;
	p = curproc;
	for (lle = p->p_sleeplocks; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			lock1 = &lle->ll_children[i];
			if (lock1->li_lock == lock)
				continue;
			if (flags & WARN_KERNELOK &&
			    is_kernel_lock(lock1->li_lock))
				continue;
			if (flags & WARN_SLEEPOK &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0)
				continue;
			if (n == 0) {
				printf("witness: ");
				va_start(ap, fmt);
				vprintf(fmt, ap);
				va_end(ap);
				printf(" with the following %slocks held:\n",
				    (flags & WARN_SLEEPOK) != 0 ?
				    "non-sleepable " : "");
			}
			n++;
			witness_list_lock(lock1, printf);
		}

	lock_list = witness_cpu[cpu_number()].wc_spinlocks;
	if (lock_list != NULL && lock_list->ll_count != 0) {
		/*
		 * We should only have one spinlock and as long as
		 * the flags cannot match for this lock's class,
		 * check whether the first spinlock is the one curproc
		 * should hold.
		 */
		lock1 = &lock_list->ll_children[lock_list->ll_count - 1];
		if (lock_list->ll_count == 1 && lock_list->ll_next == NULL &&
		    lock1->li_lock == lock && n == 0)
			return (0);

		printf("witness: ");
		va_start(ap, fmt);
		vprintf(fmt, ap);
		va_end(ap);
		printf(" with the following %slocks held:\n",
		    (flags & WARN_SLEEPOK) != 0 ? "non-sleepable " : "");
		n += witness_list_locks(&lock_list, printf);
	}
	if (n > 0) {
		if (flags & WARN_PANIC)
			panic("%s", __func__);
		else
			witness_debugger(1);
	}
	return (n);
}
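
/*
 * A typical caller checks for stray locks before sleeping, along these
 * lines (illustrative):
 *
 *	witness_warn(WARN_SLEEPOK | WARN_KERNELOK, NULL,
 *	    "sleeping in %s", __func__);
 *
 * which complains (or panics, if WARN_PANIC is set) only when
 * something other than sleepable locks and the kernel lock is held.
 */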
"non-sleepable " : ""); 1443 n += witness_list_locks(&lock_list, printf); 1444 } 1445 if (n > 0) { 1446 if (flags & WARN_PANIC) 1447 panic("%s", __func__); 1448 else 1449 witness_debugger(1); 1450 } 1451 return (n); 1452 } 1453 1454 static struct witness * 1455 enroll(const struct lock_type *type, const char *subtype, 1456 struct lock_class *lock_class) 1457 { 1458 struct witness *w; 1459 struct witness_list *typelist; 1460 1461 KASSERT(type != NULL); 1462 1463 if (witness_watch < 0 || panicstr != NULL || db_active) 1464 return (NULL); 1465 if ((lock_class->lc_flags & LC_SPINLOCK)) { 1466 typelist = &w_spin; 1467 } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) { 1468 typelist = &w_sleep; 1469 } else { 1470 panic("lock class %s is not sleep or spin", 1471 lock_class->lc_name); 1472 return (NULL); 1473 } 1474 1475 mtx_enter(&w_mtx); 1476 w = witness_hash_get(type, subtype); 1477 if (w) 1478 goto found; 1479 if ((w = witness_get()) == NULL) 1480 return (NULL); 1481 w->w_type = type; 1482 w->w_subtype = subtype; 1483 w->w_class = lock_class; 1484 SLIST_INSERT_HEAD(&w_all, w, w_list); 1485 if (lock_class->lc_flags & LC_SPINLOCK) { 1486 SLIST_INSERT_HEAD(&w_spin, w, w_typelist); 1487 w_spin_cnt++; 1488 } else if (lock_class->lc_flags & LC_SLEEPLOCK) { 1489 SLIST_INSERT_HEAD(&w_sleep, w, w_typelist); 1490 w_sleep_cnt++; 1491 } 1492 1493 /* Insert new witness into the hash */ 1494 witness_hash_put(w); 1495 witness_increment_graph_generation(); 1496 mtx_leave(&w_mtx); 1497 return (w); 1498 found: 1499 mtx_leave(&w_mtx); 1500 if (lock_class != w->w_class) 1501 panic("lock (%s) %s does not match earlier (%s) lock", 1502 type->lt_name, lock_class->lc_name, w->w_class->lc_name); 1503 return (w); 1504 } 1505 1506 static void 1507 adopt(struct witness *parent, struct witness *child) 1508 { 1509 int pi, ci, i, j; 1510 1511 if (witness_cold == 0) 1512 MUTEX_ASSERT_LOCKED(&w_mtx); 1513 1514 /* If the relationship is already known, there's no work to be done. */ 1515 if (isitmychild(parent, child)) 1516 return; 1517 1518 /* When the structure of the graph changes, bump up the generation. */ 1519 witness_increment_graph_generation(); 1520 1521 /* 1522 * The hard part ... create the direct relationship, then propagate all 1523 * indirect relationships. 1524 */ 1525 pi = parent->w_index; 1526 ci = child->w_index; 1527 WITNESS_INDEX_ASSERT(pi); 1528 WITNESS_INDEX_ASSERT(ci); 1529 KASSERT(pi != ci); 1530 w_rmatrix[pi][ci] |= WITNESS_PARENT; 1531 w_rmatrix[ci][pi] |= WITNESS_CHILD; 1532 1533 /* 1534 * If parent was not already an ancestor of child, 1535 * then we increment the descendant and ancestor counters. 1536 */ 1537 if ((w_rmatrix[pi][ci] & WITNESS_ANCESTOR) == 0) { 1538 parent->w_num_descendants++; 1539 child->w_num_ancestors++; 1540 } 1541 1542 /* 1543 * Find each ancestor of 'pi'. Note that 'pi' itself is counted as 1544 * an ancestor of 'pi' during this loop. 1545 */ 1546 for (i = 1; i <= w_max_used_index; i++) { 1547 if ((w_rmatrix[i][pi] & WITNESS_ANCESTOR_MASK) == 0 && 1548 (i != pi)) 1549 continue; 1550 1551 /* Find each descendant of 'i' and mark it as a descendant. */ 1552 for (j = 1; j <= w_max_used_index; j++) { 1553 1554 /* 1555 * Skip children that are already marked as 1556 * descendants of 'i'. 1557 */ 1558 if (w_rmatrix[i][j] & WITNESS_ANCESTOR_MASK) 1559 continue; 1560 1561 /* 1562 * We are only interested in descendants of 'ci'. Note 1563 * that 'ci' itself is counted as a descendant of 'ci'. 

static void
itismychild(struct witness *parent, struct witness *child)
{
	KASSERT(child != NULL && parent != NULL);
	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);

	if (!witness_lock_type_equal(parent, child)) {
		if (witness_cold == 0)
			mtx_leave(&w_mtx);
		panic("%s: parent \"%s\" (%s) and child \"%s\" (%s) are not "
		    "the same lock type", __func__, parent->w_type->lt_name,
		    parent->w_class->lc_name, child->w_type->lt_name,
		    child->w_class->lc_name);
	}
	adopt(parent, child);
}

/*
 * Generic code for the isitmy*() functions.  The rmask parameter is the
 * expected relationship of w1 to w2.
 */
static int
_isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
{
	unsigned char r1, r2;
	int i1, i2;

	i1 = w1->w_index;
	i2 = w2->w_index;
	WITNESS_INDEX_ASSERT(i1);
	WITNESS_INDEX_ASSERT(i2);
	r1 = w_rmatrix[i1][i2] & WITNESS_RELATED_MASK;
	r2 = w_rmatrix[i2][i1] & WITNESS_RELATED_MASK;

	/* The flags on one better be the inverse of the flags on the other */
	if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) ||
	    (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
		/* Don't squawk if we're potentially racing with an update. */
		if (w_mtx.mtx_owner != curcpu())
			return (0);
		printf("witness: %s: rmatrix mismatch between %s (index %d) "
		    "and %s (index %d): w_rmatrix[%d][%d] == %x but "
		    "w_rmatrix[%d][%d] == %x\n",
		    fname, w1->w_type->lt_name, i1, w2->w_type->lt_name,
		    i2, i1, i2, r1,
		    i2, i1, r2);
#ifdef DDB
		db_stack_dump();
#endif
		printf("witness disabled\n");
		witness_watch = -1;
	}
	return (r1 & rmask);
}

/*
 * Checks if @child is a direct child of @parent.
 */
static int
isitmychild(struct witness *parent, struct witness *child)
{

	return (_isitmyx(parent, child, WITNESS_PARENT, __func__));
}

/*
 * Checks if @descendant is a direct or indirect descendant of @ancestor.
 */
static int
isitmydescendant(struct witness *ancestor, struct witness *descendant)
{

	return (_isitmyx(ancestor, descendant, WITNESS_ANCESTOR_MASK,
	    __func__));
}
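
/*
 * Concretely: after itismychild(A, B), both isitmychild(A, B) and
 * isitmydescendant(A, B) hold; after a further itismychild(B, C),
 * only isitmydescendant(A, C) holds, since the A -> C relation is
 * indirect (WITNESS_ANCESTOR rather than WITNESS_PARENT).
 */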

static struct witness *
witness_get(void)
{
	struct witness *w;
	int index;

	if (witness_cold == 0)
		MUTEX_ASSERT_LOCKED(&w_mtx);

	if (witness_watch < 0) {
		mtx_leave(&w_mtx);
		return (NULL);
	}
	if (SLIST_EMPTY(&w_free)) {
		witness_watch = -1;
		mtx_leave(&w_mtx);
		printf("WITNESS: unable to allocate a new witness object\n");
		return (NULL);
	}
	w = SLIST_FIRST(&w_free);
	SLIST_REMOVE_HEAD(&w_free, w_list);
	w_free_cnt--;
	index = w->w_index;
	KASSERT(index > 0 && index == w_max_used_index + 1 &&
	    index < witness_count);
	memset(w, 0, sizeof(*w));
	w->w_index = index;
	if (index > w_max_used_index)
		w_max_used_index = index;
	return (w);
}

static void
witness_free(struct witness *w)
{
	SLIST_INSERT_HEAD(&w_free, w, w_list);
	w_free_cnt++;
}

static struct lock_list_entry *
witness_lock_list_get(void)
{
	struct lock_list_entry *lle;
	struct witness_cpu *wcpu = &witness_cpu[cpu_number()];

	if (witness_watch < 0)
		return (NULL);

	splassert(IPL_HIGH);

	if (wcpu->wc_lle_count > 0) {
		lle = wcpu->wc_lle_cache;
		wcpu->wc_lle_cache = lle->ll_next;
		wcpu->wc_lle_count--;
		memset(lle, 0, sizeof(*lle));
		return (lle);
	}

	mtx_enter(&w_mtx);
	lle = w_lock_list_free;
	if (lle == NULL) {
		witness_watch = -1;
		mtx_leave(&w_mtx);
		printf("%s: witness exhausted\n", __func__);
		return (NULL);
	}
	w_lock_list_free = lle->ll_next;
	mtx_leave(&w_mtx);
	memset(lle, 0, sizeof(*lle));
	return (lle);
}

static void
witness_lock_list_free(struct lock_list_entry *lle)
{
	struct witness_cpu *wcpu = &witness_cpu[cpu_number()];

	splassert(IPL_HIGH);

	if (wcpu->wc_lle_count < WITNESS_LLE_CACHE_MAX) {
		lle->ll_next = wcpu->wc_lle_cache;
		wcpu->wc_lle_cache = lle;
		wcpu->wc_lle_count++;
		return;
	}

	mtx_enter(&w_mtx);
	lle->ll_next = w_lock_list_free;
	w_lock_list_free = lle;
	mtx_leave(&w_mtx);
}
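
/*
 * The per-CPU caches above (and the stack caches below) bound the
 * fast path at WITNESS_LLE_CACHE_MAX list entries and
 * WITNESS_STK_CACHE_MAX stack buffers per CPU; only when a cache runs
 * dry or overflows is the global free pool touched under w_mtx.
 */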

static struct lock_instance *
find_instance(struct lock_list_entry *list, const struct lock_object *lock)
{
        struct lock_list_entry *lle;
        struct lock_instance *instance;
        int i;

        for (lle = list; lle != NULL; lle = lle->ll_next) {
                for (i = lle->ll_count - 1; i >= 0; i--) {
                        instance = &lle->ll_children[i];
                        if (instance->li_lock == lock)
                                return (instance);
                }
        }
        return (NULL);
}

static void
witness_list_lock(struct lock_instance *instance,
    int (*prnt)(const char *fmt, ...))
{
        struct lock_object *lock;

        lock = instance->li_lock;
        prnt("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ?
            "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name);
        prnt(" r = %d (%p)\n", instance->li_flags & LI_RECURSEMASK, lock);
        if (instance->li_stack != NULL)
                stacktrace_print(&instance->li_stack->ls_stack, prnt);
}

static int
witness_search(struct witness *w, struct witness *target,
    struct witness **path, int depth, int *remaining)
{
        int i, any_remaining;

        if (depth == 0) {
                *remaining = 1;
                return (w == target);
        }

        any_remaining = 0;
        for (i = 1; i <= w_max_used_index; i++) {
                if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) {
                        if (witness_search(&w_data[i], target, path, depth - 1,
                            remaining)) {
                                path[depth - 1] = &w_data[i];
                                *remaining = 1;
                                return 1;
                        }
                        if (*remaining)
                                any_remaining = 1;
                }
        }
        *remaining = any_remaining;
        return 0;
}

static void
witness_print_cycle_edge(int(*prnt)(const char *fmt, ...),
    struct witness *parent, struct witness *child, int step, int last)
{
        struct witness_lock_order_data *wlod;
        int next;

        if (last)
                next = 1;
        else
                next = step + 1;
        prnt("lock order [%d] %s (%s) -> [%d] %s (%s)\n",
            step, parent->w_subtype, parent->w_type->lt_name,
            next, child->w_subtype, child->w_type->lt_name);
        if (witness_watch > 1) {
                mtx_enter(&w_mtx);
                wlod = witness_lock_order_get(parent, child);
                mtx_leave(&w_mtx);

                if (wlod != NULL)
                        stacktrace_print(&wlod->wlod_stack, printf);
                else
                        prnt("lock order data %s -> %s is missing\n",
                            parent->w_type->lt_name,
                            child->w_type->lt_name);
        }
}
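
/*
 * Example of the search below: if the graph holds the edges c -> x and
 * x -> p, then witness_search(c, p, path, 1, &remaining) fails but
 * leaves *remaining set, so the caller retries at depth 2, which
 * succeeds and fills path[1] = x and path[0] = p.
 */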
static void
witness_print_cycle(int(*prnt)(const char *fmt, ...),
    struct witness *parent, struct witness *child)
{
        struct witness *path[4];
        struct witness *w;
        int depth, remaining;
        int step = 0;

        /*
         * Use depth-limited search to find the shortest path
         * from child to parent.
         */
        for (depth = 1; depth < nitems(path); depth++) {
                if (witness_search(child, parent, path, depth, &remaining))
                        goto found;
                if (!remaining)
                        break;
        }
        prnt("witness: incomplete path, depth %d\n", depth);
        return;

found:
        witness_print_cycle_edge(prnt, parent, child, ++step, 0);
        for (w = child; depth > 0; depth--) {
                witness_print_cycle_edge(prnt, w, path[depth - 1], ++step,
                    depth == 1);
                w = path[depth - 1];
        }
}

#ifdef DDB
static int
witness_thread_has_locks(struct proc *p)
{

        if (p->p_sleeplocks == NULL)
                return (0);
        return (p->p_sleeplocks->ll_count != 0);
}

static int
witness_process_has_locks(struct process *pr)
{
        struct proc *p;

        TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
                if (witness_thread_has_locks(p))
                        return (1);
        }
        return (0);
}
#endif

int
witness_list_locks(struct lock_list_entry **lock_list,
    int (*prnt)(const char *fmt, ...))
{
        struct lock_list_entry *lle;
        int i, nheld;

        nheld = 0;
        for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
                for (i = lle->ll_count - 1; i >= 0; i--) {
                        witness_list_lock(&lle->ll_children[i], prnt);
                        nheld++;
                }
        return (nheld);
}

/*
 * This is a bit risky at best.  We call this function when we have timed
 * out acquiring a spin lock, and we assume that the other CPU is stuck
 * with this lock held.  So, we go groveling around in the other CPU's
 * per-cpu data to try to find the lock instance for this spin lock to
 * see when it was last acquired.
 */
void
witness_display_spinlock(struct lock_object *lock, struct proc *owner,
    int (*prnt)(const char *fmt, ...))
{
        struct lock_instance *instance;

        if (owner->p_stat != SONPROC)
                return;
        instance = find_instance(
            witness_cpu[owner->p_cpu->ci_cpuid].wc_spinlocks, lock);
        if (instance != NULL)
                witness_list_lock(instance, prnt);
}
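
/*
 * Sketch of a typical witness_assert() call site (the names here are
 * illustrative, not a real interface): a function that must be entered
 * with its lock held exclusively can verify the contract with
 *
 *	witness_assert(lock_obj, LA_XLOCKED);
 *
 * where lock_obj is the lock's embedded struct lock_object, while a
 * path that must run without the lock passes LA_UNLOCKED.
 */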
void
witness_assert(const struct lock_object *lock, int flags)
{
        struct lock_instance *instance;
        struct lock_class *class;

        if (lock->lo_witness == NULL || witness_watch < 1 ||
            panicstr != NULL || db_active)
                return;
        class = LOCK_CLASS(lock);
        if ((class->lc_flags & LC_SLEEPLOCK) != 0)
                instance = find_instance(curproc->p_sleeplocks, lock);
        else if ((class->lc_flags & LC_SPINLOCK) != 0)
                instance = find_instance(
                    witness_cpu[cpu_number()].wc_spinlocks, lock);
        else {
                panic("lock (%s) %s is not sleep or spin!",
                    class->lc_name, lock->lo_name);
                return;
        }
        switch (flags) {
        case LA_UNLOCKED:
                if (instance != NULL)
                        panic("lock (%s) %s locked",
                            class->lc_name, lock->lo_name);
                break;
        case LA_LOCKED:
        case LA_LOCKED | LA_RECURSED:
        case LA_LOCKED | LA_NOTRECURSED:
        case LA_SLOCKED:
        case LA_SLOCKED | LA_RECURSED:
        case LA_SLOCKED | LA_NOTRECURSED:
        case LA_XLOCKED:
        case LA_XLOCKED | LA_RECURSED:
        case LA_XLOCKED | LA_NOTRECURSED:
                if (instance == NULL) {
                        panic("lock (%s) %s not locked",
                            class->lc_name, lock->lo_name);
                        break;
                }
                if ((flags & LA_XLOCKED) != 0 &&
                    (instance->li_flags & LI_EXCLUSIVE) == 0)
                        panic(
                            "lock (%s) %s not exclusively locked",
                            class->lc_name, lock->lo_name);
                if ((flags & LA_SLOCKED) != 0 &&
                    (instance->li_flags & LI_EXCLUSIVE) != 0)
                        panic(
                            "lock (%s) %s exclusively locked",
                            class->lc_name, lock->lo_name);
                if ((flags & LA_RECURSED) != 0 &&
                    (instance->li_flags & LI_RECURSEMASK) == 0)
                        panic("lock (%s) %s not recursed",
                            class->lc_name, lock->lo_name);
                if ((flags & LA_NOTRECURSED) != 0 &&
                    (instance->li_flags & LI_RECURSEMASK) != 0)
                        panic("lock (%s) %s recursed",
                            class->lc_name, lock->lo_name);
                break;
        default:
                panic("invalid lock assertion");
        }
}

static void
witness_setflag(struct lock_object *lock, int flag, int set)
{
        struct lock_list_entry *lock_list;
        struct lock_instance *instance;
        struct lock_class *class;

        if (lock->lo_witness == NULL || witness_watch < 0 ||
            panicstr != NULL || db_active)
                return;
        class = LOCK_CLASS(lock);
        if (class->lc_flags & LC_SLEEPLOCK)
                lock_list = curproc->p_sleeplocks;
        else
                lock_list = witness_cpu[cpu_number()].wc_spinlocks;
        instance = find_instance(lock_list, lock);
        if (instance == NULL) {
                panic("%s: lock (%s) %s not locked", __func__,
                    class->lc_name, lock->lo_name);
                return;
        }

        if (set)
                instance->li_flags |= flag;
        else
                instance->li_flags &= ~flag;
}

void
witness_norelease(struct lock_object *lock)
{

        witness_setflag(lock, LI_NORELEASE, 1);
}

void
witness_releaseok(struct lock_object *lock)
{

        witness_setflag(lock, LI_NORELEASE, 0);
}

#ifdef DDB
static void
witness_ddb_list(struct proc *p)
{
        struct witness_cpu *wc = &witness_cpu[cpu_number()];

        KASSERTMSG(witness_cold == 0, "%s: witness_cold", __func__);
        KASSERTMSG(db_active, "%s: not in the debugger", __func__);

        if (witness_watch < 1)
                return;

        witness_list_locks(&p->p_sleeplocks, db_printf);

        /*
         * We only handle spinlocks if p == curproc.  This is somewhat broken
         * if p is currently executing on some other CPU and holds spin locks
         * as we won't display those locks.  If we had a MI way of getting
         * the per-cpu data for a given cpu then we could use p's CPU index
         * to get the list of spinlocks for this thread and "fix" this.
         *
         * That still wouldn't really fix this unless we locked the scheduler
         * lock or stopped the other CPU to make sure it wasn't changing the
         * list out from under us.  It is probably best to just not try to
         * handle threads on other CPUs for now.
         */
        if (p == curproc && wc->wc_spinlocks != NULL)
                witness_list_locks(&wc->wc_spinlocks, db_printf);
}

void
db_witness_list(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
        struct proc *p;

        if (have_addr)
                p = (struct proc *)addr;
        else
                p = curproc;
        witness_ddb_list(p);
}

void
db_witness_list_all(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        struct lock_list_entry *lock_list;
        struct process *pr;
        struct proc *p;

        CPU_INFO_FOREACH(cii, ci) {
                lock_list = witness_cpu[CPU_INFO_UNIT(ci)].wc_spinlocks;
                if (lock_list == NULL || lock_list->ll_count == 0)
                        continue;
                db_printf("CPU %d:\n", CPU_INFO_UNIT(ci));
                witness_list_locks(&lock_list, db_printf);
        }

        /*
         * It would be nice to list only threads and processes that actually
         * hold sleep locks, but that information is currently not exported
         * by WITNESS.
         */
        LIST_FOREACH(pr, &allprocess, ps_list) {
                if (!witness_process_has_locks(pr))
                        continue;
                TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
                        if (!witness_thread_has_locks(p))
                                continue;
                        db_printf("Process %d (%s) thread %p (%d)\n",
                            pr->ps_pid, pr->ps_comm, p, p->p_tid);
                        witness_ddb_list(p);
                }
        }
}
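
/*
 * witness_print_badstacks() below cannot hold w_mtx across the slow
 * db_printf() calls, so it relies on the generation counter maintained
 * by witness_increment_graph_generation(): snapshot w_generation, drop
 * the mutex while printing, and restart the whole walk whenever a
 * re-check shows that the lock graph changed in the meantime.
 */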
void
witness_print_badstacks(void)
{
        struct witness *w1, *w2;
        int generation, i, j;

        if (witness_watch < 1) {
                db_printf("witness watch is disabled\n");
                return;
        }
        if (witness_cold) {
                db_printf("witness is cold\n");
                return;
        }

restart:
        mtx_enter(&w_mtx);
        generation = w_generation;
        mtx_leave(&w_mtx);
        db_printf("Number of known direct relationships is %d\n",
            w_lohash.wloh_count);
        for (i = 1; i < w_max_used_index; i++) {
                mtx_enter(&w_mtx);
                if (generation != w_generation) {
                        mtx_leave(&w_mtx);

                        /* The graph has changed, try again. */
                        db_printf("Lock graph changed, restarting trace.\n");
                        goto restart;
                }

                w1 = &w_data[i];
                if (w1->w_reversed == 0) {
                        mtx_leave(&w_mtx);
                        continue;
                }
                mtx_leave(&w_mtx);

                for (j = 1; j < w_max_used_index; j++) {
                        if ((w_rmatrix[i][j] & WITNESS_REVERSAL) == 0 || i > j)
                                continue;

                        mtx_enter(&w_mtx);
                        if (generation != w_generation) {
                                mtx_leave(&w_mtx);

                                /* The graph has changed, try again. */
                                db_printf("Lock graph changed, "
                                    "restarting trace.\n");
                                goto restart;
                        }

                        w2 = &w_data[j];
                        mtx_leave(&w_mtx);

                        db_printf("\nLock order reversal between \"%s\"(%s) "
                            "and \"%s\"(%s)!\n",
                            w1->w_type->lt_name, w1->w_class->lc_name,
                            w2->w_type->lt_name, w2->w_class->lc_name);
                        witness_print_cycle(db_printf, w1, w2);
                }
        }
        mtx_enter(&w_mtx);
        if (generation != w_generation) {
                mtx_leave(&w_mtx);

                /*
                 * The graph changed while we were printing stack data,
                 * try again.
                 */
                db_printf("Lock graph changed, restarting trace.\n");
                goto restart;
        }
        mtx_leave(&w_mtx);
}

void
db_witness_display(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
        switch (modif[0]) {
        case 'b':
                witness_print_badstacks();
                break;
        default:
                witness_ddb_display(db_printf);
                break;
        }
}
#endif

void
db_witness_print_fullgraph(void)
{
        struct witness *w;

        if (witness_watch < 1) {
                db_printf("witness watch is disabled\n");
                return;
        }
        if (witness_cold) {
                db_printf("witness is cold\n");
                return;
        }

        mtx_enter(&w_mtx);
        SLIST_FOREACH(w, &w_all, w_list)
                w->w_displayed = 0;
        SLIST_FOREACH(w, &w_all, w_list)
                db_witness_add_fullgraph(w);
        mtx_leave(&w_mtx);
}

static void
db_witness_add_fullgraph(struct witness *w)
{
        int i;

        if (w->w_displayed != 0 || w->w_acquired == 0)
                return;
        w->w_displayed = 1;

        WITNESS_INDEX_ASSERT(w->w_index);
        for (i = 1; i <= w_max_used_index; i++) {
                if (w_rmatrix[w->w_index][i] & WITNESS_PARENT) {
                        db_printf("\"%s\",\"%s\"\n", w->w_type->lt_name,
                            w_data[i].w_type->lt_name);
                        db_witness_add_fullgraph(&w_data[i]);
                }
        }
}

/*
 * A simple hash function.  Takes a key pointer and a key size.  If size == 0,
 * the key is interpreted as a string and read up to the terminating NUL.
 * Otherwise, the first size bytes are read.  Returns an unsigned 32-bit
 * hash value computed from the key.
 */
static uint32_t
witness_hash_djb2(const uint8_t *key, uint32_t size)
{
        unsigned int hash = 5381;
        uint32_t i;

        /* hash = hash * 33 + key[i] */
        if (size)
                for (i = 0; i < size; i++)
                        hash = ((hash << 5) + hash) + (unsigned int)key[i];
        else
                for (i = 0; key[i] != 0; i++)
                        hash = ((hash << 5) + hash) + (unsigned int)key[i];

        return (hash);
}
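
/*
 * Worked example: hashing the two bytes { 'h', 'i' } (size == 2):
 *
 *	hash = 5381
 *	hash = 5381 * 33 + 104 ('h') = 177677
 *	hash = 177677 * 33 + 105 ('i') = 5863446
 *
 * (hash << 5) + hash is the strength-reduced form of hash * 33.
 */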

/*
 * Initializes the two witness hash tables.  Called exactly once from
 * witness_initialize().
 */
static void
witness_init_hash_tables(void)
{
        int i;

        KASSERT(witness_cold);

        /* Initialize the hash tables. */
        for (i = 0; i < WITNESS_HASH_SIZE; i++)
                SLIST_INIT(&w_hash.wh_array[i]);

        w_hash.wh_size = WITNESS_HASH_SIZE;
        w_hash.wh_count = 0;

        /* Initialize the lock order data hash. */
        w_lodata = (void *)uvm_pageboot_alloc(
            sizeof(struct witness_lock_order_data) * WITNESS_LO_DATA_COUNT);
        memset(w_lodata, 0, sizeof(struct witness_lock_order_data) *
            WITNESS_LO_DATA_COUNT);
        w_lofree = NULL;
        for (i = 0; i < WITNESS_LO_DATA_COUNT; i++) {
                w_lodata[i].wlod_next = w_lofree;
                w_lofree = &w_lodata[i];
        }
        w_lohash.wloh_size = WITNESS_LO_HASH_SIZE;
        w_lohash.wloh_count = 0;
        for (i = 0; i < WITNESS_LO_HASH_SIZE; i++)
                w_lohash.wloh_array[i] = NULL;
}

static struct witness *
witness_hash_get(const struct lock_type *type, const char *subtype)
{
        struct witness *w;
        uint32_t hash;

        KASSERT(type != NULL);
        if (witness_cold == 0)
                MUTEX_ASSERT_LOCKED(&w_mtx);
        hash = (uint32_t)((uintptr_t)type ^ (uintptr_t)subtype) %
            w_hash.wh_size;
        SLIST_FOREACH(w, &w_hash.wh_array[hash], w_hash_next) {
                if (w->w_type == type && w->w_subtype == subtype)
                        goto out;
        }

out:
        return (w);
}

static void
witness_hash_put(struct witness *w)
{
        uint32_t hash;

        KASSERT(w != NULL);
        KASSERT(w->w_type != NULL);
        if (witness_cold == 0)
                MUTEX_ASSERT_LOCKED(&w_mtx);
        KASSERTMSG(witness_hash_get(w->w_type, w->w_subtype) == NULL,
            "%s: trying to add a hash entry that already exists!", __func__);
        KASSERTMSG(SLIST_NEXT(w, w_hash_next) == NULL,
            "%s: w->w_hash_next != NULL", __func__);

        hash = (uint32_t)((uintptr_t)w->w_type ^ (uintptr_t)w->w_subtype) %
            w_hash.wh_size;
        SLIST_INSERT_HEAD(&w_hash.wh_array[hash], w, w_hash_next);
        w_hash.wh_count++;
}

static struct witness_lock_order_data *
witness_lock_order_get(struct witness *parent, struct witness *child)
{
        struct witness_lock_order_data *data = NULL;
        struct witness_lock_order_key key;
        unsigned int hash;

        KASSERT(parent != NULL && child != NULL);
        key.from = parent->w_index;
        key.to = child->w_index;
        WITNESS_INDEX_ASSERT(key.from);
        WITNESS_INDEX_ASSERT(key.to);
        if ((w_rmatrix[parent->w_index][child->w_index] &
            WITNESS_LOCK_ORDER_KNOWN) == 0)
                goto out;

        hash = witness_hash_djb2((const uint8_t *)&key,
            sizeof(key)) % w_lohash.wloh_size;
        data = w_lohash.wloh_array[hash];
        while (data != NULL) {
                if (witness_lock_order_key_equal(&data->wlod_key, &key))
                        break;
                data = data->wlod_next;
        }

out:
        return (data);
}
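
/*
 * Example: after "a before b" is recorded by witness_lock_order_add(),
 * the WITNESS_LOCK_ORDER_KNOWN bit is set in w_rmatrix[a][b] and
 * witness_lock_order_get(a, b) returns the entry whose wlod_stack shows
 * where that order was first established; witness_print_cycle_edge()
 * uses this when reporting a reversal.
 */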

/*
 * Verify that parent and child have a known relationship, are not the same,
 * and child is actually a child of parent.  This is done without w_mtx
 * to avoid contention in the common case.
 */
static int
witness_lock_order_check(struct witness *parent, struct witness *child)
{

        if (parent != child &&
            w_rmatrix[parent->w_index][child->w_index] &
            WITNESS_LOCK_ORDER_KNOWN &&
            isitmychild(parent, child))
                return (1);

        return (0);
}

static int
witness_lock_order_add(struct witness *parent, struct witness *child)
{
        static int lofree_empty_reported = 0;
        struct witness_lock_order_data *data = NULL;
        struct witness_lock_order_key key;
        unsigned int hash;

        KASSERT(parent != NULL && child != NULL);
        key.from = parent->w_index;
        key.to = child->w_index;
        WITNESS_INDEX_ASSERT(key.from);
        WITNESS_INDEX_ASSERT(key.to);
        if (w_rmatrix[parent->w_index][child->w_index] &
            WITNESS_LOCK_ORDER_KNOWN)
                return (1);

        hash = witness_hash_djb2((const uint8_t *)&key,
            sizeof(key)) % w_lohash.wloh_size;
        w_rmatrix[parent->w_index][child->w_index] |= WITNESS_LOCK_ORDER_KNOWN;
        data = w_lofree;
        if (data == NULL) {
                if (!lofree_empty_reported) {
                        lofree_empty_reported = 1;
                        printf("witness: out of free lock order entries\n");
                }
                return (0);
        }
        w_lofree = data->wlod_next;
        data->wlod_next = w_lohash.wloh_array[hash];
        data->wlod_key = key;
        w_lohash.wloh_array[hash] = data;
        w_lohash.wloh_count++;
        stacktrace_save_at(&data->wlod_stack, 1);
        return (1);
}

/* Call this whenever the structure of the witness graph changes. */
static void
witness_increment_graph_generation(void)
{

        if (witness_cold == 0)
                MUTEX_ASSERT_LOCKED(&w_mtx);
        w_generation++;
}

static void
witness_debugger(int dump)
{
        switch (witness_watch) {
        case 1:
                break;
        case 2:
                if (dump)
                        db_stack_dump();
                break;
        case 3:
                if (dump)
                        db_stack_dump();
                db_enter();
                break;
        default:
                panic("witness: locking error");
        }
}

static int
witness_alloc_stacks(void)
{
        union lock_stack *stacks;
        unsigned int i, nstacks = LOCK_CHILDCOUNT * LOCK_NCHILDREN;

        rw_assert_wrlock(&w_ctlock);

        if (w_lock_stack_num >= nstacks)
                return (0);

        nstacks -= w_lock_stack_num;
        stacks = mallocarray(nstacks, sizeof(*stacks), M_WITNESS,
            M_WAITOK | M_CANFAIL | M_ZERO);
        if (stacks == NULL)
                return (ENOMEM);

        mtx_enter(&w_mtx);
        for (i = 0; i < nstacks; i++) {
                stacks[i].ls_next = w_lock_stack_free;
                w_lock_stack_free = &stacks[i];
        }
        mtx_leave(&w_mtx);
        w_lock_stack_num += nstacks;

        return (0);
}
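
/*
 * The handler below backs the kern.witness sysctl subtree.  For
 * example, turning on lock stack traces at runtime:
 *
 *	# sysctl kern.witness.locktrace=1
 *
 * takes the KERN_WITNESS_LOCKTRACE case, and witness_alloc_stacks()
 * above must succeed before witness_locktrace is flipped on.
 */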
int
witness_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
        int error, value;

        if (namelen != 1)
                return (ENOTDIR);

        rw_enter_write(&w_ctlock);

        switch (name[0]) {
        case KERN_WITNESS_WATCH:
                error = witness_sysctl_watch(oldp, oldlenp, newp, newlen);
                break;
        case KERN_WITNESS_LOCKTRACE:
                value = witness_locktrace;
                error = sysctl_int(oldp, oldlenp, newp, newlen, &value);
                if (error == 0 && newp != NULL) {
                        switch (value) {
                        case 1:
                                error = witness_alloc_stacks();
                                /* FALLTHROUGH */
                        case 0:
                                if (error == 0)
                                        witness_locktrace = value;
                                break;
                        default:
                                error = EINVAL;
                                break;
                        }
                }
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }

        rw_exit_write(&w_ctlock);

        return (error);
}

int
witness_sysctl_watch(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
        int error;
        int value;

        value = witness_watch;
        error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
            &value, -1, 3);
        if (error == 0 && newp != NULL) {
                mtx_enter(&w_mtx);
                if (value < 0 || witness_watch >= 0)
                        witness_watch = value;
                else
                        error = EINVAL;
                mtx_leave(&w_mtx);
        }
        return (error);
}
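
/*
 * Summary of the witness_watch values accepted above: -1 disables
 * witness for good (witness_sysctl_watch() refuses to raise the value
 * once it has gone negative), 0 pauses checking, 1 enables lock order
 * checks, 2 additionally dumps stacks on errors, and 3 also drops into
 * ddb; see witness_debugger().
 */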