/*	$NetBSD: lockstat.c,v 1.19 2014/07/25 08:10:35 dholland Exp $	*/

/*-
 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lock statistics driver, providing kernel support for the lockstat(8)
 * command.
 *
 * We use a global lock word (lockstat_lock) to serialize device opens:
 * only one thread may hold the device open at a time.
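 *
 * Buffers are preallocated as a single contiguous block by
 * lockstat_alloc() and dealt out to the CPUs by lockstat_init_tables().
 * Each CPU gets a private free list and a small hash table keyed on the
 * lock/call-site pair, so lockstat_event() can record events without
 * any cross-CPU synchronization.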
 *
 * XXX Timings for contention on sleep locks are currently incorrect.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.19 2014/07/25 08:10:35 dholland Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/syslog.h>
#include <sys/atomic.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

#ifndef __HAVE_CPU_COUNTER
#error CPU counters not available
#endif

#if LONG_BIT == 64
#define	LOCKSTAT_HASH_SHIFT	3
#elif LONG_BIT == 32
#define	LOCKSTAT_HASH_SHIFT	2
#endif

#define	LOCKSTAT_MINBUFS	1000
#define	LOCKSTAT_DEFBUFS	10000
#define	LOCKSTAT_MAXBUFS	1000000

#define	LOCKSTAT_HASH_SIZE	128
#define	LOCKSTAT_HASH_MASK	(LOCKSTAT_HASH_SIZE - 1)
#define	LOCKSTAT_HASH(key)	\
	((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)

typedef struct lscpu {
	SLIST_HEAD(, lsbuf)	lc_free;
	u_int			lc_overflow;
	LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
} lscpu_t;

typedef struct lslist	lslist_t;

void	lockstatattach(int);
void	lockstat_start(lsenable_t *);
int	lockstat_alloc(lsenable_t *);
void	lockstat_init_tables(lsenable_t *);
int	lockstat_stop(lsdisable_t *);
void	lockstat_free(void);

dev_type_open(lockstat_open);
dev_type_close(lockstat_close);
dev_type_read(lockstat_read);
dev_type_ioctl(lockstat_ioctl);

volatile u_int	lockstat_enabled;
uintptr_t	lockstat_csstart;
uintptr_t	lockstat_csend;
uintptr_t	lockstat_csmask;
uintptr_t	lockstat_lamask;
uintptr_t	lockstat_lockstart;
uintptr_t	lockstat_lockend;
__cpu_simple_lock_t lockstat_lock;
lwp_t		*lockstat_lwp;
lsbuf_t		*lockstat_baseb;
size_t		lockstat_sizeb;
int		lockstat_busy;
struct timespec	lockstat_stime;

const struct cdevsw lockstat_cdevsw = {
	.d_open = lockstat_open,
	.d_close = lockstat_close,
	.d_read = lockstat_read,
	.d_write = nowrite,
	.d_ioctl = lockstat_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

/*
 * Called when the pseudo-driver is attached.
 */
void
lockstatattach(int nunits)
{

	(void)nunits;

	__cpu_simple_lock_init(&lockstat_lock);
}
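
/*
 * The records collected here are produced in the lock primitives via
 * the LOCKSTAT_* macros in <dev/lockstat.h>.  A sketch of a typical
 * producer, loosely modelled on the adaptive-mutex spin path
 * (illustrative only, not verbatim kernel code):
 *
 *	LOCKSTAT_TIMER(spintime);
 *	LOCKSTAT_FLAG(lsflag);
 *	LOCKSTAT_ENTER(lsflag);
 *	LOCKSTAT_START_TIMER(lsflag, spintime);
 *	... spin until the lock is free ...
 *	LOCKSTAT_STOP_TIMER(lsflag, spintime);
 *	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN, 1, spintime);
 *	LOCKSTAT_EXIT(lsflag);
 */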

/*
 * Prepare the per-CPU tables for use, or clear down tables when tracing is
 * stopped.
 */
void
lockstat_init_tables(lsenable_t *le)
{
	int i, per, slop, cpuno;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lscpu_t *lc;
	lsbuf_t *lb;

	KASSERT(!lockstat_enabled);

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci->ci_lockstat != NULL) {
			kmem_free(ci->ci_lockstat, sizeof(lscpu_t));
			ci->ci_lockstat = NULL;
		}
	}

	if (le == NULL)
		return;

	/*
	 * Deal the buffers out evenly; the remainder ("slop") goes one
	 * apiece to the first CPUs.
	 */
	lb = lockstat_baseb;
	per = le->le_nbufs / ncpu;
	slop = le->le_nbufs - (per * ncpu);
	cpuno = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		lc = kmem_alloc(sizeof(*lc), KM_SLEEP);
		lc->lc_overflow = 0;
		ci->ci_lockstat = lc;

		SLIST_INIT(&lc->lc_free);
		for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
			LIST_INIT(&lc->lc_hash[i]);

		for (i = per; i != 0; i--, lb++) {
			lb->lb_cpu = (uint16_t)cpuno;
			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
		}
		if (slop-- > 0) {
			lb->lb_cpu = (uint16_t)cpuno;
			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
			lb++;
		}
		cpuno++;
	}
}

/*
 * Start collecting lock statistics.
 */
void
lockstat_start(lsenable_t *le)
{

	KASSERT(!lockstat_enabled);

	lockstat_init_tables(le);

	if ((le->le_flags & LE_CALLSITE) != 0)
		lockstat_csmask = (uintptr_t)-1LL;
	else
		lockstat_csmask = 0;

	if ((le->le_flags & LE_LOCK) != 0)
		lockstat_lamask = (uintptr_t)-1LL;
	else
		lockstat_lamask = 0;

	lockstat_csstart = le->le_csstart;
	lockstat_csend = le->le_csend;
	lockstat_lockstart = le->le_lockstart;
	lockstat_lockend = le->le_lockend;
	membar_sync();
	getnanotime(&lockstat_stime);
	lockstat_enabled = le->le_mask;
	membar_producer();
}

/*
 * Stop collecting lock statistics.
 */
int
lockstat_stop(lsdisable_t *ld)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	u_int cpuno, overflow;
	struct timespec ts;
	int error;
	lwp_t *l;

	KASSERT(lockstat_enabled);

	/*
	 * Set enabled false, force a write barrier, and wait for other CPUs
	 * to exit lockstat_event().  The wchan is never signalled, so this
	 * is simply a 10ms pause.
	 */
	lockstat_enabled = 0;
	membar_producer();
	getnanotime(&ts);
	tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));

	/*
	 * Did we run out of buffers while tracing?
	 */
	overflow = 0;
	for (CPU_INFO_FOREACH(cii, ci))
		overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;

	if (overflow != 0) {
		error = EOVERFLOW;
		log(LOG_NOTICE, "lockstat: %u buffer allocations failed\n",
		    overflow);
	} else
		error = 0;

	lockstat_init_tables(NULL);

	/* Run through all LWPs and clear the slate for the next run. */
	mutex_enter(proc_lock);
	LIST_FOREACH(l, &alllwp, l_list) {
		l->l_pfailaddr = 0;
		l->l_pfailtime = 0;
		l->l_pfaillock = 0;
	}
	mutex_exit(proc_lock);

	if (ld == NULL)
		return error;

	/*
	 * Fill out the disable struct for the caller.
	 */
	timespecsub(&ts, &lockstat_stime, &ld->ld_time);
	ld->ld_size = lockstat_sizeb;

	cpuno = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
			log(LOG_WARNING, "lockstat: too many CPUs\n");
			break;
		}
		ld->ld_freq[cpuno++] = cpu_frequency(ci);
	}

	return error;
}
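
/*
 * Note that lb_times accumulates raw cycle counts from the CPU counter;
 * converting them to real time is left to the consumer, which is expected
 * to use the per-CPU frequencies reported in ld_freq above.  (This is how
 * lockstat(8) interprets the data; nothing here enforces it.)
 */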

/*
 * Allocate buffers for lockstat_start().
 */
int
lockstat_alloc(lsenable_t *le)
{
	lsbuf_t *lb;
	size_t sz;

	KASSERT(!lockstat_enabled);
	lockstat_free();

	sz = sizeof(*lb) * le->le_nbufs;

	lb = kmem_zalloc(sz, KM_SLEEP);
	if (lb == NULL)
		return (ENOMEM);

	KASSERT(!lockstat_enabled);
	KASSERT(lockstat_baseb == NULL);
	lockstat_sizeb = sz;
	lockstat_baseb = lb;

	return (0);
}

/*
 * Free allocated buffers after tracing has stopped.
 */
void
lockstat_free(void)
{

	KASSERT(!lockstat_enabled);

	if (lockstat_baseb != NULL) {
		kmem_free(lockstat_baseb, lockstat_sizeb);
		lockstat_baseb = NULL;
	}
}

/*
 * Main entry point from the lock primitives.
 */
void
lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
    uint64_t cycles)
{
	lslist_t *ll;
	lscpu_t *lc;
	lsbuf_t *lb;
	u_int event;
	int s;

	if ((flags & lockstat_enabled) != flags || count == 0)
		return;
	if (lock < lockstat_lockstart || lock > lockstat_lockend)
		return;
	if (callsite < lockstat_csstart || callsite > lockstat_csend)
		return;

	callsite &= lockstat_csmask;
	lock &= lockstat_lamask;

	/*
	 * Find the table for this lock+callsite pair, and try to locate a
	 * buffer with the same key.
	 */
	s = splhigh();
	lc = curcpu()->ci_lockstat;
	ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
	event = (flags & LB_EVENT_MASK) - 1;

	LIST_FOREACH(lb, ll, lb_chain.list) {
		if (lb->lb_lock == lock && lb->lb_callsite == callsite)
			break;
	}

	if (lb != NULL) {
		/*
		 * We found a record.  Move it to the front of the list, as
		 * we're likely to hit it again soon.
		 */
		if (lb != LIST_FIRST(ll)) {
			LIST_REMOVE(lb, lb_chain.list);
			LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		}
		lb->lb_counts[event] += count;
		lb->lb_times[event] += cycles;
	} else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
		/*
		 * Pinch a new buffer and fill it out.
		 */
		SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
		LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		lb->lb_flags = (uint16_t)flags;
		lb->lb_lock = lock;
		lb->lb_callsite = callsite;
		lb->lb_counts[event] = count;
		lb->lb_times[event] = cycles;
	} else {
		/*
		 * We didn't find a buffer and there were none free.
		 * lockstat_stop() will notice later on and report the
		 * error.
		 */
		lc->lc_overflow++;
	}

	splx(s);
}

/*
 * Accept an open() on /dev/lockstat.
 */
int
lockstat_open(dev_t dev, int flag, int mode, lwp_t *l)
{

	if (!__cpu_simple_lock_try(&lockstat_lock))
		return EBUSY;
	lockstat_lwp = curlwp;
	return 0;
}

/*
 * Accept the last close() on /dev/lockstat.
 */
int
lockstat_close(dev_t dev, int flag, int mode, lwp_t *l)
{

	lockstat_lwp = NULL;
	__cpu_simple_unlock(&lockstat_lock);
	return 0;
}
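
/*
 * The control protocol, from the consumer's side.  A minimal sketch of
 * what a hypothetical userland client would do (error handling omitted;
 * lockstat(8) is the real consumer):
 *
 *	int fd = open("/dev/lockstat", O_RDONLY);
 *	int version;
 *	ioctl(fd, IOC_LOCKSTAT_GVERSION, &version);
 *	lsenable_t le = { .le_nbufs = 0 };	// 0 selects LOCKSTAT_DEFBUFS
 *	ioctl(fd, IOC_LOCKSTAT_ENABLE, &le);	// start tracing
 *	... run the workload of interest ...
 *	lsdisable_t ld;
 *	ioctl(fd, IOC_LOCKSTAT_DISABLE, &ld);	// stop tracing
 *	read(fd, buf, ld.ld_size);		// fetch the lsbuf_t records
 *	close(fd);
 */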

/*
 * Handle control operations.
 */
int
lockstat_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
{
	lsenable_t *le;
	int error;

	if (lockstat_lwp != curlwp)
		return EBUSY;

	switch (cmd) {
	case IOC_LOCKSTAT_GVERSION:
		*(int *)data = LS_VERSION;
		error = 0;
		break;

	case IOC_LOCKSTAT_ENABLE:
		le = (lsenable_t *)data;

		if (!cpu_hascounter()) {
			error = ENODEV;
			break;
		}
		if (lockstat_enabled) {
			error = EBUSY;
			break;
		}

		/*
		 * Sanitize the arguments passed in and set up filtering.
		 */
		if (le->le_nbufs == 0)
			le->le_nbufs = LOCKSTAT_DEFBUFS;
		else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
		    le->le_nbufs < LOCKSTAT_MINBUFS) {
			error = EINVAL;
			break;
		}
		if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
			/* Match any call site: 0 .. (uintptr_t)-1. */
			le->le_csstart = 0;
			le->le_csend = le->le_csstart - 1;
		}
		if ((le->le_flags & LE_ONE_LOCK) == 0) {
			/* Match any lock address: 0 .. (uintptr_t)-1. */
			le->le_lockstart = 0;
			le->le_lockend = le->le_lockstart - 1;
		}
		if ((le->le_mask & LB_EVENT_MASK) == 0)
			return EINVAL;
		if ((le->le_mask & LB_LOCK_MASK) == 0)
			return EINVAL;

		/*
		 * Start tracing.
		 */
		if ((error = lockstat_alloc(le)) == 0)
			lockstat_start(le);
		break;

	case IOC_LOCKSTAT_DISABLE:
		if (!lockstat_enabled)
			error = EINVAL;
		else
			error = lockstat_stop((lsdisable_t *)data);
		break;

	default:
		error = ENOTTY;
		break;
	}

	return error;
}

/*
 * Copy buffers out to user-space.
 */
int
lockstat_read(dev_t dev, struct uio *uio, int flag)
{

	if (curlwp != lockstat_lwp || lockstat_enabled)
		return EBUSY;
	return uiomove(lockstat_baseb, lockstat_sizeb, uio);
}