/*	$NetBSD: lockstat.c,v 1.3 2006/10/12 06:56:47 xtraeme Exp $	*/

/*-
 * Copyright (c) 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Lock statistics driver, providing kernel support for the lockstat(8)
 * command.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.3 2006/10/12 06:56:47 xtraeme Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/syslog.h>

#include <dev/lockstat.h>

#ifndef __HAVE_CPU_COUNTER
#error CPU counters not available
#endif

#if LONG_BIT == 64
#define	LOCKSTAT_HASH_SHIFT	3
#elif LONG_BIT == 32
#define	LOCKSTAT_HASH_SHIFT	2
#endif

#define	LOCKSTAT_MINBUFS	100
#define	LOCKSTAT_DEFBUFS	1000
#define	LOCKSTAT_MAXBUFS	10000

#define	LOCKSTAT_HASH_SIZE	64
#define	LOCKSTAT_HASH_MASK	(LOCKSTAT_HASH_SIZE - 1)
#define	LOCKSTAT_HASH(key)	\
	((key >> LOCKSTAT_HASH_SHIFT) & LOCKSTAT_HASH_MASK)

typedef struct lscpu {
	SLIST_HEAD(, lsbuf)	lc_free;
	u_int			lc_overflow;
	LIST_HEAD(lslist, lsbuf) lc_hash[LOCKSTAT_HASH_SIZE];
} lscpu_t;

typedef struct lslist	lslist_t;

void	lockstatattach(int);
void	lockstat_start(lsenable_t *);
int	lockstat_alloc(lsenable_t *);
void	lockstat_init_tables(lsenable_t *);
int	lockstat_stop(lsdisable_t *);
void	lockstat_free(void);

dev_type_open(lockstat_open);
dev_type_close(lockstat_close);
dev_type_read(lockstat_read);
dev_type_ioctl(lockstat_ioctl);

/* Protected against write by lockstat_lock().  Used by lockstat_event(). */
volatile u_int	lockstat_enabled;
uintptr_t	lockstat_csstart;
uintptr_t	lockstat_csend;
uintptr_t	lockstat_csmask;
uintptr_t	lockstat_lockaddr;

/* Protected by lockstat_lock(). */
struct simplelock lockstat_slock;
lsbuf_t		*lockstat_baseb;
size_t		lockstat_sizeb;
int		lockstat_busy;
int		lockstat_devopen;
struct timespec	lockstat_stime;

const struct cdevsw lockstat_cdevsw = {
	lockstat_open, lockstat_close, lockstat_read, nowrite, lockstat_ioctl,
	nostop, notty, nopoll, nommap, nokqfilter, 0
};

MALLOC_DEFINE(M_LOCKSTAT, "lockstat", "lockstat event buffers");

/*
 * Called when the pseudo-driver is attached.
 */
void
lockstatattach(int nunits)
{

	(void)nunits;

	__cpu_simple_lock_init(&lockstat_slock.lock_data);
}

/*
 * Grab the global lock.  If busy is set, we want to block out operations on
 * the control device.
 */
static inline int
lockstat_lock(int busy)
{

	if (!__cpu_simple_lock_try(&lockstat_slock.lock_data))
		return (EBUSY);
	if (busy) {
		if (lockstat_busy) {
			__cpu_simple_unlock(&lockstat_slock.lock_data);
			return (EBUSY);
		}
		lockstat_busy = 1;
	}
	KASSERT(lockstat_busy);

	return 0;
}

/*
 * Release the global lock.  If unbusy is set, we want to allow new
 * operations on the control device.
 */
static inline void
lockstat_unlock(int unbusy)
{

	KASSERT(lockstat_busy);
	if (unbusy)
		lockstat_busy = 0;
	__cpu_simple_unlock(&lockstat_slock.lock_data);
}
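/*
 * Illustrative sketch only, not compiled into the driver: the locking
 * pattern the control paths below follow.  lockstat_lock(1) and
 * lockstat_unlock(1) bracket a whole control operation and set/clear
 * lockstat_busy, while an inner lockstat_unlock(0)/lockstat_lock(0) pair
 * drops only the simplelock around code that may sleep, keeping the
 * operation marked busy throughout.
 *
 *	if ((error = lockstat_lock(1)) != 0)
 *		return error;
 *	lockstat_unlock(0);
 *	... code that may sleep, e.g. malloc(..., M_WAITOK) ...
 *	(void)lockstat_lock(0);
 *	lockstat_unlock(1);
 */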
/*
 * Prepare the per-CPU tables for use, or clear down tables when tracing is
 * stopped.
 */
void
lockstat_init_tables(lsenable_t *le)
{
	int i, ncpu, per, slop, cpuno;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lscpu_t *lc;
	lsbuf_t *lb;

	KASSERT(!lockstat_enabled);

	ncpu = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci->ci_lockstat != NULL) {
			free(ci->ci_lockstat, M_LOCKSTAT);
			ci->ci_lockstat = NULL;
		}
		ncpu++;
	}

	if (le == NULL)
		return;

	lb = lockstat_baseb;
	per = le->le_nbufs / ncpu;
	slop = le->le_nbufs - (per * ncpu);
	cpuno = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		lc = malloc(sizeof(*lc), M_LOCKSTAT, M_WAITOK);
		lc->lc_overflow = 0;
		ci->ci_lockstat = lc;

		SLIST_INIT(&lc->lc_free);
		for (i = 0; i < LOCKSTAT_HASH_SIZE; i++)
			LIST_INIT(&lc->lc_hash[i]);

		for (i = per; i != 0; i--, lb++) {
			lb->lb_cpu = (uint16_t)cpuno;
			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
		}
		if (--slop > 0) {
			lb->lb_cpu = (uint16_t)cpuno;
			SLIST_INSERT_HEAD(&lc->lc_free, lb, lb_chain.slist);
			lb++;
		}
		cpuno++;
	}
}

/*
 * Start collecting lock statistics.
 */
void
lockstat_start(lsenable_t *le)
{

	KASSERT(!lockstat_enabled);

	lockstat_init_tables(le);

	if ((le->le_flags & LE_CALLSITE) != 0)
		lockstat_csmask = (uintptr_t)-1LL;
	else
		lockstat_csmask = 0;

	lockstat_csstart = le->le_csstart;
	lockstat_csend = le->le_csend;
	lockstat_lockaddr = le->le_lock;

	/*
	 * Force a write barrier.  XXX This may not be sufficient..
	 */
	lockstat_unlock(0);
	tsleep(&lockstat_start, PPAUSE, "lockstat", mstohz(10));
	(void)lockstat_lock(0);

	getnanotime(&lockstat_stime);
	lockstat_enabled = le->le_mask;
	lockstat_unlock(0);
	(void)lockstat_lock(0);
}

/*
 * Stop collecting lock statistics.
 */
int
lockstat_stop(lsdisable_t *ld)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	u_int cpuno, overflow;
	struct timespec ts;
	int error;

	KASSERT(lockstat_enabled);

	/*
	 * Set enabled false, force a write barrier, and wait for other CPUs
	 * to exit lockstat_event().  XXX This may not be sufficient..
	 */
	lockstat_enabled = 0;
	lockstat_unlock(0);
	getnanotime(&ts);
	tsleep(&lockstat_stop, PPAUSE, "lockstat", mstohz(10));
	(void)lockstat_lock(0);

	/*
	 * Did we run out of buffers while tracing?
	 */
	overflow = 0;
	for (CPU_INFO_FOREACH(cii, ci))
		overflow += ((lscpu_t *)ci->ci_lockstat)->lc_overflow;

	if (overflow != 0) {
		error = EOVERFLOW;
		log(LOG_NOTICE, "lockstat: %d buffer allocations failed\n",
		    overflow);
	} else
		error = 0;

	lockstat_init_tables(NULL);

	if (ld == NULL)
		return (error);

	/*
	 * Fill out the disable struct for the caller.
	 */
	timespecsub(&ts, &lockstat_stime, &ld->ld_time);
	ld->ld_size = lockstat_sizeb;

	cpuno = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		/* >= rather than >: otherwise we write one past ld_freq[]. */
		if (cpuno >= sizeof(ld->ld_freq) / sizeof(ld->ld_freq[0])) {
			log(LOG_WARNING, "lockstat: too many CPUs\n");
			break;
		}
		ld->ld_freq[cpuno++] = cpu_frequency(ci);
	}

	return (error);
}
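/*
 * Note on buffer distribution (see lockstat_init_tables() above): the
 * contiguous lockstat_baseb array is split evenly across the CPUs' free
 * lists, with any remainder (slop) handed out one buffer at a time to the
 * leading CPUs.  For example, le_nbufs = 1000 on a 4-CPU machine gives
 * per = 250 and slop = 0, so each CPU receives 250 buffers.  lb_cpu records
 * the owning CPU so userland can pair each buffer with the matching
 * ld_freq[] entry filled in by lockstat_stop().
 */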
/*
 * Allocate buffers for lockstat_start().
 */
int
lockstat_alloc(lsenable_t *le)
{
	lsbuf_t *lb;
	size_t sz;

	KASSERT(!lockstat_enabled);
	lockstat_free();

	sz = sizeof(*lb) * le->le_nbufs;

	lockstat_unlock(0);
	lb = malloc(sz, M_LOCKSTAT, M_WAITOK | M_ZERO);
	(void)lockstat_lock(0);

	if (lb == NULL)
		return (ENOMEM);

	KASSERT(!lockstat_enabled);
	KASSERT(lockstat_baseb == NULL);
	lockstat_sizeb = sz;
	lockstat_baseb = lb;

	return (0);
}

/*
 * Free allocated buffers after tracing has stopped.
 */
void
lockstat_free(void)
{

	KASSERT(!lockstat_enabled);

	if (lockstat_baseb != NULL) {
		free(lockstat_baseb, M_LOCKSTAT);
		lockstat_baseb = NULL;
	}
}

/*
 * Main entry point from lock primitives.
 */
void
lockstat_event(uintptr_t lock, uintptr_t callsite, u_int flags, u_int count,
    uint64_t time)
{
	lslist_t *ll;
	lscpu_t *lc;
	lsbuf_t *lb;
	u_int event;
	int s;

	if ((flags & lockstat_enabled) != flags || count == 0)
		return;
	if (lockstat_lockaddr != 0 && lock != lockstat_lockaddr)
		return;
	if (callsite < lockstat_csstart || callsite > lockstat_csend)
		return;

	callsite &= lockstat_csmask;

	/*
	 * Find the table for this lock+callsite pair, and try to locate a
	 * buffer with the same key.
	 */
	lc = curcpu()->ci_lockstat;
	ll = &lc->lc_hash[LOCKSTAT_HASH(lock ^ callsite)];
	event = (flags & LB_EVENT_MASK) - 1;
	s = spllock();

	LIST_FOREACH(lb, ll, lb_chain.list) {
		if (lb->lb_lock == lock && lb->lb_callsite == callsite)
			break;
	}

	if (lb != NULL) {
		/*
		 * We found a record.  Move it to the front of the list, as
		 * we're likely to hit it again soon.
		 */
		if (lb != LIST_FIRST(ll)) {
			LIST_REMOVE(lb, lb_chain.list);
			LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		}
		lb->lb_counts[event] += count;
		lb->lb_times[event] += time;
	} else if ((lb = SLIST_FIRST(&lc->lc_free)) != NULL) {
		/*
		 * Pinch a new buffer and fill it out.
		 */
		SLIST_REMOVE_HEAD(&lc->lc_free, lb_chain.slist);
		LIST_INSERT_HEAD(ll, lb, lb_chain.list);
		lb->lb_flags = (uint16_t)flags;
		lb->lb_lock = lock;
		lb->lb_callsite = callsite;
		lb->lb_counts[event] = count;
		lb->lb_times[event] = time;
	} else {
		/*
		 * We didn't find a buffer and there were none free.
		 * lockstat_stop() will notice later on and report the
		 * error.
		 */
		lc->lc_overflow++;
	}

	splx(s);
}

/*
 * Accept an open() on /dev/lockstat.
 */
int
lockstat_open(dev_t dev __unused, int flag __unused, int mode __unused,
    struct lwp *l __unused)
{
	int error;

	if ((error = lockstat_lock(1)) != 0)
		return error;

	if (lockstat_devopen)
		error = EBUSY;
	else {
		lockstat_devopen = 1;
		error = 0;
	}

	lockstat_unlock(1);

	return error;
}

/*
 * Accept the last close() on /dev/lockstat.
 */
int
lockstat_close(dev_t dev __unused, int flag __unused, int mode __unused,
    struct lwp *l __unused)
{
	int error;

	if ((error = lockstat_lock(1)) == 0) {
		if (lockstat_enabled)
			(void)lockstat_stop(NULL);
		lockstat_free();
		lockstat_devopen = 0;
		lockstat_unlock(1);
	}

	return error;
}
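/*
 * Illustrative sketch only, not part of the driver: roughly how a lock
 * primitive might feed lockstat_event() above.  try_acquire() is a
 * hypothetical stand-in for the primitive's own acquire attempt, "flags"
 * must combine one event bit (LB_EVENT_MASK) and one lock-class bit
 * (LB_LOCK_MASK) from <dev/lockstat.h>, and cpu_counter() is assumed to be
 * the cycle counter behind the cpu_hascounter()/cpu_frequency() calls used
 * elsewhere in this file.
 *
 *	uint64_t start;
 *	u_int spins;
 *
 *	spins = 0;
 *	start = cpu_counter();
 *	while (!try_acquire(lock))
 *		spins++;
 *	if (spins != 0)
 *		lockstat_event((uintptr_t)lock,
 *		    (uintptr_t)__builtin_return_address(0), flags, spins,
 *		    cpu_counter() - start);
 */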
/*
 * Handle control operations.
 */
int
lockstat_ioctl(dev_t dev __unused, u_long cmd, caddr_t data,
    int flag __unused, struct lwp *l __unused)
{
	lsenable_t *le;
	int error;

	if ((error = lockstat_lock(1)) != 0)
		return error;

	switch (cmd) {
	case IOC_LOCKSTAT_GVERSION:
		*(int *)data = LS_VERSION;
		error = 0;
		break;

	case IOC_LOCKSTAT_ENABLE:
		le = (lsenable_t *)data;

		if (!cpu_hascounter()) {
			error = ENODEV;
			break;
		}
		if (lockstat_enabled) {
			error = EBUSY;
			break;
		}

		/*
		 * Sanitize the arguments passed in and set up filtering.
		 */
		if (le->le_nbufs == 0)
			le->le_nbufs = LOCKSTAT_DEFBUFS;
		else if (le->le_nbufs > LOCKSTAT_MAXBUFS ||
		    le->le_nbufs < LOCKSTAT_MINBUFS) {
			error = EINVAL;
			break;
		}
		if ((le->le_flags & LE_ONE_CALLSITE) == 0) {
			le->le_csstart = 0;
			le->le_csend = le->le_csstart - 1;
		}
		if ((le->le_flags & LE_ONE_LOCK) == 0)
			le->le_lock = 0;

		/* Break, don't return: the busy lock is still held. */
		if ((le->le_mask & LB_EVENT_MASK) == 0) {
			error = EINVAL;
			break;
		}
		if ((le->le_mask & LB_LOCK_MASK) == 0) {
			error = EINVAL;
			break;
		}

		/*
		 * Start tracing.
		 */
		if ((error = lockstat_alloc(le)) == 0)
			lockstat_start(le);
		break;

	case IOC_LOCKSTAT_DISABLE:
		if (!lockstat_enabled)
			error = EINVAL;
		else
			error = lockstat_stop((lsdisable_t *)data);
		break;

	default:
		error = ENOTTY;
		break;
	}

	lockstat_unlock(1);
	return error;
}

/*
 * Copy buffers out to user-space.
 */
int
lockstat_read(dev_t dev __unused, struct uio *uio, int flag __unused)
{
	int error;

	if ((error = lockstat_lock(1)) != 0)
		return (error);

	if (lockstat_enabled) {
		lockstat_unlock(1);
		return (EBUSY);
	}

	lockstat_unlock(0);
	error = uiomove(lockstat_baseb, lockstat_sizeb, uio);
	(void)lockstat_lock(0);

	lockstat_unlock(1);

	return (error);
}
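/*
 * Illustrative sketch only: how a userland consumer such as lockstat(8)
 * might drive this interface.  Error handling is omitted, and the LB_ and
 * LE_ constants needed to fill in le_mask and le_flags are assumed to come
 * from <dev/lockstat.h>.
 *
 *	int fd, version;
 *	lsenable_t le;
 *	lsdisable_t ld;
 *	lsbuf_t *buf;
 *
 *	fd = open("/dev/lockstat", O_RDONLY);
 *	ioctl(fd, IOC_LOCKSTAT_GVERSION, &version);  // must match LS_VERSION
 *	memset(&le, 0, sizeof(le));
 *	le.le_nbufs = 0;                     // 0 selects LOCKSTAT_DEFBUFS
 *	le.le_mask = <event bits> | <lock-class bits>;
 *	ioctl(fd, IOC_LOCKSTAT_ENABLE, &le); // allocate buffers, start tracing
 *	sleep(10);                           // trace for a while
 *	ioctl(fd, IOC_LOCKSTAT_DISABLE, &ld);
 *	buf = malloc(ld.ld_size);
 *	read(fd, buf, ld.ld_size);           // copies out lockstat_baseb
 *	close(fd);
 */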