1 /* $NetBSD: profile.c,v 1.4 2012/12/07 03:11:17 chs Exp $ */ 2 3 /* 4 * CDDL HEADER START 5 * 6 * The contents of this file are subject to the terms of the 7 * Common Development and Distribution License (the "License"). 8 * You may not use this file except in compliance with the License. 9 * 10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 11 * or http://www.opensolaris.org/os/licensing. 12 * See the License for the specific language governing permissions 13 * and limitations under the License. 14 * 15 * When distributing Covered Code, include this CDDL HEADER in each 16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 17 * If applicable, add the following below this CDDL HEADER, with the 18 * fields enclosed by brackets "[]" replaced with your own identifying 19 * information: Portions Copyright [yyyy] [name of copyright owner] 20 * 21 * CDDL HEADER END 22 * 23 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org 24 * 25 * $FreeBSD: src/sys/cddl/dev/profile/profile.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ 26 * 27 */ 28 29 /* 30 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 31 * Use is subject to license terms. 
32 */ 33 34 #include <sys/cdefs.h> 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/conf.h> 38 #include <sys/cpuvar.h> 39 #include <sys/fcntl.h> 40 #include <sys/filio.h> 41 #ifdef __FreeBSD__ 42 #include <sys/kdb.h> 43 #endif 44 #include <sys/kernel.h> 45 #include <sys/kmem.h> 46 #include <sys/kthread.h> 47 #include <sys/limits.h> 48 #include <sys/linker.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/module.h> 52 #include <sys/mutex.h> 53 #include <sys/poll.h> 54 #include <sys/proc.h> 55 #include <sys/selinfo.h> 56 #ifdef __FreeBSD__ 57 #include <sys/smp.h> 58 #endif 59 #include <sys/uio.h> 60 #include <sys/unistd.h> 61 62 #ifdef __NetBSD__ 63 #include <sys/atomic.h> 64 #include <sys/cpu.h> 65 #define ASSERT(x) KASSERT(x) 66 #endif 67 68 #include <sys/cyclic.h> 69 #include <sys/dtrace.h> 70 #include <sys/dtrace_bsd.h> 71 72 #define PROF_NAMELEN 15 73 74 #define PROF_PROFILE 0 75 #define PROF_TICK 1 76 #define PROF_PREFIX_PROFILE "profile-" 77 #define PROF_PREFIX_TICK "tick-" 78 79 /* 80 * Regardless of platform, there are five artificial frames in the case of the 81 * profile provider: 82 * 83 * profile_fire 84 * cyclic_expire 85 * cyclic_fire 86 * [ cbe ] 87 * [ locore ] 88 * 89 * On amd64, there are two frames associated with locore: one in locore, and 90 * another in common interrupt dispatch code. (i386 has not been modified to 91 * use this common layer.) Further, on i386, the interrupted instruction 92 * appears as its own stack frame. All of this means that we need to add one 93 * frame for amd64, and then take one away for both amd64 and i386. 94 * 95 * On SPARC, the picture is further complicated because the compiler 96 * optimizes away tail-calls -- so the following frames are optimized away: 97 * 98 * profile_fire 99 * cyclic_expire 100 * 101 * This gives three frames. However, on DEBUG kernels, the cyclic_expire 102 * frame cannot be tail-call eliminated, yielding four frames in this case. 
103 * 104 * All of the above constraints lead to the mess below. Yes, the profile 105 * provider should ideally figure this out on-the-fly by hiting one of its own 106 * probes and then walking its own stack trace. This is complicated, however, 107 * and the static definition doesn't seem to be overly brittle. Still, we 108 * allow for a manual override in case we get it completely wrong. 109 */ 110 #ifdef __FreeBSD__ 111 #ifdef __amd64 112 #define PROF_ARTIFICIAL_FRAMES 7 113 #else 114 #ifdef __i386 115 #define PROF_ARTIFICIAL_FRAMES 6 116 #else 117 #ifdef __sparc 118 #ifdef DEBUG 119 #define PROF_ARTIFICIAL_FRAMES 4 120 #else 121 #define PROF_ARTIFICIAL_FRAMES 3 122 #endif 123 #endif 124 #endif 125 #endif 126 #endif 127 128 #ifdef __NetBSD__ 129 #define PROF_ARTIFICIAL_FRAMES 3 130 #endif 131 132 typedef struct profile_probe { 133 char prof_name[PROF_NAMELEN]; 134 dtrace_id_t prof_id; 135 int prof_kind; 136 hrtime_t prof_interval; 137 cyclic_id_t prof_cyclic; 138 } profile_probe_t; 139 140 typedef struct profile_probe_percpu { 141 hrtime_t profc_expected; 142 hrtime_t profc_interval; 143 profile_probe_t *profc_probe; 144 } profile_probe_percpu_t; 145 146 #ifdef __FreeBSD__ 147 static d_open_t profile_open; 148 #endif 149 static int profile_unload(void); 150 static void profile_create(hrtime_t, char *, int); 151 static void profile_destroy(void *, dtrace_id_t, void *); 152 static int profile_enable(void *, dtrace_id_t, void *); 153 static void profile_disable(void *, dtrace_id_t, void *); 154 static void profile_load(void *); 155 static void profile_provide(void *, const dtrace_probedesc_t *); 156 157 static int profile_rates[] = { 158 97, 199, 499, 997, 1999, 159 4001, 4999, 0, 0, 0, 160 0, 0, 0, 0, 0, 161 0, 0, 0, 0, 0 162 }; 163 164 static int profile_ticks[] = { 165 1, 10, 100, 500, 1000, 166 5000, 0, 0, 0, 0, 167 0, 0, 0, 0, 0 168 }; 169 170 /* 171 * profile_max defines the upper bound on the number of profile probes that 172 * can exist (this is to prevent 
malicious or clumsy users from exhausing 173 * system resources by creating a slew of profile probes). At mod load time, 174 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's 175 * present in the profile.conf file. 176 */ 177 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */ 178 static uint32_t profile_max = PROFILE_MAX_DEFAULT; 179 /* maximum number of profile probes */ 180 static uint32_t profile_total; /* current number of profile probes */ 181 182 #ifdef __FreeBSD__ 183 static struct cdevsw profile_cdevsw = { 184 .d_version = D_VERSION, 185 .d_open = profile_open, 186 .d_name = "profile", 187 }; 188 #endif 189 190 static dtrace_pattr_t profile_attr = { 191 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 192 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 193 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 194 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 195 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 196 }; 197 198 static dtrace_pops_t profile_pops = { 199 profile_provide, 200 NULL, 201 profile_enable, 202 profile_disable, 203 NULL, 204 NULL, 205 NULL, 206 NULL, 207 NULL, 208 profile_destroy 209 }; 210 211 #ifdef __FreeBSD__ 212 static struct cdev *profile_cdev; 213 #endif 214 static dtrace_provider_id_t profile_id; 215 static hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ 216 static int profile_aframes = 0; /* override */ 217 218 static void 219 profile_fire(void *arg) 220 { 221 profile_probe_percpu_t *pcpu = arg; 222 profile_probe_t *prof = pcpu->profc_probe; 223 hrtime_t late; 224 solaris_cpu_t *c = &solaris_cpu[cpu_number()]; 225 226 late = gethrtime() - pcpu->profc_expected; 227 pcpu->profc_expected += pcpu->profc_interval; 228 229 dtrace_probe(prof->prof_id, c->cpu_profile_pc, 230 c->cpu_profile_upc, late, 0, 0); 231 } 232 233 static void 234 
profile_tick(void *arg) 235 { 236 profile_probe_t *prof = arg; 237 solaris_cpu_t *c = &solaris_cpu[cpu_number()]; 238 239 dtrace_probe(prof->prof_id, c->cpu_profile_pc, 240 c->cpu_profile_upc, 0, 0, 0); 241 } 242 243 static void 244 profile_create(hrtime_t interval, char *name, int kind) 245 { 246 profile_probe_t *prof; 247 248 if (interval < profile_interval_min) 249 return; 250 251 if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) 252 return; 253 254 atomic_add_32(&profile_total, 1); 255 if (profile_total > profile_max) { 256 atomic_add_32(&profile_total, -1); 257 return; 258 } 259 260 prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); 261 (void) strcpy(prof->prof_name, name); 262 prof->prof_interval = interval; 263 prof->prof_cyclic = CYCLIC_NONE; 264 prof->prof_kind = kind; 265 prof->prof_id = dtrace_probe_create(profile_id, 266 NULL, NULL, name, 267 profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof); 268 } 269 270 /*ARGSUSED*/ 271 static void 272 profile_provide(void *arg, const dtrace_probedesc_t *desc) 273 { 274 int i, j, rate, kind; 275 hrtime_t val = 0, mult = 1, len = 0; 276 char *name, *suffix = NULL; 277 278 const struct { 279 const char *prefix; 280 int kind; 281 } types[] = { 282 { PROF_PREFIX_PROFILE, PROF_PROFILE }, 283 { PROF_PREFIX_TICK, PROF_TICK }, 284 { 0, 0 } 285 }; 286 287 const struct { 288 const char *name; 289 hrtime_t mult; 290 } suffixes[] = { 291 { "ns", NANOSEC / NANOSEC }, 292 { "nsec", NANOSEC / NANOSEC }, 293 { "us", NANOSEC / MICROSEC }, 294 { "usec", NANOSEC / MICROSEC }, 295 { "ms", NANOSEC / MILLISEC }, 296 { "msec", NANOSEC / MILLISEC }, 297 { "s", NANOSEC / SEC }, 298 { "sec", NANOSEC / SEC }, 299 { "m", NANOSEC * (hrtime_t)60 }, 300 { "min", NANOSEC * (hrtime_t)60 }, 301 { "h", NANOSEC * (hrtime_t)(60 * 60) }, 302 { "hour", NANOSEC * (hrtime_t)(60 * 60) }, 303 { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 304 { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 305 { "hz", 0 }, 306 { NULL, 0 } 307 }; 
308 309 if (desc == NULL) { 310 char n[PROF_NAMELEN]; 311 312 /* 313 * If no description was provided, provide all of our probes. 314 */ 315 for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { 316 if ((rate = profile_rates[i]) == 0) 317 continue; 318 319 (void) snprintf(n, PROF_NAMELEN, "%s%d", 320 PROF_PREFIX_PROFILE, rate); 321 profile_create(NANOSEC / rate, n, PROF_PROFILE); 322 } 323 324 for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { 325 if ((rate = profile_ticks[i]) == 0) 326 continue; 327 328 (void) snprintf(n, PROF_NAMELEN, "%s%d", 329 PROF_PREFIX_TICK, rate); 330 profile_create(NANOSEC / rate, n, PROF_TICK); 331 } 332 333 return; 334 } 335 336 name = (char *)desc->dtpd_name; 337 338 for (i = 0; types[i].prefix != NULL; i++) { 339 len = strlen(types[i].prefix); 340 341 if (strncmp(name, types[i].prefix, len) != 0) 342 continue; 343 break; 344 } 345 346 if (types[i].prefix == NULL) 347 return; 348 349 kind = types[i].kind; 350 j = strlen(name) - len; 351 352 /* 353 * We need to start before any time suffix. 354 */ 355 for (j = strlen(name); j >= len; j--) { 356 if (name[j] >= '0' && name[j] <= '9') 357 break; 358 suffix = &name[j]; 359 } 360 361 ASSERT(suffix != NULL); 362 363 /* 364 * Now determine the numerical value present in the probe name. 365 */ 366 for (; j >= len; j--) { 367 if (name[j] < '0' || name[j] > '9') 368 return; 369 370 val += (name[j] - '0') * mult; 371 mult *= (hrtime_t)10; 372 } 373 374 if (val == 0) 375 return; 376 377 /* 378 * Look-up the suffix to determine the multiplier. 379 */ 380 for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { 381 if (strcasecmp(suffixes[i].name, suffix) == 0) { 382 mult = suffixes[i].mult; 383 break; 384 } 385 } 386 387 if (suffixes[i].name == NULL && *suffix != '\0') 388 return; 389 390 if (mult == 0) { 391 /* 392 * The default is frequency-per-second. 
393 */ 394 val = NANOSEC / val; 395 } else { 396 val *= mult; 397 } 398 399 profile_create(val, name, kind); 400 } 401 402 /* ARGSUSED */ 403 static void 404 profile_destroy(void *arg, dtrace_id_t id, void *parg) 405 { 406 profile_probe_t *prof = parg; 407 408 ASSERT(prof->prof_cyclic == CYCLIC_NONE); 409 kmem_free(prof, sizeof (profile_probe_t)); 410 411 ASSERT(profile_total >= 1); 412 atomic_add_32(&profile_total, -1); 413 } 414 415 /*ARGSUSED*/ 416 static void 417 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 418 { 419 profile_probe_t *prof = arg; 420 profile_probe_percpu_t *pcpu; 421 422 pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); 423 pcpu->profc_probe = prof; 424 425 hdlr->cyh_func = profile_fire; 426 hdlr->cyh_arg = pcpu; 427 428 when->cyt_interval = prof->prof_interval; 429 when->cyt_when = gethrtime() + when->cyt_interval; 430 431 pcpu->profc_expected = when->cyt_when; 432 pcpu->profc_interval = when->cyt_interval; 433 } 434 435 /*ARGSUSED*/ 436 static void 437 profile_offline(void *arg, cpu_t *cpu, void *oarg) 438 { 439 profile_probe_percpu_t *pcpu = oarg; 440 441 ASSERT(pcpu->profc_probe == arg); 442 kmem_free(pcpu, sizeof (profile_probe_percpu_t)); 443 } 444 445 /* ARGSUSED */ 446 static int 447 profile_enable(void *arg, dtrace_id_t id, void *parg) 448 { 449 profile_probe_t *prof = parg; 450 cyc_omni_handler_t omni; 451 cyc_handler_t hdlr; 452 cyc_time_t when; 453 454 ASSERT(prof->prof_interval != 0); 455 ASSERT(MUTEX_HELD(&cpu_lock)); 456 457 if (prof->prof_kind == PROF_TICK) { 458 hdlr.cyh_func = profile_tick; 459 hdlr.cyh_arg = prof; 460 461 when.cyt_interval = prof->prof_interval; 462 when.cyt_when = gethrtime() + when.cyt_interval; 463 } else { 464 ASSERT(prof->prof_kind == PROF_PROFILE); 465 omni.cyo_online = profile_online; 466 omni.cyo_offline = profile_offline; 467 omni.cyo_arg = prof; 468 } 469 470 if (prof->prof_kind == PROF_TICK) { 471 prof->prof_cyclic = cyclic_add(&hdlr, &when); 472 } else { 
473 prof->prof_cyclic = cyclic_add_omni(&omni); 474 } 475 return 0; 476 } 477 478 /* ARGSUSED */ 479 static void 480 profile_disable(void *arg, dtrace_id_t id, void *parg) 481 { 482 profile_probe_t *prof = parg; 483 484 ASSERT(prof->prof_cyclic != CYCLIC_NONE); 485 ASSERT(MUTEX_HELD(&cpu_lock)); 486 487 cyclic_remove(prof->prof_cyclic); 488 prof->prof_cyclic = CYCLIC_NONE; 489 } 490 491 static void 492 profile_load(void *dummy) 493 { 494 #ifdef __FreeBSD__ 495 /* Create the /dev/dtrace/profile entry. */ 496 profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 497 "dtrace/profile"); 498 #endif 499 500 if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER, 501 NULL, &profile_pops, NULL, &profile_id) != 0) 502 return; 503 } 504 505 506 static int 507 profile_unload() 508 { 509 int error = 0; 510 511 if ((error = dtrace_unregister(profile_id)) != 0) 512 return (error); 513 514 #ifdef __FreeBSD__ 515 destroy_dev(profile_cdev); 516 #endif 517 518 return (error); 519 } 520 521 #ifdef __FreeBSD__ 522 523 /* ARGSUSED */ 524 static int 525 profile_modevent(module_t mod __unused, int type, void *data __unused) 526 { 527 int error = 0; 528 529 switch (type) { 530 case MOD_LOAD: 531 break; 532 533 case MOD_UNLOAD: 534 break; 535 536 case MOD_SHUTDOWN: 537 break; 538 539 default: 540 error = EOPNOTSUPP; 541 break; 542 543 } 544 return (error); 545 } 546 547 /* ARGSUSED */ 548 static int 549 profile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) 550 { 551 return (0); 552 } 553 554 SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL); 555 SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL); 556 557 DEV_MODULE(profile, profile_modevent, NULL); 558 MODULE_VERSION(profile, 1); 559 MODULE_DEPEND(profile, dtrace, 1, 1, 1); 560 MODULE_DEPEND(profile, cyclic, 1, 1, 1); 561 MODULE_DEPEND(profile, opensolaris, 1, 1, 1); 562 563 #endif 564 565 
#ifdef __NetBSD__

/*
 * NetBSD module control entry point.
 *
 * MODULE_CMD_INIT loads the provider and reports success.
 * MODULE_CMD_FINI returns the result of profile_unload(), so that a
 * failure to unregister the provider is reported to the caller instead
 * of being masked as success.  Any other command yields ENOTTY.
 */
static int
profile_modcmd(modcmd_t cmd, void *data)
{
	switch (cmd) {
	case MODULE_CMD_INIT:
		profile_load(NULL);
		return 0;

	case MODULE_CMD_FINI:
		/* Do not claim success if the provider is still registered. */
		return profile_unload();

	default:
		return ENOTTY;
	}
}

MODULE(MODULE_CLASS_MISC, profile, "dtrace,cyclic");

#endif