1da5b7e90SMitchell Horne /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3da5b7e90SMitchell Horne * 4da5b7e90SMitchell Horne * Copyright (c) 2008-2022 NetApp, Inc. 5da5b7e90SMitchell Horne * 6da5b7e90SMitchell Horne * Redistribution and use in source and binary forms, with or without 7da5b7e90SMitchell Horne * modification, are permitted provided that the following conditions 8da5b7e90SMitchell Horne * are met: 9da5b7e90SMitchell Horne * 1. Redistributions of source code must retain the above copyright 10da5b7e90SMitchell Horne * notice, this list of conditions and the following disclaimer. 11da5b7e90SMitchell Horne * 2. Redistributions in binary form must reproduce the above copyright 12da5b7e90SMitchell Horne * notice, this list of conditions and the following disclaimer in the 13da5b7e90SMitchell Horne * documentation and/or other materials provided with the distribution. 14da5b7e90SMitchell Horne * 15da5b7e90SMitchell Horne * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16da5b7e90SMitchell Horne * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17da5b7e90SMitchell Horne * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18da5b7e90SMitchell Horne * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19da5b7e90SMitchell Horne * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20da5b7e90SMitchell Horne * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21da5b7e90SMitchell Horne * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22da5b7e90SMitchell Horne * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23da5b7e90SMitchell Horne * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24da5b7e90SMitchell Horne * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25da5b7e90SMitchell Horne */ 26da5b7e90SMitchell Horne 27da5b7e90SMitchell Horne #include <sys/param.h> 28da5b7e90SMitchell Horne #include <sys/systm.h> 29da5b7e90SMitchell Horne #include <sys/boottrace.h> 30da5b7e90SMitchell Horne #include <sys/kernel.h> 31da5b7e90SMitchell Horne #include <sys/malloc.h> 32da5b7e90SMitchell Horne #include <sys/pcpu.h> 33da5b7e90SMitchell Horne #include <sys/proc.h> 34da5b7e90SMitchell Horne #include <sys/resourcevar.h> 35da5b7e90SMitchell Horne #include <sys/sbuf.h> 36da5b7e90SMitchell Horne #include <sys/syscallsubr.h> 37da5b7e90SMitchell Horne #include <sys/sysctl.h> 38da5b7e90SMitchell Horne #include <sys/time.h> 39da5b7e90SMitchell Horne 40da5b7e90SMitchell Horne #include <machine/stdarg.h> 41da5b7e90SMitchell Horne 42da5b7e90SMitchell Horne #define dprintf(fmt, ...) \ 43da5b7e90SMitchell Horne do { \ 44da5b7e90SMitchell Horne if (dotrace_debugging) \ 45da5b7e90SMitchell Horne printf(fmt, ##__VA_ARGS__); \ 46da5b7e90SMitchell Horne } while (0); 47da5b7e90SMitchell Horne 48da5b7e90SMitchell Horne static MALLOC_DEFINE(M_BOOTTRACE, "boottrace", "memory for boot tracing"); 49da5b7e90SMitchell Horne 50da5b7e90SMitchell Horne #define BT_TABLE_DEFSIZE 3000 51da5b7e90SMitchell Horne #define BT_TABLE_RUNSIZE 2000 52da5b7e90SMitchell Horne #define BT_TABLE_SHTSIZE 1000 53da5b7e90SMitchell Horne #define BT_TABLE_MINSIZE 500 54da5b7e90SMitchell Horne 55da5b7e90SMitchell Horne /* 56da5b7e90SMitchell Horne * Boot-time & shutdown-time event. 57da5b7e90SMitchell Horne */ 58da5b7e90SMitchell Horne struct bt_event { 59da5b7e90SMitchell Horne uint64_t tsc; /* Timestamp */ 60da5b7e90SMitchell Horne uint64_t tick; /* Kernel tick */ 61da5b7e90SMitchell Horne uint64_t cputime; /* Microseconds of process CPU time */ 62da5b7e90SMitchell Horne uint32_t cpuid; /* CPU on which the event ran */ 63da5b7e90SMitchell Horne uint32_t inblock; /* # of blocks in */ 64da5b7e90SMitchell Horne uint32_t oublock; /* # of blocks out */ 65da5b7e90SMitchell Horne pid_t pid; /* Current PID */ 66da5b7e90SMitchell Horne char name[BT_EVENT_NAMELEN]; /* Event name */ 67da5b7e90SMitchell Horne char tdname[BT_EVENT_TDNAMELEN]; /* Thread name */ 68da5b7e90SMitchell Horne }; 69da5b7e90SMitchell Horne 70da5b7e90SMitchell Horne struct bt_table { 71da5b7e90SMitchell Horne uint32_t size; /* Trace table size */ 72da5b7e90SMitchell Horne uint32_t curr; /* Trace entry to use */ 73da5b7e90SMitchell Horne uint32_t wrap; /* Wrap-around, instead of dropping */ 74da5b7e90SMitchell Horne uint32_t drops_early; /* Trace entries dropped before init */ 75da5b7e90SMitchell Horne uint32_t drops_full; /* Trace entries dropped after full */ 76da5b7e90SMitchell Horne struct bt_event *table; 77da5b7e90SMitchell Horne }; 78da5b7e90SMitchell Horne 79da5b7e90SMitchell Horne /* Boot-time tracing */ 80da5b7e90SMitchell Horne static struct bt_table bt; 81da5b7e90SMitchell Horne 82da5b7e90SMitchell Horne /* Run-time tracing */ 83da5b7e90SMitchell Horne static struct bt_table rt; 84da5b7e90SMitchell Horne 85da5b7e90SMitchell Horne /* Shutdown-time tracing */ 86da5b7e90SMitchell Horne static struct bt_table st; 87da5b7e90SMitchell Horne 88da5b7e90SMitchell Horne /* Set when system boot is complete, and we've switched to runtime tracing. */ 89da5b7e90SMitchell Horne static bool bootdone; 90da5b7e90SMitchell Horne 91da5b7e90SMitchell Horne /* Set when system shutdown has started. */ 92da5b7e90SMitchell Horne static bool shutdown; 93da5b7e90SMitchell Horne 94da5b7e90SMitchell Horne /* Turn on dotrace() debug logging to console. */ 95da5b7e90SMitchell Horne static bool dotrace_debugging; 96da5b7e90SMitchell Horne TUNABLE_BOOL("debug.boottrace.dotrace_debugging", &dotrace_debugging); 97da5b7e90SMitchell Horne 98da5b7e90SMitchell Horne /* Trace kernel events */ 99da5b7e90SMitchell Horne static bool dotrace_kernel = true; 100da5b7e90SMitchell Horne TUNABLE_BOOL("kern.boottrace.dotrace_kernel", &dotrace_kernel); 101da5b7e90SMitchell Horne 102da5b7e90SMitchell Horne /* Trace userspace events */ 103da5b7e90SMitchell Horne static bool dotrace_user = true; 104da5b7e90SMitchell Horne TUNABLE_BOOL("kern.boottrace.dotrace_user", &dotrace_user); 105da5b7e90SMitchell Horne 106da5b7e90SMitchell Horne static int sysctl_log(SYSCTL_HANDLER_ARGS); 107da5b7e90SMitchell Horne static int sysctl_boottrace(SYSCTL_HANDLER_ARGS); 108da5b7e90SMitchell Horne static int sysctl_runtrace(SYSCTL_HANDLER_ARGS); 109da5b7e90SMitchell Horne static int sysctl_shuttrace(SYSCTL_HANDLER_ARGS); 110da5b7e90SMitchell Horne static int sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS); 111da5b7e90SMitchell Horne 112da5b7e90SMitchell Horne SYSCTL_NODE(_kern, OID_AUTO, boottrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 113da5b7e90SMitchell Horne "boottrace statistics"); 114da5b7e90SMitchell Horne 115da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, log, 116da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP, 117da5b7e90SMitchell Horne 0, 0, sysctl_log, "A", 118da5b7e90SMitchell Horne "Print a log of the boottrace trace data"); 119da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, boottrace, 120da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE, 121da5b7e90SMitchell Horne 0, 0, sysctl_boottrace, "A", 122da5b7e90SMitchell Horne "Capture a boot-time trace event"); 123da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, runtrace, 124da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE, 125da5b7e90SMitchell Horne 0, 0, sysctl_runtrace, "A", 126da5b7e90SMitchell Horne "Capture a run-time trace event"); 127da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, shuttrace, 128da5b7e90SMitchell Horne CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MPSAFE, 129da5b7e90SMitchell Horne 0, 0, sysctl_shuttrace, "A", 130da5b7e90SMitchell Horne "Capture a shutdown-time trace event"); 131da5b7e90SMitchell Horne SYSCTL_PROC(_kern_boottrace, OID_AUTO, reset, 132da5b7e90SMitchell Horne CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_MPSAFE, 133da5b7e90SMitchell Horne 0, 0, sysctl_boottrace_reset, "A", 134da5b7e90SMitchell Horne "Reset run-time tracing table"); 135da5b7e90SMitchell Horne 136da5b7e90SMitchell Horne /* 137da5b7e90SMitchell Horne * Global enable. 138da5b7e90SMitchell Horne */ 139da5b7e90SMitchell Horne bool __read_mostly boottrace_enabled = false; 140da5b7e90SMitchell Horne SYSCTL_BOOL(_kern_boottrace, OID_AUTO, enabled, CTLFLAG_RDTUN, 141da5b7e90SMitchell Horne &boottrace_enabled, 0, 142da5b7e90SMitchell Horne "Boot-time and shutdown-time tracing enabled"); 143da5b7e90SMitchell Horne 144da5b7e90SMitchell Horne /* 145da5b7e90SMitchell Horne * Enable dumping of the shutdown trace entries to console. 146da5b7e90SMitchell Horne */ 147da5b7e90SMitchell Horne bool shutdown_trace = false; 148da5b7e90SMitchell Horne SYSCTL_BOOL(_kern_boottrace, OID_AUTO, shutdown_trace, CTLFLAG_RWTUN, 149da5b7e90SMitchell Horne &shutdown_trace, 0, 150da5b7e90SMitchell Horne "Enable kernel shutdown tracing to the console"); 151da5b7e90SMitchell Horne 152da5b7e90SMitchell Horne /* 153da5b7e90SMitchell Horne * Set the delta threshold (ms) below which events are ignored, used in 154da5b7e90SMitchell Horne * determining what to dump to the console. 155da5b7e90SMitchell Horne */ 156da5b7e90SMitchell Horne static int shutdown_trace_threshold; 157da5b7e90SMitchell Horne SYSCTL_INT(_kern_boottrace, OID_AUTO, shutdown_trace_threshold, CTLFLAG_RWTUN, 158da5b7e90SMitchell Horne &shutdown_trace_threshold, 0, 159da5b7e90SMitchell Horne "Tracing threshold (ms) below which tracing is ignored"); 160da5b7e90SMitchell Horne 16151dc362dSZhenlei Huang SYSCTL_UINT(_kern_boottrace, OID_AUTO, table_size, 16251dc362dSZhenlei Huang CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &bt.size, 0, 16351dc362dSZhenlei Huang "Boot-time tracing table size"); 16451dc362dSZhenlei Huang 165da5b7e90SMitchell Horne /* 166da5b7e90SMitchell Horne * Dump a trace to buffer or if buffer is NULL to console. 167da5b7e90SMitchell Horne * 168da5b7e90SMitchell Horne * Non-zero delta_threshold selectively prints entries based on delta 169da5b7e90SMitchell Horne * with current and previous entry. Otherwise, delta_threshold of 0 170da5b7e90SMitchell Horne * prints every trace entry and delta. 171da5b7e90SMitchell Horne * 172da5b7e90SMitchell Horne * Output something like this: 173da5b7e90SMitchell Horne * 174da5b7e90SMitchell Horne * CPU msecs delta process event 175da5b7e90SMitchell Horne * 11 1228262715 0 init shutdown pre sync begin 176da5b7e90SMitchell Horne * 3 1228265622 2907 init shutdown pre sync complete 177da5b7e90SMitchell Horne * 3 1228265623 0 init shutdown turned swap off 178da5b7e90SMitchell Horne * 18 1228266466 843 init shutdown unmounted all filesystems 179da5b7e90SMitchell Horne * 180da5b7e90SMitchell Horne * How to interpret: 181da5b7e90SMitchell Horne * 182da5b7e90SMitchell Horne * delta column represents the time in milliseconds between this event and the previous. 183da5b7e90SMitchell Horne * Usually that means you can take the previous event, current event, match them 184da5b7e90SMitchell Horne * up in the code, and whatever lies between is the culprit taking time. 185da5b7e90SMitchell Horne * 186da5b7e90SMitchell Horne * For example, above: Pre-Sync is taking 2907ms, and something between swap and unmount 187da5b7e90SMitchell Horne * filesystems is taking 843 milliseconds. 188da5b7e90SMitchell Horne * 189da5b7e90SMitchell Horne * An event with a delta of 0 are 'landmark' events that simply exist in the output 190da5b7e90SMitchell Horne * for the developer to know where the time measurement begins. The 0 is an arbitrary 191da5b7e90SMitchell Horne * number that can effectively be ignored. 192da5b7e90SMitchell Horne */ 193da5b7e90SMitchell Horne #define BTC_DELTA_PRINT(bte, msecs, delta) do { \ 194da5b7e90SMitchell Horne if (sbp) { \ 195da5b7e90SMitchell Horne sbuf_printf(sbp, fmt, (bte)->cpuid, msecs, delta, \ 196da5b7e90SMitchell Horne (bte)->tdname, (bte)->name, (bte)->pid, \ 197da5b7e90SMitchell Horne (bte)->cputime / 1000000, \ 198da5b7e90SMitchell Horne ((bte)->cputime % 1000000) / 10000, \ 199da5b7e90SMitchell Horne (bte)->inblock, (bte)->oublock); \ 200da5b7e90SMitchell Horne } else { \ 201da5b7e90SMitchell Horne printf(fmt, (bte)->cpuid, msecs, delta, \ 202da5b7e90SMitchell Horne (bte)->tdname, (bte)->name, (bte)->pid, \ 203da5b7e90SMitchell Horne (bte)->cputime / 1000000, \ 204da5b7e90SMitchell Horne ((bte)->cputime % 1000000) / 10000, \ 205da5b7e90SMitchell Horne (bte)->inblock, (bte)->oublock); \ 206da5b7e90SMitchell Horne } \ 207da5b7e90SMitchell Horne } while (0) 208da5b7e90SMitchell Horne 209da5b7e90SMitchell Horne /* 210da5b7e90SMitchell Horne * Print the trace entries to the message buffer, or to an sbuf, if provided. 211da5b7e90SMitchell Horne * 212da5b7e90SMitchell Horne * Entries with a difference less than dthres will not be printed. 213da5b7e90SMitchell Horne */ 214da5b7e90SMitchell Horne static void 215da5b7e90SMitchell Horne boottrace_display(struct sbuf *sbp, struct bt_table *btp, uint64_t dthres) 216da5b7e90SMitchell Horne { 217da5b7e90SMitchell Horne struct bt_event *evtp; 218da5b7e90SMitchell Horne struct bt_event *last_evtp; 219da5b7e90SMitchell Horne uint64_t msecs; 220da5b7e90SMitchell Horne uint64_t first_msecs; 221da5b7e90SMitchell Horne uint64_t last_msecs; 222da5b7e90SMitchell Horne uint64_t dmsecs; 223da5b7e90SMitchell Horne uint64_t last_dmsecs; 224da5b7e90SMitchell Horne uint64_t total_dmsecs; 225da5b7e90SMitchell Horne uint32_t i; 226da5b7e90SMitchell Horne uint32_t curr; 227da5b7e90SMitchell Horne const char *fmt = "%3u %10llu %10llu %-24s %-40s %5d %4d.%02d %5u %5u\n"; 228da5b7e90SMitchell Horne const char *hdr_fmt = "\n\n%3s %10s %10s %-24s %-40s %5s %6s %5s %5s\n"; 229da5b7e90SMitchell Horne bool printed; 230da5b7e90SMitchell Horne bool last_printed; 231da5b7e90SMitchell Horne 232da5b7e90SMitchell Horne /* Print the header */ 233da5b7e90SMitchell Horne if (sbp != NULL) 234da5b7e90SMitchell Horne sbuf_printf(sbp, hdr_fmt, 235da5b7e90SMitchell Horne "CPU", "msecs", "delta", "process", 236da5b7e90SMitchell Horne "event", "PID", "CPUtime", "IBlks", "OBlks"); 237da5b7e90SMitchell Horne else 238da5b7e90SMitchell Horne printf(hdr_fmt, 239da5b7e90SMitchell Horne "CPU", "msecs", "delta", "process", 240da5b7e90SMitchell Horne "event", "PID", "CPUtime", "IBlks", "OBlks"); 241da5b7e90SMitchell Horne 242da5b7e90SMitchell Horne first_msecs = 0; 243da5b7e90SMitchell Horne last_evtp = NULL; 244da5b7e90SMitchell Horne last_msecs = 0; 245da5b7e90SMitchell Horne last_dmsecs = 0; 246da5b7e90SMitchell Horne last_printed = false; 247da5b7e90SMitchell Horne i = curr = btp->curr; 248da5b7e90SMitchell Horne 249da5b7e90SMitchell Horne do { 250da5b7e90SMitchell Horne evtp = &btp->table[i]; 251da5b7e90SMitchell Horne if (evtp->tsc == 0) 252da5b7e90SMitchell Horne goto next; 253da5b7e90SMitchell Horne 254da5b7e90SMitchell Horne msecs = cputick2usec(evtp->tick) / 1000; 255da5b7e90SMitchell Horne dmsecs = (last_msecs != 0 && msecs > last_msecs) ? 256da5b7e90SMitchell Horne msecs - last_msecs : 0; 257da5b7e90SMitchell Horne printed = false; 258da5b7e90SMitchell Horne 259da5b7e90SMitchell Horne /* 260da5b7e90SMitchell Horne * If a threshold is defined, start filtering events by the 261da5b7e90SMitchell Horne * delta of msecs. 262da5b7e90SMitchell Horne */ 263da5b7e90SMitchell Horne if (dthres != 0 && (dmsecs > dthres)) { 264da5b7e90SMitchell Horne /* 265da5b7e90SMitchell Horne * Print the previous entry as a landmark, even if it 266da5b7e90SMitchell Horne * falls below the threshold. 267da5b7e90SMitchell Horne */ 268da5b7e90SMitchell Horne if (last_evtp != NULL && !last_printed) 269da5b7e90SMitchell Horne BTC_DELTA_PRINT(last_evtp, last_msecs, 270da5b7e90SMitchell Horne last_dmsecs); 271da5b7e90SMitchell Horne BTC_DELTA_PRINT(evtp, msecs, dmsecs); 272da5b7e90SMitchell Horne printed = true; 273da5b7e90SMitchell Horne } else if (dthres == 0) { 274da5b7e90SMitchell Horne BTC_DELTA_PRINT(evtp, msecs, dmsecs); 275da5b7e90SMitchell Horne printed = true; 276da5b7e90SMitchell Horne } 277da5b7e90SMitchell Horne if (first_msecs == 0 || msecs < first_msecs) 278da5b7e90SMitchell Horne first_msecs = msecs; 279da5b7e90SMitchell Horne last_evtp = evtp; 280da5b7e90SMitchell Horne last_msecs = msecs; 281da5b7e90SMitchell Horne last_dmsecs = dmsecs; 282da5b7e90SMitchell Horne last_printed = printed; 283da5b7e90SMitchell Horne maybe_yield(); 284da5b7e90SMitchell Horne next: 285da5b7e90SMitchell Horne i = (i + 1) % btp->size; 286da5b7e90SMitchell Horne } while (i != curr); 287da5b7e90SMitchell Horne 288da5b7e90SMitchell Horne total_dmsecs = last_msecs > first_msecs ? 289da5b7e90SMitchell Horne (last_msecs - first_msecs) : 0; 290da5b7e90SMitchell Horne if (sbp != NULL) 291da5b7e90SMitchell Horne sbuf_printf(sbp, "Total measured time: %ju msecs\n", 292da5b7e90SMitchell Horne (uintmax_t)total_dmsecs); 293da5b7e90SMitchell Horne else 294da5b7e90SMitchell Horne printf("Total measured time: %ju msecs\n", 295da5b7e90SMitchell Horne (uintmax_t)total_dmsecs); 296da5b7e90SMitchell Horne } 297da5b7e90SMitchell Horne 298da5b7e90SMitchell Horne /* 299da5b7e90SMitchell Horne * Dump trace table entries to the console, given a delta threshold. 300da5b7e90SMitchell Horne */ 301da5b7e90SMitchell Horne void 302da5b7e90SMitchell Horne boottrace_dump_console(void) 303da5b7e90SMitchell Horne { 304da5b7e90SMitchell Horne if (!boottrace_enabled) 305da5b7e90SMitchell Horne return; 306da5b7e90SMitchell Horne 307da5b7e90SMitchell Horne if (shutdown || rebooting || KERNEL_PANICKED()) { 308da5b7e90SMitchell Horne boottrace_display(NULL, &st, shutdown_trace_threshold); 309da5b7e90SMitchell Horne } else { 310da5b7e90SMitchell Horne boottrace_display(NULL, &bt, 0); 311da5b7e90SMitchell Horne boottrace_display(NULL, &rt, 0); 312da5b7e90SMitchell Horne } 313da5b7e90SMitchell Horne } 314da5b7e90SMitchell Horne 315da5b7e90SMitchell Horne /* 316da5b7e90SMitchell Horne * Records a new tracing event to the specified table. 317da5b7e90SMitchell Horne * 318da5b7e90SMitchell Horne * We don't use a lock because we want this to be callable from interrupt 319da5b7e90SMitchell Horne * context. 320da5b7e90SMitchell Horne */ 321da5b7e90SMitchell Horne static int 322da5b7e90SMitchell Horne dotrace(struct bt_table *btp, const char *eventname, const char *tdname) 323da5b7e90SMitchell Horne { 324da5b7e90SMitchell Horne uint32_t idx, nxt; 325da5b7e90SMitchell Horne struct rusage usage; 326da5b7e90SMitchell Horne 327da5b7e90SMitchell Horne MPASS(boottrace_enabled); 328da5b7e90SMitchell Horne if (tdname == NULL) 329da5b7e90SMitchell Horne tdname = (curproc->p_flag & P_SYSTEM) ? 330da5b7e90SMitchell Horne curthread->td_name : curproc->p_comm; 331da5b7e90SMitchell Horne 332da5b7e90SMitchell Horne dprintf("dotrace["); 333da5b7e90SMitchell Horne dprintf("cpu=%u, pid=%d, tsc=%ju, tick=%d, td='%s', event='%s'", 334da5b7e90SMitchell Horne PCPU_GET(cpuid), curthread->td_proc->p_pid, 335da5b7e90SMitchell Horne (uintmax_t)get_cyclecount(), ticks, tdname, eventname); 336da5b7e90SMitchell Horne if (btp->table == NULL) { 337da5b7e90SMitchell Horne btp->drops_early++; 338da5b7e90SMitchell Horne dprintf(", return=ENOSPC_1]\n"); 339da5b7e90SMitchell Horne return (ENOSPC); 340da5b7e90SMitchell Horne } 341da5b7e90SMitchell Horne 342da5b7e90SMitchell Horne /* Claim a slot in the table. */ 343da5b7e90SMitchell Horne do { 344da5b7e90SMitchell Horne idx = btp->curr; 345da5b7e90SMitchell Horne nxt = (idx + 1) % btp->size; 346da5b7e90SMitchell Horne if (nxt == 0 && btp->wrap == 0) { 347da5b7e90SMitchell Horne btp->drops_full++; 348da5b7e90SMitchell Horne dprintf(", return=ENOSPC_2]\n"); 349da5b7e90SMitchell Horne return (ENOSPC); 350da5b7e90SMitchell Horne } 351da5b7e90SMitchell Horne } while (!atomic_cmpset_int(&btp->curr, idx, nxt)); 352da5b7e90SMitchell Horne 353da5b7e90SMitchell Horne btp->table[idx].cpuid = PCPU_GET(cpuid); 354da5b7e90SMitchell Horne btp->table[idx].tsc = get_cyclecount(), 355da5b7e90SMitchell Horne btp->table[idx].tick = cpu_ticks(); 356da5b7e90SMitchell Horne btp->table[idx].pid = curthread->td_proc->p_pid; 357da5b7e90SMitchell Horne 358da5b7e90SMitchell Horne /* 359da5b7e90SMitchell Horne * Get the resource usage for the process. Don't attempt this for the 360da5b7e90SMitchell Horne * kernel proc0 or for critical section activities. 361da5b7e90SMitchell Horne */ 362da5b7e90SMitchell Horne if ((curthread->td_proc == &proc0) || (curthread->td_critnest != 0)) { 363da5b7e90SMitchell Horne btp->table[idx].cputime = 0; 364da5b7e90SMitchell Horne btp->table[idx].inblock = 0; 365da5b7e90SMitchell Horne btp->table[idx].oublock = 0; 366da5b7e90SMitchell Horne } else { 367da5b7e90SMitchell Horne kern_getrusage(curthread, RUSAGE_CHILDREN, &usage); 368da5b7e90SMitchell Horne btp->table[idx].cputime = 369da5b7e90SMitchell Horne (uint64_t)(usage.ru_utime.tv_sec * 1000000 + 370da5b7e90SMitchell Horne usage.ru_utime.tv_usec + usage.ru_stime.tv_sec * 1000000 + 371da5b7e90SMitchell Horne usage.ru_stime.tv_usec); 372da5b7e90SMitchell Horne btp->table[idx].inblock = (uint32_t)usage.ru_inblock; 373da5b7e90SMitchell Horne btp->table[idx].oublock = (uint32_t)usage.ru_oublock; 374da5b7e90SMitchell Horne } 375da5b7e90SMitchell Horne strlcpy(btp->table[idx].name, eventname, BT_EVENT_NAMELEN); 376da5b7e90SMitchell Horne strlcpy(btp->table[idx].tdname, tdname, BT_EVENT_TDNAMELEN); 377da5b7e90SMitchell Horne 378da5b7e90SMitchell Horne dprintf(", return=0]\n"); 379da5b7e90SMitchell Horne return (0); 380da5b7e90SMitchell Horne } 381da5b7e90SMitchell Horne 382da5b7e90SMitchell Horne /* 383da5b7e90SMitchell Horne * Log various boot-time events, with the trace message encoded using 384da5b7e90SMitchell Horne * printf-like arguments. 385da5b7e90SMitchell Horne */ 386da5b7e90SMitchell Horne int 387da5b7e90SMitchell Horne boottrace(const char *tdname, const char *fmt, ...) 388da5b7e90SMitchell Horne { 389da5b7e90SMitchell Horne char eventname[BT_EVENT_NAMELEN]; 390da5b7e90SMitchell Horne struct bt_table *btp; 391da5b7e90SMitchell Horne va_list ap; 392da5b7e90SMitchell Horne 393da5b7e90SMitchell Horne if (!dotrace_kernel) 394da5b7e90SMitchell Horne return (ENXIO); 395da5b7e90SMitchell Horne 396da5b7e90SMitchell Horne va_start(ap, fmt); 397da5b7e90SMitchell Horne vsnprintf(eventname, sizeof(eventname), fmt, ap); 398da5b7e90SMitchell Horne va_end(ap); 399da5b7e90SMitchell Horne 400da5b7e90SMitchell Horne btp = &bt; 401da5b7e90SMitchell Horne if (bootdone) 402da5b7e90SMitchell Horne btp = &rt; 403da5b7e90SMitchell Horne if (shutdown || rebooting || KERNEL_PANICKED()) 404da5b7e90SMitchell Horne btp = &st; 405da5b7e90SMitchell Horne 406da5b7e90SMitchell Horne return (dotrace(btp, eventname, tdname)); 407da5b7e90SMitchell Horne } 408da5b7e90SMitchell Horne 409da5b7e90SMitchell Horne /* 410da5b7e90SMitchell Horne * Log a run-time event & switch over to run-time tracing mode. 411da5b7e90SMitchell Horne */ 412da5b7e90SMitchell Horne static int 413da5b7e90SMitchell Horne runtrace(const char *eventname, const char *tdname) 414da5b7e90SMitchell Horne { 415da5b7e90SMitchell Horne 416da5b7e90SMitchell Horne bootdone = true; 417da5b7e90SMitchell Horne return (dotrace(&rt, eventname, tdname)); 418da5b7e90SMitchell Horne } 419da5b7e90SMitchell Horne 420da5b7e90SMitchell Horne /* 421da5b7e90SMitchell Horne * Parse a boottrace message from userspace. 422da5b7e90SMitchell Horne * 423da5b7e90SMitchell Horne * The input from may contain a ':' to denote tdname. If not, tdname is 424da5b7e90SMitchell Horne * inferred from the process' name. 425da5b7e90SMitchell Horne * 426da5b7e90SMitchell Horne * e.g. reboot(8):SIGINT to init(8) 427da5b7e90SMitchell Horne */ 428da5b7e90SMitchell Horne static void 429da5b7e90SMitchell Horne boottrace_parse_message(char *message, char **eventname, char **tdname) 430da5b7e90SMitchell Horne { 431da5b7e90SMitchell Horne char *delim; 432da5b7e90SMitchell Horne 433da5b7e90SMitchell Horne delim = strchr(message, ':'); 434da5b7e90SMitchell Horne if (delim != NULL) { 435da5b7e90SMitchell Horne *delim = '\0'; 436da5b7e90SMitchell Horne *tdname = message; 437da5b7e90SMitchell Horne *eventname = ++delim; 438da5b7e90SMitchell Horne } else { 439da5b7e90SMitchell Horne *tdname = curproc->p_comm; 440da5b7e90SMitchell Horne *eventname = message; 441da5b7e90SMitchell Horne } 442da5b7e90SMitchell Horne } 443da5b7e90SMitchell Horne 444da5b7e90SMitchell Horne static int 445da5b7e90SMitchell Horne _boottrace_sysctl(struct bt_table *btp, struct sysctl_oid *oidp, 446da5b7e90SMitchell Horne struct sysctl_req *req) 447da5b7e90SMitchell Horne { 448da5b7e90SMitchell Horne char message[BT_MSGLEN]; 449da5b7e90SMitchell Horne char *eventname, *tdname; 450da5b7e90SMitchell Horne int error; 451da5b7e90SMitchell Horne 452da5b7e90SMitchell Horne if (!dotrace_user) 453da5b7e90SMitchell Horne return (ENXIO); 454da5b7e90SMitchell Horne 455da5b7e90SMitchell Horne message[0] = '\0'; 456da5b7e90SMitchell Horne error = sysctl_handle_string(oidp, message, sizeof(message), req); 457da5b7e90SMitchell Horne if (error) 458da5b7e90SMitchell Horne return (error); 459da5b7e90SMitchell Horne 460da5b7e90SMitchell Horne boottrace_parse_message(message, &eventname, &tdname); 461da5b7e90SMitchell Horne error = dotrace(btp, eventname, tdname); 462da5b7e90SMitchell Horne if (error == ENOSPC) { 463da5b7e90SMitchell Horne /* Ignore table full error. */ 464da5b7e90SMitchell Horne error = 0; 465da5b7e90SMitchell Horne } 466da5b7e90SMitchell Horne return (error); 467da5b7e90SMitchell Horne } 468da5b7e90SMitchell Horne 469da5b7e90SMitchell Horne static int 470da5b7e90SMitchell Horne sysctl_log(SYSCTL_HANDLER_ARGS) 471da5b7e90SMitchell Horne { 472da5b7e90SMitchell Horne struct sbuf *sbuf; 473da5b7e90SMitchell Horne int error; 474da5b7e90SMitchell Horne 475da5b7e90SMitchell Horne if (!boottrace_enabled || req->newptr != NULL) 476da5b7e90SMitchell Horne return (0); 477da5b7e90SMitchell Horne 478da5b7e90SMitchell Horne sbuf = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 479da5b7e90SMitchell Horne boottrace_display(sbuf, &bt, 0); 480da5b7e90SMitchell Horne boottrace_display(sbuf, &rt, 0); 481da5b7e90SMitchell Horne sbuf_finish(sbuf); 482da5b7e90SMitchell Horne error = SYSCTL_OUT(req, sbuf_data(sbuf), sbuf_len(sbuf)); 483da5b7e90SMitchell Horne sbuf_delete(sbuf); 484da5b7e90SMitchell Horne 485da5b7e90SMitchell Horne return (error); 486da5b7e90SMitchell Horne } 487da5b7e90SMitchell Horne 488da5b7e90SMitchell Horne static int 489da5b7e90SMitchell Horne sysctl_boottrace(SYSCTL_HANDLER_ARGS) 490da5b7e90SMitchell Horne { 491da5b7e90SMitchell Horne 492da5b7e90SMitchell Horne if (!boottrace_enabled) 493da5b7e90SMitchell Horne return (0); 494da5b7e90SMitchell Horne 495da5b7e90SMitchell Horne /* No output */ 496da5b7e90SMitchell Horne if (req->newptr == NULL) 497da5b7e90SMitchell Horne return (0); 498da5b7e90SMitchell Horne 499da5b7e90SMitchell Horne /* Trace to rt if we have reached multi-user. */ 500da5b7e90SMitchell Horne return (_boottrace_sysctl(bootdone ? &rt : &bt, oidp, req)); 501da5b7e90SMitchell Horne } 502da5b7e90SMitchell Horne 503da5b7e90SMitchell Horne /* 504da5b7e90SMitchell Horne * Log a run-time event and switch over to run-time tracing mode. 505da5b7e90SMitchell Horne */ 506da5b7e90SMitchell Horne static int 507da5b7e90SMitchell Horne sysctl_runtrace(SYSCTL_HANDLER_ARGS) 508da5b7e90SMitchell Horne { 509da5b7e90SMitchell Horne 510da5b7e90SMitchell Horne if (!boottrace_enabled) 511da5b7e90SMitchell Horne return (0); 512da5b7e90SMitchell Horne 513da5b7e90SMitchell Horne /* No output */ 514da5b7e90SMitchell Horne if (req->newptr == NULL) 515da5b7e90SMitchell Horne return (0); 516da5b7e90SMitchell Horne 517da5b7e90SMitchell Horne bootdone = true; 518da5b7e90SMitchell Horne return (_boottrace_sysctl(&rt, oidp, req)); 519da5b7e90SMitchell Horne } 520da5b7e90SMitchell Horne 521da5b7e90SMitchell Horne /* 522da5b7e90SMitchell Horne * Log a shutdown-time event and switch over to shutdown tracing mode. 523da5b7e90SMitchell Horne */ 524da5b7e90SMitchell Horne static int 525da5b7e90SMitchell Horne sysctl_shuttrace(SYSCTL_HANDLER_ARGS) 526da5b7e90SMitchell Horne { 527da5b7e90SMitchell Horne 528da5b7e90SMitchell Horne if (!boottrace_enabled) 529da5b7e90SMitchell Horne return (0); 530da5b7e90SMitchell Horne 531da5b7e90SMitchell Horne /* No output */ 532da5b7e90SMitchell Horne if (req->newptr == NULL) 533da5b7e90SMitchell Horne return (0); 534da5b7e90SMitchell Horne 535da5b7e90SMitchell Horne shutdown = true; 536da5b7e90SMitchell Horne return (_boottrace_sysctl(&st, oidp, req)); 537da5b7e90SMitchell Horne } 538da5b7e90SMitchell Horne 539da5b7e90SMitchell Horne /* 540da5b7e90SMitchell Horne * Reset the runtime tracing buffer. 541da5b7e90SMitchell Horne */ 542da5b7e90SMitchell Horne void 543da5b7e90SMitchell Horne boottrace_reset(const char *actor) 544da5b7e90SMitchell Horne { 545da5b7e90SMitchell Horne char tmpbuf[64]; 546da5b7e90SMitchell Horne 547da5b7e90SMitchell Horne snprintf(tmpbuf, sizeof(tmpbuf), "reset: %s", actor); 548da5b7e90SMitchell Horne runtrace(tmpbuf, NULL); 549da5b7e90SMitchell Horne } 550da5b7e90SMitchell Horne 551da5b7e90SMitchell Horne /* 552da5b7e90SMitchell Horne * Note that a resize implies a reset, i.e., the index is reset to 0. 553da5b7e90SMitchell Horne * We never shrink the array; we can only increase its size. 554da5b7e90SMitchell Horne */ 555da5b7e90SMitchell Horne int 556da5b7e90SMitchell Horne boottrace_resize(u_int newsize) 557da5b7e90SMitchell Horne { 558da5b7e90SMitchell Horne 559da5b7e90SMitchell Horne if (newsize <= rt.size) { 560da5b7e90SMitchell Horne return (EINVAL); 561da5b7e90SMitchell Horne } 562da5b7e90SMitchell Horne rt.table = realloc(rt.table, newsize * sizeof(struct bt_event), 563da5b7e90SMitchell Horne M_BOOTTRACE, M_WAITOK | M_ZERO); 564da5b7e90SMitchell Horne rt.size = newsize; 565da5b7e90SMitchell Horne boottrace_reset("boottrace_resize"); 566da5b7e90SMitchell Horne return (0); 567da5b7e90SMitchell Horne } 568da5b7e90SMitchell Horne 569da5b7e90SMitchell Horne static int 570da5b7e90SMitchell Horne sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS) 571da5b7e90SMitchell Horne { 572da5b7e90SMitchell Horne 573da5b7e90SMitchell Horne if (!boottrace_enabled) 574da5b7e90SMitchell Horne return (0); 575da5b7e90SMitchell Horne 576da5b7e90SMitchell Horne if (req->newptr != NULL) 577da5b7e90SMitchell Horne boottrace_reset("sysctl_boottrace_reset"); 578da5b7e90SMitchell Horne 579da5b7e90SMitchell Horne return (0); 580da5b7e90SMitchell Horne } 581da5b7e90SMitchell Horne 582da5b7e90SMitchell Horne static void 583da5b7e90SMitchell Horne boottrace_init(void) 584da5b7e90SMitchell Horne { 585da5b7e90SMitchell Horne 586da5b7e90SMitchell Horne if (!boottrace_enabled) 587da5b7e90SMitchell Horne return; 588da5b7e90SMitchell Horne 589da5b7e90SMitchell Horne /* Boottime trace table */ 590da5b7e90SMitchell Horne bt.size = BT_TABLE_DEFSIZE; 591da5b7e90SMitchell Horne TUNABLE_INT_FETCH("kern.boottrace.table_size", &bt.size); 592da5b7e90SMitchell Horne bt.size = max(bt.size, BT_TABLE_MINSIZE); 593da5b7e90SMitchell Horne bt.table = malloc(bt.size * sizeof(struct bt_event), M_BOOTTRACE, 594da5b7e90SMitchell Horne M_WAITOK | M_ZERO); 595da5b7e90SMitchell Horne 596da5b7e90SMitchell Horne /* Stick in an initial entry. */ 597da5b7e90SMitchell Horne bt.table[0].cpuid = PCPU_GET(cpuid); 598da5b7e90SMitchell Horne strlcpy(bt.table[0].tdname, "boottime", BT_EVENT_TDNAMELEN); 599da5b7e90SMitchell Horne strlcpy(bt.table[0].name, "initial event", BT_EVENT_NAMELEN); 600da5b7e90SMitchell Horne bt.curr = 1; 601da5b7e90SMitchell Horne 602da5b7e90SMitchell Horne /* Run-time trace table (may wrap-around). */ 603da5b7e90SMitchell Horne rt.wrap = 1; 604da5b7e90SMitchell Horne rt.size = BT_TABLE_RUNSIZE; 605da5b7e90SMitchell Horne rt.table = malloc(rt.size * sizeof(struct bt_event), M_BOOTTRACE, 606da5b7e90SMitchell Horne M_WAITOK | M_ZERO); 607da5b7e90SMitchell Horne 608da5b7e90SMitchell Horne /* Shutdown trace table */ 609da5b7e90SMitchell Horne st.size = BT_TABLE_SHTSIZE; 610da5b7e90SMitchell Horne st.table = malloc(st.size * sizeof(struct bt_event), M_BOOTTRACE, 611da5b7e90SMitchell Horne M_WAITOK | M_ZERO); 612da5b7e90SMitchell Horne } 613*d9f1f0a9SZhenlei Huang SYSINIT(boottrace, SI_SUB_CPU, SI_ORDER_ANY, boottrace_init, NULL); 614