1*6670Stariq /* 2*6670Stariq * CDDL HEADER START 3*6670Stariq * 4*6670Stariq * The contents of this file are subject to the terms of the 5*6670Stariq * Common Development and Distribution License (the "License"). 6*6670Stariq * You may not use this file except in compliance with the License. 7*6670Stariq * 8*6670Stariq * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*6670Stariq * or http://www.opensolaris.org/os/licensing. 10*6670Stariq * See the License for the specific language governing permissions 11*6670Stariq * and limitations under the License. 12*6670Stariq * 13*6670Stariq * When distributing Covered Code, include this CDDL HEADER in each 14*6670Stariq * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*6670Stariq * If applicable, add the following below this CDDL HEADER, with the 16*6670Stariq * fields enclosed by brackets "[]" replaced with your own identifying 17*6670Stariq * information: Portions Copyright [yyyy] [name of copyright owner] 18*6670Stariq * 19*6670Stariq * CDDL HEADER END 20*6670Stariq */ 21*6670Stariq 22*6670Stariq /* 23*6670Stariq * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24*6670Stariq * Use is subject to license terms. 25*6670Stariq */ 26*6670Stariq 27*6670Stariq #pragma ident "%Z%%M% %I% %E% SMI" 28*6670Stariq 29*6670Stariq /* 30*6670Stariq * Xen event provider for DTrace 31*6670Stariq * 32*6670Stariq * NOTE: This provider is PRIVATE. It is intended as a short-term solution and 33*6670Stariq * may disappear or be re-implemented at anytime. 34*6670Stariq * 35*6670Stariq * This provider isn't suitable as a general-purpose solution for a number of 36*6670Stariq * reasons. First and foremost, we rely on the Xen tracing mechanism and don't 37*6670Stariq * have any way to gather data other than that collected by the Xen trace 38*6670Stariq * buffers. Further, it does not fit into the DTrace model (see "Interacting 39*6670Stariq * with DTrace" below.) 40*6670Stariq * 41*6670Stariq * 42*6670Stariq * Tracing in Xen 43*6670Stariq * -------------- 44*6670Stariq * 45*6670Stariq * Xen implements a tracing facility for generating and collecting execution 46*6670Stariq * event traces from the hypervisor. When tracing is enabled, compiled in 47*6670Stariq * probes record events in contiguous per-CPU trace buffers. 48*6670Stariq * 49*6670Stariq * +---------+ 50*6670Stariq * +------+ | | 51*6670Stariq * | CPUn |----> | BUFFERn | 52*6670Stariq * +------+ | | 53*6670Stariq * +---------+- tbuf.va + (tbuf.size * n) 54*6670Stariq * : : 55*6670Stariq * +---------+ 56*6670Stariq * +------+ | | 57*6670Stariq * | CPU1 |----> | BUFFER1 | 58*6670Stariq * +------+ | | 59*6670Stariq * +---------+- tbuf.va + tbuf.size 60*6670Stariq * +------+ | | 61*6670Stariq * | CPU0 |----> | BUFFER0 | 62*6670Stariq * +------+ | | 63*6670Stariq * +---------+- tbuf.va 64*6670Stariq * 65*6670Stariq * Each CPU buffer consists of a metadata header followed by the trace records. 66*6670Stariq * The metadata consists of a producer/consumer pair of pointers into the buffer 67*6670Stariq * that point to the next record to be written and the next record to be read 68*6670Stariq * respectively. The trace record format is as follows: 69*6670Stariq * 70*6670Stariq * +--------------------------------------------------------------------------+ 71*6670Stariq * | CPUID(uint_t) | TSC(uint64_t) | EVENTID(uint32_t) | DATA FIELDS | 72*6670Stariq * +--------------------------------------------------------------------------+ 73*6670Stariq * 74*6670Stariq * DATA FIELDS: 75*6670Stariq * +--------------------------------------------------------------------------+ 76*6670Stariq * | D1(uint32_t) | D2(uint32_t) | D3(uint32_t) | D4(uint32_t) | D5(uint32_t) | 77*6670Stariq * +--------------------------------------------------------------------------+ 78*6670Stariq * 79*6670Stariq * 80*6670Stariq * Interacting with DTrace 81*6670Stariq * ----------------------- 82*6670Stariq * 83*6670Stariq * Every xdt_poll_nsec nano-seconds we poll the trace buffers for data and feed 84*6670Stariq * each entry into dtrace_probe() with the corresponding probe ID for the event. 85*6670Stariq * As a result of this periodic collection implementation probe firings are 86*6670Stariq * asynchronous. This is the only sensible way to implement this form of 87*6670Stariq * provider, but because of its asynchronous nature asking things like 88*6670Stariq * "current CPU" and, more importantly, arbitrary questions about the context 89*6670Stariq * surrounding the probe firing are not meaningful. So, consumers should not 90*6670Stariq * attempt to infer anything beyond what is supplied via the probe arguments. 91*6670Stariq */ 92*6670Stariq 93*6670Stariq #include <sys/types.h> 94*6670Stariq #include <sys/sysmacros.h> 95*6670Stariq #include <sys/modctl.h> 96*6670Stariq #include <sys/sunddi.h> 97*6670Stariq #include <sys/ddi.h> 98*6670Stariq #include <sys/conf.h> 99*6670Stariq #include <sys/devops.h> 100*6670Stariq #include <sys/stat.h> 101*6670Stariq #include <sys/cmn_err.h> 102*6670Stariq #include <sys/dtrace.h> 103*6670Stariq #include <sys/sdt.h> 104*6670Stariq #include <sys/cyclic.h> 105*6670Stariq #include <vm/seg_kmem.h> 106*6670Stariq #include <vm/hat_i86.h> 107*6670Stariq #include <sys/hypervisor.h> 108*6670Stariq #include <xen/public/trace.h> 109*6670Stariq #include <xen/public/sched.h> 110*6670Stariq 111*6670Stariq #define XDT_POLL_DEFAULT 100000000 /* default poll interval (ns) */ 112*6670Stariq #define XDT_POLL_MIN 10000000 /* min poll interval (ns) */ 113*6670Stariq #define XDT_TBUF_RETRY 50 /* tbuf disable retry count */ 114*6670Stariq 115*6670Stariq /* 116*6670Stariq * The domid must match IDLE_DOMAIN_ID in xen.hg/xen/include/xen/sched.h 117*6670Stariq * in the xVM gate. 118*6670Stariq */ 119*6670Stariq #define IS_IDLE_DOM(domid) (domid == 0x7FFFU) 120*6670Stariq 121*6670Stariq /* Macros to extract the domid and cpuid from a HVM trace data field */ 122*6670Stariq #define HVM_DOMID(d) (d >> 16) 123*6670Stariq #define HVM_VCPUID(d) (d & 0xFFFF) 124*6670Stariq 125*6670Stariq #define XDT_PROBE4(event, cpuid, arg0, arg1, arg2, arg3) { \ 126*6670Stariq dtrace_id_t id = xdt_probemap[event]; \ 127*6670Stariq if (id) \ 128*6670Stariq dtrace_probe(id, cpuid, arg0, arg1, arg2, arg3); \ 129*6670Stariq } \ 130*6670Stariq 131*6670Stariq #define XDT_PROBE3(event, cpuid, arg0, arg1, arg2) \ 132*6670Stariq XDT_PROBE4(event, cpuid, arg0, arg1, arg2, 0) 133*6670Stariq 134*6670Stariq #define XDT_PROBE2(event, cpuid, arg0, arg1) \ 135*6670Stariq XDT_PROBE4(event, cpuid, arg0, arg1, 0, 0) 136*6670Stariq 137*6670Stariq #define XDT_PROBE1(event, cpuid, arg0) \ 138*6670Stariq XDT_PROBE4(event, cpuid, arg0, 0, 0, 0) 139*6670Stariq 140*6670Stariq #define XDT_PROBE0(event, cpuid) \ 141*6670Stariq XDT_PROBE4(event, cpuid, 0, 0, 0, 0) 142*6670Stariq 143*6670Stariq /* Probe classes */ 144*6670Stariq #define XDT_SCHED 0 145*6670Stariq #define XDT_MEM 1 146*6670Stariq #define XDT_HVM 2 147*6670Stariq #define XDT_NCLASSES 3 148*6670Stariq 149*6670Stariq /* Probe events */ 150*6670Stariq #define XDT_EVT_INVALID (-(int)1) 151*6670Stariq #define XDT_SCHED_OFF_CPU 0 152*6670Stariq #define XDT_SCHED_ON_CPU 1 153*6670Stariq #define XDT_SCHED_IDLE_OFF_CPU 2 154*6670Stariq #define XDT_SCHED_IDLE_ON_CPU 3 155*6670Stariq #define XDT_SCHED_BLOCK 4 156*6670Stariq #define XDT_SCHED_SLEEP 5 157*6670Stariq #define XDT_SCHED_WAKE 6 158*6670Stariq #define XDT_SCHED_YIELD 7 159*6670Stariq #define XDT_SCHED_SHUTDOWN_POWEROFF 8 160*6670Stariq #define XDT_SCHED_SHUTDOWN_REBOOT 9 161*6670Stariq #define XDT_SCHED_SHUTDOWN_SUSPEND 10 162*6670Stariq #define XDT_SCHED_SHUTDOWN_CRASH 11 163*6670Stariq #define XDT_MEM_PAGE_GRANT_MAP 12 164*6670Stariq #define XDT_MEM_PAGE_GRANT_UNMAP 13 165*6670Stariq #define XDT_MEM_PAGE_GRANT_TRANSFER 14 166*6670Stariq #define XDT_HVM_VMENTRY 15 167*6670Stariq #define XDT_HVM_VMEXIT 16 168*6670Stariq #define XDT_NEVENTS 17 169*6670Stariq 170*6670Stariq typedef struct { 171*6670Stariq const char *pr_mod; /* probe module */ 172*6670Stariq const char *pr_name; /* probe name */ 173*6670Stariq int evt_id; /* event id */ 174*6670Stariq uint_t class; /* probe class */ 175*6670Stariq } xdt_probe_t; 176*6670Stariq 177*6670Stariq typedef struct { 178*6670Stariq uint32_t trc_mask; /* trace mask */ 179*6670Stariq uint32_t cnt; /* num enabled probes in class */ 180*6670Stariq } xdt_classinfo_t; 181*6670Stariq 182*6670Stariq typedef struct { 183*6670Stariq ulong_t prev_domid; /* previous dom executed */ 184*6670Stariq ulong_t prev_vcpuid; /* previous vcpu executed */ 185*6670Stariq ulong_t prev_ctime; /* time spent on cpu */ 186*6670Stariq ulong_t next_domid; /* next dom to be scheduled */ 187*6670Stariq ulong_t next_vcpuid; /* next vcpu to be scheduled */ 188*6670Stariq ulong_t next_wtime; /* time spent waiting to get on cpu */ 189*6670Stariq ulong_t next_ts; /* allocated time slice */ 190*6670Stariq } xdt_schedinfo_t; 191*6670Stariq 192*6670Stariq static struct { 193*6670Stariq uint_t cnt; /* total num of trace buffers */ 194*6670Stariq size_t size; /* size of each cpu buffer */ 195*6670Stariq mfn_t start_mfn; /* starting mfn of buffers */ 196*6670Stariq caddr_t va; /* va buffers are mapped into */ 197*6670Stariq 198*6670Stariq /* per-cpu buffers */ 199*6670Stariq struct t_buf **meta; /* buffer metadata */ 200*6670Stariq struct t_rec **data; /* buffer data records */ 201*6670Stariq 202*6670Stariq /* statistics */ 203*6670Stariq uint64_t stat_dropped_recs; /* records dropped */ 204*6670Stariq uint64_t stat_spurious_cpu; /* recs with garbage cpuids */ 205*6670Stariq uint64_t stat_spurious_switch; /* inconsistent vcpu switches */ 206*6670Stariq uint64_t stat_unknown_shutdown; /* unknown shutdown code */ 207*6670Stariq uint64_t stat_unknown_recs; /* unknown records */ 208*6670Stariq } tbuf; 209*6670Stariq 210*6670Stariq static char *xdt_stats[] = { 211*6670Stariq "dropped_recs", 212*6670Stariq }; 213*6670Stariq 214*6670Stariq /* 215*6670Stariq * Tunable variables 216*6670Stariq * 217*6670Stariq * The following may be tuned by adding a line to /etc/system that 218*6670Stariq * includes both the name of the module ("xdt") and the name of the variable. 219*6670Stariq * For example: 220*6670Stariq * set xdt:xdt_tbuf_pages = 40 221*6670Stariq */ 222*6670Stariq uint_t xdt_tbuf_pages = 20; /* pages to alloc per-cpu buf */ 223*6670Stariq 224*6670Stariq /* 225*6670Stariq * The following may be tuned by adding a line to 226*6670Stariq * /platform/i86xpv/kernel/drv/xdt.conf. 227*6670Stariq * For example: 228*6670Stariq * xdt_poll_nsec = 200000000; 229*6670Stariq */ 230*6670Stariq static hrtime_t xdt_poll_nsec; /* trace buffer poll interval */ 231*6670Stariq 232*6670Stariq /* 233*6670Stariq * Internal variables 234*6670Stariq */ 235*6670Stariq static dev_info_t *xdt_devi; 236*6670Stariq static dtrace_provider_id_t xdt_id; 237*6670Stariq static uint_t xdt_ncpus; /* total number of phys CPUs */ 238*6670Stariq static uint32_t cur_trace_mask; /* current trace mask */ 239*6670Stariq static xdt_schedinfo_t *xdt_cpu_schedinfo; /* per-cpu sched info */ 240*6670Stariq dtrace_id_t xdt_probemap[XDT_NEVENTS]; /* map of enabled probes */ 241*6670Stariq dtrace_id_t xdt_prid[XDT_NEVENTS]; /* IDs of registered events */ 242*6670Stariq static cyclic_id_t xdt_cyclic = CYCLIC_NONE; 243*6670Stariq static kstat_t *xdt_kstats; 244*6670Stariq static xdt_classinfo_t xdt_classinfo[XDT_NCLASSES]; 245*6670Stariq 246*6670Stariq static xdt_probe_t xdt_probe[] = { 247*6670Stariq /* Sched probes */ 248*6670Stariq { "sched", "off-cpu", XDT_SCHED_OFF_CPU, XDT_SCHED }, 249*6670Stariq { "sched", "on-cpu", XDT_SCHED_ON_CPU, XDT_SCHED }, 250*6670Stariq { "sched", "idle-off-cpu", XDT_SCHED_IDLE_OFF_CPU, XDT_SCHED }, 251*6670Stariq { "sched", "idle-on-cpu", XDT_SCHED_IDLE_ON_CPU, XDT_SCHED }, 252*6670Stariq { "sched", "block", XDT_SCHED_BLOCK, XDT_SCHED }, 253*6670Stariq { "sched", "sleep", XDT_SCHED_SLEEP, XDT_SCHED }, 254*6670Stariq { "sched", "wake", XDT_SCHED_WAKE, XDT_SCHED }, 255*6670Stariq { "sched", "yield", XDT_SCHED_YIELD, XDT_SCHED }, 256*6670Stariq { "sched", "shutdown-poweroff", XDT_SCHED_SHUTDOWN_POWEROFF, 257*6670Stariq XDT_SCHED }, 258*6670Stariq { "sched", "shutdown-reboot", XDT_SCHED_SHUTDOWN_REBOOT, XDT_SCHED }, 259*6670Stariq { "sched", "shutdown-suspend", XDT_SCHED_SHUTDOWN_SUSPEND, XDT_SCHED }, 260*6670Stariq { "sched", "shutdown-crash", XDT_SCHED_SHUTDOWN_CRASH, XDT_SCHED }, 261*6670Stariq 262*6670Stariq /* Memory probes */ 263*6670Stariq { "mem", "page-grant-map", XDT_MEM_PAGE_GRANT_MAP, XDT_MEM }, 264*6670Stariq { "mem", "page-grant-unmap", XDT_MEM_PAGE_GRANT_UNMAP, XDT_MEM }, 265*6670Stariq { "mem", "page-grant-transfer", XDT_MEM_PAGE_GRANT_TRANSFER, XDT_MEM }, 266*6670Stariq 267*6670Stariq /* HVM probes */ 268*6670Stariq { "hvm", "vmentry", XDT_HVM_VMENTRY, XDT_HVM }, 269*6670Stariq { "hvm", "vmexit", XDT_HVM_VMEXIT, XDT_HVM }, 270*6670Stariq 271*6670Stariq { NULL } 272*6670Stariq }; 273*6670Stariq 274*6670Stariq extern uint_t xen_get_nphyscpus(void); 275*6670Stariq 276*6670Stariq static inline uint32_t 277*6670Stariq xdt_nr_active_probes() 278*6670Stariq { 279*6670Stariq int i; 280*6670Stariq uint32_t tot = 0; 281*6670Stariq 282*6670Stariq for (i = 0; i < XDT_NCLASSES; i++) 283*6670Stariq tot += xdt_classinfo[i].cnt; 284*6670Stariq 285*6670Stariq return (tot); 286*6670Stariq } 287*6670Stariq 288*6670Stariq static void 289*6670Stariq xdt_init_trace_masks(void) 290*6670Stariq { 291*6670Stariq xdt_classinfo[XDT_SCHED].trc_mask = TRC_SCHED; 292*6670Stariq xdt_classinfo[XDT_MEM].trc_mask = TRC_MEM; 293*6670Stariq xdt_classinfo[XDT_HVM].trc_mask = TRC_HVM; 294*6670Stariq } 295*6670Stariq 296*6670Stariq static int 297*6670Stariq xdt_kstat_update(kstat_t *ksp, int flag) 298*6670Stariq { 299*6670Stariq kstat_named_t *knp; 300*6670Stariq 301*6670Stariq if (flag != KSTAT_READ) 302*6670Stariq return (EACCES); 303*6670Stariq 304*6670Stariq knp = ksp->ks_data; 305*6670Stariq 306*6670Stariq /* 307*6670Stariq * Assignment order should match that of the names in 308*6670Stariq * xdt_stats. 309*6670Stariq */ 310*6670Stariq (knp++)->value.ui64 = tbuf.stat_dropped_recs; 311*6670Stariq 312*6670Stariq return (0); 313*6670Stariq } 314*6670Stariq 315*6670Stariq static void 316*6670Stariq xdt_kstat_init(void) 317*6670Stariq { 318*6670Stariq int nstats = sizeof (xdt_stats) / sizeof (xdt_stats[0]); 319*6670Stariq char **cp = xdt_stats; 320*6670Stariq kstat_named_t *knp; 321*6670Stariq 322*6670Stariq if ((xdt_kstats = kstat_create("xdt", 0, "trace_statistics", "misc", 323*6670Stariq KSTAT_TYPE_NAMED, nstats, 0)) == NULL) 324*6670Stariq return; 325*6670Stariq 326*6670Stariq xdt_kstats->ks_update = xdt_kstat_update; 327*6670Stariq 328*6670Stariq knp = xdt_kstats->ks_data; 329*6670Stariq while (nstats > 0) { 330*6670Stariq kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 331*6670Stariq knp++; 332*6670Stariq cp++; 333*6670Stariq nstats--; 334*6670Stariq } 335*6670Stariq 336*6670Stariq kstat_install(xdt_kstats); 337*6670Stariq } 338*6670Stariq 339*6670Stariq static int 340*6670Stariq xdt_sysctl_tbuf(xen_sysctl_tbuf_op_t *tbuf_op) 341*6670Stariq { 342*6670Stariq xen_sysctl_t op; 343*6670Stariq int xerr; 344*6670Stariq 345*6670Stariq op.cmd = XEN_SYSCTL_tbuf_op; 346*6670Stariq op.interface_version = XEN_SYSCTL_INTERFACE_VERSION; 347*6670Stariq op.u.tbuf_op = *tbuf_op; 348*6670Stariq 349*6670Stariq if ((xerr = HYPERVISOR_sysctl(&op)) != 0) 350*6670Stariq return (xen_xlate_errcode(xerr)); 351*6670Stariq 352*6670Stariq *tbuf_op = op.u.tbuf_op; 353*6670Stariq return (0); 354*6670Stariq } 355*6670Stariq 356*6670Stariq static int 357*6670Stariq xdt_map_trace_buffers(mfn_t mfn, caddr_t va, size_t len) 358*6670Stariq { 359*6670Stariq x86pte_t pte; 360*6670Stariq caddr_t const sva = va; 361*6670Stariq caddr_t const eva = va + len; 362*6670Stariq int xerr; 363*6670Stariq 364*6670Stariq ASSERT(mfn != MFN_INVALID); 365*6670Stariq ASSERT(va != NULL); 366*6670Stariq ASSERT(IS_PAGEALIGNED(len)); 367*6670Stariq 368*6670Stariq for (; va < eva; va += MMU_PAGESIZE) { 369*6670Stariq /* 370*6670Stariq * Ask the HAT to load a throwaway mapping to page zero, then 371*6670Stariq * overwrite it with the hypervisor mapping. It gets removed 372*6670Stariq * later via hat_unload(). 373*6670Stariq */ 374*6670Stariq hat_devload(kas.a_hat, va, MMU_PAGESIZE, (pfn_t)0, 375*6670Stariq PROT_READ | HAT_UNORDERED_OK, 376*6670Stariq HAT_LOAD_NOCONSIST | HAT_LOAD); 377*6670Stariq 378*6670Stariq pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER 379*6670Stariq | PT_FOREIGN | PT_WRITABLE; 380*6670Stariq 381*6670Stariq xerr = HYPERVISOR_update_va_mapping_otherdomain((ulong_t)va, 382*6670Stariq pte, UVMF_INVLPG | UVMF_LOCAL, DOMID_XEN); 383*6670Stariq 384*6670Stariq if (xerr != 0) { 385*6670Stariq /* unmap pages loaded so far */ 386*6670Stariq size_t ulen = (uintptr_t)(va + MMU_PAGESIZE) - 387*6670Stariq (uintptr_t)sva; 388*6670Stariq hat_unload(kas.a_hat, sva, ulen, HAT_UNLOAD_UNMAP); 389*6670Stariq return (xen_xlate_errcode(xerr)); 390*6670Stariq } 391*6670Stariq 392*6670Stariq mfn++; 393*6670Stariq } 394*6670Stariq 395*6670Stariq return (0); 396*6670Stariq } 397*6670Stariq 398*6670Stariq static int 399*6670Stariq xdt_attach_trace_buffers(void) 400*6670Stariq { 401*6670Stariq xen_sysctl_tbuf_op_t tbuf_op; 402*6670Stariq size_t len; 403*6670Stariq int err; 404*6670Stariq uint_t i; 405*6670Stariq 406*6670Stariq /* set trace buffer size */ 407*6670Stariq tbuf_op.cmd = XEN_SYSCTL_TBUFOP_set_size; 408*6670Stariq tbuf_op.size = xdt_tbuf_pages; 409*6670Stariq (void) xdt_sysctl_tbuf(&tbuf_op); 410*6670Stariq 411*6670Stariq /* get trace buffer info */ 412*6670Stariq tbuf_op.cmd = XEN_SYSCTL_TBUFOP_get_info; 413*6670Stariq if ((err = xdt_sysctl_tbuf(&tbuf_op)) != 0) 414*6670Stariq return (err); 415*6670Stariq 416*6670Stariq tbuf.size = tbuf_op.size; 417*6670Stariq tbuf.start_mfn = (mfn_t)tbuf_op.buffer_mfn; 418*6670Stariq tbuf.cnt = xdt_ncpus; 419*6670Stariq 420*6670Stariq if (tbuf.size == 0) { 421*6670Stariq cmn_err(CE_NOTE, "No trace buffers allocated!"); 422*6670Stariq return (ENOBUFS); 423*6670Stariq } 424*6670Stariq 425*6670Stariq ASSERT(tbuf.start_mfn != MFN_INVALID); 426*6670Stariq ASSERT(tbuf.cnt > 0); 427*6670Stariq 428*6670Stariq len = tbuf.size * tbuf.cnt; 429*6670Stariq tbuf.va = vmem_alloc(heap_arena, len, VM_SLEEP); 430*6670Stariq 431*6670Stariq if ((err = xdt_map_trace_buffers(tbuf.start_mfn, tbuf.va, len)) != 0) { 432*6670Stariq vmem_free(heap_arena, tbuf.va, len); 433*6670Stariq tbuf.va = NULL; 434*6670Stariq return (err); 435*6670Stariq } 436*6670Stariq 437*6670Stariq tbuf.meta = (struct t_buf **)kmem_alloc(tbuf.cnt * sizeof (*tbuf.meta), 438*6670Stariq KM_SLEEP); 439*6670Stariq tbuf.data = (struct t_rec **)kmem_alloc(tbuf.cnt * sizeof (*tbuf.data), 440*6670Stariq KM_SLEEP); 441*6670Stariq 442*6670Stariq for (i = 0; i < tbuf.cnt; i++) { 443*6670Stariq void *cpu_buf = (void *)(tbuf.va + (tbuf.size * i)); 444*6670Stariq tbuf.meta[i] = cpu_buf; 445*6670Stariq tbuf.data[i] = (struct t_rec *)((uintptr_t)cpu_buf + 446*6670Stariq sizeof (struct t_buf)); 447*6670Stariq 448*6670Stariq /* throw away stale trace records */ 449*6670Stariq tbuf.meta[i]->cons = tbuf.meta[i]->prod; 450*6670Stariq } 451*6670Stariq 452*6670Stariq return (0); 453*6670Stariq } 454*6670Stariq 455*6670Stariq static void 456*6670Stariq xdt_detach_trace_buffers(void) 457*6670Stariq { 458*6670Stariq size_t len = tbuf.size * tbuf.cnt; 459*6670Stariq 460*6670Stariq ASSERT(tbuf.va != NULL); 461*6670Stariq 462*6670Stariq hat_unload(kas.a_hat, tbuf.va, len, 463*6670Stariq HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK); 464*6670Stariq vmem_free(heap_arena, tbuf.va, len); 465*6670Stariq kmem_free(tbuf.meta, tbuf.cnt * sizeof (*tbuf.meta)); 466*6670Stariq kmem_free(tbuf.data, tbuf.cnt * sizeof (*tbuf.data)); 467*6670Stariq } 468*6670Stariq 469*6670Stariq static inline void 470*6670Stariq xdt_process_rec(uint_t cpuid, struct t_rec *rec) 471*6670Stariq { 472*6670Stariq xdt_schedinfo_t *sp = &xdt_cpu_schedinfo[cpuid]; 473*6670Stariq int eid; 474*6670Stariq 475*6670Stariq ASSERT(rec != NULL); 476*6670Stariq ASSERT(xdt_ncpus == xen_get_nphyscpus()); 477*6670Stariq 478*6670Stariq if (cpuid >= xdt_ncpus) { 479*6670Stariq tbuf.stat_spurious_cpu++; 480*6670Stariq return; 481*6670Stariq } 482*6670Stariq 483*6670Stariq switch (rec->event) { 484*6670Stariq 485*6670Stariq /* 486*6670Stariq * Sched probes 487*6670Stariq */ 488*6670Stariq case TRC_SCHED_SWITCH_INFPREV: 489*6670Stariq /* 490*6670Stariq * Info on vCPU being de-scheduled 491*6670Stariq * 492*6670Stariq * rec->data[0] = prev domid 493*6670Stariq * rec->data[1] = time spent on pcpu 494*6670Stariq */ 495*6670Stariq sp->prev_domid = rec->data[0]; 496*6670Stariq sp->prev_ctime = rec->data[1]; 497*6670Stariq break; 498*6670Stariq 499*6670Stariq case TRC_SCHED_SWITCH_INFNEXT: 500*6670Stariq /* 501*6670Stariq * Info on next vCPU to be scheduled 502*6670Stariq * 503*6670Stariq * rec->data[0] = next domid 504*6670Stariq * rec->data[1] = time spent waiting to get on cpu 505*6670Stariq * rec->data[2] = time slice 506*6670Stariq */ 507*6670Stariq sp->next_domid = rec->data[0]; 508*6670Stariq sp->next_wtime = rec->data[1]; 509*6670Stariq sp->next_ts = rec->data[2]; 510*6670Stariq break; 511*6670Stariq 512*6670Stariq case TRC_SCHED_SWITCH: 513*6670Stariq /* 514*6670Stariq * vCPU switch 515*6670Stariq * 516*6670Stariq * rec->data[0] = prev domid 517*6670Stariq * rec->data[1] = prev vcpuid 518*6670Stariq * rec->data[2] = next domid 519*6670Stariq * rec->data[3] = next vcpuid 520*6670Stariq */ 521*6670Stariq if (rec->data[0] != sp->prev_domid && 522*6670Stariq rec->data[2] != sp->next_domid) { 523*6670Stariq /* prev and next info don't match doms being sched'd */ 524*6670Stariq tbuf.stat_spurious_switch++; 525*6670Stariq return; 526*6670Stariq } 527*6670Stariq 528*6670Stariq sp->prev_vcpuid = rec->data[1]; 529*6670Stariq sp->next_vcpuid = rec->data[3]; 530*6670Stariq 531*6670Stariq XDT_PROBE3(IS_IDLE_DOM(sp->prev_domid)? 532*6670Stariq XDT_SCHED_IDLE_OFF_CPU:XDT_SCHED_OFF_CPU, 533*6670Stariq cpuid, sp->prev_domid, sp->prev_vcpuid, sp->prev_ctime); 534*6670Stariq 535*6670Stariq XDT_PROBE4(IS_IDLE_DOM(sp->next_domid)? 536*6670Stariq XDT_SCHED_IDLE_ON_CPU:XDT_SCHED_ON_CPU, 537*6670Stariq cpuid, sp->next_domid, sp->next_vcpuid, sp->next_wtime, 538*6670Stariq sp->next_ts); 539*6670Stariq break; 540*6670Stariq 541*6670Stariq case TRC_SCHED_BLOCK: 542*6670Stariq /* 543*6670Stariq * vCPU blocked 544*6670Stariq * 545*6670Stariq * rec->data[0] = domid 546*6670Stariq * rec->data[1] = vcpuid 547*6670Stariq */ 548*6670Stariq XDT_PROBE2(XDT_SCHED_BLOCK, cpuid, rec->data[0], rec->data[1]); 549*6670Stariq break; 550*6670Stariq 551*6670Stariq case TRC_SCHED_SLEEP: 552*6670Stariq /* 553*6670Stariq * Put vCPU to sleep 554*6670Stariq * 555*6670Stariq * rec->data[0] = domid 556*6670Stariq * rec->data[1] = vcpuid 557*6670Stariq */ 558*6670Stariq XDT_PROBE2(XDT_SCHED_SLEEP, cpuid, rec->data[0], rec->data[1]); 559*6670Stariq break; 560*6670Stariq 561*6670Stariq case TRC_SCHED_WAKE: 562*6670Stariq /* 563*6670Stariq * Wake up vCPU 564*6670Stariq * 565*6670Stariq * rec->data[0] = domid 566*6670Stariq * rec->data[1] = vcpuid 567*6670Stariq */ 568*6670Stariq XDT_PROBE2(XDT_SCHED_WAKE, cpuid, rec->data[0], rec->data[1]); 569*6670Stariq break; 570*6670Stariq 571*6670Stariq case TRC_SCHED_YIELD: 572*6670Stariq /* 573*6670Stariq * vCPU yielded 574*6670Stariq * 575*6670Stariq * rec->data[0] = domid 576*6670Stariq * rec->data[1] = vcpuid 577*6670Stariq */ 578*6670Stariq XDT_PROBE2(XDT_SCHED_YIELD, cpuid, rec->data[0], rec->data[1]); 579*6670Stariq break; 580*6670Stariq 581*6670Stariq case TRC_SCHED_SHUTDOWN: 582*6670Stariq /* 583*6670Stariq * Guest shutting down 584*6670Stariq * 585*6670Stariq * rec->data[0] = domid 586*6670Stariq * rec->data[1] = initiating vcpu 587*6670Stariq * rec->data[2] = shutdown code 588*6670Stariq */ 589*6670Stariq switch (rec->data[2]) { 590*6670Stariq case SHUTDOWN_poweroff: 591*6670Stariq eid = XDT_SCHED_SHUTDOWN_POWEROFF; 592*6670Stariq break; 593*6670Stariq case SHUTDOWN_reboot: 594*6670Stariq eid = XDT_SCHED_SHUTDOWN_REBOOT; 595*6670Stariq break; 596*6670Stariq case SHUTDOWN_suspend: 597*6670Stariq eid = XDT_SCHED_SHUTDOWN_SUSPEND; 598*6670Stariq break; 599*6670Stariq case SHUTDOWN_crash: 600*6670Stariq eid = XDT_SCHED_SHUTDOWN_CRASH; 601*6670Stariq break; 602*6670Stariq default: 603*6670Stariq tbuf.stat_unknown_shutdown++; 604*6670Stariq return; 605*6670Stariq } 606*6670Stariq 607*6670Stariq XDT_PROBE1(eid, cpuid, rec->data[0]); 608*6670Stariq break; 609*6670Stariq 610*6670Stariq /* 611*6670Stariq * Mem probes 612*6670Stariq */ 613*6670Stariq case TRC_MEM_PAGE_GRANT_MAP: 614*6670Stariq /* 615*6670Stariq * Guest mapped page grant 616*6670Stariq * 617*6670Stariq * rec->data[0] = domid 618*6670Stariq */ 619*6670Stariq XDT_PROBE1(XDT_MEM_PAGE_GRANT_MAP, cpuid, rec->data[0]); 620*6670Stariq break; 621*6670Stariq 622*6670Stariq case TRC_MEM_PAGE_GRANT_UNMAP: 623*6670Stariq /* 624*6670Stariq * Guest unmapped page grant 625*6670Stariq * 626*6670Stariq * rec->data[0] = domid 627*6670Stariq */ 628*6670Stariq XDT_PROBE1(XDT_MEM_PAGE_GRANT_UNMAP, cpuid, rec->data[0]); 629*6670Stariq break; 630*6670Stariq 631*6670Stariq case TRC_MEM_PAGE_GRANT_TRANSFER: 632*6670Stariq /* 633*6670Stariq * Page grant is being transferred 634*6670Stariq * 635*6670Stariq * rec->data[0] = target domid 636*6670Stariq */ 637*6670Stariq XDT_PROBE1(XDT_MEM_PAGE_GRANT_TRANSFER, cpuid, rec->data[0]); 638*6670Stariq break; 639*6670Stariq 640*6670Stariq /* 641*6670Stariq * HVM probes 642*6670Stariq */ 643*6670Stariq case TRC_HVM_VMENTRY: 644*6670Stariq /* 645*6670Stariq * Return to guest via vmx_launch/vmrun 646*6670Stariq * 647*6670Stariq * rec->data[0] = (domid<<16 + vcpuid) 648*6670Stariq */ 649*6670Stariq XDT_PROBE2(XDT_HVM_VMENTRY, cpuid, HVM_DOMID(rec->data[0]), 650*6670Stariq HVM_VCPUID(rec->data[0])); 651*6670Stariq break; 652*6670Stariq 653*6670Stariq case TRC_HVM_VMEXIT: 654*6670Stariq /* 655*6670Stariq * Entry into VMEXIT handler 656*6670Stariq * 657*6670Stariq * rec->data[0] = (domid<<16 + vcpuid) 658*6670Stariq * rec->data[1] = guest rip 659*6670Stariq * rec->data[2] = cpu vendor specific exit code 660*6670Stariq */ 661*6670Stariq XDT_PROBE4(XDT_HVM_VMEXIT, cpuid, HVM_DOMID(rec->data[0]), 662*6670Stariq HVM_VCPUID(rec->data[0]), rec->data[1], rec->data[2]); 663*6670Stariq break; 664*6670Stariq 665*6670Stariq case TRC_LOST_RECORDS: 666*6670Stariq tbuf.stat_dropped_recs++; 667*6670Stariq break; 668*6670Stariq 669*6670Stariq default: 670*6670Stariq tbuf.stat_unknown_recs++; 671*6670Stariq break; 672*6670Stariq } 673*6670Stariq } 674*6670Stariq 675*6670Stariq /*ARGSUSED*/ 676*6670Stariq static void 677*6670Stariq xdt_tbuf_scan(void *arg) 678*6670Stariq { 679*6670Stariq uint_t cpuid; 680*6670Stariq size_t nrecs; 681*6670Stariq struct t_rec *rec; 682*6670Stariq uint32_t prod; 683*6670Stariq 684*6670Stariq nrecs = (tbuf.size - sizeof (struct t_buf)) / sizeof (struct t_rec); 685*6670Stariq 686*6670Stariq /* scan all cpu buffers for new records */ 687*6670Stariq for (cpuid = 0; cpuid < tbuf.cnt; cpuid++) { 688*6670Stariq prod = tbuf.meta[cpuid]->prod; 689*6670Stariq membar_consumer(); /* read prod /then/ data */ 690*6670Stariq while (tbuf.meta[cpuid]->cons != prod) { 691*6670Stariq rec = tbuf.data[cpuid] + tbuf.meta[cpuid]->cons % nrecs; 692*6670Stariq xdt_process_rec(cpuid, rec); 693*6670Stariq membar_exit(); /* read data /then/ update cons */ 694*6670Stariq tbuf.meta[cpuid]->cons++; 695*6670Stariq } 696*6670Stariq } 697*6670Stariq } 698*6670Stariq 699*6670Stariq static void 700*6670Stariq xdt_cyclic_enable(void) 701*6670Stariq { 702*6670Stariq cyc_handler_t hdlr; 703*6670Stariq cyc_time_t when; 704*6670Stariq 705*6670Stariq ASSERT(MUTEX_HELD(&cpu_lock)); 706*6670Stariq 707*6670Stariq hdlr.cyh_func = xdt_tbuf_scan; 708*6670Stariq hdlr.cyh_arg = NULL; 709*6670Stariq hdlr.cyh_level = CY_LOW_LEVEL; 710*6670Stariq 711*6670Stariq when.cyt_interval = xdt_poll_nsec; 712*6670Stariq when.cyt_when = dtrace_gethrtime() + when.cyt_interval; 713*6670Stariq 714*6670Stariq xdt_cyclic = cyclic_add(&hdlr, &when); 715*6670Stariq } 716*6670Stariq 717*6670Stariq static void 718*6670Stariq xdt_probe_create(xdt_probe_t *p) 719*6670Stariq { 720*6670Stariq ASSERT(p != NULL && p->pr_mod != NULL); 721*6670Stariq 722*6670Stariq if (dtrace_probe_lookup(xdt_id, p->pr_mod, NULL, p->pr_name) != 0) 723*6670Stariq return; 724*6670Stariq 725*6670Stariq xdt_prid[p->evt_id] = dtrace_probe_create(xdt_id, p->pr_mod, NULL, 726*6670Stariq p->pr_name, dtrace_mach_aframes(), p); 727*6670Stariq } 728*6670Stariq 729*6670Stariq /*ARGSUSED*/ 730*6670Stariq static void 731*6670Stariq xdt_provide(void *arg, const dtrace_probedesc_t *desc) 732*6670Stariq { 733*6670Stariq const char *mod, *name; 734*6670Stariq int i; 735*6670Stariq 736*6670Stariq if (desc == NULL) { 737*6670Stariq for (i = 0; xdt_probe[i].pr_mod != NULL; i++) { 738*6670Stariq xdt_probe_create(&xdt_probe[i]); 739*6670Stariq } 740*6670Stariq } else { 741*6670Stariq mod = desc->dtpd_mod; 742*6670Stariq name = desc->dtpd_name; 743*6670Stariq for (i = 0; xdt_probe[i].pr_mod != NULL; i++) { 744*6670Stariq int l1 = strlen(xdt_probe[i].pr_name); 745*6670Stariq int l2 = strlen(xdt_probe[i].pr_mod); 746*6670Stariq if (strncmp(name, xdt_probe[i].pr_name, l1) == 0 && 747*6670Stariq strncmp(mod, xdt_probe[i].pr_mod, l2) == 0) 748*6670Stariq break; 749*6670Stariq } 750*6670Stariq 751*6670Stariq if (xdt_probe[i].pr_mod == NULL) 752*6670Stariq return; 753*6670Stariq xdt_probe_create(&xdt_probe[i]); 754*6670Stariq } 755*6670Stariq 756*6670Stariq } 757*6670Stariq 758*6670Stariq /*ARGSUSED*/ 759*6670Stariq static void 760*6670Stariq xdt_destroy(void *arg, dtrace_id_t id, void *parg) 761*6670Stariq { 762*6670Stariq xdt_probe_t *p = parg; 763*6670Stariq xdt_prid[p->evt_id] = 0; 764*6670Stariq } 765*6670Stariq 766*6670Stariq static void 767*6670Stariq xdt_set_trace_mask(uint32_t mask) 768*6670Stariq { 769*6670Stariq xen_sysctl_tbuf_op_t tbuf_op; 770*6670Stariq 771*6670Stariq tbuf_op.cmd = XEN_SYSCTL_TBUFOP_set_evt_mask; 772*6670Stariq tbuf_op.evt_mask = mask; 773*6670Stariq (void) xdt_sysctl_tbuf(&tbuf_op); 774*6670Stariq } 775*6670Stariq 776*6670Stariq /*ARGSUSED*/ 777*6670Stariq static void 778*6670Stariq xdt_enable(void *arg, dtrace_id_t id, void *parg) 779*6670Stariq { 780*6670Stariq xdt_probe_t *p = parg; 781*6670Stariq xen_sysctl_tbuf_op_t tbuf_op; 782*6670Stariq 783*6670Stariq ASSERT(MUTEX_HELD(&cpu_lock)); 784*6670Stariq ASSERT(xdt_prid[p->evt_id] != 0); 785*6670Stariq 786*6670Stariq xdt_probemap[p->evt_id] = xdt_prid[p->evt_id]; 787*6670Stariq xdt_classinfo[p->class].cnt++; 788*6670Stariq 789*6670Stariq if (xdt_classinfo[p->class].cnt == 1) { 790*6670Stariq /* set the trace mask for this class */ 791*6670Stariq cur_trace_mask |= xdt_classinfo[p->class].trc_mask; 792*6670Stariq xdt_set_trace_mask(cur_trace_mask); 793*6670Stariq } 794*6670Stariq 795*6670Stariq if (xdt_cyclic == CYCLIC_NONE) { 796*6670Stariq /* 797*6670Stariq * DTrace doesn't have the notion of failing an enabling. It 798*6670Stariq * works on the premise that, if you have advertised a probe 799*6670Stariq * via the pops->dtps_provide() function, you can enable it. 800*6670Stariq * Failure is not an option. In the case where we can't enable 801*6670Stariq * Xen tracing the consumer will carry on regardless and 802*6670Stariq * think all is OK except the probes will never fire. 803*6670Stariq */ 804*6670Stariq tbuf_op.cmd = XEN_SYSCTL_TBUFOP_enable; 805*6670Stariq if (xdt_sysctl_tbuf(&tbuf_op) != 0) { 806*6670Stariq cmn_err(CE_NOTE, "Couldn't enable hypervisor tracing."); 807*6670Stariq return; 808*6670Stariq } 809*6670Stariq 810*6670Stariq xdt_cyclic_enable(); 811*6670Stariq } 812*6670Stariq } 813*6670Stariq 814*6670Stariq /*ARGSUSED*/ 815*6670Stariq static void 816*6670Stariq xdt_disable(void *arg, dtrace_id_t id, void *parg) 817*6670Stariq { 818*6670Stariq xdt_probe_t *p = parg; 819*6670Stariq xen_sysctl_tbuf_op_t tbuf_op; 820*6670Stariq int i, err; 821*6670Stariq 822*6670Stariq ASSERT(MUTEX_HELD(&cpu_lock)); 823*6670Stariq ASSERT(xdt_probemap[p->evt_id] != 0); 824*6670Stariq ASSERT(xdt_probemap[p->evt_id] == xdt_prid[p->evt_id]); 825*6670Stariq ASSERT(xdt_classinfo[p->class].cnt > 0); 826*6670Stariq 827*6670Stariq /* 828*6670Stariq * We could be here in the slight window between the cyclic firing and 829*6670Stariq * a call to dtrace_probe() occurring. We need to be careful if we tear 830*6670Stariq * down any shared state. 831*6670Stariq */ 832*6670Stariq 833*6670Stariq xdt_probemap[p->evt_id] = 0; 834*6670Stariq xdt_classinfo[p->class].cnt--; 835*6670Stariq 836*6670Stariq if (xdt_nr_active_probes() == 0) { 837*6670Stariq cur_trace_mask = 0; 838*6670Stariq 839*6670Stariq if (xdt_cyclic == CYCLIC_NONE) 840*6670Stariq return; 841*6670Stariq 842*6670Stariq /* 843*6670Stariq * We will try to disable the trace buffers. If we fail for some 844*6670Stariq * reason we will try again, up to a count of XDT_TBUF_RETRY. 845*6670Stariq * If we still aren't successful we try to set the trace mask 846*6670Stariq * to 0 in order to prevent trace records from being written. 847*6670Stariq */ 848*6670Stariq tbuf_op.cmd = XEN_SYSCTL_TBUFOP_disable; 849*6670Stariq i = 0; 850*6670Stariq do { 851*6670Stariq err = xdt_sysctl_tbuf(&tbuf_op); 852*6670Stariq } while ((err != 0) && (++i < XDT_TBUF_RETRY)); 853*6670Stariq 854*6670Stariq if (err != 0) { 855*6670Stariq cmn_err(CE_NOTE, 856*6670Stariq "Couldn't disable hypervisor tracing."); 857*6670Stariq xdt_set_trace_mask(0); 858*6670Stariq } else { 859*6670Stariq cyclic_remove(xdt_cyclic); 860*6670Stariq xdt_cyclic = CYCLIC_NONE; 861*6670Stariq /* 862*6670Stariq * We don't bother making the hypercall to set 863*6670Stariq * the trace mask, since it will be reset when 864*6670Stariq * tracing is re-enabled. 865*6670Stariq */ 866*6670Stariq } 867*6670Stariq } else if (xdt_classinfo[p->class].cnt == 0) { 868*6670Stariq cur_trace_mask ^= xdt_classinfo[p->class].trc_mask; 869*6670Stariq /* other probes are enabled, so add the sub-class mask back */ 870*6670Stariq cur_trace_mask |= 0xF000; 871*6670Stariq xdt_set_trace_mask(cur_trace_mask); 872*6670Stariq } 873*6670Stariq } 874*6670Stariq 875*6670Stariq static dtrace_pattr_t xdt_attr = { 876*6670Stariq { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, 877*6670Stariq { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, 878*6670Stariq { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 879*6670Stariq { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, 880*6670Stariq { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, 881*6670Stariq }; 882*6670Stariq 883*6670Stariq static dtrace_pops_t xdt_pops = { 884*6670Stariq xdt_provide, /* dtps_provide() */ 885*6670Stariq NULL, /* dtps_provide_module() */ 886*6670Stariq xdt_enable, /* dtps_enable() */ 887*6670Stariq xdt_disable, /* dtps_disable() */ 888*6670Stariq NULL, /* dtps_suspend() */ 889*6670Stariq NULL, /* dtps_resume() */ 890*6670Stariq NULL, /* dtps_getargdesc() */ 891*6670Stariq NULL, /* dtps_getargval() */ 892*6670Stariq NULL, /* dtps_usermode() */ 893*6670Stariq xdt_destroy /* dtps_destroy() */ 894*6670Stariq }; 895*6670Stariq 896*6670Stariq static int 897*6670Stariq xdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 898*6670Stariq { 899*6670Stariq int val; 900*6670Stariq 901*6670Stariq if (!DOMAIN_IS_INITDOMAIN(xen_info)) 902*6670Stariq return (DDI_FAILURE); 903*6670Stariq 904*6670Stariq switch (cmd) { 905*6670Stariq case DDI_ATTACH: 906*6670Stariq break; 907*6670Stariq 908*6670Stariq case DDI_RESUME: 909*6670Stariq /* 910*6670Stariq * We might support proper suspend/resume in the future, so, 911*6670Stariq * return DDI_FAILURE for now. 912*6670Stariq */ 913*6670Stariq return (DDI_FAILURE); 914*6670Stariq 915*6670Stariq default: 916*6670Stariq return (DDI_FAILURE); 917*6670Stariq } 918*6670Stariq 919*6670Stariq xdt_ncpus = xen_get_nphyscpus(); 920*6670Stariq ASSERT(xdt_ncpus > 0); 921*6670Stariq 922*6670Stariq if (ddi_create_minor_node(devi, "xdt", S_IFCHR, 0, DDI_PSEUDO, 0) == 923*6670Stariq DDI_FAILURE || xdt_attach_trace_buffers() != 0 || 924*6670Stariq dtrace_register("xdt", &xdt_attr, DTRACE_PRIV_KERNEL, NULL, 925*6670Stariq &xdt_pops, NULL, &xdt_id) != 0) { 926*6670Stariq if (tbuf.va != NULL) 927*6670Stariq xdt_detach_trace_buffers(); 928*6670Stariq ddi_remove_minor_node(devi, NULL); 929*6670Stariq return (DDI_FAILURE); 930*6670Stariq } 931*6670Stariq 932*6670Stariq val = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, 933*6670Stariq "xdt_poll_nsec", XDT_POLL_DEFAULT); 934*6670Stariq xdt_poll_nsec = MAX(val, XDT_POLL_MIN); 935*6670Stariq 936*6670Stariq xdt_cpu_schedinfo = (xdt_schedinfo_t *)kmem_alloc(xdt_ncpus * 937*6670Stariq sizeof (xdt_schedinfo_t), KM_SLEEP); 938*6670Stariq xdt_init_trace_masks(); 939*6670Stariq xdt_kstat_init(); 940*6670Stariq 941*6670Stariq xdt_devi = devi; 942*6670Stariq ddi_report_dev(devi); 943*6670Stariq return (DDI_SUCCESS); 944*6670Stariq } 945*6670Stariq 946*6670Stariq static int 947*6670Stariq xdt_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 948*6670Stariq { 949*6670Stariq switch (cmd) { 950*6670Stariq case DDI_DETACH: 951*6670Stariq break; 952*6670Stariq 953*6670Stariq case DDI_SUSPEND: 954*6670Stariq /* 955*6670Stariq * We might support proper suspend/resume in the future. So 956*6670Stariq * return DDI_FAILURE for now. 957*6670Stariq */ 958*6670Stariq return (DDI_FAILURE); 959*6670Stariq 960*6670Stariq default: 961*6670Stariq return (DDI_FAILURE); 962*6670Stariq } 963*6670Stariq 964*6670Stariq if (dtrace_unregister(xdt_id) != 0) 965*6670Stariq return (DDI_FAILURE); 966*6670Stariq 967*6670Stariq xdt_detach_trace_buffers(); 968*6670Stariq kmem_free(xdt_cpu_schedinfo, xdt_ncpus * sizeof (xdt_schedinfo_t)); 969*6670Stariq if (xdt_cyclic != CYCLIC_NONE) 970*6670Stariq cyclic_remove(xdt_cyclic); 971*6670Stariq if (xdt_kstats != NULL) 972*6670Stariq kstat_delete(xdt_kstats); 973*6670Stariq xdt_devi = (void *)0; 974*6670Stariq ddi_remove_minor_node(devi, NULL); 975*6670Stariq 976*6670Stariq return (DDI_SUCCESS); 977*6670Stariq } 978*6670Stariq 979*6670Stariq /*ARGSUSED*/ 980*6670Stariq static int 981*6670Stariq xdt_info(dev_info_t *devi, ddi_info_cmd_t infocmd, void *arg, void **result) 982*6670Stariq { 983*6670Stariq int error; 984*6670Stariq 985*6670Stariq switch (infocmd) { 986*6670Stariq case DDI_INFO_DEVT2DEVINFO: 987*6670Stariq *result = xdt_devi; 988*6670Stariq error = DDI_SUCCESS; 989*6670Stariq break; 990*6670Stariq case DDI_INFO_DEVT2INSTANCE: 991*6670Stariq *result = (void *)0; 992*6670Stariq error = DDI_SUCCESS; 993*6670Stariq break; 994*6670Stariq default: 995*6670Stariq error = DDI_FAILURE; 996*6670Stariq } 997*6670Stariq return (error); 998*6670Stariq } 999*6670Stariq 1000*6670Stariq static struct cb_ops xdt_cb_ops = { 1001*6670Stariq nulldev, /* open(9E) */ 1002*6670Stariq nodev, /* close(9E) */ 1003*6670Stariq nodev, /* strategy(9E) */ 1004*6670Stariq nodev, /* print(9E) */ 1005*6670Stariq nodev, /* dump(9E) */ 1006*6670Stariq nodev, /* read(9E) */ 1007*6670Stariq nodev, /* write(9E) */ 1008*6670Stariq nodev, /* ioctl(9E) */ 1009*6670Stariq nodev, /* devmap(9E) */ 1010*6670Stariq nodev, /* mmap(9E) */ 1011*6670Stariq nodev, /* segmap(9E) */ 1012*6670Stariq nochpoll, /* chpoll(9E) */ 1013*6670Stariq ddi_prop_op, /* prop_op(9E) */ 1014*6670Stariq NULL, /* streamtab(9S) */ 1015*6670Stariq D_MP | D_64BIT | D_NEW /* cb_flag */ 1016*6670Stariq }; 1017*6670Stariq 1018*6670Stariq static struct dev_ops xdt_ops = { 1019*6670Stariq DEVO_REV, /* devo_rev */ 1020*6670Stariq 0, /* devo_refcnt */ 1021*6670Stariq xdt_info, /* getinfo(9E) */ 1022*6670Stariq nulldev, /* identify(9E) */ 1023*6670Stariq nulldev, /* probe(9E) */ 1024*6670Stariq xdt_attach, /* attach(9E) */ 1025*6670Stariq xdt_detach, /* detach(9E) */ 1026*6670Stariq nulldev, /* devo_reset */ 1027*6670Stariq &xdt_cb_ops, /* devo_cb_ops */ 1028*6670Stariq NULL, /* devo_bus_ops */ 1029*6670Stariq NULL /* power(9E) */ 1030*6670Stariq }; 1031*6670Stariq 1032*6670Stariq 1033*6670Stariq static struct modldrv modldrv = { 1034*6670Stariq &mod_driverops, 1035*6670Stariq "Hypervisor event tracing", 1036*6670Stariq &xdt_ops 1037*6670Stariq }; 1038*6670Stariq 1039*6670Stariq static struct modlinkage modlinkage = { 1040*6670Stariq MODREV_1, 1041*6670Stariq &modldrv, 1042*6670Stariq NULL 1043*6670Stariq }; 1044*6670Stariq 1045*6670Stariq int 1046*6670Stariq _init(void) 1047*6670Stariq { 1048*6670Stariq return (mod_install(&modlinkage)); 1049*6670Stariq } 1050*6670Stariq 1051*6670Stariq int 1052*6670Stariq _fini(void) 1053*6670Stariq { 1054*6670Stariq return (mod_remove(&modlinkage)); 1055*6670Stariq } 1056*6670Stariq 1057*6670Stariq int 1058*6670Stariq _info(struct modinfo *modinfop) 1059*6670Stariq { 1060*6670Stariq return (mod_info(&modlinkage, modinfop)); 1061*6670Stariq } 1062