xref: /onnv-gate/usr/src/uts/common/xen/dtrace/xdt.c (revision 8803:8c01b39012c9)
16670Stariq /*
26670Stariq  * CDDL HEADER START
36670Stariq  *
46670Stariq  * The contents of this file are subject to the terms of the
56670Stariq  * Common Development and Distribution License (the "License").
66670Stariq  * You may not use this file except in compliance with the License.
76670Stariq  *
86670Stariq  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
96670Stariq  * or http://www.opensolaris.org/os/licensing.
106670Stariq  * See the License for the specific language governing permissions
116670Stariq  * and limitations under the License.
126670Stariq  *
136670Stariq  * When distributing Covered Code, include this CDDL HEADER in each
146670Stariq  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
156670Stariq  * If applicable, add the following below this CDDL HEADER, with the
166670Stariq  * fields enclosed by brackets "[]" replaced with your own identifying
176670Stariq  * information: Portions Copyright [yyyy] [name of copyright owner]
186670Stariq  *
196670Stariq  * CDDL HEADER END
206670Stariq  */
216670Stariq 
226670Stariq /*
23*8803SJonathan.Haslam@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
246670Stariq  * Use is subject to license terms.
256670Stariq  */
266670Stariq 
276670Stariq /*
286670Stariq  * Xen event provider for DTrace
296670Stariq  *
 * NOTE: This provider is PRIVATE. It is intended as a short-term solution and
 * may disappear or be re-implemented at any time.
326670Stariq  *
336670Stariq  * This provider isn't suitable as a general-purpose solution for a number of
346670Stariq  * reasons. First and foremost, we rely on the Xen tracing mechanism and don't
356670Stariq  * have any way to gather data other than that collected by the Xen trace
366670Stariq  * buffers. Further, it does not fit into the DTrace model (see "Interacting
376670Stariq  * with DTrace" below.)
386670Stariq  *
396670Stariq  *
406670Stariq  * Tracing in Xen
416670Stariq  * --------------
426670Stariq  *
 * Xen implements a tracing facility for generating and collecting execution
 * event traces from the hypervisor. When tracing is enabled, compiled-in
 * probes record events in contiguous per-CPU trace buffers.
466670Stariq  *
476670Stariq  *               +---------+
486670Stariq  * +------+      |         |
496670Stariq  * | CPUn |----> | BUFFERn |
506670Stariq  * +------+      |         |
516670Stariq  *               +---------+- tbuf.va + (tbuf.size * n)
526670Stariq  *               :         :
536670Stariq  *               +---------+
546670Stariq  * +------+      |         |
556670Stariq  * | CPU1 |----> | BUFFER1 |
566670Stariq  * +------+      |         |
576670Stariq  *               +---------+- tbuf.va + tbuf.size
586670Stariq  * +------+      |         |
596670Stariq  * | CPU0 |----> | BUFFER0 |
606670Stariq  * +------+      |         |
616670Stariq  *               +---------+- tbuf.va
626670Stariq  *
636670Stariq  * Each CPU buffer consists of a metadata header followed by the trace records.
646670Stariq  * The metadata consists of a producer/consumer pair of pointers into the buffer
656670Stariq  * that point to the next record to be written and the next record to be read
666670Stariq  * respectively. The trace record format is as follows:
676670Stariq  *
686670Stariq  * +--------------------------------------------------------------------------+
696670Stariq  * | CPUID(uint_t) | TSC(uint64_t) | EVENTID(uint32_t) |     DATA FIELDS      |
706670Stariq  * +--------------------------------------------------------------------------+
716670Stariq  *
726670Stariq  * DATA FIELDS:
736670Stariq  * +--------------------------------------------------------------------------+
746670Stariq  * | D1(uint32_t) | D2(uint32_t) | D3(uint32_t) | D4(uint32_t) | D5(uint32_t) |
756670Stariq  * +--------------------------------------------------------------------------+
766670Stariq  *
776670Stariq  *
786670Stariq  * Interacting with DTrace
796670Stariq  * -----------------------
806670Stariq  *
 * Every xdt_poll_nsec nanoseconds we poll the trace buffers for data and feed
 * each entry into dtrace_probe() with the corresponding probe ID for the event.
 * As a result of this periodic collection implementation, probe firings are
 * asynchronous. This is the only sensible way to implement this form of
856670Stariq  * provider, but because of its asynchronous nature asking things like
866670Stariq  * "current CPU" and, more importantly, arbitrary questions about the context
876670Stariq  * surrounding the probe firing are not meaningful. So, consumers should not
886670Stariq  * attempt to infer anything beyond what is supplied via the probe arguments.
896670Stariq  */
906670Stariq 
916670Stariq #include <sys/types.h>
926670Stariq #include <sys/sysmacros.h>
936670Stariq #include <sys/modctl.h>
946670Stariq #include <sys/sunddi.h>
956670Stariq #include <sys/ddi.h>
966670Stariq #include <sys/conf.h>
976670Stariq #include <sys/devops.h>
986670Stariq #include <sys/stat.h>
996670Stariq #include <sys/cmn_err.h>
1006670Stariq #include <sys/dtrace.h>
1016670Stariq #include <sys/sdt.h>
1026670Stariq #include <sys/cyclic.h>
1036670Stariq #include <vm/seg_kmem.h>
1046670Stariq #include <vm/hat_i86.h>
1056670Stariq #include <sys/hypervisor.h>
1066670Stariq #include <xen/public/trace.h>
1076670Stariq #include <xen/public/sched.h>
1086670Stariq 
/*
 * Polling and retry limits. The two poll values are expressed in
 * nanoseconds: 100000000ns is 100ms and 10000000ns is 10ms.
 */
#define	XDT_POLL_DEFAULT	100000000	/* default poll interval (ns) */
#define	XDT_POLL_MIN		10000000	/* min poll interval (ns) */
#define	XDT_TBUF_RETRY		50		/* tbuf disable retry count */
1126670Stariq 
/*
 * The domid must match IDLE_DOMAIN_ID in xen.hg/xen/include/xen/sched.h
 * in the xVM gate.
 *
 * The argument and the whole expansion are parenthesized so that the macro
 * evaluates correctly when handed an expression (e.g. "x & 0xFFFF") rather
 * than a plain identifier.
 */
#define	IS_IDLE_DOM(domid)	((domid) == 0x7FFFU)

/*
 * Macros to extract the domid and cpuid from a HVM trace data field.
 * The domid lives in the upper 16 bits and the vcpuid in the lower 16 bits.
 * Arguments are parenthesized so that expression arguments are evaluated
 * as a unit before the shift/mask is applied.
 */
#define	HVM_DOMID(d)		((d) >> 16)
#define	HVM_VCPUID(d)		((d) & 0xFFFF)
1226670Stariq 
/*
 * Fire the DTrace probe that is currently enabled for `event', if any.
 *
 * xdt_probemap[event] holds the DTrace probe ID while the probe is enabled
 * and 0 otherwise, so nothing is fired for a disabled event. `cpuid' is
 * passed as the first probe argument, followed by arg0..arg3.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is exactly one statement (and therefore safe in an unbraced
 * if/else). Macro parameters are parenthesized, and the local is given a
 * name that is unlikely to collide with an identifier used in an argument.
 */
#define	XDT_PROBE4(event, cpuid, arg0, arg1, arg2, arg3) do {		\
	dtrace_id_t xdt_probe_id_ = xdt_probemap[(event)];		\
	if (xdt_probe_id_ != 0)						\
		dtrace_probe(xdt_probe_id_, (cpuid), (arg0), (arg1),	\
		    (arg2), (arg3));					\
} while (0)

/* Convenience wrappers that pad the unused trailing arguments with 0. */
#define	XDT_PROBE3(event, cpuid, arg0, arg1, arg2) \
	XDT_PROBE4(event, cpuid, arg0, arg1, arg2, 0)

#define	XDT_PROBE2(event, cpuid, arg0, arg1) \
	XDT_PROBE4(event, cpuid, arg0, arg1, 0, 0)

#define	XDT_PROBE1(event, cpuid, arg0) \
	XDT_PROBE4(event, cpuid, arg0, 0, 0, 0)

#define	XDT_PROBE0(event, cpuid) \
	XDT_PROBE4(event, cpuid, 0, 0, 0, 0)
1406670Stariq 
/*
 * Probe classes. Each value is an index into xdt_classinfo[];
 * XDT_NCLASSES is the number of classes and sizes that array.
 */
enum {
	XDT_SCHED = 0,
	XDT_MEM = 1,
	XDT_HVM = 2,
	XDT_NCLASSES = 3
};
1466670Stariq 
/*
 * Probe events. Each non-negative value is an index into xdt_probemap[]
 * and xdt_prid[]; XDT_NEVENTS is the number of events and sizes those
 * arrays.
 */
enum {
	XDT_EVT_INVALID = -1,
	XDT_SCHED_OFF_CPU = 0,
	XDT_SCHED_ON_CPU = 1,
	XDT_SCHED_IDLE_OFF_CPU = 2,
	XDT_SCHED_IDLE_ON_CPU = 3,
	XDT_SCHED_BLOCK = 4,
	XDT_SCHED_SLEEP = 5,
	XDT_SCHED_WAKE = 6,
	XDT_SCHED_YIELD = 7,
	XDT_SCHED_SHUTDOWN_POWEROFF = 8,
	XDT_SCHED_SHUTDOWN_REBOOT = 9,
	XDT_SCHED_SHUTDOWN_SUSPEND = 10,
	XDT_SCHED_SHUTDOWN_CRASH = 11,
	XDT_MEM_PAGE_GRANT_MAP = 12,
	XDT_MEM_PAGE_GRANT_UNMAP = 13,
	XDT_MEM_PAGE_GRANT_TRANSFER = 14,
	XDT_HVM_VMENTRY = 15,
	XDT_HVM_VMEXIT = 16,
	XDT_NEVENTS = 17
};
1676670Stariq 
/*
 * Static description of one probe published by this provider.
 *
 * The xdt_probe[] table initializes these positionally, so the member
 * order here must not change. A pointer to the entry is handed to
 * dtrace_probe_create() as the per-probe argument and comes back as `parg'
 * in the enable/disable/destroy entry points.
 */
typedef struct {
	const char	*pr_mod;	/* probe module */
	const char	*pr_name;	/* probe name */
	int		evt_id;		/* event id (an XDT_* probe event) */
	uint_t		class;		/* probe class (an XDT_* probe class) */
} xdt_probe_t;
1746670Stariq 
/*
 * Per-class bookkeeping, indexed by probe class in xdt_classinfo[].
 * trc_mask is the Xen trace mask (TRC_SCHED, TRC_MEM or TRC_HVM) that is
 * OR'ed into the current trace mask when the first probe in the class is
 * enabled; cnt counts the probes of the class that are currently enabled.
 */
typedef struct {
	uint32_t	trc_mask;	/* trace mask */
	uint32_t	cnt;		/* num enabled probes in class */
} xdt_classinfo_t;
1796670Stariq 
/*
 * Per-physical-CPU scheduling state. xdt_process_rec() fills this in from
 * the TRC_SCHED_SWITCH_INFPREV and TRC_SCHED_SWITCH_INFNEXT records and then
 * uses it, together with the vcpu ids from the TRC_SCHED_SWITCH record, as
 * the arguments of the off-cpu/on-cpu probes.
 */
typedef struct {
	ulong_t prev_domid;		/* previous dom executed */
	ulong_t prev_vcpuid;		/* previous vcpu executed */
	ulong_t prev_ctime;		/* time spent on cpu */
	ulong_t next_domid;		/* next dom to be scheduled */
	ulong_t next_vcpuid;		/* next vcpu to be scheduled */
	ulong_t next_wtime;		/* time spent waiting to get on cpu */
	ulong_t next_ts;		/* allocated time slice */
} xdt_schedinfo_t;
1896670Stariq 
/*
 * State describing the mapped hypervisor trace buffers.
 *
 * The buffers are mapped as one contiguous range of cnt * size bytes at va;
 * the buffer for CPU n starts at va + (size * n). meta[n] points at the start
 * of that buffer (its struct t_buf header) and data[n] at the first record
 * immediately after the header.
 *
 * Of the statistics below, only stat_dropped_recs is currently exported
 * through the kstat (see xdt_stats and xdt_kstat_update()).
 */
static struct {
	uint_t cnt;			/* total num of trace buffers */
	size_t size;			/* size of each cpu buffer */
	mfn_t start_mfn;		/* starting mfn of buffers */
	caddr_t va;			/* va buffers are mapped into */

	/* per-cpu buffers */
	struct t_buf **meta;		/* buffer metadata */
	struct t_rec **data;		/* buffer data records */

	/* statistics */
	uint64_t stat_dropped_recs;	/* records dropped */
	uint64_t stat_spurious_cpu;	/* recs with garbage cpuids */
	uint64_t stat_spurious_switch;	/* inconsistent vcpu switches */
	uint64_t stat_unknown_shutdown;	/* unknown shutdown code */
	uint64_t stat_unknown_recs;	/* unknown records */
} tbuf;
2076670Stariq 
/*
 * Names of the named kstats created by xdt_kstat_init(). The order of the
 * names must match the order in which xdt_kstat_update() assigns values.
 */
static char *xdt_stats[] = {
	"dropped_recs",
};
2116670Stariq 
/*
 * Tunable variables
 *
 * The following may be tuned by adding a line to /etc/system that
 * includes both the name of the module ("xdt") and the name of the variable.
 * For example:
 *     set xdt:xdt_tbuf_pages = 40
 *
 * xdt_tbuf_pages is the size passed to the hypervisor with
 * XEN_SYSCTL_TBUFOP_set_size, and it is only used when the hypervisor
 * reports that no trace buffers have been allocated yet.
 */
uint_t xdt_tbuf_pages = 20;			/* pages to alloc per-cpu buf */
2216670Stariq 
/*
 * The following may be tuned by adding a line to
 * /platform/i86xpv/kernel/drv/xdt.conf.
 * For example:
 *     xdt_poll_nsec = 200000000;
 *
 * The value is used as the interval of the cyclic that scans the trace
 * buffers (see xdt_cyclic_enable()).
 */
static hrtime_t xdt_poll_nsec;			/* trace buffer poll interval */
2296670Stariq 
/*
 * Internal variables
 *
 * xdt_prid[] records the DTrace probe ID of every event that has been
 * created (0 if not created or destroyed); xdt_probemap[] holds the same ID
 * only while the probe is enabled and 0 otherwise, which is what the
 * XDT_PROBE macros test before firing. xdt_cyclic is CYCLIC_NONE until the
 * polling cyclic has been added.
 */
static dev_info_t *xdt_devi;
static dtrace_provider_id_t xdt_id;
static uint_t xdt_ncpus;			/* total number of phys CPUs */
static uint32_t cur_trace_mask;			/* current trace mask */
static xdt_schedinfo_t *xdt_cpu_schedinfo;	/* per-cpu sched info */
dtrace_id_t xdt_probemap[XDT_NEVENTS];		/* map of enabled probes */
dtrace_id_t xdt_prid[XDT_NEVENTS];		/* IDs of registered events */
static cyclic_id_t xdt_cyclic = CYCLIC_NONE;
static kstat_t *xdt_kstats;
static xdt_classinfo_t xdt_classinfo[XDT_NCLASSES];
2436670Stariq 
/*
 * Table of every probe this provider can publish. Each entry is
 * { module, name, event id, class }, initialized positionally. The table is
 * terminated by an entry whose pr_mod is NULL, which is what the loops in
 * xdt_provide() test for.
 */
static xdt_probe_t xdt_probe[] = {
	/* Sched probes */
	{ "sched", "off-cpu", XDT_SCHED_OFF_CPU, XDT_SCHED },
	{ "sched", "on-cpu", XDT_SCHED_ON_CPU, XDT_SCHED },
	{ "sched", "idle-off-cpu", XDT_SCHED_IDLE_OFF_CPU, XDT_SCHED },
	{ "sched", "idle-on-cpu", XDT_SCHED_IDLE_ON_CPU, XDT_SCHED },
	{ "sched", "block", XDT_SCHED_BLOCK, XDT_SCHED },
	{ "sched", "sleep", XDT_SCHED_SLEEP, XDT_SCHED },
	{ "sched", "wake", XDT_SCHED_WAKE, XDT_SCHED },
	{ "sched", "yield", XDT_SCHED_YIELD, XDT_SCHED },
	{ "sched", "shutdown-poweroff", XDT_SCHED_SHUTDOWN_POWEROFF,
		XDT_SCHED },
	{ "sched", "shutdown-reboot", XDT_SCHED_SHUTDOWN_REBOOT, XDT_SCHED },
	{ "sched", "shutdown-suspend", XDT_SCHED_SHUTDOWN_SUSPEND, XDT_SCHED },
	{ "sched", "shutdown-crash", XDT_SCHED_SHUTDOWN_CRASH, XDT_SCHED },

	/* Memory probes */
	{ "mem", "page-grant-map", XDT_MEM_PAGE_GRANT_MAP, XDT_MEM },
	{ "mem", "page-grant-unmap", XDT_MEM_PAGE_GRANT_UNMAP, XDT_MEM },
	{ "mem", "page-grant-transfer", XDT_MEM_PAGE_GRANT_TRANSFER, XDT_MEM },

	/* HVM probes */
	{ "hvm", "vmentry", XDT_HVM_VMENTRY, XDT_HVM },
	{ "hvm", "vmexit", XDT_HVM_VMEXIT, XDT_HVM },

	/* terminator: pr_mod == NULL */
	{ NULL }
};
2716670Stariq 
/*
 * Defined outside this file; judging by its name and by the ASSERT in
 * xdt_process_rec() it returns the number of physical CPUs -- TODO confirm.
 */
extern uint_t xen_get_nphyscpus(void);
2736670Stariq 
2746670Stariq static inline uint32_t
2756670Stariq xdt_nr_active_probes()
2766670Stariq {
2776670Stariq 	int i;
2786670Stariq 	uint32_t tot = 0;
2796670Stariq 
2806670Stariq 	for (i = 0; i < XDT_NCLASSES; i++)
2816670Stariq 		tot += xdt_classinfo[i].cnt;
2826670Stariq 
2836670Stariq 	return (tot);
2846670Stariq }
2856670Stariq 
2866670Stariq static void
2876670Stariq xdt_init_trace_masks(void)
2886670Stariq {
2896670Stariq 	xdt_classinfo[XDT_SCHED].trc_mask = TRC_SCHED;
2906670Stariq 	xdt_classinfo[XDT_MEM].trc_mask = TRC_MEM;
2916670Stariq 	xdt_classinfo[XDT_HVM].trc_mask = TRC_HVM;
2926670Stariq }
2936670Stariq 
2946670Stariq static int
2956670Stariq xdt_kstat_update(kstat_t *ksp, int flag)
2966670Stariq {
2976670Stariq 	kstat_named_t *knp;
2986670Stariq 
2996670Stariq 	if (flag != KSTAT_READ)
3006670Stariq 		return (EACCES);
3016670Stariq 
3026670Stariq 	knp = ksp->ks_data;
3036670Stariq 
3046670Stariq 	/*
3056670Stariq 	 * Assignment order should match that of the names in
3066670Stariq 	 * xdt_stats.
3076670Stariq 	 */
3086670Stariq 	(knp++)->value.ui64 = tbuf.stat_dropped_recs;
3096670Stariq 
3106670Stariq 	return (0);
3116670Stariq }
3126670Stariq 
3136670Stariq static void
3146670Stariq xdt_kstat_init(void)
3156670Stariq {
3166670Stariq 	int nstats = sizeof (xdt_stats) / sizeof (xdt_stats[0]);
3176670Stariq 	char **cp = xdt_stats;
3186670Stariq 	kstat_named_t *knp;
3196670Stariq 
3206670Stariq 	if ((xdt_kstats = kstat_create("xdt", 0, "trace_statistics", "misc",
3216670Stariq 	    KSTAT_TYPE_NAMED, nstats, 0)) == NULL)
3226670Stariq 		return;
3236670Stariq 
3246670Stariq 	xdt_kstats->ks_update = xdt_kstat_update;
3256670Stariq 
3266670Stariq 	knp = xdt_kstats->ks_data;
3276670Stariq 	while (nstats > 0) {
3286670Stariq 		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
3296670Stariq 		knp++;
3306670Stariq 		cp++;
3316670Stariq 		nstats--;
3326670Stariq 	}
3336670Stariq 
3346670Stariq 	kstat_install(xdt_kstats);
3356670Stariq }
3366670Stariq 
3376670Stariq static int
3386670Stariq xdt_sysctl_tbuf(xen_sysctl_tbuf_op_t *tbuf_op)
3396670Stariq {
3406670Stariq 	xen_sysctl_t op;
3416670Stariq 	int xerr;
3426670Stariq 
3436670Stariq 	op.cmd = XEN_SYSCTL_tbuf_op;
3446670Stariq 	op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
3456670Stariq 	op.u.tbuf_op = *tbuf_op;
3466670Stariq 
3476670Stariq 	if ((xerr = HYPERVISOR_sysctl(&op)) != 0)
3486670Stariq 		return (xen_xlate_errcode(xerr));
3496670Stariq 
3506670Stariq 	*tbuf_op = op.u.tbuf_op;
3516670Stariq 	return (0);
3526670Stariq }
3536670Stariq 
3546670Stariq static int
3556670Stariq xdt_map_trace_buffers(mfn_t mfn, caddr_t va, size_t len)
3566670Stariq {
3576670Stariq 	x86pte_t pte;
3586670Stariq 	caddr_t const sva = va;
3596670Stariq 	caddr_t const eva = va + len;
3606670Stariq 	int xerr;
3616670Stariq 
3626670Stariq 	ASSERT(mfn != MFN_INVALID);
3636670Stariq 	ASSERT(va != NULL);
3646670Stariq 	ASSERT(IS_PAGEALIGNED(len));
3656670Stariq 
3666670Stariq 	for (; va < eva; va += MMU_PAGESIZE) {
3676670Stariq 		/*
3686670Stariq 		 * Ask the HAT to load a throwaway mapping to page zero, then
3696670Stariq 		 * overwrite it with the hypervisor mapping. It gets removed
3706670Stariq 		 * later via hat_unload().
3716670Stariq 		 */
3726670Stariq 		hat_devload(kas.a_hat, va, MMU_PAGESIZE, (pfn_t)0,
3736670Stariq 		    PROT_READ | HAT_UNORDERED_OK,
3746670Stariq 		    HAT_LOAD_NOCONSIST | HAT_LOAD);
3756670Stariq 
3766670Stariq 		pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER
3776670Stariq 		    | PT_FOREIGN | PT_WRITABLE;
3786670Stariq 
3796670Stariq 		xerr = HYPERVISOR_update_va_mapping_otherdomain((ulong_t)va,
3806670Stariq 		    pte, UVMF_INVLPG | UVMF_LOCAL, DOMID_XEN);
3816670Stariq 
3826670Stariq 		if (xerr != 0) {
3836670Stariq 			/* unmap pages loaded so far */
3846670Stariq 			size_t ulen = (uintptr_t)(va + MMU_PAGESIZE) -
3856670Stariq 			    (uintptr_t)sva;
3866670Stariq 			hat_unload(kas.a_hat, sva, ulen, HAT_UNLOAD_UNMAP);
3876670Stariq 			return (xen_xlate_errcode(xerr));
3886670Stariq 		}
3896670Stariq 
3906670Stariq 		mfn++;
3916670Stariq 	}
3926670Stariq 
3936670Stariq 	return (0);
3946670Stariq }
3956670Stariq 
3966670Stariq static int
3976670Stariq xdt_attach_trace_buffers(void)
3986670Stariq {
3996670Stariq 	xen_sysctl_tbuf_op_t tbuf_op;
4006670Stariq 	size_t len;
4016670Stariq 	int err;
4026670Stariq 	uint_t i;
4036670Stariq 
4047401Stariqmi@sun.com 	/*
4057401Stariqmi@sun.com 	 * Xen does not support trace buffer re-sizing. If the buffers
4067401Stariqmi@sun.com 	 * have already been allocated we just use them as is.
4077401Stariqmi@sun.com 	 */
4086670Stariq 	tbuf_op.cmd  = XEN_SYSCTL_TBUFOP_get_info;
4096670Stariq 	if ((err = xdt_sysctl_tbuf(&tbuf_op)) != 0)
4106670Stariq 		return (err);
4116670Stariq 
4127401Stariqmi@sun.com 	if (tbuf_op.size == 0) {
4137401Stariqmi@sun.com 		/* set trace buffer size */
4147401Stariqmi@sun.com 		tbuf_op.cmd  = XEN_SYSCTL_TBUFOP_set_size;
4157401Stariqmi@sun.com 		tbuf_op.size = xdt_tbuf_pages;
4167401Stariqmi@sun.com 		(void) xdt_sysctl_tbuf(&tbuf_op);
4177401Stariqmi@sun.com 
4187401Stariqmi@sun.com 		/* get trace buffer info */
4197401Stariqmi@sun.com 		tbuf_op.cmd  = XEN_SYSCTL_TBUFOP_get_info;
4207401Stariqmi@sun.com 		if ((err = xdt_sysctl_tbuf(&tbuf_op)) != 0)
4217401Stariqmi@sun.com 			return (err);
4227401Stariqmi@sun.com 
4237401Stariqmi@sun.com 		if (tbuf_op.size == 0) {
4247401Stariqmi@sun.com 			cmn_err(CE_NOTE, "Couldn't allocate trace buffers.");
4257401Stariqmi@sun.com 			return (ENOBUFS);
4267401Stariqmi@sun.com 		}
4277401Stariqmi@sun.com 	}
4287401Stariqmi@sun.com 
4296670Stariq 	tbuf.size = tbuf_op.size;
4306670Stariq 	tbuf.start_mfn = (mfn_t)tbuf_op.buffer_mfn;
4316670Stariq 	tbuf.cnt = xdt_ncpus;
4326670Stariq 
4336670Stariq 	ASSERT(tbuf.start_mfn != MFN_INVALID);
4346670Stariq 	ASSERT(tbuf.cnt > 0);
4356670Stariq 
4366670Stariq 	len = tbuf.size * tbuf.cnt;
4376670Stariq 	tbuf.va = vmem_alloc(heap_arena, len, VM_SLEEP);
4386670Stariq 
4396670Stariq 	if ((err = xdt_map_trace_buffers(tbuf.start_mfn, tbuf.va, len)) != 0) {
4406670Stariq 		vmem_free(heap_arena, tbuf.va, len);
4416670Stariq 		tbuf.va = NULL;
4426670Stariq 		return (err);
4436670Stariq 	}
4446670Stariq 
4456670Stariq 	tbuf.meta = (struct t_buf **)kmem_alloc(tbuf.cnt * sizeof (*tbuf.meta),
4466670Stariq 	    KM_SLEEP);
4476670Stariq 	tbuf.data = (struct t_rec **)kmem_alloc(tbuf.cnt * sizeof (*tbuf.data),
4486670Stariq 	    KM_SLEEP);
4496670Stariq 
4506670Stariq 	for (i = 0; i < tbuf.cnt; i++) {
4516670Stariq 		void *cpu_buf = (void *)(tbuf.va + (tbuf.size * i));
4526670Stariq 		tbuf.meta[i] = cpu_buf;
4536670Stariq 		tbuf.data[i] = (struct t_rec *)((uintptr_t)cpu_buf +
4546670Stariq 		    sizeof (struct t_buf));
4556670Stariq 
4566670Stariq 		/* throw away stale trace records */
4576670Stariq 		tbuf.meta[i]->cons = tbuf.meta[i]->prod;
4586670Stariq 	}
4596670Stariq 
4606670Stariq 	return (0);
4616670Stariq }
4626670Stariq 
4636670Stariq static void
4646670Stariq xdt_detach_trace_buffers(void)
4656670Stariq {
4666670Stariq 	size_t len = tbuf.size * tbuf.cnt;
4676670Stariq 
4686670Stariq 	ASSERT(tbuf.va != NULL);
4696670Stariq 
4706670Stariq 	hat_unload(kas.a_hat, tbuf.va, len,
4716670Stariq 	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
4726670Stariq 	vmem_free(heap_arena, tbuf.va, len);
4736670Stariq 	kmem_free(tbuf.meta, tbuf.cnt * sizeof (*tbuf.meta));
4746670Stariq 	kmem_free(tbuf.data, tbuf.cnt * sizeof (*tbuf.data));
4756670Stariq }
4766670Stariq 
/*
 * Translate one hypervisor trace record into DTrace probe firings and/or
 * statistics updates.
 *
 *   cpuid	index of the per-CPU trace buffer the record was read from;
 *		passed as the first argument of every probe fired here
 *   rec	the trace record; rec->event selects the handling and
 *		rec->data[] supplies the probe arguments
 *
 * A record whose cpuid is out of range, a context switch that is
 * inconsistent with the preceding INFPREV/INFNEXT records, an unknown
 * shutdown code and an unknown event are not turned into probe firings;
 * each only bumps the corresponding tbuf.stat_* counter.
 */
static inline void
xdt_process_rec(uint_t cpuid, struct t_rec *rec)
{
	/*
	 * NOTE(review): this address is computed before cpuid has been
	 * range-checked below; it is not dereferenced until after the check.
	 */
	xdt_schedinfo_t *sp = &xdt_cpu_schedinfo[cpuid];
	int eid;

	ASSERT(rec != NULL);
	ASSERT(xdt_ncpus == xen_get_nphyscpus());

	if (cpuid >= xdt_ncpus) {
		tbuf.stat_spurious_cpu++;
		return;
	}

	switch (rec->event) {

	/*
	 * Sched probes
	 */
	case TRC_SCHED_SWITCH_INFPREV:
		/*
		 * Info on vCPU being de-scheduled
		 *
		 * rec->data[0] = prev domid
		 * rec->data[1] = time spent on pcpu
		 *
		 * No probe fires here; the values are stashed for the
		 * TRC_SCHED_SWITCH record handled below.
		 */
		sp->prev_domid = rec->data[0];
		sp->prev_ctime = rec->data[1];
		break;

	case TRC_SCHED_SWITCH_INFNEXT:
		/*
		 * Info on next vCPU to be scheduled
		 *
		 * rec->data[0] = next domid
		 * rec->data[1] = time spent waiting to get on cpu
		 * rec->data[2] = time slice
		 *
		 * No probe fires here; the values are stashed for the
		 * TRC_SCHED_SWITCH record handled below.
		 */
		sp->next_domid = rec->data[0];
		sp->next_wtime = rec->data[1];
		sp->next_ts = rec->data[2];
		break;

	case TRC_SCHED_SWITCH:
		/*
		 * vCPU switch
		 *
		 * rec->data[0] = prev domid
		 * rec->data[1] = prev vcpuid
		 * rec->data[2] = next domid
		 * rec->data[3] = next vcpuid
		 */
		/*
		 * NOTE(review): with "&&" the switch is rejected only when
		 * BOTH the prev and the next domid disagree with the stashed
		 * info; a single mismatch is accepted. Confirm whether "||"
		 * was intended.
		 */
		if (rec->data[0] != sp->prev_domid &&
		    rec->data[2] != sp->next_domid) {
			/* prev and next info don't match doms being sched'd */
			tbuf.stat_spurious_switch++;
			return;
		}

		sp->prev_vcpuid = rec->data[1];
		sp->next_vcpuid = rec->data[3];

		/* the idle domain gets its own off-cpu/on-cpu probes */
		XDT_PROBE3(IS_IDLE_DOM(sp->prev_domid)?
		    XDT_SCHED_IDLE_OFF_CPU:XDT_SCHED_OFF_CPU,
		    cpuid, sp->prev_domid, sp->prev_vcpuid, sp->prev_ctime);

		XDT_PROBE4(IS_IDLE_DOM(sp->next_domid)?
		    XDT_SCHED_IDLE_ON_CPU:XDT_SCHED_ON_CPU,
		    cpuid, sp->next_domid, sp->next_vcpuid, sp->next_wtime,
		    sp->next_ts);
		break;

	case TRC_SCHED_BLOCK:
		/*
		 * vCPU blocked
		 *
		 * rec->data[0] = domid
		 * rec->data[1] = vcpuid
		 */
		XDT_PROBE2(XDT_SCHED_BLOCK, cpuid, rec->data[0], rec->data[1]);
		break;

	case TRC_SCHED_SLEEP:
		/*
		 * Put vCPU to sleep
		 *
		 * rec->data[0] = domid
		 * rec->data[1] = vcpuid
		 */
		XDT_PROBE2(XDT_SCHED_SLEEP, cpuid, rec->data[0], rec->data[1]);
		break;

	case TRC_SCHED_WAKE:
		/*
		 * Wake up vCPU
		 *
		 * rec->data[0] = domid
		 * rec->data[1] = vcpuid
		 */
		XDT_PROBE2(XDT_SCHED_WAKE, cpuid, rec->data[0], rec->data[1]);
		break;

	case TRC_SCHED_YIELD:
		/*
		 * vCPU yielded
		 *
		 * rec->data[0] = domid
		 * rec->data[1] = vcpuid
		 */
		XDT_PROBE2(XDT_SCHED_YIELD, cpuid, rec->data[0], rec->data[1]);
		break;

	case TRC_SCHED_SHUTDOWN:
		/*
		 * Guest shutting down
		 *
		 * rec->data[0] = domid
		 * rec->data[1] = initiating vcpu
		 * rec->data[2] = shutdown code
		 *
		 * The shutdown code selects which shutdown-* probe fires;
		 * only the domid is passed on as a probe argument.
		 */
		switch (rec->data[2]) {
		case SHUTDOWN_poweroff:
			eid = XDT_SCHED_SHUTDOWN_POWEROFF;
			break;
		case SHUTDOWN_reboot:
			eid = XDT_SCHED_SHUTDOWN_REBOOT;
			break;
		case SHUTDOWN_suspend:
			eid = XDT_SCHED_SHUTDOWN_SUSPEND;
			break;
		case SHUTDOWN_crash:
			eid = XDT_SCHED_SHUTDOWN_CRASH;
			break;
		default:
			tbuf.stat_unknown_shutdown++;
			return;
		}

		XDT_PROBE1(eid, cpuid, rec->data[0]);
		break;

	/*
	 * Mem probes
	 */
	case TRC_MEM_PAGE_GRANT_MAP:
		/*
		 * Guest mapped page grant
		 *
		 * rec->data[0] = domid
		 */
		XDT_PROBE1(XDT_MEM_PAGE_GRANT_MAP, cpuid, rec->data[0]);
		break;

	case TRC_MEM_PAGE_GRANT_UNMAP:
		/*
		 * Guest unmapped page grant
		 *
		 * rec->data[0] = domid
		 */
		XDT_PROBE1(XDT_MEM_PAGE_GRANT_UNMAP, cpuid, rec->data[0]);
		break;

	case TRC_MEM_PAGE_GRANT_TRANSFER:
		/*
		 * Page grant is being transferred
		 *
		 * rec->data[0] = target domid
		 */
		XDT_PROBE1(XDT_MEM_PAGE_GRANT_TRANSFER, cpuid, rec->data[0]);
		break;

	/*
	 * HVM probes
	 */
	case TRC_HVM_VMENTRY:
		/*
		 * Return to guest via vmx_launch/vmrun
		 *
		 * rec->data[0] = (domid<<16 + vcpuid)
		 */
		XDT_PROBE2(XDT_HVM_VMENTRY, cpuid, HVM_DOMID(rec->data[0]),
		    HVM_VCPUID(rec->data[0]));
		break;

	case TRC_HVM_VMEXIT:
		/*
		 * Entry into VMEXIT handler
		 *
		 * rec->data[0] = (domid<<16 + vcpuid)
		 * rec->data[1] = guest rip
		 * rec->data[2] = cpu vendor specific exit code
		 */
		XDT_PROBE4(XDT_HVM_VMEXIT, cpuid, HVM_DOMID(rec->data[0]),
		    HVM_VCPUID(rec->data[0]), rec->data[1], rec->data[2]);
		break;

	case TRC_LOST_RECORDS:
		/* counted only; this is the statistic exported via kstat */
		tbuf.stat_dropped_recs++;
		break;

	default:
		tbuf.stat_unknown_recs++;
		break;
	}
}
6826670Stariq 
6836670Stariq /*ARGSUSED*/
6846670Stariq static void
6856670Stariq xdt_tbuf_scan(void *arg)
6866670Stariq {
6876670Stariq 	uint_t cpuid;
6886670Stariq 	size_t nrecs;
6896670Stariq 	struct t_rec *rec;
6906670Stariq 	uint32_t prod;
6916670Stariq 
6926670Stariq 	nrecs = (tbuf.size - sizeof (struct t_buf)) / sizeof (struct t_rec);
6936670Stariq 
6946670Stariq 	/* scan all cpu buffers for new records */
6956670Stariq 	for (cpuid = 0; cpuid < tbuf.cnt; cpuid++) {
6966670Stariq 		prod = tbuf.meta[cpuid]->prod;
6976670Stariq 		membar_consumer(); /* read prod /then/ data */
6986670Stariq 		while (tbuf.meta[cpuid]->cons != prod) {
6996670Stariq 			rec = tbuf.data[cpuid] + tbuf.meta[cpuid]->cons % nrecs;
7006670Stariq 			xdt_process_rec(cpuid, rec);
7016670Stariq 			membar_exit(); /* read data /then/ update cons */
7026670Stariq 			tbuf.meta[cpuid]->cons++;
7036670Stariq 		}
7046670Stariq 	}
7056670Stariq }
7066670Stariq 
7076670Stariq static void
7086670Stariq xdt_cyclic_enable(void)
7096670Stariq {
7106670Stariq 	cyc_handler_t hdlr;
7116670Stariq 	cyc_time_t when;
7126670Stariq 
7136670Stariq 	ASSERT(MUTEX_HELD(&cpu_lock));
7146670Stariq 
7156670Stariq 	hdlr.cyh_func = xdt_tbuf_scan;
7166670Stariq 	hdlr.cyh_arg = NULL;
7176670Stariq 	hdlr.cyh_level = CY_LOW_LEVEL;
7186670Stariq 
7196670Stariq 	when.cyt_interval = xdt_poll_nsec;
7206670Stariq 	when.cyt_when = dtrace_gethrtime() + when.cyt_interval;
7216670Stariq 
7226670Stariq 	xdt_cyclic = cyclic_add(&hdlr, &when);
7236670Stariq }
7246670Stariq 
7256670Stariq static void
7266670Stariq xdt_probe_create(xdt_probe_t *p)
7276670Stariq {
7286670Stariq 	ASSERT(p != NULL && p->pr_mod != NULL);
7296670Stariq 
7306670Stariq 	if (dtrace_probe_lookup(xdt_id, p->pr_mod, NULL, p->pr_name) != 0)
7316670Stariq 		return;
7326670Stariq 
7336670Stariq 	xdt_prid[p->evt_id] = dtrace_probe_create(xdt_id, p->pr_mod, NULL,
7346670Stariq 	    p->pr_name, dtrace_mach_aframes(), p);
7356670Stariq }
7366670Stariq 
7376670Stariq /*ARGSUSED*/
7386670Stariq static void
7396670Stariq xdt_provide(void *arg, const dtrace_probedesc_t *desc)
7406670Stariq {
7416670Stariq 	const char *mod, *name;
7426670Stariq 	int i;
7436670Stariq 
7446670Stariq 	if (desc == NULL) {
7456670Stariq 		for (i = 0; xdt_probe[i].pr_mod != NULL; i++) {
7466670Stariq 			xdt_probe_create(&xdt_probe[i]);
7476670Stariq 		}
7486670Stariq 	} else {
7496670Stariq 		mod = desc->dtpd_mod;
7506670Stariq 		name = desc->dtpd_name;
7516670Stariq 		for (i = 0; xdt_probe[i].pr_mod != NULL; i++) {
7526670Stariq 			int l1 = strlen(xdt_probe[i].pr_name);
7536670Stariq 			int l2 = strlen(xdt_probe[i].pr_mod);
7546670Stariq 			if (strncmp(name, xdt_probe[i].pr_name, l1) == 0 &&
7556670Stariq 			    strncmp(mod, xdt_probe[i].pr_mod, l2) == 0)
7566670Stariq 				break;
7576670Stariq 		}
7586670Stariq 
7596670Stariq 		if (xdt_probe[i].pr_mod == NULL)
7606670Stariq 			return;
7616670Stariq 		xdt_probe_create(&xdt_probe[i]);
7626670Stariq 	}
7636670Stariq 
7646670Stariq }
7656670Stariq 
7666670Stariq /*ARGSUSED*/
7676670Stariq static void
7686670Stariq xdt_destroy(void *arg, dtrace_id_t id, void *parg)
7696670Stariq {
7706670Stariq 	xdt_probe_t *p = parg;
7716670Stariq 	xdt_prid[p->evt_id] = 0;
7726670Stariq }
7736670Stariq 
7746670Stariq static void
7756670Stariq xdt_set_trace_mask(uint32_t mask)
7766670Stariq {
7776670Stariq 	xen_sysctl_tbuf_op_t tbuf_op;
7786670Stariq 
7796670Stariq 	tbuf_op.cmd  = XEN_SYSCTL_TBUFOP_set_evt_mask;
7806670Stariq 	tbuf_op.evt_mask = mask;
7816670Stariq 	(void) xdt_sysctl_tbuf(&tbuf_op);
7826670Stariq }
7836670Stariq 
7846670Stariq /*ARGSUSED*/
785*8803SJonathan.Haslam@Sun.COM static int
7866670Stariq xdt_enable(void *arg, dtrace_id_t id, void *parg)
7876670Stariq {
7886670Stariq 	xdt_probe_t *p = parg;
7896670Stariq 	xen_sysctl_tbuf_op_t tbuf_op;
7906670Stariq 
7916670Stariq 	ASSERT(MUTEX_HELD(&cpu_lock));
7926670Stariq 	ASSERT(xdt_prid[p->evt_id] != 0);
7936670Stariq 
7946670Stariq 	xdt_probemap[p->evt_id] = xdt_prid[p->evt_id];
7956670Stariq 	xdt_classinfo[p->class].cnt++;
7966670Stariq 
7976670Stariq 	if (xdt_classinfo[p->class].cnt == 1) {
7986670Stariq 		/* set the trace mask for this class */
7996670Stariq 		cur_trace_mask |= xdt_classinfo[p->class].trc_mask;
8006670Stariq 		xdt_set_trace_mask(cur_trace_mask);
8016670Stariq 	}
8026670Stariq 
8036670Stariq 	if (xdt_cyclic == CYCLIC_NONE) {
8046670Stariq 		tbuf_op.cmd = XEN_SYSCTL_TBUFOP_enable;
8056670Stariq 		if (xdt_sysctl_tbuf(&tbuf_op) != 0) {
8066670Stariq 			cmn_err(CE_NOTE, "Couldn't enable hypervisor tracing.");
807*8803SJonathan.Haslam@Sun.COM 			return (-1);
8086670Stariq 		}
8096670Stariq 
8106670Stariq 		xdt_cyclic_enable();
8116670Stariq 	}
812*8803SJonathan.Haslam@Sun.COM 	return (0);
8136670Stariq }
8146670Stariq 
8156670Stariq /*ARGSUSED*/
8166670Stariq static void
8176670Stariq xdt_disable(void *arg, dtrace_id_t id, void *parg)
8186670Stariq {
8196670Stariq 	xdt_probe_t *p = parg;
8206670Stariq 	xen_sysctl_tbuf_op_t tbuf_op;
8216670Stariq 	int i, err;
8226670Stariq 
8236670Stariq 	ASSERT(MUTEX_HELD(&cpu_lock));
8246670Stariq 	ASSERT(xdt_probemap[p->evt_id] != 0);
8256670Stariq 	ASSERT(xdt_probemap[p->evt_id] == xdt_prid[p->evt_id]);
8266670Stariq 	ASSERT(xdt_classinfo[p->class].cnt > 0);
8276670Stariq 
8286670Stariq 	/*
8296670Stariq 	 * We could be here in the slight window between the cyclic firing and
8306670Stariq 	 * a call to dtrace_probe() occurring. We need to be careful if we tear
8316670Stariq 	 * down any shared state.
8326670Stariq 	 */
8336670Stariq 
8346670Stariq 	xdt_probemap[p->evt_id] = 0;
8356670Stariq 	xdt_classinfo[p->class].cnt--;
8366670Stariq 
8376670Stariq 	if (xdt_nr_active_probes() == 0) {
8386670Stariq 		cur_trace_mask = 0;
8396670Stariq 
8406670Stariq 		if (xdt_cyclic == CYCLIC_NONE)
8416670Stariq 			return;
8426670Stariq 
8436670Stariq 		/*
8446670Stariq 		 * We will try to disable the trace buffers. If we fail for some
8456670Stariq 		 * reason we will try again, up to a count of XDT_TBUF_RETRY.
8466670Stariq 		 * If we still aren't successful we try to set the trace mask
8476670Stariq 		 * to 0 in order to prevent trace records from being written.
8486670Stariq 		 */
8496670Stariq 		tbuf_op.cmd = XEN_SYSCTL_TBUFOP_disable;
8506670Stariq 		i = 0;
8516670Stariq 		do {
8526670Stariq 			err = xdt_sysctl_tbuf(&tbuf_op);
8536670Stariq 		} while ((err != 0) && (++i < XDT_TBUF_RETRY));
8546670Stariq 
8556670Stariq 		if (err != 0) {
8566670Stariq 			cmn_err(CE_NOTE,
8576670Stariq 			    "Couldn't disable hypervisor tracing.");
8586670Stariq 			xdt_set_trace_mask(0);
8596670Stariq 		} else {
8606670Stariq 			cyclic_remove(xdt_cyclic);
8616670Stariq 			xdt_cyclic = CYCLIC_NONE;
8626670Stariq 			/*
8636670Stariq 			 * We don't bother making the hypercall to set
8646670Stariq 			 * the trace mask, since it will be reset when
8656670Stariq 			 * tracing is re-enabled.
8666670Stariq 			 */
8676670Stariq 		}
8686670Stariq 	} else if (xdt_classinfo[p->class].cnt == 0) {
8696670Stariq 		cur_trace_mask ^= xdt_classinfo[p->class].trc_mask;
8706670Stariq 		/* other probes are enabled, so add the sub-class mask back */
8716670Stariq 		cur_trace_mask |= 0xF000;
8726670Stariq 		xdt_set_trace_mask(cur_trace_mask);
8736670Stariq 	}
8746670Stariq }
8756670Stariq 
/*
 * Stability attributes handed to dtrace_register() for the xdt provider.
 * Every row declares Private/Private stability; the dependency class is
 * Platform for all rows except the third, which is Unknown.
 *
 * NOTE(review): the five rows presumably correspond, in order, to the
 * provider, module, function, name and args members of dtrace_pattr_t --
 * confirm against <sys/dtrace.h>.
 */
static dtrace_pattr_t xdt_attr = {
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
};
8836670Stariq 
/*
 * Provider operations vector passed to dtrace_register() in xdt_attach().
 * Only the provide, enable, disable and destroy hooks are implemented; the
 * remaining slots are left NULL.
 */
static dtrace_pops_t xdt_pops = {
	xdt_provide,		/* dtps_provide() */
	NULL,			/* dtps_provide_module() */
	xdt_enable,		/* dtps_enable() */
	xdt_disable,		/* dtps_disable() */
	NULL,			/* dtps_suspend() */
	NULL,			/* dtps_resume() */
	NULL,			/* dtps_getargdesc() */
	NULL,			/* dtps_getargval() */
	NULL,			/* dtps_usermode() */
	xdt_destroy		/* dtps_destroy() */
};
8966670Stariq 
8976670Stariq static int
8986670Stariq xdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
8996670Stariq {
9006670Stariq 	int val;
9016670Stariq 
9026670Stariq 	if (!DOMAIN_IS_INITDOMAIN(xen_info))
9036670Stariq 		return (DDI_FAILURE);
9046670Stariq 
9056670Stariq 	switch (cmd) {
9066670Stariq 	case DDI_ATTACH:
9076670Stariq 		break;
9086670Stariq 
9096670Stariq 	case DDI_RESUME:
9106670Stariq 		/*
9116670Stariq 		 * We might support proper suspend/resume in the future, so,
9126670Stariq 		 * return DDI_FAILURE for now.
9136670Stariq 		 */
9146670Stariq 		return (DDI_FAILURE);
9156670Stariq 
9166670Stariq 	default:
9176670Stariq 		return (DDI_FAILURE);
9186670Stariq 	}
9196670Stariq 
9206670Stariq 	xdt_ncpus = xen_get_nphyscpus();
9216670Stariq 	ASSERT(xdt_ncpus > 0);
9226670Stariq 
9236670Stariq 	if (ddi_create_minor_node(devi, "xdt", S_IFCHR, 0, DDI_PSEUDO, 0) ==
9246670Stariq 	    DDI_FAILURE || xdt_attach_trace_buffers() != 0 ||
9256670Stariq 	    dtrace_register("xdt", &xdt_attr, DTRACE_PRIV_KERNEL, NULL,
9266670Stariq 	    &xdt_pops, NULL, &xdt_id) != 0) {
9276670Stariq 		if (tbuf.va != NULL)
9286670Stariq 			xdt_detach_trace_buffers();
9296670Stariq 		ddi_remove_minor_node(devi, NULL);
9306670Stariq 		return (DDI_FAILURE);
9316670Stariq 	}
9326670Stariq 
9336670Stariq 	val = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
9346670Stariq 	    "xdt_poll_nsec", XDT_POLL_DEFAULT);
9356670Stariq 	xdt_poll_nsec = MAX(val, XDT_POLL_MIN);
9366670Stariq 
9376670Stariq 	xdt_cpu_schedinfo = (xdt_schedinfo_t *)kmem_alloc(xdt_ncpus *
9386670Stariq 	    sizeof (xdt_schedinfo_t), KM_SLEEP);
9396670Stariq 	xdt_init_trace_masks();
9406670Stariq 	xdt_kstat_init();
9416670Stariq 
9426670Stariq 	xdt_devi = devi;
9436670Stariq 	ddi_report_dev(devi);
9446670Stariq 	return (DDI_SUCCESS);
9456670Stariq }
9466670Stariq 
9476670Stariq static int
9486670Stariq xdt_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
9496670Stariq {
9506670Stariq 	switch (cmd) {
9516670Stariq 	case DDI_DETACH:
9526670Stariq 		break;
9536670Stariq 
9546670Stariq 	case DDI_SUSPEND:
9556670Stariq 		/*
9566670Stariq 		 * We might support proper suspend/resume in the future. So
9576670Stariq 		 * return DDI_FAILURE for now.
9586670Stariq 		 */
9596670Stariq 		return (DDI_FAILURE);
9606670Stariq 
9616670Stariq 	default:
9626670Stariq 		return (DDI_FAILURE);
9636670Stariq 	}
9646670Stariq 
9656670Stariq 	if (dtrace_unregister(xdt_id) != 0)
9666670Stariq 		return (DDI_FAILURE);
9676670Stariq 
9686670Stariq 	xdt_detach_trace_buffers();
9696670Stariq 	kmem_free(xdt_cpu_schedinfo, xdt_ncpus * sizeof (xdt_schedinfo_t));
9706670Stariq 	if (xdt_cyclic != CYCLIC_NONE)
9716670Stariq 		cyclic_remove(xdt_cyclic);
9726670Stariq 	if (xdt_kstats != NULL)
9736670Stariq 		kstat_delete(xdt_kstats);
9746670Stariq 	xdt_devi = (void *)0;
9756670Stariq 	ddi_remove_minor_node(devi, NULL);
9766670Stariq 
9776670Stariq 	return (DDI_SUCCESS);
9786670Stariq }
9796670Stariq 
9806670Stariq /*ARGSUSED*/
9816670Stariq static int
9826670Stariq xdt_info(dev_info_t *devi, ddi_info_cmd_t infocmd, void *arg, void **result)
9836670Stariq {
9846670Stariq 	int error;
9856670Stariq 
9866670Stariq 	switch (infocmd) {
9876670Stariq 	case DDI_INFO_DEVT2DEVINFO:
9886670Stariq 		*result = xdt_devi;
9896670Stariq 		error = DDI_SUCCESS;
9906670Stariq 		break;
9916670Stariq 	case DDI_INFO_DEVT2INSTANCE:
9926670Stariq 		*result = (void *)0;
9936670Stariq 		error = DDI_SUCCESS;
9946670Stariq 		break;
9956670Stariq 	default:
9966670Stariq 		error = DDI_FAILURE;
9976670Stariq 	}
9986670Stariq 	return (error);
9996670Stariq }
10006670Stariq 
/*
 * Character/block entry points for the xdt pseudo device, referenced from
 * xdt_ops below.  open(9E) is nulldev, chpoll(9E) is nochpoll and
 * prop_op(9E) is ddi_prop_op; every other entry point is nodev and there
 * is no STREAMS table.
 */
static struct cb_ops xdt_cb_ops = {
	nulldev,		/* open(9E) */
	nodev,			/* close(9E) */
	nodev,			/* strategy(9E) */
	nodev,			/* print(9E) */
	nodev,			/* dump(9E) */
	nodev,			/* read(9E) */
	nodev,			/* write(9E) */
	nodev,			/* ioctl(9E) */
	nodev,			/* devmap(9E) */
	nodev,			/* mmap(9E) */
	nodev,			/* segmap(9E) */
	nochpoll,		/* chpoll(9E) */
	ddi_prop_op,		/* prop_op(9E) */
	NULL,			/* streamtab(9S) */
	D_MP | D_64BIT | D_NEW	/* cb_flag */
};
10186670Stariq 
/*
 * Device operations for the xdt driver, referenced from modldrv below.
 * getinfo, attach and detach are implemented by xdt_info(), xdt_attach()
 * and xdt_detach(); identify, probe and reset are nulldev, there are no
 * bus ops or power entry point, and quiesce is ddi_quiesce_not_needed.
 */
static struct dev_ops xdt_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xdt_info,		/* getinfo(9E) */
	nulldev,		/* identify(9E) */
	nulldev,		/* probe(9E) */
	xdt_attach,		/* attach(9E) */
	xdt_detach,		/* detach(9E) */
	nulldev,		/* devo_reset */
	&xdt_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* power(9E) */
	ddi_quiesce_not_needed,		/* devo_quiesce */
};
10336670Stariq 
10346670Stariq 
/*
 * Driver linkage structure: identifies this module as a device driver
 * (mod_driverops), gives its description string and points at xdt_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"Hypervisor event tracing",
	&xdt_ops
};
10406670Stariq 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info(); it lists modldrv as its only linkage
 * element, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
10466670Stariq 
10476670Stariq int
10486670Stariq _init(void)
10496670Stariq {
10506670Stariq 	return (mod_install(&modlinkage));
10516670Stariq }
10526670Stariq 
10536670Stariq int
10546670Stariq _fini(void)
10556670Stariq {
10566670Stariq 	return (mod_remove(&modlinkage));
10576670Stariq }
10586670Stariq 
10596670Stariq int
10606670Stariq _info(struct modinfo *modinfop)
10616670Stariq {
10626670Stariq 	return (mod_info(&modlinkage, modinfop));
10636670Stariq }
1064