xref: /onnv-gate/usr/src/uts/common/os/dtrace_subr.c (revision 1880:1982c224f2bd)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*1880Sahl  * Common Development and Distribution License (the "License").
6*1880Sahl  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21*1880Sahl 
220Sstevel@tonic-gate /*
23*1880Sahl  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <sys/dtrace.h>
300Sstevel@tonic-gate #include <sys/cmn_err.h>
310Sstevel@tonic-gate #include <sys/tnf.h>
320Sstevel@tonic-gate #include <sys/atomic.h>
330Sstevel@tonic-gate #include <sys/prsystm.h>
340Sstevel@tonic-gate #include <sys/modctl.h>
350Sstevel@tonic-gate #include <sys/aio_impl.h>
360Sstevel@tonic-gate 
370Sstevel@tonic-gate #ifdef __sparc
380Sstevel@tonic-gate #include <sys/privregs.h>
390Sstevel@tonic-gate #endif
400Sstevel@tonic-gate 
/*
 * Hook pointers.  Nothing in this part of the file assigns or calls them;
 * presumably they are filled in by the dtrace module and invoked from the
 * corresponding kernel subsystems -- TODO confirm against the callers.
 */
void (*dtrace_cpu_init)(processorid_t);
void (*dtrace_modload)(struct modctl *);
void (*dtrace_modunload)(struct modctl *);
void (*dtrace_helpers_cleanup)(void);
void (*dtrace_helpers_fork)(proc_t *, proc_t *);
void (*dtrace_cpustart_init)(void);
void (*dtrace_cpustart_fini)(void);

void (*dtrace_kreloc_init)(void);
void (*dtrace_kreloc_fini)(void);

void (*dtrace_debugger_init)(void);
void (*dtrace_debugger_fini)(void);

/*
 * Combined virtual-time/TNF state.  In this file it is updated by the
 * compare-and-swap loops in the dtrace_vtime_*() routines and read by
 * dtrace_vtime_switch().
 */
dtrace_vtime_state_t dtrace_vtime_active = 0;

/*
 * Starts at the value immediately after DTRACE_CACHEIDNONE; its consumers
 * are not visible in this file.
 */
dtrace_cacheid_t dtrace_predcache_id = DTRACE_CACHEIDNONE + 1;

570Sstevel@tonic-gate 
/*
 * One snapshot of the adjusted high-resolution time state.  Snapshots are
 * written by dtrace_hres_tick() and read by dtrace_gethrestime().
 */
typedef struct dtrace_hrestime {
	lock_t		dthr_lock;		/* lock for this element */
	timestruc_t	dthr_hrestime;		/* hrestime value */
	int64_t		dthr_adj;		/* hrestime_adj value */
	hrtime_t	dthr_hrtime;		/* hrtime value */
} dtrace_hrestime_t;

/* Two snapshots, each carrying its own lock. */
static dtrace_hrestime_t dtrace_hrestime[2];
660Sstevel@tonic-gate 
670Sstevel@tonic-gate /*
680Sstevel@tonic-gate  * Making available adjustable high-resolution time in DTrace is regrettably
690Sstevel@tonic-gate  * more complicated than one might think it should be.  The problem is that
700Sstevel@tonic-gate  * the variables related to adjusted high-resolution time (hrestime,
710Sstevel@tonic-gate  * hrestime_adj and friends) are adjusted under hres_lock -- and this lock may
720Sstevel@tonic-gate  * be held when we enter probe context.  One might think that we could address
730Sstevel@tonic-gate  * this by having a single snapshot copy that is stored under a different lock
740Sstevel@tonic-gate  * from hres_tick(), using the snapshot iff hres_lock is locked in probe
750Sstevel@tonic-gate  * context.  Unfortunately, this too won't work:  because hres_lock is grabbed
760Sstevel@tonic-gate  * in more than just hres_tick() context, we could enter probe context
770Sstevel@tonic-gate  * concurrently on two different CPUs with both locks (hres_lock and the
780Sstevel@tonic-gate  * snapshot lock) held.  As this implies, the fundamental problem is that we
790Sstevel@tonic-gate  * need to have access to a snapshot of these variables that we _know_ will
800Sstevel@tonic-gate  * not be locked in probe context.  To effect this, we have two snapshots
810Sstevel@tonic-gate  * protected by two different locks, and we mandate that these snapshots are
820Sstevel@tonic-gate  * recorded in succession by a single thread calling dtrace_hres_tick().  (We
830Sstevel@tonic-gate  * assure this by calling it out of the same CY_HIGH_LEVEL cyclic that calls
840Sstevel@tonic-gate  * hres_tick().)  A single thread can't be in two places at once:  one of the
850Sstevel@tonic-gate  * snapshot locks is guaranteed to be unheld at all times.  The
860Sstevel@tonic-gate  * dtrace_gethrestime() algorithm is thus to check first one snapshot and then
870Sstevel@tonic-gate  * the other to find the unlocked snapshot.
880Sstevel@tonic-gate  */
890Sstevel@tonic-gate void
900Sstevel@tonic-gate dtrace_hres_tick(void)
910Sstevel@tonic-gate {
920Sstevel@tonic-gate 	int i;
930Sstevel@tonic-gate 	ushort_t spl;
940Sstevel@tonic-gate 
950Sstevel@tonic-gate 	for (i = 0; i < 2; i++) {
960Sstevel@tonic-gate 		dtrace_hrestime_t tmp;
970Sstevel@tonic-gate 
980Sstevel@tonic-gate 		spl = hr_clock_lock();
990Sstevel@tonic-gate 		tmp.dthr_hrestime = hrestime;
1000Sstevel@tonic-gate 		tmp.dthr_adj = hrestime_adj;
1010Sstevel@tonic-gate 		tmp.dthr_hrtime = dtrace_gethrtime();
1020Sstevel@tonic-gate 		hr_clock_unlock(spl);
1030Sstevel@tonic-gate 
1040Sstevel@tonic-gate 		lock_set(&dtrace_hrestime[i].dthr_lock);
1050Sstevel@tonic-gate 		dtrace_hrestime[i].dthr_hrestime = tmp.dthr_hrestime;
1060Sstevel@tonic-gate 		dtrace_hrestime[i].dthr_adj = tmp.dthr_adj;
1070Sstevel@tonic-gate 		dtrace_hrestime[i].dthr_hrtime = tmp.dthr_hrtime;
1080Sstevel@tonic-gate 		dtrace_membar_producer();
1090Sstevel@tonic-gate 
1100Sstevel@tonic-gate 		/*
1110Sstevel@tonic-gate 		 * To allow for lock-free examination of this lock, we use
1120Sstevel@tonic-gate 		 * the same trick that is used hres_lock; for more details,
1130Sstevel@tonic-gate 		 * see the description of this technique in sun4u/sys/clock.h.
1140Sstevel@tonic-gate 		 */
1150Sstevel@tonic-gate 		dtrace_hrestime[i].dthr_lock++;
1160Sstevel@tonic-gate 	}
1170Sstevel@tonic-gate }
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate hrtime_t
1200Sstevel@tonic-gate dtrace_gethrestime(void)
1210Sstevel@tonic-gate {
1220Sstevel@tonic-gate 	dtrace_hrestime_t snap;
1230Sstevel@tonic-gate 	hrtime_t now;
1240Sstevel@tonic-gate 	int i = 0, adj, nslt;
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate 	for (;;) {
1270Sstevel@tonic-gate 		snap.dthr_lock = dtrace_hrestime[i].dthr_lock;
1280Sstevel@tonic-gate 		dtrace_membar_consumer();
1290Sstevel@tonic-gate 		snap.dthr_hrestime = dtrace_hrestime[i].dthr_hrestime;
1300Sstevel@tonic-gate 		snap.dthr_hrtime = dtrace_hrestime[i].dthr_hrtime;
1310Sstevel@tonic-gate 		snap.dthr_adj = dtrace_hrestime[i].dthr_adj;
1320Sstevel@tonic-gate 		dtrace_membar_consumer();
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate 		if ((snap.dthr_lock & ~1) == dtrace_hrestime[i].dthr_lock)
1350Sstevel@tonic-gate 			break;
1360Sstevel@tonic-gate 
1370Sstevel@tonic-gate 		/*
1380Sstevel@tonic-gate 		 * If we're here, the lock was either locked, or it
1390Sstevel@tonic-gate 		 * transitioned while we were taking the snapshot.  Either
1400Sstevel@tonic-gate 		 * way, we're going to try the other dtrace_hrestime element;
1410Sstevel@tonic-gate 		 * we know that it isn't possible for both to be locked
1420Sstevel@tonic-gate 		 * simultaneously, so we will ultimately get a good snapshot.
1430Sstevel@tonic-gate 		 */
1440Sstevel@tonic-gate 		i ^= 1;
1450Sstevel@tonic-gate 	}
1460Sstevel@tonic-gate 
1470Sstevel@tonic-gate 	/*
1480Sstevel@tonic-gate 	 * We have a good snapshot.  Now perform any necessary adjustments.
1490Sstevel@tonic-gate 	 */
1500Sstevel@tonic-gate 	nslt = dtrace_gethrtime() - snap.dthr_hrtime;
1510Sstevel@tonic-gate 	ASSERT(nslt >= 0);
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate 	now = ((hrtime_t)snap.dthr_hrestime.tv_sec * (hrtime_t)NANOSEC) +
1540Sstevel@tonic-gate 	    snap.dthr_hrestime.tv_nsec;
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	if (snap.dthr_adj != 0) {
1570Sstevel@tonic-gate 		if (snap.dthr_adj > 0) {
1580Sstevel@tonic-gate 			adj = (nslt >> adj_shift);
1590Sstevel@tonic-gate 			if (adj > snap.dthr_adj)
1600Sstevel@tonic-gate 				adj = (int)snap.dthr_adj;
1610Sstevel@tonic-gate 		} else {
1620Sstevel@tonic-gate 			adj = -(nslt >> adj_shift);
1630Sstevel@tonic-gate 			if (adj < snap.dthr_adj)
1640Sstevel@tonic-gate 				adj = (int)snap.dthr_adj;
1650Sstevel@tonic-gate 		}
1660Sstevel@tonic-gate 		now += adj;
1670Sstevel@tonic-gate 	}
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate 	return (now);
1700Sstevel@tonic-gate }
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate void
1730Sstevel@tonic-gate dtrace_vtime_enable(void)
1740Sstevel@tonic-gate {
1750Sstevel@tonic-gate 	dtrace_vtime_state_t state, nstate;
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	do {
1780Sstevel@tonic-gate 		state = dtrace_vtime_active;
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 		switch (state) {
1810Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE:
1820Sstevel@tonic-gate 			nstate = DTRACE_VTIME_ACTIVE;
1830Sstevel@tonic-gate 			break;
1840Sstevel@tonic-gate 
1850Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE_TNF:
1860Sstevel@tonic-gate 			nstate = DTRACE_VTIME_ACTIVE_TNF;
1870Sstevel@tonic-gate 			break;
1880Sstevel@tonic-gate 
1890Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE:
1900Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE_TNF:
1910Sstevel@tonic-gate 			panic("DTrace virtual time already enabled");
1920Sstevel@tonic-gate 			/*NOTREACHED*/
1930Sstevel@tonic-gate 		}
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate 	} while	(cas32((uint32_t *)&dtrace_vtime_active,
1960Sstevel@tonic-gate 	    state, nstate) != state);
1970Sstevel@tonic-gate }
1980Sstevel@tonic-gate 
1990Sstevel@tonic-gate void
2000Sstevel@tonic-gate dtrace_vtime_disable(void)
2010Sstevel@tonic-gate {
2020Sstevel@tonic-gate 	dtrace_vtime_state_t state, nstate;
2030Sstevel@tonic-gate 
2040Sstevel@tonic-gate 	do {
2050Sstevel@tonic-gate 		state = dtrace_vtime_active;
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 		switch (state) {
2080Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE:
2090Sstevel@tonic-gate 			nstate = DTRACE_VTIME_INACTIVE;
2100Sstevel@tonic-gate 			break;
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE_TNF:
2130Sstevel@tonic-gate 			nstate = DTRACE_VTIME_INACTIVE_TNF;
2140Sstevel@tonic-gate 			break;
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE:
2170Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE_TNF:
2180Sstevel@tonic-gate 			panic("DTrace virtual time already disabled");
2190Sstevel@tonic-gate 			/*NOTREACHED*/
2200Sstevel@tonic-gate 		}
2210Sstevel@tonic-gate 
2220Sstevel@tonic-gate 	} while	(cas32((uint32_t *)&dtrace_vtime_active,
2230Sstevel@tonic-gate 	    state, nstate) != state);
2240Sstevel@tonic-gate }
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate void
2270Sstevel@tonic-gate dtrace_vtime_enable_tnf(void)
2280Sstevel@tonic-gate {
2290Sstevel@tonic-gate 	dtrace_vtime_state_t state, nstate;
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 	do {
2320Sstevel@tonic-gate 		state = dtrace_vtime_active;
2330Sstevel@tonic-gate 
2340Sstevel@tonic-gate 		switch (state) {
2350Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE:
2360Sstevel@tonic-gate 			nstate = DTRACE_VTIME_ACTIVE_TNF;
2370Sstevel@tonic-gate 			break;
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE:
2400Sstevel@tonic-gate 			nstate = DTRACE_VTIME_INACTIVE_TNF;
2410Sstevel@tonic-gate 			break;
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE_TNF:
2440Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE_TNF:
2450Sstevel@tonic-gate 			panic("TNF already active");
2460Sstevel@tonic-gate 			/*NOTREACHED*/
2470Sstevel@tonic-gate 		}
2480Sstevel@tonic-gate 
2490Sstevel@tonic-gate 	} while	(cas32((uint32_t *)&dtrace_vtime_active,
2500Sstevel@tonic-gate 	    state, nstate) != state);
2510Sstevel@tonic-gate }
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate void
2540Sstevel@tonic-gate dtrace_vtime_disable_tnf(void)
2550Sstevel@tonic-gate {
2560Sstevel@tonic-gate 	dtrace_vtime_state_t state, nstate;
2570Sstevel@tonic-gate 
2580Sstevel@tonic-gate 	do {
2590Sstevel@tonic-gate 		state = dtrace_vtime_active;
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 		switch (state) {
2620Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE_TNF:
2630Sstevel@tonic-gate 			nstate = DTRACE_VTIME_ACTIVE;
2640Sstevel@tonic-gate 			break;
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE_TNF:
2670Sstevel@tonic-gate 			nstate = DTRACE_VTIME_INACTIVE;
2680Sstevel@tonic-gate 			break;
2690Sstevel@tonic-gate 
2700Sstevel@tonic-gate 		case DTRACE_VTIME_ACTIVE:
2710Sstevel@tonic-gate 		case DTRACE_VTIME_INACTIVE:
2720Sstevel@tonic-gate 			panic("TNF already inactive");
2730Sstevel@tonic-gate 			/*NOTREACHED*/
2740Sstevel@tonic-gate 		}
2750Sstevel@tonic-gate 
2760Sstevel@tonic-gate 	} while	(cas32((uint32_t *)&dtrace_vtime_active,
2770Sstevel@tonic-gate 	    state, nstate) != state);
2780Sstevel@tonic-gate }
2790Sstevel@tonic-gate 
2800Sstevel@tonic-gate void
2810Sstevel@tonic-gate dtrace_vtime_switch(kthread_t *next)
2820Sstevel@tonic-gate {
2830Sstevel@tonic-gate 	dtrace_icookie_t cookie;
2840Sstevel@tonic-gate 	hrtime_t ts;
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	if (tnf_tracing_active) {
2870Sstevel@tonic-gate 		tnf_thread_switch(next);
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate 		if (dtrace_vtime_active == DTRACE_VTIME_INACTIVE_TNF)
2900Sstevel@tonic-gate 			return;
2910Sstevel@tonic-gate 	}
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate 	cookie = dtrace_interrupt_disable();
2940Sstevel@tonic-gate 	ts = dtrace_gethrtime();
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 	if (curthread->t_dtrace_start != 0) {
2970Sstevel@tonic-gate 		curthread->t_dtrace_vtime += ts - curthread->t_dtrace_start;
2980Sstevel@tonic-gate 		curthread->t_dtrace_start = 0;
2990Sstevel@tonic-gate 	}
3000Sstevel@tonic-gate 
3010Sstevel@tonic-gate 	next->t_dtrace_start = ts;
3020Sstevel@tonic-gate 
3030Sstevel@tonic-gate 	dtrace_interrupt_enable(cookie);
3040Sstevel@tonic-gate }
3050Sstevel@tonic-gate 
/*
 * Fasttrap hook pointers, named for fork, exec and exit.  They are not
 * assigned in this part of the file; dtrace_fasttrap_fork() calls through
 * dtrace_fasttrap_fork_ptr.
 */
void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *);
void (*dtrace_fasttrap_exec_ptr)(proc_t *);
void (*dtrace_fasttrap_exit_ptr)(proc_t *);
3090Sstevel@tonic-gate 
3100Sstevel@tonic-gate /*
3110Sstevel@tonic-gate  * This function is called by cfork() in the event that it appears that
3120Sstevel@tonic-gate  * there may be dtrace tracepoints active in the parent process's address
3130Sstevel@tonic-gate  * space. This first confirms the existence of dtrace tracepoints in the
3140Sstevel@tonic-gate  * parent process and calls into the fasttrap module to remove the
3150Sstevel@tonic-gate  * corresponding tracepoints from the child. By knowing that there are
3160Sstevel@tonic-gate  * existing tracepoints, and ensuring they can't be removed, we can rely
3170Sstevel@tonic-gate  * on the fasttrap module remaining loaded.
3180Sstevel@tonic-gate  */
3190Sstevel@tonic-gate void
3200Sstevel@tonic-gate dtrace_fasttrap_fork(proc_t *p, proc_t *cp)
3210Sstevel@tonic-gate {
3220Sstevel@tonic-gate 	ASSERT(p->p_proc_flag & P_PR_LOCK);
323*1880Sahl 	ASSERT(p->p_dtrace_count > 0);
324*1880Sahl 	ASSERT(dtrace_fasttrap_fork_ptr != NULL);
3250Sstevel@tonic-gate 
326*1880Sahl 	dtrace_fasttrap_fork_ptr(p, cp);
3270Sstevel@tonic-gate }
328