xref: /onnv-gate/usr/src/uts/common/os/kcpc.c (revision 8803:8c01b39012c9)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53156Sgirish  * Common Development and Distribution License (the "License").
63156Sgirish  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211414Scindi 
220Sstevel@tonic-gate /*
23*8803SJonathan.Haslam@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/param.h>
280Sstevel@tonic-gate #include <sys/thread.h>
290Sstevel@tonic-gate #include <sys/cpuvar.h>
300Sstevel@tonic-gate #include <sys/inttypes.h>
310Sstevel@tonic-gate #include <sys/cmn_err.h>
320Sstevel@tonic-gate #include <sys/time.h>
336275Strevtom #include <sys/ksynch.h>
340Sstevel@tonic-gate #include <sys/systm.h>
350Sstevel@tonic-gate #include <sys/kcpc.h>
360Sstevel@tonic-gate #include <sys/cpc_impl.h>
370Sstevel@tonic-gate #include <sys/cpc_pcbe.h>
380Sstevel@tonic-gate #include <sys/atomic.h>
390Sstevel@tonic-gate #include <sys/sunddi.h>
400Sstevel@tonic-gate #include <sys/modctl.h>
410Sstevel@tonic-gate #include <sys/sdt.h>
420Sstevel@tonic-gate #if defined(__x86)
430Sstevel@tonic-gate #include <asm/clock.h>
440Sstevel@tonic-gate #endif
450Sstevel@tonic-gate 
460Sstevel@tonic-gate kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
470Sstevel@tonic-gate kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */
480Sstevel@tonic-gate 
490Sstevel@tonic-gate 
500Sstevel@tonic-gate krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
510Sstevel@tonic-gate int		kcpc_cpuctx;		/* number of cpu-specific contexts */
520Sstevel@tonic-gate 
530Sstevel@tonic-gate int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */
540Sstevel@tonic-gate 
550Sstevel@tonic-gate /*
560Sstevel@tonic-gate  * These are set when a PCBE module is loaded.
570Sstevel@tonic-gate  */
580Sstevel@tonic-gate uint_t		cpc_ncounters = 0;
590Sstevel@tonic-gate pcbe_ops_t	*pcbe_ops = NULL;
600Sstevel@tonic-gate 
610Sstevel@tonic-gate /*
620Sstevel@tonic-gate  * Statistics on (mis)behavior
630Sstevel@tonic-gate  */
640Sstevel@tonic-gate static uint32_t kcpc_intrctx_count;    /* # overflows in an interrupt handler */
650Sstevel@tonic-gate static uint32_t kcpc_nullctx_count;    /* # overflows in a thread with no ctx */
660Sstevel@tonic-gate 
670Sstevel@tonic-gate /*
68*8803SJonathan.Haslam@Sun.COM  * By setting 'kcpc_nullctx_panic' to 1, any overflow interrupts in a thread
69*8803SJonathan.Haslam@Sun.COM  * with no valid context will result in a panic.
700Sstevel@tonic-gate  */
710Sstevel@tonic-gate static int kcpc_nullctx_panic = 0;
720Sstevel@tonic-gate 
730Sstevel@tonic-gate static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
740Sstevel@tonic-gate static void kcpc_restore(kcpc_ctx_t *ctx);
750Sstevel@tonic-gate static void kcpc_save(kcpc_ctx_t *ctx);
760Sstevel@tonic-gate static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
770Sstevel@tonic-gate static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
780Sstevel@tonic-gate static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
790Sstevel@tonic-gate static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
800Sstevel@tonic-gate 
/*
 * Called by a PCBE (Performance Counter BackEnd) module when it loads:
 * publish its ops vector and cache the number of counters it manages.
 */
void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}
870Sstevel@tonic-gate 
/*
 * Install 'func' as the dcpc (DTrace CPC provider) callback; the overflow
 * path presumably invokes it with counter data -- the call site is elsewhere.
 */
void
kcpc_register_dcpc(void (*func)(uint64_t))
{
	dtrace_cpc_fire = func;
}
93*8803SJonathan.Haslam@Sun.COM 
/*
 * Remove the dcpc (DTrace CPC provider) callback installed by
 * kcpc_register_dcpc().
 */
void
kcpc_unregister_dcpc(void)
{
	dtrace_cpc_fire = NULL;
}
99*8803SJonathan.Haslam@Sun.COM 
/*
 * Bind the request set to the CPU 'cpuid'.  The calling thread must itself
 * be bound to that CPU.  Returns 0 on success; EINVAL (with *subcode set to
 * CPC_RESOURCE_UNAVAIL) if the requests cannot be assigned to counters; the
 * kcpc_configure_reqs() error if configuration fails; or EAGAIN if the CPU
 * was DR'd out, already has a bound set, or the caller's binding changed.
 */
int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	/* Zeroed data store for the sampled counter values of this set. */
	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else
	 * the cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		/* Caller must still be bound to the target CPU. */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	/*
	 * Mark the set bound and wake anyone blocked on the binding
	 * completing (see kcpc_unbind()).
	 */
	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}
1850Sstevel@tonic-gate 
/*
 * Bind the request set to thread 't' -- either curthread, or a victim LWP
 * being manipulated by an agent LWP (in which case the victim is stopped).
 * Returns 0 on success, EEXIST if the thread already has a context, EINVAL
 * (with *subcode set) if the requests cannot be assigned, or the error from
 * kcpc_configure_reqs().
 */
int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	/* kc_cpuid == -1 marks this as thread-bound rather than CPU-bound. */
	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);

	/*
	 * Mark the set bound and wake anyone blocked on the binding
	 * completing (see kcpc_unbind()).
	 */
	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);
}
2740Sstevel@tonic-gate 
2750Sstevel@tonic-gate /*
2760Sstevel@tonic-gate  * Walk through each request in the set and ask the PCBE to configure a
2770Sstevel@tonic-gate  * corresponding counter.
2780Sstevel@tonic-gate  */
279*8803SJonathan.Haslam@Sun.COM int
2800Sstevel@tonic-gate kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
2810Sstevel@tonic-gate {
2820Sstevel@tonic-gate 	int		i;
2830Sstevel@tonic-gate 	int		ret;
2840Sstevel@tonic-gate 	kcpc_request_t	*rp;
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
2870Sstevel@tonic-gate 		int n;
2880Sstevel@tonic-gate 		rp = &set->ks_req[i];
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate 		n = rp->kr_picnum;
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate 		ASSERT(n >= 0 && n < cpc_ncounters);
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 		ASSERT(ctx->kc_pics[n].kp_req == NULL);
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
2970Sstevel@tonic-gate 			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
2980Sstevel@tonic-gate 			    == 0) {
2990Sstevel@tonic-gate 				*subcode = -1;
3000Sstevel@tonic-gate 				return (ENOTSUP);
3010Sstevel@tonic-gate 			}
3020Sstevel@tonic-gate 			/*
3030Sstevel@tonic-gate 			 * If any of the counters have requested overflow
3040Sstevel@tonic-gate 			 * notification, we flag the context as being one that
3050Sstevel@tonic-gate 			 * cares about overflow.
3060Sstevel@tonic-gate 			 */
3070Sstevel@tonic-gate 			ctx->kc_flags |= KCPC_CTX_SIGOVF;
3080Sstevel@tonic-gate 		}
3090Sstevel@tonic-gate 
3100Sstevel@tonic-gate 		rp->kr_config = NULL;
3110Sstevel@tonic-gate 		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
3120Sstevel@tonic-gate 		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
3130Sstevel@tonic-gate 		    &(rp->kr_config), (void *)ctx)) != 0) {
3140Sstevel@tonic-gate 			kcpc_free_configs(set);
3150Sstevel@tonic-gate 			*subcode = ret;
3163732Sae112802 			switch (ret) {
3173732Sae112802 			case CPC_ATTR_REQUIRES_PRIVILEGE:
3183732Sae112802 			case CPC_HV_NO_ACCESS:
3190Sstevel@tonic-gate 				return (EACCES);
3203732Sae112802 			default:
3213732Sae112802 				return (EINVAL);
3223732Sae112802 			}
3230Sstevel@tonic-gate 		}
3240Sstevel@tonic-gate 
3250Sstevel@tonic-gate 		ctx->kc_pics[n].kp_req = rp;
3260Sstevel@tonic-gate 		rp->kr_picp = &ctx->kc_pics[n];
3270Sstevel@tonic-gate 		rp->kr_data = set->ks_data + rp->kr_index;
3280Sstevel@tonic-gate 		*rp->kr_data = rp->kr_preset;
3290Sstevel@tonic-gate 	}
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	return (0);
3320Sstevel@tonic-gate }
3330Sstevel@tonic-gate 
334*8803SJonathan.Haslam@Sun.COM void
3350Sstevel@tonic-gate kcpc_free_configs(kcpc_set_t *set)
3360Sstevel@tonic-gate {
3370Sstevel@tonic-gate 	int i;
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++)
3400Sstevel@tonic-gate 		if (set->ks_req[i].kr_config != NULL)
3410Sstevel@tonic-gate 			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
3420Sstevel@tonic-gate }
3430Sstevel@tonic-gate 
/*
 * Sample the counters of the context bound to 'set' and copy the counter
 * values, high-resolution timestamp, and virtualized tick count out to the
 * current process.  buf, hrtime, and tick point to user addresses.
 * Returns 0, EINVAL if the set has not finished binding, EAGAIN if the
 * context is (or becomes) invalid or the caller's CPU binding is wrong,
 * or EFAULT on a copyout failure.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	/* The set must have completed binding before it can be sampled. */
	mutex_enter(&set->ks_lock);
	if ((set->ks_state & KCPC_SET_BOUND) == 0) {
		mutex_exit(&set->ks_lock);
		return (EINVAL);
	}
	mutex_exit(&set->ks_lock);

	if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			/* CPU-bound set: caller must still be bound there. */
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			/* Accumulate virtualized ticks since the last sample. */
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID)
			return (EAGAIN);
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}
4060Sstevel@tonic-gate 
/*
 * Stop the counters on the CPU this context is bound to.  The context must
 * be marked invalid but not yet stopped on entry.  If we are running on the
 * target CPU the counters are stopped directly and the context is marked
 * KCPC_CTX_INVALID_STOPPED here; otherwise kcpc_remote_stop() performs the
 * stop on the target CPU (the caller, kcpc_unbind(), ASSERTs the STOPPED
 * flag afterwards, so the remote path presumably sets it as well).
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	/* Prevent migration between cpu_get() and the CPU comparison below. */
	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}
4310Sstevel@tonic-gate 
/*
 * Break the binding of 'set': stop its counters and tear down its context.
 * Handles both thread-bound (kc_cpuid == -1) and CPU-bound sets.  Always
 * returns 0.
 */
int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t;

	/*
	 * We could be racing with the process's agent thread as it
	 * binds the set; we must wait for the set to finish binding
	 * before attempting to tear it down.
	 */
	mutex_enter(&set->ks_lock);
	while ((set->ks_state & KCPC_SET_BOUND) == 0)
		cv_wait(&set->ks_condv, &set->ks_lock);
	mutex_exit(&set->ks_lock);

	ctx = set->ks_ctx;

	/*
	 * Use kc_lock to synchronize with kcpc_restore().
	 */
	mutex_enter(&ctx->kc_lock);
	ctx->kc_flags |= KCPC_CTX_INVALID;
	mutex_exit(&ctx->kc_lock);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context.  It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			/* Unbinding our own context: stop our counters now. */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not preset on thread %p",
			    (void *)ctx, (void *)t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			/*
			 * A CPU-bound context has no thread ctxops to free it,
			 * so release it directly.
			 */
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}
5180Sstevel@tonic-gate 
5190Sstevel@tonic-gate int
5200Sstevel@tonic-gate kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
5210Sstevel@tonic-gate {
5220Sstevel@tonic-gate 	int i;
5230Sstevel@tonic-gate 
5240Sstevel@tonic-gate 	ASSERT(set != NULL);
5256275Strevtom 	ASSERT(set->ks_state & KCPC_SET_BOUND);
5260Sstevel@tonic-gate 	ASSERT(set->ks_ctx->kc_thread == curthread);
5270Sstevel@tonic-gate 	ASSERT(set->ks_ctx->kc_cpuid == -1);
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 	if (index < 0 || index >= set->ks_nreqs)
5300Sstevel@tonic-gate 		return (EINVAL);
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++)
5330Sstevel@tonic-gate 		if (set->ks_req[i].kr_index == index)
5340Sstevel@tonic-gate 			break;
5350Sstevel@tonic-gate 	ASSERT(i != set->ks_nreqs);
5360Sstevel@tonic-gate 
5370Sstevel@tonic-gate 	set->ks_req[i].kr_preset = preset;
5380Sstevel@tonic-gate 	return (0);
5390Sstevel@tonic-gate }
5400Sstevel@tonic-gate 
/*
 * Reset every request in a thread-bound set to its preset value and
 * reprogram the hardware.  Only valid when called by the owning thread.
 */
int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(set->ks_state & KCPC_SET_BOUND);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		/*
		 * NOTE(review): the pcbe_configure() return value is ignored
		 * here -- re-presetting an already-configured request is
		 * presumably infallible, but confirm against the PCBEs.
		 */
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}
5760Sstevel@tonic-gate 
/*
 * Change the enabled state of the thread-bound set on 't':
 *
 *   CPC_ENABLE / CPC_DISABLE      unfreeze / freeze the context's counters;
 *   CPC_USR_EVENTS / CPC_SYS_EVENTS  add or remove (per 'enable') the
 *       user/system counting flag on every request by unbinding the set and
 *       re-binding a duplicate with the updated flags.
 *
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		/* Enabling an already-running context is an error. */
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		/* Disabling an already-frozen context is an error. */
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			/* Carry current counts forward as the new presets. */
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate /*
6540Sstevel@tonic-gate  * Provide PCBEs with a way of obtaining the configs of every counter which will
6550Sstevel@tonic-gate  * be programmed together.
6560Sstevel@tonic-gate  *
6570Sstevel@tonic-gate  * If current is NULL, provide the first config.
6580Sstevel@tonic-gate  *
6590Sstevel@tonic-gate  * If data != NULL, caller wants to know where the data store associated with
6600Sstevel@tonic-gate  * the config we return is located.
6610Sstevel@tonic-gate  */
6620Sstevel@tonic-gate void *
6630Sstevel@tonic-gate kcpc_next_config(void *token, void *current, uint64_t **data)
6640Sstevel@tonic-gate {
6650Sstevel@tonic-gate 	int		i;
6660Sstevel@tonic-gate 	kcpc_pic_t	*pic;
6670Sstevel@tonic-gate 	kcpc_ctx_t *ctx = (kcpc_ctx_t *)token;
6680Sstevel@tonic-gate 
6690Sstevel@tonic-gate 	if (current == NULL) {
6700Sstevel@tonic-gate 		/*
6710Sstevel@tonic-gate 		 * Client would like the first config, which may not be in
6720Sstevel@tonic-gate 		 * counter 0; we need to search through the counters for the
6730Sstevel@tonic-gate 		 * first config.
6740Sstevel@tonic-gate 		 */
6750Sstevel@tonic-gate 		for (i = 0; i < cpc_ncounters; i++)
6760Sstevel@tonic-gate 			if (ctx->kc_pics[i].kp_req != NULL)
6770Sstevel@tonic-gate 				break;
6780Sstevel@tonic-gate 		/*
6790Sstevel@tonic-gate 		 * There are no counters configured for the given context.
6800Sstevel@tonic-gate 		 */
6810Sstevel@tonic-gate 		if (i == cpc_ncounters)
6820Sstevel@tonic-gate 			return (NULL);
6830Sstevel@tonic-gate 	} else {
6840Sstevel@tonic-gate 		/*
6850Sstevel@tonic-gate 		 * There surely is a faster way to do this.
6860Sstevel@tonic-gate 		 */
6870Sstevel@tonic-gate 		for (i = 0; i < cpc_ncounters; i++) {
6880Sstevel@tonic-gate 			pic = &ctx->kc_pics[i];
6890Sstevel@tonic-gate 
6900Sstevel@tonic-gate 			if (pic->kp_req != NULL &&
6910Sstevel@tonic-gate 			    current == pic->kp_req->kr_config)
6920Sstevel@tonic-gate 				break;
6930Sstevel@tonic-gate 		}
6940Sstevel@tonic-gate 
6950Sstevel@tonic-gate 		/*
6960Sstevel@tonic-gate 		 * We found the current config at picnum i. Now search for the
6970Sstevel@tonic-gate 		 * next configured PIC.
6980Sstevel@tonic-gate 		 */
6990Sstevel@tonic-gate 		for (i++; i < cpc_ncounters; i++) {
7000Sstevel@tonic-gate 			pic = &ctx->kc_pics[i];
7010Sstevel@tonic-gate 			if (pic->kp_req != NULL)
7020Sstevel@tonic-gate 				break;
7030Sstevel@tonic-gate 		}
7040Sstevel@tonic-gate 
7050Sstevel@tonic-gate 		if (i == cpc_ncounters)
7060Sstevel@tonic-gate 			return (NULL);
7070Sstevel@tonic-gate 	}
7080Sstevel@tonic-gate 
7090Sstevel@tonic-gate 	if (data != NULL) {
7100Sstevel@tonic-gate 		*data = ctx->kc_pics[i].kp_req->kr_data;
7110Sstevel@tonic-gate 	}
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 	return (ctx->kc_pics[i].kp_req->kr_config);
7140Sstevel@tonic-gate }
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 
/*
 * Allocate a new, zeroed CPC context, link it onto the global context hash
 * list, and allocate its per-counter pic array (cpc_ncounters entries).
 * The context starts unbound to any CPU (kc_cpuid == -1). May block:
 * both allocations use KM_SLEEP.
 */
kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	/*
	 * Hash on the context's address and insert it at the head of its
	 * chain so kcpc_ctx_free() can later locate and unlink it.
	 */
	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	/* One pic slot per hardware counter; zeroed => no requests yet. */
	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_cpuid = -1;

	return (ctx);
}
7380Sstevel@tonic-gate 
/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 *
 * Performs a deep copy of the parent's request set (the request array, the
 * data store, and each request's attribute array), configures the copied
 * requests for the child context, and finally marks the child's set bound,
 * signalling any waiter blocked on the set's condition variable.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	/* Only inherit if the parent's set asked for it. */
	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_zalloc(sizeof (*cks), KM_SLEEP);
	/*
	 * NOTE(review): kmem_zalloc() already left ks_state zeroed, so this
	 * clear of KCPC_SET_BOUND appears to be a no-op kept for
	 * explicitness.
	 */
	cks->ks_state &= ~KCPC_SET_BOUND;
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		/*
		 * Bounded copy; assumes the parent's event name is already
		 * NUL-terminated within CPC_MAX_EVENT_LEN — TODO confirm
		 * against the bind path that filled it in.
		 */
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	/*
	 * If the copied requests cannot be configured, invalidate the
	 * child's config rather than returning an error (this function
	 * is void).
	 */
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		kcpc_invalidate_config(cctx);

	/*
	 * Mark the set bound and wake any waiter blocked on ks_condv
	 * pending the set reaching the bound state.
	 */
	mutex_enter(&cks->ks_lock);
	cks->ks_state |= KCPC_SET_BOUND;
	cv_signal(&cks->ks_condv);
	mutex_exit(&cks->ks_lock);
}
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate 
797*8803SJonathan.Haslam@Sun.COM void
7980Sstevel@tonic-gate kcpc_ctx_free(kcpc_ctx_t *ctx)
7990Sstevel@tonic-gate {
8000Sstevel@tonic-gate 	kcpc_ctx_t	**loc;
8010Sstevel@tonic-gate 	long		hash = CPC_HASH_CTX(ctx);
8020Sstevel@tonic-gate 
8030Sstevel@tonic-gate 	mutex_enter(&kcpc_ctx_llock[hash]);
8040Sstevel@tonic-gate 	loc = &kcpc_ctx_list[hash];
8050Sstevel@tonic-gate 	ASSERT(*loc != NULL);
8060Sstevel@tonic-gate 	while (*loc != ctx)
8070Sstevel@tonic-gate 		loc = &(*loc)->kc_next;
8080Sstevel@tonic-gate 	*loc = ctx->kc_next;
8090Sstevel@tonic-gate 	mutex_exit(&kcpc_ctx_llock[hash]);
8100Sstevel@tonic-gate 
8110Sstevel@tonic-gate 	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
8126275Strevtom 	cv_destroy(&ctx->kc_condv);
8136275Strevtom 	mutex_destroy(&ctx->kc_lock);
8140Sstevel@tonic-gate 	kmem_free(ctx, sizeof (*ctx));
8150Sstevel@tonic-gate }
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate /*
8180Sstevel@tonic-gate  * Generic interrupt handler used on hardware that generates
8190Sstevel@tonic-gate  * overflow interrupts.
8200Sstevel@tonic-gate  *
8210Sstevel@tonic-gate  * Note: executed at high-level interrupt context!
8220Sstevel@tonic-gate  */
8230Sstevel@tonic-gate /*ARGSUSED*/
8240Sstevel@tonic-gate kcpc_ctx_t *
8250Sstevel@tonic-gate kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
8260Sstevel@tonic-gate {
8270Sstevel@tonic-gate 	kcpc_ctx_t	*ctx;
8280Sstevel@tonic-gate 	kthread_t	*t = curthread;
8290Sstevel@tonic-gate 	int		i;
8300Sstevel@tonic-gate 
8310Sstevel@tonic-gate 	/*
8320Sstevel@tonic-gate 	 * On both x86 and UltraSPARC, we may deliver the high-level
8330Sstevel@tonic-gate 	 * interrupt in kernel mode, just after we've started to run an
8340Sstevel@tonic-gate 	 * interrupt thread.  (That's because the hardware helpfully
8350Sstevel@tonic-gate 	 * delivers the overflow interrupt some random number of cycles
8360Sstevel@tonic-gate 	 * after the instruction that caused the overflow by which time
8370Sstevel@tonic-gate 	 * we're in some part of the kernel, not necessarily running on
8380Sstevel@tonic-gate 	 * the right thread).
8390Sstevel@tonic-gate 	 *
8400Sstevel@tonic-gate 	 * Check for this case here -- find the pinned thread
8410Sstevel@tonic-gate 	 * that was running when the interrupt went off.
8420Sstevel@tonic-gate 	 */
8430Sstevel@tonic-gate 	if (t->t_flag & T_INTR_THREAD) {
8440Sstevel@tonic-gate 		klwp_t *lwp;
8450Sstevel@tonic-gate 
8460Sstevel@tonic-gate 		atomic_add_32(&kcpc_intrctx_count, 1);
8470Sstevel@tonic-gate 
8480Sstevel@tonic-gate 		/*
8490Sstevel@tonic-gate 		 * Note that t_lwp is always set to point at the underlying
8500Sstevel@tonic-gate 		 * thread, thus this will work in the presence of nested
8510Sstevel@tonic-gate 		 * interrupts.
8520Sstevel@tonic-gate 		 */
8530Sstevel@tonic-gate 		ctx = NULL;
8540Sstevel@tonic-gate 		if ((lwp = t->t_lwp) != NULL) {
8550Sstevel@tonic-gate 			t = lwptot(lwp);
8560Sstevel@tonic-gate 			ctx = t->t_cpc_ctx;
8570Sstevel@tonic-gate 		}
8580Sstevel@tonic-gate 	} else
8590Sstevel@tonic-gate 		ctx = t->t_cpc_ctx;
8600Sstevel@tonic-gate 
8610Sstevel@tonic-gate 	if (ctx == NULL) {
8620Sstevel@tonic-gate 		/*
8630Sstevel@tonic-gate 		 * This can easily happen if we're using the counters in
8640Sstevel@tonic-gate 		 * "shared" mode, for example, and an overflow interrupt
8650Sstevel@tonic-gate 		 * occurs while we are running cpustat.  In that case, the
8660Sstevel@tonic-gate 		 * bound thread that has the context that belongs to this
8670Sstevel@tonic-gate 		 * CPU is almost certainly sleeping (if it was running on
8680Sstevel@tonic-gate 		 * the CPU we'd have found it above), and the actual
8690Sstevel@tonic-gate 		 * interrupted thread has no knowledge of performance counters!
8700Sstevel@tonic-gate 		 */
8710Sstevel@tonic-gate 		ctx = curthread->t_cpu->cpu_cpc_ctx;
8720Sstevel@tonic-gate 		if (ctx != NULL) {
8730Sstevel@tonic-gate 			/*
8740Sstevel@tonic-gate 			 * Return the bound context for this CPU to
8750Sstevel@tonic-gate 			 * the interrupt handler so that it can synchronously
8760Sstevel@tonic-gate 			 * sample the hardware counters and restart them.
8770Sstevel@tonic-gate 			 */
8780Sstevel@tonic-gate 			return (ctx);
8790Sstevel@tonic-gate 		}
8800Sstevel@tonic-gate 
8810Sstevel@tonic-gate 		/*
8820Sstevel@tonic-gate 		 * As long as the overflow interrupt really is delivered early
8830Sstevel@tonic-gate 		 * enough after trapping into the kernel to avoid switching
8840Sstevel@tonic-gate 		 * threads, we must always be able to find the cpc context,
8850Sstevel@tonic-gate 		 * or something went terribly wrong i.e. we ended up
8860Sstevel@tonic-gate 		 * running a passivated interrupt thread, a kernel
8870Sstevel@tonic-gate 		 * thread or we interrupted idle, all of which are Very Bad.
888*8803SJonathan.Haslam@Sun.COM 		 *
889*8803SJonathan.Haslam@Sun.COM 		 * We also could end up here owing to an incredibly unlikely
890*8803SJonathan.Haslam@Sun.COM 		 * race condition that exists on x86 based architectures when
891*8803SJonathan.Haslam@Sun.COM 		 * the cpc provider is in use; overflow interrupts are directed
892*8803SJonathan.Haslam@Sun.COM 		 * to the cpc provider if the 'dtrace_cpc_in_use' variable is
893*8803SJonathan.Haslam@Sun.COM 		 * set when we enter the handler. This variable is unset after
894*8803SJonathan.Haslam@Sun.COM 		 * overflow interrupts have been disabled on all CPUs and all
895*8803SJonathan.Haslam@Sun.COM 		 * contexts have been torn down. To stop interrupts, the cpc
896*8803SJonathan.Haslam@Sun.COM 		 * provider issues a xcall to the remote CPU before it tears
897*8803SJonathan.Haslam@Sun.COM 		 * down that CPUs context. As high priority xcalls, on an x86
898*8803SJonathan.Haslam@Sun.COM 		 * architecture, execute at a higher PIL than this handler, it
899*8803SJonathan.Haslam@Sun.COM 		 * is possible (though extremely unlikely) that the xcall could
900*8803SJonathan.Haslam@Sun.COM 		 * interrupt the overflow handler before the handler has
901*8803SJonathan.Haslam@Sun.COM 		 * checked the 'dtrace_cpc_in_use' variable, stop the counters,
902*8803SJonathan.Haslam@Sun.COM 		 * return to the cpc provider which could then rip down
903*8803SJonathan.Haslam@Sun.COM 		 * contexts and unset 'dtrace_cpc_in_use' *before* the CPUs
904*8803SJonathan.Haslam@Sun.COM 		 * overflow handler has had a chance to check the variable. In
905*8803SJonathan.Haslam@Sun.COM 		 * that case, the handler would direct the overflow into this
906*8803SJonathan.Haslam@Sun.COM 		 * code and no valid context will be found. The default behavior
907*8803SJonathan.Haslam@Sun.COM 		 * when no valid context is found is now to shout a warning to
908*8803SJonathan.Haslam@Sun.COM 		 * the console and bump the 'kcpc_nullctx_count' variable.
9090Sstevel@tonic-gate 		 */
9100Sstevel@tonic-gate 		if (kcpc_nullctx_panic)
9110Sstevel@tonic-gate 			panic("null cpc context, thread %p", (void *)t);
912*8803SJonathan.Haslam@Sun.COM 
913*8803SJonathan.Haslam@Sun.COM 		cmn_err(CE_WARN,
914*8803SJonathan.Haslam@Sun.COM 		    "null cpc context found in overflow handler!\n");
9150Sstevel@tonic-gate 		atomic_add_32(&kcpc_nullctx_count, 1);
9160Sstevel@tonic-gate 	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
9170Sstevel@tonic-gate 		/*
9180Sstevel@tonic-gate 		 * Schedule an ast to sample the counters, which will
9190Sstevel@tonic-gate 		 * propagate any overflow into the virtualized performance
9200Sstevel@tonic-gate 		 * counter(s), and may deliver a signal.
9210Sstevel@tonic-gate 		 */
9220Sstevel@tonic-gate 		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
9230Sstevel@tonic-gate 		/*
9240Sstevel@tonic-gate 		 * If a counter has overflowed which was counting on behalf of
9250Sstevel@tonic-gate 		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
9260Sstevel@tonic-gate 		 * process a signal.
9270Sstevel@tonic-gate 		 */
9280Sstevel@tonic-gate 		for (i = 0; i < cpc_ncounters; i++) {
9290Sstevel@tonic-gate 			if (ctx->kc_pics[i].kp_req != NULL &&
9300Sstevel@tonic-gate 			    bitmap & (1 << i) &&
9310Sstevel@tonic-gate 			    ctx->kc_pics[i].kp_req->kr_flags &
9320Sstevel@tonic-gate 			    CPC_OVF_NOTIFY_EMT) {
9330Sstevel@tonic-gate 				/*
9340Sstevel@tonic-gate 				 * A signal has been requested for this PIC, so
9350Sstevel@tonic-gate 				 * so freeze the context. The interrupt handler
9360Sstevel@tonic-gate 				 * has already stopped the counter hardware.
9370Sstevel@tonic-gate 				 */
9380Sstevel@tonic-gate 				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
9390Sstevel@tonic-gate 				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
9400Sstevel@tonic-gate 				    KCPC_PIC_OVERFLOWED);
9410Sstevel@tonic-gate 			}
9420Sstevel@tonic-gate 		}
9430Sstevel@tonic-gate 		aston(t);
9440Sstevel@tonic-gate 	}
9450Sstevel@tonic-gate 	return (NULL);
9460Sstevel@tonic-gate }
9470Sstevel@tonic-gate 
9480Sstevel@tonic-gate /*
9490Sstevel@tonic-gate  * The current thread context had an overflow interrupt; we're
9500Sstevel@tonic-gate  * executing here in high-level interrupt context.
9510Sstevel@tonic-gate  */
9520Sstevel@tonic-gate /*ARGSUSED*/
9530Sstevel@tonic-gate uint_t
9540Sstevel@tonic-gate kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
9550Sstevel@tonic-gate {
956*8803SJonathan.Haslam@Sun.COM 	kcpc_ctx_t *ctx;
957*8803SJonathan.Haslam@Sun.COM 	uint64_t bitmap;
958*8803SJonathan.Haslam@Sun.COM 	uint8_t *state;
9590Sstevel@tonic-gate 
9600Sstevel@tonic-gate 	if (pcbe_ops == NULL ||
9610Sstevel@tonic-gate 	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
9620Sstevel@tonic-gate 		return (DDI_INTR_UNCLAIMED);
9633884Sha137994 
9640Sstevel@tonic-gate 	/*
9650Sstevel@tonic-gate 	 * Prevent any further interrupts.
9660Sstevel@tonic-gate 	 */
9670Sstevel@tonic-gate 	pcbe_ops->pcbe_allstop();
9680Sstevel@tonic-gate 
969*8803SJonathan.Haslam@Sun.COM 	if (dtrace_cpc_in_use) {
970*8803SJonathan.Haslam@Sun.COM 		state = &cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state;
971*8803SJonathan.Haslam@Sun.COM 
972*8803SJonathan.Haslam@Sun.COM 		/*
973*8803SJonathan.Haslam@Sun.COM 		 * Set the per-CPU state bit to indicate that we are currently
974*8803SJonathan.Haslam@Sun.COM 		 * processing an interrupt if it is currently free. Drop the
975*8803SJonathan.Haslam@Sun.COM 		 * interrupt if the state isn't free (i.e. a configuration
976*8803SJonathan.Haslam@Sun.COM 		 * event is taking place).
977*8803SJonathan.Haslam@Sun.COM 		 */
978*8803SJonathan.Haslam@Sun.COM 		if (atomic_cas_8(state, DCPC_INTR_FREE,
979*8803SJonathan.Haslam@Sun.COM 		    DCPC_INTR_PROCESSING) == DCPC_INTR_FREE) {
980*8803SJonathan.Haslam@Sun.COM 			int i;
981*8803SJonathan.Haslam@Sun.COM 			kcpc_request_t req;
982*8803SJonathan.Haslam@Sun.COM 
983*8803SJonathan.Haslam@Sun.COM 			ASSERT(dtrace_cpc_fire != NULL);
984*8803SJonathan.Haslam@Sun.COM 
985*8803SJonathan.Haslam@Sun.COM 			(*dtrace_cpc_fire)(bitmap);
986*8803SJonathan.Haslam@Sun.COM 
987*8803SJonathan.Haslam@Sun.COM 			ctx = curthread->t_cpu->cpu_cpc_ctx;
988*8803SJonathan.Haslam@Sun.COM 
989*8803SJonathan.Haslam@Sun.COM 			/* Reset any counters that have overflowed */
990*8803SJonathan.Haslam@Sun.COM 			for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
991*8803SJonathan.Haslam@Sun.COM 				req = ctx->kc_set->ks_req[i];
992*8803SJonathan.Haslam@Sun.COM 
993*8803SJonathan.Haslam@Sun.COM 				if (bitmap & (1 << req.kr_picnum)) {
994*8803SJonathan.Haslam@Sun.COM 					pcbe_ops->pcbe_configure(req.kr_picnum,
995*8803SJonathan.Haslam@Sun.COM 					    req.kr_event, req.kr_preset,
996*8803SJonathan.Haslam@Sun.COM 					    req.kr_flags, req.kr_nattrs,
997*8803SJonathan.Haslam@Sun.COM 					    req.kr_attr, &(req.kr_config),
998*8803SJonathan.Haslam@Sun.COM 					    (void *)ctx);
999*8803SJonathan.Haslam@Sun.COM 				}
1000*8803SJonathan.Haslam@Sun.COM 			}
1001*8803SJonathan.Haslam@Sun.COM 			pcbe_ops->pcbe_program(ctx);
1002*8803SJonathan.Haslam@Sun.COM 
1003*8803SJonathan.Haslam@Sun.COM 			/*
1004*8803SJonathan.Haslam@Sun.COM 			 * We've finished processing the interrupt so set
1005*8803SJonathan.Haslam@Sun.COM 			 * the state back to free.
1006*8803SJonathan.Haslam@Sun.COM 			 */
1007*8803SJonathan.Haslam@Sun.COM 			cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state =
1008*8803SJonathan.Haslam@Sun.COM 			    DCPC_INTR_FREE;
1009*8803SJonathan.Haslam@Sun.COM 			membar_producer();
1010*8803SJonathan.Haslam@Sun.COM 		}
1011*8803SJonathan.Haslam@Sun.COM 		return (DDI_INTR_CLAIMED);
1012*8803SJonathan.Haslam@Sun.COM 	}
1013*8803SJonathan.Haslam@Sun.COM 
10140Sstevel@tonic-gate 	/*
1015*8803SJonathan.Haslam@Sun.COM 	 * DTrace isn't involved so pass on accordingly.
10160Sstevel@tonic-gate 	 *
10170Sstevel@tonic-gate 	 * If the interrupt has occurred in the context of an lwp owning
10180Sstevel@tonic-gate 	 * the counters, then the handler posts an AST to the lwp to
10190Sstevel@tonic-gate 	 * trigger the actual sampling, and optionally deliver a signal or
10200Sstevel@tonic-gate 	 * restart the counters, on the way out of the kernel using
10210Sstevel@tonic-gate 	 * kcpc_hw_overflow_ast() (see below).
10220Sstevel@tonic-gate 	 *
10230Sstevel@tonic-gate 	 * On the other hand, if the handler returns the context to us
10240Sstevel@tonic-gate 	 * directly, then it means that there are no other threads in
10250Sstevel@tonic-gate 	 * the middle of updating it, no AST has been posted, and so we
10260Sstevel@tonic-gate 	 * should sample the counters here, and restart them with no
10270Sstevel@tonic-gate 	 * further fuss.
10280Sstevel@tonic-gate 	 */
10290Sstevel@tonic-gate 	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
10300Sstevel@tonic-gate 		uint64_t curtick = KCPC_GET_TICK();
10310Sstevel@tonic-gate 
10320Sstevel@tonic-gate 		ctx->kc_hrtime = gethrtime_waitfree();
10330Sstevel@tonic-gate 		ctx->kc_vtick += curtick - ctx->kc_rawtick;
10340Sstevel@tonic-gate 		ctx->kc_rawtick = curtick;
10350Sstevel@tonic-gate 		pcbe_ops->pcbe_sample(ctx);
10360Sstevel@tonic-gate 		pcbe_ops->pcbe_program(ctx);
10370Sstevel@tonic-gate 	}
10380Sstevel@tonic-gate 
10390Sstevel@tonic-gate 	return (DDI_INTR_CLAIMED);
10400Sstevel@tonic-gate }
10410Sstevel@tonic-gate 
/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 *
 * Returns nonzero if any pic was found marked KCPC_PIC_OVERFLOWED, in
 * which case the context stays frozen and the caller should deliver a
 * signal; returns 0 after unfreezing and reprogramming the counters.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	/* Account the ticks accumulated since the last sample. */
	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}
10950Sstevel@tonic-gate 
/*
 * Called when switching away from current thread.
 *
 * Stops the hardware counters and, unless the context is invalid or
 * frozen, samples them so the accumulated counts are preserved across
 * the context switch.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		/* Already invalidated and stopped: nothing left to do. */
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	/* A frozen context is not sampled; its saved data stays as-is. */
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}
11250Sstevel@tonic-gate 
/*
 * Context-switch restore operator: reprogram the hardware with this
 * thread's context unless it is invalid or frozen. While programming,
 * the context is marked KCPC_CTX_RESTORE so that kcpc_free() waits for
 * us rather than tearing down the context underneath us.
 */
static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	mutex_enter(&ctx->kc_lock);
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		ctx->kc_flags |= KCPC_CTX_INVALID_STOPPED;


	/* Invalid or frozen contexts are never programmed. */
	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
		mutex_exit(&ctx->kc_lock);
		return;
	}

	/*
	 * Set kc_flags to show that a kcpc_restore() is in progress to avoid
	 * ctx & set related memory objects being freed without us knowing.
	 * This can happen if an agent thread is executing a kcpc_unbind(),
	 * with this thread as the target, whilst we're concurrently doing a
	 * restorectx() during, for example, a proc_exit().  Effectively, by
	 * doing this, we're asking kcpc_free() to cv_wait() until
	 * kcpc_restore() has completed.
	 */
	ctx->kc_flags |= KCPC_CTX_RESTORE;
	mutex_exit(&ctx->kc_lock);

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Wake the agent thread if it's waiting in kcpc_free().
	 */
	mutex_enter(&ctx->kc_lock);
	ctx->kc_flags &= ~KCPC_CTX_RESTORE;
	cv_signal(&ctx->kc_condv);
	mutex_exit(&ctx->kc_lock);
}
11730Sstevel@tonic-gate 
11740Sstevel@tonic-gate /*
11750Sstevel@tonic-gate  * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
11760Sstevel@tonic-gate  * following context operators to the idle thread on each CPU. They stop the
11770Sstevel@tonic-gate  * counters when the idle thread is switched on, and they start them again when
11780Sstevel@tonic-gate  * it is switched off.
11790Sstevel@tonic-gate  */
11800Sstevel@tonic-gate 
/*
 * Save operator for the idle thread, i.e. run when the idle thread is
 * switched off its CPU (cf. kcpc_save() above). Despite the name, this
 * *restarts* the counters by reprogramming the CPU-bound context: these
 * idle operators exist only to keep the counters stopped for the
 * duration of the idle thread's run (see the block comment above).
 */
/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	/* Nothing to restart if there's no valid CPU-bound context. */
	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}
12050Sstevel@tonic-gate 
/*
 * Restore operator for the idle thread, i.e. run when the idle thread is
 * switched onto CPU cp. Stops all counters so that idle cycles are not
 * counted while the CPU has a valid bound context (see the block comment
 * above).
 */
void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	/* No valid CPU-bound context: leave the counters alone. */
	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}
12290Sstevel@tonic-gate 
/*
 * Forkctx-style operator run when thread t creates child thread ct. If
 * t's context has KCPC_CTX_LWPINHERIT set, clone the context (and its
 * set) for the child, honor the SIGOVF contract by pre-overflowing the
 * child's counters where requested, and install the CPC context
 * operators on the child.
 */
/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	/* Nothing to inherit. */
	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	/*
	 * Clone under the cpuctx lock as reader; bail if the parent's
	 * context was invalidated in the meantime.
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Copy the parent context's kc_flags field, but don't overwrite
	 * the child's in case it was modified during kcpc_ctx_clone.
	 */
	cctx->kc_flags |= ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		/* Post the overflow AST so trap() delivers the signal. */
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	/* Install save/restore/fork/exit operators on the child. */
	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}
12840Sstevel@tonic-gate 
12850Sstevel@tonic-gate /*
12860Sstevel@tonic-gate  * Counter Stoppage Theory
12870Sstevel@tonic-gate  *
12880Sstevel@tonic-gate  * The counters may need to be stopped properly at the following occasions:
12890Sstevel@tonic-gate  *
12900Sstevel@tonic-gate  * 1) An LWP exits.
12910Sstevel@tonic-gate  * 2) A thread exits.
12920Sstevel@tonic-gate  * 3) An LWP performs an exec().
12930Sstevel@tonic-gate  * 4) A bound set is unbound.
12940Sstevel@tonic-gate  *
12950Sstevel@tonic-gate  * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
12960Sstevel@tonic-gate  * to be freed as well.
12970Sstevel@tonic-gate  *
12980Sstevel@tonic-gate  * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
12990Sstevel@tonic-gate  * when the thread is freed, kcpc_free(), called by freectx(), frees the
13000Sstevel@tonic-gate  * context.
13010Sstevel@tonic-gate  *
13020Sstevel@tonic-gate  * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
13030Sstevel@tonic-gate  *
13040Sstevel@tonic-gate  * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
13050Sstevel@tonic-gate  * been called from exec. It stops the counters _and_ frees the context.
13060Sstevel@tonic-gate  *
13070Sstevel@tonic-gate  * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
13080Sstevel@tonic-gate  *
13090Sstevel@tonic-gate  * CPU-bound counters are always stopped via kcpc_unbind().
13100Sstevel@tonic-gate  */
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate /*
13130Sstevel@tonic-gate  * We're being called to delete the context; we ensure that all associated data
13140Sstevel@tonic-gate  * structures are freed, and that the hardware is passivated if this is an exec.
13150Sstevel@tonic-gate  */
13160Sstevel@tonic-gate 
13170Sstevel@tonic-gate /*ARGSUSED*/
13180Sstevel@tonic-gate static void
13190Sstevel@tonic-gate kcpc_free(kcpc_ctx_t *ctx, int isexec)
13200Sstevel@tonic-gate {
13210Sstevel@tonic-gate 	int		i;
13220Sstevel@tonic-gate 	kcpc_set_t	*set = ctx->kc_set;
13230Sstevel@tonic-gate 
13240Sstevel@tonic-gate 	ASSERT(set != NULL);
13250Sstevel@tonic-gate 
13266275Strevtom 	/*
13276275Strevtom 	 * Wait for kcpc_restore() to finish before we tear things down.
13286275Strevtom 	 */
13296275Strevtom 	mutex_enter(&ctx->kc_lock);
13306275Strevtom 	while (ctx->kc_flags & KCPC_CTX_RESTORE)
13316275Strevtom 		cv_wait(&ctx->kc_condv, &ctx->kc_lock);
13326275Strevtom 	ctx->kc_flags |= KCPC_CTX_INVALID;
13336275Strevtom 	mutex_exit(&ctx->kc_lock);
13340Sstevel@tonic-gate 
13350Sstevel@tonic-gate 	if (isexec) {
13360Sstevel@tonic-gate 		/*
13370Sstevel@tonic-gate 		 * This thread is execing, and after the exec it should not have
13380Sstevel@tonic-gate 		 * any performance counter context. Stop the counters properly
13390Sstevel@tonic-gate 		 * here so the system isn't surprised by an overflow interrupt
13400Sstevel@tonic-gate 		 * later.
13410Sstevel@tonic-gate 		 */
13420Sstevel@tonic-gate 		if (ctx->kc_cpuid != -1) {
13430Sstevel@tonic-gate 			cpu_t *cp;
13440Sstevel@tonic-gate 			/*
13450Sstevel@tonic-gate 			 * CPU-bound context; stop the appropriate CPU's ctrs.
13460Sstevel@tonic-gate 			 * Hold cpu_lock while examining the CPU to ensure it
13470Sstevel@tonic-gate 			 * doesn't go away.
13480Sstevel@tonic-gate 			 */
13490Sstevel@tonic-gate 			mutex_enter(&cpu_lock);
13500Sstevel@tonic-gate 			cp = cpu_get(ctx->kc_cpuid);
13510Sstevel@tonic-gate 			/*
13520Sstevel@tonic-gate 			 * The CPU could have been DR'd out, so only stop the
13530Sstevel@tonic-gate 			 * CPU and clear its context pointer if the CPU still
13540Sstevel@tonic-gate 			 * exists.
13550Sstevel@tonic-gate 			 */
13560Sstevel@tonic-gate 			if (cp != NULL) {
13570Sstevel@tonic-gate 				mutex_enter(&cp->cpu_cpc_ctxlock);
13580Sstevel@tonic-gate 				kcpc_stop_hw(ctx);
13590Sstevel@tonic-gate 				cp->cpu_cpc_ctx = NULL;
13600Sstevel@tonic-gate 				mutex_exit(&cp->cpu_cpc_ctxlock);
13610Sstevel@tonic-gate 			}
13620Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
13630Sstevel@tonic-gate 			ASSERT(curthread->t_cpc_ctx == NULL);
13640Sstevel@tonic-gate 		} else {
13650Sstevel@tonic-gate 			/*
13660Sstevel@tonic-gate 			 * Thread-bound context; stop _this_ CPU's counters.
13670Sstevel@tonic-gate 			 */
13680Sstevel@tonic-gate 			kpreempt_disable();
13690Sstevel@tonic-gate 			pcbe_ops->pcbe_allstop();
13700Sstevel@tonic-gate 			atomic_or_uint(&ctx->kc_flags,
13710Sstevel@tonic-gate 			    KCPC_CTX_INVALID_STOPPED);
13720Sstevel@tonic-gate 			kpreempt_enable();
13730Sstevel@tonic-gate 			curthread->t_cpc_ctx = NULL;
13740Sstevel@tonic-gate 		}
13750Sstevel@tonic-gate 
13760Sstevel@tonic-gate 		/*
13770Sstevel@tonic-gate 		 * Since we are being called from an exec and we know that
13780Sstevel@tonic-gate 		 * exec is not permitted via the agent thread, we should clean
13790Sstevel@tonic-gate 		 * up this thread's CPC state completely, and not leave dangling
13800Sstevel@tonic-gate 		 * CPC pointers behind.
13810Sstevel@tonic-gate 		 */
13820Sstevel@tonic-gate 		ASSERT(ctx->kc_thread == curthread);
13830Sstevel@tonic-gate 		curthread->t_cpc_set = NULL;
13840Sstevel@tonic-gate 	}
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 	/*
13870Sstevel@tonic-gate 	 * Walk through each request in this context's set and free the PCBE's
13880Sstevel@tonic-gate 	 * configuration if it exists.
13890Sstevel@tonic-gate 	 */
13900Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
13910Sstevel@tonic-gate 		if (set->ks_req[i].kr_config != NULL)
13920Sstevel@tonic-gate 			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
13930Sstevel@tonic-gate 	}
13940Sstevel@tonic-gate 
13950Sstevel@tonic-gate 	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
13960Sstevel@tonic-gate 	kcpc_ctx_free(ctx);
13970Sstevel@tonic-gate 	kcpc_free_set(set);
13980Sstevel@tonic-gate }
13990Sstevel@tonic-gate 
14000Sstevel@tonic-gate /*
14010Sstevel@tonic-gate  * Free the memory associated with a request set.
14020Sstevel@tonic-gate  */
14030Sstevel@tonic-gate void
14040Sstevel@tonic-gate kcpc_free_set(kcpc_set_t *set)
14050Sstevel@tonic-gate {
14060Sstevel@tonic-gate 	int		i;
14070Sstevel@tonic-gate 	kcpc_request_t	*req;
14080Sstevel@tonic-gate 
14090Sstevel@tonic-gate 	ASSERT(set->ks_req != NULL);
14100Sstevel@tonic-gate 
14110Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
14120Sstevel@tonic-gate 		req = &set->ks_req[i];
14130Sstevel@tonic-gate 
14140Sstevel@tonic-gate 		if (req->kr_nattrs != 0) {
14150Sstevel@tonic-gate 			kmem_free(req->kr_attr,
14160Sstevel@tonic-gate 			    req->kr_nattrs * sizeof (kcpc_attr_t));
14170Sstevel@tonic-gate 		}
14180Sstevel@tonic-gate 	}
14190Sstevel@tonic-gate 
14200Sstevel@tonic-gate 	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
14216275Strevtom 	cv_destroy(&set->ks_condv);
14226275Strevtom 	mutex_destroy(&set->ks_lock);
14230Sstevel@tonic-gate 	kmem_free(set, sizeof (kcpc_set_t));
14240Sstevel@tonic-gate }
14250Sstevel@tonic-gate 
14260Sstevel@tonic-gate /*
14270Sstevel@tonic-gate  * Grab every existing context and mark it as invalid.
14280Sstevel@tonic-gate  */
14290Sstevel@tonic-gate void
14300Sstevel@tonic-gate kcpc_invalidate_all(void)
14310Sstevel@tonic-gate {
14320Sstevel@tonic-gate 	kcpc_ctx_t *ctx;
14330Sstevel@tonic-gate 	long hash;
14340Sstevel@tonic-gate 
14350Sstevel@tonic-gate 	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
14360Sstevel@tonic-gate 		mutex_enter(&kcpc_ctx_llock[hash]);
14370Sstevel@tonic-gate 		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
14380Sstevel@tonic-gate 			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
14390Sstevel@tonic-gate 		mutex_exit(&kcpc_ctx_llock[hash]);
14400Sstevel@tonic-gate 	}
14410Sstevel@tonic-gate }
14420Sstevel@tonic-gate 
14430Sstevel@tonic-gate /*
14443732Sae112802  * Interface for PCBEs to signal that an existing configuration has suddenly
14453732Sae112802  * become invalid.
14463732Sae112802  */
14473732Sae112802 void
14483732Sae112802 kcpc_invalidate_config(void *token)
14493732Sae112802 {
14503732Sae112802 	kcpc_ctx_t *ctx = token;
14513732Sae112802 
14523732Sae112802 	ASSERT(ctx != NULL);
14533732Sae112802 
14543732Sae112802 	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
14553732Sae112802 }
14563732Sae112802 
14573732Sae112802 /*
14580Sstevel@tonic-gate  * Called from lwp_exit() and thread_exit()
14590Sstevel@tonic-gate  */
14600Sstevel@tonic-gate void
14610Sstevel@tonic-gate kcpc_passivate(void)
14620Sstevel@tonic-gate {
14630Sstevel@tonic-gate 	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
14640Sstevel@tonic-gate 	kcpc_set_t *set = curthread->t_cpc_set;
14650Sstevel@tonic-gate 
14660Sstevel@tonic-gate 	if (set == NULL)
14670Sstevel@tonic-gate 		return;
14680Sstevel@tonic-gate 
14690Sstevel@tonic-gate 	/*
14700Sstevel@tonic-gate 	 * We're cleaning up after this thread; ensure there are no dangling
14710Sstevel@tonic-gate 	 * CPC pointers left behind. The context and set will be freed by
14720Sstevel@tonic-gate 	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
14730Sstevel@tonic-gate 	 * the case of a CPU-bound set.
14740Sstevel@tonic-gate 	 */
14750Sstevel@tonic-gate 	curthread->t_cpc_ctx = NULL;
14760Sstevel@tonic-gate 
14770Sstevel@tonic-gate 	if (ctx == NULL) {
14780Sstevel@tonic-gate 		/*
14790Sstevel@tonic-gate 		 * This thread has a set but no context; it must be a CPU-bound
14800Sstevel@tonic-gate 		 * set. The hardware will be stopped via kcpc_unbind() when the
14810Sstevel@tonic-gate 		 * process exits and closes its file descriptors with
14820Sstevel@tonic-gate 		 * kcpc_close(). Our only job here is to clean up this thread's
14830Sstevel@tonic-gate 		 * state; the set will be freed with the unbind().
14840Sstevel@tonic-gate 		 */
14850Sstevel@tonic-gate 		(void) kcpc_unbind(set);
14860Sstevel@tonic-gate 		/*
14870Sstevel@tonic-gate 		 * Unbinding a set belonging to the current thread should clear
14880Sstevel@tonic-gate 		 * its set pointer.
14890Sstevel@tonic-gate 		 */
14900Sstevel@tonic-gate 		ASSERT(curthread->t_cpc_set == NULL);
14910Sstevel@tonic-gate 		return;
14920Sstevel@tonic-gate 	}
14930Sstevel@tonic-gate 
14940Sstevel@tonic-gate 	curthread->t_cpc_set = NULL;
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate 	/*
14970Sstevel@tonic-gate 	 * This thread/LWP is exiting but context switches will continue to
14980Sstevel@tonic-gate 	 * happen for a bit as the exit proceeds.  Kernel preemption must be
14990Sstevel@tonic-gate 	 * disabled here to prevent a race between checking or setting the
15000Sstevel@tonic-gate 	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
15010Sstevel@tonic-gate 	 * a context switch.
15020Sstevel@tonic-gate 	 */
15030Sstevel@tonic-gate 
15040Sstevel@tonic-gate 	kpreempt_disable();
15050Sstevel@tonic-gate 	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
15060Sstevel@tonic-gate 		pcbe_ops->pcbe_allstop();
15070Sstevel@tonic-gate 		atomic_or_uint(&ctx->kc_flags,
15080Sstevel@tonic-gate 		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
15090Sstevel@tonic-gate 	}
15100Sstevel@tonic-gate 	kpreempt_enable();
15110Sstevel@tonic-gate }
15120Sstevel@tonic-gate 
15130Sstevel@tonic-gate /*
15140Sstevel@tonic-gate  * Assign the requests in the given set to the PICs in the context.
15150Sstevel@tonic-gate  * Returns 0 if successful, -1 on failure.
15160Sstevel@tonic-gate  */
15170Sstevel@tonic-gate /*ARGSUSED*/
1518*8803SJonathan.Haslam@Sun.COM int
15190Sstevel@tonic-gate kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
15200Sstevel@tonic-gate {
15210Sstevel@tonic-gate 	int i;
15220Sstevel@tonic-gate 	int *picnum_save;
15230Sstevel@tonic-gate 
15240Sstevel@tonic-gate 	ASSERT(set->ks_nreqs <= cpc_ncounters);
15250Sstevel@tonic-gate 
15260Sstevel@tonic-gate 	/*
15270Sstevel@tonic-gate 	 * Provide kcpc_tryassign() with scratch space to avoid doing an
15280Sstevel@tonic-gate 	 * alloc/free with every invocation.
15290Sstevel@tonic-gate 	 */
15300Sstevel@tonic-gate 	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
15310Sstevel@tonic-gate 	/*
15320Sstevel@tonic-gate 	 * kcpc_tryassign() blindly walks through each request in the set,
15330Sstevel@tonic-gate 	 * seeing if a counter can count its event. If yes, it assigns that
15340Sstevel@tonic-gate 	 * counter. However, that counter may have been the only capable counter
15350Sstevel@tonic-gate 	 * for _another_ request's event. The solution is to try every possible
15360Sstevel@tonic-gate 	 * request first. Note that this does not cover all solutions, as
15370Sstevel@tonic-gate 	 * that would require all unique orderings of requests, an n^n operation
15380Sstevel@tonic-gate 	 * which would be unacceptable for architectures with many counters.
15390Sstevel@tonic-gate 	 */
15400Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++)
15410Sstevel@tonic-gate 		if (kcpc_tryassign(set, i, picnum_save) == 0)
15420Sstevel@tonic-gate 			break;
15430Sstevel@tonic-gate 
15440Sstevel@tonic-gate 	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
15450Sstevel@tonic-gate 	if (i == set->ks_nreqs)
15460Sstevel@tonic-gate 		return (-1);
15470Sstevel@tonic-gate 	return (0);
15480Sstevel@tonic-gate }
15490Sstevel@tonic-gate 
15500Sstevel@tonic-gate static int
15510Sstevel@tonic-gate kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
15520Sstevel@tonic-gate {
15530Sstevel@tonic-gate 	int		i;
15540Sstevel@tonic-gate 	int		j;
15550Sstevel@tonic-gate 	uint64_t	bitmap = 0, resmap = 0;
15560Sstevel@tonic-gate 	uint64_t	ctrmap;
15570Sstevel@tonic-gate 
15580Sstevel@tonic-gate 	/*
15590Sstevel@tonic-gate 	 * We are attempting to assign the reqs to pics, but we may fail. If we
15600Sstevel@tonic-gate 	 * fail, we need to restore the state of the requests to what it was
15610Sstevel@tonic-gate 	 * when we found it, as some reqs may have been explicitly assigned to
15620Sstevel@tonic-gate 	 * a specific PIC beforehand. We do this by snapshotting the assignments
15630Sstevel@tonic-gate 	 * now and restoring from it later if we fail.
15640Sstevel@tonic-gate 	 *
15650Sstevel@tonic-gate 	 * Also we note here which counters have already been claimed by
15660Sstevel@tonic-gate 	 * requests with explicit counter assignments.
15670Sstevel@tonic-gate 	 */
15680Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
15690Sstevel@tonic-gate 		scratch[i] = set->ks_req[i].kr_picnum;
15700Sstevel@tonic-gate 		if (set->ks_req[i].kr_picnum != -1)
15710Sstevel@tonic-gate 			resmap |= (1 << set->ks_req[i].kr_picnum);
15720Sstevel@tonic-gate 	}
15730Sstevel@tonic-gate 
15740Sstevel@tonic-gate 	/*
15750Sstevel@tonic-gate 	 * Walk through requests assigning them to the first PIC that is
15760Sstevel@tonic-gate 	 * capable.
15770Sstevel@tonic-gate 	 */
15780Sstevel@tonic-gate 	i = starting_req;
15790Sstevel@tonic-gate 	do {
15800Sstevel@tonic-gate 		if (set->ks_req[i].kr_picnum != -1) {
15810Sstevel@tonic-gate 			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
15820Sstevel@tonic-gate 			bitmap |= (1 << set->ks_req[i].kr_picnum);
15830Sstevel@tonic-gate 			if (++i == set->ks_nreqs)
15840Sstevel@tonic-gate 				i = 0;
15850Sstevel@tonic-gate 			continue;
15860Sstevel@tonic-gate 		}
15870Sstevel@tonic-gate 
15880Sstevel@tonic-gate 		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
15890Sstevel@tonic-gate 		for (j = 0; j < cpc_ncounters; j++) {
15900Sstevel@tonic-gate 			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
15910Sstevel@tonic-gate 			    (resmap & (1 << j)) == 0) {
15920Sstevel@tonic-gate 				/*
15930Sstevel@tonic-gate 				 * We can assign this counter because:
15940Sstevel@tonic-gate 				 *
15950Sstevel@tonic-gate 				 * 1. It can count the event (ctrmap)
15960Sstevel@tonic-gate 				 * 2. It hasn't been assigned yet (bitmap)
15970Sstevel@tonic-gate 				 * 3. It wasn't reserved by a request (resmap)
15980Sstevel@tonic-gate 				 */
15990Sstevel@tonic-gate 				bitmap |= (1 << j);
16000Sstevel@tonic-gate 				break;
16010Sstevel@tonic-gate 			}
16020Sstevel@tonic-gate 		}
16030Sstevel@tonic-gate 		if (j == cpc_ncounters) {
16040Sstevel@tonic-gate 			for (i = 0; i < set->ks_nreqs; i++)
16050Sstevel@tonic-gate 				set->ks_req[i].kr_picnum = scratch[i];
16060Sstevel@tonic-gate 			return (-1);
16070Sstevel@tonic-gate 		}
16080Sstevel@tonic-gate 		set->ks_req[i].kr_picnum = j;
16090Sstevel@tonic-gate 
16100Sstevel@tonic-gate 		if (++i == set->ks_nreqs)
16110Sstevel@tonic-gate 			i = 0;
16120Sstevel@tonic-gate 	} while (i != starting_req);
16130Sstevel@tonic-gate 
16140Sstevel@tonic-gate 	return (0);
16150Sstevel@tonic-gate }
16160Sstevel@tonic-gate 
16170Sstevel@tonic-gate kcpc_set_t *
16180Sstevel@tonic-gate kcpc_dup_set(kcpc_set_t *set)
16190Sstevel@tonic-gate {
16200Sstevel@tonic-gate 	kcpc_set_t	*new;
16210Sstevel@tonic-gate 	int		i;
16220Sstevel@tonic-gate 	int		j;
16230Sstevel@tonic-gate 
16246275Strevtom 	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
16256275Strevtom 	new->ks_state &= ~KCPC_SET_BOUND;
16260Sstevel@tonic-gate 	new->ks_flags = set->ks_flags;
16270Sstevel@tonic-gate 	new->ks_nreqs = set->ks_nreqs;
16280Sstevel@tonic-gate 	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
16290Sstevel@tonic-gate 	    KM_SLEEP);
16300Sstevel@tonic-gate 	new->ks_data = NULL;
16310Sstevel@tonic-gate 	new->ks_ctx = NULL;
16320Sstevel@tonic-gate 
16330Sstevel@tonic-gate 	for (i = 0; i < new->ks_nreqs; i++) {
16340Sstevel@tonic-gate 		new->ks_req[i].kr_config = NULL;
16350Sstevel@tonic-gate 		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
16360Sstevel@tonic-gate 		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
16370Sstevel@tonic-gate 		new->ks_req[i].kr_picp = NULL;
16380Sstevel@tonic-gate 		new->ks_req[i].kr_data = NULL;
16390Sstevel@tonic-gate 		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
16400Sstevel@tonic-gate 		    CPC_MAX_EVENT_LEN);
16410Sstevel@tonic-gate 		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
16420Sstevel@tonic-gate 		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
16430Sstevel@tonic-gate 		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
16440Sstevel@tonic-gate 		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
16450Sstevel@tonic-gate 		    sizeof (kcpc_attr_t), KM_SLEEP);
16460Sstevel@tonic-gate 		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
16470Sstevel@tonic-gate 			new->ks_req[i].kr_attr[j].ka_val =
16480Sstevel@tonic-gate 			    set->ks_req[i].kr_attr[j].ka_val;
16490Sstevel@tonic-gate 			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
16500Sstevel@tonic-gate 			    set->ks_req[i].kr_attr[j].ka_name,
16510Sstevel@tonic-gate 			    CPC_MAX_ATTR_LEN);
16520Sstevel@tonic-gate 		}
16530Sstevel@tonic-gate 	}
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 	return (new);
16560Sstevel@tonic-gate }
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate int
16590Sstevel@tonic-gate kcpc_allow_nonpriv(void *token)
16600Sstevel@tonic-gate {
16610Sstevel@tonic-gate 	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
16620Sstevel@tonic-gate }
16630Sstevel@tonic-gate 
16640Sstevel@tonic-gate void
16650Sstevel@tonic-gate kcpc_invalidate(kthread_t *t)
16660Sstevel@tonic-gate {
16670Sstevel@tonic-gate 	kcpc_ctx_t *ctx = t->t_cpc_ctx;
16680Sstevel@tonic-gate 
16690Sstevel@tonic-gate 	if (ctx != NULL)
16700Sstevel@tonic-gate 		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
16710Sstevel@tonic-gate }
16720Sstevel@tonic-gate 
16730Sstevel@tonic-gate /*
16740Sstevel@tonic-gate  * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
16750Sstevel@tonic-gate  * are used to construct PCBE names, starting with the most specific,
16760Sstevel@tonic-gate  * "pcbe.first.second.third.fourth" and ending with the least specific,
16770Sstevel@tonic-gate  * "pcbe.first".
16780Sstevel@tonic-gate  *
16790Sstevel@tonic-gate  * Returns 0 if a PCBE was successfully loaded and -1 upon error.
16800Sstevel@tonic-gate  */
16810Sstevel@tonic-gate int
16820Sstevel@tonic-gate kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
16830Sstevel@tonic-gate {
16841414Scindi 	uint_t s[3];
16850Sstevel@tonic-gate 
16861414Scindi 	s[0] = first;
16871414Scindi 	s[1] = second;
16881414Scindi 	s[2] = third;
16890Sstevel@tonic-gate 
16901414Scindi 	return (modload_qualified("pcbe",
16915254Sgavinm 	    "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
16920Sstevel@tonic-gate }
1693*8803SJonathan.Haslam@Sun.COM 
1694*8803SJonathan.Haslam@Sun.COM char *
1695*8803SJonathan.Haslam@Sun.COM kcpc_list_attrs(void)
1696*8803SJonathan.Haslam@Sun.COM {
1697*8803SJonathan.Haslam@Sun.COM 	ASSERT(pcbe_ops != NULL);
1698*8803SJonathan.Haslam@Sun.COM 
1699*8803SJonathan.Haslam@Sun.COM 	return (pcbe_ops->pcbe_list_attrs());
1700*8803SJonathan.Haslam@Sun.COM }
1701*8803SJonathan.Haslam@Sun.COM 
1702*8803SJonathan.Haslam@Sun.COM char *
1703*8803SJonathan.Haslam@Sun.COM kcpc_list_events(uint_t pic)
1704*8803SJonathan.Haslam@Sun.COM {
1705*8803SJonathan.Haslam@Sun.COM 	ASSERT(pcbe_ops != NULL);
1706*8803SJonathan.Haslam@Sun.COM 
1707*8803SJonathan.Haslam@Sun.COM 	return (pcbe_ops->pcbe_list_events(pic));
1708*8803SJonathan.Haslam@Sun.COM }
1709*8803SJonathan.Haslam@Sun.COM 
1710*8803SJonathan.Haslam@Sun.COM uint_t
1711*8803SJonathan.Haslam@Sun.COM kcpc_pcbe_capabilities(void)
1712*8803SJonathan.Haslam@Sun.COM {
1713*8803SJonathan.Haslam@Sun.COM 	ASSERT(pcbe_ops != NULL);
1714*8803SJonathan.Haslam@Sun.COM 
1715*8803SJonathan.Haslam@Sun.COM 	return (pcbe_ops->pcbe_caps);
1716*8803SJonathan.Haslam@Sun.COM }
1717*8803SJonathan.Haslam@Sun.COM 
1718*8803SJonathan.Haslam@Sun.COM int
1719*8803SJonathan.Haslam@Sun.COM kcpc_pcbe_loaded(void)
1720*8803SJonathan.Haslam@Sun.COM {
1721*8803SJonathan.Haslam@Sun.COM 	return (pcbe_ops == NULL ? -1 : 0);
1722*8803SJonathan.Haslam@Sun.COM }
1723