xref: /onnv-gate/usr/src/uts/common/io/cpc.c (revision 7656:2621e50fdf4a)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate 
/*
 * CPU Performance Counter system calls and device driver.
 *
 * This module uses a combination of thread context operators, and
 * thread-specific data to export CPU performance counters
 * via both a system call and a driver interface.
 *
 * There are three access methods exported - the 'shared' device
 * and the 'private' and 'agent' variants of the system call.
 *
 * The shared device treats the performance counter registers as
 * a processor metric, regardless of the work scheduled on them.
 * The private system call treats the performance counter registers
 * as a property of a single lwp.  This is achieved by using the
 * thread context operators to virtualize the contents of the
 * performance counter registers between lwps.
 *
 * The agent method is like the private method, except that it must
 * be accessed via /proc's agent lwp to allow the counter context of
 * other threads to be examined safely.
 *
 * The shared usage fundamentally conflicts with the agent and private usage;
 * almost all of the complexity of the module is needed to allow these two
 * models to co-exist in a reasonable way.
 */
520Sstevel@tonic-gate 
530Sstevel@tonic-gate #include <sys/types.h>
540Sstevel@tonic-gate #include <sys/file.h>
550Sstevel@tonic-gate #include <sys/errno.h>
560Sstevel@tonic-gate #include <sys/open.h>
570Sstevel@tonic-gate #include <sys/cred.h>
580Sstevel@tonic-gate #include <sys/conf.h>
590Sstevel@tonic-gate #include <sys/stat.h>
600Sstevel@tonic-gate #include <sys/processor.h>
610Sstevel@tonic-gate #include <sys/cpuvar.h>
620Sstevel@tonic-gate #include <sys/disp.h>
630Sstevel@tonic-gate #include <sys/kmem.h>
640Sstevel@tonic-gate #include <sys/modctl.h>
650Sstevel@tonic-gate #include <sys/ddi.h>
660Sstevel@tonic-gate #include <sys/sunddi.h>
670Sstevel@tonic-gate #include <sys/nvpair.h>
680Sstevel@tonic-gate #include <sys/policy.h>
690Sstevel@tonic-gate #include <sys/machsystm.h>
700Sstevel@tonic-gate #include <sys/cpc_impl.h>
710Sstevel@tonic-gate #include <sys/cpc_pcbe.h>
720Sstevel@tonic-gate #include <sys/kcpc.h>
730Sstevel@tonic-gate 
740Sstevel@tonic-gate static int kcpc_copyin_set(kcpc_set_t **set, void *ubuf, size_t len);
750Sstevel@tonic-gate static int kcpc_verify_set(kcpc_set_t *set);
760Sstevel@tonic-gate static uint32_t kcpc_nvlist_npairs(nvlist_t *list);
770Sstevel@tonic-gate 
/*
 * Attribute names offered on every processor, whatever attributes the PCBE
 * itself reports, and the separator used between entries of an attribute
 * list.  Both are string literals so that they can be pasted together at
 * compile time (SEPARATOR ATTRLIST).
 */
#define	SEPARATOR ","
#define	ATTRLIST "picnum"
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * System call to access CPU performance counters.
870Sstevel@tonic-gate  */
880Sstevel@tonic-gate static int
890Sstevel@tonic-gate cpc(int cmd, id_t lwpid, void *udata1, void *udata2, void *udata3)
900Sstevel@tonic-gate {
910Sstevel@tonic-gate 	kthread_t	*t;
920Sstevel@tonic-gate 	int		error;
930Sstevel@tonic-gate 	int		size;
940Sstevel@tonic-gate 	const char	*str;
950Sstevel@tonic-gate 	int		code;
960Sstevel@tonic-gate 
970Sstevel@tonic-gate 	/*
980Sstevel@tonic-gate 	 * This CPC syscall should only be loaded if it found a PCBE to use.
990Sstevel@tonic-gate 	 */
1000Sstevel@tonic-gate 	ASSERT(pcbe_ops != NULL);
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate 	if (curproc->p_agenttp == curthread) {
1030Sstevel@tonic-gate 		/*
1040Sstevel@tonic-gate 		 * Only if /proc is invoking this system call from
1050Sstevel@tonic-gate 		 * the agent thread do we allow the caller to examine
1060Sstevel@tonic-gate 		 * the contexts of other lwps in the process.  And
1070Sstevel@tonic-gate 		 * because we know we're the agent, we know we don't
1080Sstevel@tonic-gate 		 * have to grab p_lock because no-one else can change
1090Sstevel@tonic-gate 		 * the state of the process.
1100Sstevel@tonic-gate 		 */
1110Sstevel@tonic-gate 		if ((t = idtot(curproc, lwpid)) == NULL || t == curthread)
1120Sstevel@tonic-gate 			return (set_errno(ESRCH));
1130Sstevel@tonic-gate 		ASSERT(t->t_tid == lwpid && ttolwp(t) != NULL);
1140Sstevel@tonic-gate 	} else
1150Sstevel@tonic-gate 		t = curthread;
1160Sstevel@tonic-gate 
1170Sstevel@tonic-gate 	if (t->t_cpc_set == NULL && (cmd == CPC_SAMPLE || cmd == CPC_RELE))
1180Sstevel@tonic-gate 		return (set_errno(EINVAL));
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate 	switch (cmd) {
1210Sstevel@tonic-gate 	case CPC_BIND:
1220Sstevel@tonic-gate 		/*
1230Sstevel@tonic-gate 		 * udata1 = pointer to packed nvlist buffer
1240Sstevel@tonic-gate 		 * udata2 = size of packed nvlist buffer
1250Sstevel@tonic-gate 		 * udata3 = User addr to return error subcode in.
1260Sstevel@tonic-gate 		 */
1270Sstevel@tonic-gate 
1280Sstevel@tonic-gate 		rw_enter(&kcpc_cpuctx_lock, RW_READER);
1290Sstevel@tonic-gate 		if (kcpc_cpuctx) {
1300Sstevel@tonic-gate 			rw_exit(&kcpc_cpuctx_lock);
1310Sstevel@tonic-gate 			return (set_errno(EAGAIN));
1320Sstevel@tonic-gate 		}
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate 		if (kcpc_hw_lwp_hook() != 0) {
1350Sstevel@tonic-gate 			rw_exit(&kcpc_cpuctx_lock);
1360Sstevel@tonic-gate 			return (set_errno(EACCES));
1370Sstevel@tonic-gate 		}
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 		/*
1400Sstevel@tonic-gate 		 * An LWP may only have one set bound to it at a time; if there
1410Sstevel@tonic-gate 		 * is a set bound to this LWP already, we unbind it here.
1420Sstevel@tonic-gate 		 */
1430Sstevel@tonic-gate 		if (t->t_cpc_set != NULL)
1440Sstevel@tonic-gate 			(void) kcpc_unbind(t->t_cpc_set);
1450Sstevel@tonic-gate 		ASSERT(t->t_cpc_set == NULL);
1460Sstevel@tonic-gate 
1470Sstevel@tonic-gate 		if ((error = kcpc_copyin_set(&t->t_cpc_set, udata1,
1480Sstevel@tonic-gate 		    (size_t)udata2)) != 0) {
1490Sstevel@tonic-gate 			rw_exit(&kcpc_cpuctx_lock);
1500Sstevel@tonic-gate 			return (set_errno(error));
1510Sstevel@tonic-gate 		}
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate 		if ((error = kcpc_verify_set(t->t_cpc_set)) != 0) {
1540Sstevel@tonic-gate 			rw_exit(&kcpc_cpuctx_lock);
1550Sstevel@tonic-gate 			kcpc_free_set(t->t_cpc_set);
1560Sstevel@tonic-gate 			t->t_cpc_set = NULL;
1570Sstevel@tonic-gate 			if (copyout(&error, udata3, sizeof (error)) == -1)
1580Sstevel@tonic-gate 				return (set_errno(EFAULT));
1590Sstevel@tonic-gate 			return (set_errno(EINVAL));
1600Sstevel@tonic-gate 		}
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate 		if ((error = kcpc_bind_thread(t->t_cpc_set, t, &code)) != 0) {
1630Sstevel@tonic-gate 			rw_exit(&kcpc_cpuctx_lock);
1640Sstevel@tonic-gate 			kcpc_free_set(t->t_cpc_set);
1650Sstevel@tonic-gate 			t->t_cpc_set = NULL;
1660Sstevel@tonic-gate 			/*
1670Sstevel@tonic-gate 			 * EINVAL and EACCES are the only errors with more
1680Sstevel@tonic-gate 			 * specific subcodes.
1690Sstevel@tonic-gate 			 */
1700Sstevel@tonic-gate 			if ((error == EINVAL || error == EACCES) &&
1710Sstevel@tonic-gate 			    copyout(&code, udata3, sizeof (code)) == -1)
1720Sstevel@tonic-gate 				return (set_errno(EFAULT));
1730Sstevel@tonic-gate 			return (set_errno(error));
1740Sstevel@tonic-gate 		}
1750Sstevel@tonic-gate 
1760Sstevel@tonic-gate 		rw_exit(&kcpc_cpuctx_lock);
1770Sstevel@tonic-gate 		return (0);
1780Sstevel@tonic-gate 	case CPC_SAMPLE:
1790Sstevel@tonic-gate 		/*
1800Sstevel@tonic-gate 		 * udata1 = pointer to user's buffer
1810Sstevel@tonic-gate 		 * udata2 = pointer to user's hrtime
1820Sstevel@tonic-gate 		 * udata3 = pointer to user's tick
1830Sstevel@tonic-gate 		 */
1840Sstevel@tonic-gate 		/*
1850Sstevel@tonic-gate 		 * We only allow thread-bound sets to be sampled via the
1860Sstevel@tonic-gate 		 * syscall, so if this set has a CPU-bound context, return an
1870Sstevel@tonic-gate 		 * error.
1880Sstevel@tonic-gate 		 */
1890Sstevel@tonic-gate 		if (t->t_cpc_set->ks_ctx->kc_cpuid != -1)
1900Sstevel@tonic-gate 			return (set_errno(EINVAL));
1910Sstevel@tonic-gate 		if ((error = kcpc_sample(t->t_cpc_set, udata1, udata2,
1920Sstevel@tonic-gate 		    udata3)) != 0)
1930Sstevel@tonic-gate 			return (set_errno(error));
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate 		return (0);
1960Sstevel@tonic-gate 	case CPC_PRESET:
1970Sstevel@tonic-gate 	case CPC_RESTART:
1980Sstevel@tonic-gate 		/*
1990Sstevel@tonic-gate 		 * These are valid only if this lwp has a bound set.
2000Sstevel@tonic-gate 		 */
2010Sstevel@tonic-gate 		if (t->t_cpc_set == NULL)
2020Sstevel@tonic-gate 			return (set_errno(EINVAL));
2030Sstevel@tonic-gate 		if (cmd == CPC_PRESET) {
2040Sstevel@tonic-gate 			/*
2050Sstevel@tonic-gate 			 * The preset is shipped up to us from userland in two
2060Sstevel@tonic-gate 			 * parts. This lets us handle 64-bit values from 32-bit
2070Sstevel@tonic-gate 			 * and 64-bit applications in the same manner.
2080Sstevel@tonic-gate 			 *
2090Sstevel@tonic-gate 			 * udata1 = index of request to preset
2100Sstevel@tonic-gate 			 * udata2 = new 64-bit preset (most sig. 32 bits)
2110Sstevel@tonic-gate 			 * udata3 = new 64-bit preset (least sig. 32 bits)
2120Sstevel@tonic-gate 			 */
2130Sstevel@tonic-gate 			if ((error = kcpc_preset(t->t_cpc_set, (intptr_t)udata1,
2140Sstevel@tonic-gate 			    ((uint64_t)(uintptr_t)udata2 << 32ULL) |
2150Sstevel@tonic-gate 			    (uint64_t)(uintptr_t)udata3)) != 0)
2160Sstevel@tonic-gate 				return (set_errno(error));
2170Sstevel@tonic-gate 		} else {
2180Sstevel@tonic-gate 			/*
2190Sstevel@tonic-gate 			 * udata[1-3] = unused
2200Sstevel@tonic-gate 			 */
2210Sstevel@tonic-gate 			if ((error = kcpc_restart(t->t_cpc_set)) != 0)
2220Sstevel@tonic-gate 				return (set_errno(error));
2230Sstevel@tonic-gate 		}
2240Sstevel@tonic-gate 		return (0);
2250Sstevel@tonic-gate 	case CPC_ENABLE:
2260Sstevel@tonic-gate 	case CPC_DISABLE:
2270Sstevel@tonic-gate 		udata1 = 0;
2280Sstevel@tonic-gate 		/*FALLTHROUGH*/
2290Sstevel@tonic-gate 	case CPC_USR_EVENTS:
2300Sstevel@tonic-gate 	case CPC_SYS_EVENTS:
2310Sstevel@tonic-gate 		if (t != curthread || t->t_cpc_set == NULL)
2320Sstevel@tonic-gate 			return (set_errno(EINVAL));
2330Sstevel@tonic-gate 		/*
2340Sstevel@tonic-gate 		 * Provided for backwards compatibility with CPCv1.
2350Sstevel@tonic-gate 		 *
2360Sstevel@tonic-gate 		 * Stop the counters and record the current counts. Use the
2370Sstevel@tonic-gate 		 * counts as the preset to rebind a new set with the requests
2380Sstevel@tonic-gate 		 * reconfigured as requested.
2390Sstevel@tonic-gate 		 *
2400Sstevel@tonic-gate 		 * udata1: 1 == enable; 0 == disable
2410Sstevel@tonic-gate 		 * udata{2,3}: unused
2420Sstevel@tonic-gate 		 */
2430Sstevel@tonic-gate 		rw_enter(&kcpc_cpuctx_lock, RW_READER);
2440Sstevel@tonic-gate 		if ((error = kcpc_enable(t,
2450Sstevel@tonic-gate 		    cmd, (int)(uintptr_t)udata1)) != 0) {
2460Sstevel@tonic-gate 			rw_exit(&kcpc_cpuctx_lock);
2470Sstevel@tonic-gate 			return (set_errno(error));
2480Sstevel@tonic-gate 		}
2490Sstevel@tonic-gate 		rw_exit(&kcpc_cpuctx_lock);
2500Sstevel@tonic-gate 		return (0);
2510Sstevel@tonic-gate 	case CPC_NPIC:
2520Sstevel@tonic-gate 		return (cpc_ncounters);
2530Sstevel@tonic-gate 	case CPC_CAPS:
2540Sstevel@tonic-gate 		return (pcbe_ops->pcbe_caps);
2550Sstevel@tonic-gate 	case CPC_EVLIST_SIZE:
2560Sstevel@tonic-gate 	case CPC_LIST_EVENTS:
2570Sstevel@tonic-gate 		/*
2580Sstevel@tonic-gate 		 * udata1 = pointer to user's int or buffer
2590Sstevel@tonic-gate 		 * udata2 = picnum
2600Sstevel@tonic-gate 		 * udata3 = unused
2610Sstevel@tonic-gate 		 */
2620Sstevel@tonic-gate 		if ((uintptr_t)udata2 >= cpc_ncounters)
2630Sstevel@tonic-gate 			return (set_errno(EINVAL));
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate 		size = strlen(
2660Sstevel@tonic-gate 		    pcbe_ops->pcbe_list_events((uintptr_t)udata2)) + 1;
2670Sstevel@tonic-gate 
2680Sstevel@tonic-gate 		if (cmd == CPC_EVLIST_SIZE) {
2690Sstevel@tonic-gate 			if (suword32(udata1, size) == -1)
2700Sstevel@tonic-gate 				return (set_errno(EFAULT));
2710Sstevel@tonic-gate 		} else {
2720Sstevel@tonic-gate 			if (copyout(
2730Sstevel@tonic-gate 			    pcbe_ops->pcbe_list_events((uintptr_t)udata2),
2740Sstevel@tonic-gate 			    udata1, size) == -1)
2750Sstevel@tonic-gate 				return (set_errno(EFAULT));
2760Sstevel@tonic-gate 		}
2770Sstevel@tonic-gate 		return (0);
2780Sstevel@tonic-gate 	case CPC_ATTRLIST_SIZE:
2790Sstevel@tonic-gate 	case CPC_LIST_ATTRS:
2800Sstevel@tonic-gate 		/*
2810Sstevel@tonic-gate 		 * udata1 = pointer to user's int or buffer
2820Sstevel@tonic-gate 		 * udata2 = unused
2830Sstevel@tonic-gate 		 * udata3 = unused
2840Sstevel@tonic-gate 		 *
2850Sstevel@tonic-gate 		 * attrlist size is length of PCBE-supported attributes, plus
2860Sstevel@tonic-gate 		 * room for "picnum\0" plus an optional ',' separator char.
2870Sstevel@tonic-gate 		 */
2880Sstevel@tonic-gate 		str = pcbe_ops->pcbe_list_attrs();
2890Sstevel@tonic-gate 		size = strlen(str) + sizeof (SEPARATOR ATTRLIST) + 1;
2900Sstevel@tonic-gate 		if (str[0] != '\0')
2910Sstevel@tonic-gate 			/*
2920Sstevel@tonic-gate 			 * A ',' separator character is necessary.
2930Sstevel@tonic-gate 			 */
2940Sstevel@tonic-gate 			size += 1;
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 		if (cmd == CPC_ATTRLIST_SIZE) {
2970Sstevel@tonic-gate 			if (suword32(udata1, size) == -1)
2980Sstevel@tonic-gate 				return (set_errno(EFAULT));
2990Sstevel@tonic-gate 		} else {
3000Sstevel@tonic-gate 			/*
3010Sstevel@tonic-gate 			 * Copyout the PCBE attributes, and then append the
3020Sstevel@tonic-gate 			 * generic attribute list (with separator if necessary).
3030Sstevel@tonic-gate 			 */
3040Sstevel@tonic-gate 			if (copyout(str, udata1, strlen(str)) == -1)
3050Sstevel@tonic-gate 				return (set_errno(EFAULT));
3060Sstevel@tonic-gate 			if (str[0] != '\0') {
3070Sstevel@tonic-gate 				if (copyout(SEPARATOR ATTRLIST,
3080Sstevel@tonic-gate 				    ((char *)udata1) + strlen(str),
3090Sstevel@tonic-gate 				    strlen(SEPARATOR ATTRLIST) + 1)
3100Sstevel@tonic-gate 				    == -1)
3110Sstevel@tonic-gate 					return (set_errno(EFAULT));
3120Sstevel@tonic-gate 			} else
3130Sstevel@tonic-gate 				if (copyout(ATTRLIST,
3140Sstevel@tonic-gate 				    (char *)udata1 + strlen(str),
3150Sstevel@tonic-gate 				    strlen(ATTRLIST) + 1) == -1)
3160Sstevel@tonic-gate 					return (set_errno(EFAULT));
3170Sstevel@tonic-gate 		}
3180Sstevel@tonic-gate 		return (0);
3190Sstevel@tonic-gate 	case CPC_IMPL_NAME:
3200Sstevel@tonic-gate 	case CPC_CPUREF:
3210Sstevel@tonic-gate 		/*
3220Sstevel@tonic-gate 		 * udata1 = pointer to user's buffer
3230Sstevel@tonic-gate 		 * udata2 = unused
3240Sstevel@tonic-gate 		 * udata3 = unused
3250Sstevel@tonic-gate 		 */
3260Sstevel@tonic-gate 		if (cmd == CPC_IMPL_NAME) {
3270Sstevel@tonic-gate 			str = pcbe_ops->pcbe_impl_name();
3280Sstevel@tonic-gate 			ASSERT(strlen(str) < CPC_MAX_IMPL_NAME);
3290Sstevel@tonic-gate 		} else {
3300Sstevel@tonic-gate 			str = pcbe_ops->pcbe_cpuref();
3310Sstevel@tonic-gate 			ASSERT(strlen(str) < CPC_MAX_CPUREF);
3320Sstevel@tonic-gate 		}
3330Sstevel@tonic-gate 
3340Sstevel@tonic-gate 		if (copyout(str, udata1, strlen(str) + 1) != 0)
3350Sstevel@tonic-gate 			return (set_errno(EFAULT));
3360Sstevel@tonic-gate 		return (0);
3370Sstevel@tonic-gate 	case CPC_INVALIDATE:
3380Sstevel@tonic-gate 		kcpc_invalidate(t);
3390Sstevel@tonic-gate 		return (0);
3400Sstevel@tonic-gate 	case CPC_RELE:
3410Sstevel@tonic-gate 		if ((error = kcpc_unbind(t->t_cpc_set)) != 0)
3420Sstevel@tonic-gate 			return (set_errno(error));
3430Sstevel@tonic-gate 		return (0);
3440Sstevel@tonic-gate 	default:
3450Sstevel@tonic-gate 		return (set_errno(EINVAL));
3460Sstevel@tonic-gate 	}
3470Sstevel@tonic-gate }
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate /*
3500Sstevel@tonic-gate  * The 'shared' device allows direct access to the
3510Sstevel@tonic-gate  * performance counter control register of the current CPU.
3520Sstevel@tonic-gate  * The major difference between the contexts created here and those
3530Sstevel@tonic-gate  * above is that the context handlers are -not- installed, thus
3540Sstevel@tonic-gate  * no context switching behaviour occurs.
3550Sstevel@tonic-gate  *
3560Sstevel@tonic-gate  * Because they manipulate per-cpu state, these ioctls can
3570Sstevel@tonic-gate  * only be invoked from a bound lwp, by a caller with the cpc_cpu privilege
3580Sstevel@tonic-gate  * who can open the relevant entry in /devices (the act of holding it open
3590Sstevel@tonic-gate  * causes other uses of the counters to be suspended).
3600Sstevel@tonic-gate  *
3610Sstevel@tonic-gate  * Note that for correct results, the caller -must- ensure that
3620Sstevel@tonic-gate  * all existing per-lwp contexts are either inactive or marked invalid;
3630Sstevel@tonic-gate  * that's what the open routine does.
3640Sstevel@tonic-gate  */
3650Sstevel@tonic-gate /*ARGSUSED*/
3660Sstevel@tonic-gate static int
3670Sstevel@tonic-gate kcpc_ioctl(dev_t dev, int cmd, intptr_t data, int flags, cred_t *cr, int *rvp)
3680Sstevel@tonic-gate {
3690Sstevel@tonic-gate 	kthread_t	*t = curthread;
3700Sstevel@tonic-gate 	processorid_t	cpuid;
3710Sstevel@tonic-gate 	void		*udata1 = NULL;
3720Sstevel@tonic-gate 	void		*udata2 = NULL;
3730Sstevel@tonic-gate 	void		*udata3 = NULL;
3740Sstevel@tonic-gate 	int		error;
3750Sstevel@tonic-gate 	int		code;
3760Sstevel@tonic-gate 
3770Sstevel@tonic-gate 	STRUCT_DECL(__cpc_args, args);
3780Sstevel@tonic-gate 
3790Sstevel@tonic-gate 	STRUCT_INIT(args, flags);
3800Sstevel@tonic-gate 
3810Sstevel@tonic-gate 	if (curthread->t_bind_cpu != getminor(dev))
3820Sstevel@tonic-gate 		return (EAGAIN);  /* someone unbound it? */
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 	cpuid = getminor(dev);
3850Sstevel@tonic-gate 
3860Sstevel@tonic-gate 	if (cmd == CPCIO_BIND || cmd == CPCIO_SAMPLE) {
3870Sstevel@tonic-gate 		if (copyin((void *)data, STRUCT_BUF(args),
3880Sstevel@tonic-gate 		    STRUCT_SIZE(args)) == -1)
3890Sstevel@tonic-gate 			return (EFAULT);
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate 		udata1 = STRUCT_FGETP(args, udata1);
3920Sstevel@tonic-gate 		udata2 = STRUCT_FGETP(args, udata2);
3930Sstevel@tonic-gate 		udata3 = STRUCT_FGETP(args, udata3);
3940Sstevel@tonic-gate 	}
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate 	switch (cmd) {
3970Sstevel@tonic-gate 	case CPCIO_BIND:
3980Sstevel@tonic-gate 		/*
3990Sstevel@tonic-gate 		 * udata1 = pointer to packed nvlist buffer
4000Sstevel@tonic-gate 		 * udata2 = size of packed nvlist buffer
4010Sstevel@tonic-gate 		 * udata3 = User addr to return error subcode in.
4020Sstevel@tonic-gate 		 */
4030Sstevel@tonic-gate 		if (t->t_cpc_set != NULL) {
4040Sstevel@tonic-gate 			(void) kcpc_unbind(t->t_cpc_set);
4050Sstevel@tonic-gate 			ASSERT(t->t_cpc_set == NULL);
4060Sstevel@tonic-gate 		}
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate 		if ((error = kcpc_copyin_set(&t->t_cpc_set, udata1,
4090Sstevel@tonic-gate 		    (size_t)udata2)) != 0) {
4100Sstevel@tonic-gate 			return (error);
4110Sstevel@tonic-gate 		}
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 		if ((error = kcpc_verify_set(t->t_cpc_set)) != 0) {
4140Sstevel@tonic-gate 			kcpc_free_set(t->t_cpc_set);
4150Sstevel@tonic-gate 			t->t_cpc_set = NULL;
4160Sstevel@tonic-gate 			if (copyout(&error, udata3, sizeof (error)) == -1)
4170Sstevel@tonic-gate 				return (EFAULT);
4180Sstevel@tonic-gate 			return (EINVAL);
4190Sstevel@tonic-gate 		}
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate 		if ((error = kcpc_bind_cpu(t->t_cpc_set, cpuid, &code)) != 0) {
4220Sstevel@tonic-gate 			kcpc_free_set(t->t_cpc_set);
4230Sstevel@tonic-gate 			t->t_cpc_set = NULL;
4240Sstevel@tonic-gate 			/*
4250Sstevel@tonic-gate 			 * Subcodes are only returned for EINVAL and EACCESS.
4260Sstevel@tonic-gate 			 */
4270Sstevel@tonic-gate 			if ((error == EINVAL || error == EACCES) &&
4280Sstevel@tonic-gate 			    copyout(&code, udata3, sizeof (code)) == -1)
4290Sstevel@tonic-gate 				return (EFAULT);
4300Sstevel@tonic-gate 			return (error);
4310Sstevel@tonic-gate 		}
4320Sstevel@tonic-gate 
4330Sstevel@tonic-gate 		return (0);
4340Sstevel@tonic-gate 	case CPCIO_SAMPLE:
4350Sstevel@tonic-gate 		/*
4360Sstevel@tonic-gate 		 * udata1 = pointer to user's buffer
4370Sstevel@tonic-gate 		 * udata2 = pointer to user's hrtime
4380Sstevel@tonic-gate 		 * udata3 = pointer to user's tick
4390Sstevel@tonic-gate 		 */
4400Sstevel@tonic-gate 		/*
4410Sstevel@tonic-gate 		 * Only CPU-bound sets may be sampled via the ioctl(). If this
4420Sstevel@tonic-gate 		 * set has no CPU-bound context, return an error.
4430Sstevel@tonic-gate 		 */
4440Sstevel@tonic-gate 		if (t->t_cpc_set == NULL)
4450Sstevel@tonic-gate 			return (EINVAL);
4460Sstevel@tonic-gate 		if ((error = kcpc_sample(t->t_cpc_set, udata1, udata2,
4470Sstevel@tonic-gate 		    udata3)) != 0)
4480Sstevel@tonic-gate 			return (error);
4490Sstevel@tonic-gate 		return (0);
4500Sstevel@tonic-gate 	case CPCIO_RELE:
4510Sstevel@tonic-gate 		if (t->t_cpc_set == NULL)
4520Sstevel@tonic-gate 			return (EINVAL);
4530Sstevel@tonic-gate 		return (kcpc_unbind(t->t_cpc_set));
4540Sstevel@tonic-gate 	default:
4550Sstevel@tonic-gate 		return (EINVAL);
4560Sstevel@tonic-gate 	}
4570Sstevel@tonic-gate }
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate /*
4600Sstevel@tonic-gate  * The device supports multiple opens, but only one open
4610Sstevel@tonic-gate  * is allowed per processor.  This is to enable multiple
4620Sstevel@tonic-gate  * instances of tools looking at different processors.
4630Sstevel@tonic-gate  */
4640Sstevel@tonic-gate #define	KCPC_MINOR_SHARED		((minor_t)0x3fffful)
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate static ulong_t *kcpc_cpumap;		/* bitmap of cpus */
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate /*ARGSUSED1*/
4690Sstevel@tonic-gate static int
4700Sstevel@tonic-gate kcpc_open(dev_t *dev, int flags, int otyp, cred_t *cr)
4710Sstevel@tonic-gate {
4720Sstevel@tonic-gate 	processorid_t	cpuid;
4730Sstevel@tonic-gate 	int		error;
4740Sstevel@tonic-gate 
4750Sstevel@tonic-gate 	ASSERT(pcbe_ops != NULL);
4760Sstevel@tonic-gate 
4770Sstevel@tonic-gate 	if ((error = secpolicy_cpc_cpu(cr)) != 0)
4780Sstevel@tonic-gate 		return (error);
4790Sstevel@tonic-gate 	if (getminor(*dev) != KCPC_MINOR_SHARED)
4800Sstevel@tonic-gate 		return (ENXIO);
4810Sstevel@tonic-gate 	if ((cpuid = curthread->t_bind_cpu) == PBIND_NONE)
4820Sstevel@tonic-gate 		return (EINVAL);
4830Sstevel@tonic-gate 	if (cpuid > max_cpuid)
4840Sstevel@tonic-gate 		return (EINVAL);
4850Sstevel@tonic-gate 
4860Sstevel@tonic-gate 	rw_enter(&kcpc_cpuctx_lock, RW_WRITER);
4870Sstevel@tonic-gate 	if (++kcpc_cpuctx == 1) {
4880Sstevel@tonic-gate 		ASSERT(kcpc_cpumap == NULL);
4890Sstevel@tonic-gate 		kcpc_cpumap = kmem_zalloc(BT_SIZEOFMAP(max_cpuid + 1),
4900Sstevel@tonic-gate 		    KM_SLEEP);
4910Sstevel@tonic-gate 		/*
4920Sstevel@tonic-gate 		 * When this device is open for processor-based contexts,
4930Sstevel@tonic-gate 		 * no further lwp-based contexts can be created.
4940Sstevel@tonic-gate 		 *
4950Sstevel@tonic-gate 		 * Since this is the first open, ensure that all existing
4960Sstevel@tonic-gate 		 * contexts are invalidated.
4970Sstevel@tonic-gate 		 */
4980Sstevel@tonic-gate 		kcpc_invalidate_all();
4990Sstevel@tonic-gate 	} else if (BT_TEST(kcpc_cpumap, cpuid)) {
5000Sstevel@tonic-gate 		kcpc_cpuctx--;
5010Sstevel@tonic-gate 		rw_exit(&kcpc_cpuctx_lock);
5020Sstevel@tonic-gate 		return (EAGAIN);
5030Sstevel@tonic-gate 	} else if (kcpc_hw_cpu_hook(cpuid, kcpc_cpumap) != 0) {
5040Sstevel@tonic-gate 		kcpc_cpuctx--;
5050Sstevel@tonic-gate 		rw_exit(&kcpc_cpuctx_lock);
5060Sstevel@tonic-gate 		return (EACCES);
5070Sstevel@tonic-gate 	}
5080Sstevel@tonic-gate 	BT_SET(kcpc_cpumap, cpuid);
5090Sstevel@tonic-gate 	rw_exit(&kcpc_cpuctx_lock);
5100Sstevel@tonic-gate 
5110Sstevel@tonic-gate 	*dev = makedevice(getmajor(*dev), (minor_t)cpuid);
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate 	return (0);
5140Sstevel@tonic-gate }
5150Sstevel@tonic-gate 
5160Sstevel@tonic-gate /*ARGSUSED1*/
5170Sstevel@tonic-gate static int
5180Sstevel@tonic-gate kcpc_close(dev_t dev, int flags, int otyp, cred_t *cr)
5190Sstevel@tonic-gate {
5200Sstevel@tonic-gate 	rw_enter(&kcpc_cpuctx_lock, RW_WRITER);
5210Sstevel@tonic-gate 	BT_CLEAR(kcpc_cpumap, getminor(dev));
5220Sstevel@tonic-gate 	if (--kcpc_cpuctx == 0) {
5230Sstevel@tonic-gate 		kmem_free(kcpc_cpumap, BT_SIZEOFMAP(max_cpuid + 1));
5240Sstevel@tonic-gate 		kcpc_cpumap = NULL;
5250Sstevel@tonic-gate 	}
5260Sstevel@tonic-gate 	ASSERT(kcpc_cpuctx >= 0);
5270Sstevel@tonic-gate 	rw_exit(&kcpc_cpuctx_lock);
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 	return (0);
5300Sstevel@tonic-gate }
5310Sstevel@tonic-gate 
/*
 * Lower and upper bound, in bytes, on the size of a packed list.
 */
#define	CPC_MIN_PACKSIZE 4
#define	CPC_MAX_PACKSIZE 10000

/*
 * Upper bound on the number of requests a single set can contain.
 */
#define	CPC_MAX_NREQS 100

/*
 * Upper bound on the number of attributes a single request can contain.
 */
#define	CPC_MAX_ATTRS 50
5470Sstevel@tonic-gate 
5480Sstevel@tonic-gate /*
5490Sstevel@tonic-gate  * Copy in a packed nvlist from the user and create a request set out of it.
5500Sstevel@tonic-gate  * If successful, return 0 and store a pointer to the set we've created. Returns
5510Sstevel@tonic-gate  * error code on error.
5520Sstevel@tonic-gate  */
5530Sstevel@tonic-gate int
5540Sstevel@tonic-gate kcpc_copyin_set(kcpc_set_t **inset, void *ubuf, size_t len)
5550Sstevel@tonic-gate {
5560Sstevel@tonic-gate 	kcpc_set_t	*set;
5570Sstevel@tonic-gate 	int		i;
5580Sstevel@tonic-gate 	int		j;
5590Sstevel@tonic-gate 	char		*packbuf;
5600Sstevel@tonic-gate 
5610Sstevel@tonic-gate 	nvlist_t	*nvl;
5620Sstevel@tonic-gate 	nvpair_t	*nvp = NULL;
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate 	nvlist_t	*attrs;
5650Sstevel@tonic-gate 	nvpair_t	*nvp_attr;
5660Sstevel@tonic-gate 	kcpc_attr_t	*attrp;
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate 	nvlist_t	**reqlist;
5690Sstevel@tonic-gate 	uint_t		nreqs;
5700Sstevel@tonic-gate 	uint64_t	uint64;
5710Sstevel@tonic-gate 	uint32_t	uint32;
5720Sstevel@tonic-gate 	uint32_t	setflags = (uint32_t)-1;
5730Sstevel@tonic-gate 	char		*string;
5740Sstevel@tonic-gate 	char		*name;
5750Sstevel@tonic-gate 
5760Sstevel@tonic-gate 	if (len < CPC_MIN_PACKSIZE || len > CPC_MAX_PACKSIZE)
5770Sstevel@tonic-gate 		return (EINVAL);
5780Sstevel@tonic-gate 
5790Sstevel@tonic-gate 	packbuf = kmem_alloc(len, KM_SLEEP);
5800Sstevel@tonic-gate 
5810Sstevel@tonic-gate 	if (copyin(ubuf, packbuf, len) == -1) {
5820Sstevel@tonic-gate 		kmem_free(packbuf, len);
5830Sstevel@tonic-gate 		return (EFAULT);
5840Sstevel@tonic-gate 	}
5850Sstevel@tonic-gate 
5860Sstevel@tonic-gate 	if (nvlist_unpack(packbuf, len, &nvl, KM_SLEEP) != 0) {
5870Sstevel@tonic-gate 		kmem_free(packbuf, len);
5880Sstevel@tonic-gate 		return (EINVAL);
5890Sstevel@tonic-gate 	}
5900Sstevel@tonic-gate 
5910Sstevel@tonic-gate 	/*
5920Sstevel@tonic-gate 	 * The nvlist has been unpacked so there is no need for the packed
5930Sstevel@tonic-gate 	 * representation from this point on.
5940Sstevel@tonic-gate 	 */
5950Sstevel@tonic-gate 	kmem_free(packbuf, len);
5960Sstevel@tonic-gate 
5970Sstevel@tonic-gate 	i = 0;
5980Sstevel@tonic-gate 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
5990Sstevel@tonic-gate 		switch (nvpair_type(nvp)) {
6000Sstevel@tonic-gate 		case DATA_TYPE_UINT32:
6010Sstevel@tonic-gate 			if (strcmp(nvpair_name(nvp), "flags") != 0 ||
6020Sstevel@tonic-gate 			    nvpair_value_uint32(nvp, &setflags) != 0) {
6030Sstevel@tonic-gate 				nvlist_free(nvl);
6040Sstevel@tonic-gate 				return (EINVAL);
6050Sstevel@tonic-gate 			}
6060Sstevel@tonic-gate 			break;
6070Sstevel@tonic-gate 		case DATA_TYPE_NVLIST_ARRAY:
6080Sstevel@tonic-gate 			if (strcmp(nvpair_name(nvp), "reqs") != 0 ||
6090Sstevel@tonic-gate 			    nvpair_value_nvlist_array(nvp, &reqlist,
6107240Srh87107 			    &nreqs) != 0) {
6110Sstevel@tonic-gate 				nvlist_free(nvl);
6120Sstevel@tonic-gate 				return (EINVAL);
6130Sstevel@tonic-gate 			}
6140Sstevel@tonic-gate 			break;
6150Sstevel@tonic-gate 		default:
6160Sstevel@tonic-gate 			nvlist_free(nvl);
6170Sstevel@tonic-gate 			return (EINVAL);
6180Sstevel@tonic-gate 		}
6190Sstevel@tonic-gate 		i++;
6200Sstevel@tonic-gate 	}
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	/*
6230Sstevel@tonic-gate 	 * There should be two members in the top-level nvlist:
6240Sstevel@tonic-gate 	 * an array of nvlists consisting of the requests, and flags.
6250Sstevel@tonic-gate 	 * Anything else is an invalid set.
6260Sstevel@tonic-gate 	 */
6270Sstevel@tonic-gate 	if (i != 2) {
6280Sstevel@tonic-gate 		nvlist_free(nvl);
6290Sstevel@tonic-gate 		return (EINVAL);
6300Sstevel@tonic-gate 	}
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 	if (nreqs > CPC_MAX_NREQS) {
6330Sstevel@tonic-gate 		nvlist_free(nvl);
6340Sstevel@tonic-gate 		return (EINVAL);
6350Sstevel@tonic-gate 	}
6360Sstevel@tonic-gate 
6370Sstevel@tonic-gate 	/*
6380Sstevel@tonic-gate 	 * The requests are now stored in the nvlist array at reqlist.
6396275Strevtom 	 * Note that the use of kmem_zalloc() to alloc the kcpc_set_t means
6406275Strevtom 	 * we don't need to call the init routines for ks_lock and ks_condv.
6410Sstevel@tonic-gate 	 */
6426275Strevtom 	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);
6430Sstevel@tonic-gate 	set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) *
6440Sstevel@tonic-gate 	    nreqs, KM_SLEEP);
6450Sstevel@tonic-gate 	set->ks_nreqs = nreqs;
6460Sstevel@tonic-gate 	/*
6470Sstevel@tonic-gate 	 * If the nvlist didn't contain a flags member, setflags was initialized
6480Sstevel@tonic-gate 	 * with an illegal value and this set will fail sanity checks later on.
6490Sstevel@tonic-gate 	 */
6500Sstevel@tonic-gate 	set->ks_flags = setflags;
6516275Strevtom 	/*
6526275Strevtom 	 * Initialize bind/unbind set synchronization.
6536275Strevtom 	 */
6546275Strevtom 	set->ks_state &= ~KCPC_SET_BOUND;
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate 	/*
6570Sstevel@tonic-gate 	 * Build the set up one request at a time, always keeping it self-
6580Sstevel@tonic-gate 	 * consistent so we can give it to kcpc_free_set() if we need to back
6590Sstevel@tonic-gate 	 * out and return an error.
6600Sstevel@tonic-gate 	 */
6610Sstevel@tonic-gate 	for (i = 0; i < nreqs; i++) {
6620Sstevel@tonic-gate 		nvp = NULL;
6630Sstevel@tonic-gate 		set->ks_req[i].kr_picnum = -1;
6640Sstevel@tonic-gate 		while ((nvp = nvlist_next_nvpair(reqlist[i], nvp)) != NULL) {
6650Sstevel@tonic-gate 			name = nvpair_name(nvp);
6660Sstevel@tonic-gate 			switch (nvpair_type(nvp)) {
6670Sstevel@tonic-gate 			case DATA_TYPE_UINT32:
6680Sstevel@tonic-gate 				if (nvpair_value_uint32(nvp, &uint32) == EINVAL)
6690Sstevel@tonic-gate 					goto inval;
6700Sstevel@tonic-gate 				if (strcmp(name, "cr_flags") == 0)
6710Sstevel@tonic-gate 					set->ks_req[i].kr_flags = uint32;
6720Sstevel@tonic-gate 				if (strcmp(name, "cr_index") == 0)
6730Sstevel@tonic-gate 					set->ks_req[i].kr_index = uint32;
6740Sstevel@tonic-gate 				break;
6750Sstevel@tonic-gate 			case DATA_TYPE_UINT64:
6760Sstevel@tonic-gate 				if (nvpair_value_uint64(nvp, &uint64) == EINVAL)
6770Sstevel@tonic-gate 					goto inval;
6780Sstevel@tonic-gate 				if (strcmp(name, "cr_preset") == 0)
6790Sstevel@tonic-gate 					set->ks_req[i].kr_preset = uint64;
6800Sstevel@tonic-gate 				break;
6810Sstevel@tonic-gate 			case DATA_TYPE_STRING:
6820Sstevel@tonic-gate 				if (nvpair_value_string(nvp, &string) == EINVAL)
6830Sstevel@tonic-gate 					goto inval;
6840Sstevel@tonic-gate 				if (strcmp(name, "cr_event") == 0)
6850Sstevel@tonic-gate 					(void) strncpy(set->ks_req[i].kr_event,
6860Sstevel@tonic-gate 					    string, CPC_MAX_EVENT_LEN);
6870Sstevel@tonic-gate 				break;
6880Sstevel@tonic-gate 			case DATA_TYPE_NVLIST:
6890Sstevel@tonic-gate 				if (strcmp(name, "cr_attr") != 0)
6900Sstevel@tonic-gate 					goto inval;
6910Sstevel@tonic-gate 				if (nvpair_value_nvlist(nvp, &attrs) == EINVAL)
6920Sstevel@tonic-gate 					goto inval;
6930Sstevel@tonic-gate 				nvp_attr = NULL;
6940Sstevel@tonic-gate 				/*
6950Sstevel@tonic-gate 				 * If the picnum has been specified as an
6960Sstevel@tonic-gate 				 * attribute, consume that attribute here and
6970Sstevel@tonic-gate 				 * remove it from the list of attributes.
6980Sstevel@tonic-gate 				 */
6990Sstevel@tonic-gate 				if (nvlist_lookup_uint64(attrs, "picnum",
7000Sstevel@tonic-gate 				    &uint64) == 0) {
7010Sstevel@tonic-gate 					if (nvlist_remove(attrs, "picnum",
7020Sstevel@tonic-gate 					    DATA_TYPE_UINT64) != 0)
7030Sstevel@tonic-gate 						panic("nvlist %p faulty",
7047240Srh87107 						    (void *)attrs);
7050Sstevel@tonic-gate 					set->ks_req[i].kr_picnum = uint64;
7060Sstevel@tonic-gate 				}
7070Sstevel@tonic-gate 
7080Sstevel@tonic-gate 				if ((set->ks_req[i].kr_nattrs =
7090Sstevel@tonic-gate 				    kcpc_nvlist_npairs(attrs)) == 0)
7100Sstevel@tonic-gate 					break;
7110Sstevel@tonic-gate 
7120Sstevel@tonic-gate 				if (set->ks_req[i].kr_nattrs > CPC_MAX_ATTRS)
7130Sstevel@tonic-gate 					goto inval;
7140Sstevel@tonic-gate 
7150Sstevel@tonic-gate 				set->ks_req[i].kr_attr =
7160Sstevel@tonic-gate 				    kmem_alloc(set->ks_req[i].kr_nattrs *
7170Sstevel@tonic-gate 				    sizeof (kcpc_attr_t), KM_SLEEP);
7180Sstevel@tonic-gate 				j = 0;
7190Sstevel@tonic-gate 
7200Sstevel@tonic-gate 				while ((nvp_attr = nvlist_next_nvpair(attrs,
7210Sstevel@tonic-gate 				    nvp_attr)) != NULL) {
7220Sstevel@tonic-gate 					attrp = &set->ks_req[i].kr_attr[j];
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 					if (nvpair_type(nvp_attr) !=
7250Sstevel@tonic-gate 					    DATA_TYPE_UINT64)
7260Sstevel@tonic-gate 						goto inval;
7270Sstevel@tonic-gate 
7280Sstevel@tonic-gate 					(void) strncpy(attrp->ka_name,
7290Sstevel@tonic-gate 					    nvpair_name(nvp_attr),
7300Sstevel@tonic-gate 					    CPC_MAX_ATTR_LEN);
7310Sstevel@tonic-gate 
7320Sstevel@tonic-gate 					if (nvpair_value_uint64(nvp_attr,
7330Sstevel@tonic-gate 					    &(attrp->ka_val)) == EINVAL)
7340Sstevel@tonic-gate 						goto inval;
7350Sstevel@tonic-gate 					j++;
7360Sstevel@tonic-gate 				}
7370Sstevel@tonic-gate 				ASSERT(j == set->ks_req[i].kr_nattrs);
7380Sstevel@tonic-gate 			default:
7390Sstevel@tonic-gate 				break;
7400Sstevel@tonic-gate 			}
7410Sstevel@tonic-gate 		}
7420Sstevel@tonic-gate 	}
7430Sstevel@tonic-gate 
7440Sstevel@tonic-gate 	nvlist_free(nvl);
7450Sstevel@tonic-gate 	*inset = set;
7460Sstevel@tonic-gate 	return (0);
7470Sstevel@tonic-gate 
7480Sstevel@tonic-gate inval:
7490Sstevel@tonic-gate 	nvlist_free(nvl);
7500Sstevel@tonic-gate 	kcpc_free_set(set);
7510Sstevel@tonic-gate 	return (EINVAL);
7520Sstevel@tonic-gate }
7530Sstevel@tonic-gate 
7540Sstevel@tonic-gate /*
7550Sstevel@tonic-gate  * Count the number of nvpairs in the supplied nvlist.
7560Sstevel@tonic-gate  */
7570Sstevel@tonic-gate static uint32_t
7580Sstevel@tonic-gate kcpc_nvlist_npairs(nvlist_t *list)
7590Sstevel@tonic-gate {
7600Sstevel@tonic-gate 	nvpair_t *nvp = NULL;
7610Sstevel@tonic-gate 	uint32_t n = 0;
7620Sstevel@tonic-gate 
7630Sstevel@tonic-gate 	while ((nvp = nvlist_next_nvpair(list, nvp)) != NULL)
7640Sstevel@tonic-gate 		n++;
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 	return (n);
7670Sstevel@tonic-gate }
7680Sstevel@tonic-gate 
7690Sstevel@tonic-gate /*
7700Sstevel@tonic-gate  * Performs sanity checks on the given set.
7710Sstevel@tonic-gate  * Returns 0 if the set checks out OK.
7720Sstevel@tonic-gate  * Returns a detailed error subcode, or -1 if there is no applicable subcode.
7730Sstevel@tonic-gate  */
7740Sstevel@tonic-gate static int
7750Sstevel@tonic-gate kcpc_verify_set(kcpc_set_t *set)
7760Sstevel@tonic-gate {
7770Sstevel@tonic-gate 	kcpc_request_t	*rp;
7780Sstevel@tonic-gate 	int		i;
7790Sstevel@tonic-gate 	uint64_t	bitmap = 0;
7800Sstevel@tonic-gate 	int		n;
7810Sstevel@tonic-gate 
7820Sstevel@tonic-gate 	if (set->ks_nreqs > cpc_ncounters)
7830Sstevel@tonic-gate 		return (-1);
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 	if (CPC_SET_VALID_FLAGS(set->ks_flags) == 0)
7860Sstevel@tonic-gate 		return (-1);
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
7890Sstevel@tonic-gate 		rp = &set->ks_req[i];
7900Sstevel@tonic-gate 
7910Sstevel@tonic-gate 		/*
7920Sstevel@tonic-gate 		 * The following comparison must cast cpc_ncounters to an int,
7930Sstevel@tonic-gate 		 * because kr_picnum will be -1 if the request didn't explicitly
7940Sstevel@tonic-gate 		 * choose a PIC.
7950Sstevel@tonic-gate 		 */
7960Sstevel@tonic-gate 		if (rp->kr_picnum >= (int)cpc_ncounters)
7970Sstevel@tonic-gate 			return (CPC_INVALID_PICNUM);
7980Sstevel@tonic-gate 
7990Sstevel@tonic-gate 		/*
8000Sstevel@tonic-gate 		 * Of the pics whose physical picnum has been specified, make
8010Sstevel@tonic-gate 		 * sure each PIC appears only once in set.
8020Sstevel@tonic-gate 		 */
8030Sstevel@tonic-gate 		if ((n = set->ks_req[i].kr_picnum) != -1) {
8040Sstevel@tonic-gate 			if ((bitmap & (1 << n)) != 0)
8050Sstevel@tonic-gate 				return (-1);
8060Sstevel@tonic-gate 			bitmap |= (1 << n);
8070Sstevel@tonic-gate 		}
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate 		/*
8100Sstevel@tonic-gate 		 * Make sure the requested index falls within the range of all
8110Sstevel@tonic-gate 		 * requests.
8120Sstevel@tonic-gate 		 */
8130Sstevel@tonic-gate 		if (rp->kr_index < 0 || rp->kr_index >= set->ks_nreqs)
8140Sstevel@tonic-gate 			return (-1);
8150Sstevel@tonic-gate 
8160Sstevel@tonic-gate 		/*
8170Sstevel@tonic-gate 		 * Make sure there are no unknown flags.
8180Sstevel@tonic-gate 		 */
8190Sstevel@tonic-gate 		if (KCPC_REQ_VALID_FLAGS(rp->kr_flags) == 0)
8200Sstevel@tonic-gate 			return (CPC_REQ_INVALID_FLAGS);
8210Sstevel@tonic-gate 	}
8220Sstevel@tonic-gate 
8230Sstevel@tonic-gate 	return (0);
8240Sstevel@tonic-gate }
8250Sstevel@tonic-gate 
8260Sstevel@tonic-gate static struct cb_ops cb_ops = {
8270Sstevel@tonic-gate 	kcpc_open,
8280Sstevel@tonic-gate 	kcpc_close,
8290Sstevel@tonic-gate 	nodev,		/* strategy */
8300Sstevel@tonic-gate 	nodev,		/* print */
8310Sstevel@tonic-gate 	nodev,		/* dump */
8320Sstevel@tonic-gate 	nodev,		/* read */
8330Sstevel@tonic-gate 	nodev,		/* write */
8340Sstevel@tonic-gate 	kcpc_ioctl,
8350Sstevel@tonic-gate 	nodev,		/* devmap */
8360Sstevel@tonic-gate 	nodev,		/* mmap */
8370Sstevel@tonic-gate 	nodev,		/* segmap */
8380Sstevel@tonic-gate 	nochpoll,	/* poll */
8390Sstevel@tonic-gate 	ddi_prop_op,
8400Sstevel@tonic-gate 	NULL,
8410Sstevel@tonic-gate 	D_NEW | D_MP
8420Sstevel@tonic-gate };
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate /*ARGSUSED*/
8450Sstevel@tonic-gate static int
8460Sstevel@tonic-gate kcpc_probe(dev_info_t *devi)
8470Sstevel@tonic-gate {
8480Sstevel@tonic-gate 	return (DDI_PROBE_SUCCESS);
8490Sstevel@tonic-gate }
8500Sstevel@tonic-gate 
8510Sstevel@tonic-gate static dev_info_t *kcpc_devi;
8520Sstevel@tonic-gate 
8530Sstevel@tonic-gate static int
8540Sstevel@tonic-gate kcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
8550Sstevel@tonic-gate {
8560Sstevel@tonic-gate 	if (cmd != DDI_ATTACH)
8570Sstevel@tonic-gate 		return (DDI_FAILURE);
8580Sstevel@tonic-gate 	kcpc_devi = devi;
8590Sstevel@tonic-gate 	return (ddi_create_minor_node(devi, "shared", S_IFCHR,
8600Sstevel@tonic-gate 	    KCPC_MINOR_SHARED, DDI_PSEUDO, 0));
8610Sstevel@tonic-gate }
8620Sstevel@tonic-gate 
8630Sstevel@tonic-gate /*ARGSUSED*/
8640Sstevel@tonic-gate static int
8650Sstevel@tonic-gate kcpc_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
8660Sstevel@tonic-gate {
8670Sstevel@tonic-gate 	switch (cmd) {
8680Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
8690Sstevel@tonic-gate 		switch (getminor((dev_t)arg)) {
8700Sstevel@tonic-gate 		case KCPC_MINOR_SHARED:
8710Sstevel@tonic-gate 			*result = kcpc_devi;
8720Sstevel@tonic-gate 			return (DDI_SUCCESS);
8730Sstevel@tonic-gate 		default:
8740Sstevel@tonic-gate 			break;
8750Sstevel@tonic-gate 		}
8760Sstevel@tonic-gate 		break;
8770Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
8780Sstevel@tonic-gate 		*result = 0;
8790Sstevel@tonic-gate 		return (DDI_SUCCESS);
8800Sstevel@tonic-gate 	default:
8810Sstevel@tonic-gate 		break;
8820Sstevel@tonic-gate 	}
8830Sstevel@tonic-gate 
8840Sstevel@tonic-gate 	return (DDI_FAILURE);
8850Sstevel@tonic-gate }
8860Sstevel@tonic-gate 
8870Sstevel@tonic-gate static struct dev_ops dev_ops = {
8880Sstevel@tonic-gate 	DEVO_REV,
8890Sstevel@tonic-gate 	0,
8900Sstevel@tonic-gate 	kcpc_getinfo,
8910Sstevel@tonic-gate 	nulldev,		/* identify */
8920Sstevel@tonic-gate 	kcpc_probe,
8930Sstevel@tonic-gate 	kcpc_attach,
8940Sstevel@tonic-gate 	nodev,			/* detach */
8950Sstevel@tonic-gate 	nodev,			/* reset */
8960Sstevel@tonic-gate 	&cb_ops,
897*7656SSherry.Moore@Sun.COM 	(struct bus_ops *)0,
898*7656SSherry.Moore@Sun.COM 	NULL,
899*7656SSherry.Moore@Sun.COM 	ddi_quiesce_not_needed,		/* quiesce */
9000Sstevel@tonic-gate };
9010Sstevel@tonic-gate 
9020Sstevel@tonic-gate static struct modldrv modldrv = {
9030Sstevel@tonic-gate 	&mod_driverops,
9047240Srh87107 	"cpc sampling driver",
9050Sstevel@tonic-gate 	&dev_ops
9060Sstevel@tonic-gate };
9070Sstevel@tonic-gate 
9080Sstevel@tonic-gate static struct sysent cpc_sysent = {
9090Sstevel@tonic-gate 	5,
9100Sstevel@tonic-gate 	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
9110Sstevel@tonic-gate 	cpc
9120Sstevel@tonic-gate };
9130Sstevel@tonic-gate 
9140Sstevel@tonic-gate static struct modlsys modlsys = {
9150Sstevel@tonic-gate 	&mod_syscallops,
9160Sstevel@tonic-gate 	"cpc sampling system call",
9170Sstevel@tonic-gate 	&cpc_sysent
9180Sstevel@tonic-gate };
9190Sstevel@tonic-gate 
#ifdef _SYSCALL32_IMPL
/*
 * Linkage for the 32-bit system call table.  Only built when
 * _SYSCALL32_IMPL is defined; it reuses cpc_sysent with the
 * mod_syscallops32 module type.
 */
static struct modlsys modlsys32 = {
	&mod_syscallops32,			/* module type: 32-bit syscall */
	"32-bit cpc sampling system call",	/* description */
	&cpc_sysent				/* system call entry */
};
#endif
9270Sstevel@tonic-gate 
9280Sstevel@tonic-gate static struct modlinkage modl = {
9290Sstevel@tonic-gate 	MODREV_1,
9300Sstevel@tonic-gate 	&modldrv,
9310Sstevel@tonic-gate 	&modlsys,
9320Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
9330Sstevel@tonic-gate 	&modlsys32,
9340Sstevel@tonic-gate #endif
9350Sstevel@tonic-gate };
9360Sstevel@tonic-gate 
9370Sstevel@tonic-gate static void
9380Sstevel@tonic-gate kcpc_init(void)
9390Sstevel@tonic-gate {
9400Sstevel@tonic-gate 	long hash;
9410Sstevel@tonic-gate 
9420Sstevel@tonic-gate 	rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL);
9430Sstevel@tonic-gate 	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
9440Sstevel@tonic-gate 		mutex_init(&kcpc_ctx_llock[hash],
9450Sstevel@tonic-gate 		    NULL, MUTEX_DRIVER, (void *)(uintptr_t)15);
9460Sstevel@tonic-gate }
9470Sstevel@tonic-gate 
9480Sstevel@tonic-gate static void
9490Sstevel@tonic-gate kcpc_fini(void)
9500Sstevel@tonic-gate {
9510Sstevel@tonic-gate 	long hash;
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
9540Sstevel@tonic-gate 		mutex_destroy(&kcpc_ctx_llock[hash]);
9550Sstevel@tonic-gate 	rw_destroy(&kcpc_cpuctx_lock);
9560Sstevel@tonic-gate }
9570Sstevel@tonic-gate 
9580Sstevel@tonic-gate int
9590Sstevel@tonic-gate _init(void)
9600Sstevel@tonic-gate {
9610Sstevel@tonic-gate 	int ret;
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 	if (kcpc_hw_load_pcbe() != 0)
9640Sstevel@tonic-gate 		return (ENOTSUP);
9650Sstevel@tonic-gate 
9660Sstevel@tonic-gate 	kcpc_init();
9670Sstevel@tonic-gate 	if ((ret = mod_install(&modl)) != 0)
9680Sstevel@tonic-gate 		kcpc_fini();
9690Sstevel@tonic-gate 	return (ret);
9700Sstevel@tonic-gate }
9710Sstevel@tonic-gate 
9720Sstevel@tonic-gate int
9730Sstevel@tonic-gate _fini(void)
9740Sstevel@tonic-gate {
9750Sstevel@tonic-gate 	int ret;
9760Sstevel@tonic-gate 
9770Sstevel@tonic-gate 	if ((ret = mod_remove(&modl)) == 0)
9780Sstevel@tonic-gate 		kcpc_fini();
9790Sstevel@tonic-gate 	return (ret);
9800Sstevel@tonic-gate }
9810Sstevel@tonic-gate 
9820Sstevel@tonic-gate int
9830Sstevel@tonic-gate _info(struct modinfo *mi)
9840Sstevel@tonic-gate {
9850Sstevel@tonic-gate 	return (mod_info(&modl, mi));
9860Sstevel@tonic-gate }
987