xref: /onnv-gate/usr/src/uts/common/io/lvm/notify/md_notify.c (revision 4932:cac85bf517af)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signal.h>
#include <sys/modctl.h>
#include <sys/proc.h>
#include <sys/lvm/mdvar.h>

md_ops_t		event_md_ops;
#ifndef lint
char			_depends_on[] = "drv/md";
md_ops_t		*md_interface_ops = &event_md_ops;
#endif

extern void		sigintr();
extern void		sigunintr();
extern md_set_t		md_set[];

extern kmutex_t		md_mx;		/* used to protect md global state */
extern kcondvar_t	md_cv;		/* md_status events */
extern int		md_status;
extern clock_t		md_hz;
extern md_event_queue_t	*md_event_queue;
static void		md_reaper();
extern void		md_clear_named_service();

/* event handler stuff */
kmutex_t		md_eventq_mx;
int			md_reap_count = 32;	/* events between dead-pid checks */
int			md_reap = 0;
int			md_max_notify_queue = 512;
int			md_reap_off = 0;	/* non-zero turns off reaping */
/* don't allow module to be unloaded until all pending ops are complete */
int			global_lock_wait_cnt = 0;

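/*
 * md_flush_queue:
 * Free all pending events on a queue whose owner has gone away.
 * Returns 1 (and does nothing) if a thread is still waiting on the
 * queue; returns 0 once the queue has been emptied and may be
 * unlinked and freed by the caller.
 */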
static int
md_flush_queue(md_event_queue_t *queue)
{
	md_event_t	*element, *next_element;
	/*
	 * If something is still waiting on this queue, it cannot be
	 * flushed yet; the caller signals the waiter and cleans the
	 * queue up later.
	 */
	if (queue->mdn_waiting)
		return (1);
	/*
	 * This pid no longer exists, so blow the queue away:
	 * first remove any entries, then let the caller unlink and
	 * free it.
	 */
	element = queue->mdn_front;
	while (element) {
		next_element = element->mdn_next;
		kmem_free(element, sizeof (md_event_t));
		element = next_element;
	}
	queue->mdn_front = queue->mdn_tail = NULL;
	return (0);
}

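/*
 * md_put_event:
 * Append an event to every registered notify queue.  If a queue has
 * reached md_max_notify_queue entries, the oldest event is dropped and
 * its entry recycled, and the queue is flagged MD_EVENT_QUEUE_FULL.
 * Waiters are signalled, and the reaper is run after more than
 * md_reap_count events have been posted.
 */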
static void
md_put_event(md_tags_t tag, set_t sp, md_dev64_t dev, int event,
		u_longlong_t user)
{
	md_event_queue_t	*queue;
	md_event_t		*entry;

	if (!md_event_queue)
		return;

	mutex_enter(&md_eventq_mx);
	for (queue = md_event_queue; queue; queue = queue->mdn_nextq) {
		/* queue full: drop the oldest event and reuse its entry */
		if (queue->mdn_size >= md_max_notify_queue) {
			ASSERT(queue->mdn_front != NULL);
			ASSERT(queue->mdn_front->mdn_next != NULL);
			entry = queue->mdn_front;
			queue->mdn_front = entry->mdn_next;
			queue->mdn_size--;
			queue->mdn_flags |= MD_EVENT_QUEUE_FULL;
		} else
			entry = (md_event_t *)kmem_alloc(sizeof (md_event_t),
			    KM_NOSLEEP);
		if (entry == NULL) {
			queue->mdn_flags |= MD_EVENT_QUEUE_INVALID;
			continue;
		}
		entry->mdn_tag = tag;
		entry->mdn_set = sp;
		entry->mdn_dev = dev;
		entry->mdn_event = event;
		entry->mdn_user = user;
		entry->mdn_next = NULL;
		uniqtime(&entry->mdn_time);
		if (queue->mdn_front == NULL) {
			queue->mdn_front = entry;
			queue->mdn_tail = entry;
		} else {
			queue->mdn_tail->mdn_next = entry;
			queue->mdn_tail = entry;
		}
		if (queue->mdn_waiting)
			cv_signal(&queue->mdn_cv);

		queue->mdn_size++;
	}
	md_reap++;
	mutex_exit(&md_eventq_mx);

	if (md_reap > md_reap_count)
		md_reaper();
}

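/*
 * md_reaper:
 * Walk the list of event queues and tear down any queue whose owning
 * process has exited (or that has been marked MD_EVENT_QUEUE_DESTROY),
 * skipping permanent queues.  Queues that still have waiters are only
 * marked and signalled; they are freed on a later pass.
 */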
static void
md_reaper()
{
	md_event_queue_t	*next = md_event_queue;
	md_event_queue_t	*present, *last = NULL;

	if (md_event_queue == NULL || md_reap_off)
		return;

	mutex_enter(&md_eventq_mx);
	while (next) {
		present = next;
		next = present->mdn_nextq;

		/* check for long term event queue */
		if (present->mdn_flags & MD_EVENT_QUEUE_PERM) {
			last = present;
			continue;
		}

		/* check to see if the pid is still alive */
		if (!md_checkpid(present->mdn_pid, present->mdn_proc))
			present->mdn_flags |= MD_EVENT_QUEUE_DESTROY;

		/* see if the queue is a "marked queue"; if so, destroy it */
		if (! (present->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			last = present;
			continue;
		}

		/* yeeeha   blow this one away */
		present->mdn_pid = 0;
		present->mdn_proc = NULL;
		/*
		 * If something is still waiting on it and the process/pid
		 * no longer exists, signal the defunct process and continue
		 * on; this queue is cleaned up later.
		 */
		if (md_flush_queue(present)) {
			present->mdn_flags = MD_EVENT_QUEUE_DESTROY;
			cv_broadcast(&present->mdn_cv);
			last = present;
			continue;
		}
		/* remove the entry */
		if (last == NULL)
			md_event_queue = next;
		else
			last->mdn_nextq = next;
		cv_destroy(&present->mdn_cv);
		kmem_free(present, sizeof (md_event_queue_t));
	}
	md_reap = 0;
	mutex_exit(&md_eventq_mx);
}

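/*
 * notify_halt:
 * md halt entry point for this module.  Only MD_HALT_UNLOAD (on the
 * local set) does any work: it wakes all waiters and then flushes and
 * frees every event queue so the module can be unloaded.
 */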
/* ARGSUSED */
static int
notify_halt(md_haltcmd_t cmd, set_t setno)
{
	md_event_queue_t	*orig_queue, *queue, *queue_free;
	int			i;

	switch (cmd) {
	    case MD_HALT_CLOSE:
	    case MD_HALT_OPEN:
	    case MD_HALT_DOIT:
	    case MD_HALT_CHECK:

		return (0);

	    case MD_HALT_UNLOAD:
		if (setno != MD_LOCAL_SET)
			return (1);
		mutex_enter(&md_eventq_mx);
		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			return (0);
		}

		orig_queue = md_event_queue;
		md_event_queue = NULL;
		for (i = 0; i < MD_NOTIFY_HALT_TRIES; i++) {
			for (queue = orig_queue; queue;
			    queue = queue->mdn_nextq) {
				if (queue->mdn_waiting == 0) {
					continue;
				}
				queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
				mutex_exit(&md_eventq_mx);
				cv_broadcast(&queue->mdn_cv);
				delay(md_hz);
				mutex_enter(&md_eventq_mx);
			}
		}
		for (queue = orig_queue; queue; ) {
			if (md_flush_queue(queue)) {
				cmn_err(CE_WARN, "md: queue not freed");
				mutex_exit(&md_eventq_mx);
				return (1);
			}
			queue_free = queue;
			queue = queue->mdn_nextq;
			kmem_free(queue_free, sizeof (md_event_queue_t));
		}
		md_event_queue = NULL;
		mutex_exit(&md_eventq_mx);
		return (0);

	    default:
		return (1);
	}
}

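/*
 * md_find_event_queue:
 * Look up an event queue by name, skipping queues that are marked for
 * destruction.  If "lock" is set, md_eventq_mx is taken and dropped
 * here; otherwise the caller must already hold it.
 */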
static md_event_queue_t *
md_find_event_queue(char *q_name, int lock)
{
	md_event_queue_t	*event_q = md_event_queue;

	if (lock)
		mutex_enter(&md_eventq_mx);
	ASSERT(MUTEX_HELD(&md_eventq_mx));
	while (event_q) {
		if ((*event_q->mdn_name != *q_name) ||
		    (event_q->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			event_q = event_q->mdn_nextq;
			continue;
		}

		if (bcmp(q_name, event_q->mdn_name, MD_NOTIFY_NAME_SIZE) == 0)
			break;
		event_q = event_q->mdn_nextq;
	}
	if (lock)
		mutex_exit(&md_eventq_mx);

	return ((md_event_queue_t *)event_q);
}

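/*
 * notify_interface:
 * Named-service entry point used by the md driver to post events
 * (EQ_PUT) onto the notify queues.
 */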
static intptr_t
notify_interface(md_event_cmds_t cmd, md_tags_t tag, set_t set, md_dev64_t dev,
		md_event_type_t event)
{
	switch (cmd) {
	    case EQ_PUT:
		md_put_event(tag, set, dev, event, (u_longlong_t)0);
		break;
	    default:
		return (-1);
	}
	return (0);
}

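/*
 * notify_fillin_empty_ioctl:
 * Fill the ioctl buffer with an "empty" event (EQ_EMPTY/TAG_EMPTY)
 * and copy it back out to the user.
 */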
static int
notify_fillin_empty_ioctl(void *data, void *ioctl_in, size_t sz,
		int mode)
{
	int			err;
	md_event_ioctl_t	*ioctl = (md_event_ioctl_t *)data;

	ioctl->mdn_event = EQ_EMPTY;
	ioctl->mdn_tag = TAG_EMPTY;
	ioctl->mdn_set = MD_ALLSETS;
	ioctl->mdn_dev = MD_ALLDEVS;
	uniqtime32(&ioctl->mdn_time);
	ioctl->mdn_user = (u_longlong_t)0;
	err = ddi_copyout(data, ioctl_in, sz, mode);
	return (err);
}

/*
 * md_wait_for_event:
 * IOLOCK_RETURN, which drops the md_ioctl_lock, is called in this
 * routine to enable other mdioctls to enter the kernel while this
 * thread of execution waits on an event.  When that event occurs, the
 * stopped thread wakes and continues and md_ioctl_lock must be
 * reacquired.  Even though md_ioctl_lock is interruptible, we choose
 * to ignore EINTR.  Returning w/o acquiring md_ioctl_lock is
 * catastrophic since it breaks down ioctl single threading.
 *
 * Return: 0	 md_eventq_mx held
 *	   EINTR md_eventq_mx not held
 *	   Always returns with the ioctl lock held.
 */

static int
md_wait_for_event(md_event_queue_t *event_queue, void *ioctl_in,
		md_event_ioctl_t *ioctl, size_t sz,
		int mode, IOLOCK *lockp)
{
	int rval = 0;

	while (event_queue->mdn_front == NULL) {
		event_queue->mdn_waiting++;
		(void) IOLOCK_RETURN(0, lockp);
		rval = cv_wait_sig(&event_queue->mdn_cv, &md_eventq_mx);
		event_queue->mdn_waiting--;
		if ((rval == 0) || (event_queue->mdn_flags &
		    MD_EVENT_QUEUE_DESTROY)) {
			global_lock_wait_cnt++;
			mutex_exit(&md_eventq_mx);
			/* reenable single threading of ioctls */
			while (md_ioctl_lock_enter() == EINTR)
				;

			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			mutex_enter(&md_eventq_mx);
			global_lock_wait_cnt--;
			mutex_exit(&md_eventq_mx);
			return (EINTR);
		}
		/*
		 * Reacquire ioctl single threading.  Drop md_eventq_mx
		 * since md_ioctl_lock_enter can sleep.
		 */
		global_lock_wait_cnt++;
		mutex_exit(&md_eventq_mx);
		while (md_ioctl_lock_enter() == EINTR)
			;
		mutex_enter(&md_eventq_mx);
		global_lock_wait_cnt--;
	}
	return (0);
}

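/*
 * notify_ioctl:
 * ioctl handler for the notify interface.  Supports creating (EQ_ON)
 * and destroying (EQ_OFF) named event queues, retrieving events with
 * or without blocking (EQ_GET_WAIT/EQ_GET_NOWAIT), and posting events
 * (EQ_PUT).
 */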
/* ARGSUSED */
static int
notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp)
{
	int			cmd;
	pid_t			pid;
	md_event_queue_t	*event_queue;
	md_event_t		*event;
	cred_t			*credp;
	char			*q_name;
	int			err = 0;
	size_t			sz = 0;
	md_event_ioctl_t	*ioctl;

	sz = sizeof (*ioctl);
	ioctl = kmem_zalloc(sz, KM_SLEEP);

	if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ioctl->mdn_rev != MD_NOTIFY_REVISION) {
		err = EINVAL;
		goto out;
	}
	if (ioctl->mdn_magic != MD_EVENT_ID) {
		err = EINVAL;
		goto out;
	}

	pid = md_getpid();
	cmd = ioctl->mdn_cmd;
	q_name = ioctl->mdn_name;

	if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count))
		md_reaper();

	if ((cmd != EQ_ON) && (cmd != EQ_PUT)) {
		mutex_enter(&md_eventq_mx);
		if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) {
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			err = ENOENT;
			goto out;
		}
	}

	switch (cmd) {
	    case EQ_ON:

		md_reaper();

		mutex_enter(&md_eventq_mx);
		if (md_find_event_queue(q_name, 0) != NULL) {
			mutex_exit(&md_eventq_mx);
			err = EEXIST;
			break;
		}

		/* allocate and initialize queue head */
		event_queue = (md_event_queue_t *)
		    kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP);
		if (event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOMEM;
			break;
		}

		cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL);

		event_queue->mdn_flags = 0;
		event_queue->mdn_pid = pid;
		event_queue->mdn_proc = md_getproc();
		event_queue->mdn_size = 0;
		event_queue->mdn_front = NULL;
		event_queue->mdn_tail = NULL;
		event_queue->mdn_waiting = 0;
		event_queue->mdn_nextq = NULL;
		credp = ddi_get_cred();
		event_queue->mdn_uid = crgetuid(credp);
		bcopy(q_name, event_queue->mdn_name,
		    MD_NOTIFY_NAME_SIZE);
		if (ioctl->mdn_flags & EQ_Q_PERM)
			event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM;

		/* link into the list of event queues */
		if (md_event_queue != NULL)
			event_queue->mdn_nextq = md_event_queue;
		md_event_queue = event_queue;
		mutex_exit(&md_eventq_mx);
		err = 0;
		break;

	    case EQ_OFF:

		if (md_event_queue == NULL)
			return (ENOENT);

		event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
		event_queue->mdn_pid = 0;
		event_queue->mdn_proc = NULL;

		if (event_queue->mdn_waiting != 0)
			cv_broadcast(&event_queue->mdn_cv);

		/*
		 * force the reaper to delete this when it has no process
		 * waiting on it.
		 */
		mutex_exit(&md_eventq_mx);
		md_reaper();
		err = 0;
		break;

	    case EQ_GET_NOWAIT:
	    case EQ_GET_WAIT:
		if (cmd == EQ_GET_WAIT) {
			err = md_wait_for_event(event_queue, ioctl_in,
			    ioctl, sz, mode, lockp);
			if (err == EINTR)
				goto out;
		}
		ASSERT(MUTEX_HELD(&md_eventq_mx));
		if (event_queue->mdn_flags &
		    (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) {
			event_queue->mdn_flags &=
			    ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL);
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			ioctl->mdn_event = EQ_NOTIFY_LOST;
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		}
		if (event_queue->mdn_front != NULL) {
			event = event_queue->mdn_front;
			event_queue->mdn_front = event->mdn_next;
			event_queue->mdn_size--;
			if (event_queue->mdn_front == NULL)
				event_queue->mdn_tail = NULL;
			mutex_exit(&md_eventq_mx);
			ioctl->mdn_tag = event->mdn_tag;
			ioctl->mdn_set = event->mdn_set;
			ioctl->mdn_dev = event->mdn_dev;
			ioctl->mdn_event = event->mdn_event;
			ioctl->mdn_user = event->mdn_user;
			ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec;
			ioctl->mdn_time.tv_usec = event->mdn_time.tv_usec;
			kmem_free(event, sizeof (md_event_t));
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		} else { /* no elements on queue */
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
		}

		if (cmd == EQ_GET_NOWAIT)
			err = EAGAIN;
		goto out;

	    case EQ_PUT:

		if (!md_event_queue) {
			err = ENOENT;
			break;
		}
		md_put_event(ioctl->mdn_tag,
		    ioctl->mdn_set, ioctl->mdn_dev,
		    ioctl->mdn_event, ioctl->mdn_user);
		err = 0;
		goto out;

	    default:
		err = EINVAL;
		goto out;
	}

out:
	kmem_free(ioctl, sz);
	return (err);
}

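/*
 * Illustrative sketch (not compiled): how a userland consumer might
 * drive the notify ioctl above.  The EQ_* commands, md_event_ioctl_t
 * fields, MD_NOTIFY_REVISION and MD_EVENT_ID come from this file; the
 * admin device path and the MD_IOCNOTIFY ioctl number are assumptions
 * and must be taken from the installed md headers.
 *
 *	md_event_ioctl_t ev;
 *	int fd = open("/dev/md/admin", O_RDWR);		// assumed path
 *
 *	(void) memset(&ev, 0, sizeof (ev));
 *	ev.mdn_rev = MD_NOTIFY_REVISION;
 *	ev.mdn_magic = MD_EVENT_ID;
 *	(void) strlcpy(ev.mdn_name, "myqueue", MD_NOTIFY_NAME_SIZE);
 *
 *	ev.mdn_cmd = EQ_ON;				// create the queue
 *	(void) ioctl(fd, MD_IOCNOTIFY, &ev);		// assumed ioctl cmd
 *
 *	ev.mdn_cmd = EQ_GET_WAIT;			// block for an event
 *	(void) ioctl(fd, MD_IOCNOTIFY, &ev);
 *
 *	ev.mdn_cmd = EQ_OFF;				// tear the queue down
 *	(void) ioctl(fd, MD_IOCNOTIFY, &ev);
 */
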
/*
 * Turn off reaping of orphaned event queues, for testing purposes.
 */

static intptr_t
notify_reap_off()
{
	md_reap_off = 1;
	return (0);
}

/*
 * Turn reaping back on.
 */

static intptr_t
notify_reap_on()
{
	md_reap_off = 0;
	return (0);
}

/*
 * Return information that is used to test the notification feature.
 */

static intptr_t
notify_test_stats(md_notify_stats_t *stats)
{
	stats->mds_eventq_mx = &md_eventq_mx;
	stats->mds_reap_count = md_reap_count;
	stats->mds_reap = md_reap;
	stats->mds_max_queue = md_max_notify_queue;
	stats->mds_reap_off = md_reap_off;
	return (0);
}

/*
 * Put this stuff at the end so we don't have to create forward
 * references for everything.
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Solaris Volume Manager notification module"
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};

static md_named_services_t notify_services[] = {
	{notify_interface,	"notify interface"},
	{notify_reap_off,	MD_NOTIFY_REAP_OFF},
	{notify_reap_on,	MD_NOTIFY_REAP_ON},
	{notify_test_stats,	MD_NOTIFY_TEST_STATS},
	{NULL,			0}
};

md_ops_t event_md_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	NULL,			/* read */
	NULL,			/* write */
	notify_ioctl,		/* event ioctls */
	NULL,			/* snarf */
	notify_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	NULL,			/* import set */
	notify_services		/* named_services */
};


int
_init()
{
	md_event_queue = NULL;
	mutex_init(&md_eventq_mx, NULL, MUTEX_DEFAULT, NULL);
	return (mod_install(&modlinkage));
}

int
_fini()
{
	int		err = 0;

	/*
	 * Don't allow the module to be unloaded while there is a thread
	 * of execution that is waiting for a global lock.
	 */
	if (global_lock_wait_cnt > 0)
		return (EBUSY);

	if ((err = mod_remove(&modlinkage)) != 0)
		return (err);

	md_clear_named_service();
	mutex_destroy(&md_eventq_mx);
	return (err);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}