/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signal.h>
#include <sys/modctl.h>
#include <sys/proc.h>
#include <sys/lvm/mdvar.h>

md_ops_t		event_md_ops;
#ifndef lint
char			_depends_on[] = "drv/md";
md_ops_t		*md_interface_ops = &event_md_ops;
#endif

extern void		sigintr();
extern void		sigunintr();
extern md_set_t		md_set[];

extern kmutex_t		md_mx;		/* protects md global state */
extern kcondvar_t	md_cv;		/* md_status events */
extern int		md_status;
extern clock_t		md_hz;
extern md_event_queue_t	*md_event_queue;
static void		md_reaper();
extern void		md_clear_named_service();

/* event handler stuff */
kmutex_t		md_eventq_mx;
int			md_reap_count = 32;	/* events between dead-pid checks */
int			md_reap = 0;
int			md_max_notify_queue = 512;
int			md_reap_off = 0;	/* non-zero turns off reap */
/* don't allow module to be unloaded until all pending ops are complete */
int			global_lock_wait_cnt = 0;

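/*
 * Flush all pending events from an event queue.  Returns 1, leaving
 * the queue intact, if a process is still waiting on it; returns 0
 * once the queued events have been freed.  The caller must hold
 * md_eventq_mx and is responsible for unlinking and freeing the
 * queue head itself.
 */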
static int
md_flush_queue(md_event_queue_t *queue)
{
	md_event_t	*element, *next_element;

	/*
	 * If a process is still waiting on this queue we cannot free
	 * it yet; tell the caller so the queue can be marked for
	 * destruction and cleaned up on a later pass.
	 */
	if (queue->mdn_waiting)
		return (1);

	/*
	 * The owning pid no longer exists, so free every queued event.
	 * The caller unlinks and frees the queue head itself.
	 */
	element = queue->mdn_front;
	while (element) {
		next_element = element->mdn_next;
		kmem_free(element, sizeof (md_event_t));
		element = next_element;
	}
	queue->mdn_front = queue->mdn_tail = NULL;
	return (0);
}

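/*
 * Post an event to every registered event queue.  A queue that has
 * reached md_max_notify_queue entries recycles its oldest event to
 * hold the new one and is flagged MD_EVENT_QUEUE_FULL.  Any waiter
 * is signalled, and the reaper runs once md_reap_count events have
 * been posted since the last reap.
 */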
static void
md_put_event(md_tags_t tag, set_t sp, md_dev64_t dev, int event,
		u_longlong_t user)
{
	md_event_queue_t	*queue;
	md_event_t		*entry;

	if (!md_event_queue)
		return;

	mutex_enter(&md_eventq_mx);
	for (queue = md_event_queue; queue; queue = queue->mdn_nextq) {
		if (queue->mdn_size >= md_max_notify_queue) {
			/* full: recycle the oldest entry for the new event */
			ASSERT(queue->mdn_front != NULL);
			ASSERT(queue->mdn_front->mdn_next != NULL);
			entry = queue->mdn_front;
			queue->mdn_front = entry->mdn_next;
			queue->mdn_size--;
			queue->mdn_flags |= MD_EVENT_QUEUE_FULL;
		} else
			entry = (md_event_t *)kmem_alloc(sizeof (md_event_t),
			    KM_NOSLEEP);
		if (entry == NULL) {
			queue->mdn_flags |= MD_EVENT_QUEUE_INVALID;
			continue;
		}
		entry->mdn_tag = tag;
		entry->mdn_set = sp;
		entry->mdn_dev = dev;
		entry->mdn_event = event;
		entry->mdn_user = user;
		entry->mdn_next = NULL;
		uniqtime(&entry->mdn_time);
		if (queue->mdn_front == NULL) {
			queue->mdn_front = entry;
			queue->mdn_tail = entry;
		} else {
			queue->mdn_tail->mdn_next = entry;
			queue->mdn_tail = entry;
		}
		if (queue->mdn_waiting)
			cv_signal(&queue->mdn_cv);

		queue->mdn_size++;
	}
	md_reap++;
	mutex_exit(&md_eventq_mx);

	if (md_reap > md_reap_count)
		md_reaper();
}

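/*
 * Reap event queues whose owning process has exited or that have
 * been marked for destruction.  Queues created with EQ_Q_PERM are
 * left alone, as are queues that still have a waiter (those are
 * woken and reaped on a later pass).
 */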
static void
md_reaper()
{
	md_event_queue_t	*next = md_event_queue;
	md_event_queue_t	*present, *last = NULL;

	if (md_event_queue == NULL || md_reap_off)
		return;

	mutex_enter(&md_eventq_mx);
	while (next) {
		present = next;
		next = present->mdn_nextq;

		/* long-term (permanent) queues are never reaped */
		if (present->mdn_flags & MD_EVENT_QUEUE_PERM) {
			last = present;
			continue;
		}

		/* check to see if the pid is still alive */
		if (!md_checkpid(present->mdn_pid, present->mdn_proc))
			present->mdn_flags |= MD_EVENT_QUEUE_DESTROY;

		/* only queues marked for destruction are torn down */
		if (! (present->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			last = present;
			continue;
		}

		/* yeeeha   blow this one away */
		present->mdn_pid = 0;
		present->mdn_proc = NULL;
		/*
		 * If something is still waiting on the queue even though
		 * its process no longer exists, wake the waiter and leave
		 * the queue marked for destruction; it is cleaned up on a
		 * later pass.
		 */
		if (md_flush_queue(present)) {
			present->mdn_flags = MD_EVENT_QUEUE_DESTROY;
			cv_broadcast(&present->mdn_cv);
			last = present;
			continue;
		}
		/* remove the entry */
		if (last == NULL)
			md_event_queue = next;
		else
			last->mdn_nextq = next;
		cv_destroy(&present->mdn_cv);
		kmem_free(present, sizeof (md_event_queue_t));
	}
	md_reap = 0;
	mutex_exit(&md_eventq_mx);
}

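/*
 * Halt entry point for the md framework.  All commands except
 * MD_HALT_UNLOAD are no-ops; on unload, waiters are woken (retrying
 * up to MD_NOTIFY_HALT_TRIES times) and every event queue is freed.
 */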
/* ARGSUSED */
static int
notify_halt(md_haltcmd_t cmd, set_t setno)
{
	md_event_queue_t	*orig_queue, *queue, *queue_free;
	int			i;

	switch (cmd) {
	    case MD_HALT_CLOSE:
	    case MD_HALT_OPEN:
	    case MD_HALT_DOIT:
	    case MD_HALT_CHECK:

		return (0);

	    case MD_HALT_UNLOAD:
		if (setno != MD_LOCAL_SET)
			return (1);
		mutex_enter(&md_eventq_mx);
		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			return (0);
		}

		orig_queue = md_event_queue;
		md_event_queue = NULL;
		for (i = 0; i < MD_NOTIFY_HALT_TRIES; i++) {
			for (queue = orig_queue; queue;
			    queue = queue->mdn_nextq) {
				if (queue->mdn_waiting == 0) {
					continue;
				}
				queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
				mutex_exit(&md_eventq_mx);
				cv_broadcast(&queue->mdn_cv);
				delay(md_hz);
				mutex_enter(&md_eventq_mx);
			}
		}
		for (queue = orig_queue; queue; ) {
			if (md_flush_queue(queue)) {
				cmn_err(CE_WARN, "md: queue not freed");
				mutex_exit(&md_eventq_mx);
				return (1);
			}
			queue_free = queue;
			queue = queue->mdn_nextq;
			kmem_free(queue_free, sizeof (md_event_queue_t));
		}
		md_event_queue = NULL;
		mutex_exit(&md_eventq_mx);
		return (0);

	    default:
		return (1);
	}
}

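/*
 * Find an event queue by name, skipping queues that are marked for
 * destruction.  If "lock" is set, md_eventq_mx is taken and dropped
 * here; otherwise the caller must already hold it.
 */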
static md_event_queue_t *
md_find_event_queue(char *q_name, int lock)
{
	md_event_queue_t	*event_q = md_event_queue;

	if (lock)
		mutex_enter(&md_eventq_mx);
	ASSERT(MUTEX_HELD(&md_eventq_mx));
	while (event_q) {
		/* cheap first-character check before the full compare */
		if ((*event_q->mdn_name != *q_name) ||
		    (event_q->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			event_q = event_q->mdn_nextq;
			continue;
		}

		if (bcmp(q_name, event_q->mdn_name, MD_NOTIFY_NAME_SIZE) == 0)
			break;
		event_q = event_q->mdn_nextq;
	}
	if (lock)
		mutex_exit(&md_eventq_mx);

	return (event_q);
}

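/*
 * Named-service entry point through which other md modules post
 * events; only EQ_PUT is supported.
 */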
static intptr_t
notify_interface(md_event_cmds_t cmd, md_tags_t tag, set_t set, md_dev64_t dev,
		md_event_type_t event)
{
	switch (cmd) {
	    case EQ_PUT:
		md_put_event(tag, set, dev, event, (u_longlong_t)0);
		break;
	    default:
		return (-1);
	}
	return (0);
}

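/*
 * Fill the caller's buffer with an "empty" event (EQ_EMPTY/TAG_EMPTY,
 * stamped with the current time) and copy it out.
 */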
static int
notify_fillin_empty_ioctl(void *data, void *ioctl_in, size_t sz,
		int mode)
{
	int			err;
	md_event_ioctl_t	*ioctl = (md_event_ioctl_t *)data;

	ioctl->mdn_event = EQ_EMPTY;
	ioctl->mdn_tag = TAG_EMPTY;
	ioctl->mdn_set = MD_ALLSETS;
	ioctl->mdn_dev = MD_ALLDEVS;
	uniqtime32(&ioctl->mdn_time);
	ioctl->mdn_user = (u_longlong_t)0;
	err = ddi_copyout(data, ioctl_in, sz, mode);
	return (err);
}

/*
 * md_wait_for_event:
 * IOLOCK_RETURN, which drops the md_ioctl_lock, is called in this
 * routine so that other md ioctls can enter the kernel while this
 * thread of execution waits on an event.  When that event occurs, the
 * stopped thread wakes and continues, and md_ioctl_lock must be
 * reacquired.  Even though md_ioctl_lock is interruptible, we choose
 * to ignore EINTR.  Returning without acquiring md_ioctl_lock is
 * catastrophic since it breaks down ioctl single threading.
 *
 * Return: 0	 md_eventq_mx held
 *	   EINTR md_eventq_mx not held
 *	   Always returns with IOCTL lock held
 */

static int
md_wait_for_event(md_event_queue_t *event_queue, void *ioctl_in,
		md_event_ioctl_t *ioctl, size_t sz,
		int mode, IOLOCK *lockp)
{
	int rval = 0;

	while (event_queue->mdn_front == NULL) {
		event_queue->mdn_waiting++;
		(void) IOLOCK_RETURN(0, lockp);
		rval = cv_wait_sig(&event_queue->mdn_cv, &md_eventq_mx);
		event_queue->mdn_waiting--;
		if ((rval == 0) || (event_queue->mdn_flags &
		    MD_EVENT_QUEUE_DESTROY)) {
			global_lock_wait_cnt++;
			mutex_exit(&md_eventq_mx);
			/* reenable single threading of ioctls */
			while (md_ioctl_lock_enter() == EINTR)
				;

			(void) notify_fillin_empty_ioctl((void *)ioctl,
			    ioctl_in, sz, mode);
			mutex_enter(&md_eventq_mx);
			global_lock_wait_cnt--;
			mutex_exit(&md_eventq_mx);
			return (EINTR);
		}
		/*
		 * reacquire single threading ioctls. Drop eventq_mutex
		 * since md_ioctl_lock_enter can sleep.
		 */
		global_lock_wait_cnt++;
		mutex_exit(&md_eventq_mx);
		while (md_ioctl_lock_enter() == EINTR)
			;
		mutex_enter(&md_eventq_mx);
		global_lock_wait_cnt--;
	}
	return (0);
}

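/*
 * ioctl entry point.  EQ_ON creates a named event queue for the
 * calling process, EQ_OFF marks one for destruction, EQ_GET_WAIT and
 * EQ_GET_NOWAIT dequeue the oldest event (blocking or not), and
 * EQ_PUT posts an event to all queues.
 */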
/* ARGSUSED */
static int
notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp)
{
	int			cmd;
	pid_t			pid;
	md_event_queue_t	*event_queue;
	md_event_t		*event;
	cred_t			*credp;
	char			*q_name;
	int			err = 0;
	size_t			sz = 0;
	md_event_ioctl_t	*ioctl;

	sz = sizeof (*ioctl);
	ioctl = kmem_zalloc(sz, KM_SLEEP);

	if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ioctl->mdn_rev != MD_NOTIFY_REVISION) {
		err = EINVAL;
		goto out;
	}
	if (ioctl->mdn_magic != MD_EVENT_ID) {
		err = EINVAL;
		goto out;
	}

	pid = md_getpid();
	cmd = ioctl->mdn_cmd;
	q_name = ioctl->mdn_name;

	if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count))
		md_reaper();

	/* all commands but EQ_ON and EQ_PUT operate on an existing queue */
	if ((cmd != EQ_ON) && (cmd != EQ_PUT)) {
		mutex_enter(&md_eventq_mx);
		if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) {
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl((void *)ioctl,
			    ioctl_in, sz, mode);
			err = ENOENT;
			goto out;
		}
	}

	switch (cmd) {
	    case EQ_ON:

		md_reaper();

		mutex_enter(&md_eventq_mx);
		if (md_find_event_queue(q_name, 0) != NULL) {
			mutex_exit(&md_eventq_mx);
			err = EEXIST;
			break;
		}

		/* allocate and initialize queue head */
		event_queue = (md_event_queue_t *)
		    kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP);
		if (event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOMEM;
			break;
		}

		cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL);

		event_queue->mdn_flags = 0;
		event_queue->mdn_pid = pid;
		event_queue->mdn_proc = md_getproc();
		event_queue->mdn_size = 0;
		event_queue->mdn_front = NULL;
		event_queue->mdn_tail = NULL;
		event_queue->mdn_waiting = 0;
		event_queue->mdn_nextq = NULL;
		credp = ddi_get_cred();
		event_queue->mdn_uid = crgetuid(credp);
		bcopy(q_name, event_queue->mdn_name,
		    MD_NOTIFY_NAME_SIZE);
		if (ioctl->mdn_flags & EQ_Q_PERM)
			event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM;

		/* link into the list of event queues */
		if (md_event_queue != NULL)
			event_queue->mdn_nextq = md_event_queue;
		md_event_queue = event_queue;
		mutex_exit(&md_eventq_mx);
		err = 0;
		break;

	    case EQ_OFF:

		/* md_eventq_mx is still held here from the lookup above */
		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOENT;
			goto out;
		}

		event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
		event_queue->mdn_pid = 0;
		event_queue->mdn_proc = NULL;

		if (event_queue->mdn_waiting != 0)
			cv_broadcast(&event_queue->mdn_cv);

		/*
		 * force the reaper to delete this when it has no process
		 * waiting on it.
		 */
		mutex_exit(&md_eventq_mx);
		md_reaper();
		err = 0;
		break;

	    case EQ_GET_NOWAIT:
	    case EQ_GET_WAIT:
		if (cmd == EQ_GET_WAIT) {
			err = md_wait_for_event(event_queue, ioctl_in,
			    ioctl, sz, mode, lockp);
			if (err == EINTR)
				goto out;
		}
		ASSERT(MUTEX_HELD(&md_eventq_mx));
		if (event_queue->mdn_flags &
		    (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) {
			/* events were dropped; tell the caller */
			event_queue->mdn_flags &=
			    ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL);
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl((void *)ioctl,
			    ioctl_in, sz, mode);
			ioctl->mdn_event = EQ_NOTIFY_LOST;
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		}
		if (event_queue->mdn_front != NULL) {
			/* dequeue the oldest event and copy it out */
			event = event_queue->mdn_front;
			event_queue->mdn_front = event->mdn_next;
			event_queue->mdn_size--;
			if (event_queue->mdn_front == NULL)
				event_queue->mdn_tail = NULL;
			mutex_exit(&md_eventq_mx);
			ioctl->mdn_tag = event->mdn_tag;
			ioctl->mdn_set = event->mdn_set;
			ioctl->mdn_dev = event->mdn_dev;
			ioctl->mdn_event = event->mdn_event;
			ioctl->mdn_user = event->mdn_user;
			ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec;
			ioctl->mdn_time.tv_usec = event->mdn_time.tv_usec;
			kmem_free(event, sizeof (md_event_t));
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		} else { /* no elements on queue */
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl((void *)ioctl,
			    ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
		}

		if (cmd == EQ_GET_NOWAIT)
			err = EAGAIN;
		goto out;

	    case EQ_PUT:

		if (!md_event_queue) {
			err = ENOENT;
			break;
		}
		md_put_event(ioctl->mdn_tag, ioctl->mdn_set, ioctl->mdn_dev,
		    ioctl->mdn_event, ioctl->mdn_user);
		err = 0;
		goto out;

	    default:
		err = EINVAL;
		goto out;
	}

out:
	kmem_free(ioctl, sz);
	return (err);
}

/*
 * Turn off reaping of orphaned queues, for testing purposes.
 */

static intptr_t
notify_reap_off()
{
	md_reap_off = 1;
	return (0);
}

/*
 * Turn reaping back on.
 */

static intptr_t
notify_reap_on()
{
	md_reap_off = 0;
	return (0);
}

/*
 * Return information that is used to test the notification feature.
 */

static intptr_t
notify_test_stats(md_notify_stats_t *stats)
{
	stats->mds_eventq_mx = &md_eventq_mx;
	stats->mds_reap_count = md_reap_count;
	stats->mds_reap = md_reap;
	stats->mds_max_queue = md_max_notify_queue;
	stats->mds_reap_off = md_reap_off;
	return (0);
}

/*
 * put this stuff at the end so we don't have to create forward
 * references for everything
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Solaris Volume Manager notification module"
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};

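/*
 * Named services exported by this module; other md code resolves
 * these by name (md_get_named_service()).
 */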
static md_named_services_t notify_services[] = {
	{notify_interface,	"notify interface"},
	{notify_reap_off,	MD_NOTIFY_REAP_OFF},
	{notify_reap_on,	MD_NOTIFY_REAP_ON},
	{notify_test_stats,	MD_NOTIFY_TEST_STATS},
	{NULL,			0}
};

md_ops_t event_md_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	NULL,			/* read */
	NULL,			/* write */
	notify_ioctl,		/* ioctl */
	NULL,			/* snarf */
	notify_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	NULL,			/* import set */
	notify_services		/* named_services */
};

int
_init()
{
	md_event_queue = NULL;
	mutex_init(&md_eventq_mx, NULL, MUTEX_DEFAULT, NULL);
	return (mod_install(&modlinkage));
}

int
_fini()
{
	int		err = 0;

	/*
	 * Don't allow the module to be unloaded while there is a thread
	 * of execution that is waiting for a global lock.
	 */
	if (global_lock_wait_cnt > 0)
		return (EBUSY);

	if ((err = mod_remove(&modlinkage)) != 0)
		return (err);

	md_clear_named_service();
	mutex_destroy(&md_eventq_mx);
	return (err);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}