/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signal.h>
#include <sys/modctl.h>
#include <sys/proc.h>
#include <sys/lvm/mdvar.h>

md_ops_t		event_md_ops;
#ifndef	lint
char			_depends_on[] = "drv/md";
md_ops_t		*md_interface_ops = &event_md_ops;
#endif

extern void		sigintr();
extern void		sigunintr();
extern md_set_t		md_set[];

extern kmutex_t		md_mx;		/* protects md global state */
extern kcondvar_t	md_cv;		/* md_status events */
extern int		md_status;
extern clock_t		md_hz;
extern md_event_queue_t	*md_event_queue;
static void		md_reaper();
extern void		md_clear_named_service();

/* event handler stuff */
kmutex_t	md_eventq_mx;
int		md_reap_count = 32;	/* events between pid-alive scans */
int		md_reap = 0;
int		md_max_notify_queue = 512;
int		md_reap_off = 0;	/* non-zero turns off reaping */
/* don't allow module to be unloaded until all pending ops are complete */
int		global_lock_wait_cnt = 0;

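/*
 * Discard all pending events on a queue whose owning process has gone
 * away.  Returns 1 (leaving the queue intact) if a thread is still
 * waiting on the queue, 0 once the queue has been emptied.  The caller
 * must hold md_eventq_mx.
 */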
static int
md_flush_queue(md_event_queue_t *queue)
{
	md_event_t	*element, *next_element;
	/*
	 * If something is still waiting on this queue, leave it alone;
	 * the caller signals the waiter and the queue is cleaned up
	 * on a later pass.
	 */
	if (queue->mdn_waiting)
		return (1);
	/*
	 * This pid no longer exists, so blow the queue away: first
	 * free any queued entries; the caller then unlinks and frees
	 * the queue itself.
	 */
	element = queue->mdn_front;
	while (element) {
		next_element = element->mdn_next;
		kmem_free(element, sizeof (md_event_t));
		element = next_element;
	}
	queue->mdn_front = queue->mdn_tail = NULL;
	return (0);
}

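/*
 * Append an event to every registered event queue.  If a queue has
 * reached md_max_notify_queue entries, its oldest event is recycled to
 * carry the new one and the queue is flagged MD_EVENT_QUEUE_FULL so
 * the consumer can learn that events were dropped.
 */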
static void
md_put_event(md_tags_t tag, set_t sp, md_dev64_t dev, int event,
    u_longlong_t user)
{

	md_event_queue_t	*queue;
	md_event_t		*entry;

	if (!md_event_queue)
		return;

	mutex_enter(&md_eventq_mx);
	for (queue = md_event_queue; queue; queue = queue->mdn_nextq) {
		if (queue->mdn_size >= md_max_notify_queue) {
			ASSERT(queue->mdn_front != NULL);
			ASSERT(queue->mdn_front->mdn_next != NULL);
			entry = queue->mdn_front;
			queue->mdn_front = entry->mdn_next;
			queue->mdn_size--;
			queue->mdn_flags |= MD_EVENT_QUEUE_FULL;
		} else
			entry = (md_event_t *)kmem_alloc(sizeof (md_event_t),
			    KM_NOSLEEP);
		if (entry == NULL) {
			queue->mdn_flags |= MD_EVENT_QUEUE_INVALID;
			continue;
		}
		entry->mdn_tag = tag;
		entry->mdn_set = sp;
		entry->mdn_dev = dev;
		entry->mdn_event = event;
		entry->mdn_user = user;
		entry->mdn_next = NULL;
		uniqtime(&entry->mdn_time);
		if (queue->mdn_front == NULL) {
			queue->mdn_front = entry;
			queue->mdn_tail = entry;
		} else {
			queue->mdn_tail->mdn_next = entry;
			queue->mdn_tail = entry;
		}
		if (queue->mdn_waiting)
			cv_signal(&queue->mdn_cv);

		queue->mdn_size++;
	}
	md_reap++;
	mutex_exit(&md_eventq_mx);

	if (md_reap > md_reap_count)
		md_reaper();
}

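/*
 * Walk the list of event queues and reap any queue whose owning
 * process has exited, skipping queues marked MD_EVENT_QUEUE_PERM.
 * A dead queue that still has waiters is only marked
 * MD_EVENT_QUEUE_DESTROY and its waiters are woken; it is freed on a
 * later pass once they have gone.
 */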
static void
md_reaper()
{
	md_event_queue_t	*next = md_event_queue;
	md_event_queue_t	*present, *last = NULL;

	if (md_event_queue == NULL || md_reap_off)
		return;

	mutex_enter(&md_eventq_mx);
	while (next) {
		present = next;
		next = present->mdn_nextq;

		/* leave long term (permanent) event queues alone */
		if (present->mdn_flags & MD_EVENT_QUEUE_PERM) {
			last = present;
			continue;
		}

		/* check to see if the pid is still alive */
		if (!md_checkpid(present->mdn_pid, present->mdn_proc))
			present->mdn_flags |= MD_EVENT_QUEUE_DESTROY;

		/* if the queue is not marked for destruction, keep it */
		if (! (present->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			last = present;
			continue;
		}

		/* yeeeha blow this one away */
		present->mdn_pid = 0;
		present->mdn_proc = NULL;
		/*
		 * If something is still waiting on this queue, wake it
		 * up rather than freeing the queue; the queue stays
		 * marked for destruction and is reaped on a later pass.
		 */
		if (md_flush_queue(present)) {
			present->mdn_flags = MD_EVENT_QUEUE_DESTROY;
			cv_broadcast(&present->mdn_cv);
			last = present;
			continue;
		}
		/* remove the entry */
		if (last == NULL)
			md_event_queue = next;
		else
			last->mdn_nextq = next;
		cv_destroy(&present->mdn_cv);
		kmem_free(present, sizeof (md_event_queue_t));
	}
	md_reap = 0;
	mutex_exit(&md_eventq_mx);
}

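/*
 * Module halt entry point.  Only MD_HALT_UNLOAD does real work: it
 * detaches the queue list, repeatedly wakes any remaining waiters, and
 * then flushes and frees every queue so the module can unload safely.
 */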
/* ARGSUSED */
static int
notify_halt(md_haltcmd_t cmd, set_t setno)
{
	md_event_queue_t	*orig_queue, *queue, *queue_free;
	int			i;


	switch (cmd) {
	case MD_HALT_CLOSE:
	case MD_HALT_OPEN:
	case MD_HALT_DOIT:
	case MD_HALT_CHECK:

		return (0);

	case MD_HALT_UNLOAD:
		if (setno != MD_LOCAL_SET)
			return (1);
		mutex_enter(&md_eventq_mx);
		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			return (0);
		}

		orig_queue = md_event_queue;
		md_event_queue = NULL;
		for (i = 0; i < MD_NOTIFY_HALT_TRIES; i++) {
			for (queue = orig_queue; queue;
			    queue = queue->mdn_nextq) {
				if (queue->mdn_waiting == 0) {
					continue;
				}
				queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
				mutex_exit(&md_eventq_mx);
				cv_broadcast(&queue->mdn_cv);
				delay(md_hz);
				mutex_enter(&md_eventq_mx);
			}
		}
		for (queue = orig_queue; queue; ) {
			if (md_flush_queue(queue)) {
				cmn_err(CE_WARN, "md: queue not freed");
				mutex_exit(&md_eventq_mx);
				return (1);
			}
			queue_free = queue;
			queue = queue->mdn_nextq;
			kmem_free(queue_free, sizeof (md_event_queue_t));
		}
		md_event_queue = NULL;
		mutex_exit(&md_eventq_mx);
		return (0);

	default:
		return (1);
	}
}

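/*
 * Look up an event queue by name, skipping queues already marked for
 * destruction.  The single-character compare is a cheap filter ahead
 * of the full MD_NOTIFY_NAME_SIZE comparison.  Callers either hold
 * md_eventq_mx or pass lock != 0 to have it taken here.
 */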
static md_event_queue_t *
md_find_event_queue(char *q_name, int lock)
{
	md_event_queue_t	*event_q = md_event_queue;

	if (lock)
		mutex_enter(&md_eventq_mx);
	ASSERT(MUTEX_HELD(&md_eventq_mx));
	while (event_q) {
		if ((*event_q->mdn_name != *q_name) ||
		    (event_q->mdn_flags & MD_EVENT_QUEUE_DESTROY)) {
			event_q = event_q->mdn_nextq;
			continue;
		}

		if (bcmp(q_name, event_q->mdn_name, MD_NOTIFY_NAME_SIZE) == 0)
			break;
		event_q = event_q->mdn_nextq;
	}
	if (lock)
		mutex_exit(&md_eventq_mx);

	return ((md_event_queue_t *)event_q);
}

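/*
 * Named-service entry point through which other md modules post
 * events; only EQ_PUT is supported.
 */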
static intptr_t
notify_interface(md_event_cmds_t cmd, md_tags_t tag, set_t set, md_dev64_t dev,
    md_event_type_t event)
{
	switch (cmd) {
	case EQ_PUT:
		md_put_event(tag, set, dev, event, (u_longlong_t)0);
		break;
	default:
		return (-1);
	}
	return (0);
}

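/*
 * Copy an "empty" event (EQ_EMPTY/TAG_EMPTY) out to the user's buffer,
 * used when a request cannot return a real event.
 */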
static int
notify_fillin_empty_ioctl(void *data, void *ioctl_in, size_t sz,
    int mode)
{

	int			err;
	md_event_ioctl_t	*ioctl = (md_event_ioctl_t *)data;


	ioctl->mdn_event = EQ_EMPTY;
	ioctl->mdn_tag = TAG_EMPTY;
	ioctl->mdn_set = MD_ALLSETS;
	ioctl->mdn_dev = MD_ALLDEVS;
	uniqtime32(&ioctl->mdn_time);
	ioctl->mdn_user = (u_longlong_t)0;
	err = ddi_copyout(data, ioctl_in, sz, mode);
	return (err);
}

/*
 * md_wait_for_event:
 * IOLOCK_RETURN, which drops the md ioctl lock, is called in this
 * routine to allow other md ioctls to enter the kernel while this
 * thread of execution waits on an event.  When that event occurs, the
 * stopped thread wakes and continues, and the md ioctl lock must be
 * reacquired.  Even though the md ioctl lock is interruptible, we
 * choose to ignore EINTR: returning without reacquiring it would be
 * catastrophic, since it breaks down ioctl single threading.
 *
 * Return:	0	md_eventq_mx held
 *		EINTR	md_eventq_mx not held
 * Always returns with the ioctl lock held.
 */

static int
md_wait_for_event(md_event_queue_t *event_queue, void *ioctl_in,
    md_event_ioctl_t *ioctl, size_t sz,
    int mode, IOLOCK *lockp)
{
	int rval = 0;

	while (event_queue->mdn_front == NULL) {
		event_queue->mdn_waiting++;
		(void) IOLOCK_RETURN(0, lockp);
		rval = cv_wait_sig(&event_queue->mdn_cv, &md_eventq_mx);
		event_queue->mdn_waiting--;
		if ((rval == 0) || (event_queue->mdn_flags &
		    MD_EVENT_QUEUE_DESTROY)) {
			global_lock_wait_cnt++;
			mutex_exit(&md_eventq_mx);
			/* reenable single threading of ioctls */
			while (md_ioctl_lock_enter() == EINTR)
				;

			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			mutex_enter(&md_eventq_mx);
			global_lock_wait_cnt--;
			mutex_exit(&md_eventq_mx);
			return (EINTR);
		}
		/*
		 * Reacquire single threading of ioctls.  Drop
		 * md_eventq_mx since md_ioctl_lock_enter can sleep.
		 */
		global_lock_wait_cnt++;
		mutex_exit(&md_eventq_mx);
		while (md_ioctl_lock_enter() == EINTR)
			;
		mutex_enter(&md_eventq_mx);
		global_lock_wait_cnt--;
	}
	return (0);
}

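/*
 * ioctl handler for the notify interface.  EQ_ON creates a named event
 * queue, EQ_OFF marks one for destruction, EQ_GET_WAIT/EQ_GET_NOWAIT
 * dequeue the oldest event (blocking if asked to), and EQ_PUT posts an
 * event to all queues.
 */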
/* ARGSUSED */
static int
notify_ioctl(dev_t dev, int icmd, void *ioctl_in, int mode, IOLOCK *lockp)
{
	int			cmd;
	pid_t			pid;
	md_event_queue_t	*event_queue;
	md_event_t		*event;
	cred_t			*credp;
	char			*q_name;
	int			err = 0;
	size_t			sz = 0;
	md_event_ioctl_t	*ioctl;

	sz = sizeof (*ioctl);
	ioctl = kmem_zalloc(sz, KM_SLEEP);

	if (ddi_copyin(ioctl_in, (void *)ioctl, sz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ioctl->mdn_rev != MD_NOTIFY_REVISION) {
		err = EINVAL;
		goto out;
	}
	if (ioctl->mdn_magic != MD_EVENT_ID) {
		err = EINVAL;
		goto out;
	}

	pid = md_getpid();
	cmd = ioctl->mdn_cmd;
	q_name = ioctl->mdn_name;

	if (((cmd != EQ_OFF) && (cmd != EQ_ON)) && (md_reap >= md_reap_count))
		md_reaper();

	if ((cmd != EQ_ON) && (cmd != EQ_PUT)) {
		mutex_enter(&md_eventq_mx);
		if ((event_queue = md_find_event_queue(q_name, 0)) == NULL) {
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			err = ENOENT;
			goto out;
		}
	}

	switch (cmd) {
	case EQ_ON:

		md_reaper();

		mutex_enter(&md_eventq_mx);
		if (md_find_event_queue(q_name, 0) != NULL) {
			mutex_exit(&md_eventq_mx);
			err = EEXIST;
			break;
		}

		/* allocate and initialize queue head */
		event_queue = (md_event_queue_t *)
		    kmem_alloc(sizeof (md_event_queue_t), KM_NOSLEEP);
		if (event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOMEM;
			break;
		}

		cv_init(&event_queue->mdn_cv, NULL, CV_DEFAULT, NULL);

		event_queue->mdn_flags = 0;
		event_queue->mdn_pid = pid;
		event_queue->mdn_proc = md_getproc();
		event_queue->mdn_size = 0;
		event_queue->mdn_front = NULL;
		event_queue->mdn_tail = NULL;
		event_queue->mdn_waiting = 0;
		event_queue->mdn_nextq = NULL;
		credp = ddi_get_cred();
		event_queue->mdn_uid = crgetuid(credp);
		bcopy(q_name, event_queue->mdn_name,
		    MD_NOTIFY_NAME_SIZE);
		if (ioctl->mdn_flags & EQ_Q_PERM)
			event_queue->mdn_flags |= MD_EVENT_QUEUE_PERM;

		/* link into the list of event queues */
		if (md_event_queue != NULL)
			event_queue->mdn_nextq = md_event_queue;
		md_event_queue = event_queue;
		mutex_exit(&md_eventq_mx);
		err = 0;
		break;

	case EQ_OFF:

		if (md_event_queue == NULL) {
			mutex_exit(&md_eventq_mx);
			err = ENOENT;
			break;
		}

		event_queue->mdn_flags = MD_EVENT_QUEUE_DESTROY;
		event_queue->mdn_pid = 0;
		event_queue->mdn_proc = NULL;

		if (event_queue->mdn_waiting != 0)
			cv_broadcast(&event_queue->mdn_cv);

		/*
		 * force the reaper to delete this queue once it has no
		 * process waiting on it.
		 */
		mutex_exit(&md_eventq_mx);
		md_reaper();
		err = 0;
		break;

	case EQ_GET_NOWAIT:
	case EQ_GET_WAIT:
		if (cmd == EQ_GET_WAIT) {
			err = md_wait_for_event(event_queue, ioctl_in,
			    ioctl, sz, mode, lockp);
			if (err == EINTR)
				goto out;
		}
		ASSERT(MUTEX_HELD(&md_eventq_mx));
		if (event_queue->mdn_flags &
		    (MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL)) {
			event_queue->mdn_flags &=
			    ~(MD_EVENT_QUEUE_INVALID | MD_EVENT_QUEUE_FULL);
			mutex_exit(&md_eventq_mx);
			(void) notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			ioctl->mdn_event = EQ_NOTIFY_LOST;
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		}
		if (event_queue->mdn_front != NULL) {
			event = event_queue->mdn_front;
			event_queue->mdn_front = event->mdn_next;
			event_queue->mdn_size--;
			if (event_queue->mdn_front == NULL)
				event_queue->mdn_tail = NULL;
			mutex_exit(&md_eventq_mx);
			ioctl->mdn_tag = event->mdn_tag;
			ioctl->mdn_set = event->mdn_set;
			ioctl->mdn_dev = event->mdn_dev;
			ioctl->mdn_event = event->mdn_event;
			ioctl->mdn_user = event->mdn_user;
			ioctl->mdn_time.tv_sec = event->mdn_time.tv_sec;
			ioctl->mdn_time.tv_usec =
			    event->mdn_time.tv_usec;
			kmem_free(event, sizeof (md_event_t));
			err = ddi_copyout((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
			goto out;
		} else {	/* no elements on queue */
			mutex_exit(&md_eventq_mx);
			err = notify_fillin_empty_ioctl
			    ((void *)ioctl, ioctl_in, sz, mode);
			if (err)
				err = EFAULT;
		}

		if (cmd == EQ_GET_NOWAIT)
			err = EAGAIN;
		goto out;

	case EQ_PUT:

		if (!md_event_queue) {
			err = ENOENT;
			break;
		}
		md_put_event(ioctl->mdn_tag,
		    ioctl->mdn_set, ioctl->mdn_dev,
		    ioctl->mdn_event, ioctl->mdn_user);
		err = 0;
		goto out;

	default:
		err = EINVAL;
		goto out;
	}

out:
	kmem_free(ioctl, sz);
	return (err);
}

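/*
 * For reference, a consumer drives the interface above roughly like
 * this (a user-level sketch, not taken from this file; the md admin
 * device descriptor md_fd and the ioctl command code used to reach
 * notify_ioctl are assumptions here):
 *
 *	md_event_ioctl_t ev;
 *	bzero(&ev, sizeof (ev));
 *	ev.mdn_rev = MD_NOTIFY_REVISION;
 *	ev.mdn_magic = MD_EVENT_ID;
 *	(void) strncpy(ev.mdn_name, "myq", MD_NOTIFY_NAME_SIZE);
 *	ev.mdn_cmd = EQ_ON;		-- create the queue
 *	ioctl(md_fd, <notify cmd>, &ev);
 *	ev.mdn_cmd = EQ_GET_WAIT;	-- block for the next event
 *	ioctl(md_fd, <notify cmd>, &ev);
 *	ev.mdn_cmd = EQ_OFF;		-- tear the queue down
 *	ioctl(md_fd, <notify cmd>, &ev);
 */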
/*
 * Turn reaping of orphaned queues off, for testing purposes.
 */

static intptr_t
notify_reap_off()
{
	md_reap_off = 1;
	return (0);
}

/*
 * Turn reaping back on.
 */

static intptr_t
notify_reap_on()
{
	md_reap_off = 0;
	return (0);
}

/*
 * Return information that is used to test the notification feature.
 */

static intptr_t
notify_test_stats(md_notify_stats_t *stats)
{
	stats->mds_eventq_mx = &md_eventq_mx;
	stats->mds_reap_count = md_reap_count;
	stats->mds_reap = md_reap;
	stats->mds_max_queue = md_max_notify_queue;
	stats->mds_reap_off = md_reap_off;
	return (0);
}

/*
 * put this stuff at the end so we don't have to create forward
 * references for everything
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Solaris Volume Manager notification module"
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};

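/*
 * Named services exported to the rest of the md driver; entries are
 * looked up by the name or constant in the second column.
 */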
static md_named_services_t notify_services[] = {
	{notify_interface,	"notify interface"},
	{notify_reap_off,	MD_NOTIFY_REAP_OFF},
	{notify_reap_on,	MD_NOTIFY_REAP_ON},
	{notify_test_stats,	MD_NOTIFY_TEST_STATS},
	{NULL,			0}
};

md_ops_t event_md_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	NULL,			/* read */
	NULL,			/* write */
	notify_ioctl,		/* ioctl */
	NULL,			/* snarf */
	notify_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	NULL,			/* import set */
	notify_services		/* named_services */
};

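/*
 * Loadable module entry points.
 */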
int
_init()
{
	md_event_queue = NULL;
	mutex_init(&md_eventq_mx, NULL, MUTEX_DEFAULT, NULL);
	return (mod_install(&modlinkage));
}

int
_fini()
{
	int	err = 0;

	/*
	 * Don't allow the module to be unloaded while there is a thread
	 * of execution that is waiting for a global lock.
	 */
	if (global_lock_wait_cnt > 0)
		return (EBUSY);

	if ((err = mod_remove(&modlinkage)) != 0)
		return (err);

	md_clear_named_service();
	mutex_destroy(&md_eventq_mx);
	return (err);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}