xref: /netbsd-src/sys/kern/sys_mqueue.c (revision 274254cdae52594c1aa480a736aef78313d15c9c)
1 /*	$NetBSD: sys_mqueue.c,v 1.14 2009/04/04 10:12:51 ad Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Implementation of POSIX message queues.
31  * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
32  *
33  * Locking
34  *
 * Global list of message queues (mqueue_head) and proc_t::p_mqueue_cnt
 * counter are protected by mqlist_mtx lock.  Each message queue and
 * its members are protected by its own mqueue::mq_mtx.
38  *
39  * Lock order:
40  * 	mqlist_mtx
41  * 	  -> mqueue::mq_mtx
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.14 2009/04/04 10:12:51 ad Exp $");
46 
47 #include <sys/param.h>
48 #include <sys/types.h>
49 #include <sys/condvar.h>
50 #include <sys/errno.h>
51 #include <sys/fcntl.h>
52 #include <sys/file.h>
53 #include <sys/filedesc.h>
54 #include <sys/kauth.h>
55 #include <sys/kernel.h>
56 #include <sys/kmem.h>
57 #include <sys/lwp.h>
58 #include <sys/mqueue.h>
59 #include <sys/mutex.h>
60 #include <sys/pool.h>
61 #include <sys/poll.h>
62 #include <sys/proc.h>
63 #include <sys/queue.h>
64 #include <sys/select.h>
65 #include <sys/signal.h>
66 #include <sys/signalvar.h>
67 #include <sys/stat.h>
68 #include <sys/sysctl.h>
69 #include <sys/syscallargs.h>
70 #include <sys/systm.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 
/* System-wide limits. */
/* Per-process count of open descriptors at which sys_mq_open() fails. */
static u_int			mq_open_max = MQ_OPEN_MAX;
/* Exclusive upper bound on the message priority accepted by mq_send1(). */
static u_int			mq_prio_max = MQ_PRIO_MAX;

/* Upper bound on message size, checked by sys_mq_open() and mq_send1(). */
static u_int			mq_max_msgsize = 16 * MQ_DEF_MSGSIZE;
/* mq_maxmsg used when sys_mq_open() creates a queue without attributes. */
static u_int			mq_def_maxmsg = 32;

/* Protects mqueue_head and proc_t::p_mqueue_cnt. */
static kmutex_t			mqlist_mtx;
/* Cache of MQ_DEF_MSGSIZE-sized buffers used for small messages. */
static pool_cache_t		mqmsg_cache;
/* Global list of message queues. */
static LIST_HEAD(, mqueue)	mqueue_head =
	LIST_HEAD_INITIALIZER(mqueue_head);

static int	mq_poll_fop(file_t *, int);
static int	mq_close_fop(file_t *);

/* Pseudo access mode: makes mqueue_get() skip the access check. */
#define	FNOVAL	-1

/*
 * File operations assigned to message queue descriptors by sys_mq_open().
 * Only poll and close have mqueue-specific handlers.
 */
static const struct fileops mqops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = fbadop_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = mq_poll_fop,
	.fo_stat = fbadop_stat,
	.fo_close = mq_close_fop,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_drain = fnullop_drain,
};
102 
103 /*
104  * Initialize POSIX message queue subsystem.
105  */
106 void
107 mqueue_sysinit(void)
108 {
109 
110 	mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit,
111 	    0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL);
112 	mutex_init(&mqlist_mtx, MUTEX_DEFAULT, IPL_NONE);
113 }
114 
115 /*
116  * Free the message.
117  */
118 static void
119 mqueue_freemsg(struct mq_msg *msg, const size_t size)
120 {
121 
122 	if (size > MQ_DEF_MSGSIZE)
123 		kmem_free(msg, size);
124 	else
125 		pool_cache_put(mqmsg_cache, msg);
126 }
127 
128 /*
129  * Destroy the message queue.
130  */
131 static void
132 mqueue_destroy(struct mqueue *mq)
133 {
134 	struct mq_msg *msg;
135 
136 	while ((msg = TAILQ_FIRST(&mq->mq_head)) != NULL) {
137 		TAILQ_REMOVE(&mq->mq_head, msg, msg_queue);
138 		mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len);
139 	}
140 	seldestroy(&mq->mq_rsel);
141 	seldestroy(&mq->mq_wsel);
142 	cv_destroy(&mq->mq_send_cv);
143 	cv_destroy(&mq->mq_recv_cv);
144 	mutex_destroy(&mq->mq_mtx);
145 	kmem_free(mq, sizeof(struct mqueue));
146 }
147 
148 /*
149  * Lookup for file name in general list of message queues.
150  *  => locks the message queue
151  */
152 static void *
153 mqueue_lookup(char *name)
154 {
155 	struct mqueue *mq;
156 	KASSERT(mutex_owned(&mqlist_mtx));
157 
158 	LIST_FOREACH(mq, &mqueue_head, mq_list) {
159 		if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) {
160 			mutex_enter(&mq->mq_mtx);
161 			return mq;
162 		}
163 	}
164 
165 	return NULL;
166 }
167 
168 /*
169  * Check access against message queue.
170  */
171 static inline int
172 mqueue_access(struct lwp *l, struct mqueue *mq, int access)
173 {
174 	mode_t acc_mode = 0;
175 
176 	KASSERT(mutex_owned(&mq->mq_mtx));
177 	KASSERT(access != FNOVAL);
178 
179 	/* Note the difference between VREAD/VWRITE and FREAD/FWRITE */
180 	if (access & FREAD)
181 		acc_mode |= VREAD;
182 	if (access & FWRITE)
183 		acc_mode |= VWRITE;
184 
185 	return vaccess(VNON, mq->mq_mode, mq->mq_euid, mq->mq_egid,
186 	    acc_mode, l->l_cred);
187 }
188 
189 /*
190  * Get the mqueue from the descriptor.
191  *  => locks the message queue, if found
192  *  => increments the reference on file entry
193  */
194 static int
195 mqueue_get(struct lwp *l, mqd_t mqd, int access, file_t **fpr)
196 {
197 	file_t *fp;
198 	struct mqueue *mq;
199 
200 	/* Get the file and descriptor */
201 	fp = fd_getfile((int)mqd);
202 	if (fp == NULL)
203 		return EBADF;
204 
205 	/* Increment the reference of file entry, and lock the mqueue */
206 	mq = fp->f_data;
207 	*fpr = fp;
208 	mutex_enter(&mq->mq_mtx);
209 	if (access == FNOVAL) {
210 		KASSERT(mutex_owned(&mq->mq_mtx));
211 		return 0;
212 	}
213 
214 	/* Check the access mode and permission */
215 	if ((fp->f_flag & access) != access || mqueue_access(l, mq, access)) {
216 		mutex_exit(&mq->mq_mtx);
217 		fd_putfile((int)mqd);
218 		return EPERM;
219 	}
220 	return 0;
221 }
222 
/*
 * Convert the absolute timeout given to mq_timedreceive() and
 * mq_timedsend() into a relative number of ticks.
 *
 *  => '*timo' is set to -1 if the timespec is invalid; the callers
 *     fail with EINVAL only if they actually have to block
 *  => '*timo' is never set to 0, as the callers pass 0 to mean
 *     "no timeout"
 *  => 'ts' may be adjusted by itimespecfix()
 *  => always returns 0
 */
int
abstimeout2timo(struct timespec *ts, int *timo)
{
	struct timespec now, left;
	int ticks;

	/*
	 * According to POSIX, the validation check is needed only in case
	 * of blocking.  Thus, record the invalid value now, and fail later.
	 */
	if (itimespecfix(ts) != 0) {
		*timo = -1;
		return 0;
	}

	/* The timeout is absolute: compute how much time is left. */
	getnanotime(&now);
	timespecsub(ts, &now, &left);
	if (left.tv_sec < 0 || (left.tv_sec == 0 && left.tv_nsec <= 0)) {
		/*
		 * Already expired.  Use the shortest possible wait, so
		 * that a caller which has to block times out right away.
		 */
		*timo = 1;
		return 0;
	}

	ticks = tstohz(&left);
	*timo = (ticks > 0) ? ticks : 1;

	return 0;
}
241 
242 static int
243 mq_poll_fop(file_t *fp, int events)
244 {
245 	struct mqueue *mq = fp->f_data;
246 	int revents = 0;
247 
248 	mutex_enter(&mq->mq_mtx);
249 	if (events & (POLLIN | POLLRDNORM)) {
250 		/* Ready for receiving, if there are messages in the queue */
251 		if (mq->mq_attrib.mq_curmsgs)
252 			revents |= (POLLIN | POLLRDNORM);
253 		else
254 			selrecord(curlwp, &mq->mq_rsel);
255 	}
256 	if (events & (POLLOUT | POLLWRNORM)) {
257 		/* Ready for sending, if the message queue is not full */
258 		if (mq->mq_attrib.mq_curmsgs < mq->mq_attrib.mq_maxmsg)
259 			revents |= (POLLOUT | POLLWRNORM);
260 		else
261 			selrecord(curlwp, &mq->mq_wsel);
262 	}
263 	mutex_exit(&mq->mq_mtx);
264 
265 	return revents;
266 }
267 
268 static int
269 mq_close_fop(file_t *fp)
270 {
271 	struct proc *p = curproc;
272 	struct mqueue *mq = fp->f_data;
273 	bool destroy;
274 
275 	mutex_enter(&mqlist_mtx);
276 	mutex_enter(&mq->mq_mtx);
277 
278 	/* Decrease the counters */
279 	p->p_mqueue_cnt--;
280 	mq->mq_refcnt--;
281 
282 	/* Remove notification if registered for this process */
283 	if (mq->mq_notify_proc == p)
284 		mq->mq_notify_proc = NULL;
285 
286 	/*
287 	 * If this is the last reference and mqueue is marked for unlink,
288 	 * remove and later destroy the message queue.
289 	 */
290 	if (mq->mq_refcnt == 0 && (mq->mq_attrib.mq_flags & MQ_UNLINK)) {
291 		LIST_REMOVE(mq, mq_list);
292 		destroy = true;
293 	} else
294 		destroy = false;
295 
296 	mutex_exit(&mq->mq_mtx);
297 	mutex_exit(&mqlist_mtx);
298 
299 	if (destroy)
300 		mqueue_destroy(mq);
301 
302 	return 0;
303 }
304 
305 /*
306  * General mqueue system calls.
307  */
308 
309 int
310 sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap,
311     register_t *retval)
312 {
313 	/* {
314 		syscallarg(const char *) name;
315 		syscallarg(int) oflag;
316 		syscallarg(mode_t) mode;
317 		syscallarg(struct mq_attr) attr;
318 	} */
319 	struct proc *p = l->l_proc;
320 	struct mqueue *mq, *mq_new = NULL;
321 	file_t *fp;
322 	char *name;
323 	int mqd, error, oflag;
324 
325 	/* Check access mode flags */
326 	oflag = SCARG(uap, oflag);
327 	if ((oflag & O_ACCMODE) == 0)
328 		return EINVAL;
329 
330 	/* Get the name from the user-space */
331 	name = kmem_zalloc(MQ_NAMELEN, KM_SLEEP);
332 	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
333 	if (error) {
334 		kmem_free(name, MQ_NAMELEN);
335 		return error;
336 	}
337 
338 	if (oflag & O_CREAT) {
339 		struct cwdinfo *cwdi = p->p_cwdi;
340 		struct mq_attr attr;
341 
342 		/* Check the limit */
343 		if (p->p_mqueue_cnt == mq_open_max) {
344 			kmem_free(name, MQ_NAMELEN);
345 			return EMFILE;
346 		}
347 
348 		/* Check for mqueue attributes */
349 		if (SCARG(uap, attr)) {
350 			error = copyin(SCARG(uap, attr), &attr,
351 				sizeof(struct mq_attr));
352 			if (error) {
353 				kmem_free(name, MQ_NAMELEN);
354 				return error;
355 			}
356 			if (attr.mq_maxmsg <= 0 || attr.mq_msgsize <= 0 ||
357 			    attr.mq_msgsize > mq_max_msgsize) {
358 				kmem_free(name, MQ_NAMELEN);
359 				return EINVAL;
360 			}
361 			attr.mq_curmsgs = 0;
362 		} else {
363 			memset(&attr, 0, sizeof(struct mq_attr));
364 			attr.mq_maxmsg = mq_def_maxmsg;
365 			attr.mq_msgsize =
366 			    MQ_DEF_MSGSIZE - sizeof(struct mq_msg);
367 		}
368 
369 		/*
370 		 * Allocate new mqueue, initialize data structures,
371 		 * copy the name, attributes and set the flag.
372 		 */
373 		mq_new = kmem_zalloc(sizeof(struct mqueue), KM_SLEEP);
374 
375 		mutex_init(&mq_new->mq_mtx, MUTEX_DEFAULT, IPL_NONE);
376 		cv_init(&mq_new->mq_send_cv, "mqsendcv");
377 		cv_init(&mq_new->mq_recv_cv, "mqrecvcv");
378 		TAILQ_INIT(&mq_new->mq_head);
379 		selinit(&mq_new->mq_rsel);
380 		selinit(&mq_new->mq_wsel);
381 
382 		strlcpy(mq_new->mq_name, name, MQ_NAMELEN);
383 		memcpy(&mq_new->mq_attrib, &attr, sizeof(struct mq_attr));
384 		mq_new->mq_attrib.mq_flags = oflag;
385 
386 		/* Store mode and effective UID with GID */
387 		mq_new->mq_mode = ((SCARG(uap, mode) &
388 		    ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT;
389 		mq_new->mq_euid = kauth_cred_geteuid(l->l_cred);
390 		mq_new->mq_egid = kauth_cred_getegid(l->l_cred);
391 	}
392 
393 	/* Allocate file structure and descriptor */
394 	error = fd_allocfile(&fp, &mqd);
395 	if (error) {
396 		if (mq_new)
397 			mqueue_destroy(mq_new);
398 		kmem_free(name, MQ_NAMELEN);
399 		return error;
400 	}
401 	fp->f_type = DTYPE_MQUEUE;
402 	fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE);
403 	fp->f_ops = &mqops;
404 
405 	/* Look up for mqueue with such name */
406 	mutex_enter(&mqlist_mtx);
407 	mq = mqueue_lookup(name);
408 	if (mq) {
409 		KASSERT(mutex_owned(&mq->mq_mtx));
410 
411 		/* Check if mqueue is not marked as unlinking */
412 		if (mq->mq_attrib.mq_flags & MQ_UNLINK) {
413 			error = EACCES;
414 			goto exit;
415 		}
416 		/* Fail if O_EXCL is set, and mqueue already exists */
417 		if ((oflag & O_CREAT) && (oflag & O_EXCL)) {
418 			error = EEXIST;
419 			goto exit;
420 		}
421 		/* Check the permission */
422 		if (mqueue_access(l, mq, fp->f_flag)) {
423 			error = EACCES;
424 			goto exit;
425 		}
426 	} else {
427 		/* Fail if mqueue neither exists, nor we create it */
428 		if ((oflag & O_CREAT) == 0) {
429 			mutex_exit(&mqlist_mtx);
430 			KASSERT(mq_new == NULL);
431 			fd_abort(p, fp, mqd);
432 			kmem_free(name, MQ_NAMELEN);
433 			return ENOENT;
434 		}
435 
436 		/* Check the limit */
437 		if (p->p_mqueue_cnt == mq_open_max) {
438 			error = EMFILE;
439 			goto exit;
440 		}
441 
442 		/* Insert the queue to the list */
443 		mq = mq_new;
444 		mutex_enter(&mq->mq_mtx);
445 		LIST_INSERT_HEAD(&mqueue_head, mq, mq_list);
446 		mq_new = NULL;
447 	}
448 
449 	/* Increase the counters, and make descriptor ready */
450 	p->p_mqueue_cnt++;
451 	mq->mq_refcnt++;
452 	fp->f_data = mq;
453 exit:
454 	mutex_exit(&mq->mq_mtx);
455 	mutex_exit(&mqlist_mtx);
456 
457 	if (mq_new)
458 		mqueue_destroy(mq_new);
459 	if (error) {
460 		fd_abort(p, fp, mqd);
461 	} else {
462 		fd_affix(p, fp, mqd);
463 		*retval = mqd;
464 	}
465 	kmem_free(name, MQ_NAMELEN);
466 
467 	return error;
468 }
469 
470 int
471 sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap,
472     register_t *retval)
473 {
474 
475 	return sys_close(l, (const void *)uap, retval);
476 }
477 
/*
 * Primary mq_receive1() function.
 *
 * Removes the first message from the queue referred to by 'mqdes' and
 * copies its payload to the user buffer 'msg_ptr' and, if 'msg_prio' is
 * not NULL, its priority to 'msg_prio'.
 *
 *  => 'msg_len' must not be smaller than the mq_msgsize of the queue,
 *     otherwise EMSGSIZE is returned
 *  => 't' is the timeout handed to cv_timedwait_sig() while the queue
 *     is empty; a negative value yields EINVAL once blocking is needed
 *  => on success the length of the message is stored in '*mlen'
 *  => returns 0 or an errno value
 */
int
mq_receive1(struct lwp *l, mqd_t mqdes, void *msg_ptr, size_t msg_len,
    unsigned *msg_prio, int t, ssize_t *mlen)
{
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_msg *msg = NULL;
	int error;

	/* Get the message queue */
	error = mqueue_get(l, mqdes, FREAD, &fp);
	if (error)
		return error;
	/* From here on mq_mtx and a reference on the file are held */
	mq = fp->f_data;

	/* Check the message size limits */
	if (msg_len < mq->mq_attrib.mq_msgsize) {
		error = EMSGSIZE;
		goto error;
	}

	/* Check if queue is empty */
	while (TAILQ_EMPTY(&mq->mq_head)) {
		if (mq->mq_attrib.mq_flags & O_NONBLOCK) {
			error = EAGAIN;
			goto error;
		}
		/* Negative timeout: see abstimeout2timo() */
		if (t < 0) {
			error = EINVAL;
			goto error;
		}
		/*
		 * Block until someone sends the message.
		 * While doing this, notification should not be sent.
		 */
		mq->mq_attrib.mq_flags |= MQ_RECEIVE;
		error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t);
		mq->mq_attrib.mq_flags &= ~MQ_RECEIVE;
		/*
		 * Give up on timeout, on any other wait error, or if the
		 * queue got unlinked in the meantime; everything except
		 * the timeout is reported as EINTR.
		 */
		if (error || (mq->mq_attrib.mq_flags & MQ_UNLINK)) {
			error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR;
			goto error;
		}
	}

	/* Remove the message from the queue */
	msg = TAILQ_FIRST(&mq->mq_head);
	KASSERT(msg != NULL);
	TAILQ_REMOVE(&mq->mq_head, msg, msg_queue);

	/* Decrement the counter and signal waiter, if any */
	mq->mq_attrib.mq_curmsgs--;
	cv_signal(&mq->mq_recv_cv);

	/* Ready for sending now */
	selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0);
error:
	/* Common exit: also reached on success, with error == 0 */
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)mqdes);
	if (error)
		return error;

	/*
	 * Copy the data to the user-space.
	 * Note: According to POSIX, no message should be removed from the
	 * queue in case of fail - this would be violated.
	 */
	*mlen = msg->msg_len;
	error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len);
	if (error == 0 && msg_prio)
		error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned));
	/* The message is already off the queue: free it in any case */
	mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len);

	return error;
}
555 
556 int
557 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap,
558     register_t *retval)
559 {
560 	/* {
561 		syscallarg(mqd_t) mqdes;
562 		syscallarg(char *) msg_ptr;
563 		syscallarg(size_t) msg_len;
564 		syscallarg(unsigned *) msg_prio;
565 	} */
566 	int error;
567 	ssize_t mlen;
568 
569 	error = mq_receive1(l, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
570 	    SCARG(uap, msg_len), SCARG(uap, msg_prio), 0, &mlen);
571 	if (error == 0)
572 		*retval = mlen;
573 
574 	return error;
575 }
576 
577 int
578 sys___mq_timedreceive50(struct lwp *l,
579     const struct sys___mq_timedreceive50_args *uap, register_t *retval)
580 {
581 	/* {
582 		syscallarg(mqd_t) mqdes;
583 		syscallarg(char *) msg_ptr;
584 		syscallarg(size_t) msg_len;
585 		syscallarg(unsigned *) msg_prio;
586 		syscallarg(const struct timespec *) abs_timeout;
587 	} */
588 	int error, t;
589 	ssize_t mlen;
590 	struct timespec ts;
591 
592 	/* Get and convert time value */
593 	if (SCARG(uap, abs_timeout)) {
594 		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
595 		if (error)
596 			return error;
597 
598 		error = abstimeout2timo(&ts, &t);
599 		if (error)
600 			return error;
601 	} else
602 		t = 0;
603 
604 	error = mq_receive1(l, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
605 	    SCARG(uap, msg_len), SCARG(uap, msg_prio), t, &mlen);
606 	if (error == 0)
607 		*retval = mlen;
608 
609 	return error;
610 }
611 
612 /*
613  * Primary mq_send1() function.
614  */
615 int
616 mq_send1(struct lwp *l, mqd_t mqdes, const char *msg_ptr, size_t msg_len,
617     unsigned msg_prio, int t)
618 {
619 	file_t *fp = NULL;
620 	struct mqueue *mq;
621 	struct mq_msg *msg, *pos_msg;
622 	struct proc *notify = NULL;
623 	ksiginfo_t ksi;
624 	size_t size;
625 	int error;
626 
627 	/* Check the priority range */
628 	if (msg_prio >= mq_prio_max)
629 		return EINVAL;
630 
631 	/* Allocate a new message */
632 	size = sizeof(struct mq_msg) + msg_len;
633 	if (size > mq_max_msgsize)
634 		return EMSGSIZE;
635 
636 	if (size > MQ_DEF_MSGSIZE)
637 		msg = kmem_alloc(size, KM_SLEEP);
638 	else
639 		msg = pool_cache_get(mqmsg_cache, PR_WAITOK);
640 
641 	/* Get the data from user-space */
642 	error = copyin(msg_ptr, msg->msg_ptr, msg_len);
643 	if (error) {
644 		mqueue_freemsg(msg, size);
645 		return error;
646 	}
647 	msg->msg_len = msg_len;
648 	msg->msg_prio = msg_prio;
649 
650 	/* Get the mqueue */
651 	error = mqueue_get(l, mqdes, FWRITE, &fp);
652 	if (error) {
653 		mqueue_freemsg(msg, size);
654 		return error;
655 	}
656 	mq = fp->f_data;
657 
658 	/* Check the message size limit */
659 	if (msg_len <= 0 || msg_len > mq->mq_attrib.mq_msgsize) {
660 		error = EMSGSIZE;
661 		goto error;
662 	}
663 
664 	/* Check if queue is full */
665 	while (mq->mq_attrib.mq_curmsgs >= mq->mq_attrib.mq_maxmsg) {
666 		if (mq->mq_attrib.mq_flags & O_NONBLOCK) {
667 			error = EAGAIN;
668 			goto error;
669 		}
670 		if (t < 0) {
671 			error = EINVAL;
672 			goto error;
673 		}
674 		/* Block until queue becomes available */
675 		error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t);
676 		if (error || (mq->mq_attrib.mq_flags & MQ_UNLINK)) {
677 			error = (error == EWOULDBLOCK) ? ETIMEDOUT : error;
678 			goto error;
679 		}
680 	}
681 	KASSERT(mq->mq_attrib.mq_curmsgs < mq->mq_attrib.mq_maxmsg);
682 
683 	/* Insert message into the queue, according to the priority */
684 	TAILQ_FOREACH(pos_msg, &mq->mq_head, msg_queue)
685 		if (msg->msg_prio > pos_msg->msg_prio)
686 			break;
687 	if (pos_msg == NULL)
688 		TAILQ_INSERT_TAIL(&mq->mq_head, msg, msg_queue);
689 	else
690 		TAILQ_INSERT_BEFORE(pos_msg, msg, msg_queue);
691 
692 	/* Check for the notify */
693 	if (mq->mq_attrib.mq_curmsgs == 0 && mq->mq_notify_proc &&
694 	    (mq->mq_attrib.mq_flags & MQ_RECEIVE) == 0) {
695 		/* Initialize the signal */
696 		KSI_INIT(&ksi);
697 		ksi.ksi_signo = mq->mq_sig_notify.sigev_signo;
698 		ksi.ksi_code = SI_MESGQ;
699 		ksi.ksi_value = mq->mq_sig_notify.sigev_value;
700 		/* Unregister the process */
701 		notify = mq->mq_notify_proc;
702 		mq->mq_notify_proc = NULL;
703 	}
704 
705 	/* Increment the counter and signal waiter, if any */
706 	mq->mq_attrib.mq_curmsgs++;
707 	cv_signal(&mq->mq_send_cv);
708 
709 	/* Ready for receiving now */
710 	selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0);
711 error:
712 	mutex_exit(&mq->mq_mtx);
713 	fd_putfile((int)mqdes);
714 
715 	if (error) {
716 		mqueue_freemsg(msg, size);
717 	} else if (notify) {
718 		/* Send the notify, if needed */
719 		mutex_enter(proc_lock);
720 		kpsignal(notify, &ksi, NULL);
721 		mutex_exit(proc_lock);
722 	}
723 
724 	return error;
725 }
726 
727 int
728 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap,
729     register_t *retval)
730 {
731 	/* {
732 		syscallarg(mqd_t) mqdes;
733 		syscallarg(const char *) msg_ptr;
734 		syscallarg(size_t) msg_len;
735 		syscallarg(unsigned) msg_prio;
736 	} */
737 
738 	return mq_send1(l, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
739 	    SCARG(uap, msg_len), SCARG(uap, msg_prio), 0);
740 }
741 
742 int
743 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap,
744     register_t *retval)
745 {
746 	/* {
747 		syscallarg(mqd_t) mqdes;
748 		syscallarg(const char *) msg_ptr;
749 		syscallarg(size_t) msg_len;
750 		syscallarg(unsigned) msg_prio;
751 		syscallarg(const struct timespec *) abs_timeout;
752 	} */
753 	int t;
754 	struct timespec ts;
755 	int error;
756 
757 	/* Get and convert time value */
758 	if (SCARG(uap, abs_timeout)) {
759 		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
760 		if (error)
761 			return error;
762 		error = abstimeout2timo(&ts, &t);
763 		if (error)
764 			return error;
765 	} else
766 		t = 0;
767 
768 	return mq_send1(l, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
769 	    SCARG(uap, msg_len), SCARG(uap, msg_prio), t);
770 }
771 
772 int
773 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap,
774     register_t *retval)
775 {
776 	/* {
777 		syscallarg(mqd_t) mqdes;
778 		syscallarg(const struct sigevent *) notification;
779 	} */
780 	file_t *fp = NULL;
781 	struct mqueue *mq;
782 	struct sigevent sig;
783 	int error;
784 
785 	if (SCARG(uap, notification)) {
786 		/* Get the signal from user-space */
787 		error = copyin(SCARG(uap, notification), &sig,
788 		    sizeof(struct sigevent));
789 		if (error)
790 			return error;
791 	}
792 
793 	error = mqueue_get(l, SCARG(uap, mqdes), FNOVAL, &fp);
794 	if (error)
795 		return error;
796 	mq = fp->f_data;
797 
798 	if (SCARG(uap, notification)) {
799 		/* Register notification: set the signal and target process */
800 		if (mq->mq_notify_proc == NULL) {
801 			memcpy(&mq->mq_sig_notify, &sig,
802 			    sizeof(struct sigevent));
803 			mq->mq_notify_proc = l->l_proc;
804 		} else {
805 			/* Fail if someone else already registered */
806 			error = EBUSY;
807 		}
808 	} else {
809 		/* Unregister the notification */
810 		mq->mq_notify_proc = NULL;
811 	}
812 	mutex_exit(&mq->mq_mtx);
813 	fd_putfile((int)SCARG(uap, mqdes));
814 
815 	return error;
816 }
817 
818 int
819 sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap,
820     register_t *retval)
821 {
822 	/* {
823 		syscallarg(mqd_t) mqdes;
824 		syscallarg(struct mq_attr *) mqstat;
825 	} */
826 	file_t *fp = NULL;
827 	struct mqueue *mq;
828 	struct mq_attr attr;
829 	int error;
830 
831 	/* Get the message queue */
832 	error = mqueue_get(l, SCARG(uap, mqdes), FNOVAL, &fp);
833 	if (error)
834 		return error;
835 	mq = fp->f_data;
836 	memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
837 	mutex_exit(&mq->mq_mtx);
838 	fd_putfile((int)SCARG(uap, mqdes));
839 
840 	return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr));
841 }
842 
843 int
844 sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap,
845     register_t *retval)
846 {
847 	/* {
848 		syscallarg(mqd_t) mqdes;
849 		syscallarg(const struct mq_attr *) mqstat;
850 		syscallarg(struct mq_attr *) omqstat;
851 	} */
852 	file_t *fp = NULL;
853 	struct mqueue *mq;
854 	struct mq_attr attr;
855 	int error, nonblock;
856 
857 	error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr));
858 	if (error)
859 		return error;
860 	nonblock = (attr.mq_flags & O_NONBLOCK);
861 
862 	/* Get the message queue */
863 	error = mqueue_get(l, SCARG(uap, mqdes), FNOVAL, &fp);
864 	if (error)
865 		return error;
866 	mq = fp->f_data;
867 
868 	/* Copy the old attributes, if needed */
869 	if (SCARG(uap, omqstat))
870 		memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
871 
872 	/* Ignore everything, except O_NONBLOCK */
873 	if (nonblock)
874 		mq->mq_attrib.mq_flags |= O_NONBLOCK;
875 	else
876 		mq->mq_attrib.mq_flags &= ~O_NONBLOCK;
877 
878 	mutex_exit(&mq->mq_mtx);
879 	fd_putfile((int)SCARG(uap, mqdes));
880 
881 	/*
882 	 * Copy the data to the user-space.
883 	 * Note: According to POSIX, the new attributes should not be set in
884 	 * case of fail - this would be violated.
885 	 */
886 	if (SCARG(uap, omqstat))
887 		error = copyout(&attr, SCARG(uap, omqstat),
888 		    sizeof(struct mq_attr));
889 
890 	return error;
891 }
892 
893 int
894 sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap,
895     register_t *retval)
896 {
897 	/* {
898 		syscallarg(const char *) name;
899 	} */
900 	struct mqueue *mq;
901 	char *name;
902 	int error, refcnt = 0;
903 
904 	/* Get the name from the user-space */
905 	name = kmem_zalloc(MQ_NAMELEN, KM_SLEEP);
906 	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
907 	if (error) {
908 		kmem_free(name, MQ_NAMELEN);
909 		return error;
910 	}
911 
912 	/* Lookup for this file */
913 	mutex_enter(&mqlist_mtx);
914 	mq = mqueue_lookup(name);
915 	if (mq == NULL) {
916 		error = ENOENT;
917 		goto error;
918 	}
919 
920 	/* Check the permissions */
921 	if (mqueue_access(l, mq, FWRITE)) {
922 		mutex_exit(&mq->mq_mtx);
923 		error = EACCES;
924 		goto error;
925 	}
926 
927 	/* Mark message queue as unlinking, before leaving the window */
928 	mq->mq_attrib.mq_flags |= MQ_UNLINK;
929 
930 	/* Wake up all waiters, if there are such */
931 	cv_broadcast(&mq->mq_send_cv);
932 	cv_broadcast(&mq->mq_recv_cv);
933 
934 	selnotify(&mq->mq_rsel, POLLHUP, 0);
935 	selnotify(&mq->mq_wsel, POLLHUP, 0);
936 
937 	refcnt = mq->mq_refcnt;
938 	if (refcnt == 0)
939 		LIST_REMOVE(mq, mq_list);
940 
941 	mutex_exit(&mq->mq_mtx);
942 error:
943 	mutex_exit(&mqlist_mtx);
944 
945 	/*
946 	 * If there are no references - destroy the message
947 	 * queue, otherwise, the last mq_close() will do that.
948 	 */
949 	if (error == 0 && refcnt == 0)
950 		mqueue_destroy(mq);
951 
952 	kmem_free(name, MQ_NAMELEN);
953 	return error;
954 }
955 
956 /*
957  * SysCtl.
958  */
959 
/*
 * Create the sysctl nodes of the message queue subsystem: the read-only
 * kern.posix_msg value and the kern.mqueue node, whose read-write
 * members are backed by the system-wide limit variables.
 */
SYSCTL_SETUP(sysctl_mqueue_setup, "sysctl mqueue setup")
{
	const struct sysctlnode *node = NULL;

	/* The "kern" node, under which the nodes below are created */
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	/* kern.posix_msg: immediate value of _POSIX_MESSAGE_PASSING */
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_msg",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Message Passing option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_MESSAGE_PASSING, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	/* kern.mqueue: parent of the tunables; returned through 'node' */
	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "mqueue",
		SYSCTL_DESCR("Message queue options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Without the parent node there is nothing to attach to */
	if (node == NULL)
		return;

	/* The tunables: each one refers to a limit variable of this file */
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_open_max",
		SYSCTL_DESCR("Maximal number of message queue descriptors "
			     "that process could open"),
		NULL, 0, &mq_open_max, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_prio_max",
		SYSCTL_DESCR("Maximal priority of the message"),
		NULL, 0, &mq_prio_max, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_max_msgsize",
		SYSCTL_DESCR("Maximal allowed size of the message"),
		NULL, 0, &mq_max_msgsize, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_def_maxmsg",
		SYSCTL_DESCR("Default maximal message count"),
		NULL, 0, &mq_def_maxmsg, 0,
		CTL_CREATE, CTL_EOL);
}
1013 
1014 /*
1015  * Debugging.
1016  */
1017 #if defined(DDB)
1018 
1019 void
1020 mqueue_print_list(void (*pr)(const char *, ...))
1021 {
1022 	struct mqueue *mq;
1023 
1024 	(*pr)("Global list of the message queues:\n");
1025 	(*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n",
1026 	    "Name", "Ptr", "Mode", "Flags",  "Ref",
1027 	    "MaxMsg", "MsgSze", "CurMsg");
1028 	LIST_FOREACH(mq, &mqueue_head, mq_list) {
1029 		(*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n",
1030 		    mq->mq_name, mq, mq->mq_mode,
1031 		    mq->mq_attrib.mq_flags, mq->mq_refcnt,
1032 		    mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize,
1033 		    mq->mq_attrib.mq_curmsgs);
1034 	}
1035 }
1036 
1037 #endif /* defined(DDB) */
1038