/*	$NetBSD: sys_mqueue.c,v 1.39 2015/06/29 15:44:45 christos Exp $	*/

/*
 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX message queues.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 *
 * Locking
 *
 * Global list of message queues (mqueue_head) is protected by mqlist_lock.
 * Each message queue and its members are protected by mqueue::mq_mtx.
 * Note that proc_t::p_mqueue_cnt is updated atomically.
 *
 * Lock order:
 *
 *	mqlist_lock ->
 *		mqueue::mq_mtx
 */
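/*
 * Illustrative userland usage of this interface (a sketch for orientation
 * only, not part of the kernel sources; the queue name "/example" and the
 * attribute values are arbitrary).  The libc wrappers end up in the mq_*
 * system calls established below:
 *
 *	struct mq_attr attr = { .mq_maxmsg = 32, .mq_msgsize = 128 };
 *	mqd_t mqd = mq_open("/example", O_CREAT | O_RDWR, 0600, &attr);
 *
 *	(void)mq_send(mqd, "hello", 5, 1);
 *
 *	char buf[128];
 *	unsigned prio;
 *	ssize_t len = mq_receive(mqd, buf, sizeof(buf), &prio);
 *
 *	(void)mq_close(mqd);
 *	(void)mq_unlink("/example");
 */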
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.39 2015/06/29 15:44:45 christos Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/atomic.h>

#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/lwp.h>
#include <sys/mqueue.h>
#include <sys/module.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syscall.h>
#include <sys/syscallvar.h>
#include <sys/syscallargs.h>

#include <miscfs/genfs/genfs.h>

MODULE(MODULE_CLASS_MISC, mqueue, NULL);

/* System-wide limits. */
static u_int			mq_open_max = MQ_OPEN_MAX;
static u_int			mq_prio_max = MQ_PRIO_MAX;
static u_int			mq_max_msgsize = 16 * MQ_DEF_MSGSIZE;
static u_int			mq_def_maxmsg = 32;
static u_int			mq_max_maxmsg = 16 * 32;

static pool_cache_t		mqmsg_cache	__read_mostly;
static kmutex_t			mqlist_lock	__cacheline_aligned;
static LIST_HEAD(, mqueue)	mqueue_head	__cacheline_aligned;
static struct sysctllog *	mqsysctl_log;

static kauth_listener_t		mq_listener;

static int	mqueue_sysinit(void);
static int	mqueue_sysfini(bool);
static int	mqueue_sysctl_init(void);
static int	mq_poll_fop(file_t *, int);
static int	mq_stat_fop(file_t *, struct stat *);
static int	mq_close_fop(file_t *);

static const struct fileops mqops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = fbadop_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = mq_poll_fop,
	.fo_stat = mq_stat_fop,
	.fo_close = mq_close_fop,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
};

static const struct syscall_package mqueue_syscalls[] = {
	{ SYS_mq_open, 0, (sy_call_t *)sys_mq_open },
	{ SYS_mq_close, 0, (sy_call_t *)sys_mq_close },
	{ SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink },
	{ SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr },
	{ SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr },
	{ SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify },
	{ SYS_mq_send, 0, (sy_call_t *)sys_mq_send },
	{ SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive },
	{ SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 },
	{ SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 },
	{ 0, 0, NULL }
};

static int
mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	mqueue_t *mq;
	int result;

	if (action != KAUTH_SYSTEM_MQUEUE)
		return KAUTH_RESULT_DEFER;

	result = KAUTH_RESULT_DEFER;

	mq = arg1;

	if (kauth_cred_geteuid(cred) == mq->mq_euid)
		result = KAUTH_RESULT_ALLOW;

	return result;
}

/*
 * Initialisation and unloading of POSIX message queue subsystem.
 */

static int
mqueue_sysinit(void)
{
	int error;

	mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit,
	    0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL);
	mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE);
	LIST_INIT(&mqueue_head);

	error = mqueue_sysctl_init();
	if (error) {
		(void)mqueue_sysfini(false);
		return error;
	}
	error = syscall_establish(NULL, mqueue_syscalls);
	if (error) {
		(void)mqueue_sysfini(false);
	}
	mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
	    mq_listener_cb, NULL);
	return error;
}

static int
mqueue_sysfini(bool interface)
{

	if (interface) {
		int error;
		bool inuse;

		/* Stop syscall activity. */
		error = syscall_disestablish(NULL, mqueue_syscalls);
		if (error)
			return error;
		/* Check if there are any message queues in use. */
		mutex_enter(&mqlist_lock);
		inuse = !LIST_EMPTY(&mqueue_head);
		mutex_exit(&mqlist_lock);
		if (inuse) {
			error = syscall_establish(NULL, mqueue_syscalls);
			KASSERT(error == 0);
			return EBUSY;
		}
	}

	if (mqsysctl_log != NULL)
		sysctl_teardown(&mqsysctl_log);

	kauth_unlisten_scope(mq_listener);

	mutex_destroy(&mqlist_lock);
	pool_cache_destroy(mqmsg_cache);
	return 0;
}

/*
 * Module interface.
 */
static int
mqueue_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		return mqueue_sysinit();
	case MODULE_CMD_FINI:
		return mqueue_sysfini(true);
	default:
		return ENOTTY;
	}
}

/*
 * Free the message.
 */
static void
mqueue_freemsg(struct mq_msg *msg, const size_t size)
{

	if (size > MQ_DEF_MSGSIZE) {
		kmem_free(msg, size);
	} else {
		pool_cache_put(mqmsg_cache, msg);
	}
}

/*
 * Destroy the message queue.
 */
static void
mqueue_destroy(struct mqueue *mq)
{
	struct mq_msg *msg;
	size_t msz;
	u_int i;

	/* Note MQ_PQSIZE + 1. */
	for (i = 0; i <= MQ_PQSIZE; i++) {
		while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) {
			TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue);
			msz = sizeof(struct mq_msg) + msg->msg_len;
			mqueue_freemsg(msg, msz);
		}
	}
	if (mq->mq_name) {
		kmem_free(mq->mq_name, MQ_NAMELEN);
	}
	seldestroy(&mq->mq_rsel);
	seldestroy(&mq->mq_wsel);
	cv_destroy(&mq->mq_send_cv);
	cv_destroy(&mq->mq_recv_cv);
	mutex_destroy(&mq->mq_mtx);
	kmem_free(mq, sizeof(struct mqueue));
}

/*
 * mqueue_lookup: lookup for file name in general list of message queues.
 *
 * => locks the message queue on success
 */
static mqueue_t *
mqueue_lookup(const char *name)
{
	mqueue_t *mq;

	KASSERT(mutex_owned(&mqlist_lock));

	LIST_FOREACH(mq, &mqueue_head, mq_list) {
		if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) {
			mutex_enter(&mq->mq_mtx);
			return mq;
		}
	}
	return NULL;
}

/*
 * mqueue_get: get the mqueue from the descriptor.
 *
 * => locks the message queue, if found.
 * => holds a reference on the file descriptor.
 */
int
mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret)
{
	const int fd = (int)mqd;
	mqueue_t *mq;
	file_t *fp;

	fp = fd_getfile(fd);
	if (__predict_false(fp == NULL)) {
		return EBADF;
	}
	if (__predict_false(fp->f_type != DTYPE_MQUEUE)) {
		fd_putfile(fd);
		return EBADF;
	}
	if (fflag && (fp->f_flag & fflag) == 0) {
		fd_putfile(fd);
		return EBADF;
	}
	mq = fp->f_mqueue;
	mutex_enter(&mq->mq_mtx);

	*mqret = mq;
	return 0;
}
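/*
 * Note on the priority queue layout used by the send/receive paths below
 * (a descriptive sketch; the constants are defined in <sys/mqueue.h>):
 * a message with priority below MQ_PQSIZE is appended to the queue with
 * index MQ_PQSIZE - msg_prio, so higher priorities map to lower indexes,
 * and mq_bitmap tracks the non-empty queues (bit idx - 1).  For example,
 * with MQ_PQSIZE of 32, priority 31 maps to index 1 and priority 0 maps
 * to index 32.  The receiver can therefore use ffs() on the bitmap to
 * locate the highest-priority pending message.  Priorities of MQ_PQSIZE
 * and above - possible only after mq_prio_max has been raised via sysctl -
 * fall back to the sorted reserved queue MQ_PQRESQ, maintained by
 * mqueue_linear_insert() below.
 */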
/*
 * mqueue_linear_insert: perform linear insert according to the message
 * priority into the reserved queue (MQ_PQRESQ).  Reserved queue is a
 * sorted list used only when mq_prio_max is increased via sysctl.
 */
static inline void
mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg)
{
	struct mq_msg *mit;

	TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) {
		if (msg->msg_prio > mit->msg_prio)
			break;
	}
	if (mit == NULL) {
		TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue);
	} else {
		TAILQ_INSERT_BEFORE(mit, msg, msg_queue);
	}
}

static int
mq_stat_fop(file_t *fp, struct stat *st)
{
	struct mqueue *mq = fp->f_mqueue;

	memset(st, 0, sizeof(*st));

	mutex_enter(&mq->mq_mtx);
	st->st_mode = mq->mq_mode;
	st->st_uid = mq->mq_euid;
	st->st_gid = mq->mq_egid;
	st->st_atimespec = mq->mq_atime;
	st->st_mtimespec = mq->mq_mtime;
	st->st_ctimespec = st->st_birthtimespec = mq->mq_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	mutex_exit(&mq->mq_mtx);

	return 0;
}

static int
mq_poll_fop(file_t *fp, int events)
{
	struct mqueue *mq = fp->f_mqueue;
	struct mq_attr *mqattr;
	int revents = 0;

	mutex_enter(&mq->mq_mtx);
	mqattr = &mq->mq_attrib;
	if (events & (POLLIN | POLLRDNORM)) {
		/* Ready for receiving, if there are messages in the queue. */
		if (mqattr->mq_curmsgs)
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(curlwp, &mq->mq_rsel);
	}
	if (events & (POLLOUT | POLLWRNORM)) {
		/* Ready for sending, if the message queue is not full. */
		if (mqattr->mq_curmsgs < mqattr->mq_maxmsg)
			revents |= events & (POLLOUT | POLLWRNORM);
		else
			selrecord(curlwp, &mq->mq_wsel);
	}
	mutex_exit(&mq->mq_mtx);

	return revents;
}

static int
mq_close_fop(file_t *fp)
{
	proc_t *p = curproc;
	mqueue_t *mq = fp->f_mqueue;
	bool destroy = false;

	mutex_enter(&mq->mq_mtx);
	KASSERT(mq->mq_refcnt > 0);
	if (--mq->mq_refcnt == 0) {
		/* Destroy if the last reference and unlinked. */
		destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0;
	}
	mutex_exit(&mq->mq_mtx);

	if (destroy) {
		mqueue_destroy(mq);
	}
	atomic_dec_uint(&p->p_mqueue_cnt);
	return 0;
}

static int
mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred)
{
	mode_t acc_mode = 0;

	/* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */
	if (access & FREAD) {
		acc_mode |= VREAD;
	}
	if (access & FWRITE) {
		acc_mode |= VWRITE;
	}
	if (genfs_can_access(VNON, mq->mq_mode, mq->mq_euid,
	    mq->mq_egid, acc_mode, cred)) {
		return EACCES;
	}
	return 0;
}

static int
mqueue_create(lwp_t *l, char *name, struct mq_attr *attr, mode_t mode,
    int oflag, mqueue_t **mqret)
{
	proc_t *p = l->l_proc;
	struct cwdinfo *cwdi = p->p_cwdi;
	mqueue_t *mq;
	u_int i;

	/* Pre-check the limit. */
	if (p->p_mqueue_cnt >= mq_open_max) {
		return EMFILE;
	}

	/* Empty name is invalid. */
	if (name[0] == '\0') {
		return EINVAL;
	}

	/* Check for mqueue attributes. */
	if (attr) {
		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > mq_max_maxmsg ||
		    attr->mq_msgsize <= 0 ||
		    attr->mq_msgsize > mq_max_msgsize) {
			return EINVAL;
		}
		attr->mq_curmsgs = 0;
	}

	/*
	 * Allocate new message queue, initialize data structures, copy the
	 * name attributes.  Note that the initial reference is set here.
	 */
	mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP);

	mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&mq->mq_send_cv, "mqsendcv");
	cv_init(&mq->mq_recv_cv, "mqrecvcv");
	for (i = 0; i < (MQ_PQSIZE + 1); i++) {
		TAILQ_INIT(&mq->mq_head[i]);
	}
	selinit(&mq->mq_rsel);
	selinit(&mq->mq_wsel);
	mq->mq_name = name;
	mq->mq_refcnt = 1;

	if (attr != NULL) {
		memcpy(&mq->mq_attrib, attr, sizeof(struct mq_attr));
	} else {
		memset(&mq->mq_attrib, 0, sizeof(struct mq_attr));
		mq->mq_attrib.mq_maxmsg = mq_def_maxmsg;
		mq->mq_attrib.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg);
	}

	CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0);
	mq->mq_attrib.mq_flags = (O_MASK & oflag);

	/* Store mode and effective UID with GID. */
	mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT;
	mq->mq_euid = kauth_cred_geteuid(l->l_cred);
	mq->mq_egid = kauth_cred_getegid(l->l_cred);

	*mqret = mq;
	return 0;
}

/*
 * Helper function for mq_open() - note that "u_name" is a userland pointer,
 * while "attr" is a kernel pointer!
 */
int
mq_handle_open(struct lwp *l, const char *u_name, int oflag, mode_t mode,
    struct mq_attr *attr, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct mqueue *mq, *mq_new = NULL;
	int mqd, error;
	file_t *fp;
	char *name;

	/* Get the name from the user-space. */
	name = kmem_alloc(MQ_NAMELEN, KM_SLEEP);
	error = copyinstr(u_name, name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kmem_free(name, MQ_NAMELEN);
		return error;
	}

	/* Allocate file structure and descriptor. */
	error = fd_allocfile(&fp, &mqd);
	if (error) {
		kmem_free(name, MQ_NAMELEN);
		return error;
	}
	fp->f_type = DTYPE_MQUEUE;
	fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE);
	fp->f_ops = &mqops;

	if (oflag & O_CREAT) {
		/* Create a new message queue. */
		error = mqueue_create(l, name, attr, mode, oflag, &mq_new);
		if (error) {
			goto err;
		}
		KASSERT(mq_new != NULL);
	}

	/* Lookup for a message queue with such name. */
	mutex_enter(&mqlist_lock);
	mq = mqueue_lookup(name);
	if (mq) {
		KASSERT(mutex_owned(&mq->mq_mtx));
		mutex_exit(&mqlist_lock);

		/* Check for exclusive create. */
		if (oflag & O_EXCL) {
			mutex_exit(&mq->mq_mtx);
			error = EEXIST;
			goto err;
		}

		/* Verify permissions. */
		if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) {
			mutex_exit(&mq->mq_mtx);
			error = EACCES;
			goto err;
		}

		/* If we have the access, add a new reference. */
		mq->mq_refcnt++;
		mutex_exit(&mq->mq_mtx);
	} else {
		/* Fail if not found and not creating. */
		if ((oflag & O_CREAT) == 0) {
			mutex_exit(&mqlist_lock);
			KASSERT(mq_new == NULL);
			error = ENOENT;
			goto err;
		}

		/* Account and check for the limit. */
		if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) {
			mutex_exit(&mqlist_lock);
			atomic_dec_uint(&p->p_mqueue_cnt);
			error = EMFILE;
			goto err;
		}

		/* Initial timestamps. */
		mq = mq_new;
		getnanotime(&mq->mq_btime);
		mq->mq_atime = mq->mq_mtime = mq->mq_btime;

		/*
		 * Finally, insert message queue into the list.
		 * Note: it already has the initial reference.
		 */
		LIST_INSERT_HEAD(&mqueue_head, mq, mq_list);
		mutex_exit(&mqlist_lock);

		mq_new = NULL;
		name = NULL;
	}
	KASSERT(mq != NULL);
	fp->f_mqueue = mq;
	fd_affix(p, fp, mqd);
	*retval = mqd;
err:
	if (error) {
		fd_abort(p, fp, mqd);
	}
	if (mq_new) {
		/* Note: will free the 'name'. */
		mqueue_destroy(mq_new);
	} else if (name) {
		kmem_free(name, MQ_NAMELEN);
	}
	return error;
}

/*
 * General mqueue system calls.
 */

int
sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) name;
		syscallarg(int) oflag;
		syscallarg(mode_t) mode;
		syscallarg(struct mq_attr) attr;
	} */
	struct mq_attr *attr = NULL, a;
	int error;

	if ((SCARG(uap, oflag) & O_CREAT) != 0 && SCARG(uap, attr) != NULL) {
		error = copyin(SCARG(uap, attr), &a, sizeof(a));
		if (error)
			return error;
		attr = &a;
	}

	return mq_handle_open(l, SCARG(uap, name), SCARG(uap, oflag),
	    SCARG(uap, mode), attr, retval);
}

int
sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap,
    register_t *retval)
{

	return sys_close(l, (const void *)uap, retval);
}

/*
 * Primary mq_recv1() function.
 */
int
mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio,
    struct timespec *ts, ssize_t *mlen)
{
	struct mqueue *mq;
	struct mq_msg *msg = NULL;
	struct mq_attr *mqattr;
	u_int idx;
	int error;

	error = mqueue_get(mqdes, FREAD, &mq);
	if (error) {
		return error;
	}
	getnanotime(&mq->mq_atime);
	mqattr = &mq->mq_attrib;

	/* Check the message size limits */
	if (msg_len < mqattr->mq_msgsize) {
		error = EMSGSIZE;
		goto error;
	}

	/* Check if queue is empty */
	while (mqattr->mq_curmsgs == 0) {
		int t;

		if (mqattr->mq_flags & O_NONBLOCK) {
			error = EAGAIN;
			goto error;
		}
		if (ts) {
			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t,
			    NULL);
			if (error)
				goto error;
		} else
			t = 0;
		/*
		 * Block until someone sends the message.
		 * While doing this, notification should not be sent.
		 */
		mqattr->mq_flags |= MQ_RECEIVE;
		error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t);
		mqattr->mq_flags &= ~MQ_RECEIVE;
		if (error || (mqattr->mq_flags & MQ_UNLINKED)) {
			error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR;
			goto error;
		}
	}

	/*
	 * Find the highest priority message, and remove it from the queue.
	 * At first, reserved queue is checked, bitmap is next.
	 */
	msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]);
	if (__predict_true(msg == NULL)) {
		idx = ffs(mq->mq_bitmap);
		msg = TAILQ_FIRST(&mq->mq_head[idx]);
		KASSERT(msg != NULL);
	} else {
		idx = MQ_PQRESQ;
	}
	TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue);

	/* Unmark the bit, if last message. */
	if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) {
		KASSERT((MQ_PQSIZE - idx) == msg->msg_prio);
		mq->mq_bitmap &= ~(1 << --idx);
	}

	/* Decrement the counter and signal waiter, if any */
	mqattr->mq_curmsgs--;
	cv_signal(&mq->mq_recv_cv);

	/* Ready for sending now */
	selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0);
error:
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)mqdes);
	if (error)
		return error;

	/*
	 * Copy the data to the user-space.
	 * Note: According to POSIX, no message should be removed from the
	 * queue in case of fail - this would be violated.
	 */
	*mlen = msg->msg_len;
	error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len);
	if (error == 0 && msg_prio)
		error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned));
	mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len);

	return error;
}

int
sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned *) msg_prio;
	} */
	ssize_t mlen;
	int error;

	error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr),
	    SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen);
	if (error == 0)
		*retval = mlen;

	return error;
}

int
sys___mq_timedreceive50(struct lwp *l,
    const struct sys___mq_timedreceive50_args *uap, register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned *) msg_prio;
		syscallarg(const struct timespec *) abs_timeout;
	} */
	struct timespec ts, *tsp;
	ssize_t mlen;
	int error;

	/* Get and convert time value */
	if (SCARG(uap, abs_timeout)) {
		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
		if (error)
			return error;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr),
	    SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen);
	if (error == 0)
		*retval = mlen;

	return error;
}

/*
 * Primary mq_send1() function.
 */
int
mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio,
    struct timespec *ts)
{
	struct mqueue *mq;
	struct mq_msg *msg;
	struct mq_attr *mqattr;
	struct proc *notify = NULL;
	ksiginfo_t ksi;
	size_t size;
	int error;

	/* Check the priority range */
	if (msg_prio >= mq_prio_max)
		return EINVAL;

	/* Allocate a new message */
	size = sizeof(struct mq_msg) + msg_len;
	if (size > mq_max_msgsize)
		return EMSGSIZE;

	if (size > MQ_DEF_MSGSIZE) {
		msg = kmem_alloc(size, KM_SLEEP);
	} else {
		msg = pool_cache_get(mqmsg_cache, PR_WAITOK);
	}

	/* Get the data from user-space */
	error = copyin(msg_ptr, msg->msg_ptr, msg_len);
	if (error) {
		mqueue_freemsg(msg, size);
		return error;
	}
	msg->msg_len = msg_len;
	msg->msg_prio = msg_prio;

	error = mqueue_get(mqdes, FWRITE, &mq);
	if (error) {
		mqueue_freemsg(msg, size);
		return error;
	}
	getnanotime(&mq->mq_mtime);
	mqattr = &mq->mq_attrib;

	/* Check the message size limit */
	if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) {
		error = EMSGSIZE;
		goto error;
	}

	/* Check if queue is full */
	while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) {
		int t;

		if (mqattr->mq_flags & O_NONBLOCK) {
			error = EAGAIN;
			goto error;
		}
		if (ts) {
			error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t,
			    NULL);
			if (error)
				goto error;
		} else
			t = 0;
		/* Block until queue becomes available */
		error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t);
		if (error || (mqattr->mq_flags & MQ_UNLINKED)) {
			error = (error == EWOULDBLOCK) ? ETIMEDOUT : error;
			goto error;
		}
	}
	KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg);

	/*
	 * Insert message into the queue, according to the priority.
	 * Note the difference between index and priority.
	 */
	if (__predict_true(msg_prio < MQ_PQSIZE)) {
		u_int idx = MQ_PQSIZE - msg_prio;

		KASSERT(idx != MQ_PQRESQ);
		TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue);
		mq->mq_bitmap |= (1 << --idx);
	} else {
		mqueue_linear_insert(mq, msg);
	}

	/* Check for the notify */
	if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc &&
	    (mqattr->mq_flags & MQ_RECEIVE) == 0 &&
	    mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) {
		/* Initialize the signal */
		KSI_INIT(&ksi);
		ksi.ksi_signo = mq->mq_sig_notify.sigev_signo;
		ksi.ksi_code = SI_MESGQ;
		ksi.ksi_value = mq->mq_sig_notify.sigev_value;
		/* Unregister the process */
		notify = mq->mq_notify_proc;
		mq->mq_notify_proc = NULL;
	}

	/* Increment the counter and signal waiter, if any */
	mqattr->mq_curmsgs++;
	cv_signal(&mq->mq_send_cv);

	/* Ready for receiving now */
	selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0);
error:
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)mqdes);

	if (error) {
		mqueue_freemsg(msg, size);
	} else if (notify) {
		/* Send the notify, if needed */
		mutex_enter(proc_lock);
		kpsignal(notify, &ksi, NULL);
		mutex_exit(proc_lock);
	}
	return error;
}

int
sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned) msg_prio;
	} */

	return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr),
	    SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL);
}

int
sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned) msg_prio;
		syscallarg(const struct timespec *) abs_timeout;
	} */
	struct timespec ts, *tsp;
	int error;

	/* Get and convert time value */
	if (SCARG(uap, abs_timeout)) {
		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
		if (error)
			return error;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr),
	    SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp);
}

int
sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct sigevent *) notification;
	} */
	struct mqueue *mq;
	struct sigevent sig;
	int error;

	if (SCARG(uap, notification)) {
		/* Get the signal from user-space */
		error = copyin(SCARG(uap, notification), &sig,
		    sizeof(struct sigevent));
		if (error)
			return error;
		if (sig.sigev_notify == SIGEV_SIGNAL &&
		    (sig.sigev_signo <= 0 || sig.sigev_signo >= NSIG))
			return EINVAL;
	}

	error = mqueue_get(SCARG(uap, mqdes), 0, &mq);
	if (error) {
		return error;
	}
	if (SCARG(uap, notification)) {
		/* Register notification: set the signal and target process */
		if (mq->mq_notify_proc == NULL) {
			memcpy(&mq->mq_sig_notify, &sig,
			    sizeof(struct sigevent));
			mq->mq_notify_proc = l->l_proc;
		} else {
			/* Fail if someone else already registered */
			error = EBUSY;
		}
	} else {
		/* Unregister the notification */
		mq->mq_notify_proc = NULL;
	}
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)SCARG(uap, mqdes));

	return error;
}

int
sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(struct mq_attr *) mqstat;
	} */
	struct mqueue *mq;
	struct mq_attr attr;
	int error;

	error = mqueue_get(SCARG(uap, mqdes), 0, &mq);
	if (error) {
		return error;
	}
	memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)SCARG(uap, mqdes));

	return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr));
}

int
sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct mq_attr *) mqstat;
		syscallarg(struct mq_attr *) omqstat;
	} */
	struct mqueue *mq;
	struct mq_attr attr;
	int error, nonblock;

	error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr));
	if (error)
		return error;
	nonblock = (attr.mq_flags & O_NONBLOCK);

	error = mqueue_get(SCARG(uap, mqdes), 0, &mq);
	if (error) {
		return error;
	}

	/* Copy the old attributes, if needed */
	if (SCARG(uap, omqstat)) {
		memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	}

	/* Ignore everything, except O_NONBLOCK */
	if (nonblock)
		mq->mq_attrib.mq_flags |= O_NONBLOCK;
	else
		mq->mq_attrib.mq_flags &= ~O_NONBLOCK;

	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)SCARG(uap, mqdes));

	/*
	 * Copy the data to the user-space.
	 * Note: According to POSIX, the new attributes should not be set in
	 * case of fail - this would be violated.
	 */
	if (SCARG(uap, omqstat))
		error = copyout(&attr, SCARG(uap, omqstat),
		    sizeof(struct mq_attr));

	return error;
}

int
sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) name;
	} */
	mqueue_t *mq;
	char *name;
	int error, refcnt = 0;

	/* Get the name from the user-space */
	name = kmem_alloc(MQ_NAMELEN, KM_SLEEP);
	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kmem_free(name, MQ_NAMELEN);
		return error;
	}

	mutex_enter(&mqlist_lock);
	mq = mqueue_lookup(name);
	if (mq == NULL) {
		error = ENOENT;
		goto err;
	}
	KASSERT(mutex_owned(&mq->mq_mtx));

	/* Verify permissions. */
	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq,
	    NULL, NULL)) {
		mutex_exit(&mq->mq_mtx);
		error = EACCES;
		goto err;
	}

	/* Remove and destroy if no references. */
	LIST_REMOVE(mq, mq_list);
	refcnt = mq->mq_refcnt;
	if (refcnt) {
		/* Mark as unlinked, if there are references. */
		mq->mq_attrib.mq_flags |= MQ_UNLINKED;
	}

	/* Wake up waiters, if there are any. */
	cv_broadcast(&mq->mq_send_cv);
	cv_broadcast(&mq->mq_recv_cv);

	selnotify(&mq->mq_rsel, POLLHUP, 0);
	selnotify(&mq->mq_wsel, POLLHUP, 0);

	mutex_exit(&mq->mq_mtx);
err:
	mutex_exit(&mqlist_lock);
	/*
	 * If last reference - destroy the message queue.  Otherwise,
	 * the last mq_close() call will do that.
	 */
	if (!error && refcnt == 0) {
		mqueue_destroy(mq);
	}
	kmem_free(name, MQ_NAMELEN);

	return error;
}
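/*
 * The limits declared at the top of this file are exported as writable
 * sysctl nodes under kern.mqueue (created below).  For example, raising
 * the per-process descriptor limit could be done with (illustrative
 * command only; the defaults live in the static variables above):
 *
 *	sysctl -w kern.mqueue.mq_open_max=1024
 */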
/*
 * System control nodes.
 */
static int
mqueue_sysctl_init(void)
{
	const struct sysctlnode *node = NULL;

	mqsysctl_log = NULL;

	sysctl_createv(&mqsysctl_log, 0, NULL, NULL,
		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_msg",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Message Passing option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_MESSAGE_PASSING, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(&mqsysctl_log, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "mqueue",
		SYSCTL_DESCR("Message queue options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node == NULL)
		return ENXIO;

	sysctl_createv(&mqsysctl_log, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_open_max",
		SYSCTL_DESCR("Maximal number of message queue descriptors "
			     "that process could open"),
		NULL, 0, &mq_open_max, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(&mqsysctl_log, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_prio_max",
		SYSCTL_DESCR("Maximal priority of the message"),
		NULL, 0, &mq_prio_max, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(&mqsysctl_log, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_max_msgsize",
		SYSCTL_DESCR("Maximal allowed size of the message"),
		NULL, 0, &mq_max_msgsize, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(&mqsysctl_log, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_def_maxmsg",
		SYSCTL_DESCR("Default maximal message count"),
		NULL, 0, &mq_def_maxmsg, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(&mqsysctl_log, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mq_max_maxmsg",
		SYSCTL_DESCR("Maximal allowed message count"),
		NULL, 0, &mq_max_maxmsg, 0,
		CTL_CREATE, CTL_EOL);

	return 0;
}

/*
 * Debugging.
 */
#if defined(DDB)

void
mqueue_print_list(void (*pr)(const char *, ...))
{
	struct mqueue *mq;

	(*pr)("Global list of the message queues:\n");
	(*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n",
	    "Name", "Ptr", "Mode", "Flags", "Ref",
	    "MaxMsg", "MsgSze", "CurMsg");
	LIST_FOREACH(mq, &mqueue_head, mq_list) {
		(*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n",
		    mq->mq_name, mq, mq->mq_mode,
		    mq->mq_attrib.mq_flags, mq->mq_refcnt,
		    mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize,
		    mq->mq_attrib.mq_curmsgs);
	}
}

#endif /* defined(DDB) */