/*	$NetBSD: sys_mqueue.c,v 1.34 2012/03/13 18:40:53 elad Exp $	*/

/*
 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX message queues.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 *
 * Locking
 *
 * Global list of message queues (mqueue_head) is protected by mqlist_lock.
 * Each message queue and its members are protected by mqueue::mq_mtx.
 * Note that proc_t::p_mqueue_cnt is updated atomically.
 *
 * Lock order:
 *
 *	mqlist_lock ->
 *		mqueue::mq_mtx
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.34 2012/03/13 18:40:53 elad Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/atomic.h>

#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/lwp.h>
#include <sys/mqueue.h>
#include <sys/module.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syscall.h>
#include <sys/syscallvar.h>
#include <sys/syscallargs.h>

#include <miscfs/genfs/genfs.h>

MODULE(MODULE_CLASS_MISC, mqueue, NULL);

/* System-wide limits. */
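/*
 * These limits can be tuned at run-time via the kern.mqueue sysctl
 * nodes created in mqueue_sysctl_init().
 */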
static u_int			mq_open_max = MQ_OPEN_MAX;
static u_int			mq_prio_max = MQ_PRIO_MAX;
static u_int			mq_max_msgsize = 16 * MQ_DEF_MSGSIZE;
static u_int			mq_def_maxmsg = 32;
static u_int			mq_max_maxmsg = 16 * 32;

static pool_cache_t		mqmsg_cache	__read_mostly;
static kmutex_t			mqlist_lock	__cacheline_aligned;
static LIST_HEAD(, mqueue)	mqueue_head	__cacheline_aligned;
static struct sysctllog *	mqsysctl_log;

static kauth_listener_t		mq_listener;

static int	mqueue_sysinit(void);
static int	mqueue_sysfini(bool);
static int	mqueue_sysctl_init(void);
static int	mq_poll_fop(file_t *, int);
static int	mq_stat_fop(file_t *, struct stat *);
static int	mq_close_fop(file_t *);

static const struct fileops mqops = {
	.fo_read = fbadop_read,
	.fo_write = fbadop_write,
	.fo_ioctl = fbadop_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = mq_poll_fop,
	.fo_stat = mq_stat_fop,
	.fo_close = mq_close_fop,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
};

static const struct syscall_package mqueue_syscalls[] = {
	{ SYS_mq_open, 0, (sy_call_t *)sys_mq_open },
	{ SYS_mq_close, 0, (sy_call_t *)sys_mq_close },
	{ SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink },
	{ SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr },
	{ SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr },
	{ SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify },
	{ SYS_mq_send, 0, (sy_call_t *)sys_mq_send },
	{ SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive },
	{ SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 },
	{ SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 },
	{ 0, 0, NULL }
};

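/*
 * kauth(9) listener for the system scope.  It allows KAUTH_SYSTEM_MQUEUE
 * requests (issued by mq_unlink() in this module) when the caller's
 * effective UID matches the owner of the message queue, and defers the
 * decision to the rest of the listener chain otherwise.
 */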
static int
mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	mqueue_t *mq;
	int result;

	if (action != KAUTH_SYSTEM_MQUEUE)
		return KAUTH_RESULT_DEFER;

	result = KAUTH_RESULT_DEFER;

	mq = arg1;

	if (kauth_cred_geteuid(cred) == mq->mq_euid)
		result = KAUTH_RESULT_ALLOW;

	return result;
}

/*
 * Initialisation and unloading of POSIX message queue subsystem.
 */

static int
mqueue_sysinit(void)
{
	int error;

	mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit,
	    0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL);
	mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE);
	LIST_INIT(&mqueue_head);

	error = mqueue_sysctl_init();
	if (error) {
		(void)mqueue_sysfini(false);
		return error;
	}
	error = syscall_establish(NULL, mqueue_syscalls);
	if (error) {
		(void)mqueue_sysfini(false);
	}
	mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
	    mq_listener_cb, NULL);
	return error;
}

static int
mqueue_sysfini(bool interface)
{

	if (interface) {
		int error;
		bool inuse;

		/* Stop syscall activity. */
		error = syscall_disestablish(NULL, mqueue_syscalls);
		if (error)
			return error;
		/* Check if there are any message queues in use. */
		mutex_enter(&mqlist_lock);
		inuse = !LIST_EMPTY(&mqueue_head);
		mutex_exit(&mqlist_lock);
		if (inuse) {
			error = syscall_establish(NULL, mqueue_syscalls);
			KASSERT(error == 0);
			return EBUSY;
		}
	}

	if (mqsysctl_log != NULL)
		sysctl_teardown(&mqsysctl_log);

	kauth_unlisten_scope(mq_listener);

	mutex_destroy(&mqlist_lock);
	pool_cache_destroy(mqmsg_cache);
	return 0;
}

/*
 * Module interface.
 */
static int
mqueue_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		return mqueue_sysinit();
	case MODULE_CMD_FINI:
		return mqueue_sysfini(true);
	default:
		return ENOTTY;
	}
}

/*
 * Free the message.
 */
static void
mqueue_freemsg(struct mq_msg *msg, const size_t size)
{

	if (size > MQ_DEF_MSGSIZE) {
		kmem_free(msg, size);
	} else {
		pool_cache_put(mqmsg_cache, msg);
	}
}

/*
 * Destroy the message queue.
 */
static void
mqueue_destroy(struct mqueue *mq)
{
	struct mq_msg *msg;
	size_t msz;
	u_int i;

	/* Note MQ_PQSIZE + 1. */
	for (i = 0; i <= MQ_PQSIZE; i++) {
		while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) {
			TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue);
			msz = sizeof(struct mq_msg) + msg->msg_len;
			mqueue_freemsg(msg, msz);
		}
	}
	if (mq->mq_name) {
		kmem_free(mq->mq_name, MQ_NAMELEN);
	}
	seldestroy(&mq->mq_rsel);
	seldestroy(&mq->mq_wsel);
	cv_destroy(&mq->mq_send_cv);
	cv_destroy(&mq->mq_recv_cv);
	mutex_destroy(&mq->mq_mtx);
	kmem_free(mq, sizeof(struct mqueue));
}

/*
 * mqueue_lookup: look up a message queue by name in the global list.
 *
 * => locks the message queue on success
 */
static mqueue_t *
mqueue_lookup(const char *name)
{
	mqueue_t *mq;

	KASSERT(mutex_owned(&mqlist_lock));

	LIST_FOREACH(mq, &mqueue_head, mq_list) {
		if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) {
			mutex_enter(&mq->mq_mtx);
			return mq;
		}
	}
	return NULL;
}

/*
 * mqueue_get: get the mqueue from the descriptor.
 *
 * => locks the message queue, if found.
 * => holds a reference on the file descriptor.
 */
static int
mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret)
{
	const int fd = (int)mqd;
	mqueue_t *mq;
	file_t *fp;

	fp = fd_getfile(fd);
	if (__predict_false(fp == NULL)) {
		return EBADF;
	}
	if (__predict_false(fp->f_type != DTYPE_MQUEUE)) {
		fd_putfile(fd);
		return EBADF;
	}
	if (fflag && (fp->f_flag & fflag) == 0) {
		fd_putfile(fd);
		return EBADF;
	}
	mq = fp->f_data;
	mutex_enter(&mq->mq_mtx);

	*mqret = mq;
	return 0;
}

/*
 * mqueue_linear_insert: perform a linear insert according to the message
 * priority into the reserved queue (MQ_PQRESQ).  The reserved queue is a
 * sorted list used only when mq_prio_max is increased via sysctl.
 */
317 */ 318 static inline void 319 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 320 { 321 struct mq_msg *mit; 322 323 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 324 if (msg->msg_prio > mit->msg_prio) 325 break; 326 } 327 if (mit == NULL) { 328 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 329 } else { 330 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 331 } 332 } 333 334 static int 335 mq_stat_fop(file_t *fp, struct stat *st) 336 { 337 struct mqueue *mq = fp->f_data; 338 339 memset(st, 0, sizeof(*st)); 340 341 mutex_enter(&mq->mq_mtx); 342 st->st_mode = mq->mq_mode; 343 st->st_uid = mq->mq_euid; 344 st->st_gid = mq->mq_egid; 345 st->st_atimespec = mq->mq_atime; 346 st->st_mtimespec = mq->mq_mtime; 347 st->st_ctimespec = st->st_birthtimespec = mq->mq_btime; 348 st->st_uid = kauth_cred_geteuid(fp->f_cred); 349 st->st_gid = kauth_cred_getegid(fp->f_cred); 350 mutex_exit(&mq->mq_mtx); 351 352 return 0; 353 } 354 355 static int 356 mq_poll_fop(file_t *fp, int events) 357 { 358 struct mqueue *mq = fp->f_data; 359 struct mq_attr *mqattr; 360 int revents = 0; 361 362 mutex_enter(&mq->mq_mtx); 363 mqattr = &mq->mq_attrib; 364 if (events & (POLLIN | POLLRDNORM)) { 365 /* Ready for receiving, if there are messages in the queue. */ 366 if (mqattr->mq_curmsgs) 367 revents |= events & (POLLIN | POLLRDNORM); 368 else 369 selrecord(curlwp, &mq->mq_rsel); 370 } 371 if (events & (POLLOUT | POLLWRNORM)) { 372 /* Ready for sending, if the message queue is not full. */ 373 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 374 revents |= events & (POLLOUT | POLLWRNORM); 375 else 376 selrecord(curlwp, &mq->mq_wsel); 377 } 378 mutex_exit(&mq->mq_mtx); 379 380 return revents; 381 } 382 383 static int 384 mq_close_fop(file_t *fp) 385 { 386 proc_t *p = curproc; 387 mqueue_t *mq = fp->f_data; 388 bool destroy = false; 389 390 mutex_enter(&mq->mq_mtx); 391 KASSERT(mq->mq_refcnt > 0); 392 if (--mq->mq_refcnt == 0) { 393 /* Destroy if the last reference and unlinked. */ 394 destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0; 395 } 396 mutex_exit(&mq->mq_mtx); 397 398 if (destroy) { 399 mqueue_destroy(mq); 400 } 401 atomic_dec_uint(&p->p_mqueue_cnt); 402 return 0; 403 } 404 405 static int 406 mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred) 407 { 408 mode_t acc_mode = 0; 409 410 /* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */ 411 if (access & FREAD) { 412 acc_mode |= VREAD; 413 } 414 if (access & FWRITE) { 415 acc_mode |= VWRITE; 416 } 417 if (genfs_can_access(VNON, mq->mq_mode, mq->mq_euid, 418 mq->mq_egid, acc_mode, cred)) { 419 return EACCES; 420 } 421 return 0; 422 } 423 424 static int 425 mqueue_create(lwp_t *l, char *name, struct mq_attr *uattr, mode_t mode, 426 int oflag, mqueue_t **mqret) 427 { 428 proc_t *p = l->l_proc; 429 struct cwdinfo *cwdi = p->p_cwdi; 430 mqueue_t *mq; 431 struct mq_attr attr; 432 u_int i; 433 434 /* Pre-check the limit. */ 435 if (p->p_mqueue_cnt >= mq_open_max) { 436 return EMFILE; 437 } 438 439 /* Empty name is invalid. */ 440 if (name[0] == '\0') { 441 return EINVAL; 442 } 443 444 /* Check for mqueue attributes. 
static int
mqueue_create(lwp_t *l, char *name, struct mq_attr *uattr, mode_t mode,
    int oflag, mqueue_t **mqret)
{
	proc_t *p = l->l_proc;
	struct cwdinfo *cwdi = p->p_cwdi;
	mqueue_t *mq;
	struct mq_attr attr;
	u_int i;

	/* Pre-check the limit. */
	if (p->p_mqueue_cnt >= mq_open_max) {
		return EMFILE;
	}

	/* Empty name is invalid. */
	if (name[0] == '\0') {
		return EINVAL;
	}

	/* Check for mqueue attributes. */
	if (uattr) {
		int error;

		error = copyin(uattr, &attr, sizeof(struct mq_attr));
		if (error) {
			return error;
		}
		if (attr.mq_maxmsg <= 0 || attr.mq_maxmsg > mq_max_maxmsg ||
		    attr.mq_msgsize <= 0 || attr.mq_msgsize > mq_max_msgsize) {
			return EINVAL;
		}
		attr.mq_curmsgs = 0;
	} else {
		memset(&attr, 0, sizeof(struct mq_attr));
		attr.mq_maxmsg = mq_def_maxmsg;
		attr.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg);
	}

	/*
	 * Allocate new message queue, initialize data structures, copy the
	 * name and attributes.  Note that the initial reference is set here.
	 */
	mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP);

	mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&mq->mq_send_cv, "mqsendcv");
	cv_init(&mq->mq_recv_cv, "mqrecvcv");
	for (i = 0; i < (MQ_PQSIZE + 1); i++) {
		TAILQ_INIT(&mq->mq_head[i]);
	}
	selinit(&mq->mq_rsel);
	selinit(&mq->mq_wsel);
	mq->mq_name = name;
	mq->mq_refcnt = 1;

	memcpy(&mq->mq_attrib, &attr, sizeof(struct mq_attr));

	CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0);
	mq->mq_attrib.mq_flags = (O_MASK & oflag);

	/* Store mode and effective UID with GID. */
	mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT;
	mq->mq_euid = kauth_cred_geteuid(l->l_cred);
	mq->mq_egid = kauth_cred_getegid(l->l_cred);

	*mqret = mq;
	return 0;
}

/*
 * General mqueue system calls.
 */

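/*
 * sys_mq_open: open or create a message queue.
 *
 * => on success, returns a message queue descriptor, which is also a
 *    regular file descriptor for the process (see mq_close_fop()).
 */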
int
sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) name;
		syscallarg(int) oflag;
		syscallarg(mode_t) mode;
		syscallarg(struct mq_attr) attr;
	} */
	struct proc *p = l->l_proc;
	struct mqueue *mq, *mq_new = NULL;
	int mqd, error, oflag = SCARG(uap, oflag);
	file_t *fp;
	char *name;

	/* Get the name from the user-space. */
	name = kmem_alloc(MQ_NAMELEN, KM_SLEEP);
	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kmem_free(name, MQ_NAMELEN);
		return error;
	}

	/* Allocate file structure and descriptor. */
	error = fd_allocfile(&fp, &mqd);
	if (error) {
		kmem_free(name, MQ_NAMELEN);
		return error;
	}
	fp->f_type = DTYPE_MQUEUE;
	fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE);
	fp->f_ops = &mqops;

	if (oflag & O_CREAT) {
		/* Create a new message queue. */
		error = mqueue_create(l, name, SCARG(uap, attr),
		    SCARG(uap, mode), oflag, &mq_new);
		if (error) {
			goto err;
		}
		KASSERT(mq_new != NULL);
	}

	/* Look up a message queue with this name. */
	mutex_enter(&mqlist_lock);
	mq = mqueue_lookup(name);
	if (mq) {
		KASSERT(mutex_owned(&mq->mq_mtx));
		mutex_exit(&mqlist_lock);

		/* Check for exclusive create. */
		if (oflag & O_EXCL) {
			mutex_exit(&mq->mq_mtx);
			error = EEXIST;
			goto err;
		}

		/* Verify permissions. */
		if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) {
			mutex_exit(&mq->mq_mtx);
			error = EACCES;
			goto err;
		}

		/* If we have access, add a new reference. */
		mq->mq_refcnt++;
		mutex_exit(&mq->mq_mtx);
	} else {
		/* Fail if not found and not creating. */
		if ((oflag & O_CREAT) == 0) {
			mutex_exit(&mqlist_lock);
			KASSERT(mq_new == NULL);
			error = ENOENT;
			goto err;
		}

		/* Account and check for the limit. */
		if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) {
			mutex_exit(&mqlist_lock);
			atomic_dec_uint(&p->p_mqueue_cnt);
			error = EMFILE;
			goto err;
		}

		/* Initial timestamps. */
		mq = mq_new;
		getnanotime(&mq->mq_btime);
		mq->mq_atime = mq->mq_mtime = mq->mq_btime;

		/*
		 * Finally, insert message queue into the list.
		 * Note: it already has the initial reference.
		 */
		LIST_INSERT_HEAD(&mqueue_head, mq, mq_list);
		mutex_exit(&mqlist_lock);

		mq_new = NULL;
		name = NULL;
	}
	KASSERT(mq != NULL);
	fp->f_data = mq;
	fd_affix(p, fp, mqd);
	*retval = mqd;
err:
	if (error) {
		fd_abort(p, fp, mqd);
	}
	if (mq_new) {
		/* Note: will free the 'name'. */
		mqueue_destroy(mq_new);
	} else if (name) {
		kmem_free(name, MQ_NAMELEN);
	}
	return error;
}

int
sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap,
    register_t *retval)
{

	return sys_close(l, (const void *)uap, retval);
}

/*
 * Primary mq_recv1() function.
 */
int
mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio,
    struct timespec *ts, ssize_t *mlen)
{
	struct mqueue *mq;
	struct mq_msg *msg = NULL;
	struct mq_attr *mqattr;
	u_int idx;
	int error;

	error = mqueue_get(mqdes, FREAD, &mq);
	if (error) {
		return error;
	}
	getnanotime(&mq->mq_atime);
	mqattr = &mq->mq_attrib;

	/* Check the message size limits */
	if (msg_len < mqattr->mq_msgsize) {
		error = EMSGSIZE;
		goto error;
	}

	/* Check if queue is empty */
	while (mqattr->mq_curmsgs == 0) {
		int t;

		if (mqattr->mq_flags & O_NONBLOCK) {
			error = EAGAIN;
			goto error;
		}
		if (ts) {
			error = abstimeout2timo(ts, &t);
			if (error)
				goto error;
		} else
			t = 0;
		/*
		 * Block until someone sends the message.
		 * While doing this, notification should not be sent.
		 */
		mqattr->mq_flags |= MQ_RECEIVE;
		error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t);
		mqattr->mq_flags &= ~MQ_RECEIVE;
		if (error || (mqattr->mq_flags & MQ_UNLINKED)) {
			error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR;
			goto error;
		}
	}

	/*
	 * Find the highest priority message, and remove it from the queue.
	 * The reserved queue is checked first; the bitmap queues are next.
	 */
	msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]);
	if (__predict_true(msg == NULL)) {
		idx = ffs(mq->mq_bitmap);
		msg = TAILQ_FIRST(&mq->mq_head[idx]);
		KASSERT(msg != NULL);
	} else {
		idx = MQ_PQRESQ;
	}
	TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue);

	/* Unmark the bit, if last message. */
	if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) {
		KASSERT((MQ_PQSIZE - idx) == msg->msg_prio);
		mq->mq_bitmap &= ~(1 << --idx);
	}

	/* Decrement the counter and signal waiter, if any */
	mqattr->mq_curmsgs--;
	cv_signal(&mq->mq_recv_cv);

	/* Ready for sending now */
	selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0);
error:
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)mqdes);
	if (error)
		return error;

	/*
	 * Copy the data to the user-space.
	 * Note: POSIX requires that no message be removed from the queue
	 * on failure; a failed copyout here violates that requirement.
	 */
712 */ 713 *mlen = msg->msg_len; 714 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 715 if (error == 0 && msg_prio) 716 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 717 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 718 719 return error; 720 } 721 722 int 723 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap, 724 register_t *retval) 725 { 726 /* { 727 syscallarg(mqd_t) mqdes; 728 syscallarg(char *) msg_ptr; 729 syscallarg(size_t) msg_len; 730 syscallarg(unsigned *) msg_prio; 731 } */ 732 ssize_t mlen; 733 int error; 734 735 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 736 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen); 737 if (error == 0) 738 *retval = mlen; 739 740 return error; 741 } 742 743 int 744 sys___mq_timedreceive50(struct lwp *l, 745 const struct sys___mq_timedreceive50_args *uap, register_t *retval) 746 { 747 /* { 748 syscallarg(mqd_t) mqdes; 749 syscallarg(char *) msg_ptr; 750 syscallarg(size_t) msg_len; 751 syscallarg(unsigned *) msg_prio; 752 syscallarg(const struct timespec *) abs_timeout; 753 } */ 754 struct timespec ts, *tsp; 755 ssize_t mlen; 756 int error; 757 758 /* Get and convert time value */ 759 if (SCARG(uap, abs_timeout)) { 760 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 761 if (error) 762 return error; 763 tsp = &ts; 764 } else { 765 tsp = NULL; 766 } 767 768 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 769 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); 770 if (error == 0) 771 *retval = mlen; 772 773 return error; 774 } 775 776 /* 777 * Primary mq_send1() function. 778 */ 779 int 780 mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio, 781 struct timespec *ts) 782 { 783 struct mqueue *mq; 784 struct mq_msg *msg; 785 struct mq_attr *mqattr; 786 struct proc *notify = NULL; 787 ksiginfo_t ksi; 788 size_t size; 789 int error; 790 791 /* Check the priority range */ 792 if (msg_prio >= mq_prio_max) 793 return EINVAL; 794 795 /* Allocate a new message */ 796 size = sizeof(struct mq_msg) + msg_len; 797 if (size > mq_max_msgsize) 798 return EMSGSIZE; 799 800 if (size > MQ_DEF_MSGSIZE) { 801 msg = kmem_alloc(size, KM_SLEEP); 802 } else { 803 msg = pool_cache_get(mqmsg_cache, PR_WAITOK); 804 } 805 806 /* Get the data from user-space */ 807 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 808 if (error) { 809 mqueue_freemsg(msg, size); 810 return error; 811 } 812 msg->msg_len = msg_len; 813 msg->msg_prio = msg_prio; 814 815 error = mqueue_get(mqdes, FWRITE, &mq); 816 if (error) { 817 mqueue_freemsg(msg, size); 818 return error; 819 } 820 getnanotime(&mq->mq_mtime); 821 mqattr = &mq->mq_attrib; 822 823 /* Check the message size limit */ 824 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 825 error = EMSGSIZE; 826 goto error; 827 } 828 829 /* Check if queue is full */ 830 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 831 int t; 832 833 if (mqattr->mq_flags & O_NONBLOCK) { 834 error = EAGAIN; 835 goto error; 836 } 837 if (ts) { 838 error = abstimeout2timo(ts, &t); 839 if (error) 840 goto error; 841 } else 842 t = 0; 843 /* Block until queue becomes available */ 844 error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t); 845 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 846 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 847 goto error; 848 } 849 } 850 KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg); 851 852 /* 853 * Insert message into the queue, according to the priority. 854 * Note the difference between index and priority. 
855 */ 856 if (__predict_true(msg_prio < MQ_PQSIZE)) { 857 u_int idx = MQ_PQSIZE - msg_prio; 858 859 KASSERT(idx != MQ_PQRESQ); 860 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 861 mq->mq_bitmap |= (1 << --idx); 862 } else { 863 mqueue_linear_insert(mq, msg); 864 } 865 866 /* Check for the notify */ 867 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 868 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 869 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 870 /* Initialize the signal */ 871 KSI_INIT(&ksi); 872 ksi.ksi_signo = mq->mq_sig_notify.sigev_signo; 873 ksi.ksi_code = SI_MESGQ; 874 ksi.ksi_value = mq->mq_sig_notify.sigev_value; 875 /* Unregister the process */ 876 notify = mq->mq_notify_proc; 877 mq->mq_notify_proc = NULL; 878 } 879 880 /* Increment the counter and signal waiter, if any */ 881 mqattr->mq_curmsgs++; 882 cv_signal(&mq->mq_send_cv); 883 884 /* Ready for receiving now */ 885 selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0); 886 error: 887 mutex_exit(&mq->mq_mtx); 888 fd_putfile((int)mqdes); 889 890 if (error) { 891 mqueue_freemsg(msg, size); 892 } else if (notify) { 893 /* Send the notify, if needed */ 894 mutex_enter(proc_lock); 895 kpsignal(notify, &ksi, NULL); 896 mutex_exit(proc_lock); 897 } 898 return error; 899 } 900 901 int 902 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap, 903 register_t *retval) 904 { 905 /* { 906 syscallarg(mqd_t) mqdes; 907 syscallarg(const char *) msg_ptr; 908 syscallarg(size_t) msg_len; 909 syscallarg(unsigned) msg_prio; 910 } */ 911 912 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 913 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL); 914 } 915 916 int 917 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap, 918 register_t *retval) 919 { 920 /* { 921 syscallarg(mqd_t) mqdes; 922 syscallarg(const char *) msg_ptr; 923 syscallarg(size_t) msg_len; 924 syscallarg(unsigned) msg_prio; 925 syscallarg(const struct timespec *) abs_timeout; 926 } */ 927 struct timespec ts, *tsp; 928 int error; 929 930 /* Get and convert time value */ 931 if (SCARG(uap, abs_timeout)) { 932 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 933 if (error) 934 return error; 935 tsp = &ts; 936 } else { 937 tsp = NULL; 938 } 939 940 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 941 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); 942 } 943 944 int 945 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap, 946 register_t *retval) 947 { 948 /* { 949 syscallarg(mqd_t) mqdes; 950 syscallarg(const struct sigevent *) notification; 951 } */ 952 struct mqueue *mq; 953 struct sigevent sig; 954 int error; 955 956 if (SCARG(uap, notification)) { 957 /* Get the signal from user-space */ 958 error = copyin(SCARG(uap, notification), &sig, 959 sizeof(struct sigevent)); 960 if (error) 961 return error; 962 if (sig.sigev_notify == SIGEV_SIGNAL && 963 (sig.sigev_signo <=0 || sig.sigev_signo >= NSIG)) 964 return EINVAL; 965 } 966 967 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 968 if (error) { 969 return error; 970 } 971 if (SCARG(uap, notification)) { 972 /* Register notification: set the signal and target process */ 973 if (mq->mq_notify_proc == NULL) { 974 memcpy(&mq->mq_sig_notify, &sig, 975 sizeof(struct sigevent)); 976 mq->mq_notify_proc = l->l_proc; 977 } else { 978 /* Fail if someone else already registered */ 979 error = EBUSY; 980 } 981 } else { 982 /* Unregister the notification */ 983 mq->mq_notify_proc = NULL; 984 } 985 mutex_exit(&mq->mq_mtx); 986 fd_putfile((int)SCARG(uap, mqdes)); 987 
int
sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct sigevent *) notification;
	} */
	struct mqueue *mq;
	struct sigevent sig;
	int error;

	if (SCARG(uap, notification)) {
		/* Get the signal from user-space */
		error = copyin(SCARG(uap, notification), &sig,
		    sizeof(struct sigevent));
		if (error)
			return error;
		if (sig.sigev_notify == SIGEV_SIGNAL &&
		    (sig.sigev_signo <= 0 || sig.sigev_signo >= NSIG))
			return EINVAL;
	}

	error = mqueue_get(SCARG(uap, mqdes), 0, &mq);
	if (error) {
		return error;
	}
	if (SCARG(uap, notification)) {
		/* Register notification: set the signal and target process */
		if (mq->mq_notify_proc == NULL) {
			memcpy(&mq->mq_sig_notify, &sig,
			    sizeof(struct sigevent));
			mq->mq_notify_proc = l->l_proc;
		} else {
			/* Fail if someone else already registered */
			error = EBUSY;
		}
	} else {
		/* Unregister the notification */
		mq->mq_notify_proc = NULL;
	}
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)SCARG(uap, mqdes));

	return error;
}

int
sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(struct mq_attr *) mqstat;
	} */
	struct mqueue *mq;
	struct mq_attr attr;
	int error;

	error = mqueue_get(SCARG(uap, mqdes), 0, &mq);
	if (error) {
		return error;
	}
	memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)SCARG(uap, mqdes));

	return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr));
}

int
sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct mq_attr *) mqstat;
		syscallarg(struct mq_attr *) omqstat;
	} */
	struct mqueue *mq;
	struct mq_attr attr;
	int error, nonblock;

	error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr));
	if (error)
		return error;
	nonblock = (attr.mq_flags & O_NONBLOCK);

	error = mqueue_get(SCARG(uap, mqdes), 0, &mq);
	if (error) {
		return error;
	}

	/* Copy the old attributes, if needed */
	if (SCARG(uap, omqstat)) {
		memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	}

	/* Ignore everything except O_NONBLOCK */
	if (nonblock)
		mq->mq_attrib.mq_flags |= O_NONBLOCK;
	else
		mq->mq_attrib.mq_flags &= ~O_NONBLOCK;

	mutex_exit(&mq->mq_mtx);
	fd_putfile((int)SCARG(uap, mqdes));

	/*
	 * Copy the data to the user-space.
	 * Note: POSIX requires that the new attributes not take effect if
	 * this call fails; since the flags were already updated above, a
	 * failed copyout here violates that requirement.
	 */
	if (SCARG(uap, omqstat))
		error = copyout(&attr, SCARG(uap, omqstat),
		    sizeof(struct mq_attr));

	return error;
}

int
sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) name;
	} */
	mqueue_t *mq;
	char *name;
	int error, refcnt = 0;

	/* Get the name from the user-space */
	name = kmem_alloc(MQ_NAMELEN, KM_SLEEP);
	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kmem_free(name, MQ_NAMELEN);
		return error;
	}

	mutex_enter(&mqlist_lock);
	mq = mqueue_lookup(name);
	if (mq == NULL) {
		error = ENOENT;
		goto err;
	}
	KASSERT(mutex_owned(&mq->mq_mtx));

	/* Verify permissions. */
	if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq,
	    NULL, NULL)) {
		mutex_exit(&mq->mq_mtx);
		error = EACCES;
		goto err;
	}

	/* Remove and destroy if no references. */
	LIST_REMOVE(mq, mq_list);
	refcnt = mq->mq_refcnt;
	if (refcnt) {
		/* Mark as unlinked, if there are references. */
		mq->mq_attrib.mq_flags |= MQ_UNLINKED;
	}

	/* Wake up waiters, if there are any. */
	cv_broadcast(&mq->mq_send_cv);
	cv_broadcast(&mq->mq_recv_cv);

	selnotify(&mq->mq_rsel, POLLHUP, 0);
	selnotify(&mq->mq_wsel, POLLHUP, 0);

	mutex_exit(&mq->mq_mtx);
err:
	mutex_exit(&mqlist_lock);
	/*
	 * If last reference - destroy the message queue.  Otherwise,
	 * the last mq_close() call will do that.
	 */
1119 */ 1120 if (!error && refcnt == 0) { 1121 mqueue_destroy(mq); 1122 } 1123 kmem_free(name, MQ_NAMELEN); 1124 1125 return error; 1126 } 1127 1128 /* 1129 * System control nodes. 1130 */ 1131 static int 1132 mqueue_sysctl_init(void) 1133 { 1134 const struct sysctlnode *node = NULL; 1135 1136 mqsysctl_log = NULL; 1137 1138 sysctl_createv(&mqsysctl_log, 0, NULL, NULL, 1139 CTLFLAG_PERMANENT, 1140 CTLTYPE_NODE, "kern", NULL, 1141 NULL, 0, NULL, 0, 1142 CTL_KERN, CTL_EOL); 1143 sysctl_createv(&mqsysctl_log, 0, NULL, NULL, 1144 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1145 CTLTYPE_INT, "posix_msg", 1146 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 1147 "Message Passing option to which the " 1148 "system attempts to conform"), 1149 NULL, _POSIX_MESSAGE_PASSING, NULL, 0, 1150 CTL_KERN, CTL_CREATE, CTL_EOL); 1151 sysctl_createv(&mqsysctl_log, 0, NULL, &node, 1152 CTLFLAG_PERMANENT, 1153 CTLTYPE_NODE, "mqueue", 1154 SYSCTL_DESCR("Message queue options"), 1155 NULL, 0, NULL, 0, 1156 CTL_KERN, CTL_CREATE, CTL_EOL); 1157 1158 if (node == NULL) 1159 return ENXIO; 1160 1161 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1162 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1163 CTLTYPE_INT, "mq_open_max", 1164 SYSCTL_DESCR("Maximal number of message queue descriptors " 1165 "that process could open"), 1166 NULL, 0, &mq_open_max, 0, 1167 CTL_CREATE, CTL_EOL); 1168 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1169 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1170 CTLTYPE_INT, "mq_prio_max", 1171 SYSCTL_DESCR("Maximal priority of the message"), 1172 NULL, 0, &mq_prio_max, 0, 1173 CTL_CREATE, CTL_EOL); 1174 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1175 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1176 CTLTYPE_INT, "mq_max_msgsize", 1177 SYSCTL_DESCR("Maximal allowed size of the message"), 1178 NULL, 0, &mq_max_msgsize, 0, 1179 CTL_CREATE, CTL_EOL); 1180 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1181 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1182 CTLTYPE_INT, "mq_def_maxmsg", 1183 SYSCTL_DESCR("Default maximal message count"), 1184 NULL, 0, &mq_def_maxmsg, 0, 1185 CTL_CREATE, CTL_EOL); 1186 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1187 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1188 CTLTYPE_INT, "mq_max_maxmsg", 1189 SYSCTL_DESCR("Maximal allowed message count"), 1190 NULL, 0, &mq_max_maxmsg, 0, 1191 CTL_CREATE, CTL_EOL); 1192 1193 return 0; 1194 } 1195 1196 /* 1197 * Debugging. 1198 */ 1199 #if defined(DDB) 1200 1201 void 1202 mqueue_print_list(void (*pr)(const char *, ...)) 1203 { 1204 struct mqueue *mq; 1205 1206 (*pr)("Global list of the message queues:\n"); 1207 (*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n", 1208 "Name", "Ptr", "Mode", "Flags", "Ref", 1209 "MaxMsg", "MsgSze", "CurMsg"); 1210 LIST_FOREACH(mq, &mqueue_head, mq_list) { 1211 (*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n", 1212 mq->mq_name, mq, mq->mq_mode, 1213 mq->mq_attrib.mq_flags, mq->mq_refcnt, 1214 mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize, 1215 mq->mq_attrib.mq_curmsgs); 1216 } 1217 } 1218 1219 #endif /* defined(DDB) */ 1220