/*	$NetBSD: sys_mqueue.c,v 1.36 2014/02/25 18:30:11 pooka Exp $	*/

/*
 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX message queues.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 *
 * Locking
 *
 * Global list of message queues (mqueue_head) is protected by mqlist_lock.
 * Each message queue and its members are protected by mqueue::mq_mtx.
 * Note that proc_t::p_mqueue_cnt is updated atomically.
38 * 39 * Lock order: 40 * 41 * mqlist_lock -> 42 * mqueue::mq_mtx 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.36 2014/02/25 18:30:11 pooka Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/types.h> 50 #include <sys/atomic.h> 51 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/kauth.h> 55 #include <sys/lwp.h> 56 #include <sys/mqueue.h> 57 #include <sys/module.h> 58 #include <sys/poll.h> 59 #include <sys/select.h> 60 #include <sys/signal.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/sysctl.h> 64 #include <sys/syscall.h> 65 #include <sys/syscallvar.h> 66 #include <sys/syscallargs.h> 67 68 #include <miscfs/genfs/genfs.h> 69 70 MODULE(MODULE_CLASS_MISC, mqueue, NULL); 71 72 /* System-wide limits. */ 73 static u_int mq_open_max = MQ_OPEN_MAX; 74 static u_int mq_prio_max = MQ_PRIO_MAX; 75 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE; 76 static u_int mq_def_maxmsg = 32; 77 static u_int mq_max_maxmsg = 16 * 32; 78 79 static pool_cache_t mqmsg_cache __read_mostly; 80 static kmutex_t mqlist_lock __cacheline_aligned; 81 static LIST_HEAD(, mqueue) mqueue_head __cacheline_aligned; 82 static struct sysctllog * mqsysctl_log; 83 84 static kauth_listener_t mq_listener; 85 86 static int mqueue_sysinit(void); 87 static int mqueue_sysfini(bool); 88 static int mqueue_sysctl_init(void); 89 static int mq_poll_fop(file_t *, int); 90 static int mq_stat_fop(file_t *, struct stat *); 91 static int mq_close_fop(file_t *); 92 93 static const struct fileops mqops = { 94 .fo_read = fbadop_read, 95 .fo_write = fbadop_write, 96 .fo_ioctl = fbadop_ioctl, 97 .fo_fcntl = fnullop_fcntl, 98 .fo_poll = mq_poll_fop, 99 .fo_stat = mq_stat_fop, 100 .fo_close = mq_close_fop, 101 .fo_kqfilter = fnullop_kqfilter, 102 .fo_restart = fnullop_restart, 103 }; 104 105 static const struct syscall_package mqueue_syscalls[] = { 106 { SYS_mq_open, 0, (sy_call_t *)sys_mq_open }, 107 { SYS_mq_close, 0, (sy_call_t 
*)sys_mq_close }, 108 { SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink }, 109 { SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr }, 110 { SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr }, 111 { SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify }, 112 { SYS_mq_send, 0, (sy_call_t *)sys_mq_send }, 113 { SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive }, 114 { SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 }, 115 { SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 }, 116 { 0, 0, NULL } 117 }; 118 119 static int 120 mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 121 void *arg0, void *arg1, void *arg2, void *arg3) 122 { 123 mqueue_t *mq; 124 int result; 125 126 if (action != KAUTH_SYSTEM_MQUEUE) 127 return KAUTH_RESULT_DEFER; 128 129 result = KAUTH_RESULT_DEFER; 130 131 mq = arg1; 132 133 if (kauth_cred_geteuid(cred) == mq->mq_euid) 134 result = KAUTH_RESULT_ALLOW; 135 136 return result; 137 } 138 139 /* 140 * Initialisation and unloading of POSIX message queue subsystem. 141 */ 142 143 static int 144 mqueue_sysinit(void) 145 { 146 int error; 147 148 mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit, 149 0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL); 150 mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE); 151 LIST_INIT(&mqueue_head); 152 153 error = mqueue_sysctl_init(); 154 if (error) { 155 (void)mqueue_sysfini(false); 156 return error; 157 } 158 error = syscall_establish(NULL, mqueue_syscalls); 159 if (error) { 160 (void)mqueue_sysfini(false); 161 } 162 mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 163 mq_listener_cb, NULL); 164 return error; 165 } 166 167 static int 168 mqueue_sysfini(bool interface) 169 { 170 171 if (interface) { 172 int error; 173 bool inuse; 174 175 /* Stop syscall activity. */ 176 error = syscall_disestablish(NULL, mqueue_syscalls); 177 if (error) 178 return error; 179 /* Check if there are any message queues in use. 
*/ 180 mutex_enter(&mqlist_lock); 181 inuse = !LIST_EMPTY(&mqueue_head); 182 mutex_exit(&mqlist_lock); 183 if (inuse) { 184 error = syscall_establish(NULL, mqueue_syscalls); 185 KASSERT(error == 0); 186 return EBUSY; 187 } 188 } 189 190 if (mqsysctl_log != NULL) 191 sysctl_teardown(&mqsysctl_log); 192 193 kauth_unlisten_scope(mq_listener); 194 195 mutex_destroy(&mqlist_lock); 196 pool_cache_destroy(mqmsg_cache); 197 return 0; 198 } 199 200 /* 201 * Module interface. 202 */ 203 static int 204 mqueue_modcmd(modcmd_t cmd, void *arg) 205 { 206 207 switch (cmd) { 208 case MODULE_CMD_INIT: 209 return mqueue_sysinit(); 210 case MODULE_CMD_FINI: 211 return mqueue_sysfini(true); 212 default: 213 return ENOTTY; 214 } 215 } 216 217 /* 218 * Free the message. 219 */ 220 static void 221 mqueue_freemsg(struct mq_msg *msg, const size_t size) 222 { 223 224 if (size > MQ_DEF_MSGSIZE) { 225 kmem_free(msg, size); 226 } else { 227 pool_cache_put(mqmsg_cache, msg); 228 } 229 } 230 231 /* 232 * Destroy the message queue. 233 */ 234 static void 235 mqueue_destroy(struct mqueue *mq) 236 { 237 struct mq_msg *msg; 238 size_t msz; 239 u_int i; 240 241 /* Note MQ_PQSIZE + 1. */ 242 for (i = 0; i <= MQ_PQSIZE; i++) { 243 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) { 244 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue); 245 msz = sizeof(struct mq_msg) + msg->msg_len; 246 mqueue_freemsg(msg, msz); 247 } 248 } 249 if (mq->mq_name) { 250 kmem_free(mq->mq_name, MQ_NAMELEN); 251 } 252 seldestroy(&mq->mq_rsel); 253 seldestroy(&mq->mq_wsel); 254 cv_destroy(&mq->mq_send_cv); 255 cv_destroy(&mq->mq_recv_cv); 256 mutex_destroy(&mq->mq_mtx); 257 kmem_free(mq, sizeof(struct mqueue)); 258 } 259 260 /* 261 * mqueue_lookup: lookup for file name in general list of message queues. 
262 * 263 * => locks the message queue on success 264 */ 265 static mqueue_t * 266 mqueue_lookup(const char *name) 267 { 268 mqueue_t *mq; 269 270 KASSERT(mutex_owned(&mqlist_lock)); 271 272 LIST_FOREACH(mq, &mqueue_head, mq_list) { 273 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) { 274 mutex_enter(&mq->mq_mtx); 275 return mq; 276 } 277 } 278 return NULL; 279 } 280 281 /* 282 * mqueue_get: get the mqueue from the descriptor. 283 * 284 * => locks the message queue, if found. 285 * => holds a reference on the file descriptor. 286 */ 287 static int 288 mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret) 289 { 290 const int fd = (int)mqd; 291 mqueue_t *mq; 292 file_t *fp; 293 294 fp = fd_getfile(fd); 295 if (__predict_false(fp == NULL)) { 296 return EBADF; 297 } 298 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) { 299 fd_putfile(fd); 300 return EBADF; 301 } 302 if (fflag && (fp->f_flag & fflag) == 0) { 303 fd_putfile(fd); 304 return EBADF; 305 } 306 mq = fp->f_data; 307 mutex_enter(&mq->mq_mtx); 308 309 *mqret = mq; 310 return 0; 311 } 312 313 /* 314 * mqueue_linear_insert: perform linear insert according to the message 315 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a 316 * sorted list used only when mq_prio_max is increased via sysctl. 
317 */ 318 static inline void 319 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 320 { 321 struct mq_msg *mit; 322 323 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 324 if (msg->msg_prio > mit->msg_prio) 325 break; 326 } 327 if (mit == NULL) { 328 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 329 } else { 330 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 331 } 332 } 333 334 static int 335 mq_stat_fop(file_t *fp, struct stat *st) 336 { 337 struct mqueue *mq = fp->f_data; 338 339 memset(st, 0, sizeof(*st)); 340 341 mutex_enter(&mq->mq_mtx); 342 st->st_mode = mq->mq_mode; 343 st->st_uid = mq->mq_euid; 344 st->st_gid = mq->mq_egid; 345 st->st_atimespec = mq->mq_atime; 346 st->st_mtimespec = mq->mq_mtime; 347 st->st_ctimespec = st->st_birthtimespec = mq->mq_btime; 348 st->st_uid = kauth_cred_geteuid(fp->f_cred); 349 st->st_gid = kauth_cred_getegid(fp->f_cred); 350 mutex_exit(&mq->mq_mtx); 351 352 return 0; 353 } 354 355 static int 356 mq_poll_fop(file_t *fp, int events) 357 { 358 struct mqueue *mq = fp->f_data; 359 struct mq_attr *mqattr; 360 int revents = 0; 361 362 mutex_enter(&mq->mq_mtx); 363 mqattr = &mq->mq_attrib; 364 if (events & (POLLIN | POLLRDNORM)) { 365 /* Ready for receiving, if there are messages in the queue. */ 366 if (mqattr->mq_curmsgs) 367 revents |= events & (POLLIN | POLLRDNORM); 368 else 369 selrecord(curlwp, &mq->mq_rsel); 370 } 371 if (events & (POLLOUT | POLLWRNORM)) { 372 /* Ready for sending, if the message queue is not full. 
*/ 373 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 374 revents |= events & (POLLOUT | POLLWRNORM); 375 else 376 selrecord(curlwp, &mq->mq_wsel); 377 } 378 mutex_exit(&mq->mq_mtx); 379 380 return revents; 381 } 382 383 static int 384 mq_close_fop(file_t *fp) 385 { 386 proc_t *p = curproc; 387 mqueue_t *mq = fp->f_data; 388 bool destroy = false; 389 390 mutex_enter(&mq->mq_mtx); 391 KASSERT(mq->mq_refcnt > 0); 392 if (--mq->mq_refcnt == 0) { 393 /* Destroy if the last reference and unlinked. */ 394 destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0; 395 } 396 mutex_exit(&mq->mq_mtx); 397 398 if (destroy) { 399 mqueue_destroy(mq); 400 } 401 atomic_dec_uint(&p->p_mqueue_cnt); 402 return 0; 403 } 404 405 static int 406 mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred) 407 { 408 mode_t acc_mode = 0; 409 410 /* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */ 411 if (access & FREAD) { 412 acc_mode |= VREAD; 413 } 414 if (access & FWRITE) { 415 acc_mode |= VWRITE; 416 } 417 if (genfs_can_access(VNON, mq->mq_mode, mq->mq_euid, 418 mq->mq_egid, acc_mode, cred)) { 419 return EACCES; 420 } 421 return 0; 422 } 423 424 static int 425 mqueue_create(lwp_t *l, char *name, struct mq_attr *uattr, mode_t mode, 426 int oflag, mqueue_t **mqret) 427 { 428 proc_t *p = l->l_proc; 429 struct cwdinfo *cwdi = p->p_cwdi; 430 mqueue_t *mq; 431 struct mq_attr attr; 432 u_int i; 433 434 /* Pre-check the limit. */ 435 if (p->p_mqueue_cnt >= mq_open_max) { 436 return EMFILE; 437 } 438 439 /* Empty name is invalid. */ 440 if (name[0] == '\0') { 441 return EINVAL; 442 } 443 444 /* Check for mqueue attributes. 
*/ 445 if (uattr) { 446 int error; 447 448 error = copyin(uattr, &attr, sizeof(struct mq_attr)); 449 if (error) { 450 return error; 451 } 452 if (attr.mq_maxmsg <= 0 || attr.mq_maxmsg > mq_max_maxmsg || 453 attr.mq_msgsize <= 0 || attr.mq_msgsize > mq_max_msgsize) { 454 return EINVAL; 455 } 456 attr.mq_curmsgs = 0; 457 } else { 458 memset(&attr, 0, sizeof(struct mq_attr)); 459 attr.mq_maxmsg = mq_def_maxmsg; 460 attr.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg); 461 } 462 463 /* 464 * Allocate new message queue, initialize data structures, copy the 465 * name attributes. Note that the initial reference is set here. 466 */ 467 mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP); 468 469 mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE); 470 cv_init(&mq->mq_send_cv, "mqsendcv"); 471 cv_init(&mq->mq_recv_cv, "mqrecvcv"); 472 for (i = 0; i < (MQ_PQSIZE + 1); i++) { 473 TAILQ_INIT(&mq->mq_head[i]); 474 } 475 selinit(&mq->mq_rsel); 476 selinit(&mq->mq_wsel); 477 mq->mq_name = name; 478 mq->mq_refcnt = 1; 479 480 memcpy(&mq->mq_attrib, &attr, sizeof(struct mq_attr)); 481 482 CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0); 483 mq->mq_attrib.mq_flags = (O_MASK & oflag); 484 485 /* Store mode and effective UID with GID. */ 486 mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT; 487 mq->mq_euid = kauth_cred_geteuid(l->l_cred); 488 mq->mq_egid = kauth_cred_getegid(l->l_cred); 489 490 *mqret = mq; 491 return 0; 492 } 493 494 /* 495 * General mqueue system calls. 496 */ 497 498 int 499 sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap, 500 register_t *retval) 501 { 502 /* { 503 syscallarg(const char *) name; 504 syscallarg(int) oflag; 505 syscallarg(mode_t) mode; 506 syscallarg(struct mq_attr) attr; 507 } */ 508 struct proc *p = l->l_proc; 509 struct mqueue *mq, *mq_new = NULL; 510 int mqd, error, oflag = SCARG(uap, oflag); 511 file_t *fp; 512 char *name; 513 514 /* Get the name from the user-space. 
*/ 515 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 516 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL); 517 if (error) { 518 kmem_free(name, MQ_NAMELEN); 519 return error; 520 } 521 522 /* Allocate file structure and descriptor. */ 523 error = fd_allocfile(&fp, &mqd); 524 if (error) { 525 kmem_free(name, MQ_NAMELEN); 526 return error; 527 } 528 fp->f_type = DTYPE_MQUEUE; 529 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE); 530 fp->f_ops = &mqops; 531 532 if (oflag & O_CREAT) { 533 /* Create a new message queue. */ 534 error = mqueue_create(l, name, SCARG(uap, attr), 535 SCARG(uap, mode), oflag, &mq_new); 536 if (error) { 537 goto err; 538 } 539 KASSERT(mq_new != NULL); 540 } 541 542 /* Lookup for a message queue with such name. */ 543 mutex_enter(&mqlist_lock); 544 mq = mqueue_lookup(name); 545 if (mq) { 546 KASSERT(mutex_owned(&mq->mq_mtx)); 547 mutex_exit(&mqlist_lock); 548 549 /* Check for exclusive create. */ 550 if (oflag & O_EXCL) { 551 mutex_exit(&mq->mq_mtx); 552 error = EEXIST; 553 goto err; 554 } 555 556 /* Verify permissions. */ 557 if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) { 558 mutex_exit(&mq->mq_mtx); 559 error = EACCES; 560 goto err; 561 } 562 563 /* If we have the access, add a new reference. */ 564 mq->mq_refcnt++; 565 mutex_exit(&mq->mq_mtx); 566 } else { 567 /* Fail if not found and not creating. */ 568 if ((oflag & O_CREAT) == 0) { 569 mutex_exit(&mqlist_lock); 570 KASSERT(mq_new == NULL); 571 error = ENOENT; 572 goto err; 573 } 574 575 /* Account and check for the limit. */ 576 if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) { 577 mutex_exit(&mqlist_lock); 578 atomic_dec_uint(&p->p_mqueue_cnt); 579 error = EMFILE; 580 goto err; 581 } 582 583 /* Initial timestamps. */ 584 mq = mq_new; 585 getnanotime(&mq->mq_btime); 586 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 587 588 /* 589 * Finally, insert message queue into the list. 590 * Note: it already has the initial reference. 
591 */ 592 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 593 mutex_exit(&mqlist_lock); 594 595 mq_new = NULL; 596 name = NULL; 597 } 598 KASSERT(mq != NULL); 599 fp->f_data = mq; 600 fd_affix(p, fp, mqd); 601 *retval = mqd; 602 err: 603 if (error) { 604 fd_abort(p, fp, mqd); 605 } 606 if (mq_new) { 607 /* Note: will free the 'name'. */ 608 mqueue_destroy(mq_new); 609 } else if (name) { 610 kmem_free(name, MQ_NAMELEN); 611 } 612 return error; 613 } 614 615 int 616 sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap, 617 register_t *retval) 618 { 619 620 return sys_close(l, (const void *)uap, retval); 621 } 622 623 /* 624 * Primary mq_recv1() function. 625 */ 626 int 627 mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio, 628 struct timespec *ts, ssize_t *mlen) 629 { 630 struct mqueue *mq; 631 struct mq_msg *msg = NULL; 632 struct mq_attr *mqattr; 633 u_int idx; 634 int error; 635 636 error = mqueue_get(mqdes, FREAD, &mq); 637 if (error) { 638 return error; 639 } 640 getnanotime(&mq->mq_atime); 641 mqattr = &mq->mq_attrib; 642 643 /* Check the message size limits */ 644 if (msg_len < mqattr->mq_msgsize) { 645 error = EMSGSIZE; 646 goto error; 647 } 648 649 /* Check if queue is empty */ 650 while (mqattr->mq_curmsgs == 0) { 651 int t; 652 653 if (mqattr->mq_flags & O_NONBLOCK) { 654 error = EAGAIN; 655 goto error; 656 } 657 if (ts) { 658 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 659 NULL); 660 if (error) 661 goto error; 662 } else 663 t = 0; 664 /* 665 * Block until someone sends the message. 666 * While doing this, notification should not be sent. 667 */ 668 mqattr->mq_flags |= MQ_RECEIVE; 669 error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t); 670 mqattr->mq_flags &= ~MQ_RECEIVE; 671 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 672 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 673 goto error; 674 } 675 } 676 677 /* 678 * Find the highest priority message, and remove it from the queue. 
679 * At first, reserved queue is checked, bitmap is next. 680 */ 681 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 682 if (__predict_true(msg == NULL)) { 683 idx = ffs(mq->mq_bitmap); 684 msg = TAILQ_FIRST(&mq->mq_head[idx]); 685 KASSERT(msg != NULL); 686 } else { 687 idx = MQ_PQRESQ; 688 } 689 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 690 691 /* Unmark the bit, if last message. */ 692 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) { 693 KASSERT((MQ_PQSIZE - idx) == msg->msg_prio); 694 mq->mq_bitmap &= ~(1 << --idx); 695 } 696 697 /* Decrement the counter and signal waiter, if any */ 698 mqattr->mq_curmsgs--; 699 cv_signal(&mq->mq_recv_cv); 700 701 /* Ready for sending now */ 702 selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0); 703 error: 704 mutex_exit(&mq->mq_mtx); 705 fd_putfile((int)mqdes); 706 if (error) 707 return error; 708 709 /* 710 * Copy the data to the user-space. 711 * Note: According to POSIX, no message should be removed from the 712 * queue in case of fail - this would be violated. 
713 */ 714 *mlen = msg->msg_len; 715 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 716 if (error == 0 && msg_prio) 717 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 718 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 719 720 return error; 721 } 722 723 int 724 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap, 725 register_t *retval) 726 { 727 /* { 728 syscallarg(mqd_t) mqdes; 729 syscallarg(char *) msg_ptr; 730 syscallarg(size_t) msg_len; 731 syscallarg(unsigned *) msg_prio; 732 } */ 733 ssize_t mlen; 734 int error; 735 736 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 737 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen); 738 if (error == 0) 739 *retval = mlen; 740 741 return error; 742 } 743 744 int 745 sys___mq_timedreceive50(struct lwp *l, 746 const struct sys___mq_timedreceive50_args *uap, register_t *retval) 747 { 748 /* { 749 syscallarg(mqd_t) mqdes; 750 syscallarg(char *) msg_ptr; 751 syscallarg(size_t) msg_len; 752 syscallarg(unsigned *) msg_prio; 753 syscallarg(const struct timespec *) abs_timeout; 754 } */ 755 struct timespec ts, *tsp; 756 ssize_t mlen; 757 int error; 758 759 /* Get and convert time value */ 760 if (SCARG(uap, abs_timeout)) { 761 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 762 if (error) 763 return error; 764 tsp = &ts; 765 } else { 766 tsp = NULL; 767 } 768 769 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 770 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); 771 if (error == 0) 772 *retval = mlen; 773 774 return error; 775 } 776 777 /* 778 * Primary mq_send1() function. 
779 */ 780 int 781 mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio, 782 struct timespec *ts) 783 { 784 struct mqueue *mq; 785 struct mq_msg *msg; 786 struct mq_attr *mqattr; 787 struct proc *notify = NULL; 788 ksiginfo_t ksi; 789 size_t size; 790 int error; 791 792 /* Check the priority range */ 793 if (msg_prio >= mq_prio_max) 794 return EINVAL; 795 796 /* Allocate a new message */ 797 size = sizeof(struct mq_msg) + msg_len; 798 if (size > mq_max_msgsize) 799 return EMSGSIZE; 800 801 if (size > MQ_DEF_MSGSIZE) { 802 msg = kmem_alloc(size, KM_SLEEP); 803 } else { 804 msg = pool_cache_get(mqmsg_cache, PR_WAITOK); 805 } 806 807 /* Get the data from user-space */ 808 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 809 if (error) { 810 mqueue_freemsg(msg, size); 811 return error; 812 } 813 msg->msg_len = msg_len; 814 msg->msg_prio = msg_prio; 815 816 error = mqueue_get(mqdes, FWRITE, &mq); 817 if (error) { 818 mqueue_freemsg(msg, size); 819 return error; 820 } 821 getnanotime(&mq->mq_mtime); 822 mqattr = &mq->mq_attrib; 823 824 /* Check the message size limit */ 825 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 826 error = EMSGSIZE; 827 goto error; 828 } 829 830 /* Check if queue is full */ 831 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 832 int t; 833 834 if (mqattr->mq_flags & O_NONBLOCK) { 835 error = EAGAIN; 836 goto error; 837 } 838 if (ts) { 839 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 840 NULL); 841 if (error) 842 goto error; 843 } else 844 t = 0; 845 /* Block until queue becomes available */ 846 error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t); 847 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 848 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 849 goto error; 850 } 851 } 852 KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg); 853 854 /* 855 * Insert message into the queue, according to the priority. 856 * Note the difference between index and priority. 
857 */ 858 if (__predict_true(msg_prio < MQ_PQSIZE)) { 859 u_int idx = MQ_PQSIZE - msg_prio; 860 861 KASSERT(idx != MQ_PQRESQ); 862 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 863 mq->mq_bitmap |= (1 << --idx); 864 } else { 865 mqueue_linear_insert(mq, msg); 866 } 867 868 /* Check for the notify */ 869 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 870 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 871 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 872 /* Initialize the signal */ 873 KSI_INIT(&ksi); 874 ksi.ksi_signo = mq->mq_sig_notify.sigev_signo; 875 ksi.ksi_code = SI_MESGQ; 876 ksi.ksi_value = mq->mq_sig_notify.sigev_value; 877 /* Unregister the process */ 878 notify = mq->mq_notify_proc; 879 mq->mq_notify_proc = NULL; 880 } 881 882 /* Increment the counter and signal waiter, if any */ 883 mqattr->mq_curmsgs++; 884 cv_signal(&mq->mq_send_cv); 885 886 /* Ready for receiving now */ 887 selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0); 888 error: 889 mutex_exit(&mq->mq_mtx); 890 fd_putfile((int)mqdes); 891 892 if (error) { 893 mqueue_freemsg(msg, size); 894 } else if (notify) { 895 /* Send the notify, if needed */ 896 mutex_enter(proc_lock); 897 kpsignal(notify, &ksi, NULL); 898 mutex_exit(proc_lock); 899 } 900 return error; 901 } 902 903 int 904 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap, 905 register_t *retval) 906 { 907 /* { 908 syscallarg(mqd_t) mqdes; 909 syscallarg(const char *) msg_ptr; 910 syscallarg(size_t) msg_len; 911 syscallarg(unsigned) msg_prio; 912 } */ 913 914 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 915 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL); 916 } 917 918 int 919 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap, 920 register_t *retval) 921 { 922 /* { 923 syscallarg(mqd_t) mqdes; 924 syscallarg(const char *) msg_ptr; 925 syscallarg(size_t) msg_len; 926 syscallarg(unsigned) msg_prio; 927 syscallarg(const struct timespec *) abs_timeout; 928 } */ 929 struct timespec 
ts, *tsp; 930 int error; 931 932 /* Get and convert time value */ 933 if (SCARG(uap, abs_timeout)) { 934 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 935 if (error) 936 return error; 937 tsp = &ts; 938 } else { 939 tsp = NULL; 940 } 941 942 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 943 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); 944 } 945 946 int 947 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap, 948 register_t *retval) 949 { 950 /* { 951 syscallarg(mqd_t) mqdes; 952 syscallarg(const struct sigevent *) notification; 953 } */ 954 struct mqueue *mq; 955 struct sigevent sig; 956 int error; 957 958 if (SCARG(uap, notification)) { 959 /* Get the signal from user-space */ 960 error = copyin(SCARG(uap, notification), &sig, 961 sizeof(struct sigevent)); 962 if (error) 963 return error; 964 if (sig.sigev_notify == SIGEV_SIGNAL && 965 (sig.sigev_signo <=0 || sig.sigev_signo >= NSIG)) 966 return EINVAL; 967 } 968 969 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 970 if (error) { 971 return error; 972 } 973 if (SCARG(uap, notification)) { 974 /* Register notification: set the signal and target process */ 975 if (mq->mq_notify_proc == NULL) { 976 memcpy(&mq->mq_sig_notify, &sig, 977 sizeof(struct sigevent)); 978 mq->mq_notify_proc = l->l_proc; 979 } else { 980 /* Fail if someone else already registered */ 981 error = EBUSY; 982 } 983 } else { 984 /* Unregister the notification */ 985 mq->mq_notify_proc = NULL; 986 } 987 mutex_exit(&mq->mq_mtx); 988 fd_putfile((int)SCARG(uap, mqdes)); 989 990 return error; 991 } 992 993 int 994 sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap, 995 register_t *retval) 996 { 997 /* { 998 syscallarg(mqd_t) mqdes; 999 syscallarg(struct mq_attr *) mqstat; 1000 } */ 1001 struct mqueue *mq; 1002 struct mq_attr attr; 1003 int error; 1004 1005 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1006 if (error) { 1007 return error; 1008 } 1009 memcpy(&attr, &mq->mq_attrib, sizeof(struct 
mq_attr)); 1010 mutex_exit(&mq->mq_mtx); 1011 fd_putfile((int)SCARG(uap, mqdes)); 1012 1013 return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr)); 1014 } 1015 1016 int 1017 sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap, 1018 register_t *retval) 1019 { 1020 /* { 1021 syscallarg(mqd_t) mqdes; 1022 syscallarg(const struct mq_attr *) mqstat; 1023 syscallarg(struct mq_attr *) omqstat; 1024 } */ 1025 struct mqueue *mq; 1026 struct mq_attr attr; 1027 int error, nonblock; 1028 1029 error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr)); 1030 if (error) 1031 return error; 1032 nonblock = (attr.mq_flags & O_NONBLOCK); 1033 1034 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1035 if (error) { 1036 return error; 1037 } 1038 1039 /* Copy the old attributes, if needed */ 1040 if (SCARG(uap, omqstat)) { 1041 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1042 } 1043 1044 /* Ignore everything, except O_NONBLOCK */ 1045 if (nonblock) 1046 mq->mq_attrib.mq_flags |= O_NONBLOCK; 1047 else 1048 mq->mq_attrib.mq_flags &= ~O_NONBLOCK; 1049 1050 mutex_exit(&mq->mq_mtx); 1051 fd_putfile((int)SCARG(uap, mqdes)); 1052 1053 /* 1054 * Copy the data to the user-space. 1055 * Note: According to POSIX, the new attributes should not be set in 1056 * case of fail - this would be violated. 
1057 */ 1058 if (SCARG(uap, omqstat)) 1059 error = copyout(&attr, SCARG(uap, omqstat), 1060 sizeof(struct mq_attr)); 1061 1062 return error; 1063 } 1064 1065 int 1066 sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap, 1067 register_t *retval) 1068 { 1069 /* { 1070 syscallarg(const char *) name; 1071 } */ 1072 mqueue_t *mq; 1073 char *name; 1074 int error, refcnt = 0; 1075 1076 /* Get the name from the user-space */ 1077 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 1078 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL); 1079 if (error) { 1080 kmem_free(name, MQ_NAMELEN); 1081 return error; 1082 } 1083 1084 mutex_enter(&mqlist_lock); 1085 mq = mqueue_lookup(name); 1086 if (mq == NULL) { 1087 error = ENOENT; 1088 goto err; 1089 } 1090 KASSERT(mutex_owned(&mq->mq_mtx)); 1091 1092 /* Verify permissions. */ 1093 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq, 1094 NULL, NULL)) { 1095 mutex_exit(&mq->mq_mtx); 1096 error = EACCES; 1097 goto err; 1098 } 1099 1100 /* Remove and destroy if no references. */ 1101 LIST_REMOVE(mq, mq_list); 1102 refcnt = mq->mq_refcnt; 1103 if (refcnt) { 1104 /* Mark as unlinked, if there are references. */ 1105 mq->mq_attrib.mq_flags |= MQ_UNLINKED; 1106 } 1107 1108 /* Wake up waiters, if there are any. */ 1109 cv_broadcast(&mq->mq_send_cv); 1110 cv_broadcast(&mq->mq_recv_cv); 1111 1112 selnotify(&mq->mq_rsel, POLLHUP, 0); 1113 selnotify(&mq->mq_wsel, POLLHUP, 0); 1114 1115 mutex_exit(&mq->mq_mtx); 1116 err: 1117 mutex_exit(&mqlist_lock); 1118 /* 1119 * If last reference - destroy the message queue. Otherwise, 1120 * the last mq_close() call will do that. 1121 */ 1122 if (!error && refcnt == 0) { 1123 mqueue_destroy(mq); 1124 } 1125 kmem_free(name, MQ_NAMELEN); 1126 1127 return error; 1128 } 1129 1130 /* 1131 * System control nodes. 
1132 */ 1133 static int 1134 mqueue_sysctl_init(void) 1135 { 1136 const struct sysctlnode *node = NULL; 1137 1138 mqsysctl_log = NULL; 1139 1140 sysctl_createv(&mqsysctl_log, 0, NULL, NULL, 1141 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1142 CTLTYPE_INT, "posix_msg", 1143 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 1144 "Message Passing option to which the " 1145 "system attempts to conform"), 1146 NULL, _POSIX_MESSAGE_PASSING, NULL, 0, 1147 CTL_KERN, CTL_CREATE, CTL_EOL); 1148 sysctl_createv(&mqsysctl_log, 0, NULL, &node, 1149 CTLFLAG_PERMANENT, 1150 CTLTYPE_NODE, "mqueue", 1151 SYSCTL_DESCR("Message queue options"), 1152 NULL, 0, NULL, 0, 1153 CTL_KERN, CTL_CREATE, CTL_EOL); 1154 1155 if (node == NULL) 1156 return ENXIO; 1157 1158 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1159 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1160 CTLTYPE_INT, "mq_open_max", 1161 SYSCTL_DESCR("Maximal number of message queue descriptors " 1162 "that process could open"), 1163 NULL, 0, &mq_open_max, 0, 1164 CTL_CREATE, CTL_EOL); 1165 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1166 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1167 CTLTYPE_INT, "mq_prio_max", 1168 SYSCTL_DESCR("Maximal priority of the message"), 1169 NULL, 0, &mq_prio_max, 0, 1170 CTL_CREATE, CTL_EOL); 1171 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1172 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1173 CTLTYPE_INT, "mq_max_msgsize", 1174 SYSCTL_DESCR("Maximal allowed size of the message"), 1175 NULL, 0, &mq_max_msgsize, 0, 1176 CTL_CREATE, CTL_EOL); 1177 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1178 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1179 CTLTYPE_INT, "mq_def_maxmsg", 1180 SYSCTL_DESCR("Default maximal message count"), 1181 NULL, 0, &mq_def_maxmsg, 0, 1182 CTL_CREATE, CTL_EOL); 1183 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1184 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1185 CTLTYPE_INT, "mq_max_maxmsg", 1186 SYSCTL_DESCR("Maximal allowed message count"), 1187 NULL, 0, &mq_max_maxmsg, 0, 1188 CTL_CREATE, CTL_EOL); 
1189 1190 return 0; 1191 } 1192 1193 /* 1194 * Debugging. 1195 */ 1196 #if defined(DDB) 1197 1198 void 1199 mqueue_print_list(void (*pr)(const char *, ...)) 1200 { 1201 struct mqueue *mq; 1202 1203 (*pr)("Global list of the message queues:\n"); 1204 (*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n", 1205 "Name", "Ptr", "Mode", "Flags", "Ref", 1206 "MaxMsg", "MsgSze", "CurMsg"); 1207 LIST_FOREACH(mq, &mqueue_head, mq_list) { 1208 (*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n", 1209 mq->mq_name, mq, mq->mq_mode, 1210 mq->mq_attrib.mq_flags, mq->mq_refcnt, 1211 mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize, 1212 mq->mq_attrib.mq_curmsgs); 1213 } 1214 } 1215 1216 #endif /* defined(DDB) */ 1217