1 /* $NetBSD: sys_mqueue.c,v 1.40 2017/11/30 20:25:55 christos Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Implementation of POSIX message queues. 31 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. 32 * 33 * Locking 34 * 35 * Global list of message queues (mqueue_head) is protected by mqlist_lock. 36 * Each message queue and its members are protected by mqueue::mq_mtx. 37 * Note that proc_t::p_mqueue_cnt is updated atomically. 38 * 39 * Lock order: 40 * 41 * mqlist_lock -> 42 * mqueue::mq_mtx 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.40 2017/11/30 20:25:55 christos Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/types.h> 50 #include <sys/atomic.h> 51 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/kauth.h> 55 #include <sys/lwp.h> 56 #include <sys/mqueue.h> 57 #include <sys/module.h> 58 #include <sys/poll.h> 59 #include <sys/select.h> 60 #include <sys/signal.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/sysctl.h> 64 #include <sys/syscall.h> 65 #include <sys/syscallvar.h> 66 #include <sys/syscallargs.h> 67 68 #include <miscfs/genfs/genfs.h> 69 70 MODULE(MODULE_CLASS_MISC, mqueue, NULL); 71 72 /* System-wide limits. */ 73 static u_int mq_open_max = MQ_OPEN_MAX; 74 static u_int mq_prio_max = MQ_PRIO_MAX; 75 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE; 76 static u_int mq_def_maxmsg = 32; 77 static u_int mq_max_maxmsg = 16 * 32; 78 79 static pool_cache_t mqmsg_cache __read_mostly; 80 static kmutex_t mqlist_lock __cacheline_aligned; 81 static LIST_HEAD(, mqueue) mqueue_head __cacheline_aligned; 82 static struct sysctllog * mqsysctl_log; 83 84 static kauth_listener_t mq_listener; 85 86 static int mqueue_sysinit(void); 87 static int mqueue_sysfini(bool); 88 static int mqueue_sysctl_init(void); 89 static int mq_poll_fop(file_t *, int); 90 static int mq_stat_fop(file_t *, struct stat *); 91 static int mq_close_fop(file_t *); 92 93 static const struct fileops mqops = { 94 .fo_name = "mq", 95 .fo_read = fbadop_read, 96 .fo_write = fbadop_write, 97 .fo_ioctl = fbadop_ioctl, 98 .fo_fcntl = fnullop_fcntl, 99 .fo_poll = mq_poll_fop, 100 .fo_stat = mq_stat_fop, 101 .fo_close = mq_close_fop, 102 .fo_kqfilter = fnullop_kqfilter, 103 .fo_restart = fnullop_restart, 104 }; 105 106 static const struct syscall_package mqueue_syscalls[] = { 107 { SYS_mq_open, 0, (sy_call_t *)sys_mq_open }, 108 { SYS_mq_close, 0, (sy_call_t *)sys_mq_close }, 109 { SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink }, 110 { SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr }, 111 { SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr }, 112 { SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify }, 113 { SYS_mq_send, 0, (sy_call_t *)sys_mq_send }, 114 { SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive }, 115 { SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 }, 116 { SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 }, 117 { 0, 0, NULL } 118 }; 119 120 static int 121 mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 122 void *arg0, void *arg1, void *arg2, void *arg3) 123 { 124 mqueue_t *mq; 125 int result; 126 127 if (action != KAUTH_SYSTEM_MQUEUE) 128 return KAUTH_RESULT_DEFER; 129 130 result = KAUTH_RESULT_DEFER; 131 132 mq = arg1; 133 134 if (kauth_cred_geteuid(cred) == mq->mq_euid) 135 result = KAUTH_RESULT_ALLOW; 136 137 return result; 138 } 139 140 /* 141 * Initialisation and unloading of POSIX message queue subsystem. 142 */ 143 144 static int 145 mqueue_sysinit(void) 146 { 147 int error; 148 149 mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit, 150 0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL); 151 mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE); 152 LIST_INIT(&mqueue_head); 153 154 error = mqueue_sysctl_init(); 155 if (error) { 156 (void)mqueue_sysfini(false); 157 return error; 158 } 159 error = syscall_establish(NULL, mqueue_syscalls); 160 if (error) { 161 (void)mqueue_sysfini(false); 162 } 163 mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 164 mq_listener_cb, NULL); 165 return error; 166 } 167 168 static int 169 mqueue_sysfini(bool interface) 170 { 171 172 if (interface) { 173 int error; 174 bool inuse; 175 176 /* Stop syscall activity. */ 177 error = syscall_disestablish(NULL, mqueue_syscalls); 178 if (error) 179 return error; 180 /* Check if there are any message queues in use. */ 181 mutex_enter(&mqlist_lock); 182 inuse = !LIST_EMPTY(&mqueue_head); 183 mutex_exit(&mqlist_lock); 184 if (inuse) { 185 error = syscall_establish(NULL, mqueue_syscalls); 186 KASSERT(error == 0); 187 return EBUSY; 188 } 189 } 190 191 if (mqsysctl_log != NULL) 192 sysctl_teardown(&mqsysctl_log); 193 194 kauth_unlisten_scope(mq_listener); 195 196 mutex_destroy(&mqlist_lock); 197 pool_cache_destroy(mqmsg_cache); 198 return 0; 199 } 200 201 /* 202 * Module interface. 203 */ 204 static int 205 mqueue_modcmd(modcmd_t cmd, void *arg) 206 { 207 208 switch (cmd) { 209 case MODULE_CMD_INIT: 210 return mqueue_sysinit(); 211 case MODULE_CMD_FINI: 212 return mqueue_sysfini(true); 213 default: 214 return ENOTTY; 215 } 216 } 217 218 /* 219 * Free the message. 220 */ 221 static void 222 mqueue_freemsg(struct mq_msg *msg, const size_t size) 223 { 224 225 if (size > MQ_DEF_MSGSIZE) { 226 kmem_free(msg, size); 227 } else { 228 pool_cache_put(mqmsg_cache, msg); 229 } 230 } 231 232 /* 233 * Destroy the message queue. 234 */ 235 static void 236 mqueue_destroy(struct mqueue *mq) 237 { 238 struct mq_msg *msg; 239 size_t msz; 240 u_int i; 241 242 /* Note MQ_PQSIZE + 1. */ 243 for (i = 0; i <= MQ_PQSIZE; i++) { 244 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) { 245 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue); 246 msz = sizeof(struct mq_msg) + msg->msg_len; 247 mqueue_freemsg(msg, msz); 248 } 249 } 250 if (mq->mq_name) { 251 kmem_free(mq->mq_name, MQ_NAMELEN); 252 } 253 seldestroy(&mq->mq_rsel); 254 seldestroy(&mq->mq_wsel); 255 cv_destroy(&mq->mq_send_cv); 256 cv_destroy(&mq->mq_recv_cv); 257 mutex_destroy(&mq->mq_mtx); 258 kmem_free(mq, sizeof(struct mqueue)); 259 } 260 261 /* 262 * mqueue_lookup: lookup for file name in general list of message queues. 263 * 264 * => locks the message queue on success 265 */ 266 static mqueue_t * 267 mqueue_lookup(const char *name) 268 { 269 mqueue_t *mq; 270 271 KASSERT(mutex_owned(&mqlist_lock)); 272 273 LIST_FOREACH(mq, &mqueue_head, mq_list) { 274 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) { 275 mutex_enter(&mq->mq_mtx); 276 return mq; 277 } 278 } 279 return NULL; 280 } 281 282 /* 283 * mqueue_get: get the mqueue from the descriptor. 284 * 285 * => locks the message queue, if found. 286 * => holds a reference on the file descriptor. 287 */ 288 int 289 mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret) 290 { 291 const int fd = (int)mqd; 292 mqueue_t *mq; 293 file_t *fp; 294 295 fp = fd_getfile(fd); 296 if (__predict_false(fp == NULL)) { 297 return EBADF; 298 } 299 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) { 300 fd_putfile(fd); 301 return EBADF; 302 } 303 if (fflag && (fp->f_flag & fflag) == 0) { 304 fd_putfile(fd); 305 return EBADF; 306 } 307 mq = fp->f_mqueue; 308 mutex_enter(&mq->mq_mtx); 309 310 *mqret = mq; 311 return 0; 312 } 313 314 /* 315 * mqueue_linear_insert: perform linear insert according to the message 316 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a 317 * sorted list used only when mq_prio_max is increased via sysctl. 318 */ 319 static inline void 320 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 321 { 322 struct mq_msg *mit; 323 324 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 325 if (msg->msg_prio > mit->msg_prio) 326 break; 327 } 328 if (mit == NULL) { 329 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 330 } else { 331 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 332 } 333 } 334 335 static int 336 mq_stat_fop(file_t *fp, struct stat *st) 337 { 338 struct mqueue *mq = fp->f_mqueue; 339 340 memset(st, 0, sizeof(*st)); 341 342 mutex_enter(&mq->mq_mtx); 343 st->st_mode = mq->mq_mode; 344 st->st_uid = mq->mq_euid; 345 st->st_gid = mq->mq_egid; 346 st->st_atimespec = mq->mq_atime; 347 st->st_mtimespec = mq->mq_mtime; 348 st->st_ctimespec = st->st_birthtimespec = mq->mq_btime; 349 st->st_uid = kauth_cred_geteuid(fp->f_cred); 350 st->st_gid = kauth_cred_getegid(fp->f_cred); 351 mutex_exit(&mq->mq_mtx); 352 353 return 0; 354 } 355 356 static int 357 mq_poll_fop(file_t *fp, int events) 358 { 359 struct mqueue *mq = fp->f_mqueue; 360 struct mq_attr *mqattr; 361 int revents = 0; 362 363 mutex_enter(&mq->mq_mtx); 364 mqattr = &mq->mq_attrib; 365 if (events & (POLLIN | POLLRDNORM)) { 366 /* Ready for receiving, if there are messages in the queue. */ 367 if (mqattr->mq_curmsgs) 368 revents |= events & (POLLIN | POLLRDNORM); 369 else 370 selrecord(curlwp, &mq->mq_rsel); 371 } 372 if (events & (POLLOUT | POLLWRNORM)) { 373 /* Ready for sending, if the message queue is not full. */ 374 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 375 revents |= events & (POLLOUT | POLLWRNORM); 376 else 377 selrecord(curlwp, &mq->mq_wsel); 378 } 379 mutex_exit(&mq->mq_mtx); 380 381 return revents; 382 } 383 384 static int 385 mq_close_fop(file_t *fp) 386 { 387 proc_t *p = curproc; 388 mqueue_t *mq = fp->f_mqueue; 389 bool destroy = false; 390 391 mutex_enter(&mq->mq_mtx); 392 KASSERT(mq->mq_refcnt > 0); 393 if (--mq->mq_refcnt == 0) { 394 /* Destroy if the last reference and unlinked. */ 395 destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0; 396 } 397 mutex_exit(&mq->mq_mtx); 398 399 if (destroy) { 400 mqueue_destroy(mq); 401 } 402 atomic_dec_uint(&p->p_mqueue_cnt); 403 return 0; 404 } 405 406 static int 407 mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred) 408 { 409 mode_t acc_mode = 0; 410 411 /* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */ 412 if (access & FREAD) { 413 acc_mode |= VREAD; 414 } 415 if (access & FWRITE) { 416 acc_mode |= VWRITE; 417 } 418 if (genfs_can_access(VNON, mq->mq_mode, mq->mq_euid, 419 mq->mq_egid, acc_mode, cred)) { 420 return EACCES; 421 } 422 return 0; 423 } 424 425 static int 426 mqueue_create(lwp_t *l, char *name, struct mq_attr *attr, mode_t mode, 427 int oflag, mqueue_t **mqret) 428 { 429 proc_t *p = l->l_proc; 430 struct cwdinfo *cwdi = p->p_cwdi; 431 mqueue_t *mq; 432 u_int i; 433 434 /* Pre-check the limit. */ 435 if (p->p_mqueue_cnt >= mq_open_max) { 436 return EMFILE; 437 } 438 439 /* Empty name is invalid. */ 440 if (name[0] == '\0') { 441 return EINVAL; 442 } 443 444 /* Check for mqueue attributes. */ 445 if (attr) { 446 if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > mq_max_maxmsg || 447 attr->mq_msgsize <= 0 || 448 attr->mq_msgsize > mq_max_msgsize) { 449 return EINVAL; 450 } 451 attr->mq_curmsgs = 0; 452 } 453 454 /* 455 * Allocate new message queue, initialize data structures, copy the 456 * name attributes. Note that the initial reference is set here. 457 */ 458 mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP); 459 460 mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE); 461 cv_init(&mq->mq_send_cv, "mqsendcv"); 462 cv_init(&mq->mq_recv_cv, "mqrecvcv"); 463 for (i = 0; i < (MQ_PQSIZE + 1); i++) { 464 TAILQ_INIT(&mq->mq_head[i]); 465 } 466 selinit(&mq->mq_rsel); 467 selinit(&mq->mq_wsel); 468 mq->mq_name = name; 469 mq->mq_refcnt = 1; 470 471 if (attr != NULL) { 472 memcpy(&mq->mq_attrib, attr, sizeof(struct mq_attr)); 473 } else { 474 memset(&mq->mq_attrib, 0, sizeof(struct mq_attr)); 475 mq->mq_attrib.mq_maxmsg = mq_def_maxmsg; 476 mq->mq_attrib.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg); 477 } 478 479 CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0); 480 mq->mq_attrib.mq_flags = (O_MASK & oflag); 481 482 /* Store mode and effective UID with GID. */ 483 mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT; 484 mq->mq_euid = kauth_cred_geteuid(l->l_cred); 485 mq->mq_egid = kauth_cred_getegid(l->l_cred); 486 487 *mqret = mq; 488 return 0; 489 } 490 491 /* 492 * Helper function for mq_open() - note that "u_name" is a userland pointer, 493 * while "attr" is a kernel pointer! 494 */ 495 int 496 mq_handle_open(struct lwp *l, const char *u_name, int oflag, mode_t mode, 497 struct mq_attr *attr, register_t *retval) 498 { 499 struct proc *p = l->l_proc; 500 struct mqueue *mq, *mq_new = NULL; 501 int mqd, error; 502 file_t *fp; 503 char *name; 504 505 /* Get the name from the user-space. */ 506 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 507 error = copyinstr(u_name, name, MQ_NAMELEN - 1, NULL); 508 if (error) { 509 kmem_free(name, MQ_NAMELEN); 510 return error; 511 } 512 513 /* Allocate file structure and descriptor. */ 514 error = fd_allocfile(&fp, &mqd); 515 if (error) { 516 kmem_free(name, MQ_NAMELEN); 517 return error; 518 } 519 fp->f_type = DTYPE_MQUEUE; 520 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE); 521 fp->f_ops = &mqops; 522 523 if (oflag & O_CREAT) { 524 /* Create a new message queue. */ 525 error = mqueue_create(l, name, attr, mode, oflag, &mq_new); 526 if (error) { 527 goto err; 528 } 529 KASSERT(mq_new != NULL); 530 } 531 532 /* Lookup for a message queue with such name. */ 533 mutex_enter(&mqlist_lock); 534 mq = mqueue_lookup(name); 535 if (mq) { 536 KASSERT(mutex_owned(&mq->mq_mtx)); 537 mutex_exit(&mqlist_lock); 538 539 /* Check for exclusive create. */ 540 if (oflag & O_EXCL) { 541 mutex_exit(&mq->mq_mtx); 542 error = EEXIST; 543 goto err; 544 } 545 546 /* Verify permissions. */ 547 if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) { 548 mutex_exit(&mq->mq_mtx); 549 error = EACCES; 550 goto err; 551 } 552 553 /* If we have the access, add a new reference. */ 554 mq->mq_refcnt++; 555 mutex_exit(&mq->mq_mtx); 556 } else { 557 /* Fail if not found and not creating. */ 558 if ((oflag & O_CREAT) == 0) { 559 mutex_exit(&mqlist_lock); 560 KASSERT(mq_new == NULL); 561 error = ENOENT; 562 goto err; 563 } 564 565 /* Account and check for the limit. */ 566 if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) { 567 mutex_exit(&mqlist_lock); 568 atomic_dec_uint(&p->p_mqueue_cnt); 569 error = EMFILE; 570 goto err; 571 } 572 573 /* Initial timestamps. */ 574 mq = mq_new; 575 getnanotime(&mq->mq_btime); 576 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 577 578 /* 579 * Finally, insert message queue into the list. 580 * Note: it already has the initial reference. 581 */ 582 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 583 mutex_exit(&mqlist_lock); 584 585 mq_new = NULL; 586 name = NULL; 587 } 588 KASSERT(mq != NULL); 589 fp->f_mqueue = mq; 590 fd_affix(p, fp, mqd); 591 *retval = mqd; 592 err: 593 if (error) { 594 fd_abort(p, fp, mqd); 595 } 596 if (mq_new) { 597 /* Note: will free the 'name'. */ 598 mqueue_destroy(mq_new); 599 } else if (name) { 600 kmem_free(name, MQ_NAMELEN); 601 } 602 return error; 603 } 604 605 /* 606 * General mqueue system calls. 607 */ 608 609 int 610 sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap, 611 register_t *retval) 612 { 613 /* { 614 syscallarg(const char *) name; 615 syscallarg(int) oflag; 616 syscallarg(mode_t) mode; 617 syscallarg(struct mq_attr) attr; 618 } */ 619 struct mq_attr *attr = NULL, a; 620 int error; 621 622 if ((SCARG(uap, oflag) & O_CREAT) != 0 && SCARG(uap, attr) != NULL) { 623 error = copyin(SCARG(uap, attr), &a, sizeof(a)); 624 if (error) 625 return error; 626 attr = &a; 627 } 628 629 return mq_handle_open(l, SCARG(uap, name), SCARG(uap, oflag), 630 SCARG(uap, mode), attr, retval); 631 } 632 633 int 634 sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap, 635 register_t *retval) 636 { 637 638 return sys_close(l, (const void *)uap, retval); 639 } 640 641 /* 642 * Primary mq_recv1() function. 643 */ 644 int 645 mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio, 646 struct timespec *ts, ssize_t *mlen) 647 { 648 struct mqueue *mq; 649 struct mq_msg *msg = NULL; 650 struct mq_attr *mqattr; 651 u_int idx; 652 int error; 653 654 error = mqueue_get(mqdes, FREAD, &mq); 655 if (error) { 656 return error; 657 } 658 getnanotime(&mq->mq_atime); 659 mqattr = &mq->mq_attrib; 660 661 /* Check the message size limits */ 662 if (msg_len < mqattr->mq_msgsize) { 663 error = EMSGSIZE; 664 goto error; 665 } 666 667 /* Check if queue is empty */ 668 while (mqattr->mq_curmsgs == 0) { 669 int t; 670 671 if (mqattr->mq_flags & O_NONBLOCK) { 672 error = EAGAIN; 673 goto error; 674 } 675 if (ts) { 676 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 677 NULL); 678 if (error) 679 goto error; 680 } else 681 t = 0; 682 /* 683 * Block until someone sends the message. 684 * While doing this, notification should not be sent. 685 */ 686 mqattr->mq_flags |= MQ_RECEIVE; 687 error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t); 688 mqattr->mq_flags &= ~MQ_RECEIVE; 689 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 690 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 691 goto error; 692 } 693 } 694 695 /* 696 * Find the highest priority message, and remove it from the queue. 697 * At first, reserved queue is checked, bitmap is next. 698 */ 699 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 700 if (__predict_true(msg == NULL)) { 701 idx = ffs(mq->mq_bitmap); 702 msg = TAILQ_FIRST(&mq->mq_head[idx]); 703 KASSERT(msg != NULL); 704 } else { 705 idx = MQ_PQRESQ; 706 } 707 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 708 709 /* Unmark the bit, if last message. */ 710 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) { 711 KASSERT((MQ_PQSIZE - idx) == msg->msg_prio); 712 mq->mq_bitmap &= ~(1 << --idx); 713 } 714 715 /* Decrement the counter and signal waiter, if any */ 716 mqattr->mq_curmsgs--; 717 cv_signal(&mq->mq_recv_cv); 718 719 /* Ready for sending now */ 720 selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0); 721 error: 722 mutex_exit(&mq->mq_mtx); 723 fd_putfile((int)mqdes); 724 if (error) 725 return error; 726 727 /* 728 * Copy the data to the user-space. 729 * Note: According to POSIX, no message should be removed from the 730 * queue in case of fail - this would be violated. 731 */ 732 *mlen = msg->msg_len; 733 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 734 if (error == 0 && msg_prio) 735 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 736 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 737 738 return error; 739 } 740 741 int 742 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap, 743 register_t *retval) 744 { 745 /* { 746 syscallarg(mqd_t) mqdes; 747 syscallarg(char *) msg_ptr; 748 syscallarg(size_t) msg_len; 749 syscallarg(unsigned *) msg_prio; 750 } */ 751 ssize_t mlen; 752 int error; 753 754 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 755 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen); 756 if (error == 0) 757 *retval = mlen; 758 759 return error; 760 } 761 762 int 763 sys___mq_timedreceive50(struct lwp *l, 764 const struct sys___mq_timedreceive50_args *uap, register_t *retval) 765 { 766 /* { 767 syscallarg(mqd_t) mqdes; 768 syscallarg(char *) msg_ptr; 769 syscallarg(size_t) msg_len; 770 syscallarg(unsigned *) msg_prio; 771 syscallarg(const struct timespec *) abs_timeout; 772 } */ 773 struct timespec ts, *tsp; 774 ssize_t mlen; 775 int error; 776 777 /* Get and convert time value */ 778 if (SCARG(uap, abs_timeout)) { 779 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 780 if (error) 781 return error; 782 tsp = &ts; 783 } else { 784 tsp = NULL; 785 } 786 787 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 788 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); 789 if (error == 0) 790 *retval = mlen; 791 792 return error; 793 } 794 795 /* 796 * Primary mq_send1() function. 797 */ 798 int 799 mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio, 800 struct timespec *ts) 801 { 802 struct mqueue *mq; 803 struct mq_msg *msg; 804 struct mq_attr *mqattr; 805 struct proc *notify = NULL; 806 ksiginfo_t ksi; 807 size_t size; 808 int error; 809 810 /* Check the priority range */ 811 if (msg_prio >= mq_prio_max) 812 return EINVAL; 813 814 /* Allocate a new message */ 815 size = sizeof(struct mq_msg) + msg_len; 816 if (size > mq_max_msgsize) 817 return EMSGSIZE; 818 819 if (size > MQ_DEF_MSGSIZE) { 820 msg = kmem_alloc(size, KM_SLEEP); 821 } else { 822 msg = pool_cache_get(mqmsg_cache, PR_WAITOK); 823 } 824 825 /* Get the data from user-space */ 826 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 827 if (error) { 828 mqueue_freemsg(msg, size); 829 return error; 830 } 831 msg->msg_len = msg_len; 832 msg->msg_prio = msg_prio; 833 834 error = mqueue_get(mqdes, FWRITE, &mq); 835 if (error) { 836 mqueue_freemsg(msg, size); 837 return error; 838 } 839 getnanotime(&mq->mq_mtime); 840 mqattr = &mq->mq_attrib; 841 842 /* Check the message size limit */ 843 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 844 error = EMSGSIZE; 845 goto error; 846 } 847 848 /* Check if queue is full */ 849 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 850 int t; 851 852 if (mqattr->mq_flags & O_NONBLOCK) { 853 error = EAGAIN; 854 goto error; 855 } 856 if (ts) { 857 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 858 NULL); 859 if (error) 860 goto error; 861 } else 862 t = 0; 863 /* Block until queue becomes available */ 864 error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t); 865 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 866 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 867 goto error; 868 } 869 } 870 KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg); 871 872 /* 873 * Insert message into the queue, according to the priority. 874 * Note the difference between index and priority. 875 */ 876 if (__predict_true(msg_prio < MQ_PQSIZE)) { 877 u_int idx = MQ_PQSIZE - msg_prio; 878 879 KASSERT(idx != MQ_PQRESQ); 880 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 881 mq->mq_bitmap |= (1 << --idx); 882 } else { 883 mqueue_linear_insert(mq, msg); 884 } 885 886 /* Check for the notify */ 887 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 888 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 889 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 890 /* Initialize the signal */ 891 KSI_INIT(&ksi); 892 ksi.ksi_signo = mq->mq_sig_notify.sigev_signo; 893 ksi.ksi_code = SI_MESGQ; 894 ksi.ksi_value = mq->mq_sig_notify.sigev_value; 895 /* Unregister the process */ 896 notify = mq->mq_notify_proc; 897 mq->mq_notify_proc = NULL; 898 } 899 900 /* Increment the counter and signal waiter, if any */ 901 mqattr->mq_curmsgs++; 902 cv_signal(&mq->mq_send_cv); 903 904 /* Ready for receiving now */ 905 selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0); 906 error: 907 mutex_exit(&mq->mq_mtx); 908 fd_putfile((int)mqdes); 909 910 if (error) { 911 mqueue_freemsg(msg, size); 912 } else if (notify) { 913 /* Send the notify, if needed */ 914 mutex_enter(proc_lock); 915 kpsignal(notify, &ksi, NULL); 916 mutex_exit(proc_lock); 917 } 918 return error; 919 } 920 921 int 922 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap, 923 register_t *retval) 924 { 925 /* { 926 syscallarg(mqd_t) mqdes; 927 syscallarg(const char *) msg_ptr; 928 syscallarg(size_t) msg_len; 929 syscallarg(unsigned) msg_prio; 930 } */ 931 932 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 933 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL); 934 } 935 936 int 937 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap, 938 register_t *retval) 939 { 940 /* { 941 syscallarg(mqd_t) mqdes; 942 syscallarg(const char *) msg_ptr; 943 syscallarg(size_t) msg_len; 944 syscallarg(unsigned) msg_prio; 945 syscallarg(const struct timespec *) abs_timeout; 946 } */ 947 struct timespec ts, *tsp; 948 int error; 949 950 /* Get and convert time value */ 951 if (SCARG(uap, abs_timeout)) { 952 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 953 if (error) 954 return error; 955 tsp = &ts; 956 } else { 957 tsp = NULL; 958 } 959 960 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 961 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); 962 } 963 964 int 965 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap, 966 register_t *retval) 967 { 968 /* { 969 syscallarg(mqd_t) mqdes; 970 syscallarg(const struct sigevent *) notification; 971 } */ 972 struct mqueue *mq; 973 struct sigevent sig; 974 int error; 975 976 if (SCARG(uap, notification)) { 977 /* Get the signal from user-space */ 978 error = copyin(SCARG(uap, notification), &sig, 979 sizeof(struct sigevent)); 980 if (error) 981 return error; 982 if (sig.sigev_notify == SIGEV_SIGNAL && 983 (sig.sigev_signo <=0 || sig.sigev_signo >= NSIG)) 984 return EINVAL; 985 } 986 987 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 988 if (error) { 989 return error; 990 } 991 if (SCARG(uap, notification)) { 992 /* Register notification: set the signal and target process */ 993 if (mq->mq_notify_proc == NULL) { 994 memcpy(&mq->mq_sig_notify, &sig, 995 sizeof(struct sigevent)); 996 mq->mq_notify_proc = l->l_proc; 997 } else { 998 /* Fail if someone else already registered */ 999 error = EBUSY; 1000 } 1001 } else { 1002 /* Unregister the notification */ 1003 mq->mq_notify_proc = NULL; 1004 } 1005 mutex_exit(&mq->mq_mtx); 1006 fd_putfile((int)SCARG(uap, mqdes)); 1007 1008 return error; 1009 } 1010 1011 int 1012 sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap, 1013 register_t *retval) 1014 { 1015 /* { 1016 syscallarg(mqd_t) mqdes; 1017 syscallarg(struct mq_attr *) mqstat; 1018 } */ 1019 struct mqueue *mq; 1020 struct mq_attr attr; 1021 int error; 1022 1023 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1024 if (error) { 1025 return error; 1026 } 1027 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1028 mutex_exit(&mq->mq_mtx); 1029 fd_putfile((int)SCARG(uap, mqdes)); 1030 1031 return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr)); 1032 } 1033 1034 int 1035 sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap, 1036 register_t *retval) 1037 { 1038 /* { 1039 syscallarg(mqd_t) mqdes; 1040 syscallarg(const struct mq_attr *) mqstat; 1041 syscallarg(struct mq_attr *) omqstat; 1042 } */ 1043 struct mqueue *mq; 1044 struct mq_attr attr; 1045 int error, nonblock; 1046 1047 error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr)); 1048 if (error) 1049 return error; 1050 nonblock = (attr.mq_flags & O_NONBLOCK); 1051 1052 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1053 if (error) { 1054 return error; 1055 } 1056 1057 /* Copy the old attributes, if needed */ 1058 if (SCARG(uap, omqstat)) { 1059 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1060 } 1061 1062 /* Ignore everything, except O_NONBLOCK */ 1063 if (nonblock) 1064 mq->mq_attrib.mq_flags |= O_NONBLOCK; 1065 else 1066 mq->mq_attrib.mq_flags &= ~O_NONBLOCK; 1067 1068 mutex_exit(&mq->mq_mtx); 1069 fd_putfile((int)SCARG(uap, mqdes)); 1070 1071 /* 1072 * Copy the data to the user-space. 1073 * Note: According to POSIX, the new attributes should not be set in 1074 * case of fail - this would be violated. 1075 */ 1076 if (SCARG(uap, omqstat)) 1077 error = copyout(&attr, SCARG(uap, omqstat), 1078 sizeof(struct mq_attr)); 1079 1080 return error; 1081 } 1082 1083 int 1084 sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap, 1085 register_t *retval) 1086 { 1087 /* { 1088 syscallarg(const char *) name; 1089 } */ 1090 mqueue_t *mq; 1091 char *name; 1092 int error, refcnt = 0; 1093 1094 /* Get the name from the user-space */ 1095 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 1096 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL); 1097 if (error) { 1098 kmem_free(name, MQ_NAMELEN); 1099 return error; 1100 } 1101 1102 mutex_enter(&mqlist_lock); 1103 mq = mqueue_lookup(name); 1104 if (mq == NULL) { 1105 error = ENOENT; 1106 goto err; 1107 } 1108 KASSERT(mutex_owned(&mq->mq_mtx)); 1109 1110 /* Verify permissions. */ 1111 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq, 1112 NULL, NULL)) { 1113 mutex_exit(&mq->mq_mtx); 1114 error = EACCES; 1115 goto err; 1116 } 1117 1118 /* Remove and destroy if no references. */ 1119 LIST_REMOVE(mq, mq_list); 1120 refcnt = mq->mq_refcnt; 1121 if (refcnt) { 1122 /* Mark as unlinked, if there are references. */ 1123 mq->mq_attrib.mq_flags |= MQ_UNLINKED; 1124 } 1125 1126 /* Wake up waiters, if there are any. */ 1127 cv_broadcast(&mq->mq_send_cv); 1128 cv_broadcast(&mq->mq_recv_cv); 1129 1130 selnotify(&mq->mq_rsel, POLLHUP, 0); 1131 selnotify(&mq->mq_wsel, POLLHUP, 0); 1132 1133 mutex_exit(&mq->mq_mtx); 1134 err: 1135 mutex_exit(&mqlist_lock); 1136 /* 1137 * If last reference - destroy the message queue. Otherwise, 1138 * the last mq_close() call will do that. 1139 */ 1140 if (!error && refcnt == 0) { 1141 mqueue_destroy(mq); 1142 } 1143 kmem_free(name, MQ_NAMELEN); 1144 1145 return error; 1146 } 1147 1148 /* 1149 * System control nodes. 1150 */ 1151 static int 1152 mqueue_sysctl_init(void) 1153 { 1154 const struct sysctlnode *node = NULL; 1155 1156 mqsysctl_log = NULL; 1157 1158 sysctl_createv(&mqsysctl_log, 0, NULL, NULL, 1159 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1160 CTLTYPE_INT, "posix_msg", 1161 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 1162 "Message Passing option to which the " 1163 "system attempts to conform"), 1164 NULL, _POSIX_MESSAGE_PASSING, NULL, 0, 1165 CTL_KERN, CTL_CREATE, CTL_EOL); 1166 sysctl_createv(&mqsysctl_log, 0, NULL, &node, 1167 CTLFLAG_PERMANENT, 1168 CTLTYPE_NODE, "mqueue", 1169 SYSCTL_DESCR("Message queue options"), 1170 NULL, 0, NULL, 0, 1171 CTL_KERN, CTL_CREATE, CTL_EOL); 1172 1173 if (node == NULL) 1174 return ENXIO; 1175 1176 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1177 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1178 CTLTYPE_INT, "mq_open_max", 1179 SYSCTL_DESCR("Maximal number of message queue descriptors " 1180 "that process could open"), 1181 NULL, 0, &mq_open_max, 0, 1182 CTL_CREATE, CTL_EOL); 1183 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1184 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1185 CTLTYPE_INT, "mq_prio_max", 1186 SYSCTL_DESCR("Maximal priority of the message"), 1187 NULL, 0, &mq_prio_max, 0, 1188 CTL_CREATE, CTL_EOL); 1189 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1190 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1191 CTLTYPE_INT, "mq_max_msgsize", 1192 SYSCTL_DESCR("Maximal allowed size of the message"), 1193 NULL, 0, &mq_max_msgsize, 0, 1194 CTL_CREATE, CTL_EOL); 1195 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1196 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1197 CTLTYPE_INT, "mq_def_maxmsg", 1198 SYSCTL_DESCR("Default maximal message count"), 1199 NULL, 0, &mq_def_maxmsg, 0, 1200 CTL_CREATE, CTL_EOL); 1201 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1202 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1203 CTLTYPE_INT, "mq_max_maxmsg", 1204 SYSCTL_DESCR("Maximal allowed message count"), 1205 NULL, 0, &mq_max_maxmsg, 0, 1206 CTL_CREATE, CTL_EOL); 1207 1208 return 0; 1209 } 1210 1211 /* 1212 * Debugging. 1213 */ 1214 #if defined(DDB) 1215 1216 void 1217 mqueue_print_list(void (*pr)(const char *, ...)) 1218 { 1219 struct mqueue *mq; 1220 1221 (*pr)("Global list of the message queues:\n"); 1222 (*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n", 1223 "Name", "Ptr", "Mode", "Flags", "Ref", 1224 "MaxMsg", "MsgSze", "CurMsg"); 1225 LIST_FOREACH(mq, &mqueue_head, mq_list) { 1226 (*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n", 1227 mq->mq_name, mq, mq->mq_mode, 1228 mq->mq_attrib.mq_flags, mq->mq_refcnt, 1229 mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize, 1230 mq->mq_attrib.mq_curmsgs); 1231 } 1232 } 1233 1234 #endif /* defined(DDB) */ 1235