1 /* $NetBSD: sys_mqueue.c,v 1.43 2018/08/19 15:10:23 jakllsch Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Implementation of POSIX message queues. 31 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. 32 * 33 * Locking 34 * 35 * Global list of message queues (mqueue_head) is protected by mqlist_lock. 36 * Each message queue and its members are protected by mqueue::mq_mtx. 37 * Note that proc_t::p_mqueue_cnt is updated atomically. 38 * 39 * Lock order: 40 * 41 * mqlist_lock -> 42 * mqueue::mq_mtx 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.43 2018/08/19 15:10:23 jakllsch Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/types.h> 50 #include <sys/atomic.h> 51 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/kauth.h> 55 #include <sys/lwp.h> 56 #include <sys/mqueue.h> 57 #include <sys/module.h> 58 #include <sys/poll.h> 59 #include <sys/select.h> 60 #include <sys/signal.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/sysctl.h> 64 #include <sys/syscall.h> 65 #include <sys/syscallvar.h> 66 #include <sys/syscallargs.h> 67 68 #include <miscfs/genfs/genfs.h> 69 70 MODULE(MODULE_CLASS_MISC, mqueue, NULL); 71 72 /* System-wide limits. */ 73 static u_int mq_open_max = MQ_OPEN_MAX; 74 static u_int mq_prio_max = MQ_PRIO_MAX; 75 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE; 76 static u_int mq_def_maxmsg = 32; 77 static u_int mq_max_maxmsg = 16 * 32; 78 79 static pool_cache_t mqmsg_cache __read_mostly; 80 static kmutex_t mqlist_lock __cacheline_aligned; 81 static LIST_HEAD(, mqueue) mqueue_head __cacheline_aligned; 82 static struct sysctllog * mqsysctl_log; 83 84 static kauth_listener_t mq_listener; 85 86 static int mqueue_sysinit(void); 87 static int mqueue_sysfini(bool); 88 static int mqueue_sysctl_init(void); 89 static int mq_poll_fop(file_t *, int); 90 static int mq_stat_fop(file_t *, struct stat *); 91 static int mq_close_fop(file_t *); 92 93 static const struct fileops mqops = { 94 .fo_name = "mq", 95 .fo_read = fbadop_read, 96 .fo_write = fbadop_write, 97 .fo_ioctl = fbadop_ioctl, 98 .fo_fcntl = fnullop_fcntl, 99 .fo_poll = mq_poll_fop, 100 .fo_stat = mq_stat_fop, 101 .fo_close = mq_close_fop, 102 .fo_kqfilter = fnullop_kqfilter, 103 .fo_restart = fnullop_restart, 104 }; 105 106 static const struct syscall_package mqueue_syscalls[] = { 107 { SYS_mq_open, 0, (sy_call_t *)sys_mq_open }, 108 { SYS_mq_close, 0, (sy_call_t *)sys_mq_close }, 109 { SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink }, 110 { SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr }, 111 { SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr }, 112 { SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify }, 113 { SYS_mq_send, 0, (sy_call_t *)sys_mq_send }, 114 { SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive }, 115 { SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 }, 116 { SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 }, 117 { 0, 0, NULL } 118 }; 119 120 static int 121 mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 122 void *arg0, void *arg1, void *arg2, void *arg3) 123 { 124 mqueue_t *mq; 125 int result; 126 127 if (action != KAUTH_SYSTEM_MQUEUE) 128 return KAUTH_RESULT_DEFER; 129 130 result = KAUTH_RESULT_DEFER; 131 132 mq = arg1; 133 134 if (kauth_cred_geteuid(cred) == mq->mq_euid) 135 result = KAUTH_RESULT_ALLOW; 136 137 return result; 138 } 139 140 /* 141 * Initialisation and unloading of POSIX message queue subsystem. 142 */ 143 144 static int 145 mqueue_sysinit(void) 146 { 147 int error; 148 149 mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit, 150 0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL); 151 mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE); 152 LIST_INIT(&mqueue_head); 153 154 error = mqueue_sysctl_init(); 155 if (error) { 156 (void)mqueue_sysfini(false); 157 return error; 158 } 159 error = syscall_establish(NULL, mqueue_syscalls); 160 if (error) { 161 (void)mqueue_sysfini(false); 162 } 163 mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 164 mq_listener_cb, NULL); 165 return error; 166 } 167 168 static int 169 mqueue_sysfini(bool interface) 170 { 171 172 if (interface) { 173 int error; 174 bool inuse; 175 176 /* Stop syscall activity. */ 177 error = syscall_disestablish(NULL, mqueue_syscalls); 178 if (error) 179 return error; 180 /* Check if there are any message queues in use. */ 181 mutex_enter(&mqlist_lock); 182 inuse = !LIST_EMPTY(&mqueue_head); 183 mutex_exit(&mqlist_lock); 184 if (inuse) { 185 error = syscall_establish(NULL, mqueue_syscalls); 186 KASSERT(error == 0); 187 return EBUSY; 188 } 189 } 190 191 if (mqsysctl_log != NULL) 192 sysctl_teardown(&mqsysctl_log); 193 194 kauth_unlisten_scope(mq_listener); 195 196 mutex_destroy(&mqlist_lock); 197 pool_cache_destroy(mqmsg_cache); 198 return 0; 199 } 200 201 /* 202 * Module interface. 203 */ 204 static int 205 mqueue_modcmd(modcmd_t cmd, void *arg) 206 { 207 208 switch (cmd) { 209 case MODULE_CMD_INIT: 210 return mqueue_sysinit(); 211 case MODULE_CMD_FINI: 212 return mqueue_sysfini(true); 213 default: 214 return ENOTTY; 215 } 216 } 217 218 /* 219 * Free the message. 220 */ 221 static void 222 mqueue_freemsg(struct mq_msg *msg, const size_t size) 223 { 224 225 if (size > MQ_DEF_MSGSIZE) { 226 kmem_free(msg, size); 227 } else { 228 pool_cache_put(mqmsg_cache, msg); 229 } 230 } 231 232 /* 233 * Destroy the message queue. 234 */ 235 static void 236 mqueue_destroy(struct mqueue *mq) 237 { 238 struct mq_msg *msg; 239 size_t msz; 240 u_int i; 241 242 /* Note MQ_PQSIZE + 1. */ 243 for (i = 0; i <= MQ_PQSIZE; i++) { 244 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) { 245 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue); 246 msz = sizeof(struct mq_msg) + msg->msg_len; 247 mqueue_freemsg(msg, msz); 248 } 249 } 250 if (mq->mq_name) { 251 kmem_free(mq->mq_name, MQ_NAMELEN); 252 } 253 seldestroy(&mq->mq_rsel); 254 seldestroy(&mq->mq_wsel); 255 cv_destroy(&mq->mq_send_cv); 256 cv_destroy(&mq->mq_recv_cv); 257 mutex_destroy(&mq->mq_mtx); 258 kmem_free(mq, sizeof(struct mqueue)); 259 } 260 261 /* 262 * mqueue_lookup: lookup for file name in general list of message queues. 263 * 264 * => locks the message queue on success 265 */ 266 static mqueue_t * 267 mqueue_lookup(const char *name) 268 { 269 mqueue_t *mq; 270 271 KASSERT(mutex_owned(&mqlist_lock)); 272 273 LIST_FOREACH(mq, &mqueue_head, mq_list) { 274 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) { 275 mutex_enter(&mq->mq_mtx); 276 return mq; 277 } 278 } 279 return NULL; 280 } 281 282 /* 283 * mqueue_get: get the mqueue from the descriptor. 284 * 285 * => locks the message queue, if found. 286 * => holds a reference on the file descriptor. 287 */ 288 int 289 mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret) 290 { 291 const int fd = (int)mqd; 292 mqueue_t *mq; 293 file_t *fp; 294 295 fp = fd_getfile(fd); 296 if (__predict_false(fp == NULL)) { 297 return EBADF; 298 } 299 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) { 300 fd_putfile(fd); 301 return EBADF; 302 } 303 if (fflag && (fp->f_flag & fflag) == 0) { 304 fd_putfile(fd); 305 return EBADF; 306 } 307 mq = fp->f_mqueue; 308 mutex_enter(&mq->mq_mtx); 309 310 *mqret = mq; 311 return 0; 312 } 313 314 /* 315 * mqueue_linear_insert: perform linear insert according to the message 316 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a 317 * sorted list used only when mq_prio_max is increased via sysctl. 318 */ 319 static inline void 320 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 321 { 322 struct mq_msg *mit; 323 324 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 325 if (msg->msg_prio > mit->msg_prio) 326 break; 327 } 328 if (mit == NULL) { 329 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 330 } else { 331 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 332 } 333 } 334 335 static int 336 mq_stat_fop(file_t *fp, struct stat *st) 337 { 338 struct mqueue *mq = fp->f_mqueue; 339 340 memset(st, 0, sizeof(*st)); 341 342 mutex_enter(&mq->mq_mtx); 343 st->st_mode = mq->mq_mode; 344 st->st_uid = mq->mq_euid; 345 st->st_gid = mq->mq_egid; 346 st->st_atimespec = mq->mq_atime; 347 st->st_mtimespec = mq->mq_mtime; 348 st->st_ctimespec = st->st_birthtimespec = mq->mq_btime; 349 st->st_uid = kauth_cred_geteuid(fp->f_cred); 350 st->st_gid = kauth_cred_getegid(fp->f_cred); 351 mutex_exit(&mq->mq_mtx); 352 353 return 0; 354 } 355 356 static int 357 mq_poll_fop(file_t *fp, int events) 358 { 359 struct mqueue *mq = fp->f_mqueue; 360 struct mq_attr *mqattr; 361 int revents = 0; 362 363 mutex_enter(&mq->mq_mtx); 364 mqattr = &mq->mq_attrib; 365 if (events & (POLLIN | POLLRDNORM)) { 366 /* Ready for receiving, if there are messages in the queue. */ 367 if (mqattr->mq_curmsgs) 368 revents |= events & (POLLIN | POLLRDNORM); 369 else 370 selrecord(curlwp, &mq->mq_rsel); 371 } 372 if (events & (POLLOUT | POLLWRNORM)) { 373 /* Ready for sending, if the message queue is not full. */ 374 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 375 revents |= events & (POLLOUT | POLLWRNORM); 376 else 377 selrecord(curlwp, &mq->mq_wsel); 378 } 379 mutex_exit(&mq->mq_mtx); 380 381 return revents; 382 } 383 384 static int 385 mq_close_fop(file_t *fp) 386 { 387 proc_t *p = curproc; 388 mqueue_t *mq = fp->f_mqueue; 389 bool destroy = false; 390 391 mutex_enter(&mq->mq_mtx); 392 KASSERT(mq->mq_refcnt > 0); 393 if (--mq->mq_refcnt == 0) { 394 /* Destroy if the last reference and unlinked. */ 395 destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0; 396 } 397 mutex_exit(&mq->mq_mtx); 398 399 if (destroy) { 400 mqueue_destroy(mq); 401 } 402 atomic_dec_uint(&p->p_mqueue_cnt); 403 return 0; 404 } 405 406 static int 407 mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred) 408 { 409 mode_t acc_mode = 0; 410 411 /* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */ 412 if (access & FREAD) { 413 acc_mode |= VREAD; 414 } 415 if (access & FWRITE) { 416 acc_mode |= VWRITE; 417 } 418 if (genfs_can_access(VNON, mq->mq_mode, mq->mq_euid, 419 mq->mq_egid, acc_mode, cred)) { 420 return EACCES; 421 } 422 return 0; 423 } 424 425 static int 426 mqueue_create(lwp_t *l, char *name, struct mq_attr *attr, mode_t mode, 427 int oflag, mqueue_t **mqret) 428 { 429 proc_t *p = l->l_proc; 430 struct cwdinfo *cwdi = p->p_cwdi; 431 mqueue_t *mq; 432 u_int i; 433 434 /* Empty name is invalid. */ 435 if (name[0] == '\0') { 436 return EINVAL; 437 } 438 439 /* Check for mqueue attributes. */ 440 if (attr) { 441 if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > mq_max_maxmsg || 442 attr->mq_msgsize <= 0 || 443 attr->mq_msgsize > mq_max_msgsize) { 444 return EINVAL; 445 } 446 attr->mq_curmsgs = 0; 447 } 448 449 /* 450 * Allocate new message queue, initialize data structures, copy the 451 * name attributes. Note that the initial reference is set here. 452 */ 453 mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP); 454 455 mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE); 456 cv_init(&mq->mq_send_cv, "mqsendcv"); 457 cv_init(&mq->mq_recv_cv, "mqrecvcv"); 458 for (i = 0; i < (MQ_PQSIZE + 1); i++) { 459 TAILQ_INIT(&mq->mq_head[i]); 460 } 461 selinit(&mq->mq_rsel); 462 selinit(&mq->mq_wsel); 463 mq->mq_name = name; 464 mq->mq_refcnt = 1; 465 466 if (attr != NULL) { 467 memcpy(&mq->mq_attrib, attr, sizeof(struct mq_attr)); 468 } else { 469 memset(&mq->mq_attrib, 0, sizeof(struct mq_attr)); 470 mq->mq_attrib.mq_maxmsg = mq_def_maxmsg; 471 mq->mq_attrib.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg); 472 } 473 474 CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0); 475 mq->mq_attrib.mq_flags = (O_MASK & oflag); 476 477 /* Store mode and effective UID with GID. */ 478 mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT; 479 mq->mq_euid = kauth_cred_geteuid(l->l_cred); 480 mq->mq_egid = kauth_cred_getegid(l->l_cred); 481 482 *mqret = mq; 483 return 0; 484 } 485 486 /* 487 * Helper function for mq_open() - note that "u_name" is a userland pointer, 488 * while "attr" is a kernel pointer! 489 */ 490 int 491 mq_handle_open(struct lwp *l, const char *u_name, int oflag, mode_t mode, 492 struct mq_attr *attr, register_t *retval) 493 { 494 struct proc *p = l->l_proc; 495 struct mqueue *mq, *mq_new = NULL; 496 int mqd, error; 497 file_t *fp; 498 char *name; 499 500 /* Get the name from the user-space. */ 501 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 502 error = copyinstr(u_name, name, MQ_NAMELEN - 1, NULL); 503 if (error) { 504 kmem_free(name, MQ_NAMELEN); 505 return error; 506 } 507 508 /* Allocate file structure and descriptor. */ 509 error = fd_allocfile(&fp, &mqd); 510 if (error) { 511 kmem_free(name, MQ_NAMELEN); 512 return error; 513 } 514 515 /* Account and check for the limit. */ 516 if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) { 517 atomic_dec_uint(&p->p_mqueue_cnt); 518 error = EMFILE; 519 goto err; 520 } 521 522 fp->f_type = DTYPE_MQUEUE; 523 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE); 524 fp->f_ops = &mqops; 525 526 if (oflag & O_CREAT) { 527 /* Create a new message queue. */ 528 error = mqueue_create(l, name, attr, mode, oflag, &mq_new); 529 if (error) { 530 goto err; 531 } 532 KASSERT(mq_new != NULL); 533 } 534 535 /* Lookup for a message queue with such name. */ 536 mutex_enter(&mqlist_lock); 537 mq = mqueue_lookup(name); 538 if (mq) { 539 KASSERT(mutex_owned(&mq->mq_mtx)); 540 mutex_exit(&mqlist_lock); 541 542 /* Check for exclusive create. */ 543 if (oflag & O_EXCL) { 544 mutex_exit(&mq->mq_mtx); 545 error = EEXIST; 546 goto err; 547 } 548 549 /* Verify permissions. */ 550 if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) { 551 mutex_exit(&mq->mq_mtx); 552 error = EACCES; 553 goto err; 554 } 555 556 /* If we have the access, add a new reference. */ 557 mq->mq_refcnt++; 558 mutex_exit(&mq->mq_mtx); 559 } else { 560 /* Fail if not found and not creating. */ 561 if ((oflag & O_CREAT) == 0) { 562 mutex_exit(&mqlist_lock); 563 KASSERT(mq_new == NULL); 564 error = ENOENT; 565 goto err; 566 } 567 568 /* Initial timestamps. */ 569 mq = mq_new; 570 getnanotime(&mq->mq_btime); 571 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 572 573 /* 574 * Finally, insert message queue into the list. 575 * Note: it already has the initial reference. 576 */ 577 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 578 mutex_exit(&mqlist_lock); 579 580 mq_new = NULL; 581 name = NULL; 582 } 583 KASSERT(mq != NULL); 584 fp->f_mqueue = mq; 585 fd_affix(p, fp, mqd); 586 *retval = mqd; 587 err: 588 if (error) { 589 fd_abort(p, fp, mqd); 590 } 591 if (mq_new) { 592 /* Note: will free the 'name'. */ 593 mqueue_destroy(mq_new); 594 } else if (name) { 595 kmem_free(name, MQ_NAMELEN); 596 } 597 return error; 598 } 599 600 /* 601 * General mqueue system calls. 602 */ 603 604 int 605 sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap, 606 register_t *retval) 607 { 608 /* { 609 syscallarg(const char *) name; 610 syscallarg(int) oflag; 611 syscallarg(mode_t) mode; 612 syscallarg(struct mq_attr) attr; 613 } */ 614 struct mq_attr *attr = NULL, a; 615 int error; 616 617 if ((SCARG(uap, oflag) & O_CREAT) != 0 && SCARG(uap, attr) != NULL) { 618 error = copyin(SCARG(uap, attr), &a, sizeof(a)); 619 if (error) 620 return error; 621 attr = &a; 622 } 623 624 return mq_handle_open(l, SCARG(uap, name), SCARG(uap, oflag), 625 SCARG(uap, mode), attr, retval); 626 } 627 628 int 629 sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap, 630 register_t *retval) 631 { 632 633 return sys_close(l, (const void *)uap, retval); 634 } 635 636 /* 637 * Primary mq_recv1() function. 638 */ 639 int 640 mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio, 641 struct timespec *ts, ssize_t *mlen) 642 { 643 struct mqueue *mq; 644 struct mq_msg *msg = NULL; 645 struct mq_attr *mqattr; 646 u_int idx; 647 int error; 648 649 error = mqueue_get(mqdes, FREAD, &mq); 650 if (error) { 651 return error; 652 } 653 getnanotime(&mq->mq_atime); 654 mqattr = &mq->mq_attrib; 655 656 /* Check the message size limits */ 657 if (msg_len < mqattr->mq_msgsize) { 658 error = EMSGSIZE; 659 goto error; 660 } 661 662 /* Check if queue is empty */ 663 while (mqattr->mq_curmsgs == 0) { 664 int t; 665 666 if (mqattr->mq_flags & O_NONBLOCK) { 667 error = EAGAIN; 668 goto error; 669 } 670 if (ts) { 671 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 672 NULL); 673 if (error) 674 goto error; 675 } else 676 t = 0; 677 /* 678 * Block until someone sends the message. 679 * While doing this, notification should not be sent. 680 */ 681 mqattr->mq_flags |= MQ_RECEIVE; 682 error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t); 683 mqattr->mq_flags &= ~MQ_RECEIVE; 684 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 685 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 686 goto error; 687 } 688 } 689 690 /* 691 * Find the highest priority message, and remove it from the queue. 692 * At first, reserved queue is checked, bitmap is next. 693 */ 694 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 695 if (__predict_true(msg == NULL)) { 696 idx = ffs(mq->mq_bitmap); 697 msg = TAILQ_FIRST(&mq->mq_head[idx]); 698 KASSERT(msg != NULL); 699 } else { 700 idx = MQ_PQRESQ; 701 } 702 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 703 704 /* Unmark the bit, if last message. */ 705 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) { 706 KASSERT((MQ_PQSIZE - idx) == msg->msg_prio); 707 mq->mq_bitmap &= ~(1U << --idx); 708 } 709 710 /* Decrement the counter and signal waiter, if any */ 711 mqattr->mq_curmsgs--; 712 cv_signal(&mq->mq_recv_cv); 713 714 /* Ready for sending now */ 715 selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0); 716 error: 717 mutex_exit(&mq->mq_mtx); 718 fd_putfile((int)mqdes); 719 if (error) 720 return error; 721 722 /* 723 * Copy the data to the user-space. 724 * Note: According to POSIX, no message should be removed from the 725 * queue in case of fail - this would be violated. 726 */ 727 *mlen = msg->msg_len; 728 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 729 if (error == 0 && msg_prio) 730 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 731 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 732 733 return error; 734 } 735 736 int 737 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap, 738 register_t *retval) 739 { 740 /* { 741 syscallarg(mqd_t) mqdes; 742 syscallarg(char *) msg_ptr; 743 syscallarg(size_t) msg_len; 744 syscallarg(unsigned *) msg_prio; 745 } */ 746 ssize_t mlen; 747 int error; 748 749 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 750 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen); 751 if (error == 0) 752 *retval = mlen; 753 754 return error; 755 } 756 757 int 758 sys___mq_timedreceive50(struct lwp *l, 759 const struct sys___mq_timedreceive50_args *uap, register_t *retval) 760 { 761 /* { 762 syscallarg(mqd_t) mqdes; 763 syscallarg(char *) msg_ptr; 764 syscallarg(size_t) msg_len; 765 syscallarg(unsigned *) msg_prio; 766 syscallarg(const struct timespec *) abs_timeout; 767 } */ 768 struct timespec ts, *tsp; 769 ssize_t mlen; 770 int error; 771 772 /* Get and convert time value */ 773 if (SCARG(uap, abs_timeout)) { 774 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 775 if (error) 776 return error; 777 tsp = &ts; 778 } else { 779 tsp = NULL; 780 } 781 782 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 783 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); 784 if (error == 0) 785 *retval = mlen; 786 787 return error; 788 } 789 790 /* 791 * Primary mq_send1() function. 792 */ 793 int 794 mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio, 795 struct timespec *ts) 796 { 797 struct mqueue *mq; 798 struct mq_msg *msg; 799 struct mq_attr *mqattr; 800 struct proc *notify = NULL; 801 ksiginfo_t ksi; 802 size_t size; 803 int error; 804 805 /* Check the priority range */ 806 if (msg_prio >= mq_prio_max) 807 return EINVAL; 808 809 /* Allocate a new message */ 810 size = sizeof(struct mq_msg) + msg_len; 811 if (size > mq_max_msgsize) 812 return EMSGSIZE; 813 814 if (size > MQ_DEF_MSGSIZE) { 815 msg = kmem_alloc(size, KM_SLEEP); 816 } else { 817 msg = pool_cache_get(mqmsg_cache, PR_WAITOK); 818 } 819 820 /* Get the data from user-space */ 821 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 822 if (error) { 823 mqueue_freemsg(msg, size); 824 return error; 825 } 826 msg->msg_len = msg_len; 827 msg->msg_prio = msg_prio; 828 829 error = mqueue_get(mqdes, FWRITE, &mq); 830 if (error) { 831 mqueue_freemsg(msg, size); 832 return error; 833 } 834 getnanotime(&mq->mq_mtime); 835 mqattr = &mq->mq_attrib; 836 837 /* Check the message size limit */ 838 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 839 error = EMSGSIZE; 840 goto error; 841 } 842 843 /* Check if queue is full */ 844 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 845 int t; 846 847 if (mqattr->mq_flags & O_NONBLOCK) { 848 error = EAGAIN; 849 goto error; 850 } 851 if (ts) { 852 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 853 NULL); 854 if (error) 855 goto error; 856 } else 857 t = 0; 858 /* Block until queue becomes available */ 859 error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t); 860 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 861 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 862 goto error; 863 } 864 } 865 KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg); 866 867 /* 868 * Insert message into the queue, according to the priority. 869 * Note the difference between index and priority. 870 */ 871 if (__predict_true(msg_prio < MQ_PQSIZE)) { 872 u_int idx = MQ_PQSIZE - msg_prio; 873 874 KASSERT(idx != MQ_PQRESQ); 875 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 876 mq->mq_bitmap |= (1U << --idx); 877 } else { 878 mqueue_linear_insert(mq, msg); 879 } 880 881 /* Check for the notify */ 882 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 883 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 884 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 885 /* Initialize the signal */ 886 KSI_INIT(&ksi); 887 ksi.ksi_signo = mq->mq_sig_notify.sigev_signo; 888 ksi.ksi_code = SI_MESGQ; 889 ksi.ksi_value = mq->mq_sig_notify.sigev_value; 890 /* Unregister the process */ 891 notify = mq->mq_notify_proc; 892 mq->mq_notify_proc = NULL; 893 } 894 895 /* Increment the counter and signal waiter, if any */ 896 mqattr->mq_curmsgs++; 897 cv_signal(&mq->mq_send_cv); 898 899 /* Ready for receiving now */ 900 selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0); 901 error: 902 mutex_exit(&mq->mq_mtx); 903 fd_putfile((int)mqdes); 904 905 if (error) { 906 mqueue_freemsg(msg, size); 907 } else if (notify) { 908 /* Send the notify, if needed */ 909 mutex_enter(proc_lock); 910 kpsignal(notify, &ksi, NULL); 911 mutex_exit(proc_lock); 912 } 913 return error; 914 } 915 916 int 917 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap, 918 register_t *retval) 919 { 920 /* { 921 syscallarg(mqd_t) mqdes; 922 syscallarg(const char *) msg_ptr; 923 syscallarg(size_t) msg_len; 924 syscallarg(unsigned) msg_prio; 925 } */ 926 927 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 928 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL); 929 } 930 931 int 932 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap, 933 register_t *retval) 934 { 935 /* { 936 syscallarg(mqd_t) mqdes; 937 syscallarg(const char *) msg_ptr; 938 syscallarg(size_t) msg_len; 939 syscallarg(unsigned) msg_prio; 940 syscallarg(const struct timespec *) abs_timeout; 941 } */ 942 struct timespec ts, *tsp; 943 int error; 944 945 /* Get and convert time value */ 946 if (SCARG(uap, abs_timeout)) { 947 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 948 if (error) 949 return error; 950 tsp = &ts; 951 } else { 952 tsp = NULL; 953 } 954 955 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 956 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); 957 } 958 959 int 960 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap, 961 register_t *retval) 962 { 963 /* { 964 syscallarg(mqd_t) mqdes; 965 syscallarg(const struct sigevent *) notification; 966 } */ 967 struct mqueue *mq; 968 struct sigevent sig; 969 int error; 970 971 if (SCARG(uap, notification)) { 972 /* Get the signal from user-space */ 973 error = copyin(SCARG(uap, notification), &sig, 974 sizeof(struct sigevent)); 975 if (error) 976 return error; 977 if (sig.sigev_notify == SIGEV_SIGNAL && 978 (sig.sigev_signo <=0 || sig.sigev_signo >= NSIG)) 979 return EINVAL; 980 } 981 982 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 983 if (error) { 984 return error; 985 } 986 if (SCARG(uap, notification)) { 987 /* Register notification: set the signal and target process */ 988 if (mq->mq_notify_proc == NULL) { 989 memcpy(&mq->mq_sig_notify, &sig, 990 sizeof(struct sigevent)); 991 mq->mq_notify_proc = l->l_proc; 992 } else { 993 /* Fail if someone else already registered */ 994 error = EBUSY; 995 } 996 } else { 997 /* Unregister the notification */ 998 mq->mq_notify_proc = NULL; 999 } 1000 mutex_exit(&mq->mq_mtx); 1001 fd_putfile((int)SCARG(uap, mqdes)); 1002 1003 return error; 1004 } 1005 1006 int 1007 sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap, 1008 register_t *retval) 1009 { 1010 /* { 1011 syscallarg(mqd_t) mqdes; 1012 syscallarg(struct mq_attr *) mqstat; 1013 } */ 1014 struct mqueue *mq; 1015 struct mq_attr attr; 1016 int error; 1017 1018 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1019 if (error) { 1020 return error; 1021 } 1022 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1023 mutex_exit(&mq->mq_mtx); 1024 fd_putfile((int)SCARG(uap, mqdes)); 1025 1026 return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr)); 1027 } 1028 1029 int 1030 sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap, 1031 register_t *retval) 1032 { 1033 /* { 1034 syscallarg(mqd_t) mqdes; 1035 syscallarg(const struct mq_attr *) mqstat; 1036 syscallarg(struct mq_attr *) omqstat; 1037 } */ 1038 struct mqueue *mq; 1039 struct mq_attr attr; 1040 int error, nonblock; 1041 1042 error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr)); 1043 if (error) 1044 return error; 1045 nonblock = (attr.mq_flags & O_NONBLOCK); 1046 1047 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1048 if (error) { 1049 return error; 1050 } 1051 1052 /* Copy the old attributes, if needed */ 1053 if (SCARG(uap, omqstat)) { 1054 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1055 } 1056 1057 /* Ignore everything, except O_NONBLOCK */ 1058 if (nonblock) 1059 mq->mq_attrib.mq_flags |= O_NONBLOCK; 1060 else 1061 mq->mq_attrib.mq_flags &= ~O_NONBLOCK; 1062 1063 mutex_exit(&mq->mq_mtx); 1064 fd_putfile((int)SCARG(uap, mqdes)); 1065 1066 /* 1067 * Copy the data to the user-space. 1068 * Note: According to POSIX, the new attributes should not be set in 1069 * case of fail - this would be violated. 1070 */ 1071 if (SCARG(uap, omqstat)) 1072 error = copyout(&attr, SCARG(uap, omqstat), 1073 sizeof(struct mq_attr)); 1074 1075 return error; 1076 } 1077 1078 int 1079 sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap, 1080 register_t *retval) 1081 { 1082 /* { 1083 syscallarg(const char *) name; 1084 } */ 1085 mqueue_t *mq; 1086 char *name; 1087 int error, refcnt = 0; 1088 1089 /* Get the name from the user-space */ 1090 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 1091 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL); 1092 if (error) { 1093 kmem_free(name, MQ_NAMELEN); 1094 return error; 1095 } 1096 1097 mutex_enter(&mqlist_lock); 1098 mq = mqueue_lookup(name); 1099 if (mq == NULL) { 1100 error = ENOENT; 1101 goto err; 1102 } 1103 KASSERT(mutex_owned(&mq->mq_mtx)); 1104 1105 /* Verify permissions. */ 1106 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq, 1107 NULL, NULL)) { 1108 mutex_exit(&mq->mq_mtx); 1109 error = EACCES; 1110 goto err; 1111 } 1112 1113 /* Remove and destroy if no references. */ 1114 LIST_REMOVE(mq, mq_list); 1115 refcnt = mq->mq_refcnt; 1116 if (refcnt) { 1117 /* Mark as unlinked, if there are references. */ 1118 mq->mq_attrib.mq_flags |= MQ_UNLINKED; 1119 } 1120 1121 /* Wake up waiters, if there are any. */ 1122 cv_broadcast(&mq->mq_send_cv); 1123 cv_broadcast(&mq->mq_recv_cv); 1124 1125 selnotify(&mq->mq_rsel, POLLHUP, 0); 1126 selnotify(&mq->mq_wsel, POLLHUP, 0); 1127 1128 mutex_exit(&mq->mq_mtx); 1129 err: 1130 mutex_exit(&mqlist_lock); 1131 /* 1132 * If last reference - destroy the message queue. Otherwise, 1133 * the last mq_close() call will do that. 1134 */ 1135 if (!error && refcnt == 0) { 1136 mqueue_destroy(mq); 1137 } 1138 kmem_free(name, MQ_NAMELEN); 1139 1140 return error; 1141 } 1142 1143 /* 1144 * System control nodes. 1145 */ 1146 static int 1147 mqueue_sysctl_init(void) 1148 { 1149 const struct sysctlnode *node = NULL; 1150 1151 mqsysctl_log = NULL; 1152 1153 sysctl_createv(&mqsysctl_log, 0, NULL, NULL, 1154 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1155 CTLTYPE_INT, "posix_msg", 1156 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 1157 "Message Passing option to which the " 1158 "system attempts to conform"), 1159 NULL, _POSIX_MESSAGE_PASSING, NULL, 0, 1160 CTL_KERN, CTL_CREATE, CTL_EOL); 1161 sysctl_createv(&mqsysctl_log, 0, NULL, &node, 1162 CTLFLAG_PERMANENT, 1163 CTLTYPE_NODE, "mqueue", 1164 SYSCTL_DESCR("Message queue options"), 1165 NULL, 0, NULL, 0, 1166 CTL_KERN, CTL_CREATE, CTL_EOL); 1167 1168 if (node == NULL) 1169 return ENXIO; 1170 1171 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1172 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1173 CTLTYPE_INT, "mq_open_max", 1174 SYSCTL_DESCR("Maximal number of message queue descriptors " 1175 "that process could open"), 1176 NULL, 0, &mq_open_max, 0, 1177 CTL_CREATE, CTL_EOL); 1178 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1179 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1180 CTLTYPE_INT, "mq_prio_max", 1181 SYSCTL_DESCR("Maximal priority of the message"), 1182 NULL, 0, &mq_prio_max, 0, 1183 CTL_CREATE, CTL_EOL); 1184 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1185 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1186 CTLTYPE_INT, "mq_max_msgsize", 1187 SYSCTL_DESCR("Maximal allowed size of the message"), 1188 NULL, 0, &mq_max_msgsize, 0, 1189 CTL_CREATE, CTL_EOL); 1190 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1191 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1192 CTLTYPE_INT, "mq_def_maxmsg", 1193 SYSCTL_DESCR("Default maximal message count"), 1194 NULL, 0, &mq_def_maxmsg, 0, 1195 CTL_CREATE, CTL_EOL); 1196 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1197 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1198 CTLTYPE_INT, "mq_max_maxmsg", 1199 SYSCTL_DESCR("Maximal allowed message count"), 1200 NULL, 0, &mq_max_maxmsg, 0, 1201 CTL_CREATE, CTL_EOL); 1202 1203 return 0; 1204 } 1205 1206 /* 1207 * Debugging. 1208 */ 1209 #if defined(DDB) 1210 1211 void 1212 mqueue_print_list(void (*pr)(const char *, ...)) 1213 { 1214 struct mqueue *mq; 1215 1216 (*pr)("Global list of the message queues:\n"); 1217 (*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n", 1218 "Name", "Ptr", "Mode", "Flags", "Ref", 1219 "MaxMsg", "MsgSze", "CurMsg"); 1220 LIST_FOREACH(mq, &mqueue_head, mq_list) { 1221 (*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n", 1222 mq->mq_name, mq, mq->mq_mode, 1223 mq->mq_attrib.mq_flags, mq->mq_refcnt, 1224 mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize, 1225 mq->mq_attrib.mq_curmsgs); 1226 } 1227 } 1228 1229 #endif /* defined(DDB) */ 1230