1 /* $NetBSD: sys_mqueue.c,v 1.44 2019/04/16 01:02:41 martin Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Implementation of POSIX message queues. 31 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. 32 * 33 * Locking 34 * 35 * Global list of message queues (mqueue_head) is protected by mqlist_lock. 36 * Each message queue and its members are protected by mqueue::mq_mtx. 37 * Note that proc_t::p_mqueue_cnt is updated atomically. 38 * 39 * Lock order: 40 * 41 * mqlist_lock -> 42 * mqueue::mq_mtx 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.44 2019/04/16 01:02:41 martin Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/types.h> 50 #include <sys/atomic.h> 51 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/kauth.h> 55 #include <sys/lwp.h> 56 #include <sys/mqueue.h> 57 #include <sys/module.h> 58 #include <sys/poll.h> 59 #include <sys/select.h> 60 #include <sys/signal.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/sysctl.h> 64 #include <sys/syscall.h> 65 #include <sys/syscallvar.h> 66 #include <sys/syscallargs.h> 67 68 #include <miscfs/genfs/genfs.h> 69 70 MODULE(MODULE_CLASS_MISC, mqueue, NULL); 71 72 /* System-wide limits. */ 73 static u_int mq_open_max = MQ_OPEN_MAX; 74 static u_int mq_prio_max = MQ_PRIO_MAX; 75 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE; 76 static u_int mq_def_maxmsg = 32; 77 static u_int mq_max_maxmsg = 16 * 32; 78 79 static pool_cache_t mqmsg_cache __read_mostly; 80 static kmutex_t mqlist_lock __cacheline_aligned; 81 static LIST_HEAD(, mqueue) mqueue_head __cacheline_aligned; 82 static struct sysctllog * mqsysctl_log; 83 84 static kauth_listener_t mq_listener; 85 86 static int mqueue_sysinit(void); 87 static int mqueue_sysfini(bool); 88 static int mqueue_sysctl_init(void); 89 static int mq_poll_fop(file_t *, int); 90 static int mq_stat_fop(file_t *, struct stat *); 91 static int mq_close_fop(file_t *); 92 93 static const struct fileops mqops = { 94 .fo_name = "mq", 95 .fo_read = fbadop_read, 96 .fo_write = fbadop_write, 97 .fo_ioctl = fbadop_ioctl, 98 .fo_fcntl = fnullop_fcntl, 99 .fo_poll = mq_poll_fop, 100 .fo_stat = mq_stat_fop, 101 .fo_close = mq_close_fop, 102 .fo_kqfilter = fnullop_kqfilter, 103 .fo_restart = fnullop_restart, 104 }; 105 106 static const struct syscall_package mqueue_syscalls[] = { 107 { SYS_mq_open, 0, (sy_call_t *)sys_mq_open }, 108 { SYS_mq_close, 0, (sy_call_t *)sys_mq_close }, 109 { SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink }, 110 { SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr }, 111 { SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr }, 112 { SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify }, 113 { SYS_mq_send, 0, (sy_call_t *)sys_mq_send }, 114 { SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive }, 115 { SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 }, 116 { SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 }, 117 { 0, 0, NULL } 118 }; 119 120 static int 121 mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 122 void *arg0, void *arg1, void *arg2, void *arg3) 123 { 124 mqueue_t *mq; 125 int result; 126 127 if (action != KAUTH_SYSTEM_MQUEUE) 128 return KAUTH_RESULT_DEFER; 129 130 result = KAUTH_RESULT_DEFER; 131 132 mq = arg1; 133 134 if (kauth_cred_geteuid(cred) == mq->mq_euid) 135 result = KAUTH_RESULT_ALLOW; 136 137 return result; 138 } 139 140 /* 141 * Initialisation and unloading of POSIX message queue subsystem. 142 */ 143 144 static int 145 mqueue_sysinit(void) 146 { 147 int error; 148 149 mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit, 150 0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL); 151 mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE); 152 LIST_INIT(&mqueue_head); 153 154 error = mqueue_sysctl_init(); 155 if (error) { 156 (void)mqueue_sysfini(false); 157 return error; 158 } 159 error = syscall_establish(NULL, mqueue_syscalls); 160 if (error) { 161 (void)mqueue_sysfini(false); 162 } 163 mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 164 mq_listener_cb, NULL); 165 return error; 166 } 167 168 static int 169 mqueue_sysfini(bool interface) 170 { 171 172 if (interface) { 173 int error; 174 bool inuse; 175 176 /* Stop syscall activity. */ 177 error = syscall_disestablish(NULL, mqueue_syscalls); 178 if (error) 179 return error; 180 /* Check if there are any message queues in use. */ 181 mutex_enter(&mqlist_lock); 182 inuse = !LIST_EMPTY(&mqueue_head); 183 mutex_exit(&mqlist_lock); 184 if (inuse) { 185 error = syscall_establish(NULL, mqueue_syscalls); 186 KASSERT(error == 0); 187 return EBUSY; 188 } 189 } 190 191 if (mqsysctl_log != NULL) 192 sysctl_teardown(&mqsysctl_log); 193 194 kauth_unlisten_scope(mq_listener); 195 196 mutex_destroy(&mqlist_lock); 197 pool_cache_destroy(mqmsg_cache); 198 return 0; 199 } 200 201 /* 202 * Module interface. 203 */ 204 static int 205 mqueue_modcmd(modcmd_t cmd, void *arg) 206 { 207 208 switch (cmd) { 209 case MODULE_CMD_INIT: 210 return mqueue_sysinit(); 211 case MODULE_CMD_FINI: 212 return mqueue_sysfini(true); 213 default: 214 return ENOTTY; 215 } 216 } 217 218 /* 219 * Free the message. 220 */ 221 static void 222 mqueue_freemsg(struct mq_msg *msg, const size_t size) 223 { 224 225 if (size > MQ_DEF_MSGSIZE) { 226 kmem_free(msg, size); 227 } else { 228 pool_cache_put(mqmsg_cache, msg); 229 } 230 } 231 232 /* 233 * Destroy the message queue. 234 */ 235 static void 236 mqueue_destroy(struct mqueue *mq) 237 { 238 struct mq_msg *msg; 239 size_t msz; 240 u_int i; 241 242 /* Note MQ_PQSIZE + 1. */ 243 for (i = 0; i <= MQ_PQSIZE; i++) { 244 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) { 245 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue); 246 msz = sizeof(struct mq_msg) + msg->msg_len; 247 mqueue_freemsg(msg, msz); 248 } 249 } 250 if (mq->mq_name) { 251 kmem_free(mq->mq_name, MQ_NAMELEN); 252 } 253 seldestroy(&mq->mq_rsel); 254 seldestroy(&mq->mq_wsel); 255 cv_destroy(&mq->mq_send_cv); 256 cv_destroy(&mq->mq_recv_cv); 257 mutex_destroy(&mq->mq_mtx); 258 kmem_free(mq, sizeof(struct mqueue)); 259 } 260 261 /* 262 * mqueue_lookup: lookup for file name in general list of message queues. 263 * 264 * => locks the message queue on success 265 */ 266 static mqueue_t * 267 mqueue_lookup(const char *name) 268 { 269 mqueue_t *mq; 270 271 KASSERT(mutex_owned(&mqlist_lock)); 272 273 LIST_FOREACH(mq, &mqueue_head, mq_list) { 274 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) { 275 mutex_enter(&mq->mq_mtx); 276 return mq; 277 } 278 } 279 return NULL; 280 } 281 282 /* 283 * mqueue_get: get the mqueue from the descriptor. 284 * 285 * => locks the message queue, if found. 286 * => holds a reference on the file descriptor. 287 */ 288 int 289 mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret) 290 { 291 const int fd = (int)mqd; 292 mqueue_t *mq; 293 file_t *fp; 294 295 fp = fd_getfile(fd); 296 if (__predict_false(fp == NULL)) { 297 return EBADF; 298 } 299 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) { 300 fd_putfile(fd); 301 return EBADF; 302 } 303 if (fflag && (fp->f_flag & fflag) == 0) { 304 fd_putfile(fd); 305 return EBADF; 306 } 307 mq = fp->f_mqueue; 308 mutex_enter(&mq->mq_mtx); 309 310 *mqret = mq; 311 return 0; 312 } 313 314 /* 315 * mqueue_linear_insert: perform linear insert according to the message 316 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a 317 * sorted list used only when mq_prio_max is increased via sysctl. 318 */ 319 static inline void 320 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 321 { 322 struct mq_msg *mit; 323 324 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 325 if (msg->msg_prio > mit->msg_prio) 326 break; 327 } 328 if (mit == NULL) { 329 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 330 } else { 331 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 332 } 333 } 334 335 static int 336 mq_stat_fop(file_t *fp, struct stat *st) 337 { 338 struct mqueue *mq = fp->f_mqueue; 339 340 memset(st, 0, sizeof(*st)); 341 342 mutex_enter(&mq->mq_mtx); 343 st->st_mode = mq->mq_mode; 344 st->st_uid = mq->mq_euid; 345 st->st_gid = mq->mq_egid; 346 st->st_atimespec = mq->mq_atime; 347 st->st_mtimespec = mq->mq_mtime; 348 st->st_ctimespec = st->st_birthtimespec = mq->mq_btime; 349 st->st_uid = kauth_cred_geteuid(fp->f_cred); 350 st->st_gid = kauth_cred_getegid(fp->f_cred); 351 mutex_exit(&mq->mq_mtx); 352 353 return 0; 354 } 355 356 static int 357 mq_poll_fop(file_t *fp, int events) 358 { 359 struct mqueue *mq = fp->f_mqueue; 360 struct mq_attr *mqattr; 361 int revents = 0; 362 363 mutex_enter(&mq->mq_mtx); 364 mqattr = &mq->mq_attrib; 365 if (events & (POLLIN | POLLRDNORM)) { 366 /* Ready for receiving, if there are messages in the queue. */ 367 if (mqattr->mq_curmsgs) 368 revents |= events & (POLLIN | POLLRDNORM); 369 else 370 selrecord(curlwp, &mq->mq_rsel); 371 } 372 if (events & (POLLOUT | POLLWRNORM)) { 373 /* Ready for sending, if the message queue is not full. */ 374 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 375 revents |= events & (POLLOUT | POLLWRNORM); 376 else 377 selrecord(curlwp, &mq->mq_wsel); 378 } 379 mutex_exit(&mq->mq_mtx); 380 381 return revents; 382 } 383 384 static int 385 mq_close_fop(file_t *fp) 386 { 387 proc_t *p = curproc; 388 mqueue_t *mq = fp->f_mqueue; 389 bool destroy = false; 390 391 mutex_enter(&mq->mq_mtx); 392 KASSERT(mq->mq_refcnt > 0); 393 if (--mq->mq_refcnt == 0) { 394 /* Destroy if the last reference and unlinked. */ 395 destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0; 396 } 397 mutex_exit(&mq->mq_mtx); 398 399 if (destroy) { 400 mqueue_destroy(mq); 401 } 402 atomic_dec_uint(&p->p_mqueue_cnt); 403 return 0; 404 } 405 406 static int 407 mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred) 408 { 409 mode_t acc_mode = 0; 410 411 /* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */ 412 if (access & FREAD) { 413 acc_mode |= VREAD; 414 } 415 if (access & FWRITE) { 416 acc_mode |= VWRITE; 417 } 418 if (genfs_can_access(VNON, mq->mq_mode, mq->mq_euid, 419 mq->mq_egid, acc_mode, cred)) { 420 return EACCES; 421 } 422 return 0; 423 } 424 425 static int 426 mqueue_create(lwp_t *l, char *name, struct mq_attr *attr, mode_t mode, 427 int oflag, mqueue_t **mqret) 428 { 429 proc_t *p = l->l_proc; 430 struct cwdinfo *cwdi = p->p_cwdi; 431 mqueue_t *mq; 432 u_int i; 433 434 /* Empty name is invalid. */ 435 if (name[0] == '\0') { 436 return EINVAL; 437 } 438 439 /* Check for mqueue attributes. */ 440 if (attr) { 441 if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > mq_max_maxmsg || 442 attr->mq_msgsize <= 0 || 443 attr->mq_msgsize > mq_max_msgsize) { 444 return EINVAL; 445 } 446 attr->mq_curmsgs = 0; 447 } 448 449 /* 450 * Allocate new message queue, initialize data structures, copy the 451 * name attributes. Note that the initial reference is set here. 452 */ 453 mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP); 454 455 mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE); 456 cv_init(&mq->mq_send_cv, "mqsendcv"); 457 cv_init(&mq->mq_recv_cv, "mqrecvcv"); 458 for (i = 0; i < (MQ_PQSIZE + 1); i++) { 459 TAILQ_INIT(&mq->mq_head[i]); 460 } 461 selinit(&mq->mq_rsel); 462 selinit(&mq->mq_wsel); 463 mq->mq_name = name; 464 mq->mq_refcnt = 1; 465 466 if (attr != NULL) { 467 memcpy(&mq->mq_attrib, attr, sizeof(struct mq_attr)); 468 } else { 469 memset(&mq->mq_attrib, 0, sizeof(struct mq_attr)); 470 mq->mq_attrib.mq_maxmsg = mq_def_maxmsg; 471 mq->mq_attrib.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg); 472 } 473 474 CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0); 475 mq->mq_attrib.mq_flags = (O_MASK & oflag); 476 477 /* Store mode and effective UID with GID. */ 478 mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT; 479 mq->mq_euid = kauth_cred_geteuid(l->l_cred); 480 mq->mq_egid = kauth_cred_getegid(l->l_cred); 481 482 *mqret = mq; 483 return 0; 484 } 485 486 /* 487 * Helper function for mq_open() - note that "u_name" is a userland pointer, 488 * while "attr" is a kernel pointer! 489 */ 490 int 491 mq_handle_open(struct lwp *l, const char *u_name, int oflag, mode_t mode, 492 struct mq_attr *attr, register_t *retval) 493 { 494 struct proc *p = l->l_proc; 495 struct mqueue *mq, *mq_new = NULL; 496 int mqd, error; 497 file_t *fp; 498 char *name; 499 500 /* Get the name from the user-space. */ 501 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 502 error = copyinstr(u_name, name, MQ_NAMELEN - 1, NULL); 503 if (error) { 504 kmem_free(name, MQ_NAMELEN); 505 return error; 506 } 507 508 /* Allocate file structure and descriptor. */ 509 error = fd_allocfile(&fp, &mqd); 510 if (error) { 511 kmem_free(name, MQ_NAMELEN); 512 return error; 513 } 514 515 /* Account and check for the limit. */ 516 if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) { 517 atomic_dec_uint(&p->p_mqueue_cnt); 518 error = EMFILE; 519 goto err; 520 } 521 522 fp->f_type = DTYPE_MQUEUE; 523 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE); 524 fp->f_ops = &mqops; 525 526 if (oflag & O_CREAT) { 527 /* Create a new message queue. */ 528 error = mqueue_create(l, name, attr, mode, oflag, &mq_new); 529 if (error) { 530 goto err; 531 } 532 KASSERT(mq_new != NULL); 533 } 534 535 /* Lookup for a message queue with such name. */ 536 mutex_enter(&mqlist_lock); 537 mq = mqueue_lookup(name); 538 if (mq) { 539 KASSERT(mutex_owned(&mq->mq_mtx)); 540 mutex_exit(&mqlist_lock); 541 542 /* Check for exclusive create. */ 543 if (oflag & O_EXCL) { 544 mutex_exit(&mq->mq_mtx); 545 error = EEXIST; 546 goto err; 547 } 548 549 /* Verify permissions. */ 550 if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) { 551 mutex_exit(&mq->mq_mtx); 552 error = EACCES; 553 goto err; 554 } 555 556 /* If we have the access, add a new reference. */ 557 mq->mq_refcnt++; 558 mutex_exit(&mq->mq_mtx); 559 } else { 560 /* Fail if not found and not creating. */ 561 if ((oflag & O_CREAT) == 0) { 562 mutex_exit(&mqlist_lock); 563 KASSERT(mq_new == NULL); 564 error = ENOENT; 565 goto err; 566 } 567 568 /* Initial timestamps. */ 569 mq = mq_new; 570 getnanotime(&mq->mq_btime); 571 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 572 573 /* 574 * Finally, insert message queue into the list. 575 * Note: it already has the initial reference. 576 */ 577 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 578 mutex_exit(&mqlist_lock); 579 580 mq_new = NULL; 581 name = NULL; 582 } 583 KASSERT(mq != NULL); 584 fp->f_mqueue = mq; 585 fd_affix(p, fp, mqd); 586 *retval = mqd; 587 err: 588 if (error) { 589 fd_abort(p, fp, mqd); 590 } 591 if (mq_new) { 592 /* Note: will free the 'name'. */ 593 mqueue_destroy(mq_new); 594 } else if (name) { 595 kmem_free(name, MQ_NAMELEN); 596 } 597 return error; 598 } 599 600 /* 601 * General mqueue system calls. 602 */ 603 604 int 605 sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap, 606 register_t *retval) 607 { 608 /* { 609 syscallarg(const char *) name; 610 syscallarg(int) oflag; 611 syscallarg(mode_t) mode; 612 syscallarg(struct mq_attr) attr; 613 } */ 614 struct mq_attr *attr = NULL, a; 615 int error; 616 617 if ((SCARG(uap, oflag) & O_CREAT) != 0 && SCARG(uap, attr) != NULL) { 618 error = copyin(SCARG(uap, attr), &a, sizeof(a)); 619 if (error) 620 return error; 621 attr = &a; 622 } 623 624 return mq_handle_open(l, SCARG(uap, name), SCARG(uap, oflag), 625 SCARG(uap, mode), attr, retval); 626 } 627 628 int 629 sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap, 630 register_t *retval) 631 { 632 633 return sys_close(l, (const void *)uap, retval); 634 } 635 636 /* 637 * Primary mq_recv1() function. 638 */ 639 int 640 mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio, 641 struct timespec *ts, ssize_t *mlen) 642 { 643 struct mqueue *mq; 644 struct mq_msg *msg = NULL; 645 struct mq_attr *mqattr; 646 u_int idx; 647 int error; 648 649 error = mqueue_get(mqdes, FREAD, &mq); 650 if (error) { 651 return error; 652 } 653 getnanotime(&mq->mq_atime); 654 mqattr = &mq->mq_attrib; 655 656 /* Check the message size limits */ 657 if (msg_len < mqattr->mq_msgsize) { 658 error = EMSGSIZE; 659 goto error; 660 } 661 662 /* Check if queue is empty */ 663 while (mqattr->mq_curmsgs == 0) { 664 int t; 665 666 if (mqattr->mq_flags & O_NONBLOCK) { 667 error = EAGAIN; 668 goto error; 669 } 670 if (ts) { 671 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 672 NULL); 673 if (error) 674 goto error; 675 } else 676 t = 0; 677 /* 678 * Block until someone sends the message. 679 * While doing this, notification should not be sent. 680 */ 681 mqattr->mq_flags |= MQ_RECEIVE; 682 error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t); 683 mqattr->mq_flags &= ~MQ_RECEIVE; 684 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 685 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 686 goto error; 687 } 688 } 689 690 /* 691 * Find the highest priority message, and remove it from the queue. 692 * At first, reserved queue is checked, bitmap is next. 693 */ 694 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 695 if (__predict_true(msg == NULL)) { 696 idx = ffs(mq->mq_bitmap); 697 msg = TAILQ_FIRST(&mq->mq_head[idx]); 698 KASSERT(msg != NULL); 699 } else { 700 idx = MQ_PQRESQ; 701 } 702 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 703 704 /* Unmark the bit, if last message. */ 705 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) { 706 KASSERT((MQ_PQSIZE - idx) == msg->msg_prio); 707 mq->mq_bitmap &= ~(1U << --idx); 708 } 709 710 /* Decrement the counter and signal waiter, if any */ 711 mqattr->mq_curmsgs--; 712 cv_signal(&mq->mq_recv_cv); 713 714 /* Ready for sending now */ 715 selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0); 716 error: 717 mutex_exit(&mq->mq_mtx); 718 fd_putfile((int)mqdes); 719 if (error) 720 return error; 721 722 /* 723 * Copy the data to the user-space. 724 * Note: According to POSIX, no message should be removed from the 725 * queue in case of fail - this would be violated. 726 */ 727 *mlen = msg->msg_len; 728 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 729 if (error == 0 && msg_prio) 730 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 731 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 732 733 return error; 734 } 735 736 int 737 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap, 738 register_t *retval) 739 { 740 /* { 741 syscallarg(mqd_t) mqdes; 742 syscallarg(char *) msg_ptr; 743 syscallarg(size_t) msg_len; 744 syscallarg(unsigned *) msg_prio; 745 } */ 746 ssize_t mlen; 747 int error; 748 749 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 750 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen); 751 if (error == 0) 752 *retval = mlen; 753 754 return error; 755 } 756 757 int 758 sys___mq_timedreceive50(struct lwp *l, 759 const struct sys___mq_timedreceive50_args *uap, register_t *retval) 760 { 761 /* { 762 syscallarg(mqd_t) mqdes; 763 syscallarg(char *) msg_ptr; 764 syscallarg(size_t) msg_len; 765 syscallarg(unsigned *) msg_prio; 766 syscallarg(const struct timespec *) abs_timeout; 767 } */ 768 struct timespec ts, *tsp; 769 ssize_t mlen; 770 int error; 771 772 /* Get and convert time value */ 773 if (SCARG(uap, abs_timeout)) { 774 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 775 if (error) 776 return error; 777 tsp = &ts; 778 } else { 779 tsp = NULL; 780 } 781 782 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 783 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); 784 if (error == 0) 785 *retval = mlen; 786 787 return error; 788 } 789 790 /* 791 * Primary mq_send1() function. 792 */ 793 int 794 mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio, 795 struct timespec *ts) 796 { 797 struct mqueue *mq; 798 struct mq_msg *msg; 799 struct mq_attr *mqattr; 800 struct proc *notify = NULL; 801 ksiginfo_t ksi; 802 size_t size; 803 int error; 804 805 /* Check the priority range */ 806 if (msg_prio >= mq_prio_max) 807 return EINVAL; 808 809 /* Allocate a new message */ 810 if (msg_len > mq_max_msgsize) 811 return EMSGSIZE; 812 size = sizeof(struct mq_msg) + msg_len; 813 if (size > mq_max_msgsize) 814 return EMSGSIZE; 815 816 if (size > MQ_DEF_MSGSIZE) { 817 msg = kmem_alloc(size, KM_SLEEP); 818 } else { 819 msg = pool_cache_get(mqmsg_cache, PR_WAITOK); 820 } 821 822 /* Get the data from user-space */ 823 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 824 if (error) { 825 mqueue_freemsg(msg, size); 826 return error; 827 } 828 msg->msg_len = msg_len; 829 msg->msg_prio = msg_prio; 830 831 error = mqueue_get(mqdes, FWRITE, &mq); 832 if (error) { 833 mqueue_freemsg(msg, size); 834 return error; 835 } 836 getnanotime(&mq->mq_mtime); 837 mqattr = &mq->mq_attrib; 838 839 /* Check the message size limit */ 840 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 841 error = EMSGSIZE; 842 goto error; 843 } 844 845 /* Check if queue is full */ 846 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 847 int t; 848 849 if (mqattr->mq_flags & O_NONBLOCK) { 850 error = EAGAIN; 851 goto error; 852 } 853 if (ts) { 854 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 855 NULL); 856 if (error) 857 goto error; 858 } else 859 t = 0; 860 /* Block until queue becomes available */ 861 error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t); 862 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 863 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 864 goto error; 865 } 866 } 867 KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg); 868 869 /* 870 * Insert message into the queue, according to the priority. 871 * Note the difference between index and priority. 872 */ 873 if (__predict_true(msg_prio < MQ_PQSIZE)) { 874 u_int idx = MQ_PQSIZE - msg_prio; 875 876 KASSERT(idx != MQ_PQRESQ); 877 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 878 mq->mq_bitmap |= (1U << --idx); 879 } else { 880 mqueue_linear_insert(mq, msg); 881 } 882 883 /* Check for the notify */ 884 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 885 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 886 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 887 /* Initialize the signal */ 888 KSI_INIT(&ksi); 889 ksi.ksi_signo = mq->mq_sig_notify.sigev_signo; 890 ksi.ksi_code = SI_MESGQ; 891 ksi.ksi_value = mq->mq_sig_notify.sigev_value; 892 /* Unregister the process */ 893 notify = mq->mq_notify_proc; 894 mq->mq_notify_proc = NULL; 895 } 896 897 /* Increment the counter and signal waiter, if any */ 898 mqattr->mq_curmsgs++; 899 cv_signal(&mq->mq_send_cv); 900 901 /* Ready for receiving now */ 902 selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0); 903 error: 904 mutex_exit(&mq->mq_mtx); 905 fd_putfile((int)mqdes); 906 907 if (error) { 908 mqueue_freemsg(msg, size); 909 } else if (notify) { 910 /* Send the notify, if needed */ 911 mutex_enter(proc_lock); 912 kpsignal(notify, &ksi, NULL); 913 mutex_exit(proc_lock); 914 } 915 return error; 916 } 917 918 int 919 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap, 920 register_t *retval) 921 { 922 /* { 923 syscallarg(mqd_t) mqdes; 924 syscallarg(const char *) msg_ptr; 925 syscallarg(size_t) msg_len; 926 syscallarg(unsigned) msg_prio; 927 } */ 928 929 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 930 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL); 931 } 932 933 int 934 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap, 935 register_t *retval) 936 { 937 /* { 938 syscallarg(mqd_t) mqdes; 939 syscallarg(const char *) msg_ptr; 940 syscallarg(size_t) msg_len; 941 syscallarg(unsigned) msg_prio; 942 syscallarg(const struct timespec *) abs_timeout; 943 } */ 944 struct timespec ts, *tsp; 945 int error; 946 947 /* Get and convert time value */ 948 if (SCARG(uap, abs_timeout)) { 949 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 950 if (error) 951 return error; 952 tsp = &ts; 953 } else { 954 tsp = NULL; 955 } 956 957 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 958 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); 959 } 960 961 int 962 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap, 963 register_t *retval) 964 { 965 /* { 966 syscallarg(mqd_t) mqdes; 967 syscallarg(const struct sigevent *) notification; 968 } */ 969 struct mqueue *mq; 970 struct sigevent sig; 971 int error; 972 973 if (SCARG(uap, notification)) { 974 /* Get the signal from user-space */ 975 error = copyin(SCARG(uap, notification), &sig, 976 sizeof(struct sigevent)); 977 if (error) 978 return error; 979 if (sig.sigev_notify == SIGEV_SIGNAL && 980 (sig.sigev_signo <=0 || sig.sigev_signo >= NSIG)) 981 return EINVAL; 982 } 983 984 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 985 if (error) { 986 return error; 987 } 988 if (SCARG(uap, notification)) { 989 /* Register notification: set the signal and target process */ 990 if (mq->mq_notify_proc == NULL) { 991 memcpy(&mq->mq_sig_notify, &sig, 992 sizeof(struct sigevent)); 993 mq->mq_notify_proc = l->l_proc; 994 } else { 995 /* Fail if someone else already registered */ 996 error = EBUSY; 997 } 998 } else { 999 /* Unregister the notification */ 1000 mq->mq_notify_proc = NULL; 1001 } 1002 mutex_exit(&mq->mq_mtx); 1003 fd_putfile((int)SCARG(uap, mqdes)); 1004 1005 return error; 1006 } 1007 1008 int 1009 sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap, 1010 register_t *retval) 1011 { 1012 /* { 1013 syscallarg(mqd_t) mqdes; 1014 syscallarg(struct mq_attr *) mqstat; 1015 } */ 1016 struct mqueue *mq; 1017 struct mq_attr attr; 1018 int error; 1019 1020 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1021 if (error) { 1022 return error; 1023 } 1024 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1025 mutex_exit(&mq->mq_mtx); 1026 fd_putfile((int)SCARG(uap, mqdes)); 1027 1028 return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr)); 1029 } 1030 1031 int 1032 sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap, 1033 register_t *retval) 1034 { 1035 /* { 1036 syscallarg(mqd_t) mqdes; 1037 syscallarg(const struct mq_attr *) mqstat; 1038 syscallarg(struct mq_attr *) omqstat; 1039 } */ 1040 struct mqueue *mq; 1041 struct mq_attr attr; 1042 int error, nonblock; 1043 1044 error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr)); 1045 if (error) 1046 return error; 1047 nonblock = (attr.mq_flags & O_NONBLOCK); 1048 1049 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1050 if (error) { 1051 return error; 1052 } 1053 1054 /* Copy the old attributes, if needed */ 1055 if (SCARG(uap, omqstat)) { 1056 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1057 } 1058 1059 /* Ignore everything, except O_NONBLOCK */ 1060 if (nonblock) 1061 mq->mq_attrib.mq_flags |= O_NONBLOCK; 1062 else 1063 mq->mq_attrib.mq_flags &= ~O_NONBLOCK; 1064 1065 mutex_exit(&mq->mq_mtx); 1066 fd_putfile((int)SCARG(uap, mqdes)); 1067 1068 /* 1069 * Copy the data to the user-space. 1070 * Note: According to POSIX, the new attributes should not be set in 1071 * case of fail - this would be violated. 1072 */ 1073 if (SCARG(uap, omqstat)) 1074 error = copyout(&attr, SCARG(uap, omqstat), 1075 sizeof(struct mq_attr)); 1076 1077 return error; 1078 } 1079 1080 int 1081 sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap, 1082 register_t *retval) 1083 { 1084 /* { 1085 syscallarg(const char *) name; 1086 } */ 1087 mqueue_t *mq; 1088 char *name; 1089 int error, refcnt = 0; 1090 1091 /* Get the name from the user-space */ 1092 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 1093 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL); 1094 if (error) { 1095 kmem_free(name, MQ_NAMELEN); 1096 return error; 1097 } 1098 1099 mutex_enter(&mqlist_lock); 1100 mq = mqueue_lookup(name); 1101 if (mq == NULL) { 1102 error = ENOENT; 1103 goto err; 1104 } 1105 KASSERT(mutex_owned(&mq->mq_mtx)); 1106 1107 /* Verify permissions. */ 1108 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq, 1109 NULL, NULL)) { 1110 mutex_exit(&mq->mq_mtx); 1111 error = EACCES; 1112 goto err; 1113 } 1114 1115 /* Remove and destroy if no references. */ 1116 LIST_REMOVE(mq, mq_list); 1117 refcnt = mq->mq_refcnt; 1118 if (refcnt) { 1119 /* Mark as unlinked, if there are references. */ 1120 mq->mq_attrib.mq_flags |= MQ_UNLINKED; 1121 } 1122 1123 /* Wake up waiters, if there are any. */ 1124 cv_broadcast(&mq->mq_send_cv); 1125 cv_broadcast(&mq->mq_recv_cv); 1126 1127 selnotify(&mq->mq_rsel, POLLHUP, 0); 1128 selnotify(&mq->mq_wsel, POLLHUP, 0); 1129 1130 mutex_exit(&mq->mq_mtx); 1131 err: 1132 mutex_exit(&mqlist_lock); 1133 /* 1134 * If last reference - destroy the message queue. Otherwise, 1135 * the last mq_close() call will do that. 1136 */ 1137 if (!error && refcnt == 0) { 1138 mqueue_destroy(mq); 1139 } 1140 kmem_free(name, MQ_NAMELEN); 1141 1142 return error; 1143 } 1144 1145 /* 1146 * System control nodes. 1147 */ 1148 static int 1149 mqueue_sysctl_init(void) 1150 { 1151 const struct sysctlnode *node = NULL; 1152 1153 mqsysctl_log = NULL; 1154 1155 sysctl_createv(&mqsysctl_log, 0, NULL, NULL, 1156 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1157 CTLTYPE_INT, "posix_msg", 1158 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 1159 "Message Passing option to which the " 1160 "system attempts to conform"), 1161 NULL, _POSIX_MESSAGE_PASSING, NULL, 0, 1162 CTL_KERN, CTL_CREATE, CTL_EOL); 1163 sysctl_createv(&mqsysctl_log, 0, NULL, &node, 1164 CTLFLAG_PERMANENT, 1165 CTLTYPE_NODE, "mqueue", 1166 SYSCTL_DESCR("Message queue options"), 1167 NULL, 0, NULL, 0, 1168 CTL_KERN, CTL_CREATE, CTL_EOL); 1169 1170 if (node == NULL) 1171 return ENXIO; 1172 1173 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1174 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1175 CTLTYPE_INT, "mq_open_max", 1176 SYSCTL_DESCR("Maximal number of message queue descriptors " 1177 "that process could open"), 1178 NULL, 0, &mq_open_max, 0, 1179 CTL_CREATE, CTL_EOL); 1180 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1181 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1182 CTLTYPE_INT, "mq_prio_max", 1183 SYSCTL_DESCR("Maximal priority of the message"), 1184 NULL, 0, &mq_prio_max, 0, 1185 CTL_CREATE, CTL_EOL); 1186 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1187 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1188 CTLTYPE_INT, "mq_max_msgsize", 1189 SYSCTL_DESCR("Maximal allowed size of the message"), 1190 NULL, 0, &mq_max_msgsize, 0, 1191 CTL_CREATE, CTL_EOL); 1192 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1193 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1194 CTLTYPE_INT, "mq_def_maxmsg", 1195 SYSCTL_DESCR("Default maximal message count"), 1196 NULL, 0, &mq_def_maxmsg, 0, 1197 CTL_CREATE, CTL_EOL); 1198 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1199 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1200 CTLTYPE_INT, "mq_max_maxmsg", 1201 SYSCTL_DESCR("Maximal allowed message count"), 1202 NULL, 0, &mq_max_maxmsg, 0, 1203 CTL_CREATE, CTL_EOL); 1204 1205 return 0; 1206 } 1207 1208 /* 1209 * Debugging. 1210 */ 1211 #if defined(DDB) 1212 1213 void 1214 mqueue_print_list(void (*pr)(const char *, ...)) 1215 { 1216 struct mqueue *mq; 1217 1218 (*pr)("Global list of the message queues:\n"); 1219 (*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n", 1220 "Name", "Ptr", "Mode", "Flags", "Ref", 1221 "MaxMsg", "MsgSze", "CurMsg"); 1222 LIST_FOREACH(mq, &mqueue_head, mq_list) { 1223 (*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n", 1224 mq->mq_name, mq, mq->mq_mode, 1225 mq->mq_attrib.mq_flags, mq->mq_refcnt, 1226 mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize, 1227 mq->mq_attrib.mq_curmsgs); 1228 } 1229 } 1230 1231 #endif /* defined(DDB) */ 1232