1 /* $NetBSD: sys_mqueue.c,v 1.45 2019/09/15 20:51:03 christos Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Implementation of POSIX message queues. 31 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. 32 * 33 * Locking 34 * 35 * Global list of message queues (mqueue_head) is protected by mqlist_lock. 36 * Each message queue and its members are protected by mqueue::mq_mtx. 37 * Note that proc_t::p_mqueue_cnt is updated atomically. 38 * 39 * Lock order: 40 * 41 * mqlist_lock -> 42 * mqueue::mq_mtx 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.45 2019/09/15 20:51:03 christos Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/types.h> 50 #include <sys/atomic.h> 51 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/kauth.h> 55 #include <sys/lwp.h> 56 #include <sys/mqueue.h> 57 #include <sys/module.h> 58 #include <sys/poll.h> 59 #include <sys/select.h> 60 #include <sys/signal.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/sysctl.h> 64 #include <sys/syscall.h> 65 #include <sys/syscallvar.h> 66 #include <sys/syscallargs.h> 67 68 #include <miscfs/genfs/genfs.h> 69 70 MODULE(MODULE_CLASS_MISC, mqueue, NULL); 71 72 /* System-wide limits. */ 73 static u_int mq_open_max = MQ_OPEN_MAX; 74 static u_int mq_prio_max = MQ_PRIO_MAX; 75 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE; 76 static u_int mq_def_maxmsg = 32; 77 static u_int mq_max_maxmsg = 16 * 32; 78 79 static pool_cache_t mqmsg_cache __read_mostly; 80 static kmutex_t mqlist_lock __cacheline_aligned; 81 static LIST_HEAD(, mqueue) mqueue_head __cacheline_aligned; 82 static struct sysctllog * mqsysctl_log; 83 84 static kauth_listener_t mq_listener; 85 86 static int mqueue_sysinit(void); 87 static int mqueue_sysfini(bool); 88 static int mqueue_sysctl_init(void); 89 static int mq_poll_fop(file_t *, int); 90 static int mq_stat_fop(file_t *, struct stat *); 91 static int mq_close_fop(file_t *); 92 93 static const struct fileops mqops = { 94 .fo_name = "mq", 95 .fo_read = fbadop_read, 96 .fo_write = fbadop_write, 97 .fo_ioctl = fbadop_ioctl, 98 .fo_fcntl = fnullop_fcntl, 99 .fo_poll = mq_poll_fop, 100 .fo_stat = mq_stat_fop, 101 .fo_close = mq_close_fop, 102 .fo_kqfilter = fnullop_kqfilter, 103 .fo_restart = fnullop_restart, 104 }; 105 106 static const struct syscall_package mqueue_syscalls[] = { 107 { SYS_mq_open, 0, (sy_call_t *)sys_mq_open }, 108 { SYS_mq_close, 0, (sy_call_t *)sys_mq_close }, 109 { SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink }, 110 { SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr }, 111 { SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr }, 112 { SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify }, 113 { SYS_mq_send, 0, (sy_call_t *)sys_mq_send }, 114 { SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive }, 115 { SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 }, 116 { SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 }, 117 { 0, 0, NULL } 118 }; 119 120 static int 121 mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 122 void *arg0, void *arg1, void *arg2, void *arg3) 123 { 124 mqueue_t *mq; 125 int result; 126 127 if (action != KAUTH_SYSTEM_MQUEUE) 128 return KAUTH_RESULT_DEFER; 129 130 result = KAUTH_RESULT_DEFER; 131 132 mq = arg1; 133 134 if (kauth_cred_geteuid(cred) == mq->mq_euid) 135 result = KAUTH_RESULT_ALLOW; 136 137 return result; 138 } 139 140 /* 141 * Initialisation and unloading of POSIX message queue subsystem. 142 */ 143 144 static int 145 mqueue_sysinit(void) 146 { 147 int error; 148 149 mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit, 150 0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL); 151 mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE); 152 LIST_INIT(&mqueue_head); 153 154 error = mqueue_sysctl_init(); 155 if (error) { 156 (void)mqueue_sysfini(false); 157 return error; 158 } 159 error = syscall_establish(NULL, mqueue_syscalls); 160 if (error) { 161 (void)mqueue_sysfini(false); 162 } 163 mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 164 mq_listener_cb, NULL); 165 return error; 166 } 167 168 static int 169 mqueue_sysfini(bool interface) 170 { 171 172 if (interface) { 173 int error; 174 bool inuse; 175 176 /* Stop syscall activity. */ 177 error = syscall_disestablish(NULL, mqueue_syscalls); 178 if (error) 179 return error; 180 /* Check if there are any message queues in use. */ 181 mutex_enter(&mqlist_lock); 182 inuse = !LIST_EMPTY(&mqueue_head); 183 mutex_exit(&mqlist_lock); 184 if (inuse) { 185 error = syscall_establish(NULL, mqueue_syscalls); 186 KASSERT(error == 0); 187 return EBUSY; 188 } 189 } 190 191 if (mqsysctl_log != NULL) 192 sysctl_teardown(&mqsysctl_log); 193 194 kauth_unlisten_scope(mq_listener); 195 196 mutex_destroy(&mqlist_lock); 197 pool_cache_destroy(mqmsg_cache); 198 return 0; 199 } 200 201 /* 202 * Module interface. 203 */ 204 static int 205 mqueue_modcmd(modcmd_t cmd, void *arg) 206 { 207 208 switch (cmd) { 209 case MODULE_CMD_INIT: 210 return mqueue_sysinit(); 211 case MODULE_CMD_FINI: 212 return mqueue_sysfini(true); 213 default: 214 return ENOTTY; 215 } 216 } 217 218 /* 219 * Free the message. 220 */ 221 static void 222 mqueue_freemsg(struct mq_msg *msg, const size_t size) 223 { 224 225 if (size > MQ_DEF_MSGSIZE) { 226 kmem_free(msg, size); 227 } else { 228 pool_cache_put(mqmsg_cache, msg); 229 } 230 } 231 232 /* 233 * Destroy the message queue. 234 */ 235 static void 236 mqueue_destroy(struct mqueue *mq) 237 { 238 struct mq_msg *msg; 239 size_t msz; 240 u_int i; 241 242 /* Note MQ_PQSIZE + 1. */ 243 for (i = 0; i <= MQ_PQSIZE; i++) { 244 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) { 245 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue); 246 msz = sizeof(struct mq_msg) + msg->msg_len; 247 mqueue_freemsg(msg, msz); 248 } 249 } 250 if (mq->mq_name) { 251 kmem_free(mq->mq_name, MQ_NAMELEN); 252 } 253 seldestroy(&mq->mq_rsel); 254 seldestroy(&mq->mq_wsel); 255 cv_destroy(&mq->mq_send_cv); 256 cv_destroy(&mq->mq_recv_cv); 257 mutex_destroy(&mq->mq_mtx); 258 kmem_free(mq, sizeof(struct mqueue)); 259 } 260 261 /* 262 * mqueue_lookup: lookup for file name in general list of message queues. 263 * 264 * => locks the message queue on success 265 */ 266 static mqueue_t * 267 mqueue_lookup(const char *name) 268 { 269 mqueue_t *mq; 270 271 KASSERT(mutex_owned(&mqlist_lock)); 272 273 LIST_FOREACH(mq, &mqueue_head, mq_list) { 274 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) { 275 mutex_enter(&mq->mq_mtx); 276 return mq; 277 } 278 } 279 return NULL; 280 } 281 282 /* 283 * mqueue_get: get the mqueue from the descriptor. 284 * 285 * => locks the message queue, if found. 286 * => holds a reference on the file descriptor. 287 */ 288 int 289 mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret) 290 { 291 const int fd = (int)mqd; 292 mqueue_t *mq; 293 file_t *fp; 294 295 fp = fd_getfile(fd); 296 if (__predict_false(fp == NULL)) { 297 return EBADF; 298 } 299 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) { 300 fd_putfile(fd); 301 return EBADF; 302 } 303 if (fflag && (fp->f_flag & fflag) == 0) { 304 fd_putfile(fd); 305 return EBADF; 306 } 307 mq = fp->f_mqueue; 308 mutex_enter(&mq->mq_mtx); 309 310 *mqret = mq; 311 return 0; 312 } 313 314 /* 315 * mqueue_linear_insert: perform linear insert according to the message 316 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a 317 * sorted list used only when mq_prio_max is increased via sysctl. 318 */ 319 static inline void 320 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 321 { 322 struct mq_msg *mit; 323 324 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 325 if (msg->msg_prio > mit->msg_prio) 326 break; 327 } 328 if (mit == NULL) { 329 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 330 } else { 331 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 332 } 333 } 334 335 static int 336 mq_stat_fop(file_t *fp, struct stat *st) 337 { 338 struct mqueue *mq = fp->f_mqueue; 339 340 memset(st, 0, sizeof(*st)); 341 342 mutex_enter(&mq->mq_mtx); 343 st->st_mode = mq->mq_mode; 344 st->st_uid = mq->mq_euid; 345 st->st_gid = mq->mq_egid; 346 st->st_atimespec = mq->mq_atime; 347 st->st_mtimespec = mq->mq_mtime; 348 st->st_ctimespec = st->st_birthtimespec = mq->mq_btime; 349 st->st_uid = kauth_cred_geteuid(fp->f_cred); 350 st->st_gid = kauth_cred_getegid(fp->f_cred); 351 mutex_exit(&mq->mq_mtx); 352 353 return 0; 354 } 355 356 static int 357 mq_poll_fop(file_t *fp, int events) 358 { 359 struct mqueue *mq = fp->f_mqueue; 360 struct mq_attr *mqattr; 361 int revents = 0; 362 363 mutex_enter(&mq->mq_mtx); 364 mqattr = &mq->mq_attrib; 365 if (events & (POLLIN | POLLRDNORM)) { 366 /* Ready for receiving, if there are messages in the queue. */ 367 if (mqattr->mq_curmsgs) 368 revents |= events & (POLLIN | POLLRDNORM); 369 else 370 selrecord(curlwp, &mq->mq_rsel); 371 } 372 if (events & (POLLOUT | POLLWRNORM)) { 373 /* Ready for sending, if the message queue is not full. */ 374 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 375 revents |= events & (POLLOUT | POLLWRNORM); 376 else 377 selrecord(curlwp, &mq->mq_wsel); 378 } 379 mutex_exit(&mq->mq_mtx); 380 381 return revents; 382 } 383 384 static int 385 mq_close_fop(file_t *fp) 386 { 387 proc_t *p = curproc; 388 mqueue_t *mq = fp->f_mqueue; 389 bool destroy = false; 390 391 mutex_enter(&mq->mq_mtx); 392 KASSERT(mq->mq_refcnt > 0); 393 if (--mq->mq_refcnt == 0) { 394 /* Destroy if the last reference and unlinked. */ 395 destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0; 396 } 397 mutex_exit(&mq->mq_mtx); 398 399 if (destroy) { 400 mqueue_destroy(mq); 401 } 402 atomic_dec_uint(&p->p_mqueue_cnt); 403 return 0; 404 } 405 406 static int 407 mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred) 408 { 409 mode_t acc_mode = 0; 410 411 /* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */ 412 if (access & FREAD) { 413 acc_mode |= VREAD; 414 } 415 if (access & FWRITE) { 416 acc_mode |= VWRITE; 417 } 418 if (genfs_can_access(VNON, mq->mq_mode, mq->mq_euid, 419 mq->mq_egid, acc_mode, cred)) { 420 return EACCES; 421 } 422 return 0; 423 } 424 425 static int 426 mqueue_create(lwp_t *l, char *name, struct mq_attr *attr, mode_t mode, 427 int oflag, mqueue_t **mqret) 428 { 429 proc_t *p = l->l_proc; 430 struct cwdinfo *cwdi = p->p_cwdi; 431 mqueue_t *mq; 432 u_int i; 433 434 /* Empty name is invalid. */ 435 if (name[0] == '\0') { 436 return EINVAL; 437 } 438 439 /* Check for mqueue attributes. */ 440 if (attr) { 441 if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > mq_max_maxmsg || 442 attr->mq_msgsize <= 0 || 443 attr->mq_msgsize > mq_max_msgsize) { 444 return EINVAL; 445 } 446 attr->mq_curmsgs = 0; 447 } 448 449 /* 450 * Allocate new message queue, initialize data structures, copy the 451 * name attributes. Note that the initial reference is set here. 452 */ 453 mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP); 454 455 mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE); 456 cv_init(&mq->mq_send_cv, "mqsendcv"); 457 cv_init(&mq->mq_recv_cv, "mqrecvcv"); 458 for (i = 0; i < (MQ_PQSIZE + 1); i++) { 459 TAILQ_INIT(&mq->mq_head[i]); 460 } 461 selinit(&mq->mq_rsel); 462 selinit(&mq->mq_wsel); 463 mq->mq_name = name; 464 mq->mq_refcnt = 1; 465 466 if (attr != NULL) { 467 memcpy(&mq->mq_attrib, attr, sizeof(struct mq_attr)); 468 } else { 469 memset(&mq->mq_attrib, 0, sizeof(struct mq_attr)); 470 mq->mq_attrib.mq_maxmsg = mq_def_maxmsg; 471 mq->mq_attrib.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg); 472 } 473 474 CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0); 475 mq->mq_attrib.mq_flags = (O_MASK & oflag); 476 477 /* Store mode and effective UID with GID. */ 478 mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT; 479 mq->mq_euid = kauth_cred_geteuid(l->l_cred); 480 mq->mq_egid = kauth_cred_getegid(l->l_cred); 481 482 *mqret = mq; 483 return 0; 484 } 485 486 /* 487 * Helper function for mq_open() - note that "u_name" is a userland pointer, 488 * while "attr" is a kernel pointer! 489 */ 490 int 491 mq_handle_open(struct lwp *l, const char *u_name, int oflag, mode_t mode, 492 struct mq_attr *attr, register_t *retval) 493 { 494 struct proc *p = l->l_proc; 495 struct mqueue *mq, *mq_new = NULL; 496 int mqd, error; 497 file_t *fp; 498 char *name; 499 500 /* Get the name from the user-space. */ 501 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 502 error = copyinstr(u_name, name, MQ_NAMELEN - 1, NULL); 503 if (error) { 504 kmem_free(name, MQ_NAMELEN); 505 return error; 506 } 507 508 /* Allocate file structure and descriptor. */ 509 error = fd_allocfile(&fp, &mqd); 510 if (error) { 511 kmem_free(name, MQ_NAMELEN); 512 return error; 513 } 514 515 /* Account and check for the limit. */ 516 if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) { 517 atomic_dec_uint(&p->p_mqueue_cnt); 518 error = EMFILE; 519 goto err; 520 } 521 522 fp->f_type = DTYPE_MQUEUE; 523 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE); 524 fp->f_ops = &mqops; 525 526 if (oflag & O_CREAT) { 527 /* Create a new message queue. */ 528 error = mqueue_create(l, name, attr, mode, oflag, &mq_new); 529 if (error) { 530 goto err; 531 } 532 KASSERT(mq_new != NULL); 533 } 534 535 /* Lookup for a message queue with such name. */ 536 mutex_enter(&mqlist_lock); 537 mq = mqueue_lookup(name); 538 if (mq) { 539 KASSERT(mutex_owned(&mq->mq_mtx)); 540 mutex_exit(&mqlist_lock); 541 542 /* Check for exclusive create. */ 543 if (oflag & O_EXCL) { 544 mutex_exit(&mq->mq_mtx); 545 error = EEXIST; 546 goto err; 547 } 548 549 /* Verify permissions. */ 550 if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) { 551 mutex_exit(&mq->mq_mtx); 552 error = EACCES; 553 goto err; 554 } 555 556 /* If we have the access, add a new reference. */ 557 mq->mq_refcnt++; 558 mutex_exit(&mq->mq_mtx); 559 } else { 560 /* Fail if not found and not creating. */ 561 if ((oflag & O_CREAT) == 0) { 562 mutex_exit(&mqlist_lock); 563 KASSERT(mq_new == NULL); 564 error = ENOENT; 565 goto err; 566 } 567 568 /* Initial timestamps. */ 569 mq = mq_new; 570 getnanotime(&mq->mq_btime); 571 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 572 573 /* 574 * Finally, insert message queue into the list. 575 * Note: it already has the initial reference. 576 */ 577 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 578 mutex_exit(&mqlist_lock); 579 580 mq_new = NULL; 581 name = NULL; 582 } 583 KASSERT(mq != NULL); 584 fp->f_mqueue = mq; 585 fd_affix(p, fp, mqd); 586 *retval = mqd; 587 err: 588 if (error) { 589 fd_abort(p, fp, mqd); 590 } 591 if (mq_new) { 592 /* Note: will free the 'name'. */ 593 mqueue_destroy(mq_new); 594 } else if (name) { 595 kmem_free(name, MQ_NAMELEN); 596 } 597 return error; 598 } 599 600 /* 601 * General mqueue system calls. 602 */ 603 604 int 605 sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap, 606 register_t *retval) 607 { 608 /* { 609 syscallarg(const char *) name; 610 syscallarg(int) oflag; 611 syscallarg(mode_t) mode; 612 syscallarg(struct mq_attr) attr; 613 } */ 614 struct mq_attr *attr = NULL, a; 615 int error; 616 617 if ((SCARG(uap, oflag) & O_EXEC) != 0) 618 return EINVAL; 619 620 if ((SCARG(uap, oflag) & O_CREAT) != 0 && SCARG(uap, attr) != NULL) { 621 error = copyin(SCARG(uap, attr), &a, sizeof(a)); 622 if (error) 623 return error; 624 attr = &a; 625 } 626 627 return mq_handle_open(l, SCARG(uap, name), SCARG(uap, oflag), 628 SCARG(uap, mode), attr, retval); 629 } 630 631 int 632 sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap, 633 register_t *retval) 634 { 635 636 return sys_close(l, (const void *)uap, retval); 637 } 638 639 /* 640 * Primary mq_recv1() function. 641 */ 642 int 643 mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio, 644 struct timespec *ts, ssize_t *mlen) 645 { 646 struct mqueue *mq; 647 struct mq_msg *msg = NULL; 648 struct mq_attr *mqattr; 649 u_int idx; 650 int error; 651 652 error = mqueue_get(mqdes, FREAD, &mq); 653 if (error) { 654 return error; 655 } 656 getnanotime(&mq->mq_atime); 657 mqattr = &mq->mq_attrib; 658 659 /* Check the message size limits */ 660 if (msg_len < mqattr->mq_msgsize) { 661 error = EMSGSIZE; 662 goto error; 663 } 664 665 /* Check if queue is empty */ 666 while (mqattr->mq_curmsgs == 0) { 667 int t; 668 669 if (mqattr->mq_flags & O_NONBLOCK) { 670 error = EAGAIN; 671 goto error; 672 } 673 if (ts) { 674 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 675 NULL); 676 if (error) 677 goto error; 678 } else 679 t = 0; 680 /* 681 * Block until someone sends the message. 682 * While doing this, notification should not be sent. 683 */ 684 mqattr->mq_flags |= MQ_RECEIVE; 685 error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t); 686 mqattr->mq_flags &= ~MQ_RECEIVE; 687 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 688 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 689 goto error; 690 } 691 } 692 693 /* 694 * Find the highest priority message, and remove it from the queue. 695 * At first, reserved queue is checked, bitmap is next. 696 */ 697 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 698 if (__predict_true(msg == NULL)) { 699 idx = ffs(mq->mq_bitmap); 700 msg = TAILQ_FIRST(&mq->mq_head[idx]); 701 KASSERT(msg != NULL); 702 } else { 703 idx = MQ_PQRESQ; 704 } 705 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 706 707 /* Unmark the bit, if last message. */ 708 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) { 709 KASSERT((MQ_PQSIZE - idx) == msg->msg_prio); 710 mq->mq_bitmap &= ~(1U << --idx); 711 } 712 713 /* Decrement the counter and signal waiter, if any */ 714 mqattr->mq_curmsgs--; 715 cv_signal(&mq->mq_recv_cv); 716 717 /* Ready for sending now */ 718 selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0); 719 error: 720 mutex_exit(&mq->mq_mtx); 721 fd_putfile((int)mqdes); 722 if (error) 723 return error; 724 725 /* 726 * Copy the data to the user-space. 727 * Note: According to POSIX, no message should be removed from the 728 * queue in case of fail - this would be violated. 729 */ 730 *mlen = msg->msg_len; 731 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 732 if (error == 0 && msg_prio) 733 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 734 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 735 736 return error; 737 } 738 739 int 740 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap, 741 register_t *retval) 742 { 743 /* { 744 syscallarg(mqd_t) mqdes; 745 syscallarg(char *) msg_ptr; 746 syscallarg(size_t) msg_len; 747 syscallarg(unsigned *) msg_prio; 748 } */ 749 ssize_t mlen; 750 int error; 751 752 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 753 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen); 754 if (error == 0) 755 *retval = mlen; 756 757 return error; 758 } 759 760 int 761 sys___mq_timedreceive50(struct lwp *l, 762 const struct sys___mq_timedreceive50_args *uap, register_t *retval) 763 { 764 /* { 765 syscallarg(mqd_t) mqdes; 766 syscallarg(char *) msg_ptr; 767 syscallarg(size_t) msg_len; 768 syscallarg(unsigned *) msg_prio; 769 syscallarg(const struct timespec *) abs_timeout; 770 } */ 771 struct timespec ts, *tsp; 772 ssize_t mlen; 773 int error; 774 775 /* Get and convert time value */ 776 if (SCARG(uap, abs_timeout)) { 777 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 778 if (error) 779 return error; 780 tsp = &ts; 781 } else { 782 tsp = NULL; 783 } 784 785 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 786 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); 787 if (error == 0) 788 *retval = mlen; 789 790 return error; 791 } 792 793 /* 794 * Primary mq_send1() function. 795 */ 796 int 797 mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio, 798 struct timespec *ts) 799 { 800 struct mqueue *mq; 801 struct mq_msg *msg; 802 struct mq_attr *mqattr; 803 struct proc *notify = NULL; 804 ksiginfo_t ksi; 805 size_t size; 806 int error; 807 808 /* Check the priority range */ 809 if (msg_prio >= mq_prio_max) 810 return EINVAL; 811 812 /* Allocate a new message */ 813 if (msg_len > mq_max_msgsize) 814 return EMSGSIZE; 815 size = sizeof(struct mq_msg) + msg_len; 816 if (size > mq_max_msgsize) 817 return EMSGSIZE; 818 819 if (size > MQ_DEF_MSGSIZE) { 820 msg = kmem_alloc(size, KM_SLEEP); 821 } else { 822 msg = pool_cache_get(mqmsg_cache, PR_WAITOK); 823 } 824 825 /* Get the data from user-space */ 826 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 827 if (error) { 828 mqueue_freemsg(msg, size); 829 return error; 830 } 831 msg->msg_len = msg_len; 832 msg->msg_prio = msg_prio; 833 834 error = mqueue_get(mqdes, FWRITE, &mq); 835 if (error) { 836 mqueue_freemsg(msg, size); 837 return error; 838 } 839 getnanotime(&mq->mq_mtime); 840 mqattr = &mq->mq_attrib; 841 842 /* Check the message size limit */ 843 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 844 error = EMSGSIZE; 845 goto error; 846 } 847 848 /* Check if queue is full */ 849 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 850 int t; 851 852 if (mqattr->mq_flags & O_NONBLOCK) { 853 error = EAGAIN; 854 goto error; 855 } 856 if (ts) { 857 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 858 NULL); 859 if (error) 860 goto error; 861 } else 862 t = 0; 863 /* Block until queue becomes available */ 864 error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t); 865 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 866 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 867 goto error; 868 } 869 } 870 KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg); 871 872 /* 873 * Insert message into the queue, according to the priority. 874 * Note the difference between index and priority. 875 */ 876 if (__predict_true(msg_prio < MQ_PQSIZE)) { 877 u_int idx = MQ_PQSIZE - msg_prio; 878 879 KASSERT(idx != MQ_PQRESQ); 880 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 881 mq->mq_bitmap |= (1U << --idx); 882 } else { 883 mqueue_linear_insert(mq, msg); 884 } 885 886 /* Check for the notify */ 887 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 888 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 889 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 890 /* Initialize the signal */ 891 KSI_INIT(&ksi); 892 ksi.ksi_signo = mq->mq_sig_notify.sigev_signo; 893 ksi.ksi_code = SI_MESGQ; 894 ksi.ksi_value = mq->mq_sig_notify.sigev_value; 895 /* Unregister the process */ 896 notify = mq->mq_notify_proc; 897 mq->mq_notify_proc = NULL; 898 } 899 900 /* Increment the counter and signal waiter, if any */ 901 mqattr->mq_curmsgs++; 902 cv_signal(&mq->mq_send_cv); 903 904 /* Ready for receiving now */ 905 selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0); 906 error: 907 mutex_exit(&mq->mq_mtx); 908 fd_putfile((int)mqdes); 909 910 if (error) { 911 mqueue_freemsg(msg, size); 912 } else if (notify) { 913 /* Send the notify, if needed */ 914 mutex_enter(proc_lock); 915 kpsignal(notify, &ksi, NULL); 916 mutex_exit(proc_lock); 917 } 918 return error; 919 } 920 921 int 922 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap, 923 register_t *retval) 924 { 925 /* { 926 syscallarg(mqd_t) mqdes; 927 syscallarg(const char *) msg_ptr; 928 syscallarg(size_t) msg_len; 929 syscallarg(unsigned) msg_prio; 930 } */ 931 932 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 933 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL); 934 } 935 936 int 937 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap, 938 register_t *retval) 939 { 940 /* { 941 syscallarg(mqd_t) mqdes; 942 syscallarg(const char *) msg_ptr; 943 syscallarg(size_t) msg_len; 944 syscallarg(unsigned) msg_prio; 945 syscallarg(const struct timespec *) abs_timeout; 946 } */ 947 struct timespec ts, *tsp; 948 int error; 949 950 /* Get and convert time value */ 951 if (SCARG(uap, abs_timeout)) { 952 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 953 if (error) 954 return error; 955 tsp = &ts; 956 } else { 957 tsp = NULL; 958 } 959 960 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 961 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); 962 } 963 964 int 965 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap, 966 register_t *retval) 967 { 968 /* { 969 syscallarg(mqd_t) mqdes; 970 syscallarg(const struct sigevent *) notification; 971 } */ 972 struct mqueue *mq; 973 struct sigevent sig; 974 int error; 975 976 if (SCARG(uap, notification)) { 977 /* Get the signal from user-space */ 978 error = copyin(SCARG(uap, notification), &sig, 979 sizeof(struct sigevent)); 980 if (error) 981 return error; 982 if (sig.sigev_notify == SIGEV_SIGNAL && 983 (sig.sigev_signo <=0 || sig.sigev_signo >= NSIG)) 984 return EINVAL; 985 } 986 987 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 988 if (error) { 989 return error; 990 } 991 if (SCARG(uap, notification)) { 992 /* Register notification: set the signal and target process */ 993 if (mq->mq_notify_proc == NULL) { 994 memcpy(&mq->mq_sig_notify, &sig, 995 sizeof(struct sigevent)); 996 mq->mq_notify_proc = l->l_proc; 997 } else { 998 /* Fail if someone else already registered */ 999 error = EBUSY; 1000 } 1001 } else { 1002 /* Unregister the notification */ 1003 mq->mq_notify_proc = NULL; 1004 } 1005 mutex_exit(&mq->mq_mtx); 1006 fd_putfile((int)SCARG(uap, mqdes)); 1007 1008 return error; 1009 } 1010 1011 int 1012 sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap, 1013 register_t *retval) 1014 { 1015 /* { 1016 syscallarg(mqd_t) mqdes; 1017 syscallarg(struct mq_attr *) mqstat; 1018 } */ 1019 struct mqueue *mq; 1020 struct mq_attr attr; 1021 int error; 1022 1023 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1024 if (error) { 1025 return error; 1026 } 1027 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1028 mutex_exit(&mq->mq_mtx); 1029 fd_putfile((int)SCARG(uap, mqdes)); 1030 1031 return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr)); 1032 } 1033 1034 int 1035 sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap, 1036 register_t *retval) 1037 { 1038 /* { 1039 syscallarg(mqd_t) mqdes; 1040 syscallarg(const struct mq_attr *) mqstat; 1041 syscallarg(struct mq_attr *) omqstat; 1042 } */ 1043 struct mqueue *mq; 1044 struct mq_attr attr; 1045 int error, nonblock; 1046 1047 error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr)); 1048 if (error) 1049 return error; 1050 nonblock = (attr.mq_flags & O_NONBLOCK); 1051 1052 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1053 if (error) { 1054 return error; 1055 } 1056 1057 /* Copy the old attributes, if needed */ 1058 if (SCARG(uap, omqstat)) { 1059 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1060 } 1061 1062 /* Ignore everything, except O_NONBLOCK */ 1063 if (nonblock) 1064 mq->mq_attrib.mq_flags |= O_NONBLOCK; 1065 else 1066 mq->mq_attrib.mq_flags &= ~O_NONBLOCK; 1067 1068 mutex_exit(&mq->mq_mtx); 1069 fd_putfile((int)SCARG(uap, mqdes)); 1070 1071 /* 1072 * Copy the data to the user-space. 1073 * Note: According to POSIX, the new attributes should not be set in 1074 * case of fail - this would be violated. 1075 */ 1076 if (SCARG(uap, omqstat)) 1077 error = copyout(&attr, SCARG(uap, omqstat), 1078 sizeof(struct mq_attr)); 1079 1080 return error; 1081 } 1082 1083 int 1084 sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap, 1085 register_t *retval) 1086 { 1087 /* { 1088 syscallarg(const char *) name; 1089 } */ 1090 mqueue_t *mq; 1091 char *name; 1092 int error, refcnt = 0; 1093 1094 /* Get the name from the user-space */ 1095 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 1096 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL); 1097 if (error) { 1098 kmem_free(name, MQ_NAMELEN); 1099 return error; 1100 } 1101 1102 mutex_enter(&mqlist_lock); 1103 mq = mqueue_lookup(name); 1104 if (mq == NULL) { 1105 error = ENOENT; 1106 goto err; 1107 } 1108 KASSERT(mutex_owned(&mq->mq_mtx)); 1109 1110 /* Verify permissions. */ 1111 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq, 1112 NULL, NULL)) { 1113 mutex_exit(&mq->mq_mtx); 1114 error = EACCES; 1115 goto err; 1116 } 1117 1118 /* Remove and destroy if no references. */ 1119 LIST_REMOVE(mq, mq_list); 1120 refcnt = mq->mq_refcnt; 1121 if (refcnt) { 1122 /* Mark as unlinked, if there are references. */ 1123 mq->mq_attrib.mq_flags |= MQ_UNLINKED; 1124 } 1125 1126 /* Wake up waiters, if there are any. */ 1127 cv_broadcast(&mq->mq_send_cv); 1128 cv_broadcast(&mq->mq_recv_cv); 1129 1130 selnotify(&mq->mq_rsel, POLLHUP, 0); 1131 selnotify(&mq->mq_wsel, POLLHUP, 0); 1132 1133 mutex_exit(&mq->mq_mtx); 1134 err: 1135 mutex_exit(&mqlist_lock); 1136 /* 1137 * If last reference - destroy the message queue. Otherwise, 1138 * the last mq_close() call will do that. 1139 */ 1140 if (!error && refcnt == 0) { 1141 mqueue_destroy(mq); 1142 } 1143 kmem_free(name, MQ_NAMELEN); 1144 1145 return error; 1146 } 1147 1148 /* 1149 * System control nodes. 1150 */ 1151 static int 1152 mqueue_sysctl_init(void) 1153 { 1154 const struct sysctlnode *node = NULL; 1155 1156 mqsysctl_log = NULL; 1157 1158 sysctl_createv(&mqsysctl_log, 0, NULL, NULL, 1159 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1160 CTLTYPE_INT, "posix_msg", 1161 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 1162 "Message Passing option to which the " 1163 "system attempts to conform"), 1164 NULL, _POSIX_MESSAGE_PASSING, NULL, 0, 1165 CTL_KERN, CTL_CREATE, CTL_EOL); 1166 sysctl_createv(&mqsysctl_log, 0, NULL, &node, 1167 CTLFLAG_PERMANENT, 1168 CTLTYPE_NODE, "mqueue", 1169 SYSCTL_DESCR("Message queue options"), 1170 NULL, 0, NULL, 0, 1171 CTL_KERN, CTL_CREATE, CTL_EOL); 1172 1173 if (node == NULL) 1174 return ENXIO; 1175 1176 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1177 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1178 CTLTYPE_INT, "mq_open_max", 1179 SYSCTL_DESCR("Maximal number of message queue descriptors " 1180 "that process could open"), 1181 NULL, 0, &mq_open_max, 0, 1182 CTL_CREATE, CTL_EOL); 1183 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1184 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1185 CTLTYPE_INT, "mq_prio_max", 1186 SYSCTL_DESCR("Maximal priority of the message"), 1187 NULL, 0, &mq_prio_max, 0, 1188 CTL_CREATE, CTL_EOL); 1189 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1190 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1191 CTLTYPE_INT, "mq_max_msgsize", 1192 SYSCTL_DESCR("Maximal allowed size of the message"), 1193 NULL, 0, &mq_max_msgsize, 0, 1194 CTL_CREATE, CTL_EOL); 1195 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1196 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1197 CTLTYPE_INT, "mq_def_maxmsg", 1198 SYSCTL_DESCR("Default maximal message count"), 1199 NULL, 0, &mq_def_maxmsg, 0, 1200 CTL_CREATE, CTL_EOL); 1201 sysctl_createv(&mqsysctl_log, 0, &node, NULL, 1202 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1203 CTLTYPE_INT, "mq_max_maxmsg", 1204 SYSCTL_DESCR("Maximal allowed message count"), 1205 NULL, 0, &mq_max_maxmsg, 0, 1206 CTL_CREATE, CTL_EOL); 1207 1208 return 0; 1209 } 1210 1211 /* 1212 * Debugging. 1213 */ 1214 #if defined(DDB) 1215 1216 void 1217 mqueue_print_list(void (*pr)(const char *, ...)) 1218 { 1219 struct mqueue *mq; 1220 1221 (*pr)("Global list of the message queues:\n"); 1222 (*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n", 1223 "Name", "Ptr", "Mode", "Flags", "Ref", 1224 "MaxMsg", "MsgSze", "CurMsg"); 1225 LIST_FOREACH(mq, &mqueue_head, mq_list) { 1226 (*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n", 1227 mq->mq_name, mq, mq->mq_mode, 1228 mq->mq_attrib.mq_flags, mq->mq_refcnt, 1229 mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize, 1230 mq->mq_attrib.mq_curmsgs); 1231 } 1232 } 1233 1234 #endif /* defined(DDB) */ 1235