1 /* $NetBSD: sys_mqueue.c,v 1.48 2020/05/23 23:42:43 ad Exp $ */ 2 3 /* 4 * Copyright (c) 2007-2011 Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Implementation of POSIX message queues. 31 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. 32 * 33 * Locking 34 * 35 * Global list of message queues (mqueue_head) is protected by mqlist_lock. 36 * Each message queue and its members are protected by mqueue::mq_mtx. 37 * Note that proc_t::p_mqueue_cnt is updated atomically. 38 * 39 * Lock order: 40 * 41 * mqlist_lock -> 42 * mqueue::mq_mtx 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: sys_mqueue.c,v 1.48 2020/05/23 23:42:43 ad Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/types.h> 50 #include <sys/atomic.h> 51 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/kauth.h> 55 #include <sys/lwp.h> 56 #include <sys/mqueue.h> 57 #include <sys/module.h> 58 #include <sys/poll.h> 59 #include <sys/select.h> 60 #include <sys/signal.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/sysctl.h> 64 #include <sys/syscall.h> 65 #include <sys/syscallvar.h> 66 #include <sys/syscallargs.h> 67 68 #include <miscfs/genfs/genfs.h> 69 70 MODULE(MODULE_CLASS_MISC, mqueue, NULL); 71 72 /* System-wide limits. */ 73 static u_int mq_open_max = MQ_OPEN_MAX; 74 static u_int mq_prio_max = MQ_PRIO_MAX; 75 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE; 76 static u_int mq_def_maxmsg = 32; 77 static u_int mq_max_maxmsg = 16 * 32; 78 79 static pool_cache_t mqmsg_cache __read_mostly; 80 static kmutex_t mqlist_lock __cacheline_aligned; 81 static LIST_HEAD(, mqueue) mqueue_head __cacheline_aligned; 82 83 static kauth_listener_t mq_listener; 84 85 static int mqueue_sysinit(void); 86 static int mqueue_sysfini(bool); 87 static int mq_poll_fop(file_t *, int); 88 static int mq_stat_fop(file_t *, struct stat *); 89 static int mq_close_fop(file_t *); 90 91 static const struct fileops mqops = { 92 .fo_name = "mq", 93 .fo_read = fbadop_read, 94 .fo_write = fbadop_write, 95 .fo_ioctl = fbadop_ioctl, 96 .fo_fcntl = fnullop_fcntl, 97 .fo_poll = mq_poll_fop, 98 .fo_stat = mq_stat_fop, 99 .fo_close = mq_close_fop, 100 .fo_kqfilter = fnullop_kqfilter, 101 .fo_restart = fnullop_restart, 102 }; 103 104 static const struct syscall_package mqueue_syscalls[] = { 105 { SYS_mq_open, 0, (sy_call_t *)sys_mq_open }, 106 { SYS_mq_close, 0, (sy_call_t *)sys_mq_close }, 107 { SYS_mq_unlink, 0, (sy_call_t *)sys_mq_unlink }, 108 { SYS_mq_getattr, 0, (sy_call_t *)sys_mq_getattr }, 109 { SYS_mq_setattr, 0, (sy_call_t *)sys_mq_setattr }, 110 { SYS_mq_notify, 0, (sy_call_t *)sys_mq_notify }, 111 { SYS_mq_send, 0, (sy_call_t *)sys_mq_send }, 112 { SYS_mq_receive, 0, (sy_call_t *)sys_mq_receive }, 113 { SYS___mq_timedsend50, 0, (sy_call_t *)sys___mq_timedsend50 }, 114 { SYS___mq_timedreceive50, 0, (sy_call_t *)sys___mq_timedreceive50 }, 115 { 0, 0, NULL } 116 }; 117 118 static int 119 mq_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 120 void *arg0, void *arg1, void *arg2, void *arg3) 121 { 122 mqueue_t *mq; 123 int result; 124 125 if (action != KAUTH_SYSTEM_MQUEUE) 126 return KAUTH_RESULT_DEFER; 127 128 result = KAUTH_RESULT_DEFER; 129 130 mq = arg1; 131 132 if (kauth_cred_geteuid(cred) == mq->mq_euid) 133 result = KAUTH_RESULT_ALLOW; 134 135 return result; 136 } 137 138 /* 139 * Initialisation and unloading of POSIX message queue subsystem. 140 */ 141 142 static int 143 mqueue_sysinit(void) 144 { 145 int error; 146 147 mqmsg_cache = pool_cache_init(MQ_DEF_MSGSIZE, coherency_unit, 148 0, 0, "mqmsgpl", NULL, IPL_NONE, NULL, NULL, NULL); 149 mutex_init(&mqlist_lock, MUTEX_DEFAULT, IPL_NONE); 150 LIST_INIT(&mqueue_head); 151 152 error = syscall_establish(NULL, mqueue_syscalls); 153 mq_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM, 154 mq_listener_cb, NULL); 155 return error; 156 } 157 158 static int 159 mqueue_sysfini(bool interface) 160 { 161 162 if (interface) { 163 int error; 164 bool inuse; 165 166 /* Stop syscall activity. */ 167 error = syscall_disestablish(NULL, mqueue_syscalls); 168 if (error) 169 return error; 170 /* Check if there are any message queues in use. */ 171 mutex_enter(&mqlist_lock); 172 inuse = !LIST_EMPTY(&mqueue_head); 173 mutex_exit(&mqlist_lock); 174 if (inuse) { 175 error = syscall_establish(NULL, mqueue_syscalls); 176 KASSERT(error == 0); 177 return EBUSY; 178 } 179 } 180 181 kauth_unlisten_scope(mq_listener); 182 183 mutex_destroy(&mqlist_lock); 184 pool_cache_destroy(mqmsg_cache); 185 return 0; 186 } 187 188 /* 189 * Module interface. 190 */ 191 static int 192 mqueue_modcmd(modcmd_t cmd, void *arg) 193 { 194 195 switch (cmd) { 196 case MODULE_CMD_INIT: 197 return mqueue_sysinit(); 198 case MODULE_CMD_FINI: 199 return mqueue_sysfini(true); 200 default: 201 return ENOTTY; 202 } 203 } 204 205 /* 206 * Free the message. 207 */ 208 static void 209 mqueue_freemsg(struct mq_msg *msg, const size_t size) 210 { 211 212 if (size > MQ_DEF_MSGSIZE) { 213 kmem_free(msg, size); 214 } else { 215 pool_cache_put(mqmsg_cache, msg); 216 } 217 } 218 219 /* 220 * Destroy the message queue. 221 */ 222 static void 223 mqueue_destroy(struct mqueue *mq) 224 { 225 struct mq_msg *msg; 226 size_t msz; 227 u_int i; 228 229 /* Note MQ_PQSIZE + 1. */ 230 for (i = 0; i <= MQ_PQSIZE; i++) { 231 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) { 232 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue); 233 msz = sizeof(struct mq_msg) + msg->msg_len; 234 mqueue_freemsg(msg, msz); 235 } 236 } 237 if (mq->mq_name) { 238 kmem_free(mq->mq_name, MQ_NAMELEN); 239 } 240 seldestroy(&mq->mq_rsel); 241 seldestroy(&mq->mq_wsel); 242 cv_destroy(&mq->mq_send_cv); 243 cv_destroy(&mq->mq_recv_cv); 244 mutex_destroy(&mq->mq_mtx); 245 kmem_free(mq, sizeof(struct mqueue)); 246 } 247 248 /* 249 * mqueue_lookup: lookup for file name in general list of message queues. 250 * 251 * => locks the message queue on success 252 */ 253 static mqueue_t * 254 mqueue_lookup(const char *name) 255 { 256 mqueue_t *mq; 257 258 KASSERT(mutex_owned(&mqlist_lock)); 259 260 LIST_FOREACH(mq, &mqueue_head, mq_list) { 261 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) { 262 mutex_enter(&mq->mq_mtx); 263 return mq; 264 } 265 } 266 return NULL; 267 } 268 269 /* 270 * mqueue_get: get the mqueue from the descriptor. 271 * 272 * => locks the message queue, if found. 273 * => holds a reference on the file descriptor. 274 */ 275 int 276 mqueue_get(mqd_t mqd, int fflag, mqueue_t **mqret) 277 { 278 const int fd = (int)mqd; 279 mqueue_t *mq; 280 file_t *fp; 281 282 fp = fd_getfile(fd); 283 if (__predict_false(fp == NULL)) { 284 return EBADF; 285 } 286 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) { 287 fd_putfile(fd); 288 return EBADF; 289 } 290 if (fflag && (fp->f_flag & fflag) == 0) { 291 fd_putfile(fd); 292 return EBADF; 293 } 294 mq = fp->f_mqueue; 295 mutex_enter(&mq->mq_mtx); 296 297 *mqret = mq; 298 return 0; 299 } 300 301 /* 302 * mqueue_linear_insert: perform linear insert according to the message 303 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a 304 * sorted list used only when mq_prio_max is increased via sysctl. 305 */ 306 static inline void 307 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 308 { 309 struct mq_msg *mit; 310 311 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 312 if (msg->msg_prio > mit->msg_prio) 313 break; 314 } 315 if (mit == NULL) { 316 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 317 } else { 318 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 319 } 320 } 321 322 static int 323 mq_stat_fop(file_t *fp, struct stat *st) 324 { 325 struct mqueue *mq = fp->f_mqueue; 326 327 memset(st, 0, sizeof(*st)); 328 329 mutex_enter(&mq->mq_mtx); 330 st->st_mode = mq->mq_mode; 331 st->st_uid = mq->mq_euid; 332 st->st_gid = mq->mq_egid; 333 st->st_atimespec = mq->mq_atime; 334 st->st_mtimespec = mq->mq_mtime; 335 st->st_ctimespec = st->st_birthtimespec = mq->mq_btime; 336 st->st_uid = kauth_cred_geteuid(fp->f_cred); 337 st->st_gid = kauth_cred_getegid(fp->f_cred); 338 mutex_exit(&mq->mq_mtx); 339 340 return 0; 341 } 342 343 static int 344 mq_poll_fop(file_t *fp, int events) 345 { 346 struct mqueue *mq = fp->f_mqueue; 347 struct mq_attr *mqattr; 348 int revents = 0; 349 350 mutex_enter(&mq->mq_mtx); 351 mqattr = &mq->mq_attrib; 352 if (events & (POLLIN | POLLRDNORM)) { 353 /* Ready for receiving, if there are messages in the queue. */ 354 if (mqattr->mq_curmsgs) 355 revents |= events & (POLLIN | POLLRDNORM); 356 else 357 selrecord(curlwp, &mq->mq_rsel); 358 } 359 if (events & (POLLOUT | POLLWRNORM)) { 360 /* Ready for sending, if the message queue is not full. */ 361 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 362 revents |= events & (POLLOUT | POLLWRNORM); 363 else 364 selrecord(curlwp, &mq->mq_wsel); 365 } 366 mutex_exit(&mq->mq_mtx); 367 368 return revents; 369 } 370 371 static int 372 mq_close_fop(file_t *fp) 373 { 374 proc_t *p = curproc; 375 mqueue_t *mq = fp->f_mqueue; 376 bool destroy = false; 377 378 mutex_enter(&mq->mq_mtx); 379 KASSERT(mq->mq_refcnt > 0); 380 if (--mq->mq_refcnt == 0) { 381 /* Destroy if the last reference and unlinked. */ 382 destroy = (mq->mq_attrib.mq_flags & MQ_UNLINKED) != 0; 383 } 384 mutex_exit(&mq->mq_mtx); 385 386 if (destroy) { 387 mqueue_destroy(mq); 388 } 389 atomic_dec_uint(&p->p_mqueue_cnt); 390 return 0; 391 } 392 393 static int 394 mqueue_access(mqueue_t *mq, int access, kauth_cred_t cred) 395 { 396 accmode_t accmode = 0; 397 398 /* Note the difference between VREAD/VWRITE and FREAD/FWRITE. */ 399 if (access & FREAD) { 400 accmode |= VREAD; 401 } 402 if (access & FWRITE) { 403 accmode |= VWRITE; 404 } 405 if (genfs_can_access(NULL, cred, mq->mq_euid, mq->mq_egid, 406 mq->mq_mode, NULL, accmode)) { 407 return EACCES; 408 } 409 return 0; 410 } 411 412 static int 413 mqueue_create(lwp_t *l, char *name, struct mq_attr *attr, mode_t mode, 414 int oflag, mqueue_t **mqret) 415 { 416 proc_t *p = l->l_proc; 417 struct cwdinfo *cwdi = p->p_cwdi; 418 mqueue_t *mq; 419 u_int i; 420 421 /* Empty name is invalid. */ 422 if (name[0] == '\0') { 423 return EINVAL; 424 } 425 426 /* Check for mqueue attributes. */ 427 if (attr) { 428 if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > mq_max_maxmsg || 429 attr->mq_msgsize <= 0 || 430 attr->mq_msgsize > mq_max_msgsize) { 431 return EINVAL; 432 } 433 attr->mq_curmsgs = 0; 434 } 435 436 /* 437 * Allocate new message queue, initialize data structures, copy the 438 * name attributes. Note that the initial reference is set here. 439 */ 440 mq = kmem_zalloc(sizeof(mqueue_t), KM_SLEEP); 441 442 mutex_init(&mq->mq_mtx, MUTEX_DEFAULT, IPL_NONE); 443 cv_init(&mq->mq_send_cv, "mqsendcv"); 444 cv_init(&mq->mq_recv_cv, "mqrecvcv"); 445 for (i = 0; i < (MQ_PQSIZE + 1); i++) { 446 TAILQ_INIT(&mq->mq_head[i]); 447 } 448 selinit(&mq->mq_rsel); 449 selinit(&mq->mq_wsel); 450 mq->mq_name = name; 451 mq->mq_refcnt = 1; 452 453 if (attr != NULL) { 454 memcpy(&mq->mq_attrib, attr, sizeof(struct mq_attr)); 455 } else { 456 memset(&mq->mq_attrib, 0, sizeof(struct mq_attr)); 457 mq->mq_attrib.mq_maxmsg = mq_def_maxmsg; 458 mq->mq_attrib.mq_msgsize = MQ_DEF_MSGSIZE - sizeof(struct mq_msg); 459 } 460 461 CTASSERT((O_MASK & (MQ_UNLINKED | MQ_RECEIVE)) == 0); 462 mq->mq_attrib.mq_flags = (O_MASK & oflag); 463 464 /* Store mode and effective UID with GID. */ 465 mq->mq_mode = ((mode & ~cwdi->cwdi_cmask) & ALLPERMS) & ~S_ISTXT; 466 mq->mq_euid = kauth_cred_geteuid(l->l_cred); 467 mq->mq_egid = kauth_cred_getegid(l->l_cred); 468 469 *mqret = mq; 470 return 0; 471 } 472 473 /* 474 * Helper function for mq_open() - note that "u_name" is a userland pointer, 475 * while "attr" is a kernel pointer! 476 */ 477 int 478 mq_handle_open(struct lwp *l, const char *u_name, int oflag, mode_t mode, 479 struct mq_attr *attr, register_t *retval) 480 { 481 struct proc *p = l->l_proc; 482 struct mqueue *mq, *mq_new = NULL; 483 int mqd, error; 484 file_t *fp; 485 char *name; 486 487 /* Get the name from the user-space. */ 488 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 489 error = copyinstr(u_name, name, MQ_NAMELEN - 1, NULL); 490 if (error) { 491 kmem_free(name, MQ_NAMELEN); 492 return error; 493 } 494 495 /* Allocate file structure and descriptor. */ 496 error = fd_allocfile(&fp, &mqd); 497 if (error) { 498 kmem_free(name, MQ_NAMELEN); 499 return error; 500 } 501 502 /* Account and check for the limit. */ 503 if (atomic_inc_uint_nv(&p->p_mqueue_cnt) > mq_open_max) { 504 atomic_dec_uint(&p->p_mqueue_cnt); 505 error = EMFILE; 506 goto err; 507 } 508 509 fp->f_type = DTYPE_MQUEUE; 510 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE); 511 fp->f_ops = &mqops; 512 513 if (oflag & O_CREAT) { 514 /* Create a new message queue. */ 515 error = mqueue_create(l, name, attr, mode, oflag, &mq_new); 516 if (error) { 517 goto err; 518 } 519 KASSERT(mq_new != NULL); 520 } 521 522 /* Lookup for a message queue with such name. */ 523 mutex_enter(&mqlist_lock); 524 mq = mqueue_lookup(name); 525 if (mq) { 526 KASSERT(mutex_owned(&mq->mq_mtx)); 527 mutex_exit(&mqlist_lock); 528 529 /* Check for exclusive create. */ 530 if (oflag & O_EXCL) { 531 mutex_exit(&mq->mq_mtx); 532 error = EEXIST; 533 goto err; 534 } 535 536 /* Verify permissions. */ 537 if (mqueue_access(mq, fp->f_flag, l->l_cred) != 0) { 538 mutex_exit(&mq->mq_mtx); 539 error = EACCES; 540 goto err; 541 } 542 543 /* If we have the access, add a new reference. */ 544 mq->mq_refcnt++; 545 mutex_exit(&mq->mq_mtx); 546 } else { 547 /* Fail if not found and not creating. */ 548 if ((oflag & O_CREAT) == 0) { 549 mutex_exit(&mqlist_lock); 550 KASSERT(mq_new == NULL); 551 error = ENOENT; 552 goto err; 553 } 554 555 /* Initial timestamps. */ 556 mq = mq_new; 557 getnanotime(&mq->mq_btime); 558 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 559 560 /* 561 * Finally, insert message queue into the list. 562 * Note: it already has the initial reference. 563 */ 564 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 565 mutex_exit(&mqlist_lock); 566 567 mq_new = NULL; 568 name = NULL; 569 } 570 KASSERT(mq != NULL); 571 fp->f_mqueue = mq; 572 fd_affix(p, fp, mqd); 573 *retval = mqd; 574 err: 575 if (error) { 576 fd_abort(p, fp, mqd); 577 } 578 if (mq_new) { 579 /* Note: will free the 'name'. */ 580 mqueue_destroy(mq_new); 581 } else if (name) { 582 kmem_free(name, MQ_NAMELEN); 583 } 584 return error; 585 } 586 587 /* 588 * General mqueue system calls. 589 */ 590 591 int 592 sys_mq_open(struct lwp *l, const struct sys_mq_open_args *uap, 593 register_t *retval) 594 { 595 /* { 596 syscallarg(const char *) name; 597 syscallarg(int) oflag; 598 syscallarg(mode_t) mode; 599 syscallarg(struct mq_attr) attr; 600 } */ 601 struct mq_attr *attr = NULL, a; 602 int error; 603 604 if ((SCARG(uap, oflag) & O_EXEC) != 0) 605 return EINVAL; 606 607 if ((SCARG(uap, oflag) & O_CREAT) != 0 && SCARG(uap, attr) != NULL) { 608 error = copyin(SCARG(uap, attr), &a, sizeof(a)); 609 if (error) 610 return error; 611 attr = &a; 612 } 613 614 return mq_handle_open(l, SCARG(uap, name), SCARG(uap, oflag), 615 SCARG(uap, mode), attr, retval); 616 } 617 618 int 619 sys_mq_close(struct lwp *l, const struct sys_mq_close_args *uap, 620 register_t *retval) 621 { 622 623 return sys_close(l, (const void *)uap, retval); 624 } 625 626 /* 627 * Primary mq_recv1() function. 628 */ 629 int 630 mq_recv1(mqd_t mqdes, void *msg_ptr, size_t msg_len, u_int *msg_prio, 631 struct timespec *ts, ssize_t *mlen) 632 { 633 struct mqueue *mq; 634 struct mq_msg *msg = NULL; 635 struct mq_attr *mqattr; 636 u_int idx; 637 int error; 638 639 error = mqueue_get(mqdes, FREAD, &mq); 640 if (error) { 641 return error; 642 } 643 getnanotime(&mq->mq_atime); 644 mqattr = &mq->mq_attrib; 645 646 /* Check the message size limits */ 647 if (msg_len < mqattr->mq_msgsize) { 648 error = EMSGSIZE; 649 goto error; 650 } 651 652 /* Check if queue is empty */ 653 while (mqattr->mq_curmsgs == 0) { 654 int t; 655 656 if (mqattr->mq_flags & O_NONBLOCK) { 657 error = EAGAIN; 658 goto error; 659 } 660 if (ts) { 661 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 662 NULL); 663 if (error) 664 goto error; 665 } else 666 t = 0; 667 /* 668 * Block until someone sends the message. 669 * While doing this, notification should not be sent. 670 */ 671 mqattr->mq_flags |= MQ_RECEIVE; 672 error = cv_timedwait_sig(&mq->mq_send_cv, &mq->mq_mtx, t); 673 mqattr->mq_flags &= ~MQ_RECEIVE; 674 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 675 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 676 goto error; 677 } 678 } 679 680 /* 681 * Find the highest priority message, and remove it from the queue. 682 * At first, reserved queue is checked, bitmap is next. 683 */ 684 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 685 if (__predict_true(msg == NULL)) { 686 idx = ffs(mq->mq_bitmap); 687 msg = TAILQ_FIRST(&mq->mq_head[idx]); 688 KASSERT(msg != NULL); 689 } else { 690 idx = MQ_PQRESQ; 691 } 692 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 693 694 /* Unmark the bit, if last message. */ 695 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) { 696 KASSERT((MQ_PQSIZE - idx) == msg->msg_prio); 697 mq->mq_bitmap &= ~(1U << --idx); 698 } 699 700 /* Decrement the counter and signal waiter, if any */ 701 mqattr->mq_curmsgs--; 702 cv_signal(&mq->mq_recv_cv); 703 704 /* Ready for sending now */ 705 selnotify(&mq->mq_wsel, POLLOUT | POLLWRNORM, 0); 706 error: 707 mutex_exit(&mq->mq_mtx); 708 fd_putfile((int)mqdes); 709 if (error) 710 return error; 711 712 /* 713 * Copy the data to the user-space. 714 * Note: According to POSIX, no message should be removed from the 715 * queue in case of fail - this would be violated. 716 */ 717 *mlen = msg->msg_len; 718 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 719 if (error == 0 && msg_prio) 720 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 721 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 722 723 return error; 724 } 725 726 int 727 sys_mq_receive(struct lwp *l, const struct sys_mq_receive_args *uap, 728 register_t *retval) 729 { 730 /* { 731 syscallarg(mqd_t) mqdes; 732 syscallarg(char *) msg_ptr; 733 syscallarg(size_t) msg_len; 734 syscallarg(unsigned *) msg_prio; 735 } */ 736 ssize_t mlen; 737 int error; 738 739 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 740 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL, &mlen); 741 if (error == 0) 742 *retval = mlen; 743 744 return error; 745 } 746 747 int 748 sys___mq_timedreceive50(struct lwp *l, 749 const struct sys___mq_timedreceive50_args *uap, register_t *retval) 750 { 751 /* { 752 syscallarg(mqd_t) mqdes; 753 syscallarg(char *) msg_ptr; 754 syscallarg(size_t) msg_len; 755 syscallarg(unsigned *) msg_prio; 756 syscallarg(const struct timespec *) abs_timeout; 757 } */ 758 struct timespec ts, *tsp; 759 ssize_t mlen; 760 int error; 761 762 /* Get and convert time value */ 763 if (SCARG(uap, abs_timeout)) { 764 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 765 if (error) 766 return error; 767 tsp = &ts; 768 } else { 769 tsp = NULL; 770 } 771 772 error = mq_recv1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 773 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen); 774 if (error == 0) 775 *retval = mlen; 776 777 return error; 778 } 779 780 /* 781 * Primary mq_send1() function. 782 */ 783 int 784 mq_send1(mqd_t mqdes, const char *msg_ptr, size_t msg_len, u_int msg_prio, 785 struct timespec *ts) 786 { 787 struct mqueue *mq; 788 struct mq_msg *msg; 789 struct mq_attr *mqattr; 790 struct proc *notify = NULL; 791 ksiginfo_t ksi; 792 size_t size; 793 int error; 794 795 /* Check the priority range */ 796 if (msg_prio >= mq_prio_max) 797 return EINVAL; 798 799 /* Allocate a new message */ 800 if (msg_len > mq_max_msgsize) 801 return EMSGSIZE; 802 size = sizeof(struct mq_msg) + msg_len; 803 if (size > mq_max_msgsize) 804 return EMSGSIZE; 805 806 if (size > MQ_DEF_MSGSIZE) { 807 msg = kmem_alloc(size, KM_SLEEP); 808 } else { 809 msg = pool_cache_get(mqmsg_cache, PR_WAITOK); 810 } 811 812 /* Get the data from user-space */ 813 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 814 if (error) { 815 mqueue_freemsg(msg, size); 816 return error; 817 } 818 msg->msg_len = msg_len; 819 msg->msg_prio = msg_prio; 820 821 error = mqueue_get(mqdes, FWRITE, &mq); 822 if (error) { 823 mqueue_freemsg(msg, size); 824 return error; 825 } 826 getnanotime(&mq->mq_mtime); 827 mqattr = &mq->mq_attrib; 828 829 /* Check the message size limit */ 830 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 831 error = EMSGSIZE; 832 goto error; 833 } 834 835 /* Check if queue is full */ 836 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 837 int t; 838 839 if (mqattr->mq_flags & O_NONBLOCK) { 840 error = EAGAIN; 841 goto error; 842 } 843 if (ts) { 844 error = ts2timo(CLOCK_REALTIME, TIMER_ABSTIME, ts, &t, 845 NULL); 846 if (error) 847 goto error; 848 } else 849 t = 0; 850 /* Block until queue becomes available */ 851 error = cv_timedwait_sig(&mq->mq_recv_cv, &mq->mq_mtx, t); 852 if (error || (mqattr->mq_flags & MQ_UNLINKED)) { 853 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 854 goto error; 855 } 856 } 857 KASSERT(mqattr->mq_curmsgs < mqattr->mq_maxmsg); 858 859 /* 860 * Insert message into the queue, according to the priority. 861 * Note the difference between index and priority. 862 */ 863 if (__predict_true(msg_prio < MQ_PQSIZE)) { 864 u_int idx = MQ_PQSIZE - msg_prio; 865 866 KASSERT(idx != MQ_PQRESQ); 867 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 868 mq->mq_bitmap |= (1U << --idx); 869 } else { 870 mqueue_linear_insert(mq, msg); 871 } 872 873 /* Check for the notify */ 874 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 875 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 876 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 877 /* Initialize the signal */ 878 KSI_INIT(&ksi); 879 ksi.ksi_signo = mq->mq_sig_notify.sigev_signo; 880 ksi.ksi_code = SI_MESGQ; 881 ksi.ksi_value = mq->mq_sig_notify.sigev_value; 882 /* Unregister the process */ 883 notify = mq->mq_notify_proc; 884 mq->mq_notify_proc = NULL; 885 } 886 887 /* Increment the counter and signal waiter, if any */ 888 mqattr->mq_curmsgs++; 889 cv_signal(&mq->mq_send_cv); 890 891 /* Ready for receiving now */ 892 selnotify(&mq->mq_rsel, POLLIN | POLLRDNORM, 0); 893 error: 894 mutex_exit(&mq->mq_mtx); 895 fd_putfile((int)mqdes); 896 897 if (error) { 898 mqueue_freemsg(msg, size); 899 } else if (notify) { 900 /* Send the notify, if needed */ 901 mutex_enter(&proc_lock); 902 kpsignal(notify, &ksi, NULL); 903 mutex_exit(&proc_lock); 904 } 905 return error; 906 } 907 908 int 909 sys_mq_send(struct lwp *l, const struct sys_mq_send_args *uap, 910 register_t *retval) 911 { 912 /* { 913 syscallarg(mqd_t) mqdes; 914 syscallarg(const char *) msg_ptr; 915 syscallarg(size_t) msg_len; 916 syscallarg(unsigned) msg_prio; 917 } */ 918 919 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 920 SCARG(uap, msg_len), SCARG(uap, msg_prio), NULL); 921 } 922 923 int 924 sys___mq_timedsend50(struct lwp *l, const struct sys___mq_timedsend50_args *uap, 925 register_t *retval) 926 { 927 /* { 928 syscallarg(mqd_t) mqdes; 929 syscallarg(const char *) msg_ptr; 930 syscallarg(size_t) msg_len; 931 syscallarg(unsigned) msg_prio; 932 syscallarg(const struct timespec *) abs_timeout; 933 } */ 934 struct timespec ts, *tsp; 935 int error; 936 937 /* Get and convert time value */ 938 if (SCARG(uap, abs_timeout)) { 939 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts)); 940 if (error) 941 return error; 942 tsp = &ts; 943 } else { 944 tsp = NULL; 945 } 946 947 return mq_send1(SCARG(uap, mqdes), SCARG(uap, msg_ptr), 948 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp); 949 } 950 951 int 952 sys_mq_notify(struct lwp *l, const struct sys_mq_notify_args *uap, 953 register_t *retval) 954 { 955 /* { 956 syscallarg(mqd_t) mqdes; 957 syscallarg(const struct sigevent *) notification; 958 } */ 959 struct mqueue *mq; 960 struct sigevent sig; 961 int error; 962 963 if (SCARG(uap, notification)) { 964 /* Get the signal from user-space */ 965 error = copyin(SCARG(uap, notification), &sig, 966 sizeof(struct sigevent)); 967 if (error) 968 return error; 969 if (sig.sigev_notify == SIGEV_SIGNAL && 970 (sig.sigev_signo <=0 || sig.sigev_signo >= NSIG)) 971 return EINVAL; 972 } 973 974 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 975 if (error) { 976 return error; 977 } 978 if (SCARG(uap, notification)) { 979 /* Register notification: set the signal and target process */ 980 if (mq->mq_notify_proc == NULL) { 981 memcpy(&mq->mq_sig_notify, &sig, 982 sizeof(struct sigevent)); 983 mq->mq_notify_proc = l->l_proc; 984 } else { 985 /* Fail if someone else already registered */ 986 error = EBUSY; 987 } 988 } else { 989 /* Unregister the notification */ 990 mq->mq_notify_proc = NULL; 991 } 992 mutex_exit(&mq->mq_mtx); 993 fd_putfile((int)SCARG(uap, mqdes)); 994 995 return error; 996 } 997 998 int 999 sys_mq_getattr(struct lwp *l, const struct sys_mq_getattr_args *uap, 1000 register_t *retval) 1001 { 1002 /* { 1003 syscallarg(mqd_t) mqdes; 1004 syscallarg(struct mq_attr *) mqstat; 1005 } */ 1006 struct mqueue *mq; 1007 struct mq_attr attr; 1008 int error; 1009 1010 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1011 if (error) { 1012 return error; 1013 } 1014 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1015 mutex_exit(&mq->mq_mtx); 1016 fd_putfile((int)SCARG(uap, mqdes)); 1017 1018 return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr)); 1019 } 1020 1021 int 1022 sys_mq_setattr(struct lwp *l, const struct sys_mq_setattr_args *uap, 1023 register_t *retval) 1024 { 1025 /* { 1026 syscallarg(mqd_t) mqdes; 1027 syscallarg(const struct mq_attr *) mqstat; 1028 syscallarg(struct mq_attr *) omqstat; 1029 } */ 1030 struct mqueue *mq; 1031 struct mq_attr attr; 1032 int error, nonblock; 1033 1034 error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr)); 1035 if (error) 1036 return error; 1037 nonblock = (attr.mq_flags & O_NONBLOCK); 1038 1039 error = mqueue_get(SCARG(uap, mqdes), 0, &mq); 1040 if (error) { 1041 return error; 1042 } 1043 1044 /* Copy the old attributes, if needed */ 1045 if (SCARG(uap, omqstat)) { 1046 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1047 } 1048 1049 /* Ignore everything, except O_NONBLOCK */ 1050 if (nonblock) 1051 mq->mq_attrib.mq_flags |= O_NONBLOCK; 1052 else 1053 mq->mq_attrib.mq_flags &= ~O_NONBLOCK; 1054 1055 mutex_exit(&mq->mq_mtx); 1056 fd_putfile((int)SCARG(uap, mqdes)); 1057 1058 /* 1059 * Copy the data to the user-space. 1060 * Note: According to POSIX, the new attributes should not be set in 1061 * case of fail - this would be violated. 1062 */ 1063 if (SCARG(uap, omqstat)) 1064 error = copyout(&attr, SCARG(uap, omqstat), 1065 sizeof(struct mq_attr)); 1066 1067 return error; 1068 } 1069 1070 int 1071 sys_mq_unlink(struct lwp *l, const struct sys_mq_unlink_args *uap, 1072 register_t *retval) 1073 { 1074 /* { 1075 syscallarg(const char *) name; 1076 } */ 1077 mqueue_t *mq; 1078 char *name; 1079 int error, refcnt = 0; 1080 1081 /* Get the name from the user-space */ 1082 name = kmem_alloc(MQ_NAMELEN, KM_SLEEP); 1083 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL); 1084 if (error) { 1085 kmem_free(name, MQ_NAMELEN); 1086 return error; 1087 } 1088 1089 mutex_enter(&mqlist_lock); 1090 mq = mqueue_lookup(name); 1091 if (mq == NULL) { 1092 error = ENOENT; 1093 goto err; 1094 } 1095 KASSERT(mutex_owned(&mq->mq_mtx)); 1096 1097 /* Verify permissions. */ 1098 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MQUEUE, 0, mq, 1099 NULL, NULL)) { 1100 mutex_exit(&mq->mq_mtx); 1101 error = EACCES; 1102 goto err; 1103 } 1104 1105 /* Remove and destroy if no references. */ 1106 LIST_REMOVE(mq, mq_list); 1107 refcnt = mq->mq_refcnt; 1108 if (refcnt) { 1109 /* Mark as unlinked, if there are references. */ 1110 mq->mq_attrib.mq_flags |= MQ_UNLINKED; 1111 } 1112 1113 /* Wake up waiters, if there are any. */ 1114 cv_broadcast(&mq->mq_send_cv); 1115 cv_broadcast(&mq->mq_recv_cv); 1116 1117 selnotify(&mq->mq_rsel, POLLHUP, 0); 1118 selnotify(&mq->mq_wsel, POLLHUP, 0); 1119 1120 mutex_exit(&mq->mq_mtx); 1121 err: 1122 mutex_exit(&mqlist_lock); 1123 /* 1124 * If last reference - destroy the message queue. Otherwise, 1125 * the last mq_close() call will do that. 1126 */ 1127 if (!error && refcnt == 0) { 1128 mqueue_destroy(mq); 1129 } 1130 kmem_free(name, MQ_NAMELEN); 1131 1132 return error; 1133 } 1134 1135 /* 1136 * System control nodes. 1137 */ 1138 SYSCTL_SETUP(mqueue_sysctl_init, "mqueue systl") 1139 { 1140 const struct sysctlnode *node = NULL; 1141 1142 sysctl_createv(clog, 0, NULL, NULL, 1143 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1144 CTLTYPE_INT, "posix_msg", 1145 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 1146 "Message Passing option to which the " 1147 "system attempts to conform"), 1148 NULL, _POSIX_MESSAGE_PASSING, NULL, 0, 1149 CTL_KERN, CTL_CREATE, CTL_EOL); 1150 sysctl_createv(clog, 0, NULL, &node, 1151 CTLFLAG_PERMANENT, 1152 CTLTYPE_NODE, "mqueue", 1153 SYSCTL_DESCR("Message queue options"), 1154 NULL, 0, NULL, 0, 1155 CTL_KERN, CTL_CREATE, CTL_EOL); 1156 1157 if (node == NULL) 1158 return; 1159 1160 sysctl_createv(clog, 0, &node, NULL, 1161 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1162 CTLTYPE_INT, "mq_open_max", 1163 SYSCTL_DESCR("Maximal number of message queue descriptors " 1164 "that process could open"), 1165 NULL, 0, &mq_open_max, 0, 1166 CTL_CREATE, CTL_EOL); 1167 sysctl_createv(clog, 0, &node, NULL, 1168 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1169 CTLTYPE_INT, "mq_prio_max", 1170 SYSCTL_DESCR("Maximal priority of the message"), 1171 NULL, 0, &mq_prio_max, 0, 1172 CTL_CREATE, CTL_EOL); 1173 sysctl_createv(clog, 0, &node, NULL, 1174 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1175 CTLTYPE_INT, "mq_max_msgsize", 1176 SYSCTL_DESCR("Maximal allowed size of the message"), 1177 NULL, 0, &mq_max_msgsize, 0, 1178 CTL_CREATE, CTL_EOL); 1179 sysctl_createv(clog, 0, &node, NULL, 1180 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1181 CTLTYPE_INT, "mq_def_maxmsg", 1182 SYSCTL_DESCR("Default maximal message count"), 1183 NULL, 0, &mq_def_maxmsg, 0, 1184 CTL_CREATE, CTL_EOL); 1185 sysctl_createv(clog, 0, &node, NULL, 1186 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1187 CTLTYPE_INT, "mq_max_maxmsg", 1188 SYSCTL_DESCR("Maximal allowed message count"), 1189 NULL, 0, &mq_max_maxmsg, 0, 1190 CTL_CREATE, CTL_EOL); 1191 1192 return; 1193 } 1194 1195 /* 1196 * Debugging. 1197 */ 1198 #if defined(DDB) 1199 1200 void 1201 mqueue_print_list(void (*pr)(const char *, ...)) 1202 { 1203 struct mqueue *mq; 1204 1205 (*pr)("Global list of the message queues:\n"); 1206 (*pr)("%20s %10s %8s %8s %3s %4s %4s %4s\n", 1207 "Name", "Ptr", "Mode", "Flags", "Ref", 1208 "MaxMsg", "MsgSze", "CurMsg"); 1209 LIST_FOREACH(mq, &mqueue_head, mq_list) { 1210 (*pr)("%20s %10p %8x %8x %3u %6lu %6lu %6lu\n", 1211 mq->mq_name, mq, mq->mq_mode, 1212 mq->mq_attrib.mq_flags, mq->mq_refcnt, 1213 mq->mq_attrib.mq_maxmsg, mq->mq_attrib.mq_msgsize, 1214 mq->mq_attrib.mq_curmsgs); 1215 } 1216 } 1217 1218 #endif /* defined(DDB) */ 1219