/*	$NetBSD: sys_mqueue.c,v 1.16 2009/04/11 23:05:26 christos Exp $	*/

/*
 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX message queues.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 *
 * Locking
 *
 * The global list of message queues (mqueue_head) and the
 * proc_t::p_mqueue_cnt counter are protected by the mqlist_mtx lock.
 * Each message queue and its members are protected by mqueue::mq_mtx.
 *
 * Lock order:
 *	mqlist_mtx
 *	  -> mqueue::mq_mtx
 */

#include <stdbool.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/ucred.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mqueue.h>
#include <sys/objcache.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/serialize.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/* System-wide limits. */
static u_int mq_open_max = MQ_OPEN_MAX;
static u_int mq_prio_max = MQ_PRIO_MAX;
static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE;
static u_int mq_def_maxmsg = 32;
static u_int mq_max_maxmsg = 16 * 32;

struct lock mqlist_mtx;
static struct objcache *mqmsg_cache;
static LIST_HEAD(, mqueue) mqueue_head =
	LIST_HEAD_INITIALIZER(mqueue_head);

typedef struct file file_t;	/* XXX: Should we put this in sys/types.h ? */
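/*
 * The lock order documented above is followed by the callers below; a
 * minimal sketch of the pattern (illustrative only, not compiled here):
 *
 *	lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
 *	mq = mqueue_lookup(name);	// returns with mq->mq_mtx held
 *	if (mq != NULL) {
 *		... operate on the queue ...
 *		lockmgr(&mq->mq_mtx, LK_RELEASE);
 *	}
 *	lockmgr(&mqlist_mtx, LK_RELEASE);
 */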
/* Function prototypes */
static int	mq_poll_fop(file_t *, int, struct ucred *cred);
static int	mq_stat_fop(file_t *, struct stat *, struct ucred *cred);
static int	mq_close_fop(file_t *);

/* Some time-related utility functions */
static int	itimespecfix(struct timespec *ts);
static int	tstohz(const struct timespec *ts);

/* File operations vector */
static struct fileops mqops = {
	.fo_read = badfo_readwrite,
	.fo_write = badfo_readwrite,
	.fo_ioctl = badfo_ioctl,
	.fo_poll = mq_poll_fop,
	.fo_stat = mq_stat_fop,
	.fo_close = mq_close_fop,
	.fo_kqfilter = badfo_kqfilter,
	.fo_shutdown = badfo_shutdown
};

/* Define a new malloc type for message queues */
MALLOC_DECLARE(M_MQBUF);
MALLOC_DEFINE(M_MQBUF, "mqueues", "Buffers for message queues");

/* Malloc arguments for object cache */
struct objcache_malloc_args mqueue_malloc_args = {
	sizeof(struct mqueue), M_MQBUF };

/* Spinlock around the process list */
extern struct spinlock allproc_spin;

/*
 * Initialize the POSIX message queue subsystem.
 */
void
mqueue_sysinit(void)
{
	mqmsg_cache = objcache_create("mqmsg_cache",
	    0,			/* infinite depot capacity */
	    0,			/* default magazine capacity */
	    NULL,		/* constructor */
	    NULL,		/* destructor */
	    NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &mqueue_malloc_args);

	lockinit(&mqlist_mtx, "mqlist_mtx", 0, LK_CANRECURSE);
}

/*
 * Free the message.
 */
static void
mqueue_freemsg(struct mq_msg *msg, const size_t size)
{

	if (size > MQ_DEF_MSGSIZE) {
		kfree(msg, M_MQBUF);
	} else {
		objcache_put(mqmsg_cache, msg);
	}
}

/*
 * Destroy the message queue.
 */
static void
mqueue_destroy(struct mqueue *mq)
{
	struct mq_msg *msg;
	size_t msz;
	u_int i;

	/* Note MQ_PQSIZE + 1. */
	for (i = 0; i < MQ_PQSIZE + 1; i++) {
		while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) {
			TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue);
			msz = sizeof(struct mq_msg) + msg->msg_len;
			mqueue_freemsg(msg, msz);
		}
	}
	lockuninit(&mq->mq_mtx);
	kfree(mq, M_MQBUF);
}

/*
 * Look up a name in the global list of message queues.
 * => locks the message queue on success
 */
static void *
mqueue_lookup(char *name)
{
	struct mqueue *mq;

	KKASSERT(lockstatus(&mqlist_mtx, curthread));

	LIST_FOREACH(mq, &mqueue_head, mq_list) {
		if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) {
			lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
			return mq;
		}
	}

	return NULL;
}

/*
 * mqueue_get: get the mqueue from the descriptor.
 * => locks the message queue, if found.
 * => holds a reference on the file descriptor.
 */
static int
mqueue_get(struct lwp *l, mqd_t mqd, file_t **fpr)
{
	struct mqueue *mq;
	file_t *fp;

	fp = holdfp(curproc->p_fd, (int)mqd, -1);	/* XXX: Why -1 ? */
	if (__predict_false(fp == NULL))
		return EBADF;

	if (__predict_false(fp->f_type != DTYPE_MQUEUE)) {
		fdrop(fp);
		return EBADF;
	}
	mq = fp->f_data;
	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);

	*fpr = fp;
	return 0;
}
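/*
 * Typical caller pattern for mqueue_get(), as used by the system calls
 * below (illustrative sketch only):
 *
 *	error = mqueue_get(l, mqdes, &fp);
 *	if (error)
 *		return error;
 *	mq = fp->f_data;
 *	... operate on the queue; mq->mq_mtx is held ...
 *	lockmgr(&mq->mq_mtx, LK_RELEASE);
 *	fdrop(fp);
 */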
/*
 * mqueue_linear_insert: perform a linear insert according to the message
 * priority into the reserved queue (MQ_PQRESQ).  The reserved queue is a
 * sorted list used only when mq_prio_max is increased via sysctl.
 */
static inline void
mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg)
{
	struct mq_msg *mit;

	TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) {
		if (msg->msg_prio > mit->msg_prio)
			break;
	}
	if (mit == NULL) {
		TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue);
	} else {
		TAILQ_INSERT_BEFORE(mit, msg, msg_queue);
	}
}

/*
 * Validate input.
 */
int
itimespecfix(struct timespec *ts)
{
	if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
		return (EINVAL);
	if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < nstick)
		ts->tv_nsec = nstick;
	return (0);
}

/*
 * Compute the number of ticks in the specified amount of time.
 */
int
tstohz(const struct timespec *ts)
{
	struct timeval tv;

	/*
	 * usec has great enough resolution for hz, so convert to a
	 * timeval and use tvtohz_high().
	 */
	TIMESPEC_TO_TIMEVAL(&tv, ts);
	return tvtohz_high(&tv);	/* XXX Why _high() and not _low() ? */
}

/*
 * Convert an absolute timeout in struct timespec to a tick count.
 * Used by mq_timedreceive(), mq_timedsend().
 */
int
abstimeout2timo(struct timespec *ts, int *timo)
{
	struct timespec tsd;
	int error;

	error = itimespecfix(ts);
	if (error) {
		return error;
	}
	getnanotime(&tsd);
	timespecsub(ts, &tsd);
	if (ts->tv_sec < 0 || (ts->tv_sec == 0 && ts->tv_nsec <= 0)) {
		return ETIMEDOUT;
	}
	*timo = tstohz(ts);
	KKASSERT(*timo != 0);

	return 0;
}
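/*
 * Worked example for abstimeout2timo() (illustrative, assuming hz = 100):
 * an absolute timeout 1.5 seconds in the future becomes, after the
 * getnanotime()/timespecsub() step, a relative time of { 1, 500000000 },
 * and tstohz() turns that into roughly 150 ticks, which the callers pass
 * to lksleep() as the sleep timeout.
 */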
static int
mq_stat_fop(file_t *fp, struct stat *st, struct ucred *cred)
{
	struct mqueue *mq = fp->f_data;

	(void)memset(st, 0, sizeof(*st));

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
	st->st_mode = mq->mq_mode;
	st->st_uid = mq->mq_euid;
	st->st_gid = mq->mq_egid;
	st->st_atimespec = mq->mq_atime;
	st->st_mtimespec = mq->mq_mtime;
	/*st->st_ctimespec = st->st_birthtimespec = mq->mq_btime;*/
	st->st_uid = fp->f_cred->cr_uid;
	st->st_gid = fp->f_cred->cr_svgid;
	lockmgr(&mq->mq_mtx, LK_RELEASE);

	return 0;
}

static int
mq_poll_fop(file_t *fp, int events, struct ucred *cred)
{
	struct mqueue *mq = fp->f_data;
	struct mq_attr *mqattr;
	int revents = 0;

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
	mqattr = &mq->mq_attrib;
	if (events & (POLLIN | POLLRDNORM)) {
		/* Ready for receiving, if there are messages in the queue */
		if (mqattr->mq_curmsgs)
			revents |= (POLLIN | POLLRDNORM);
		else
			selrecord(curthread, &mq->mq_rsel);
	}
	if (events & (POLLOUT | POLLWRNORM)) {
		/* Ready for sending, if the message queue is not full */
		if (mqattr->mq_curmsgs < mqattr->mq_maxmsg)
			revents |= (POLLOUT | POLLWRNORM);
		else
			selrecord(curthread, &mq->mq_wsel);
	}
	lockmgr(&mq->mq_mtx, LK_RELEASE);

	return revents;
}

static int
mq_close_fop(file_t *fp)
{
	struct proc *p = curproc;
	struct mqueue *mq = fp->f_data;
	bool destroy;

	lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);

	/* Decrease the counters */
	p->p_mqueue_cnt--;
	mq->mq_refcnt--;

	/* Remove notification if registered for this process */
	if (mq->mq_notify_proc == p)
		mq->mq_notify_proc = NULL;

	/*
	 * If this is the last reference and mqueue is marked for unlink,
	 * remove and later destroy the message queue.
	 */
	if (mq->mq_refcnt == 0 && (mq->mq_attrib.mq_flags & MQ_UNLINK)) {
		LIST_REMOVE(mq, mq_list);
		destroy = true;
	} else
		destroy = false;

	lockmgr(&mq->mq_mtx, LK_RELEASE);
	lockmgr(&mqlist_mtx, LK_RELEASE);

	if (destroy)
		mqueue_destroy(mq);

	return 0;
}
/*
 * General mqueue system calls.
 */

int
sys_mq_open(struct mq_open_args *uap)
{
	/* {
		syscallarg(const char *) name;
		syscallarg(int) oflag;
		syscallarg(mode_t) mode;
		syscallarg(struct mq_attr) attr;
	} */
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct mqueue *mq, *mq_new = NULL;
	file_t *fp;
	char *name;
	int mqd, error, oflag;

	/* Check the access mode flags */
	oflag = SCARG(uap, oflag);
	if ((oflag & O_ACCMODE) == (O_WRONLY | O_RDWR)) {
		return EINVAL;
	}

	/* Get the name from user-space */
	name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO);
	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kfree(name, M_MQBUF);
		return error;
	}

	if (oflag & O_CREAT) {
		struct mq_attr attr;
		u_int i;

		/* Check the limit */
		if (p->p_mqueue_cnt == mq_open_max) {
			kfree(name, M_MQBUF);
			return EMFILE;
		}

		/* An empty name is invalid */
		if (name[0] == '\0') {
			kfree(name, M_MQBUF);
			return EINVAL;
		}

		/* Check for mqueue attributes */
		if (SCARG(uap, attr)) {
			error = copyin(SCARG(uap, attr), &attr,
			    sizeof(struct mq_attr));
			if (error) {
				kfree(name, M_MQBUF);
				return error;
			}
			if (attr.mq_maxmsg <= 0 ||
			    attr.mq_maxmsg > mq_max_maxmsg ||
			    attr.mq_msgsize <= 0 ||
			    attr.mq_msgsize > mq_max_msgsize) {
				kfree(name, M_MQBUF);
				return EINVAL;
			}
			attr.mq_curmsgs = 0;
		} else {
			memset(&attr, 0, sizeof(struct mq_attr));
			attr.mq_maxmsg = mq_def_maxmsg;
			attr.mq_msgsize =
			    MQ_DEF_MSGSIZE - sizeof(struct mq_msg);
		}

		/*
		 * Allocate a new mqueue, initialize the data structures,
		 * copy the name and attributes, and set the flags.
		 */
		mq_new = kmalloc(sizeof(struct mqueue), M_MQBUF,
		    M_WAITOK | M_ZERO);

		lockinit(&mq_new->mq_mtx, "mq_new->mq_mtx", 0, LK_CANRECURSE);
		for (i = 0; i < (MQ_PQSIZE + 1); i++) {
			TAILQ_INIT(&mq_new->mq_head[i]);
		}

		strlcpy(mq_new->mq_name, name, MQ_NAMELEN);
		memcpy(&mq_new->mq_attrib, &attr, sizeof(struct mq_attr));

		/*CTASSERT((O_MASK & (MQ_UNLINK | MQ_RECEIVE)) == 0);*/
		/* mq_new->mq_attrib.mq_flags = (O_MASK & oflag); */
		mq_new->mq_attrib.mq_flags = oflag;

		/* Store the mode and the effective UID with GID */
		mq_new->mq_mode = ((SCARG(uap, mode) &
		    ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
		mq_new->mq_euid = td->td_ucred->cr_uid;
		mq_new->mq_egid = td->td_ucred->cr_svgid;
	}

	/* Allocate a file structure and descriptor */
	error = falloc(td->td_lwp, &fp, &mqd);
	if (error) {
		if (mq_new)
			mqueue_destroy(mq_new);
		kfree(name, M_MQBUF);
		return error;
	}
	fp->f_type = DTYPE_MQUEUE;
	fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE);
	fp->f_ops = &mqops;

	/* Look up a message queue with this name */
	lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
	mq = mqueue_lookup(name);
	if (mq) {
		int acc_mode;

		KKASSERT(lockstatus(&mq->mq_mtx, curthread));

		/* Check that the mqueue is not marked as unlinking */
		if (mq->mq_attrib.mq_flags & MQ_UNLINK) {
			error = EACCES;
			goto exit;
		}
		/* Fail if O_EXCL is set and the mqueue already exists */
		if ((oflag & O_CREAT) && (oflag & O_EXCL)) {
			error = EEXIST;
			goto exit;
		}

		/*
		 * Check the permissions.  Note the difference between
		 * VREAD/VWRITE and FREAD/FWRITE.
		 */
		acc_mode = 0;
		if (fp->f_flag & FREAD) {
			acc_mode |= VREAD;
		}
		if (fp->f_flag & FWRITE) {
			acc_mode |= VWRITE;
		}
		if (vaccess(VNON, mq->mq_mode, mq->mq_euid, mq->mq_egid,
		    acc_mode, td->td_ucred)) {
			error = EACCES;
			goto exit;
		}
	} else {
		/* Fail if the mqueue does not exist and we are not creating it */
		if ((oflag & O_CREAT) == 0) {
			lockmgr(&mqlist_mtx, LK_RELEASE);
			KKASSERT(mq_new == NULL);
			fsetfd(fdp, NULL, mqd);
			fp->f_ops = &badfileops;
			fdrop(fp);
			kfree(name, M_MQBUF);
			return ENOENT;
		}

		/* Check the limit */
		if (p->p_mqueue_cnt == mq_open_max) {
			error = EMFILE;
			goto exit;
		}

		/* Insert the queue into the list */
		mq = mq_new;
		lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
		LIST_INSERT_HEAD(&mqueue_head, mq, mq_list);
		mq_new = NULL;
		getnanotime(&mq->mq_btime);
		mq->mq_atime = mq->mq_mtime = mq->mq_btime;
	}

	/* Increase the counters, and make the descriptor ready */
	p->p_mqueue_cnt++;
	mq->mq_refcnt++;
	fp->f_data = mq;
exit:
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	lockmgr(&mqlist_mtx, LK_RELEASE);

	if (mq_new)
		mqueue_destroy(mq_new);
	if (error) {
		fsetfd(fdp, NULL, mqd);
		fp->f_ops = &badfileops;
	} else {
		fsetfd(fdp, fp, mqd);
		uap->sysmsg_result = mqd;
	}
	fdrop(fp);
	kfree(name, M_MQBUF);

	return error;
}

int
sys_mq_close(struct mq_close_args *uap)
{
	return sys_close((void *)uap);
}
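/*
 * The userland API backed by the two system calls above, via the standard
 * <mqueue.h> wrappers (illustrative sketch, not kernel code):
 *
 *	mqd_t mqd = mq_open("/myqueue", O_CREAT | O_RDWR, 0600, NULL);
 *	if (mqd == (mqd_t)-1)
 *		err(1, "mq_open");
 *	...
 *	mq_close(mqd);
 *	mq_unlink("/myqueue");
 */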
/*
 * Primary mq_receive1() function.
 */
int
mq_receive1(struct lwp *l, mqd_t mqdes, void *msg_ptr, size_t msg_len,
    unsigned *msg_prio, struct timespec *ts, ssize_t *mlen)
{
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_msg *msg = NULL;
	struct mq_attr *mqattr;
	u_int idx;
	int error;

	/* Get the message queue */
	error = mqueue_get(l, mqdes, &fp);
	if (error) {
		return error;
	}
	mq = fp->f_data;
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto error;
	}
	getnanotime(&mq->mq_atime);
	mqattr = &mq->mq_attrib;

	/* Check the message size limits */
	if (msg_len < mqattr->mq_msgsize) {
		error = EMSGSIZE;
		goto error;
	}

	/* Check if the queue is empty */
	while (mqattr->mq_curmsgs == 0) {
		int t;

		if (mqattr->mq_flags & O_NONBLOCK) {
			error = EAGAIN;
			goto error;
		}
		if (ts) {
			error = abstimeout2timo(ts, &t);
			if (error)
				goto error;
		} else
			t = 0;
		/*
		 * Block until someone sends a message.
		 * While doing this, notification should not be sent.
		 */
		mqattr->mq_flags |= MQ_RECEIVE;
		error = lksleep(&mq->mq_send_cv, &mq->mq_mtx, PCATCH,
		    "mqsend", t);
		mqattr->mq_flags &= ~MQ_RECEIVE;
		if (error || (mqattr->mq_flags & MQ_UNLINK)) {
			error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR;
			goto error;
		}
	}

	/*
	 * Find the highest priority message, and remove it from the queue.
	 * The reserved queue is checked first, then the bitmap.
	 */
	msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]);
	if (__predict_true(msg == NULL)) {
		idx = ffs(mq->mq_bitmap);
		msg = TAILQ_FIRST(&mq->mq_head[idx]);
		KKASSERT(msg != NULL);
	} else {
		idx = MQ_PQRESQ;
	}
	TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue);

	/* Unmark the bit, if this was the last message. */
	if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) {
		KKASSERT((MQ_PQSIZE - idx) == msg->msg_prio);
		mq->mq_bitmap &= ~(1 << --idx);
	}

	/* Decrement the counter and signal a waiter, if any */
	mqattr->mq_curmsgs--;
	wakeup_one(&mq->mq_recv_cv);

	/* Ready for sending now */
	selwakeup(&mq->mq_wsel);
error:
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);
	if (error)
		return error;

	/*
	 * Copy the data to user-space.
	 * Note: according to POSIX, the message should not be removed from
	 * the queue if this copy-out fails - that requirement is violated
	 * here.
	 */
	*mlen = msg->msg_len;
	error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len);
	if (error == 0 && msg_prio)
		error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned));
	mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len);

	return error;
}
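/*
 * Priority/index mapping used by the receive path above and the send path
 * below (illustrative): priority p in [0, MQ_PQSIZE) is queued at
 * mq_head[MQ_PQSIZE - p], and bit (MQ_PQSIZE - p - 1) is set in
 * mq_bitmap, so a lower bit means a higher priority and ffs(mq_bitmap)
 * yields the head index of the highest-priority non-empty queue.
 * For example, assuming MQ_PQSIZE is 32, priority 5 lands in mq_head[27]
 * and sets bit 26; ffs() then returns 27 for it.
 */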
int
sys_mq_receive(struct mq_receive_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned *) msg_prio;
	} */
	ssize_t mlen;
	int error;

	error = mq_receive1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio),
	    0, &mlen);
	if (error == 0)
		uap->sysmsg_result = mlen;

	return error;
}

int
sys_mq_timedreceive(struct mq_timedreceive_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned *) msg_prio;
		syscallarg(const struct timespec *) abs_timeout;
	} */
	int error;
	ssize_t mlen;
	struct timespec ts, *tsp;

	/* Get and convert time value */
	if (SCARG(uap, abs_timeout)) {
		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
		if (error)
			return error;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	error = mq_receive1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio),
	    tsp, &mlen);
	if (error == 0)
		uap->sysmsg_result = mlen;

	return error;
}
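/*
 * Note on the timeout argument (illustrative userland sketch): the
 * timespec passed to mq_timedreceive()/mq_timedsend() is an absolute
 * CLOCK_REALTIME time, which abstimeout2timo() above converts to a
 * relative tick count, e.g.:
 *
 *	struct timespec ts;
 *	clock_gettime(CLOCK_REALTIME, &ts);
 *	ts.tv_sec += 5;			// give up after about 5 seconds
 *	n = mq_timedreceive(mqd, buf, sizeof(buf), &prio, &ts);
 */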
/*
 * Primary mq_send1() function.
 */
int
mq_send1(struct lwp *l, mqd_t mqdes, const char *msg_ptr, size_t msg_len,
    unsigned msg_prio, struct timespec *ts)
{
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_msg *msg;
	struct mq_attr *mqattr;
	struct proc *notify = NULL;
	/*ksiginfo_t ksi;*/
	size_t size;
	int error;

	/* Check the priority range */
	if (msg_prio >= mq_prio_max)
		return EINVAL;

	/* Allocate a new message */
	size = sizeof(struct mq_msg) + msg_len;
	if (size > mq_max_msgsize)
		return EMSGSIZE;

	if (size > MQ_DEF_MSGSIZE) {
		msg = kmalloc(size, M_MQBUF, M_WAITOK);
	} else {
		msg = objcache_get(mqmsg_cache, M_WAITOK);
	}

	/* Get the data from user-space */
	error = copyin(msg_ptr, msg->msg_ptr, msg_len);
	if (error) {
		mqueue_freemsg(msg, size);
		return error;
	}
	msg->msg_len = msg_len;
	msg->msg_prio = msg_prio;

	/* Get the mqueue */
	error = mqueue_get(l, mqdes, &fp);
	if (error) {
		mqueue_freemsg(msg, size);
		return error;
	}
	mq = fp->f_data;
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto error;
	}
	getnanotime(&mq->mq_mtime);
	mqattr = &mq->mq_attrib;

	/* Check the message size limit */
	if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) {
		error = EMSGSIZE;
		goto error;
	}

	/* Check if the queue is full */
	while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) {
		int t;

		if (mqattr->mq_flags & O_NONBLOCK) {
			error = EAGAIN;
			goto error;
		}
		if (ts) {
			error = abstimeout2timo(ts, &t);
			if (error)
				goto error;
		} else
			t = 0;
		/* Block until the queue becomes available */
		error = lksleep(&mq->mq_recv_cv, &mq->mq_mtx, PCATCH,
		    "mqrecv", t);
		if (error || (mqattr->mq_flags & MQ_UNLINK)) {
			error = (error == EWOULDBLOCK) ? ETIMEDOUT : error;
			goto error;
		}
	}
	KKASSERT(mq->mq_attrib.mq_curmsgs < mq->mq_attrib.mq_maxmsg);

	/*
	 * Insert the message into the queue, according to the priority.
	 * Note the difference between index and priority.
	 */
	if (__predict_true(msg_prio < MQ_PQSIZE)) {
		u_int idx = MQ_PQSIZE - msg_prio;

		KKASSERT(idx != MQ_PQRESQ);
		TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue);
		mq->mq_bitmap |= (1 << --idx);
	} else {
		mqueue_linear_insert(mq, msg);
	}

	/* Check whether a notification should be sent */
	if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc &&
	    (mqattr->mq_flags & MQ_RECEIVE) == 0 &&
	    mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) {
		/* Initialize the signal */
		/*KSI_INIT(&ksi);*/
		/*ksi.ksi_signo = mq->mq_sig_notify.sigev_signo;*/
		/*ksi.ksi_code = SI_MESGQ;*/
		/*ksi.ksi_value = mq->mq_sig_notify.sigev_value;*/
		/* Unregister the process */
		notify = mq->mq_notify_proc;
		mq->mq_notify_proc = NULL;
	}

	/* Increment the counter and signal a waiter, if any */
	mqattr->mq_curmsgs++;
	wakeup_one(&mq->mq_send_cv);

	/* Ready for receiving now */
	selwakeup(&mq->mq_rsel);
error:
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	if (error) {
		mqueue_freemsg(msg, size);
	} else if (notify) {
		/* Send the notification, if needed */
		spin_lock_wr(&allproc_spin);
		/*kpsignal(notify, &ksi, NULL);*/
		ksignal(notify, mq->mq_sig_notify.sigev_signo);
		spin_unlock_wr(&allproc_spin);
	}

	return error;
}

int
sys_mq_send(struct mq_send_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned) msg_prio;
	} */

	return mq_send1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio),
	    0);
}

int
sys_mq_timedsend(struct mq_timedsend_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned) msg_prio;
		syscallarg(const struct timespec *) abs_timeout;
	} */
	struct timespec ts, *tsp;
	int error;

	/* Get and convert time value */
	if (SCARG(uap, abs_timeout)) {
		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
		if (error)
			return error;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	return mq_send1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio),
	    tsp);
}
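/*
 * Notification model implemented by sys_mq_notify() below: a single
 * process may register a SIGEV_SIGNAL notification, and mq_send1() above
 * delivers it only when a message arrives on an empty queue and no
 * receiver is already blocked (MQ_RECEIVE clear).  Illustrative userland
 * sketch:
 *
 *	struct sigevent sev = { .sigev_notify = SIGEV_SIGNAL,
 *				.sigev_signo  = SIGUSR1 };
 *	mq_notify(mqd, &sev);
 */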
int
sys_mq_notify(struct mq_notify_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct sigevent *) notification;
	} */
	file_t *fp = NULL;
	struct mqueue *mq;
	struct sigevent sig;
	int error;

	if (SCARG(uap, notification)) {
		/* Get the signal from user-space */
		error = copyin(SCARG(uap, notification), &sig,
		    sizeof(struct sigevent));
		if (error)
			return error;
		if (sig.sigev_notify == SIGEV_SIGNAL &&
		    (sig.sigev_signo <= 0 || sig.sigev_signo >= NSIG))
			return EINVAL;
	}

	error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
	if (error)
		return error;
	mq = fp->f_data;

	if (SCARG(uap, notification)) {
		/* Register notification: set the signal and target process */
		if (mq->mq_notify_proc == NULL) {
			memcpy(&mq->mq_sig_notify, &sig,
			    sizeof(struct sigevent));
			mq->mq_notify_proc = curproc;
		} else {
			/* Fail if someone else already registered */
			error = EBUSY;
		}
	} else {
		/* Unregister the notification */
		mq->mq_notify_proc = NULL;
	}
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	return error;
}

int
sys_mq_getattr(struct mq_getattr_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(struct mq_attr *) mqstat;
	} */
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_attr attr;
	int error;

	/* Get the message queue */
	error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
	if (error)
		return error;
	mq = fp->f_data;
	memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr));
}
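/*
 * sys_mq_setattr() below honours only the O_NONBLOCK flag; everything
 * else in the passed mq_attr is ignored.  Typical userland use
 * (illustrative sketch):
 *
 *	struct mq_attr a;
 *	mq_getattr(mqd, &a);
 *	a.mq_flags |= O_NONBLOCK;
 *	mq_setattr(mqd, &a, NULL);
 */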
int
sys_mq_setattr(struct mq_setattr_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct mq_attr *) mqstat;
		syscallarg(struct mq_attr *) omqstat;
	} */
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_attr attr;
	int error, nonblock;

	error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr));
	if (error)
		return error;
	nonblock = (attr.mq_flags & O_NONBLOCK);

	/* Get the message queue */
	error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
	if (error)
		return error;
	mq = fp->f_data;

	/* Copy the old attributes, if needed */
	if (SCARG(uap, omqstat)) {
		memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	}

	/* Ignore everything, except O_NONBLOCK */
	if (nonblock)
		mq->mq_attrib.mq_flags |= O_NONBLOCK;
	else
		mq->mq_attrib.mq_flags &= ~O_NONBLOCK;

	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	/*
	 * Copy the data to user-space.
	 * Note: according to POSIX, the new attributes should not be set
	 * if this copy-out fails - that requirement is violated here.
	 */
	if (SCARG(uap, omqstat))
		error = copyout(&attr, SCARG(uap, omqstat),
		    sizeof(struct mq_attr));

	return error;
}

int
sys_mq_unlink(struct mq_unlink_args *uap)
{
	/* {
		syscallarg(const char *) name;
	} */
	struct thread *td = curthread;
	struct mqueue *mq;
	char *name;
	int error, refcnt = 0;

	/* Get the name from user-space */
	name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO);
	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kfree(name, M_MQBUF);
		return error;
	}

	/* Look up the message queue by name */
	lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
	mq = mqueue_lookup(name);
	if (mq == NULL) {
		error = ENOENT;
		goto error;
	}

	/* Check the permissions */
	if (td->td_ucred->cr_uid != mq->mq_euid &&
	    priv_check(td, PRIV_ROOT) != 0) {
		lockmgr(&mq->mq_mtx, LK_RELEASE);
		error = EACCES;
		goto error;
	}

	/* Mark the message queue as unlinking, before leaving the window */
	mq->mq_attrib.mq_flags |= MQ_UNLINK;

	/* Wake up all waiters, if any */
	wakeup(&mq->mq_send_cv);
	wakeup(&mq->mq_recv_cv);

	selwakeup(&mq->mq_rsel);
	selwakeup(&mq->mq_wsel);

	refcnt = mq->mq_refcnt;
	if (refcnt == 0)
		LIST_REMOVE(mq, mq_list);

	lockmgr(&mq->mq_mtx, LK_RELEASE);
error:
	lockmgr(&mqlist_mtx, LK_RELEASE);

	/*
	 * If there are no references, destroy the message queue;
	 * otherwise, the last mq_close() will do that.
	 */
	if (error == 0 && refcnt == 0)
		mqueue_destroy(mq);

	kfree(name, M_MQBUF);
	return error;
}

/*
 * SysCtl.
 */
SYSCTL_NODE(_kern, OID_AUTO, mqueue,
    CTLFLAG_RW, 0, "Message queue options");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_open_max,
    CTLFLAG_RW, &mq_open_max, 0,
    "Maximum number of message queue descriptors per process");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_prio_max,
    CTLFLAG_RW, &mq_prio_max, 0,
    "Maximum priority of a message");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_max_msgsize,
    CTLFLAG_RW, &mq_max_msgsize, 0,
    "Maximum allowed size of a message");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_def_maxmsg,
    CTLFLAG_RW, &mq_def_maxmsg, 0,
    "Default maximum message count");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_max_maxmsg,
    CTLFLAG_RW, &mq_max_maxmsg, 0,
    "Maximum allowed message count");

SYSINIT(sys_mqueue_init, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, mqueue_sysinit,
    NULL);
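/*
 * The limits above can be tuned at run time through the sysctl nodes
 * defined above, e.g. (illustrative):
 *
 *	sysctl kern.mqueue.mq_max_msgsize=65536
 *	sysctl kern.mqueue.mq_def_maxmsg=64
 */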