/* $NetBSD: sys_mqueue.c,v 1.16 2009/04/11 23:05:26 christos Exp $ */

/*
 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX message queues.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 *
 * Locking
 *
 * The global list of message queues (mqueue_head) and the
 * proc_t::p_mqueue_cnt counter are protected by the mqlist_mtx lock.
 * Each message queue and its members are protected by mqueue::mq_mtx.
 *
 * Lock order:
 *	mqlist_mtx
 *	  -> mqueue::mq_mtx
 */

#include <stdbool.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/ucred.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mplock2.h>
#include <sys/mqueue.h>
#include <sys/objcache.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/event.h>
#include <sys/serialize.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/* System-wide limits. */
static u_int mq_open_max = MQ_OPEN_MAX;
static u_int mq_prio_max = MQ_PRIO_MAX;
static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE;
static u_int mq_def_maxmsg = 32;
static u_int mq_max_maxmsg = 16 * 32;

struct lock mqlist_mtx;
static struct objcache *mqmsg_cache;
static LIST_HEAD(, mqueue) mqueue_head =
	LIST_HEAD_INITIALIZER(mqueue_head);

typedef struct file file_t;	/* XXX: Should we put this in sys/types.h ? */
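/*
 * Illustrative sketch only (not compiled): typical user-space usage of the
 * syscalls implemented below, through the standard POSIX <mqueue.h>
 * wrappers.  The queue name and message are arbitrary examples and error
 * handling is omitted.
 *
 *	struct mq_attr attr;
 *	mqd_t mqd = mq_open("/myq", O_CREAT | O_RDWR, 0600, NULL);
 *	mq_getattr(mqd, &attr);
 *	char *buf = malloc(attr.mq_msgsize);
 *	mq_send(mqd, "hello", 6, 0);
 *	mq_receive(mqd, buf, attr.mq_msgsize, NULL);
 *	mq_close(mqd);
 *	mq_unlink("/myq");
 */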
/* Function prototypes */
static int	mq_stat_fop(file_t *, struct stat *, struct ucred *cred);
static int	mq_close_fop(file_t *);
static int	mq_kqfilter_fop(struct file *fp, struct knote *kn);
static void	mqfilter_read_detach(struct knote *kn);
static void	mqfilter_write_detach(struct knote *kn);
static int	mqfilter_read(struct knote *kn, long hint);
static int	mqfilter_write(struct knote *kn, long hint);

/* Some time-related utility functions */
static int	itimespecfix(struct timespec *ts);
static int	tstohz(const struct timespec *ts);

/* File operations vector */
static struct fileops mqops = {
	.fo_read = badfo_readwrite,
	.fo_write = badfo_readwrite,
	.fo_ioctl = badfo_ioctl,
	.fo_stat = mq_stat_fop,
	.fo_close = mq_close_fop,
	.fo_kqfilter = mq_kqfilter_fop,
	.fo_shutdown = badfo_shutdown
};

/* Define a new malloc type for message queues */
MALLOC_DECLARE(M_MQBUF);
MALLOC_DEFINE(M_MQBUF, "mqueues", "Buffers to message queues");

/* Malloc arguments for the object cache */
struct objcache_malloc_args mqueue_malloc_args = {
	sizeof(struct mqueue), M_MQBUF };

/*
 * Initialize the POSIX message queue subsystem.
 */
void
mqueue_sysinit(void)
{
	mqmsg_cache = objcache_create("mqmsg_cache",
	    0,		/* infinite depot capacity */
	    0,		/* default magazine capacity */
	    NULL,	/* constructor */
	    NULL,	/* destructor */
	    NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &mqueue_malloc_args);

	lockinit(&mqlist_mtx, "mqlist_mtx", 0, LK_CANRECURSE);
}

/*
 * Free the message.
 */
static void
mqueue_freemsg(struct mq_msg *msg, const size_t size)
{

	if (size > MQ_DEF_MSGSIZE) {
		kfree(msg, M_MQBUF);
	} else {
		objcache_put(mqmsg_cache, msg);
	}
}

/*
 * Destroy the message queue.
 */
static void
mqueue_destroy(struct mqueue *mq)
{
	struct mq_msg *msg;
	size_t msz;
	u_int i;

	/* Note MQ_PQSIZE + 1. */
	for (i = 0; i < MQ_PQSIZE + 1; i++) {
		while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) {
			TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue);
			msz = sizeof(struct mq_msg) + msg->msg_len;
			mqueue_freemsg(msg, msz);
		}
	}
	lockuninit(&mq->mq_mtx);
	kfree(mq, M_MQBUF);
}

/*
 * Look up a message queue by name in the global list.
 * => locks the message queue, if found
 */
static void *
mqueue_lookup(char *name)
{
	struct mqueue *mq;

	KKASSERT(lockstatus(&mqlist_mtx, curthread));

	LIST_FOREACH(mq, &mqueue_head, mq_list) {
		if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) {
			lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
			return mq;
		}
	}

	return NULL;
}

/*
 * mqueue_get: get the mqueue from the descriptor.
 * => locks the message queue, if found.
 * => holds a reference on the file descriptor.
 */
static int
mqueue_get(struct lwp *l, mqd_t mqd, file_t **fpr)
{
	struct mqueue *mq;
	file_t *fp;

	fp = holdfp(curproc->p_fd, (int)mqd, -1);	/* XXX: Why -1 ? */
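	/*
	 * Assumption (not verified against holdfp() itself): the -1 flag
	 * appears to skip the FREAD/FWRITE access check here, since
	 * mq_receive1() and mq_send1() check fp->f_flag themselves.
	 */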
	if (__predict_false(fp == NULL))
		return EBADF;

	if (__predict_false(fp->f_type != DTYPE_MQUEUE)) {
		fdrop(fp);
		return EBADF;
	}
	mq = fp->f_data;
	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);

	*fpr = fp;
	return 0;
}

/*
 * mqueue_linear_insert: perform a linear insert according to the message
 * priority into the reserved queue (MQ_PQRESQ).  The reserved queue is a
 * sorted list used only when mq_prio_max is increased via sysctl.
 */
static inline void
mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg)
{
	struct mq_msg *mit;

	TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) {
		if (msg->msg_prio > mit->msg_prio)
			break;
	}
	if (mit == NULL) {
		TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue);
	} else {
		TAILQ_INSERT_BEFORE(mit, msg, msg_queue);
	}
}

/*
 * Validate the input.
 */
static int
itimespecfix(struct timespec *ts)
{
	if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
		return (EINVAL);
	if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < nstick)
		ts->tv_nsec = nstick;
	return (0);
}

/*
 * Compute the number of ticks in the specified amount of time.
 */
static int
tstohz(const struct timespec *ts)
{
	struct timeval tv;

	/*
	 * usec has sufficient resolution for hz, so convert to a
	 * timeval and use tvtohz_high().
	 */
	TIMESPEC_TO_TIMEVAL(&tv, ts);
	return tvtohz_high(&tv);	/* XXX Why _high() and not _low() ? */
}

/*
 * Convert an absolute timeout (struct timespec) to a timeout in ticks.
 * Used by mq_timedreceive(), mq_timedsend().
 */
int
abstimeout2timo(struct timespec *ts, int *timo)
{
	struct timespec tsd;
	int error;

	error = itimespecfix(ts);
	if (error) {
		return error;
	}
	getnanotime(&tsd);
	timespecsub(ts, &tsd);
	if (ts->tv_sec < 0 || (ts->tv_sec == 0 && ts->tv_nsec <= 0)) {
		return ETIMEDOUT;
	}
	*timo = tstohz(ts);
	KKASSERT(*timo != 0);

	return 0;
}

static int
mq_stat_fop(file_t *fp, struct stat *st, struct ucred *cred)
{
	struct mqueue *mq = fp->f_data;

	(void)memset(st, 0, sizeof(*st));

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
	st->st_mode = mq->mq_mode;
	st->st_uid = mq->mq_euid;
	st->st_gid = mq->mq_egid;
	st->st_atimespec = mq->mq_atime;
	st->st_mtimespec = mq->mq_mtime;
	/*st->st_ctimespec = st->st_birthtimespec = mq->mq_btime;*/
	st->st_uid = fp->f_cred->cr_uid;
	st->st_gid = fp->f_cred->cr_svgid;
	lockmgr(&mq->mq_mtx, LK_RELEASE);

	return 0;
}

static struct filterops mqfiltops_read =
	{ 1, NULL, mqfilter_read_detach, mqfilter_read };
static struct filterops mqfiltops_write =
	{ 1, NULL, mqfilter_write_detach, mqfilter_write };

static int
mq_kqfilter_fop(struct file *fp, struct knote *kn)
{
	struct mqueue *mq = fp->f_data;
	struct klist *klist;

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &mqfiltops_read;
		kn->kn_hook = (caddr_t)mq;
		klist = &mq->mq_rsel.si_note;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &mqfiltops_write;
		kn->kn_hook = (caddr_t)mq;
		klist = &mq->mq_wsel.si_note;
		break;
	default:
		lockmgr(&mq->mq_mtx, LK_RELEASE);
		return (EOPNOTSUPP);
	}

	crit_enter();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	crit_exit();
	lockmgr(&mq->mq_mtx, LK_RELEASE);

	return (0);
}

static void
mqfilter_read_detach(struct knote *kn)
{
	struct mqueue *mq = (struct mqueue *)kn->kn_hook;
	struct klist *klist = &mq->mq_rsel.si_note;

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
	crit_enter();
	SLIST_REMOVE(klist, kn, knote, kn_selnext);
	crit_exit();
	lockmgr(&mq->mq_mtx, LK_RELEASE);
}

static void
mqfilter_write_detach(struct knote *kn)
{
	struct mqueue *mq = (struct mqueue *)kn->kn_hook;
	struct klist *klist = &mq->mq_wsel.si_note;

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
	crit_enter();
	SLIST_REMOVE(klist, kn, knote, kn_selnext);
	crit_exit();
	lockmgr(&mq->mq_mtx, LK_RELEASE);
}

static int
mqfilter_read(struct knote *kn, long hint)
{
	struct mqueue *mq = (struct mqueue *)kn->kn_hook;
	struct mq_attr *mqattr;
	int ready = 0;

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
	mqattr = &mq->mq_attrib;
	/* Ready for receiving, if there are messages in the queue */
	if (mqattr->mq_curmsgs)
		ready = 1;
	lockmgr(&mq->mq_mtx, LK_RELEASE);

	return (ready);
}

static int
mqfilter_write(struct knote *kn, long hint)
{
	struct mqueue *mq = (struct mqueue *)kn->kn_hook;
	struct mq_attr *mqattr;
	int ready = 0;

	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
	mqattr = &mq->mq_attrib;
	/* Ready for sending, if the message queue is not full */
	if (mqattr->mq_curmsgs < mqattr->mq_maxmsg)
		ready = 1;
	lockmgr(&mq->mq_mtx, LK_RELEASE);

	return (ready);
}

static int
mq_close_fop(file_t *fp)
{
	struct proc *p = curproc;
	struct mqueue *mq = fp->f_data;
	bool destroy;

	lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
	lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);

	/* Decrease the counters */
	p->p_mqueue_cnt--;
	mq->mq_refcnt--;

	/* Remove the notification if it is registered for this process */
	if (mq->mq_notify_proc == p)
		mq->mq_notify_proc = NULL;

	/*
	 * If this is the last reference and the mqueue is marked for unlink,
	 * remove it from the list and destroy it below.
	 */
	if (mq->mq_refcnt == 0 && (mq->mq_attrib.mq_flags & MQ_UNLINK)) {
		LIST_REMOVE(mq, mq_list);
		destroy = true;
	} else
		destroy = false;

	lockmgr(&mq->mq_mtx, LK_RELEASE);
	lockmgr(&mqlist_mtx, LK_RELEASE);

	if (destroy)
		mqueue_destroy(mq);

	return 0;
}

/*
 * General mqueue system calls.
 */

int
sys_mq_open(struct mq_open_args *uap)
{
	/* {
		syscallarg(const char *) name;
		syscallarg(int) oflag;
		syscallarg(mode_t) mode;
		syscallarg(struct mq_attr) attr;
	} */
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct mqueue *mq, *mq_new = NULL;
	file_t *fp;
	char *name;
	int mqd, error, oflag;

	/* Check the access mode flags */
	oflag = SCARG(uap, oflag);
	if ((oflag & O_ACCMODE) == (O_WRONLY | O_RDWR)) {
		return EINVAL;
	}

	/* Get the name from user-space */
	name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO);
	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kfree(name, M_MQBUF);
		return error;
	}

	if (oflag & O_CREAT) {
		struct mq_attr attr;
		u_int i;

		/* Check the limit */
		if (p->p_mqueue_cnt == mq_open_max) {
			kfree(name, M_MQBUF);
			return EMFILE;
		}

		/* An empty name is invalid */
		if (name[0] == '\0') {
			kfree(name, M_MQBUF);
			return EINVAL;
		}

		/* Check for mqueue attributes */
		if (SCARG(uap, attr)) {
			error = copyin(SCARG(uap, attr), &attr,
			    sizeof(struct mq_attr));
			if (error) {
				kfree(name, M_MQBUF);
				return error;
			}
			if (attr.mq_maxmsg <= 0 ||
			    attr.mq_maxmsg > mq_max_maxmsg ||
			    attr.mq_msgsize <= 0 ||
			    attr.mq_msgsize > mq_max_msgsize) {
				kfree(name, M_MQBUF);
				return EINVAL;
			}
			attr.mq_curmsgs = 0;
		} else {
			memset(&attr, 0, sizeof(struct mq_attr));
			attr.mq_maxmsg = mq_def_maxmsg;
			attr.mq_msgsize =
			    MQ_DEF_MSGSIZE - sizeof(struct mq_msg);
		}

		/*
		 * Allocate a new mqueue, initialize the data structures,
		 * copy the name and attributes, and set the flags.
		 */
		mq_new = kmalloc(sizeof(struct mqueue), M_MQBUF,
		    M_WAITOK | M_ZERO);

		lockinit(&mq_new->mq_mtx, "mq_new->mq_mtx", 0, LK_CANRECURSE);
		for (i = 0; i < (MQ_PQSIZE + 1); i++) {
			TAILQ_INIT(&mq_new->mq_head[i]);
		}

		strlcpy(mq_new->mq_name, name, MQ_NAMELEN);
		memcpy(&mq_new->mq_attrib, &attr, sizeof(struct mq_attr));

		/*CTASSERT((O_MASK & (MQ_UNLINK | MQ_RECEIVE)) == 0);*/
		/* mq_new->mq_attrib.mq_flags = (O_MASK & oflag); */
		mq_new->mq_attrib.mq_flags = oflag;

		/* Store the mode and the effective UID and GID */
		mq_new->mq_mode = ((SCARG(uap, mode) &
		    ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
		mq_new->mq_euid = td->td_ucred->cr_uid;
		mq_new->mq_egid = td->td_ucred->cr_svgid;
	}

	/* Allocate a file structure and descriptor */
	error = falloc(td->td_lwp, &fp, &mqd);
	if (error) {
		if (mq_new)
			mqueue_destroy(mq_new);
		kfree(name, M_MQBUF);
		return error;
	}
	fp->f_type = DTYPE_MQUEUE;
	fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE);
	fp->f_ops = &mqops;

	/* Look up a mqueue with the given name */
	lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
	mq = mqueue_lookup(name);
	if (mq) {
		int acc_mode;

		KKASSERT(lockstatus(&mq->mq_mtx, curthread));

		/* Check that the mqueue is not marked as unlinking */
		if (mq->mq_attrib.mq_flags & MQ_UNLINK) {
			error = EACCES;
			goto exit;
		}
		/* Fail if O_EXCL is set and the mqueue already exists */
		if ((oflag & O_CREAT) && (oflag & O_EXCL)) {
			error = EEXIST;
			goto exit;
		}

		/*
		 * Check the permissions.  Note the difference between
		 * VREAD/VWRITE and FREAD/FWRITE.
		 */
586 */ 587 acc_mode = 0; 588 if (fp->f_flag & FREAD) { 589 acc_mode |= VREAD; 590 } 591 if (fp->f_flag & FWRITE) { 592 acc_mode |= VWRITE; 593 } 594 if (vaccess(VNON, mq->mq_mode, mq->mq_euid, mq->mq_egid, 595 acc_mode, td->td_ucred)) { 596 597 error = EACCES; 598 goto exit; 599 } 600 } else { 601 /* Fail if mqueue neither exists, nor we create it */ 602 if ((oflag & O_CREAT) == 0) { 603 lockmgr(&mqlist_mtx, LK_RELEASE); 604 KKASSERT(mq_new == NULL); 605 fsetfd(fdp, NULL, mqd); 606 fp->f_ops = &badfileops; 607 fdrop(fp); 608 kfree(name, M_MQBUF); 609 return ENOENT; 610 } 611 612 /* Check the limit */ 613 if (p->p_mqueue_cnt == mq_open_max) { 614 error = EMFILE; 615 goto exit; 616 } 617 618 /* Insert the queue to the list */ 619 mq = mq_new; 620 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 621 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 622 mq_new = NULL; 623 getnanotime(&mq->mq_btime); 624 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 625 } 626 627 /* Increase the counters, and make descriptor ready */ 628 p->p_mqueue_cnt++; 629 mq->mq_refcnt++; 630 fp->f_data = mq; 631 exit: 632 lockmgr(&mq->mq_mtx, LK_RELEASE); 633 lockmgr(&mqlist_mtx, LK_RELEASE); 634 635 if (mq_new) 636 mqueue_destroy(mq_new); 637 if (error) { 638 fsetfd(fdp, NULL, mqd); 639 fp->f_ops = &badfileops; 640 } else { 641 fsetfd(fdp, fp, mqd); 642 uap->sysmsg_result = mqd; 643 } 644 fdrop(fp); 645 kfree(name, M_MQBUF); 646 647 return error; 648 } 649 650 int 651 sys_mq_close(struct mq_close_args *uap) 652 { 653 return sys_close((void *)uap); 654 } 655 656 /* 657 * Primary mq_receive1() function. 658 */ 659 int 660 mq_receive1(struct lwp *l, mqd_t mqdes, void *msg_ptr, size_t msg_len, 661 unsigned *msg_prio, struct timespec *ts, ssize_t *mlen) 662 { 663 file_t *fp = NULL; 664 struct mqueue *mq; 665 struct mq_msg *msg = NULL; 666 struct mq_attr *mqattr; 667 u_int idx; 668 int error; 669 670 /* Get the message queue */ 671 error = mqueue_get(l, mqdes, &fp); 672 if (error) { 673 return error; 674 } 675 mq = fp->f_data; 676 if ((fp->f_flag & FREAD) == 0) { 677 error = EBADF; 678 goto error; 679 } 680 getnanotime(&mq->mq_atime); 681 mqattr = &mq->mq_attrib; 682 683 /* Check the message size limits */ 684 if (msg_len < mqattr->mq_msgsize) { 685 error = EMSGSIZE; 686 goto error; 687 } 688 689 /* Check if queue is empty */ 690 while (mqattr->mq_curmsgs == 0) { 691 int t; 692 693 if (mqattr->mq_flags & O_NONBLOCK) { 694 error = EAGAIN; 695 goto error; 696 } 697 if (ts) { 698 error = abstimeout2timo(ts, &t); 699 if (error) 700 goto error; 701 } else 702 t = 0; 703 /* 704 * Block until someone sends the message. 705 * While doing this, notification should not be sent. 706 */ 707 mqattr->mq_flags |= MQ_RECEIVE; 708 error = lksleep(&mq->mq_send_cv, &mq->mq_mtx, PCATCH, "mqsend", t); 709 mqattr->mq_flags &= ~MQ_RECEIVE; 710 if (error || (mqattr->mq_flags & MQ_UNLINK)) { 711 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 712 goto error; 713 } 714 } 715 716 717 /* 718 * Find the highest priority message, and remove it from the queue. 719 * At first, reserved queue is checked, bitmap is next. 720 */ 721 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 722 if (__predict_true(msg == NULL)) { 723 idx = ffs(mq->mq_bitmap); 724 msg = TAILQ_FIRST(&mq->mq_head[idx]); 725 KKASSERT(msg != NULL); 726 } else { 727 idx = MQ_PQRESQ; 728 } 729 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 730 731 /* Unmark the bit, if last message. 
	if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) {
		KKASSERT((MQ_PQSIZE - idx) == msg->msg_prio);
		mq->mq_bitmap &= ~(1 << --idx);
	}

	/* Decrement the counter and signal a waiter, if any */
	mqattr->mq_curmsgs--;
	wakeup_one(&mq->mq_recv_cv);

	/* Ready for sending now */
	get_mplock();
	KNOTE(&mq->mq_wsel.si_note, 0);
	rel_mplock();
error:
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);
	if (error)
		return error;

	/*
	 * Copy the data out to user-space.
	 * Note: POSIX requires that no message be removed from the queue if
	 * the copy-out fails; that requirement is violated here, since the
	 * message has already been dequeued.
	 */
	*mlen = msg->msg_len;
	error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len);
	if (error == 0 && msg_prio)
		error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned));
	mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len);

	return error;
}

int
sys_mq_receive(struct mq_receive_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned *) msg_prio;
	} */
	ssize_t mlen;
	int error;

	error = mq_receive1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio),
	    0, &mlen);
	if (error == 0)
		uap->sysmsg_result = mlen;

	return error;
}

int
sys_mq_timedreceive(struct mq_timedreceive_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned *) msg_prio;
		syscallarg(const struct timespec *) abs_timeout;
	} */
	int error;
	ssize_t mlen;
	struct timespec ts, *tsp;

	/* Get and convert the time value */
	if (SCARG(uap, abs_timeout)) {
		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
		if (error)
			return error;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	error = mq_receive1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio),
	    tsp, &mlen);
	if (error == 0)
		uap->sysmsg_result = mlen;

	return error;
}

/*
 * Primary mq_send1() function.
 */
int
mq_send1(struct lwp *l, mqd_t mqdes, const char *msg_ptr, size_t msg_len,
    unsigned msg_prio, struct timespec *ts)
{
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_msg *msg;
	struct mq_attr *mqattr;
	struct proc *notify = NULL;
	/*ksiginfo_t ksi;*/
	size_t size;
	int error;

	/* Check the priority range */
	if (msg_prio >= mq_prio_max)
		return EINVAL;

	/* Allocate a new message */
	size = sizeof(struct mq_msg) + msg_len;
	if (size > mq_max_msgsize)
		return EMSGSIZE;

	if (size > MQ_DEF_MSGSIZE) {
		msg = kmalloc(size, M_MQBUF, M_WAITOK);
	} else {
		msg = objcache_get(mqmsg_cache, M_WAITOK);
	}

	/* Get the data from user-space */
	error = copyin(msg_ptr, msg->msg_ptr, msg_len);
	if (error) {
		mqueue_freemsg(msg, size);
		return error;
	}
	msg->msg_len = msg_len;
	msg->msg_prio = msg_prio;

	/* Get the mqueue */
	error = mqueue_get(l, mqdes, &fp);
	if (error) {
		mqueue_freemsg(msg, size);
		return error;
	}
	mq = fp->f_data;
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto error;
	}
	getnanotime(&mq->mq_mtime);
	mqattr = &mq->mq_attrib;

	/* Check the message size limit */
	if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) {
		error = EMSGSIZE;
		goto error;
	}

	/* Check if the queue is full */
	while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) {
		int t;

		if (mqattr->mq_flags & O_NONBLOCK) {
			error = EAGAIN;
			goto error;
		}
		if (ts) {
			error = abstimeout2timo(ts, &t);
			if (error)
				goto error;
		} else
			t = 0;
		/* Block until space becomes available in the queue */
		error = lksleep(&mq->mq_recv_cv, &mq->mq_mtx, PCATCH,
		    "mqrecv", t);
		if (error || (mqattr->mq_flags & MQ_UNLINK)) {
			error = (error == EWOULDBLOCK) ? ETIMEDOUT : error;
			goto error;
		}
	}
	KKASSERT(mq->mq_attrib.mq_curmsgs < mq->mq_attrib.mq_maxmsg);

	/*
	 * Insert the message into the queue according to its priority.
	 * Note the difference between index and priority.
	 */
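	/*
	 * Sketch of the mapping (derived from the code below, not normative):
	 * priority p, 0 <= p < MQ_PQSIZE, goes to mq_head[MQ_PQSIZE - p] and
	 * sets bitmap bit (MQ_PQSIZE - p - 1), so higher priorities use lower
	 * indices/bits and ffs() on the receive side picks the
	 * highest-priority non-empty queue.
	 */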
	if (__predict_true(msg_prio < MQ_PQSIZE)) {
		u_int idx = MQ_PQSIZE - msg_prio;

		KKASSERT(idx != MQ_PQRESQ);
		TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue);
		mq->mq_bitmap |= (1 << --idx);
	} else {
		mqueue_linear_insert(mq, msg);
	}

	/* Check whether a notification should be sent */
	if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc &&
	    (mqattr->mq_flags & MQ_RECEIVE) == 0 &&
	    mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) {
		/* Initialize the signal */
		/*KSI_INIT(&ksi);*/
		/*ksi.ksi_signo = mq->mq_sig_notify.sigev_signo;*/
		/*ksi.ksi_code = SI_MESGQ;*/
		/*ksi.ksi_value = mq->mq_sig_notify.sigev_value;*/
		/* Unregister the process */
		notify = mq->mq_notify_proc;
		mq->mq_notify_proc = NULL;
	}

	/* Increment the counter and signal a waiter, if any */
	mqattr->mq_curmsgs++;
	wakeup_one(&mq->mq_send_cv);

	/* Ready for receiving now */
	get_mplock();
	KNOTE(&mq->mq_rsel.si_note, 0);
	rel_mplock();
error:
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	if (error) {
		mqueue_freemsg(msg, size);
	} else if (notify) {
		/* Send the notification, if needed */
		lwkt_gettoken(&proc_token);
		/*kpsignal(notify, &ksi, NULL);*/
		ksignal(notify, mq->mq_sig_notify.sigev_signo);
		lwkt_reltoken(&proc_token);
	}

	return error;
}

int
sys_mq_send(struct mq_send_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned) msg_prio;
	} */

	return mq_send1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio), 0);
}

int
sys_mq_timedsend(struct mq_timedsend_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const char *) msg_ptr;
		syscallarg(size_t) msg_len;
		syscallarg(unsigned) msg_prio;
		syscallarg(const struct timespec *) abs_timeout;
	} */
	struct timespec ts, *tsp;
	int error;

	/* Get and convert the time value */
	if (SCARG(uap, abs_timeout)) {
		error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
		if (error)
			return error;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	return mq_send1(curthread->td_lwp, SCARG(uap, mqdes),
	    SCARG(uap, msg_ptr), SCARG(uap, msg_len), SCARG(uap, msg_prio),
	    tsp);
}

int
sys_mq_notify(struct mq_notify_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct sigevent *) notification;
	} */
	file_t *fp = NULL;
	struct mqueue *mq;
	struct sigevent sig;
	int error;

	if (SCARG(uap, notification)) {
		/* Get the signal from user-space */
		error = copyin(SCARG(uap, notification), &sig,
		    sizeof(struct sigevent));
		if (error)
			return error;
		if (sig.sigev_notify == SIGEV_SIGNAL &&
		    (sig.sigev_signo <= 0 || sig.sigev_signo >= NSIG))
			return EINVAL;
	}

	error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
	if (error)
		return error;
	mq = fp->f_data;

	if (SCARG(uap, notification)) {
		/* Register the notification: set the signal and target process */
		if (mq->mq_notify_proc == NULL) {
			memcpy(&mq->mq_sig_notify, &sig,
			    sizeof(struct sigevent));
			mq->mq_notify_proc = curproc;
		} else {
			/* Fail if someone else has already registered */
			error = EBUSY;
		}
	} else {
		/* Unregister the notification */
		mq->mq_notify_proc = NULL;
	}
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	return error;
}

int
sys_mq_getattr(struct mq_getattr_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(struct mq_attr *) mqstat;
	} */
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_attr attr;
	int error;

	/* Get the message queue */
	error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
	if (error)
		return error;
	mq = fp->f_data;
	memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr));
}

int
sys_mq_setattr(struct mq_setattr_args *uap)
{
	/* {
		syscallarg(mqd_t) mqdes;
		syscallarg(const struct mq_attr *) mqstat;
		syscallarg(struct mq_attr *) omqstat;
	} */
	file_t *fp = NULL;
	struct mqueue *mq;
	struct mq_attr attr;
	int error, nonblock;

	error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr));
	if (error)
		return error;
	nonblock = (attr.mq_flags & O_NONBLOCK);

	/* Get the message queue */
	error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
	if (error)
		return error;
	mq = fp->f_data;

	/* Copy the old attributes, if needed */
	if (SCARG(uap, omqstat)) {
		memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
	}

	/* Ignore everything except O_NONBLOCK */
	if (nonblock)
		mq->mq_attrib.mq_flags |= O_NONBLOCK;
	else
		mq->mq_attrib.mq_flags &= ~O_NONBLOCK;

	lockmgr(&mq->mq_mtx, LK_RELEASE);
	fdrop(fp);

	/*
	 * Copy the old attributes out to user-space.
	 * Note: POSIX requires that the new attributes not be applied if the
	 * copy-out fails; that requirement is violated here, since the flags
	 * have already been updated.
	 */
	if (SCARG(uap, omqstat))
		error = copyout(&attr, SCARG(uap, omqstat),
		    sizeof(struct mq_attr));

	return error;
}

int
sys_mq_unlink(struct mq_unlink_args *uap)
{
	/* {
		syscallarg(const char *) name;
	} */
	struct thread *td = curthread;
	struct mqueue *mq;
	char *name;
	int error, refcnt = 0;

	/* Get the name from user-space */
	name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO);
	error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
	if (error) {
		kfree(name, M_MQBUF);
		return error;
	}

	/* Look up the message queue by name */
	lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
	mq = mqueue_lookup(name);
	if (mq == NULL) {
		error = ENOENT;
		goto error;
	}

	/* Check the permissions */
	if (td->td_ucred->cr_uid != mq->mq_euid &&
	    priv_check(td, PRIV_ROOT) != 0) {
		lockmgr(&mq->mq_mtx, LK_RELEASE);
		error = EACCES;
		goto error;
	}

	/* Mark the message queue as unlinking before the locks are dropped */
	mq->mq_attrib.mq_flags |= MQ_UNLINK;

	/* Wake up all waiters, if any */
	wakeup(&mq->mq_send_cv);
	wakeup(&mq->mq_recv_cv);

	get_mplock();
	KNOTE(&mq->mq_rsel.si_note, 0);
	KNOTE(&mq->mq_wsel.si_note, 0);
	rel_mplock();

	refcnt = mq->mq_refcnt;
	if (refcnt == 0)
		LIST_REMOVE(mq, mq_list);

	lockmgr(&mq->mq_mtx, LK_RELEASE);
error:
	lockmgr(&mqlist_mtx, LK_RELEASE);

	/*
	 * If there are no references, destroy the message queue;
	 * otherwise, the last mq_close() will do that.
	 */
	if (error == 0 && refcnt == 0)
		mqueue_destroy(mq);

	kfree(name, M_MQBUF);
	return error;
}

/*
 * SysCtl.
 */
SYSCTL_NODE(_kern, OID_AUTO, mqueue,
    CTLFLAG_RW, 0, "Message queue options");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_open_max,
    CTLFLAG_RW, &mq_open_max, 0,
    "Maximum number of message queue descriptors per process");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_prio_max,
    CTLFLAG_RW, &mq_prio_max, 0,
    "Maximum message priority");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_max_msgsize,
    CTLFLAG_RW, &mq_max_msgsize, 0,
    "Maximum allowed message size");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_def_maxmsg,
    CTLFLAG_RW, &mq_def_maxmsg, 0,
    "Default maximum message count");

SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_max_maxmsg,
    CTLFLAG_RW, &mq_max_maxmsg, 0,
    "Maximum allowed message count");

SYSINIT(sys_mqueue_init, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, mqueue_sysinit, NULL);