1 /* $NetBSD: sys_mqueue.c,v 1.16 2009/04/11 23:05:26 christos Exp $ */ 2 3 /* 4 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Implementation of POSIX message queues. 31 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001. 32 * 33 * Locking 34 * 35 * Global list of message queues (mqueue_head) and proc_t::p_mqueue_cnt 36 * counter are protected by mqlist_mtx lock. The very message queue and 37 * its members are protected by mqueue::mq_mtx. 38 * 39 * Lock order: 40 * mqlist_mtx 41 * -> mqueue::mq_mtx 42 */ 43 44 #include <sys/param.h> 45 #include <sys/types.h> 46 #include <sys/errno.h> 47 #include <sys/fcntl.h> 48 #include <sys/file.h> 49 #include <sys/filedesc.h> 50 #include <sys/ucred.h> 51 #include <sys/caps.h> 52 #include <sys/kernel.h> 53 #include <sys/malloc.h> 54 #include <sys/mqueue.h> 55 #include <sys/proc.h> 56 #include <sys/queue.h> 57 #include <sys/event.h> 58 #include <sys/serialize.h> 59 #include <sys/signal.h> 60 #include <sys/signalvar.h> 61 #include <sys/spinlock.h> 62 #include <sys/spinlock2.h> 63 #include <sys/stat.h> 64 #include <sys/sysctl.h> 65 #include <sys/sysmsg.h> 66 #include <sys/systm.h> 67 #include <sys/lock.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 71 /* System-wide limits. */ 72 static u_int mq_open_max = MQ_OPEN_MAX; 73 static u_int mq_prio_max = MQ_PRIO_MAX; 74 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE; 75 static u_int mq_def_maxmsg = 32; 76 static u_int mq_max_maxmsg = 16 * 32; 77 78 static struct lock mqlist_mtx; 79 static LIST_HEAD(, mqueue) mqueue_head = 80 LIST_HEAD_INITIALIZER(mqueue_head); 81 82 typedef struct file file_t; /* XXX: Should we put this in sys/types.h ? */ 83 84 /* Function prototypes */ 85 static int mq_stat_fop(file_t *, struct stat *, struct ucred *cred); 86 static int mq_close_fop(file_t *); 87 static int mq_kqfilter_fop(struct file *fp, struct knote *kn); 88 static void mqfilter_read_detach(struct knote *kn); 89 static void mqfilter_write_detach(struct knote *kn); 90 static int mqfilter_read(struct knote *kn, long hint); 91 static int mqfilter_write(struct knote *kn, long hint); 92 93 /* Some time-related utility functions */ 94 static int tstohz(const struct timespec *ts); 95 96 /* File operations vector */ 97 static struct fileops mqops = { 98 .fo_read = badfo_readwrite, 99 .fo_write = badfo_readwrite, 100 .fo_ioctl = badfo_ioctl, 101 .fo_stat = mq_stat_fop, 102 .fo_close = mq_close_fop, 103 .fo_kqfilter = mq_kqfilter_fop, 104 .fo_shutdown = badfo_shutdown, 105 .fo_seek = badfo_seek 106 }; 107 108 /* Define a new malloc type for message queues */ 109 MALLOC_DECLARE(M_MQBUF); 110 MALLOC_DEFINE(M_MQBUF, "mqueues", "Buffers to message queues"); 111 112 /* 113 * Initialize POSIX message queue subsystem. 114 */ 115 void 116 mqueue_sysinit(void) 117 { 118 lockinit(&mqlist_mtx, "mqlist_mtx", 0, LK_CANRECURSE); 119 } 120 121 /* 122 * Free the message. 123 */ 124 static void 125 mqueue_freemsg(struct mq_msg *msg, const size_t size) 126 { 127 kfree(msg, M_MQBUF); 128 } 129 130 /* 131 * Destroy the message queue. 132 */ 133 static void 134 mqueue_destroy(struct mqueue *mq) 135 { 136 struct mq_msg *msg; 137 size_t msz; 138 u_int i; 139 140 /* Note MQ_PQSIZE + 1. */ 141 for (i = 0; i < MQ_PQSIZE + 1; i++) { 142 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) { 143 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue); 144 msz = sizeof(struct mq_msg) + msg->msg_len; 145 mqueue_freemsg(msg, msz); 146 } 147 } 148 lockuninit(&mq->mq_mtx); 149 kfree(mq, M_MQBUF); 150 } 151 152 /* 153 * Lookup for file name in general list of message queues. 154 * => locks the message queue 155 */ 156 static void * 157 mqueue_lookup(char *name) 158 { 159 struct mqueue *mq; 160 161 KKASSERT(lockstatus(&mqlist_mtx, curthread)); 162 163 LIST_FOREACH(mq, &mqueue_head, mq_list) { 164 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) { 165 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 166 return mq; 167 } 168 } 169 170 return NULL; 171 } 172 173 /* 174 * mqueue_get: get the mqueue from the descriptor. 175 * => locks the message queue, if found. 176 * => holds a reference on the file descriptor. 177 */ 178 static int 179 mqueue_get(struct lwp *l, mqd_t mqd, file_t **fpr) 180 { 181 struct mqueue *mq; 182 file_t *fp; 183 184 fp = holdfp(curthread, (int)mqd, -1); /* XXX: Why -1 ? */ 185 if (__predict_false(fp == NULL)) 186 return EBADF; 187 188 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) { 189 fdrop(fp); 190 return EBADF; 191 } 192 mq = fp->f_data; 193 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 194 195 *fpr = fp; 196 return 0; 197 } 198 199 /* 200 * mqueue_linear_insert: perform linear insert according to the message 201 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a 202 * sorted list used only when mq_prio_max is increased via sysctl. 203 */ 204 static inline void 205 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg) 206 { 207 struct mq_msg *mit; 208 209 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) { 210 if (msg->msg_prio > mit->msg_prio) 211 break; 212 } 213 if (mit == NULL) { 214 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue); 215 } else { 216 TAILQ_INSERT_BEFORE(mit, msg, msg_queue); 217 } 218 } 219 220 /* 221 * Compute number of ticks in the specified amount of time. 222 */ 223 static int 224 tstohz(const struct timespec *ts) 225 { 226 struct timeval tv; 227 228 /* 229 * usec has great enough resolution for hz, so convert to a 230 * timeval and use tvtohz() above. 231 */ 232 TIMESPEC_TO_TIMEVAL(&tv, ts); 233 return tvtohz_high(&tv); /* XXX Why _high() and not _low() ? */ 234 } 235 236 /* 237 * Converter from struct timespec to the ticks. 238 * Used by mq_timedreceive(), mq_timedsend(). 239 */ 240 int 241 abstimeout2timo(struct timespec *ts, int *timo) 242 { 243 struct timespec tsd; 244 int error; 245 246 error = itimespecfix(ts); 247 if (error) { 248 return error; 249 } 250 getnanotime(&tsd); 251 timespecsub(ts, &tsd, ts); 252 if (ts->tv_sec < 0 || (ts->tv_sec == 0 && ts->tv_nsec <= 0)) { 253 return ETIMEDOUT; 254 } 255 *timo = tstohz(ts); 256 KKASSERT(*timo != 0); 257 258 return 0; 259 } 260 261 static int 262 mq_stat_fop(file_t *fp, struct stat *st, struct ucred *cred) 263 { 264 struct mqueue *mq = fp->f_data; 265 266 (void)memset(st, 0, sizeof(*st)); 267 268 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 269 st->st_mode = mq->mq_mode; 270 st->st_uid = mq->mq_euid; 271 st->st_gid = mq->mq_egid; 272 st->st_atimespec = mq->mq_atime; 273 st->st_mtimespec = mq->mq_mtime; 274 /*st->st_ctimespec = st->st_birthtimespec = mq->mq_btime;*/ 275 st->st_uid = fp->f_cred->cr_uid; 276 st->st_gid = fp->f_cred->cr_svgid; 277 lockmgr(&mq->mq_mtx, LK_RELEASE); 278 279 return 0; 280 } 281 282 static struct filterops mqfiltops_read = 283 { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, mqfilter_read_detach, mqfilter_read }; 284 static struct filterops mqfiltops_write = 285 { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, mqfilter_write_detach, mqfilter_write }; 286 287 static int 288 mq_kqfilter_fop(struct file *fp, struct knote *kn) 289 { 290 struct mqueue *mq = fp->f_data; 291 struct klist *klist; 292 293 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 294 295 switch (kn->kn_filter) { 296 case EVFILT_READ: 297 kn->kn_fop = &mqfiltops_read; 298 kn->kn_hook = (caddr_t)mq; 299 klist = &mq->mq_rkq.ki_note; 300 break; 301 case EVFILT_WRITE: 302 kn->kn_fop = &mqfiltops_write; 303 kn->kn_hook = (caddr_t)mq; 304 klist = &mq->mq_wkq.ki_note; 305 break; 306 default: 307 lockmgr(&mq->mq_mtx, LK_RELEASE); 308 return (EOPNOTSUPP); 309 } 310 311 knote_insert(klist, kn); 312 lockmgr(&mq->mq_mtx, LK_RELEASE); 313 314 return (0); 315 } 316 317 static void 318 mqfilter_read_detach(struct knote *kn) 319 { 320 struct mqueue *mq = (struct mqueue *)kn->kn_hook; 321 322 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 323 struct klist *klist = &mq->mq_rkq.ki_note; 324 knote_remove(klist, kn); 325 lockmgr(&mq->mq_mtx, LK_RELEASE); 326 } 327 328 static void 329 mqfilter_write_detach(struct knote *kn) 330 { 331 struct mqueue *mq = (struct mqueue *)kn->kn_hook; 332 333 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 334 struct klist *klist = &mq->mq_wkq.ki_note; 335 knote_remove(klist, kn); 336 lockmgr(&mq->mq_mtx, LK_RELEASE); 337 } 338 339 static int 340 mqfilter_read(struct knote *kn, long hint) 341 { 342 struct mqueue *mq = (struct mqueue *)kn->kn_hook; 343 struct mq_attr *mqattr; 344 int ready = 0; 345 346 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 347 mqattr = &mq->mq_attrib; 348 /* Ready for receiving, if there are messages in the queue */ 349 if (mqattr->mq_curmsgs) 350 ready = 1; 351 lockmgr(&mq->mq_mtx, LK_RELEASE); 352 353 return (ready); 354 } 355 356 static int 357 mqfilter_write(struct knote *kn, long hint) 358 { 359 struct mqueue *mq = (struct mqueue *)kn->kn_hook; 360 struct mq_attr *mqattr; 361 int ready = 0; 362 363 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 364 mqattr = &mq->mq_attrib; 365 /* Ready for sending, if the message queue is not full */ 366 if (mqattr->mq_curmsgs < mqattr->mq_maxmsg) 367 ready = 1; 368 lockmgr(&mq->mq_mtx, LK_RELEASE); 369 370 return (ready); 371 } 372 373 static int 374 mq_close_fop(file_t *fp) 375 { 376 struct proc *p = curproc; 377 struct mqueue *mq = fp->f_data; 378 bool destroy; 379 380 lockmgr(&mqlist_mtx, LK_EXCLUSIVE); 381 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 382 383 /* Decrease the counters */ 384 p->p_mqueue_cnt--; 385 mq->mq_refcnt--; 386 387 /* Remove notification if registered for this process */ 388 if (mq->mq_notify_proc == p) 389 mq->mq_notify_proc = NULL; 390 391 /* 392 * If this is the last reference and mqueue is marked for unlink, 393 * remove and later destroy the message queue. 394 */ 395 if (mq->mq_refcnt == 0 && (mq->mq_attrib.mq_flags & MQ_UNLINK)) { 396 LIST_REMOVE(mq, mq_list); 397 destroy = true; 398 } else 399 destroy = false; 400 401 lockmgr(&mq->mq_mtx, LK_RELEASE); 402 lockmgr(&mqlist_mtx, LK_RELEASE); 403 404 if (destroy) 405 mqueue_destroy(mq); 406 407 return 0; 408 } 409 410 /* 411 * General mqueue system calls. 412 */ 413 414 int 415 sys_mq_open(struct sysmsg *sysmsg, const struct mq_open_args *uap) 416 { 417 /* { 418 syscallarg(const char *) name; 419 syscallarg(int) oflag; 420 syscallarg(mode_t) mode; 421 syscallarg(struct mq_attr) attr; 422 } */ 423 struct thread *td = curthread; 424 struct proc *p = td->td_proc; 425 struct filedesc *fdp = p->p_fd; 426 struct mqueue *mq, *mq_new = NULL; 427 file_t *fp; 428 char *name; 429 int mqd, error, oflag; 430 431 /* Check access mode flags */ 432 oflag = uap->oflag; 433 if ((oflag & O_ACCMODE) == (O_WRONLY | O_RDWR)) { 434 return EINVAL; 435 } 436 437 /* Get the name from the user-space */ 438 name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO | M_NULLOK); 439 if (name == NULL) 440 return (ENOMEM); 441 error = copyinstr(uap->name, name, MQ_NAMELEN - 1, NULL); 442 if (error) { 443 kfree(name, M_MQBUF); 444 return error; 445 } 446 447 if (oflag & O_CREAT) { 448 struct mq_attr attr; 449 u_int i; 450 451 /* Check the limit */ 452 if (p->p_mqueue_cnt == mq_open_max) { 453 kfree(name, M_MQBUF); 454 return EMFILE; 455 } 456 457 /* Empty name is invalid */ 458 if (name[0] == '\0') { 459 kfree(name, M_MQBUF); 460 return EINVAL; 461 } 462 463 /* Check for mqueue attributes */ 464 if (uap->attr) { 465 error = copyin(uap->attr, &attr, 466 sizeof(struct mq_attr)); 467 if (error) { 468 kfree(name, M_MQBUF); 469 return error; 470 } 471 if (attr.mq_maxmsg <= 0 || 472 attr.mq_maxmsg > mq_max_maxmsg || 473 attr.mq_msgsize <= 0 || 474 attr.mq_msgsize > mq_max_msgsize) { 475 kfree(name, M_MQBUF); 476 return EINVAL; 477 } 478 attr.mq_curmsgs = 0; 479 } else { 480 memset(&attr, 0, sizeof(struct mq_attr)); 481 attr.mq_maxmsg = mq_def_maxmsg; 482 attr.mq_msgsize = 483 MQ_DEF_MSGSIZE - sizeof(struct mq_msg); 484 } 485 486 /* 487 * Allocate new mqueue, initialize data structures, 488 * copy the name, attributes and set the flag. 489 */ 490 mq_new = kmalloc(sizeof(struct mqueue), M_MQBUF, 491 M_WAITOK | M_ZERO | M_NULLOK); 492 if (mq_new == NULL) { 493 kfree(name, M_MQBUF); 494 return (ENOMEM); 495 } 496 497 lockinit(&mq_new->mq_mtx, "mq_new->mq_mtx", 0, LK_CANRECURSE); 498 for (i = 0; i < (MQ_PQSIZE + 1); i++) { 499 TAILQ_INIT(&mq_new->mq_head[i]); 500 } 501 502 strlcpy(mq_new->mq_name, name, MQ_NAMELEN); 503 memcpy(&mq_new->mq_attrib, &attr, sizeof(struct mq_attr)); 504 505 /*CTASSERT((O_MASK & (MQ_UNLINK | MQ_RECEIVE)) == 0);*/ 506 /* mq_new->mq_attrib.mq_flags = (O_MASK & oflag); */ 507 mq_new->mq_attrib.mq_flags = oflag; 508 509 /* Store mode and effective UID with GID */ 510 mq_new->mq_mode = ((uap->mode & 511 ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT; 512 mq_new->mq_euid = td->td_ucred->cr_uid; 513 mq_new->mq_egid = td->td_ucred->cr_svgid; 514 } 515 516 /* Allocate file structure and descriptor */ 517 error = falloc(td->td_lwp, &fp, &mqd); 518 if (error) { 519 if (mq_new) 520 mqueue_destroy(mq_new); 521 kfree(name, M_MQBUF); 522 return error; 523 } 524 fp->f_type = DTYPE_MQUEUE; 525 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE); 526 fp->f_ops = &mqops; 527 528 /* Look up for mqueue with such name */ 529 lockmgr(&mqlist_mtx, LK_EXCLUSIVE); 530 mq = mqueue_lookup(name); 531 if (mq) { 532 int acc_mode; 533 534 KKASSERT(lockstatus(&mq->mq_mtx, curthread)); 535 536 /* Check if mqueue is not marked as unlinking */ 537 if (mq->mq_attrib.mq_flags & MQ_UNLINK) { 538 error = EACCES; 539 goto exit; 540 } 541 /* Fail if O_EXCL is set, and mqueue already exists */ 542 if ((oflag & O_CREAT) && (oflag & O_EXCL)) { 543 error = EEXIST; 544 goto exit; 545 } 546 547 /* 548 * Check the permissions. Note the difference between 549 * VREAD/VWRITE and FREAD/FWRITE. 550 */ 551 acc_mode = 0; 552 if (fp->f_flag & FREAD) { 553 acc_mode |= VREAD; 554 } 555 if (fp->f_flag & FWRITE) { 556 acc_mode |= VWRITE; 557 } 558 if (vaccess(VNON, mq->mq_mode, mq->mq_euid, mq->mq_egid, 559 acc_mode, td->td_ucred)) { 560 561 error = EACCES; 562 goto exit; 563 } 564 } else { 565 /* Fail if mqueue neither exists, nor we create it */ 566 if ((oflag & O_CREAT) == 0) { 567 lockmgr(&mqlist_mtx, LK_RELEASE); 568 KKASSERT(mq_new == NULL); 569 fsetfd(fdp, NULL, mqd); 570 fp->f_ops = &badfileops; 571 fdrop(fp); 572 kfree(name, M_MQBUF); 573 return ENOENT; 574 } 575 576 /* Check the limit */ 577 if (p->p_mqueue_cnt == mq_open_max) { 578 error = EMFILE; 579 goto exit; 580 } 581 582 /* Insert the queue to the list */ 583 mq = mq_new; 584 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE); 585 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list); 586 mq_new = NULL; 587 getnanotime(&mq->mq_btime); 588 mq->mq_atime = mq->mq_mtime = mq->mq_btime; 589 } 590 591 /* Increase the counters, and make descriptor ready */ 592 p->p_mqueue_cnt++; 593 mq->mq_refcnt++; 594 fp->f_data = mq; 595 exit: 596 lockmgr(&mq->mq_mtx, LK_RELEASE); 597 lockmgr(&mqlist_mtx, LK_RELEASE); 598 599 if (mq_new) 600 mqueue_destroy(mq_new); 601 if (error) { 602 fsetfd(fdp, NULL, mqd); 603 fp->f_ops = &badfileops; 604 } else { 605 fsetfd(fdp, fp, mqd); 606 sysmsg->sysmsg_result = mqd; 607 } 608 fdrop(fp); 609 kfree(name, M_MQBUF); 610 611 return error; 612 } 613 614 int 615 sys_mq_close(struct sysmsg *sysmsg, const struct mq_close_args *uap) 616 { 617 return sys_close(sysmsg, (const void *)uap); 618 } 619 620 /* 621 * Primary mq_receive1() function. 622 */ 623 int 624 mq_receive1(struct lwp *l, mqd_t mqdes, void *msg_ptr, size_t msg_len, 625 unsigned *msg_prio, struct timespec *ts, ssize_t *mlen) 626 { 627 file_t *fp = NULL; 628 struct mqueue *mq; 629 struct mq_msg *msg = NULL; 630 struct mq_attr *mqattr; 631 u_int idx; 632 int error; 633 634 /* Get the message queue */ 635 error = mqueue_get(l, mqdes, &fp); 636 if (error) { 637 return error; 638 } 639 mq = fp->f_data; 640 if ((fp->f_flag & FREAD) == 0) { 641 error = EBADF; 642 goto error; 643 } 644 getnanotime(&mq->mq_atime); 645 mqattr = &mq->mq_attrib; 646 647 /* Check the message size limits */ 648 if (msg_len < mqattr->mq_msgsize) { 649 error = EMSGSIZE; 650 goto error; 651 } 652 653 /* Check if queue is empty */ 654 while (mqattr->mq_curmsgs == 0) { 655 int t; 656 657 if (mqattr->mq_flags & O_NONBLOCK) { 658 error = EAGAIN; 659 goto error; 660 } 661 if (ts) { 662 error = abstimeout2timo(ts, &t); 663 if (error) 664 goto error; 665 } else 666 t = 0; 667 /* 668 * Block until someone sends the message. 669 * While doing this, notification should not be sent. 670 */ 671 mqattr->mq_flags |= MQ_RECEIVE; 672 error = lksleep(&mq->mq_send_cv, &mq->mq_mtx, PCATCH, "mqsend", t); 673 mqattr->mq_flags &= ~MQ_RECEIVE; 674 if (error || (mqattr->mq_flags & MQ_UNLINK)) { 675 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR; 676 goto error; 677 } 678 } 679 680 681 /* 682 * Find the highest priority message, and remove it from the queue. 683 * At first, reserved queue is checked, bitmap is next. 684 */ 685 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]); 686 if (__predict_true(msg == NULL)) { 687 idx = ffs(mq->mq_bitmap); 688 msg = TAILQ_FIRST(&mq->mq_head[idx]); 689 KKASSERT(msg != NULL); 690 } else { 691 idx = MQ_PQRESQ; 692 } 693 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue); 694 695 /* Unmark the bit, if last message. */ 696 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) { 697 KKASSERT((MQ_PQSIZE - idx) == msg->msg_prio); 698 mq->mq_bitmap &= ~(1 << --idx); 699 } 700 701 /* Decrement the counter and signal waiter, if any */ 702 mqattr->mq_curmsgs--; 703 wakeup_one(&mq->mq_recv_cv); 704 705 /* Ready for sending now */ 706 KNOTE(&mq->mq_wkq.ki_note, 0); 707 error: 708 lockmgr(&mq->mq_mtx, LK_RELEASE); 709 fdrop(fp); 710 if (error) 711 return error; 712 713 /* 714 * Copy the data to the user-space. 715 * Note: According to POSIX, no message should be removed from the 716 * queue in case of fail - this would be violated. 717 */ 718 *mlen = msg->msg_len; 719 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len); 720 if (error == 0 && msg_prio) 721 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned)); 722 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len); 723 724 return error; 725 } 726 727 int 728 sys_mq_receive(struct sysmsg *sysmsg, const struct mq_receive_args *uap) 729 { 730 /* { 731 syscallarg(mqd_t) mqdes; 732 syscallarg(char *) msg_ptr; 733 syscallarg(size_t) msg_len; 734 syscallarg(unsigned *) msg_prio; 735 } */ 736 ssize_t mlen; 737 int error; 738 739 error = mq_receive1(curthread->td_lwp, uap->mqdes, uap->msg_ptr, 740 uap->msg_len, uap->msg_prio, 0, &mlen); 741 if (error == 0) 742 sysmsg->sysmsg_result = mlen; 743 744 return error; 745 } 746 747 int 748 sys_mq_timedreceive(struct sysmsg *sysmsg, const struct mq_timedreceive_args *uap) 749 { 750 /* { 751 syscallarg(mqd_t) mqdes; 752 syscallarg(char *) msg_ptr; 753 syscallarg(size_t) msg_len; 754 syscallarg(unsigned *) msg_prio; 755 syscallarg(const struct timespec *) abs_timeout; 756 } */ 757 int error; 758 ssize_t mlen; 759 struct timespec ts, *tsp; 760 761 /* Get and convert time value */ 762 if (uap->abs_timeout) { 763 error = copyin(uap->abs_timeout, &ts, sizeof(ts)); 764 if (error) 765 return error; 766 tsp = &ts; 767 } else { 768 tsp = NULL; 769 } 770 771 error = mq_receive1(curthread->td_lwp, uap->mqdes, uap->msg_ptr, 772 uap->msg_len, uap->msg_prio, tsp, &mlen); 773 if (error == 0) 774 sysmsg->sysmsg_result = mlen; 775 776 return error; 777 } 778 779 /* 780 * Primary mq_send1() function. 781 */ 782 int 783 mq_send1(struct lwp *l, mqd_t mqdes, const char *msg_ptr, size_t msg_len, 784 unsigned msg_prio, struct timespec *ts) 785 { 786 file_t *fp = NULL; 787 struct mqueue *mq; 788 struct mq_msg *msg; 789 struct mq_attr *mqattr; 790 struct proc *notify = NULL; 791 /*ksiginfo_t ksi;*/ 792 size_t size; 793 int error; 794 795 /* Check the priority range */ 796 if (msg_prio >= mq_prio_max) 797 return EINVAL; 798 799 /* Allocate a new message */ 800 size = sizeof(struct mq_msg) + msg_len; 801 if (size > mq_max_msgsize) 802 return EMSGSIZE; 803 804 msg = kmalloc(size, M_MQBUF, M_WAITOK | M_NULLOK); 805 if (msg == NULL) 806 return (ENOMEM); 807 808 809 /* Get the data from user-space */ 810 error = copyin(msg_ptr, msg->msg_ptr, msg_len); 811 if (error) { 812 mqueue_freemsg(msg, size); 813 return error; 814 } 815 msg->msg_len = msg_len; 816 msg->msg_prio = msg_prio; 817 818 /* Get the mqueue */ 819 error = mqueue_get(l, mqdes, &fp); 820 if (error) { 821 mqueue_freemsg(msg, size); 822 return error; 823 } 824 mq = fp->f_data; 825 if ((fp->f_flag & FWRITE) == 0) { 826 error = EBADF; 827 goto error; 828 } 829 getnanotime(&mq->mq_mtime); 830 mqattr = &mq->mq_attrib; 831 832 /* Check the message size limit */ 833 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) { 834 error = EMSGSIZE; 835 goto error; 836 } 837 838 /* Check if queue is full */ 839 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) { 840 int t; 841 842 if (mqattr->mq_flags & O_NONBLOCK) { 843 error = EAGAIN; 844 goto error; 845 } 846 if (ts) { 847 error = abstimeout2timo(ts, &t); 848 if (error) 849 goto error; 850 } else 851 t = 0; 852 /* Block until queue becomes available */ 853 error = lksleep(&mq->mq_recv_cv, &mq->mq_mtx, PCATCH, "mqrecv", t); 854 if (error || (mqattr->mq_flags & MQ_UNLINK)) { 855 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error; 856 goto error; 857 } 858 } 859 KKASSERT(mq->mq_attrib.mq_curmsgs < mq->mq_attrib.mq_maxmsg); 860 861 /* 862 * Insert message into the queue, according to the priority. 863 * Note the difference between index and priority. 864 */ 865 if (__predict_true(msg_prio < MQ_PQSIZE)) { 866 u_int idx = MQ_PQSIZE - msg_prio; 867 868 KKASSERT(idx != MQ_PQRESQ); 869 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue); 870 mq->mq_bitmap |= (1 << --idx); 871 } else { 872 mqueue_linear_insert(mq, msg); 873 } 874 875 /* Check for the notify */ 876 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc && 877 (mqattr->mq_flags & MQ_RECEIVE) == 0 && 878 mq->mq_sig_notify.sigev_notify == SIGEV_SIGNAL) { 879 /* Initialize the signal */ 880 /*KSI_INIT(&ksi);*/ 881 /*ksi.ksi_signo = mq->mq_sig_notify.sigev_signo;*/ 882 /*ksi.ksi_code = SI_MESGQ;*/ 883 /*ksi.ksi_value = mq->mq_sig_notify.sigev_value;*/ 884 /* Unregister the process */ 885 notify = mq->mq_notify_proc; 886 mq->mq_notify_proc = NULL; 887 } 888 889 /* Increment the counter and signal waiter, if any */ 890 mqattr->mq_curmsgs++; 891 wakeup_one(&mq->mq_send_cv); 892 893 /* Ready for receiving now */ 894 KNOTE(&mq->mq_rkq.ki_note, 0); 895 error: 896 if (error) { 897 lockmgr(&mq->mq_mtx, LK_RELEASE); 898 fdrop(fp); 899 mqueue_freemsg(msg, size); 900 } else if (notify) { 901 PHOLD(notify); 902 lockmgr(&mq->mq_mtx, LK_RELEASE); 903 fdrop(fp); 904 /* Send the notify, if needed */ 905 /*kpsignal(notify, &ksi, NULL);*/ 906 ksignal(notify, mq->mq_sig_notify.sigev_signo); 907 PRELE(notify); 908 } else { 909 lockmgr(&mq->mq_mtx, LK_RELEASE); 910 fdrop(fp); 911 } 912 return error; 913 } 914 915 int 916 sys_mq_send(struct sysmsg *sysmsg, const struct mq_send_args *uap) 917 { 918 /* { 919 syscallarg(mqd_t) mqdes; 920 syscallarg(const char *) msg_ptr; 921 syscallarg(size_t) msg_len; 922 syscallarg(unsigned) msg_prio; 923 } */ 924 925 return mq_send1(curthread->td_lwp, uap->mqdes, uap->msg_ptr, 926 uap->msg_len, uap->msg_prio, 0); 927 } 928 929 int 930 sys_mq_timedsend(struct sysmsg *sysmsg, const struct mq_timedsend_args *uap) 931 { 932 /* { 933 syscallarg(mqd_t) mqdes; 934 syscallarg(const char *) msg_ptr; 935 syscallarg(size_t) msg_len; 936 syscallarg(unsigned) msg_prio; 937 syscallarg(const struct timespec *) abs_timeout; 938 } */ 939 struct timespec ts, *tsp; 940 int error; 941 942 /* Get and convert time value */ 943 if (uap->abs_timeout) { 944 error = copyin(uap->abs_timeout, &ts, sizeof(ts)); 945 if (error) 946 return error; 947 tsp = &ts; 948 } else { 949 tsp = NULL; 950 } 951 952 return mq_send1(curthread->td_lwp, uap->mqdes, uap->msg_ptr, 953 uap->msg_len, uap->msg_prio, tsp); 954 } 955 956 int 957 sys_mq_notify(struct sysmsg *sysmsg, const struct mq_notify_args *uap) 958 { 959 /* { 960 syscallarg(mqd_t) mqdes; 961 syscallarg(const struct sigevent *) notification; 962 } */ 963 file_t *fp = NULL; 964 struct mqueue *mq; 965 struct sigevent sig; 966 int error; 967 968 if (uap->notification) { 969 /* Get the signal from user-space */ 970 error = copyin(uap->notification, &sig, 971 sizeof(struct sigevent)); 972 if (error) 973 return error; 974 if (sig.sigev_notify == SIGEV_SIGNAL && 975 (sig.sigev_signo <= 0 || sig.sigev_signo >= NSIG)) 976 return EINVAL; 977 } 978 979 error = mqueue_get(curthread->td_lwp, uap->mqdes, &fp); 980 if (error) 981 return error; 982 mq = fp->f_data; 983 984 if (uap->notification) { 985 /* Register notification: set the signal and target process */ 986 if (mq->mq_notify_proc == NULL) { 987 memcpy(&mq->mq_sig_notify, &sig, 988 sizeof(struct sigevent)); 989 mq->mq_notify_proc = curproc; 990 } else { 991 /* Fail if someone else already registered */ 992 error = EBUSY; 993 } 994 } else { 995 /* Unregister the notification */ 996 mq->mq_notify_proc = NULL; 997 } 998 lockmgr(&mq->mq_mtx, LK_RELEASE); 999 fdrop(fp); 1000 1001 return error; 1002 } 1003 1004 int 1005 sys_mq_getattr(struct sysmsg *sysmsg, const struct mq_getattr_args *uap) 1006 { 1007 /* { 1008 syscallarg(mqd_t) mqdes; 1009 syscallarg(struct mq_attr *) mqstat; 1010 } */ 1011 file_t *fp = NULL; 1012 struct mqueue *mq; 1013 struct mq_attr attr; 1014 int error; 1015 1016 /* Get the message queue */ 1017 error = mqueue_get(curthread->td_lwp, uap->mqdes, &fp); 1018 if (error) 1019 return error; 1020 mq = fp->f_data; 1021 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1022 lockmgr(&mq->mq_mtx, LK_RELEASE); 1023 fdrop(fp); 1024 1025 return copyout(&attr, uap->mqstat, sizeof(struct mq_attr)); 1026 } 1027 1028 int 1029 sys_mq_setattr(struct sysmsg *sysmsg, const struct mq_setattr_args *uap) 1030 { 1031 /* { 1032 syscallarg(mqd_t) mqdes; 1033 syscallarg(const struct mq_attr *) mqstat; 1034 syscallarg(struct mq_attr *) omqstat; 1035 } */ 1036 file_t *fp = NULL; 1037 struct mqueue *mq; 1038 struct mq_attr attr; 1039 int error, nonblock; 1040 1041 error = copyin(uap->mqstat, &attr, sizeof(struct mq_attr)); 1042 if (error) 1043 return error; 1044 nonblock = (attr.mq_flags & O_NONBLOCK); 1045 1046 /* Get the message queue */ 1047 error = mqueue_get(curthread->td_lwp, uap->mqdes, &fp); 1048 if (error) 1049 return error; 1050 mq = fp->f_data; 1051 1052 /* Copy the old attributes, if needed */ 1053 if (uap->omqstat) { 1054 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr)); 1055 } 1056 1057 /* Ignore everything, except O_NONBLOCK */ 1058 if (nonblock) 1059 mq->mq_attrib.mq_flags |= O_NONBLOCK; 1060 else 1061 mq->mq_attrib.mq_flags &= ~O_NONBLOCK; 1062 1063 lockmgr(&mq->mq_mtx, LK_RELEASE); 1064 fdrop(fp); 1065 1066 /* 1067 * Copy the data to the user-space. 1068 * Note: According to POSIX, the new attributes should not be set in 1069 * case of fail - this would be violated. 1070 */ 1071 if (uap->omqstat) 1072 error = copyout(&attr, uap->omqstat, sizeof(struct mq_attr)); 1073 1074 return error; 1075 } 1076 1077 int 1078 sys_mq_unlink(struct sysmsg *sysmsg, const struct mq_unlink_args *uap) 1079 { 1080 /* { 1081 syscallarg(const char *) name; 1082 } */ 1083 struct thread *td = curthread; 1084 struct mqueue *mq; 1085 char *name; 1086 int error, refcnt = 0; 1087 1088 /* Get the name from the user-space */ 1089 name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO | M_NULLOK); 1090 if (name == NULL) 1091 return (ENOMEM); 1092 error = copyinstr(uap->name, name, MQ_NAMELEN - 1, NULL); 1093 if (error) { 1094 kfree(name, M_MQBUF); 1095 return error; 1096 } 1097 1098 /* Lookup for this file */ 1099 lockmgr(&mqlist_mtx, LK_EXCLUSIVE); 1100 mq = mqueue_lookup(name); 1101 if (mq == NULL) { 1102 error = ENOENT; 1103 goto error; 1104 } 1105 1106 /* Check the permissions */ 1107 if (td->td_ucred->cr_uid != mq->mq_euid && 1108 caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT) != 0) 1109 { 1110 lockmgr(&mq->mq_mtx, LK_RELEASE); 1111 error = EACCES; 1112 goto error; 1113 } 1114 1115 /* Mark message queue as unlinking, before leaving the window */ 1116 mq->mq_attrib.mq_flags |= MQ_UNLINK; 1117 1118 /* Wake up all waiters, if there are such */ 1119 wakeup(&mq->mq_send_cv); 1120 wakeup(&mq->mq_recv_cv); 1121 1122 KNOTE(&mq->mq_rkq.ki_note, 0); 1123 KNOTE(&mq->mq_wkq.ki_note, 0); 1124 1125 refcnt = mq->mq_refcnt; 1126 if (refcnt == 0) 1127 LIST_REMOVE(mq, mq_list); 1128 1129 lockmgr(&mq->mq_mtx, LK_RELEASE); 1130 error: 1131 lockmgr(&mqlist_mtx, LK_RELEASE); 1132 1133 /* 1134 * If there are no references - destroy the message 1135 * queue, otherwise, the last mq_close() will do that. 1136 */ 1137 if (error == 0 && refcnt == 0) 1138 mqueue_destroy(mq); 1139 1140 kfree(name, M_MQBUF); 1141 return error; 1142 } 1143 1144 /* 1145 * SysCtl. 1146 */ 1147 SYSCTL_NODE(_kern, OID_AUTO, mqueue, 1148 CTLFLAG_RW, 0, "Message queue options"); 1149 1150 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_open_max, 1151 CTLFLAG_RW, &mq_open_max, 0, 1152 "Maximal number of message queue descriptors per process"); 1153 1154 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_prio_max, 1155 CTLFLAG_RW, &mq_prio_max, 0, 1156 "Maximal priority of the message"); 1157 1158 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_max_msgsize, 1159 CTLFLAG_RW, &mq_max_msgsize, 0, 1160 "Maximal allowed size of the message"); 1161 1162 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_def_maxmsg, 1163 CTLFLAG_RW, &mq_def_maxmsg, 0, 1164 "Default maximal message count"); 1165 1166 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_max_maxmsg, 1167 CTLFLAG_RW, &mq_max_maxmsg, 0, 1168 "Maximal allowed message count"); 1169 1170 SYSINIT(sys_mqueue_init, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, mqueue_sysinit, NULL); 1171