/*	$OpenBSD: uipc_usrreq.c,v 1.102 2016/08/26 07:12:30 guenther Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>

void	uipc_setaddr(const struct unpcb *, struct mbuf *);

/* list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb) unp_head = LIST_HEAD_INITIALIZER(unp_head);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;
	int	ud_n;
	/* followed by ud_n struct file * pointers */
	struct file	*ud_fp[];
};

/* list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral) unp_deferred = SLIST_HEAD_INITIALIZER(unp_deferred);

struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);


/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;		/* prototype for fake inode numbers */

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL && req != PRU_ATTACH) {
		error = EINVAL;
		goto release;
	}
	switch (req) {

	case PRU_ATTACH:
		if (unp) {
			error = EISCONN;
			break;
		}
		error = unp_attach(so);
		break;

	case PRU_DETACH:
		unp_detach(unp);
		break;

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		error = unp_connect2(so, (struct socket *)nam);
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
#define	rcv (&so->so_rcv)
#define	snd (&so2->so_snd)
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			snd->sb_mbcnt = rcv->sb_mbcnt;
			snd->sb_cc = rcv->sb_cc;
			sowwakeup(so2);
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			struct sockaddr *from;

			if (nam) {
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(rcv, m, control))
					control = NULL;
				else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(rcv, m);
			else
				sbappend(rcv, m);
			snd->sb_mbcnt = rcv->sb_mbcnt;
			snd->sb_cc = rcv->sb_cc;
			sorwakeup(so2);
			m = NULL;
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE: {
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		return (0);
	}

	case PRU_RCVOOB:
		return (EOPNOTSUPP);

	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("piusrreq");
	}
release:
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

int	unp_rights;			/* file descriptors in flight */

int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	return (0);
}

void
unp_detach(struct unpcb *unp)
{
	struct vnode *vp;

	LIST_REMOVE(unp, unp_link);
	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = NULL;
		vp = unp->unp_vnode;
		unp->unp_vnode = NULL;
		vrele(vp);
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = NULL;
	m_freem(unp->unp_addr);
	free(unp, M_PCB, sizeof *unp);
	if (unp_rights)
		task_add(systq, &unp_gc_task);
}

int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_vnode != NULL)
		return (EINVAL);

	if (soun->sun_len > sizeof(struct sockaddr_un) ||
	    soun->sun_len < offsetof(struct sockaddr_un, sun_path))
		return (EINVAL);
	if (soun->sun_family != AF_UNIX)
		return (EAFNOSUPPORT);

	pathlen = strnlen(soun->sun_path, soun->sun_len -
	    offsetof(struct sockaddr_un, sun_path));
	if (pathlen == sizeof(soun->sun_path))
		return (EINVAL);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0) {
		m_freem(nam2);
		return (error);
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		return (EADDRINUSE);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error) {
		m_freem(nam2);
		return (error);
	}
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp, p);
	return (0);
}

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error;
	struct nameidata nd;

	if (soun->sun_family != AF_UNIX)
		return (EAFNOSUPPORT);

	if (nam->m_len < sizeof(struct sockaddr_un))
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	else if (nam->m_len > sizeof(struct sockaddr_un))
		return (EINVAL);
	else if (memchr(soun->sun_path, '\0', sizeof(soun->sun_path)) == NULL)
		return (EINVAL);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = NULL;
		sofree(so);
		m_freem(unp->unp_addr);
		free(unp, M_PCB, sizeof *unp);
	}
}

#ifdef notdef
unp_drain(void)
{

}
#endif

extern	struct domain unixdomain;

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int i, *fdp = NULL;
	struct file **rp;
	struct file *fp;
	int nfds, error = 0;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct file *);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto restart;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct file **)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		fp = *rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (p->p_fd->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, p->p_fd->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	fdp = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

restart:
	fdplock(p->p_fd);
	if (error != 0) {
		if (nfds > 0) {
			rp = ((struct file **)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct file **)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(p->p_fd, fdp[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			fdpunlock(p->p_fd);
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		p->p_fd->fd_ofiles[fdp[i]] = *rp++;

		if (flags & MSG_CMSG_CLOEXEC)
			p->p_fd->fd_ofileflags[fdp[i]] |= UF_EXCLOSE;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct file **)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = *rp++;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	fdpunlock(p->p_fd);
	if (fdp)
		free(fdp, M_TEMP, nfds * sizeof(int));
	return (error);
}

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp, *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
	    control->m_len;
	if (neededspace > M_TRAILINGSPACE(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			return (ENOBUFS);       /* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count == LONG_MAX-2) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		memcpy(rp, &fp, sizeof fp);
		rp--;
		fp->f_count++;
		if ((unp = fptounp(fp)) != NULL) {
			unp->unp_file = fp;
			unp->unp_msgcount++;
		}
		unp_rights++;
	}
	return (0);
fail:
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		memcpy(&fp, rp, sizeof(fp));
		fp->f_count--;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	return (error);
}

int	unp_defer, unp_gcing;

void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	if (unp_gcing)
		return;
	unp_gcing = 1;

	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i];
			if (fp == NULL)
				continue;
			FREF(fp);
			if ((unp = fptounp(fp)) != NULL)
				unp->unp_msgcount--;
			unp_rights--;
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) + sizeof(fp) * defer->ud_n);
	}

	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD);
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_flags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_flags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if ((fp = unp->unp_file) == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_flags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_flags |= UNP_GCMARK;

			so = unp->unp_socket;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEAD)
				unp_scan(unp->unp_socket->so_rcv.sb_mb,
				    unp_discard);
		}
	}
	unp_gcing = 0;
}

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct file **, int))
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct file *);
				if (qfds > 0) {
					rp = (struct file **)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

void
unp_mark(struct file **rp, int nfds)
{
	struct unpcb *unp;
	int i;

	for (i = 0; i < nfds; i++) {
		if (rp[i] == NULL)
			continue;

		unp = fptounp(rp[i]);
		if (unp == NULL)
			continue;

		if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER))
			continue;

		unp_defer++;
		unp->unp_flags |= UNP_GCDEFER;
		unp->unp_flags &= ~UNP_GCDEAD;
	}
}

void
unp_discard(struct file **rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);

	task_add(systq, &unp_gc_task);
}
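
/*
 * Illustration only -- not compiled as part of this file.  The
 * unp_internalize()/unp_externalize() machinery above is what backs
 * SCM_RIGHTS control messages; a minimal userland sender could look
 * like the sketch below.  The helper name send_fd() and its calling
 * convention are assumptions made for the example, not an interface
 * defined anywhere in this file.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *	#include <string.h>
 *
 *	int
 *	send_fd(int sock, int fd)
 *	{
 *		struct msghdr msg;
 *		struct cmsghdr *cmsg;
 *		union {
 *			struct cmsghdr hdr;
 *			char buf[CMSG_SPACE(sizeof(int))];
 *		} cmsgbuf;
 *		struct iovec iov;
 *		char c = 0;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		iov.iov_base = &c;
 *		iov.iov_len = sizeof(c);
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = cmsgbuf.buf;
 *		msg.msg_controllen = sizeof(cmsgbuf.buf);
 *
 *		cmsg = CMSG_FIRSTHDR(&msg);
 *		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *		cmsg->cmsg_level = SOL_SOCKET;
 *		cmsg->cmsg_type = SCM_RIGHTS;
 *		memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
 *
 *		return (sendmsg(sock, &msg, 0) == -1 ? -1 : 0);
 *	}
 */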