1 /* $OpenBSD: uipc_usrreq.c,v 1.78 2014/11/03 03:08:00 deraadt Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/proc.h> 38 #include <sys/filedesc.h> 39 #include <sys/domain.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/unpcb.h> 44 #include <sys/un.h> 45 #include <sys/namei.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/stat.h> 49 #include <sys/mbuf.h> 50 51 void uipc_setaddr(const struct unpcb *, struct mbuf *); 52 53 /* 54 * Unix communications domain. 55 * 56 * TODO: 57 * RDM 58 * rethink name space problems 59 * need a proper out-of-band 60 */ 61 struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; 62 ino_t unp_ino; /* prototype for fake inode numbers */ 63 64 void 65 uipc_setaddr(const struct unpcb *unp, struct mbuf *nam) 66 { 67 if (unp != NULL && unp->unp_addr != NULL) { 68 nam->m_len = unp->unp_addr->m_len; 69 bcopy(mtod(unp->unp_addr, caddr_t), mtod(nam, caddr_t), 70 nam->m_len); 71 } else { 72 nam->m_len = sizeof(sun_noname); 73 bcopy(&sun_noname, mtod(nam, struct sockaddr *), 74 nam->m_len); 75 } 76 } 77 78 /*ARGSUSED*/ 79 int 80 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 81 struct mbuf *control, struct proc *p) 82 { 83 struct unpcb *unp = sotounpcb(so); 84 struct socket *so2; 85 int error = 0; 86 87 if (req == PRU_CONTROL) 88 return (EOPNOTSUPP); 89 if (req != PRU_SEND && control && control->m_len) { 90 error = EOPNOTSUPP; 91 goto release; 92 } 93 if (unp == NULL && req != PRU_ATTACH) { 94 error = EINVAL; 95 goto release; 96 } 97 switch (req) { 98 99 case PRU_ATTACH: 100 if (unp) { 101 error = EISCONN; 102 break; 103 } 104 error = unp_attach(so); 105 break; 106 107 case PRU_DETACH: 108 unp_detach(unp); 109 break; 110 111 case PRU_BIND: 112 error = unp_bind(unp, nam, p); 113 break; 114 115 case PRU_LISTEN: 116 if (unp->unp_vnode == NULL) 117 error = EINVAL; 118 break; 119 120 case PRU_CONNECT: 121 error = unp_connect(so, nam, p); 122 break; 123 124 case PRU_CONNECT2: 125 error = unp_connect2(so, (struct socket *)nam); 126 break; 127 128 case PRU_DISCONNECT: 129 unp_disconnect(unp); 130 break; 131 132 case PRU_ACCEPT: 133 /* 134 * Pass back name of connected socket, 135 * if it was bound and we are still connected 136 * (our peer may have closed already!). 137 */ 138 uipc_setaddr(unp->unp_conn, nam); 139 break; 140 141 case PRU_SHUTDOWN: 142 socantsendmore(so); 143 unp_shutdown(unp); 144 break; 145 146 case PRU_RCVD: 147 switch (so->so_type) { 148 149 case SOCK_DGRAM: 150 panic("uipc 1"); 151 /*NOTREACHED*/ 152 153 case SOCK_STREAM: 154 case SOCK_SEQPACKET: 155 #define rcv (&so->so_rcv) 156 #define snd (&so2->so_snd) 157 if (unp->unp_conn == NULL) 158 break; 159 so2 = unp->unp_conn->unp_socket; 160 /* 161 * Adjust backpressure on sender 162 * and wakeup any waiting to write. 163 */ 164 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 165 unp->unp_mbcnt = rcv->sb_mbcnt; 166 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 167 unp->unp_cc = rcv->sb_cc; 168 sowwakeup(so2); 169 #undef snd 170 #undef rcv 171 break; 172 173 default: 174 panic("uipc 2"); 175 } 176 break; 177 178 case PRU_SEND: 179 if (control && (error = unp_internalize(control, p))) 180 break; 181 switch (so->so_type) { 182 183 case SOCK_DGRAM: { 184 struct sockaddr *from; 185 186 if (nam) { 187 if (unp->unp_conn) { 188 error = EISCONN; 189 break; 190 } 191 error = unp_connect(so, nam, p); 192 if (error) 193 break; 194 } else { 195 if (unp->unp_conn == NULL) { 196 error = ENOTCONN; 197 break; 198 } 199 } 200 so2 = unp->unp_conn->unp_socket; 201 if (unp->unp_addr) 202 from = mtod(unp->unp_addr, struct sockaddr *); 203 else 204 from = &sun_noname; 205 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 206 sorwakeup(so2); 207 m = NULL; 208 control = NULL; 209 } else 210 error = ENOBUFS; 211 if (nam) 212 unp_disconnect(unp); 213 break; 214 } 215 216 case SOCK_STREAM: 217 case SOCK_SEQPACKET: 218 #define rcv (&so2->so_rcv) 219 #define snd (&so->so_snd) 220 if (so->so_state & SS_CANTSENDMORE) { 221 error = EPIPE; 222 break; 223 } 224 if (unp->unp_conn == NULL) { 225 error = ENOTCONN; 226 break; 227 } 228 so2 = unp->unp_conn->unp_socket; 229 /* 230 * Send to paired receive port, and then reduce 231 * send buffer hiwater marks to maintain backpressure. 232 * Wake up readers. 233 */ 234 if (control) { 235 if (sbappendcontrol(rcv, m, control)) 236 control = NULL; 237 } else if (so->so_type == SOCK_SEQPACKET) 238 sbappendrecord(rcv, m); 239 else 240 sbappend(rcv, m); 241 snd->sb_mbmax -= 242 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 243 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 244 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 245 unp->unp_conn->unp_cc = rcv->sb_cc; 246 sorwakeup(so2); 247 m = NULL; 248 #undef snd 249 #undef rcv 250 break; 251 252 default: 253 panic("uipc 4"); 254 } 255 /* we need to undo unp_internalize in case of errors */ 256 if (control && error) 257 unp_dispose(control); 258 break; 259 260 case PRU_ABORT: 261 unp_drop(unp, ECONNABORTED); 262 break; 263 264 case PRU_SENSE: { 265 struct stat *sb = (struct stat *)m; 266 267 sb->st_blksize = so->so_snd.sb_hiwat; 268 switch (so->so_type) { 269 case SOCK_STREAM: 270 case SOCK_SEQPACKET: 271 if (unp->unp_conn != NULL) { 272 so2 = unp->unp_conn->unp_socket; 273 sb->st_blksize += so2->so_rcv.sb_cc; 274 } 275 break; 276 default: 277 break; 278 } 279 sb->st_dev = NODEV; 280 if (unp->unp_ino == 0) 281 unp->unp_ino = unp_ino++; 282 sb->st_atim.tv_sec = 283 sb->st_mtim.tv_sec = 284 sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec; 285 sb->st_atim.tv_nsec = 286 sb->st_mtim.tv_nsec = 287 sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec; 288 sb->st_ino = unp->unp_ino; 289 return (0); 290 } 291 292 case PRU_RCVOOB: 293 return (EOPNOTSUPP); 294 295 case PRU_SENDOOB: 296 error = EOPNOTSUPP; 297 break; 298 299 case PRU_SOCKADDR: 300 uipc_setaddr(unp, nam); 301 break; 302 303 case PRU_PEERADDR: 304 uipc_setaddr(unp->unp_conn, nam); 305 break; 306 307 case PRU_SLOWTIMO: 308 break; 309 310 default: 311 panic("piusrreq"); 312 } 313 release: 314 if (control) 315 m_freem(control); 316 if (m) 317 m_freem(m); 318 return (error); 319 } 320 321 /* 322 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 323 * for stream sockets, although the total for sender and receiver is 324 * actually only PIPSIZ. 325 * Datagram sockets really use the sendspace as the maximum datagram size, 326 * and don't really want to reserve the sendspace. Their recvspace should 327 * be large enough for at least one max-size datagram plus address. 328 */ 329 #define PIPSIZ 4096 330 u_long unpst_sendspace = PIPSIZ; 331 u_long unpst_recvspace = PIPSIZ; 332 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 333 u_long unpdg_recvspace = 4*1024; 334 335 int unp_rights; /* file descriptors in flight */ 336 337 int 338 unp_attach(struct socket *so) 339 { 340 struct unpcb *unp; 341 int error; 342 343 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 344 switch (so->so_type) { 345 346 case SOCK_STREAM: 347 case SOCK_SEQPACKET: 348 error = soreserve(so, unpst_sendspace, unpst_recvspace); 349 break; 350 351 case SOCK_DGRAM: 352 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 353 break; 354 355 default: 356 panic("unp_attach"); 357 } 358 if (error) 359 return (error); 360 } 361 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO); 362 if (unp == NULL) 363 return (ENOBUFS); 364 unp->unp_socket = so; 365 so->so_pcb = unp; 366 getnanotime(&unp->unp_ctime); 367 return (0); 368 } 369 370 void 371 unp_detach(struct unpcb *unp) 372 { 373 struct vnode *vp; 374 375 if (unp->unp_vnode) { 376 unp->unp_vnode->v_socket = NULL; 377 vp = unp->unp_vnode; 378 unp->unp_vnode = NULL; 379 vrele(vp); 380 } 381 if (unp->unp_conn) 382 unp_disconnect(unp); 383 while (unp->unp_refs) 384 unp_drop(unp->unp_refs, ECONNRESET); 385 soisdisconnected(unp->unp_socket); 386 unp->unp_socket->so_pcb = NULL; 387 m_freem(unp->unp_addr); 388 if (unp_rights) { 389 /* 390 * Normally the receive buffer is flushed later, 391 * in sofree, but if our receive buffer holds references 392 * to descriptors that are now garbage, we will dispose 393 * of those descriptor references after the garbage collector 394 * gets them (resulting in a "panic: closef: count < 0"). 395 */ 396 sorflush(unp->unp_socket); 397 free(unp, M_PCB, 0); 398 unp_gc(); 399 } else 400 free(unp, M_PCB, 0); 401 } 402 403 int 404 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p) 405 { 406 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 407 struct mbuf *nam2; 408 struct vnode *vp; 409 struct vattr vattr; 410 int error; 411 struct nameidata nd; 412 size_t pathlen; 413 414 if (unp->unp_vnode != NULL) 415 return (EINVAL); 416 417 if (soun->sun_len > sizeof(struct sockaddr_un) || 418 soun->sun_len < offsetof(struct sockaddr_un, sun_path)) 419 return (EINVAL); 420 if (soun->sun_family != AF_UNIX) 421 return (EAFNOSUPPORT); 422 423 pathlen = strnlen(soun->sun_path, soun->sun_len - 424 offsetof(struct sockaddr_un, sun_path)); 425 if (pathlen == sizeof(soun->sun_path)) 426 return (EINVAL); 427 428 nam2 = m_getclr(M_WAITOK, MT_SONAME); 429 nam2->m_len = sizeof(struct sockaddr_un); 430 memcpy(mtod(nam2, struct sockaddr_un *), soun, 431 offsetof(struct sockaddr_un, sun_path) + pathlen); 432 /* No need to NUL terminate: m_getclr() returns zero'd mbufs. */ 433 434 soun = mtod(nam2, struct sockaddr_un *); 435 436 /* Fixup sun_len to keep it in sync with m_len. */ 437 soun->sun_len = nam2->m_len; 438 439 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, 440 soun->sun_path, p); 441 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 442 if ((error = namei(&nd)) != 0) { 443 m_freem(nam2); 444 return (error); 445 } 446 vp = nd.ni_vp; 447 if (vp != NULL) { 448 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 449 if (nd.ni_dvp == vp) 450 vrele(nd.ni_dvp); 451 else 452 vput(nd.ni_dvp); 453 vrele(vp); 454 m_freem(nam2); 455 return (EADDRINUSE); 456 } 457 VATTR_NULL(&vattr); 458 vattr.va_type = VSOCK; 459 vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; 460 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 461 if (error) { 462 m_freem(nam2); 463 return (error); 464 } 465 unp->unp_addr = nam2; 466 vp = nd.ni_vp; 467 vp->v_socket = unp->unp_socket; 468 unp->unp_vnode = vp; 469 unp->unp_connid.uid = p->p_ucred->cr_uid; 470 unp->unp_connid.gid = p->p_ucred->cr_gid; 471 unp->unp_connid.pid = p->p_p->ps_pid; 472 unp->unp_flags |= UNP_FEIDSBIND; 473 VOP_UNLOCK(vp, 0, p); 474 return (0); 475 } 476 477 int 478 unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) 479 { 480 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 481 struct vnode *vp; 482 struct socket *so2, *so3; 483 struct unpcb *unp, *unp2, *unp3; 484 int error; 485 struct nameidata nd; 486 487 if (soun->sun_family != AF_UNIX) 488 return (EAFNOSUPPORT); 489 490 if (nam->m_len < sizeof(struct sockaddr_un)) 491 *(mtod(nam, caddr_t) + nam->m_len) = 0; 492 else if (nam->m_len > sizeof(struct sockaddr_un)) 493 return (EINVAL); 494 else if (memchr(soun->sun_path, '\0', sizeof(soun->sun_path)) == NULL) 495 return (EINVAL); 496 497 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p); 498 if ((error = namei(&nd)) != 0) 499 return (error); 500 vp = nd.ni_vp; 501 if (vp->v_type != VSOCK) { 502 error = ENOTSOCK; 503 goto bad; 504 } 505 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 506 goto bad; 507 so2 = vp->v_socket; 508 if (so2 == NULL) { 509 error = ECONNREFUSED; 510 goto bad; 511 } 512 if (so->so_type != so2->so_type) { 513 error = EPROTOTYPE; 514 goto bad; 515 } 516 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 517 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 518 (so3 = sonewconn(so2, 0)) == 0) { 519 error = ECONNREFUSED; 520 goto bad; 521 } 522 unp = sotounpcb(so); 523 unp2 = sotounpcb(so2); 524 unp3 = sotounpcb(so3); 525 if (unp2->unp_addr) 526 unp3->unp_addr = 527 m_copy(unp2->unp_addr, 0, (int)M_COPYALL); 528 unp3->unp_connid.uid = p->p_ucred->cr_uid; 529 unp3->unp_connid.gid = p->p_ucred->cr_gid; 530 unp3->unp_connid.pid = p->p_p->ps_pid; 531 unp3->unp_flags |= UNP_FEIDS; 532 so2 = so3; 533 if (unp2->unp_flags & UNP_FEIDSBIND) { 534 unp->unp_connid = unp2->unp_connid; 535 unp->unp_flags |= UNP_FEIDS; 536 } 537 } 538 error = unp_connect2(so, so2); 539 bad: 540 vput(vp); 541 return (error); 542 } 543 544 int 545 unp_connect2(struct socket *so, struct socket *so2) 546 { 547 struct unpcb *unp = sotounpcb(so); 548 struct unpcb *unp2; 549 550 if (so2->so_type != so->so_type) 551 return (EPROTOTYPE); 552 unp2 = sotounpcb(so2); 553 unp->unp_conn = unp2; 554 switch (so->so_type) { 555 556 case SOCK_DGRAM: 557 unp->unp_nextref = unp2->unp_refs; 558 unp2->unp_refs = unp; 559 soisconnected(so); 560 break; 561 562 case SOCK_STREAM: 563 case SOCK_SEQPACKET: 564 unp2->unp_conn = unp; 565 soisconnected(so); 566 soisconnected(so2); 567 break; 568 569 default: 570 panic("unp_connect2"); 571 } 572 return (0); 573 } 574 575 void 576 unp_disconnect(struct unpcb *unp) 577 { 578 struct unpcb *unp2 = unp->unp_conn; 579 580 if (unp2 == NULL) 581 return; 582 unp->unp_conn = NULL; 583 switch (unp->unp_socket->so_type) { 584 585 case SOCK_DGRAM: 586 if (unp2->unp_refs == unp) 587 unp2->unp_refs = unp->unp_nextref; 588 else { 589 unp2 = unp2->unp_refs; 590 for (;;) { 591 if (unp2 == NULL) 592 panic("unp_disconnect"); 593 if (unp2->unp_nextref == unp) 594 break; 595 unp2 = unp2->unp_nextref; 596 } 597 unp2->unp_nextref = unp->unp_nextref; 598 } 599 unp->unp_nextref = NULL; 600 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 601 break; 602 603 case SOCK_STREAM: 604 case SOCK_SEQPACKET: 605 soisdisconnected(unp->unp_socket); 606 unp2->unp_conn = NULL; 607 soisdisconnected(unp2->unp_socket); 608 break; 609 } 610 } 611 612 void 613 unp_shutdown(struct unpcb *unp) 614 { 615 struct socket *so; 616 617 switch (unp->unp_socket->so_type) { 618 case SOCK_STREAM: 619 case SOCK_SEQPACKET: 620 if (unp->unp_conn && (so = unp->unp_conn->unp_socket)) 621 socantrcvmore(so); 622 break; 623 default: 624 break; 625 } 626 } 627 628 void 629 unp_drop(struct unpcb *unp, int errno) 630 { 631 struct socket *so = unp->unp_socket; 632 633 so->so_error = errno; 634 unp_disconnect(unp); 635 if (so->so_head) { 636 so->so_pcb = NULL; 637 sofree(so); 638 m_freem(unp->unp_addr); 639 free(unp, M_PCB, sizeof(*unp)); 640 } 641 } 642 643 #ifdef notdef 644 unp_drain(void) 645 { 646 647 } 648 #endif 649 650 int 651 unp_externalize(struct mbuf *rights, socklen_t controllen, int flags) 652 { 653 struct proc *p = curproc; /* XXX */ 654 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 655 int i, *fdp = NULL; 656 struct file **rp; 657 struct file *fp; 658 int nfds, error = 0; 659 660 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / 661 sizeof(struct file *); 662 if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) 663 controllen = 0; 664 else 665 controllen -= CMSG_ALIGN(sizeof(struct cmsghdr)); 666 if (nfds > controllen / sizeof(int)) { 667 error = EMSGSIZE; 668 goto restart; 669 } 670 671 rp = (struct file **)CMSG_DATA(cm); 672 673 fdp = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK); 674 675 /* Make sure the recipient should be able to see the descriptors.. */ 676 if (p->p_fd->fd_rdir != NULL) { 677 rp = (struct file **)CMSG_DATA(cm); 678 for (i = 0; i < nfds; i++) { 679 fp = *rp++; 680 /* 681 * No to block devices. If passing a directory, 682 * make sure that it is underneath the root. 683 */ 684 if (fp->f_type == DTYPE_VNODE) { 685 struct vnode *vp = (struct vnode *)fp->f_data; 686 687 if (vp->v_type == VBLK || 688 (vp->v_type == VDIR && 689 !vn_isunder(vp, p->p_fd->fd_rdir, p))) { 690 error = EPERM; 691 break; 692 } 693 } 694 } 695 } 696 697 restart: 698 fdplock(p->p_fd); 699 if (error != 0) { 700 rp = ((struct file **)CMSG_DATA(cm)); 701 for (i = 0; i < nfds; i++) { 702 fp = *rp; 703 /* 704 * zero the pointer before calling unp_discard, 705 * since it may end up in unp_gc().. 706 */ 707 *rp++ = NULL; 708 unp_discard(fp); 709 } 710 goto out; 711 } 712 713 /* 714 * First loop -- allocate file descriptor table slots for the 715 * new descriptors. 716 */ 717 rp = ((struct file **)CMSG_DATA(cm)); 718 for (i = 0; i < nfds; i++) { 719 if ((error = fdalloc(p, 0, &fdp[i])) != 0) { 720 /* 721 * Back out what we've done so far. 722 */ 723 for (--i; i >= 0; i--) 724 fdremove(p->p_fd, fdp[i]); 725 726 if (error == ENOSPC) { 727 fdexpand(p); 728 error = 0; 729 } else { 730 /* 731 * This is the error that has historically 732 * been returned, and some callers may 733 * expect it. 734 */ 735 error = EMSGSIZE; 736 } 737 fdpunlock(p->p_fd); 738 goto restart; 739 } 740 741 /* 742 * Make the slot reference the descriptor so that 743 * fdalloc() works properly.. We finalize it all 744 * in the loop below. 745 */ 746 p->p_fd->fd_ofiles[fdp[i]] = *rp++; 747 748 if (flags & MSG_CMSG_CLOEXEC) 749 p->p_fd->fd_ofileflags[fdp[i]] |= UF_EXCLOSE; 750 } 751 752 /* 753 * Now that adding them has succeeded, update all of the 754 * descriptor passing state. 755 */ 756 rp = (struct file **)CMSG_DATA(cm); 757 for (i = 0; i < nfds; i++) { 758 fp = *rp++; 759 fp->f_msgcount--; 760 unp_rights--; 761 } 762 763 /* 764 * Copy temporary array to message and adjust length, in case of 765 * transition from large struct file pointers to ints. 766 */ 767 memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); 768 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); 769 rights->m_len = CMSG_LEN(nfds * sizeof(int)); 770 out: 771 fdpunlock(p->p_fd); 772 if (fdp) 773 free(fdp, M_TEMP, 0); 774 return (error); 775 } 776 777 int 778 unp_internalize(struct mbuf *control, struct proc *p) 779 { 780 struct filedesc *fdp = p->p_fd; 781 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 782 struct file **rp, *fp; 783 int i, error; 784 int nfds, *ip, fd, neededspace; 785 786 /* 787 * Check for two potential msg_controllen values because 788 * IETF stuck their nose in a place it does not belong. 789 */ 790 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 791 !(cm->cmsg_len == control->m_len || 792 control->m_len == CMSG_ALIGN(cm->cmsg_len))) 793 return (EINVAL); 794 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); 795 796 if (unp_rights + nfds > maxfiles / 10) 797 return (EMFILE); 798 799 /* Make sure we have room for the struct file pointers */ 800 morespace: 801 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - 802 control->m_len; 803 if (neededspace > M_TRAILINGSPACE(control)) { 804 char *tmp; 805 /* if we already have a cluster, the message is just too big */ 806 if (control->m_flags & M_EXT) 807 return (E2BIG); 808 809 /* copy cmsg data temporarily out of the mbuf */ 810 tmp = malloc(control->m_len, M_TEMP, M_WAITOK); 811 memcpy(tmp, mtod(control, caddr_t), control->m_len); 812 813 /* allocate a cluster and try again */ 814 MCLGET(control, M_WAIT); 815 if ((control->m_flags & M_EXT) == 0) { 816 free(tmp, M_TEMP, control->m_len); 817 return (ENOBUFS); /* allocation failed */ 818 } 819 820 /* copy the data back into the cluster */ 821 cm = mtod(control, struct cmsghdr *); 822 memcpy(cm, tmp, control->m_len); 823 free(tmp, M_TEMP, control->m_len); 824 goto morespace; 825 } 826 827 /* adjust message & mbuf to note amount of space actually used. */ 828 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); 829 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); 830 831 ip = ((int *)CMSG_DATA(cm)) + nfds - 1; 832 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; 833 for (i = 0; i < nfds; i++) { 834 bcopy(ip, &fd, sizeof fd); 835 ip--; 836 if ((fp = fd_getfile(fdp, fd)) == NULL) { 837 error = EBADF; 838 goto fail; 839 } 840 if (fp->f_count == LONG_MAX-2 || 841 fp->f_msgcount == LONG_MAX-2) { 842 error = EDEADLK; 843 goto fail; 844 } 845 /* kq and systrace descriptors cannot be copied */ 846 if (fp->f_type == DTYPE_KQUEUE || 847 fp->f_type == DTYPE_SYSTRACE) { 848 error = EINVAL; 849 goto fail; 850 } 851 bcopy(&fp, rp, sizeof fp); 852 rp--; 853 fp->f_count++; 854 fp->f_msgcount++; 855 unp_rights++; 856 } 857 return (0); 858 fail: 859 /* Back out what we just did. */ 860 for ( ; i > 0; i--) { 861 rp++; 862 bcopy(rp, &fp, sizeof(fp)); 863 fp->f_count--; 864 fp->f_msgcount--; 865 unp_rights--; 866 } 867 868 return (error); 869 } 870 871 int unp_defer, unp_gcing; 872 extern struct domain unixdomain; 873 874 void 875 unp_gc(void) 876 { 877 struct file *fp, *nextfp; 878 struct socket *so; 879 struct file **extra_ref, **fpp; 880 int nunref, i; 881 882 if (unp_gcing) 883 return; 884 unp_gcing = 1; 885 unp_defer = 0; 886 LIST_FOREACH(fp, &filehead, f_list) 887 fp->f_iflags &= ~(FIF_MARK|FIF_DEFER); 888 do { 889 LIST_FOREACH(fp, &filehead, f_list) { 890 if (fp->f_iflags & FIF_DEFER) { 891 fp->f_iflags &= ~FIF_DEFER; 892 unp_defer--; 893 } else { 894 if (fp->f_count == 0) 895 continue; 896 if (fp->f_iflags & FIF_MARK) 897 continue; 898 if (fp->f_count == fp->f_msgcount) 899 continue; 900 } 901 fp->f_iflags |= FIF_MARK; 902 903 if (fp->f_type != DTYPE_SOCKET || 904 (so = fp->f_data) == NULL) 905 continue; 906 if (so->so_proto->pr_domain != &unixdomain || 907 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 908 continue; 909 #ifdef notdef 910 if (so->so_rcv.sb_flags & SB_LOCK) { 911 /* 912 * This is problematical; it's not clear 913 * we need to wait for the sockbuf to be 914 * unlocked (on a uniprocessor, at least), 915 * and it's also not clear what to do 916 * if sbwait returns an error due to receipt 917 * of a signal. If sbwait does return 918 * an error, we'll go into an infinite 919 * loop. Delete all of this for now. 920 */ 921 (void) sbwait(&so->so_rcv); 922 goto restart; 923 } 924 #endif 925 unp_scan(so->so_rcv.sb_mb, unp_mark, 0); 926 } 927 } while (unp_defer); 928 /* 929 * We grab an extra reference to each of the file table entries 930 * that are not otherwise accessible and then free the rights 931 * that are stored in messages on them. 932 * 933 * The bug in the original code is a little tricky, so I'll describe 934 * what's wrong with it here. 935 * 936 * It is incorrect to simply unp_discard each entry for f_msgcount 937 * times -- consider the case of sockets A and B that contain 938 * references to each other. On a last close of some other socket, 939 * we trigger a gc since the number of outstanding rights (unp_rights) 940 * is non-zero. If during the sweep phase the gc code un_discards, 941 * we end up doing a (full) closef on the descriptor. A closef on A 942 * results in the following chain. Closef calls soo_close, which 943 * calls soclose. Soclose calls first (through the switch 944 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 945 * returns because the previous instance had set unp_gcing, and 946 * we return all the way back to soclose, which marks the socket 947 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 948 * to free up the rights that are queued in messages on the socket A, 949 * i.e., the reference on B. The sorflush calls via the dom_dispose 950 * switch unp_dispose, which unp_scans with unp_discard. This second 951 * instance of unp_discard just calls closef on B. 952 * 953 * Well, a similar chain occurs on B, resulting in a sorflush on B, 954 * which results in another closef on A. Unfortunately, A is already 955 * being closed, and the descriptor has already been marked with 956 * SS_NOFDREF, and soclose panics at this point. 957 * 958 * Here, we first take an extra reference to each inaccessible 959 * descriptor. Then, we call sorflush ourself, since we know 960 * it is a Unix domain socket anyhow. After we destroy all the 961 * rights carried in messages, we do a last closef to get rid 962 * of our extra reference. This is the last close, and the 963 * unp_detach etc will shut down the socket. 964 * 965 * 91/09/19, bsy@cs.cmu.edu 966 */ 967 extra_ref = mallocarray(nfiles, sizeof(struct file *), M_FILE, M_WAITOK); 968 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 969 fp != NULL; fp = nextfp) { 970 nextfp = LIST_NEXT(fp, f_list); 971 if (fp->f_count == 0) 972 continue; 973 if (fp->f_count == fp->f_msgcount && 974 !(fp->f_iflags & FIF_MARK)) { 975 *fpp++ = fp; 976 nunref++; 977 FREF(fp); 978 fp->f_count++; 979 } 980 } 981 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 982 if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL) 983 sorflush((*fpp)->f_data); 984 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 985 (void) closef(*fpp, NULL); 986 free(extra_ref, M_FILE, 0); 987 unp_gcing = 0; 988 } 989 990 void 991 unp_dispose(struct mbuf *m) 992 { 993 994 if (m) 995 unp_scan(m, unp_discard, 1); 996 } 997 998 void 999 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard) 1000 { 1001 struct mbuf *m; 1002 struct file **rp, *fp; 1003 struct cmsghdr *cm; 1004 int i; 1005 int qfds; 1006 1007 while (m0) { 1008 for (m = m0; m; m = m->m_next) { 1009 if (m->m_type == MT_CONTROL && 1010 m->m_len >= sizeof(*cm)) { 1011 cm = mtod(m, struct cmsghdr *); 1012 if (cm->cmsg_level != SOL_SOCKET || 1013 cm->cmsg_type != SCM_RIGHTS) 1014 continue; 1015 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm)) 1016 / sizeof(struct file *); 1017 rp = (struct file **)CMSG_DATA(cm); 1018 for (i = 0; i < qfds; i++) { 1019 fp = *rp; 1020 if (discard) 1021 *rp = 0; 1022 (*op)(fp); 1023 rp++; 1024 } 1025 break; /* XXX, but saves time */ 1026 } 1027 } 1028 m0 = m0->m_nextpkt; 1029 } 1030 } 1031 1032 void 1033 unp_mark(struct file *fp) 1034 { 1035 if (fp == NULL) 1036 return; 1037 1038 if (fp->f_iflags & (FIF_MARK|FIF_DEFER)) 1039 return; 1040 1041 if (fp->f_type == DTYPE_SOCKET) { 1042 unp_defer++; 1043 fp->f_iflags |= FIF_DEFER; 1044 } else { 1045 fp->f_iflags |= FIF_MARK; 1046 } 1047 } 1048 1049 void 1050 unp_discard(struct file *fp) 1051 { 1052 1053 if (fp == NULL) 1054 return; 1055 FREF(fp); 1056 fp->f_msgcount--; 1057 unp_rights--; 1058 (void) closef(fp, NULL); 1059 } 1060