1 /* $OpenBSD: uipc_usrreq.c,v 1.76 2014/07/13 15:52:38 tedu Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/proc.h> 38 #include <sys/filedesc.h> 39 #include <sys/domain.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/unpcb.h> 44 #include <sys/un.h> 45 #include <sys/namei.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/stat.h> 49 #include <sys/mbuf.h> 50 51 void uipc_setaddr(const struct unpcb *, struct mbuf *); 52 53 /* 54 * Unix communications domain. 55 * 56 * TODO: 57 * RDM 58 * rethink name space problems 59 * need a proper out-of-band 60 */ 61 struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; 62 ino_t unp_ino; /* prototype for fake inode numbers */ 63 64 void 65 uipc_setaddr(const struct unpcb *unp, struct mbuf *nam) 66 { 67 if (unp != NULL && unp->unp_addr != NULL) { 68 nam->m_len = unp->unp_addr->m_len; 69 bcopy(mtod(unp->unp_addr, caddr_t), mtod(nam, caddr_t), 70 nam->m_len); 71 } else { 72 nam->m_len = sizeof(sun_noname); 73 bcopy(&sun_noname, mtod(nam, struct sockaddr *), 74 nam->m_len); 75 } 76 } 77 78 /*ARGSUSED*/ 79 int 80 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 81 struct mbuf *control, struct proc *p) 82 { 83 struct unpcb *unp = sotounpcb(so); 84 struct socket *so2; 85 int error = 0; 86 87 if (req == PRU_CONTROL) 88 return (EOPNOTSUPP); 89 if (req != PRU_SEND && control && control->m_len) { 90 error = EOPNOTSUPP; 91 goto release; 92 } 93 if (unp == NULL && req != PRU_ATTACH) { 94 error = EINVAL; 95 goto release; 96 } 97 switch (req) { 98 99 case PRU_ATTACH: 100 if (unp) { 101 error = EISCONN; 102 break; 103 } 104 error = unp_attach(so); 105 break; 106 107 case PRU_DETACH: 108 unp_detach(unp); 109 break; 110 111 case PRU_BIND: 112 error = unp_bind(unp, nam, p); 113 break; 114 115 case PRU_LISTEN: 116 if (unp->unp_vnode == NULL) 117 error = EINVAL; 118 break; 119 120 case PRU_CONNECT: 121 error = unp_connect(so, nam, p); 122 break; 123 124 case PRU_CONNECT2: 125 error = unp_connect2(so, (struct socket *)nam); 126 break; 127 128 case PRU_DISCONNECT: 129 unp_disconnect(unp); 130 break; 131 132 case PRU_ACCEPT: 133 /* 134 * Pass back name of connected socket, 135 * if it was bound and we are still connected 136 * (our peer may have closed already!). 137 */ 138 uipc_setaddr(unp->unp_conn, nam); 139 break; 140 141 case PRU_SHUTDOWN: 142 socantsendmore(so); 143 unp_shutdown(unp); 144 break; 145 146 case PRU_RCVD: 147 switch (so->so_type) { 148 149 case SOCK_DGRAM: 150 panic("uipc 1"); 151 /*NOTREACHED*/ 152 153 case SOCK_STREAM: 154 case SOCK_SEQPACKET: 155 #define rcv (&so->so_rcv) 156 #define snd (&so2->so_snd) 157 if (unp->unp_conn == NULL) 158 break; 159 so2 = unp->unp_conn->unp_socket; 160 /* 161 * Adjust backpressure on sender 162 * and wakeup any waiting to write. 163 */ 164 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 165 unp->unp_mbcnt = rcv->sb_mbcnt; 166 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 167 unp->unp_cc = rcv->sb_cc; 168 sowwakeup(so2); 169 #undef snd 170 #undef rcv 171 break; 172 173 default: 174 panic("uipc 2"); 175 } 176 break; 177 178 case PRU_SEND: 179 if (control && (error = unp_internalize(control, p))) 180 break; 181 switch (so->so_type) { 182 183 case SOCK_DGRAM: { 184 struct sockaddr *from; 185 186 if (nam) { 187 if (unp->unp_conn) { 188 error = EISCONN; 189 break; 190 } 191 error = unp_connect(so, nam, p); 192 if (error) 193 break; 194 } else { 195 if (unp->unp_conn == NULL) { 196 error = ENOTCONN; 197 break; 198 } 199 } 200 so2 = unp->unp_conn->unp_socket; 201 if (unp->unp_addr) 202 from = mtod(unp->unp_addr, struct sockaddr *); 203 else 204 from = &sun_noname; 205 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 206 sorwakeup(so2); 207 m = NULL; 208 control = NULL; 209 } else 210 error = ENOBUFS; 211 if (nam) 212 unp_disconnect(unp); 213 break; 214 } 215 216 case SOCK_STREAM: 217 case SOCK_SEQPACKET: 218 #define rcv (&so2->so_rcv) 219 #define snd (&so->so_snd) 220 if (so->so_state & SS_CANTSENDMORE) { 221 error = EPIPE; 222 break; 223 } 224 if (unp->unp_conn == NULL) { 225 error = ENOTCONN; 226 break; 227 } 228 so2 = unp->unp_conn->unp_socket; 229 /* 230 * Send to paired receive port, and then reduce 231 * send buffer hiwater marks to maintain backpressure. 232 * Wake up readers. 233 */ 234 if (control) { 235 if (sbappendcontrol(rcv, m, control)) 236 control = NULL; 237 } else if (so->so_type == SOCK_SEQPACKET) 238 sbappendrecord(rcv, m); 239 else 240 sbappend(rcv, m); 241 snd->sb_mbmax -= 242 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 243 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 244 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 245 unp->unp_conn->unp_cc = rcv->sb_cc; 246 sorwakeup(so2); 247 m = NULL; 248 #undef snd 249 #undef rcv 250 break; 251 252 default: 253 panic("uipc 4"); 254 } 255 /* we need to undo unp_internalize in case of errors */ 256 if (control && error) 257 unp_dispose(control); 258 break; 259 260 case PRU_ABORT: 261 unp_drop(unp, ECONNABORTED); 262 break; 263 264 case PRU_SENSE: { 265 struct stat *sb = (struct stat *)m; 266 267 sb->st_blksize = so->so_snd.sb_hiwat; 268 switch (so->so_type) { 269 case SOCK_STREAM: 270 case SOCK_SEQPACKET: 271 if (unp->unp_conn != NULL) { 272 so2 = unp->unp_conn->unp_socket; 273 sb->st_blksize += so2->so_rcv.sb_cc; 274 } 275 break; 276 default: 277 break; 278 } 279 sb->st_dev = NODEV; 280 if (unp->unp_ino == 0) 281 unp->unp_ino = unp_ino++; 282 sb->st_atim.tv_sec = 283 sb->st_mtim.tv_sec = 284 sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec; 285 sb->st_atim.tv_nsec = 286 sb->st_mtim.tv_nsec = 287 sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec; 288 sb->st_ino = unp->unp_ino; 289 return (0); 290 } 291 292 case PRU_RCVOOB: 293 return (EOPNOTSUPP); 294 295 case PRU_SENDOOB: 296 error = EOPNOTSUPP; 297 break; 298 299 case PRU_SOCKADDR: 300 uipc_setaddr(unp, nam); 301 break; 302 303 case PRU_PEERADDR: 304 uipc_setaddr(unp->unp_conn, nam); 305 break; 306 307 case PRU_SLOWTIMO: 308 break; 309 310 default: 311 panic("piusrreq"); 312 } 313 release: 314 if (control) 315 m_freem(control); 316 if (m) 317 m_freem(m); 318 return (error); 319 } 320 321 /* 322 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 323 * for stream sockets, although the total for sender and receiver is 324 * actually only PIPSIZ. 325 * Datagram sockets really use the sendspace as the maximum datagram size, 326 * and don't really want to reserve the sendspace. Their recvspace should 327 * be large enough for at least one max-size datagram plus address. 328 */ 329 #define PIPSIZ 4096 330 u_long unpst_sendspace = PIPSIZ; 331 u_long unpst_recvspace = PIPSIZ; 332 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 333 u_long unpdg_recvspace = 4*1024; 334 335 int unp_rights; /* file descriptors in flight */ 336 337 int 338 unp_attach(struct socket *so) 339 { 340 struct unpcb *unp; 341 int error; 342 343 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 344 switch (so->so_type) { 345 346 case SOCK_STREAM: 347 case SOCK_SEQPACKET: 348 error = soreserve(so, unpst_sendspace, unpst_recvspace); 349 break; 350 351 case SOCK_DGRAM: 352 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 353 break; 354 355 default: 356 panic("unp_attach"); 357 } 358 if (error) 359 return (error); 360 } 361 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO); 362 if (unp == NULL) 363 return (ENOBUFS); 364 unp->unp_socket = so; 365 so->so_pcb = unp; 366 getnanotime(&unp->unp_ctime); 367 return (0); 368 } 369 370 void 371 unp_detach(struct unpcb *unp) 372 { 373 struct vnode *vp; 374 375 if (unp->unp_vnode) { 376 unp->unp_vnode->v_socket = NULL; 377 vp = unp->unp_vnode; 378 unp->unp_vnode = NULL; 379 vrele(vp); 380 } 381 if (unp->unp_conn) 382 unp_disconnect(unp); 383 while (unp->unp_refs) 384 unp_drop(unp->unp_refs, ECONNRESET); 385 soisdisconnected(unp->unp_socket); 386 unp->unp_socket->so_pcb = NULL; 387 m_freem(unp->unp_addr); 388 if (unp_rights) { 389 /* 390 * Normally the receive buffer is flushed later, 391 * in sofree, but if our receive buffer holds references 392 * to descriptors that are now garbage, we will dispose 393 * of those descriptor references after the garbage collector 394 * gets them (resulting in a "panic: closef: count < 0"). 395 */ 396 sorflush(unp->unp_socket); 397 free(unp, M_PCB, 0); 398 unp_gc(); 399 } else 400 free(unp, M_PCB, 0); 401 } 402 403 int 404 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p) 405 { 406 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 407 struct mbuf *nam2; 408 struct vnode *vp; 409 struct vattr vattr; 410 int error; 411 struct nameidata nd; 412 size_t pathlen; 413 414 if (unp->unp_vnode != NULL) 415 return (EINVAL); 416 417 if (soun->sun_len > sizeof(struct sockaddr_un) || 418 soun->sun_len < offsetof(struct sockaddr_un, sun_path)) 419 return (EINVAL); 420 if (soun->sun_family != AF_UNIX) 421 return (EAFNOSUPPORT); 422 423 pathlen = strnlen(soun->sun_path, soun->sun_len - 424 offsetof(struct sockaddr_un, sun_path)); 425 if (pathlen == sizeof(soun->sun_path)) 426 return (EINVAL); 427 428 nam2 = m_getclr(M_WAITOK, MT_SONAME); 429 nam2->m_len = sizeof(struct sockaddr_un); 430 memcpy(mtod(nam2, struct sockaddr_un *), soun, 431 offsetof(struct sockaddr_un, sun_path) + pathlen); 432 /* No need to NUL terminate: m_getclr() returns zero'd mbufs. */ 433 434 soun = mtod(nam2, struct sockaddr_un *); 435 436 /* Fixup sun_len to keep it in sync with m_len. */ 437 soun->sun_len = nam2->m_len; 438 439 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, 440 soun->sun_path, p); 441 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 442 if ((error = namei(&nd)) != 0) { 443 m_freem(nam2); 444 return (error); 445 } 446 vp = nd.ni_vp; 447 if (vp != NULL) { 448 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 449 if (nd.ni_dvp == vp) 450 vrele(nd.ni_dvp); 451 else 452 vput(nd.ni_dvp); 453 vrele(vp); 454 m_freem(nam2); 455 return (EADDRINUSE); 456 } 457 VATTR_NULL(&vattr); 458 vattr.va_type = VSOCK; 459 vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; 460 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 461 if (error) { 462 m_freem(nam2); 463 return (error); 464 } 465 unp->unp_addr = nam2; 466 vp = nd.ni_vp; 467 vp->v_socket = unp->unp_socket; 468 unp->unp_vnode = vp; 469 unp->unp_connid.uid = p->p_ucred->cr_uid; 470 unp->unp_connid.gid = p->p_ucred->cr_gid; 471 unp->unp_connid.pid = p->p_p->ps_pid; 472 unp->unp_flags |= UNP_FEIDSBIND; 473 VOP_UNLOCK(vp, 0, p); 474 return (0); 475 } 476 477 int 478 unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) 479 { 480 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 481 struct vnode *vp; 482 struct socket *so2, *so3; 483 struct unpcb *unp, *unp2, *unp3; 484 int error; 485 struct nameidata nd; 486 487 if (soun->sun_family != AF_UNIX) 488 return (EAFNOSUPPORT); 489 490 if (nam->m_len < sizeof(struct sockaddr_un)) 491 *(mtod(nam, caddr_t) + nam->m_len) = 0; 492 else if (nam->m_len > sizeof(struct sockaddr_un)) 493 return (EINVAL); 494 else if (memchr(soun->sun_path, '\0', sizeof(soun->sun_path)) == NULL) 495 return (EINVAL); 496 497 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p); 498 if ((error = namei(&nd)) != 0) 499 return (error); 500 vp = nd.ni_vp; 501 if (vp->v_type != VSOCK) { 502 error = ENOTSOCK; 503 goto bad; 504 } 505 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 506 goto bad; 507 so2 = vp->v_socket; 508 if (so2 == NULL) { 509 error = ECONNREFUSED; 510 goto bad; 511 } 512 if (so->so_type != so2->so_type) { 513 error = EPROTOTYPE; 514 goto bad; 515 } 516 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 517 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 518 (so3 = sonewconn(so2, 0)) == 0) { 519 error = ECONNREFUSED; 520 goto bad; 521 } 522 unp = sotounpcb(so); 523 unp2 = sotounpcb(so2); 524 unp3 = sotounpcb(so3); 525 if (unp2->unp_addr) 526 unp3->unp_addr = 527 m_copy(unp2->unp_addr, 0, (int)M_COPYALL); 528 unp3->unp_connid.uid = p->p_ucred->cr_uid; 529 unp3->unp_connid.gid = p->p_ucred->cr_gid; 530 unp3->unp_connid.pid = p->p_p->ps_pid; 531 unp3->unp_flags |= UNP_FEIDS; 532 so2 = so3; 533 if (unp2->unp_flags & UNP_FEIDSBIND) { 534 unp->unp_connid = unp2->unp_connid; 535 unp->unp_flags |= UNP_FEIDS; 536 } 537 } 538 error = unp_connect2(so, so2); 539 bad: 540 vput(vp); 541 return (error); 542 } 543 544 int 545 unp_connect2(struct socket *so, struct socket *so2) 546 { 547 struct unpcb *unp = sotounpcb(so); 548 struct unpcb *unp2; 549 550 if (so2->so_type != so->so_type) 551 return (EPROTOTYPE); 552 unp2 = sotounpcb(so2); 553 unp->unp_conn = unp2; 554 switch (so->so_type) { 555 556 case SOCK_DGRAM: 557 unp->unp_nextref = unp2->unp_refs; 558 unp2->unp_refs = unp; 559 soisconnected(so); 560 break; 561 562 case SOCK_STREAM: 563 case SOCK_SEQPACKET: 564 unp2->unp_conn = unp; 565 soisconnected(so); 566 soisconnected(so2); 567 break; 568 569 default: 570 panic("unp_connect2"); 571 } 572 return (0); 573 } 574 575 void 576 unp_disconnect(struct unpcb *unp) 577 { 578 struct unpcb *unp2 = unp->unp_conn; 579 580 if (unp2 == NULL) 581 return; 582 unp->unp_conn = NULL; 583 switch (unp->unp_socket->so_type) { 584 585 case SOCK_DGRAM: 586 if (unp2->unp_refs == unp) 587 unp2->unp_refs = unp->unp_nextref; 588 else { 589 unp2 = unp2->unp_refs; 590 for (;;) { 591 if (unp2 == NULL) 592 panic("unp_disconnect"); 593 if (unp2->unp_nextref == unp) 594 break; 595 unp2 = unp2->unp_nextref; 596 } 597 unp2->unp_nextref = unp->unp_nextref; 598 } 599 unp->unp_nextref = NULL; 600 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 601 break; 602 603 case SOCK_STREAM: 604 case SOCK_SEQPACKET: 605 soisdisconnected(unp->unp_socket); 606 unp2->unp_conn = NULL; 607 soisdisconnected(unp2->unp_socket); 608 break; 609 } 610 } 611 612 void 613 unp_shutdown(struct unpcb *unp) 614 { 615 struct socket *so; 616 617 switch (unp->unp_socket->so_type) { 618 case SOCK_STREAM: 619 case SOCK_SEQPACKET: 620 if (unp->unp_conn && (so = unp->unp_conn->unp_socket)) 621 socantrcvmore(so); 622 break; 623 default: 624 break; 625 } 626 } 627 628 void 629 unp_drop(struct unpcb *unp, int errno) 630 { 631 struct socket *so = unp->unp_socket; 632 633 so->so_error = errno; 634 unp_disconnect(unp); 635 if (so->so_head) { 636 so->so_pcb = NULL; 637 sofree(so); 638 m_freem(unp->unp_addr); 639 free(unp, M_PCB, 0); 640 } 641 } 642 643 #ifdef notdef 644 unp_drain(void) 645 { 646 647 } 648 #endif 649 650 int 651 unp_externalize(struct mbuf *rights, socklen_t controllen) 652 { 653 struct proc *p = curproc; /* XXX */ 654 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 655 int i, *fdp = NULL; 656 struct file **rp; 657 struct file *fp; 658 int nfds, error = 0; 659 660 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / 661 sizeof(struct file *); 662 if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) 663 controllen = 0; 664 else 665 controllen -= CMSG_ALIGN(sizeof(struct cmsghdr)); 666 if (nfds > controllen / sizeof(int)) { 667 error = EMSGSIZE; 668 goto restart; 669 } 670 671 rp = (struct file **)CMSG_DATA(cm); 672 673 fdp = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK); 674 675 /* Make sure the recipient should be able to see the descriptors.. */ 676 if (p->p_fd->fd_rdir != NULL) { 677 rp = (struct file **)CMSG_DATA(cm); 678 for (i = 0; i < nfds; i++) { 679 fp = *rp++; 680 /* 681 * No to block devices. If passing a directory, 682 * make sure that it is underneath the root. 683 */ 684 if (fp->f_type == DTYPE_VNODE) { 685 struct vnode *vp = (struct vnode *)fp->f_data; 686 687 if (vp->v_type == VBLK || 688 (vp->v_type == VDIR && 689 !vn_isunder(vp, p->p_fd->fd_rdir, p))) { 690 error = EPERM; 691 break; 692 } 693 } 694 } 695 } 696 697 restart: 698 fdplock(p->p_fd); 699 if (error != 0) { 700 rp = ((struct file **)CMSG_DATA(cm)); 701 for (i = 0; i < nfds; i++) { 702 fp = *rp; 703 /* 704 * zero the pointer before calling unp_discard, 705 * since it may end up in unp_gc().. 706 */ 707 *rp++ = NULL; 708 unp_discard(fp); 709 } 710 goto out; 711 } 712 713 /* 714 * First loop -- allocate file descriptor table slots for the 715 * new descriptors. 716 */ 717 rp = ((struct file **)CMSG_DATA(cm)); 718 for (i = 0; i < nfds; i++) { 719 if ((error = fdalloc(p, 0, &fdp[i])) != 0) { 720 /* 721 * Back out what we've done so far. 722 */ 723 for (--i; i >= 0; i--) 724 fdremove(p->p_fd, fdp[i]); 725 726 if (error == ENOSPC) { 727 fdexpand(p); 728 error = 0; 729 } else { 730 /* 731 * This is the error that has historically 732 * been returned, and some callers may 733 * expect it. 734 */ 735 error = EMSGSIZE; 736 } 737 fdpunlock(p->p_fd); 738 goto restart; 739 } 740 741 /* 742 * Make the slot reference the descriptor so that 743 * fdalloc() works properly.. We finalize it all 744 * in the loop below. 745 */ 746 p->p_fd->fd_ofiles[fdp[i]] = *rp++; 747 } 748 749 /* 750 * Now that adding them has succeeded, update all of the 751 * descriptor passing state. 752 */ 753 rp = (struct file **)CMSG_DATA(cm); 754 for (i = 0; i < nfds; i++) { 755 fp = *rp++; 756 fp->f_msgcount--; 757 unp_rights--; 758 } 759 760 /* 761 * Copy temporary array to message and adjust length, in case of 762 * transition from large struct file pointers to ints. 763 */ 764 memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); 765 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); 766 rights->m_len = CMSG_LEN(nfds * sizeof(int)); 767 out: 768 fdpunlock(p->p_fd); 769 if (fdp) 770 free(fdp, M_TEMP, 0); 771 return (error); 772 } 773 774 int 775 unp_internalize(struct mbuf *control, struct proc *p) 776 { 777 struct filedesc *fdp = p->p_fd; 778 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 779 struct file **rp, *fp; 780 int i, error; 781 int nfds, *ip, fd, neededspace; 782 783 /* 784 * Check for two potential msg_controllen values because 785 * IETF stuck their nose in a place it does not belong. 786 */ 787 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 788 !(cm->cmsg_len == control->m_len || 789 control->m_len == CMSG_ALIGN(cm->cmsg_len))) 790 return (EINVAL); 791 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); 792 793 if (unp_rights + nfds > maxfiles / 10) 794 return (EMFILE); 795 796 /* Make sure we have room for the struct file pointers */ 797 morespace: 798 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - 799 control->m_len; 800 if (neededspace > M_TRAILINGSPACE(control)) { 801 char *tmp; 802 /* if we already have a cluster, the message is just too big */ 803 if (control->m_flags & M_EXT) 804 return (E2BIG); 805 806 /* copy cmsg data temporarily out of the mbuf */ 807 tmp = malloc(control->m_len, M_TEMP, M_WAITOK); 808 memcpy(tmp, mtod(control, caddr_t), control->m_len); 809 810 /* allocate a cluster and try again */ 811 MCLGET(control, M_WAIT); 812 if ((control->m_flags & M_EXT) == 0) { 813 free(tmp, M_TEMP, 0); 814 return (ENOBUFS); /* allocation failed */ 815 } 816 817 /* copy the data back into the cluster */ 818 cm = mtod(control, struct cmsghdr *); 819 memcpy(cm, tmp, control->m_len); 820 free(tmp, M_TEMP, 0); 821 goto morespace; 822 } 823 824 /* adjust message & mbuf to note amount of space actually used. */ 825 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); 826 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); 827 828 ip = ((int *)CMSG_DATA(cm)) + nfds - 1; 829 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; 830 for (i = 0; i < nfds; i++) { 831 bcopy(ip, &fd, sizeof fd); 832 ip--; 833 if ((fp = fd_getfile(fdp, fd)) == NULL) { 834 error = EBADF; 835 goto fail; 836 } 837 if (fp->f_count == LONG_MAX-2 || 838 fp->f_msgcount == LONG_MAX-2) { 839 error = EDEADLK; 840 goto fail; 841 } 842 /* kq and systrace descriptors cannot be copied */ 843 if (fp->f_type == DTYPE_KQUEUE || 844 fp->f_type == DTYPE_SYSTRACE) { 845 error = EINVAL; 846 goto fail; 847 } 848 bcopy(&fp, rp, sizeof fp); 849 rp--; 850 fp->f_count++; 851 fp->f_msgcount++; 852 unp_rights++; 853 } 854 return (0); 855 fail: 856 /* Back out what we just did. */ 857 for ( ; i > 0; i--) { 858 rp++; 859 bcopy(rp, &fp, sizeof(fp)); 860 fp->f_count--; 861 fp->f_msgcount--; 862 unp_rights--; 863 } 864 865 return (error); 866 } 867 868 int unp_defer, unp_gcing; 869 extern struct domain unixdomain; 870 871 void 872 unp_gc(void) 873 { 874 struct file *fp, *nextfp; 875 struct socket *so; 876 struct file **extra_ref, **fpp; 877 int nunref, i; 878 879 if (unp_gcing) 880 return; 881 unp_gcing = 1; 882 unp_defer = 0; 883 LIST_FOREACH(fp, &filehead, f_list) 884 fp->f_iflags &= ~(FIF_MARK|FIF_DEFER); 885 do { 886 LIST_FOREACH(fp, &filehead, f_list) { 887 if (fp->f_iflags & FIF_DEFER) { 888 fp->f_iflags &= ~FIF_DEFER; 889 unp_defer--; 890 } else { 891 if (fp->f_count == 0) 892 continue; 893 if (fp->f_iflags & FIF_MARK) 894 continue; 895 if (fp->f_count == fp->f_msgcount) 896 continue; 897 } 898 fp->f_iflags |= FIF_MARK; 899 900 if (fp->f_type != DTYPE_SOCKET || 901 (so = fp->f_data) == NULL) 902 continue; 903 if (so->so_proto->pr_domain != &unixdomain || 904 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 905 continue; 906 #ifdef notdef 907 if (so->so_rcv.sb_flags & SB_LOCK) { 908 /* 909 * This is problematical; it's not clear 910 * we need to wait for the sockbuf to be 911 * unlocked (on a uniprocessor, at least), 912 * and it's also not clear what to do 913 * if sbwait returns an error due to receipt 914 * of a signal. If sbwait does return 915 * an error, we'll go into an infinite 916 * loop. Delete all of this for now. 917 */ 918 (void) sbwait(&so->so_rcv); 919 goto restart; 920 } 921 #endif 922 unp_scan(so->so_rcv.sb_mb, unp_mark, 0); 923 } 924 } while (unp_defer); 925 /* 926 * We grab an extra reference to each of the file table entries 927 * that are not otherwise accessible and then free the rights 928 * that are stored in messages on them. 929 * 930 * The bug in the original code is a little tricky, so I'll describe 931 * what's wrong with it here. 932 * 933 * It is incorrect to simply unp_discard each entry for f_msgcount 934 * times -- consider the case of sockets A and B that contain 935 * references to each other. On a last close of some other socket, 936 * we trigger a gc since the number of outstanding rights (unp_rights) 937 * is non-zero. If during the sweep phase the gc code un_discards, 938 * we end up doing a (full) closef on the descriptor. A closef on A 939 * results in the following chain. Closef calls soo_close, which 940 * calls soclose. Soclose calls first (through the switch 941 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 942 * returns because the previous instance had set unp_gcing, and 943 * we return all the way back to soclose, which marks the socket 944 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 945 * to free up the rights that are queued in messages on the socket A, 946 * i.e., the reference on B. The sorflush calls via the dom_dispose 947 * switch unp_dispose, which unp_scans with unp_discard. This second 948 * instance of unp_discard just calls closef on B. 949 * 950 * Well, a similar chain occurs on B, resulting in a sorflush on B, 951 * which results in another closef on A. Unfortunately, A is already 952 * being closed, and the descriptor has already been marked with 953 * SS_NOFDREF, and soclose panics at this point. 954 * 955 * Here, we first take an extra reference to each inaccessible 956 * descriptor. Then, we call sorflush ourself, since we know 957 * it is a Unix domain socket anyhow. After we destroy all the 958 * rights carried in messages, we do a last closef to get rid 959 * of our extra reference. This is the last close, and the 960 * unp_detach etc will shut down the socket. 961 * 962 * 91/09/19, bsy@cs.cmu.edu 963 */ 964 extra_ref = mallocarray(nfiles, sizeof(struct file *), M_FILE, M_WAITOK); 965 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 966 fp != NULL; fp = nextfp) { 967 nextfp = LIST_NEXT(fp, f_list); 968 if (fp->f_count == 0) 969 continue; 970 if (fp->f_count == fp->f_msgcount && 971 !(fp->f_iflags & FIF_MARK)) { 972 *fpp++ = fp; 973 nunref++; 974 FREF(fp); 975 fp->f_count++; 976 } 977 } 978 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 979 if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL) 980 sorflush((*fpp)->f_data); 981 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 982 (void) closef(*fpp, NULL); 983 free(extra_ref, M_FILE, 0); 984 unp_gcing = 0; 985 } 986 987 void 988 unp_dispose(struct mbuf *m) 989 { 990 991 if (m) 992 unp_scan(m, unp_discard, 1); 993 } 994 995 void 996 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard) 997 { 998 struct mbuf *m; 999 struct file **rp, *fp; 1000 struct cmsghdr *cm; 1001 int i; 1002 int qfds; 1003 1004 while (m0) { 1005 for (m = m0; m; m = m->m_next) { 1006 if (m->m_type == MT_CONTROL && 1007 m->m_len >= sizeof(*cm)) { 1008 cm = mtod(m, struct cmsghdr *); 1009 if (cm->cmsg_level != SOL_SOCKET || 1010 cm->cmsg_type != SCM_RIGHTS) 1011 continue; 1012 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm)) 1013 / sizeof(struct file *); 1014 rp = (struct file **)CMSG_DATA(cm); 1015 for (i = 0; i < qfds; i++) { 1016 fp = *rp; 1017 if (discard) 1018 *rp = 0; 1019 (*op)(fp); 1020 rp++; 1021 } 1022 break; /* XXX, but saves time */ 1023 } 1024 } 1025 m0 = m0->m_nextpkt; 1026 } 1027 } 1028 1029 void 1030 unp_mark(struct file *fp) 1031 { 1032 if (fp == NULL) 1033 return; 1034 1035 if (fp->f_iflags & (FIF_MARK|FIF_DEFER)) 1036 return; 1037 1038 if (fp->f_type == DTYPE_SOCKET) { 1039 unp_defer++; 1040 fp->f_iflags |= FIF_DEFER; 1041 } else { 1042 fp->f_iflags |= FIF_MARK; 1043 } 1044 } 1045 1046 void 1047 unp_discard(struct file *fp) 1048 { 1049 1050 if (fp == NULL) 1051 return; 1052 FREF(fp); 1053 fp->f_msgcount--; 1054 unp_rights--; 1055 (void) closef(fp, NULL); 1056 } 1057