1 /* $OpenBSD: uipc_usrreq.c,v 1.45 2009/02/22 07:47:22 otto Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/proc.h> 38 #include <sys/filedesc.h> 39 #include <sys/domain.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/unpcb.h> 44 #include <sys/un.h> 45 #include <sys/namei.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/stat.h> 49 #include <sys/mbuf.h> 50 51 /* 52 * Unix communications domain. 53 * 54 * TODO: 55 * SEQPACKET, RDM 56 * rethink name space problems 57 * need a proper out-of-band 58 */ 59 struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; 60 ino_t unp_ino; /* prototype for fake inode numbers */ 61 62 /*ARGSUSED*/ 63 int 64 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 65 struct mbuf *control, struct proc *p) 66 { 67 struct unpcb *unp = sotounpcb(so); 68 struct socket *so2; 69 int error = 0; 70 71 if (req == PRU_CONTROL) 72 return (EOPNOTSUPP); 73 if (req != PRU_SEND && control && control->m_len) { 74 error = EOPNOTSUPP; 75 goto release; 76 } 77 if (unp == NULL && req != PRU_ATTACH) { 78 error = EINVAL; 79 goto release; 80 } 81 switch (req) { 82 83 case PRU_ATTACH: 84 if (unp) { 85 error = EISCONN; 86 break; 87 } 88 error = unp_attach(so); 89 break; 90 91 case PRU_DETACH: 92 unp_detach(unp); 93 break; 94 95 case PRU_BIND: 96 error = unp_bind(unp, nam, p); 97 break; 98 99 case PRU_LISTEN: 100 if (unp->unp_vnode == NULL) 101 error = EINVAL; 102 break; 103 104 case PRU_CONNECT: 105 error = unp_connect(so, nam, p); 106 break; 107 108 case PRU_CONNECT2: 109 error = unp_connect2(so, (struct socket *)nam); 110 break; 111 112 case PRU_DISCONNECT: 113 unp_disconnect(unp); 114 break; 115 116 case PRU_ACCEPT: 117 /* 118 * Pass back name of connected socket, 119 * if it was bound and we are still connected 120 * (our peer may have closed already!). 121 */ 122 if (unp->unp_conn && unp->unp_conn->unp_addr) { 123 nam->m_len = unp->unp_conn->unp_addr->m_len; 124 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), 125 mtod(nam, caddr_t), (unsigned)nam->m_len); 126 } else { 127 nam->m_len = sizeof(sun_noname); 128 *(mtod(nam, struct sockaddr *)) = sun_noname; 129 } 130 break; 131 132 case PRU_SHUTDOWN: 133 socantsendmore(so); 134 unp_shutdown(unp); 135 break; 136 137 case PRU_RCVD: 138 switch (so->so_type) { 139 140 case SOCK_DGRAM: 141 panic("uipc 1"); 142 /*NOTREACHED*/ 143 144 case SOCK_STREAM: 145 #define rcv (&so->so_rcv) 146 #define snd (&so2->so_snd) 147 if (unp->unp_conn == NULL) 148 break; 149 so2 = unp->unp_conn->unp_socket; 150 /* 151 * Adjust backpressure on sender 152 * and wakeup any waiting to write. 153 */ 154 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 155 unp->unp_mbcnt = rcv->sb_mbcnt; 156 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 157 unp->unp_cc = rcv->sb_cc; 158 sowwakeup(so2); 159 #undef snd 160 #undef rcv 161 break; 162 163 default: 164 panic("uipc 2"); 165 } 166 break; 167 168 case PRU_SEND: 169 if (control && (error = unp_internalize(control, p))) 170 break; 171 switch (so->so_type) { 172 173 case SOCK_DGRAM: { 174 struct sockaddr *from; 175 176 if (nam) { 177 if (unp->unp_conn) { 178 error = EISCONN; 179 break; 180 } 181 error = unp_connect(so, nam, p); 182 if (error) 183 break; 184 } else { 185 if (unp->unp_conn == NULL) { 186 error = ENOTCONN; 187 break; 188 } 189 } 190 so2 = unp->unp_conn->unp_socket; 191 if (unp->unp_addr) 192 from = mtod(unp->unp_addr, struct sockaddr *); 193 else 194 from = &sun_noname; 195 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 196 sorwakeup(so2); 197 m = NULL; 198 control = NULL; 199 } else 200 error = ENOBUFS; 201 if (nam) 202 unp_disconnect(unp); 203 break; 204 } 205 206 case SOCK_STREAM: 207 #define rcv (&so2->so_rcv) 208 #define snd (&so->so_snd) 209 if (so->so_state & SS_CANTSENDMORE) { 210 error = EPIPE; 211 break; 212 } 213 if (unp->unp_conn == NULL) { 214 error = ENOTCONN; 215 break; 216 } 217 so2 = unp->unp_conn->unp_socket; 218 /* 219 * Send to paired receive port, and then reduce 220 * send buffer hiwater marks to maintain backpressure. 221 * Wake up readers. 222 */ 223 if (control) { 224 if (sbappendcontrol(rcv, m, control)) 225 control = NULL; 226 } else 227 sbappend(rcv, m); 228 snd->sb_mbmax -= 229 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 230 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 231 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 232 unp->unp_conn->unp_cc = rcv->sb_cc; 233 sorwakeup(so2); 234 m = NULL; 235 #undef snd 236 #undef rcv 237 break; 238 239 default: 240 panic("uipc 4"); 241 } 242 /* we need to undo unp_internalize in case of errors */ 243 if (control && error) 244 unp_dispose(control); 245 break; 246 247 case PRU_ABORT: 248 unp_drop(unp, ECONNABORTED); 249 break; 250 251 case PRU_SENSE: 252 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 253 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { 254 so2 = unp->unp_conn->unp_socket; 255 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 256 } 257 ((struct stat *) m)->st_dev = NODEV; 258 if (unp->unp_ino == 0) 259 unp->unp_ino = unp_ino++; 260 ((struct stat *) m)->st_atim = 261 ((struct stat *) m)->st_mtim = 262 ((struct stat *) m)->st_ctim = unp->unp_ctime; 263 ((struct stat *) m)->st_ino = unp->unp_ino; 264 return (0); 265 266 case PRU_RCVOOB: 267 return (EOPNOTSUPP); 268 269 case PRU_SENDOOB: 270 error = EOPNOTSUPP; 271 break; 272 273 case PRU_SOCKADDR: 274 if (unp->unp_addr) { 275 nam->m_len = unp->unp_addr->m_len; 276 bcopy(mtod(unp->unp_addr, caddr_t), 277 mtod(nam, caddr_t), (unsigned)nam->m_len); 278 } else 279 nam->m_len = 0; 280 break; 281 282 case PRU_PEERADDR: 283 if (unp->unp_conn && unp->unp_conn->unp_addr) { 284 nam->m_len = unp->unp_conn->unp_addr->m_len; 285 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), 286 mtod(nam, caddr_t), (unsigned)nam->m_len); 287 } else 288 nam->m_len = 0; 289 break; 290 291 case PRU_PEEREID: 292 if (unp->unp_flags & UNP_FEIDS) { 293 nam->m_len = sizeof(struct unpcbid); 294 bcopy((caddr_t)(&(unp->unp_connid)), 295 mtod(nam, caddr_t), (unsigned)nam->m_len); 296 } else 297 nam->m_len = 0; 298 break; 299 300 case PRU_SLOWTIMO: 301 break; 302 303 default: 304 panic("piusrreq"); 305 } 306 release: 307 if (control) 308 m_freem(control); 309 if (m) 310 m_freem(m); 311 return (error); 312 } 313 314 /* 315 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 316 * for stream sockets, although the total for sender and receiver is 317 * actually only PIPSIZ. 318 * Datagram sockets really use the sendspace as the maximum datagram size, 319 * and don't really want to reserve the sendspace. Their recvspace should 320 * be large enough for at least one max-size datagram plus address. 321 */ 322 #define PIPSIZ 4096 323 u_long unpst_sendspace = PIPSIZ; 324 u_long unpst_recvspace = PIPSIZ; 325 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 326 u_long unpdg_recvspace = 4*1024; 327 328 int unp_rights; /* file descriptors in flight */ 329 330 int 331 unp_attach(struct socket *so) 332 { 333 struct unpcb *unp; 334 int error; 335 336 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 337 switch (so->so_type) { 338 339 case SOCK_STREAM: 340 error = soreserve(so, unpst_sendspace, unpst_recvspace); 341 break; 342 343 case SOCK_DGRAM: 344 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 345 break; 346 347 default: 348 panic("unp_attach"); 349 } 350 if (error) 351 return (error); 352 } 353 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO); 354 if (unp == NULL) 355 return (ENOBUFS); 356 unp->unp_socket = so; 357 so->so_pcb = unp; 358 getnanotime(&unp->unp_ctime); 359 return (0); 360 } 361 362 void 363 unp_detach(struct unpcb *unp) 364 { 365 366 if (unp->unp_vnode) { 367 unp->unp_vnode->v_socket = NULL; 368 vrele(unp->unp_vnode); 369 unp->unp_vnode = NULL; 370 } 371 if (unp->unp_conn) 372 unp_disconnect(unp); 373 while (unp->unp_refs) 374 unp_drop(unp->unp_refs, ECONNRESET); 375 soisdisconnected(unp->unp_socket); 376 unp->unp_socket->so_pcb = NULL; 377 m_freem(unp->unp_addr); 378 if (unp_rights) { 379 /* 380 * Normally the receive buffer is flushed later, 381 * in sofree, but if our receive buffer holds references 382 * to descriptors that are now garbage, we will dispose 383 * of those descriptor references after the garbage collector 384 * gets them (resulting in a "panic: closef: count < 0"). 385 */ 386 sorflush(unp->unp_socket); 387 free(unp, M_PCB); 388 unp_gc(); 389 } else 390 free(unp, M_PCB); 391 } 392 393 int 394 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p) 395 { 396 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 397 struct vnode *vp; 398 struct vattr vattr; 399 int error, namelen; 400 struct nameidata nd; 401 char buf[MLEN]; 402 403 if (unp->unp_vnode != NULL) 404 return (EINVAL); 405 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 406 if (namelen <= 0 || namelen >= MLEN) 407 return EINVAL; 408 strncpy(buf, soun->sun_path, namelen); 409 buf[namelen] = 0; /* null-terminate the string */ 410 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p); 411 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 412 if ((error = namei(&nd)) != 0) 413 return (error); 414 vp = nd.ni_vp; 415 if (vp != NULL) { 416 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 417 if (nd.ni_dvp == vp) 418 vrele(nd.ni_dvp); 419 else 420 vput(nd.ni_dvp); 421 vrele(vp); 422 return (EADDRINUSE); 423 } 424 VATTR_NULL(&vattr); 425 vattr.va_type = VSOCK; 426 vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; 427 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 428 if (error) 429 return (error); 430 vp = nd.ni_vp; 431 vp->v_socket = unp->unp_socket; 432 unp->unp_vnode = vp; 433 unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL); 434 unp->unp_connid.unp_euid = p->p_ucred->cr_uid; 435 unp->unp_connid.unp_egid = p->p_ucred->cr_gid; 436 unp->unp_flags |= UNP_FEIDSBIND; 437 VOP_UNLOCK(vp, 0, p); 438 return (0); 439 } 440 441 int 442 unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) 443 { 444 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 445 struct vnode *vp; 446 struct socket *so2, *so3; 447 struct unpcb *unp, *unp2, *unp3; 448 int error; 449 struct nameidata nd; 450 451 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p); 452 if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */ 453 if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) 454 return (EMSGSIZE); 455 } else 456 *(mtod(nam, caddr_t) + nam->m_len) = 0; 457 if ((error = namei(&nd)) != 0) 458 return (error); 459 vp = nd.ni_vp; 460 if (vp->v_type != VSOCK) { 461 error = ENOTSOCK; 462 goto bad; 463 } 464 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 465 goto bad; 466 so2 = vp->v_socket; 467 if (so2 == NULL) { 468 error = ECONNREFUSED; 469 goto bad; 470 } 471 if (so->so_type != so2->so_type) { 472 error = EPROTOTYPE; 473 goto bad; 474 } 475 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 476 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 477 (so3 = sonewconn(so2, 0)) == 0) { 478 error = ECONNREFUSED; 479 goto bad; 480 } 481 unp = sotounpcb(so); 482 unp2 = sotounpcb(so2); 483 unp3 = sotounpcb(so3); 484 if (unp2->unp_addr) 485 unp3->unp_addr = 486 m_copy(unp2->unp_addr, 0, (int)M_COPYALL); 487 unp3->unp_connid.unp_euid = p->p_ucred->cr_uid; 488 unp3->unp_connid.unp_egid = p->p_ucred->cr_gid; 489 unp3->unp_flags |= UNP_FEIDS; 490 so2 = so3; 491 if (unp2->unp_flags & UNP_FEIDSBIND) { 492 unp->unp_connid.unp_euid = unp2->unp_connid.unp_euid; 493 unp->unp_connid.unp_egid = unp2->unp_connid.unp_egid; 494 unp->unp_flags |= UNP_FEIDS; 495 } 496 } 497 error = unp_connect2(so, so2); 498 bad: 499 vput(vp); 500 return (error); 501 } 502 503 int 504 unp_connect2(struct socket *so, struct socket *so2) 505 { 506 struct unpcb *unp = sotounpcb(so); 507 struct unpcb *unp2; 508 509 if (so2->so_type != so->so_type) 510 return (EPROTOTYPE); 511 unp2 = sotounpcb(so2); 512 unp->unp_conn = unp2; 513 switch (so->so_type) { 514 515 case SOCK_DGRAM: 516 unp->unp_nextref = unp2->unp_refs; 517 unp2->unp_refs = unp; 518 soisconnected(so); 519 break; 520 521 case SOCK_STREAM: 522 unp2->unp_conn = unp; 523 soisconnected(so); 524 soisconnected(so2); 525 break; 526 527 default: 528 panic("unp_connect2"); 529 } 530 return (0); 531 } 532 533 void 534 unp_disconnect(struct unpcb *unp) 535 { 536 struct unpcb *unp2 = unp->unp_conn; 537 538 if (unp2 == NULL) 539 return; 540 unp->unp_conn = NULL; 541 switch (unp->unp_socket->so_type) { 542 543 case SOCK_DGRAM: 544 if (unp2->unp_refs == unp) 545 unp2->unp_refs = unp->unp_nextref; 546 else { 547 unp2 = unp2->unp_refs; 548 for (;;) { 549 if (unp2 == NULL) 550 panic("unp_disconnect"); 551 if (unp2->unp_nextref == unp) 552 break; 553 unp2 = unp2->unp_nextref; 554 } 555 unp2->unp_nextref = unp->unp_nextref; 556 } 557 unp->unp_nextref = NULL; 558 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 559 break; 560 561 case SOCK_STREAM: 562 soisdisconnected(unp->unp_socket); 563 unp2->unp_conn = NULL; 564 soisdisconnected(unp2->unp_socket); 565 break; 566 } 567 } 568 569 #ifdef notdef 570 unp_abort(struct unpcb *unp) 571 { 572 unp_detach(unp); 573 } 574 #endif 575 576 void 577 unp_shutdown(struct unpcb *unp) 578 { 579 struct socket *so; 580 581 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 582 (so = unp->unp_conn->unp_socket)) 583 socantrcvmore(so); 584 } 585 586 void 587 unp_drop(struct unpcb *unp, int errno) 588 { 589 struct socket *so = unp->unp_socket; 590 591 so->so_error = errno; 592 unp_disconnect(unp); 593 if (so->so_head) { 594 so->so_pcb = NULL; 595 sofree(so); 596 m_freem(unp->unp_addr); 597 free(unp, M_PCB); 598 } 599 } 600 601 #ifdef notdef 602 unp_drain(void) 603 { 604 605 } 606 #endif 607 608 int 609 unp_externalize(struct mbuf *rights, socklen_t controllen) 610 { 611 struct proc *p = curproc; /* XXX */ 612 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 613 int i, *fdp; 614 struct file **rp; 615 struct file *fp; 616 int nfds, error = 0; 617 618 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / 619 sizeof(struct file *); 620 if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) 621 controllen = 0; 622 else 623 controllen -= CMSG_ALIGN(sizeof(struct cmsghdr)); 624 if (nfds > controllen / sizeof(int)) 625 nfds = controllen / sizeof(int); 626 627 rp = (struct file **)CMSG_DATA(cm); 628 629 fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); 630 631 /* Make sure the recipient should be able to see the descriptors.. */ 632 if (p->p_fd->fd_rdir != NULL) { 633 rp = (struct file **)CMSG_DATA(cm); 634 for (i = 0; i < nfds; i++) { 635 fp = *rp++; 636 /* 637 * No to block devices. If passing a directory, 638 * make sure that it is underneath the root. 639 */ 640 if (fp->f_type == DTYPE_VNODE) { 641 struct vnode *vp = (struct vnode *)fp->f_data; 642 643 if (vp->v_type == VBLK || 644 (vp->v_type == VDIR && 645 !vn_isunder(vp, p->p_fd->fd_rdir, p))) { 646 error = EPERM; 647 break; 648 } 649 } 650 } 651 } 652 653 restart: 654 fdplock(p->p_fd); 655 if (error != 0) { 656 rp = ((struct file **)CMSG_DATA(cm)); 657 for (i = 0; i < nfds; i++) { 658 fp = *rp; 659 /* 660 * zero the pointer before calling unp_discard, 661 * since it may end up in unp_gc().. 662 */ 663 *rp++ = NULL; 664 unp_discard(fp); 665 } 666 goto out; 667 } 668 669 /* 670 * First loop -- allocate file descriptor table slots for the 671 * new descriptors. 672 */ 673 rp = ((struct file **)CMSG_DATA(cm)); 674 for (i = 0; i < nfds; i++) { 675 bcopy(rp, &fp, sizeof(fp)); 676 rp++; 677 if ((error = fdalloc(p, 0, &fdp[i])) != 0) { 678 /* 679 * Back out what we've done so far. 680 */ 681 for (--i; i >= 0; i--) 682 fdremove(p->p_fd, fdp[i]); 683 684 if (error == ENOSPC) { 685 fdexpand(p); 686 error = 0; 687 } else { 688 /* 689 * This is the error that has historically 690 * been returned, and some callers may 691 * expect it. 692 */ 693 error = EMSGSIZE; 694 } 695 fdpunlock(p->p_fd); 696 goto restart; 697 } 698 699 /* 700 * Make the slot reference the descriptor so that 701 * fdalloc() works properly.. We finalize it all 702 * in the loop below. 703 */ 704 p->p_fd->fd_ofiles[fdp[i]] = fp; 705 } 706 707 /* 708 * Now that adding them has succeeded, update all of the 709 * descriptor passing state. 710 */ 711 rp = (struct file **)CMSG_DATA(cm); 712 for (i = 0; i < nfds; i++) { 713 fp = *rp++; 714 fp->f_msgcount--; 715 unp_rights--; 716 } 717 718 /* 719 * Copy temporary array to message and adjust length, in case of 720 * transition from large struct file pointers to ints. 721 */ 722 memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); 723 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); 724 rights->m_len = CMSG_LEN(nfds * sizeof(int)); 725 out: 726 fdpunlock(p->p_fd); 727 free(fdp, M_TEMP); 728 return (error); 729 } 730 731 int 732 unp_internalize(struct mbuf *control, struct proc *p) 733 { 734 struct filedesc *fdp = p->p_fd; 735 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 736 struct file **rp, *fp; 737 int i, error; 738 int nfds, *ip, fd, neededspace; 739 740 /* 741 * Check for two potential msg_controllen values because 742 * IETF stuck their nose in a place it does not belong. 743 */ 744 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 745 !(cm->cmsg_len == control->m_len || 746 control->m_len == CMSG_ALIGN(cm->cmsg_len))) 747 return (EINVAL); 748 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); 749 750 /* Make sure we have room for the struct file pointers */ 751 morespace: 752 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - 753 control->m_len; 754 if (neededspace > M_TRAILINGSPACE(control)) { 755 /* if we already have a cluster, the message is just too big */ 756 if (control->m_flags & M_EXT) 757 return (E2BIG); 758 759 /* allocate a cluster and try again */ 760 MCLGET(control, M_WAIT); 761 if ((control->m_flags & M_EXT) == 0) 762 return (ENOBUFS); /* allocation failed */ 763 764 /* copy the data to the cluster */ 765 memcpy(mtod(control, char *), cm, cm->cmsg_len); 766 cm = mtod(control, struct cmsghdr *); 767 goto morespace; 768 } 769 770 /* adjust message & mbuf to note amount of space actually used. */ 771 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); 772 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); 773 774 ip = ((int *)CMSG_DATA(cm)) + nfds - 1; 775 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; 776 for (i = 0; i < nfds; i++) { 777 bcopy(ip, &fd, sizeof fd); 778 ip--; 779 if ((fp = fd_getfile(fdp, fd)) == NULL) { 780 error = EBADF; 781 goto fail; 782 } 783 if (fp->f_count == LONG_MAX-2 || 784 fp->f_msgcount == LONG_MAX-2) { 785 error = EDEADLK; 786 goto fail; 787 } 788 bcopy(&fp, rp, sizeof fp); 789 rp--; 790 fp->f_count++; 791 fp->f_msgcount++; 792 unp_rights++; 793 } 794 return (0); 795 fail: 796 /* Back out what we just did. */ 797 for ( ; i > 0; i--) { 798 rp++; 799 bcopy(rp, &fp, sizeof(fp)); 800 fp->f_count--; 801 fp->f_msgcount--; 802 unp_rights--; 803 } 804 805 return (error); 806 } 807 808 int unp_defer, unp_gcing; 809 extern struct domain unixdomain; 810 811 void 812 unp_gc(void) 813 { 814 struct file *fp, *nextfp; 815 struct socket *so; 816 struct file **extra_ref, **fpp; 817 int nunref, i; 818 819 if (unp_gcing) 820 return; 821 unp_gcing = 1; 822 unp_defer = 0; 823 LIST_FOREACH(fp, &filehead, f_list) 824 fp->f_flag &= ~(FMARK|FDEFER); 825 do { 826 LIST_FOREACH(fp, &filehead, f_list) { 827 if (fp->f_flag & FDEFER) { 828 fp->f_flag &= ~FDEFER; 829 unp_defer--; 830 } else { 831 if (fp->f_count == 0) 832 continue; 833 if (fp->f_flag & FMARK) 834 continue; 835 if (fp->f_count == fp->f_msgcount) 836 continue; 837 } 838 fp->f_flag |= FMARK; 839 840 if (fp->f_type != DTYPE_SOCKET || 841 (so = (struct socket *)fp->f_data) == NULL) 842 continue; 843 if (so->so_proto->pr_domain != &unixdomain || 844 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 845 continue; 846 #ifdef notdef 847 if (so->so_rcv.sb_flags & SB_LOCK) { 848 /* 849 * This is problematical; it's not clear 850 * we need to wait for the sockbuf to be 851 * unlocked (on a uniprocessor, at least), 852 * and it's also not clear what to do 853 * if sbwait returns an error due to receipt 854 * of a signal. If sbwait does return 855 * an error, we'll go into an infinite 856 * loop. Delete all of this for now. 857 */ 858 (void) sbwait(&so->so_rcv); 859 goto restart; 860 } 861 #endif 862 unp_scan(so->so_rcv.sb_mb, unp_mark, 0); 863 } 864 } while (unp_defer); 865 /* 866 * We grab an extra reference to each of the file table entries 867 * that are not otherwise accessible and then free the rights 868 * that are stored in messages on them. 869 * 870 * The bug in the original code is a little tricky, so I'll describe 871 * what's wrong with it here. 872 * 873 * It is incorrect to simply unp_discard each entry for f_msgcount 874 * times -- consider the case of sockets A and B that contain 875 * references to each other. On a last close of some other socket, 876 * we trigger a gc since the number of outstanding rights (unp_rights) 877 * is non-zero. If during the sweep phase the gc code un_discards, 878 * we end up doing a (full) closef on the descriptor. A closef on A 879 * results in the following chain. Closef calls soo_close, which 880 * calls soclose. Soclose calls first (through the switch 881 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 882 * returns because the previous instance had set unp_gcing, and 883 * we return all the way back to soclose, which marks the socket 884 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 885 * to free up the rights that are queued in messages on the socket A, 886 * i.e., the reference on B. The sorflush calls via the dom_dispose 887 * switch unp_dispose, which unp_scans with unp_discard. This second 888 * instance of unp_discard just calls closef on B. 889 * 890 * Well, a similar chain occurs on B, resulting in a sorflush on B, 891 * which results in another closef on A. Unfortunately, A is already 892 * being closed, and the descriptor has already been marked with 893 * SS_NOFDREF, and soclose panics at this point. 894 * 895 * Here, we first take an extra reference to each inaccessible 896 * descriptor. Then, we call sorflush ourself, since we know 897 * it is a Unix domain socket anyhow. After we destroy all the 898 * rights carried in messages, we do a last closef to get rid 899 * of our extra reference. This is the last close, and the 900 * unp_detach etc will shut down the socket. 901 * 902 * 91/09/19, bsy@cs.cmu.edu 903 */ 904 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 905 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 906 fp != NULL; fp = nextfp) { 907 nextfp = LIST_NEXT(fp, f_list); 908 if (fp->f_count == 0) 909 continue; 910 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 911 *fpp++ = fp; 912 nunref++; 913 FREF(fp); 914 fp->f_count++; 915 } 916 } 917 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 918 if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL) 919 sorflush((struct socket *)(*fpp)->f_data); 920 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 921 (void) closef(*fpp, NULL); 922 free((caddr_t)extra_ref, M_FILE); 923 unp_gcing = 0; 924 } 925 926 void 927 unp_dispose(struct mbuf *m) 928 { 929 930 if (m) 931 unp_scan(m, unp_discard, 1); 932 } 933 934 void 935 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard) 936 { 937 struct mbuf *m; 938 struct file **rp, *fp; 939 struct cmsghdr *cm; 940 int i; 941 int qfds; 942 943 while (m0) { 944 for (m = m0; m; m = m->m_next) { 945 if (m->m_type == MT_CONTROL && 946 m->m_len >= sizeof(*cm)) { 947 cm = mtod(m, struct cmsghdr *); 948 if (cm->cmsg_level != SOL_SOCKET || 949 cm->cmsg_type != SCM_RIGHTS) 950 continue; 951 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm)) 952 / sizeof(struct file *); 953 rp = (struct file **)CMSG_DATA(cm); 954 for (i = 0; i < qfds; i++) { 955 fp = *rp; 956 if (discard) 957 *rp = 0; 958 (*op)(fp); 959 rp++; 960 } 961 break; /* XXX, but saves time */ 962 } 963 } 964 m0 = m0->m_nextpkt; 965 } 966 } 967 968 void 969 unp_mark(struct file *fp) 970 { 971 if (fp == NULL) 972 return; 973 974 if (fp->f_flag & FMARK) 975 return; 976 977 if (fp->f_flag & FDEFER) 978 return; 979 980 if (fp->f_type == DTYPE_SOCKET) { 981 unp_defer++; 982 fp->f_flag |= FDEFER; 983 } else { 984 fp->f_flag |= FMARK; 985 } 986 } 987 988 void 989 unp_discard(struct file *fp) 990 { 991 992 if (fp == NULL) 993 return; 994 FREF(fp); 995 fp->f_msgcount--; 996 unp_rights--; 997 (void) closef(fp, NULL); 998 } 999