xref: /netbsd-src/sys/kern/uipc_usrreq.c (revision 76dfffe33547c37f8bdd446e3e4ab0f3c16cea4b)
1 /*	$NetBSD: uipc_usrreq.c,v 1.23 1996/05/23 17:07:03 mycroft Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
36  */
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/filedesc.h>
42 #include <sys/domain.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/unpcb.h>
47 #include <sys/un.h>
48 #include <sys/namei.h>
49 #include <sys/vnode.h>
50 #include <sys/file.h>
51 #include <sys/stat.h>
52 #include <sys/mbuf.h>
53 
54 /*
55  * Unix communications domain.
56  *
57  * TODO:
58  *	SEQPACKET, RDM
59  *	rethink name space problems
60  *	need a proper out-of-band
61  */
/*
 * Placeholder address reported for sockets that have no bound name;
 * only the length and family members are explicitly initialized.
 */
struct	sockaddr_un sun_noname = { sizeof(sun_noname), AF_UNIX };
/* Next fake inode number; PRU_SENSE hands these out on first use. */
ino_t	unp_ino;			/* prototype for fake inode numbers */
64 
65 int
66 unp_output(m, control, unp)
67 	struct mbuf *m, *control;
68 	struct unpcb *unp;
69 {
70 	struct socket *so2;
71 	struct sockaddr_un *sun;
72 
73 	so2 = unp->unp_conn->unp_socket;
74 	if (unp->unp_addr)
75 		sun = unp->unp_addr;
76 	else
77 		sun = &sun_noname;
78 	if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
79 	    control) == 0) {
80 		m_freem(control);
81 		m_freem(m);
82 		return (EINVAL);
83 	} else {
84 		sorwakeup(so2);
85 		return (0);
86 	}
87 }
88 
89 void
90 unp_setsockaddr(unp, nam)
91 	register struct unpcb *unp;
92 	struct mbuf *nam;
93 {
94 	struct sockaddr_un *sun;
95 
96 	if (unp->unp_addr)
97 		sun = unp->unp_addr;
98 	else
99 		sun = &sun_noname;
100 	nam->m_len = sun->sun_len;
101 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
102 }
103 
104 void
105 unp_setpeeraddr(unp, nam)
106 	register struct unpcb *unp;
107 	struct mbuf *nam;
108 {
109 	struct sockaddr_un *sun;
110 
111 	if (unp->unp_conn && unp->unp_conn->unp_addr)
112 		sun = unp->unp_conn->unp_addr;
113 	else
114 		sun = &sun_noname;
115 	nam->m_len = sun->sun_len;
116 	bcopy(sun, mtod(nam, caddr_t), (size_t)nam->m_len);
117 }
118 
119 /*ARGSUSED*/
120 int
121 uipc_usrreq(so, req, m, nam, control, p)
122 	struct socket *so;
123 	int req;
124 	struct mbuf *m, *nam, *control;
125 	struct proc *p;
126 {
127 	struct unpcb *unp = sotounpcb(so);
128 	register struct socket *so2;
129 	register int error = 0;
130 
131 	if (req == PRU_CONTROL)
132 		return (EOPNOTSUPP);
133 
134 #ifdef DIAGNOSTIC
135 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
136 		panic("uipc_usrreq: unexpected control mbuf");
137 #endif
138 	if (unp == 0 && req != PRU_ATTACH) {
139 		error = EINVAL;
140 		goto release;
141 	}
142 
143 	switch (req) {
144 
145 	case PRU_ATTACH:
146 		if (unp != 0) {
147 			error = EISCONN;
148 			break;
149 		}
150 		error = unp_attach(so);
151 		break;
152 
153 	case PRU_DETACH:
154 		unp_detach(unp);
155 		break;
156 
157 	case PRU_BIND:
158 		error = unp_bind(unp, nam, p);
159 		break;
160 
161 	case PRU_LISTEN:
162 		if (unp->unp_vnode == 0)
163 			error = EINVAL;
164 		break;
165 
166 	case PRU_CONNECT:
167 		error = unp_connect(so, nam, p);
168 		break;
169 
170 	case PRU_CONNECT2:
171 		error = unp_connect2(so, (struct socket *)nam);
172 		break;
173 
174 	case PRU_DISCONNECT:
175 		unp_disconnect(unp);
176 		break;
177 
178 	case PRU_ACCEPT:
179 		unp_setpeeraddr(unp, nam);
180 		break;
181 
182 	case PRU_SHUTDOWN:
183 		socantsendmore(so);
184 		unp_shutdown(unp);
185 		break;
186 
187 	case PRU_RCVD:
188 		switch (so->so_type) {
189 
190 		case SOCK_DGRAM:
191 			panic("uipc 1");
192 			/*NOTREACHED*/
193 
194 		case SOCK_STREAM:
195 #define	rcv (&so->so_rcv)
196 #define snd (&so2->so_snd)
197 			if (unp->unp_conn == 0)
198 				break;
199 			so2 = unp->unp_conn->unp_socket;
200 			/*
201 			 * Adjust backpressure on sender
202 			 * and wakeup any waiting to write.
203 			 */
204 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
205 			unp->unp_mbcnt = rcv->sb_mbcnt;
206 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
207 			unp->unp_cc = rcv->sb_cc;
208 			sowwakeup(so2);
209 #undef snd
210 #undef rcv
211 			break;
212 
213 		default:
214 			panic("uipc 2");
215 		}
216 		break;
217 
218 	case PRU_SEND:
219 		if (control && (error = unp_internalize(control, p)))
220 			break;
221 		switch (so->so_type) {
222 
223 		case SOCK_DGRAM: {
224 			if (nam) {
225 				if ((so->so_state & SS_ISCONNECTED) != 0) {
226 					error = EISCONN;
227 					goto die;
228 				}
229 				error = unp_connect(so, nam, p);
230 				if (error) {
231 				die:
232 					m_freem(control);
233 					m_freem(m);
234 					break;
235 				}
236 			} else {
237 				if ((so->so_state & SS_ISCONNECTED) == 0) {
238 					error = ENOTCONN;
239 					goto die;
240 				}
241 			}
242 			error = unp_output(m, control, unp);
243 			if (nam)
244 				unp_disconnect(unp);
245 			break;
246 		}
247 
248 		case SOCK_STREAM:
249 #define	rcv (&so2->so_rcv)
250 #define	snd (&so->so_snd)
251 			if (unp->unp_conn == 0)
252 				panic("uipc 3");
253 			so2 = unp->unp_conn->unp_socket;
254 			/*
255 			 * Send to paired receive port, and then reduce
256 			 * send buffer hiwater marks to maintain backpressure.
257 			 * Wake up readers.
258 			 */
259 			if (control) {
260 				if (sbappendcontrol(rcv, m, control) == 0)
261 					m_freem(control);
262 			} else
263 				sbappend(rcv, m);
264 			snd->sb_mbmax -=
265 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
266 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
267 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
268 			unp->unp_conn->unp_cc = rcv->sb_cc;
269 			sorwakeup(so2);
270 #undef snd
271 #undef rcv
272 			break;
273 
274 		default:
275 			panic("uipc 4");
276 		}
277 		break;
278 
279 	case PRU_ABORT:
280 		unp_drop(unp, ECONNABORTED);
281 		break;
282 
283 	case PRU_SENSE:
284 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
285 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
286 			so2 = unp->unp_conn->unp_socket;
287 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
288 		}
289 		((struct stat *) m)->st_dev = NODEV;
290 		if (unp->unp_ino == 0)
291 			unp->unp_ino = unp_ino++;
292 		((struct stat *) m)->st_ino = unp->unp_ino;
293 		return (0);
294 
295 	case PRU_RCVOOB:
296 		error = EOPNOTSUPP;
297 		break;
298 
299 	case PRU_SENDOOB:
300 		m_freem(control);
301 		m_freem(m);
302 		error = EOPNOTSUPP;
303 		break;
304 
305 	case PRU_SOCKADDR:
306 		unp_setsockaddr(unp, nam);
307 		break;
308 
309 	case PRU_PEERADDR:
310 		unp_setpeeraddr(unp, nam);
311 		break;
312 
313 	default:
314 		panic("piusrreq");
315 	}
316 
317 release:
318 	return (error);
319 }
320 
321 /*
322  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
323  * for stream sockets, although the total for sender and receiver is
324  * actually only PIPSIZ.
325  * Datagram sockets really use the sendspace as the maximum datagram size,
326  * and don't really want to reserve the sendspace.  Their recvspace should
327  * be large enough for at least one max-size datagram plus address.
328  */
#define	PIPSIZ	4096
/* Buffer reservations passed to soreserve() by unp_attach(). */
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

/*
 * Incremented for each descriptor internalized into a message and
 * decremented when one is externalized or discarded; a non-zero value
 * makes unp_detach() run the garbage collector.
 */
int	unp_rights;			/* file descriptors in flight */
336 
337 int
338 unp_attach(so)
339 	struct socket *so;
340 {
341 	register struct unpcb *unp;
342 	int error;
343 
344 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
345 		switch (so->so_type) {
346 
347 		case SOCK_STREAM:
348 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
349 			break;
350 
351 		case SOCK_DGRAM:
352 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
353 			break;
354 
355 		default:
356 			panic("unp_attach");
357 		}
358 		if (error)
359 			return (error);
360 	}
361 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
362 	if (unp == NULL)
363 		return (ENOBUFS);
364 	bzero((caddr_t)unp, sizeof(*unp));
365 	unp->unp_socket = so;
366 	so->so_pcb = unp;
367 	return (0);
368 }
369 
370 void
371 unp_detach(unp)
372 	register struct unpcb *unp;
373 {
374 
375 	if (unp->unp_vnode) {
376 		unp->unp_vnode->v_socket = 0;
377 		vrele(unp->unp_vnode);
378 		unp->unp_vnode = 0;
379 	}
380 	if (unp->unp_conn)
381 		unp_disconnect(unp);
382 	while (unp->unp_refs)
383 		unp_drop(unp->unp_refs, ECONNRESET);
384 	soisdisconnected(unp->unp_socket);
385 	unp->unp_socket->so_pcb = 0;
386 	if (unp->unp_addr)
387 		m_freem(dtom(unp->unp_addr));
388 	if (unp_rights) {
389 		/*
390 		 * Normally the receive buffer is flushed later,
391 		 * in sofree, but if our receive buffer holds references
392 		 * to descriptors that are now garbage, we will dispose
393 		 * of those descriptor references after the garbage collector
394 		 * gets them (resulting in a "panic: closef: count < 0").
395 		 */
396 		sorflush(unp->unp_socket);
397 		free(unp, M_PCB);
398 		unp_gc();
399 	} else
400 		free(unp, M_PCB);
401 }
402 
403 int
404 unp_bind(unp, nam, p)
405 	struct unpcb *unp;
406 	struct mbuf *nam;
407 	struct proc *p;
408 {
409 	struct sockaddr_un *sun = mtod(nam, struct sockaddr_un *);
410 	register struct vnode *vp;
411 	struct vattr vattr;
412 	int error;
413 	struct nameidata nd;
414 
415 	if (unp->unp_vnode != 0)
416 		return (EINVAL);
417 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
418 	    sun->sun_path, p);
419 	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
420 		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
421 			return (EINVAL);
422 	} else
423 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
424 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
425 	if ((error = namei(&nd)) != 0)
426 		return (error);
427 	vp = nd.ni_vp;
428 	if (vp != NULL) {
429 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
430 		if (nd.ni_dvp == vp)
431 			vrele(nd.ni_dvp);
432 		else
433 			vput(nd.ni_dvp);
434 		vrele(vp);
435 		return (EADDRINUSE);
436 	}
437 	VATTR_NULL(&vattr);
438 	vattr.va_type = VSOCK;
439 	vattr.va_mode = ACCESSPERMS;
440 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
441 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
442 	if (error)
443 		return (error);
444 	vp = nd.ni_vp;
445 	vp->v_socket = unp->unp_socket;
446 	unp->unp_vnode = vp;
447 	unp->unp_addr =
448 	    mtod(m_copy(nam, 0, (int)M_COPYALL), struct sockaddr_un *);
449 	VOP_UNLOCK(vp);
450 	return (0);
451 }
452 
/*
 * Connect socket `so' to the socket bound at the pathname carried in
 * the address mbuf `nam', on behalf of process `p'.
 *
 * For protocols flagged PR_CONNREQUIRED the target must be accepting
 * connections; a new socket is spawned from it with sonewconn(),
 * given a copy of the listener's address, and becomes the actual peer.
 *
 * Returns 0 on success; EINVAL if the path cannot be NUL-terminated;
 * ENOTSOCK if the name is not a socket; ECONNREFUSED if nothing is
 * bound there or the connection cannot be queued; EPROTOTYPE on a
 * socket type mismatch; ENOBUFS if the listener's address cannot be
 * copied; otherwise the error from namei(), VOP_ACCESS() or
 * unp_connect2().
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *sun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	struct mbuf *addr;
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p);
	/*
	 * Make sure the path is NUL-terminated: if the address fills the
	 * mbuf its last byte must already be 0, otherwise a terminator is
	 * stored just past the address.
	 */
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
			return (EINVAL);
	} else
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		/*
		 * Copy the listener's address before spawning the new
		 * socket, so that running out of mbufs can be reported
		 * without having to tear a half-built connection down.
		 */
		addr = NULL;
		unp2 = sotounpcb(so2);
		if (unp2->unp_addr) {
			addr = m_copy(dtom(unp2->unp_addr), 0,
			    (int)M_COPYALL);
			if (addr == NULL) {
				error = ENOBUFS;
				goto bad;
			}
		}
		if ((so3 = sonewconn(so2, 0)) == 0) {
			if (addr != NULL)
				m_freem(addr);
			error = ECONNREFUSED;
			goto bad;
		}
		unp3 = sotounpcb(so3);
		if (addr != NULL)
			unp3->unp_addr = mtod(addr, struct sockaddr_un *);
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}
508 
509 int
510 unp_connect2(so, so2)
511 	register struct socket *so;
512 	register struct socket *so2;
513 {
514 	register struct unpcb *unp = sotounpcb(so);
515 	register struct unpcb *unp2;
516 
517 	if (so2->so_type != so->so_type)
518 		return (EPROTOTYPE);
519 	unp2 = sotounpcb(so2);
520 	unp->unp_conn = unp2;
521 	switch (so->so_type) {
522 
523 	case SOCK_DGRAM:
524 		unp->unp_nextref = unp2->unp_refs;
525 		unp2->unp_refs = unp;
526 		soisconnected(so);
527 		break;
528 
529 	case SOCK_STREAM:
530 		unp2->unp_conn = unp;
531 		soisconnected(so);
532 		soisconnected(so2);
533 		break;
534 
535 	default:
536 		panic("unp_connect2");
537 	}
538 	return (0);
539 }
540 
541 void
542 unp_disconnect(unp)
543 	struct unpcb *unp;
544 {
545 	register struct unpcb *unp2 = unp->unp_conn;
546 
547 	if (unp2 == 0)
548 		return;
549 	unp->unp_conn = 0;
550 	switch (unp->unp_socket->so_type) {
551 
552 	case SOCK_DGRAM:
553 		if (unp2->unp_refs == unp)
554 			unp2->unp_refs = unp->unp_nextref;
555 		else {
556 			unp2 = unp2->unp_refs;
557 			for (;;) {
558 				if (unp2 == 0)
559 					panic("unp_disconnect");
560 				if (unp2->unp_nextref == unp)
561 					break;
562 				unp2 = unp2->unp_nextref;
563 			}
564 			unp2->unp_nextref = unp->unp_nextref;
565 		}
566 		unp->unp_nextref = 0;
567 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
568 		break;
569 
570 	case SOCK_STREAM:
571 		soisdisconnected(unp->unp_socket);
572 		unp2->unp_conn = 0;
573 		soisdisconnected(unp2->unp_socket);
574 		break;
575 	}
576 }
577 
#ifdef notdef
/*
 * Abort by detaching the pcb.  Compiled out (notdef); the PRU_ABORT
 * request in uipc_usrreq() calls unp_drop() instead.
 */
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
586 
587 void
588 unp_shutdown(unp)
589 	struct unpcb *unp;
590 {
591 	struct socket *so;
592 
593 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
594 	    (so = unp->unp_conn->unp_socket))
595 		socantrcvmore(so);
596 }
597 
598 void
599 unp_drop(unp, errno)
600 	struct unpcb *unp;
601 	int errno;
602 {
603 	struct socket *so = unp->unp_socket;
604 
605 	so->so_error = errno;
606 	unp_disconnect(unp);
607 	if (so->so_head) {
608 		so->so_pcb = 0;
609 		sofree(so);
610 		if (unp->unp_addr)
611 			m_freem(dtom(unp->unp_addr));
612 		free(unp, M_PCB);
613 	}
614 }
615 
#ifdef notdef
/* Empty drain hook; compiled out (notdef). */
unp_drain()
{

}
#endif
622 
623 int
624 unp_externalize(rights)
625 	struct mbuf *rights;
626 {
627 	struct proc *p = curproc;		/* XXX */
628 	register int i;
629 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
630 	register struct file **rp = (struct file **)(cm + 1);
631 	register struct file *fp;
632 	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
633 	int f;
634 
635 	if (!fdavail(p, newfds)) {
636 		for (i = 0; i < newfds; i++) {
637 			fp = *rp;
638 			unp_discard(fp);
639 			*rp++ = 0;
640 		}
641 		return (EMSGSIZE);
642 	}
643 	for (i = 0; i < newfds; i++) {
644 		if (fdalloc(p, 0, &f))
645 			panic("unp_externalize");
646 		fp = *rp;
647 		p->p_fd->fd_ofiles[f] = fp;
648 		fp->f_msgcount--;
649 		unp_rights--;
650 		*(int *)rp++ = f;
651 	}
652 	return (0);
653 }
654 
655 int
656 unp_internalize(control, p)
657 	struct mbuf *control;
658 	struct proc *p;
659 {
660 	struct filedesc *fdp = p->p_fd;
661 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
662 	register struct file **rp;
663 	register struct file *fp;
664 	register int i, fd;
665 	int oldfds;
666 
667 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
668 	    cm->cmsg_len != control->m_len)
669 		return (EINVAL);
670 	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
671 	rp = (struct file **)(cm + 1);
672 	for (i = 0; i < oldfds; i++) {
673 		fd = *(int *)rp++;
674 		if ((unsigned)fd >= fdp->fd_nfiles ||
675 		    fdp->fd_ofiles[fd] == NULL)
676 			return (EBADF);
677 	}
678 	rp = (struct file **)(cm + 1);
679 	for (i = 0; i < oldfds; i++) {
680 		fp = fdp->fd_ofiles[*(int *)rp];
681 		*rp++ = fp;
682 		fp->f_count++;
683 		fp->f_msgcount++;
684 		unp_rights++;
685 	}
686 	return (0);
687 }
688 
/*
 * Garbage collector state: unp_defer counts the files currently
 * flagged FDEFER (marked but not yet scanned); unp_gcing is non-zero
 * while unp_gc() is running, so that nested calls return at once.
 */
int	unp_defer, unp_gcing;
extern	struct domain unixdomain;
691 
692 void
693 unp_gc()
694 {
695 	register struct file *fp, *nextfp;
696 	register struct socket *so;
697 	struct file **extra_ref, **fpp;
698 	int nunref, i;
699 
700 	if (unp_gcing)
701 		return;
702 	unp_gcing = 1;
703 	unp_defer = 0;
704 	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
705 		fp->f_flag &= ~(FMARK|FDEFER);
706 	do {
707 		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
708 			if (fp->f_count == 0)
709 				continue;
710 			if (fp->f_flag & FDEFER) {
711 				fp->f_flag &= ~FDEFER;
712 				unp_defer--;
713 			} else {
714 				if (fp->f_flag & FMARK)
715 					continue;
716 				if (fp->f_count == fp->f_msgcount)
717 					continue;
718 				fp->f_flag |= FMARK;
719 			}
720 			if (fp->f_type != DTYPE_SOCKET ||
721 			    (so = (struct socket *)fp->f_data) == 0)
722 				continue;
723 			if (so->so_proto->pr_domain != &unixdomain ||
724 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
725 				continue;
726 #ifdef notdef
727 			if (so->so_rcv.sb_flags & SB_LOCK) {
728 				/*
729 				 * This is problematical; it's not clear
730 				 * we need to wait for the sockbuf to be
731 				 * unlocked (on a uniprocessor, at least),
732 				 * and it's also not clear what to do
733 				 * if sbwait returns an error due to receipt
734 				 * of a signal.  If sbwait does return
735 				 * an error, we'll go into an infinite
736 				 * loop.  Delete all of this for now.
737 				 */
738 				(void) sbwait(&so->so_rcv);
739 				goto restart;
740 			}
741 #endif
742 			unp_scan(so->so_rcv.sb_mb, unp_mark);
743 		}
744 	} while (unp_defer);
745 	/*
746 	 * We grab an extra reference to each of the file table entries
747 	 * that are not otherwise accessible and then free the rights
748 	 * that are stored in messages on them.
749 	 *
750 	 * The bug in the orginal code is a little tricky, so I'll describe
751 	 * what's wrong with it here.
752 	 *
753 	 * It is incorrect to simply unp_discard each entry for f_msgcount
754 	 * times -- consider the case of sockets A and B that contain
755 	 * references to each other.  On a last close of some other socket,
756 	 * we trigger a gc since the number of outstanding rights (unp_rights)
757 	 * is non-zero.  If during the sweep phase the gc code un_discards,
758 	 * we end up doing a (full) closef on the descriptor.  A closef on A
759 	 * results in the following chain.  Closef calls soo_close, which
760 	 * calls soclose.   Soclose calls first (through the switch
761 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
762 	 * returns because the previous instance had set unp_gcing, and
763 	 * we return all the way back to soclose, which marks the socket
764 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
765 	 * to free up the rights that are queued in messages on the socket A,
766 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
767 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
768 	 * instance of unp_discard just calls closef on B.
769 	 *
770 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
771 	 * which results in another closef on A.  Unfortunately, A is already
772 	 * being closed, and the descriptor has already been marked with
773 	 * SS_NOFDREF, and soclose panics at this point.
774 	 *
775 	 * Here, we first take an extra reference to each inaccessible
776 	 * descriptor.  Then, we call sorflush ourself, since we know
777 	 * it is a Unix domain socket anyhow.  After we destroy all the
778 	 * rights carried in messages, we do a last closef to get rid
779 	 * of our extra reference.  This is the last close, and the
780 	 * unp_detach etc will shut down the socket.
781 	 *
782 	 * 91/09/19, bsy@cs.cmu.edu
783 	 */
784 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
785 	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
786 	    fp = nextfp) {
787 		nextfp = fp->f_list.le_next;
788 		if (fp->f_count == 0)
789 			continue;
790 		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
791 			*fpp++ = fp;
792 			nunref++;
793 			fp->f_count++;
794 		}
795 	}
796 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
797 		sorflush((struct socket *)(*fpp)->f_data);
798 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
799 		(void) closef(*fpp, (struct proc *)0);
800 	free((caddr_t)extra_ref, M_FILE);
801 	unp_gcing = 0;
802 }
803 
804 void
805 unp_dispose(m)
806 	struct mbuf *m;
807 {
808 
809 	if (m)
810 		unp_scan(m, unp_discard);
811 }
812 
813 void
814 unp_scan(m0, op)
815 	register struct mbuf *m0;
816 	void (*op) __P((struct file *));
817 {
818 	register struct mbuf *m;
819 	register struct file **rp;
820 	register struct cmsghdr *cm;
821 	register int i;
822 	int qfds;
823 
824 	while (m0) {
825 		for (m = m0; m; m = m->m_next)
826 			if (m->m_type == MT_CONTROL &&
827 			    m->m_len >= sizeof(*cm)) {
828 				cm = mtod(m, struct cmsghdr *);
829 				if (cm->cmsg_level != SOL_SOCKET ||
830 				    cm->cmsg_type != SCM_RIGHTS)
831 					continue;
832 				qfds = (cm->cmsg_len - sizeof *cm)
833 						/ sizeof (struct file *);
834 				rp = (struct file **)(cm + 1);
835 				for (i = 0; i < qfds; i++)
836 					(*op)(*rp++);
837 				break;		/* XXX, but saves time */
838 			}
839 		m0 = m0->m_act;
840 	}
841 }
842 
843 void
844 unp_mark(fp)
845 	struct file *fp;
846 {
847 
848 	if (fp->f_flag & FMARK)
849 		return;
850 	unp_defer++;
851 	fp->f_flag |= (FMARK|FDEFER);
852 }
853 
854 void
855 unp_discard(fp)
856 	struct file *fp;
857 {
858 
859 	fp->f_msgcount--;
860 	unp_rights--;
861 	(void) closef(fp, (struct proc *)0);
862 }
863