xref: /netbsd-src/sys/kern/uipc_usrreq.c (revision df0caa2637da0538ecdf6b878c4d08e684b43d8f)
1 /*	$NetBSD: uipc_usrreq.c,v 1.83 2005/06/16 14:36:42 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1982, 1986, 1989, 1991, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  * 3. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  *
68  *	@(#)uipc_usrreq.c	8.9 (Berkeley) 5/14/95
69  */
70 
71 /*
72  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
73  *
74  * Redistribution and use in source and binary forms, with or without
75  * modification, are permitted provided that the following conditions
76  * are met:
77  * 1. Redistributions of source code must retain the above copyright
78  *    notice, this list of conditions and the following disclaimer.
79  * 2. Redistributions in binary form must reproduce the above copyright
80  *    notice, this list of conditions and the following disclaimer in the
81  *    documentation and/or other materials provided with the distribution.
82  * 3. All advertising materials mentioning features or use of this software
83  *    must display the following acknowledgement:
84  *	This product includes software developed by the University of
85  *	California, Berkeley and its contributors.
86  * 4. Neither the name of the University nor the names of its contributors
87  *    may be used to endorse or promote products derived from this software
88  *    without specific prior written permission.
89  *
90  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
91  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
94  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100  * SUCH DAMAGE.
101  *
102  *	@(#)uipc_usrreq.c	8.9 (Berkeley) 5/14/95
103  */
104 
105 #include <sys/cdefs.h>
106 __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.83 2005/06/16 14:36:42 yamt Exp $");
107 
108 #include <sys/param.h>
109 #include <sys/systm.h>
110 #include <sys/proc.h>
111 #include <sys/filedesc.h>
112 #include <sys/domain.h>
113 #include <sys/protosw.h>
114 #include <sys/socket.h>
115 #include <sys/socketvar.h>
116 #include <sys/unpcb.h>
117 #include <sys/un.h>
118 #include <sys/namei.h>
119 #include <sys/vnode.h>
120 #include <sys/file.h>
121 #include <sys/stat.h>
122 #include <sys/mbuf.h>
123 
124 /*
125  * Unix communications domain.
126  *
127  * TODO:
128  *	SEQPACKET, RDM
129  *	rethink name space problems
130  *	need a proper out-of-band
131  */
/*
 * Placeholder address used by this file whenever a pcb has no address of
 * its own (unp_addr is NULL); only the length and family are initialised.
 */
132 const struct	sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL };
133 ino_t	unp_ino;			/* next fake inode number; consumed by PRU_SENSE */
134 
/* Defined later in this file: appends an SCM_CREDS message to a control chain. */
135 struct mbuf *unp_addsockcred(struct proc *, struct mbuf *);
136 
137 int
138 unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
139 	struct proc *p)
140 {
141 	struct socket *so2;
142 	const struct sockaddr_un *sun;
143 
144 	so2 = unp->unp_conn->unp_socket;
145 	if (unp->unp_addr)
146 		sun = unp->unp_addr;
147 	else
148 		sun = &sun_noname;
149 	if (unp->unp_conn->unp_flags & UNP_WANTCRED)
150 		control = unp_addsockcred(p, control);
151 	if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
152 	    control) == 0) {
153 		m_freem(control);
154 		m_freem(m);
155 		so2->so_rcv.sb_overflowed++;
156 		return (ENOBUFS);
157 	} else {
158 		sorwakeup(so2);
159 		return (0);
160 	}
161 }
162 
163 void
164 unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
165 {
166 	const struct sockaddr_un *sun;
167 
168 	if (unp->unp_addr)
169 		sun = unp->unp_addr;
170 	else
171 		sun = &sun_noname;
172 	nam->m_len = sun->sun_len;
173 	if (nam->m_len > MLEN)
174 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
175 	memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
176 }
177 
178 void
179 unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
180 {
181 	const struct sockaddr_un *sun;
182 
183 	if (unp->unp_conn && unp->unp_conn->unp_addr)
184 		sun = unp->unp_conn->unp_addr;
185 	else
186 		sun = &sun_noname;
187 	nam->m_len = sun->sun_len;
188 	if (nam->m_len > MLEN)
189 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
190 	memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
191 }
192 
/*
 * User-request dispatcher for local-domain sockets.  Switches on `req'
 * (a PRU_* code) and returns 0 or an errno value.
 *
 *	so	socket the request applies to; its pcb comes from sotounpcb()
 *	req	PRU_* request code; PRU_CONTROL is rejected with EOPNOTSUPP
 *	m	data mbuf for PRU_SEND / PRU_SENDOOB; reinterpreted as a
 *		struct stat pointer for PRU_SENSE
 *	nam	address mbuf; reinterpreted as a struct socket pointer for
 *		PRU_CONNECT2
 *	control	control mbuf; under DIAGNOSTIC it is a panic to pass one
 *		with any request other than PRU_SEND / PRU_SENDOOB
 *	p	process on whose behalf the request is made
 *
 * The `die' label lives inside the PRU_SEND datagram case and frees both
 * `m' and `control'.  It is also the target of the unp_internalize()
 * failure jump, which is taken before the socket-type switch is entered.
 */
193 /*ARGSUSED*/
194 int
195 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
196 	struct mbuf *control, struct proc *p)
197 {
198 	struct unpcb *unp = sotounpcb(so);
199 	struct socket *so2;
200 	u_int newhiwat;
201 	int error = 0;
202 
203 	if (req == PRU_CONTROL)
204 		return (EOPNOTSUPP);
205 
206 #ifdef DIAGNOSTIC
207 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
208 		panic("uipc_usrreq: unexpected control mbuf");
209 #endif
	/* Every request except PRU_ATTACH needs an existing pcb. */
210 	if (unp == 0 && req != PRU_ATTACH) {
211 		error = EINVAL;
212 		goto release;
213 	}
214 
215 	switch (req) {
216 
217 	case PRU_ATTACH:
218 		if (unp != 0) {
219 			error = EISCONN;
220 			break;
221 		}
222 		error = unp_attach(so);
223 		break;
224 
225 	case PRU_DETACH:
226 		unp_detach(unp);
227 		break;
228 
229 	case PRU_BIND:
230 		error = unp_bind(unp, nam, p);
231 		break;
232 
233 	case PRU_LISTEN:
		/* Refused unless the pcb has a vnode, which unp_bind() sets. */
234 		if (unp->unp_vnode == 0)
235 			error = EINVAL;
236 		break;
237 
238 	case PRU_CONNECT:
239 		error = unp_connect(so, nam, p);
240 		break;
241 
242 	case PRU_CONNECT2:
243 		error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
244 		break;
245 
246 	case PRU_DISCONNECT:
247 		unp_disconnect(unp);
248 		break;
249 
250 	case PRU_ACCEPT:
251 		unp_setpeeraddr(unp, nam);
252 		/*
253 		 * Mark the initiating STREAM socket as connected *ONLY*
254 		 * after it's been accepted.  This prevents a client from
255 		 * overrunning a server and receiving ECONNREFUSED.
256 		 */
257 		if (unp->unp_conn != NULL &&
258 		    (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
259 			soisconnected(unp->unp_conn->unp_socket);
260 		break;
261 
262 	case PRU_SHUTDOWN:
263 		socantsendmore(so);
264 		unp_shutdown(unp);
265 		break;
266 
267 	case PRU_RCVD:
268 		switch (so->so_type) {
269 
270 		case SOCK_DGRAM:
271 			panic("uipc 1");
272 			/*NOTREACHED*/
273 
274 		case SOCK_STREAM:
			/* Here rcv is OUR receive buffer and snd the PEER's send buffer. */
275 #define	rcv (&so->so_rcv)
276 #define snd (&so2->so_snd)
277 			if (unp->unp_conn == 0)
278 				break;
279 			so2 = unp->unp_conn->unp_socket;
280 			/*
281 			 * Adjust backpressure on sender
282 			 * and wakeup any waiting to write.
283 			 */
284 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
285 			unp->unp_mbcnt = rcv->sb_mbcnt;
286 			newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
287 			(void)chgsbsize(so2->so_uidinfo,
288 			    &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
289 			unp->unp_cc = rcv->sb_cc;
290 			sowwakeup(so2);
291 #undef snd
292 #undef rcv
293 			break;
294 
295 		default:
296 			panic("uipc 2");
297 		}
298 		break;
299 
300 	case PRU_SEND:
301 		/*
302 		 * Note: unp_internalize() rejects any control message
303 		 * other than SCM_RIGHTS, and only allows one.  This
304 		 * has the side-effect of preventing a caller from
305 		 * forging SCM_CREDS.
306 		 */
307 		if (control && (error = unp_internalize(control, p))) {
308 			goto die;
309 		}
310 		switch (so->so_type) {
311 
312 		case SOCK_DGRAM: {
			/*
			 * A destination address means a temporary connection
			 * for this one datagram; it is torn down again after
			 * unp_output() below.
			 */
313 			if (nam) {
314 				if ((so->so_state & SS_ISCONNECTED) != 0) {
315 					error = EISCONN;
316 					goto die;
317 				}
318 				error = unp_connect(so, nam, p);
319 				if (error) {
320 				die:
321 					m_freem(control);
322 					m_freem(m);
323 					break;
324 				}
325 			} else {
326 				if ((so->so_state & SS_ISCONNECTED) == 0) {
327 					error = ENOTCONN;
328 					goto die;
329 				}
330 			}
331 			error = unp_output(m, control, unp, p);
332 			if (nam)
333 				unp_disconnect(unp);
334 			break;
335 		}
336 
337 		case SOCK_STREAM:
			/* Here rcv is the PEER's receive buffer and snd OUR send buffer. */
338 #define	rcv (&so2->so_rcv)
339 #define	snd (&so->so_snd)
340 			if (unp->unp_conn == 0)
341 				panic("uipc 3");
342 			so2 = unp->unp_conn->unp_socket;
343 			if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
344 				/*
345 				 * Credentials are passed only once on
346 				 * SOCK_STREAM.
347 				 */
348 				unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
349 				control = unp_addsockcred(p, control);
350 			}
351 			/*
352 			 * Send to paired receive port, and then reduce
353 			 * send buffer hiwater marks to maintain backpressure.
354 			 * Wake up readers.
355 			 */
356 			if (control) {
357 				if (sbappendcontrol(rcv, m, control) == 0)
358 					m_freem(control);
359 			} else
360 				sbappend(rcv, m);
361 			snd->sb_mbmax -=
362 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
363 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
364 			newhiwat = snd->sb_hiwat -
365 			    (rcv->sb_cc - unp->unp_conn->unp_cc);
366 			(void)chgsbsize(so->so_uidinfo,
367 			    &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
368 			unp->unp_conn->unp_cc = rcv->sb_cc;
369 			sorwakeup(so2);
370 #undef snd
371 #undef rcv
372 			break;
373 
374 		default:
375 			panic("uipc 4");
376 		}
377 		break;
378 
379 	case PRU_ABORT:
380 		unp_drop(unp, ECONNABORTED);
381 
382 #ifdef DIAGNOSTIC
383 		if (so->so_pcb == 0)
384 			panic("uipc 5: drop killed pcb");
385 #endif
386 		unp_detach(unp);
387 		break;
388 
389 	case PRU_SENSE:
		/*
		 * `m' is really a struct stat here.  A fake inode number is
		 * assigned on first use.  This case returns directly instead
		 * of going through the release label.
		 */
390 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
391 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
392 			so2 = unp->unp_conn->unp_socket;
393 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
394 		}
395 		((struct stat *) m)->st_dev = NODEV;
396 		if (unp->unp_ino == 0)
397 			unp->unp_ino = unp_ino++;
398 		((struct stat *) m)->st_atimespec =
399 		    ((struct stat *) m)->st_mtimespec =
400 		    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
401 		((struct stat *) m)->st_ino = unp->unp_ino;
402 		return (0);
403 
404 	case PRU_RCVOOB:
405 		error = EOPNOTSUPP;
406 		break;
407 
408 	case PRU_SENDOOB:
		/* Out-of-band data is not supported: discard what was passed in. */
409 		m_freem(control);
410 		m_freem(m);
411 		error = EOPNOTSUPP;
412 		break;
413 
414 	case PRU_SOCKADDR:
415 		unp_setsockaddr(unp, nam);
416 		break;
417 
418 	case PRU_PEERADDR:
419 		unp_setpeeraddr(unp, nam);
420 		break;
421 
422 	default:
423 		panic("piusrreq");
424 	}
425 
426 release:
427 	return (error);
428 }
429 
430 /*
431  * Unix domain socket option processing.
432  */
433 int
434 uipc_ctloutput(int op, struct socket *so, int level, int optname,
435 	struct mbuf **mp)
436 {
437 	struct unpcb *unp = sotounpcb(so);
438 	struct mbuf *m = *mp;
439 	int optval = 0, error = 0;
440 
441 	if (level != 0) {
442 		error = EINVAL;
443 		if (op == PRCO_SETOPT && m)
444 			(void) m_free(m);
445 	} else switch (op) {
446 
447 	case PRCO_SETOPT:
448 		switch (optname) {
449 		case LOCAL_CREDS:
450 		case LOCAL_CONNWAIT:
451 			if (m == NULL || m->m_len != sizeof(int))
452 				error = EINVAL;
453 			else {
454 				optval = *mtod(m, int *);
455 				switch (optname) {
456 #define	OPTSET(bit) \
457 	if (optval) \
458 		unp->unp_flags |= (bit); \
459 	else \
460 		unp->unp_flags &= ~(bit);
461 
462 				case LOCAL_CREDS:
463 					OPTSET(UNP_WANTCRED);
464 					break;
465 				case LOCAL_CONNWAIT:
466 					OPTSET(UNP_CONNWAIT);
467 					break;
468 				}
469 			}
470 			break;
471 #undef OPTSET
472 
473 		default:
474 			error = ENOPROTOOPT;
475 			break;
476 		}
477 		if (m)
478 			(void) m_free(m);
479 		break;
480 
481 	case PRCO_GETOPT:
482 		switch (optname) {
483 		case LOCAL_CREDS:
484 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
485 			m->m_len = sizeof(int);
486 			switch (optname) {
487 
488 #define	OPTBIT(bit)	(unp->unp_flags & (bit) ? 1 : 0)
489 
490 			case LOCAL_CREDS:
491 				optval = OPTBIT(UNP_WANTCRED);
492 				break;
493 			}
494 			*mtod(m, int *) = optval;
495 			break;
496 #undef OPTBIT
497 
498 		default:
499 			error = ENOPROTOOPT;
500 			break;
501 		}
502 		break;
503 	}
504 	return (error);
505 }
506 
507 /*
508  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
509  * for stream sockets, although the total for sender and receiver is
510  * actually only PIPSIZ.
511  * Datagram sockets really use the sendspace as the maximum datagram size,
512  * and don't really want to reserve the sendspace.  Their recvspace should
513  * be large enough for at least one max-size datagram plus address.
514  */
515 #define	PIPSIZ	4096
/* Stream socket reservations handed to soreserve() by unp_attach(). */
516 u_long	unpst_sendspace = PIPSIZ;
517 u_long	unpst_recvspace = PIPSIZ;
/* Datagram socket reservations handed to soreserve() by unp_attach(). */
518 u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
519 u_long	unpdg_recvspace = 4*1024;
520 
/* Incremented per descriptor in unp_internalize(), decremented in unp_externalize(). */
521 int	unp_rights;			/* file descriptors in flight */
522 
523 int
524 unp_attach(struct socket *so)
525 {
526 	struct unpcb *unp;
527 	struct timeval tv;
528 	int error;
529 
530 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
531 		switch (so->so_type) {
532 
533 		case SOCK_STREAM:
534 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
535 			break;
536 
537 		case SOCK_DGRAM:
538 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
539 			break;
540 
541 		default:
542 			panic("unp_attach");
543 		}
544 		if (error)
545 			return (error);
546 	}
547 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
548 	if (unp == NULL)
549 		return (ENOBUFS);
550 	memset((caddr_t)unp, 0, sizeof(*unp));
551 	unp->unp_socket = so;
552 	so->so_pcb = unp;
553 	microtime(&tv);
554 	TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
555 	return (0);
556 }
557 
558 void
559 unp_detach(struct unpcb *unp)
560 {
561 
562 	if (unp->unp_vnode) {
563 		unp->unp_vnode->v_socket = 0;
564 		vrele(unp->unp_vnode);
565 		unp->unp_vnode = 0;
566 	}
567 	if (unp->unp_conn)
568 		unp_disconnect(unp);
569 	while (unp->unp_refs)
570 		unp_drop(unp->unp_refs, ECONNRESET);
571 	soisdisconnected(unp->unp_socket);
572 	unp->unp_socket->so_pcb = 0;
573 	if (unp->unp_addr)
574 		free(unp->unp_addr, M_SONAME);
575 	if (unp_rights) {
576 		/*
577 		 * Normally the receive buffer is flushed later,
578 		 * in sofree, but if our receive buffer holds references
579 		 * to descriptors that are now garbage, we will dispose
580 		 * of those descriptor references after the garbage collector
581 		 * gets them (resulting in a "panic: closef: count < 0").
582 		 */
583 		sorflush(unp->unp_socket);
584 		free(unp, M_PCB);
585 		unp_gc();
586 	} else
587 		free(unp, M_PCB);
588 }
589 
590 int
591 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
592 {
593 	struct sockaddr_un *sun;
594 	struct vnode *vp;
595 	struct mount *mp;
596 	struct vattr vattr;
597 	size_t addrlen;
598 	int error;
599 	struct nameidata nd;
600 
601 	if (unp->unp_vnode != 0)
602 		return (EINVAL);
603 
604 	/*
605 	 * Allocate the new sockaddr.  We have to allocate one
606 	 * extra byte so that we can ensure that the pathname
607 	 * is nul-terminated.
608 	 */
609 	addrlen = nam->m_len + 1;
610 	sun = malloc(addrlen, M_SONAME, M_WAITOK);
611 	m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
612 	*(((char *)sun) + nam->m_len) = '\0';
613 
614 restart:
615 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
616 	    sun->sun_path, p);
617 
618 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
619 	if ((error = namei(&nd)) != 0)
620 		goto bad;
621 	vp = nd.ni_vp;
622 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
623 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
624 		if (nd.ni_dvp == vp)
625 			vrele(nd.ni_dvp);
626 		else
627 			vput(nd.ni_dvp);
628 		vrele(vp);
629 		if (vp != NULL) {
630 			error = EADDRINUSE;
631 			goto bad;
632 		}
633 		error = vn_start_write(NULL, &mp,
634 		    V_WAIT | V_SLEEPONLY | V_PCATCH);
635 		if (error)
636 			goto bad;
637 		goto restart;
638 	}
639 	VATTR_NULL(&vattr);
640 	vattr.va_type = VSOCK;
641 	vattr.va_mode = ACCESSPERMS;
642 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
643 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
644 	vn_finished_write(mp, 0);
645 	if (error)
646 		goto bad;
647 	vp = nd.ni_vp;
648 	vp->v_socket = unp->unp_socket;
649 	unp->unp_vnode = vp;
650 	unp->unp_addrlen = addrlen;
651 	unp->unp_addr = sun;
652 	VOP_UNLOCK(vp, 0);
653 	return (0);
654 
655  bad:
656 	free(sun, M_SONAME);
657 	return (error);
658 }
659 
/*
 * Connect socket `so' to the local-domain socket bound at the path given
 * in `nam'.
 *
 * The path is copied into a temporary, nul-terminated sockaddr and looked
 * up.  The target must be a VSOCK vnode writable by `p' with a socket of
 * the same type attached.  For connection-oriented protocols the target
 * must be accepting connections; a new socket is spawned from it with
 * sonewconn(), inherits the listener's address and flags, and becomes the
 * actual peer.
 *
 * Returns 0 or an errno: the namei()/VOP_ACCESS() error, ENOTSOCK,
 * ECONNREFUSED, EPROTOTYPE, or whatever unp_connect2() returns.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *path;
	struct vnode *vp;
	struct socket *peer, *child;
	struct unpcb *lpcb, *cpcb;
	size_t pathlen;
	int rv;
	struct nameidata nd;

	/* One extra byte guarantees the pathname is nul-terminated. */
	pathlen = nam->m_len + 1;
	path = malloc(pathlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (caddr_t)path);
	((char *)path)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    path->sun_path, p);

	rv = namei(&nd);
	if (rv != 0)
		goto freeaddr;

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		rv = ENOTSOCK;
		goto unlock;
	}
	rv = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (rv != 0)
		goto unlock;

	peer = vp->v_socket;
	if (peer == 0) {
		rv = ECONNREFUSED;
		goto unlock;
	}
	if (so->so_type != peer->so_type) {
		rv = EPROTOTYPE;
		goto unlock;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			rv = ECONNREFUSED;
			goto unlock;
		}
		child = sonewconn(peer, 0);
		if (child == 0) {
			rv = ECONNREFUSED;
			goto unlock;
		}
		lpcb = sotounpcb(peer);
		cpcb = sotounpcb(child);
		/* The spawned socket reports the listener's bound address. */
		if (lpcb->unp_addr) {
			cpcb->unp_addr = malloc(lpcb->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(cpcb->unp_addr, lpcb->unp_addr,
			    lpcb->unp_addrlen);
			cpcb->unp_addrlen = lpcb->unp_addrlen;
		}
		cpcb->unp_flags = lpcb->unp_flags;
		peer = child;
	}
	rv = unp_connect2(so, peer, PRU_CONNECT);

 unlock:
	vput(vp);
 freeaddr:
	free(path, M_SONAME);
	return (rv);
}
727 
728 int
729 unp_connect2(struct socket *so, struct socket *so2, int req)
730 {
731 	struct unpcb *unp = sotounpcb(so);
732 	struct unpcb *unp2;
733 
734 	if (so2->so_type != so->so_type)
735 		return (EPROTOTYPE);
736 	unp2 = sotounpcb(so2);
737 	unp->unp_conn = unp2;
738 	switch (so->so_type) {
739 
740 	case SOCK_DGRAM:
741 		unp->unp_nextref = unp2->unp_refs;
742 		unp2->unp_refs = unp;
743 		soisconnected(so);
744 		break;
745 
746 	case SOCK_STREAM:
747 		unp2->unp_conn = unp;
748 		if (req == PRU_CONNECT &&
749 		    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
750 			soisconnecting(so);
751 		else
752 			soisconnected(so);
753 		soisconnected(so2);
754 		break;
755 
756 	default:
757 		panic("unp_connect2");
758 	}
759 	return (0);
760 }
761 
762 void
763 unp_disconnect(struct unpcb *unp)
764 {
765 	struct unpcb *unp2 = unp->unp_conn;
766 
767 	if (unp2 == 0)
768 		return;
769 	unp->unp_conn = 0;
770 	switch (unp->unp_socket->so_type) {
771 
772 	case SOCK_DGRAM:
773 		if (unp2->unp_refs == unp)
774 			unp2->unp_refs = unp->unp_nextref;
775 		else {
776 			unp2 = unp2->unp_refs;
777 			for (;;) {
778 				if (unp2 == 0)
779 					panic("unp_disconnect");
780 				if (unp2->unp_nextref == unp)
781 					break;
782 				unp2 = unp2->unp_nextref;
783 			}
784 			unp2->unp_nextref = unp->unp_nextref;
785 		}
786 		unp->unp_nextref = 0;
787 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
788 		break;
789 
790 	case SOCK_STREAM:
791 		soisdisconnected(unp->unp_socket);
792 		unp2->unp_conn = 0;
793 		soisdisconnected(unp2->unp_socket);
794 		break;
795 	}
796 }
797 
/* Compiled out (notdef): an abort routine that would simply detach the pcb. */
798 #ifdef notdef
799 unp_abort(struct unpcb *unp)
800 {
801 	unp_detach(unp);
802 }
803 #endif
804 
805 void
806 unp_shutdown(struct unpcb *unp)
807 {
808 	struct socket *so;
809 
810 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
811 	    (so = unp->unp_conn->unp_socket))
812 		socantrcvmore(so);
813 }
814 
815 void
816 unp_drop(struct unpcb *unp, int errno)
817 {
818 	struct socket *so = unp->unp_socket;
819 
820 	so->so_error = errno;
821 	unp_disconnect(unp);
822 	if (so->so_head) {
823 		so->so_pcb = 0;
824 		sofree(so);
825 		if (unp->unp_addr)
826 			free(unp->unp_addr, M_SONAME);
827 		free(unp, M_PCB);
828 	}
829 }
830 
/* Compiled out (notdef): empty drain hook. */
831 #ifdef notdef
832 unp_drain(void)
833 {
834 
835 }
836 #endif
837 
838 int
839 unp_externalize(struct mbuf *rights, struct proc *p)
840 {
841 	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
842 	int i, *fdp;
843 	struct file **rp;
844 	struct file *fp;
845 	int nfds, error = 0;
846 
847 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
848 	    sizeof(struct file *);
849 	rp = (struct file **)CMSG_DATA(cm);
850 
851 	fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
852 
853 	/* Make sure the recipient should be able to see the descriptors.. */
854 	if (p->p_cwdi->cwdi_rdir != NULL) {
855 		rp = (struct file **)CMSG_DATA(cm);
856 		for (i = 0; i < nfds; i++) {
857 			fp = *rp++;
858 			/*
859 			 * If we are in a chroot'ed directory, and
860 			 * someone wants to pass us a directory, make
861 			 * sure it's inside the subtree we're allowed
862 			 * to access.
863 			 */
864 			if (fp->f_type == DTYPE_VNODE) {
865 				struct vnode *vp = (struct vnode *)fp->f_data;
866 				if ((vp->v_type == VDIR) &&
867 				    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
868 					error = EPERM;
869 					break;
870 				}
871 			}
872 		}
873 	}
874 
875  restart:
876 	rp = (struct file **)CMSG_DATA(cm);
877 	if (error != 0) {
878 		for (i = 0; i < nfds; i++) {
879 			fp = *rp;
880 			/*
881 			 * zero the pointer before calling unp_discard,
882 			 * since it may end up in unp_gc()..
883 			 */
884 			*rp++ = 0;
885 			unp_discard(fp);
886 		}
887 		goto out;
888 	}
889 
890 	/*
891 	 * First loop -- allocate file descriptor table slots for the
892 	 * new descriptors.
893 	 */
894 	for (i = 0; i < nfds; i++) {
895 		fp = *rp++;
896 		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
897 			/*
898 			 * Back out what we've done so far.
899 			 */
900 			for (--i; i >= 0; i--)
901 				fdremove(p->p_fd, fdp[i]);
902 
903 			if (error == ENOSPC) {
904 				fdexpand(p);
905 				error = 0;
906 			} else {
907 				/*
908 				 * This is the error that has historically
909 				 * been returned, and some callers may
910 				 * expect it.
911 				 */
912 				error = EMSGSIZE;
913 			}
914 			goto restart;
915 		}
916 
917 		/*
918 		 * Make the slot reference the descriptor so that
919 		 * fdalloc() works properly.. We finalize it all
920 		 * in the loop below.
921 		 */
922 		p->p_fd->fd_ofiles[fdp[i]] = fp;
923 	}
924 
925 	/*
926 	 * Now that adding them has succeeded, update all of the
927 	 * descriptor passing state.
928 	 */
929 	rp = (struct file **)CMSG_DATA(cm);
930 	for (i = 0; i < nfds; i++) {
931 		fp = *rp++;
932 		fp->f_msgcount--;
933 		unp_rights--;
934 	}
935 
936 	/*
937 	 * Copy temporary array to message and adjust length, in case of
938 	 * transition from large struct file pointers to ints.
939 	 */
940 	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
941 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
942 	rights->m_len = CMSG_SPACE(nfds * sizeof(int));
943  out:
944 	free(fdp, M_TEMP);
945 	return (error);
946 }
947 
948 int
949 unp_internalize(struct mbuf *control, struct proc *p)
950 {
951 	struct filedesc *fdescp = p->p_fd;
952 	struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
953 	struct file **rp, **files;
954 	struct file *fp;
955 	int i, fd, *fdp;
956 	int nfds;
957 	u_int neededspace;
958 
959 	/* Sanity check the control message header */
960 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
961 	    cm->cmsg_len != control->m_len)
962 		return (EINVAL);
963 
964 	/* Verify that the file descriptors are valid */
965 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
966 	fdp = (int *)CMSG_DATA(cm);
967 	for (i = 0; i < nfds; i++) {
968 		fd = *fdp++;
969 		if ((fp = fd_getfile(fdescp, fd)) == NULL)
970 			return (EBADF);
971 		simple_unlock(&fp->f_slock);
972 	}
973 
974 	/* Make sure we have room for the struct file pointers */
975 	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
976 	    control->m_len;
977 	if (neededspace > M_TRAILINGSPACE(control)) {
978 
979 		/* allocate new space and copy header into it */
980 		newcm = malloc(
981 		    CMSG_SPACE(nfds * sizeof(struct file *)),
982 		    M_MBUF, M_WAITOK);
983 		if (newcm == NULL)
984 			return (E2BIG);
985 		memcpy(newcm, cm, sizeof(struct cmsghdr));
986 		files = (struct file **)CMSG_DATA(newcm);
987 	} else {
988 		/* we can convert in-place */
989 		newcm = NULL;
990 		files = (struct file **)CMSG_DATA(cm);
991 	}
992 
993 	/*
994 	 * Transform the file descriptors into struct file pointers, in
995 	 * reverse order so that if pointers are bigger than ints, the
996 	 * int won't get until we're done.
997 	 */
998 	fdp = (int *)CMSG_DATA(cm) + nfds - 1;
999 	rp = files + nfds - 1;
1000 	for (i = 0; i < nfds; i++) {
1001 		fp = fdescp->fd_ofiles[*fdp--];
1002 		simple_lock(&fp->f_slock);
1003 #ifdef DIAGNOSTIC
1004 		if (fp->f_iflags & FIF_WANTCLOSE)
1005 			panic("unp_internalize: file already closed");
1006 #endif
1007 		*rp-- = fp;
1008 		fp->f_count++;
1009 		fp->f_msgcount++;
1010 		simple_unlock(&fp->f_slock);
1011 		unp_rights++;
1012 	}
1013 
1014 	if (newcm) {
1015 		if (control->m_flags & M_EXT)
1016 			MEXTREMOVE(control);
1017 		MEXTADD(control, newcm,
1018 		    CMSG_SPACE(nfds * sizeof(struct file *)),
1019 		    M_MBUF, NULL, NULL);
1020 		cm = newcm;
1021 	}
1022 
1023 	/* adjust message & mbuf to note amount of space actually used. */
1024 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
1025 	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
1026 
1027 	return (0);
1028 }
1029 
1030 struct mbuf *
1031 unp_addsockcred(struct proc *p, struct mbuf *control)
1032 {
1033 	struct cmsghdr *cmp;
1034 	struct sockcred *sc;
1035 	struct mbuf *m, *n;
1036 	int len, space, i;
1037 
1038 	len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1039 	space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1040 
1041 	m = m_get(M_WAIT, MT_CONTROL);
1042 	if (space > MLEN) {
1043 		if (space > MCLBYTES)
1044 			MEXTMALLOC(m, space, M_WAITOK);
1045 		else
1046 			m_clget(m, M_WAIT);
1047 		if ((m->m_flags & M_EXT) == 0) {
1048 			m_free(m);
1049 			return (control);
1050 		}
1051 	}
1052 
1053 	m->m_len = space;
1054 	m->m_next = NULL;
1055 	cmp = mtod(m, struct cmsghdr *);
1056 	sc = (struct sockcred *)CMSG_DATA(cmp);
1057 	cmp->cmsg_len = len;
1058 	cmp->cmsg_level = SOL_SOCKET;
1059 	cmp->cmsg_type = SCM_CREDS;
1060 	sc->sc_uid = p->p_cred->p_ruid;
1061 	sc->sc_euid = p->p_ucred->cr_uid;
1062 	sc->sc_gid = p->p_cred->p_rgid;
1063 	sc->sc_egid = p->p_ucred->cr_gid;
1064 	sc->sc_ngroups = p->p_ucred->cr_ngroups;
1065 	for (i = 0; i < sc->sc_ngroups; i++)
1066 		sc->sc_groups[i] = p->p_ucred->cr_groups[i];
1067 
1068 	/*
1069 	 * If a control message already exists, append us to the end.
1070 	 */
1071 	if (control != NULL) {
1072 		for (n = control; n->m_next != NULL; n = n->m_next)
1073 			;
1074 		n->m_next = m;
1075 	} else
1076 		control = m;
1077 
1078 	return (control);
1079 }
1080 
/*
 * State used by unp_gc(): unp_gcing is set on entry and makes a nested
 * call return immediately; unp_defer is reset on entry and decremented
 * each time a file's FDEFER flag is cleared during the scan.
 */
1081 int	unp_defer, unp_gcing;
/* Compared against a socket's pr_domain in unp_gc(). */
1082 extern	struct domain unixdomain;
1083 
1084 /*
1085  * Comment added long after the fact explaining what's going on here.
1086  * Do a mark-sweep GC of file descriptors on the system, to free up
1087  * any which are caught in flight to an about-to-be-closed socket.
1088  *
1089  * Traditional mark-sweep gc's start at the "root", and mark
1090  * everything reachable from the root (which, in our case would be the
1091  * process table).  The mark bits are cleared during the sweep.
1092  *
1093  * XXX For some inexplicable reason (perhaps because the file
1094  * descriptor tables used to live in the u area which could be swapped
1095  * out and thus hard to reach), we do multiple scans over the set of
1096  * descriptors, using use *two* mark bits per object (DEFER and MARK).
1097  * Whenever we find a descriptor which references other descriptors,
1098  * the ones it references are marked with both bits, and we iterate
1099  * over the whole file table until there are no more DEFER bits set.
1100  * We also make an extra pass *before* the GC to clear the mark bits,
1101  * which could have been cleared at almost no cost during the previous
1102  * sweep.
1103  *
1104  * XXX MP: this needs to run with locks such that no other thread of
1105  * control can create or destroy references to file descriptors. it
1106  * may be necessary to defer the GC until later (when the locking
1107  * situation is more hospitable); it may be necessary to push this
1108  * into a separate thread.
1109  */
1110 void
1111 unp_gc(void)
1112 {
1113 	struct file *fp, *nextfp;
1114 	struct socket *so, *so1;
1115 	struct file **extra_ref, **fpp;
1116 	int nunref, i;
1117 
1118 	if (unp_gcing)
1119 		return;
1120 	unp_gcing = 1;
1121 	unp_defer = 0;
1122 
1123 	/* Clear mark bits */
1124 	LIST_FOREACH(fp, &filehead, f_list)
1125 		fp->f_flag &= ~(FMARK|FDEFER);
1126 
1127 	/*
1128 	 * Iterate over the set of descriptors, marking ones believed
1129 	 * (based on refcount) to be referenced from a process, and
1130 	 * marking for rescan descriptors which are queued on a socket.
1131 	 */
1132 	do {
1133 		LIST_FOREACH(fp, &filehead, f_list) {
1134 			if (fp->f_flag & FDEFER) {
1135 				fp->f_flag &= ~FDEFER;
1136 				unp_defer--;
1137 #ifdef DIAGNOSTIC
1138 				if (fp->f_count == 0)
1139 					panic("unp_gc: deferred unreferenced socket");
1140 #endif
1141 			} else {
1142 				if (fp->f_count == 0)
1143 					continue;
1144 				if (fp->f_flag & FMARK)
1145 					continue;
1146 				if (fp->f_count == fp->f_msgcount)
1147 					continue;
1148 			}
1149 			fp->f_flag |= FMARK;
1150 
1151 			if (fp->f_type != DTYPE_SOCKET ||
1152 			    (so = (struct socket *)fp->f_data) == 0)
1153 				continue;
1154 			if (so->so_proto->pr_domain != &unixdomain ||
1155 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
1156 				continue;
1157 #ifdef notdef
1158 			if (so->so_rcv.sb_flags & SB_LOCK) {
1159 				/*
1160 				 * This is problematical; it's not clear
1161 				 * we need to wait for the sockbuf to be
1162 				 * unlocked (on a uniprocessor, at least),
1163 				 * and it's also not clear what to do
1164 				 * if sbwait returns an error due to receipt
1165 				 * of a signal.  If sbwait does return
1166 				 * an error, we'll go into an infinite
1167 				 * loop.  Delete all of this for now.
1168 				 */
1169 				(void) sbwait(&so->so_rcv);
1170 				goto restart;
1171 			}
1172 #endif
1173 			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1174 			/*
1175 			 * mark descriptors referenced from sockets queued on the accept queue as well.
1176 			 */
1177 			if (so->so_options & SO_ACCEPTCONN) {
1178 				TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1179 					unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1180 				}
1181 				TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1182 					unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1183 				}
1184 			}
1185 
1186 		}
1187 	} while (unp_defer);
1188 	/*
1189 	 * Sweep pass.  Find unmarked descriptors, and free them.
1190 	 *
1191 	 * We grab an extra reference to each of the file table entries
1192 	 * that are not otherwise accessible and then free the rights
1193 	 * that are stored in messages on them.
1194 	 *
1195 	 * The bug in the original code is a little tricky, so I'll describe
1196 	 * what's wrong with it here.
1197 	 *
1198 	 * It is incorrect to simply unp_discard each entry for f_msgcount
1199 	 * times -- consider the case of sockets A and B that contain
1200 	 * references to each other.  On a last close of some other socket,
1201 	 * we trigger a gc since the number of outstanding rights (unp_rights)
1202 	 * is non-zero.  If during the sweep phase the gc code un_discards,
1203 	 * we end up doing a (full) closef on the descriptor.  A closef on A
1204 	 * results in the following chain.  Closef calls soo_close, which
1205 	 * calls soclose.   Soclose calls first (through the switch
1206 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
1207 	 * returns because the previous instance had set unp_gcing, and
1208 	 * we return all the way back to soclose, which marks the socket
1209 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
1210 	 * to free up the rights that are queued in messages on the socket A,
1211 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
1212 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
1213 	 * instance of unp_discard just calls closef on B.
1214 	 *
1215 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
1216 	 * which results in another closef on A.  Unfortunately, A is already
1217 	 * being closed, and the descriptor has already been marked with
1218 	 * SS_NOFDREF, and soclose panics at this point.
1219 	 *
1220 	 * Here, we first take an extra reference to each inaccessible
1221 	 * descriptor.  Then, if the inaccessible descriptor is a
1222 	 * socket, we call sorflush in case it is a Unix domain
1223 	 * socket.  After we destroy all the rights carried in
1224 	 * messages, we do a last closef to get rid of our extra
1225 	 * reference.  This is the last close, and the unp_detach etc
1226 	 * will shut down the socket.
1227 	 *
1228 	 * 91/09/19, bsy@cs.cmu.edu
1229 	 */
1230 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
1231 	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1232 	    fp = nextfp) {
1233 		nextfp = LIST_NEXT(fp, f_list);
1234 		simple_lock(&fp->f_slock);
1235 		if (fp->f_count != 0 &&
1236 		    fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1237 			*fpp++ = fp;
1238 			nunref++;
1239 			fp->f_count++;
1240 		}
1241 		simple_unlock(&fp->f_slock);
1242 	}
1243 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1244 		fp = *fpp;
1245 		simple_lock(&fp->f_slock);
1246 		FILE_USE(fp);
1247 		if (fp->f_type == DTYPE_SOCKET)
1248 			sorflush((struct socket *)fp->f_data);
1249 		FILE_UNUSE(fp, NULL);
1250 	}
1251 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1252 		fp = *fpp;
1253 		simple_lock(&fp->f_slock);
1254 		FILE_USE(fp);
1255 		(void) closef(fp, (struct proc *)0);
1256 	}
1257 	free((caddr_t)extra_ref, M_FILE);
1258 	unp_gcing = 0;
1259 }
1260 
1261 void
1262 unp_dispose(struct mbuf *m)
1263 {
1264 
1265 	if (m)
1266 		unp_scan(m, unp_discard, 1);
1267 }
1268 
1269 void
1270 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
1271 {
1272 	struct mbuf *m;
1273 	struct file **rp;
1274 	struct cmsghdr *cm;
1275 	int i;
1276 	int qfds;
1277 
1278 	while (m0) {
1279 		for (m = m0; m; m = m->m_next) {
1280 			if (m->m_type == MT_CONTROL &&
1281 			    m->m_len >= sizeof(*cm)) {
1282 				cm = mtod(m, struct cmsghdr *);
1283 				if (cm->cmsg_level != SOL_SOCKET ||
1284 				    cm->cmsg_type != SCM_RIGHTS)
1285 					continue;
1286 				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
1287 				    / sizeof(struct file *);
1288 				rp = (struct file **)CMSG_DATA(cm);
1289 				for (i = 0; i < qfds; i++) {
1290 					struct file *fp = *rp;
1291 					if (discard)
1292 						*rp = 0;
1293 					(*op)(fp);
1294 					rp++;
1295 				}
1296 				break;		/* XXX, but saves time */
1297 			}
1298 		}
1299 		m0 = m0->m_nextpkt;
1300 	}
1301 }
1302 
1303 void
1304 unp_mark(struct file *fp)
1305 {
1306 	if (fp == NULL)
1307 		return;
1308 
1309 	if (fp->f_flag & FMARK)
1310 		return;
1311 
1312 	/* If we're already deferred, don't screw up the defer count */
1313 	if (fp->f_flag & FDEFER)
1314 		return;
1315 
1316 	/*
1317 	 * Minimize the number of deferrals...  Sockets are the only
1318 	 * type of descriptor which can hold references to another
1319 	 * descriptor, so just mark other descriptors, and defer
1320 	 * unmarked sockets for the next pass.
1321 	 */
1322 	if (fp->f_type == DTYPE_SOCKET) {
1323 		unp_defer++;
1324 		if (fp->f_count == 0)
1325 			panic("unp_mark: queued unref");
1326 		fp->f_flag |= FDEFER;
1327 	} else {
1328 		fp->f_flag |= FMARK;
1329 	}
1330 	return;
1331 }
1332 
1333 void
1334 unp_discard(struct file *fp)
1335 {
1336 	if (fp == NULL)
1337 		return;
1338 	simple_lock(&fp->f_slock);
1339 	fp->f_usecount++;	/* i.e. FILE_USE(fp) sans locking */
1340 	fp->f_msgcount--;
1341 	simple_unlock(&fp->f_slock);
1342 	unp_rights--;
1343 	(void) closef(fp, (struct proc *)0);
1344 }
1345