xref: /netbsd-src/sys/kern/uipc_usrreq.c (revision da5f4674a3fc214be3572d358b66af40ab9401e7)
1 /*	$NetBSD: uipc_usrreq.c,v 1.67 2003/08/07 16:32:00 agc Exp $	*/
2 
3 /*-
4  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1982, 1986, 1989, 1991, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions
46  * are met:
47  * 1. Redistributions of source code must retain the above copyright
48  *    notice, this list of conditions and the following disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  * 3. Neither the name of the University nor the names of its contributors
53  *    may be used to endorse or promote products derived from this software
54  *    without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66  * SUCH DAMAGE.
67  *
68  *	@(#)uipc_usrreq.c	8.9 (Berkeley) 5/14/95
69  */
70 
71 /*
72  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
73  *
74  * Redistribution and use in source and binary forms, with or without
75  * modification, are permitted provided that the following conditions
76  * are met:
77  * 1. Redistributions of source code must retain the above copyright
78  *    notice, this list of conditions and the following disclaimer.
79  * 2. Redistributions in binary form must reproduce the above copyright
80  *    notice, this list of conditions and the following disclaimer in the
81  *    documentation and/or other materials provided with the distribution.
82  * 3. All advertising materials mentioning features or use of this software
83  *    must display the following acknowledgement:
84  *	This product includes software developed by the University of
85  *	California, Berkeley and its contributors.
86  * 4. Neither the name of the University nor the names of its contributors
87  *    may be used to endorse or promote products derived from this software
88  *    without specific prior written permission.
89  *
90  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
91  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
94  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100  * SUCH DAMAGE.
101  *
102  *	@(#)uipc_usrreq.c	8.9 (Berkeley) 5/14/95
103  */
104 
105 #include <sys/cdefs.h>
106 __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.67 2003/08/07 16:32:00 agc Exp $");
107 
108 #include <sys/param.h>
109 #include <sys/systm.h>
110 #include <sys/proc.h>
111 #include <sys/filedesc.h>
112 #include <sys/domain.h>
113 #include <sys/protosw.h>
114 #include <sys/socket.h>
115 #include <sys/socketvar.h>
116 #include <sys/unpcb.h>
117 #include <sys/un.h>
118 #include <sys/namei.h>
119 #include <sys/vnode.h>
120 #include <sys/file.h>
121 #include <sys/stat.h>
122 #include <sys/mbuf.h>
123 
124 /*
125  * Unix communications domain.
126  *
127  * TODO:
128  *	SEQPACKET, RDM
129  *	rethink name space problems
130  *	need a proper out-of-band
131  */
132 struct	sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL };
133 ino_t	unp_ino;			/* prototype for fake inode numbers */
134 
135 struct mbuf *unp_addsockcred __P((struct proc *, struct mbuf *));
136 
137 int
138 unp_output(m, control, unp, p)
139 	struct mbuf *m, *control;
140 	struct unpcb *unp;
141 	struct proc *p;
142 {
143 	struct socket *so2;
144 	struct sockaddr_un *sun;
145 
146 	so2 = unp->unp_conn->unp_socket;
147 	if (unp->unp_addr)
148 		sun = unp->unp_addr;
149 	else
150 		sun = &sun_noname;
151 	if (unp->unp_conn->unp_flags & UNP_WANTCRED)
152 		control = unp_addsockcred(p, control);
153 	if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
154 	    control) == 0) {
155 		m_freem(control);
156 		m_freem(m);
157 		return (ENOBUFS);
158 	} else {
159 		sorwakeup(so2);
160 		return (0);
161 	}
162 }
163 
164 void
165 unp_setsockaddr(unp, nam)
166 	struct unpcb *unp;
167 	struct mbuf *nam;
168 {
169 	struct sockaddr_un *sun;
170 
171 	if (unp->unp_addr)
172 		sun = unp->unp_addr;
173 	else
174 		sun = &sun_noname;
175 	nam->m_len = sun->sun_len;
176 	if (nam->m_len > MLEN)
177 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
178 	memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
179 }
180 
181 void
182 unp_setpeeraddr(unp, nam)
183 	struct unpcb *unp;
184 	struct mbuf *nam;
185 {
186 	struct sockaddr_un *sun;
187 
188 	if (unp->unp_conn && unp->unp_conn->unp_addr)
189 		sun = unp->unp_conn->unp_addr;
190 	else
191 		sun = &sun_noname;
192 	nam->m_len = sun->sun_len;
193 	if (nam->m_len > MLEN)
194 		MEXTMALLOC(nam, nam->m_len, M_WAITOK);
195 	memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
196 }
197 
198 /*ARGSUSED*/
199 int
200 uipc_usrreq(so, req, m, nam, control, p)
201 	struct socket *so;
202 	int req;
203 	struct mbuf *m, *nam, *control;
204 	struct proc *p;
205 {
206 	struct unpcb *unp = sotounpcb(so);
207 	struct socket *so2;
208 	int error = 0;
209 
210 	if (req == PRU_CONTROL)
211 		return (EOPNOTSUPP);
212 
213 #ifdef DIAGNOSTIC
214 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
215 		panic("uipc_usrreq: unexpected control mbuf");
216 #endif
217 	if (unp == 0 && req != PRU_ATTACH) {
218 		error = EINVAL;
219 		goto release;
220 	}
221 
222 	switch (req) {
223 
224 	case PRU_ATTACH:
225 		if (unp != 0) {
226 			error = EISCONN;
227 			break;
228 		}
229 		error = unp_attach(so);
230 		break;
231 
232 	case PRU_DETACH:
233 		unp_detach(unp);
234 		break;
235 
236 	case PRU_BIND:
237 		error = unp_bind(unp, nam, p);
238 		break;
239 
240 	case PRU_LISTEN:
241 		if (unp->unp_vnode == 0)
242 			error = EINVAL;
243 		break;
244 
245 	case PRU_CONNECT:
246 		error = unp_connect(so, nam, p);
247 		break;
248 
249 	case PRU_CONNECT2:
250 		error = unp_connect2(so, (struct socket *)nam);
251 		break;
252 
253 	case PRU_DISCONNECT:
254 		unp_disconnect(unp);
255 		break;
256 
257 	case PRU_ACCEPT:
258 		unp_setpeeraddr(unp, nam);
259 		break;
260 
261 	case PRU_SHUTDOWN:
262 		socantsendmore(so);
263 		unp_shutdown(unp);
264 		break;
265 
266 	case PRU_RCVD:
267 		switch (so->so_type) {
268 
269 		case SOCK_DGRAM:
270 			panic("uipc 1");
271 			/*NOTREACHED*/
272 
273 		case SOCK_STREAM:
274 #define	rcv (&so->so_rcv)
275 #define snd (&so2->so_snd)
276 			if (unp->unp_conn == 0)
277 				break;
278 			so2 = unp->unp_conn->unp_socket;
279 			/*
280 			 * Adjust backpressure on sender
281 			 * and wakeup any waiting to write.
282 			 */
283 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
284 			unp->unp_mbcnt = rcv->sb_mbcnt;
285 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
286 			unp->unp_cc = rcv->sb_cc;
287 			sowwakeup(so2);
288 #undef snd
289 #undef rcv
290 			break;
291 
292 		default:
293 			panic("uipc 2");
294 		}
295 		break;
296 
297 	case PRU_SEND:
298 		/*
299 		 * Note: unp_internalize() rejects any control message
300 		 * other than SCM_RIGHTS, and only allows one.  This
301 		 * has the side-effect of preventing a caller from
302 		 * forging SCM_CREDS.
303 		 */
304 		if (control && (error = unp_internalize(control, p)))
305 			break;
306 		switch (so->so_type) {
307 
308 		case SOCK_DGRAM: {
309 			if (nam) {
310 				if ((so->so_state & SS_ISCONNECTED) != 0) {
311 					error = EISCONN;
312 					goto die;
313 				}
314 				error = unp_connect(so, nam, p);
315 				if (error) {
316 				die:
317 					m_freem(control);
318 					m_freem(m);
319 					break;
320 				}
321 			} else {
322 				if ((so->so_state & SS_ISCONNECTED) == 0) {
323 					error = ENOTCONN;
324 					goto die;
325 				}
326 			}
327 			error = unp_output(m, control, unp, p);
328 			if (nam)
329 				unp_disconnect(unp);
330 			break;
331 		}
332 
333 		case SOCK_STREAM:
334 #define	rcv (&so2->so_rcv)
335 #define	snd (&so->so_snd)
336 			if (unp->unp_conn == 0)
337 				panic("uipc 3");
338 			so2 = unp->unp_conn->unp_socket;
339 			if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
340 				/*
341 				 * Credentials are passed only once on
342 				 * SOCK_STREAM.
343 				 */
344 				unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
345 				control = unp_addsockcred(p, control);
346 			}
347 			/*
348 			 * Send to paired receive port, and then reduce
349 			 * send buffer hiwater marks to maintain backpressure.
350 			 * Wake up readers.
351 			 */
352 			if (control) {
353 				if (sbappendcontrol(rcv, m, control) == 0)
354 					m_freem(control);
355 			} else
356 				sbappend(rcv, m);
357 			snd->sb_mbmax -=
358 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
359 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
360 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
361 			unp->unp_conn->unp_cc = rcv->sb_cc;
362 			sorwakeup(so2);
363 #undef snd
364 #undef rcv
365 			break;
366 
367 		default:
368 			panic("uipc 4");
369 		}
370 		break;
371 
372 	case PRU_ABORT:
373 		unp_drop(unp, ECONNABORTED);
374 
375 #ifdef DIAGNOSTIC
376 		if (so->so_pcb == 0)
377 			panic("uipc 5: drop killed pcb");
378 #endif
379 		unp_detach(unp);
380 		break;
381 
382 	case PRU_SENSE:
383 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
384 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
385 			so2 = unp->unp_conn->unp_socket;
386 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
387 		}
388 		((struct stat *) m)->st_dev = NODEV;
389 		if (unp->unp_ino == 0)
390 			unp->unp_ino = unp_ino++;
391 		((struct stat *) m)->st_atimespec =
392 		    ((struct stat *) m)->st_mtimespec =
393 		    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
394 		((struct stat *) m)->st_ino = unp->unp_ino;
395 		return (0);
396 
397 	case PRU_RCVOOB:
398 		error = EOPNOTSUPP;
399 		break;
400 
401 	case PRU_SENDOOB:
402 		m_freem(control);
403 		m_freem(m);
404 		error = EOPNOTSUPP;
405 		break;
406 
407 	case PRU_SOCKADDR:
408 		unp_setsockaddr(unp, nam);
409 		break;
410 
411 	case PRU_PEERADDR:
412 		unp_setpeeraddr(unp, nam);
413 		break;
414 
415 	default:
416 		panic("piusrreq");
417 	}
418 
419 release:
420 	return (error);
421 }
422 
423 /*
424  * Unix domain socket option processing.
425  */
426 int
427 uipc_ctloutput(op, so, level, optname, mp)
428 	int op;
429 	struct socket *so;
430 	int level, optname;
431 	struct mbuf **mp;
432 {
433 	struct unpcb *unp = sotounpcb(so);
434 	struct mbuf *m = *mp;
435 	int optval = 0, error = 0;
436 
437 	if (level != 0) {
438 		error = EINVAL;
439 		if (op == PRCO_SETOPT && m)
440 			(void) m_free(m);
441 	} else switch (op) {
442 
443 	case PRCO_SETOPT:
444 		switch (optname) {
445 		case LOCAL_CREDS:
446 			if (m == NULL || m->m_len != sizeof(int))
447 				error = EINVAL;
448 			else {
449 				optval = *mtod(m, int *);
450 				switch (optname) {
451 #define	OPTSET(bit) \
452 	if (optval) \
453 		unp->unp_flags |= (bit); \
454 	else \
455 		unp->unp_flags &= ~(bit);
456 
457 				case LOCAL_CREDS:
458 					OPTSET(UNP_WANTCRED);
459 					break;
460 				}
461 			}
462 			break;
463 #undef OPTSET
464 
465 		default:
466 			error = ENOPROTOOPT;
467 			break;
468 		}
469 		if (m)
470 			(void) m_free(m);
471 		break;
472 
473 	case PRCO_GETOPT:
474 		switch (optname) {
475 		case LOCAL_CREDS:
476 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
477 			m->m_len = sizeof(int);
478 			switch (optname) {
479 
480 #define	OPTBIT(bit)	(unp->unp_flags & (bit) ? 1 : 0)
481 
482 			case LOCAL_CREDS:
483 				optval = OPTBIT(UNP_WANTCRED);
484 				break;
485 			}
486 			*mtod(m, int *) = optval;
487 			break;
488 #undef OPTBIT
489 
490 		default:
491 			error = ENOPROTOOPT;
492 			break;
493 		}
494 		break;
495 	}
496 	return (error);
497 }
498 
499 /*
500  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
501  * for stream sockets, although the total for sender and receiver is
502  * actually only PIPSIZ.
503  * Datagram sockets really use the sendspace as the maximum datagram size,
504  * and don't really want to reserve the sendspace.  Their recvspace should
505  * be large enough for at least one max-size datagram plus address.
506  */
507 #define	PIPSIZ	4096
508 u_long	unpst_sendspace = PIPSIZ;
509 u_long	unpst_recvspace = PIPSIZ;
510 u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
511 u_long	unpdg_recvspace = 4*1024;
512 
513 int	unp_rights;			/* file descriptors in flight */
514 
515 int
516 unp_attach(so)
517 	struct socket *so;
518 {
519 	struct unpcb *unp;
520 	struct timeval tv;
521 	int error;
522 
523 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
524 		switch (so->so_type) {
525 
526 		case SOCK_STREAM:
527 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
528 			break;
529 
530 		case SOCK_DGRAM:
531 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
532 			break;
533 
534 		default:
535 			panic("unp_attach");
536 		}
537 		if (error)
538 			return (error);
539 	}
540 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
541 	if (unp == NULL)
542 		return (ENOBUFS);
543 	memset((caddr_t)unp, 0, sizeof(*unp));
544 	unp->unp_socket = so;
545 	so->so_pcb = unp;
546 	microtime(&tv);
547 	TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
548 	return (0);
549 }
550 
551 void
552 unp_detach(unp)
553 	struct unpcb *unp;
554 {
555 
556 	if (unp->unp_vnode) {
557 		unp->unp_vnode->v_socket = 0;
558 		vrele(unp->unp_vnode);
559 		unp->unp_vnode = 0;
560 	}
561 	if (unp->unp_conn)
562 		unp_disconnect(unp);
563 	while (unp->unp_refs)
564 		unp_drop(unp->unp_refs, ECONNRESET);
565 	soisdisconnected(unp->unp_socket);
566 	unp->unp_socket->so_pcb = 0;
567 	if (unp->unp_addr)
568 		free(unp->unp_addr, M_SONAME);
569 	if (unp_rights) {
570 		/*
571 		 * Normally the receive buffer is flushed later,
572 		 * in sofree, but if our receive buffer holds references
573 		 * to descriptors that are now garbage, we will dispose
574 		 * of those descriptor references after the garbage collector
575 		 * gets them (resulting in a "panic: closef: count < 0").
576 		 */
577 		sorflush(unp->unp_socket);
578 		free(unp, M_PCB);
579 		unp_gc();
580 	} else
581 		free(unp, M_PCB);
582 }
583 
584 int
585 unp_bind(unp, nam, p)
586 	struct unpcb *unp;
587 	struct mbuf *nam;
588 	struct proc *p;
589 {
590 	struct sockaddr_un *sun;
591 	struct vnode *vp;
592 	struct vattr vattr;
593 	size_t addrlen;
594 	int error;
595 	struct nameidata nd;
596 
597 	if (unp->unp_vnode != 0)
598 		return (EINVAL);
599 
600 	/*
601 	 * Allocate the new sockaddr.  We have to allocate one
602 	 * extra byte so that we can ensure that the pathname
603 	 * is nul-terminated.
604 	 */
605 	addrlen = nam->m_len + 1;
606 	sun = malloc(addrlen, M_SONAME, M_WAITOK);
607 	m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
608 	*(((char *)sun) + nam->m_len) = '\0';
609 
610 	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
611 	    sun->sun_path, p);
612 
613 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
614 	if ((error = namei(&nd)) != 0)
615 		goto bad;
616 	vp = nd.ni_vp;
617 	if (vp != NULL) {
618 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
619 		if (nd.ni_dvp == vp)
620 			vrele(nd.ni_dvp);
621 		else
622 			vput(nd.ni_dvp);
623 		vrele(vp);
624 		error = EADDRINUSE;
625 		goto bad;
626 	}
627 	VATTR_NULL(&vattr);
628 	vattr.va_type = VSOCK;
629 	vattr.va_mode = ACCESSPERMS;
630 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
631 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
632 	if (error)
633 		goto bad;
634 	vp = nd.ni_vp;
635 	vp->v_socket = unp->unp_socket;
636 	unp->unp_vnode = vp;
637 	unp->unp_addrlen = addrlen;
638 	unp->unp_addr = sun;
639 	VOP_UNLOCK(vp, 0);
640 	return (0);
641 
642  bad:
643 	free(sun, M_SONAME);
644 	return (error);
645 }
646 
/*
 * unp_connect: connect `so' to the socket bound at the pathname in
 * `nam'.
 *
 *	so	connecting socket
 *	nam	mbuf holding the target sockaddr_un
 *	p	process performing the connect (needs write access)
 *
 * For connection-oriented protocols a new socket is spawned from the
 * listener with sonewconn(); it inherits a copy of the listener's
 * address and its flags, and becomes the actual peer.
 *
 * Returns 0 or an errno value: the namei()/VOP_ACCESS() error,
 * ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or whatever unp_connect2()
 * returns.
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct nameidata nd;
	struct sockaddr_un *addr;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	size_t addrsize;
	int error;

	/*
	 * Work on a temporary copy of the address with one extra byte,
	 * so the pathname is guaranteed to be nul-terminated.
	 */
	addrsize = nam->m_len + 1;
	addr = malloc(addrsize, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (caddr_t)addr);
	((char *)addr)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, addr->sun_path, p);

	error = namei(&nd);
	if (error != 0)
		goto free_addr;

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put_vnode;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto put_vnode;

	so2 = vp->v_socket;
	if (so2 == 0) {
		error = ECONNREFUSED;
		goto put_vnode;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_vnode;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto put_vnode;
		}
		so3 = sonewconn(so2, 0);
		if (so3 == 0) {
			error = ECONNREFUSED;
			goto put_vnode;
		}

		/* The new socket inherits the listener's name and flags. */
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr) {
			unp3->unp_addr = malloc(unp2->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(unp3->unp_addr, unp2->unp_addr,
			    unp2->unp_addrlen);
			unp3->unp_addrlen = unp2->unp_addrlen;
		}
		unp3->unp_flags = unp2->unp_flags;
		so2 = so3;
	}
	error = unp_connect2(so, so2);

 put_vnode:
	vput(vp);
 free_addr:
	free(addr, M_SONAME);
	return (error);
}
717 
718 int
719 unp_connect2(so, so2)
720 	struct socket *so;
721 	struct socket *so2;
722 {
723 	struct unpcb *unp = sotounpcb(so);
724 	struct unpcb *unp2;
725 
726 	if (so2->so_type != so->so_type)
727 		return (EPROTOTYPE);
728 	unp2 = sotounpcb(so2);
729 	unp->unp_conn = unp2;
730 	switch (so->so_type) {
731 
732 	case SOCK_DGRAM:
733 		unp->unp_nextref = unp2->unp_refs;
734 		unp2->unp_refs = unp;
735 		soisconnected(so);
736 		break;
737 
738 	case SOCK_STREAM:
739 		unp2->unp_conn = unp;
740 		soisconnected(so);
741 		soisconnected(so2);
742 		break;
743 
744 	default:
745 		panic("unp_connect2");
746 	}
747 	return (0);
748 }
749 
750 void
751 unp_disconnect(unp)
752 	struct unpcb *unp;
753 {
754 	struct unpcb *unp2 = unp->unp_conn;
755 
756 	if (unp2 == 0)
757 		return;
758 	unp->unp_conn = 0;
759 	switch (unp->unp_socket->so_type) {
760 
761 	case SOCK_DGRAM:
762 		if (unp2->unp_refs == unp)
763 			unp2->unp_refs = unp->unp_nextref;
764 		else {
765 			unp2 = unp2->unp_refs;
766 			for (;;) {
767 				if (unp2 == 0)
768 					panic("unp_disconnect");
769 				if (unp2->unp_nextref == unp)
770 					break;
771 				unp2 = unp2->unp_nextref;
772 			}
773 			unp2->unp_nextref = unp->unp_nextref;
774 		}
775 		unp->unp_nextref = 0;
776 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
777 		break;
778 
779 	case SOCK_STREAM:
780 		soisdisconnected(unp->unp_socket);
781 		unp2->unp_conn = 0;
782 		soisdisconnected(unp2->unp_socket);
783 		break;
784 	}
785 }
786 
#ifdef notdef
/*
 * unp_abort: abort a connection by detaching the PCB.
 * Only compiled when `notdef' is defined.
 */
unp_abort(unp)
	struct unpcb *unp;
{
	unp_detach(unp);
}
#endif
795 
796 void
797 unp_shutdown(unp)
798 	struct unpcb *unp;
799 {
800 	struct socket *so;
801 
802 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
803 	    (so = unp->unp_conn->unp_socket))
804 		socantrcvmore(so);
805 }
806 
807 void
808 unp_drop(unp, errno)
809 	struct unpcb *unp;
810 	int errno;
811 {
812 	struct socket *so = unp->unp_socket;
813 
814 	so->so_error = errno;
815 	unp_disconnect(unp);
816 	if (so->so_head) {
817 		so->so_pcb = 0;
818 		sofree(so);
819 		if (unp->unp_addr)
820 			free(unp->unp_addr, M_SONAME);
821 		free(unp, M_PCB);
822 	}
823 }
824 
#ifdef notdef
/*
 * unp_drain: empty placeholder; only compiled when `notdef' is defined.
 */
unp_drain()
{
}
#endif
831 
832 int
833 unp_externalize(rights)
834 	struct mbuf *rights;
835 {
836 	struct proc *p = curproc;		/* XXX */
837 	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
838 	int i, *fdp;
839 	struct file **rp;
840 	struct file *fp;
841 	int nfds, error = 0;
842 
843 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
844 	    sizeof(struct file *);
845 	rp = (struct file **)CMSG_DATA(cm);
846 
847 	fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
848 
849 	/* Make sure the recipient should be able to see the descriptors.. */
850 	if (p->p_cwdi->cwdi_rdir != NULL) {
851 		rp = (struct file **)CMSG_DATA(cm);
852 		for (i = 0; i < nfds; i++) {
853 			fp = *rp++;
854 			/*
855 			 * If we are in a chroot'ed directory, and
856 			 * someone wants to pass us a directory, make
857 			 * sure it's inside the subtree we're allowed
858 			 * to access.
859 			 */
860 			if (fp->f_type == DTYPE_VNODE) {
861 				struct vnode *vp = (struct vnode *)fp->f_data;
862 				if ((vp->v_type == VDIR) &&
863 				    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
864 					error = EPERM;
865 					break;
866 				}
867 			}
868 		}
869 	}
870 
871  restart:
872 	rp = (struct file **)CMSG_DATA(cm);
873 	if (error != 0) {
874 		for (i = 0; i < nfds; i++) {
875 			fp = *rp;
876 			/*
877 			 * zero the pointer before calling unp_discard,
878 			 * since it may end up in unp_gc()..
879 			 */
880 			*rp++ = 0;
881 			unp_discard(fp);
882 		}
883 		goto out;
884 	}
885 
886 	/*
887 	 * First loop -- allocate file descriptor table slots for the
888 	 * new descriptors.
889 	 */
890 	for (i = 0; i < nfds; i++) {
891 		fp = *rp++;
892 		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
893 			/*
894 			 * Back out what we've done so far.
895 			 */
896 			for (--i; i >= 0; i--)
897 				fdremove(p->p_fd, fdp[i]);
898 
899 			if (error == ENOSPC) {
900 				fdexpand(p);
901 				error = 0;
902 			} else {
903 				/*
904 				 * This is the error that has historically
905 				 * been returned, and some callers may
906 				 * expect it.
907 				 */
908 				error = EMSGSIZE;
909 			}
910 			goto restart;
911 		}
912 
913 		/*
914 		 * Make the slot reference the descriptor so that
915 		 * fdalloc() works properly.. We finalize it all
916 		 * in the loop below.
917 		 */
918 		p->p_fd->fd_ofiles[fdp[i]] = fp;
919 	}
920 
921 	/*
922 	 * Now that adding them has succeeded, update all of the
923 	 * descriptor passing state.
924 	 */
925 	rp = (struct file **)CMSG_DATA(cm);
926 	for (i = 0; i < nfds; i++) {
927 		fp = *rp++;
928 		fp->f_msgcount--;
929 		unp_rights--;
930 	}
931 
932 	/*
933 	 * Copy temporary array to message and adjust length, in case of
934 	 * transition from large struct file pointers to ints.
935 	 */
936 	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
937 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
938 	rights->m_len = CMSG_SPACE(nfds * sizeof(int));
939  out:
940 	free(fdp, M_TEMP);
941 	return (error);
942 }
943 
944 int
945 unp_internalize(control, p)
946 	struct mbuf *control;
947 	struct proc *p;
948 {
949 	struct filedesc *fdescp = p->p_fd;
950 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
951 	struct file **rp;
952 	struct file *fp;
953 	int i, fd, *fdp;
954 	int nfds;
955 	u_int neededspace;
956 
957 	/* Sanity check the control message header */
958 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
959 	    cm->cmsg_len != control->m_len)
960 		return (EINVAL);
961 
962 	/* Verify that the file descriptors are valid */
963 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
964 	fdp = (int *)CMSG_DATA(cm);
965 	for (i = 0; i < nfds; i++) {
966 		fd = *fdp++;
967 		if ((fp = fd_getfile(fdescp, fd)) == NULL)
968 			return (EBADF);
969 		simple_unlock(&fp->f_slock);
970 	}
971 
972 	/* Make sure we have room for the struct file pointers */
973  morespace:
974 	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
975 	    control->m_len;
976 	if (neededspace > M_TRAILINGSPACE(control)) {
977 
978 		/* if we already have a cluster, the message is just too big */
979 		if (control->m_flags & M_EXT)
980 			return (E2BIG);
981 
982 		/* allocate a cluster and try again */
983 		m_clget(control, M_WAIT);
984 		if ((control->m_flags & M_EXT) == 0)
985 			return (ENOBUFS);	/* allocation failed */
986 
987 		/* copy the data to the cluster */
988 		memcpy(mtod(control, char *), cm, cm->cmsg_len);
989 		cm = mtod(control, struct cmsghdr *);
990 		goto morespace;
991 	}
992 
993 	/* adjust message & mbuf to note amount of space actually used. */
994 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
995 	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
996 
997 	/*
998 	 * Transform the file descriptors into struct file pointers, in
999 	 * reverse order so that if pointers are bigger than ints, the
1000 	 * int won't get until we're done.
1001 	 */
1002 	fdp = ((int *)CMSG_DATA(cm)) + nfds - 1;
1003 	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
1004 	for (i = 0; i < nfds; i++) {
1005 		fp = fdescp->fd_ofiles[*fdp--];
1006 		simple_lock(&fp->f_slock);
1007 #ifdef DIAGNOSTIC
1008 		if (fp->f_iflags & FIF_WANTCLOSE)
1009 			panic("unp_internalize: file already closed");
1010 #endif
1011 		*rp-- = fp;
1012 		fp->f_count++;
1013 		fp->f_msgcount++;
1014 		simple_unlock(&fp->f_slock);
1015 		unp_rights++;
1016 	}
1017 	return (0);
1018 }
1019 
1020 struct mbuf *
1021 unp_addsockcred(p, control)
1022 	struct proc *p;
1023 	struct mbuf *control;
1024 {
1025 	struct cmsghdr *cmp;
1026 	struct sockcred *sc;
1027 	struct mbuf *m, *n;
1028 	int len, space, i;
1029 
1030 	len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1031 	space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1032 
1033 	m = m_get(M_WAIT, MT_CONTROL);
1034 	if (space > MLEN) {
1035 		if (space > MCLBYTES)
1036 			MEXTMALLOC(m, space, M_WAITOK);
1037 		else
1038 			m_clget(m, M_WAIT);
1039 		if ((m->m_flags & M_EXT) == 0) {
1040 			m_free(m);
1041 			return (control);
1042 		}
1043 	}
1044 
1045 	m->m_len = space;
1046 	m->m_next = NULL;
1047 	cmp = mtod(m, struct cmsghdr *);
1048 	sc = (struct sockcred *)CMSG_DATA(cmp);
1049 	cmp->cmsg_len = len;
1050 	cmp->cmsg_level = SOL_SOCKET;
1051 	cmp->cmsg_type = SCM_CREDS;
1052 	sc->sc_uid = p->p_cred->p_ruid;
1053 	sc->sc_euid = p->p_ucred->cr_uid;
1054 	sc->sc_gid = p->p_cred->p_rgid;
1055 	sc->sc_egid = p->p_ucred->cr_gid;
1056 	sc->sc_ngroups = p->p_ucred->cr_ngroups;
1057 	for (i = 0; i < sc->sc_ngroups; i++)
1058 		sc->sc_groups[i] = p->p_ucred->cr_groups[i];
1059 
1060 	/*
1061 	 * If a control message already exists, append us to the end.
1062 	 */
1063 	if (control != NULL) {
1064 		for (n = control; n->m_next != NULL; n = n->m_next)
1065 			;
1066 		n->m_next = m;
1067 	} else
1068 		control = m;
1069 
1070 	return (control);
1071 }
1072 
/* Descriptors still flagged FDEFER, i.e. awaiting a rescan by unp_gc(). */
int	unp_defer;
/* Non-zero while unp_gc() is running; makes a nested call return at once. */
int	unp_gcing;

/* The local domain; unp_gc() only scans sockets whose protocol is in it. */
extern	struct domain unixdomain;
1075 
1076 /*
1077  * Comment added long after the fact explaining what's going on here.
1078  * Do a mark-sweep GC of file descriptors on the system, to free up
1079  * any which are caught in flight to an about-to-be-closed socket.
1080  *
1081  * Traditional mark-sweep gc's start at the "root", and mark
1082  * everything reachable from the root (which, in our case would be the
1083  * process table).  The mark bits are cleared during the sweep.
1084  *
1085  * XXX For some inexplicable reason (perhaps because the file
1086  * descriptor tables used to live in the u area which could be swapped
1087  * out and thus hard to reach), we do multiple scans over the set of
1088  * descriptors, using *two* mark bits per object (DEFER and MARK).
1089  * Whenever we find a descriptor which references other descriptors,
1090  * the ones it references are marked with both bits, and we iterate
1091  * over the whole file table until there are no more DEFER bits set.
1092  * We also make an extra pass *before* the GC to clear the mark bits,
1093  * which could have been cleared at almost no cost during the previous
1094  * sweep.
1095  *
1096  * XXX MP: this needs to run with locks such that no other thread of
1097  * control can create or destroy references to file descriptors. it
1098  * may be necessary to defer the GC until later (when the locking
1099  * situation is more hospitable); it may be necessary to push this
1100  * into a separate thread.
1101  */
1102 void
1103 unp_gc()
1104 {
1105 	struct file *fp, *nextfp;
1106 	struct socket *so, *so1;
1107 	struct file **extra_ref, **fpp;
1108 	int nunref, i;
1109 
1110 	if (unp_gcing)
1111 		return;
1112 	unp_gcing = 1;
1113 	unp_defer = 0;
1114 
1115 	/* Clear mark bits */
1116 	LIST_FOREACH(fp, &filehead, f_list)
1117 		fp->f_flag &= ~(FMARK|FDEFER);
1118 
1119 	/*
1120 	 * Iterate over the set of descriptors, marking ones believed
1121 	 * (based on refcount) to be referenced from a process, and
1122 	 * marking for rescan descriptors which are queued on a socket.
1123 	 */
1124 	do {
1125 		LIST_FOREACH(fp, &filehead, f_list) {
1126 			if (fp->f_flag & FDEFER) {
1127 				fp->f_flag &= ~FDEFER;
1128 				unp_defer--;
1129 #ifdef DIAGNOSTIC
1130 				if (fp->f_count == 0)
1131 					panic("unp_gc: deferred unreferenced socket");
1132 #endif
1133 			} else {
1134 				if (fp->f_count == 0)
1135 					continue;
1136 				if (fp->f_flag & FMARK)
1137 					continue;
1138 				if (fp->f_count == fp->f_msgcount)
1139 					continue;
1140 			}
1141 			fp->f_flag |= FMARK;
1142 
1143 			if (fp->f_type != DTYPE_SOCKET ||
1144 			    (so = (struct socket *)fp->f_data) == 0)
1145 				continue;
1146 			if (so->so_proto->pr_domain != &unixdomain ||
1147 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
1148 				continue;
1149 #ifdef notdef
1150 			if (so->so_rcv.sb_flags & SB_LOCK) {
1151 				/*
1152 				 * This is problematical; it's not clear
1153 				 * we need to wait for the sockbuf to be
1154 				 * unlocked (on a uniprocessor, at least),
1155 				 * and it's also not clear what to do
1156 				 * if sbwait returns an error due to receipt
1157 				 * of a signal.  If sbwait does return
1158 				 * an error, we'll go into an infinite
1159 				 * loop.  Delete all of this for now.
1160 				 */
1161 				(void) sbwait(&so->so_rcv);
1162 				goto restart;
1163 			}
1164 #endif
1165 			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1166 			/*
1167 			 * mark descriptors referenced from sockets queued on the accept queue as well.
1168 			 */
1169 			if (so->so_options & SO_ACCEPTCONN) {
1170 				TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1171 					unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1172 				}
1173 				TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1174 					unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1175 				}
1176 			}
1177 
1178 		}
1179 	} while (unp_defer);
1180 	/*
1181 	 * Sweep pass.  Find unmarked descriptors, and free them.
1182 	 *
1183 	 * We grab an extra reference to each of the file table entries
1184 	 * that are not otherwise accessible and then free the rights
1185 	 * that are stored in messages on them.
1186 	 *
1187 	 * The bug in the original code is a little tricky, so I'll describe
1188 	 * what's wrong with it here.
1189 	 *
1190 	 * It is incorrect to simply unp_discard each entry for f_msgcount
1191 	 * times -- consider the case of sockets A and B that contain
1192 	 * references to each other.  On a last close of some other socket,
1193 	 * we trigger a gc since the number of outstanding rights (unp_rights)
1194 	 * is non-zero.  If during the sweep phase the gc code un_discards,
1195 	 * we end up doing a (full) closef on the descriptor.  A closef on A
1196 	 * results in the following chain.  Closef calls soo_close, which
1197 	 * calls soclose.   Soclose calls first (through the switch
1198 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
1199 	 * returns because the previous instance had set unp_gcing, and
1200 	 * we return all the way back to soclose, which marks the socket
1201 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
1202 	 * to free up the rights that are queued in messages on the socket A,
1203 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
1204 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
1205 	 * instance of unp_discard just calls closef on B.
1206 	 *
1207 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
1208 	 * which results in another closef on A.  Unfortunately, A is already
1209 	 * being closed, and the descriptor has already been marked with
1210 	 * SS_NOFDREF, and soclose panics at this point.
1211 	 *
1212 	 * Here, we first take an extra reference to each inaccessible
1213 	 * descriptor.  Then, if the inaccessible descriptor is a
1214 	 * socket, we call sorflush in case it is a Unix domain
1215 	 * socket.  After we destroy all the rights carried in
1216 	 * messages, we do a last closef to get rid of our extra
1217 	 * reference.  This is the last close, and the unp_detach etc
1218 	 * will shut down the socket.
1219 	 *
1220 	 * 91/09/19, bsy@cs.cmu.edu
1221 	 */
1222 	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
1223 	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1224 	    fp = nextfp) {
1225 		nextfp = LIST_NEXT(fp, f_list);
1226 		simple_lock(&fp->f_slock);
1227 		if (fp->f_count != 0 &&
1228 		    fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1229 			*fpp++ = fp;
1230 			nunref++;
1231 			fp->f_count++;
1232 		}
1233 		simple_unlock(&fp->f_slock);
1234 	}
1235 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1236 		fp = *fpp;
1237 		simple_lock(&fp->f_slock);
1238 		FILE_USE(fp);
1239 		if (fp->f_type == DTYPE_SOCKET)
1240 			sorflush((struct socket *)fp->f_data);
1241 		FILE_UNUSE(fp, NULL);
1242 	}
1243 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1244 		fp = *fpp;
1245 		simple_lock(&fp->f_slock);
1246 		FILE_USE(fp);
1247 		(void) closef(fp, (struct proc *)0);
1248 	}
1249 	free((caddr_t)extra_ref, M_FILE);
1250 	unp_gcing = 0;
1251 }
1252 
1253 void
1254 unp_dispose(m)
1255 	struct mbuf *m;
1256 {
1257 
1258 	if (m)
1259 		unp_scan(m, unp_discard, 1);
1260 }
1261 
1262 void
1263 unp_scan(m0, op, discard)
1264 	struct mbuf *m0;
1265 	void (*op) __P((struct file *));
1266 	int discard;
1267 {
1268 	struct mbuf *m;
1269 	struct file **rp;
1270 	struct cmsghdr *cm;
1271 	int i;
1272 	int qfds;
1273 
1274 	while (m0) {
1275 		for (m = m0; m; m = m->m_next) {
1276 			if (m->m_type == MT_CONTROL &&
1277 			    m->m_len >= sizeof(*cm)) {
1278 				cm = mtod(m, struct cmsghdr *);
1279 				if (cm->cmsg_level != SOL_SOCKET ||
1280 				    cm->cmsg_type != SCM_RIGHTS)
1281 					continue;
1282 				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
1283 				    / sizeof(struct file *);
1284 				rp = (struct file **)CMSG_DATA(cm);
1285 				for (i = 0; i < qfds; i++) {
1286 					struct file *fp = *rp;
1287 					if (discard)
1288 						*rp = 0;
1289 					(*op)(fp);
1290 					rp++;
1291 				}
1292 				break;		/* XXX, but saves time */
1293 			}
1294 		}
1295 		m0 = m0->m_nextpkt;
1296 	}
1297 }
1298 
1299 void
1300 unp_mark(fp)
1301 	struct file *fp;
1302 {
1303 	if (fp == NULL)
1304 		return;
1305 
1306 	if (fp->f_flag & FMARK)
1307 		return;
1308 
1309 	/* If we're already deferred, don't screw up the defer count */
1310 	if (fp->f_flag & FDEFER)
1311 		return;
1312 
1313 	/*
1314 	 * Minimize the number of deferrals...  Sockets are the only
1315 	 * type of descriptor which can hold references to another
1316 	 * descriptor, so just mark other descriptors, and defer
1317 	 * unmarked sockets for the next pass.
1318 	 */
1319 	if (fp->f_type == DTYPE_SOCKET) {
1320 		unp_defer++;
1321 		if (fp->f_count == 0)
1322 			panic("unp_mark: queued unref");
1323 		fp->f_flag |= FDEFER;
1324 	} else {
1325 		fp->f_flag |= FMARK;
1326 	}
1327 	return;
1328 }
1329 
1330 void
1331 unp_discard(fp)
1332 	struct file *fp;
1333 {
1334 	if (fp == NULL)
1335 		return;
1336 	simple_lock(&fp->f_slock);
1337 	fp->f_usecount++;	/* i.e. FILE_USE(fp) sans locking */
1338 	fp->f_msgcount--;
1339 	simple_unlock(&fp->f_slock);
1340 	unp_rights--;
1341 	(void) closef(fp, (struct proc *)0);
1342 }
1343