xref: /dflybsd-src/sys/kern/uipc_usrreq.c (revision b0a72a1d269b3c353ad01bef62856e2b14005e6b)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
30  * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/domain.h>
37 #include <sys/fcntl.h>
38 #include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
39 #include <sys/proc.h>
40 #include <sys/file.h>
41 #include <sys/filedesc.h>
42 #include <sys/mbuf.h>
43 #include <sys/nlookup.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/resourcevar.h>
48 #include <sys/stat.h>
49 #include <sys/mount.h>
50 #include <sys/sysctl.h>
51 #include <sys/un.h>
52 #include <sys/unpcb.h>
53 #include <sys/vnode.h>
54 
55 #include <sys/file2.h>
56 #include <sys/spinlock2.h>
57 #include <sys/socketvar2.h>
58 #include <sys/msgport2.h>
59 
/*
 * Private unp_flags bits (mapped onto the UNP_PRIVATEn slots):
 *
 * UNP_DETACHED   - pcb has been logically detached from its socket
 * UNP_CONNECTING - a connect() is in progress on this pcb
 * UNP_DROPPED    - pcb has been dropped (see unp_drop())
 */
#define UNP_DETACHED		UNP_PRIVATE1
#define UNP_CONNECTING		UNP_PRIVATE2
#define UNP_DROPPED		UNP_PRIVATE3

/* A pcb is usable only while non-NULL and not yet flagged detached. */
#define UNP_ISATTACHED(unp)	\
    ((unp) != NULL && ((unp)->unp_flags & UNP_DETACHED) == 0)

#ifdef INVARIANTS
#define UNP_ASSERT_TOKEN_HELD(unp) \
    ASSERT_LWKT_TOKEN_HELD(lwkt_token_pool_lookup((unp)))
#else	/* !INVARIANTS */
#define UNP_ASSERT_TOKEN_HELD(unp)
#endif	/* INVARIANTS */

/* Singly-linked list node for struct file's whose discard is deferred. */
typedef struct unp_defdiscard {
	struct unp_defdiscard *next;
	struct file *fp;
} *unp_defdiscard_t;

static	MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct");
static	unp_gen_t unp_gencnt;	/* bumped on every attach and detach */
static	u_int unp_count;	/* number of unpcbs currently allocated */

/* Global pcb lists: unp_shead for stream/seqpacket, unp_dhead for dgram. */
static	struct unp_head unp_shead, unp_dhead;

/* Global token serializing pcb list and unp_conn manipulation. */
static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token);
static int unp_defdiscard_nest;
static unp_defdiscard_t unp_defdiscard_base;

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 *	lock pushdown
 */
static struct	sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t	unp_ino = 1;		/* prototype for fake inode numbers */
100 
101 static int     unp_attach (struct socket *, struct pru_attach_info *);
102 static void    unp_detach (struct unpcb *);
103 static int     unp_bind (struct unpcb *,struct sockaddr *, struct thread *);
104 static int     unp_connect (struct socket *,struct sockaddr *,
105 				struct thread *);
106 static void    unp_disconnect(struct unpcb *, int);
107 static void    unp_shutdown (struct unpcb *);
108 static void    unp_gc (void);
109 static int     unp_gc_clearmarks(struct file *, void *);
110 static int     unp_gc_checkmarks(struct file *, void *);
111 static int     unp_gc_checkrefs(struct file *, void *);
112 static int     unp_revoke_gc_check(struct file *, void *);
113 static void    unp_scan (struct mbuf *, void (*)(struct file *, void *),
114 				void *data);
115 static void    unp_mark (struct file *, void *data);
116 static void    unp_discard (struct file *, void *);
117 static int     unp_internalize (struct mbuf *, struct thread *);
118 static int     unp_listen (struct unpcb *, struct thread *);
119 static void    unp_fp_externalize(struct lwp *lp, struct file *fp, int fd);
120 static int     unp_find_lockref(struct sockaddr *nam, struct thread *td,
121 		   short type, struct unpcb **unp_ret);
122 static int     unp_connect_pair(struct unpcb *unp, struct unpcb *unp2);
123 static void    unp_drop(struct unpcb *unp, int error);
124 
125 /*
126  * SMP Considerations:
127  *
128  *	Since unp_token will be automaticly released upon execution of
129  *	blocking code, we need to reference unp_conn before any possible
130  *	blocking code to prevent it from being ripped behind our back.
131  *
132  *	Any adjustment to unp->unp_conn requires both the global unp_token
133  *	AND the per-unp token (lwkt_token_pool_lookup(unp)) to be held.
134  *
135  *	Any access to so_pcb to obtain unp requires the pool token for
136  *	unp to be held.
137  */
138 
139 static __inline void
140 unp_reference(struct unpcb *unp)
141 {
142 	/* 0->1 transition will not work */
143 	KKASSERT(unp->unp_refcnt > 0);
144 	atomic_add_int(&unp->unp_refcnt, 1);
145 }
146 
147 static __inline void
148 unp_free(struct unpcb *unp)
149 {
150 	KKASSERT(unp->unp_refcnt > 0);
151 	if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1)
152 		unp_detach(unp);
153 }
154 
155 static __inline struct unpcb *
156 unp_getsocktoken(struct socket *so)
157 {
158 	struct unpcb *unp;
159 
160 	/*
161 	 * The unp pointer is invalid until we verify that it is
162 	 * good by re-checking so_pcb AFTER obtaining the token.
163 	 */
164 	while ((unp = so->so_pcb) != NULL) {
165 		lwkt_getpooltoken(unp);
166 		if (unp == so->so_pcb)
167 			break;
168 		lwkt_relpooltoken(unp);
169 	}
170 	return unp;
171 }
172 
173 static __inline void
174 unp_reltoken(struct unpcb *unp)
175 {
176 	if (unp != NULL)
177 		lwkt_relpooltoken(unp);
178 }
179 
/* Atomically set bits in unp->unp_flags. */
static __inline void
unp_setflags(struct unpcb *unp, int flags)
{
	atomic_set_int(&unp->unp_flags, flags);
}
185 
/* Atomically clear bits in unp->unp_flags. */
static __inline void
unp_clrflags(struct unpcb *unp, int flags)
{
	atomic_clear_int(&unp->unp_flags, flags);
}
191 
192 /*
193  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
194  *	 will sofree() it when we return.
195  */
196 static void
197 uipc_abort(netmsg_t msg)
198 {
199 	struct unpcb *unp;
200 	int error;
201 
202 	lwkt_gettoken(&unp_token);
203 	unp = unp_getsocktoken(msg->base.nm_so);
204 
205 	if (UNP_ISATTACHED(unp)) {
206 		unp_setflags(unp, UNP_DETACHED);
207 		unp_drop(unp, ECONNABORTED);
208 		unp_free(unp);
209 		error = 0;
210 	} else {
211 		error = EINVAL;
212 	}
213 
214 	unp_reltoken(unp);
215 	lwkt_reltoken(&unp_token);
216 
217 	lwkt_replymsg(&msg->lmsg, error);
218 }
219 
/*
 * Return the peer name for an accepted connection.  The peer's bound
 * address is duplicated if the peer still exists and was bound;
 * otherwise the anonymous AF_LOCAL name is returned.
 */
static void
uipc_accept(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else {
		struct unpcb *unp2 = unp->unp_conn;

		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp2 && unp2->unp_addr) {
			/*
			 * Hold a ref on unp2 so it cannot be ripped out
			 * from under us if dup_sockaddr() blocks.
			 */
			unp_reference(unp2);
			*msg->accept.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
			unp_free(unp2);
		} else {
			*msg->accept.nm_nam = dup_sockaddr(&sun_noname);
		}
		error = 0;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}
255 
256 static void
257 uipc_attach(netmsg_t msg)
258 {
259 	int error;
260 
261 	lwkt_gettoken(&unp_token);
262 
263 	KASSERT(msg->base.nm_so->so_pcb == NULL, ("double unp attach"));
264 	error = unp_attach(msg->base.nm_so, msg->attach.nm_ai);
265 
266 	lwkt_reltoken(&unp_token);
267 	lwkt_replymsg(&msg->lmsg, error);
268 }
269 
270 static void
271 uipc_bind(netmsg_t msg)
272 {
273 	struct unpcb *unp;
274 	int error;
275 
276 	lwkt_gettoken(&unp_token);
277 	unp = unp_getsocktoken(msg->base.nm_so);
278 
279 	if (UNP_ISATTACHED(unp))
280 		error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td);
281 	else
282 		error = EINVAL;
283 
284 	unp_reltoken(unp);
285 	lwkt_reltoken(&unp_token);
286 
287 	lwkt_replymsg(&msg->lmsg, error);
288 }
289 
290 static void
291 uipc_connect(netmsg_t msg)
292 {
293 	int error;
294 
295 	error = unp_connect(msg->base.nm_so, msg->connect.nm_nam,
296 	    msg->connect.nm_td);
297 	lwkt_replymsg(&msg->lmsg, error);
298 }
299 
300 static void
301 uipc_connect2(netmsg_t msg)
302 {
303 	int error;
304 
305 	error = unp_connect2(msg->connect2.nm_so1, msg->connect2.nm_so2);
306 	lwkt_replymsg(&msg->lmsg, error);
307 }
308 
309 /* control is EOPNOTSUPP */
310 
311 static void
312 uipc_detach(netmsg_t msg)
313 {
314 	struct unpcb *unp;
315 	int error;
316 
317 	lwkt_gettoken(&unp_token);
318 	unp = unp_getsocktoken(msg->base.nm_so);
319 
320 	if (UNP_ISATTACHED(unp)) {
321 		unp_setflags(unp, UNP_DETACHED);
322 		unp_drop(unp, 0);
323 		unp_free(unp);
324 		error = 0;
325 	} else {
326 		error = EINVAL;
327 	}
328 
329 	unp_reltoken(unp);
330 	lwkt_reltoken(&unp_token);
331 
332 	lwkt_replymsg(&msg->lmsg, error);
333 }
334 
335 static void
336 uipc_disconnect(netmsg_t msg)
337 {
338 	struct unpcb *unp;
339 	int error;
340 
341 	lwkt_gettoken(&unp_token);
342 	unp = unp_getsocktoken(msg->base.nm_so);
343 
344 	if (UNP_ISATTACHED(unp)) {
345 		unp_disconnect(unp, 0);
346 		error = 0;
347 	} else {
348 		error = EINVAL;
349 	}
350 
351 	unp_reltoken(unp);
352 	lwkt_reltoken(&unp_token);
353 
354 	lwkt_replymsg(&msg->lmsg, error);
355 }
356 
357 static void
358 uipc_listen(netmsg_t msg)
359 {
360 	struct unpcb *unp;
361 	int error;
362 
363 	lwkt_gettoken(&unp_token);
364 	unp = unp_getsocktoken(msg->base.nm_so);
365 
366 	if (!UNP_ISATTACHED(unp) || unp->unp_vnode == NULL)
367 		error = EINVAL;
368 	else
369 		error = unp_listen(unp, msg->listen.nm_td);
370 
371 	unp_reltoken(unp);
372 	lwkt_reltoken(&unp_token);
373 
374 	lwkt_replymsg(&msg->lmsg, error);
375 }
376 
/*
 * Return the address of the connected peer, if it is still connected
 * and was bound; otherwise return the anonymous AF_LOCAL name (see
 * the XXX workaround note below).
 */
static void
uipc_peeraddr(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else if (unp->unp_conn && unp->unp_conn->unp_addr) {
		struct unpcb *unp2 = unp->unp_conn;

		/* Ref unp2 so it survives any blocking in dup_sockaddr(). */
		unp_reference(unp2);
		*msg->peeraddr.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
		unp_free(unp2);
		error = 0;
	} else {
		/*
		 * XXX: It seems that this test always fails even when
		 * connection is established.  So, this else clause is
		 * added as workaround to return PF_LOCAL sockaddr.
		 */
		*msg->peeraddr.nm_nam = dup_sockaddr(&sun_noname);
		error = 0;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}
411 
/*
 * Receiver has consumed data.  For stream/seqpacket sockets, clear
 * the peer sender's SSB_STOP flow-control flag and wake the sender
 * once our receive buffer has drained below the peer's send limits.
 * Datagram sockets never issue this request (panic).
 */
static void
uipc_rcvd(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn == NULL)
			break;
		unp2 = unp->unp_conn;	/* protected by pool token */

		/*
		 * Because we are transferring mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 *
		 * As in several places in this module we have to ref unp2
		 * to ensure that it does not get ripped out from under us
		 * if we block on the so2 token or in sowwakeup().
		 */
		so2 = unp2->unp_socket;
		unp_reference(unp2);
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
		    so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
		) {
			atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);

			sowwakeup(so2);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		unp_free(unp2);
		break;
	default:
		panic("uipc_rcvd unknown socktype");
		/*NOTREACHED*/
	}
	error = 0;
done:
	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}
473 
474 /* pru_rcvoob is EOPNOTSUPP */
475 
476 static void
477 uipc_send(netmsg_t msg)
478 {
479 	struct unpcb *unp, *unp2;
480 	struct socket *so;
481 	struct socket *so2;
482 	struct mbuf *control;
483 	struct mbuf *m;
484 	int error = 0;
485 
486 	so = msg->base.nm_so;
487 	control = msg->send.nm_control;
488 	m = msg->send.nm_m;
489 
490 	/*
491 	 * so_pcb is only modified with both the global and the unp
492 	 * pool token held.
493 	 */
494 	so = msg->base.nm_so;
495 	unp = unp_getsocktoken(so);
496 
497 	if (!UNP_ISATTACHED(unp)) {
498 		error = EINVAL;
499 		goto release;
500 	}
501 
502 	if (msg->send.nm_flags & PRUS_OOB) {
503 		error = EOPNOTSUPP;
504 		goto release;
505 	}
506 
507 	wakeup_start_delayed();
508 
509 	if (control && (error = unp_internalize(control, msg->send.nm_td)))
510 		goto release;
511 
512 	switch (so->so_type) {
513 	case SOCK_DGRAM:
514 	{
515 		struct sockaddr *from;
516 
517 		if (msg->send.nm_addr) {
518 			if (unp->unp_conn) {
519 				error = EISCONN;
520 				break;
521 			}
522 			error = unp_find_lockref(msg->send.nm_addr,
523 			    msg->send.nm_td, so->so_type, &unp2);
524 			if (error)
525 				break;
526 			/*
527 			 * NOTE:
528 			 * unp2 is locked and referenced.
529 			 *
530 			 * We could unlock unp2 now, since it was checked
531 			 * and referenced.
532 			 */
533 			unp_reltoken(unp2);
534 		} else {
535 			if (unp->unp_conn == NULL) {
536 				error = ENOTCONN;
537 				break;
538 			}
539 			unp2 = unp->unp_conn;
540 			unp_reference(unp2);
541 		}
542 		/* NOTE: unp2 is referenced. */
543 		so2 = unp2->unp_socket;
544 
545 		if (unp->unp_addr)
546 			from = (struct sockaddr *)unp->unp_addr;
547 		else
548 			from = &sun_noname;
549 
550 		lwkt_gettoken(&so2->so_rcv.ssb_token);
551 		if (ssb_appendaddr(&so2->so_rcv, from, m, control)) {
552 			sorwakeup(so2);
553 			m = NULL;
554 			control = NULL;
555 		} else {
556 			error = ENOBUFS;
557 		}
558 		lwkt_reltoken(&so2->so_rcv.ssb_token);
559 
560 		unp_free(unp2);
561 		break;
562 	}
563 
564 	case SOCK_STREAM:
565 	case SOCK_SEQPACKET:
566 		/* Connect if not connected yet. */
567 		/*
568 		 * Note: A better implementation would complain
569 		 * if not equal to the peer's address.
570 		 */
571 		if (unp->unp_conn == NULL) {
572 			if (msg->send.nm_addr) {
573 				error = unp_connect(so,
574 						    msg->send.nm_addr,
575 						    msg->send.nm_td);
576 				if (error)
577 					break;	/* XXX */
578 			}
579 			/*
580 			 * NOTE:
581 			 * unp_conn still could be NULL, even if the
582 			 * above unp_connect() succeeds; since the
583 			 * current unp's token could be released due
584 			 * to blocking operations after unp_conn is
585 			 * assigned.
586 			 */
587 			if (unp->unp_conn == NULL) {
588 				error = ENOTCONN;
589 				break;
590 			}
591 		}
592 		if (so->so_state & SS_CANTSENDMORE) {
593 			error = EPIPE;
594 			break;
595 		}
596 
597 		unp2 = unp->unp_conn;
598 		KASSERT(unp2 != NULL, ("unp is not connected"));
599 		so2 = unp2->unp_socket;
600 
601 		unp_reference(unp2);
602 
603 		/*
604 		 * Send to paired receive port, and then reduce
605 		 * send buffer hiwater marks to maintain backpressure.
606 		 * Wake up readers.
607 		 */
608 		lwkt_gettoken(&so2->so_rcv.ssb_token);
609 		if (control) {
610 			if (ssb_appendcontrol(&so2->so_rcv, m, control)) {
611 				control = NULL;
612 				m = NULL;
613 			}
614 		} else if (so->so_type == SOCK_SEQPACKET) {
615 			sbappendrecord(&so2->so_rcv.sb, m);
616 			m = NULL;
617 		} else {
618 			sbappend(&so2->so_rcv.sb, m);
619 			m = NULL;
620 		}
621 
622 		/*
623 		 * Because we are transfering mbufs directly to the
624 		 * peer socket we have to use SSB_STOP on the sender
625 		 * to prevent it from building up infinite mbufs.
626 		 */
627 		if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
628 		    so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
629 		) {
630 			atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
631 		}
632 		lwkt_reltoken(&so2->so_rcv.ssb_token);
633 		sorwakeup(so2);
634 
635 		unp_free(unp2);
636 		break;
637 
638 	default:
639 		panic("uipc_send unknown socktype");
640 	}
641 
642 	/*
643 	 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN.
644 	 */
645 	if (msg->send.nm_flags & PRUS_EOF) {
646 		socantsendmore(so);
647 		unp_shutdown(unp);
648 	}
649 
650 	if (control && error != 0)
651 		unp_dispose(control);
652 release:
653 	unp_reltoken(unp);
654 	wakeup_end_delayed();
655 
656 	if (control)
657 		m_freem(control);
658 	if (m)
659 		m_freem(m);
660 	lwkt_replymsg(&msg->lmsg, error);
661 }
662 
/*
 * fstat() on a unix domain socket: report the send buffer hiwat as
 * the block size and synthesize a stable, non-zero fake inode number.
 *
 * MPSAFE
 */
static void
uipc_sense(netmsg_t msg)
{
	struct unpcb *unp;
	struct socket *so;
	struct stat *sb;
	int error;

	so = msg->base.nm_so;
	sb = msg->sense.nm_stat;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	sb->st_blksize = so->so_snd.ssb_hiwat;
	sb->st_dev = NOUDEV;
	if (unp->unp_ino == 0) {	/* make up a non-zero inode number */
		unp->unp_ino = atomic_fetchadd_long(&unp_ino, 1);
		/* Retry once in case the global counter handed back 0. */
		if (__predict_false(unp->unp_ino == 0))
			unp->unp_ino = atomic_fetchadd_long(&unp_ino, 1);
	}
	sb->st_ino = unp->unp_ino;
	error = 0;
done:
	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}
701 
702 static void
703 uipc_shutdown(netmsg_t msg)
704 {
705 	struct socket *so;
706 	struct unpcb *unp;
707 	int error;
708 
709 	/*
710 	 * so_pcb is only modified with both the global and the unp
711 	 * pool token held.
712 	 */
713 	so = msg->base.nm_so;
714 	unp = unp_getsocktoken(so);
715 
716 	if (UNP_ISATTACHED(unp)) {
717 		socantsendmore(so);
718 		unp_shutdown(unp);
719 		error = 0;
720 	} else {
721 		error = EINVAL;
722 	}
723 
724 	unp_reltoken(unp);
725 	lwkt_replymsg(&msg->lmsg, error);
726 }
727 
/*
 * Return the locally-bound address of the socket.
 *
 * NOTE(review): when the socket was never bound (unp_addr == NULL),
 * *nm_nam is left untouched while 0 is still returned -- confirm
 * callers pre-initialize the name pointer.
 */
static void
uipc_sockaddr(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		if (unp->unp_addr) {
			*msg->sockaddr.nm_nam =
				dup_sockaddr((struct sockaddr *)unp->unp_addr);
		}
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}
753 
/*
 * Protocol user-request switch for the local (unix) domain.
 * Requests unix sockets do not support map to pr_generic_notsupp.
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = uipc_abort,
	.pru_accept = uipc_accept,
	.pru_attach = uipc_attach,
	.pru_bind = uipc_bind,
	.pru_connect = uipc_connect,
	.pru_connect2 = uipc_connect2,
	.pru_control = pr_generic_notsupp,
	.pru_detach = uipc_detach,
	.pru_disconnect = uipc_disconnect,
	.pru_listen = uipc_listen,
	.pru_peeraddr = uipc_peeraddr,
	.pru_rcvd = uipc_rcvd,
	.pru_rcvoob = pr_generic_notsupp,
	.pru_send = uipc_send,
	.pru_sense = uipc_sense,
	.pru_shutdown = uipc_shutdown,
	.pru_sockaddr = uipc_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive
};
775 
776 void
777 uipc_ctloutput(netmsg_t msg)
778 {
779 	struct socket *so;
780 	struct sockopt *sopt;
781 	struct unpcb *unp;
782 	int error = 0;
783 
784 	so = msg->base.nm_so;
785 	sopt = msg->ctloutput.nm_sopt;
786 
787 	lwkt_gettoken(&unp_token);
788 	unp = unp_getsocktoken(so);
789 
790 	if (!UNP_ISATTACHED(unp)) {
791 		error = EINVAL;
792 		goto done;
793 	}
794 
795 	switch (sopt->sopt_dir) {
796 	case SOPT_GET:
797 		switch (sopt->sopt_name) {
798 		case LOCAL_PEERCRED:
799 			if (unp->unp_flags & UNP_HAVEPC)
800 				soopt_from_kbuf(sopt, &unp->unp_peercred,
801 						sizeof(unp->unp_peercred));
802 			else {
803 				if (so->so_type == SOCK_STREAM)
804 					error = ENOTCONN;
805 				else if (so->so_type == SOCK_SEQPACKET)
806 					error = ENOTCONN;
807 				else
808 					error = EINVAL;
809 			}
810 			break;
811 		default:
812 			error = EOPNOTSUPP;
813 			break;
814 		}
815 		break;
816 	case SOPT_SET:
817 	default:
818 		error = EOPNOTSUPP;
819 		break;
820 	}
821 
822 done:
823 	unp_reltoken(unp);
824 	lwkt_reltoken(&unp_token);
825 
826 	lwkt_replymsg(&msg->lmsg, error);
827 }
828 
829 /*
830  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
831  * for stream sockets, although the total for sender and receiver is
832  * actually only PIPSIZ.
833  *
834  * Datagram sockets really use the sendspace as the maximum datagram size,
835  * and don't really want to reserve the sendspace.  Their recvspace should
836  * be large enough for at least one max-size datagram plus address.
837  *
838  * We want the local send/recv space to be significant larger then lo0's
839  * mtu of 16384.
840  */
#ifndef PIPSIZ
#define	PIPSIZ	57344
#endif
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

static int	unp_rights;			/* file descriptors in flight */
static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin, "unp_spin");

/*
 * NOTE(review): the buffer-space variables above are u_long but are
 * exported with SYSCTL_INT below -- confirm the width mismatch is
 * benign (SYSCTL_ULONG would be the matching macro).
 */
SYSCTL_DECL(_net_local_seqpacket);
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
    &unpst_sendspace, 0, "Size of stream socket send buffer");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpst_recvspace, 0, "Size of stream socket receive buffer");

SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
    &unpdg_sendspace, 0, "Max datagram socket size");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpdg_recvspace, 0, "Size of datagram socket receive buffer");

SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
   "File descriptors in flight");
868 
/*
 * Allocate and initialize a new unpcb and bind it to the socket.
 *
 * Reserves default send/receive buffer space (unless the caller
 * already did), enables SSB_STOP flow control for stream sockets,
 * and links the new pcb onto the global dgram or stream list.
 * Returns 0 or an errno.
 */
static int
unp_attach(struct socket *so, struct pru_attach_info *ai)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);

	if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace,
					  ai->sb_rlimit);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace,
					  ai->sb_rlimit);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			goto failed;
	}

	/*
	 * In order to support sendfile we have to set either SSB_STOPSUPP
	 * or SSB_PREALLOC.  Unix domain sockets use the SSB_STOP flow
	 * control mechanism.
	 */
	if (so->so_type == SOCK_STREAM) {
		atomic_set_int(&so->so_rcv.ssb_flags, SSB_STOPSUPP);
		atomic_set_int(&so->so_snd.ssb_flags, SSB_STOPSUPP);
	}

	/* M_NULLOK: allocation may fail; mapped to ENOBUFS below. */
	unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK);
	if (unp == NULL) {
		error = ENOBUFS;
		goto failed;
	}
	unp->unp_refcnt = 1;	/* creation ref, dropped via unp_free() */
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_rvnode = ai->fd_rdir;		/* jail cruft XXX JH */
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
			 : &unp_shead, unp, unp_link);
	so->so_pcb = (caddr_t)unp;
	soreference(so);	/* pcb ref, removed in unp_detach() */
	error = 0;
failed:
	lwkt_reltoken(&unp_token);
	return error;
}
927 
/*
 * Final teardown of a unpcb, called when the last reference is
 * dropped (see unp_free()).  Unlinks the pcb from the global list,
 * severs any vnode binding, disconnects the socket and drops the
 * pcb's socket reference, then frees the pcb itself.
 */
static void
unp_detach(struct unpcb *unp)
{
	struct socket *so;

	lwkt_gettoken(&unp_token);
	lwkt_getpooltoken(unp);

	LIST_REMOVE(unp, unp_link);	/* both tokens required */
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;
	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = NULL;
		vrele(unp->unp_vnode);
		unp->unp_vnode = NULL;
	}
	soisdisconnected(unp->unp_socket);
	so = unp->unp_socket;
	soreference(so);		/* for delayed sorflush */
	KKASSERT(so->so_pcb == unp);
	so->so_pcb = NULL;		/* both tokens required */
	unp->unp_socket = NULL;
	sofree(so);		/* remove pcb ref */

	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(so);
		unp_gc();
	}
	sofree(so);	/* drop the delayed-sorflush ref taken above */
	lwkt_relpooltoken(unp);
	lwkt_reltoken(&unp_token);

	KASSERT(unp->unp_conn == NULL, ("unp is still connected"));
	KASSERT(LIST_EMPTY(&unp->unp_refs), ("unp still has references"));

	if (unp->unp_addr)
		kfree(unp->unp_addr, M_SONAME);
	kfree(unp, M_UNPCB);
}
974 
/*
 * Bind the pcb to a filesystem path by creating a VSOCK vnode.
 *
 * Caller must hold both the global unp_token and the per-unp pool
 * token.  Returns EINVAL if already bound or the path is empty,
 * EADDRINUSE if the path already exists, else the nlookup/VOP error.
 */
static int
unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct vattr vattr;
	int error, namelen;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	if (unp->unp_vnode != NULL)
		return EINVAL;

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return EINVAL;
	strncpy(buf, soun->sun_path, namelen);
	buf[namelen] = 0;	/* null-terminate the string */
	error = nlookup_init(&nd, buf, UIO_SYSSPACE,
			     NLC_LOCKVP | NLC_CREATE | NLC_REFDVP);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL)
		error = EADDRINUSE;
	if (error)
		goto done;

	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
	error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr);
	if (error == 0) {
		if (unp->unp_vnode == NULL) {
			/* Cross-link the new vnode and the socket. */
			vp->v_socket = unp->unp_socket;
			unp->unp_vnode = vp;
			unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam);
			vn_unlock(vp);
		} else {
			vput(vp);		/* late race */
			error = EINVAL;
		}
	}
done:
	nlookup_done(&nd);
	return (error);
}
1025 
/*
 * Connect socket (so) to the unix domain socket bound at address (nam).
 *
 * For connection-oriented sockets (PR_CONNREQUIRED) a fresh server-side
 * socket is spawned from the listener via sonewconn_faddr() and the
 * peer credentials are exchanged; otherwise the two pcbs are paired
 * directly.  The UNP_CONNECTING flag guards against concurrent
 * connects on the same socket.
 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct unpcb *unp, *unp2;
	int error, flags = 0;

	lwkt_gettoken(&unp_token);

	unp = unp_getsocktoken(so);
	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto failed;
	}

	if ((unp->unp_flags & UNP_CONNECTING) || unp->unp_conn != NULL) {
		error = EISCONN;
		goto failed;
	}

	flags = UNP_CONNECTING;
	unp_setflags(unp, flags);

	error = unp_find_lockref(nam, td, so->so_type, &unp2);
	if (error)
		goto failed;
	/*
	 * NOTE:
	 * unp2 is locked and referenced.
	 */

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		struct socket *so2, *so3;
		struct unpcb *unp3;

		so2 = unp2->unp_socket;
		if (!(so2->so_options & SO_ACCEPTCONN) ||
		    (so3 = sonewconn_faddr(so2, 0, NULL,
		     TRUE /* keep ref */)) == NULL) {
			error = ECONNREFUSED;
			goto done;
		}
		/* so3 has a socket reference. */

		unp3 = unp_getsocktoken(so3);
		if (!UNP_ISATTACHED(unp3)) {
			unp_reltoken(unp3);
			/*
			 * Already aborted; we only need to drop the
			 * socket reference held by sonewconn_faddr().
			 */
			sofree(so3);
			error = ECONNREFUSED;
			goto done;
		}
		unp_reference(unp3);
		/*
		 * NOTE:
		 * unp3 is locked and referenced.
		 */

		/*
		 * Release so3 socket reference held by sonewconn_faddr().
		 * Since we have referenced unp3, neither unp3 nor so3 will
		 * be destroyed here.
		 */
		sofree(so3);

		if (unp2->unp_addr != NULL) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(td->td_proc->p_ucred, &unp3->unp_peercred);
		unp_setflags(unp3, UNP_HAVEPC);
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp_setflags(unp, UNP_HAVEPC);

		error = unp_connect_pair(unp, unp3);
		if (error) {
			/* XXX we need a better name */
			soabort_oncpu(so3);
		}

		/* Done with unp3 */
		unp_free(unp3);
		unp_reltoken(unp3);
	} else {
		error = unp_connect_pair(unp, unp2);
	}
done:
	unp_free(unp2);
	unp_reltoken(unp2);
failed:
	if (flags)
		unp_clrflags(unp, flags);
	unp_reltoken(unp);

	lwkt_reltoken(&unp_token);
	return (error);
}
1143 
/*
 * Connect two unix domain sockets together.
 *
 * Returns EPROTOTYPE if the socket types differ, EINVAL/ECONNREFUSED
 * if either pcb is detached, and EISCONN if (so) is already connected
 * or a connection-oriented (so2) already has a peer.
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp, *unp2;
	int error;

	lwkt_gettoken(&unp_token);
	if (so2->so_type != so->so_type) {
		lwkt_reltoken(&unp_token);
		return (EPROTOTYPE);
	}
	unp = unp_getsocktoken(so);
	unp2 = unp_getsocktoken(so2);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}
	if (!UNP_ISATTACHED(unp2)) {
		error = ECONNREFUSED;
		goto done;
	}

	if (unp->unp_conn != NULL) {
		error = EISCONN;
		goto done;
	}
	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
	    unp2->unp_conn != NULL) {
		error = EISCONN;
		goto done;
	}

	error = unp_connect_pair(unp, unp2);
done:
	unp_reltoken(unp2);
	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);
	return (error);
}
1190 
1191 /*
1192  * Disconnect a unix domain socket pair.
1193  *
1194  * NOTE: Semantics for any change to unp_conn requires that the per-unp
1195  *	 pool token also be held.
1196  */
static void
unp_disconnect(struct unpcb *unp, int error)
{
	struct socket *so = unp->unp_socket;
	struct unpcb *unp2;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	if (error)
		so->so_error = error;

	/*
	 * Lock the peer's pool token.  unp_conn may change while we block
	 * acquiring the token, so re-check it afterwards and retry until
	 * the peer we locked is still the current peer.
	 */
	while ((unp2 = unp->unp_conn) != NULL) {
		lwkt_getpooltoken(unp2);
		if (unp2 == unp->unp_conn)
			break;
		lwkt_relpooltoken(unp2);
	}
	if (unp2 == NULL)
		return;
	/* unp2 is locked. */

	KASSERT((unp2->unp_flags & UNP_DROPPED) == 0, ("unp2 was dropped"));

	unp->unp_conn = NULL;

	switch (so->so_type) {
	case SOCK_DGRAM:
		/* Unlink ourselves from the peer's reference list. */
		LIST_REMOVE(unp, unp_reflink);
		soclrstate(so, SS_ISCONNECTED);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/*
		 * Keep a reference before clearing the unp_conn
		 * to avoid racing uipc_detach()/uipc_abort() in
		 * other thread.
		 */
		unp_reference(unp2);
		KASSERT(unp2->unp_conn == unp, ("unp_conn mismatch"));
		unp2->unp_conn = NULL;

		soisdisconnected(so);
		soisdisconnected(unp2->unp_socket);

		/* Drop the reference taken above; may free unp2. */
		unp_free(unp2);
		break;
	}

	lwkt_relpooltoken(unp2);
}
1249 
#ifdef notdef
/*
 * Compiled out: forcibly drop a reference on a unix domain pcb under
 * the global token (historical interface, unused).
 */
void
unp_abort(struct unpcb *unp)
{
	lwkt_gettoken(&unp_token);
	unp_free(unp);
	lwkt_reltoken(&unp_token);
}
#endif
1259 
1260 static int
1261 prison_unpcb(struct thread *td, struct unpcb *unp)
1262 {
1263 	struct proc *p;
1264 
1265 	if (td == NULL)
1266 		return (0);
1267 	if ((p = td->td_proc) == NULL)
1268 		return (0);
1269 	if (!p->p_ucred->cr_prison)
1270 		return (0);
1271 	if (p->p_fd->fd_rdir == unp->unp_rvnode)
1272 		return (0);
1273 	return (1);
1274 }
1275 
static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct unp_head *head;

	/* arg1 carries the socket type, selecting which pcb list to dump. */
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	KKASSERT(curproc != NULL);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = unp_count;
		/* Over-estimate by 12.5% to allow for pcbs created mid-request. */
		req->oldidx = (n + n/8) * sizeof(struct xunpcb);
		return 0;
	}

	if (req->newptr != NULL)
		return EPERM;

	lwkt_gettoken(&unp_token);

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);

	/*
	 * Collect pcbs that existed at the generation snapshot and that
	 * are visible to the caller's jail (if any).
	 */
	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
	     unp = LIST_NEXT(unp, unp_link)) {
		if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp))
			unp_list[i++] = unp;
	}
	n = i;			/* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;
			xu.xu_len = sizeof xu;
			xu.xu_unpp = unp;
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr)
				bcopy(unp->unp_addr, &xu.xu_addr,
				      unp->unp_addr->sun_len);
			if (unp->unp_conn && unp->unp_conn->unp_addr)
				bcopy(unp->unp_conn->unp_addr,
				      &xu.xu_caddr,
				      unp->unp_conn->unp_addr->sun_len);
			bcopy(unp, &xu.xu_unp, sizeof *unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof xu);
		}
	}
	lwkt_reltoken(&unp_token);
	kfree(unp_list, M_TEMP);

	return error;
}
1346 
/*
 * Export the active unpcb lists as arrays of struct xunpcb, one sysctl
 * node per socket type; consumed by netstat(1) and similar tools via
 * unp_pcblist().
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");
SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
	    "List of active local seqpacket stream sockets");
1356 
1357 static void
1358 unp_shutdown(struct unpcb *unp)
1359 {
1360 	struct socket *so;
1361 
1362 	if ((unp->unp_socket->so_type == SOCK_STREAM ||
1363 	     unp->unp_socket->so_type == SOCK_SEQPACKET) &&
1364 	    unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) {
1365 		socantrcvmore(so);
1366 	}
1367 }
1368 
#ifdef notdef
/*
 * Compiled out: placeholder drain routine which merely cycles the
 * global token.
 */
void
unp_drain(void)
{
	lwkt_gettoken(&unp_token);
	lwkt_reltoken(&unp_token);
}
#endif
1377 
/*
 * Convert in-kernel struct file pointers embedded in a received
 * SCM_RIGHTS control message into file descriptors in the receiving
 * process.  Returns 0 on success or EMSGSIZE if the process cannot
 * accommodate that many new descriptors (in which case all rights in
 * the message are discarded).
 */
int
unp_externalize(struct mbuf *rights)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;		/* XXX */
	struct lwp *lp = td->td_lwp;
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int *fdp;
	int i;
	struct file **rp;
	struct file *fp;
	/* Number of fps encoded in the control message payload. */
	int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm))
		/ sizeof (struct file *);
	int f;

	lwkt_gettoken(&unp_token);

	/*
	 * if the new FD's will not fit, then we free them all
	 */
	if (!fdavail(p, newfds)) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			fp = *rp;
			/*
			 * zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = NULL;
			unp_discard(fp, NULL);
		}
		lwkt_reltoken(&unp_token);
		return (EMSGSIZE);
	}

	/*
	 * now change each pointer to an fd in the global table to
	 * an integer that is the index to the local fd table entry
	 * that we set up to point to the global one we are transferring.
	 * If sizeof (struct file *) is bigger than or equal to sizeof int,
	 * then do it in forward order. In that case, an integer will
	 * always come in the same place or before its corresponding
	 * struct file pointer.
	 * If sizeof (struct file *) is smaller than sizeof int, then
	 * do it in reverse order.
	 */
	if (sizeof (struct file *) >= sizeof (int)) {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp++;
			unp_fp_externalize(lp, fp, f);
			*fdp++ = f;
		}
	} else {
		fdp = (int *)CMSG_DATA(cm) + newfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + newfds - 1;
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp--;
			unp_fp_externalize(lp, fp, f);
			*fdp-- = f;
		}
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differs.
	 */
	cm->cmsg_len = CMSG_LEN(newfds * sizeof(int));
	rights->m_len = cm->cmsg_len;

	lwkt_reltoken(&unp_token);
	return (0);
}
1456 
/*
 * Install one in-transit file pointer into the receiving lwp's
 * descriptor table at descriptor fd, and release the message's hold
 * on it.  A NULL lp (with fd == -1) is used by unp_revoke_gc() to
 * drop a replaced fp without installing it anywhere.
 *
 * A revoked fp is not installed; a freshly allocated dummy file is
 * substituted so the descriptor slot is still consumed.
 */
static void
unp_fp_externalize(struct lwp *lp, struct file *fp, int fd)
{
	struct file *fx;
	int error;

	lwkt_gettoken(&unp_token);

	if (lp) {
		KKASSERT(fd >= 0);
		if (fp->f_flag & FREVOKED) {
			kprintf("Warning: revoked fp exiting unix socket\n");
			fx = NULL;
			error = falloc(lp, &fx, NULL);
			if (error == 0)
				fsetfd(lp->lwp_proc->p_fd, fx, fd);
			else
				fsetfd(lp->lwp_proc->p_fd, NULL, fd);
			/* Drop the extra ref falloc returned with. */
			fdrop(fx);
		} else {
			fsetfd(lp->lwp_proc->p_fd, fp, fd);
		}
	}
	/* The fp is no longer in a message: adjust global accounting. */
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);
	/* Release the reference the message held. */
	fdrop(fp);

	lwkt_reltoken(&unp_token);
}
1488 
1489 
1490 void
1491 unp_init(void)
1492 {
1493 	LIST_INIT(&unp_dhead);
1494 	LIST_INIT(&unp_shead);
1495 	spin_init(&unp_spin, "unpinit");
1496 }
1497 
/*
 * Prepare a control mbuf for transmission over a unix domain socket.
 * SCM_CREDS messages are filled in with the sender's credentials.
 * SCM_RIGHTS messages have their descriptor numbers replaced in place
 * by held struct file pointers (growing the mbuf into a cluster when
 * the pointer representation needs more room).
 *
 * Returns 0 on success or EINVAL/EBADF/EOPNOTSUPP/E2BIG/ENOBUFS.
 */
static int
unp_internalize(struct mbuf *control, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdescp;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp;
	struct file *fp;
	int i, fd, *fdp;
	struct cmsgcred *cmcred;
	int oldfds;
	u_int newlen;
	int error;

	KKASSERT(p);
	lwkt_gettoken(&unp_token);

	fdescp = p->p_fd;
	/* Only well-formed SOL_SOCKET SCM_RIGHTS/SCM_CREDS are accepted. */
	if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
	    cm->cmsg_level != SOL_SOCKET ||
	    CMSG_ALIGN(cm->cmsg_len) != control->m_len) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Fill in credential information.
	 */
	if (cm->cmsg_type == SCM_CREDS) {
		cmcred = (struct cmsgcred *)CMSG_DATA(cm);
		cmcred->cmcred_pid = p->p_pid;
		cmcred->cmcred_uid = p->p_ucred->cr_ruid;
		cmcred->cmcred_gid = p->p_ucred->cr_rgid;
		cmcred->cmcred_euid = p->p_ucred->cr_uid;
		cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
							CMGROUP_MAX);
		for (i = 0; i < cmcred->cmcred_ngroups; i++)
			cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
		error = 0;
		goto done;
	}

	/*
	 * cmsghdr may not be aligned, do not allow calculation(s) to
	 * go negative.
	 */
	if (cm->cmsg_len < CMSG_LEN(0)) {
		error = EINVAL;
		goto done;
	}

	oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int);

	/*
	 * check that all the FDs passed in refer to legal OPEN files
	 * If not, reject the entire operation.
	 */
	fdp = (int *)CMSG_DATA(cm);
	for (i = 0; i < oldfds; i++) {
		fd = *fdp++;
		if ((unsigned)fd >= fdescp->fd_nfiles ||
		    fdescp->fd_files[fd].fp == NULL) {
			error = EBADF;
			goto done;
		}
		/* kqueues cannot be passed (would create uncollectable refs). */
		if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) {
			error = EOPNOTSUPP;
			goto done;
		}
	}
	/*
	 * Now replace the integer FDs with pointers to
	 * the associated global file table entry..
	 * Allocate a bigger buffer as necessary. But if an cluster is not
	 * enough, return E2BIG.
	 */
	newlen = CMSG_LEN(oldfds * sizeof(struct file *));
	if (newlen > MCLBYTES) {
		error = E2BIG;
		goto done;
	}
	if (newlen - control->m_len > M_TRAILINGSPACE(control)) {
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto done;
		}
		MCLGET(control, M_WAITOK);
		if (!(control->m_flags & M_EXT)) {
			error = ENOBUFS;
			goto done;
		}

		/* copy the data to the cluster */
		memcpy(mtod(control, char *), cm, cm->cmsg_len);
		cm = mtod(control, struct cmsghdr *);
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differs.
	 */
	cm->cmsg_len = newlen;
	control->m_len = CMSG_ALIGN(newlen);

	/*
	 * Transform the file descriptors into struct file pointers.
	 * If sizeof (struct file *) is bigger than or equal to sizeof int,
	 * then do it in reverse order so that the int won't get until
	 * we're done.
	 * If sizeof (struct file *) is smaller than sizeof int, then
	 * do it in forward order.
	 */
	if (sizeof (struct file *) >= sizeof (int)) {
		fdp = (int *)CMSG_DATA(cm) + oldfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + oldfds - 1;
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp--].fp;
			*rp-- = fp;
			/* Hold the fp for the duration of its transit. */
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	} else {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp++].fp;
			*rp++ = fp;
			/* Hold the fp for the duration of its transit. */
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	}
	error = 0;
done:
	lwkt_reltoken(&unp_token);
	return error;
}
1640 
1641 /*
1642  * Garbage collect in-transit file descriptors that get lost due to
1643  * loops (i.e. when a socket is sent to another process over itself,
1644  * and more complex situations).
1645  *
1646  * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
1647  */
1648 
struct unp_gc_info {
	struct file **extra_ref;	/* unreachable fps we extra-ref'd */
	struct file *locked_fp;		/* fp whose rcv queue is being scanned */
	int defer;			/* # of fps deferred to the next pass */
	int index;			/* current fill level of extra_ref[] */
	int maxindex;			/* capacity of extra_ref[] */
};
1656 
static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	/*
	 * Only one gc can be in-progress at any given moment
	 */
	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to be NOT defered
	 * and NOT externally accessible (not marked).  During the scan
	 * a fd can be marked externally accessible but we may or may not
	 * be able to immediately process it (controlled by FDEFER).
	 *
	 * If we loop sleep a bit.  The complexity of the topology can cause
	 * multiple loops.  Also failure to acquire the socket's so_rcv
	 * token can cause us to loop.
	 */
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		info.defer = 0;
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
		if (info.defer)
			tsleep(&info, 0, "gcagain", 1);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the orginal code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
	info.maxindex = 256;

	/* Repeat while the hit array saturates (more may remain). */
	do {
		/*
		 * Look for matches
		 */
		info.index = 0;
		allfiles_scan_exclusive(unp_gc_checkrefs, &info);

		/*
		 * For each FD on our hit list, do the following two things
		 */
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
			struct file *tfp = *fpp;
			if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
				sorflush((struct socket *)(tfp->f_data));
		}
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
			closef(*fpp, NULL);
	} while (info.index == info.maxindex);

	lwkt_reltoken(&unp_token);

	kfree((caddr_t)info.extra_ref, M_FILE);
	unp_gcing = FALSE;
}
1762 
1763 /*
1764  * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
1765  */
1766 static int
1767 unp_gc_checkrefs(struct file *fp, void *data)
1768 {
1769 	struct unp_gc_info *info = data;
1770 
1771 	if (fp->f_count == 0)
1772 		return(0);
1773 	if (info->index == info->maxindex)
1774 		return(-1);
1775 
1776 	/*
1777 	 * If all refs are from msgs, and it's not marked accessible
1778 	 * then it must be referenced from some unreachable cycle
1779 	 * of (shut-down) FDs, so include it in our
1780 	 * list of FDs to remove
1781 	 */
1782 	if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1783 		info->extra_ref[info->index++] = fp;
1784 		fhold(fp);
1785 	}
1786 	return(0);
1787 }
1788 
1789 /*
1790  * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
1791  */
/* Sweep callback: reset the GC reachability state on every file. */
static int
unp_gc_clearmarks(struct file *fp, void *data __unused)
{
	atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
	return(0);
}
1798 
1799 /*
1800  * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
1801  */
static int
unp_gc_checkmarks(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;
	struct socket *so;

	/*
	 * If the file is not open, skip it.  Make sure it isn't marked
	 * defered or we could loop forever, in case we somehow race
	 * something.
	 */
	if (fp->f_count == 0) {
		if (fp->f_flag & FDEFER)
			atomic_clear_int(&fp->f_flag, FDEFER);
		return(0);
	}
	/*
	 * If we already marked it as 'defer'  in a
	 * previous pass, then try process it this time
	 * and un-mark it
	 */
	if (fp->f_flag & FDEFER) {
		atomic_clear_int(&fp->f_flag, FDEFER);
	} else {
		/*
		 * if it's not defered, then check if it's
		 * already marked.. if so skip it
		 */
		if (fp->f_flag & FMARK)
			return(0);
		/*
		 * If all references are from messages
		 * in transit, then skip it. it's not
		 * externally accessible.
		 */
		if (fp->f_count == fp->f_msgcount)
			return(0);
		/*
		 * If it got this far then it must be
		 * externally accessible.
		 */
		atomic_set_int(&fp->f_flag, FMARK);
	}

	/*
	 * either it was defered, or it is externally
	 * accessible and not already marked so.
	 * Now check if it is possibly one of OUR sockets.
	 */
	if (fp->f_type != DTYPE_SOCKET ||
	    (so = (struct socket *)fp->f_data) == NULL) {
		return(0);
	}
	/* Only unix domain sockets with rights-passing can carry fps. */
	if (so->so_proto->pr_domain != &localdomain ||
	    !(so->so_proto->pr_flags & PR_RIGHTS)) {
		return(0);
	}

	/*
	 * So, Ok, it's one of our sockets and it IS externally accessible
	 * (or was defered).  Now we look to see if we hold any file
	 * descriptors in its message buffers.  Follow those links and mark
	 * them as accessible too.
	 *
	 * We are holding multiple spinlocks here, if we cannot get the
	 * token non-blocking defer until the next loop.
	 */
	info->locked_fp = fp;
	if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
		unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
		lwkt_reltoken(&so->so_rcv.ssb_token);
	} else {
		/* Token contended: retry this socket on the next pass. */
		atomic_set_int(&fp->f_flag, FDEFER);
		++info->defer;
	}
	return (0);
}
1879 
1880 /*
1881  * Scan all unix domain sockets and replace any revoked file pointers
1882  * found with the dummy file pointer fx.  We don't worry about races
1883  * against file pointers being read out as those are handled in the
1884  * externalize code.
1885  */
1886 
/* Max revoked fps replaced per allfiles scan; saturation forces a rescan. */
#define REVOKE_GC_MAXFILES	32

struct unp_revoke_gc_info {
	struct file	*fx;			/* dummy replacement fp */
	struct file	*fary[REVOKE_GC_MAXFILES]; /* revoked fps found */
	int		fcount;			/* fill level of fary[] */
};
1894 
1895 void
1896 unp_revoke_gc(struct file *fx)
1897 {
1898 	struct unp_revoke_gc_info info;
1899 	int i;
1900 
1901 	lwkt_gettoken(&unp_token);
1902 	info.fx = fx;
1903 	do {
1904 		info.fcount = 0;
1905 		allfiles_scan_exclusive(unp_revoke_gc_check, &info);
1906 		for (i = 0; i < info.fcount; ++i)
1907 			unp_fp_externalize(NULL, info.fary[i], -1);
1908 	} while (info.fcount == REVOKE_GC_MAXFILES);
1909 	lwkt_reltoken(&unp_token);
1910 }
1911 
1912 /*
1913  * Check for and replace revoked descriptors.
1914  *
1915  * WARNING:  This routine is not allowed to block.
1916  */
static int
unp_revoke_gc_check(struct file *fps, void *vinfo)
{
	struct unp_revoke_gc_info *info = vinfo;
	struct file *fp;
	struct socket *so;
	struct mbuf *m0;
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	/*
	 * Is this a unix domain socket with rights-passing abilities?
	 */
	if (fps->f_type != DTYPE_SOCKET)
		return (0);
	if ((so = (struct socket *)fps->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain)
		return(0);
	if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
		return(0);

	/*
	 * Scan the mbufs for control messages and replace any revoked
	 * descriptors we find.
	 */
	lwkt_gettoken(&so->so_rcv.ssb_token);
	m0 = so->so_rcv.ssb_mb;
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;
			if (m->m_len < sizeof(*cm))
				continue;
			cm = mtod(m, struct cmsghdr *);
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_RIGHTS) {
				continue;
			}
			qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
			rp = (struct file **)CMSG_DATA(cm);
			for (i = 0; i < qfds; i++) {
				fp = rp[i];
				if (fp->f_flag & FREVOKED) {
					kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
					/* Substitute the dummy fp in place. */
					fhold(info->fx);
					info->fx->f_msgcount++;
					unp_rights++;
					rp[i] = info->fx;
					info->fary[info->fcount++] = fp;
				}
				if (info->fcount == REVOKE_GC_MAXFILES)
					break;
			}
			if (info->fcount == REVOKE_GC_MAXFILES)
				break;
		}
		m0 = m0->m_nextpkt;
		if (info->fcount == REVOKE_GC_MAXFILES)
			break;
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);

	/*
	 * Stop the scan if we filled up our array.
	 */
	if (info->fcount == REVOKE_GC_MAXFILES)
		return(-1);
	return(0);
}
1990 
1991 /*
1992  * Dispose of the fp's stored in a mbuf.
1993  *
1994  * The dds loop can cause additional fps to be entered onto the
1995  * list while it is running, flattening out the operation and avoiding
1996  * a deep kernel stack recursion.
1997  */
void
unp_dispose(struct mbuf *m)
{
	unp_defdiscard_t dds;

	lwkt_gettoken(&unp_token);
	/* Track recursion depth; nested calls only queue, never close. */
	++unp_defdiscard_nest;
	if (m) {
		unp_scan(m, unp_discard, NULL);
	}
	/*
	 * Only the outermost invocation drains the deferred-discard
	 * list; entries may be added to it while we are closing.
	 */
	if (unp_defdiscard_nest == 1) {
		while ((dds = unp_defdiscard_base) != NULL) {
			unp_defdiscard_base = dds->next;
			closef(dds->fp, NULL);
			kfree(dds, M_UNPCB);
		}
	}
	--unp_defdiscard_nest;
	lwkt_reltoken(&unp_token);
}
2018 
2019 static int
2020 unp_listen(struct unpcb *unp, struct thread *td)
2021 {
2022 	struct proc *p = td->td_proc;
2023 
2024 	ASSERT_LWKT_TOKEN_HELD(&unp_token);
2025 	UNP_ASSERT_TOKEN_HELD(unp);
2026 
2027 	KKASSERT(p);
2028 	cru2x(p->p_ucred, &unp->unp_peercred);
2029 	unp_setflags(unp, UNP_HAVEPCCACHED);
2030 	return (0);
2031 }
2032 
2033 static void
2034 unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
2035 {
2036 	struct mbuf *m;
2037 	struct file **rp;
2038 	struct cmsghdr *cm;
2039 	int i;
2040 	int qfds;
2041 
2042 	while (m0) {
2043 		for (m = m0; m; m = m->m_next) {
2044 			if (m->m_type == MT_CONTROL &&
2045 			    m->m_len >= sizeof(*cm)) {
2046 				cm = mtod(m, struct cmsghdr *);
2047 				if (cm->cmsg_level != SOL_SOCKET ||
2048 				    cm->cmsg_type != SCM_RIGHTS)
2049 					continue;
2050 				qfds = (cm->cmsg_len - CMSG_LEN(0)) /
2051 					sizeof(void *);
2052 				rp = (struct file **)CMSG_DATA(cm);
2053 				for (i = 0; i < qfds; i++)
2054 					(*op)(*rp++, data);
2055 				break;		/* XXX, but saves time */
2056 			}
2057 		}
2058 		m0 = m0->m_nextpkt;
2059 	}
2060 }
2061 
2062 /*
2063  * Mark visibility.  info->defer is recalculated on every pass.
2064  */
2065 static void
2066 unp_mark(struct file *fp, void *data)
2067 {
2068 	struct unp_gc_info *info = data;
2069 
2070 	if ((fp->f_flag & FMARK) == 0) {
2071 		++info->defer;
2072 		atomic_set_int(&fp->f_flag, FMARK | FDEFER);
2073 	} else if (fp->f_flag & FDEFER) {
2074 		++info->defer;
2075 	}
2076 }
2077 
2078 /*
2079  * Discard a fp previously held in a unix domain socket mbuf.  To
2080  * avoid blowing out the kernel stack due to contrived chain-reactions
2081  * we may have to defer the operation to a higher procedural level.
2082  *
2083  * Caller holds unp_token
2084  */
static void
unp_discard(struct file *fp, void *data __unused)
{
	unp_defdiscard_t dds;

	/* The fp is leaving its message: adjust global accounting. */
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);

	/*
	 * Inside a nested unp_dispose() queue the close for the
	 * outermost level to perform; otherwise close immediately.
	 */
	if (unp_defdiscard_nest) {
		dds = kmalloc(sizeof(*dds), M_UNPCB, M_WAITOK|M_ZERO);
		dds->fp = fp;
		dds->next = unp_defdiscard_base;
		unp_defdiscard_base = dds;
	} else {
		closef(fp, NULL);
	}
}
2104 
/*
 * Translate a filesystem sockaddr_un into its bound unix domain pcb.
 * On success the pcb is returned via *unp_ret with its pool token held
 * and a reference added; the caller must unp_reltoken()/unp_free() it.
 * Returns 0 or an errno (EINVAL/ENOTSOCK/ECONNREFUSED/EPROTOTYPE/...).
 */
static int
unp_find_lockref(struct sockaddr *nam, struct thread *td, short type,
    struct unpcb **unp_ret)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp = NULL;
	struct socket *so;
	struct unpcb *unp;
	int error, len;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	*unp_ret = NULL;

	/* sun_path is not NUL terminated; derive its length from sa_len. */
	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		error = EINVAL;
		goto failed;
	}
	strncpy(buf, soun->sun_path, len);
	buf[len] = 0;

	/* Resolve the path to a vnode, following symlinks. */
	error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error) {
		vp = NULL;
		goto failed;
	}

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto failed;
	}
	/* Connecting requires write access to the socket node. */
	error = VOP_EACCESS(vp, VWRITE, p->p_ucred);
	if (error)
		goto failed;
	so = vp->v_socket;
	if (so == NULL) {
		error = ECONNREFUSED;
		goto failed;
	}
	if (so->so_type != type) {
		error = EPROTOTYPE;
		goto failed;
	}

	/* Lock this unp. */
	unp = unp_getsocktoken(so);
	if (!UNP_ISATTACHED(unp)) {
		unp_reltoken(unp);
		error = ECONNREFUSED;
		goto failed;
	}
	/* And keep this unp referenced. */
	unp_reference(unp);

	/* Done! */
	*unp_ret = unp;
	error = 0;
failed:
	if (vp != NULL)
		vput(vp);
	return error;
}
2174 
/*
 * Wire two locked, attached pcbs of the same type together.  Datagram
 * connections are one-way (unp is added to unp2's ref list); stream
 * and seqpacket connections are symmetric.  Returns 0 or an errno.
 */
static int
unp_connect_pair(struct unpcb *unp, struct unpcb *unp2)
{
	struct socket *so = unp->unp_socket;
	struct socket *so2 = unp2->unp_socket;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);
	UNP_ASSERT_TOKEN_HELD(unp2);

	KASSERT(so->so_type == so2->so_type,
	    ("socket type mismatch, so %d, so2 %d", so->so_type, so2->so_type));

	/* Either side may have been detached while we slept. */
	if (!UNP_ISATTACHED(unp))
		return EINVAL;
	if (!UNP_ISATTACHED(unp2))
		return ECONNREFUSED;

	KASSERT(unp->unp_conn == NULL, ("unp is already connected"));
	unp->unp_conn = unp2;

	switch (so->so_type) {
	case SOCK_DGRAM:
		/* One-way: track the connection on the target's ref list. */
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Symmetric: both sides point at each other. */
		KASSERT(unp2->unp_conn == NULL, ("unp2 is already connected"));
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect_pair: unknown socket type %d", so->so_type);
	}
	return 0;
}
2215 
/*
 * Drop a detached pcb: disconnect it from its peer (recording the
 * error on the socket) and sever every datagram socket still
 * connected to it, then mark it UNP_DROPPED.
 */
static void
unp_drop(struct unpcb *unp, int error)
{
	struct unpcb *unp2;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);
	KASSERT(unp->unp_flags & UNP_DETACHED, ("unp is not detached"));

	unp_disconnect(unp, error);

	/* Reset every datagram peer still referencing us. */
	while ((unp2 = LIST_FIRST(&unp->unp_refs)) != NULL) {
		lwkt_getpooltoken(unp2);
		unp_disconnect(unp2, ECONNRESET);
		lwkt_relpooltoken(unp2);
	}
	unp_setflags(unp, UNP_DROPPED);
}
2234