xref: /openbsd-src/sys/kern/uipc_usrreq.c (revision 25c4e8bd056e974b28f4a0ffd39d76c190a56013)
1 /*	$OpenBSD: uipc_usrreq.c,v 1.167 2022/07/02 11:49:23 mvs Exp $	*/
2 /*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/unpcb.h>
45 #include <sys/un.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/mbuf.h>
51 #include <sys/task.h>
52 #include <sys/pledge.h>
53 #include <sys/pool.h>
54 #include <sys/rwlock.h>
55 #include <sys/mutex.h>
56 #include <sys/sysctl.h>
57 #include <sys/lock.h>
58 #include <sys/refcnt.h>
59 
60 #include "kcov.h"
61 #if NKCOV > 0
62 #include <sys/kcov.h>
63 #endif
64 
65 /*
66  * Locks used to protect global data and struct members:
67  *      I       immutable after creation
68  *      D       unp_df_lock
69  *      G       unp_gc_lock
70  *      M       unp_ino_mtx
71  *      R       unp_rights_mtx
72  *      a       atomic
73  *      s       socket lock
74  */
75 
struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");	/* [D] in legend */
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");	/* [G] in legend */

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);	/* [R] in legend */
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);	/* [M] in legend */

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

/* Allocator for unpcb structures; set up in unp_init(). */
struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

/*
 * NOTE(review): tag [U] below is not defined in the lock legend above;
 * the only visible update (PRU_SENSE) happens under `unp_ino_mtx' --
 * confirm and retag.
 */
ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */
/*
 * One-time initialization of the PF_UNIX domain: set up the pool
 * from which unpcb structures are allocated.
 */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}
135 
/* Take a transient reference on `unp' to keep it alive across a re-lock. */
static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}
141 
/*
 * Drop a reference taken with unp_ref() and wake a thread waiting in
 * refcnt_finalize() (see unp_detach()).
 */
static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}
147 
/*
 * Lock the connected peer of `so' and return its socket, or NULL if
 * `so' has no peer.  Called with `so' locked; on success both sockets
 * are locked (they are the same lock if `so' is connected to itself).
 *
 * Locks are always taken in ascending address order.  When the peer
 * sorts below `so', the lock on `so' must be dropped first; a pcb
 * reference keeps the peer alive across the gap, and the connection
 * is re-checked afterwards because a datagram socket may have been
 * re-connected while unlocked.
 */
struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2){
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}
182 
183 void
184 uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
185 {
186 	if (unp != NULL && unp->unp_addr != NULL) {
187 		nam->m_len = unp->unp_addr->m_len;
188 		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
189 		    nam->m_len);
190 	} else {
191 		nam->m_len = sizeof(sun_noname);
192 		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
193 		    nam->m_len);
194 	}
195 }
196 
/*
 * Protocol user-request switch for PF_UNIX sockets.  Dispatches the
 * PRU_* request `req' issued on socket `so'.  Entered with the socket
 * lock held; several cases drop and retake it (noted inline).
 *
 *	m	data mbuf chain (PRU_SEND) or scratch buffer (PRU_SENSE,
 *		where it is actually a struct stat pointer)
 *	nam	address mbuf for connect/send, result buffer for the
 *		*ADDR/ACCEPT requests, peer socket for PRU_CONNECT2
 *	control	SCM_RIGHTS control mbuf, meaningful only for PRU_SEND
 *	p	requesting process
 *
 * Returns 0 or an errno.
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* Control data (fd passing) is only supported on PRU_SEND. */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* Only a socket bound to a path may listen. */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): record our own creds on both ends. */
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		so2 = unp_solock_peer(so);
		uipc_setaddr(unp->unp_conn, nam);
		if (so2 != NULL && so2 != so)
			sounlock(so2);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			/* Datagram sockets never generate PRU_RCVD. */
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if ((so2 = unp_solock_peer(so)) == NULL)
				break;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			sounlock(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control) {
			/* unp_internalize() takes fdplock; drop solock. */
			sounlock(so);
			error = unp_internalize(control, p);
			solock(so);
			if (error)
				break;
		}
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				/* sendto(2) with explicit destination. */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			}

			if ((so2 = unp_solock_peer(so)) == NULL) {
				if (nam != NULL)
					error = ECONNREFUSED;
				else
					error = ENOTCONN;
				break;
			}

			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				/* Ownership moved to the receive buffer. */
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;

			if (so2 != so)
				sounlock(so2);

			/* Undo the temporary connect made for sendto(2). */
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if ((so2 = unp_solock_peer(so)) == NULL) {
				error = ENOTCONN;
				break;
			}

			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					sounlock(so2);
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);

			sounlock(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_detach(unp);
		sofree(so, 0);
		break;

	case PRU_SENSE: {
		/* fstat(2): `m' is really the caller's struct stat. */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* Lazily assign a fake inode number, once. */
		mtx_enter(&unp_ino_mtx);
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		mtx_leave(&unp_ino_mtx);
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		/* No out-of-band data on AF_UNIX (see TODO above). */
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		so2 = unp_solock_peer(so);
		uipc_setaddr(unp->unp_conn, nam);
		if (so2 != NULL && so2 != so)
			sounlock(so2);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	/*
	 * For these requests `m' is not an mbuf (PRU_SENSE) or was not
	 * consumed, so it must not be freed here.
	 */
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}
443 
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;	/* SOCK_STREAM defaults */
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;	/* SOCK_SEQPACKET defaults */
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

/* Bounds tables for the net.unix.*.{recv,send}space sysctls. */
const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};
472 
/*
 * Attach a new unpcb to socket `so': reserve default buffer space by
 * socket type (first attach only), allocate and link the pcb, stamp
 * its creation time (reported by PRU_SENSE), and put it on the global
 * `unp_head' list for the garbage collector.  Called with the socket
 * locked; the lock is dropped briefly for the list insertion.
 * Returns 0 or an errno.
 */
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 */
	sounlock(so);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);
	return (0);
}
520 
521 int
522 uipc_detach(struct socket *so)
523 {
524 	struct unpcb *unp = sotounpcb(so);
525 
526 	if (unp == NULL)
527 		return (EINVAL);
528 
529 	unp_detach(unp);
530 
531 	return (0);
532 }
533 
/*
 * net.unix.* sysctl handler.  The second-level name selects either a
 * per-socket-type send/receive space table (SOCK_STREAM, SOCK_SEQPACKET,
 * SOCK_DGRAM sub-trees) or one of the read-only counters
 * (NET_UNIX_INFLIGHT -> unp_rights, NET_UNIX_DEFERRED -> unp_defer).
 */
int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}
568 
/*
 * Tear down pcb `unp': unlink it from the global GC list, release the
 * bound vnode (if any), disconnect the peer and every datagram sender
 * still pointing at us, then wait for transient pcb references (taken
 * by unp_solock_peer()) to drain before freeing the pcb.  Called with
 * the socket locked; the lock is dropped and retaken several times to
 * honour the documented lock orders.
 */
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 * Enforce `i_lock' -> `solock()' lock order.
	 */
	sounlock(so);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn != NULL) {
		/*
		 * Datagram socket could be connected to itself.
		 * Such socket will be disconnected here.
		 */
		unp_disconnect(unp);
	}

	/* Cut loose every datagram sender still connected to us. */
	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		/* Take both socket locks in ascending address order. */
		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	/* Wait until all transient unp_ref() holders are gone. */
	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	/* Descriptors may still be in flight; let the GC reap them. */
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}
647 
/*
 * Bind `unp' to the filesystem path carried in `nam': create a VSOCK
 * vnode at that path and link it to the socket.  Fails with EINVAL if
 * the socket is already bound or another bind/connect is in flight
 * (UNP_BINDING guards re-entry while the socket lock is dropped), and
 * EADDRINUSE if the path already exists.  On success the binder's
 * credentials are recorded for peer-credential queries (UNP_FEIDSBIND).
 * Returns 0 or an errno.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	/* Private, NUL-terminated copy of the address to keep in the pcb. */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	KERNEL_LOCK();
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* Path exists: release namei state and fail. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}
738 
/*
 * Connect socket `so' to the socket bound at the filesystem path in
 * `nam'.  For connection-oriented types (PR_CONNREQUIRED) a fresh
 * server-side socket is spawned with sonewconn() and `so' is paired
 * with it; datagram sockets are linked directly to the target.  The
 * socket lock is dropped around the namei() lookup (vnode lock order),
 * so UNP_CONNECTING blocks concurrent bind/connect, and the connection
 * is re-validated after relocking.  Returns 0 or an errno.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	/* connect(2) requires write access to the socket node. */
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == NULL) {
			error = ECONNREFUSED;
		}

		sounlock(so2);

		if (error != 0)
			goto put;

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in unp_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		/* The new server socket becomes our peer. */
		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}
861 
862 int
863 unp_connect2(struct socket *so, struct socket *so2)
864 {
865 	struct unpcb *unp = sotounpcb(so);
866 	struct unpcb *unp2;
867 
868 	soassertlocked(so);
869 	soassertlocked(so2);
870 
871 	if (so2->so_type != so->so_type)
872 		return (EPROTOTYPE);
873 	unp2 = sotounpcb(so2);
874 	unp->unp_conn = unp2;
875 	switch (so->so_type) {
876 
877 	case SOCK_DGRAM:
878 		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
879 		soisconnected(so);
880 		break;
881 
882 	case SOCK_STREAM:
883 	case SOCK_SEQPACKET:
884 		unp2->unp_conn = unp;
885 		soisconnected(so);
886 		soisconnected(so2);
887 		break;
888 
889 	default:
890 		panic("unp_connect2");
891 	}
892 	return (0);
893 }
894 
/*
 * Break the connection between `unp' and its peer.  No-op when not
 * connected.  Datagram sockets are simply removed from the peer's
 * sender list; stream/seqpacket sockets reset both send buffers and
 * mark both ends disconnected.  unp_solock_peer() leaves the peer's
 * socket locked, so it is unlocked here unless it is ourselves.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Drop the backpressure bookkeeping on both ends. */
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}
929 
930 void
931 unp_shutdown(struct unpcb *unp)
932 {
933 	struct socket *so2;
934 
935 	switch (unp->unp_socket->so_type) {
936 	case SOCK_STREAM:
937 	case SOCK_SEQPACKET:
938 		if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
939 			break;
940 
941 		socantrcvmore(so2);
942 		sounlock(so2);
943 
944 		break;
945 	default:
946 		break;
947 	}
948 }
949 
#ifdef notdef
/* Compiled out: placeholder for a drain hook, never built (notdef). */
unp_drain(void)
{

}
#endif
956 
957 static struct unpcb *
958 fptounp(struct file *fp)
959 {
960 	struct socket *so;
961 
962 	if (fp->f_type != DTYPE_SOCKET)
963 		return (NULL);
964 	if ((so = fp->f_data) == NULL)
965 		return (NULL);
966 	if (so->so_proto->pr_domain != &unixdomain)
967 		return (NULL);
968 	return (sotounpcb(so));
969 }
970 
/*
 * Deliver an SCM_RIGHTS control message to the receiving process:
 * convert the in-kernel representation (struct fdpass, holding file
 * pointers plus flags) into file descriptors installed in the
 * recipient's descriptor table, rewriting the cmsg payload to an
 * array of ints.  `controllen' is the buffer space the receiver
 * supplied; `flags' may carry MSG_CMSG_CLOEXEC.  On any error every
 * passed file is handed to unp_discard() so nothing leaks.
 * Returns 0 or an errno.
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	/* Receiver's buffer must have room for one int per descriptor. */
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				/* Grow the table and retry from scratch. */
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		/* The file is no longer "in flight" for GC accounting. */
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}
1138 
/*
 * Internalize an SCM_RIGHTS control message: convert the array of
 * file descriptor numbers supplied by the sending process into an
 * array of struct fdpass records (file pointer + pledge flag) held
 * in the same mbuf, taking a reference on each file.  On failure
 * all partial work is backed out and an errno is returned (EINVAL,
 * EMFILE, E2BIG, ENOBUFS, EBADF, EDEADLK, or a pledge error).
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/*
	 * Reserve the in-flight descriptor accounting up front; the
	 * global total of passed descriptors is capped at 10% of
	 * maxfiles.  Undone at `nospace' on any failure below.
	 */
	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Expand in place: a struct fdpass is larger than an int, so
	 * both arrays are walked from the last element towards the
	 * first so no source int is overwritten before it is read.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		/* refuse files whose reference count is near overflow */
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			/* record in-flight socket state for unp_gc() */
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	/* release the file that failed the checks above, if any */
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	/* give back the in-flight accounting reserved above */
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}
1270 
/*
 * Unix domain socket garbage collection.
 *
 * Sockets passed over sockets can form reference cycles that are
 * unreachable from userland yet never reach f_count == 0.  This
 * task first closes any files queued on the deferred-close list by
 * unp_discard(), then runs a mark-and-sweep over `unp_head' to find
 * in-flight sockets whose only references are messages queued on
 * other dead sockets, and discards the rights in their buffers.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	/* serialize: only one collection pass at a time */
	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		/* drop the list lock across closef(), which may sleep */
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		solock(so);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		sounlock(so);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced. Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer. Repeat this until we
	 * have no new alive sockets found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			solock(so);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			sounlock(so);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				/*
				 * This socket could still be connected
				 * and if so it's `so_rcv' is still
				 * accessible by concurrent PRU_SEND
				 * thread.
				 */
				so = unp->unp_socket;
				solock(so);
				unp_scan(so->so_rcv.sb_mb, unp_discard);
				sounlock(so);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}
1404 
1405 void
1406 unp_dispose(struct mbuf *m)
1407 {
1408 
1409 	if (m)
1410 		unp_scan(m, unp_discard);
1411 }
1412 
1413 void
1414 unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
1415 {
1416 	struct mbuf *m;
1417 	struct fdpass *rp;
1418 	struct cmsghdr *cm;
1419 	int qfds;
1420 
1421 	while (m0) {
1422 		for (m = m0; m; m = m->m_next) {
1423 			if (m->m_type == MT_CONTROL &&
1424 			    m->m_len >= sizeof(*cm)) {
1425 				cm = mtod(m, struct cmsghdr *);
1426 				if (cm->cmsg_level != SOL_SOCKET ||
1427 				    cm->cmsg_type != SCM_RIGHTS)
1428 					continue;
1429 				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
1430 				    / sizeof(struct fdpass);
1431 				if (qfds > 0) {
1432 					rp = (struct fdpass *)CMSG_DATA(cm);
1433 					op(rp, qfds);
1434 				}
1435 				break;		/* XXX, but saves time */
1436 			}
1437 		}
1438 		m0 = m0->m_nextpkt;
1439 	}
1440 }
1441 
1442 void
1443 unp_discard(struct fdpass *rp, int nfds)
1444 {
1445 	struct unp_deferral *defer;
1446 
1447 	/* copy the file pointers to a deferral structure */
1448 	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
1449 	defer->ud_n = nfds;
1450 	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
1451 	memset(rp, 0, sizeof(*rp) * nfds);
1452 
1453 	rw_enter_write(&unp_df_lock);
1454 	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
1455 	rw_exit_write(&unp_df_lock);
1456 
1457 	task_add(systqmp, &unp_gc_task);
1458 }
1459 
1460 void
1461 unp_remove_gcrefs(struct fdpass *rp, int nfds)
1462 {
1463 	struct unpcb *unp;
1464 	int i;
1465 
1466 	rw_assert_wrlock(&unp_gc_lock);
1467 
1468 	for (i = 0; i < nfds; i++) {
1469 		if (rp[i].fp == NULL)
1470 			continue;
1471 		if ((unp = fptounp(rp[i].fp)) == NULL)
1472 			continue;
1473 		if (unp->unp_gcflags & UNP_GCDEAD) {
1474 			KASSERT(unp->unp_gcrefs > 0);
1475 			unp->unp_gcrefs--;
1476 		}
1477 	}
1478 }
1479 
1480 void
1481 unp_restore_gcrefs(struct fdpass *rp, int nfds)
1482 {
1483 	struct unpcb *unp;
1484 	int i;
1485 
1486 	rw_assert_wrlock(&unp_gc_lock);
1487 
1488 	for (i = 0; i < nfds; i++) {
1489 		if (rp[i].fp == NULL)
1490 			continue;
1491 		if ((unp = fptounp(rp[i].fp)) == NULL)
1492 			continue;
1493 		if (unp->unp_gcflags & UNP_GCDEAD) {
1494 			unp->unp_gcrefs++;
1495 			unp_defer++;
1496 		}
1497 	}
1498 }
1499 
1500 int
1501 unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
1502 {
1503 	struct sockaddr *sa = mtod(nam, struct sockaddr *);
1504 	size_t size, len;
1505 
1506 	if (nam->m_len < offsetof(struct sockaddr, sa_data))
1507 		return EINVAL;
1508 	if (sa->sa_family != AF_UNIX)
1509 		return EAFNOSUPPORT;
1510 	if (sa->sa_len != nam->m_len)
1511 		return EINVAL;
1512 	if (sa->sa_len > sizeof(struct sockaddr_un))
1513 		return EINVAL;
1514 	*sun = (struct sockaddr_un *)sa;
1515 
1516 	/* ensure that sun_path is NUL terminated and fits */
1517 	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
1518 	len = strnlen((*sun)->sun_path, size);
1519 	if (len == sizeof((*sun)->sun_path))
1520 		return EINVAL;
1521 	if (len == size) {
1522 		if (m_trailingspace(nam) == 0)
1523 			return EINVAL;
1524 		nam->m_len++;
1525 		(*sun)->sun_len++;
1526 		(*sun)->sun_path[len] = '\0';
1527 	}
1528 	if (pathlen != NULL)
1529 		*pathlen = len;
1530 
1531 	return 0;
1532 }
1533