/*	$OpenBSD: uipc_usrreq.c,v 1.164 2022/04/11 18:18:17 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *      I       immutable after creation
 *      D       unp_df_lock
 *      G       unp_gc_lock
 *      U       unp_lock
 *      R       unp_rights_mtx
 *      a       atomic
 */

struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

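/*
 * Copy the unpcb's bound address, or `sun_noname' if it has none,
 * into the caller-supplied name mbuf.
 */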
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

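/*
 * Handle a protocol user request (PRU_*) on a UNIX domain socket.
 * On return `m' and `control' have been freed unless the request
 * consumed them or does not take them (see the `release' label).
 */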
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on the sender and
			 * wake up anyone waiting to write.
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control) {
			sounlock(so, SL_LOCKED);
			error = unp_internalize(control, p);
			solock(so);
			if (error)
				break;
		}
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_detach(unp);
		/*
		 * As long as `unp_lock' is taken before entering
		 * uipc_usrreq(), releasing it here would lead to a
		 * double unlock.
		 */
		sofree(so, SL_NOUNLOCK);
		break;

	case PRU_SENSE: {
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Both the send and receive buffers of a stream socket are allocated
 * PIPSIZ bytes of buffering, although the total for sender and
 * receiver is actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

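/*
 * Allocate a new unpcb, attach it to the socket and reserve buffer
 * space according to the socket type.  The new pcb is also put on
 * the global `unp_head' list for unp_gc().
 */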
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	rw_assert_wrlock(&unp_lock);

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/*
	 * Enforce the `unp_gc_lock' -> `solock()' lock order.
	 *
	 * We also release the lock on the listening socket and on our
	 * peer socket when called from unp_connect(). This is safe: the
	 * listening socket is protected by the vnode(9) lock, and the
	 * peer socket has the 'UNP_CONNECTING' flag set.
	 */
	sounlock(so, SL_LOCKED);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);
	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

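/*
 * Detach the unpcb from its socket: unlink it from the `unp_head'
 * list, release any bound vnode, break all connections, and free
 * the pcb itself.
 */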
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;

	rw_assert_wrlock(&unp_lock);

	unp->unp_vnode = NULL;

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 * Enforce `i_lock' -> `unp_lock' lock order.
	 */
	sounlock(so, SL_LOCKED);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn)
		unp_disconnect(unp);
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

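/*
 * Bind the socket to a filesystem path by creating a VSOCK vnode
 * for it.  The bound address and the binder's credentials are
 * recorded in the unpcb.
 */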
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unp_lock' because the fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket, SL_LOCKED);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zeroed mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	KERNEL_LOCK();
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

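/*
 * Connect to the socket bound at the path given in `nam'.  For
 * connection-oriented types the actual peer is a fresh socket
 * spawned on the listener with sonewconn().
 */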
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unp_lock' because the fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so, SL_LOCKED);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	solock(so);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put_locked;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_locked;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == NULL) {
			error = ECONNREFUSED;
			goto put_locked;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
put_locked:
	sounlock(so, SL_LOCKED);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by a concurrent thread
	 * while `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

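/*
 * Wire two sockets together.  A datagram socket connects one way
 * only and is tracked on the peer's `unp_refs' list; stream and
 * seqpacket sockets are connected in both directions.
 */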
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	rw_assert_wrlock(&unp_lock);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

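/*
 * Break the connection established by unp_connect2() and mark the
 * affected sockets as disconnected.
 */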
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	rw_assert_wrlock(&unp_lock);

	so->so_error = errno;
	unp_disconnect(unp);
}

#ifdef notdef
unp_drain(void)
{

}
#endif

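/*
 * Return the unpcb behind `fp' if it is a UNIX domain socket,
 * otherwise NULL.
 */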
static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

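/*
 * Turn a control message carrying in-kernel `struct fdpass'
 * entries back into file descriptors in the receiving process.
 * On failure, all files still held by the message are discarded.
 */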
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient is allowed to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy the temporary array into the message and adjust the
	 * length: the payload shrinks from large struct file pointers
	 * to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

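/*
 * Turn a control message carrying file descriptors into one holding
 * in-kernel `struct fdpass' entries, taking a reference on each
 * file.  unp_externalize() performs the reverse on the receiving
 * side.
 *
 * For illustration only (a userland sketch, not kernel code), a
 * sender typically reaches this path with something like:
 *
 *	union {
 *		struct cmsghdr hdr;
 *		unsigned char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsgbuf;
 *	struct msghdr msg;
 *	struct cmsghdr *cmp;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_control = cmsgbuf.buf;
 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
 *	cmp = CMSG_FIRSTHDR(&msg);
 *	cmp->cmsg_len = CMSG_LEN(sizeof(int));
 *	cmp->cmsg_level = SOL_SOCKET;
 *	cmp->cmsg_type = SCM_RIGHTS;
 *	memcpy(CMSG_DATA(cmp), &fd, sizeof(int));
 *	sendmsg(s, &msg, 0);
 */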
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}

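/*
 * Garbage collect descriptors in flight.  First close any files
 * sitting on the deferred list, then run a mark-and-sweep pass
 * over all UNIX domain sockets to detect cycles of passed
 * descriptors that can never be received, and discard the files
 * buffered in such dead sockets.
 */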
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		solock(so);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		sounlock(so, SL_LOCKED);
	}

	/*
	 * A dead socket whose `unp_gcrefs' counter is still greater
	 * than 0 can't be unreferenced. Mark it as alive and restore
	 * the `unp_gcrefs' reference it holds on each dead socket
	 * within its buffer. Repeat this until no new alive sockets
	 * are found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			solock(so);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			sounlock(so, SL_LOCKED);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, dispose of the files
	 * in their receive buffers so that they can finally be closed.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				/*
				 * This socket could still be connected,
				 * in which case its `so_rcv' remains
				 * accessible to a concurrent PRU_SEND
				 * thread.
				 */
				so = unp->unp_socket;
				solock(so);
				unp_scan(so->so_rcv.sb_mb, unp_discard);
				sounlock(so, SL_LOCKED);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

void
unp_dispose(struct mbuf *m)
{
	if (m)
		unp_scan(m, unp_discard);
}

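/*
 * Walk a chain of mbufs and apply `op' to the fdpass array of
 * every SCM_RIGHTS control message found.
 */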
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

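/*
 * Hand a set of passed file pointers over to the GC task for
 * deferred close.
 */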
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

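/*
 * First GC sweep helper: drop one `unp_gcrefs' reference from
 * each dead socket found in the scanned fdpass array.
 */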
void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

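/*
 * Second GC sweep helper: give back the `unp_gcrefs' reference
 * for each dead socket found and note, via `unp_defer', that
 * another pass is needed.
 */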
1353 unp_restore_gcrefs(struct fdpass *rp, int nfds)
1354 {
1355 	struct unpcb *unp;
1356 	int i;
1357 
1358 	rw_assert_wrlock(&unp_gc_lock);
1359 
1360 	for (i = 0; i < nfds; i++) {
1361 		if (rp[i].fp == NULL)
1362 			continue;
1363 		if ((unp = fptounp(rp[i].fp)) == NULL)
1364 			continue;
1365 		if (unp->unp_gcflags & UNP_GCDEAD) {
1366 			unp->unp_gcrefs++;
1367 			unp_defer++;
1368 		}
1369 	}
1370 }
1371 
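/*
 * Validate a name mbuf as a struct sockaddr_un and ensure that
 * its sun_path is NUL terminated, extending the mbuf by one byte
 * if necessary.
 */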
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}
1405