xref: /openbsd-src/sys/kern/uipc_usrreq.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: uipc_usrreq.c,v 1.76 2014/07/13 15:52:38 tedu Exp $	*/
2 /*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/unpcb.h>
44 #include <sys/un.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/mbuf.h>
50 
51 void	uipc_setaddr(const struct unpcb *, struct mbuf *);
52 
53 /*
54  * Unix communications domain.
55  *
56  * TODO:
57  *	RDM
58  *	rethink name space problems
59  *	need a proper out-of-band
60  */
61 struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
62 ino_t	unp_ino;			/* prototype for fake inode numbers */
63 
64 void
65 uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
66 {
67 	if (unp != NULL && unp->unp_addr != NULL) {
68 		nam->m_len = unp->unp_addr->m_len;
69 		bcopy(mtod(unp->unp_addr, caddr_t), mtod(nam, caddr_t),
70 		    nam->m_len);
71 	} else {
72 		nam->m_len = sizeof(sun_noname);
73 		bcopy(&sun_noname, mtod(nam, struct sockaddr *),
74 		    nam->m_len);
75 	}
76 }
77 
78 /*ARGSUSED*/
79 int
80 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
81     struct mbuf *control, struct proc *p)
82 {
83 	struct unpcb *unp = sotounpcb(so);
84 	struct socket *so2;
85 	int error = 0;
86 
87 	if (req == PRU_CONTROL)
88 		return (EOPNOTSUPP);
89 	if (req != PRU_SEND && control && control->m_len) {
90 		error = EOPNOTSUPP;
91 		goto release;
92 	}
93 	if (unp == NULL && req != PRU_ATTACH) {
94 		error = EINVAL;
95 		goto release;
96 	}
97 	switch (req) {
98 
99 	case PRU_ATTACH:
100 		if (unp) {
101 			error = EISCONN;
102 			break;
103 		}
104 		error = unp_attach(so);
105 		break;
106 
107 	case PRU_DETACH:
108 		unp_detach(unp);
109 		break;
110 
111 	case PRU_BIND:
112 		error = unp_bind(unp, nam, p);
113 		break;
114 
115 	case PRU_LISTEN:
116 		if (unp->unp_vnode == NULL)
117 			error = EINVAL;
118 		break;
119 
120 	case PRU_CONNECT:
121 		error = unp_connect(so, nam, p);
122 		break;
123 
124 	case PRU_CONNECT2:
125 		error = unp_connect2(so, (struct socket *)nam);
126 		break;
127 
128 	case PRU_DISCONNECT:
129 		unp_disconnect(unp);
130 		break;
131 
132 	case PRU_ACCEPT:
133 		/*
134 		 * Pass back name of connected socket,
135 		 * if it was bound and we are still connected
136 		 * (our peer may have closed already!).
137 		 */
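		/*
		 * The name copied out here is what accept(2) reports in its
		 * addr argument; if the peer never bound a name (or has
		 * already gone away), the caller simply gets sun_noname,
		 * an AF_UNIX sockaddr with an effectively empty path.
		 */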
138 		uipc_setaddr(unp->unp_conn, nam);
139 		break;
140 
141 	case PRU_SHUTDOWN:
142 		socantsendmore(so);
143 		unp_shutdown(unp);
144 		break;
145 
146 	case PRU_RCVD:
147 		switch (so->so_type) {
148 
149 		case SOCK_DGRAM:
150 			panic("uipc 1");
151 			/*NOTREACHED*/
152 
153 		case SOCK_STREAM:
154 		case SOCK_SEQPACKET:
155 #define	rcv (&so->so_rcv)
156 #define snd (&so2->so_snd)
157 			if (unp->unp_conn == NULL)
158 				break;
159 			so2 = unp->unp_conn->unp_socket;
160 			/*
161 			 * Adjust backpressure on sender
162 			 * and wakeup any waiting to write.
163 			 */
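			/*
			 * unp_cc/unp_mbcnt are this side's cached copies of
			 * its own receive-buffer counters from the previous
			 * adjustment (here or in PRU_SEND).  If, say, 512
			 * bytes have since been read, rcv->sb_cc is 512 below
			 * unp->unp_cc, so the sender's sb_hiwat (and likewise
			 * sb_mbmax) grows back by that amount before it is
			 * woken up.
			 */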
164 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
165 			unp->unp_mbcnt = rcv->sb_mbcnt;
166 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
167 			unp->unp_cc = rcv->sb_cc;
168 			sowwakeup(so2);
169 #undef snd
170 #undef rcv
171 			break;
172 
173 		default:
174 			panic("uipc 2");
175 		}
176 		break;
177 
178 	case PRU_SEND:
179 		if (control && (error = unp_internalize(control, p)))
180 			break;
181 		switch (so->so_type) {
182 
183 		case SOCK_DGRAM: {
184 			struct sockaddr *from;
185 
186 			if (nam) {
187 				if (unp->unp_conn) {
188 					error = EISCONN;
189 					break;
190 				}
191 				error = unp_connect(so, nam, p);
192 				if (error)
193 					break;
194 			} else {
195 				if (unp->unp_conn == NULL) {
196 					error = ENOTCONN;
197 					break;
198 				}
199 			}
200 			so2 = unp->unp_conn->unp_socket;
201 			if (unp->unp_addr)
202 				from = mtod(unp->unp_addr, struct sockaddr *);
203 			else
204 				from = &sun_noname;
205 			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
206 				sorwakeup(so2);
207 				m = NULL;
208 				control = NULL;
209 			} else
210 				error = ENOBUFS;
211 			if (nam)
212 				unp_disconnect(unp);
213 			break;
214 		}
215 
216 		case SOCK_STREAM:
217 		case SOCK_SEQPACKET:
218 #define	rcv (&so2->so_rcv)
219 #define	snd (&so->so_snd)
220 			if (so->so_state & SS_CANTSENDMORE) {
221 				error = EPIPE;
222 				break;
223 			}
224 			if (unp->unp_conn == NULL) {
225 				error = ENOTCONN;
226 				break;
227 			}
228 			so2 = unp->unp_conn->unp_socket;
229 			/*
230 			 * Send to paired receive port, and then reduce
231 			 * send buffer hiwater marks to maintain backpressure.
232 			 * Wake up readers.
233 			 */
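			/*
			 * Rough example: appending a 1024-byte record puts
			 * those bytes straight into the peer's so_rcv, so
			 * rcv->sb_cc ends up 1024 above the cached unp_cc and
			 * our sb_hiwat is cut by 1024.  Once sb_hiwat (or
			 * sb_mbmax) is exhausted, the free space seen on
			 * so_snd reaches zero and further writers block until
			 * the peer's PRU_RCVD gives the space back.
			 */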
234 			if (control) {
235 				if (sbappendcontrol(rcv, m, control))
236 					control = NULL;
237 			} else if (so->so_type == SOCK_SEQPACKET)
238 				sbappendrecord(rcv, m);
239 			else
240 				sbappend(rcv, m);
241 			snd->sb_mbmax -=
242 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
243 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
244 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
245 			unp->unp_conn->unp_cc = rcv->sb_cc;
246 			sorwakeup(so2);
247 			m = NULL;
248 #undef snd
249 #undef rcv
250 			break;
251 
252 		default:
253 			panic("uipc 4");
254 		}
255 		/* we need to undo unp_internalize in case of errors */
256 		if (control && error)
257 			unp_dispose(control);
258 		break;
259 
260 	case PRU_ABORT:
261 		unp_drop(unp, ECONNABORTED);
262 		break;
263 
264 	case PRU_SENSE: {
265 		struct stat *sb = (struct stat *)m;
266 
267 		sb->st_blksize = so->so_snd.sb_hiwat;
268 		switch (so->so_type) {
269 		case SOCK_STREAM:
270 		case SOCK_SEQPACKET:
271 			if (unp->unp_conn != NULL) {
272 				so2 = unp->unp_conn->unp_socket;
273 				sb->st_blksize += so2->so_rcv.sb_cc;
274 			}
275 			break;
276 		default:
277 			break;
278 		}
279 		sb->st_dev = NODEV;
280 		if (unp->unp_ino == 0)
281 			unp->unp_ino = unp_ino++;
282 		sb->st_atim.tv_sec =
283 		    sb->st_mtim.tv_sec =
284 		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
285 		sb->st_atim.tv_nsec =
286 		    sb->st_mtim.tv_nsec =
287 		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
288 		sb->st_ino = unp->unp_ino;
289 		return (0);
290 	}
291 
292 	case PRU_RCVOOB:
293 		return (EOPNOTSUPP);
294 
295 	case PRU_SENDOOB:
296 		error = EOPNOTSUPP;
297 		break;
298 
299 	case PRU_SOCKADDR:
300 		uipc_setaddr(unp, nam);
301 		break;
302 
303 	case PRU_PEERADDR:
304 		uipc_setaddr(unp->unp_conn, nam);
305 		break;
306 
307 	case PRU_SLOWTIMO:
308 		break;
309 
310 	default:
311 		panic("uipc_usrreq");
312 	}
313 release:
314 	if (control)
315 		m_freem(control);
316 	if (m)
317 		m_freem(m);
318 	return (error);
319 }
320 
321 /*
322  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
323  * for stream sockets, although the total for sender and receiver is
324  * actually only PIPSIZ.
325  * Datagram sockets really use the sendspace as the maximum datagram size,
326  * and don't really want to reserve the sendspace.  Their recvspace should
327  * be large enough for at least one max-size datagram plus address.
328  */
329 #define	PIPSIZ	4096
330 u_long	unpst_sendspace = PIPSIZ;
331 u_long	unpst_recvspace = PIPSIZ;
332 u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
333 u_long	unpdg_recvspace = 4*1024;
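/*
 * Consequences of the defaults above, roughly: since unpdg_sendspace is
 * installed as a datagram socket's so_snd.sb_hiwat and AF_UNIX datagrams
 * are atomic, a single datagram larger than 2k is refused outright
 * (sosend returns EMSGSIZE), while the 4k recvspace must cover the
 * datagram plus the sender's sockaddr and any control data queued with it.
 */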
334 
335 int	unp_rights;			/* file descriptors in flight */
336 
337 int
338 unp_attach(struct socket *so)
339 {
340 	struct unpcb *unp;
341 	int error;
342 
343 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
344 		switch (so->so_type) {
345 
346 		case SOCK_STREAM:
347 		case SOCK_SEQPACKET:
348 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
349 			break;
350 
351 		case SOCK_DGRAM:
352 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
353 			break;
354 
355 		default:
356 			panic("unp_attach");
357 		}
358 		if (error)
359 			return (error);
360 	}
361 	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO);
362 	if (unp == NULL)
363 		return (ENOBUFS);
364 	unp->unp_socket = so;
365 	so->so_pcb = unp;
366 	getnanotime(&unp->unp_ctime);
367 	return (0);
368 }
369 
370 void
371 unp_detach(struct unpcb *unp)
372 {
373 	struct vnode *vp;
374 
375 	if (unp->unp_vnode) {
376 		unp->unp_vnode->v_socket = NULL;
377 		vp = unp->unp_vnode;
378 		unp->unp_vnode = NULL;
379 		vrele(vp);
380 	}
381 	if (unp->unp_conn)
382 		unp_disconnect(unp);
383 	while (unp->unp_refs)
384 		unp_drop(unp->unp_refs, ECONNRESET);
385 	soisdisconnected(unp->unp_socket);
386 	unp->unp_socket->so_pcb = NULL;
387 	m_freem(unp->unp_addr);
388 	if (unp_rights) {
389 		/*
390 		 * Normally the receive buffer is flushed later,
391 		 * in sofree, but if our receive buffer holds references
392 		 * to descriptors that are now garbage, we will dispose
393 		 * of those descriptor references after the garbage collector
394 		 * gets them (resulting in a "panic: closef: count < 0").
395 		 */
396 		sorflush(unp->unp_socket);
397 		free(unp, M_PCB, 0);
398 		unp_gc();
399 	} else
400 		free(unp, M_PCB, 0);
401 }
402 
403 int
404 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
405 {
406 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
407 	struct mbuf *nam2;
408 	struct vnode *vp;
409 	struct vattr vattr;
410 	int error;
411 	struct nameidata nd;
412 	size_t pathlen;
413 
414 	if (unp->unp_vnode != NULL)
415 		return (EINVAL);
416 
417 	if (soun->sun_len > sizeof(struct sockaddr_un) ||
418 	    soun->sun_len < offsetof(struct sockaddr_un, sun_path))
419 		return (EINVAL);
420 	if (soun->sun_family != AF_UNIX)
421 		return (EAFNOSUPPORT);
422 
423 	pathlen = strnlen(soun->sun_path, soun->sun_len -
424 	    offsetof(struct sockaddr_un, sun_path));
425 	if (pathlen == sizeof(soun->sun_path))
426 		return (EINVAL);
427 
428 	nam2 = m_getclr(M_WAITOK, MT_SONAME);
429 	nam2->m_len = sizeof(struct sockaddr_un);
430 	memcpy(mtod(nam2, struct sockaddr_un *), soun,
431 	    offsetof(struct sockaddr_un, sun_path) + pathlen);
432 	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */
433 
434 	soun = mtod(nam2, struct sockaddr_un *);
435 
436 	/* Fixup sun_len to keep it in sync with m_len. */
437 	soun->sun_len = nam2->m_len;
438 
439 	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
440 	    soun->sun_path, p);
441 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
442 	if ((error = namei(&nd)) != 0) {
443 		m_freem(nam2);
444 		return (error);
445 	}
446 	vp = nd.ni_vp;
447 	if (vp != NULL) {
448 		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
449 		if (nd.ni_dvp == vp)
450 			vrele(nd.ni_dvp);
451 		else
452 			vput(nd.ni_dvp);
453 		vrele(vp);
454 		m_freem(nam2);
455 		return (EADDRINUSE);
456 	}
457 	VATTR_NULL(&vattr);
458 	vattr.va_type = VSOCK;
459 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
460 	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
461 	if (error) {
462 		m_freem(nam2);
463 		return (error);
464 	}
465 	unp->unp_addr = nam2;
466 	vp = nd.ni_vp;
467 	vp->v_socket = unp->unp_socket;
468 	unp->unp_vnode = vp;
469 	unp->unp_connid.uid = p->p_ucred->cr_uid;
470 	unp->unp_connid.gid = p->p_ucred->cr_gid;
471 	unp->unp_connid.pid = p->p_p->ps_pid;
472 	unp->unp_flags |= UNP_FEIDSBIND;
473 	VOP_UNLOCK(vp, 0, p);
474 	return (0);
475 }
476 
477 int
478 unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
479 {
480 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
481 	struct vnode *vp;
482 	struct socket *so2, *so3;
483 	struct unpcb *unp, *unp2, *unp3;
484 	int error;
485 	struct nameidata nd;
486 
487 	if (soun->sun_family != AF_UNIX)
488 		return (EAFNOSUPPORT);
489 
490 	if (nam->m_len < sizeof(struct sockaddr_un))
491 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
492 	else if (nam->m_len > sizeof(struct sockaddr_un))
493 		return (EINVAL);
494 	else if (memchr(soun->sun_path, '\0', sizeof(soun->sun_path)) == NULL)
495 		return (EINVAL);
496 
497 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
498 	if ((error = namei(&nd)) != 0)
499 		return (error);
500 	vp = nd.ni_vp;
501 	if (vp->v_type != VSOCK) {
502 		error = ENOTSOCK;
503 		goto bad;
504 	}
505 	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
506 		goto bad;
507 	so2 = vp->v_socket;
508 	if (so2 == NULL) {
509 		error = ECONNREFUSED;
510 		goto bad;
511 	}
512 	if (so->so_type != so2->so_type) {
513 		error = EPROTOTYPE;
514 		goto bad;
515 	}
516 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
517 		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
518 		    (so3 = sonewconn(so2, 0)) == 0) {
519 			error = ECONNREFUSED;
520 			goto bad;
521 		}
522 		unp = sotounpcb(so);
523 		unp2 = sotounpcb(so2);
524 		unp3 = sotounpcb(so3);
525 		if (unp2->unp_addr)
526 			unp3->unp_addr =
527 			    m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
528 		unp3->unp_connid.uid = p->p_ucred->cr_uid;
529 		unp3->unp_connid.gid = p->p_ucred->cr_gid;
530 		unp3->unp_connid.pid = p->p_p->ps_pid;
531 		unp3->unp_flags |= UNP_FEIDS;
532 		so2 = so3;
533 		if (unp2->unp_flags & UNP_FEIDSBIND) {
534 			unp->unp_connid = unp2->unp_connid;
535 			unp->unp_flags |= UNP_FEIDS;
536 		}
537 	}
538 	error = unp_connect2(so, so2);
539 bad:
540 	vput(vp);
541 	return (error);
542 }
543 
544 int
545 unp_connect2(struct socket *so, struct socket *so2)
546 {
547 	struct unpcb *unp = sotounpcb(so);
548 	struct unpcb *unp2;
549 
550 	if (so2->so_type != so->so_type)
551 		return (EPROTOTYPE);
552 	unp2 = sotounpcb(so2);
553 	unp->unp_conn = unp2;
554 	switch (so->so_type) {
555 
556 	case SOCK_DGRAM:
557 		unp->unp_nextref = unp2->unp_refs;
558 		unp2->unp_refs = unp;
559 		soisconnected(so);
560 		break;
561 
562 	case SOCK_STREAM:
563 	case SOCK_SEQPACKET:
564 		unp2->unp_conn = unp;
565 		soisconnected(so);
566 		soisconnected(so2);
567 		break;
568 
569 	default:
570 		panic("unp_connect2");
571 	}
572 	return (0);
573 }
574 
575 void
576 unp_disconnect(struct unpcb *unp)
577 {
578 	struct unpcb *unp2 = unp->unp_conn;
579 
580 	if (unp2 == NULL)
581 		return;
582 	unp->unp_conn = NULL;
583 	switch (unp->unp_socket->so_type) {
584 
585 	case SOCK_DGRAM:
586 		if (unp2->unp_refs == unp)
587 			unp2->unp_refs = unp->unp_nextref;
588 		else {
589 			unp2 = unp2->unp_refs;
590 			for (;;) {
591 				if (unp2 == NULL)
592 					panic("unp_disconnect");
593 				if (unp2->unp_nextref == unp)
594 					break;
595 				unp2 = unp2->unp_nextref;
596 			}
597 			unp2->unp_nextref = unp->unp_nextref;
598 		}
599 		unp->unp_nextref = NULL;
600 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
601 		break;
602 
603 	case SOCK_STREAM:
604 	case SOCK_SEQPACKET:
605 		soisdisconnected(unp->unp_socket);
606 		unp2->unp_conn = NULL;
607 		soisdisconnected(unp2->unp_socket);
608 		break;
609 	}
610 }
611 
612 void
613 unp_shutdown(struct unpcb *unp)
614 {
615 	struct socket *so;
616 
617 	switch (unp->unp_socket->so_type) {
618 	case SOCK_STREAM:
619 	case SOCK_SEQPACKET:
620 		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
621 			socantrcvmore(so);
622 		break;
623 	default:
624 		break;
625 	}
626 }
627 
628 void
629 unp_drop(struct unpcb *unp, int errno)
630 {
631 	struct socket *so = unp->unp_socket;
632 
633 	so->so_error = errno;
634 	unp_disconnect(unp);
635 	if (so->so_head) {
636 		so->so_pcb = NULL;
637 		sofree(so);
638 		m_freem(unp->unp_addr);
639 		free(unp, M_PCB, 0);
640 	}
641 }
642 
643 #ifdef notdef
644 unp_drain(void)
645 {
646 
647 }
648 #endif
649 
650 int
651 unp_externalize(struct mbuf *rights, socklen_t controllen)
652 {
653 	struct proc *p = curproc;		/* XXX */
654 	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
655 	int i, *fdp = NULL;
656 	struct file **rp;
657 	struct file *fp;
658 	int nfds, error = 0;
659 
660 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
661 	    sizeof(struct file *);
662 	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
663 		controllen = 0;
664 	else
665 		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
666 	if (nfds > controllen / sizeof(int)) {
667 		error = EMSGSIZE;
668 		goto restart;
669 	}
670 
671 	rp = (struct file **)CMSG_DATA(cm);
672 
673 	fdp = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);
674 
675 	/* Make sure the recipient is allowed to see the descriptors. */
676 	if (p->p_fd->fd_rdir != NULL) {
677 		rp = (struct file **)CMSG_DATA(cm);
678 		for (i = 0; i < nfds; i++) {
679 			fp = *rp++;
680 			/*
681 			 * No to block devices.  If passing a directory,
682 			 * make sure that it is underneath the root.
683 			 */
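			/*
			 * For example, a receiver chrooted to /var/www that
			 * is handed a descriptor for the directory /tmp gets
			 * EPERM here, since /tmp is not underneath its root;
			 * block-device descriptors are refused regardless.
			 */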
684 			if (fp->f_type == DTYPE_VNODE) {
685 				struct vnode *vp = (struct vnode *)fp->f_data;
686 
687 				if (vp->v_type == VBLK ||
688 				    (vp->v_type == VDIR &&
689 				    !vn_isunder(vp, p->p_fd->fd_rdir, p))) {
690 					error = EPERM;
691 					break;
692 				}
693 			}
694 		}
695 	}
696 
697 restart:
698 	fdplock(p->p_fd);
699 	if (error != 0) {
700 		rp = ((struct file **)CMSG_DATA(cm));
701 		for (i = 0; i < nfds; i++) {
702 			fp = *rp;
703 			/*
704 			 * zero the pointer before calling unp_discard,
705 			 * since it may end up in unp_gc()..
706 			 */
707 			*rp++ = NULL;
708 			unp_discard(fp);
709 		}
710 		goto out;
711 	}
712 
713 	/*
714 	 * First loop -- allocate file descriptor table slots for the
715 	 * new descriptors.
716 	 */
717 	rp = ((struct file **)CMSG_DATA(cm));
718 	for (i = 0; i < nfds; i++) {
719 		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
720 			/*
721 			 * Back out what we've done so far.
722 			 */
723 			for (--i; i >= 0; i--)
724 				fdremove(p->p_fd, fdp[i]);
725 
726 			if (error == ENOSPC) {
727 				fdexpand(p);
728 				error = 0;
729 			} else {
730 				/*
731 				 * This is the error that has historically
732 				 * been returned, and some callers may
733 				 * expect it.
734 				 */
735 				error = EMSGSIZE;
736 			}
737 			fdpunlock(p->p_fd);
738 			goto restart;
739 		}
740 
741 		/*
742 		 * Make the slot reference the descriptor so that
743 		 * fdalloc() works properly.. We finalize it all
744 		 * in the loop below.
745 		 */
746 		p->p_fd->fd_ofiles[fdp[i]] = *rp++;
747 	}
748 
749 	/*
750 	 * Now that adding them has succeeded, update all of the
751 	 * descriptor passing state.
752 	 */
753 	rp = (struct file **)CMSG_DATA(cm);
754 	for (i = 0; i < nfds; i++) {
755 		fp = *rp++;
756 		fp->f_msgcount--;
757 		unp_rights--;
758 	}
759 
760 	/*
761 	 * Copy temporary array to message and adjust length, in case of
762 	 * transition from large struct file pointers to ints.
763 	 */
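	/*
	 * E.g. on an LP64 kernel three passed descriptors occupy
	 * 3 * sizeof(struct file *) = 24 bytes while in flight but only
	 * 3 * sizeof(int) = 12 bytes as the descriptor numbers handed to
	 * the receiver, so cmsg_len and m_len shrink here.
	 */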
764 	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
765 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
766 	rights->m_len = CMSG_LEN(nfds * sizeof(int));
767  out:
768 	fdpunlock(p->p_fd);
769 	if (fdp)
770 		free(fdp, M_TEMP, 0);
771 	return (error);
772 }
773 
774 int
775 unp_internalize(struct mbuf *control, struct proc *p)
776 {
777 	struct filedesc *fdp = p->p_fd;
778 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
779 	struct file **rp, *fp;
780 	int i, error;
781 	int nfds, *ip, fd, neededspace;
782 
783 	/*
784 	 * Check for two potential msg_controllen values because
785 	 * IETF stuck their nose in a place it does not belong.
786 	 */
787 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
788 	    !(cm->cmsg_len == control->m_len ||
789 	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
790 		return (EINVAL);
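	/*
	 * For reference, a minimal userland sketch (names illustrative) of
	 * the sendmsg(2) call whose control message lands here, with "s" a
	 * connected AF_UNIX socket and "fd" the descriptor being passed:
	 *
	 *	char dummy = '*';
	 *	struct iovec iov = { &dummy, 1 };
	 *	struct msghdr msg;
	 *	union {
	 *		struct cmsghdr hdr;
	 *		char buf[CMSG_SPACE(sizeof(int))];
	 *	} cmsgbuf;
	 *	struct cmsghdr *cmp;
	 *
	 *	memset(&msg, 0, sizeof(msg));
	 *	memset(&cmsgbuf, 0, sizeof(cmsgbuf));
	 *	msg.msg_iov = &iov;
	 *	msg.msg_iovlen = 1;
	 *	msg.msg_control = cmsgbuf.buf;
	 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
	 *	cmp = CMSG_FIRSTHDR(&msg);
	 *	cmp->cmsg_len = CMSG_LEN(sizeof(int));
	 *	cmp->cmsg_level = SOL_SOCKET;
	 *	cmp->cmsg_type = SCM_RIGHTS;
	 *	memcpy(CMSG_DATA(cmp), &fd, sizeof(int));
	 *	sendmsg(s, &msg, 0);
	 *
	 * With msg_controllen set via CMSG_SPACE() as above, the control
	 * mbuf arrives with m_len == CMSG_ALIGN(cmsg_len), which is the
	 * second length accepted by the check above.
	 */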
791 	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);
792 
793 	if (unp_rights + nfds > maxfiles / 10)
794 		return (EMFILE);
795 
796 	/* Make sure we have room for the struct file pointers */
797 morespace:
798 	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
799 	    control->m_len;
800 	if (neededspace > M_TRAILINGSPACE(control)) {
801 		char *tmp;
802 		/* if we already have a cluster, the message is just too big */
803 		if (control->m_flags & M_EXT)
804 			return (E2BIG);
805 
806 		/* copy cmsg data temporarily out of the mbuf */
807 		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
808 		memcpy(tmp, mtod(control, caddr_t), control->m_len);
809 
810 		/* allocate a cluster and try again */
811 		MCLGET(control, M_WAIT);
812 		if ((control->m_flags & M_EXT) == 0) {
813 			free(tmp, M_TEMP, 0);
814 			return (ENOBUFS);       /* allocation failed */
815 		}
816 
817 		/* copy the data back into the cluster */
818 		cm = mtod(control, struct cmsghdr *);
819 		memcpy(cm, tmp, control->m_len);
820 		free(tmp, M_TEMP, 0);
821 		goto morespace;
822 	}
823 
824 	/* adjust message & mbuf to note amount of space actually used. */
825 	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
826 	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
827 
828 	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
829 	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
830 	for (i = 0; i < nfds; i++) {
831 		bcopy(ip, &fd, sizeof fd);
832 		ip--;
833 		if ((fp = fd_getfile(fdp, fd)) == NULL) {
834 			error = EBADF;
835 			goto fail;
836 		}
837 		if (fp->f_count == LONG_MAX-2 ||
838 		    fp->f_msgcount == LONG_MAX-2) {
839 			error = EDEADLK;
840 			goto fail;
841 		}
842 		/* kq and systrace descriptors cannot be copied */
843 		if (fp->f_type == DTYPE_KQUEUE ||
844 		    fp->f_type == DTYPE_SYSTRACE) {
845 			error = EINVAL;
846 			goto fail;
847 		}
848 		bcopy(&fp, rp, sizeof fp);
849 		rp--;
850 		fp->f_count++;
851 		fp->f_msgcount++;
852 		unp_rights++;
853 	}
854 	return (0);
855 fail:
856 	/* Back out what we just did. */
857 	for ( ; i > 0; i--) {
858 		rp++;
859 		bcopy(rp, &fp, sizeof(fp));
860 		fp->f_count--;
861 		fp->f_msgcount--;
862 		unp_rights--;
863 	}
864 
865 	return (error);
866 }
867 
868 int	unp_defer, unp_gcing;
869 extern	struct domain unixdomain;
870 
871 void
872 unp_gc(void)
873 {
874 	struct file *fp, *nextfp;
875 	struct socket *so;
876 	struct file **extra_ref, **fpp;
877 	int nunref, i;
878 
879 	if (unp_gcing)
880 		return;
881 	unp_gcing = 1;
882 	unp_defer = 0;
883 	LIST_FOREACH(fp, &filehead, f_list)
884 		fp->f_iflags &= ~(FIF_MARK|FIF_DEFER);
885 	do {
886 		LIST_FOREACH(fp, &filehead, f_list) {
887 			if (fp->f_iflags & FIF_DEFER) {
888 				fp->f_iflags &= ~FIF_DEFER;
889 				unp_defer--;
890 			} else {
891 				if (fp->f_count == 0)
892 					continue;
893 				if (fp->f_iflags & FIF_MARK)
894 					continue;
895 				if (fp->f_count == fp->f_msgcount)
896 					continue;
897 			}
898 			fp->f_iflags |= FIF_MARK;
899 
900 			if (fp->f_type != DTYPE_SOCKET ||
901 			    (so = fp->f_data) == NULL)
902 				continue;
903 			if (so->so_proto->pr_domain != &unixdomain ||
904 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
905 				continue;
906 #ifdef notdef
907 			if (so->so_rcv.sb_flags & SB_LOCK) {
908 				/*
909 				 * This is problematical; it's not clear
910 				 * we need to wait for the sockbuf to be
911 				 * unlocked (on a uniprocessor, at least),
912 				 * and it's also not clear what to do
913 				 * if sbwait returns an error due to receipt
914 				 * of a signal.  If sbwait does return
915 				 * an error, we'll go into an infinite
916 				 * loop.  Delete all of this for now.
917 				 */
918 				(void) sbwait(&so->so_rcv);
919 				goto restart;
920 			}
921 #endif
922 			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
923 		}
924 	} while (unp_defer);
925 	/*
926 	 * We grab an extra reference to each of the file table entries
927 	 * that are not otherwise accessible and then free the rights
928 	 * that are stored in messages on them.
929 	 *
930 	 * The bug in the original code is a little tricky, so I'll describe
931 	 * what's wrong with it here.
932 	 *
933 	 * It is incorrect to simply unp_discard each entry for f_msgcount
934 	 * times -- consider the case of sockets A and B that contain
935 	 * references to each other.  On a last close of some other socket,
936 	 * we trigger a gc since the number of outstanding rights (unp_rights)
937 	 * is non-zero.  If during the sweep phase the gc code unp_discards,
938 	 * we end up doing a (full) closef on the descriptor.  A closef on A
939 	 * results in the following chain.  Closef calls soo_close, which
940 	 * calls soclose.   Soclose calls first (through the switch
941 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
942 	 * returns because the previous instance had set unp_gcing, and
943 	 * we return all the way back to soclose, which marks the socket
944 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
945 	 * to free up the rights that are queued in messages on the socket A,
946 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
947 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
948 	 * instance of unp_discard just calls closef on B.
949 	 *
950 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
951 	 * which results in another closef on A.  Unfortunately, A is already
952 	 * being closed, and the descriptor has already been marked with
953 	 * SS_NOFDREF, and soclose panics at this point.
954 	 *
955 	 * Here, we first take an extra reference to each inaccessible
956 	 * descriptor.  Then, we call sorflush ourself, since we know
957 	 * it is a Unix domain socket anyhow.  After we destroy all the
958 	 * rights carried in messages, we do a last closef to get rid
959 	 * of our extra reference.  This is the last close, and the
960 	 * unp_detach etc will shut down the socket.
961 	 *
962 	 * 91/09/19, bsy@cs.cmu.edu
963 	 */
964 	extra_ref = mallocarray(nfiles, sizeof(struct file *), M_FILE, M_WAITOK);
965 	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
966 	    fp != NULL; fp = nextfp) {
967 		nextfp = LIST_NEXT(fp, f_list);
968 		if (fp->f_count == 0)
969 			continue;
970 		if (fp->f_count == fp->f_msgcount &&
971 		    !(fp->f_iflags & FIF_MARK)) {
972 			*fpp++ = fp;
973 			nunref++;
974 			FREF(fp);
975 			fp->f_count++;
976 		}
977 	}
978 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
979 	        if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
980 		        sorflush((*fpp)->f_data);
981 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
982 		(void) closef(*fpp, NULL);
983 	free(extra_ref, M_FILE, 0);
984 	unp_gcing = 0;
985 }
986 
987 void
988 unp_dispose(struct mbuf *m)
989 {
990 
991 	if (m)
992 		unp_scan(m, unp_discard, 1);
993 }
994 
995 void
996 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
997 {
998 	struct mbuf *m;
999 	struct file **rp, *fp;
1000 	struct cmsghdr *cm;
1001 	int i;
1002 	int qfds;
1003 
1004 	while (m0) {
1005 		for (m = m0; m; m = m->m_next) {
1006 			if (m->m_type == MT_CONTROL &&
1007 			    m->m_len >= sizeof(*cm)) {
1008 				cm = mtod(m, struct cmsghdr *);
1009 				if (cm->cmsg_level != SOL_SOCKET ||
1010 				    cm->cmsg_type != SCM_RIGHTS)
1011 					continue;
1012 				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
1013 				    / sizeof(struct file *);
1014 				rp = (struct file **)CMSG_DATA(cm);
1015 				for (i = 0; i < qfds; i++) {
1016 					fp = *rp;
1017 					if (discard)
1018 						*rp = 0;
1019 					(*op)(fp);
1020 					rp++;
1021 				}
1022 				break;		/* XXX, but saves time */
1023 			}
1024 		}
1025 		m0 = m0->m_nextpkt;
1026 	}
1027 }
1028 
1029 void
1030 unp_mark(struct file *fp)
1031 {
1032 	if (fp == NULL)
1033 		return;
1034 
1035 	if (fp->f_iflags & (FIF_MARK|FIF_DEFER))
1036 		return;
1037 
1038 	if (fp->f_type == DTYPE_SOCKET) {
1039 		unp_defer++;
1040 		fp->f_iflags |= FIF_DEFER;
1041 	} else {
1042 		fp->f_iflags |= FIF_MARK;
1043 	}
1044 }
1045 
1046 void
1047 unp_discard(struct file *fp)
1048 {
1049 
1050 	if (fp == NULL)
1051 		return;
1052 	FREF(fp);
1053 	fp->f_msgcount--;
1054 	unp_rights--;
1055 	(void) closef(fp, NULL);
1056 }
1057