xref: /csrg-svn/sys/kern/uipc_usrreq.c (revision 45914)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)uipc_usrreq.c	7.22 (Berkeley) 01/10/91
8  */
9 
10 #include "param.h"
11 #include "user.h"
12 #include "proc.h"
13 #include "filedesc.h"
14 #include "domain.h"
15 #include "protosw.h"
16 #include "socket.h"
17 #include "socketvar.h"
18 #include "unpcb.h"
19 #include "un.h"
20 #include "vnode.h"
21 #include "file.h"
22 #include "stat.h"
23 #include "mbuf.h"
24 
25 /*
26  * Unix communications domain.
27  *
28  * TODO:
29  *	SEQPACKET, RDM
30  *	rethink name space problems
31  *	need a proper out-of-band
32  */
/*
 * Placeholder address (length and family only) used when a socket has
 * no bound name: returned by PRU_ACCEPT for an unbound peer and used as
 * the source address of datagrams sent from an unbound socket.
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
/* Next fake inode number; PRU_SENSE assigns one to each pcb on first use. */
ino_t	unp_ino;			/* prototype for fake inode numbers */
35 
36 /*ARGSUSED*/
37 uipc_usrreq(so, req, m, nam, control)
38 	struct socket *so;
39 	int req;
40 	struct mbuf *m, *nam, *control;
41 {
42 	struct unpcb *unp = sotounpcb(so);
43 	register struct socket *so2;
44 	register int error = 0;
45 
46 	if (req == PRU_CONTROL)
47 		return (EOPNOTSUPP);
48 	if (req != PRU_SEND && control && control->m_len) {
49 		error = EOPNOTSUPP;
50 		goto release;
51 	}
52 	if (unp == 0 && req != PRU_ATTACH) {
53 		error = EINVAL;
54 		goto release;
55 	}
56 	switch (req) {
57 
58 	case PRU_ATTACH:
59 		if (unp) {
60 			error = EISCONN;
61 			break;
62 		}
63 		error = unp_attach(so);
64 		break;
65 
66 	case PRU_DETACH:
67 		unp_detach(unp);
68 		break;
69 
70 	case PRU_BIND:
71 		error = unp_bind(unp, nam);
72 		break;
73 
74 	case PRU_LISTEN:
75 		if (unp->unp_vnode == 0)
76 			error = EINVAL;
77 		break;
78 
79 	case PRU_CONNECT:
80 		error = unp_connect(so, nam);
81 		break;
82 
83 	case PRU_CONNECT2:
84 		error = unp_connect2(so, (struct socket *)nam);
85 		break;
86 
87 	case PRU_DISCONNECT:
88 		unp_disconnect(unp);
89 		break;
90 
91 	case PRU_ACCEPT:
92 		/*
93 		 * Pass back name of connected socket,
94 		 * if it was bound and we are still connected
95 		 * (our peer may have closed already!).
96 		 */
97 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
98 			nam->m_len = unp->unp_conn->unp_addr->m_len;
99 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
100 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
101 		} else {
102 			nam->m_len = sizeof(sun_noname);
103 			*(mtod(nam, struct sockaddr *)) = sun_noname;
104 		}
105 		break;
106 
107 	case PRU_SHUTDOWN:
108 		socantsendmore(so);
109 		unp_shutdown(unp);
110 		break;
111 
112 	case PRU_RCVD:
113 		switch (so->so_type) {
114 
115 		case SOCK_DGRAM:
116 			panic("uipc 1");
117 			/*NOTREACHED*/
118 
119 		case SOCK_STREAM:
120 #define	rcv (&so->so_rcv)
121 #define snd (&so2->so_snd)
122 			if (unp->unp_conn == 0)
123 				break;
124 			so2 = unp->unp_conn->unp_socket;
125 			/*
126 			 * Adjust backpressure on sender
127 			 * and wakeup any waiting to write.
128 			 */
129 			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
130 			unp->unp_mbcnt = rcv->sb_mbcnt;
131 			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
132 			unp->unp_cc = rcv->sb_cc;
133 			sowwakeup(so2);
134 #undef snd
135 #undef rcv
136 			break;
137 
138 		default:
139 			panic("uipc 2");
140 		}
141 		break;
142 
143 	case PRU_SEND:
144 		if (control && (error = unp_internalize(control)))
145 			break;
146 		switch (so->so_type) {
147 
148 		case SOCK_DGRAM: {
149 			struct sockaddr *from;
150 
151 			if (nam) {
152 				if (unp->unp_conn) {
153 					error = EISCONN;
154 					break;
155 				}
156 				error = unp_connect(so, nam);
157 				if (error)
158 					break;
159 			} else {
160 				if (unp->unp_conn == 0) {
161 					error = ENOTCONN;
162 					break;
163 				}
164 			}
165 			so2 = unp->unp_conn->unp_socket;
166 			if (unp->unp_addr)
167 				from = mtod(unp->unp_addr, struct sockaddr *);
168 			else
169 				from = &sun_noname;
170 			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
171 				sorwakeup(so2);
172 				m = 0;
173 				control = 0;
174 			} else
175 				error = ENOBUFS;
176 			if (nam)
177 				unp_disconnect(unp);
178 			break;
179 		}
180 
181 		case SOCK_STREAM:
182 #define	rcv (&so2->so_rcv)
183 #define	snd (&so->so_snd)
184 			if (so->so_state & SS_CANTSENDMORE) {
185 				error = EPIPE;
186 				break;
187 			}
188 			if (unp->unp_conn == 0)
189 				panic("uipc 3");
190 			so2 = unp->unp_conn->unp_socket;
191 			/*
192 			 * Send to paired receive port, and then reduce
193 			 * send buffer hiwater marks to maintain backpressure.
194 			 * Wake up readers.
195 			 */
196 			if (control) {
197 				if (sbappendcontrol(rcv, m, control))
198 					control = 0;
199 			} else
200 				sbappend(rcv, m);
201 			snd->sb_mbmax -=
202 			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
203 			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
204 			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
205 			unp->unp_conn->unp_cc = rcv->sb_cc;
206 			sorwakeup(so2);
207 			m = 0;
208 #undef snd
209 #undef rcv
210 			break;
211 
212 		default:
213 			panic("uipc 4");
214 		}
215 		break;
216 
217 	case PRU_ABORT:
218 		unp_drop(unp, ECONNABORTED);
219 		break;
220 
221 	case PRU_SENSE:
222 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
223 		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
224 			so2 = unp->unp_conn->unp_socket;
225 			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
226 		}
227 		((struct stat *) m)->st_dev = NODEV;
228 		if (unp->unp_ino == 0)
229 			unp->unp_ino = unp_ino++;
230 		((struct stat *) m)->st_ino = unp->unp_ino;
231 		return (0);
232 
233 	case PRU_RCVOOB:
234 		return (EOPNOTSUPP);
235 
236 	case PRU_SENDOOB:
237 		error = EOPNOTSUPP;
238 		break;
239 
240 	case PRU_SOCKADDR:
241 		if (unp->unp_addr) {
242 			nam->m_len = unp->unp_addr->m_len;
243 			bcopy(mtod(unp->unp_addr, caddr_t),
244 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
245 		} else
246 			nam->m_len = 0;
247 		break;
248 
249 	case PRU_PEERADDR:
250 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
251 			nam->m_len = unp->unp_conn->unp_addr->m_len;
252 			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
253 			    mtod(nam, caddr_t), (unsigned)nam->m_len);
254 		} else
255 			nam->m_len = 0;
256 		break;
257 
258 	case PRU_SLOWTIMO:
259 		break;
260 
261 	default:
262 		panic("piusrreq");
263 	}
264 release:
265 	if (control)
266 		m_freem(control);
267 	if (m)
268 		m_freem(m);
269 	return (error);
270 }
271 
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * unp_attach() passes these values to soreserve() when the socket has
 * no high-water marks set yet.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;	/* stream send reservation */
u_long	unpst_recvspace = PIPSIZ;	/* stream receive reservation */
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;	/* datagram receive reservation */

/*
 * Incremented once per file by unp_internalize(); decremented by
 * unp_externalize() and unp_discard().  unp_detach() runs unp_gc()
 * whenever it is nonzero.
 */
int	unp_rights;			/* file descriptors in flight */
287 
288 unp_attach(so)
289 	struct socket *so;
290 {
291 	register struct mbuf *m;
292 	register struct unpcb *unp;
293 	int error;
294 
295 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
296 		switch (so->so_type) {
297 
298 		case SOCK_STREAM:
299 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
300 			break;
301 
302 		case SOCK_DGRAM:
303 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
304 			break;
305 		}
306 		if (error)
307 			return (error);
308 	}
309 	m = m_getclr(M_DONTWAIT, MT_PCB);
310 	if (m == NULL)
311 		return (ENOBUFS);
312 	unp = mtod(m, struct unpcb *);
313 	so->so_pcb = (caddr_t)unp;
314 	unp->unp_socket = so;
315 	return (0);
316 }
317 
318 unp_detach(unp)
319 	register struct unpcb *unp;
320 {
321 
322 	if (unp->unp_vnode) {
323 		unp->unp_vnode->v_socket = 0;
324 		vrele(unp->unp_vnode);
325 		unp->unp_vnode = 0;
326 	}
327 	if (unp->unp_conn)
328 		unp_disconnect(unp);
329 	while (unp->unp_refs)
330 		unp_drop(unp->unp_refs, ECONNRESET);
331 	soisdisconnected(unp->unp_socket);
332 	unp->unp_socket->so_pcb = 0;
333 	m_freem(unp->unp_addr);
334 	(void) m_free(dtom(unp));
335 	if (unp_rights)
336 		unp_gc();
337 }
338 
339 unp_bind(unp, nam)
340 	struct unpcb *unp;
341 	struct mbuf *nam;
342 {
343 	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
344 	register struct vnode *vp;
345 	register struct nameidata *ndp = &u.u_nd;
346 	struct vattr vattr;
347 	int error;
348 
349 	ndp->ni_dirp = soun->sun_path;
350 	if (unp->unp_vnode != NULL)
351 		return (EINVAL);
352 	if (nam->m_len == MLEN) {
353 		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
354 			return (EINVAL);
355 	} else
356 		*(mtod(nam, caddr_t) + nam->m_len) = 0;
357 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
358 	ndp->ni_nameiop = CREATE | FOLLOW | LOCKPARENT;
359 	ndp->ni_segflg = UIO_SYSSPACE;
360 	if (error = namei(ndp))
361 		return (error);
362 	vp = ndp->ni_vp;
363 	if (vp != NULL) {
364 		VOP_ABORTOP(ndp);
365 		if (ndp->ni_dvp == vp)
366 			vrele(ndp->ni_dvp);
367 		else
368 			vput(ndp->ni_dvp);
369 		vrele(vp);
370 		return (EADDRINUSE);
371 	}
372 	VATTR_NULL(&vattr);
373 	vattr.va_type = VSOCK;
374 	vattr.va_mode = 0777;
375 	if (error = VOP_CREATE(ndp, &vattr))
376 		return (error);
377 	vp = ndp->ni_vp;
378 	vp->v_socket = unp->unp_socket;
379 	unp->unp_vnode = vp;
380 	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
381 	VOP_UNLOCK(vp);
382 	return (0);
383 }
384 
/*
 * Connect socket so to the socket bound at the pathname in nam.
 *
 * The name is NUL terminated in place, looked up with namei(), and must
 * be a VSOCK vnode that passes the VWRITE access check and still has a
 * socket of the same type attached.  For protocols flagged
 * PR_CONNREQUIRED the target must be accepting connections; a new
 * socket is spawned from it with sonewconn(), inherits a copy of the
 * listener's address, and becomes the actual peer.
 *
 * Returns 0, EMSGSIZE (unterminated name filling the mbuf), ENOTSOCK,
 * ECONNREFUSED, EPROTOTYPE, or an error from namei(), VOP_ACCESS() or
 * unp_connect2().  The vnode is released with vput() on every path
 * that gets past the lookup.
 */
unp_connect(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *so2, *so3;
	register struct nameidata *ndp = &u.u_nd;
	struct unpcb *unp2, *unp3;
	caddr_t name = mtod(nam, caddr_t);
	int error;

	ndp->ni_dirp = soun->sun_path;
	if (nam->m_data + nam->m_len != &nam->m_dat[MLEN])	/* XXX */
		name[nam->m_len] = 0;
	else if (name[nam->m_len - 1] != 0)
		return (EMSGSIZE);
	ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF;
	ndp->ni_segflg = UIO_SYSSPACE;
	error = namei(ndp);
	if (error)
		return (error);
	vp = ndp->ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, ndp->ni_cred);
	if (error)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so2->so_type != so->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* The target must be listening ... */
		if ((so2->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		/* ... and able to spawn a socket for this connection. */
		so3 = sonewconn(so2, 0);
		if (so3 == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
				  m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}
440 
441 unp_connect2(so, so2)
442 	register struct socket *so;
443 	register struct socket *so2;
444 {
445 	register struct unpcb *unp = sotounpcb(so);
446 	register struct unpcb *unp2;
447 
448 	if (so2->so_type != so->so_type)
449 		return (EPROTOTYPE);
450 	unp2 = sotounpcb(so2);
451 	unp->unp_conn = unp2;
452 	switch (so->so_type) {
453 
454 	case SOCK_DGRAM:
455 		unp->unp_nextref = unp2->unp_refs;
456 		unp2->unp_refs = unp;
457 		soisconnected(so);
458 		break;
459 
460 	case SOCK_STREAM:
461 		unp2->unp_conn = unp;
462 		soisconnected(so);
463 		soisconnected(so2);
464 		break;
465 
466 	default:
467 		panic("unp_connect2");
468 	}
469 	return (0);
470 }
471 
472 unp_disconnect(unp)
473 	struct unpcb *unp;
474 {
475 	register struct unpcb *unp2 = unp->unp_conn;
476 
477 	if (unp2 == 0)
478 		return;
479 	unp->unp_conn = 0;
480 	switch (unp->unp_socket->so_type) {
481 
482 	case SOCK_DGRAM:
483 		if (unp2->unp_refs == unp)
484 			unp2->unp_refs = unp->unp_nextref;
485 		else {
486 			unp2 = unp2->unp_refs;
487 			for (;;) {
488 				if (unp2 == 0)
489 					panic("unp_disconnect");
490 				if (unp2->unp_nextref == unp)
491 					break;
492 				unp2 = unp2->unp_nextref;
493 			}
494 			unp2->unp_nextref = unp->unp_nextref;
495 		}
496 		unp->unp_nextref = 0;
497 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
498 		break;
499 
500 	case SOCK_STREAM:
501 		soisdisconnected(unp->unp_socket);
502 		unp2->unp_conn = 0;
503 		soisdisconnected(unp2->unp_socket);
504 		break;
505 	}
506 }
507 
#ifdef notdef
/*
 * Abort a connection by detaching its pcb.  Compiled out under
 * "notdef"; PRU_ABORT in uipc_usrreq() calls unp_drop() instead.
 */
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
516 
517 unp_shutdown(unp)
518 	struct unpcb *unp;
519 {
520 	struct socket *so;
521 
522 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
523 	    (so = unp->unp_conn->unp_socket))
524 		socantrcvmore(so);
525 }
526 
527 unp_drop(unp, errno)
528 	struct unpcb *unp;
529 	int errno;
530 {
531 	struct socket *so = unp->unp_socket;
532 
533 	so->so_error = errno;
534 	unp_disconnect(unp);
535 	if (so->so_head) {
536 		so->so_pcb = (caddr_t) 0;
537 		m_freem(unp->unp_addr);
538 		(void) m_free(dtom(unp));
539 		sofree(so);
540 	}
541 }
542 
#ifdef notdef
/*
 * Empty drain routine, compiled out under "notdef".
 */
unp_drain()
{

}
#endif
549 
550 unp_externalize(rights)
551 	struct mbuf *rights;
552 {
553 	struct filedesc *fdp = u.u_procp->p_fd;		/* XXX */
554 	register int i;
555 	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
556 	register struct file **rp = (struct file **)(cm + 1);
557 	register struct file *fp;
558 	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
559 	int f;
560 
561 	if (newfds > ufavail(fdp)) {
562 		for (i = 0; i < newfds; i++) {
563 			fp = *rp;
564 			unp_discard(fp);
565 			*rp++ = 0;
566 		}
567 		return (EMSGSIZE);
568 	}
569 	for (i = 0; i < newfds; i++) {
570 		if (ufalloc(fdp, 0, &f))
571 			panic("unp_externalize");
572 		fp = *rp;
573 		OFILE(fdp, f) = fp;
574 		fp->f_msgcount--;
575 		unp_rights--;
576 		*(int *)rp++ = f;
577 	}
578 	return (0);
579 }
580 
581 unp_internalize(control)
582 	struct mbuf *control;
583 {
584 	struct filedesc *fdp = u.u_procp->p_fd;		/* XXX */
585 	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
586 	register struct file **rp;
587 	register struct file *fp;
588 	register int i, fd;
589 	int oldfds;
590 
591 	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
592 	    cm->cmsg_len != control->m_len)
593 		return (EINVAL);
594 	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
595 	rp = (struct file **)(cm + 1);
596 	for (i = 0; i < oldfds; i++) {
597 		fd = *(int *)rp++;
598 		if ((unsigned)fd >= fdp->fd_maxfiles || OFILE(fdp, fd) == NULL)
599 			return (EBADF);
600 	}
601 	rp = (struct file **)(cm + 1);
602 	for (i = 0; i < oldfds; i++) {
603 		fp = OFILE(fdp, *(int *)rp);
604 		*rp++ = fp;
605 		fp->f_count++;
606 		fp->f_msgcount++;
607 		unp_rights++;
608 	}
609 	return (0);
610 }
611 
/*
 * Garbage collector state.  unp_defer counts files that unp_mark() has
 * flagged FDEFER and that unp_gc() has not yet processed; unp_gcing is
 * nonzero while unp_gc() is running and makes a nested call return
 * immediately.
 */
int	unp_defer, unp_gcing;
int	unp_mark();
extern	struct domain unixdomain;
615 
/*
 * Garbage collect files that are referenced only by access-rights
 * messages which nothing can receive any more.
 *
 * Mark phase: FMARK and FDEFER are cleared on every file table entry,
 * then the table is swept repeatedly.  A file with at least one
 * reference outside of messages (f_count != f_msgcount) is marked.
 * When a newly marked or deferred file is a Unix domain socket whose
 * protocol has PR_RIGHTS, its receive buffer is scanned and every file
 * found there is marked and deferred by unp_mark(), so that it is
 * scanned in turn.  Sweeping stops once no deferred files remain.
 *
 * Sweep phase: any file still unmarked whose references all come from
 * messages has each of those references dropped with unp_discard().
 */
unp_gc()
{
	register struct file *fp;
	register struct socket *so;

	/* Refuse to run recursively. */
	if (unp_gcing)
		return;
	unp_gcing = 1;
	/* "restart" is only a target of the goto in the notdef code below. */
restart:
	unp_defer = 0;
	for (fp = file; fp < fileNFILE; fp++)
		fp->f_flag &= ~(FMARK|FDEFER);
	do {
		for (fp = file; fp < fileNFILE; fp++) {
			/* Skip unused table entries. */
			if (fp->f_count == 0)
				continue;
			if (fp->f_flag & FDEFER) {
				/* Marked earlier by unp_mark(); scan it now. */
				fp->f_flag &= ~FDEFER;
				unp_defer--;
			} else {
				/* Already marked and already scanned. */
				if (fp->f_flag & FMARK)
					continue;
				/* Referenced only from messages so far. */
				if (fp->f_count == fp->f_msgcount)
					continue;
				fp->f_flag |= FMARK;
			}
			/* Only sockets can hold further rights. */
			if (fp->f_type != DTYPE_SOCKET ||
			    (so = (struct socket *)fp->f_data) == 0)
				continue;
			if (so->so_proto->pr_domain != &unixdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
				continue;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			/* Mark every file queued on this socket. */
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);
	for (fp = file; fp < fileNFILE; fp++) {
		if (fp->f_count == 0)
			continue;
		/* Unmarked and held only by messages: release it. */
		if (fp->f_count == fp->f_msgcount && (fp->f_flag & FMARK) == 0)
			while (fp->f_msgcount)
				unp_discard(fp);
	}
	unp_gcing = 0;
}
676 
/*
 * Discard every file carried as access rights in the mbuf records
 * starting at m.  A null m is ignored.
 */
unp_dispose(m)
	struct mbuf *m;
{
	int unp_discard();

	if (m == 0)
		return;
	unp_scan(m, unp_discard);
}
685 
686 unp_scan(m0, op)
687 	register struct mbuf *m0;
688 	int (*op)();
689 {
690 	register struct mbuf *m;
691 	register struct file **rp;
692 	register struct cmsghdr *cm;
693 	register int i;
694 	int qfds;
695 
696 	while (m0) {
697 		for (m = m0; m; m = m->m_next)
698 			if (m->m_type == MT_CONTROL &&
699 			    m->m_len >= sizeof(*cm)) {
700 				cm = mtod(m, struct cmsghdr *);
701 				if (cm->cmsg_level != SOL_SOCKET ||
702 				    cm->cmsg_type != SCM_RIGHTS)
703 					continue;
704 				qfds = (cm->cmsg_len - sizeof *cm)
705 						/ sizeof (struct file *);
706 				rp = (struct file **)(cm + 1);
707 				for (i = 0; i < qfds; i++)
708 					(*op)(*rp++);
709 				break;		/* XXX, but saves time */
710 			}
711 		m0 = m0->m_act;
712 	}
713 }
714 
715 unp_mark(fp)
716 	struct file *fp;
717 {
718 
719 	if (fp->f_flag & FMARK)
720 		return;
721 	unp_defer++;
722 	fp->f_flag |= (FMARK|FDEFER);
723 }
724 
725 unp_discard(fp)
726 	struct file *fp;
727 {
728 
729 	fp->f_msgcount--;
730 	unp_rights--;
731 	(void) closef(fp);
732 }
733