xref: /netbsd-src/sys/compat/linux/common/linux_socket.c (revision bfb6cb13d599546df69c7e4d20d70e22e15a549d)
1 /*	$NetBSD: linux_socket.c,v 1.75 2007/06/06 17:08:27 rjs Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden and Eric Haszlakiewicz.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Functions in multiarch:
41  *	linux_sys_socketcall		: linux_socketcall.c
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: linux_socket.c,v 1.75 2007/06/06 17:08:27 rjs Exp $");
46 
47 #if defined(_KERNEL_OPT)
48 #include "opt_ktrace.h"
49 #include "opt_inet.h"
50 #endif /* defined(_KERNEL_OPT) */
51 
52 #include <sys/param.h>
53 #include <sys/kernel.h>
54 #include <sys/systm.h>
55 #include <sys/buf.h>
56 #include <sys/malloc.h>
57 #include <sys/ioctl.h>
58 #include <sys/tty.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/select.h>
62 #include <sys/socket.h>
63 #include <sys/socketvar.h>
64 #include <sys/domain.h>
65 #include <net/if.h>
66 #include <net/if_dl.h>
67 #include <net/if_types.h>
68 #include <netinet/in.h>
69 #include <netinet/tcp.h>
70 #include <sys/mount.h>
71 #include <sys/proc.h>
72 #include <sys/vnode.h>
73 #include <sys/device.h>
74 #include <sys/protosw.h>
75 #include <sys/mbuf.h>
76 #include <sys/syslog.h>
77 #include <sys/exec.h>
78 #include <sys/kauth.h>
79 
80 #include <sys/syscallargs.h>
81 #ifdef KTRACE
82 #include <sys/ktrace.h>
83 #endif
84 
85 #include <lib/libkern/libkern.h>
86 
87 #ifdef INET6
88 #include <netinet/ip6.h>
89 #include <netinet6/ip6_var.h>
90 #endif
91 
92 #include <compat/sys/socket.h>
93 #include <compat/sys/sockio.h>
94 
95 #include <compat/linux/common/linux_types.h>
96 #include <compat/linux/common/linux_util.h>
97 #include <compat/linux/common/linux_signal.h>
98 #include <compat/linux/common/linux_ioctl.h>
99 #include <compat/linux/common/linux_socket.h>
100 #if !defined(__alpha__) && !defined(__amd64__)
101 #include <compat/linux/common/linux_socketcall.h>
102 #endif
103 #include <compat/linux/common/linux_sockio.h>
104 
105 #include <compat/linux/linux_syscallargs.h>
106 
/*
 * Debug tracing.  The single macro argument is a complete, parenthesised
 * uprintf() argument list, e.g. DPRINTF(("fd %d\n", fd)).  Without
 * DEBUG_LINUX the macro expands to nothing.
 */
#if !defined(DEBUG_LINUX)
#define DPRINTF(a)
#else
#define DPRINTF(a) uprintf a
#endif
112 
113 /*
114  * The calls in this file are entered either via the linux_socketcall()
115  * interface or, on the Alpha, as individual syscalls.  The
116  * linux_socketcall function does any massaging of arguments so that all
117  * the calls in here need not think that they are anything other
118  * than a normal syscall.
119  */
120 
121 static int linux_to_bsd_domain __P((int));
122 static int bsd_to_linux_domain __P((int));
123 int linux_to_bsd_sopt_level __P((int));
124 int linux_to_bsd_so_sockopt __P((int));
125 int linux_to_bsd_ip_sockopt __P((int));
126 int linux_to_bsd_tcp_sockopt __P((int));
127 int linux_to_bsd_udp_sockopt __P((int));
128 int linux_getifhwaddr __P((struct lwp *, register_t *, u_int, void *));
129 static int linux_get_sa(struct lwp *, int, struct mbuf **,
130 		const struct osockaddr *, int);
131 static int linux_sa_put __P((struct osockaddr *osa));
132 static int linux_to_bsd_msg_flags __P((int));
133 static int bsd_to_linux_msg_flags __P((int));
134 
135 static const int linux_to_bsd_domain_[LINUX_AF_MAX] = {
136 	AF_UNSPEC,
137 	AF_UNIX,
138 	AF_INET,
139 	AF_CCITT,	/* LINUX_AF_AX25 */
140 	AF_IPX,
141 	AF_APPLETALK,
142 	-1,		/* LINUX_AF_NETROM */
143 	-1,		/* LINUX_AF_BRIDGE */
144 	-1,		/* LINUX_AF_ATMPVC */
145 	AF_CCITT,	/* LINUX_AF_X25 */
146 	AF_INET6,
147 	-1,		/* LINUX_AF_ROSE */
148 	AF_DECnet,
149 	-1,		/* LINUX_AF_NETBEUI */
150 	-1,		/* LINUX_AF_SECURITY */
151 	pseudo_AF_KEY,
152 	AF_ROUTE,	/* LINUX_AF_NETLINK */
153 	-1,		/* LINUX_AF_PACKET */
154 	-1,		/* LINUX_AF_ASH */
155 	-1,		/* LINUX_AF_ECONET */
156 	-1,		/* LINUX_AF_ATMSVC */
157 	AF_SNA,
158 	/* rest up to LINUX_AF_MAX-1 is not allocated */
159 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
160 };
161 
162 static const int bsd_to_linux_domain_[AF_MAX] = {
163 	LINUX_AF_UNSPEC,
164 	LINUX_AF_UNIX,
165 	LINUX_AF_INET,
166 	-1,		/* AF_IMPLINK */
167 	-1,		/* AF_PUP */
168 	-1,		/* AF_CHAOS */
169 	-1,		/* AF_NS */
170 	-1,		/* AF_ISO */
171 	-1,		/* AF_ECMA */
172 	-1,		/* AF_DATAKIT */
173 	LINUX_AF_AX25,	/* AF_CCITT */
174 	LINUX_AF_SNA,
175 	LINUX_AF_DECnet,
176 	-1,		/* AF_DLI */
177 	-1,		/* AF_LAT */
178 	-1,		/* AF_HYLINK */
179 	LINUX_AF_APPLETALK,
180 	LINUX_AF_NETLINK,
181 	-1,		/* AF_LINK */
182 	-1,		/* AF_XTP */
183 	-1,		/* AF_COIP */
184 	-1,		/* AF_CNT */
185 	-1,		/* pseudo_AF_RTIP */
186 	LINUX_AF_IPX,
187 	LINUX_AF_INET6,
188 	-1,		/* pseudo_AF_PIP */
189 	-1,		/* AF_ISDN */
190 	-1,		/* AF_NATM */
191 	-1,		/* AF_ARP */
192 	LINUX_pseudo_AF_KEY,
193 	-1,		/* pseudo_AF_HDRCMPLT */
194 };
195 
196 static const struct {
197 	int bfl;
198 	int lfl;
199 } bsd_to_linux_msg_flags_[] = {
200 	{MSG_OOB,		LINUX_MSG_OOB},
201 	{MSG_PEEK,		LINUX_MSG_PEEK},
202 	{MSG_DONTROUTE,		LINUX_MSG_DONTROUTE},
203 	{MSG_EOR,		LINUX_MSG_EOR},
204 	{MSG_TRUNC,		LINUX_MSG_TRUNC},
205 	{MSG_CTRUNC,		LINUX_MSG_CTRUNC},
206 	{MSG_WAITALL,		LINUX_MSG_WAITALL},
207 	{MSG_DONTWAIT,		LINUX_MSG_DONTWAIT},
208 	{MSG_BCAST,		0},		/* not supported, clear */
209 	{MSG_MCAST,		0},		/* not supported, clear */
210 	{-1, /* not supp */	LINUX_MSG_PROBE},
211 	{-1, /* not supp */	LINUX_MSG_FIN},
212 	{-1, /* not supp */	LINUX_MSG_SYN},
213 	{-1, /* not supp */	LINUX_MSG_CONFIRM},
214 	{-1, /* not supp */	LINUX_MSG_RST},
215 	{-1, /* not supp */	LINUX_MSG_ERRQUEUE},
216 	{-1, /* not supp */	LINUX_MSG_NOSIGNAL},
217 	{-1, /* not supp */	LINUX_MSG_MORE},
218 };
219 
220 /*
221  * Convert between Linux and BSD socket domain values
222  */
223 static int
224 linux_to_bsd_domain(ldom)
225 	int ldom;
226 {
227 	if (ldom < 0 || ldom >= LINUX_AF_MAX)
228 		return (-1);
229 
230 	return linux_to_bsd_domain_[ldom];
231 }
232 
233 /*
234  * Convert between BSD and Linux socket domain values
235  */
236 static int
237 bsd_to_linux_domain(bdom)
238 	int bdom;
239 {
240 	if (bdom < 0 || bdom >= AF_MAX)
241 		return (-1);
242 
243 	return bsd_to_linux_domain_[bdom];
244 }
245 
246 static int
247 linux_to_bsd_msg_flags(lflag)
248 	int lflag;
249 {
250 	int i, lfl, bfl;
251 	int bflag = 0;
252 
253 	if (lflag == 0)
254 		return (0);
255 
256 	for(i = 0; i < __arraycount(bsd_to_linux_msg_flags_); i++) {
257 		bfl = bsd_to_linux_msg_flags_[i].bfl;
258 		lfl = bsd_to_linux_msg_flags_[i].lfl;
259 
260 		if (lfl == 0)
261 			continue;
262 
263 		if (lflag & lfl) {
264 			if (bfl < 0)
265 				return (-1);
266 
267 			bflag |= bfl;
268 		}
269 	}
270 
271 	return (bflag);
272 }
273 
274 static int
275 bsd_to_linux_msg_flags(bflag)
276 	int bflag;
277 {
278 	int i, lfl, bfl;
279 	int lflag = 0;
280 
281 	if (bflag == 0)
282 		return (0);
283 
284 	for(i = 0; i < __arraycount(bsd_to_linux_msg_flags_); i++) {
285 		bfl = bsd_to_linux_msg_flags_[i].bfl;
286 		lfl = bsd_to_linux_msg_flags_[i].lfl;
287 
288 		if (bfl <= 0)
289 			continue;
290 
291 		if (bflag & bfl) {
292 			if (lfl < 0)
293 				return (-1);
294 
295 			lflag |= lfl;
296 		}
297 	}
298 
299 	return (lflag);
300 }
301 
302 int
303 linux_sys_socket(l, v, retval)
304 	struct lwp *l;
305 	void *v;
306 	register_t *retval;
307 {
308 	struct linux_sys_socket_args /* {
309 		syscallarg(int)	domain;
310 		syscallarg(int)	type;
311 		syscallarg(int) protocol;
312 	} */ *uap = v;
313 	struct compat_30_sys_socket_args bsa;
314 	int error;
315 
316 	SCARG(&bsa, protocol) = SCARG(uap, protocol);
317 	SCARG(&bsa, type) = SCARG(uap, type);
318 	SCARG(&bsa, domain) = linux_to_bsd_domain(SCARG(uap, domain));
319 	if (SCARG(&bsa, domain) == -1)
320 		return EINVAL;
321 	error = sys___socket30(l, &bsa, retval);
322 
323 #ifdef INET6
324 	/*
325 	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
326 	 * default and some apps depend on this. So, set V6ONLY to 0
327 	 * for Linux apps if the sysctl value is set to 1.
328 	 */
329 	if (!error && ip6_v6only && SCARG(&bsa, domain) == PF_INET6) {
330 		struct proc *p = l->l_proc;
331 		struct file *fp;
332 
333 		if (getsock(p->p_fd, *retval, &fp) == 0) {
334 			struct mbuf *m;
335 
336 			m = m_get(M_WAIT, MT_SOOPTS);
337 			m->m_len = sizeof(int);
338 			*mtod(m, int *) = 0;
339 
340 			/* ignore error */
341 			(void) sosetopt((struct socket *)fp->f_data,
342 				IPPROTO_IPV6, IPV6_V6ONLY, m);
343 
344 			FILE_UNUSE(fp, l);
345 		}
346 	}
347 #endif
348 
349 	return (error);
350 }
351 
352 int
353 linux_sys_socketpair(l, v, retval)
354 	struct lwp *l;
355 	void *v;
356 	register_t *retval;
357 {
358 	struct linux_sys_socketpair_args /* {
359 		syscallarg(int) domain;
360 		syscallarg(int) type;
361 		syscallarg(int) protocol;
362 		syscallarg(int *) rsv;
363 	} */ *uap = v;
364 	struct sys_socketpair_args bsa;
365 
366 	SCARG(&bsa, domain) = linux_to_bsd_domain(SCARG(uap, domain));
367 	if (SCARG(&bsa, domain) == -1)
368 		return EINVAL;
369 	SCARG(&bsa, type) = SCARG(uap, type);
370 	SCARG(&bsa, protocol) = SCARG(uap, protocol);
371 	SCARG(&bsa, rsv) = SCARG(uap, rsv);
372 
373 	return sys_socketpair(l, &bsa, retval);
374 }
375 
376 int
377 linux_sys_sendto(l, v, retval)
378 	struct lwp *l;
379 	void *v;
380 	register_t *retval;
381 {
382 	struct linux_sys_sendto_args /* {
383 		syscallarg(int)				s;
384 		syscallarg(void *)			msg;
385 		syscallarg(int)				len;
386 		syscallarg(int)				flags;
387 		syscallarg(struct osockaddr *)		to;
388 		syscallarg(int)				tolen;
389 	} */ *uap = v;
390 	struct msghdr   msg;
391 	struct iovec    aiov;
392 	struct mbuf *nam;
393 	int bflags;
394 	int error;
395 
396 	/* Translate message flags.  */
397 	bflags = linux_to_bsd_msg_flags(SCARG(uap, flags));
398 	if (bflags < 0)
399 		/* Some supported flag */
400 		return EINVAL;
401 
402 	/* Read in and convert the sockaddr */
403 	error = linux_get_sa(l, SCARG(uap, s), &nam, SCARG(uap, to),
404 	    SCARG(uap, tolen));
405 	if (error)
406 		return (error);
407 	msg.msg_flags = MSG_NAMEMBUF;
408 
409 	msg.msg_name = nam;
410 	msg.msg_namelen = SCARG(uap, tolen);
411 	msg.msg_iov = &aiov;
412 	msg.msg_iovlen = 1;
413 	msg.msg_control = 0;
414 	aiov.iov_base = __UNCONST(SCARG(uap, msg));
415 	aiov.iov_len = SCARG(uap, len);
416 
417 	return do_sys_sendmsg(l, SCARG(uap, s), &msg, bflags, retval);
418 }
419 
420 int
421 linux_sys_sendmsg(l, v, retval)
422 	struct lwp *l;
423 	void *v;
424 	register_t *retval;
425 {
426 	struct linux_sys_sendmsg_args /* {
427 		syscallarg(int) s;
428 		syscallarg(struct msghdr *) msg;
429 		syscallarg(u_int) flags;
430 	} */ *uap = v;
431 	struct msghdr	msg;
432 	int		error;
433 	int		bflags;
434 	struct mbuf     *nam;
435 	u_int8_t	*control;
436 	struct mbuf     *ctl_mbuf = NULL;
437 
438 	msg.msg_flags = MSG_IOVUSRSPACE;
439 
440 	/*
441 	 * Translate message flags.
442 	 */
443 	bflags = linux_to_bsd_msg_flags(SCARG(uap, flags));
444 	if (bflags < 0)
445 		/* Some supported flag */
446 		return EINVAL;
447 
448 	if (msg.msg_name) {
449 		/* Read in and convert the sockaddr */
450 		error = linux_get_sa(l, SCARG(uap, s), &nam, msg.msg_name,
451 		    msg.msg_namelen);
452 		if (error)
453 			return (error);
454 		msg.msg_flags |= MSG_NAMEMBUF;
455 		msg.msg_name = nam;
456 	}
457 
458 	/*
459 	 * Handle cmsg if there is any.
460 	 */
461 	if (CMSG_FIRSTHDR(&msg)) {
462 		struct cmsghdr cmsg, *cc;
463 		ssize_t resid = msg.msg_controllen;
464 		size_t clen, cidx = 0, cspace;
465 
466 		ctl_mbuf = m_get(M_WAIT, MT_CONTROL);
467 		clen = MLEN;
468 		control = mtod(ctl_mbuf, void *);
469 
470 		cc = CMSG_FIRSTHDR(&msg);
471 		do {
472 			error = copyin(cc, &cmsg, sizeof(cmsg));
473 			if (error)
474 				goto done;
475 
476 			/*
477 			 * Sanity check the control message length.
478 			 */
479 			if (cmsg.cmsg_len > resid
480 			    || cmsg.cmsg_len < sizeof(struct cmsghdr)) {
481 				error = EINVAL;
482 				goto done;
483 			}
484 
485 			/*
486 			 * Refuse unsupported control messages, and
487 			 * translate fields as appropriate.
488 			 */
489 			switch (cmsg.cmsg_level) {
490 			case LINUX_SOL_SOCKET:
491 				/* It only differs on some archs */
492 				if (LINUX_SOL_SOCKET != SOL_SOCKET)
493 					cmsg.cmsg_level = SOL_SOCKET;
494 
495 				switch(cmsg.cmsg_type) {
496 				case LINUX_SCM_RIGHTS:
497 					/* Linux SCM_RIGHTS is same as NetBSD */
498 					break;
499 
500 				default:
501 					/* other types not supported */
502 					error = EINVAL;
503 					goto done;
504 				}
505 				break;
506 			default:
507 				/* pray and leave intact */
508 				break;
509 			}
510 
511 			cspace = CMSG_SPACE(cmsg.cmsg_len - sizeof(cmsg));
512 
513 			/* Check the buffer is big enough */
514 			if (__predict_false(cidx + cspace > clen)) {
515 				u_int8_t *nc;
516 
517 				clen = cidx + cspace;
518 				if (clen >= PAGE_SIZE) {
519 					error = EINVAL;
520 					goto done;
521 				}
522 				nc = realloc(clen <= MLEN ? NULL : control,
523 						clen, M_TEMP, M_WAITOK);
524 				if (!nc) {
525 					error = ENOMEM;
526 					goto done;
527 				}
528 				if (cidx <= MLEN)
529 					/* Old buffer was in mbuf... */
530 					memcpy(nc, control, cidx);
531 				control = nc;
532 			}
533 
534 			/* Copy header */
535 			memcpy(&control[cidx], &cmsg, sizeof(cmsg));
536 
537 			/* Zero are between header and data */
538 			memset(&control[cidx+sizeof(cmsg)], 0,
539 				CMSG_ALIGN(sizeof(cmsg)) - sizeof(cmsg));
540 
541 			/* Copyin the data */
542 			error = copyin(LINUX_CMSG_DATA(cc),
543 				CMSG_DATA(control),
544 				cmsg.cmsg_len - sizeof(cmsg));
545 			if (error)
546 				goto done;
547 
548 			resid -= cspace;
549 			cidx += cspace;
550 		} while ((cc = LINUX_CMSG_NXTHDR(&msg, cc)) && resid > 0);
551 
552 		/* If we allocated a buffer, attach to mbuf */
553 		if (cidx > MLEN) {
554 			MEXTADD(ctl_mbuf, control, clen, M_MBUF, NULL, NULL);
555 			ctl_mbuf->m_flags |= M_EXT_RW;
556 		}
557 		control = NULL;
558 		ctl_mbuf->m_len = cidx;
559 
560 		msg.msg_control = ctl_mbuf;
561 		msg.msg_flags |= MSG_CONTROLMBUF;
562 	}
563 
564 	error = do_sys_sendmsg(l, SCARG(uap, s), &msg, bflags, retval);
565 	/* Freed internally */
566 	ctl_mbuf = NULL;
567 
568 done:
569 	if (ctl_mbuf != NULL) {
570 		if (control != NULL && control != mtod(ctl_mbuf, void *))
571 			free(control, M_MBUF);
572 		m_free(ctl_mbuf);
573 	}
574 	return (error);
575 }
576 
577 int
578 linux_sys_recvfrom(l, v, retval)
579 	struct lwp *l;
580 	void *v;
581 	register_t *retval;
582 {
583 	struct linux_sys_recvfrom_args /* {
584 		syscallarg(int) s;
585 		syscallarg(void *) buf;
586 		syscallarg(int) len;
587 		syscallarg(int) flags;
588 		syscallarg(struct osockaddr *) from;
589 		syscallarg(int *) fromlenaddr;
590 	} */ *uap = v;
591 	int		error;
592 	struct sys_recvfrom_args bra;
593 
594 	SCARG(&bra, s) = SCARG(uap, s);
595 	SCARG(&bra, buf) = SCARG(uap, buf);
596 	SCARG(&bra, len) = SCARG(uap, len);
597 	SCARG(&bra, flags) = SCARG(uap, flags);
598 	SCARG(&bra, from) = (struct sockaddr *) SCARG(uap, from);
599 	SCARG(&bra, fromlenaddr) = (socklen_t *)SCARG(uap, fromlenaddr);
600 
601 	if ((error = sys_recvfrom(l, &bra, retval)))
602 		return (error);
603 
604 	if (SCARG(uap, from) && (error = linux_sa_put(SCARG(uap, from))))
605 		return (error);
606 
607 	return (0);
608 }
609 
610 int
611 linux_sys_recvmsg(l, v, retval)
612 	struct lwp *l;
613 	void *v;
614 	register_t *retval;
615 {
616 	struct linux_sys_recvmsg_args /* {
617 		syscallarg(int) s;
618 		syscallarg(struct msghdr *) msg;
619 		syscallarg(u_int) flags;
620 	} */ *uap = v;
621 	struct msghdr	msg;
622 	int		error;
623 	struct sys_recvmsg_args bsa;
624 	int lflags;
625 	u_int8_t *ocontrol = NULL; /* XXX: gcc */
626 	socklen_t ocontrollen = 0;
627 
628 	/*
629 	 * Data alignment is different on some architectures. If control
630 	 * message is expected, we must arrange for the control message
631 	 * to be initially put elsewhere, and copy to target place
632 	 * with Linux alignment.
633 	 */
634 	if (LINUX_CMSG_ALIGNDIFF) {
635 		error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
636 		if (error)
637 			return (error);
638 
639 		if (CMSG_FIRSTHDR(&msg)) {
640 			void *sg;
641 
642 			/* Need to fit within stackgap */
643 			if (msg.msg_controllen > STACKGAPLEN/2) {
644 				/* Sorry guys! */
645 				return (EINVAL);
646 			}
647 
648 			sg = stackgap_init(l->l_proc, STACKGAPLEN/3);
649 
650 			ocontrol = msg.msg_control;
651 			ocontrollen = msg.msg_controllen;
652 
653 			/* space for at least one message's worth align */
654 			msg.msg_controllen += CMSG_ALIGN(1);
655 
656 			msg.msg_control = stackgap_alloc(l->l_proc, &sg,
657 				msg.msg_controllen);
658 			if (!msg.msg_control)
659 				return (ENOMEM);
660 
661 			/*
662 			 * Okay to overwrite the original structure, it's
663 			 * supposed to be writable.
664 			 */
665 			error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
666 			if (error)
667 				return (error);
668 		}
669 	}
670 
671 	SCARG(&bsa, s) = SCARG(uap, s);
672 	SCARG(&bsa, msg) = SCARG(uap, msg);
673 	SCARG(&bsa, flags) = linux_to_bsd_msg_flags(SCARG(uap, flags));
674 
675 	if (SCARG(&bsa, flags) < 0) {
676 		/* Some unsupported flag */
677 		return (EINVAL);
678 	}
679 
680 	if ((error = sys_recvmsg(l, &bsa, retval)))
681 		goto done;
682 
683 	/* Fixup sockaddr */
684 	error = copyin(SCARG(uap, msg), &msg, sizeof(msg));
685 	if (error)
686 		goto done;
687 
688 	if (msg.msg_name && msg.msg_namelen > 2) {
689 		if ((error = linux_sa_put(msg.msg_name)))
690 			goto done;
691 	}
692 
693 	/* Fixup msg flags */
694 	lflags = bsd_to_linux_msg_flags(msg.msg_flags);
695 	if (lflags < 0) {
696 		/* Some flag unsupported by Linux */
697 		error = EINVAL;
698 		goto done;
699 	}
700 	error = copyout(&lflags, (u_int8_t *) SCARG(uap, msg) +
701 			offsetof(struct msghdr, msg_flags), sizeof(lflags));
702 	if (error)
703 		goto done;
704 
705 	/*
706 	 * Fixup cmsg. We handle two things:
707 	 * 1. different values for level/type on some archs
708 	 * 2. different alignment of CMSG_DATA on some archs
709 	 */
710 	if (CMSG_FIRSTHDR(&msg)) {
711 		struct cmsghdr cmsg, *cc;
712 		int changed = 0;
713 		size_t resid = ocontrollen;
714 
715 		cc = CMSG_FIRSTHDR(&msg);
716 		do {
717 			error = copyin(cc, &cmsg, sizeof(cmsg));
718 			if (error)
719 				goto done;
720 
721 			switch (cmsg.cmsg_level) {
722 			case SOL_SOCKET:
723 				if (SOL_SOCKET != LINUX_SOL_SOCKET) {
724 					cmsg.cmsg_level = LINUX_SOL_SOCKET;
725 					changed = 1;
726 				}
727 
728 				switch (cmsg.cmsg_type) {
729 				case SCM_RIGHTS:
730 					/* Linux SCM_RIGHTS is same as NetBSD */
731 					break;
732 
733 				default:
734 					/* other types not supported */
735 					error = EINVAL;
736 					goto done;
737 				}
738 			default:
739 				/* pray and leave intact */
740 				break;
741 			}
742 
743 			if (LINUX_CMSG_ALIGNDIFF) {
744 				int i;
745 				u_int8_t d, *sd, *td;
746 
747 				/*
748 				 * Sanity check.
749 				 */
750 				if (cmsg.cmsg_len > resid
751 				    || cmsg.cmsg_len < sizeof(cmsg)) {
752 					error = EINVAL;
753 					goto done;
754 				}
755 
756 				/*
757 				 * Need to copy the cmsg from scratch area
758 				 * to the original place, converting data
759 				 * alignment from NetBSD to Linux one.
760 				 */
761 				error = copyout(&cmsg, ocontrol, sizeof(cmsg));
762 				if (error)
763 					goto done;
764 				/* zero pad */
765 #if 0
766 				for(i=0; i < LINUX_CMSG_ALIGN(sizeof(cmsg)) - sizeof(cmsg); i++) {
767 					copyout("",&ocontrol[sizeof(cmsg)+i],1);
768 				}
769 #endif
770 
771 				sd = CMSG_DATA(cc);
772 				td = LINUX_CMSG_DATA(ocontrol);
773 
774 				/* This is not particularily effective, but ..*/
775 				d = '\0';
776 				for(i=0; i < cmsg.cmsg_len - sizeof(cmsg); i++){
777 					copyin(sd++, &d, 1);
778 					copyout(&d, td++, 1);
779 				}
780 
781 				resid -= (td - ocontrol);
782 				ocontrol = td;
783 			} else if (changed) {
784 				/* Update cmsghdr in-place */
785 				error = copyout(&cmsg, cc, sizeof(cmsg));
786 				if (error)
787 					goto done;
788 				changed = 0;
789 			}
790 		} while((cc = CMSG_NXTHDR(&msg, cc)));
791 	}
792 
793 done:
794 	return (error);
795 }
796 
797 /*
798  * Convert socket option level from Linux to NetBSD value. Only SOL_SOCKET
799  * is different, the rest matches IPPROTO_* on both systems.
800  */
801 int
802 linux_to_bsd_sopt_level(llevel)
803 	int llevel;
804 {
805 
806 	switch (llevel) {
807 	case LINUX_SOL_SOCKET:
808 		return SOL_SOCKET;
809 	case LINUX_SOL_IP:
810 		return IPPROTO_IP;
811 	case LINUX_SOL_TCP:
812 		return IPPROTO_TCP;
813 	case LINUX_SOL_UDP:
814 		return IPPROTO_UDP;
815 	default:
816 		return -1;
817 	}
818 }
819 
820 /*
821  * Convert Linux socket level socket option numbers to NetBSD values.
822  */
823 int
824 linux_to_bsd_so_sockopt(lopt)
825 	int lopt;
826 {
827 
828 	switch (lopt) {
829 	case LINUX_SO_DEBUG:
830 		return SO_DEBUG;
831 	case LINUX_SO_REUSEADDR:
832 		/*
833 		 * Linux does not implement SO_REUSEPORT, but allows reuse of a
834 		 * host:port pair through SO_REUSEADDR even if the address is not a
835 		 * multicast-address.  Effectively, this means that we should use
836 		 * SO_REUSEPORT to allow Linux applications to not exit with
837 		 * EADDRINUSE
838 		 */
839 		return SO_REUSEPORT;
840 	case LINUX_SO_TYPE:
841 		return SO_TYPE;
842 	case LINUX_SO_ERROR:
843 		return SO_ERROR;
844 	case LINUX_SO_DONTROUTE:
845 		return SO_DONTROUTE;
846 	case LINUX_SO_BROADCAST:
847 		return SO_BROADCAST;
848 	case LINUX_SO_SNDBUF:
849 		return SO_SNDBUF;
850 	case LINUX_SO_RCVBUF:
851 		return SO_RCVBUF;
852 	case LINUX_SO_KEEPALIVE:
853 		return SO_KEEPALIVE;
854 	case LINUX_SO_OOBINLINE:
855 		return SO_OOBINLINE;
856 	case LINUX_SO_LINGER:
857 		return SO_LINGER;
858 	case LINUX_SO_PRIORITY:
859 	case LINUX_SO_NO_CHECK:
860 	default:
861 		return -1;
862 	}
863 }
864 
865 /*
866  * Convert Linux IP level socket option number to NetBSD values.
867  */
868 int
869 linux_to_bsd_ip_sockopt(lopt)
870 	int lopt;
871 {
872 
873 	switch (lopt) {
874 	case LINUX_IP_TOS:
875 		return IP_TOS;
876 	case LINUX_IP_TTL:
877 		return IP_TTL;
878 	case LINUX_IP_MULTICAST_TTL:
879 		return IP_MULTICAST_TTL;
880 	case LINUX_IP_MULTICAST_LOOP:
881 		return IP_MULTICAST_LOOP;
882 	case LINUX_IP_MULTICAST_IF:
883 		return IP_MULTICAST_IF;
884 	case LINUX_IP_ADD_MEMBERSHIP:
885 		return IP_ADD_MEMBERSHIP;
886 	case LINUX_IP_DROP_MEMBERSHIP:
887 		return IP_DROP_MEMBERSHIP;
888 	default:
889 		return -1;
890 	}
891 }
892 
893 /*
894  * Convert Linux TCP level socket option number to NetBSD values.
895  */
896 int
897 linux_to_bsd_tcp_sockopt(lopt)
898 	int lopt;
899 {
900 
901 	switch (lopt) {
902 	case LINUX_TCP_NODELAY:
903 		return TCP_NODELAY;
904 	case LINUX_TCP_MAXSEG:
905 		return TCP_MAXSEG;
906 	default:
907 		return -1;
908 	}
909 }
910 
/*
 * Map a Linux UDP-level socket option name onto the NetBSD one.
 * No UDP option is translated at present, so the result is always -1.
 */
int
linux_to_bsd_udp_sockopt(int lopt)
{

	switch (lopt) {
	default:
		break;
	}

	return -1;
}
924 
925 /*
926  * Another reasonably straightforward function: setsockopt(2).
927  * The level and option numbers are converted; the values passed
928  * are not (yet) converted, the ones currently implemented don't
929  * need conversion, as they are the same on both systems.
930  */
931 int
932 linux_sys_setsockopt(l, v, retval)
933 	struct lwp *l;
934 	void *v;
935 	register_t *retval;
936 {
937 	struct linux_sys_setsockopt_args /* {
938 		syscallarg(int) s;
939 		syscallarg(int) level;
940 		syscallarg(int) optname;
941 		syscallarg(void *) optval;
942 		syscallarg(int) optlen;
943 	} */ *uap = v;
944 	struct proc *p = l->l_proc;
945 	struct sys_setsockopt_args bsa;
946 	int name;
947 
948 	SCARG(&bsa, s) = SCARG(uap, s);
949 	SCARG(&bsa, level) = linux_to_bsd_sopt_level(SCARG(uap, level));
950 	SCARG(&bsa, val) = SCARG(uap, optval);
951 	SCARG(&bsa, valsize) = SCARG(uap, optlen);
952 
953 	/*
954 	 * Linux supports only SOL_SOCKET for AF_LOCAL domain sockets
955 	 * and returns EOPNOTSUPP for other levels
956 	 */
957 	if (SCARG(&bsa, level) != SOL_SOCKET) {
958 		struct file *fp;
959 		struct socket *so;
960 		int error, s, family;
961 
962 		/* getsock() will use the descriptor for us */
963 	    	if ((error = getsock(p->p_fd, SCARG(&bsa, s), &fp)) != 0)
964 		    	return error;
965 
966 		s = splsoftnet();
967 		so = (struct socket *)fp->f_data;
968 		family = so->so_proto->pr_domain->dom_family;
969 		splx(s);
970 		FILE_UNUSE(fp, l);
971 
972 		if (family == AF_LOCAL)
973 			return EOPNOTSUPP;
974 	}
975 
976 	switch (SCARG(&bsa, level)) {
977 	case SOL_SOCKET:
978 		name = linux_to_bsd_so_sockopt(SCARG(uap, optname));
979 		break;
980 	case IPPROTO_IP:
981 		name = linux_to_bsd_ip_sockopt(SCARG(uap, optname));
982 		break;
983 	case IPPROTO_TCP:
984 		name = linux_to_bsd_tcp_sockopt(SCARG(uap, optname));
985 		break;
986 	case IPPROTO_UDP:
987 		name = linux_to_bsd_udp_sockopt(SCARG(uap, optname));
988 		break;
989 	default:
990 		return EINVAL;
991 	}
992 
993 	if (name == -1)
994 		return EINVAL;
995 	SCARG(&bsa, name) = name;
996 
997 	return sys_setsockopt(l, &bsa, retval);
998 }
999 
1000 /*
1001  * getsockopt(2) is very much the same as setsockopt(2) (see above)
1002  */
1003 int
1004 linux_sys_getsockopt(l, v, retval)
1005 	struct lwp *l;
1006 	void *v;
1007 	register_t *retval;
1008 {
1009 	struct linux_sys_getsockopt_args /* {
1010 		syscallarg(int) s;
1011 		syscallarg(int) level;
1012 		syscallarg(int) optname;
1013 		syscallarg(void *) optval;
1014 		syscallarg(int *) optlen;
1015 	} */ *uap = v;
1016 	struct sys_getsockopt_args bga;
1017 	int name;
1018 
1019 	SCARG(&bga, s) = SCARG(uap, s);
1020 	SCARG(&bga, level) = linux_to_bsd_sopt_level(SCARG(uap, level));
1021 	SCARG(&bga, val) = SCARG(uap, optval);
1022 	SCARG(&bga, avalsize) = (socklen_t *)SCARG(uap, optlen);
1023 
1024 	switch (SCARG(&bga, level)) {
1025 	case SOL_SOCKET:
1026 		name = linux_to_bsd_so_sockopt(SCARG(uap, optname));
1027 		break;
1028 	case IPPROTO_IP:
1029 		name = linux_to_bsd_ip_sockopt(SCARG(uap, optname));
1030 		break;
1031 	case IPPROTO_TCP:
1032 		name = linux_to_bsd_tcp_sockopt(SCARG(uap, optname));
1033 		break;
1034 	case IPPROTO_UDP:
1035 		name = linux_to_bsd_udp_sockopt(SCARG(uap, optname));
1036 		break;
1037 	default:
1038 		return EINVAL;
1039 	}
1040 
1041 	if (name == -1)
1042 		return EINVAL;
1043 	SCARG(&bga, name) = name;
1044 
1045 	return sys_getsockopt(l, &bga, retval);
1046 }
1047 
1048 #define IF_NAME_LEN 16
1049 
1050 int
1051 linux_getifhwaddr(struct lwp *l, register_t *retval, u_int fd,
1052     void *data)
1053 {
1054 	/* Not the full structure, just enough to map what we do here */
1055 	struct linux_ifreq {
1056 		char if_name[IF_NAME_LEN];
1057 		struct osockaddr hwaddr;
1058 	} lreq;
1059 	struct proc *p = l->l_proc;
1060 	struct filedesc *fdp;
1061 	struct file *fp;
1062 	struct ifaddr *ifa;
1063 	struct ifnet *ifp;
1064 	struct sockaddr_dl *sadl;
1065 	int error, found;
1066 	int index, ifnum;
1067 
1068 	/*
1069 	 * We can't emulate this ioctl by calling sys_ioctl() to run
1070 	 * SIOCGIFCONF, because the user buffer is not of the right
1071 	 * type to take those results.  We can't use kernel buffers to
1072 	 * receive the results, as the implementation of sys_ioctl()
1073 	 * and ifconf() [which implements SIOCGIFCONF] use
1074 	 * copyin()/copyout() which will fail on kernel addresses.
1075 	 *
1076 	 * So, we must duplicate code from sys_ioctl() and ifconf().  Ugh.
1077 	 */
1078 
1079 	fdp = p->p_fd;
1080 	if ((fp = fd_getfile(fdp, fd)) == NULL)
1081 		return (EBADF);
1082 
1083 	FILE_USE(fp);
1084 	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
1085 		error = EBADF;
1086 		goto out;
1087 	}
1088 
1089 	error = copyin(data, &lreq, sizeof(lreq));
1090 	if (error)
1091 		goto out;
1092 	lreq.if_name[IF_NAME_LEN-1] = '\0';		/* just in case */
1093 
1094 	/*
1095 	 * Try real interface name first, then fake "ethX"
1096 	 */
1097 	for (ifp = ifnet.tqh_first, found = 0;
1098 	     ifp != 0 && !found;
1099 	     ifp = ifp->if_list.tqe_next) {
1100 		if (strcmp(lreq.if_name, ifp->if_xname))
1101 			/* not this interface */
1102 			continue;
1103 		found=1;
1104 		if ((ifa = ifp->if_addrlist.tqh_first) != 0) {
1105 			for (; ifa != 0; ifa = ifa->ifa_list.tqe_next) {
1106 				sadl = (struct sockaddr_dl *)ifa->ifa_addr;
1107 				/* only return ethernet addresses */
1108 				/* XXX what about FDDI, etc. ? */
1109 				if (sadl->sdl_family != AF_LINK ||
1110 				    sadl->sdl_type != IFT_ETHER)
1111 					continue;
1112 				memcpy(&lreq.hwaddr.sa_data, LLADDR(sadl),
1113 				       MIN(sadl->sdl_alen,
1114 					   sizeof(lreq.hwaddr.sa_data)));
1115 				lreq.hwaddr.sa_family =
1116 					sadl->sdl_family;
1117 				error = copyout(&lreq, data, sizeof(lreq));
1118 				goto out;
1119 			}
1120 		} else {
1121 			error = ENODEV;
1122 			goto out;
1123 		}
1124 	}
1125 
1126 	if (strncmp(lreq.if_name, "eth", 3) == 0) {
1127 		for (ifnum = 0, index = 3;
1128 		     lreq.if_name[index] != '\0' && index < IF_NAME_LEN;
1129 		     index++) {
1130 			ifnum *= 10;
1131 			ifnum += lreq.if_name[index] - '0';
1132 		}
1133 
1134 		error = EINVAL;			/* in case we don't find one */
1135 		for (ifp = ifnet.tqh_first, found = 0;
1136 		     ifp != 0 && !found;
1137 		     ifp = ifp->if_list.tqe_next) {
1138 			memcpy(lreq.if_name, ifp->if_xname,
1139 			       MIN(IF_NAME_LEN, IFNAMSIZ));
1140 			if ((ifa = ifp->if_addrlist.tqh_first) == 0)
1141 				/* no addresses on this interface */
1142 				continue;
1143 			else
1144 				for (; ifa != 0; ifa = ifa->ifa_list.tqe_next) {
1145 					sadl = (struct sockaddr_dl *)ifa->ifa_addr;
1146 					/* only return ethernet addresses */
1147 					/* XXX what about FDDI, etc. ? */
1148 					if (sadl->sdl_family != AF_LINK ||
1149 					    sadl->sdl_type != IFT_ETHER)
1150 						continue;
1151 					if (ifnum--)
1152 						/* not the reqested iface */
1153 						continue;
1154 					memcpy(&lreq.hwaddr.sa_data,
1155 					       LLADDR(sadl),
1156 					       MIN(sadl->sdl_alen,
1157 						   sizeof(lreq.hwaddr.sa_data)));
1158 					lreq.hwaddr.sa_family =
1159 						sadl->sdl_family;
1160 					error = copyout(&lreq, data, sizeof(lreq));
1161 					found = 1;
1162 					break;
1163 				}
1164 		}
1165 	} else {
1166 		/* unknown interface, not even an "eth*" name */
1167 		error = ENODEV;
1168 	}
1169 
1170 out:
1171 	FILE_UNUSE(fp, l);
1172 	return error;
1173 }
1174 #undef IF_NAME_LEN
1175 
1176 int
1177 linux_ioctl_socket(l, uap, retval)
1178 	struct lwp *l;
1179 	struct linux_sys_ioctl_args /* {
1180 		syscallarg(int) fd;
1181 		syscallarg(u_long) com;
1182 		syscallarg(void *) data;
1183 	} */ *uap;
1184 	register_t *retval;
1185 {
1186 	struct proc *p = l->l_proc;
1187 	u_long com;
1188 	int error = 0, isdev = 0, dosys = 1;
1189 	struct sys_ioctl_args ia;
1190 	struct file *fp;
1191 	struct filedesc *fdp;
1192 	struct vnode *vp;
1193 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
1194 	struct ioctl_pt pt;
1195 
1196 	fdp = p->p_fd;
1197 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1198 		return (EBADF);
1199 
1200 	FILE_USE(fp);
1201 
1202 	if (fp->f_type == DTYPE_VNODE) {
1203 		vp = (struct vnode *)fp->f_data;
1204 		isdev = vp->v_type == VCHR;
1205 	}
1206 
1207 	/*
1208 	 * Don't try to interpret socket ioctl calls that are done
1209 	 * on a device filedescriptor, just pass them through, to
1210 	 * emulate Linux behaviour. Use PTIOCLINUX so that the
1211 	 * device will only handle these if it's prepared to do
1212 	 * so, to avoid unexpected things from happening.
1213 	 */
1214 	if (isdev) {
1215 		dosys = 0;
1216 		ioctlf = fp->f_ops->fo_ioctl;
1217 		pt.com = SCARG(uap, com);
1218 		pt.data = SCARG(uap, data);
1219 		error = ioctlf(fp, PTIOCLINUX, (void *)&pt, l);
1220 		/*
1221 		 * XXX hack: if the function returns EJUSTRETURN,
1222 		 * it has stuffed a sysctl return value in pt.data.
1223 		 */
1224 		if (error == EJUSTRETURN) {
1225 			retval[0] = (register_t)pt.data;
1226 			error = 0;
1227 		}
1228 		goto out;
1229 	}
1230 
1231 	com = SCARG(uap, com);
1232 	retval[0] = 0;
1233 
1234 	switch (com) {
1235 	case LINUX_SIOCGIFCONF:
1236 		SCARG(&ia, com) = OOSIOCGIFCONF;
1237 		break;
1238 	case LINUX_SIOCGIFFLAGS:
1239 		SCARG(&ia, com) = OSIOCGIFFLAGS;
1240 		break;
1241 	case LINUX_SIOCSIFFLAGS:
1242 		SCARG(&ia, com) = OSIOCSIFFLAGS;
1243 		break;
1244 	case LINUX_SIOCGIFADDR:
1245 		SCARG(&ia, com) = OOSIOCGIFADDR;
1246 		break;
1247 	case LINUX_SIOCGIFDSTADDR:
1248 		SCARG(&ia, com) = OOSIOCGIFDSTADDR;
1249 		break;
1250 	case LINUX_SIOCGIFBRDADDR:
1251 		SCARG(&ia, com) = OOSIOCGIFBRDADDR;
1252 		break;
1253 	case LINUX_SIOCGIFNETMASK:
1254 		SCARG(&ia, com) = OOSIOCGIFNETMASK;
1255 		break;
1256 	case LINUX_SIOCADDMULTI:
1257 		SCARG(&ia, com) = OSIOCADDMULTI;
1258 		break;
1259 	case LINUX_SIOCDELMULTI:
1260 		SCARG(&ia, com) = OSIOCDELMULTI;
1261 		break;
1262 	case LINUX_SIOCGIFHWADDR:
1263 		error = linux_getifhwaddr(l, retval, SCARG(uap, fd),
1264 					 SCARG(uap, data));
1265 		dosys = 0;
1266 		break;
1267 	default:
1268 		error = EINVAL;
1269 	}
1270 
1271 out:
1272 	FILE_UNUSE(fp, l);
1273 
1274 	if (error ==0 && dosys) {
1275 		SCARG(&ia, fd) = SCARG(uap, fd);
1276 		SCARG(&ia, data) = SCARG(uap, data);
1277 		/* XXX NJWLWP */
1278 		error = sys_ioctl(curlwp, &ia, retval);
1279 	}
1280 
1281 	return error;
1282 }
1283 
1284 int
1285 linux_sys_connect(l, v, retval)
1286 	struct lwp *l;
1287 	void *v;
1288 	register_t *retval;
1289 {
1290 	struct linux_sys_connect_args /* {
1291 		syscallarg(int) s;
1292 		syscallarg(const struct sockaddr *) name;
1293 		syscallarg(int) namelen;
1294 	} */ *uap = v;
1295 	int		error;
1296 	struct mbuf *nam;
1297 
1298 	error = linux_get_sa(l, SCARG(uap, s), &nam, SCARG(uap, name),
1299 	    SCARG(uap, namelen));
1300 	if (error)
1301 		return (error);
1302 
1303 	error = do_sys_connect(l, SCARG(uap, s), nam);
1304 
1305 	if (error == EISCONN) {
1306 		struct file *fp;
1307 		struct socket *so;
1308 		int s, state, prflags;
1309 
1310 		/* getsock() will use the descriptor for us */
1311 	    	if (getsock(l->l_proc->p_fd, SCARG(uap, s), &fp) != 0)
1312 		    	return EISCONN;
1313 
1314 		s = splsoftnet();
1315 		so = (struct socket *)fp->f_data;
1316 		state = so->so_state;
1317 		prflags = so->so_proto->pr_flags;
1318 		splx(s);
1319 		FILE_UNUSE(fp, l);
1320 		/*
1321 		 * We should only let this call succeed once per
1322 		 * non-blocking connect; however we don't have
1323 		 * a convenient place to keep that state..
1324 		 */
1325 		if ((state & SS_NBIO) && (state & SS_ISCONNECTED) &&
1326 		    (prflags & PR_CONNREQUIRED))
1327 			return 0;
1328 	}
1329 
1330 	return (error);
1331 }
1332 
1333 int
1334 linux_sys_bind(l, v, retval)
1335 	struct lwp *l;
1336 	void *v;
1337 	register_t *retval;
1338 {
1339 	struct linux_sys_bind_args /* {
1340 		syscallarg(int) s;
1341 		syscallarg(const struct osockaddr *) name;
1342 		syscallarg(int) namelen;
1343 	} */ *uap = v;
1344 	int		error;
1345 	struct mbuf     *nam;
1346 
1347 	error = linux_get_sa(l, SCARG(uap, s), &nam, SCARG(uap, name),
1348 	    SCARG(uap, namelen));
1349 	if (error)
1350 		return (error);
1351 
1352 	return do_sys_bind(l, SCARG(uap, s), nam);
1353 }
1354 
1355 int
1356 linux_sys_getsockname(l, v, retval)
1357 	struct lwp *l;
1358 	void *v;
1359 	register_t *retval;
1360 {
1361 	struct linux_sys_getsockname_args /* {
1362 		syscallarg(int) fdes;
1363 		syscallarg(void *) asa;
1364 		syscallarg(int *) alen;
1365 	} */ *uap = v;
1366 	int error;
1367 
1368 	if ((error = sys_getsockname(l, uap, retval)) != 0)
1369 		return (error);
1370 
1371 	if ((error = linux_sa_put((struct osockaddr *)SCARG(uap, asa))))
1372 		return (error);
1373 
1374 	return (0);
1375 }
1376 
1377 int
1378 linux_sys_getpeername(l, v, retval)
1379 	struct lwp *l;
1380 	void *v;
1381 	register_t *retval;
1382 {
1383 	struct sys_getpeername_args /* {
1384 		syscallarg(int) fdes;
1385 		syscallarg(void *) asa;
1386 		syscallarg(int *) alen;
1387 	} */ *uap = v;
1388 	int error;
1389 
1390 	if ((error = sys_getpeername(l, uap, retval)) != 0)
1391 		return (error);
1392 
1393 	if ((error = linux_sa_put((struct osockaddr *)SCARG(uap, asa))))
1394 		return (error);
1395 
1396 	return (0);
1397 }
1398 
1399 /*
1400  * Copy the osockaddr structure pointed to by osa to mbuf, adjust
1401  * family and convert to sockaddr.
1402  */
1403 static int
1404 linux_get_sa(struct lwp *l, int s, struct mbuf **mp, const struct osockaddr *osa, int salen)
1405 {
1406 	int error, bdom;
1407 	struct sockaddr *sa;
1408 	struct osockaddr *kosa;
1409 	struct mbuf *m;
1410 
1411 	if (salen == 1 || salen > UCHAR_MAX) {
1412 		DPRINTF(("bad osa=%p salen=%d\n", osa, salen));
1413 		return EINVAL;
1414 	}
1415 
1416 	/* We'll need the address in an mbuf later, so copy into one here */
1417 	m = m_get(M_WAIT, MT_SONAME);
1418 	if (salen > MLEN)
1419 		MEXTMALLOC(m, salen, M_WAITOK);
1420 
1421 	m->m_len = salen;
1422 
1423 	if (salen == 0)
1424 		return 0;
1425 
1426 	kosa = mtod(m, void *);
1427 	if ((error = copyin(osa, kosa, salen))) {
1428 		DPRINTF(("error %d copying osa %p len %d\n",
1429 				error, osa, salen));
1430 		goto bad;
1431 	}
1432 #ifdef KTRACE
1433 	if (KTRPOINT(l->l_proc, KTR_USER))
1434 		ktrkuser(l, "linux sockaddr", kosa, salen);
1435 #endif
1436 
1437 	bdom = linux_to_bsd_domain(kosa->sa_family);
1438 	if (bdom == -1) {
1439 		DPRINTF(("bad linux family=%d\n", kosa->sa_family));
1440 		error = EINVAL;
1441 		goto bad;
1442 	}
1443 
1444 	/*
1445 	 * If the family is unspecified, use address family of the socket.
1446 	 * This avoid triggering strict family checks in netinet/in_pcb.c et.al.
1447 	 */
1448 	if (bdom == AF_UNSPEC) {
1449 		struct file *fp;
1450 		struct socket *so;
1451 
1452 		/* getsock() will use the descriptor for us */
1453 		if ((error = getsock(l->l_proc->p_fd, s, &fp)) != 0)
1454 			goto bad;
1455 
1456 		so = (struct socket *)fp->f_data;
1457 		bdom = so->so_proto->pr_domain->dom_family;
1458 
1459 		FILE_UNUSE(fp, l);
1460 
1461 		DPRINTF(("AF_UNSPEC family adjusted to %d\n", bdom));
1462 	}
1463 
1464 #ifdef INET6
1465 	/*
1466 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
1467 	 * which lacks the scope id compared with RFC2553 one. If we detect
1468 	 * the situation, reject the address and write a message to system log.
1469 	 *
1470 	 * Still accept addresses for which the scope id is not used.
1471 	 */
1472 	if (bdom == AF_INET6 && salen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
1473 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)kosa;
1474 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) &&
1475 		    (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) ||
1476 		     IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) ||
1477 		     IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) ||
1478 		     IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
1479 		     IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
1480 			struct proc *p = l->l_proc;
1481 			int uid = l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1;
1482 
1483 			log(LOG_DEBUG,
1484 			    "pid %d (%s), uid %d: obsolete pre-RFC2553 "
1485 			    "sockaddr_in6 rejected",
1486 			    p->p_pid, p->p_comm, uid);
1487 			error = EINVAL;
1488 			goto bad;
1489 		}
1490 		salen = sizeof (struct sockaddr_in6);
1491 		sin6->sin6_scope_id = 0;
1492 	}
1493 #endif
1494 
1495 	if (bdom == AF_INET)
1496 		salen = sizeof(struct sockaddr_in);
1497 
1498 	sa = (struct sockaddr *) kosa;
1499 	sa->sa_family = bdom;
1500 	sa->sa_len = salen;
1501 	m->m_len = salen;
1502 #ifdef KTRACE
1503 	if (KTRPOINT(l->l_proc, KTR_USER))
1504 		ktrkuser(l, "new sockaddr", kosa, salen);
1505 #endif
1506 
1507 #ifdef DEBUG_LINUX
1508 	DPRINTF(("family %d, len = %d [ ", sa->sa_family, sa->sa_len));
1509 	for (bdom = 0; bdom < sizeof(sa->sa_data); bdom++)
1510 	    DPRINTF(("%02x ", (unsigned char) sa->sa_data[bdom]));
1511 	DPRINTF(("\n"));
1512 #endif
1513 
1514 	*mp = m;
1515 	return 0;
1516 
1517     bad:
1518 	m_free(m);
1519 	return error;
1520 }
1521 
1522 static int
1523 linux_sa_put(osa)
1524 	struct osockaddr *osa;
1525 {
1526 	struct sockaddr sa;
1527 	struct osockaddr *kosa;
1528 	int error, bdom, len;
1529 
1530 	/*
1531 	 * Only read/write the sockaddr family and length part, the rest is
1532 	 * not changed.
1533 	 */
1534 	len = sizeof(sa.sa_len) + sizeof(sa.sa_family);
1535 
1536 	error = copyin(osa, &sa, len);
1537 	if (error)
1538 		return (error);
1539 
1540 	bdom = bsd_to_linux_domain(sa.sa_family);
1541 	if (bdom == -1)
1542 		return (EINVAL);
1543 
1544 	/* Note: we convert from sockaddr to osockaddr here, too */
1545 	kosa = (struct osockaddr *) &sa;
1546 	kosa->sa_family = bdom;
1547 	error = copyout(kosa, osa, len);
1548 	if (error)
1549 		return (error);
1550 
1551 	return (0);
1552 }
1553 
1554 #ifndef __amd64__
1555 int
1556 linux_sys_recv(l, v, retval)
1557 	struct lwp *l;
1558 	void *v;
1559 	register_t *retval;
1560 {
1561 	struct linux_sys_recv_args /* {
1562 		syscallarg(int) s;
1563 		syscallarg(void *) buf;
1564 		syscallarg(int) len;
1565 		syscallarg(int) flags;
1566 	} */ *uap = v;
1567 	struct sys_recvfrom_args bra;
1568 
1569 
1570 	SCARG(&bra, s) = SCARG(uap, s);
1571 	SCARG(&bra, buf) = SCARG(uap, buf);
1572 	SCARG(&bra, len) = (size_t) SCARG(uap, len);
1573 	SCARG(&bra, flags) = SCARG(uap, flags);
1574 	SCARG(&bra, from) = NULL;
1575 	SCARG(&bra, fromlenaddr) = NULL;
1576 
1577 	return (sys_recvfrom(l, &bra, retval));
1578 }
1579 
1580 int
1581 linux_sys_send(l, v, retval)
1582 	struct lwp *l;
1583 	void *v;
1584 	register_t *retval;
1585 {
1586 	struct linux_sys_send_args /* {
1587 		syscallarg(int) s;
1588 		syscallarg(void *) buf;
1589 		syscallarg(int) len;
1590 		syscallarg(int) flags;
1591 	} */ *uap = v;
1592 	struct sys_sendto_args bsa;
1593 
1594 	SCARG(&bsa, s)		= SCARG(uap, s);
1595 	SCARG(&bsa, buf)	= SCARG(uap, buf);
1596 	SCARG(&bsa, len)	= SCARG(uap, len);
1597 	SCARG(&bsa, flags)	= SCARG(uap, flags);
1598 	SCARG(&bsa, to)		= NULL;
1599 	SCARG(&bsa, tolen)	= 0;
1600 
1601 	return (sys_sendto(l, &bsa, retval));
1602 }
1603 #endif
1604 
1605 int
1606 linux_sys_accept(l, v, retval)
1607 	struct lwp *l;
1608 	void *v;
1609 	register_t *retval;
1610 {
1611 	struct linux_sys_accept_args /* {
1612 		syscallarg(int) s;
1613 		syscallarg(struct osockaddr *) name;
1614 		syscallarg(int *) anamelen;
1615 	} */ *uap = v;
1616 	int error;
1617 	struct sys_accept_args baa;
1618 
1619 	SCARG(&baa, s)		= SCARG(uap, s);
1620 	SCARG(&baa, name)	= (struct sockaddr *) SCARG(uap, name);
1621 	SCARG(&baa, anamelen)	= (unsigned int *) SCARG(uap, anamelen);
1622 
1623 	if ((error = sys_accept(l, &baa, retval)))
1624 		return (error);
1625 
1626 	if (SCARG(uap, name) && (error = linux_sa_put(SCARG(uap, name))))
1627 		return (error);
1628 
1629 	return (0);
1630 }
1631