xref: /netbsd-src/sys/compat/linux/common/linux_socket.c (revision 001c68bd94f75ce9270b69227c4199fbf34ee396)
1 /*	$NetBSD: linux_socket.c,v 1.46 2003/06/29 22:29:31 fvdl Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden and Eric Haszlakiewicz.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Functions in multiarch:
41  *	linux_sys_socketcall		: linux_socketcall.c
42  *
43  * XXX Note: Linux CMSG_ALIGN() uses (sizeof(long)-1). For architectures
44  * where our CMSG_ALIGN() differs (like powerpc, sparc, sparc64), the passed
45  * control structure would need to be adjusted accordingly in sendmsg() and
46  * recvmsg().
47  */
48 
49 #include <sys/cdefs.h>
50 __KERNEL_RCSID(0, "$NetBSD: linux_socket.c,v 1.46 2003/06/29 22:29:31 fvdl Exp $");
51 
52 #if defined(_KERNEL_OPT)
53 #include "opt_inet.h"
54 #endif
55 
56 #include <sys/param.h>
57 #include <sys/kernel.h>
58 #include <sys/systm.h>
59 #include <sys/buf.h>
60 #include <sys/malloc.h>
61 #include <sys/ioctl.h>
62 #include <sys/tty.h>
63 #include <sys/file.h>
64 #include <sys/filedesc.h>
65 #include <sys/select.h>
66 #include <sys/socket.h>
67 #include <sys/socketvar.h>
68 #include <net/if.h>
69 #include <net/if_dl.h>
70 #include <net/if_types.h>
71 #include <netinet/in.h>
72 #include <netinet/tcp.h>
73 #include <sys/mount.h>
74 #include <sys/proc.h>
75 #include <sys/vnode.h>
76 #include <sys/device.h>
77 #include <sys/protosw.h>
78 #include <sys/mbuf.h>
79 #include <sys/syslog.h>
80 
81 #include <sys/sa.h>
82 #include <sys/syscallargs.h>
83 
84 #ifdef INET6
85 #include <netinet/ip6.h>
86 #include <netinet6/ip6_var.h>
87 #endif
88 
89 #include <compat/linux/common/linux_types.h>
90 #include <compat/linux/common/linux_util.h>
91 #include <compat/linux/common/linux_signal.h>
92 #include <compat/linux/common/linux_ioctl.h>
93 #include <compat/linux/common/linux_socket.h>
94 #include <compat/linux/common/linux_socketcall.h>
95 #include <compat/linux/common/linux_sockio.h>
96 
97 #include <compat/linux/linux_syscallargs.h>
98 
/*
 * Debug tracing.  With DEBUG_LINUX defined, DPRINTF((fmt, ...)) expands to
 * a uprintf call; otherwise it expands to nothing.  The argument list must
 * be wrapped in its own set of parentheses, since the macro takes a single
 * argument.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif
104 
105 /*
106  * The calls in this file are entered either via the linux_socketcall()
107  * interface or, on the Alpha, as individual syscalls.  The
108  * linux_socketcall function does any massaging of arguments so that all
109  * the calls in here need not think that they are anything other
110  * than a normal syscall.
111  */
112 
/* Address family translation; both return -1 for an unmappable value. */
static int linux_to_bsd_domain __P((int));
static int bsd_to_linux_domain __P((int));
/* Socket option level/name translation: Linux value in, NetBSD value or -1 out. */
int linux_to_bsd_sopt_level __P((int));
int linux_to_bsd_so_sockopt __P((int));
int linux_to_bsd_ip_sockopt __P((int));
int linux_to_bsd_tcp_sockopt __P((int));
int linux_to_bsd_udp_sockopt __P((int));
/* Back end for LINUX_SIOCGIFHWADDR, called from linux_ioctl_socket(). */
int linux_getifhwaddr __P((struct proc *, register_t *, u_int, void *));
/* Socket address conversion between the osockaddr and sockaddr layouts. */
static int linux_sa_get __P((struct proc *, caddr_t *sgp, struct sockaddr **sap,
		const struct osockaddr *osa, int *osalen));
static int linux_sa_put __P((struct osockaddr *osa));
124 
/*
 * Linux -> NetBSD address family map, indexed by the Linux AF number
 * (see linux_to_bsd_domain()).  The initializer is positional: the first
 * entry is index 0.  An entry of -1 means there is no NetBSD equivalent.
 *
 * NOTE(review): AF_SNA is the 22nd entry, i.e. index 21.  Linux itself
 * is believed to number AF_SNA as 22 -- confirm against LINUX_AF_SNA in
 * linux_socket.h.
 */
static const int linux_to_bsd_domain_[LINUX_AF_MAX] = {
	AF_UNSPEC,
	AF_UNIX,
	AF_INET,
	AF_CCITT,	/* LINUX_AF_AX25 */
	AF_IPX,
	AF_APPLETALK,
	-1,		/* LINUX_AF_NETROM */
	-1,		/* LINUX_AF_BRIDGE */
	-1,		/* LINUX_AF_ATMPVC */
	AF_CCITT,	/* LINUX_AF_X25 */
	AF_INET6,
	-1,		/* LINUX_AF_ROSE */
	AF_DECnet,
	-1,		/* LINUX_AF_NETBEUI */
	-1,		/* LINUX_AF_SECURITY */
	pseudo_AF_KEY,
	AF_ROUTE,	/* LINUX_AF_NETLINK */
	-1,		/* LINUX_AF_PACKET */
	-1,		/* LINUX_AF_ASH */
	-1,		/* LINUX_AF_ECONET */
	-1,		/* LINUX_AF_ATMSVC */
	AF_SNA,
	/* rest up to LINUX_AF_MAX-1 is not allocated */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
151 
/*
 * NetBSD -> Linux address family map, indexed by the NetBSD AF number
 * (see bsd_to_linux_domain()).  The initializer is positional: the first
 * entry is index 0.  An entry of -1 means there is no Linux equivalent.
 *
 * Note that AF_CCITT maps to LINUX_AF_AX25 here, while the Linux -> NetBSD
 * table maps both LINUX_AF_AX25 and LINUX_AF_X25 to AF_CCITT, so the
 * translation does not round-trip for LINUX_AF_X25.
 */
static const int bsd_to_linux_domain_[AF_MAX] = {
	LINUX_AF_UNSPEC,
	LINUX_AF_UNIX,
	LINUX_AF_INET,
	-1,		/* AF_IMPLINK */
	-1,		/* AF_PUP */
	-1,		/* AF_CHAOS */
	-1,		/* AF_NS */
	-1,		/* AF_ISO */
	-1,		/* AF_ECMA */
	-1,		/* AF_DATAKIT */
	LINUX_AF_AX25,	/* AF_CCITT */
	LINUX_AF_SNA,
	LINUX_AF_DECnet,
	-1,		/* AF_DLI */
	-1,		/* AF_LAT */
	-1,		/* AF_HYLINK */
	LINUX_AF_APPLETALK,
	LINUX_AF_NETLINK,
	-1,		/* AF_LINK */
	-1,		/* AF_XTP */
	-1,		/* AF_COIP */
	-1,		/* AF_CNT */
	-1,		/* pseudo_AF_RTIP */
	LINUX_AF_IPX,
	LINUX_AF_INET6,
	-1,		/* pseudo_AF_PIP */
	-1,		/* AF_ISDN */
	-1,		/* AF_NATM */
	-1,		/* AF_ARP */
	LINUX_pseudo_AF_KEY,
	-1,		/* pseudo_AF_HDRCMPLT */
};
185 
186 /*
187  * Convert between Linux and BSD socket domain values
188  */
189 static int
190 linux_to_bsd_domain(ldom)
191 	int ldom;
192 {
193 	if (ldom < 0 || ldom >= LINUX_AF_MAX)
194 		return (-1);
195 
196 	return linux_to_bsd_domain_[ldom];
197 }
198 
199 /*
200  * Convert between BSD and Linux socket domain values
201  */
202 static int
203 bsd_to_linux_domain(bdom)
204 	int bdom;
205 {
206 	if (bdom < 0 || bdom >= AF_MAX)
207 		return (-1);
208 
209 	return bsd_to_linux_domain_[bdom];
210 }
211 
212 int
213 linux_sys_socket(l, v, retval)
214 	struct lwp *l;
215 	void *v;
216 	register_t *retval;
217 {
218 	struct linux_sys_socket_args /* {
219 		syscallarg(int)	domain;
220 		syscallarg(int)	type;
221 		syscallarg(int) protocol;
222 	} */ *uap = v;
223 	struct sys_socket_args bsa;
224 	int error;
225 
226 	SCARG(&bsa, protocol) = SCARG(uap, protocol);
227 	SCARG(&bsa, type) = SCARG(uap, type);
228 	SCARG(&bsa, domain) = linux_to_bsd_domain(SCARG(uap, domain));
229 	if (SCARG(&bsa, domain) == -1)
230 		return EINVAL;
231 	error = sys_socket(l, &bsa, retval);
232 
233 #ifdef INET6
234 	/*
235 	 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
236 	 * default and some apps depend on this. So, set V6ONLY to 0
237 	 * for Linux apps if the sysctl value is set to 1.
238 	 */
239 	if (!error && ip6_v6only && SCARG(&bsa, domain) == PF_INET6) {
240 		struct proc *p = l->l_proc;
241 		struct file *fp;
242 
243 		if (getsock(p->p_fd, *retval, &fp) == 0) {
244 			struct mbuf *m;
245 
246 			m = m_get(M_WAIT, MT_SOOPTS);
247 			m->m_len = sizeof(int);
248 			*mtod(m, int *) = 0;
249 
250 			/* ignore error */
251 			(void) sosetopt((struct socket *)fp->f_data,
252 				IPPROTO_IPV6, IPV6_V6ONLY, m);
253 
254 			FILE_UNUSE(fp, p);
255 		}
256 	}
257 #endif
258 
259 	return (error);
260 }
261 
262 int
263 linux_sys_socketpair(l, v, retval)
264 	struct lwp *l;
265 	void *v;
266 	register_t *retval;
267 {
268 	struct linux_sys_socketpair_args /* {
269 		syscallarg(int) domain;
270 		syscallarg(int) type;
271 		syscallarg(int) protocol;
272 		syscallarg(int *) rsv;
273 	} */ *uap = v;
274 	struct sys_socketpair_args bsa;
275 
276 	SCARG(&bsa, domain) = linux_to_bsd_domain(SCARG(uap, domain));
277 	if (SCARG(&bsa, domain) == -1)
278 		return EINVAL;
279 	SCARG(&bsa, type) = SCARG(uap, type);
280 	SCARG(&bsa, protocol) = SCARG(uap, protocol);
281 	SCARG(&bsa, rsv) = SCARG(uap, rsv);
282 
283 	return sys_socketpair(l, &bsa, retval);
284 }
285 
286 int
287 linux_sys_sendto(l, v, retval)
288 	struct lwp *l;
289 	void *v;
290 	register_t *retval;
291 {
292 	struct linux_sys_sendto_args /* {
293 		syscallarg(int)				s;
294 		syscallarg(void *)			msg;
295 		syscallarg(int)				len;
296 		syscallarg(int)				flags;
297 		syscallarg(struct osockaddr *)		to;
298 		syscallarg(int)				tolen;
299 	} */ *uap = v;
300 	struct proc *p = l->l_proc;
301 	struct sys_sendto_args bsa;
302 	int tolen;
303 
304 	SCARG(&bsa, s) = SCARG(uap, s);
305 	SCARG(&bsa, buf) = SCARG(uap, msg);
306 	SCARG(&bsa, len) = (size_t) SCARG(uap, len);
307 	SCARG(&bsa, flags) = SCARG(uap, flags);
308 	tolen = SCARG(uap, tolen);
309 	if (SCARG(uap, to)) {
310 		struct sockaddr *sa;
311 		int error;
312 		caddr_t sg = stackgap_init(p, 0);
313 
314 		if ((error = linux_sa_get(p, &sg, &sa, SCARG(uap, to), &tolen)))
315 			return (error);
316 
317 		SCARG(&bsa, to) = sa;
318 	} else
319 		SCARG(&bsa, to) = NULL;
320 	SCARG(&bsa, tolen) = tolen;
321 
322 	return (sys_sendto(l, &bsa, retval));
323 }
324 
325 int
326 linux_sys_sendmsg(l, v, retval)
327 	struct lwp *l;
328 	void *v;
329 	register_t *retval;
330 {
331 	struct linux_sys_sendmsg_args /* {
332 		syscallarg(int) s;
333 		syscallarg(struct msghdr *) msg;
334 		syscallarg(u_int) flags;
335 	} */ *uap = v;
336 	struct proc *p = l->l_proc;
337 	struct msghdr	msg;
338 	int		error;
339 	struct sys_sendmsg_args bsa;
340 	struct msghdr *nmsg = NULL;
341 
342 	error = copyin(SCARG(uap, msg), (caddr_t)&msg, sizeof(msg));
343 	if (error)
344 		return (error);
345 
346 	if (msg.msg_name) {
347 		struct sockaddr *sa;
348 		caddr_t sg = stackgap_init(p, 0);
349 
350 		nmsg = (struct msghdr *) stackgap_alloc(p, &sg,
351 		    sizeof(struct msghdr));
352 		if (!nmsg)
353 			return (ENOMEM);
354 
355 		error = linux_sa_get(p, &sg, &sa,
356 		    (struct osockaddr *) msg.msg_name, &msg.msg_namelen);
357 		if (error)
358 			return (error);
359 
360 		msg.msg_name = (struct sockaddr *) sa;
361 		if ((error = copyout(&msg, nmsg, sizeof(struct msghdr))))
362 			return (error);
363 	}
364 
365 	/*
366 	 * XXX handle different alignment of cmsg data on architectures where
367 	 * the Linux alignment is different (powerpc, sparc, sparc64).
368 	 */
369 
370 	SCARG(&bsa, s) = SCARG(uap, s);
371 	SCARG(&bsa, msg) = nmsg;
372 	SCARG(&bsa, flags) = SCARG(uap, flags);
373 
374 	if ((error = sys_sendmsg(l, &bsa, retval)))
375 		return (error);
376 
377 	return (0);
378 }
379 
380 
381 int
382 linux_sys_recvfrom(l, v, retval)
383 	struct lwp *l;
384 	void *v;
385 	register_t *retval;
386 {
387 	struct linux_sys_recvfrom_args /* {
388 		syscallarg(int) s;
389 		syscallarg(void *) buf;
390 		syscallarg(int) len;
391 		syscallarg(int) flags;
392 		syscallarg(struct osockaddr *) from;
393 		syscallarg(int *) fromlenaddr;
394 	} */ *uap = v;
395 	int		error;
396 	struct sys_recvfrom_args bra;
397 
398 	SCARG(&bra, s) = SCARG(uap, s);
399 	SCARG(&bra, buf) = SCARG(uap, buf);
400 	SCARG(&bra, len) = SCARG(uap, len);
401 	SCARG(&bra, flags) = SCARG(uap, flags);
402 	SCARG(&bra, from) = (struct sockaddr *) SCARG(uap, from);
403 	SCARG(&bra, fromlenaddr) = SCARG(uap, fromlenaddr);
404 
405 	if ((error = sys_recvfrom(l, &bra, retval)))
406 		return (error);
407 
408 	if (SCARG(uap, from) && (error = linux_sa_put(SCARG(uap, from))))
409 		return (error);
410 
411 	return (0);
412 }
413 
414 int
415 linux_sys_recvmsg(l, v, retval)
416 	struct lwp *l;
417 	void *v;
418 	register_t *retval;
419 {
420 	struct linux_sys_recvmsg_args /* {
421 		syscallarg(int) s;
422 		syscallarg(struct msghdr *) msg;
423 		syscallarg(u_int) flags;
424 	} */ *uap = v;
425 	struct msghdr	msg;
426 	int		error;
427 
428 	if ((error = sys_recvmsg(l, v, retval)))
429 		return (error);
430 
431 	error = copyin((caddr_t)SCARG(uap, msg), (caddr_t)&msg,
432 		       sizeof(msg));
433 
434 	if (!error && msg.msg_name && msg.msg_namelen > 2)
435 		error = linux_sa_put(msg.msg_name);
436 
437 	/*
438 	 * XXX handle different alignment of cmsg data on architectures where
439 	 * the Linux alignment is different (powerpc, sparc, sparc64).
440 	 */
441 
442 	return (error);
443 }
444 
445 /*
446  * Convert socket option level from Linux to NetBSD value. Only SOL_SOCKET
447  * is different, the rest matches IPPROTO_* on both systems.
448  */
449 int
450 linux_to_bsd_sopt_level(llevel)
451 	int llevel;
452 {
453 
454 	switch (llevel) {
455 	case LINUX_SOL_SOCKET:
456 		return SOL_SOCKET;
457 	case LINUX_SOL_IP:
458 		return IPPROTO_IP;
459 	case LINUX_SOL_TCP:
460 		return IPPROTO_TCP;
461 	case LINUX_SOL_UDP:
462 		return IPPROTO_UDP;
463 	default:
464 		return -1;
465 	}
466 }
467 
468 /*
469  * Convert Linux socket level socket option numbers to NetBSD values.
470  */
471 int
472 linux_to_bsd_so_sockopt(lopt)
473 	int lopt;
474 {
475 
476 	switch (lopt) {
477 	case LINUX_SO_DEBUG:
478 		return SO_DEBUG;
479 	case LINUX_SO_REUSEADDR:
480 		/*
481 		 * Linux does not implement SO_REUSEPORT, but allows reuse of a
482 		 * host:port pair through SO_REUSEADDR even if the address is not a
483 		 * multicast-address.  Effectively, this means that we should use
484 		 * SO_REUSEPORT to allow Linux applications to not exit with
485 		 * EADDRINUSE
486 		 */
487 		return SO_REUSEPORT;
488 	case LINUX_SO_TYPE:
489 		return SO_TYPE;
490 	case LINUX_SO_ERROR:
491 		return SO_ERROR;
492 	case LINUX_SO_DONTROUTE:
493 		return SO_DONTROUTE;
494 	case LINUX_SO_BROADCAST:
495 		return SO_BROADCAST;
496 	case LINUX_SO_SNDBUF:
497 		return SO_SNDBUF;
498 	case LINUX_SO_RCVBUF:
499 		return SO_RCVBUF;
500 	case LINUX_SO_KEEPALIVE:
501 		return SO_KEEPALIVE;
502 	case LINUX_SO_OOBINLINE:
503 		return SO_OOBINLINE;
504 	case LINUX_SO_LINGER:
505 		return SO_LINGER;
506 	case LINUX_SO_PRIORITY:
507 	case LINUX_SO_NO_CHECK:
508 	default:
509 		return -1;
510 	}
511 }
512 
513 /*
514  * Convert Linux IP level socket option number to NetBSD values.
515  */
516 int
517 linux_to_bsd_ip_sockopt(lopt)
518 	int lopt;
519 {
520 
521 	switch (lopt) {
522 	case LINUX_IP_TOS:
523 		return IP_TOS;
524 	case LINUX_IP_TTL:
525 		return IP_TTL;
526 	case LINUX_IP_MULTICAST_TTL:
527 		return IP_MULTICAST_TTL;
528 	case LINUX_IP_MULTICAST_LOOP:
529 		return IP_MULTICAST_LOOP;
530 	case LINUX_IP_MULTICAST_IF:
531 		return IP_MULTICAST_IF;
532 	case LINUX_IP_ADD_MEMBERSHIP:
533 		return IP_ADD_MEMBERSHIP;
534 	case LINUX_IP_DROP_MEMBERSHIP:
535 		return IP_DROP_MEMBERSHIP;
536 	default:
537 		return -1;
538 	}
539 }
540 
541 /*
542  * Convert Linux TCP level socket option number to NetBSD values.
543  */
544 int
545 linux_to_bsd_tcp_sockopt(lopt)
546 	int lopt;
547 {
548 
549 	switch (lopt) {
550 	case LINUX_TCP_NODELAY:
551 		return TCP_NODELAY;
552 	case LINUX_TCP_MAXSEG:
553 		return TCP_MAXSEG;
554 	default:
555 		return -1;
556 	}
557 }
558 
/*
 * Map a Linux UDP-level option name to the NetBSD one.  No UDP options
 * are translated at present, so the answer is always -1.
 */
int
linux_to_bsd_udp_sockopt(lopt)
	int lopt;
{

	(void)lopt;
	return -1;
}
572 
573 /*
574  * Another reasonably straightforward function: setsockopt(2).
575  * The level and option numbers are converted; the values passed
576  * are not (yet) converted, the ones currently implemented don't
577  * need conversion, as they are the same on both systems.
578  */
579 int
580 linux_sys_setsockopt(l, v, retval)
581 	struct lwp *l;
582 	void *v;
583 	register_t *retval;
584 {
585 	struct linux_sys_setsockopt_args /* {
586 		syscallarg(int) s;
587 		syscallarg(int) level;
588 		syscallarg(int) optname;
589 		syscallarg(void *) optval;
590 		syscallarg(int) optlen;
591 	} */ *uap = v;
592 	struct sys_setsockopt_args bsa;
593 	int name;
594 
595 	SCARG(&bsa, s) = SCARG(uap, s);
596 	SCARG(&bsa, level) = linux_to_bsd_sopt_level(SCARG(uap, level));
597 	SCARG(&bsa, val) = SCARG(uap, optval);
598 	SCARG(&bsa, valsize) = SCARG(uap, optlen);
599 
600 	switch (SCARG(&bsa, level)) {
601 	case SOL_SOCKET:
602 		name = linux_to_bsd_so_sockopt(SCARG(uap, optname));
603 		break;
604 	case IPPROTO_IP:
605 		name = linux_to_bsd_ip_sockopt(SCARG(uap, optname));
606 		break;
607 	case IPPROTO_TCP:
608 		name = linux_to_bsd_tcp_sockopt(SCARG(uap, optname));
609 		break;
610 	case IPPROTO_UDP:
611 		name = linux_to_bsd_udp_sockopt(SCARG(uap, optname));
612 		break;
613 	default:
614 		return EINVAL;
615 	}
616 
617 	if (name == -1)
618 		return EINVAL;
619 	SCARG(&bsa, name) = name;
620 
621 	return sys_setsockopt(l, &bsa, retval);
622 }
623 
624 /*
625  * getsockopt(2) is very much the same as setsockopt(2) (see above)
626  */
627 int
628 linux_sys_getsockopt(l, v, retval)
629 	struct lwp *l;
630 	void *v;
631 	register_t *retval;
632 {
633 	struct linux_sys_getsockopt_args /* {
634 		syscallarg(int) s;
635 		syscallarg(int) level;
636 		syscallarg(int) optname;
637 		syscallarg(void *) optval;
638 		syscallarg(int *) optlen;
639 	} */ *uap = v;
640 	struct sys_getsockopt_args bga;
641 	int name;
642 
643 	SCARG(&bga, s) = SCARG(uap, s);
644 	SCARG(&bga, level) = linux_to_bsd_sopt_level(SCARG(uap, level));
645 	SCARG(&bga, val) = SCARG(uap, optval);
646 	SCARG(&bga, avalsize) = SCARG(uap, optlen);
647 
648 	switch (SCARG(&bga, level)) {
649 	case SOL_SOCKET:
650 		name = linux_to_bsd_so_sockopt(SCARG(uap, optname));
651 		break;
652 	case IPPROTO_IP:
653 		name = linux_to_bsd_ip_sockopt(SCARG(uap, optname));
654 		break;
655 	case IPPROTO_TCP:
656 		name = linux_to_bsd_tcp_sockopt(SCARG(uap, optname));
657 		break;
658 	case IPPROTO_UDP:
659 		name = linux_to_bsd_udp_sockopt(SCARG(uap, optname));
660 		break;
661 	default:
662 		return EINVAL;
663 	}
664 
665 	if (name == -1)
666 		return EINVAL;
667 	SCARG(&bga, name) = name;
668 
669 	return sys_getsockopt(l, &bga, retval);
670 }
671 
672 #define IF_NAME_LEN 16
673 
674 int
675 linux_getifhwaddr(p, retval, fd, data)
676 	struct proc *p;
677 	register_t *retval;
678 	u_int fd;
679 	void *data;
680 {
681 	/* Not the full structure, just enough to map what we do here */
682 	struct linux_ifreq {
683 		char if_name[IF_NAME_LEN];
684 		struct osockaddr hwaddr;
685 	} lreq;
686 	struct filedesc *fdp;
687 	struct file *fp;
688 	struct ifaddr *ifa;
689 	struct ifnet *ifp;
690 	struct sockaddr_dl *sadl;
691 	int error, found;
692 	int index, ifnum;
693 
694 	/*
695 	 * We can't emulate this ioctl by calling sys_ioctl() to run
696 	 * SIOCGIFCONF, because the user buffer is not of the right
697 	 * type to take those results.  We can't use kernel buffers to
698 	 * receive the results, as the implementation of sys_ioctl()
699 	 * and ifconf() [which implements SIOCGIFCONF] use
700 	 * copyin()/copyout() which will fail on kernel addresses.
701 	 *
702 	 * So, we must duplicate code from sys_ioctl() and ifconf().  Ugh.
703 	 */
704 
705 	fdp = p->p_fd;
706 	if ((fp = fd_getfile(fdp, fd)) == NULL)
707 		return (EBADF);
708 
709 	FILE_USE(fp);
710 	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
711 		error = EBADF;
712 		goto out;
713 	}
714 
715 	error = copyin(data, (caddr_t)&lreq, sizeof(lreq));
716 	if (error)
717 		goto out;
718 	lreq.if_name[IF_NAME_LEN-1] = '\0';		/* just in case */
719 
720 	/*
721 	 * Try real interface name first, then fake "ethX"
722 	 */
723 	for (ifp = ifnet.tqh_first, found = 0;
724 	     ifp != 0 && !found;
725 	     ifp = ifp->if_list.tqe_next) {
726 		if (strcmp(lreq.if_name, ifp->if_xname))
727 			/* not this interface */
728 			continue;
729 		found=1;
730 		if ((ifa = ifp->if_addrlist.tqh_first) != 0) {
731 			for (; ifa != 0; ifa = ifa->ifa_list.tqe_next) {
732 				sadl = (struct sockaddr_dl *)ifa->ifa_addr;
733 				/* only return ethernet addresses */
734 				/* XXX what about FDDI, etc. ? */
735 				if (sadl->sdl_family != AF_LINK ||
736 				    sadl->sdl_type != IFT_ETHER)
737 					continue;
738 				memcpy((caddr_t)&lreq.hwaddr.sa_data,
739 				       LLADDR(sadl),
740 				       MIN(sadl->sdl_alen,
741 					   sizeof(lreq.hwaddr.sa_data)));
742 				lreq.hwaddr.sa_family =
743 					sadl->sdl_family;
744 				error = copyout((caddr_t)&lreq, data,
745 						sizeof(lreq));
746 				goto out;
747 			}
748 		} else {
749 			error = ENODEV;
750 			goto out;
751 		}
752 	}
753 
754 	if (strncmp(lreq.if_name, "eth", 3) == 0) {
755 		for (ifnum = 0, index = 3;
756 		     lreq.if_name[index] != '\0' && index < IF_NAME_LEN;
757 		     index++) {
758 			ifnum *= 10;
759 			ifnum += lreq.if_name[index] - '0';
760 		}
761 
762 		error = EINVAL;			/* in case we don't find one */
763 		for (ifp = ifnet.tqh_first, found = 0;
764 		     ifp != 0 && !found;
765 		     ifp = ifp->if_list.tqe_next) {
766 			memcpy(lreq.if_name, ifp->if_xname,
767 			       MIN(IF_NAME_LEN, IFNAMSIZ));
768 			if ((ifa = ifp->if_addrlist.tqh_first) == 0)
769 				/* no addresses on this interface */
770 				continue;
771 			else
772 				for (; ifa != 0; ifa = ifa->ifa_list.tqe_next) {
773 					sadl = (struct sockaddr_dl *)ifa->ifa_addr;
774 					/* only return ethernet addresses */
775 					/* XXX what about FDDI, etc. ? */
776 					if (sadl->sdl_family != AF_LINK ||
777 					    sadl->sdl_type != IFT_ETHER)
778 						continue;
779 					if (ifnum--)
780 						/* not the reqested iface */
781 						continue;
782 					memcpy((caddr_t)&lreq.hwaddr.sa_data,
783 					       LLADDR(sadl),
784 					       MIN(sadl->sdl_alen,
785 						   sizeof(lreq.hwaddr.sa_data)));
786 					lreq.hwaddr.sa_family =
787 						sadl->sdl_family;
788 					error = copyout((caddr_t)&lreq, data,
789 							sizeof(lreq));
790 					found = 1;
791 					break;
792 				}
793 		}
794 	} else {
795 		/* unknown interface, not even an "eth*" name */
796 		error = ENODEV;
797 	}
798 
799 out:
800 	FILE_UNUSE(fp, p);
801 	return error;
802 }
803 #undef IF_NAME_LEN
804 
805 int
806 linux_ioctl_socket(p, uap, retval)
807 	struct proc *p;
808 	struct linux_sys_ioctl_args /* {
809 		syscallarg(int) fd;
810 		syscallarg(u_long) com;
811 		syscallarg(caddr_t) data;
812 	} */ *uap;
813 	register_t *retval;
814 {
815 	u_long com;
816 	int error = 0, isdev = 0, dosys = 1;
817 	struct sys_ioctl_args ia;
818 	struct file *fp;
819 	struct filedesc *fdp;
820 	struct vnode *vp;
821 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
822 	struct ioctl_pt pt;
823 
824         fdp = p->p_fd;
825 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
826 		return (EBADF);
827 
828 	FILE_USE(fp);
829 
830 	if (fp->f_type == DTYPE_VNODE) {
831 		vp = (struct vnode *)fp->f_data;
832 		isdev = vp->v_type == VCHR;
833 	}
834 
835 	/*
836 	 * Don't try to interpret socket ioctl calls that are done
837 	 * on a device filedescriptor, just pass them through, to
838 	 * emulate Linux behaviour. Use PTIOCLINUX so that the
839 	 * device will only handle these if it's prepared to do
840 	 * so, to avoid unexpected things from happening.
841 	 */
842 	if (isdev) {
843 		dosys = 0;
844 		ioctlf = fp->f_ops->fo_ioctl;
845 		pt.com = SCARG(uap, com);
846 		pt.data = SCARG(uap, data);
847 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
848 		/*
849 		 * XXX hack: if the function returns EJUSTRETURN,
850 		 * it has stuffed a sysctl return value in pt.data.
851 		 */
852 		if (error == EJUSTRETURN) {
853 			retval[0] = (register_t)pt.data;
854 			error = 0;
855 		}
856 		goto out;
857 	}
858 
859 	com = SCARG(uap, com);
860 	retval[0] = 0;
861 
862 	switch (com) {
863 	case LINUX_SIOCGIFCONF:
864 		SCARG(&ia, com) = OSIOCGIFCONF;
865 		break;
866 	case LINUX_SIOCGIFFLAGS:
867 		SCARG(&ia, com) = SIOCGIFFLAGS;
868 		break;
869 	case LINUX_SIOCSIFFLAGS:
870 		SCARG(&ia, com) = SIOCSIFFLAGS;
871 		break;
872 	case LINUX_SIOCGIFADDR:
873 		SCARG(&ia, com) = OSIOCGIFADDR;
874 		break;
875 	case LINUX_SIOCGIFDSTADDR:
876 		SCARG(&ia, com) = OSIOCGIFDSTADDR;
877 		break;
878 	case LINUX_SIOCGIFBRDADDR:
879 		SCARG(&ia, com) = OSIOCGIFBRDADDR;
880 		break;
881 	case LINUX_SIOCGIFNETMASK:
882 		SCARG(&ia, com) = OSIOCGIFNETMASK;
883 		break;
884 	case LINUX_SIOCADDMULTI:
885 		SCARG(&ia, com) = SIOCADDMULTI;
886 		break;
887 	case LINUX_SIOCDELMULTI:
888 		SCARG(&ia, com) = SIOCDELMULTI;
889 		break;
890 	case LINUX_SIOCGIFHWADDR:
891 	        error = linux_getifhwaddr(p, retval, SCARG(uap, fd),
892 					 SCARG(uap, data));
893 		dosys = 0;
894 		break;
895 	default:
896 		error = EINVAL;
897 	}
898 
899 out:
900 	FILE_UNUSE(fp, p);
901 
902 	if (error ==0 && dosys) {
903 		SCARG(&ia, fd) = SCARG(uap, fd);
904 		SCARG(&ia, data) = SCARG(uap, data);
905 		/* XXX NJWLWP */
906 		error = sys_ioctl(curlwp, &ia, retval);
907 	}
908 
909 	return error;
910 }
911 
912 int
913 linux_sys_connect(l, v, retval)
914 	struct lwp *l;
915 	void *v;
916 	register_t *retval;
917 {
918 	struct linux_sys_connect_args /* {
919 		syscallarg(int) s;
920 		syscallarg(const struct sockaddr *) name;
921 		syscallarg(int) namelen;
922 	} */ *uap = v;
923 	struct proc *p = l->l_proc;
924 	int		error;
925 	struct sockaddr *sa;
926 	struct sys_connect_args bca;
927 	caddr_t sg = stackgap_init(p, 0);
928 	int namlen;
929 
930 	namlen = SCARG(uap, namelen);
931 	error = linux_sa_get(p, &sg, &sa, SCARG(uap, name), &namlen);
932 	if (error)
933 		return (error);
934 
935 	SCARG(&bca, s) = SCARG(uap, s);
936 	SCARG(&bca, name) = sa;
937 	SCARG(&bca, namelen) = (unsigned int) namlen;
938 
939 	error = sys_connect(l, &bca, retval);
940 
941 	if (error == EISCONN) {
942 		struct file *fp;
943 		struct socket *so;
944 		int s, state, prflags;
945 
946 		/* getsock() will use the descriptor for us */
947 	    	if (getsock(p->p_fd, SCARG(uap, s), &fp) != 0)
948 		    	return EISCONN;
949 
950 		s = splsoftnet();
951 		so = (struct socket *)fp->f_data;
952 		state = so->so_state;
953 		prflags = so->so_proto->pr_flags;
954 		splx(s);
955 		FILE_UNUSE(fp, p);
956 		/*
957 		 * We should only let this call succeed once per
958 		 * non-blocking connect; however we don't have
959 		 * a convenient place to keep that state..
960 		 */
961 		if ((state & SS_NBIO) && (state & SS_ISCONNECTED) &&
962 		    (prflags & PR_CONNREQUIRED))
963 			return 0;
964 	}
965 
966 	return (error);
967 }
968 
969 int
970 linux_sys_bind(l, v, retval)
971 	struct lwp *l;
972 	void *v;
973 	register_t *retval;
974 {
975 	struct linux_sys_bind_args /* {
976 		syscallarg(int) s;
977 		syscallarg(const struct osockaddr *) name;
978 		syscallarg(int) namelen;
979 	} */ *uap = v;
980 	struct proc *p = l->l_proc;
981 	int		error, namlen;
982 	struct sys_bind_args bsa;
983 
984 	namlen = SCARG(uap, namelen);
985 	SCARG(&bsa, s) = SCARG(uap, s);
986 	if (SCARG(uap, name)) {
987 		struct sockaddr *sa;
988 		caddr_t sg = stackgap_init(p, 0);
989 
990 		error = linux_sa_get(p, &sg, &sa, SCARG(uap, name), &namlen);
991 		if (error)
992 			return (error);
993 
994 		SCARG(&bsa, name) = sa;
995 	} else
996 		SCARG(&bsa, name) = NULL;
997 	SCARG(&bsa, namelen) = namlen;
998 
999 	return (sys_bind(l, &bsa, retval));
1000 }
1001 
1002 int
1003 linux_sys_getsockname(l, v, retval)
1004 	struct lwp *l;
1005 	void *v;
1006 	register_t *retval;
1007 {
1008 	struct linux_sys_getsockname_args /* {
1009 		syscallarg(int) fdes;
1010 		syscallarg(caddr_t) asa;
1011 		syscallarg(int *) alen;
1012 	} */ *uap = v;
1013 	int error;
1014 
1015 	if ((error = sys_getsockname(l, uap, retval)) != 0)
1016 		return (error);
1017 
1018 	if ((error = linux_sa_put((struct osockaddr *)SCARG(uap, asa))))
1019 		return (error);
1020 
1021 	return (0);
1022 }
1023 
1024 int
1025 linux_sys_getpeername(l, v, retval)
1026 	struct lwp *l;
1027 	void *v;
1028 	register_t *retval;
1029 {
1030 	struct sys_getpeername_args /* {
1031 		syscallarg(int) fdes;
1032 		syscallarg(caddr_t) asa;
1033 		syscallarg(int *) alen;
1034 	} */ *uap = v;
1035 	int error;
1036 
1037 	if ((error = sys_getpeername(l, uap, retval)) != 0)
1038 		return (error);
1039 
1040 	if ((error = linux_sa_put((struct osockaddr *)SCARG(uap, asa))))
1041 		return (error);
1042 
1043 	return (0);
1044 }
1045 
1046 /*
1047  * Copy the osockaddr structure pointed to by osa to kernel, adjust
1048  * family and convert to sockaddr, allocate stackgap and put the
1049  * the converted structure there, address on stackgap returned in sap.
1050  */
1051 static int
1052 linux_sa_get(p, sgp, sap, osa, osalen)
1053 	struct proc *p;
1054 	caddr_t *sgp;
1055 	struct sockaddr **sap;
1056 	const struct osockaddr *osa;
1057 	int *osalen;
1058 {
1059 	int error=0, bdom;
1060 	struct sockaddr *sa, *usa;
1061 	struct osockaddr *kosa = (struct osockaddr *) &sa;
1062 	int alloclen;
1063 #ifdef INET6
1064 	int oldv6size;
1065 	struct sockaddr_in6 *sin6;
1066 #endif
1067 
1068 	if (*osalen < 2 || *osalen > UCHAR_MAX || !osa) {
1069 		DPRINTF(("bad osa=%p osalen=%d\n", osa, *osalen));
1070 		return (EINVAL);
1071 	}
1072 
1073 	alloclen = *osalen;
1074 #ifdef INET6
1075 	oldv6size = 0;
1076 	/*
1077 	 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
1078 	 * if it's a v4-mapped address, so reserve the proper space
1079 	 * for it.
1080 	 */
1081 	if (alloclen == sizeof (struct sockaddr_in6) - sizeof (u_int32_t)) {
1082 		alloclen = sizeof (struct sockaddr_in6);
1083 		oldv6size = 1;
1084 	}
1085 #endif
1086 
1087 	kosa = (struct osockaddr *) malloc(alloclen, M_TEMP, M_WAITOK);
1088 
1089 	if ((error = copyin(osa, (caddr_t) kosa, *osalen))) {
1090 		DPRINTF(("error copying osa %d\n", error));
1091 		goto out;
1092 	}
1093 
1094 	bdom = linux_to_bsd_domain(kosa->sa_family);
1095 	if (bdom == -1) {
1096 		DPRINTF(("bad linux family=%d\n", kosa->sa_family));
1097 		error = EINVAL;
1098 		goto out;
1099 	}
1100 
1101 #ifdef INET6
1102 	/*
1103 	 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
1104 	 * which lacks the scope id compared with RFC2553 one. If we detect
1105 	 * the situation, reject the address and write a message to system log.
1106 	 *
1107 	 * Still accept addresses for which the scope id is not used.
1108 	 */
1109 	if (oldv6size && bdom == AF_INET6) {
1110 		sin6 = (struct sockaddr_in6 *)kosa;
1111 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
1112 		    (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
1113 		     !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
1114 		     !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
1115 		     !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
1116 		     !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
1117 			sin6->sin6_scope_id = 0;
1118 		} else {
1119 			struct proc *p = curproc;	/* XXX */
1120 			int uid = p->p_cred && p->p_ucred ?
1121 					p->p_ucred->cr_uid : -1;
1122 
1123 			log(LOG_DEBUG,
1124 			    "pid %d (%s), uid %d: obsolete pre-RFC2553 "
1125 			    "sockaddr_in6 rejected",
1126 			    p->p_pid, p->p_comm, uid);
1127 			error = EINVAL;
1128 			goto out;
1129 		}
1130 	} else
1131 #endif
1132 	if (bdom == AF_INET) {
1133 		alloclen = sizeof(struct sockaddr_in);
1134 	}
1135 
1136 	sa = (struct sockaddr *) kosa;
1137 	sa->sa_family = bdom;
1138 	sa->sa_len = alloclen;
1139 #ifdef DEBUG_LINUX
1140 	DPRINTF(("family %d, len = %d [ ", sa->sa_family, sa->sa_len));
1141 	for (bdom = 0; bdom < sizeof(sa->sa_data); bdom++)
1142 	    DPRINTF(("%02x ", sa->sa_data[bdom]));
1143 	DPRINTF(("\n"));
1144 #endif
1145 
1146 	usa = (struct sockaddr *) stackgap_alloc(p, sgp, alloclen);
1147 	if (!usa) {
1148 		error = ENOMEM;
1149 		goto out;
1150 	}
1151 
1152 	if ((error = copyout(sa, usa, alloclen))) {
1153 		DPRINTF(("error copying out socket %d\n", error));
1154 		goto out;
1155 	}
1156 
1157 	*sap = usa;
1158 
1159     out:
1160 	*osalen = alloclen;
1161 	free(kosa, M_TEMP);
1162 	return (error);
1163 }
1164 
1165 static int
1166 linux_sa_put(osa)
1167 	struct osockaddr *osa;
1168 {
1169 	struct sockaddr sa;
1170 	struct osockaddr *kosa;
1171 	int error, bdom, len;
1172 
1173 	/*
1174 	 * Only read/write the sockaddr family and length part, the rest is
1175 	 * not changed.
1176 	 */
1177 	len = sizeof(sa.sa_len) + sizeof(sa.sa_family);
1178 
1179 	error = copyin((caddr_t) osa, (caddr_t) &sa, len);
1180 	if (error)
1181 		return (error);
1182 
1183 	bdom = bsd_to_linux_domain(sa.sa_family);
1184 	if (bdom == -1)
1185 		return (EINVAL);
1186 
1187 	/* Note: we convert from sockaddr to osockaddr here, too */
1188 	kosa = (struct osockaddr *) &sa;
1189 	kosa->sa_family = bdom;
1190 	error = copyout(kosa, osa, len);
1191 	if (error)
1192 		return (error);
1193 
1194 	return (0);
1195 }
1196 
1197 int
1198 linux_sys_recv(l, v, retval)
1199 	struct lwp *l;
1200 	void *v;
1201 	register_t *retval;
1202 {
1203 	struct linux_sys_recv_args /* {
1204 		syscallarg(int) s;
1205 		syscallarg(void *) buf;
1206 		syscallarg(int) len;
1207 		syscallarg(int) flags;
1208 	} */ *uap = v;
1209 	struct sys_recvfrom_args bra;
1210 
1211 
1212 	SCARG(&bra, s) = SCARG(uap, s);
1213 	SCARG(&bra, buf) = SCARG(uap, buf);
1214 	SCARG(&bra, len) = (size_t) SCARG(uap, len);
1215 	SCARG(&bra, flags) = SCARG(uap, flags);
1216 	SCARG(&bra, from) = NULL;
1217 	SCARG(&bra, fromlenaddr) = NULL;
1218 
1219 	return (sys_recvfrom(l, &bra, retval));
1220 }
1221 
1222 int
1223 linux_sys_send(l, v, retval)
1224 	struct lwp *l;
1225 	void *v;
1226 	register_t *retval;
1227 {
1228 	struct linux_sys_send_args /* {
1229 		syscallarg(int) s;
1230 		syscallarg(caddr_t) buf;
1231 		syscallarg(int) len;
1232 		syscallarg(int) flags;
1233 	} */ *uap = v;
1234 	struct sys_sendto_args bsa;
1235 
1236 	SCARG(&bsa, s)		= SCARG(uap, s);
1237 	SCARG(&bsa, buf)	= SCARG(uap, buf);
1238 	SCARG(&bsa, len)	= SCARG(uap, len);
1239 	SCARG(&bsa, flags)	= SCARG(uap, flags);
1240 	SCARG(&bsa, to)		= NULL;
1241 	SCARG(&bsa, tolen)	= 0;
1242 
1243 	return (sys_sendto(l, &bsa, retval));
1244 }
1245 
1246 int
1247 linux_sys_accept(l, v, retval)
1248 	struct lwp *l;
1249 	void *v;
1250 	register_t *retval;
1251 {
1252 	struct linux_sys_accept_args /* {
1253 		syscallarg(int) s;
1254 		syscallarg(struct osockaddr *) name;
1255 		syscallarg(int *) anamelen;
1256 	} */ *uap = v;
1257 	int error;
1258 	struct sys_accept_args baa;
1259 
1260 	SCARG(&baa, s)		= SCARG(uap, s);
1261 	SCARG(&baa, name)	= (struct sockaddr *) SCARG(uap, name);
1262 	SCARG(&baa, anamelen)	= (unsigned int *) SCARG(uap, anamelen);
1263 
1264 	if ((error = sys_accept(l, &baa, retval)))
1265 		return (error);
1266 
1267 	if (SCARG(uap, name) && (error = linux_sa_put(SCARG(uap, name))))
1268 		return (error);
1269 
1270 	return (0);
1271 }
1272