xref: /netbsd-src/sys/netinet/raw_ip.c (revision 08c81a9c2dc8c7300e893321eb65c0925d60871c)
1 /*	$NetBSD: raw_ip.c,v 1.62 2002/08/14 00:23:33 itojun Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1988, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
65  */
66 
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.62 2002/08/14 00:23:33 itojun Exp $");
69 
70 #include "opt_ipsec.h"
71 #include "opt_mrouting.h"
72 
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/protosw.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_mroute.h>
91 #include <netinet/ip_icmp.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94 
95 #include <machine/stdarg.h>
96 
97 #ifdef IPSEC
98 #include <netinet6/ipsec.h>
99 #endif /*IPSEC*/
100 
101 struct inpcbtable rawcbtable;
102 
103 int	 rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
104     struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
105 int	 rip_bind __P((struct inpcb *, struct mbuf *));
106 int	 rip_connect __P((struct inpcb *, struct mbuf *));
107 void	 rip_disconnect __P((struct inpcb *));
108 
/*
 * Default send and receive buffer space, in bytes, for a raw IP
 * socket.  These seed rip_sendspace and rip_recvspace below.
 */
#define	RIPSNDQ		(8 * 1024)
#define	RIPRCVQ		(8 * 1024)
114 
115 /*
116  * Raw interface to IP protocol.
117  */
118 
119 /*
120  * Initialize raw connection block q.
121  */
122 void
123 rip_init()
124 {
125 
126 	in_pcbinit(&rawcbtable, 1, 1);
127 }
128 
129 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
130 
131 /*
132  * Setup generic address and protocol structures
133  * for raw_input routine, then pass them along with
134  * mbuf chain.
135  */
136 void
137 #if __STDC__
138 rip_input(struct mbuf *m, ...)
139 #else
140 rip_input(m, va_alist)
141 	struct mbuf *m;
142 	va_dcl
143 #endif
144 {
145 	int off, proto;
146 	struct ip *ip = mtod(m, struct ip *);
147 	struct inpcb *inp;
148 	struct inpcb *last = 0;
149 	struct mbuf *opts = 0;
150 	struct sockaddr_in ripsrc;
151 	va_list ap;
152 
153 	va_start(ap, m);
154 	off = va_arg(ap, int);
155 	proto = va_arg(ap, int);
156 	va_end(ap);
157 
158 	ripsrc.sin_family = AF_INET;
159 	ripsrc.sin_len = sizeof(struct sockaddr_in);
160 	ripsrc.sin_addr = ip->ip_src;
161 	ripsrc.sin_port = 0;
162 	bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
163 
164 	/*
165 	 * XXX Compatibility: programs using raw IP expect ip_len
166 	 * XXX to have the header length subtracted, and in host order.
167 	 * XXX ip_off is also expected to be host order.
168 	 */
169 	ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
170 	NTOHS(ip->ip_off);
171 
172 	CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
173 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
174 			continue;
175 		if (!in_nullhost(inp->inp_laddr) &&
176 		    !in_hosteq(inp->inp_laddr, ip->ip_dst))
177 			continue;
178 		if (!in_nullhost(inp->inp_faddr) &&
179 		    !in_hosteq(inp->inp_faddr, ip->ip_src))
180 			continue;
181 		if (last) {
182 			struct mbuf *n;
183 
184 #ifdef IPSEC
185 			/* check AH/ESP integrity. */
186 			if (ipsec4_in_reject_so(m, last->inp_socket)) {
187 				ipsecstat.in_polvio++;
188 				/* do not inject data to pcb */
189 			} else
190 #endif /*IPSEC*/
191 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
192 				if (last->inp_flags & INP_CONTROLOPTS ||
193 				    last->inp_socket->so_options & SO_TIMESTAMP)
194 					ip_savecontrol(last, &opts, ip, n);
195 				if (sbappendaddr(&last->inp_socket->so_rcv,
196 				    sintosa(&ripsrc), n, opts) == 0) {
197 					/* should notify about lost packet */
198 					m_freem(n);
199 					if (opts)
200 						m_freem(opts);
201 				} else
202 					sorwakeup(last->inp_socket);
203 				opts = NULL;
204 			}
205 		}
206 		last = inp;
207 	}
208 #ifdef IPSEC
209 	/* check AH/ESP integrity. */
210 	if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
211 		m_freem(m);
212 		ipsecstat.in_polvio++;
213 		ipstat.ips_delivered--;
214 		/* do not inject data to pcb */
215 	} else
216 #endif /*IPSEC*/
217 	if (last) {
218 		if (last->inp_flags & INP_CONTROLOPTS ||
219 		    last->inp_socket->so_options & SO_TIMESTAMP)
220 			ip_savecontrol(last, &opts, ip, m);
221 		if (sbappendaddr(&last->inp_socket->so_rcv,
222 		    sintosa(&ripsrc), m, opts) == 0) {
223 			m_freem(m);
224 			if (opts)
225 				m_freem(opts);
226 		} else
227 			sorwakeup(last->inp_socket);
228 	} else {
229 		if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
230 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
231 			    0, 0);
232 			ipstat.ips_noproto++;
233 			ipstat.ips_delivered--;
234 		} else
235 			m_freem(m);
236 	}
237 	return;
238 }
239 
240 int
241 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
242 	struct inpcbtable *table;
243 	struct in_addr faddr, laddr;
244 	int proto;
245 	int errno;
246 	void (*notify) __P((struct inpcb *, int));
247 {
248 	struct inpcb *inp, *ninp;
249 	int nmatch;
250 
251 	nmatch = 0;
252 	for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
253 	    inp != (struct inpcb *)&table->inpt_queue;
254 	    inp = ninp) {
255 		ninp = inp->inp_queue.cqe_next;
256 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
257 			continue;
258 		if (in_hosteq(inp->inp_faddr, faddr) &&
259 		    in_hosteq(inp->inp_laddr, laddr)) {
260 			(*notify)(inp, errno);
261 			nmatch++;
262 		}
263 	}
264 
265 	return nmatch;
266 }
267 
268 void *
269 rip_ctlinput(cmd, sa, v)
270 	int cmd;
271 	struct sockaddr *sa;
272 	void *v;
273 {
274 	struct ip *ip = v;
275 	void (*notify) __P((struct inpcb *, int)) = in_rtchange;
276 	int errno;
277 
278 	if (sa->sa_family != AF_INET ||
279 	    sa->sa_len != sizeof(struct sockaddr_in))
280 		return NULL;
281 	if ((unsigned)cmd >= PRC_NCMDS)
282 		return NULL;
283 	errno = inetctlerrmap[cmd];
284 	if (PRC_IS_REDIRECT(cmd))
285 		notify = in_rtchange, ip = 0;
286 	else if (cmd == PRC_HOSTDEAD)
287 		ip = 0;
288 	else if (errno == 0)
289 		return NULL;
290 	if (ip) {
291 		rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
292 		    ip->ip_src, ip->ip_p, errno, notify);
293 
294 		/* XXX mapped address case */
295 	} else
296 		in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
297 		    notify);
298 	return NULL;
299 }
300 
301 /*
302  * Generate IP header and pass packet to ip_output.
303  * Tack on options user may have setup with control call.
304  */
305 int
306 #if __STDC__
307 rip_output(struct mbuf *m, ...)
308 #else
309 rip_output(m, va_alist)
310 	struct mbuf *m;
311 	va_dcl
312 #endif
313 {
314 	struct inpcb *inp;
315 	struct ip *ip;
316 	struct mbuf *opts;
317 	int flags;
318 	va_list ap;
319 
320 	va_start(ap, m);
321 	inp = va_arg(ap, struct inpcb *);
322 	va_end(ap);
323 
324 	flags =
325 	    (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
326 	    | IP_RETURNMTU;
327 
328 	/*
329 	 * If the user handed us a complete IP packet, use it.
330 	 * Otherwise, allocate an mbuf for a header and fill it in.
331 	 */
332 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
333 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
334 			m_freem(m);
335 			return (EMSGSIZE);
336 		}
337 		M_PREPEND(m, sizeof(struct ip), M_WAIT);
338 		ip = mtod(m, struct ip *);
339 		ip->ip_tos = 0;
340 		ip->ip_off = htons(0);
341 		ip->ip_p = inp->inp_ip.ip_p;
342 		ip->ip_len = htons(m->m_pkthdr.len);
343 		ip->ip_src = inp->inp_laddr;
344 		ip->ip_dst = inp->inp_faddr;
345 		ip->ip_ttl = MAXTTL;
346 		opts = inp->inp_options;
347 	} else {
348 		if (m->m_pkthdr.len > IP_MAXPACKET) {
349 			m_freem(m);
350 			return (EMSGSIZE);
351 		}
352 		ip = mtod(m, struct ip *);
353 		/* XXX userland passes ip_len and ip_off in host order */
354 		if (m->m_pkthdr.len != ip->ip_len) {
355 			m_freem(m);
356 			return (EINVAL);
357 		}
358 		HTONS(ip->ip_len);
359 		HTONS(ip->ip_off);
360 		if (ip->ip_id == 0)
361 			ip->ip_id = htons(ip_id++);
362 		opts = NULL;
363 		/* XXX prevent ip_output from overwriting header fields */
364 		flags |= IP_RAWOUTPUT;
365 		ipstat.ips_rawout++;
366 	}
367 #ifdef IPSEC
368 	if (ipsec_setsocket(m, inp->inp_socket) != 0) {
369 		m_freem(m);
370 		return ENOBUFS;
371 	}
372 #endif /*IPSEC*/
373 	return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions,
374 	    &inp->inp_errormtu));
375 }
376 
377 /*
378  * Raw IP socket option processing.
379  */
380 int
381 rip_ctloutput(op, so, level, optname, m)
382 	int op;
383 	struct socket *so;
384 	int level, optname;
385 	struct mbuf **m;
386 {
387 	struct inpcb *inp = sotoinpcb(so);
388 	int error = 0;
389 
390 	if (level != IPPROTO_IP) {
391 		error = ENOPROTOOPT;
392 		if (op == PRCO_SETOPT && *m != 0)
393 			(void) m_free(*m);
394 	} else switch (op) {
395 
396 	case PRCO_SETOPT:
397 		switch (optname) {
398 		case IP_HDRINCL:
399 			if (*m == 0 || (*m)->m_len < sizeof (int))
400 				error = EINVAL;
401 			else {
402 				if (*mtod(*m, int *))
403 					inp->inp_flags |= INP_HDRINCL;
404 				else
405 					inp->inp_flags &= ~INP_HDRINCL;
406 			}
407 			if (*m != 0)
408 				(void) m_free(*m);
409 			break;
410 
411 #ifdef MROUTING
412 		case MRT_INIT:
413 		case MRT_DONE:
414 		case MRT_ADD_VIF:
415 		case MRT_DEL_VIF:
416 		case MRT_ADD_MFC:
417 		case MRT_DEL_MFC:
418 		case MRT_ASSERT:
419 			error = ip_mrouter_set(so, optname, m);
420 			break;
421 #endif
422 
423 		default:
424 			error = ip_ctloutput(op, so, level, optname, m);
425 			break;
426 		}
427 		break;
428 
429 	case PRCO_GETOPT:
430 		switch (optname) {
431 		case IP_HDRINCL:
432 			*m = m_get(M_WAIT, M_SOOPTS);
433 			(*m)->m_len = sizeof (int);
434 			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
435 			break;
436 
437 #ifdef MROUTING
438 		case MRT_VERSION:
439 		case MRT_ASSERT:
440 			error = ip_mrouter_get(so, optname, m);
441 			break;
442 #endif
443 
444 		default:
445 			error = ip_ctloutput(op, so, level, optname, m);
446 			break;
447 		}
448 		break;
449 	}
450 	return (error);
451 }
452 
453 int
454 rip_bind(inp, nam)
455 	struct inpcb *inp;
456 	struct mbuf *nam;
457 {
458 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
459 
460 	if (nam->m_len != sizeof(*addr))
461 		return (EINVAL);
462 	if (TAILQ_FIRST(&ifnet) == 0)
463 		return (EADDRNOTAVAIL);
464 	if (addr->sin_family != AF_INET &&
465 	    addr->sin_family != AF_IMPLINK)
466 		return (EAFNOSUPPORT);
467 	if (!in_nullhost(addr->sin_addr) &&
468 	    ifa_ifwithaddr(sintosa(addr)) == 0)
469 		return (EADDRNOTAVAIL);
470 	inp->inp_laddr = addr->sin_addr;
471 	return (0);
472 }
473 
474 int
475 rip_connect(inp, nam)
476 	struct inpcb *inp;
477 	struct mbuf *nam;
478 {
479 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
480 
481 	if (nam->m_len != sizeof(*addr))
482 		return (EINVAL);
483 	if (TAILQ_FIRST(&ifnet) == 0)
484 		return (EADDRNOTAVAIL);
485 	if (addr->sin_family != AF_INET &&
486 	    addr->sin_family != AF_IMPLINK)
487 		return (EAFNOSUPPORT);
488 	inp->inp_faddr = addr->sin_addr;
489 	return (0);
490 }
491 
492 void
493 rip_disconnect(inp)
494 	struct inpcb *inp;
495 {
496 
497 	inp->inp_faddr = zeroin_addr;
498 }
499 
500 u_long	rip_sendspace = RIPSNDQ;
501 u_long	rip_recvspace = RIPRCVQ;
502 
503 /*ARGSUSED*/
504 int
505 rip_usrreq(so, req, m, nam, control, p)
506 	struct socket *so;
507 	int req;
508 	struct mbuf *m, *nam, *control;
509 	struct proc *p;
510 {
511 	struct inpcb *inp;
512 	int s;
513 	int error = 0;
514 #ifdef MROUTING
515 	extern struct socket *ip_mrouter;
516 #endif
517 
518 	if (req == PRU_CONTROL)
519 		return (in_control(so, (long)m, (caddr_t)nam,
520 		    (struct ifnet *)control, p));
521 
522 	if (req == PRU_PURGEIF) {
523 		in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
524 		in_purgeif((struct ifnet *)control);
525 		in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
526 		return (0);
527 	}
528 
529 	s = splsoftnet();
530 	inp = sotoinpcb(so);
531 #ifdef DIAGNOSTIC
532 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
533 		panic("rip_usrreq: unexpected control mbuf");
534 #endif
535 	if (inp == 0 && req != PRU_ATTACH) {
536 		error = EINVAL;
537 		goto release;
538 	}
539 
540 	switch (req) {
541 
542 	case PRU_ATTACH:
543 		if (inp != 0) {
544 			error = EISCONN;
545 			break;
546 		}
547 		if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
548 			error = EACCES;
549 			break;
550 		}
551 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
552 			error = soreserve(so, rip_sendspace, rip_recvspace);
553 			if (error)
554 				break;
555 		}
556 		error = in_pcballoc(so, &rawcbtable);
557 		if (error)
558 			break;
559 		inp = sotoinpcb(so);
560 		inp->inp_ip.ip_p = (long)nam;
561 		break;
562 
563 	case PRU_DETACH:
564 #ifdef MROUTING
565 		if (so == ip_mrouter)
566 			ip_mrouter_done();
567 #endif
568 		in_pcbdetach(inp);
569 		break;
570 
571 	case PRU_BIND:
572 		error = rip_bind(inp, nam);
573 		break;
574 
575 	case PRU_LISTEN:
576 		error = EOPNOTSUPP;
577 		break;
578 
579 	case PRU_CONNECT:
580 		error = rip_connect(inp, nam);
581 		if (error)
582 			break;
583 		soisconnected(so);
584 		break;
585 
586 	case PRU_CONNECT2:
587 		error = EOPNOTSUPP;
588 		break;
589 
590 	case PRU_DISCONNECT:
591 		soisdisconnected(so);
592 		rip_disconnect(inp);
593 		break;
594 
595 	/*
596 	 * Mark the connection as being incapable of further input.
597 	 */
598 	case PRU_SHUTDOWN:
599 		socantsendmore(so);
600 		break;
601 
602 	case PRU_RCVD:
603 		error = EOPNOTSUPP;
604 		break;
605 
606 	/*
607 	 * Ship a packet out.  The appropriate raw output
608 	 * routine handles any massaging necessary.
609 	 */
610 	case PRU_SEND:
611 		if (control && control->m_len) {
612 			m_freem(control);
613 			m_freem(m);
614 			error = EINVAL;
615 			break;
616 		}
617 	{
618 		if (nam) {
619 			if ((so->so_state & SS_ISCONNECTED) != 0) {
620 				error = EISCONN;
621 				goto die;
622 			}
623 			error = rip_connect(inp, nam);
624 			if (error) {
625 			die:
626 				m_freem(m);
627 				break;
628 			}
629 		} else {
630 			if ((so->so_state & SS_ISCONNECTED) == 0) {
631 				error = ENOTCONN;
632 				goto die;
633 			}
634 		}
635 		error = rip_output(m, inp);
636 		if (nam)
637 			rip_disconnect(inp);
638 	}
639 		break;
640 
641 	case PRU_SENSE:
642 		/*
643 		 * stat: don't bother with a blocksize.
644 		 */
645 		splx(s);
646 		return (0);
647 
648 	case PRU_RCVOOB:
649 		error = EOPNOTSUPP;
650 		break;
651 
652 	case PRU_SENDOOB:
653 		m_freem(control);
654 		m_freem(m);
655 		error = EOPNOTSUPP;
656 		break;
657 
658 	case PRU_SOCKADDR:
659 		in_setsockaddr(inp, nam);
660 		break;
661 
662 	case PRU_PEERADDR:
663 		in_setpeeraddr(inp, nam);
664 		break;
665 
666 	default:
667 		panic("rip_usrreq");
668 	}
669 
670 release:
671 	splx(s);
672 	return (error);
673 }
674