xref: /netbsd-src/sys/netinet/raw_ip.c (revision 220b5c059a84c51ea44107ea8951a57ffaecdc8c)
1 /*	$NetBSD: raw_ip.c,v 1.59 2001/11/13 00:32:40 lukem Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1988, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
65  */
66 
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.59 2001/11/13 00:32:40 lukem Exp $");
69 
70 #include "opt_ipsec.h"
71 #include "opt_mrouting.h"
72 
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/protosw.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_mroute.h>
91 #include <netinet/ip_icmp.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94 
95 #include <machine/stdarg.h>
96 
97 #ifdef IPSEC
98 #include <netinet6/ipsec.h>
99 #endif /*IPSEC*/
100 
101 struct inpcbtable rawcbtable;
102 
103 int	 rip_bind __P((struct inpcb *, struct mbuf *));
104 int	 rip_connect __P((struct inpcb *, struct mbuf *));
105 void	 rip_disconnect __P((struct inpcb *));
106 
107 /*
108  * Nominal space allocated to a raw ip socket.
109  */
110 #define	RIPSNDQ		8192
111 #define	RIPRCVQ		8192
112 
113 /*
114  * Raw interface to IP protocol.
115  */
116 
117 /*
118  * Initialize raw connection block q.
119  */
120 void
121 rip_init()
122 {
123 
124 	in_pcbinit(&rawcbtable, 1, 1);
125 }
126 
127 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
128 
129 /*
130  * Setup generic address and protocol structures
131  * for raw_input routine, then pass them along with
132  * mbuf chain.
133  */
134 void
135 #if __STDC__
136 rip_input(struct mbuf *m, ...)
137 #else
138 rip_input(m, va_alist)
139 	struct mbuf *m;
140 	va_dcl
141 #endif
142 {
143 	int off, proto;
144 	struct ip *ip = mtod(m, struct ip *);
145 	struct inpcb *inp;
146 	struct inpcb *last = 0;
147 	struct mbuf *opts = 0;
148 	struct sockaddr_in ripsrc;
149 	va_list ap;
150 
151 	va_start(ap, m);
152 	off = va_arg(ap, int);
153 	proto = va_arg(ap, int);
154 	va_end(ap);
155 
156 	ripsrc.sin_family = AF_INET;
157 	ripsrc.sin_len = sizeof(struct sockaddr_in);
158 	ripsrc.sin_addr = ip->ip_src;
159 	ripsrc.sin_port = 0;
160 	bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
161 
162 	/*
163 	 * XXX Compatibility: programs using raw IP expect ip_len
164 	 * XXX to have the header length subtracted.
165 	 */
166 	ip->ip_len -= ip->ip_hl << 2;
167 
168 	CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
169 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
170 			continue;
171 		if (!in_nullhost(inp->inp_laddr) &&
172 		    !in_hosteq(inp->inp_laddr, ip->ip_dst))
173 			continue;
174 		if (!in_nullhost(inp->inp_faddr) &&
175 		    !in_hosteq(inp->inp_faddr, ip->ip_src))
176 			continue;
177 		if (last) {
178 			struct mbuf *n;
179 
180 #ifdef IPSEC
181 			/* check AH/ESP integrity. */
182 			if (ipsec4_in_reject_so(m, last->inp_socket)) {
183 				ipsecstat.in_polvio++;
184 				/* do not inject data to pcb */
185 			} else
186 #endif /*IPSEC*/
187 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
188 				if (last->inp_flags & INP_CONTROLOPTS ||
189 				    last->inp_socket->so_options & SO_TIMESTAMP)
190 					ip_savecontrol(last, &opts, ip, n);
191 				if (sbappendaddr(&last->inp_socket->so_rcv,
192 				    sintosa(&ripsrc), n, opts) == 0) {
193 					/* should notify about lost packet */
194 					m_freem(n);
195 					if (opts)
196 						m_freem(opts);
197 				} else
198 					sorwakeup(last->inp_socket);
199 				opts = NULL;
200 			}
201 		}
202 		last = inp;
203 	}
204 #ifdef IPSEC
205 	/* check AH/ESP integrity. */
206 	if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
207 		m_freem(m);
208 		ipsecstat.in_polvio++;
209 		ipstat.ips_delivered--;
210 		/* do not inject data to pcb */
211 	} else
212 #endif /*IPSEC*/
213 	if (last) {
214 		if (last->inp_flags & INP_CONTROLOPTS ||
215 		    last->inp_socket->so_options & SO_TIMESTAMP)
216 			ip_savecontrol(last, &opts, ip, m);
217 		if (sbappendaddr(&last->inp_socket->so_rcv,
218 		    sintosa(&ripsrc), m, opts) == 0) {
219 			m_freem(m);
220 			if (opts)
221 				m_freem(opts);
222 		} else
223 			sorwakeup(last->inp_socket);
224 	} else {
225 		if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
226 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
227 			    0, 0);
228 			ipstat.ips_noproto++;
229 			ipstat.ips_delivered--;
230 		} else
231 			m_freem(m);
232 	}
233 	return;
234 }
235 
236 /*
237  * Generate IP header and pass packet to ip_output.
238  * Tack on options user may have setup with control call.
239  */
240 int
241 #if __STDC__
242 rip_output(struct mbuf *m, ...)
243 #else
244 rip_output(m, va_alist)
245 	struct mbuf *m;
246 	va_dcl
247 #endif
248 {
249 	struct inpcb *inp;
250 	struct ip *ip;
251 	struct mbuf *opts;
252 	int flags;
253 	va_list ap;
254 
255 	va_start(ap, m);
256 	inp = va_arg(ap, struct inpcb *);
257 	va_end(ap);
258 
259 	flags =
260 	    (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
261 	    | IP_RETURNMTU;
262 
263 	/*
264 	 * If the user handed us a complete IP packet, use it.
265 	 * Otherwise, allocate an mbuf for a header and fill it in.
266 	 */
267 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
268 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
269 			m_freem(m);
270 			return (EMSGSIZE);
271 		}
272 		M_PREPEND(m, sizeof(struct ip), M_WAIT);
273 		ip = mtod(m, struct ip *);
274 		ip->ip_tos = 0;
275 		ip->ip_off = 0;
276 		ip->ip_p = inp->inp_ip.ip_p;
277 		ip->ip_len = m->m_pkthdr.len;
278 		ip->ip_src = inp->inp_laddr;
279 		ip->ip_dst = inp->inp_faddr;
280 		ip->ip_ttl = MAXTTL;
281 		opts = inp->inp_options;
282 	} else {
283 		if (m->m_pkthdr.len > IP_MAXPACKET) {
284 			m_freem(m);
285 			return (EMSGSIZE);
286 		}
287 		ip = mtod(m, struct ip *);
288 		if (m->m_pkthdr.len != ip->ip_len) {
289 			m_freem(m);
290 			return (EINVAL);
291 		}
292 		if (ip->ip_id == 0)
293 			ip->ip_id = htons(ip_id++);
294 		opts = NULL;
295 		/* XXX prevent ip_output from overwriting header fields */
296 		flags |= IP_RAWOUTPUT;
297 		ipstat.ips_rawout++;
298 	}
299 #ifdef IPSEC
300 	if (ipsec_setsocket(m, inp->inp_socket) != 0) {
301 		m_freem(m);
302 		return ENOBUFS;
303 	}
304 #endif /*IPSEC*/
305 	return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, &inp->inp_errormtu));
306 }
307 
308 /*
309  * Raw IP socket option processing.
310  */
311 int
312 rip_ctloutput(op, so, level, optname, m)
313 	int op;
314 	struct socket *so;
315 	int level, optname;
316 	struct mbuf **m;
317 {
318 	struct inpcb *inp = sotoinpcb(so);
319 	int error = 0;
320 
321 	if (level != IPPROTO_IP) {
322 		error = ENOPROTOOPT;
323 		if (op == PRCO_SETOPT && *m != 0)
324 			(void) m_free(*m);
325 	} else switch (op) {
326 
327 	case PRCO_SETOPT:
328 		switch (optname) {
329 		case IP_HDRINCL:
330 			if (*m == 0 || (*m)->m_len < sizeof (int))
331 				error = EINVAL;
332 			else {
333 				if (*mtod(*m, int *))
334 					inp->inp_flags |= INP_HDRINCL;
335 				else
336 					inp->inp_flags &= ~INP_HDRINCL;
337 			}
338 			if (*m != 0)
339 				(void) m_free(*m);
340 			break;
341 
342 #ifdef MROUTING
343 		case MRT_INIT:
344 		case MRT_DONE:
345 		case MRT_ADD_VIF:
346 		case MRT_DEL_VIF:
347 		case MRT_ADD_MFC:
348 		case MRT_DEL_MFC:
349 		case MRT_ASSERT:
350 			error = ip_mrouter_set(so, optname, m);
351 			break;
352 #endif
353 
354 		default:
355 			error = ip_ctloutput(op, so, level, optname, m);
356 			break;
357 		}
358 		break;
359 
360 	case PRCO_GETOPT:
361 		switch (optname) {
362 		case IP_HDRINCL:
363 			*m = m_get(M_WAIT, M_SOOPTS);
364 			(*m)->m_len = sizeof (int);
365 			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
366 			break;
367 
368 #ifdef MROUTING
369 		case MRT_VERSION:
370 		case MRT_ASSERT:
371 			error = ip_mrouter_get(so, optname, m);
372 			break;
373 #endif
374 
375 		default:
376 			error = ip_ctloutput(op, so, level, optname, m);
377 			break;
378 		}
379 		break;
380 	}
381 	return (error);
382 }
383 
384 int
385 rip_bind(inp, nam)
386 	struct inpcb *inp;
387 	struct mbuf *nam;
388 {
389 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
390 
391 	if (nam->m_len != sizeof(*addr))
392 		return (EINVAL);
393 	if (TAILQ_FIRST(&ifnet) == 0)
394 		return (EADDRNOTAVAIL);
395 	if (addr->sin_family != AF_INET &&
396 	    addr->sin_family != AF_IMPLINK)
397 		return (EAFNOSUPPORT);
398 	if (!in_nullhost(addr->sin_addr) &&
399 	    ifa_ifwithaddr(sintosa(addr)) == 0)
400 		return (EADDRNOTAVAIL);
401 	inp->inp_laddr = addr->sin_addr;
402 	return (0);
403 }
404 
405 int
406 rip_connect(inp, nam)
407 	struct inpcb *inp;
408 	struct mbuf *nam;
409 {
410 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
411 
412 	if (nam->m_len != sizeof(*addr))
413 		return (EINVAL);
414 	if (TAILQ_FIRST(&ifnet) == 0)
415 		return (EADDRNOTAVAIL);
416 	if (addr->sin_family != AF_INET &&
417 	    addr->sin_family != AF_IMPLINK)
418 		return (EAFNOSUPPORT);
419 	inp->inp_faddr = addr->sin_addr;
420 	return (0);
421 }
422 
423 void
424 rip_disconnect(inp)
425 	struct inpcb *inp;
426 {
427 
428 	inp->inp_faddr = zeroin_addr;
429 }
430 
431 u_long	rip_sendspace = RIPSNDQ;
432 u_long	rip_recvspace = RIPRCVQ;
433 
434 /*ARGSUSED*/
435 int
436 rip_usrreq(so, req, m, nam, control, p)
437 	struct socket *so;
438 	int req;
439 	struct mbuf *m, *nam, *control;
440 	struct proc *p;
441 {
442 	struct inpcb *inp;
443 	int s;
444 	int error = 0;
445 #ifdef MROUTING
446 	extern struct socket *ip_mrouter;
447 #endif
448 
449 	if (req == PRU_CONTROL)
450 		return (in_control(so, (long)m, (caddr_t)nam,
451 		    (struct ifnet *)control, p));
452 
453 	if (req == PRU_PURGEIF) {
454 		in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
455 		in_purgeif((struct ifnet *)control);
456 		in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
457 		return (0);
458 	}
459 
460 	s = splsoftnet();
461 	inp = sotoinpcb(so);
462 #ifdef DIAGNOSTIC
463 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
464 		panic("rip_usrreq: unexpected control mbuf");
465 #endif
466 	if (inp == 0 && req != PRU_ATTACH) {
467 		error = EINVAL;
468 		goto release;
469 	}
470 
471 	switch (req) {
472 
473 	case PRU_ATTACH:
474 		if (inp != 0) {
475 			error = EISCONN;
476 			break;
477 		}
478 		if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
479 			error = EACCES;
480 			break;
481 		}
482 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
483 			error = soreserve(so, rip_sendspace, rip_recvspace);
484 			if (error)
485 				break;
486 		}
487 		error = in_pcballoc(so, &rawcbtable);
488 		if (error)
489 			break;
490 		inp = sotoinpcb(so);
491 		inp->inp_ip.ip_p = (long)nam;
492 		break;
493 
494 	case PRU_DETACH:
495 #ifdef MROUTING
496 		if (so == ip_mrouter)
497 			ip_mrouter_done();
498 #endif
499 		in_pcbdetach(inp);
500 		break;
501 
502 	case PRU_BIND:
503 		error = rip_bind(inp, nam);
504 		break;
505 
506 	case PRU_LISTEN:
507 		error = EOPNOTSUPP;
508 		break;
509 
510 	case PRU_CONNECT:
511 		error = rip_connect(inp, nam);
512 		if (error)
513 			break;
514 		soisconnected(so);
515 		break;
516 
517 	case PRU_CONNECT2:
518 		error = EOPNOTSUPP;
519 		break;
520 
521 	case PRU_DISCONNECT:
522 		soisdisconnected(so);
523 		rip_disconnect(inp);
524 		break;
525 
526 	/*
527 	 * Mark the connection as being incapable of further input.
528 	 */
529 	case PRU_SHUTDOWN:
530 		socantsendmore(so);
531 		break;
532 
533 	case PRU_RCVD:
534 		error = EOPNOTSUPP;
535 		break;
536 
537 	/*
538 	 * Ship a packet out.  The appropriate raw output
539 	 * routine handles any massaging necessary.
540 	 */
541 	case PRU_SEND:
542 		if (control && control->m_len) {
543 			m_freem(control);
544 			m_freem(m);
545 			error = EINVAL;
546 			break;
547 		}
548 	{
549 		if (nam) {
550 			if ((so->so_state & SS_ISCONNECTED) != 0) {
551 				error = EISCONN;
552 				goto die;
553 			}
554 			error = rip_connect(inp, nam);
555 			if (error) {
556 			die:
557 				m_freem(m);
558 				break;
559 			}
560 		} else {
561 			if ((so->so_state & SS_ISCONNECTED) == 0) {
562 				error = ENOTCONN;
563 				goto die;
564 			}
565 		}
566 		error = rip_output(m, inp);
567 		if (nam)
568 			rip_disconnect(inp);
569 	}
570 		break;
571 
572 	case PRU_SENSE:
573 		/*
574 		 * stat: don't bother with a blocksize.
575 		 */
576 		splx(s);
577 		return (0);
578 
579 	case PRU_RCVOOB:
580 		error = EOPNOTSUPP;
581 		break;
582 
583 	case PRU_SENDOOB:
584 		m_freem(control);
585 		m_freem(m);
586 		error = EOPNOTSUPP;
587 		break;
588 
589 	case PRU_SOCKADDR:
590 		in_setsockaddr(inp, nam);
591 		break;
592 
593 	case PRU_PEERADDR:
594 		in_setpeeraddr(inp, nam);
595 		break;
596 
597 	default:
598 		panic("rip_usrreq");
599 	}
600 
601 release:
602 	splx(s);
603 	return (error);
604 }
605