xref: /openbsd-src/sys/netinet/raw_ip.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: raw_ip.c,v 1.73 2014/06/02 10:41:40 mpi Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/route.h>
80 #include <net/pfvar.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/in_systm.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/ip_var.h>
87 #include <netinet/in_pcb.h>
88 #include <netinet/in_var.h>
89 #include <netinet/ip_icmp.h>
90 
91 #include "pf.h"
92 
93 struct inpcbtable rawcbtable;
94 
95 /*
96  * Nominal space allocated to a raw ip socket.
97  */
98 #define	RIPSNDQ		8192
99 #define	RIPRCVQ		8192
100 
101 /*
102  * Raw interface to IP protocol.
103  */
104 
105 /*
106  * Initialize raw connection block q.
107  */
108 void
109 rip_init()
110 {
111 
112 	in_pcbinit(&rawcbtable, 1);
113 }
114 
115 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
116 
117 /*
118  * Setup generic address and protocol structures
119  * for raw_input routine, then pass them along with
120  * mbuf chain.
121  */
122 void
123 rip_input(struct mbuf *m, ...)
124 {
125 	struct ip *ip = mtod(m, struct ip *);
126 	struct inpcb *inp, *last = NULL;
127 	struct mbuf *opts = NULL;
128 
129 	ripsrc.sin_addr = ip->ip_src;
130 	TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
131 		if (inp->inp_socket->so_state & SS_CANTRCVMORE)
132 			continue;
133 #ifdef INET6
134 		if (inp->inp_flags & INP_IPV6)
135 			continue;
136 #endif
137 		if (rtable_l2(inp->inp_rtableid) !=
138 		    rtable_l2(m->m_pkthdr.ph_rtableid))
139 			continue;
140 
141 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
142 			continue;
143 #if NPF > 0
144 		if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
145 			struct pf_divert *divert;
146 
147 			/* XXX rdomain support */
148 			if ((divert = pf_find_divert(m)) == NULL)
149 				continue;
150 			if (inp->inp_laddr.s_addr != divert->addr.v4.s_addr)
151 				continue;
152 		} else
153 #endif
154 		if (inp->inp_laddr.s_addr &&
155 		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
156 			continue;
157 		if (inp->inp_faddr.s_addr &&
158 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
159 			continue;
160 		if (last) {
161 			struct mbuf *n;
162 
163 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
164 				if (last->inp_flags & INP_CONTROLOPTS ||
165 				    last->inp_socket->so_options & SO_TIMESTAMP)
166 					ip_savecontrol(last, &opts, ip, n);
167 				if (sbappendaddr(&last->inp_socket->so_rcv,
168 				    sintosa(&ripsrc), n, opts) == 0) {
169 					/* should notify about lost packet */
170 					m_freem(n);
171 					if (opts)
172 						m_freem(opts);
173 				} else
174 					sorwakeup(last->inp_socket);
175 				opts = NULL;
176 			}
177 		}
178 		last = inp;
179 	}
180 	if (last) {
181 		if (last->inp_flags & INP_CONTROLOPTS ||
182 		    last->inp_socket->so_options & SO_TIMESTAMP)
183 			ip_savecontrol(last, &opts, ip, m);
184 		if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m,
185 		    opts) == 0) {
186 			m_freem(m);
187 			if (opts)
188 				m_freem(opts);
189 		} else
190 			sorwakeup(last->inp_socket);
191 	} else {
192 		if (ip->ip_p != IPPROTO_ICMP)
193 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
194 		else
195 			m_freem(m);
196 		ipstat.ips_noproto++;
197 		ipstat.ips_delivered--;
198 	}
199 }
200 
201 /*
202  * Generate IP header and pass packet to ip_output.
203  * Tack on options user may have setup with control call.
204  */
205 int
206 rip_output(struct mbuf *m, ...)
207 {
208 	struct socket *so;
209 	u_long dst;
210 	struct ip *ip;
211 	struct inpcb *inp;
212 	int flags, error;
213 	va_list ap;
214 
215 	va_start(ap, m);
216 	so = va_arg(ap, struct socket *);
217 	dst = va_arg(ap, u_long);
218 	va_end(ap);
219 
220 	inp = sotoinpcb(so);
221 	flags = IP_ALLOWBROADCAST;
222 
223 	/*
224 	 * If the user handed us a complete IP packet, use it.
225 	 * Otherwise, allocate an mbuf for a header and fill it in.
226 	 */
227 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
228 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
229 			m_freem(m);
230 			return (EMSGSIZE);
231 		}
232 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
233 		if (!m)
234 			return (ENOBUFS);
235 		ip = mtod(m, struct ip *);
236 		ip->ip_tos = inp->inp_ip.ip_tos;
237 		ip->ip_off = htons(0);
238 		ip->ip_p = inp->inp_ip.ip_p;
239 		ip->ip_len = htons(m->m_pkthdr.len);
240 		ip->ip_src = inp->inp_laddr;
241 		ip->ip_dst.s_addr = dst;
242 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
243 	} else {
244 		if (m->m_pkthdr.len > IP_MAXPACKET) {
245 			m_freem(m);
246 			return (EMSGSIZE);
247 		}
248 		if (m->m_pkthdr.len < sizeof(struct ip)) {
249 			m_freem(m);
250 			return (EINVAL);
251 		}
252 		ip = mtod(m, struct ip *);
253 		/*
254 		 * don't allow both user specified and setsockopt options,
255 		 * and don't allow packet length sizes that will crash
256 		 */
257 		if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) ||
258 		    ntohs(ip->ip_len) > m->m_pkthdr.len ||
259 		    ntohs(ip->ip_len) < ip->ip_hl << 2) {
260 			m_freem(m);
261 			return (EINVAL);
262 		}
263 		if (ip->ip_id == 0) {
264 			ip->ip_id = htons(ip_randomid());
265 		}
266 		/* XXX prevent ip_output from overwriting header fields */
267 		flags |= IP_RAWOUTPUT;
268 		ipstat.ips_rawout++;
269 	}
270 #ifdef INET6
271 	/*
272 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
273 	 *             multicast options, if it does, the last parameter to
274 	 *             ip_output should be guarded against v6/v4 problems.
275 	 */
276 #endif
277 	/* force routing table */
278 	m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
279 
280 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
281 	    inp->inp_moptions, inp, 0);
282 	if (error == EACCES)	/* translate pf(4) error for userland */
283 		error = EHOSTUNREACH;
284 	return (error);
285 }
286 
287 /*
288  * Raw IP socket option processing.
289  */
290 int
291 rip_ctloutput(int op, struct socket *so, int level, int optname,
292     struct mbuf **m)
293 {
294 	struct inpcb *inp = sotoinpcb(so);
295 	int error = 0;
296 	int dir;
297 
298 	if (level != IPPROTO_IP) {
299 		if (op == PRCO_SETOPT && *m)
300 			(void) m_free(*m);
301 		return (EINVAL);
302 	}
303 
304 	switch (optname) {
305 
306 	case IP_HDRINCL:
307 		error = 0;
308 		if (op == PRCO_SETOPT) {
309 			if (*m == 0 || (*m)->m_len < sizeof (int))
310 				error = EINVAL;
311 			else if (*mtod(*m, int *))
312 				inp->inp_flags |= INP_HDRINCL;
313 			else
314 				inp->inp_flags &= ~INP_HDRINCL;
315 			if (*m)
316 				(void)m_free(*m);
317 		} else {
318 			*m = m_get(M_WAIT, M_SOOPTS);
319 			(*m)->m_len = sizeof(int);
320 			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
321 		}
322 		return (error);
323 
324 	case IP_DIVERTFL:
325 		switch (op) {
326 		case PRCO_SETOPT:
327 			if (*m == 0 || (*m)->m_len < sizeof (int)) {
328 				error = EINVAL;
329 				break;
330 			}
331 			dir = *mtod(*m, int *);
332 			if (inp->inp_divertfl > 0)
333 				error = ENOTSUP;
334 			else if ((dir & IPPROTO_DIVERT_RESP) ||
335 				   (dir & IPPROTO_DIVERT_INIT))
336 				inp->inp_divertfl = dir;
337 			else
338 				error = EINVAL;
339 
340 			break;
341 
342 		case PRCO_GETOPT:
343 			*m = m_get(M_WAIT, M_SOOPTS);
344 			(*m)->m_len = sizeof(int);
345 			*mtod(*m, int *) = inp->inp_divertfl;
346 			break;
347 
348 		default:
349 			error = EINVAL;
350 			break;
351 		}
352 
353 		if (op == PRCO_SETOPT && *m)
354 			(void)m_free(*m);
355 		return (error);
356 
357 	case MRT_INIT:
358 	case MRT_DONE:
359 	case MRT_ADD_VIF:
360 	case MRT_DEL_VIF:
361 	case MRT_ADD_MFC:
362 	case MRT_DEL_MFC:
363 	case MRT_VERSION:
364 	case MRT_ASSERT:
365 	case MRT_API_SUPPORT:
366 	case MRT_API_CONFIG:
367 	case MRT_ADD_BW_UPCALL:
368 	case MRT_DEL_BW_UPCALL:
369 #ifdef MROUTING
370 		switch (op) {
371 		case PRCO_SETOPT:
372 			error = ip_mrouter_set(so, optname, m);
373 			break;
374 		case PRCO_GETOPT:
375 			error = ip_mrouter_get(so, optname, m);
376 			break;
377 		default:
378 			error = EINVAL;
379 			break;
380 		}
381 		return (error);
382 #else
383 		if (op == PRCO_SETOPT && *m)
384 			m_free(*m);
385 		return (EOPNOTSUPP);
386 #endif
387 	}
388 	return (ip_ctloutput(op, so, level, optname, m));
389 }
390 
391 u_long	rip_sendspace = RIPSNDQ;
392 u_long	rip_recvspace = RIPRCVQ;
393 
394 /*ARGSUSED*/
395 int
396 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
397     struct mbuf *control, struct proc *p)
398 {
399 	struct inpcb *inp = sotoinpcb(so);
400 	int error = 0;
401 	int s;
402 
403 	if (req == PRU_CONTROL)
404 		return (in_control(so, (u_long)m, (caddr_t)nam,
405 		    (struct ifnet *)control));
406 
407 	if (inp == NULL && req != PRU_ATTACH) {
408 		error = EINVAL;
409 		goto release;
410 	}
411 
412 	switch (req) {
413 
414 	case PRU_ATTACH:
415 		if (inp)
416 			panic("rip_attach");
417 		if ((so->so_state & SS_PRIV) == 0) {
418 			error = EACCES;
419 			break;
420 		}
421 		if ((long)nam < 0 || (long)nam >= IPPROTO_MAX) {
422 			error = EPROTONOSUPPORT;
423 			break;
424 		}
425 		s = splsoftnet();
426 		if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
427 		    (error = in_pcballoc(so, &rawcbtable))) {
428 			splx(s);
429 			break;
430 		}
431 		splx(s);
432 		inp = sotoinpcb(so);
433 		inp->inp_ip.ip_p = (long)nam;
434 		break;
435 
436 	case PRU_DISCONNECT:
437 		if ((so->so_state & SS_ISCONNECTED) == 0) {
438 			error = ENOTCONN;
439 			break;
440 		}
441 		/* FALLTHROUGH */
442 	case PRU_ABORT:
443 		soisdisconnected(so);
444 		/* FALLTHROUGH */
445 	case PRU_DETACH:
446 		if (inp == 0)
447 			panic("rip_detach");
448 #ifdef MROUTING
449 		if (so == ip_mrouter)
450 			ip_mrouter_done();
451 #endif
452 		in_pcbdetach(inp);
453 		break;
454 
455 	case PRU_BIND:
456 	    {
457 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
458 
459 		if (nam->m_len != sizeof(*addr)) {
460 			error = EINVAL;
461 			break;
462 		}
463 		if (TAILQ_EMPTY(&ifnet) || addr->sin_family != AF_INET) {
464 			error = EADDRNOTAVAIL;
465 			break;
466 		}
467 		if (!((so->so_options & SO_BINDANY) ||
468 		    addr->sin_addr.s_addr == INADDR_ANY ||
469 		    addr->sin_addr.s_addr == INADDR_BROADCAST ||
470 		    ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) {
471 			error = EADDRNOTAVAIL;
472 			break;
473 		}
474 		inp->inp_laddr = addr->sin_addr;
475 		break;
476 	    }
477 	case PRU_CONNECT:
478 	    {
479 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
480 
481 		if (nam->m_len != sizeof(*addr)) {
482 			error = EINVAL;
483 			break;
484 		}
485 		if (TAILQ_EMPTY(&ifnet)) {
486 			error = EADDRNOTAVAIL;
487 			break;
488 		}
489 		if (addr->sin_family != AF_INET) {
490 			error = EAFNOSUPPORT;
491 			break;
492 		}
493 		inp->inp_faddr = addr->sin_addr;
494 		soisconnected(so);
495 		break;
496 	    }
497 
498 	case PRU_CONNECT2:
499 		error = EOPNOTSUPP;
500 		break;
501 
502 	/*
503 	 * Mark the connection as being incapable of further input.
504 	 */
505 	case PRU_SHUTDOWN:
506 		socantsendmore(so);
507 		break;
508 
509 	/*
510 	 * Ship a packet out.  The appropriate raw output
511 	 * routine handles any massaging necessary.
512 	 */
513 	case PRU_SEND:
514 	    {
515 		u_int32_t dst;
516 
517 		if (so->so_state & SS_ISCONNECTED) {
518 			if (nam) {
519 				error = EISCONN;
520 				break;
521 			}
522 			dst = inp->inp_faddr.s_addr;
523 		} else {
524 			if (nam == NULL) {
525 				error = ENOTCONN;
526 				break;
527 			}
528 			dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
529 		}
530 #ifdef IPSEC
531 		/* XXX Find an IPsec TDB */
532 #endif
533 		error = rip_output(m, so, dst);
534 		m = NULL;
535 		break;
536 	    }
537 
538 	case PRU_SENSE:
539 		/*
540 		 * stat: don't bother with a blocksize.
541 		 */
542 		return (0);
543 
544 	/*
545 	 * Not supported.
546 	 */
547 	case PRU_RCVOOB:
548 	case PRU_RCVD:
549 	case PRU_LISTEN:
550 	case PRU_ACCEPT:
551 	case PRU_SENDOOB:
552 		error = EOPNOTSUPP;
553 		break;
554 
555 	case PRU_SOCKADDR:
556 		in_setsockaddr(inp, nam);
557 		break;
558 
559 	case PRU_PEERADDR:
560 		in_setpeeraddr(inp, nam);
561 		break;
562 
563 	default:
564 		panic("rip_usrreq");
565 	}
566 release:
567 	if (m != NULL)
568 		m_freem(m);
569 	return (error);
570 }
571