xref: /openbsd-src/sys/netinet/raw_ip.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: raw_ip.c,v 1.86 2016/03/07 18:44:00 naddy Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/if_var.h>
80 #include <net/route.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_icmp.h>
89 
90 #include <net/pfvar.h>
91 
92 #include "pf.h"
93 
94 struct inpcbtable rawcbtable;
95 
96 /*
97  * Nominal space allocated to a raw ip socket.
98  */
99 #define	RIPSNDQ		8192
100 #define	RIPRCVQ		8192
101 
102 /*
103  * Raw interface to IP protocol.
104  */
105 
106 /*
107  * Initialize raw connection block q.
108  */
109 void
110 rip_init(void)
111 {
112 
113 	in_pcbinit(&rawcbtable, 1);
114 }
115 
116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
117 
118 /*
119  * Setup generic address and protocol structures
120  * for raw_input routine, then pass them along with
121  * mbuf chain.
122  */
123 void
124 rip_input(struct mbuf *m, ...)
125 {
126 	struct ip *ip = mtod(m, struct ip *);
127 	struct inpcb *inp, *last = NULL;
128 	struct mbuf *opts = NULL;
129 
130 	ripsrc.sin_addr = ip->ip_src;
131 	TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
132 		if (inp->inp_socket->so_state & SS_CANTRCVMORE)
133 			continue;
134 #ifdef INET6
135 		if (inp->inp_flags & INP_IPV6)
136 			continue;
137 #endif
138 		if (rtable_l2(inp->inp_rtableid) !=
139 		    rtable_l2(m->m_pkthdr.ph_rtableid))
140 			continue;
141 
142 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
143 			continue;
144 #if NPF > 0
145 		if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
146 			struct pf_divert *divert;
147 
148 			/* XXX rdomain support */
149 			if ((divert = pf_find_divert(m)) == NULL)
150 				continue;
151 			if (!divert->addr.v4.s_addr)
152 				goto divert_reply;
153 			if (inp->inp_laddr.s_addr != divert->addr.v4.s_addr)
154 				continue;
155 		} else
156  divert_reply:
157 #endif
158 		if (inp->inp_laddr.s_addr &&
159 		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
160 			continue;
161 		if (inp->inp_faddr.s_addr &&
162 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
163 			continue;
164 		if (last) {
165 			struct mbuf *n;
166 
167 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
168 				if (last->inp_flags & INP_CONTROLOPTS ||
169 				    last->inp_socket->so_options & SO_TIMESTAMP)
170 					ip_savecontrol(last, &opts, ip, n);
171 				if (sbappendaddr(&last->inp_socket->so_rcv,
172 				    sintosa(&ripsrc), n, opts) == 0) {
173 					/* should notify about lost packet */
174 					m_freem(n);
175 					m_freem(opts);
176 				} else
177 					sorwakeup(last->inp_socket);
178 				opts = NULL;
179 			}
180 		}
181 		last = inp;
182 	}
183 	if (last) {
184 		if (last->inp_flags & INP_CONTROLOPTS ||
185 		    last->inp_socket->so_options & SO_TIMESTAMP)
186 			ip_savecontrol(last, &opts, ip, m);
187 		if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m,
188 		    opts) == 0) {
189 			m_freem(m);
190 			m_freem(opts);
191 		} else
192 			sorwakeup(last->inp_socket);
193 	} else {
194 		if (ip->ip_p != IPPROTO_ICMP)
195 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
196 		else
197 			m_freem(m);
198 		ipstat.ips_noproto++;
199 		ipstat.ips_delivered--;
200 	}
201 }
202 
203 /*
204  * Generate IP header and pass packet to ip_output.
205  * Tack on options user may have setup with control call.
206  */
207 int
208 rip_output(struct mbuf *m, ...)
209 {
210 	struct socket *so;
211 	u_long dst;
212 	struct ip *ip;
213 	struct inpcb *inp;
214 	int flags, error;
215 	va_list ap;
216 
217 	va_start(ap, m);
218 	so = va_arg(ap, struct socket *);
219 	dst = va_arg(ap, u_long);
220 	va_end(ap);
221 
222 	inp = sotoinpcb(so);
223 	flags = IP_ALLOWBROADCAST;
224 
225 	/*
226 	 * If the user handed us a complete IP packet, use it.
227 	 * Otherwise, allocate an mbuf for a header and fill it in.
228 	 */
229 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
230 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
231 			m_freem(m);
232 			return (EMSGSIZE);
233 		}
234 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
235 		if (!m)
236 			return (ENOBUFS);
237 		ip = mtod(m, struct ip *);
238 		ip->ip_tos = inp->inp_ip.ip_tos;
239 		ip->ip_off = htons(0);
240 		ip->ip_p = inp->inp_ip.ip_p;
241 		ip->ip_len = htons(m->m_pkthdr.len);
242 		ip->ip_src = inp->inp_laddr;
243 		ip->ip_dst.s_addr = dst;
244 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
245 	} else {
246 		if (m->m_pkthdr.len > IP_MAXPACKET) {
247 			m_freem(m);
248 			return (EMSGSIZE);
249 		}
250 		if (m->m_pkthdr.len < sizeof(struct ip)) {
251 			m_freem(m);
252 			return (EINVAL);
253 		}
254 		ip = mtod(m, struct ip *);
255 		/*
256 		 * don't allow both user specified and setsockopt options,
257 		 * and don't allow packet length sizes that will crash
258 		 */
259 		if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) ||
260 		    ntohs(ip->ip_len) > m->m_pkthdr.len ||
261 		    ntohs(ip->ip_len) < ip->ip_hl << 2) {
262 			m_freem(m);
263 			return (EINVAL);
264 		}
265 		if (ip->ip_id == 0) {
266 			ip->ip_id = htons(ip_randomid());
267 		}
268 		/* XXX prevent ip_output from overwriting header fields */
269 		flags |= IP_RAWOUTPUT;
270 		ipstat.ips_rawout++;
271 	}
272 #ifdef INET6
273 	/*
274 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
275 	 *             multicast options, if it does, the last parameter to
276 	 *             ip_output should be guarded against v6/v4 problems.
277 	 */
278 #endif
279 	/* force routing table */
280 	m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
281 
282 #if NPF > 0
283 	if (inp->inp_socket->so_state & SS_ISCONNECTED &&
284 	    ip->ip_p != IPPROTO_ICMP)
285 		m->m_pkthdr.pf.inp = inp;
286 #endif
287 
288 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
289 	    inp->inp_moptions, inp, 0);
290 	if (error == EACCES)	/* translate pf(4) error for userland */
291 		error = EHOSTUNREACH;
292 	return (error);
293 }
294 
295 /*
296  * Raw IP socket option processing.
297  */
298 int
299 rip_ctloutput(int op, struct socket *so, int level, int optname,
300     struct mbuf **mp)
301 {
302 	struct inpcb *inp = sotoinpcb(so);
303 	int error = 0;
304 	int dir;
305 
306 	if (level != IPPROTO_IP) {
307 		if (op == PRCO_SETOPT)
308 			(void) m_free(*mp);
309 		return (EINVAL);
310 	}
311 
312 	switch (optname) {
313 
314 	case IP_HDRINCL:
315 		error = 0;
316 		if (op == PRCO_SETOPT) {
317 			if (*mp == NULL || (*mp)->m_len < sizeof (int))
318 				error = EINVAL;
319 			else if (*mtod(*mp, int *))
320 				inp->inp_flags |= INP_HDRINCL;
321 			else
322 				inp->inp_flags &= ~INP_HDRINCL;
323 			if (*mp)
324 				(void)m_free(*mp);
325 		} else {
326 			*mp = m_get(M_WAIT, M_SOOPTS);
327 			(*mp)->m_len = sizeof(int);
328 			*mtod(*mp, int *) = inp->inp_flags & INP_HDRINCL;
329 		}
330 		return (error);
331 
332 	case IP_DIVERTFL:
333 		switch (op) {
334 		case PRCO_SETOPT:
335 			if (*mp == NULL || (*mp)->m_len < sizeof (int)) {
336 				error = EINVAL;
337 				break;
338 			}
339 			dir = *mtod(*mp, int *);
340 			if (inp->inp_divertfl > 0)
341 				error = ENOTSUP;
342 			else if ((dir & IPPROTO_DIVERT_RESP) ||
343 				   (dir & IPPROTO_DIVERT_INIT))
344 				inp->inp_divertfl = dir;
345 			else
346 				error = EINVAL;
347 
348 			break;
349 
350 		case PRCO_GETOPT:
351 			*mp = m_get(M_WAIT, M_SOOPTS);
352 			(*mp)->m_len = sizeof(int);
353 			*mtod(*mp, int *) = inp->inp_divertfl;
354 			break;
355 
356 		default:
357 			error = EINVAL;
358 			break;
359 		}
360 
361 		if (op == PRCO_SETOPT)
362 			(void)m_free(*mp);
363 		return (error);
364 
365 	case MRT_INIT:
366 	case MRT_DONE:
367 	case MRT_ADD_VIF:
368 	case MRT_DEL_VIF:
369 	case MRT_ADD_MFC:
370 	case MRT_DEL_MFC:
371 	case MRT_VERSION:
372 	case MRT_ASSERT:
373 	case MRT_API_SUPPORT:
374 	case MRT_API_CONFIG:
375 #ifdef MROUTING
376 		switch (op) {
377 		case PRCO_SETOPT:
378 			error = ip_mrouter_set(so, optname, mp);
379 			break;
380 		case PRCO_GETOPT:
381 			error = ip_mrouter_get(so, optname, mp);
382 			break;
383 		default:
384 			error = EINVAL;
385 			break;
386 		}
387 		return (error);
388 #else
389 		if (op == PRCO_SETOPT)
390 			m_free(*mp);
391 		return (EOPNOTSUPP);
392 #endif
393 	}
394 	return (ip_ctloutput(op, so, level, optname, mp));
395 }
396 
397 u_long	rip_sendspace = RIPSNDQ;
398 u_long	rip_recvspace = RIPRCVQ;
399 
400 /*ARGSUSED*/
401 int
402 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
403     struct mbuf *control, struct proc *p)
404 {
405 	struct inpcb *inp = sotoinpcb(so);
406 	int error = 0;
407 	int s;
408 
409 	if (req == PRU_CONTROL)
410 		return (in_control(so, (u_long)m, (caddr_t)nam,
411 		    (struct ifnet *)control));
412 
413 	if (inp == NULL && req != PRU_ATTACH) {
414 		error = EINVAL;
415 		goto release;
416 	}
417 
418 	switch (req) {
419 
420 	case PRU_ATTACH:
421 		if (inp)
422 			panic("rip_attach");
423 		if ((so->so_state & SS_PRIV) == 0) {
424 			error = EACCES;
425 			break;
426 		}
427 		if ((long)nam < 0 || (long)nam >= IPPROTO_MAX) {
428 			error = EPROTONOSUPPORT;
429 			break;
430 		}
431 		s = splsoftnet();
432 		if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
433 		    (error = in_pcballoc(so, &rawcbtable))) {
434 			splx(s);
435 			break;
436 		}
437 		splx(s);
438 		inp = sotoinpcb(so);
439 		inp->inp_ip.ip_p = (long)nam;
440 		break;
441 
442 	case PRU_DISCONNECT:
443 		if ((so->so_state & SS_ISCONNECTED) == 0) {
444 			error = ENOTCONN;
445 			break;
446 		}
447 		/* FALLTHROUGH */
448 	case PRU_ABORT:
449 		soisdisconnected(so);
450 		/* FALLTHROUGH */
451 	case PRU_DETACH:
452 		if (inp == NULL)
453 			panic("rip_detach");
454 #ifdef MROUTING
455 		if (so == ip_mrouter)
456 			ip_mrouter_done();
457 #endif
458 		in_pcbdetach(inp);
459 		break;
460 
461 	case PRU_BIND:
462 	    {
463 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
464 
465 		if (nam->m_len != sizeof(*addr)) {
466 			error = EINVAL;
467 			break;
468 		}
469 		if (addr->sin_family != AF_INET) {
470 			error = EADDRNOTAVAIL;
471 			break;
472 		}
473 		if (!((so->so_options & SO_BINDANY) ||
474 		    addr->sin_addr.s_addr == INADDR_ANY ||
475 		    addr->sin_addr.s_addr == INADDR_BROADCAST ||
476 		    in_broadcast(addr->sin_addr, inp->inp_rtableid) ||
477 		    ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) {
478 			error = EADDRNOTAVAIL;
479 			break;
480 		}
481 		inp->inp_laddr = addr->sin_addr;
482 		break;
483 	    }
484 	case PRU_CONNECT:
485 	    {
486 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
487 
488 		if (nam->m_len != sizeof(*addr)) {
489 			error = EINVAL;
490 			break;
491 		}
492 		if (addr->sin_family != AF_INET) {
493 			error = EAFNOSUPPORT;
494 			break;
495 		}
496 		inp->inp_faddr = addr->sin_addr;
497 		soisconnected(so);
498 		break;
499 	    }
500 
501 	case PRU_CONNECT2:
502 		error = EOPNOTSUPP;
503 		break;
504 
505 	/*
506 	 * Mark the connection as being incapable of further input.
507 	 */
508 	case PRU_SHUTDOWN:
509 		socantsendmore(so);
510 		break;
511 
512 	/*
513 	 * Ship a packet out.  The appropriate raw output
514 	 * routine handles any massaging necessary.
515 	 */
516 	case PRU_SEND:
517 	    {
518 		u_int32_t dst;
519 
520 		if (so->so_state & SS_ISCONNECTED) {
521 			if (nam) {
522 				error = EISCONN;
523 				break;
524 			}
525 			dst = inp->inp_faddr.s_addr;
526 		} else {
527 			if (nam == NULL) {
528 				error = ENOTCONN;
529 				break;
530 			}
531 			dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
532 		}
533 #ifdef IPSEC
534 		/* XXX Find an IPsec TDB */
535 #endif
536 		error = rip_output(m, so, dst);
537 		m = NULL;
538 		break;
539 	    }
540 
541 	case PRU_SENSE:
542 		/*
543 		 * stat: don't bother with a blocksize.
544 		 */
545 		return (0);
546 
547 	/*
548 	 * Not supported.
549 	 */
550 	case PRU_RCVOOB:
551 	case PRU_RCVD:
552 	case PRU_LISTEN:
553 	case PRU_ACCEPT:
554 	case PRU_SENDOOB:
555 		error = EOPNOTSUPP;
556 		break;
557 
558 	case PRU_SOCKADDR:
559 		in_setsockaddr(inp, nam);
560 		break;
561 
562 	case PRU_PEERADDR:
563 		in_setpeeraddr(inp, nam);
564 		break;
565 
566 	default:
567 		panic("rip_usrreq");
568 	}
569 release:
570 	m_freem(m);
571 	return (error);
572 }
573