xref: /openbsd-src/sys/netinet/raw_ip.c (revision d59bb9942320b767f2a19aaa7690c8c6e30b724c)
1 /*	$OpenBSD: raw_ip.c,v 1.96 2017/03/03 15:48:02 bluhm Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/if_var.h>
80 #include <net/route.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_icmp.h>
89 
90 #include <net/pfvar.h>
91 
92 #include "pf.h"
93 
94 struct inpcbtable rawcbtable;
95 
96 /*
97  * Nominal space allocated to a raw ip socket.
98  */
99 #define	RIPSNDQ		8192
100 #define	RIPRCVQ		8192
101 
102 /*
103  * Raw interface to IP protocol.
104  */
105 
106 /*
107  * Initialize raw connection block q.
108  */
109 void
110 rip_init(void)
111 {
112 
113 	in_pcbinit(&rawcbtable, 1);
114 }
115 
116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
117 
118 int
119 rip_input(struct mbuf **mp, int *offp, int proto)
120 {
121 	struct mbuf *m = *mp;
122 	struct ip *ip = mtod(m, struct ip *);
123 	struct inpcb *inp, *last = NULL;
124 	struct mbuf *opts = NULL;
125 	struct counters_ref ref;
126 	uint64_t *counters;
127 
128 	ripsrc.sin_addr = ip->ip_src;
129 	TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
130 		if (inp->inp_socket->so_state & SS_CANTRCVMORE)
131 			continue;
132 #ifdef INET6
133 		if (inp->inp_flags & INP_IPV6)
134 			continue;
135 #endif
136 		if (rtable_l2(inp->inp_rtableid) !=
137 		    rtable_l2(m->m_pkthdr.ph_rtableid))
138 			continue;
139 
140 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
141 			continue;
142 #if NPF > 0
143 		if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
144 			struct pf_divert *divert;
145 
146 			/* XXX rdomain support */
147 			if ((divert = pf_find_divert(m)) == NULL)
148 				continue;
149 			if (!divert->addr.v4.s_addr)
150 				goto divert_reply;
151 			if (inp->inp_laddr.s_addr != divert->addr.v4.s_addr)
152 				continue;
153 		} else
154  divert_reply:
155 #endif
156 		if (inp->inp_laddr.s_addr &&
157 		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
158 			continue;
159 		if (inp->inp_faddr.s_addr &&
160 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
161 			continue;
162 		if (last) {
163 			struct mbuf *n;
164 
165 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
166 				if (last->inp_flags & INP_CONTROLOPTS ||
167 				    last->inp_socket->so_options & SO_TIMESTAMP)
168 					ip_savecontrol(last, &opts, ip, n);
169 				if (sbappendaddr(&last->inp_socket->so_rcv,
170 				    sintosa(&ripsrc), n, opts) == 0) {
171 					/* should notify about lost packet */
172 					m_freem(n);
173 					m_freem(opts);
174 				} else
175 					sorwakeup(last->inp_socket);
176 				opts = NULL;
177 			}
178 		}
179 		last = inp;
180 	}
181 	if (last) {
182 		if (last->inp_flags & INP_CONTROLOPTS ||
183 		    last->inp_socket->so_options & SO_TIMESTAMP)
184 			ip_savecontrol(last, &opts, ip, m);
185 		if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m,
186 		    opts) == 0) {
187 			m_freem(m);
188 			m_freem(opts);
189 		} else
190 			sorwakeup(last->inp_socket);
191 	} else {
192 		if (ip->ip_p != IPPROTO_ICMP)
193 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
194 		else
195 			m_freem(m);
196 
197 		counters = counters_enter(&ref, ipcounters);
198 		counters[ips_noproto]++;
199 		counters[ips_delivered]--;
200 		counters_leave(&ref, ipcounters);
201 	}
202 	return IPPROTO_DONE;
203 }
204 
205 /*
206  * Generate IP header and pass packet to ip_output.
207  * Tack on options user may have setup with control call.
208  */
209 int
210 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
211     struct mbuf *control)
212 {
213 	struct ip *ip;
214 	struct inpcb *inp;
215 	int flags, error;
216 
217 	inp = sotoinpcb(so);
218 	flags = IP_ALLOWBROADCAST;
219 
220 	/*
221 	 * If the user handed us a complete IP packet, use it.
222 	 * Otherwise, allocate an mbuf for a header and fill it in.
223 	 */
224 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
225 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
226 			m_freem(m);
227 			return (EMSGSIZE);
228 		}
229 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
230 		if (!m)
231 			return (ENOBUFS);
232 		ip = mtod(m, struct ip *);
233 		ip->ip_tos = inp->inp_ip.ip_tos;
234 		ip->ip_off = htons(0);
235 		ip->ip_p = inp->inp_ip.ip_p;
236 		ip->ip_len = htons(m->m_pkthdr.len);
237 		ip->ip_src = inp->inp_laddr;
238 		ip->ip_dst = satosin(dstaddr)->sin_addr;
239 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
240 	} else {
241 		if (m->m_pkthdr.len > IP_MAXPACKET) {
242 			m_freem(m);
243 			return (EMSGSIZE);
244 		}
245 		if (m->m_pkthdr.len < sizeof(struct ip)) {
246 			m_freem(m);
247 			return (EINVAL);
248 		}
249 		ip = mtod(m, struct ip *);
250 		/*
251 		 * don't allow both user specified and setsockopt options,
252 		 * and don't allow packet length sizes that will crash
253 		 */
254 		if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) ||
255 		    ntohs(ip->ip_len) > m->m_pkthdr.len ||
256 		    ntohs(ip->ip_len) < ip->ip_hl << 2) {
257 			m_freem(m);
258 			return (EINVAL);
259 		}
260 		if (ip->ip_id == 0) {
261 			ip->ip_id = htons(ip_randomid());
262 		}
263 		/* XXX prevent ip_output from overwriting header fields */
264 		flags |= IP_RAWOUTPUT;
265 		ipstat_inc(ips_rawout);
266 	}
267 #ifdef INET6
268 	/*
269 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
270 	 *             multicast options, if it does, the last parameter to
271 	 *             ip_output should be guarded against v6/v4 problems.
272 	 */
273 #endif
274 	/* force routing table */
275 	m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
276 
277 #if NPF > 0
278 	if (inp->inp_socket->so_state & SS_ISCONNECTED &&
279 	    ip->ip_p != IPPROTO_ICMP)
280 		m->m_pkthdr.pf.inp = inp;
281 #endif
282 
283 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
284 	    inp->inp_moptions, inp, 0);
285 	if (error == EACCES)	/* translate pf(4) error for userland */
286 		error = EHOSTUNREACH;
287 	return (error);
288 }
289 
290 /*
291  * Raw IP socket option processing.
292  */
293 int
294 rip_ctloutput(int op, struct socket *so, int level, int optname,
295     struct mbuf *m)
296 {
297 	struct inpcb *inp = sotoinpcb(so);
298 	int error = 0;
299 	int dir;
300 
301 	if (level != IPPROTO_IP) {
302 		if (op == PRCO_SETOPT)
303 			(void) m_free(m);
304 		return (EINVAL);
305 	}
306 
307 	switch (optname) {
308 
309 	case IP_HDRINCL:
310 		error = 0;
311 		if (op == PRCO_SETOPT) {
312 			if (m == NULL || m->m_len < sizeof (int))
313 				error = EINVAL;
314 			else if (*mtod(m, int *))
315 				inp->inp_flags |= INP_HDRINCL;
316 			else
317 				inp->inp_flags &= ~INP_HDRINCL;
318 			m_free(m);
319 		} else {
320 			m->m_len = sizeof(int);
321 			*mtod(m, int *) = inp->inp_flags & INP_HDRINCL;
322 		}
323 		return (error);
324 
325 	case IP_DIVERTFL:
326 		switch (op) {
327 		case PRCO_SETOPT:
328 			if (m == NULL || m->m_len < sizeof (int)) {
329 				error = EINVAL;
330 				break;
331 			}
332 			dir = *mtod(m, int *);
333 			if (inp->inp_divertfl > 0)
334 				error = ENOTSUP;
335 			else if ((dir & IPPROTO_DIVERT_RESP) ||
336 				   (dir & IPPROTO_DIVERT_INIT))
337 				inp->inp_divertfl = dir;
338 			else
339 				error = EINVAL;
340 
341 			break;
342 
343 		case PRCO_GETOPT:
344 			m->m_len = sizeof(int);
345 			*mtod(m, int *) = inp->inp_divertfl;
346 			break;
347 
348 		default:
349 			error = EINVAL;
350 			break;
351 		}
352 
353 		if (op == PRCO_SETOPT)
354 			(void)m_free(m);
355 		return (error);
356 
357 	case MRT_INIT:
358 	case MRT_DONE:
359 	case MRT_ADD_VIF:
360 	case MRT_DEL_VIF:
361 	case MRT_ADD_MFC:
362 	case MRT_DEL_MFC:
363 	case MRT_VERSION:
364 	case MRT_ASSERT:
365 	case MRT_API_SUPPORT:
366 	case MRT_API_CONFIG:
367 #ifdef MROUTING
368 		switch (op) {
369 		case PRCO_SETOPT:
370 			error = ip_mrouter_set(so, optname, m);
371 			break;
372 		case PRCO_GETOPT:
373 			error = ip_mrouter_get(so, optname, m);
374 			break;
375 		default:
376 			error = EINVAL;
377 			break;
378 		}
379 		return (error);
380 #else
381 		if (op == PRCO_SETOPT)
382 			m_free(m);
383 		return (EOPNOTSUPP);
384 #endif
385 	}
386 	return (ip_ctloutput(op, so, level, optname, m));
387 }
388 
389 u_long	rip_sendspace = RIPSNDQ;
390 u_long	rip_recvspace = RIPRCVQ;
391 
392 /*ARGSUSED*/
393 int
394 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
395     struct mbuf *control, struct proc *p)
396 {
397 	struct inpcb *inp = sotoinpcb(so);
398 	int error = 0;
399 
400 	NET_ASSERT_LOCKED();
401 
402 	if (req == PRU_CONTROL)
403 		return (in_control(so, (u_long)m, (caddr_t)nam,
404 		    (struct ifnet *)control));
405 
406 	if (inp == NULL && req != PRU_ATTACH) {
407 		error = EINVAL;
408 		goto release;
409 	}
410 
411 	switch (req) {
412 
413 	case PRU_ATTACH:
414 		if (inp)
415 			panic("rip_attach");
416 		if ((so->so_state & SS_PRIV) == 0) {
417 			error = EACCES;
418 			break;
419 		}
420 		if ((long)nam < 0 || (long)nam >= IPPROTO_MAX) {
421 			error = EPROTONOSUPPORT;
422 			break;
423 		}
424 		if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
425 		    (error = in_pcballoc(so, &rawcbtable))) {
426 			break;
427 		}
428 		inp = sotoinpcb(so);
429 		inp->inp_ip.ip_p = (long)nam;
430 		break;
431 
432 	case PRU_DISCONNECT:
433 		if ((so->so_state & SS_ISCONNECTED) == 0) {
434 			error = ENOTCONN;
435 			break;
436 		}
437 		/* FALLTHROUGH */
438 	case PRU_ABORT:
439 		soisdisconnected(so);
440 		/* FALLTHROUGH */
441 	case PRU_DETACH:
442 		if (inp == NULL)
443 			panic("rip_detach");
444 #ifdef MROUTING
445 		if (so == ip_mrouter[inp->inp_rtableid])
446 			ip_mrouter_done(so);
447 #endif
448 		in_pcbdetach(inp);
449 		break;
450 
451 	case PRU_BIND:
452 	    {
453 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
454 
455 		if (nam->m_len != sizeof(*addr)) {
456 			error = EINVAL;
457 			break;
458 		}
459 		if (addr->sin_family != AF_INET) {
460 			error = EADDRNOTAVAIL;
461 			break;
462 		}
463 		if (!((so->so_options & SO_BINDANY) ||
464 		    addr->sin_addr.s_addr == INADDR_ANY ||
465 		    addr->sin_addr.s_addr == INADDR_BROADCAST ||
466 		    in_broadcast(addr->sin_addr, inp->inp_rtableid) ||
467 		    ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) {
468 			error = EADDRNOTAVAIL;
469 			break;
470 		}
471 		inp->inp_laddr = addr->sin_addr;
472 		break;
473 	    }
474 	case PRU_CONNECT:
475 	    {
476 		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
477 
478 		if (nam->m_len != sizeof(*addr)) {
479 			error = EINVAL;
480 			break;
481 		}
482 		if (addr->sin_family != AF_INET) {
483 			error = EAFNOSUPPORT;
484 			break;
485 		}
486 		inp->inp_faddr = addr->sin_addr;
487 		soisconnected(so);
488 		break;
489 	    }
490 
491 	case PRU_CONNECT2:
492 		error = EOPNOTSUPP;
493 		break;
494 
495 	/*
496 	 * Mark the connection as being incapable of further input.
497 	 */
498 	case PRU_SHUTDOWN:
499 		socantsendmore(so);
500 		break;
501 
502 	/*
503 	 * Ship a packet out.  The appropriate raw output
504 	 * routine handles any massaging necessary.
505 	 */
506 	case PRU_SEND:
507 	    {
508 		struct sockaddr_in dst;
509 
510 		memset(&dst, 0, sizeof(dst));
511 		dst.sin_family = AF_INET;
512 		dst.sin_len = sizeof(dst);
513 		if (so->so_state & SS_ISCONNECTED) {
514 			if (nam) {
515 				error = EISCONN;
516 				break;
517 			}
518 			dst.sin_addr = inp->inp_faddr;
519 		} else {
520 			if (nam == NULL) {
521 				error = ENOTCONN;
522 				break;
523 			}
524 			dst.sin_addr =
525 			    mtod(nam, struct sockaddr_in *)->sin_addr;
526 		}
527 #ifdef IPSEC
528 		/* XXX Find an IPsec TDB */
529 #endif
530 		error = rip_output(m, so, sintosa(&dst), NULL);
531 		m = NULL;
532 		break;
533 	    }
534 
535 	case PRU_SENSE:
536 		/*
537 		 * stat: don't bother with a blocksize.
538 		 */
539 		return (0);
540 
541 	/*
542 	 * Not supported.
543 	 */
544 	case PRU_RCVOOB:
545 	case PRU_RCVD:
546 	case PRU_LISTEN:
547 	case PRU_ACCEPT:
548 	case PRU_SENDOOB:
549 		error = EOPNOTSUPP;
550 		break;
551 
552 	case PRU_SOCKADDR:
553 		in_setsockaddr(inp, nam);
554 		break;
555 
556 	case PRU_PEERADDR:
557 		in_setpeeraddr(inp, nam);
558 		break;
559 
560 	default:
561 		panic("rip_usrreq");
562 	}
563 release:
564 	m_freem(m);
565 	return (error);
566 }
567