xref: /openbsd-src/sys/netinet/raw_ip.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: raw_ip.c,v 1.119 2019/02/04 21:40:52 bluhm Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/if_var.h>
80 #include <net/route.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_icmp.h>
89 
90 #include <net/pfvar.h>
91 
92 #include "pf.h"
93 
94 struct inpcbtable rawcbtable;
95 
96 /*
97  * Nominal space allocated to a raw ip socket.
98  */
99 #define	RIPSNDQ		8192
100 #define	RIPRCVQ		8192
101 
102 /*
103  * Raw interface to IP protocol.
104  */
105 
106 /*
107  * Initialize raw connection block q.
108  */
109 void
110 rip_init(void)
111 {
112 
113 	in_pcbinit(&rawcbtable, 1);
114 }
115 
116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
117 
118 struct mbuf	*rip_chkhdr(struct mbuf *, struct mbuf *);
119 
120 int
121 rip_input(struct mbuf **mp, int *offp, int proto, int af)
122 {
123 	struct mbuf *m = *mp;
124 	struct ip *ip = mtod(m, struct ip *);
125 	struct inpcb *inp, *last = NULL;
126 	struct in_addr *key;
127 	struct mbuf *opts = NULL;
128 	struct counters_ref ref;
129 	uint64_t *counters;
130 
131 	KASSERT(af == AF_INET);
132 
133 	ripsrc.sin_addr = ip->ip_src;
134 	key = &ip->ip_dst;
135 #if NPF > 0
136 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
137 		struct pf_divert *divert;
138 
139 		divert = pf_find_divert(m);
140 		KASSERT(divert != NULL);
141 		switch (divert->type) {
142 		case PF_DIVERT_TO:
143 			key = &divert->addr.v4;
144 			break;
145 		case PF_DIVERT_REPLY:
146 			break;
147 		default:
148 			panic("%s: unknown divert type %d, mbuf %p, divert %p",
149 			    __func__, divert->type, m, divert);
150 		}
151 	}
152 #endif
153 	NET_ASSERT_LOCKED();
154 	TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
155 		if (inp->inp_socket->so_state & SS_CANTRCVMORE)
156 			continue;
157 #ifdef INET6
158 		if (inp->inp_flags & INP_IPV6)
159 			continue;
160 #endif
161 		if (rtable_l2(inp->inp_rtableid) !=
162 		    rtable_l2(m->m_pkthdr.ph_rtableid))
163 			continue;
164 
165 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
166 			continue;
167 		if (inp->inp_laddr.s_addr &&
168 		    inp->inp_laddr.s_addr != key->s_addr)
169 			continue;
170 		if (inp->inp_faddr.s_addr &&
171 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
172 			continue;
173 		if (last) {
174 			struct mbuf *n;
175 
176 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
177 				if (last->inp_flags & INP_CONTROLOPTS ||
178 				    last->inp_socket->so_options & SO_TIMESTAMP)
179 					ip_savecontrol(last, &opts, ip, n);
180 				if (sbappendaddr(last->inp_socket,
181 				    &last->inp_socket->so_rcv,
182 				    sintosa(&ripsrc), n, opts) == 0) {
183 					/* should notify about lost packet */
184 					m_freem(n);
185 					m_freem(opts);
186 				} else
187 					sorwakeup(last->inp_socket);
188 				opts = NULL;
189 			}
190 		}
191 		last = inp;
192 	}
193 	if (last) {
194 		if (last->inp_flags & INP_CONTROLOPTS ||
195 		    last->inp_socket->so_options & SO_TIMESTAMP)
196 			ip_savecontrol(last, &opts, ip, m);
197 		if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
198 		    sintosa(&ripsrc), m, opts) == 0) {
199 			m_freem(m);
200 			m_freem(opts);
201 		} else
202 			sorwakeup(last->inp_socket);
203 	} else {
204 		if (ip->ip_p != IPPROTO_ICMP)
205 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
206 		else
207 			m_freem(m);
208 
209 		counters = counters_enter(&ref, ipcounters);
210 		counters[ips_noproto]++;
211 		counters[ips_delivered]--;
212 		counters_leave(&ref, ipcounters);
213 	}
214 	return IPPROTO_DONE;
215 }
216 
217 /*
218  * Generate IP header and pass packet to ip_output.
219  * Tack on options user may have setup with control call.
220  */
221 int
222 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
223     struct mbuf *control)
224 {
225 	struct ip *ip;
226 	struct inpcb *inp;
227 	int flags, error;
228 
229 	inp = sotoinpcb(so);
230 	flags = IP_ALLOWBROADCAST;
231 
232 	/*
233 	 * If the user handed us a complete IP packet, use it.
234 	 * Otherwise, allocate an mbuf for a header and fill it in.
235 	 */
236 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
237 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
238 			m_freem(m);
239 			return (EMSGSIZE);
240 		}
241 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
242 		if (!m)
243 			return (ENOBUFS);
244 		ip = mtod(m, struct ip *);
245 		ip->ip_tos = inp->inp_ip.ip_tos;
246 		ip->ip_off = htons(0);
247 		ip->ip_p = inp->inp_ip.ip_p;
248 		ip->ip_len = htons(m->m_pkthdr.len);
249 		ip->ip_src = inp->inp_laddr;
250 		ip->ip_dst = satosin(dstaddr)->sin_addr;
251 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
252 	} else {
253 		if (m->m_pkthdr.len > IP_MAXPACKET) {
254 			m_freem(m);
255 			return (EMSGSIZE);
256 		}
257 
258 		m = rip_chkhdr(m, inp->inp_options);
259 		if (m == NULL)
260 			return (EINVAL);
261 
262 		ip = mtod(m, struct ip *);
263 		if (ip->ip_id == 0)
264 			ip->ip_id = htons(ip_randomid());
265 
266 		/* XXX prevent ip_output from overwriting header fields */
267 		flags |= IP_RAWOUTPUT;
268 		ipstat_inc(ips_rawout);
269 	}
270 #ifdef INET6
271 	/*
272 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
273 	 *             multicast options, if it does, the last parameter to
274 	 *             ip_output should be guarded against v6/v4 problems.
275 	 */
276 #endif
277 	/* force routing table */
278 	m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
279 
280 #if NPF > 0
281 	if (inp->inp_socket->so_state & SS_ISCONNECTED &&
282 	    ip->ip_p != IPPROTO_ICMP)
283 		pf_mbuf_link_inpcb(m, inp);
284 #endif
285 
286 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
287 	    inp->inp_moptions, inp, 0);
288 	return (error);
289 }
290 
291 struct mbuf *
292 rip_chkhdr(struct mbuf *m, struct mbuf *options)
293 {
294 	struct ip *ip;
295 	int hlen, opt, optlen, cnt;
296 	u_char *cp;
297 
298 	if (m->m_pkthdr.len < sizeof(struct ip)) {
299 		m_freem(m);
300 		return NULL;
301 	}
302 
303 	m = m_pullup(m, sizeof (struct ip));
304 	if (m == NULL)
305 		return NULL;
306 
307 	ip = mtod(m, struct ip *);
308 	hlen = ip->ip_hl << 2;
309 
310 	/* Don't allow packet length sizes that will crash. */
311 	if (hlen < sizeof (struct ip) ||
312 	    ntohs(ip->ip_len) < hlen ||
313 	    ntohs(ip->ip_len) != m->m_pkthdr.len) {
314 		m_freem(m);
315 		return NULL;
316 	}
317 	m = m_pullup(m, hlen);
318 	if (m == NULL)
319 		return NULL;
320 
321 	ip = mtod(m, struct ip *);
322 
323 	if (ip->ip_v != IPVERSION) {
324 		m_freem(m);
325 		return NULL;
326 	}
327 
328 	/*
329 	 * Don't allow both user specified and setsockopt options.
330 	 * If options are present verify them.
331 	 */
332 	if (hlen != sizeof(struct ip)) {
333 		if (options) {
334 			m_freem(m);
335 			return NULL;
336 		} else {
337 			cp = (u_char *)(ip + 1);
338 			cnt = hlen - sizeof(struct ip);
339 			for (; cnt > 0; cnt -= optlen, cp += optlen) {
340 				opt = cp[IPOPT_OPTVAL];
341 				if (opt == IPOPT_EOL)
342 					break;
343 				if (opt == IPOPT_NOP)
344 					optlen = 1;
345 				else {
346 					if (cnt < IPOPT_OLEN + sizeof(*cp)) {
347 						m_freem(m);
348 						return NULL;
349 					}
350 					optlen = cp[IPOPT_OLEN];
351 					if (optlen < IPOPT_OLEN + sizeof(*cp) ||
352 					    optlen > cnt) {
353 						m_freem(m);
354 						return NULL;
355 					}
356 				}
357 			}
358 		}
359 	}
360 
361 	return m;
362 }
363 
364 /*
365  * Raw IP socket option processing.
366  */
367 int
368 rip_ctloutput(int op, struct socket *so, int level, int optname,
369     struct mbuf *m)
370 {
371 	struct inpcb *inp = sotoinpcb(so);
372 	int error;
373 
374 	if (level != IPPROTO_IP)
375 		return (EINVAL);
376 
377 	switch (optname) {
378 
379 	case IP_HDRINCL:
380 		error = 0;
381 		if (op == PRCO_SETOPT) {
382 			if (m == NULL || m->m_len < sizeof (int))
383 				error = EINVAL;
384 			else if (*mtod(m, int *))
385 				inp->inp_flags |= INP_HDRINCL;
386 			else
387 				inp->inp_flags &= ~INP_HDRINCL;
388 		} else {
389 			m->m_len = sizeof(int);
390 			*mtod(m, int *) = inp->inp_flags & INP_HDRINCL;
391 		}
392 		return (error);
393 
394 	case MRT_INIT:
395 	case MRT_DONE:
396 	case MRT_ADD_VIF:
397 	case MRT_DEL_VIF:
398 	case MRT_ADD_MFC:
399 	case MRT_DEL_MFC:
400 	case MRT_VERSION:
401 	case MRT_ASSERT:
402 	case MRT_API_SUPPORT:
403 	case MRT_API_CONFIG:
404 #ifdef MROUTING
405 		switch (op) {
406 		case PRCO_SETOPT:
407 			error = ip_mrouter_set(so, optname, m);
408 			break;
409 		case PRCO_GETOPT:
410 			error = ip_mrouter_get(so, optname, m);
411 			break;
412 		default:
413 			error = EINVAL;
414 			break;
415 		}
416 		return (error);
417 #else
418 		return (EOPNOTSUPP);
419 #endif
420 	}
421 	return (ip_ctloutput(op, so, level, optname, m));
422 }
423 
424 u_long	rip_sendspace = RIPSNDQ;
425 u_long	rip_recvspace = RIPRCVQ;
426 
427 /*ARGSUSED*/
428 int
429 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
430     struct mbuf *control, struct proc *p)
431 {
432 	struct inpcb *inp;
433 	int error = 0;
434 
435 	if (req == PRU_CONTROL)
436 		return (in_control(so, (u_long)m, (caddr_t)nam,
437 		    (struct ifnet *)control));
438 
439 	soassertlocked(so);
440 
441 	inp = sotoinpcb(so);
442 	if (inp == NULL) {
443 		error = EINVAL;
444 		goto release;
445 	}
446 
447 	switch (req) {
448 
449 	case PRU_DISCONNECT:
450 		if ((so->so_state & SS_ISCONNECTED) == 0) {
451 			error = ENOTCONN;
452 			break;
453 		}
454 		soisdisconnected(so);
455 		inp->inp_faddr.s_addr = INADDR_ANY;
456 		break;
457 	case PRU_ABORT:
458 		soisdisconnected(so);
459 		if (inp == NULL)
460 			panic("rip_abort");
461 #ifdef MROUTING
462 		if (so == ip_mrouter[inp->inp_rtableid])
463 			ip_mrouter_done(so);
464 #endif
465 		in_pcbdetach(inp);
466 		break;
467 
468 	case PRU_BIND:
469 	    {
470 		struct sockaddr_in *addr;
471 
472 		if ((error = in_nam2sin(nam, &addr)))
473 			break;
474 		if (!((so->so_options & SO_BINDANY) ||
475 		    addr->sin_addr.s_addr == INADDR_ANY ||
476 		    addr->sin_addr.s_addr == INADDR_BROADCAST ||
477 		    in_broadcast(addr->sin_addr, inp->inp_rtableid) ||
478 		    ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) {
479 			error = EADDRNOTAVAIL;
480 			break;
481 		}
482 		inp->inp_laddr = addr->sin_addr;
483 		break;
484 	    }
485 	case PRU_CONNECT:
486 	    {
487 		struct sockaddr_in *addr;
488 
489 		if ((error = in_nam2sin(nam, &addr)))
490 			break;
491 		inp->inp_faddr = addr->sin_addr;
492 		soisconnected(so);
493 		break;
494 	    }
495 
496 	case PRU_CONNECT2:
497 		error = EOPNOTSUPP;
498 		break;
499 
500 	/*
501 	 * Mark the connection as being incapable of further input.
502 	 */
503 	case PRU_SHUTDOWN:
504 		socantsendmore(so);
505 		break;
506 
507 	/*
508 	 * Ship a packet out.  The appropriate raw output
509 	 * routine handles any massaging necessary.
510 	 */
511 	case PRU_SEND:
512 	    {
513 		struct sockaddr_in dst;
514 
515 		memset(&dst, 0, sizeof(dst));
516 		dst.sin_family = AF_INET;
517 		dst.sin_len = sizeof(dst);
518 		if (so->so_state & SS_ISCONNECTED) {
519 			if (nam) {
520 				error = EISCONN;
521 				break;
522 			}
523 			dst.sin_addr = inp->inp_faddr;
524 		} else {
525 			struct sockaddr_in *addr;
526 
527 			if (nam == NULL) {
528 				error = ENOTCONN;
529 				break;
530 			}
531 			if ((error = in_nam2sin(nam, &addr)))
532 				break;
533 			dst.sin_addr = addr->sin_addr;
534 		}
535 #ifdef IPSEC
536 		/* XXX Find an IPsec TDB */
537 #endif
538 		error = rip_output(m, so, sintosa(&dst), NULL);
539 		m = NULL;
540 		break;
541 	    }
542 
543 	case PRU_SENSE:
544 		/*
545 		 * stat: don't bother with a blocksize.
546 		 */
547 		break;
548 
549 	/*
550 	 * Not supported.
551 	 */
552 	case PRU_LISTEN:
553 	case PRU_ACCEPT:
554 	case PRU_SENDOOB:
555 	case PRU_RCVD:
556 	case PRU_RCVOOB:
557 		error = EOPNOTSUPP;
558 		break;
559 
560 	case PRU_SOCKADDR:
561 		in_setsockaddr(inp, nam);
562 		break;
563 
564 	case PRU_PEERADDR:
565 		in_setpeeraddr(inp, nam);
566 		break;
567 
568 	default:
569 		panic("rip_usrreq");
570 	}
571 release:
572 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
573 		m_freem(control);
574 		m_freem(m);
575 	}
576 	return (error);
577 }
578 
579 int
580 rip_attach(struct socket *so, int proto)
581 {
582 	struct inpcb *inp;
583 	int error;
584 
585 	if (so->so_pcb)
586 		panic("rip_attach");
587 	if ((so->so_state & SS_PRIV) == 0)
588 		return EACCES;
589 	if (proto < 0 || proto >= IPPROTO_MAX)
590 		return EPROTONOSUPPORT;
591 
592 	if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
593 		return error;
594 	NET_ASSERT_LOCKED();
595 	if ((error = in_pcballoc(so, &rawcbtable)))
596 		return error;
597 	inp = sotoinpcb(so);
598 	inp->inp_ip.ip_p = proto;
599 	return 0;
600 }
601 
602 int
603 rip_detach(struct socket *so)
604 {
605 	struct inpcb *inp = sotoinpcb(so);
606 
607 	soassertlocked(so);
608 
609 	if (inp == NULL)
610 		return (EINVAL);
611 
612 #ifdef MROUTING
613 	if (so == ip_mrouter[inp->inp_rtableid])
614 		ip_mrouter_done(so);
615 #endif
616 	in_pcbdetach(inp);
617 
618 	return (0);
619 }
620