xref: /openbsd-src/sys/netinet/raw_ip.c (revision f84b1df5a16cdd762c93854218de246e79975d3b)
1 /*	$OpenBSD: raw_ip.c,v 1.127 2022/03/23 17:22:28 bluhm Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/if_var.h>
80 #include <net/route.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_icmp.h>
89 
90 #include <net/pfvar.h>
91 
92 #include "pf.h"
93 
/*
 * Table of protocol control blocks for raw IPv4 sockets.
 * Set up by rip_init(), populated through in_pcballoc() in rip_attach()
 * and walked (under inpt_mtx) by rip_input() to find receivers.
 */
struct inpcbtable rawcbtable;
95 
96 /*
97  * Nominal space allocated to a raw ip socket.
98  */
99 #define	RIPSNDQ		8192
100 #define	RIPRCVQ		8192
101 
102 /*
103  * Raw interface to IP protocol.
104  */
105 
/*
 * Initialize the raw connection block table (rawcbtable).
 * The second argument to in_pcbinit() is presumably the initial hash
 * size -- TODO confirm against in_pcb.c.
 */
void
rip_init(void)
{
	in_pcbinit(&rawcbtable, 1);
}
114 
/* Forward declaration: rip_output() calls rip_chkhdr() before its definition. */
struct mbuf	*rip_chkhdr(struct mbuf *, struct mbuf *);
116 
117 int
118 rip_input(struct mbuf **mp, int *offp, int proto, int af)
119 {
120 	struct mbuf *m = *mp;
121 	struct ip *ip = mtod(m, struct ip *);
122 	struct inpcb *inp;
123 	SIMPLEQ_HEAD(, inpcb) inpcblist;
124 	struct in_addr *key;
125 	struct counters_ref ref;
126 	uint64_t *counters;
127 	struct sockaddr_in ripsrc;
128 
129 	KASSERT(af == AF_INET);
130 
131 	memset(&ripsrc, 0, sizeof(ripsrc));
132 	ripsrc.sin_family = AF_INET;
133 	ripsrc.sin_len = sizeof(ripsrc);
134 	ripsrc.sin_addr = ip->ip_src;
135 
136 	key = &ip->ip_dst;
137 #if NPF > 0
138 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
139 		struct pf_divert *divert;
140 
141 		divert = pf_find_divert(m);
142 		KASSERT(divert != NULL);
143 		switch (divert->type) {
144 		case PF_DIVERT_TO:
145 			key = &divert->addr.v4;
146 			break;
147 		case PF_DIVERT_REPLY:
148 			break;
149 		default:
150 			panic("%s: unknown divert type %d, mbuf %p, divert %p",
151 			    __func__, divert->type, m, divert);
152 		}
153 	}
154 #endif
155 	NET_ASSERT_WLOCKED();
156 	SIMPLEQ_INIT(&inpcblist);
157 	mtx_enter(&rawcbtable.inpt_mtx);
158 	TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
159 		if (inp->inp_socket->so_state & SS_CANTRCVMORE)
160 			continue;
161 #ifdef INET6
162 		if (inp->inp_flags & INP_IPV6)
163 			continue;
164 #endif
165 		if (rtable_l2(inp->inp_rtableid) !=
166 		    rtable_l2(m->m_pkthdr.ph_rtableid))
167 			continue;
168 
169 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
170 			continue;
171 		if (inp->inp_laddr.s_addr &&
172 		    inp->inp_laddr.s_addr != key->s_addr)
173 			continue;
174 		if (inp->inp_faddr.s_addr &&
175 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
176 			continue;
177 
178 		in_pcbref(inp);
179 		SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
180 	}
181 	mtx_leave(&rawcbtable.inpt_mtx);
182 
183 	if (SIMPLEQ_EMPTY(&inpcblist)) {
184 		if (ip->ip_p != IPPROTO_ICMP)
185 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
186 			    0, 0);
187 		else
188 			m_freem(m);
189 
190 		counters = counters_enter(&ref, ipcounters);
191 		counters[ips_noproto]++;
192 		counters[ips_delivered]--;
193 		counters_leave(&ref, ipcounters);
194 	}
195 
196 	while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
197 		struct mbuf *n, *opts = NULL;
198 
199 		SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
200 		if (SIMPLEQ_EMPTY(&inpcblist))
201 			n = m;
202 		else
203 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
204 		if (n != NULL) {
205 			if (inp->inp_flags & INP_CONTROLOPTS ||
206 			    inp->inp_socket->so_options & SO_TIMESTAMP)
207 				ip_savecontrol(inp, &opts, ip, n);
208 			if (sbappendaddr(inp->inp_socket,
209 			    &inp->inp_socket->so_rcv,
210 			    sintosa(&ripsrc), n, opts) == 0) {
211 				/* should notify about lost packet */
212 				m_freem(n);
213 				m_freem(opts);
214 			} else
215 				sorwakeup(inp->inp_socket);
216 		}
217 		in_pcbunref(inp);
218 	}
219 	return IPPROTO_DONE;
220 }
221 
222 /*
223  * Generate IP header and pass packet to ip_output.
224  * Tack on options user may have setup with control call.
225  */
226 int
227 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
228     struct mbuf *control)
229 {
230 	struct sockaddr_in *dst = satosin(dstaddr);
231 	struct ip *ip;
232 	struct inpcb *inp;
233 	int flags, error;
234 
235 	inp = sotoinpcb(so);
236 	flags = IP_ALLOWBROADCAST;
237 
238 	/*
239 	 * If the user handed us a complete IP packet, use it.
240 	 * Otherwise, allocate an mbuf for a header and fill it in.
241 	 */
242 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
243 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
244 			m_freem(m);
245 			return (EMSGSIZE);
246 		}
247 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
248 		if (!m)
249 			return (ENOBUFS);
250 		ip = mtod(m, struct ip *);
251 		ip->ip_tos = inp->inp_ip.ip_tos;
252 		ip->ip_off = htons(0);
253 		ip->ip_p = inp->inp_ip.ip_p;
254 		ip->ip_len = htons(m->m_pkthdr.len);
255 		ip->ip_src.s_addr = INADDR_ANY;
256 		ip->ip_dst = dst->sin_addr;
257 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
258 	} else {
259 		if (m->m_pkthdr.len > IP_MAXPACKET) {
260 			m_freem(m);
261 			return (EMSGSIZE);
262 		}
263 
264 		m = rip_chkhdr(m, inp->inp_options);
265 		if (m == NULL)
266 			return (EINVAL);
267 
268 		ip = mtod(m, struct ip *);
269 		if (ip->ip_id == 0)
270 			ip->ip_id = htons(ip_randomid());
271 		dst->sin_addr = ip->ip_dst;
272 
273 		/* XXX prevent ip_output from overwriting header fields */
274 		flags |= IP_RAWOUTPUT;
275 		ipstat_inc(ips_rawout);
276 	}
277 
278 	if (ip->ip_src.s_addr == INADDR_ANY) {
279 		struct in_addr *laddr;
280 
281 		error = in_pcbselsrc(&laddr, dst, inp);
282 		if (error != 0)
283 			return (error);
284 
285 		ip->ip_src = *laddr;
286 	}
287 
288 #ifdef INET6
289 	/*
290 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
291 	 *             multicast options, if it does, the last parameter to
292 	 *             ip_output should be guarded against v6/v4 problems.
293 	 */
294 #endif
295 	/* force routing table */
296 	m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
297 
298 #if NPF > 0
299 	if (inp->inp_socket->so_state & SS_ISCONNECTED &&
300 	    ip->ip_p != IPPROTO_ICMP)
301 		pf_mbuf_link_inpcb(m, inp);
302 #endif
303 
304 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
305 	    inp->inp_moptions, inp, 0);
306 	return (error);
307 }
308 
309 struct mbuf *
310 rip_chkhdr(struct mbuf *m, struct mbuf *options)
311 {
312 	struct ip *ip;
313 	int hlen, opt, optlen, cnt;
314 	u_char *cp;
315 
316 	if (m->m_pkthdr.len < sizeof(struct ip)) {
317 		m_freem(m);
318 		return NULL;
319 	}
320 
321 	m = m_pullup(m, sizeof (struct ip));
322 	if (m == NULL)
323 		return NULL;
324 
325 	ip = mtod(m, struct ip *);
326 	hlen = ip->ip_hl << 2;
327 
328 	/* Don't allow packet length sizes that will crash. */
329 	if (hlen < sizeof (struct ip) ||
330 	    ntohs(ip->ip_len) < hlen ||
331 	    ntohs(ip->ip_len) != m->m_pkthdr.len) {
332 		m_freem(m);
333 		return NULL;
334 	}
335 	m = m_pullup(m, hlen);
336 	if (m == NULL)
337 		return NULL;
338 
339 	ip = mtod(m, struct ip *);
340 
341 	if (ip->ip_v != IPVERSION) {
342 		m_freem(m);
343 		return NULL;
344 	}
345 
346 	/*
347 	 * Don't allow both user specified and setsockopt options.
348 	 * If options are present verify them.
349 	 */
350 	if (hlen != sizeof(struct ip)) {
351 		if (options) {
352 			m_freem(m);
353 			return NULL;
354 		} else {
355 			cp = (u_char *)(ip + 1);
356 			cnt = hlen - sizeof(struct ip);
357 			for (; cnt > 0; cnt -= optlen, cp += optlen) {
358 				opt = cp[IPOPT_OPTVAL];
359 				if (opt == IPOPT_EOL)
360 					break;
361 				if (opt == IPOPT_NOP)
362 					optlen = 1;
363 				else {
364 					if (cnt < IPOPT_OLEN + sizeof(*cp)) {
365 						m_freem(m);
366 						return NULL;
367 					}
368 					optlen = cp[IPOPT_OLEN];
369 					if (optlen < IPOPT_OLEN + sizeof(*cp) ||
370 					    optlen > cnt) {
371 						m_freem(m);
372 						return NULL;
373 					}
374 				}
375 			}
376 		}
377 	}
378 
379 	return m;
380 }
381 
382 /*
383  * Raw IP socket option processing.
384  */
385 int
386 rip_ctloutput(int op, struct socket *so, int level, int optname,
387     struct mbuf *m)
388 {
389 	struct inpcb *inp = sotoinpcb(so);
390 	int error;
391 
392 	if (level != IPPROTO_IP)
393 		return (EINVAL);
394 
395 	switch (optname) {
396 
397 	case IP_HDRINCL:
398 		error = 0;
399 		if (op == PRCO_SETOPT) {
400 			if (m == NULL || m->m_len < sizeof (int))
401 				error = EINVAL;
402 			else if (*mtod(m, int *))
403 				inp->inp_flags |= INP_HDRINCL;
404 			else
405 				inp->inp_flags &= ~INP_HDRINCL;
406 		} else {
407 			m->m_len = sizeof(int);
408 			*mtod(m, int *) = inp->inp_flags & INP_HDRINCL;
409 		}
410 		return (error);
411 
412 	case MRT_INIT:
413 	case MRT_DONE:
414 	case MRT_ADD_VIF:
415 	case MRT_DEL_VIF:
416 	case MRT_ADD_MFC:
417 	case MRT_DEL_MFC:
418 	case MRT_VERSION:
419 	case MRT_ASSERT:
420 	case MRT_API_SUPPORT:
421 	case MRT_API_CONFIG:
422 #ifdef MROUTING
423 		switch (op) {
424 		case PRCO_SETOPT:
425 			error = ip_mrouter_set(so, optname, m);
426 			break;
427 		case PRCO_GETOPT:
428 			error = ip_mrouter_get(so, optname, m);
429 			break;
430 		default:
431 			error = EINVAL;
432 			break;
433 		}
434 		return (error);
435 #else
436 		return (EOPNOTSUPP);
437 #endif
438 	}
439 	return (ip_ctloutput(op, so, level, optname, m));
440 }
441 
/*
 * Send and receive buffer sizes handed to soreserve() when a raw IP
 * socket is attached; they default to RIPSNDQ and RIPRCVQ.
 */
u_long	rip_sendspace = RIPSNDQ;
u_long	rip_recvspace = RIPRCVQ;
444 
445 /*ARGSUSED*/
446 int
447 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
448     struct mbuf *control, struct proc *p)
449 {
450 	struct inpcb *inp;
451 	int error = 0;
452 
453 	if (req == PRU_CONTROL)
454 		return (in_control(so, (u_long)m, (caddr_t)nam,
455 		    (struct ifnet *)control));
456 
457 	soassertlocked(so);
458 
459 	inp = sotoinpcb(so);
460 	if (inp == NULL) {
461 		error = EINVAL;
462 		goto release;
463 	}
464 
465 	switch (req) {
466 
467 	case PRU_DISCONNECT:
468 		if ((so->so_state & SS_ISCONNECTED) == 0) {
469 			error = ENOTCONN;
470 			break;
471 		}
472 		soisdisconnected(so);
473 		inp->inp_faddr.s_addr = INADDR_ANY;
474 		break;
475 	case PRU_ABORT:
476 		soisdisconnected(so);
477 		if (inp == NULL)
478 			panic("rip_abort");
479 #ifdef MROUTING
480 		if (so == ip_mrouter[inp->inp_rtableid])
481 			ip_mrouter_done(so);
482 #endif
483 		in_pcbdetach(inp);
484 		break;
485 
486 	case PRU_BIND:
487 	    {
488 		struct sockaddr_in *addr;
489 
490 		if ((error = in_nam2sin(nam, &addr)))
491 			break;
492 		if (!((so->so_options & SO_BINDANY) ||
493 		    addr->sin_addr.s_addr == INADDR_ANY ||
494 		    addr->sin_addr.s_addr == INADDR_BROADCAST ||
495 		    in_broadcast(addr->sin_addr, inp->inp_rtableid) ||
496 		    ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) {
497 			error = EADDRNOTAVAIL;
498 			break;
499 		}
500 		inp->inp_laddr = addr->sin_addr;
501 		break;
502 	    }
503 	case PRU_CONNECT:
504 	    {
505 		struct sockaddr_in *addr;
506 
507 		if ((error = in_nam2sin(nam, &addr)))
508 			break;
509 		inp->inp_faddr = addr->sin_addr;
510 		soisconnected(so);
511 		break;
512 	    }
513 
514 	case PRU_CONNECT2:
515 		error = EOPNOTSUPP;
516 		break;
517 
518 	/*
519 	 * Mark the connection as being incapable of further input.
520 	 */
521 	case PRU_SHUTDOWN:
522 		socantsendmore(so);
523 		break;
524 
525 	/*
526 	 * Ship a packet out.  The appropriate raw output
527 	 * routine handles any massaging necessary.
528 	 */
529 	case PRU_SEND:
530 	    {
531 		struct sockaddr_in dst;
532 
533 		memset(&dst, 0, sizeof(dst));
534 		dst.sin_family = AF_INET;
535 		dst.sin_len = sizeof(dst);
536 		if (so->so_state & SS_ISCONNECTED) {
537 			if (nam) {
538 				error = EISCONN;
539 				break;
540 			}
541 			dst.sin_addr = inp->inp_faddr;
542 		} else {
543 			struct sockaddr_in *addr;
544 
545 			if (nam == NULL) {
546 				error = ENOTCONN;
547 				break;
548 			}
549 			if ((error = in_nam2sin(nam, &addr)))
550 				break;
551 			dst.sin_addr = addr->sin_addr;
552 		}
553 #ifdef IPSEC
554 		/* XXX Find an IPsec TDB */
555 #endif
556 		error = rip_output(m, so, sintosa(&dst), NULL);
557 		m = NULL;
558 		break;
559 	    }
560 
561 	case PRU_SENSE:
562 		/*
563 		 * stat: don't bother with a blocksize.
564 		 */
565 		break;
566 
567 	/*
568 	 * Not supported.
569 	 */
570 	case PRU_LISTEN:
571 	case PRU_ACCEPT:
572 	case PRU_SENDOOB:
573 	case PRU_RCVD:
574 	case PRU_RCVOOB:
575 		error = EOPNOTSUPP;
576 		break;
577 
578 	case PRU_SOCKADDR:
579 		in_setsockaddr(inp, nam);
580 		break;
581 
582 	case PRU_PEERADDR:
583 		in_setpeeraddr(inp, nam);
584 		break;
585 
586 	default:
587 		panic("rip_usrreq");
588 	}
589 release:
590 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
591 		m_freem(control);
592 		m_freem(m);
593 	}
594 	return (error);
595 }
596 
597 int
598 rip_attach(struct socket *so, int proto)
599 {
600 	struct inpcb *inp;
601 	int error;
602 
603 	if (so->so_pcb)
604 		panic("rip_attach");
605 	if ((so->so_state & SS_PRIV) == 0)
606 		return EACCES;
607 	if (proto < 0 || proto >= IPPROTO_MAX)
608 		return EPROTONOSUPPORT;
609 
610 	if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
611 		return error;
612 	NET_ASSERT_LOCKED();
613 	if ((error = in_pcballoc(so, &rawcbtable)))
614 		return error;
615 	inp = sotoinpcb(so);
616 	inp->inp_ip.ip_p = proto;
617 	return 0;
618 }
619 
620 int
621 rip_detach(struct socket *so)
622 {
623 	struct inpcb *inp = sotoinpcb(so);
624 
625 	soassertlocked(so);
626 
627 	if (inp == NULL)
628 		return (EINVAL);
629 
630 #ifdef MROUTING
631 	if (so == ip_mrouter[inp->inp_rtableid])
632 		ip_mrouter_done(so);
633 #endif
634 	in_pcbdetach(inp);
635 
636 	return (0);
637 }
638