xref: /openbsd-src/sys/netinet/raw_ip.c (revision 3374c67d44f9b75b98444cbf63020f777792342e)
1 /*	$OpenBSD: raw_ip.c,v 1.150 2022/10/17 14:49:02 mvs Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/if_var.h>
80 #include <net/route.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_icmp.h>
89 
90 #include <net/pfvar.h>
91 
92 #include "pf.h"
93 
/*
 * Table of all raw IP protocol control blocks.  It is set up by
 * rip_init(), new PCBs are allocated into it by rip_attach(), and
 * rip_input() walks its queue to find the sockets that should receive
 * an incoming packet.
 */
struct inpcbtable rawcbtable;
95 
96 /*
97  * Nominal space allocated to a raw ip socket.
98  */
99 #define	RIPSNDQ		8192
100 #define	RIPRCVQ		8192
101 
102 /*
103  * Raw interface to IP protocol.
104  */
105 
106 const struct pr_usrreqs rip_usrreqs = {
107 	.pru_attach	= rip_attach,
108 	.pru_detach	= rip_detach,
109 	.pru_lock	= rip_lock,
110 	.pru_unlock	= rip_unlock,
111 	.pru_bind	= rip_bind,
112 	.pru_connect	= rip_connect,
113 	.pru_disconnect	= rip_disconnect,
114 	.pru_shutdown	= rip_shutdown,
115 	.pru_send	= rip_send,
116 	.pru_control	= in_control,
117 	.pru_sockaddr	= in_sockaddr,
118 	.pru_peeraddr	= in_peeraddr,
119 };
120 
121 /*
122  * Initialize raw connection block q.
123  */
124 void
125 rip_init(void)
126 {
127 	in_pcbinit(&rawcbtable, 1);
128 }
129 
130 struct mbuf	*rip_chkhdr(struct mbuf *, struct mbuf *);
131 
132 int
133 rip_input(struct mbuf **mp, int *offp, int proto, int af)
134 {
135 	struct mbuf *m = *mp;
136 	struct ip *ip = mtod(m, struct ip *);
137 	struct inpcb *inp;
138 	SIMPLEQ_HEAD(, inpcb) inpcblist;
139 	struct in_addr *key;
140 	struct counters_ref ref;
141 	uint64_t *counters;
142 	struct sockaddr_in ripsrc;
143 
144 	KASSERT(af == AF_INET);
145 
146 	memset(&ripsrc, 0, sizeof(ripsrc));
147 	ripsrc.sin_family = AF_INET;
148 	ripsrc.sin_len = sizeof(ripsrc);
149 	ripsrc.sin_addr = ip->ip_src;
150 
151 	key = &ip->ip_dst;
152 #if NPF > 0
153 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
154 		struct pf_divert *divert;
155 
156 		divert = pf_find_divert(m);
157 		KASSERT(divert != NULL);
158 		switch (divert->type) {
159 		case PF_DIVERT_TO:
160 			key = &divert->addr.v4;
161 			break;
162 		case PF_DIVERT_REPLY:
163 			break;
164 		default:
165 			panic("%s: unknown divert type %d, mbuf %p, divert %p",
166 			    __func__, divert->type, m, divert);
167 		}
168 	}
169 #endif
170 	SIMPLEQ_INIT(&inpcblist);
171 	rw_enter_write(&rawcbtable.inpt_notify);
172 	mtx_enter(&rawcbtable.inpt_mtx);
173 	TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
174 		if (inp->inp_socket->so_state & SS_CANTRCVMORE)
175 			continue;
176 #ifdef INET6
177 		if (inp->inp_flags & INP_IPV6)
178 			continue;
179 #endif
180 		if (rtable_l2(inp->inp_rtableid) !=
181 		    rtable_l2(m->m_pkthdr.ph_rtableid))
182 			continue;
183 
184 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
185 			continue;
186 		if (inp->inp_laddr.s_addr &&
187 		    inp->inp_laddr.s_addr != key->s_addr)
188 			continue;
189 		if (inp->inp_faddr.s_addr &&
190 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
191 			continue;
192 
193 		in_pcbref(inp);
194 		SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
195 	}
196 	mtx_leave(&rawcbtable.inpt_mtx);
197 
198 	if (SIMPLEQ_EMPTY(&inpcblist)) {
199 		rw_exit_write(&rawcbtable.inpt_notify);
200 
201 		if (ip->ip_p != IPPROTO_ICMP)
202 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
203 			    0, 0);
204 		else
205 			m_freem(m);
206 
207 		counters = counters_enter(&ref, ipcounters);
208 		counters[ips_noproto]++;
209 		counters[ips_delivered]--;
210 		counters_leave(&ref, ipcounters);
211 
212 		return IPPROTO_DONE;
213 	}
214 
215 	while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
216 		struct mbuf *n, *opts = NULL;
217 
218 		SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
219 		if (SIMPLEQ_EMPTY(&inpcblist))
220 			n = m;
221 		else
222 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
223 		if (n != NULL) {
224 			int ret;
225 
226 			if (inp->inp_flags & INP_CONTROLOPTS ||
227 			    inp->inp_socket->so_options & SO_TIMESTAMP)
228 				ip_savecontrol(inp, &opts, ip, n);
229 
230 			mtx_enter(&inp->inp_mtx);
231 			ret = sbappendaddr(inp->inp_socket,
232 			    &inp->inp_socket->so_rcv,
233 			    sintosa(&ripsrc), n, opts);
234 			mtx_leave(&inp->inp_mtx);
235 
236 			if (ret == 0) {
237 				/* should notify about lost packet */
238 				m_freem(n);
239 				m_freem(opts);
240 			} else
241 				sorwakeup(inp->inp_socket);
242 		}
243 		in_pcbunref(inp);
244 	}
245 	rw_exit_write(&rawcbtable.inpt_notify);
246 
247 	return IPPROTO_DONE;
248 }
249 
250 /*
251  * Generate IP header and pass packet to ip_output.
252  * Tack on options user may have setup with control call.
253  */
254 int
255 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
256     struct mbuf *control)
257 {
258 	struct sockaddr_in *dst = satosin(dstaddr);
259 	struct ip *ip;
260 	struct inpcb *inp;
261 	int flags, error;
262 
263 	inp = sotoinpcb(so);
264 	flags = IP_ALLOWBROADCAST;
265 
266 	/*
267 	 * If the user handed us a complete IP packet, use it.
268 	 * Otherwise, allocate an mbuf for a header and fill it in.
269 	 */
270 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
271 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
272 			m_freem(m);
273 			return (EMSGSIZE);
274 		}
275 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
276 		if (!m)
277 			return (ENOBUFS);
278 		ip = mtod(m, struct ip *);
279 		ip->ip_tos = inp->inp_ip.ip_tos;
280 		ip->ip_off = htons(0);
281 		ip->ip_p = inp->inp_ip.ip_p;
282 		ip->ip_len = htons(m->m_pkthdr.len);
283 		ip->ip_src.s_addr = INADDR_ANY;
284 		ip->ip_dst = dst->sin_addr;
285 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
286 	} else {
287 		if (m->m_pkthdr.len > IP_MAXPACKET) {
288 			m_freem(m);
289 			return (EMSGSIZE);
290 		}
291 
292 		m = rip_chkhdr(m, inp->inp_options);
293 		if (m == NULL)
294 			return (EINVAL);
295 
296 		ip = mtod(m, struct ip *);
297 		if (ip->ip_id == 0)
298 			ip->ip_id = htons(ip_randomid());
299 		dst->sin_addr = ip->ip_dst;
300 
301 		/* XXX prevent ip_output from overwriting header fields */
302 		flags |= IP_RAWOUTPUT;
303 		ipstat_inc(ips_rawout);
304 	}
305 
306 	if (ip->ip_src.s_addr == INADDR_ANY) {
307 		error = in_pcbselsrc(&ip->ip_src, dst, inp);
308 		if (error != 0)
309 			return (error);
310 	}
311 
312 #ifdef INET6
313 	/*
314 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
315 	 *             multicast options, if it does, the last parameter to
316 	 *             ip_output should be guarded against v6/v4 problems.
317 	 */
318 #endif
319 	/* force routing table */
320 	m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
321 
322 #if NPF > 0
323 	if (inp->inp_socket->so_state & SS_ISCONNECTED &&
324 	    ip->ip_p != IPPROTO_ICMP)
325 		pf_mbuf_link_inpcb(m, inp);
326 #endif
327 
328 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
329 	    inp->inp_moptions, inp, 0);
330 	return (error);
331 }
332 
333 struct mbuf *
334 rip_chkhdr(struct mbuf *m, struct mbuf *options)
335 {
336 	struct ip *ip;
337 	int hlen, opt, optlen, cnt;
338 	u_char *cp;
339 
340 	if (m->m_pkthdr.len < sizeof(struct ip)) {
341 		m_freem(m);
342 		return NULL;
343 	}
344 
345 	m = m_pullup(m, sizeof (struct ip));
346 	if (m == NULL)
347 		return NULL;
348 
349 	ip = mtod(m, struct ip *);
350 	hlen = ip->ip_hl << 2;
351 
352 	/* Don't allow packet length sizes that will crash. */
353 	if (hlen < sizeof (struct ip) ||
354 	    ntohs(ip->ip_len) < hlen ||
355 	    ntohs(ip->ip_len) != m->m_pkthdr.len) {
356 		m_freem(m);
357 		return NULL;
358 	}
359 	m = m_pullup(m, hlen);
360 	if (m == NULL)
361 		return NULL;
362 
363 	ip = mtod(m, struct ip *);
364 
365 	if (ip->ip_v != IPVERSION) {
366 		m_freem(m);
367 		return NULL;
368 	}
369 
370 	/*
371 	 * Don't allow both user specified and setsockopt options.
372 	 * If options are present verify them.
373 	 */
374 	if (hlen != sizeof(struct ip)) {
375 		if (options) {
376 			m_freem(m);
377 			return NULL;
378 		} else {
379 			cp = (u_char *)(ip + 1);
380 			cnt = hlen - sizeof(struct ip);
381 			for (; cnt > 0; cnt -= optlen, cp += optlen) {
382 				opt = cp[IPOPT_OPTVAL];
383 				if (opt == IPOPT_EOL)
384 					break;
385 				if (opt == IPOPT_NOP)
386 					optlen = 1;
387 				else {
388 					if (cnt < IPOPT_OLEN + sizeof(*cp)) {
389 						m_freem(m);
390 						return NULL;
391 					}
392 					optlen = cp[IPOPT_OLEN];
393 					if (optlen < IPOPT_OLEN + sizeof(*cp) ||
394 					    optlen > cnt) {
395 						m_freem(m);
396 						return NULL;
397 					}
398 				}
399 			}
400 		}
401 	}
402 
403 	return m;
404 }
405 
406 /*
407  * Raw IP socket option processing.
408  */
409 int
410 rip_ctloutput(int op, struct socket *so, int level, int optname,
411     struct mbuf *m)
412 {
413 	struct inpcb *inp = sotoinpcb(so);
414 	int error;
415 
416 	if (level != IPPROTO_IP)
417 		return (EINVAL);
418 
419 	switch (optname) {
420 
421 	case IP_HDRINCL:
422 		error = 0;
423 		if (op == PRCO_SETOPT) {
424 			if (m == NULL || m->m_len < sizeof (int))
425 				error = EINVAL;
426 			else if (*mtod(m, int *))
427 				inp->inp_flags |= INP_HDRINCL;
428 			else
429 				inp->inp_flags &= ~INP_HDRINCL;
430 		} else {
431 			m->m_len = sizeof(int);
432 			*mtod(m, int *) = inp->inp_flags & INP_HDRINCL;
433 		}
434 		return (error);
435 
436 	case MRT_INIT:
437 	case MRT_DONE:
438 	case MRT_ADD_VIF:
439 	case MRT_DEL_VIF:
440 	case MRT_ADD_MFC:
441 	case MRT_DEL_MFC:
442 	case MRT_VERSION:
443 	case MRT_ASSERT:
444 	case MRT_API_SUPPORT:
445 	case MRT_API_CONFIG:
446 #ifdef MROUTING
447 		switch (op) {
448 		case PRCO_SETOPT:
449 			error = ip_mrouter_set(so, optname, m);
450 			break;
451 		case PRCO_GETOPT:
452 			error = ip_mrouter_get(so, optname, m);
453 			break;
454 		default:
455 			error = EINVAL;
456 			break;
457 		}
458 		return (error);
459 #else
460 		return (EOPNOTSUPP);
461 #endif
462 	}
463 	return (ip_ctloutput(op, so, level, optname, m));
464 }
465 
/*
 * Send and receive buffer space reserved for a raw IP socket by
 * rip_attach(); initialized from RIPSNDQ and RIPRCVQ.
 */
u_long	rip_sendspace = RIPSNDQ;
u_long	rip_recvspace = RIPRCVQ;
468 
469 int
470 rip_attach(struct socket *so, int proto, int wait)
471 {
472 	struct inpcb *inp;
473 	int error;
474 
475 	if (so->so_pcb)
476 		panic("rip_attach");
477 	if ((so->so_state & SS_PRIV) == 0)
478 		return EACCES;
479 	if (proto < 0 || proto >= IPPROTO_MAX)
480 		return EPROTONOSUPPORT;
481 
482 	if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
483 		return error;
484 	NET_ASSERT_LOCKED();
485 	if ((error = in_pcballoc(so, &rawcbtable, wait)))
486 		return error;
487 	inp = sotoinpcb(so);
488 	inp->inp_ip.ip_p = proto;
489 	return 0;
490 }
491 
492 int
493 rip_detach(struct socket *so)
494 {
495 	struct inpcb *inp = sotoinpcb(so);
496 
497 	soassertlocked(so);
498 
499 	if (inp == NULL)
500 		return (EINVAL);
501 
502 #ifdef MROUTING
503 	if (so == ip_mrouter[inp->inp_rtableid])
504 		ip_mrouter_done(so);
505 #endif
506 	in_pcbdetach(inp);
507 
508 	return (0);
509 }
510 
511 void
512 rip_lock(struct socket *so)
513 {
514 	struct inpcb *inp = sotoinpcb(so);
515 
516 	NET_ASSERT_LOCKED();
517 	mtx_enter(&inp->inp_mtx);
518 }
519 
520 void
521 rip_unlock(struct socket *so)
522 {
523 	struct inpcb *inp = sotoinpcb(so);
524 
525 	NET_ASSERT_LOCKED();
526 	mtx_leave(&inp->inp_mtx);
527 }
528 
529 int
530 rip_bind(struct socket *so, struct mbuf *nam, struct proc *p)
531 {
532 	struct inpcb *inp = sotoinpcb(so);
533 	struct sockaddr_in *addr;
534 	int error;
535 
536 	soassertlocked(so);
537 
538 	if ((error = in_nam2sin(nam, &addr)))
539 		return (error);
540 
541 	if (!((so->so_options & SO_BINDANY) ||
542 	    addr->sin_addr.s_addr == INADDR_ANY ||
543 	    addr->sin_addr.s_addr == INADDR_BROADCAST ||
544 	    in_broadcast(addr->sin_addr, inp->inp_rtableid) ||
545 	    ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid)))
546 		return (EADDRNOTAVAIL);
547 
548 	inp->inp_laddr = addr->sin_addr;
549 
550 	return (0);
551 }
552 
553 int
554 rip_connect(struct socket *so, struct mbuf *nam)
555 {
556 	struct inpcb *inp = sotoinpcb(so);
557 	struct sockaddr_in *addr;
558 	int error;
559 
560 	soassertlocked(so);
561 
562 	if ((error = in_nam2sin(nam, &addr)))
563 		return (error);
564 
565 	inp->inp_faddr = addr->sin_addr;
566 	soisconnected(so);
567 
568 	return (0);
569 }
570 
571 int
572 rip_disconnect(struct socket *so)
573 {
574 	struct inpcb *inp = sotoinpcb(so);
575 
576 	soassertlocked(so);
577 
578 	if ((so->so_state & SS_ISCONNECTED) == 0)
579 		return (ENOTCONN);
580 
581 	soisdisconnected(so);
582 	inp->inp_faddr.s_addr = INADDR_ANY;
583 
584 	return (0);
585 }
586 
/*
 * Shut down the sending side of a raw IP socket.
 * The socket is marked as unable to send any more data.
 * Always returns 0.
 */
int
rip_shutdown(struct socket *so)
{
	soassertlocked(so);

	socantsendmore(so);
	return (0);
}
599 
600 int
601 rip_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
602     struct mbuf *control)
603 {
604 	struct inpcb *inp = sotoinpcb(so);
605 	struct sockaddr_in dst;
606 	int error;
607 
608 	soassertlocked(so);
609 
610 	/*
611 	 * Ship a packet out.  The appropriate raw output
612 	 * routine handles any massaging necessary.
613 	 */
614 	memset(&dst, 0, sizeof(dst));
615 	dst.sin_family = AF_INET;
616 	dst.sin_len = sizeof(dst);
617 	if (so->so_state & SS_ISCONNECTED) {
618 		if (nam) {
619 			error = EISCONN;
620 			goto out;
621 		}
622 		dst.sin_addr = inp->inp_faddr;
623 	} else {
624 		struct sockaddr_in *addr;
625 
626 		if (nam == NULL) {
627 			error = ENOTCONN;
628 			goto out;
629 		}
630 		if ((error = in_nam2sin(nam, &addr)))
631 			goto out;
632 		dst.sin_addr = addr->sin_addr;
633 	}
634 #ifdef IPSEC
635 	/* XXX Find an IPsec TDB */
636 #endif
637 	error = rip_output(m, so, sintosa(&dst), NULL);
638 	m = NULL;
639 
640 out:
641 	m_freem(control);
642 	m_freem(m);
643 
644 	return (error);
645 }
646