xref: /openbsd-src/sys/netinet/raw_ip.c (revision 897fc685943471cf985a0fe38ba076ea6fe74fa5)
1 /*	$OpenBSD: raw_ip.c,v 1.108 2017/12/04 13:40:34 bluhm Exp $	*/
2 /*	$NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/protosw.h>
76 #include <sys/socketvar.h>
77 
78 #include <net/if.h>
79 #include <net/if_var.h>
80 #include <net/route.h>
81 
82 #include <netinet/in.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_icmp.h>
89 
90 #include <net/pfvar.h>
91 
92 #include "pf.h"
93 
94 struct inpcbtable rawcbtable;
95 
96 /*
97  * Nominal space allocated to a raw ip socket.
98  */
99 #define	RIPSNDQ		8192
100 #define	RIPRCVQ		8192
101 
102 /*
103  * Raw interface to IP protocol.
104  */
105 
106 /*
107  * Initialize raw connection block q.
108  */
109 void
110 rip_init(void)
111 {
112 
113 	in_pcbinit(&rawcbtable, 1);
114 }
115 
116 struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
117 
118 int
119 rip_input(struct mbuf **mp, int *offp, int proto, int af)
120 {
121 	struct mbuf *m = *mp;
122 	struct ip *ip = mtod(m, struct ip *);
123 	struct inpcb *inp, *last = NULL;
124 	struct in_addr *key;
125 	struct mbuf *opts = NULL;
126 	struct counters_ref ref;
127 	uint64_t *counters;
128 
129 	KASSERT(af == AF_INET);
130 
131 	ripsrc.sin_addr = ip->ip_src;
132 	key = &ip->ip_dst;
133 #if NPF > 0
134 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
135 		struct pf_divert *divert;
136 
137 		divert = pf_find_divert(m);
138 		KASSERT(divert != NULL);
139 		switch (divert->type) {
140 		case PF_DIVERT_TO:
141 			key = &divert->addr.v4;
142 			break;
143 		case PF_DIVERT_REPLY:
144 			break;
145 		default:
146 			panic("%s: unknown divert type %d, mbuf %p, divert %p",
147 			    __func__, divert->type, m, divert);
148 		}
149 	}
150 #endif
151 	NET_ASSERT_LOCKED();
152 	TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
153 		if (inp->inp_socket->so_state & SS_CANTRCVMORE)
154 			continue;
155 #ifdef INET6
156 		if (inp->inp_flags & INP_IPV6)
157 			continue;
158 #endif
159 		if (rtable_l2(inp->inp_rtableid) !=
160 		    rtable_l2(m->m_pkthdr.ph_rtableid))
161 			continue;
162 
163 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
164 			continue;
165 		if (inp->inp_laddr.s_addr &&
166 		    inp->inp_laddr.s_addr != key->s_addr)
167 			continue;
168 		if (inp->inp_faddr.s_addr &&
169 		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
170 			continue;
171 		if (last) {
172 			struct mbuf *n;
173 
174 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
175 				if (last->inp_flags & INP_CONTROLOPTS ||
176 				    last->inp_socket->so_options & SO_TIMESTAMP)
177 					ip_savecontrol(last, &opts, ip, n);
178 				if (sbappendaddr(last->inp_socket,
179 				    &last->inp_socket->so_rcv,
180 				    sintosa(&ripsrc), n, opts) == 0) {
181 					/* should notify about lost packet */
182 					m_freem(n);
183 					m_freem(opts);
184 				} else
185 					sorwakeup(last->inp_socket);
186 				opts = NULL;
187 			}
188 		}
189 		last = inp;
190 	}
191 	if (last) {
192 		if (last->inp_flags & INP_CONTROLOPTS ||
193 		    last->inp_socket->so_options & SO_TIMESTAMP)
194 			ip_savecontrol(last, &opts, ip, m);
195 		if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
196 		    sintosa(&ripsrc), m, opts) == 0) {
197 			m_freem(m);
198 			m_freem(opts);
199 		} else
200 			sorwakeup(last->inp_socket);
201 	} else {
202 		if (ip->ip_p != IPPROTO_ICMP)
203 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
204 		else
205 			m_freem(m);
206 
207 		counters = counters_enter(&ref, ipcounters);
208 		counters[ips_noproto]++;
209 		counters[ips_delivered]--;
210 		counters_leave(&ref, ipcounters);
211 	}
212 	return IPPROTO_DONE;
213 }
214 
215 /*
216  * Generate IP header and pass packet to ip_output.
217  * Tack on options user may have setup with control call.
218  */
219 int
220 rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
221     struct mbuf *control)
222 {
223 	struct ip *ip;
224 	struct inpcb *inp;
225 	int flags, error;
226 
227 	inp = sotoinpcb(so);
228 	flags = IP_ALLOWBROADCAST;
229 
230 	/*
231 	 * If the user handed us a complete IP packet, use it.
232 	 * Otherwise, allocate an mbuf for a header and fill it in.
233 	 */
234 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
235 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
236 			m_freem(m);
237 			return (EMSGSIZE);
238 		}
239 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
240 		if (!m)
241 			return (ENOBUFS);
242 		ip = mtod(m, struct ip *);
243 		ip->ip_tos = inp->inp_ip.ip_tos;
244 		ip->ip_off = htons(0);
245 		ip->ip_p = inp->inp_ip.ip_p;
246 		ip->ip_len = htons(m->m_pkthdr.len);
247 		ip->ip_src = inp->inp_laddr;
248 		ip->ip_dst = satosin(dstaddr)->sin_addr;
249 		ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL;
250 	} else {
251 		if (m->m_pkthdr.len > IP_MAXPACKET) {
252 			m_freem(m);
253 			return (EMSGSIZE);
254 		}
255 		if (m->m_pkthdr.len < sizeof(struct ip)) {
256 			m_freem(m);
257 			return (EINVAL);
258 		}
259 		ip = mtod(m, struct ip *);
260 		/*
261 		 * don't allow both user specified and setsockopt options,
262 		 * and don't allow packet length sizes that will crash
263 		 */
264 		if ((ip->ip_hl != (sizeof (*ip) >> 2) && inp->inp_options) ||
265 		    ntohs(ip->ip_len) > m->m_pkthdr.len ||
266 		    ntohs(ip->ip_len) < ip->ip_hl << 2) {
267 			m_freem(m);
268 			return (EINVAL);
269 		}
270 		if (ip->ip_id == 0) {
271 			ip->ip_id = htons(ip_randomid());
272 		}
273 		/* XXX prevent ip_output from overwriting header fields */
274 		flags |= IP_RAWOUTPUT;
275 		ipstat_inc(ips_rawout);
276 	}
277 #ifdef INET6
278 	/*
279 	 * A thought:  Even though raw IP shouldn't be able to set IPv6
280 	 *             multicast options, if it does, the last parameter to
281 	 *             ip_output should be guarded against v6/v4 problems.
282 	 */
283 #endif
284 	/* force routing table */
285 	m->m_pkthdr.ph_rtableid = inp->inp_rtableid;
286 
287 #if NPF > 0
288 	if (inp->inp_socket->so_state & SS_ISCONNECTED &&
289 	    ip->ip_p != IPPROTO_ICMP)
290 		m->m_pkthdr.pf.inp = inp;
291 #endif
292 
293 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
294 	    inp->inp_moptions, inp, 0);
295 	if (error == EACCES)	/* translate pf(4) error for userland */
296 		error = EHOSTUNREACH;
297 	return (error);
298 }
299 
300 /*
301  * Raw IP socket option processing.
302  */
303 int
304 rip_ctloutput(int op, struct socket *so, int level, int optname,
305     struct mbuf *m)
306 {
307 	struct inpcb *inp = sotoinpcb(so);
308 	int error;
309 
310 	if (level != IPPROTO_IP)
311 		return (EINVAL);
312 
313 	switch (optname) {
314 
315 	case IP_HDRINCL:
316 		error = 0;
317 		if (op == PRCO_SETOPT) {
318 			if (m == NULL || m->m_len < sizeof (int))
319 				error = EINVAL;
320 			else if (*mtod(m, int *))
321 				inp->inp_flags |= INP_HDRINCL;
322 			else
323 				inp->inp_flags &= ~INP_HDRINCL;
324 		} else {
325 			m->m_len = sizeof(int);
326 			*mtod(m, int *) = inp->inp_flags & INP_HDRINCL;
327 		}
328 		return (error);
329 
330 	case MRT_INIT:
331 	case MRT_DONE:
332 	case MRT_ADD_VIF:
333 	case MRT_DEL_VIF:
334 	case MRT_ADD_MFC:
335 	case MRT_DEL_MFC:
336 	case MRT_VERSION:
337 	case MRT_ASSERT:
338 	case MRT_API_SUPPORT:
339 	case MRT_API_CONFIG:
340 #ifdef MROUTING
341 		switch (op) {
342 		case PRCO_SETOPT:
343 			error = ip_mrouter_set(so, optname, m);
344 			break;
345 		case PRCO_GETOPT:
346 			error = ip_mrouter_get(so, optname, m);
347 			break;
348 		default:
349 			error = EINVAL;
350 			break;
351 		}
352 		return (error);
353 #else
354 		return (EOPNOTSUPP);
355 #endif
356 	}
357 	return (ip_ctloutput(op, so, level, optname, m));
358 }
359 
360 u_long	rip_sendspace = RIPSNDQ;
361 u_long	rip_recvspace = RIPRCVQ;
362 
363 /*ARGSUSED*/
364 int
365 rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
366     struct mbuf *control, struct proc *p)
367 {
368 	struct inpcb *inp = sotoinpcb(so);
369 	int error = 0;
370 
371 	soassertlocked(so);
372 
373 	if (req == PRU_CONTROL)
374 		return (in_control(so, (u_long)m, (caddr_t)nam,
375 		    (struct ifnet *)control));
376 
377 	if (inp == NULL) {
378 		error = EINVAL;
379 		goto release;
380 	}
381 
382 	switch (req) {
383 
384 	case PRU_DISCONNECT:
385 		if ((so->so_state & SS_ISCONNECTED) == 0) {
386 			error = ENOTCONN;
387 			break;
388 		}
389 		/* FALLTHROUGH */
390 	case PRU_ABORT:
391 		soisdisconnected(so);
392 		if (inp == NULL)
393 			panic("rip_abort");
394 #ifdef MROUTING
395 		if (so == ip_mrouter[inp->inp_rtableid])
396 			ip_mrouter_done(so);
397 #endif
398 		in_pcbdetach(inp);
399 		break;
400 
401 	case PRU_BIND:
402 	    {
403 		struct sockaddr_in *addr;
404 
405 		if ((error = in_nam2sin(nam, &addr)))
406 			break;
407 		if (!((so->so_options & SO_BINDANY) ||
408 		    addr->sin_addr.s_addr == INADDR_ANY ||
409 		    addr->sin_addr.s_addr == INADDR_BROADCAST ||
410 		    in_broadcast(addr->sin_addr, inp->inp_rtableid) ||
411 		    ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) {
412 			error = EADDRNOTAVAIL;
413 			break;
414 		}
415 		inp->inp_laddr = addr->sin_addr;
416 		break;
417 	    }
418 	case PRU_CONNECT:
419 	    {
420 		struct sockaddr_in *addr;
421 
422 		if ((error = in_nam2sin(nam, &addr)))
423 			break;
424 		inp->inp_faddr = addr->sin_addr;
425 		soisconnected(so);
426 		break;
427 	    }
428 
429 	case PRU_CONNECT2:
430 		error = EOPNOTSUPP;
431 		break;
432 
433 	/*
434 	 * Mark the connection as being incapable of further input.
435 	 */
436 	case PRU_SHUTDOWN:
437 		socantsendmore(so);
438 		break;
439 
440 	/*
441 	 * Ship a packet out.  The appropriate raw output
442 	 * routine handles any massaging necessary.
443 	 */
444 	case PRU_SEND:
445 	    {
446 		struct sockaddr_in dst;
447 
448 		memset(&dst, 0, sizeof(dst));
449 		dst.sin_family = AF_INET;
450 		dst.sin_len = sizeof(dst);
451 		if (so->so_state & SS_ISCONNECTED) {
452 			if (nam) {
453 				error = EISCONN;
454 				break;
455 			}
456 			dst.sin_addr = inp->inp_faddr;
457 		} else {
458 			struct sockaddr_in *addr;
459 
460 			if (nam == NULL) {
461 				error = ENOTCONN;
462 				break;
463 			}
464 			if ((error = in_nam2sin(nam, &addr)))
465 				break;
466 			dst.sin_addr = addr->sin_addr;
467 		}
468 #ifdef IPSEC
469 		/* XXX Find an IPsec TDB */
470 #endif
471 		error = rip_output(m, so, sintosa(&dst), NULL);
472 		m = NULL;
473 		break;
474 	    }
475 
476 	case PRU_SENSE:
477 		/*
478 		 * stat: don't bother with a blocksize.
479 		 */
480 		return (0);
481 
482 	/*
483 	 * Not supported.
484 	 */
485 	case PRU_RCVOOB:
486 	case PRU_RCVD:
487 	case PRU_LISTEN:
488 	case PRU_ACCEPT:
489 	case PRU_SENDOOB:
490 		error = EOPNOTSUPP;
491 		break;
492 
493 	case PRU_SOCKADDR:
494 		in_setsockaddr(inp, nam);
495 		break;
496 
497 	case PRU_PEERADDR:
498 		in_setpeeraddr(inp, nam);
499 		break;
500 
501 	default:
502 		panic("rip_usrreq");
503 	}
504 release:
505 	m_freem(m);
506 	return (error);
507 }
508 
509 int
510 rip_attach(struct socket *so, int proto)
511 {
512 	struct inpcb *inp;
513 	int error;
514 
515 	if (so->so_pcb)
516 		panic("rip_attach");
517 	if ((so->so_state & SS_PRIV) == 0)
518 		return EACCES;
519 	if (proto < 0 || proto >= IPPROTO_MAX)
520 		return EPROTONOSUPPORT;
521 
522 	if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
523 		return error;
524 	NET_ASSERT_LOCKED();
525 	if ((error = in_pcballoc(so, &rawcbtable)))
526 		return error;
527 	inp = sotoinpcb(so);
528 	inp->inp_ip.ip_p = proto;
529 	return 0;
530 }
531 
532 int
533 rip_detach(struct socket *so)
534 {
535 	struct inpcb *inp = sotoinpcb(so);
536 
537 	soassertlocked(so);
538 
539 	if (inp == NULL)
540 		return (EINVAL);
541 
542 #ifdef MROUTING
543 	if (so == ip_mrouter[inp->inp_rtableid])
544 		ip_mrouter_done(so);
545 #endif
546 	in_pcbdetach(inp);
547 
548 	return (0);
549 }
550