xref: /netbsd-src/sys/netinet/udp_usrreq.c (revision aa73cae19608873cc4d1f712c4a0f8f8435f1ffa)
1 /*	$NetBSD: udp_usrreq.c,v 1.131 2005/02/26 22:45:12 perry Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: udp_usrreq.c,v 1.131 2005/02/26 22:45:12 perry Exp $");
65 
66 #include "opt_inet.h"
67 #include "opt_ipsec.h"
68 #include "opt_inet_csum.h"
69 #include "opt_ipkdb.h"
70 #include "opt_mbuftrace.h"
71 
72 #include <sys/param.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <sys/protosw.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
78 #include <sys/errno.h>
79 #include <sys/stat.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/domain.h>
83 #include <sys/sysctl.h>
84 
85 #include <net/if.h>
86 #include <net/route.h>
87 
88 #include <netinet/in.h>
89 #include <netinet/in_systm.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/ip_var.h>
94 #include <netinet/ip_icmp.h>
95 #include <netinet/udp.h>
96 #include <netinet/udp_var.h>
97 
98 #ifdef IPSEC_NAT_T
99 #include <netinet6/ipsec.h>
100 #include <netinet6/esp.h>
101 #endif
102 
103 #ifdef INET6
104 #include <netinet/ip6.h>
105 #include <netinet/icmp6.h>
106 #include <netinet6/ip6_var.h>
107 #include <netinet6/in6_pcb.h>
108 #include <netinet6/udp6_var.h>
109 #endif
110 
111 #ifndef INET6
112 /* always need ip6.h for IP6_EXTHDR_GET */
113 #include <netinet/ip6.h>
114 #endif
115 
116 #include "faith.h"
117 #if defined(NFAITH) && NFAITH > 0
118 #include <net/if_faith.h>
119 #endif
120 
121 #include <machine/stdarg.h>
122 
123 #ifdef FAST_IPSEC
124 #include <netipsec/ipsec.h>
125 #include <netipsec/ipsec_var.h>			/* XXX ipsecstat namespace */
126 #ifdef INET6
127 #include <netipsec/ipsec6.h>
128 #endif
129 #endif	/* FAST_IPSEC*/
130 
131 #ifdef IPSEC
132 #include <netinet6/ipsec.h>
133 #include <netkey/key.h>
134 #endif /*IPSEC*/
135 
136 #ifdef IPKDB
137 #include <ipkdb/ipkdb.h>
138 #endif
139 
/*
 * UDP protocol implementation.
 * Per RFC 768, August, 1980.
 */

/*
 * udpcksum: when nonzero, udp_output() fills in the pseudo-header sum
 * and requests a UDP checksum; when zero the checksum field is sent as 0.
 * Defaults to off only for COMPAT_42 kernels.
 */
#ifdef COMPAT_42
int	udpcksum = 0;		/* XXX */
#else
int	udpcksum = 1;
#endif

/*
 * udp_do_loopback_cksum: when nonzero, checksums are also computed and
 * verified for datagrams that travel over a loopback interface.
 */
int	udp_do_loopback_cksum = 0;
150 
151 struct	inpcbtable udbtable;
152 struct	udpstat udpstat;
153 
/*
 * Prototypes for the file-local helpers, grouped by address family.
 */
#ifdef INET
#ifdef IPSEC_NAT_T
static int udp4_espinudp(struct mbuf *, int, struct sockaddr *,
	struct socket *);
#endif
static void udp4_sendup(struct mbuf *, int, struct sockaddr *,
	struct socket *);
static int udp4_realinput(struct sockaddr_in *, struct sockaddr_in *,
	struct mbuf *, int);
static int udp4_input_checksum(struct mbuf *, const struct udphdr *, int, int);
static void udp_notify(struct inpcb *, int);
#endif /* INET */

#ifdef INET6
static void udp6_sendup(struct mbuf *, int, struct sockaddr *,
	struct socket *);
static int udp6_realinput(int, struct sockaddr_in6 *,
	struct sockaddr_in6 *, struct mbuf *, int);
static int udp6_input_checksum(struct mbuf *, const struct udphdr *, int, int);
#endif /* INET6 */
175 
/*
 * Hash size handed to in_pcbinit() by udp_init(); the build may
 * override the default by predefining UDBHASHSIZE.
 */
#if !defined(UDBHASHSIZE)
#define	UDBHASHSIZE	128
#endif
int	udbhashsize = UDBHASHSIZE;
180 
#ifdef MBUFTRACE
/* mbuf owners: transmit path, receive path, and the socket itself. */
struct mowner udp_tx_mowner = { "udp", "tx" };
struct mowner udp_rx_mowner = { "udp", "rx" };
struct mowner udp_mowner = { "udp" };
#endif
186 
/*
 * Optional event counters for the receive-side checksum paths.
 * Without UDP_CSUM_COUNTERS the increment macro expands to nothing.
 */
#ifndef UDP_CSUM_COUNTERS

#define	UDP_CSUM_COUNTER_INCR(ev)	/* nothing */

#else /* UDP_CSUM_COUNTERS */

#include <sys/device.h>

struct evcnt udp_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum bad");
struct evcnt udp_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum ok");
struct evcnt udp_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum data");
struct evcnt udp_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "swcsum");

#define	UDP_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++

EVCNT_ATTACH_STATIC(udp_hwcsum_bad);
EVCNT_ATTACH_STATIC(udp_hwcsum_ok);
EVCNT_ATTACH_STATIC(udp_hwcsum_data);
EVCNT_ATTACH_STATIC(udp_swcsum);

#endif /* UDP_CSUM_COUNTERS */
211 
212 void
213 udp_init(void)
214 {
215 
216 	in_pcbinit(&udbtable, udbhashsize, udbhashsize);
217 
218 	MOWNER_ATTACH(&udp_tx_mowner);
219 	MOWNER_ATTACH(&udp_rx_mowner);
220 	MOWNER_ATTACH(&udp_mowner);
221 }
222 
/*
 * Verify the checksum of a received UDP datagram, dispatching on the
 * address family to the IPv4 or IPv6 checker.
 *
 * Returns whatever the per-family checker returns (0 when the datagram
 * is acceptable, -1 otherwise).  An address family that is not compiled
 * in panics under DIAGNOSTIC and otherwise yields -1.
 */

int
udp_input_checksum(int af, struct mbuf *m, const struct udphdr *uh,
    int iphlen, int len)
{

#ifdef INET
	if (af == AF_INET)
		return udp4_input_checksum(m, uh, iphlen, len);
#endif
#ifdef INET6
	if (af == AF_INET6)
		return udp6_input_checksum(m, uh, iphlen, len);
#endif

#ifdef DIAGNOSTIC
	panic("udp_input_checksum: unknown af %d", af);
#endif
	/* NOTREACHED */
	return -1;
}
248 
249 #ifdef INET
250 
251 /*
252  * Checksum extended UDP header and data.
253  */
254 
255 static int
256 udp4_input_checksum(struct mbuf *m, const struct udphdr *uh,
257     int iphlen, int len)
258 {
259 
260 	/*
261 	 * XXX it's better to record and check if this mbuf is
262 	 * already checked.
263 	 */
264 
265 	if (uh->uh_sum == 0)
266 		return 0;
267 
268 	switch (m->m_pkthdr.csum_flags &
269 	    ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_UDPv4) |
270 	    M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) {
271 	case M_CSUM_UDPv4|M_CSUM_TCP_UDP_BAD:
272 		UDP_CSUM_COUNTER_INCR(&udp_hwcsum_bad);
273 		goto badcsum;
274 
275 	case M_CSUM_UDPv4|M_CSUM_DATA: {
276 		u_int32_t hw_csum = m->m_pkthdr.csum_data;
277 
278 		UDP_CSUM_COUNTER_INCR(&udp_hwcsum_data);
279 		if (m->m_pkthdr.csum_flags & M_CSUM_NO_PSEUDOHDR) {
280 			const struct ip *ip =
281 			    mtod(m, const struct ip *);
282 
283 			hw_csum = in_cksum_phdr(ip->ip_src.s_addr,
284 			    ip->ip_dst.s_addr,
285 			    htons(hw_csum + len + IPPROTO_UDP));
286 		}
287 		if ((hw_csum ^ 0xffff) != 0)
288 			goto badcsum;
289 		break;
290 	}
291 
292 	case M_CSUM_UDPv4:
293 		/* Checksum was okay. */
294 		UDP_CSUM_COUNTER_INCR(&udp_hwcsum_ok);
295 		break;
296 
297 	default:
298 		/*
299 		 * Need to compute it ourselves.  Maybe skip checksum
300 		 * on loopback interfaces.
301 		 */
302 		if (__predict_true(!(m->m_pkthdr.rcvif->if_flags &
303 				     IFF_LOOPBACK) ||
304 				   udp_do_loopback_cksum)) {
305 			UDP_CSUM_COUNTER_INCR(&udp_swcsum);
306 			if (in4_cksum(m, IPPROTO_UDP, iphlen, len) != 0)
307 				goto badcsum;
308 		}
309 		break;
310 	}
311 
312 	return 0;
313 
314 badcsum:
315 	udpstat.udps_badsum++;
316 	return -1;
317 }
318 
319 void
320 udp_input(struct mbuf *m, ...)
321 {
322 	va_list ap;
323 	struct sockaddr_in src, dst;
324 	struct ip *ip;
325 	struct udphdr *uh;
326 	int iphlen;
327 	int len;
328 	int n;
329 	u_int16_t ip_len;
330 
331 	va_start(ap, m);
332 	iphlen = va_arg(ap, int);
333 	(void)va_arg(ap, int);		/* ignore value, advance ap */
334 	va_end(ap);
335 
336 	MCLAIM(m, &udp_rx_mowner);
337 	udpstat.udps_ipackets++;
338 
339 	/*
340 	 * Get IP and UDP header together in first mbuf.
341 	 */
342 	ip = mtod(m, struct ip *);
343 	IP6_EXTHDR_GET(uh, struct udphdr *, m, iphlen, sizeof(struct udphdr));
344 	if (uh == NULL) {
345 		udpstat.udps_hdrops++;
346 		return;
347 	}
348 	KASSERT(UDP_HDR_ALIGNED_P(uh));
349 
350 	/* destination port of 0 is illegal, based on RFC768. */
351 	if (uh->uh_dport == 0)
352 		goto bad;
353 
354 	/*
355 	 * Make mbuf data length reflect UDP length.
356 	 * If not enough data to reflect UDP length, drop.
357 	 */
358 	ip_len = ntohs(ip->ip_len);
359 	len = ntohs((u_int16_t)uh->uh_ulen);
360 	if (ip_len != iphlen + len) {
361 		if (ip_len < iphlen + len || len < sizeof(struct udphdr)) {
362 			udpstat.udps_badlen++;
363 			goto bad;
364 		}
365 		m_adj(m, iphlen + len - ip_len);
366 	}
367 
368 	/*
369 	 * Checksum extended UDP header and data.
370 	 */
371 	if (udp4_input_checksum(m, uh, iphlen, len))
372 		goto badcsum;
373 
374 	/* construct source and dst sockaddrs. */
375 	bzero(&src, sizeof(src));
376 	src.sin_family = AF_INET;
377 	src.sin_len = sizeof(struct sockaddr_in);
378 	bcopy(&ip->ip_src, &src.sin_addr, sizeof(src.sin_addr));
379 	src.sin_port = uh->uh_sport;
380 	bzero(&dst, sizeof(dst));
381 	dst.sin_family = AF_INET;
382 	dst.sin_len = sizeof(struct sockaddr_in);
383 	bcopy(&ip->ip_dst, &dst.sin_addr, sizeof(dst.sin_addr));
384 	dst.sin_port = uh->uh_dport;
385 
386 	n = udp4_realinput(&src, &dst, m, iphlen);
387 #ifdef INET6
388 	if (IN_MULTICAST(ip->ip_dst.s_addr) || n == 0) {
389 		struct sockaddr_in6 src6, dst6;
390 
391 		bzero(&src6, sizeof(src6));
392 		src6.sin6_family = AF_INET6;
393 		src6.sin6_len = sizeof(struct sockaddr_in6);
394 		src6.sin6_addr.s6_addr[10] = src6.sin6_addr.s6_addr[11] = 0xff;
395 		bcopy(&ip->ip_src, &src6.sin6_addr.s6_addr[12],
396 			sizeof(ip->ip_src));
397 		src6.sin6_port = uh->uh_sport;
398 		bzero(&dst6, sizeof(dst6));
399 		dst6.sin6_family = AF_INET6;
400 		dst6.sin6_len = sizeof(struct sockaddr_in6);
401 		dst6.sin6_addr.s6_addr[10] = dst6.sin6_addr.s6_addr[11] = 0xff;
402 		bcopy(&ip->ip_dst, &dst6.sin6_addr.s6_addr[12],
403 			sizeof(ip->ip_dst));
404 		dst6.sin6_port = uh->uh_dport;
405 
406 		n += udp6_realinput(AF_INET, &src6, &dst6, m, iphlen);
407 	}
408 #endif
409 
410 	if (n == 0) {
411 		if (m->m_flags & (M_BCAST | M_MCAST)) {
412 			udpstat.udps_noportbcast++;
413 			goto bad;
414 		}
415 		udpstat.udps_noport++;
416 #ifdef IPKDB
417 		if (checkipkdb(&ip->ip_src, uh->uh_sport, uh->uh_dport,
418 				m, iphlen + sizeof(struct udphdr),
419 				m->m_pkthdr.len - iphlen - sizeof(struct udphdr))) {
420 			/*
421 			 * It was a debugger connect packet,
422 			 * just drop it now
423 			 */
424 			goto bad;
425 		}
426 #endif
427 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
428 		m = NULL;
429 	}
430 
431 bad:
432 	if (m)
433 		m_freem(m);
434 	return;
435 
436 badcsum:
437 	m_freem(m);
438 }
439 #endif
440 
441 #ifdef INET6
442 static int
443 udp6_input_checksum(struct mbuf *m, const struct udphdr *uh, int off, int len)
444 {
445 
446 	if (__predict_false((m->m_flags & M_LOOP) && !udp_do_loopback_cksum)) {
447 		goto good;
448 	}
449 	if (uh->uh_sum == 0) {
450 		udp6stat.udp6s_nosum++;
451 		goto bad;
452 	}
453 	if (in6_cksum(m, IPPROTO_UDP, off, len) != 0) {
454 		udp6stat.udp6s_badsum++;
455 		goto bad;
456 	}
457 
458 good:
459 	return 0;
460 bad:
461 	return -1;
462 }
463 
464 int
465 udp6_input(struct mbuf **mp, int *offp, int proto)
466 {
467 	struct mbuf *m = *mp;
468 	int off = *offp;
469 	struct sockaddr_in6 src, dst;
470 	struct ip6_hdr *ip6;
471 	struct udphdr *uh;
472 	u_int32_t plen, ulen;
473 
474 	ip6 = mtod(m, struct ip6_hdr *);
475 
476 #if defined(NFAITH) && 0 < NFAITH
477 	if (faithprefix(&ip6->ip6_dst)) {
478 		/* send icmp6 host unreach? */
479 		m_freem(m);
480 		return IPPROTO_DONE;
481 	}
482 #endif
483 
484 	udp6stat.udp6s_ipackets++;
485 
486 	/* check for jumbogram is done in ip6_input.  we can trust pkthdr.len */
487 	plen = m->m_pkthdr.len - off;
488 	IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(struct udphdr));
489 	if (uh == NULL) {
490 		ip6stat.ip6s_tooshort++;
491 		return IPPROTO_DONE;
492 	}
493 	KASSERT(UDP_HDR_ALIGNED_P(uh));
494 	ulen = ntohs((u_short)uh->uh_ulen);
495 	/*
496 	 * RFC2675 section 4: jumbograms will have 0 in the UDP header field,
497 	 * iff payload length > 0xffff.
498 	 */
499 	if (ulen == 0 && plen > 0xffff)
500 		ulen = plen;
501 
502 	if (plen != ulen) {
503 		udp6stat.udp6s_badlen++;
504 		goto bad;
505 	}
506 
507 	/* destination port of 0 is illegal, based on RFC768. */
508 	if (uh->uh_dport == 0)
509 		goto bad;
510 
511 	/* Be proactive about malicious use of IPv4 mapped address */
512 	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
513 	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
514 		/* XXX stat */
515 		goto bad;
516 	}
517 
518 	/*
519 	 * Checksum extended UDP header and data.  Maybe skip checksum
520 	 * on loopback interfaces.
521 	 */
522 	if (udp6_input_checksum(m, uh, off, ulen))
523 		goto bad;
524 
525 	/*
526 	 * Construct source and dst sockaddrs.
527 	 * Note that ifindex (s6_addr16[1]) is already filled.
528 	 */
529 	bzero(&src, sizeof(src));
530 	src.sin6_family = AF_INET6;
531 	src.sin6_len = sizeof(struct sockaddr_in6);
532 	/* KAME hack: recover scopeid */
533 	(void)in6_recoverscope(&src, &ip6->ip6_src, m->m_pkthdr.rcvif);
534 	src.sin6_port = uh->uh_sport;
535 	bzero(&dst, sizeof(dst));
536 	dst.sin6_family = AF_INET6;
537 	dst.sin6_len = sizeof(struct sockaddr_in6);
538 	/* KAME hack: recover scopeid */
539 	(void)in6_recoverscope(&dst, &ip6->ip6_dst, m->m_pkthdr.rcvif);
540 	dst.sin6_port = uh->uh_dport;
541 
542 	if (udp6_realinput(AF_INET6, &src, &dst, m, off) == 0) {
543 		if (m->m_flags & M_MCAST) {
544 			udp6stat.udp6s_noportmcast++;
545 			goto bad;
546 		}
547 		udp6stat.udp6s_noport++;
548 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
549 		m = NULL;
550 	}
551 
552 bad:
553 	if (m)
554 		m_freem(m);
555 	return IPPROTO_DONE;
556 }
557 #endif
558 
559 #ifdef INET
560 static void
561 udp4_sendup(struct mbuf *m, int off /* offset of data portion */,
562 	struct sockaddr *src, struct socket *so)
563 {
564 	struct mbuf *opts = NULL;
565 	struct mbuf *n;
566 	struct inpcb *inp = NULL;
567 
568 	if (!so)
569 		return;
570 	switch (so->so_proto->pr_domain->dom_family) {
571 	case AF_INET:
572 		inp = sotoinpcb(so);
573 		break;
574 #ifdef INET6
575 	case AF_INET6:
576 		break;
577 #endif
578 	default:
579 		return;
580 	}
581 
582 #if defined(IPSEC) || defined(FAST_IPSEC)
583 	/* check AH/ESP integrity. */
584 	if (so != NULL && ipsec4_in_reject_so(m, so)) {
585 		ipsecstat.in_polvio++;
586 		if ((n = m_copy(m, 0, M_COPYALL)) != NULL)
587 			icmp_error(n, ICMP_UNREACH, ICMP_UNREACH_ADMIN_PROHIBIT,
588 			    0, 0);
589 		return;
590 	}
591 #endif /*IPSEC*/
592 
593 	if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
594 		if (inp && (inp->inp_flags & INP_CONTROLOPTS
595 			 || so->so_options & SO_TIMESTAMP)) {
596 			struct ip *ip = mtod(n, struct ip *);
597 			ip_savecontrol(inp, &opts, ip, n);
598 		}
599 
600 		m_adj(n, off);
601 		if (sbappendaddr(&so->so_rcv, src, n,
602 				opts) == 0) {
603 			m_freem(n);
604 			if (opts)
605 				m_freem(opts);
606 			so->so_rcv.sb_overflowed++;
607 			udpstat.udps_fullsock++;
608 		} else
609 			sorwakeup(so);
610 	}
611 }
612 #endif
613 
614 #ifdef INET6
615 static void
616 udp6_sendup(struct mbuf *m, int off /* offset of data portion */,
617 	struct sockaddr *src, struct socket *so)
618 {
619 	struct mbuf *opts = NULL;
620 	struct mbuf *n;
621 	struct in6pcb *in6p = NULL;
622 
623 	if (!so)
624 		return;
625 	if (so->so_proto->pr_domain->dom_family != AF_INET6)
626 		return;
627 	in6p = sotoin6pcb(so);
628 
629 #if defined(IPSEC) || defined(FAST_IPSEC)
630 	/* check AH/ESP integrity. */
631 	if (so != NULL && ipsec6_in_reject_so(m, so)) {
632 		ipsec6stat.in_polvio++;
633 		if ((n = m_copy(m, 0, M_COPYALL)) != NULL)
634 			icmp6_error(n, ICMP6_DST_UNREACH,
635 			    ICMP6_DST_UNREACH_ADMIN, 0);
636 		return;
637 	}
638 #endif /*IPSEC*/
639 
640 	if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
641 		if (in6p && (in6p->in6p_flags & IN6P_CONTROLOPTS
642 			  || in6p->in6p_socket->so_options & SO_TIMESTAMP)) {
643 			struct ip6_hdr *ip6 = mtod(n, struct ip6_hdr *);
644 			ip6_savecontrol(in6p, &opts, ip6, n);
645 		}
646 
647 		m_adj(n, off);
648 		if (sbappendaddr(&so->so_rcv, src, n, opts) == 0) {
649 			m_freem(n);
650 			if (opts)
651 				m_freem(opts);
652 			so->so_rcv.sb_overflowed++;
653 			udp6stat.udp6s_fullsock++;
654 		} else
655 			sorwakeup(so);
656 	}
657 }
658 #endif
659 
660 #ifdef INET
661 static int
662 udp4_realinput(struct sockaddr_in *src, struct sockaddr_in *dst,
663 	struct mbuf *m, int off /* offset of udphdr */)
664 {
665 	u_int16_t *sport, *dport;
666 	int rcvcnt;
667 	struct in_addr *src4, *dst4;
668 	struct inpcb_hdr *inph;
669 	struct inpcb *inp;
670 
671 	rcvcnt = 0;
672 	off += sizeof(struct udphdr);	/* now, offset of payload */
673 
674 	if (src->sin_family != AF_INET || dst->sin_family != AF_INET)
675 		goto bad;
676 
677 	src4 = &src->sin_addr;
678 	sport = &src->sin_port;
679 	dst4 = &dst->sin_addr;
680 	dport = &dst->sin_port;
681 
682 	if (IN_MULTICAST(dst4->s_addr) ||
683 	    in_broadcast(*dst4, m->m_pkthdr.rcvif)) {
684 		/*
685 		 * Deliver a multicast or broadcast datagram to *all* sockets
686 		 * for which the local and remote addresses and ports match
687 		 * those of the incoming datagram.  This allows more than
688 		 * one process to receive multi/broadcasts on the same port.
689 		 * (This really ought to be done for unicast datagrams as
690 		 * well, but that would cause problems with existing
691 		 * applications that open both address-specific sockets and
692 		 * a wildcard socket listening to the same port -- they would
693 		 * end up receiving duplicates of every unicast datagram.
694 		 * Those applications open the multiple sockets to overcome an
695 		 * inadequacy of the UDP socket interface, but for backwards
696 		 * compatibility we avoid the problem here rather than
697 		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
698 		 */
699 
700 		/*
701 		 * KAME note: traditionally we dropped udpiphdr from mbuf here.
702 		 * we need udpiphdr for IPsec processing so we do that later.
703 		 */
704 		/*
705 		 * Locate pcb(s) for datagram.
706 		 */
707 		CIRCLEQ_FOREACH(inph, &udbtable.inpt_queue, inph_queue) {
708 			inp = (struct inpcb *)inph;
709 			if (inp->inp_af != AF_INET)
710 				continue;
711 
712 			if (inp->inp_lport != *dport)
713 				continue;
714 			if (!in_nullhost(inp->inp_laddr)) {
715 				if (!in_hosteq(inp->inp_laddr, *dst4))
716 					continue;
717 			}
718 			if (!in_nullhost(inp->inp_faddr)) {
719 				if (!in_hosteq(inp->inp_faddr, *src4) ||
720 				    inp->inp_fport != *sport)
721 					continue;
722 			}
723 
724 			udp4_sendup(m, off, (struct sockaddr *)src,
725 				inp->inp_socket);
726 			rcvcnt++;
727 
728 			/*
729 			 * Don't look for additional matches if this one does
730 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
731 			 * socket options set.  This heuristic avoids searching
732 			 * through all pcbs in the common case of a non-shared
733 			 * port.  It assumes that an application will never
734 			 * clear these options after setting them.
735 			 */
736 			if ((inp->inp_socket->so_options &
737 			    (SO_REUSEPORT|SO_REUSEADDR)) == 0)
738 				break;
739 		}
740 	} else {
741 		/*
742 		 * Locate pcb for datagram.
743 		 */
744 		inp = in_pcblookup_connect(&udbtable, *src4, *sport, *dst4, *dport);
745 		if (inp == 0) {
746 			++udpstat.udps_pcbhashmiss;
747 			inp = in_pcblookup_bind(&udbtable, *dst4, *dport);
748 			if (inp == 0)
749 				return rcvcnt;
750 		}
751 
752 #ifdef IPSEC_NAT_T
753 		/* Handle ESP over UDP */
754 		if (inp->inp_flags & INP_ESPINUDP_ALL) {
755 			struct sockaddr *sa = (struct sockaddr *)src;
756 
757 			if (udp4_espinudp(m, off, sa, inp->inp_socket) != 0) {
758 				rcvcnt++;
759 				goto bad;
760 			}
761 
762 			/* Normal UDP processing will take place */
763 		}
764 #endif
765 
766 		udp4_sendup(m, off, (struct sockaddr *)src, inp->inp_socket);
767 		rcvcnt++;
768 	}
769 
770 bad:
771 	return rcvcnt;
772 }
773 #endif
774 
775 #ifdef INET6
776 static int
777 udp6_realinput(int af, struct sockaddr_in6 *src, struct sockaddr_in6 *dst,
778 	struct mbuf *m, int off)
779 {
780 	u_int16_t sport, dport;
781 	int rcvcnt;
782 	struct in6_addr src6, dst6;
783 	const struct in_addr *dst4;
784 	struct inpcb_hdr *inph;
785 	struct in6pcb *in6p;
786 
787 	rcvcnt = 0;
788 	off += sizeof(struct udphdr);	/* now, offset of payload */
789 
790 	if (af != AF_INET && af != AF_INET6)
791 		goto bad;
792 	if (src->sin6_family != AF_INET6 || dst->sin6_family != AF_INET6)
793 		goto bad;
794 
795 	in6_embedscope(&src6, src, NULL, NULL);
796 	sport = src->sin6_port;
797 	in6_embedscope(&dst6, dst, NULL, NULL);
798 	dport = dst->sin6_port;
799 	dst4 = (struct in_addr *)&dst->sin6_addr.s6_addr[12];
800 
801 	if (IN6_IS_ADDR_MULTICAST(&dst6) ||
802 	    (af == AF_INET && IN_MULTICAST(dst4->s_addr))) {
803 		/*
804 		 * Deliver a multicast or broadcast datagram to *all* sockets
805 		 * for which the local and remote addresses and ports match
806 		 * those of the incoming datagram.  This allows more than
807 		 * one process to receive multi/broadcasts on the same port.
808 		 * (This really ought to be done for unicast datagrams as
809 		 * well, but that would cause problems with existing
810 		 * applications that open both address-specific sockets and
811 		 * a wildcard socket listening to the same port -- they would
812 		 * end up receiving duplicates of every unicast datagram.
813 		 * Those applications open the multiple sockets to overcome an
814 		 * inadequacy of the UDP socket interface, but for backwards
815 		 * compatibility we avoid the problem here rather than
816 		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
817 		 */
818 
819 		/*
820 		 * KAME note: traditionally we dropped udpiphdr from mbuf here.
821 		 * we need udpiphdr for IPsec processing so we do that later.
822 		 */
823 		/*
824 		 * Locate pcb(s) for datagram.
825 		 */
826 		CIRCLEQ_FOREACH(inph, &udbtable.inpt_queue, inph_queue) {
827 			in6p = (struct in6pcb *)inph;
828 			if (in6p->in6p_af != AF_INET6)
829 				continue;
830 
831 			if (in6p->in6p_lport != dport)
832 				continue;
833 			if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
834 				if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &dst6))
835 					continue;
836 			} else {
837 				if (IN6_IS_ADDR_V4MAPPED(&dst6) &&
838 				    (in6p->in6p_flags & IN6P_IPV6_V6ONLY))
839 					continue;
840 			}
841 			if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
842 				if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr,
843 				    &src6) || in6p->in6p_fport != sport)
844 					continue;
845 			} else {
846 				if (IN6_IS_ADDR_V4MAPPED(&src6) &&
847 				    (in6p->in6p_flags & IN6P_IPV6_V6ONLY))
848 					continue;
849 			}
850 
851 			udp6_sendup(m, off, (struct sockaddr *)src,
852 				in6p->in6p_socket);
853 			rcvcnt++;
854 
855 			/*
856 			 * Don't look for additional matches if this one does
857 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
858 			 * socket options set.  This heuristic avoids searching
859 			 * through all pcbs in the common case of a non-shared
860 			 * port.  It assumes that an application will never
861 			 * clear these options after setting them.
862 			 */
863 			if ((in6p->in6p_socket->so_options &
864 			    (SO_REUSEPORT|SO_REUSEADDR)) == 0)
865 				break;
866 		}
867 	} else {
868 		/*
869 		 * Locate pcb for datagram.
870 		 */
871 		in6p = in6_pcblookup_connect(&udbtable, &src6, sport,
872 		    &dst6, dport, 0);
873 		if (in6p == 0) {
874 			++udpstat.udps_pcbhashmiss;
875 			in6p = in6_pcblookup_bind(&udbtable, &dst6, dport, 0);
876 			if (in6p == 0)
877 				return rcvcnt;
878 		}
879 
880 		udp6_sendup(m, off, (struct sockaddr *)src, in6p->in6p_socket);
881 		rcvcnt++;
882 	}
883 
884 bad:
885 	return rcvcnt;
886 }
887 #endif
888 
889 #ifdef INET
890 /*
891  * Notify a udp user of an asynchronous error;
892  * just wake up so that he can collect error status.
893  */
894 static void
895 udp_notify(struct inpcb *inp, int errno)
896 {
897 	inp->inp_socket->so_error = errno;
898 	sorwakeup(inp->inp_socket);
899 	sowwakeup(inp->inp_socket);
900 }
901 
902 void *
903 udp_ctlinput(int cmd, struct sockaddr *sa, void *v)
904 {
905 	struct ip *ip = v;
906 	struct udphdr *uh;
907 	void (*notify)(struct inpcb *, int) = udp_notify;
908 	int errno;
909 
910 	if (sa->sa_family != AF_INET
911 	 || sa->sa_len != sizeof(struct sockaddr_in))
912 		return NULL;
913 	if ((unsigned)cmd >= PRC_NCMDS)
914 		return NULL;
915 	errno = inetctlerrmap[cmd];
916 	if (PRC_IS_REDIRECT(cmd))
917 		notify = in_rtchange, ip = 0;
918 	else if (cmd == PRC_HOSTDEAD)
919 		ip = 0;
920 	else if (errno == 0)
921 		return NULL;
922 	if (ip) {
923 		uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
924 		in_pcbnotify(&udbtable, satosin(sa)->sin_addr, uh->uh_dport,
925 		    ip->ip_src, uh->uh_sport, errno, notify);
926 
927 		/* XXX mapped address case */
928 	} else
929 		in_pcbnotifyall(&udbtable, satosin(sa)->sin_addr, errno,
930 		    notify);
931 	return NULL;
932 }
933 
934 int
935 udp_ctloutput(op, so, level, optname, mp)
936 	int op;
937 	struct socket *so;
938 	int level, optname;
939 	struct mbuf **mp;
940 {
941 	int s;
942 	int error = 0;
943 	struct mbuf *m;
944 	struct inpcb *inp;
945 	int family;
946 
947 	family = so->so_proto->pr_domain->dom_family;
948 
949 	s = splsoftnet();
950 	switch (family) {
951 #ifdef INET
952 	case PF_INET:
953 		if (level != IPPROTO_UDP) {
954 			error = ip_ctloutput(op, so, level, optname, mp);
955 			goto end;
956 		}
957 		break;
958 #endif
959 #ifdef INET6
960 	case PF_INET6:
961 		if (level != IPPROTO_UDP) {
962 			error = ip6_ctloutput(op, so, level, optname, mp);
963 			goto end;
964 		}
965 		break;
966 #endif
967 	default:
968 		error = EAFNOSUPPORT;
969 		goto end;
970 		break;
971 	}
972 
973 
974 	switch (op) {
975 	case PRCO_SETOPT:
976 		m = *mp;
977 		inp = sotoinpcb(so);
978 
979 		switch (optname) {
980 		case UDP_ENCAP:
981 			if (m == NULL || m->m_len < sizeof (int)) {
982 				error = EINVAL;
983 				goto end;
984 			}
985 
986 			switch(*mtod(m, int *)) {
987 #ifdef IPSEC_NAT_T
988 			case 0:
989 				inp->inp_flags &= ~INP_ESPINUDP_ALL;
990 				break;
991 
992 			case UDP_ENCAP_ESPINUDP:
993 				inp->inp_flags &= ~INP_ESPINUDP_ALL;
994 				inp->inp_flags |= INP_ESPINUDP;
995 				break;
996 
997 			case UDP_ENCAP_ESPINUDP_NON_IKE:
998 				inp->inp_flags &= ~INP_ESPINUDP_ALL;
999 				inp->inp_flags |= INP_ESPINUDP_NON_IKE;
1000 				break;
1001 #endif
1002 			default:
1003 				error = EINVAL;
1004 				goto end;
1005 				break;
1006 			}
1007 			break;
1008 
1009 		default:
1010 			error = ENOPROTOOPT;
1011 			goto end;
1012 			break;
1013 		}
1014 		break;
1015 
1016 	default:
1017 		error = EINVAL;
1018 		goto end;
1019 		break;
1020 	}
1021 
1022 end:
1023 	splx(s);
1024 	return error;
1025 }
1026 
1027 
1028 int
1029 udp_output(struct mbuf *m, ...)
1030 {
1031 	struct inpcb *inp;
1032 	struct udpiphdr *ui;
1033 	struct route *ro;
1034 	int len = m->m_pkthdr.len;
1035 	int error = 0;
1036 	va_list ap;
1037 
1038 	MCLAIM(m, &udp_tx_mowner);
1039 	va_start(ap, m);
1040 	inp = va_arg(ap, struct inpcb *);
1041 	va_end(ap);
1042 
1043 	/*
1044 	 * Calculate data length and get a mbuf
1045 	 * for UDP and IP headers.
1046 	 */
1047 	M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
1048 	if (m == 0) {
1049 		error = ENOBUFS;
1050 		goto release;
1051 	}
1052 
1053 	/*
1054 	 * Compute the packet length of the IP header, and
1055 	 * punt if the length looks bogus.
1056 	 */
1057 	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
1058 		error = EMSGSIZE;
1059 		goto release;
1060 	}
1061 
1062 	/*
1063 	 * Fill in mbuf with extended UDP header
1064 	 * and addresses and length put into network format.
1065 	 */
1066 	ui = mtod(m, struct udpiphdr *);
1067 	ui->ui_pr = IPPROTO_UDP;
1068 	ui->ui_src = inp->inp_laddr;
1069 	ui->ui_dst = inp->inp_faddr;
1070 	ui->ui_sport = inp->inp_lport;
1071 	ui->ui_dport = inp->inp_fport;
1072 	ui->ui_ulen = htons((u_int16_t)len + sizeof(struct udphdr));
1073 
1074 	ro = &inp->inp_route;
1075 
1076 	/*
1077 	 * Set up checksum and output datagram.
1078 	 */
1079 	if (udpcksum) {
1080 		/*
1081 		 * XXX Cache pseudo-header checksum part for
1082 		 * XXX "connected" UDP sockets.
1083 		 * Maybe skip checksums on loopback interfaces.
1084 		 */
1085 		ui->ui_sum = in_cksum_phdr(ui->ui_src.s_addr,
1086 		    ui->ui_dst.s_addr, htons((u_int16_t)len +
1087 		    sizeof(struct udphdr) + IPPROTO_UDP));
1088 		if (__predict_true(ro->ro_rt == NULL ||
1089 				   !(ro->ro_rt->rt_ifp->if_flags &
1090 				     IFF_LOOPBACK) ||
1091 				   udp_do_loopback_cksum))
1092 			m->m_pkthdr.csum_flags = M_CSUM_UDPv4;
1093 		else
1094 			m->m_pkthdr.csum_flags = 0;
1095 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1096 	} else
1097 		ui->ui_sum = 0;
1098 	((struct ip *)ui)->ip_len = htons(sizeof (struct udpiphdr) + len);
1099 	((struct ip *)ui)->ip_ttl = inp->inp_ip.ip_ttl;	/* XXX */
1100 	((struct ip *)ui)->ip_tos = inp->inp_ip.ip_tos;	/* XXX */
1101 	udpstat.udps_opackets++;
1102 
1103 	return (ip_output(m, inp->inp_options, ro,
1104 	    inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST),
1105 	    inp->inp_moptions, inp->inp_socket));
1106 
1107 release:
1108 	m_freem(m);
1109 	return (error);
1110 }
1111 
/*
 * Default socket buffer reservations for UDP sockets.  They are handed
 * to soreserve() by the PRU_ATTACH request and are exported read-write
 * as net.inet.udp.sendspace and net.inet.udp.recvspace.
 */
int	udp_sendspace = 9 * 1024;	/* really max datagram size */
/* 40 1K datagrams, each accompanied by a sockaddr_in */
int	udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));
1115 
1116 /*ARGSUSED*/
1117 int
1118 udp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
1119 	struct mbuf *control, struct proc *p)
1120 {
1121 	struct inpcb *inp;
1122 	int s;
1123 	int error = 0;
1124 
1125 	if (req == PRU_CONTROL)
1126 		return (in_control(so, (long)m, (caddr_t)nam,
1127 		    (struct ifnet *)control, p));
1128 
1129 	if (req == PRU_PURGEIF) {
1130 		in_pcbpurgeif0(&udbtable, (struct ifnet *)control);
1131 		in_purgeif((struct ifnet *)control);
1132 		in_pcbpurgeif(&udbtable, (struct ifnet *)control);
1133 		return (0);
1134 	}
1135 
1136 	s = splsoftnet();
1137 	inp = sotoinpcb(so);
1138 #ifdef DIAGNOSTIC
1139 	if (req != PRU_SEND && req != PRU_SENDOOB && control)
1140 		panic("udp_usrreq: unexpected control mbuf");
1141 #endif
1142 	if (inp == 0 && req != PRU_ATTACH) {
1143 		error = EINVAL;
1144 		goto release;
1145 	}
1146 
1147 	/*
1148 	 * Note: need to block udp_input while changing
1149 	 * the udp pcb queue and/or pcb addresses.
1150 	 */
1151 	switch (req) {
1152 
1153 	case PRU_ATTACH:
1154 		if (inp != 0) {
1155 			error = EISCONN;
1156 			break;
1157 		}
1158 #ifdef MBUFTRACE
1159 		so->so_mowner = &udp_mowner;
1160 		so->so_rcv.sb_mowner = &udp_rx_mowner;
1161 		so->so_snd.sb_mowner = &udp_tx_mowner;
1162 #endif
1163 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1164 			error = soreserve(so, udp_sendspace, udp_recvspace);
1165 			if (error)
1166 				break;
1167 		}
1168 		error = in_pcballoc(so, &udbtable);
1169 		if (error)
1170 			break;
1171 		inp = sotoinpcb(so);
1172 		inp->inp_ip.ip_ttl = ip_defttl;
1173 		break;
1174 
1175 	case PRU_DETACH:
1176 		in_pcbdetach(inp);
1177 		break;
1178 
1179 	case PRU_BIND:
1180 		error = in_pcbbind(inp, nam, p);
1181 		break;
1182 
1183 	case PRU_LISTEN:
1184 		error = EOPNOTSUPP;
1185 		break;
1186 
1187 	case PRU_CONNECT:
1188 		error = in_pcbconnect(inp, nam);
1189 		if (error)
1190 			break;
1191 		soisconnected(so);
1192 		break;
1193 
1194 	case PRU_CONNECT2:
1195 		error = EOPNOTSUPP;
1196 		break;
1197 
1198 	case PRU_DISCONNECT:
1199 		/*soisdisconnected(so);*/
1200 		so->so_state &= ~SS_ISCONNECTED;	/* XXX */
1201 		in_pcbdisconnect(inp);
1202 		inp->inp_laddr = zeroin_addr;		/* XXX */
1203 		in_pcbstate(inp, INP_BOUND);		/* XXX */
1204 		break;
1205 
1206 	case PRU_SHUTDOWN:
1207 		socantsendmore(so);
1208 		break;
1209 
1210 	case PRU_RCVD:
1211 		error = EOPNOTSUPP;
1212 		break;
1213 
1214 	case PRU_SEND:
1215 		if (control && control->m_len) {
1216 			m_freem(control);
1217 			m_freem(m);
1218 			error = EINVAL;
1219 			break;
1220 		}
1221 	{
1222 		struct in_addr laddr;			/* XXX */
1223 
1224 		if (nam) {
1225 			laddr = inp->inp_laddr;		/* XXX */
1226 			if ((so->so_state & SS_ISCONNECTED) != 0) {
1227 				error = EISCONN;
1228 				goto die;
1229 			}
1230 			error = in_pcbconnect(inp, nam);
1231 			if (error)
1232 				goto die;
1233 		} else {
1234 			if ((so->so_state & SS_ISCONNECTED) == 0) {
1235 				error = ENOTCONN;
1236 				goto die;
1237 			}
1238 		}
1239 		error = udp_output(m, inp);
1240 		m = NULL;
1241 		if (nam) {
1242 			in_pcbdisconnect(inp);
1243 			inp->inp_laddr = laddr;		/* XXX */
1244 			in_pcbstate(inp, INP_BOUND);	/* XXX */
1245 		}
1246 	  die:
1247 		if (m)
1248 			m_freem(m);
1249 	}
1250 		break;
1251 
1252 	case PRU_SENSE:
1253 		/*
1254 		 * stat: don't bother with a blocksize.
1255 		 */
1256 		splx(s);
1257 		return (0);
1258 
1259 	case PRU_RCVOOB:
1260 		error =  EOPNOTSUPP;
1261 		break;
1262 
1263 	case PRU_SENDOOB:
1264 		m_freem(control);
1265 		m_freem(m);
1266 		error =  EOPNOTSUPP;
1267 		break;
1268 
1269 	case PRU_SOCKADDR:
1270 		in_setsockaddr(inp, nam);
1271 		break;
1272 
1273 	case PRU_PEERADDR:
1274 		in_setpeeraddr(inp, nam);
1275 		break;
1276 
1277 	default:
1278 		panic("udp_usrreq");
1279 	}
1280 
1281 release:
1282 	splx(s);
1283 	return (error);
1284 }
1285 
1286 /*
1287  * Sysctl for udp variables.
1288  */
1289 SYSCTL_SETUP(sysctl_net_inet_udp_setup, "sysctl net.inet.udp subtree setup")
1290 {
1291 
1292 	sysctl_createv(clog, 0, NULL, NULL,
1293 		       CTLFLAG_PERMANENT,
1294 		       CTLTYPE_NODE, "net", NULL,
1295 		       NULL, 0, NULL, 0,
1296 		       CTL_NET, CTL_EOL);
1297 	sysctl_createv(clog, 0, NULL, NULL,
1298 		       CTLFLAG_PERMANENT,
1299 		       CTLTYPE_NODE, "inet", NULL,
1300 		       NULL, 0, NULL, 0,
1301 		       CTL_NET, PF_INET, CTL_EOL);
1302 	sysctl_createv(clog, 0, NULL, NULL,
1303 		       CTLFLAG_PERMANENT,
1304 		       CTLTYPE_NODE, "udp",
1305 		       SYSCTL_DESCR("UDPv4 related settings"),
1306 		       NULL, 0, NULL, 0,
1307 		       CTL_NET, PF_INET, IPPROTO_UDP, CTL_EOL);
1308 
1309 	sysctl_createv(clog, 0, NULL, NULL,
1310 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1311 		       CTLTYPE_INT, "checksum",
1312 		       SYSCTL_DESCR("Compute UDP checksums"),
1313 		       NULL, 0, &udpcksum, 0,
1314 		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_CHECKSUM,
1315 		       CTL_EOL);
1316 	sysctl_createv(clog, 0, NULL, NULL,
1317 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1318 		       CTLTYPE_INT, "sendspace",
1319 		       SYSCTL_DESCR("Default UDP send buffer size"),
1320 		       NULL, 0, &udp_sendspace, 0,
1321 		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_SENDSPACE,
1322 		       CTL_EOL);
1323 	sysctl_createv(clog, 0, NULL, NULL,
1324 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1325 		       CTLTYPE_INT, "recvspace",
1326 		       SYSCTL_DESCR("Default UDP receive buffer size"),
1327 		       NULL, 0, &udp_recvspace, 0,
1328 		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_RECVSPACE,
1329 		       CTL_EOL);
1330 	sysctl_createv(clog, 0, NULL, NULL,
1331 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1332 		       CTLTYPE_INT, "do_loopback_cksum",
1333 		       SYSCTL_DESCR("Perform UDP checksum on loopback"),
1334 		       NULL, 0, &udp_do_loopback_cksum, 0,
1335 		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_LOOPBACKCKSUM,
1336 		       CTL_EOL);
1337 }
1338 #endif
1339 
1340 #if (defined INET && defined IPSEC_NAT_T)
1341 /*
1342  * Returns:
1343  * 1 if the packet was processed
1344  * 0 if normal UDP processing should take place
1345  */
1346 static int
1347 udp4_espinudp(m, off, src, so)
1348 	struct mbuf *m;
1349 	int off;
1350 	struct sockaddr *src;
1351 	struct socket *so;
1352 {
1353 	size_t len;
1354 	caddr_t data;
1355 	struct inpcb *inp;
1356 	size_t skip = 0;
1357 	size_t minlen;
1358 	size_t iphdrlen;
1359 	struct ip *ip;
1360 	struct mbuf *n;
1361 
1362 	/*
1363 	 * Collapse the mbuf chain if the first mbuf is too short
1364 	 * The longest case is: UDP + non ESP marker + ESP
1365 	 */
1366 	minlen = off + sizeof(u_int64_t) + sizeof(struct esp);
1367 	if (minlen > m->m_pkthdr.len)
1368 		minlen = m->m_pkthdr.len;
1369 
1370 	if (m->m_len < minlen) {
1371 		if ((m = m_pullup(m, minlen)) == NULL) {
1372 			printf("udp4_espinudp: m_pullup failed\n");
1373 			return 0;
1374 		}
1375 	}
1376 
1377 	len = m->m_len - off;
1378 	data = mtod(m, caddr_t) + off;
1379 	inp = sotoinpcb(so);
1380 
1381 	/* Ignore keepalive packets */
1382 	if ((len == 1) && (data[0] == '\xff')) {
1383 		return 1;
1384 	}
1385 
1386 	/*
1387 	 * Check that the payload is long enough to hold
1388 	 * an ESP header and compute the length of encapsulation
1389 	 * header to remove
1390 	 */
1391 	if (inp->inp_flags & INP_ESPINUDP) {
1392 		u_int32_t *st = (u_int32_t *)data;
1393 
1394 		if ((len <= sizeof(struct esp)) || (*st == 0))
1395 			return 0; /* Normal UDP processing */
1396 
1397 		skip = sizeof(struct udphdr);
1398 	}
1399 
1400 	if (inp->inp_flags & INP_ESPINUDP_NON_IKE) {
1401 		u_int64_t *st = (u_int64_t *)data;
1402 
1403 		if ((len <= sizeof(u_int64_t) + sizeof(struct esp))
1404 		    || (*st != 0))
1405 			return 0; /* Normal UDP processing */
1406 
1407 		skip = sizeof(struct udphdr) + sizeof(u_int64_t);
1408 	}
1409 
1410 	/*
1411 	 * Remove the UDP header (and possibly the non ESP marker)
1412 	 * IP header lendth is iphdrlen
1413 	 * Before:
1414 	 *   <--- off --->
1415 	 *   +----+------+-----+
1416 	 *   | IP |  UDP | ESP |
1417 	 *   +----+------+-----+
1418 	 *        <-skip->
1419 	 * After:
1420 	 *          +----+-----+
1421 	 *          | IP | ESP |
1422 	 *          +----+-----+
1423 	 *   <-skip->
1424 	 */
1425 	iphdrlen = off - sizeof(struct udphdr);
1426 	memmove(mtod(m, caddr_t) + skip, mtod(m, caddr_t), iphdrlen);
1427 	m_adj(m, skip);
1428 
1429 	ip = mtod(m, struct ip *);
1430 	ip->ip_len = htons(ntohs(ip->ip_len) - skip);
1431 	ip->ip_p = IPPROTO_ESP;
1432 
1433 	/*
1434 	 * Copy the mbuf to avoid multiple free, as both
1435 	 * esp4_input (which we call) and udp_input (which
1436 	 * called us) free the mbuf.
1437 	 */
1438 	if ((n = m_dup(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
1439 		printf("udp4_espinudp: m_dup failed\n");
1440 		return 0;
1441 	}
1442 
1443 	esp4_input(n, iphdrlen);
1444 
1445 	/* We handled it, it shoudln't be handled by UDP */
1446 	return 1;
1447 }
1448 #endif
1449