xref: /dflybsd-src/sys/netinet/udp_usrreq.c (revision e19e5bbc20dd1d64f1833c5d0ac7a605c8e9bfa0)
1 /*
2  * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
3  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Jeffrey M. Hsu.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of The DragonFly Project nor the names of its
17  *    contributors may be used to endorse or promote products derived
18  *    from this software without specific, prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
63  * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $
64  */
65 
66 #include "opt_ipsec.h"
67 #include "opt_inet6.h"
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/domain.h>
75 #include <sys/proc.h>
76 #include <sys/priv.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/sysctl.h>
81 #include <sys/syslog.h>
82 #include <sys/in_cksum.h>
83 #include <sys/ktr.h>
84 
85 #include <sys/thread2.h>
86 #include <sys/socketvar2.h>
87 #include <sys/serialize.h>
88 
89 #include <machine/stdarg.h>
90 
91 #include <net/if.h>
92 #include <net/route.h>
93 #include <net/netmsg2.h>
94 #include <net/netisr2.h>
95 
96 #include <netinet/in.h>
97 #include <netinet/in_systm.h>
98 #include <netinet/ip.h>
99 #ifdef INET6
100 #include <netinet/ip6.h>
101 #endif
102 #include <netinet/in_pcb.h>
103 #include <netinet/in_var.h>
104 #include <netinet/ip_var.h>
105 #ifdef INET6
106 #include <netinet6/ip6_var.h>
107 #endif
108 #include <netinet/ip_icmp.h>
109 #include <netinet/icmp_var.h>
110 #include <netinet/udp.h>
111 #include <netinet/udp_var.h>
112 
113 #ifdef FAST_IPSEC
114 #include <netproto/ipsec/ipsec.h>
115 #endif
116 
117 #ifdef IPSEC
118 #include <netinet6/ipsec.h>
119 #endif
120 
121 #define UDP_KTR_STRING		"inp=%p"
122 #define UDP_KTR_ARGS		struct inpcb *inp
123 
124 #ifndef KTR_UDP
125 #define KTR_UDP			KTR_ALL
126 #endif
127 
128 KTR_INFO_MASTER(udp);
129 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS);
130 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS);
131 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS);
132 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS);
133 KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS);
134 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS);
135 
136 #define logudp(name, inp)	KTR_LOG(udp_##name, inp)
137 
138 /*
139  * UDP protocol implementation.
140  * Per RFC 768, August, 1980.
141  */
142 #ifndef	COMPAT_42
143 static int	udpcksum = 1;
144 #else
145 static int	udpcksum = 0;		/* XXX */
146 #endif
147 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
148     &udpcksum, 0, "Enable checksumming of UDP packets");
149 
150 int	log_in_vain = 0;
151 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
152     &log_in_vain, 0, "Log all incoming UDP packets");
153 
154 static int	blackhole = 0;
155 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
156 	&blackhole, 0, "Do not send port unreachables for refused connects");
157 
158 static int	strict_mcast_mship = 1;
159 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
160 	&strict_mcast_mship, 0, "Only send multicast to member sockets");
161 
162 int	udp_sosend_async = 1;
163 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW,
164 	&udp_sosend_async, 0, "UDP asynchronized pru_send");
165 
166 int	udp_sosend_prepend = 1;
167 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW,
168 	&udp_sosend_prepend, 0,
169 	"Prepend enough space for proto and link header in pru_send");
170 
171 static int udp_reuseport_ext = 1;
172 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW,
173 	&udp_reuseport_ext, 0, "SO_REUSEPORT extension");
174 
175 struct	inpcbinfo udbinfo;
176 struct	inpcbportinfo udbportinfo;
177 
178 static struct netisr_barrier *udbinfo_br;
179 static struct lwkt_serialize udbinfo_slize = LWKT_SERIALIZE_INITIALIZER;
180 
181 #ifndef UDBHASHSIZE
182 #define UDBHASHSIZE 16
183 #endif
184 
185 struct	udpstat udpstat_percpu[MAXCPU] __cachealign;
186 
/*
 * Lazily initialized IPv6 views of an IPv4 datagram.  The *_init_done bit
 * records whether the companion member has been filled in yet, so that
 * udp_append() converts at most once per packet.
 *
 * Without INET6 only the tags are declared, so that pointers to these
 * types can still appear in prototypes.
 */
#ifndef INET6
struct udp_in6;
struct udp_ip6;
#else
struct udp_in6 {
	struct sockaddr_in6	uin6_sin;	/* source address */
	u_char			uin6_init_done : 1;
};
struct udp_ip6 {
	struct ip6_hdr		uip6_ip6;	/* header built from IPv4 */
	u_char			uip6_init_done : 1;
};
#endif /* !INET6 */
200 
/* Forward declarations of file-local helpers. */
static void	udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
		    int off, struct sockaddr_in *udp_in,
		    struct udp_in6 *udp_in6, struct udp_ip6 *udp_ip6);
#ifdef INET6
static void	ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip);
#endif

static int	udp_connect_oncpu(struct socket *so, struct thread *td,
		    struct sockaddr_in *sin, struct sockaddr_in *if_sin);
210 
211 void
212 udp_init(void)
213 {
214 	int cpu;
215 
216 	in_pcbinfo_init(&udbinfo);
217 	in_pcbportinfo_init(&udbportinfo, UDBHASHSIZE, FALSE, 0);
218 
219 	udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
220 	udbinfo.portinfo = &udbportinfo;
221 	udbinfo.wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB,
222 					    &udbinfo.wildcardhashmask);
223 	udbinfo.localgrphashbase = hashinit(UDBHASHSIZE, M_PCB,
224 					    &udbinfo.localgrphashmask);
225 	udbinfo.ipi_size = sizeof(struct inpcb);
226 
227 	udbinfo_br = netisr_barrier_create();
228 
229 	/*
230 	 * Initialize UDP statistics counters for each CPU.
231 	 */
232 	for (cpu = 0; cpu < ncpus; ++cpu)
233 		bzero(&udpstat_percpu[cpu], sizeof(struct udpstat));
234 }
235 
236 static int
237 sysctl_udpstat(SYSCTL_HANDLER_ARGS)
238 {
239 	int cpu, error = 0;
240 
241 	for (cpu = 0; cpu < ncpus; ++cpu) {
242 		if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu],
243 					sizeof(struct udpstat))))
244 			break;
245 		if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu],
246 				       sizeof(struct udpstat))))
247 			break;
248 	}
249 
250 	return (error);
251 }
252 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW),
253     0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics");
254 
255 /*
256  * Check multicast packets to make sure they are only sent to sockets with
257  * multicast memberships for the packet's destination address and arrival
258  * interface.  Multicast packets to multicast-unaware sockets are also
259  * disallowed.
260  *
261  * Returns 0 if the packet is acceptable, -1 if it is not.
262  */
263 static __inline int
264 check_multicast_membership(struct ip *ip, struct inpcb *inp, struct mbuf *m)
265 {
266 	int mshipno;
267 	struct ip_moptions *mopt;
268 
269 	if (strict_mcast_mship == 0 ||
270 	    !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
271 		return (0);
272 	}
273 	mopt = inp->inp_moptions;
274 	if (mopt == NULL)
275 		return (-1);
276 	for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) {
277 		struct in_multi *maddr = mopt->imo_membership[mshipno];
278 
279 		if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr &&
280 		    m->m_pkthdr.rcvif == maddr->inm_ifp) {
281 			return (0);
282 		}
283 	}
284 	return (-1);
285 }
286 
287 int
288 udp_input(struct mbuf **mp, int *offp, int proto)
289 {
290 	struct sockaddr_in udp_in = { sizeof udp_in, AF_INET };
291 #ifdef INET6
292 	struct udp_in6 udp_in6 = {
293 		{ sizeof udp_in6.uin6_sin, AF_INET6 }, 0
294 	};
295 	struct udp_ip6 udp_ip6;
296 #endif
297 
298 	int iphlen;
299 	struct ip *ip;
300 	struct udphdr *uh;
301 	struct inpcb *inp;
302 	struct mbuf *m;
303 	struct mbuf *opts = NULL;
304 	int len, off;
305 	struct ip save_ip;
306 	struct sockaddr *append_sa;
307 
308 	off = *offp;
309 	m = *mp;
310 	*mp = NULL;
311 
312 	iphlen = off;
313 	udp_stat.udps_ipackets++;
314 
315 	/*
316 	 * Strip IP options, if any; should skip this,
317 	 * make available to user, and use on returned packets,
318 	 * but we don't yet have a way to check the checksum
319 	 * with options still present.
320 	 */
321 	if (iphlen > sizeof(struct ip)) {
322 		ip_stripoptions(m);
323 		iphlen = sizeof(struct ip);
324 	}
325 
326 	/*
327 	 * IP and UDP headers are together in first mbuf.
328 	 * Already checked and pulled up in ip_demux().
329 	 */
330 	KASSERT(m->m_len >= iphlen + sizeof(struct udphdr),
331 	    ("UDP header not in one mbuf"));
332 
333 	ip = mtod(m, struct ip *);
334 	uh = (struct udphdr *)((caddr_t)ip + iphlen);
335 
336 	/* destination port of 0 is illegal, based on RFC768. */
337 	if (uh->uh_dport == 0)
338 		goto bad;
339 
340 	/*
341 	 * Make mbuf data length reflect UDP length.
342 	 * If not enough data to reflect UDP length, drop.
343 	 */
344 	len = ntohs((u_short)uh->uh_ulen);
345 	if (ip->ip_len != len) {
346 		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
347 			udp_stat.udps_badlen++;
348 			goto bad;
349 		}
350 		m_adj(m, len - ip->ip_len);
351 		/* ip->ip_len = len; */
352 	}
353 	/*
354 	 * Save a copy of the IP header in case we want restore it
355 	 * for sending an ICMP error message in response.
356 	 */
357 	save_ip = *ip;
358 
359 	/*
360 	 * Checksum extended UDP header and data.
361 	 */
362 	if (uh->uh_sum) {
363 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
364 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
365 				uh->uh_sum = m->m_pkthdr.csum_data;
366 			else
367 				uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
368 				    ip->ip_dst.s_addr, htonl((u_short)len +
369 				    m->m_pkthdr.csum_data + IPPROTO_UDP));
370 			uh->uh_sum ^= 0xffff;
371 		} else {
372 			char b[9];
373 
374 			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
375 			bzero(((struct ipovly *)ip)->ih_x1, 9);
376 			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
377 			uh->uh_sum = in_cksum(m, len + sizeof(struct ip));
378 			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
379 		}
380 		if (uh->uh_sum) {
381 			udp_stat.udps_badsum++;
382 			m_freem(m);
383 			return(IPPROTO_DONE);
384 		}
385 	} else
386 		udp_stat.udps_nosum++;
387 
388 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
389 	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
390 		struct inpcb *last;
391 
392 		/*
393 		 * Deliver a multicast or broadcast datagram to *all* sockets
394 		 * for which the local and remote addresses and ports match
395 		 * those of the incoming datagram.  This allows more than
396 		 * one process to receive multi/broadcasts on the same port.
397 		 * (This really ought to be done for unicast datagrams as
398 		 * well, but that would cause problems with existing
399 		 * applications that open both address-specific sockets and
400 		 * a wildcard socket listening to the same port -- they would
401 		 * end up receiving duplicates of every unicast datagram.
402 		 * Those applications open the multiple sockets to overcome an
403 		 * inadequacy of the UDP socket interface, but for backwards
404 		 * compatibility we avoid the problem here rather than
405 		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
406 		 */
407 
408 		/*
409 		 * Construct sockaddr format source address.
410 		 */
411 		udp_in.sin_port = uh->uh_sport;
412 		udp_in.sin_addr = ip->ip_src;
413 		/*
414 		 * Locate pcb(s) for datagram.
415 		 * (Algorithm copied from raw_intr().)
416 		 */
417 		last = NULL;
418 #ifdef INET6
419 		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
420 #endif
421 		LIST_FOREACH(inp, &udbinfo.pcblisthead, inp_list) {
422 			KKASSERT((inp->inp_flags & INP_PLACEMARKER) == 0);
423 #ifdef INET6
424 			if (!(inp->inp_vflag & INP_IPV4))
425 				continue;
426 #endif
427 			if (inp->inp_lport != uh->uh_dport)
428 				continue;
429 			if (inp->inp_laddr.s_addr != INADDR_ANY) {
430 				if (inp->inp_laddr.s_addr !=
431 				    ip->ip_dst.s_addr)
432 					continue;
433 			}
434 			if (inp->inp_faddr.s_addr != INADDR_ANY) {
435 				if (inp->inp_faddr.s_addr !=
436 				    ip->ip_src.s_addr ||
437 				    inp->inp_fport != uh->uh_sport)
438 					continue;
439 			}
440 
441 			if (check_multicast_membership(ip, inp, m) < 0)
442 				continue;
443 
444 			if (last != NULL) {
445 				struct mbuf *n;
446 
447 #ifdef IPSEC
448 				/* check AH/ESP integrity. */
449 				if (ipsec4_in_reject_so(m, last->inp_socket))
450 					ipsecstat.in_polvio++;
451 					/* do not inject data to pcb */
452 				else
453 #endif /*IPSEC*/
454 #ifdef FAST_IPSEC
455 				/* check AH/ESP integrity. */
456 				if (ipsec4_in_reject(m, last))
457 					;
458 				else
459 #endif /*FAST_IPSEC*/
460 				if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL)
461 					udp_append(last, ip, n,
462 					    iphlen + sizeof(struct udphdr),
463 					    &udp_in,
464 #ifdef INET6
465 					    &udp_in6, &udp_ip6
466 #else
467 				            NULL, NULL
468 #endif
469 					    );
470 			}
471 			last = inp;
472 			/*
473 			 * Don't look for additional matches if this one does
474 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
475 			 * socket options set.  This heuristic avoids searching
476 			 * through all pcbs in the common case of a non-shared
477 			 * port.  It * assumes that an application will never
478 			 * clear these options after setting them.
479 			 */
480 			if (!(last->inp_socket->so_options &
481 			    (SO_REUSEPORT | SO_REUSEADDR)))
482 				break;
483 		}
484 
485 		if (last == NULL) {
486 			/*
487 			 * No matching pcb found; discard datagram.
488 			 * (No need to send an ICMP Port Unreachable
489 			 * for a broadcast or multicast datgram.)
490 			 */
491 			udp_stat.udps_noportbcast++;
492 			goto bad;
493 		}
494 #ifdef IPSEC
495 		/* check AH/ESP integrity. */
496 		if (ipsec4_in_reject_so(m, last->inp_socket)) {
497 			ipsecstat.in_polvio++;
498 			goto bad;
499 		}
500 #endif /*IPSEC*/
501 #ifdef FAST_IPSEC
502 		/* check AH/ESP integrity. */
503 		if (ipsec4_in_reject(m, last))
504 			goto bad;
505 #endif /*FAST_IPSEC*/
506 		udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
507 		    &udp_in,
508 #ifdef INET6
509 		    &udp_in6, &udp_ip6
510 #else
511 		    NULL, NULL
512 #endif
513 		    );
514 		return(IPPROTO_DONE);
515 	}
516 	/*
517 	 * Locate pcb for datagram.
518 	 */
519 	inp = in_pcblookup_pkthash(&udbinfo, ip->ip_src, uh->uh_sport,
520 	    ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif,
521 	    udp_reuseport_ext ? m : NULL);
522 	if (inp == NULL) {
523 		if (log_in_vain) {
524 			char buf[sizeof "aaa.bbb.ccc.ddd"];
525 
526 			strcpy(buf, inet_ntoa(ip->ip_dst));
527 			log(LOG_INFO,
528 			    "Connection attempt to UDP %s:%d from %s:%d\n",
529 			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
530 			    ntohs(uh->uh_sport));
531 		}
532 		udp_stat.udps_noport++;
533 		if (m->m_flags & (M_BCAST | M_MCAST)) {
534 			udp_stat.udps_noportbcast++;
535 			goto bad;
536 		}
537 		if (blackhole)
538 			goto bad;
539 #ifdef ICMP_BANDLIM
540 		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
541 			goto bad;
542 #endif
543 		*ip = save_ip;
544 		ip->ip_len += iphlen;
545 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
546 		return(IPPROTO_DONE);
547 	}
548 #ifdef IPSEC
549 	if (ipsec4_in_reject_so(m, inp->inp_socket)) {
550 		ipsecstat.in_polvio++;
551 		goto bad;
552 	}
553 #endif /*IPSEC*/
554 #ifdef FAST_IPSEC
555 	if (ipsec4_in_reject(m, inp))
556 		goto bad;
557 #endif /*FAST_IPSEC*/
558 	/*
559 	 * Check the minimum TTL for socket.
560 	 */
561 	if (ip->ip_ttl < inp->inp_ip_minttl)
562 		goto bad;
563 
564 	/*
565 	 * Construct sockaddr format source address.
566 	 * Stuff source address and datagram in user buffer.
567 	 */
568 	udp_in.sin_port = uh->uh_sport;
569 	udp_in.sin_addr = ip->ip_src;
570 	if ((inp->inp_flags & INP_CONTROLOPTS) ||
571 	    (inp->inp_socket->so_options & SO_TIMESTAMP)) {
572 #ifdef INET6
573 		if (inp->inp_vflag & INP_IPV6) {
574 			int savedflags;
575 
576 			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
577 			savedflags = inp->inp_flags;
578 			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
579 			ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m);
580 			inp->inp_flags = savedflags;
581 		} else
582 #endif
583 		ip_savecontrol(inp, &opts, ip, m);
584 	}
585 	m_adj(m, iphlen + sizeof(struct udphdr));
586 #ifdef INET6
587 	if (inp->inp_vflag & INP_IPV6) {
588 		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
589 		append_sa = (struct sockaddr *)&udp_in6;
590 	} else
591 #endif
592 		append_sa = (struct sockaddr *)&udp_in;
593 
594 	lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token);
595 	if (ssb_appendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) {
596 		udp_stat.udps_fullsock++;
597 		lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
598 		goto bad;
599 	}
600 	lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
601 	sorwakeup(inp->inp_socket);
602 	return(IPPROTO_DONE);
603 bad:
604 	m_freem(m);
605 	if (opts)
606 		m_freem(opts);
607 	return(IPPROTO_DONE);
608 }
609 
#ifdef INET6
/*
 * Fill *ip6 with an IPv6 header that mirrors the IPv4 header *ip.
 *
 * The header is zeroed first; payload length, next header and hop limit
 * are taken from ip_len, ip_p and ip_ttl.  For both addresses word 2 is
 * set to IPV6_ADDR_INT32_SMP and word 3 to the IPv4 address (presumably
 * the IPv4-mapped form -- confirm against the macro's definition).
 */
static void
ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
{
	bzero(ip6, sizeof(*ip6));

	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = ip->ip_len;
	ip6->ip6_nxt = ip->ip_p;
	ip6->ip6_hlim = ip->ip_ttl;

	ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
	ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2];

	ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
	ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
}
#endif
626 
627 /*
628  * subroutine of udp_input(), mainly for source code readability.
629  * caller must properly init udp_ip6 and udp_in6 beforehand.
630  */
631 static void
632 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
633     struct sockaddr_in *udp_in,
634     struct udp_in6 *udp_in6, struct udp_ip6 *udp_ip6)
635 {
636 	struct sockaddr *append_sa;
637 	struct mbuf *opts = NULL;
638 
639 	if (last->inp_flags & INP_CONTROLOPTS ||
640 	    last->inp_socket->so_options & SO_TIMESTAMP) {
641 #ifdef INET6
642 		if (last->inp_vflag & INP_IPV6) {
643 			int savedflags;
644 
645 			if (udp_ip6->uip6_init_done == 0) {
646 				ip_2_ip6_hdr(&udp_ip6->uip6_ip6, ip);
647 				udp_ip6->uip6_init_done = 1;
648 			}
649 			savedflags = last->inp_flags;
650 			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
651 			ip6_savecontrol(last, &opts, &udp_ip6->uip6_ip6, n);
652 			last->inp_flags = savedflags;
653 		} else
654 #endif
655 		ip_savecontrol(last, &opts, ip, n);
656 	}
657 #ifdef INET6
658 	if (last->inp_vflag & INP_IPV6) {
659 		if (udp_in6->uin6_init_done == 0) {
660 			in6_sin_2_v4mapsin6(udp_in, &udp_in6->uin6_sin);
661 			udp_in6->uin6_init_done = 1;
662 		}
663 		append_sa = (struct sockaddr *)&udp_in6->uin6_sin;
664 	} else
665 #endif
666 		append_sa = (struct sockaddr *)udp_in;
667 	m_adj(n, off);
668 	lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token);
669 	if (ssb_appendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) == 0) {
670 		m_freem(n);
671 		if (opts)
672 			m_freem(opts);
673 		udp_stat.udps_fullsock++;
674 	} else {
675 		sorwakeup(last->inp_socket);
676 	}
677 	lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token);
678 }
679 
680 /*
681  * Notify a udp user of an asynchronous error;
682  * just wake up so that he can collect error status.
683  */
684 void
685 udp_notify(struct inpcb *inp, int error)
686 {
687 	inp->inp_socket->so_error = error;
688 	sorwakeup(inp->inp_socket);
689 	sowwakeup(inp->inp_socket);
690 }
691 
692 struct netmsg_udp_notify {
693 	struct netmsg_base base;
694 	void		(*nm_notify)(struct inpcb *, int);
695 	struct in_addr	nm_faddr;
696 	int		nm_arg;
697 };
698 
699 static void
700 udp_notifyall_oncpu(netmsg_t msg)
701 {
702 	struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg;
703 #if 0
704 	int nextcpu;
705 #endif
706 
707 	in_pcbnotifyall(&udbinfo.pcblisthead, nm->nm_faddr,
708 			nm->nm_arg, nm->nm_notify);
709 	lwkt_replymsg(&nm->base.lmsg, 0);
710 
711 #if 0
712 	/* XXX currently udp only runs on cpu 0 */
713 	nextcpu = mycpuid + 1;
714 	if (nextcpu < ncpus2)
715 		lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg);
716 	else
717 		lwkt_replymsg(&nmsg->base.lmsg, 0);
718 #endif
719 }
720 
721 static void
722 udp_rtchange(struct inpcb *inp, int err)
723 {
724 	/* XXX Nuke this, once UDP inpcbs are CPU localized */
725 	if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->rt_cpuid == mycpuid) {
726 		rtfree(inp->inp_route.ro_rt);
727 		inp->inp_route.ro_rt = NULL;
728 		/*
729 		 * A new route can be allocated the next time
730 		 * output is attempted.
731 		 */
732 	}
733 }
734 
735 void
736 udp_ctlinput(netmsg_t msg)
737 {
738 	struct sockaddr *sa = msg->ctlinput.nm_arg;
739 	struct ip *ip = msg->ctlinput.nm_extra;
740 	int cmd = msg->ctlinput.nm_cmd;
741 	struct udphdr *uh;
742 	void (*notify) (struct inpcb *, int) = udp_notify;
743 	struct in_addr faddr;
744 	struct inpcb *inp;
745 
746 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
747 
748 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
749 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
750 		goto done;
751 
752 	if (PRC_IS_REDIRECT(cmd)) {
753 		ip = NULL;
754 		notify = udp_rtchange;
755 	} else if (cmd == PRC_HOSTDEAD) {
756 		ip = NULL;
757 	} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
758 		goto done;
759 	}
760 
761 	if (ip) {
762 		uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
763 		inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
764 					ip->ip_src, uh->uh_sport, 0, NULL);
765 		if (inp != NULL && inp->inp_socket != NULL)
766 			(*notify)(inp, inetctlerrmap[cmd]);
767 	} else if (PRC_IS_REDIRECT(cmd)) {
768 		struct netmsg_udp_notify *nm;
769 
770 		KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
771 		nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT);
772 		netmsg_init(&nm->base, NULL, &netisr_afree_rport,
773 			    0, udp_notifyall_oncpu);
774 		nm->nm_faddr = faddr;
775 		nm->nm_arg = inetctlerrmap[cmd];
776 		nm->nm_notify = notify;
777 		lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg);
778 	} else {
779 		/*
780 		 * XXX We should forward msg upon PRC_HOSTHEAD and ip == NULL,
781 		 * once UDP inpcbs are CPU localized
782 		 */
783 		KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
784 		in_pcbnotifyall(&udbinfo.pcblisthead, faddr, inetctlerrmap[cmd],
785 				notify);
786 	}
787 done:
788 	lwkt_replymsg(&msg->lmsg, 0);
789 }
790 
791 static int
792 udp_pcblist(SYSCTL_HANDLER_ARGS)
793 {
794 	struct xinpcb *xi;
795 	int error, nxi, i;
796 
797 	udbinfo_lock();
798 	error = in_pcblist_global_nomarker(oidp, arg1, arg2, req, &xi, &nxi);
799 	udbinfo_unlock();
800 
801 	if (error) {
802 		KKASSERT(xi == NULL);
803 		return error;
804 	}
805 	if (nxi == 0) {
806 		KKASSERT(xi == NULL);
807 		return 0;
808 	}
809 
810 	for (i = 0; i < nxi; ++i) {
811 		error = SYSCTL_OUT(req, &xi[i], sizeof(xi[i]));
812 		if (error)
813 			break;
814 	}
815 	kfree(xi, M_TEMP);
816 
817 	return error;
818 }
819 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, &udbinfo, 0,
820 	    udp_pcblist, "S,xinpcb", "List of active UDP sockets");
821 
822 static int
823 udp_getcred(SYSCTL_HANDLER_ARGS)
824 {
825 	struct sockaddr_in addrs[2];
826 	struct ucred cred0, *cred = NULL;
827 	struct inpcb *inp;
828 	int error;
829 
830 	error = priv_check(req->td, PRIV_ROOT);
831 	if (error)
832 		return (error);
833 	error = SYSCTL_IN(req, addrs, sizeof addrs);
834 	if (error)
835 		return (error);
836 
837 	udbinfo_lock();
838 	inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
839 				addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
840 	if (inp == NULL || inp->inp_socket == NULL) {
841 		error = ENOENT;
842 	} else {
843 		if (inp->inp_socket->so_cred != NULL) {
844 			cred0 = *(inp->inp_socket->so_cred);
845 			cred = &cred0;
846 		}
847 	}
848 	udbinfo_unlock();
849 
850 	if (error)
851 		return error;
852 
853 	return SYSCTL_OUT(req, cred, sizeof(struct ucred));
854 }
855 
856 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
857     0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection");
858 
859 static void
860 udp_send_redispatch(netmsg_t msg)
861 {
862 	struct mbuf *m = msg->send.nm_m;
863 	int pru_flags = msg->send.nm_flags;
864 	struct inpcb *inp = msg->send.base.nm_so->so_pcb;
865 	struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */
866 	int flags = msg->send.nm_priv; /* ip_output flags */
867 	int error;
868 
869 	logudp(redisp_ipout_beg, inp);
870 
871 	/*
872 	 * - Don't use inp route cache.  It should only be used in the
873 	 *   inp owner netisr.
874 	 * - Access to inp_moptions should be safe, since multicast UDP
875 	 *   datagrams are redispatched to netisr0 and inp_moptions is
876 	 *   changed only in netisr0.
877 	 */
878 	error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp);
879 	if ((pru_flags & PRUS_NOREPLY) == 0)
880 		lwkt_replymsg(&msg->send.base.lmsg, error);
881 
882 	if (m_opt != NULL) {
883 		/* Free saved ip options, if any */
884 		m_freem(m_opt);
885 	}
886 
887 	logudp(redisp_ipout_end, inp);
888 }
889 
890 static void
891 udp_send(netmsg_t msg)
892 {
893 	struct socket *so = msg->send.base.nm_so;
894 	struct mbuf *m = msg->send.nm_m;
895 	struct sockaddr *dstaddr = msg->send.nm_addr;
896 	int pru_flags = msg->send.nm_flags;
897 	struct inpcb *inp = so->so_pcb;
898 	struct thread *td = msg->send.nm_td;
899 	int flags;
900 
901 	struct udpiphdr *ui;
902 	int len = m->m_pkthdr.len;
903 	struct sockaddr_in *sin;	/* really is initialized before use */
904 	int error = 0, cpu;
905 
906 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
907 	KKASSERT(msg->send.nm_control == NULL);
908 
909 	logudp(send_beg, inp);
910 
911 	if (inp == NULL) {
912 		error = EINVAL;
913 		goto release;
914 	}
915 
916 	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
917 		error = EMSGSIZE;
918 		goto release;
919 	}
920 
921 	if (inp->inp_lport == 0) {	/* unbound socket */
922 		error = in_pcbbind(inp, NULL, td);
923 		if (error)
924 			goto release;
925 
926 		udbinfo_barrier_set();
927 		in_pcbinswildcardhash(inp);
928 		udbinfo_barrier_rem();
929 	}
930 
931 	if (dstaddr != NULL) {		/* destination address specified */
932 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
933 			/* already connected */
934 			error = EISCONN;
935 			goto release;
936 		}
937 		sin = (struct sockaddr_in *)dstaddr;
938 		if (!prison_remote_ip(td, (struct sockaddr *)&sin)) {
939 			error = EAFNOSUPPORT; /* IPv6 only jail */
940 			goto release;
941 		}
942 	} else {
943 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
944 			/* no destination specified and not already connected */
945 			error = ENOTCONN;
946 			goto release;
947 		}
948 		sin = NULL;
949 	}
950 
951 	/*
952 	 * Calculate data length and get a mbuf
953 	 * for UDP and IP headers.
954 	 */
955 	M_PREPEND(m, sizeof(struct udpiphdr), MB_DONTWAIT);
956 	if (m == NULL) {
957 		error = ENOBUFS;
958 		goto release;
959 	}
960 
961 	/*
962 	 * Fill in mbuf with extended UDP header
963 	 * and addresses and length put into network format.
964 	 */
965 	ui = mtod(m, struct udpiphdr *);
966 	bzero(ui->ui_x1, sizeof ui->ui_x1);	/* XXX still needed? */
967 	ui->ui_pr = IPPROTO_UDP;
968 
969 	/*
970 	 * Set destination address.
971 	 */
972 	if (dstaddr != NULL) {			/* use specified destination */
973 		ui->ui_dst = sin->sin_addr;
974 		ui->ui_dport = sin->sin_port;
975 	} else {				/* use connected destination */
976 		ui->ui_dst = inp->inp_faddr;
977 		ui->ui_dport = inp->inp_fport;
978 	}
979 
980 	/*
981 	 * Set source address.
982 	 */
983 	if (inp->inp_laddr.s_addr == INADDR_ANY ||
984 	    IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) {
985 		struct sockaddr_in *if_sin;
986 
987 		if (dstaddr == NULL) {
988 			/*
989 			 * connect() had (or should have) failed because
990 			 * the interface had no IP address, but the
991 			 * application proceeded to call send() anyways.
992 			 */
993 			error = ENOTCONN;
994 			goto release;
995 		}
996 
997 		/* Look up outgoing interface. */
998 		error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1);
999 		if (error)
1000 			goto release;
1001 		ui->ui_src = if_sin->sin_addr;	/* use address of interface */
1002 	} else {
1003 		ui->ui_src = inp->inp_laddr;	/* use non-null bound address */
1004 	}
1005 	ui->ui_sport = inp->inp_lport;
1006 	KASSERT(inp->inp_lport != 0, ("inp lport should have been bound"));
1007 
1008 	/*
1009 	 * Release the original thread, since it is no longer used
1010 	 */
1011 	if (pru_flags & PRUS_HELDTD) {
1012 		lwkt_rele(td);
1013 		pru_flags &= ~PRUS_HELDTD;
1014 	}
1015 	/*
1016 	 * Free the dest address, since it is no longer needed
1017 	 */
1018 	if (pru_flags & PRUS_FREEADDR) {
1019 		kfree(dstaddr, M_SONAME);
1020 		pru_flags &= ~PRUS_FREEADDR;
1021 	}
1022 
1023 	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
1024 
1025 	/*
1026 	 * Set up checksum and output datagram.
1027 	 */
1028 	if (udpcksum) {
1029 		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr,
1030 		    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
1031 		m->m_pkthdr.csum_flags = CSUM_UDP;
1032 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1033 		m->m_pkthdr.csum_thlen = sizeof(struct udphdr);
1034 	} else {
1035 		ui->ui_sum = 0;
1036 	}
1037 	((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len;
1038 	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
1039 	((struct ip *)ui)->ip_tos = inp->inp_ip_tos;	/* XXX */
1040 	udp_stat.udps_opackets++;
1041 
1042 	flags = IP_DEBUGROUTE |
1043 	    (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
1044 	if (pru_flags & PRUS_DONTROUTE)
1045 		flags |= SO_DONTROUTE;
1046 
1047 	cpu = udp_addrcpu_pkt(ui->ui_dst.s_addr, ui->ui_dport,
1048 	    ui->ui_src.s_addr, ui->ui_sport);
1049 	if (cpu != mycpuid) {
1050 		struct mbuf *m_opt = NULL;
1051 		struct netmsg_pru_send *smsg;
1052 		struct lwkt_port *port = netisr_cpuport(cpu);
1053 
1054 		/*
1055 		 * Not on the CPU that matches this UDP datagram hash;
1056 		 * redispatch to the correct CPU to do the ip_output().
1057 		 */
1058 		if (inp->inp_options != NULL) {
1059 			/*
1060 			 * If there are ip options, then save a copy,
1061 			 * since accessing inp_options on other CPUs'
1062 			 * is not safe.
1063 			 *
1064 			 * XXX optimize this?
1065 			 */
1066 			m_opt = m_copym(inp->inp_options, 0, M_COPYALL,
1067 			    MB_WAIT);
1068 		}
1069 		if ((pru_flags & PRUS_NOREPLY) == 0) {
1070 			/*
1071 			 * Change some parts of the original netmsg and
1072 			 * forward it to the target netisr.
1073 			 *
1074 			 * NOTE: so_port MUST NOT be checked in the target
1075 			 * netisr.
1076 			 */
1077 			smsg = &msg->send;
1078 			smsg->nm_priv = flags; /* ip_output flags */
1079 			smsg->nm_m = m;
1080 			smsg->nm_control = m_opt; /* XXX save ipopt */
1081 			smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT;
1082 			smsg->base.nm_dispatch = udp_send_redispatch;
1083 			lwkt_forwardmsg(port, &smsg->base.lmsg);
1084 		} else {
1085 			/*
1086 			 * Recreate the netmsg, since the original mbuf
1087 			 * could have been changed.  And send it to the
1088 			 * target netisr.
1089 			 *
1090 			 * NOTE: so_port MUST NOT be checked in the target
1091 			 * netisr.
1092 			 */
1093 			smsg = &m->m_hdr.mh_sndmsg;
1094 			netmsg_init(&smsg->base, so, &netisr_apanic_rport,
1095 			    MSGF_IGNSOPORT, udp_send_redispatch);
1096 			smsg->nm_priv = flags; /* ip_output flags */
1097 			smsg->nm_flags = pru_flags;
1098 			smsg->nm_m = m;
1099 			smsg->nm_control = m_opt; /* XXX save ipopt */
1100 			lwkt_sendmsg(port, &smsg->base.lmsg);
1101 		}
1102 
1103 		/* This UDP datagram is redispatched; done */
1104 		logudp(send_redisp, inp);
1105 		return;
1106 	}
1107 
1108 	logudp(send_ipout, inp);
1109 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
1110 	    inp->inp_moptions, inp);
1111 	m = NULL;
1112 
1113 release:
1114 	if (m != NULL)
1115 		m_freem(m);
1116 
1117 	if (pru_flags & PRUS_HELDTD)
1118 		lwkt_rele(td);
1119 	if (pru_flags & PRUS_FREEADDR)
1120 		kfree(dstaddr, M_SONAME);
1121 	if ((pru_flags & PRUS_NOREPLY) == 0)
1122 		lwkt_replymsg(&msg->send.base.lmsg, error);
1123 
1124 	logudp(send_end, inp);
1125 }
1126 
1127 u_long	udp_sendspace = 9216;		/* really max datagram size */
1128 					/* 40 1K datagrams */
1129 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
1130     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
1131 
1132 u_long	udp_recvspace = 40 * (1024 +
1133 #ifdef INET6
1134 				      sizeof(struct sockaddr_in6)
1135 #else
1136 				      sizeof(struct sockaddr_in)
1137 #endif
1138 				      );
1139 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
1140     &udp_recvspace, 0, "Maximum incoming UDP datagram size");
1141 
1142 /*
1143  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1144  *	 will sofree() it when we return.
1145  */
1146 static void
1147 udp_abort(netmsg_t msg)
1148 {
1149 	struct socket *so = msg->abort.base.nm_so;
1150 	struct inpcb *inp;
1151 	int error;
1152 
1153 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1154 
1155 	inp = so->so_pcb;
1156 	if (inp) {
1157 		soisdisconnected(so);
1158 
1159 		udbinfo_barrier_set();
1160 		in_pcbdetach(inp);
1161 		udbinfo_barrier_rem();
1162 		error = 0;
1163 	} else {
1164 		error = EINVAL;
1165 	}
1166 	lwkt_replymsg(&msg->abort.base.lmsg, error);
1167 }
1168 
1169 static void
1170 udp_attach(netmsg_t msg)
1171 {
1172 	struct socket *so = msg->attach.base.nm_so;
1173 	struct pru_attach_info *ai = msg->attach.nm_ai;
1174 	struct inpcb *inp;
1175 	int error;
1176 
1177 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1178 
1179 	inp = so->so_pcb;
1180 	if (inp != NULL) {
1181 		error = EINVAL;
1182 		goto out;
1183 	}
1184 	error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit);
1185 	if (error)
1186 		goto out;
1187 
1188 	udbinfo_barrier_set();
1189 	error = in_pcballoc(so, &udbinfo);
1190 	udbinfo_barrier_rem();
1191 
1192 	if (error)
1193 		goto out;
1194 
1195 	/*
1196 	 * Set default port for protocol processing prior to bind/connect.
1197 	 */
1198 	sosetport(so, netisr_cpuport(0));
1199 
1200 	inp = (struct inpcb *)so->so_pcb;
1201 	inp->inp_vflag |= INP_IPV4;
1202 	inp->inp_ip_ttl = ip_defttl;
1203 	error = 0;
1204 out:
1205 	lwkt_replymsg(&msg->attach.base.lmsg, error);
1206 }
1207 
1208 static void
1209 udp_bind(netmsg_t msg)
1210 {
1211 	struct socket *so = msg->bind.base.nm_so;
1212 	struct sockaddr *nam = msg->bind.nm_nam;
1213 	struct thread *td = msg->bind.nm_td;
1214 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
1215 	struct inpcb *inp;
1216 	int error;
1217 
1218 	inp = so->so_pcb;
1219 	if (inp) {
1220 		error = in_pcbbind(inp, nam, td);
1221 		if (error == 0) {
1222 			if (sin->sin_addr.s_addr != INADDR_ANY)
1223 				inp->inp_flags |= INP_WASBOUND_NOTANY;
1224 
1225 			udbinfo_barrier_set();
1226 			in_pcbinswildcardhash(inp);
1227 			udbinfo_barrier_rem();
1228 		}
1229 	} else {
1230 		error = EINVAL;
1231 	}
1232 	lwkt_replymsg(&msg->bind.base.lmsg, error);
1233 }
1234 
1235 static void
1236 udp_connect(netmsg_t msg)
1237 {
1238 	struct socket *so = msg->connect.base.nm_so;
1239 	struct sockaddr *nam = msg->connect.nm_nam;
1240 	struct thread *td = msg->connect.nm_td;
1241 	struct inpcb *inp;
1242 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
1243 	struct sockaddr_in *if_sin;
1244 	lwkt_port_t port;
1245 	int error;
1246 
1247 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1248 
1249 	inp = so->so_pcb;
1250 	if (inp == NULL) {
1251 		error = EINVAL;
1252 		goto out;
1253 	}
1254 
1255 	if (msg->connect.nm_flags & PRUC_RECONNECT) {
1256 		panic("UDP does not support RECONNECT");
1257 #ifdef notyet
1258 		msg->connect.nm_flags &= ~PRUC_RECONNECT;
1259 		in_pcblink(inp, &udbinfo);
1260 #endif
1261 	}
1262 
1263 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
1264 		error = EISCONN;
1265 		goto out;
1266 	}
1267 	error = 0;
1268 
1269 	/*
1270 	 * Bind if we have to
1271 	 */
1272 	if (td->td_proc && td->td_proc->p_ucred->cr_prison != NULL &&
1273 	    inp->inp_laddr.s_addr == INADDR_ANY) {
1274 		error = in_pcbbind(inp, NULL, td);
1275 		if (error)
1276 			goto out;
1277 	}
1278 
1279 	/*
1280 	 * Calculate the correct protocol processing thread.  The connect
1281 	 * operation must run there.
1282 	 */
1283 	error = in_pcbladdr(inp, nam, &if_sin, td);
1284 	if (error)
1285 		goto out;
1286 	if (!prison_remote_ip(td, nam)) {
1287 		error = EAFNOSUPPORT; /* IPv6 only jail */
1288 		goto out;
1289 	}
1290 
1291 	port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port,
1292 			    inp->inp_laddr.s_addr, inp->inp_lport);
1293 	if (port != &curthread->td_msgport) {
1294 #ifdef notyet
1295 		struct route *ro = &inp->inp_route;
1296 
1297 		/*
1298 		 * in_pcbladdr() may have allocated a route entry for us
1299 		 * on the current CPU, but we need a route entry on the
1300 		 * inpcb's owner CPU, so free it here.
1301 		 */
1302 		if (ro->ro_rt != NULL)
1303 			RTFREE(ro->ro_rt);
1304 		bzero(ro, sizeof(*ro));
1305 
1306 		/*
1307 		 * We are moving the protocol processing port the socket
1308 		 * is on, we have to unlink here and re-link on the
1309 		 * target cpu.
1310 		 */
1311 		in_pcbunlink(so->so_pcb, &udbinfo);
1312 		/* in_pcbunlink(so->so_pcb, &udbinfo[mycpu->gd_cpuid]); */
1313 		sosetport(so, port);
1314 		msg->connect.nm_flags |= PRUC_RECONNECT;
1315 		msg->connect.base.nm_dispatch = udp_connect;
1316 
1317 		lwkt_forwardmsg(port, &msg->connect.base.lmsg);
1318 		/* msg invalid now */
1319 		return;
1320 #else
1321 		panic("UDP activity should only be in netisr0");
1322 #endif
1323 	}
1324 	KKASSERT(port == &curthread->td_msgport);
1325 	error = udp_connect_oncpu(so, td, sin, if_sin);
1326 out:
1327 	KKASSERT(msg->connect.nm_m == NULL);
1328 	lwkt_replymsg(&msg->connect.base.lmsg, error);
1329 }
1330 
1331 static int
1332 udp_connect_oncpu(struct socket *so, struct thread *td,
1333 		  struct sockaddr_in *sin, struct sockaddr_in *if_sin)
1334 {
1335 	struct inpcb *inp;
1336 	int error;
1337 
1338 	udbinfo_barrier_set();
1339 
1340 	inp = so->so_pcb;
1341 	if (inp->inp_flags & INP_WILDCARD)
1342 		in_pcbremwildcardhash(inp);
1343 	error = in_pcbconnect(inp, (struct sockaddr *)sin, td);
1344 
1345 	if (error == 0) {
1346 		/*
1347 		 * No more errors can occur, finish adjusting the socket
1348 		 * and change the processing port to reflect the connected
1349 		 * socket.  Once set we can no longer safely mess with the
1350 		 * socket.
1351 		 */
1352 		soisconnected(so);
1353 	} else if (error == EAFNOSUPPORT) {	/* connection dissolved */
1354 		/*
1355 		 * Follow traditional BSD behavior and retain
1356 		 * the local port binding.  But, fix the old misbehavior
1357 		 * of overwriting any previously bound local address.
1358 		 */
1359 		if (!(inp->inp_flags & INP_WASBOUND_NOTANY))
1360 			inp->inp_laddr.s_addr = INADDR_ANY;
1361 		in_pcbinswildcardhash(inp);
1362 	}
1363 
1364 	udbinfo_barrier_rem();
1365 	return error;
1366 }
1367 
1368 static void
1369 udp_detach(netmsg_t msg)
1370 {
1371 	struct socket *so = msg->detach.base.nm_so;
1372 	struct inpcb *inp;
1373 	int error;
1374 
1375 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1376 
1377 	inp = so->so_pcb;
1378 	if (inp) {
1379 		udbinfo_barrier_set();
1380 		in_pcbdetach(inp);
1381 		udbinfo_barrier_rem();
1382 		error = 0;
1383 	} else {
1384 		error = EINVAL;
1385 	}
1386 	lwkt_replymsg(&msg->detach.base.lmsg, error);
1387 }
1388 
1389 static void
1390 udp_disconnect(netmsg_t msg)
1391 {
1392 	struct socket *so = msg->disconnect.base.nm_so;
1393 	struct route *ro;
1394 	struct inpcb *inp;
1395 	int error;
1396 
1397 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1398 
1399 	inp = so->so_pcb;
1400 	if (inp == NULL) {
1401 		error = EINVAL;
1402 		goto out;
1403 	}
1404 	if (inp->inp_faddr.s_addr == INADDR_ANY) {
1405 		error = ENOTCONN;
1406 		goto out;
1407 	}
1408 
1409 	soreference(so);
1410 
1411 	udbinfo_barrier_set();
1412 	in_pcbdisconnect(inp);
1413 	udbinfo_barrier_rem();
1414 
1415 	soclrstate(so, SS_ISCONNECTED);		/* XXX */
1416 	sofree(so);
1417 
1418 	ro = &inp->inp_route;
1419 	if (ro->ro_rt != NULL)
1420 		RTFREE(ro->ro_rt);
1421 	bzero(ro, sizeof(*ro));
1422 	error = 0;
1423 out:
1424 	lwkt_replymsg(&msg->disconnect.base.lmsg, error);
1425 }
1426 
1427 void
1428 udp_shutdown(netmsg_t msg)
1429 {
1430 	struct socket *so = msg->shutdown.base.nm_so;
1431 	struct inpcb *inp;
1432 	int error;
1433 
1434 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1435 
1436 	inp = so->so_pcb;
1437 	if (inp) {
1438 		socantsendmore(so);
1439 		error = 0;
1440 	} else {
1441 		error = EINVAL;
1442 	}
1443 	lwkt_replymsg(&msg->shutdown.base.lmsg, error);
1444 }
1445 
1446 void
1447 udbinfo_lock(void)
1448 {
1449 	lwkt_serialize_enter(&udbinfo_slize);
1450 }
1451 
1452 void
1453 udbinfo_unlock(void)
1454 {
1455 	lwkt_serialize_exit(&udbinfo_slize);
1456 }
1457 
1458 void
1459 udbinfo_barrier_set(void)
1460 {
1461 	netisr_barrier_set(udbinfo_br);
1462 	udbinfo_lock();
1463 }
1464 
1465 void
1466 udbinfo_barrier_rem(void)
1467 {
1468 	udbinfo_unlock();
1469 	netisr_barrier_rem(udbinfo_br);
1470 }
1471 
1472 struct pr_usrreqs udp_usrreqs = {
1473 	.pru_abort = udp_abort,
1474 	.pru_accept = pr_generic_notsupp,
1475 	.pru_attach = udp_attach,
1476 	.pru_bind = udp_bind,
1477 	.pru_connect = udp_connect,
1478 	.pru_connect2 = pr_generic_notsupp,
1479 	.pru_control = in_control_dispatch,
1480 	.pru_detach = udp_detach,
1481 	.pru_disconnect = udp_disconnect,
1482 	.pru_listen = pr_generic_notsupp,
1483 	.pru_peeraddr = in_setpeeraddr_dispatch,
1484 	.pru_rcvd = pr_generic_notsupp,
1485 	.pru_rcvoob = pr_generic_notsupp,
1486 	.pru_send = udp_send,
1487 	.pru_sense = pru_sense_null,
1488 	.pru_shutdown = udp_shutdown,
1489 	.pru_sockaddr = in_setsockaddr_dispatch,
1490 	.pru_sosend = sosendudp,
1491 	.pru_soreceive = soreceive
1492 };
1493 
1494