xref: /dflybsd-src/sys/netinet/udp_usrreq.c (revision 73029d086fecb97607aec6269511b65ab8a3131d)
1 /*
2  * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
3  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Jeffrey M. Hsu.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of The DragonFly Project nor the names of its
17  *    contributors may be used to endorse or promote products derived
18  *    from this software without specific, prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
63  * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $
64  */
65 
66 #include "opt_ipsec.h"
67 #include "opt_inet6.h"
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/domain.h>
75 #include <sys/proc.h>
76 #include <sys/priv.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/sysctl.h>
81 #include <sys/syslog.h>
82 #include <sys/in_cksum.h>
83 #include <sys/ktr.h>
84 
85 #include <sys/thread2.h>
86 #include <sys/socketvar2.h>
87 #include <sys/serialize.h>
88 
89 #include <machine/stdarg.h>
90 
91 #include <net/if.h>
92 #include <net/route.h>
93 #include <net/netmsg2.h>
94 #include <net/netisr2.h>
95 
96 #include <netinet/in.h>
97 #include <netinet/in_systm.h>
98 #include <netinet/ip.h>
99 #ifdef INET6
100 #include <netinet/ip6.h>
101 #endif
102 #include <netinet/in_pcb.h>
103 #include <netinet/in_var.h>
104 #include <netinet/ip_var.h>
105 #ifdef INET6
106 #include <netinet6/ip6_var.h>
107 #endif
108 #include <netinet/ip_icmp.h>
109 #include <netinet/icmp_var.h>
110 #include <netinet/udp.h>
111 #include <netinet/udp_var.h>
112 
113 #ifdef FAST_IPSEC
114 #include <netproto/ipsec/ipsec.h>
115 #endif
116 
117 #ifdef IPSEC
118 #include <netinet6/ipsec.h>
119 #endif
120 
121 #define UDP_KTR_STRING		"inp=%p"
122 #define UDP_KTR_ARGS		struct inpcb *inp
123 
124 #ifndef KTR_UDP
125 #define KTR_UDP			KTR_ALL
126 #endif
127 
128 KTR_INFO_MASTER(udp);
129 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS);
130 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS);
131 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS);
132 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS);
133 KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS);
134 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS);
135 
136 #define logudp(name, inp)	KTR_LOG(udp_##name, inp)
137 
138 /*
139  * UDP protocol implementation.
140  * Per RFC 768, August, 1980.
141  */
142 #ifndef	COMPAT_42
143 static int	udpcksum = 1;
144 #else
145 static int	udpcksum = 0;		/* XXX */
146 #endif
147 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
148     &udpcksum, 0, "Enable checksumming of UDP packets");
149 
150 int	log_in_vain = 0;
151 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
152     &log_in_vain, 0, "Log all incoming UDP packets");
153 
154 static int	blackhole = 0;
155 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
156 	&blackhole, 0, "Do not send port unreachables for refused connects");
157 
158 static int	strict_mcast_mship = 1;
159 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
160 	&strict_mcast_mship, 0, "Only send multicast to member sockets");
161 
162 int	udp_sosend_async = 1;
163 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW,
164 	&udp_sosend_async, 0, "UDP asynchronized pru_send");
165 
166 int	udp_sosend_prepend = 1;
167 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW,
168 	&udp_sosend_prepend, 0,
169 	"Prepend enough space for proto and link header in pru_send");
170 
171 static int udp_reuseport_ext = 1;
172 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW,
173 	&udp_reuseport_ext, 0, "SO_REUSEPORT extension");
174 
175 struct	inpcbinfo udbinfo;
176 struct	inpcbportinfo udbportinfo;
177 
178 static struct netisr_barrier *udbinfo_br;
179 static struct lwkt_serialize udbinfo_slize = LWKT_SERIALIZE_INITIALIZER;
180 
181 #ifndef UDBHASHSIZE
182 #define UDBHASHSIZE 16
183 #endif
184 
185 struct	udpstat udpstat_percpu[MAXCPU] __cachealign;
186 
/*
 * Lazily-built IPv6 views of an IPv4 datagram, shared across the sockets
 * a datagram is delivered to.  The *_init_done bit records whether the
 * embedded object has been filled in yet (see udp_append()).
 */
#ifndef INET6
struct udp_in6;
struct udp_ip6;
#else
struct udp_in6 {
	struct sockaddr_in6	uin6_sin;		/* source address */
	u_char			uin6_init_done : 1;
};
struct udp_ip6 {
	struct ip6_hdr		uip6_ip6;		/* converted IP header */
	u_char			uip6_init_done : 1;
};
#endif /* !INET6 */
200 
/* Forward declarations of file-local helpers. */
#ifdef INET6
static void ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip);
#endif
static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
    int off, struct sockaddr_in *udp_in, struct udp_in6 *,
    struct udp_ip6 *);
static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin,
    struct sockaddr_in *if_sin);
210 
211 void
212 udp_init(void)
213 {
214 	int cpu;
215 
216 	in_pcbinfo_init(&udbinfo);
217 	in_pcbportinfo_init(&udbportinfo, UDBHASHSIZE, FALSE, 0);
218 
219 	udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
220 	udbinfo.portinfo = &udbportinfo;
221 	udbinfo.wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB,
222 					    &udbinfo.wildcardhashmask);
223 	udbinfo.localgrphashbase = hashinit(UDBHASHSIZE, M_PCB,
224 					    &udbinfo.localgrphashmask);
225 	udbinfo.ipi_size = sizeof(struct inpcb);
226 
227 	udbinfo_br = netisr_barrier_create();
228 
229 	/*
230 	 * Initialize UDP statistics counters for each CPU.
231 	 */
232 	for (cpu = 0; cpu < ncpus; ++cpu)
233 		bzero(&udpstat_percpu[cpu], sizeof(struct udpstat));
234 }
235 
236 static int
237 sysctl_udpstat(SYSCTL_HANDLER_ARGS)
238 {
239 	int cpu, error = 0;
240 
241 	for (cpu = 0; cpu < ncpus; ++cpu) {
242 		if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu],
243 					sizeof(struct udpstat))))
244 			break;
245 		if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu],
246 				       sizeof(struct udpstat))))
247 			break;
248 	}
249 
250 	return (error);
251 }
252 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW),
253     0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics");
254 
255 /*
256  * Check multicast packets to make sure they are only sent to sockets with
257  * multicast memberships for the packet's destination address and arrival
258  * interface.  Multicast packets to multicast-unaware sockets are also
259  * disallowed.
260  *
261  * Returns 0 if the packet is acceptable, -1 if it is not.
262  */
263 static __inline int
264 check_multicast_membership(struct ip *ip, struct inpcb *inp, struct mbuf *m)
265 {
266 	int mshipno;
267 	struct ip_moptions *mopt;
268 
269 	if (strict_mcast_mship == 0 ||
270 	    !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
271 		return (0);
272 	}
273 	mopt = inp->inp_moptions;
274 	if (mopt == NULL)
275 		return (-1);
276 	for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) {
277 		struct in_multi *maddr = mopt->imo_membership[mshipno];
278 
279 		if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr &&
280 		    m->m_pkthdr.rcvif == maddr->inm_ifp) {
281 			return (0);
282 		}
283 	}
284 	return (-1);
285 }
286 
287 int
288 udp_input(struct mbuf **mp, int *offp, int proto)
289 {
290 	struct sockaddr_in udp_in = { sizeof udp_in, AF_INET };
291 #ifdef INET6
292 	struct udp_in6 udp_in6 = {
293 		{ sizeof udp_in6.uin6_sin, AF_INET6 }, 0
294 	};
295 	struct udp_ip6 udp_ip6;
296 #endif
297 
298 	int iphlen;
299 	struct ip *ip;
300 	struct udphdr *uh;
301 	struct inpcb *inp;
302 	struct mbuf *m;
303 	struct mbuf *opts = NULL;
304 	int len, off;
305 	struct ip save_ip;
306 	struct sockaddr *append_sa;
307 
308 	off = *offp;
309 	m = *mp;
310 	*mp = NULL;
311 
312 	iphlen = off;
313 	udp_stat.udps_ipackets++;
314 
315 	/*
316 	 * Strip IP options, if any; should skip this,
317 	 * make available to user, and use on returned packets,
318 	 * but we don't yet have a way to check the checksum
319 	 * with options still present.
320 	 */
321 	if (iphlen > sizeof(struct ip)) {
322 		ip_stripoptions(m);
323 		iphlen = sizeof(struct ip);
324 	}
325 
326 	/*
327 	 * IP and UDP headers are together in first mbuf.
328 	 * Already checked and pulled up in ip_demux().
329 	 */
330 	KASSERT(m->m_len >= iphlen + sizeof(struct udphdr),
331 	    ("UDP header not in one mbuf"));
332 
333 	ip = mtod(m, struct ip *);
334 	uh = (struct udphdr *)((caddr_t)ip + iphlen);
335 
336 	/* destination port of 0 is illegal, based on RFC768. */
337 	if (uh->uh_dport == 0)
338 		goto bad;
339 
340 	/*
341 	 * Make mbuf data length reflect UDP length.
342 	 * If not enough data to reflect UDP length, drop.
343 	 */
344 	len = ntohs((u_short)uh->uh_ulen);
345 	if (ip->ip_len != len) {
346 		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
347 			udp_stat.udps_badlen++;
348 			goto bad;
349 		}
350 		m_adj(m, len - ip->ip_len);
351 		/* ip->ip_len = len; */
352 	}
353 	/*
354 	 * Save a copy of the IP header in case we want restore it
355 	 * for sending an ICMP error message in response.
356 	 */
357 	save_ip = *ip;
358 
359 	/*
360 	 * Checksum extended UDP header and data.
361 	 */
362 	if (uh->uh_sum) {
363 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
364 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
365 				uh->uh_sum = m->m_pkthdr.csum_data;
366 			else
367 				uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
368 				    ip->ip_dst.s_addr, htonl((u_short)len +
369 				    m->m_pkthdr.csum_data + IPPROTO_UDP));
370 			uh->uh_sum ^= 0xffff;
371 		} else {
372 			char b[9];
373 
374 			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
375 			bzero(((struct ipovly *)ip)->ih_x1, 9);
376 			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
377 			uh->uh_sum = in_cksum(m, len + sizeof(struct ip));
378 			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
379 		}
380 		if (uh->uh_sum) {
381 			udp_stat.udps_badsum++;
382 			m_freem(m);
383 			return(IPPROTO_DONE);
384 		}
385 	} else
386 		udp_stat.udps_nosum++;
387 
388 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
389 	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
390 		struct inpcb *last;
391 
392 		/*
393 		 * Deliver a multicast or broadcast datagram to *all* sockets
394 		 * for which the local and remote addresses and ports match
395 		 * those of the incoming datagram.  This allows more than
396 		 * one process to receive multi/broadcasts on the same port.
397 		 * (This really ought to be done for unicast datagrams as
398 		 * well, but that would cause problems with existing
399 		 * applications that open both address-specific sockets and
400 		 * a wildcard socket listening to the same port -- they would
401 		 * end up receiving duplicates of every unicast datagram.
402 		 * Those applications open the multiple sockets to overcome an
403 		 * inadequacy of the UDP socket interface, but for backwards
404 		 * compatibility we avoid the problem here rather than
405 		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
406 		 */
407 
408 		/*
409 		 * Construct sockaddr format source address.
410 		 */
411 		udp_in.sin_port = uh->uh_sport;
412 		udp_in.sin_addr = ip->ip_src;
413 		/*
414 		 * Locate pcb(s) for datagram.
415 		 * (Algorithm copied from raw_intr().)
416 		 */
417 		last = NULL;
418 #ifdef INET6
419 		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
420 #endif
421 		LIST_FOREACH(inp, &udbinfo.pcblisthead, inp_list) {
422 			KKASSERT((inp->inp_flags & INP_PLACEMARKER) == 0);
423 #ifdef INET6
424 			if (!(inp->inp_vflag & INP_IPV4))
425 				continue;
426 #endif
427 			if (inp->inp_lport != uh->uh_dport)
428 				continue;
429 			if (inp->inp_laddr.s_addr != INADDR_ANY) {
430 				if (inp->inp_laddr.s_addr !=
431 				    ip->ip_dst.s_addr)
432 					continue;
433 			}
434 			if (inp->inp_faddr.s_addr != INADDR_ANY) {
435 				if (inp->inp_faddr.s_addr !=
436 				    ip->ip_src.s_addr ||
437 				    inp->inp_fport != uh->uh_sport)
438 					continue;
439 			}
440 
441 			if (check_multicast_membership(ip, inp, m) < 0)
442 				continue;
443 
444 			if (last != NULL) {
445 				struct mbuf *n;
446 
447 #ifdef IPSEC
448 				/* check AH/ESP integrity. */
449 				if (ipsec4_in_reject_so(m, last->inp_socket))
450 					ipsecstat.in_polvio++;
451 					/* do not inject data to pcb */
452 				else
453 #endif /*IPSEC*/
454 #ifdef FAST_IPSEC
455 				/* check AH/ESP integrity. */
456 				if (ipsec4_in_reject(m, last))
457 					;
458 				else
459 #endif /*FAST_IPSEC*/
460 				if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL)
461 					udp_append(last, ip, n,
462 					    iphlen + sizeof(struct udphdr),
463 					    &udp_in,
464 #ifdef INET6
465 					    &udp_in6, &udp_ip6
466 #else
467 				            NULL, NULL
468 #endif
469 					    );
470 			}
471 			last = inp;
472 			/*
473 			 * Don't look for additional matches if this one does
474 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
475 			 * socket options set.  This heuristic avoids searching
476 			 * through all pcbs in the common case of a non-shared
477 			 * port.  It * assumes that an application will never
478 			 * clear these options after setting them.
479 			 */
480 			if (!(last->inp_socket->so_options &
481 			    (SO_REUSEPORT | SO_REUSEADDR)))
482 				break;
483 		}
484 
485 		if (last == NULL) {
486 			/*
487 			 * No matching pcb found; discard datagram.
488 			 * (No need to send an ICMP Port Unreachable
489 			 * for a broadcast or multicast datgram.)
490 			 */
491 			udp_stat.udps_noportbcast++;
492 			goto bad;
493 		}
494 #ifdef IPSEC
495 		/* check AH/ESP integrity. */
496 		if (ipsec4_in_reject_so(m, last->inp_socket)) {
497 			ipsecstat.in_polvio++;
498 			goto bad;
499 		}
500 #endif /*IPSEC*/
501 #ifdef FAST_IPSEC
502 		/* check AH/ESP integrity. */
503 		if (ipsec4_in_reject(m, last))
504 			goto bad;
505 #endif /*FAST_IPSEC*/
506 		udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
507 		    &udp_in,
508 #ifdef INET6
509 		    &udp_in6, &udp_ip6
510 #else
511 		    NULL, NULL
512 #endif
513 		    );
514 		return(IPPROTO_DONE);
515 	}
516 	/*
517 	 * Locate pcb for datagram.
518 	 */
519 	inp = in_pcblookup_pkthash(&udbinfo, ip->ip_src, uh->uh_sport,
520 	    ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif,
521 	    udp_reuseport_ext ? m : NULL);
522 	if (inp == NULL) {
523 		if (log_in_vain) {
524 			char buf[sizeof "aaa.bbb.ccc.ddd"];
525 
526 			strcpy(buf, inet_ntoa(ip->ip_dst));
527 			log(LOG_INFO,
528 			    "Connection attempt to UDP %s:%d from %s:%d\n",
529 			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
530 			    ntohs(uh->uh_sport));
531 		}
532 		udp_stat.udps_noport++;
533 		if (m->m_flags & (M_BCAST | M_MCAST)) {
534 			udp_stat.udps_noportbcast++;
535 			goto bad;
536 		}
537 		if (blackhole)
538 			goto bad;
539 #ifdef ICMP_BANDLIM
540 		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
541 			goto bad;
542 #endif
543 		*ip = save_ip;
544 		ip->ip_len += iphlen;
545 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
546 		return(IPPROTO_DONE);
547 	}
548 #ifdef IPSEC
549 	if (ipsec4_in_reject_so(m, inp->inp_socket)) {
550 		ipsecstat.in_polvio++;
551 		goto bad;
552 	}
553 #endif /*IPSEC*/
554 #ifdef FAST_IPSEC
555 	if (ipsec4_in_reject(m, inp))
556 		goto bad;
557 #endif /*FAST_IPSEC*/
558 	/*
559 	 * Check the minimum TTL for socket.
560 	 */
561 	if (ip->ip_ttl < inp->inp_ip_minttl)
562 		goto bad;
563 
564 	/*
565 	 * Construct sockaddr format source address.
566 	 * Stuff source address and datagram in user buffer.
567 	 */
568 	udp_in.sin_port = uh->uh_sport;
569 	udp_in.sin_addr = ip->ip_src;
570 	if ((inp->inp_flags & INP_CONTROLOPTS) ||
571 	    (inp->inp_socket->so_options & SO_TIMESTAMP)) {
572 #ifdef INET6
573 		if (inp->inp_vflag & INP_IPV6) {
574 			int savedflags;
575 
576 			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
577 			savedflags = inp->inp_flags;
578 			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
579 			ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m);
580 			inp->inp_flags = savedflags;
581 		} else
582 #endif
583 		ip_savecontrol(inp, &opts, ip, m);
584 	}
585 	m_adj(m, iphlen + sizeof(struct udphdr));
586 #ifdef INET6
587 	if (inp->inp_vflag & INP_IPV6) {
588 		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
589 		append_sa = (struct sockaddr *)&udp_in6;
590 	} else
591 #endif
592 		append_sa = (struct sockaddr *)&udp_in;
593 
594 	lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token);
595 	if (ssb_appendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) {
596 		lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
597 		udp_stat.udps_fullsock++;
598 		goto bad;
599 	}
600 	lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
601 	sorwakeup(inp->inp_socket);
602 	return(IPPROTO_DONE);
603 bad:
604 	m_freem(m);
605 	if (opts)
606 		m_freem(opts);
607 	return(IPPROTO_DONE);
608 }
609 
#ifdef INET6
/*
 * Fill *ip6 with an IPv6 header derived from the IPv4 header *ip: the
 * length, protocol and TTL fields are carried over, and the IPv4 source
 * and destination go into the last 32-bit word of the respective IPv6
 * address, preceded by IPV6_ADDR_INT32_SMP (presumably the IPv4-mapped
 * prefix word -- confirm against its definition).
 */
static void
ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
{
	bzero(ip6, sizeof(*ip6));

	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = ip->ip_len;
	ip6->ip6_nxt = ip->ip_p;
	ip6->ip6_hlim = ip->ip_ttl;

	ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
	ip6->ip6_src.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
	ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
	ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
}
#endif
626 
627 /*
628  * subroutine of udp_input(), mainly for source code readability.
629  * caller must properly init udp_ip6 and udp_in6 beforehand.
630  */
631 static void
632 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
633     struct sockaddr_in *udp_in,
634     struct udp_in6 *udp_in6, struct udp_ip6 *udp_ip6)
635 {
636 	struct sockaddr *append_sa;
637 	struct mbuf *opts = NULL;
638 	int ret;
639 
640 	if (last->inp_flags & INP_CONTROLOPTS ||
641 	    last->inp_socket->so_options & SO_TIMESTAMP) {
642 #ifdef INET6
643 		if (last->inp_vflag & INP_IPV6) {
644 			int savedflags;
645 
646 			if (udp_ip6->uip6_init_done == 0) {
647 				ip_2_ip6_hdr(&udp_ip6->uip6_ip6, ip);
648 				udp_ip6->uip6_init_done = 1;
649 			}
650 			savedflags = last->inp_flags;
651 			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
652 			ip6_savecontrol(last, &opts, &udp_ip6->uip6_ip6, n);
653 			last->inp_flags = savedflags;
654 		} else
655 #endif
656 		ip_savecontrol(last, &opts, ip, n);
657 	}
658 #ifdef INET6
659 	if (last->inp_vflag & INP_IPV6) {
660 		if (udp_in6->uin6_init_done == 0) {
661 			in6_sin_2_v4mapsin6(udp_in, &udp_in6->uin6_sin);
662 			udp_in6->uin6_init_done = 1;
663 		}
664 		append_sa = (struct sockaddr *)&udp_in6->uin6_sin;
665 	} else
666 #endif
667 		append_sa = (struct sockaddr *)udp_in;
668 	m_adj(n, off);
669 
670 	lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token);
671 	ret = ssb_appendaddr(&last->inp_socket->so_rcv, append_sa, n, opts);
672 	lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token);
673 	if (ret == 0) {
674 		m_freem(n);
675 		if (opts)
676 			m_freem(opts);
677 		udp_stat.udps_fullsock++;
678 	} else {
679 		sorwakeup(last->inp_socket);
680 	}
681 }
682 
683 /*
684  * Notify a udp user of an asynchronous error;
685  * just wake up so that he can collect error status.
686  */
687 void
688 udp_notify(struct inpcb *inp, int error)
689 {
690 	inp->inp_socket->so_error = error;
691 	sorwakeup(inp->inp_socket);
692 	sowwakeup(inp->inp_socket);
693 }
694 
695 struct netmsg_udp_notify {
696 	struct netmsg_base base;
697 	void		(*nm_notify)(struct inpcb *, int);
698 	struct in_addr	nm_faddr;
699 	int		nm_arg;
700 };
701 
702 static void
703 udp_notifyall_oncpu(netmsg_t msg)
704 {
705 	struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg;
706 #if 0
707 	int nextcpu;
708 #endif
709 
710 	in_pcbnotifyall(&udbinfo.pcblisthead, nm->nm_faddr,
711 			nm->nm_arg, nm->nm_notify);
712 	lwkt_replymsg(&nm->base.lmsg, 0);
713 
714 #if 0
715 	/* XXX currently udp only runs on cpu 0 */
716 	nextcpu = mycpuid + 1;
717 	if (nextcpu < ncpus2)
718 		lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg);
719 	else
720 		lwkt_replymsg(&nmsg->base.lmsg, 0);
721 #endif
722 }
723 
724 void
725 udp_ctlinput(netmsg_t msg)
726 {
727 	struct sockaddr *sa = msg->ctlinput.nm_arg;
728 	struct ip *ip = msg->ctlinput.nm_extra;
729 	int cmd = msg->ctlinput.nm_cmd;
730 	struct udphdr *uh;
731 	void (*notify) (struct inpcb *, int) = udp_notify;
732 	struct in_addr faddr;
733 	struct inpcb *inp;
734 
735 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
736 
737 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
738 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
739 		goto done;
740 
741 	if (PRC_IS_REDIRECT(cmd)) {
742 		ip = NULL;
743 		notify = in_rtchange;
744 	} else if (cmd == PRC_HOSTDEAD) {
745 		ip = NULL;
746 	} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
747 		goto done;
748 	}
749 
750 	if (ip) {
751 		uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
752 		inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
753 					ip->ip_src, uh->uh_sport, 0, NULL);
754 		if (inp != NULL && inp->inp_socket != NULL)
755 			(*notify)(inp, inetctlerrmap[cmd]);
756 	} else {
757 		struct netmsg_udp_notify *nm;
758 
759 		KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
760 		nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT);
761 		netmsg_init(&nm->base, NULL, &netisr_afree_rport,
762 			    0, udp_notifyall_oncpu);
763 		nm->nm_faddr = faddr;
764 		nm->nm_arg = inetctlerrmap[cmd];
765 		nm->nm_notify = notify;
766 		lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg);
767 	}
768 done:
769 	lwkt_replymsg(&msg->lmsg, 0);
770 }
771 
772 static int
773 udp_pcblist(SYSCTL_HANDLER_ARGS)
774 {
775 	struct xinpcb *xi;
776 	int error, nxi, i;
777 
778 	udbinfo_lock();
779 	error = in_pcblist_global_nomarker(oidp, arg1, arg2, req, &xi, &nxi);
780 	udbinfo_unlock();
781 
782 	if (error) {
783 		KKASSERT(xi == NULL);
784 		return error;
785 	}
786 	if (nxi == 0) {
787 		KKASSERT(xi == NULL);
788 		return 0;
789 	}
790 
791 	for (i = 0; i < nxi; ++i) {
792 		error = SYSCTL_OUT(req, &xi[i], sizeof(xi[i]));
793 		if (error)
794 			break;
795 	}
796 	kfree(xi, M_TEMP);
797 
798 	return error;
799 }
800 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, &udbinfo, 0,
801 	    udp_pcblist, "S,xinpcb", "List of active UDP sockets");
802 
803 static int
804 udp_getcred(SYSCTL_HANDLER_ARGS)
805 {
806 	struct sockaddr_in addrs[2];
807 	struct ucred cred0, *cred = NULL;
808 	struct inpcb *inp;
809 	int error;
810 
811 	error = priv_check(req->td, PRIV_ROOT);
812 	if (error)
813 		return (error);
814 	error = SYSCTL_IN(req, addrs, sizeof addrs);
815 	if (error)
816 		return (error);
817 
818 	udbinfo_lock();
819 	inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
820 				addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
821 	if (inp == NULL || inp->inp_socket == NULL) {
822 		error = ENOENT;
823 	} else {
824 		if (inp->inp_socket->so_cred != NULL) {
825 			cred0 = *(inp->inp_socket->so_cred);
826 			cred = &cred0;
827 		}
828 	}
829 	udbinfo_unlock();
830 
831 	if (error)
832 		return error;
833 
834 	return SYSCTL_OUT(req, cred, sizeof(struct ucred));
835 }
836 
837 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
838     0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection");
839 
840 static void
841 udp_send_redispatch(netmsg_t msg)
842 {
843 	struct mbuf *m = msg->send.nm_m;
844 	int pru_flags = msg->send.nm_flags;
845 	struct inpcb *inp = msg->send.base.nm_so->so_pcb;
846 	struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */
847 	int flags = msg->send.nm_priv; /* ip_output flags */
848 	int error;
849 
850 	logudp(redisp_ipout_beg, inp);
851 
852 	/*
853 	 * - Don't use inp route cache.  It should only be used in the
854 	 *   inp owner netisr.
855 	 * - Access to inp_moptions should be safe, since multicast UDP
856 	 *   datagrams are redispatched to netisr0 and inp_moptions is
857 	 *   changed only in netisr0.
858 	 */
859 	error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp);
860 	if ((pru_flags & PRUS_NOREPLY) == 0)
861 		lwkt_replymsg(&msg->send.base.lmsg, error);
862 
863 	if (m_opt != NULL) {
864 		/* Free saved ip options, if any */
865 		m_freem(m_opt);
866 	}
867 
868 	logudp(redisp_ipout_end, inp);
869 }
870 
871 static void
872 udp_send(netmsg_t msg)
873 {
874 	struct socket *so = msg->send.base.nm_so;
875 	struct mbuf *m = msg->send.nm_m;
876 	struct sockaddr *dstaddr = msg->send.nm_addr;
877 	int pru_flags = msg->send.nm_flags;
878 	struct inpcb *inp = so->so_pcb;
879 	struct thread *td = msg->send.nm_td;
880 	int flags;
881 
882 	struct udpiphdr *ui;
883 	int len = m->m_pkthdr.len;
884 	struct sockaddr_in *sin;	/* really is initialized before use */
885 	int error = 0, cpu;
886 
887 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
888 	KKASSERT(msg->send.nm_control == NULL);
889 
890 	logudp(send_beg, inp);
891 
892 	if (inp == NULL) {
893 		error = EINVAL;
894 		goto release;
895 	}
896 
897 	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
898 		error = EMSGSIZE;
899 		goto release;
900 	}
901 
902 	if (inp->inp_lport == 0) {	/* unbound socket */
903 		error = in_pcbbind(inp, NULL, td);
904 		if (error)
905 			goto release;
906 
907 		udbinfo_barrier_set();
908 		in_pcbinswildcardhash(inp);
909 		udbinfo_barrier_rem();
910 	}
911 
912 	if (dstaddr != NULL) {		/* destination address specified */
913 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
914 			/* already connected */
915 			error = EISCONN;
916 			goto release;
917 		}
918 		sin = (struct sockaddr_in *)dstaddr;
919 		if (!prison_remote_ip(td, (struct sockaddr *)&sin)) {
920 			error = EAFNOSUPPORT; /* IPv6 only jail */
921 			goto release;
922 		}
923 	} else {
924 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
925 			/* no destination specified and not already connected */
926 			error = ENOTCONN;
927 			goto release;
928 		}
929 		sin = NULL;
930 	}
931 
932 	/*
933 	 * Calculate data length and get a mbuf
934 	 * for UDP and IP headers.
935 	 */
936 	M_PREPEND(m, sizeof(struct udpiphdr), MB_DONTWAIT);
937 	if (m == NULL) {
938 		error = ENOBUFS;
939 		goto release;
940 	}
941 
942 	/*
943 	 * Fill in mbuf with extended UDP header
944 	 * and addresses and length put into network format.
945 	 */
946 	ui = mtod(m, struct udpiphdr *);
947 	bzero(ui->ui_x1, sizeof ui->ui_x1);	/* XXX still needed? */
948 	ui->ui_pr = IPPROTO_UDP;
949 
950 	/*
951 	 * Set destination address.
952 	 */
953 	if (dstaddr != NULL) {			/* use specified destination */
954 		ui->ui_dst = sin->sin_addr;
955 		ui->ui_dport = sin->sin_port;
956 	} else {				/* use connected destination */
957 		ui->ui_dst = inp->inp_faddr;
958 		ui->ui_dport = inp->inp_fport;
959 	}
960 
961 	/*
962 	 * Set source address.
963 	 */
964 	if (inp->inp_laddr.s_addr == INADDR_ANY ||
965 	    IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) {
966 		struct sockaddr_in *if_sin;
967 
968 		if (dstaddr == NULL) {
969 			/*
970 			 * connect() had (or should have) failed because
971 			 * the interface had no IP address, but the
972 			 * application proceeded to call send() anyways.
973 			 */
974 			error = ENOTCONN;
975 			goto release;
976 		}
977 
978 		/* Look up outgoing interface. */
979 		error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1);
980 		if (error)
981 			goto release;
982 		ui->ui_src = if_sin->sin_addr;	/* use address of interface */
983 	} else {
984 		ui->ui_src = inp->inp_laddr;	/* use non-null bound address */
985 	}
986 	ui->ui_sport = inp->inp_lport;
987 	KASSERT(inp->inp_lport != 0, ("inp lport should have been bound"));
988 
989 	/*
990 	 * Release the original thread, since it is no longer used
991 	 */
992 	if (pru_flags & PRUS_HELDTD) {
993 		lwkt_rele(td);
994 		pru_flags &= ~PRUS_HELDTD;
995 	}
996 	/*
997 	 * Free the dest address, since it is no longer needed
998 	 */
999 	if (pru_flags & PRUS_FREEADDR) {
1000 		kfree(dstaddr, M_SONAME);
1001 		pru_flags &= ~PRUS_FREEADDR;
1002 	}
1003 
1004 	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
1005 
1006 	/*
1007 	 * Set up checksum and output datagram.
1008 	 */
1009 	if (udpcksum) {
1010 		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr,
1011 		    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
1012 		m->m_pkthdr.csum_flags = CSUM_UDP;
1013 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1014 		m->m_pkthdr.csum_thlen = sizeof(struct udphdr);
1015 	} else {
1016 		ui->ui_sum = 0;
1017 	}
1018 	((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len;
1019 	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
1020 	((struct ip *)ui)->ip_tos = inp->inp_ip_tos;	/* XXX */
1021 	udp_stat.udps_opackets++;
1022 
1023 	flags = IP_DEBUGROUTE |
1024 	    (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
1025 	if (pru_flags & PRUS_DONTROUTE)
1026 		flags |= SO_DONTROUTE;
1027 
1028 	cpu = udp_addrcpu_pkt(ui->ui_dst.s_addr, ui->ui_dport,
1029 	    ui->ui_src.s_addr, ui->ui_sport);
1030 	if (cpu != mycpuid) {
1031 		struct mbuf *m_opt = NULL;
1032 		struct netmsg_pru_send *smsg;
1033 		struct lwkt_port *port = netisr_cpuport(cpu);
1034 
1035 		/*
1036 		 * Not on the CPU that matches this UDP datagram hash;
1037 		 * redispatch to the correct CPU to do the ip_output().
1038 		 */
1039 		if (inp->inp_options != NULL) {
1040 			/*
1041 			 * If there are ip options, then save a copy,
1042 			 * since accessing inp_options on other CPUs'
1043 			 * is not safe.
1044 			 *
1045 			 * XXX optimize this?
1046 			 */
1047 			m_opt = m_copym(inp->inp_options, 0, M_COPYALL,
1048 			    MB_WAIT);
1049 		}
1050 		if ((pru_flags & PRUS_NOREPLY) == 0) {
1051 			/*
1052 			 * Change some parts of the original netmsg and
1053 			 * forward it to the target netisr.
1054 			 *
1055 			 * NOTE: so_port MUST NOT be checked in the target
1056 			 * netisr.
1057 			 */
1058 			smsg = &msg->send;
1059 			smsg->nm_priv = flags; /* ip_output flags */
1060 			smsg->nm_m = m;
1061 			smsg->nm_control = m_opt; /* XXX save ipopt */
1062 			smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT;
1063 			smsg->base.nm_dispatch = udp_send_redispatch;
1064 			lwkt_forwardmsg(port, &smsg->base.lmsg);
1065 		} else {
1066 			/*
1067 			 * Recreate the netmsg, since the original mbuf
1068 			 * could have been changed.  And send it to the
1069 			 * target netisr.
1070 			 *
1071 			 * NOTE: so_port MUST NOT be checked in the target
1072 			 * netisr.
1073 			 */
1074 			smsg = &m->m_hdr.mh_sndmsg;
1075 			netmsg_init(&smsg->base, so, &netisr_apanic_rport,
1076 			    MSGF_IGNSOPORT, udp_send_redispatch);
1077 			smsg->nm_priv = flags; /* ip_output flags */
1078 			smsg->nm_flags = pru_flags;
1079 			smsg->nm_m = m;
1080 			smsg->nm_control = m_opt; /* XXX save ipopt */
1081 			lwkt_sendmsg(port, &smsg->base.lmsg);
1082 		}
1083 
1084 		/* This UDP datagram is redispatched; done */
1085 		logudp(send_redisp, inp);
1086 		return;
1087 	}
1088 
1089 	logudp(send_ipout, inp);
1090 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
1091 	    inp->inp_moptions, inp);
1092 	m = NULL;
1093 
1094 release:
1095 	if (m != NULL)
1096 		m_freem(m);
1097 
1098 	if (pru_flags & PRUS_HELDTD)
1099 		lwkt_rele(td);
1100 	if (pru_flags & PRUS_FREEADDR)
1101 		kfree(dstaddr, M_SONAME);
1102 	if ((pru_flags & PRUS_NOREPLY) == 0)
1103 		lwkt_replymsg(&msg->send.base.lmsg, error);
1104 
1105 	logudp(send_end, inp);
1106 }
1107 
1108 u_long	udp_sendspace = 9216;		/* really max datagram size */
1109 					/* 40 1K datagrams */
1110 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
1111     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
1112 
1113 u_long	udp_recvspace = 40 * (1024 +
1114 #ifdef INET6
1115 				      sizeof(struct sockaddr_in6)
1116 #else
1117 				      sizeof(struct sockaddr_in)
1118 #endif
1119 				      );
1120 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
1121     &udp_recvspace, 0, "Maximum incoming UDP datagram size");
1122 
1123 /*
1124  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1125  *	 will sofree() it when we return.
1126  */
1127 static void
1128 udp_abort(netmsg_t msg)
1129 {
1130 	struct socket *so = msg->abort.base.nm_so;
1131 	struct inpcb *inp;
1132 	int error;
1133 
1134 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1135 
1136 	inp = so->so_pcb;
1137 	if (inp) {
1138 		soisdisconnected(so);
1139 
1140 		udbinfo_barrier_set();
1141 		in_pcbdetach(inp);
1142 		udbinfo_barrier_rem();
1143 		error = 0;
1144 	} else {
1145 		error = EINVAL;
1146 	}
1147 	lwkt_replymsg(&msg->abort.base.lmsg, error);
1148 }
1149 
1150 static void
1151 udp_attach(netmsg_t msg)
1152 {
1153 	struct socket *so = msg->attach.base.nm_so;
1154 	struct pru_attach_info *ai = msg->attach.nm_ai;
1155 	struct inpcb *inp;
1156 	int error;
1157 
1158 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1159 
1160 	inp = so->so_pcb;
1161 	if (inp != NULL) {
1162 		error = EINVAL;
1163 		goto out;
1164 	}
1165 	error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit);
1166 	if (error)
1167 		goto out;
1168 
1169 	udbinfo_barrier_set();
1170 	error = in_pcballoc(so, &udbinfo);
1171 	udbinfo_barrier_rem();
1172 
1173 	if (error)
1174 		goto out;
1175 
1176 	inp = (struct inpcb *)so->so_pcb;
1177 	inp->inp_vflag |= INP_IPV4;
1178 	inp->inp_ip_ttl = ip_defttl;
1179 	error = 0;
1180 out:
1181 	lwkt_replymsg(&msg->attach.base.lmsg, error);
1182 }
1183 
1184 static void
1185 udp_bind(netmsg_t msg)
1186 {
1187 	struct socket *so = msg->bind.base.nm_so;
1188 	struct sockaddr *nam = msg->bind.nm_nam;
1189 	struct thread *td = msg->bind.nm_td;
1190 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
1191 	struct inpcb *inp;
1192 	int error;
1193 
1194 	inp = so->so_pcb;
1195 	if (inp) {
1196 		error = in_pcbbind(inp, nam, td);
1197 		if (error == 0) {
1198 			if (sin->sin_addr.s_addr != INADDR_ANY)
1199 				inp->inp_flags |= INP_WASBOUND_NOTANY;
1200 
1201 			udbinfo_barrier_set();
1202 			in_pcbinswildcardhash(inp);
1203 			udbinfo_barrier_rem();
1204 		}
1205 	} else {
1206 		error = EINVAL;
1207 	}
1208 	lwkt_replymsg(&msg->bind.base.lmsg, error);
1209 }
1210 
1211 static void
1212 udp_connect(netmsg_t msg)
1213 {
1214 	struct socket *so = msg->connect.base.nm_so;
1215 	struct sockaddr *nam = msg->connect.nm_nam;
1216 	struct thread *td = msg->connect.nm_td;
1217 	struct inpcb *inp;
1218 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
1219 	struct sockaddr_in *if_sin;
1220 	lwkt_port_t port;
1221 	int error;
1222 
1223 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1224 	KKASSERT(msg->connect.nm_m == NULL);
1225 
1226 	inp = so->so_pcb;
1227 	if (inp == NULL) {
1228 		error = EINVAL;
1229 		goto out;
1230 	}
1231 
1232 	if (msg->connect.nm_flags & PRUC_RECONNECT) {
1233 		panic("UDP does not support RECONNECT");
1234 #ifdef notyet
1235 		msg->connect.nm_flags &= ~PRUC_RECONNECT;
1236 		in_pcblink(inp, &udbinfo);
1237 #endif
1238 	}
1239 
1240 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
1241 		error = EISCONN;
1242 		goto out;
1243 	}
1244 	error = 0;
1245 
1246 	/*
1247 	 * Bind if we have to
1248 	 */
1249 	if (inp->inp_lport == 0 ||
1250 	    (td->td_proc && td->td_proc->p_ucred->cr_prison != NULL &&
1251 	     inp->inp_laddr.s_addr == INADDR_ANY)) {
1252 		error = in_pcbbind(inp, NULL, td);
1253 		if (error)
1254 			goto out;
1255 	}
1256 
1257 	/*
1258 	 * Calculate the correct protocol processing thread.  The connect
1259 	 * operation must run there.
1260 	 */
1261 	error = in_pcbladdr(inp, nam, &if_sin, td);
1262 	if (error)
1263 		goto out;
1264 	if (!prison_remote_ip(td, nam)) {
1265 		error = EAFNOSUPPORT; /* IPv6 only jail */
1266 		goto out;
1267 	}
1268 
1269 	port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port,
1270 	    inp->inp_laddr.s_addr != INADDR_ANY ?
1271 	    inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport);
1272 	if (port != &curthread->td_msgport) {
1273 #ifdef notyet
1274 		struct route *ro = &inp->inp_route;
1275 
1276 		/*
1277 		 * in_pcbladdr() may have allocated a route entry for us
1278 		 * on the current CPU, but we need a route entry on the
1279 		 * inpcb's owner CPU, so free it here.
1280 		 */
1281 		if (ro->ro_rt != NULL)
1282 			RTFREE(ro->ro_rt);
1283 		bzero(ro, sizeof(*ro));
1284 
1285 		/*
1286 		 * We are moving the protocol processing port the socket
1287 		 * is on, we have to unlink here and re-link on the
1288 		 * target cpu.
1289 		 */
1290 		in_pcbunlink(so->so_pcb, &udbinfo);
1291 		/* in_pcbunlink(so->so_pcb, &udbinfo[mycpu->gd_cpuid]); */
1292 		sosetport(so, port);
1293 		msg->connect.nm_flags |= PRUC_RECONNECT;
1294 		msg->connect.base.nm_dispatch = udp_connect;
1295 
1296 		lwkt_forwardmsg(port, &msg->connect.base.lmsg);
1297 		/* msg invalid now */
1298 		return;
1299 #else
1300 		panic("UDP activity should only be in netisr0");
1301 #endif
1302 	}
1303 	KKASSERT(port == &curthread->td_msgport);
1304 	error = udp_connect_oncpu(inp, sin, if_sin);
1305 out:
1306 	lwkt_replymsg(&msg->connect.base.lmsg, error);
1307 }
1308 
1309 static int
1310 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin,
1311     struct sockaddr_in *if_sin)
1312 {
1313 	struct socket *so = inp->inp_socket;
1314 	struct inpcb *oinp;
1315 
1316 	oinp = in_pcblookup_hash(inp->inp_pcbinfo,
1317 	    sin->sin_addr, sin->sin_port,
1318 	    inp->inp_laddr.s_addr != INADDR_ANY ?
1319 	    inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL);
1320 	if (oinp != NULL)
1321 		return EADDRINUSE;
1322 
1323 	udbinfo_barrier_set();
1324 
1325 	if (inp->inp_flags & INP_WILDCARD)
1326 		in_pcbremwildcardhash(inp);
1327 
1328 	if (inp->inp_laddr.s_addr == INADDR_ANY)
1329 		inp->inp_laddr = if_sin->sin_addr;
1330 	inp->inp_faddr = sin->sin_addr;
1331 	inp->inp_fport = sin->sin_port;
1332 	in_pcbinsconnhash(inp);
1333 
1334 	/*
1335 	 * No more errors can occur, finish adjusting the socket
1336 	 * and change the processing port to reflect the connected
1337 	 * socket.  Once set we can no longer safely mess with the
1338 	 * socket.
1339 	 */
1340 	soisconnected(so);
1341 
1342 	udbinfo_barrier_rem();
1343 
1344 	return 0;
1345 }
1346 
1347 static void
1348 udp_detach(netmsg_t msg)
1349 {
1350 	struct socket *so = msg->detach.base.nm_so;
1351 	struct inpcb *inp;
1352 	int error;
1353 
1354 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1355 
1356 	inp = so->so_pcb;
1357 	if (inp) {
1358 		udbinfo_barrier_set();
1359 		in_pcbdetach(inp);
1360 		udbinfo_barrier_rem();
1361 		error = 0;
1362 	} else {
1363 		error = EINVAL;
1364 	}
1365 	lwkt_replymsg(&msg->detach.base.lmsg, error);
1366 }
1367 
1368 static void
1369 udp_disconnect(netmsg_t msg)
1370 {
1371 	struct socket *so = msg->disconnect.base.nm_so;
1372 	struct route *ro;
1373 	struct inpcb *inp;
1374 	int error;
1375 
1376 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1377 
1378 	inp = so->so_pcb;
1379 	if (inp == NULL) {
1380 		error = EINVAL;
1381 		goto out;
1382 	}
1383 	if (inp->inp_faddr.s_addr == INADDR_ANY) {
1384 		error = ENOTCONN;
1385 		goto out;
1386 	}
1387 
1388 	udbinfo_barrier_set();
1389 
1390 	in_pcbdisconnect(inp);
1391 
1392 	/*
1393 	 * Follow traditional BSD behavior and retain the local port
1394 	 * binding.  But, fix the old misbehavior of overwriting any
1395 	 * previously bound local address.
1396 	 */
1397 	if (!(inp->inp_flags & INP_WASBOUND_NOTANY))
1398 		inp->inp_laddr.s_addr = INADDR_ANY;
1399 	in_pcbinswildcardhash(inp);
1400 
1401 	udbinfo_barrier_rem();
1402 
1403 	soclrstate(so, SS_ISCONNECTED);		/* XXX */
1404 
1405 	ro = &inp->inp_route;
1406 	if (ro->ro_rt != NULL)
1407 		RTFREE(ro->ro_rt);
1408 	bzero(ro, sizeof(*ro));
1409 	error = 0;
1410 out:
1411 	lwkt_replymsg(&msg->disconnect.base.lmsg, error);
1412 }
1413 
1414 void
1415 udp_shutdown(netmsg_t msg)
1416 {
1417 	struct socket *so = msg->shutdown.base.nm_so;
1418 	struct inpcb *inp;
1419 	int error;
1420 
1421 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1422 
1423 	inp = so->so_pcb;
1424 	if (inp) {
1425 		socantsendmore(so);
1426 		error = 0;
1427 	} else {
1428 		error = EINVAL;
1429 	}
1430 	lwkt_replymsg(&msg->shutdown.base.lmsg, error);
1431 }
1432 
1433 void
1434 udbinfo_lock(void)
1435 {
1436 	lwkt_serialize_enter(&udbinfo_slize);
1437 }
1438 
1439 void
1440 udbinfo_unlock(void)
1441 {
1442 	lwkt_serialize_exit(&udbinfo_slize);
1443 }
1444 
1445 void
1446 udbinfo_barrier_set(void)
1447 {
1448 	netisr_barrier_set(udbinfo_br);
1449 	udbinfo_lock();
1450 }
1451 
1452 void
1453 udbinfo_barrier_rem(void)
1454 {
1455 	udbinfo_unlock();
1456 	netisr_barrier_rem(udbinfo_br);
1457 }
1458 
1459 struct pr_usrreqs udp_usrreqs = {
1460 	.pru_abort = udp_abort,
1461 	.pru_accept = pr_generic_notsupp,
1462 	.pru_attach = udp_attach,
1463 	.pru_bind = udp_bind,
1464 	.pru_connect = udp_connect,
1465 	.pru_connect2 = pr_generic_notsupp,
1466 	.pru_control = in_control_dispatch,
1467 	.pru_detach = udp_detach,
1468 	.pru_disconnect = udp_disconnect,
1469 	.pru_listen = pr_generic_notsupp,
1470 	.pru_peeraddr = in_setpeeraddr_dispatch,
1471 	.pru_rcvd = pr_generic_notsupp,
1472 	.pru_rcvoob = pr_generic_notsupp,
1473 	.pru_send = udp_send,
1474 	.pru_sense = pru_sense_null,
1475 	.pru_shutdown = udp_shutdown,
1476 	.pru_sockaddr = in_setsockaddr_dispatch,
1477 	.pru_sosend = sosendudp,
1478 	.pru_soreceive = soreceive
1479 };
1480 
1481