/*
 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved.
 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
61 * 62 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 63 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/malloc.h> 73 #include <sys/mbuf.h> 74 #include <sys/domain.h> 75 #include <sys/proc.h> 76 #include <sys/priv.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/sysctl.h> 81 #include <sys/syslog.h> 82 #include <sys/in_cksum.h> 83 #include <sys/ktr.h> 84 85 #include <sys/thread2.h> 86 #include <sys/socketvar2.h> 87 #include <sys/serialize.h> 88 89 #include <machine/stdarg.h> 90 91 #include <net/if.h> 92 #include <net/route.h> 93 #include <net/netmsg2.h> 94 #include <net/netisr2.h> 95 96 #include <netinet/in.h> 97 #include <netinet/in_systm.h> 98 #include <netinet/ip.h> 99 #ifdef INET6 100 #include <netinet/ip6.h> 101 #endif 102 #include <netinet/in_pcb.h> 103 #include <netinet/in_var.h> 104 #include <netinet/ip_var.h> 105 #ifdef INET6 106 #include <netinet6/ip6_var.h> 107 #endif 108 #include <netinet/ip_icmp.h> 109 #include <netinet/icmp_var.h> 110 #include <netinet/udp.h> 111 #include <netinet/udp_var.h> 112 113 #ifdef FAST_IPSEC 114 #include <netproto/ipsec/ipsec.h> 115 #endif 116 117 #ifdef IPSEC 118 #include <netinet6/ipsec.h> 119 #endif 120 121 #define MSGF_UDP_SEND MSGF_PROTO1 122 123 #define INP_DIRECT_DETACH INP_FLAG_PROTO2 124 125 #define UDP_KTR_STRING "inp=%p" 126 #define UDP_KTR_ARGS struct inpcb *inp 127 128 #ifndef KTR_UDP 129 #define KTR_UDP KTR_ALL 130 #endif 131 132 KTR_INFO_MASTER(udp); 133 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS); 134 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS); 135 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS); 136 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS); 137 KTR_INFO(KTR_UDP, udp, 
redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS); 138 KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS); 139 KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS); 140 141 #define logudp(name, inp) KTR_LOG(udp_##name, inp) 142 143 /* 144 * UDP protocol implementation. 145 * Per RFC 768, August, 1980. 146 */ 147 #ifndef COMPAT_42 148 static int udpcksum = 1; 149 #else 150 static int udpcksum = 0; /* XXX */ 151 #endif 152 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, 153 &udpcksum, 0, "Enable checksumming of UDP packets"); 154 155 int log_in_vain = 0; 156 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 157 &log_in_vain, 0, "Log all incoming UDP packets"); 158 159 static int blackhole = 0; 160 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, 161 &blackhole, 0, "Do not send port unreachables for refused connects"); 162 163 static int strict_mcast_mship = 1; 164 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, 165 &strict_mcast_mship, 0, "Only send multicast to member sockets"); 166 167 int udp_sosend_async = 1; 168 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW, 169 &udp_sosend_async, 0, "UDP asynchronized pru_send"); 170 171 int udp_sosend_prepend = 1; 172 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW, 173 &udp_sosend_prepend, 0, 174 "Prepend enough space for proto and link header in pru_send"); 175 176 static int udp_reuseport_ext = 1; 177 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW, 178 &udp_reuseport_ext, 0, "SO_REUSEPORT extension"); 179 180 struct inpcbinfo udbinfo[MAXCPU]; 181 182 #ifndef UDBHASHSIZE 183 #define UDBHASHSIZE 16 184 #endif 185 186 struct udpstat udpstat_percpu[MAXCPU] __cachealign; 187 188 static void udp_append(struct inpcb *last, struct ip *ip, 189 struct mbuf *n, int off, struct sockaddr_in *udp_in); 190 191 static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 192 struct 
sockaddr_in *if_sin, uint16_t hash); 193 194 static boolean_t udp_inswildcardhash(struct inpcb *inp, 195 struct netmsg_base *msg, int error); 196 static void udp_remwildcardhash(struct inpcb *inp); 197 198 static __inline int 199 udp_lportcpu(short lport) 200 { 201 return (ntohs(lport) & ncpus2_mask); 202 } 203 204 void 205 udp_init(void) 206 { 207 struct inpcbportinfo *portinfo; 208 int cpu; 209 210 portinfo = kmalloc_cachealign(sizeof(*portinfo) * ncpus2, M_PCB, 211 M_WAITOK); 212 213 for (cpu = 0; cpu < ncpus2; cpu++) { 214 struct inpcbinfo *uicb = &udbinfo[cpu]; 215 216 /* 217 * NOTE: 218 * UDP pcb list, wildcard hash table and localgroup hash 219 * table are shared. 220 */ 221 in_pcbinfo_init(uicb, cpu, TRUE); 222 uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask); 223 224 in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, cpu); 225 uicb->portinfo = portinfo; 226 uicb->portinfo_mask = ncpus2_mask; 227 228 uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 229 &uicb->wildcardhashmask); 230 uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB, 231 &uicb->localgrphashmask); 232 233 uicb->ipi_size = sizeof(struct inpcb); 234 } 235 236 /* 237 * Initialize UDP statistics counters for each CPU. 
238 */ 239 for (cpu = 0; cpu < ncpus; ++cpu) 240 bzero(&udpstat_percpu[cpu], sizeof(struct udpstat)); 241 } 242 243 static int 244 sysctl_udpstat(SYSCTL_HANDLER_ARGS) 245 { 246 int cpu, error = 0; 247 248 for (cpu = 0; cpu < ncpus; ++cpu) { 249 if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu], 250 sizeof(struct udpstat)))) 251 break; 252 if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu], 253 sizeof(struct udpstat)))) 254 break; 255 } 256 257 return (error); 258 } 259 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW), 260 0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics"); 261 262 void 263 udp_ctloutput(netmsg_t msg) 264 { 265 struct socket *so = msg->base.nm_so; 266 struct sockopt *sopt = msg->ctloutput.nm_sopt; 267 struct inpcb *inp = so->so_pcb; 268 269 if (inp == NULL) { 270 lwkt_replymsg(&msg->lmsg, EINVAL); 271 return; 272 } 273 274 if (sopt->sopt_level == IPPROTO_IP && sopt->sopt_dir == SOPT_SET) { 275 switch (sopt->sopt_name) { 276 case IP_MULTICAST_IF: 277 case IP_MULTICAST_VIF: 278 case IP_MULTICAST_TTL: 279 case IP_MULTICAST_LOOP: 280 case IP_ADD_MEMBERSHIP: 281 case IP_DROP_MEMBERSHIP: 282 /* 283 * This pr_ctloutput msg will be forwarded 284 * to netisr0 to run; we can't do direct 285 * detaching anymore. 286 * 287 * NOTE: 288 * Don't optimize for the sockets whose 289 * current so_port is netisr0's msgport. 290 * These sockets could be connect(2)'ed 291 * later and the so_port will be changed. 292 */ 293 inp->inp_flags &= ~INP_DIRECT_DETACH; 294 break; 295 } 296 } 297 return ip_ctloutput(msg); 298 } 299 300 /* 301 * Check multicast packets to make sure they are only sent to sockets with 302 * multicast memberships for the packet's destination address and arrival 303 * interface. Multicast packets to multicast-unaware sockets are also 304 * disallowed. 305 * 306 * Returns 0 if the packet is acceptable, -1 if it is not. 
307 */ 308 static __inline int 309 check_multicast_membership(const struct ip *ip, const struct inpcb *inp, 310 const struct mbuf *m) 311 { 312 const struct ip_moptions *mopt; 313 int mshipno; 314 315 if (strict_mcast_mship == 0 || 316 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 317 return (0); 318 } 319 320 ASSERT_IN_NETISR(0); 321 322 mopt = inp->inp_moptions; 323 if (mopt == NULL) 324 return (-1); 325 for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) { 326 const struct in_multi *maddr = mopt->imo_membership[mshipno]; 327 328 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 329 m->m_pkthdr.rcvif == maddr->inm_ifp) { 330 return (0); 331 } 332 } 333 return (-1); 334 } 335 336 struct udp_mcast_arg { 337 struct inpcb *inp; 338 struct inpcb *last; 339 struct ip *ip; 340 struct mbuf *m; 341 int iphlen; 342 struct sockaddr_in *udp_in; 343 }; 344 345 static int 346 udp_mcast_input(struct udp_mcast_arg *arg) 347 { 348 struct inpcb *inp = arg->inp; 349 struct inpcb *last = arg->last; 350 struct ip *ip = arg->ip; 351 struct mbuf *m = arg->m; 352 353 if (check_multicast_membership(ip, inp, m) < 0) 354 return ERESTART; /* caller continue */ 355 356 if (last != NULL) { 357 struct mbuf *n; 358 359 #ifdef IPSEC 360 /* check AH/ESP integrity. */ 361 if (ipsec4_in_reject_so(m, last->inp_socket)) 362 ipsecstat.in_polvio++; 363 /* do not inject data to pcb */ 364 else 365 #endif /*IPSEC*/ 366 #ifdef FAST_IPSEC 367 /* check AH/ESP integrity. */ 368 if (ipsec4_in_reject(m, last)) 369 ; 370 else 371 #endif /*FAST_IPSEC*/ 372 if ((n = m_copypacket(m, M_NOWAIT)) != NULL) 373 udp_append(last, ip, n, 374 arg->iphlen + sizeof(struct udphdr), 375 arg->udp_in); 376 } 377 arg->last = last = inp; 378 379 /* 380 * Don't look for additional matches if this one does 381 * not have either the SO_REUSEPORT or SO_REUSEADDR 382 * socket options set. This heuristic avoids searching 383 * through all pcbs in the common case of a non-shared 384 * port. 
It * assumes that an application will never 385 * clear these options after setting them. 386 */ 387 if (!(last->inp_socket->so_options & 388 (SO_REUSEPORT | SO_REUSEADDR))) 389 return EJUSTRETURN; /* caller stop */ 390 return 0; 391 } 392 393 int 394 udp_input(struct mbuf **mp, int *offp, int proto) 395 { 396 struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 397 int iphlen; 398 struct ip *ip; 399 struct udphdr *uh; 400 struct inpcb *inp; 401 struct mbuf *m; 402 struct mbuf *opts = NULL; 403 int len, off; 404 struct ip save_ip; 405 struct inpcbinfo *pcbinfo = &udbinfo[mycpuid]; 406 407 off = *offp; 408 m = *mp; 409 *mp = NULL; 410 411 iphlen = off; 412 udp_stat.udps_ipackets++; 413 414 /* 415 * Strip IP options, if any; should skip this, 416 * make available to user, and use on returned packets, 417 * but we don't yet have a way to check the checksum 418 * with options still present. 419 */ 420 if (iphlen > sizeof(struct ip)) { 421 ip_stripoptions(m); 422 iphlen = sizeof(struct ip); 423 } 424 425 /* 426 * IP and UDP headers are together in first mbuf. 427 * Already checked and pulled up in ip_demux(). 428 */ 429 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 430 ("UDP header not in one mbuf")); 431 432 ip = mtod(m, struct ip *); 433 uh = (struct udphdr *)((caddr_t)ip + iphlen); 434 435 /* destination port of 0 is illegal, based on RFC768. */ 436 if (uh->uh_dport == 0) 437 goto bad; 438 439 /* 440 * Make mbuf data length reflect UDP length. 441 * If not enough data to reflect UDP length, drop. 442 */ 443 len = ntohs((u_short)uh->uh_ulen); 444 if (ip->ip_len != len) { 445 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 446 udp_stat.udps_badlen++; 447 goto bad; 448 } 449 m_adj(m, len - ip->ip_len); 450 /* ip->ip_len = len; */ 451 } 452 /* 453 * Save a copy of the IP header in case we want restore it 454 * for sending an ICMP error message in response. 455 */ 456 save_ip = *ip; 457 458 /* 459 * Checksum extended UDP header and data. 
460 */ 461 if (uh->uh_sum) { 462 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 463 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 464 uh->uh_sum = m->m_pkthdr.csum_data; 465 else 466 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 467 ip->ip_dst.s_addr, htonl((u_short)len + 468 m->m_pkthdr.csum_data + IPPROTO_UDP)); 469 uh->uh_sum ^= 0xffff; 470 } else { 471 char b[9]; 472 473 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 474 bzero(((struct ipovly *)ip)->ih_x1, 9); 475 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 476 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 477 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 478 } 479 if (uh->uh_sum) { 480 udp_stat.udps_badsum++; 481 m_freem(m); 482 return(IPPROTO_DONE); 483 } 484 } else 485 udp_stat.udps_nosum++; 486 487 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 488 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 489 struct inpcbhead *connhead; 490 struct inpcontainer *ic, *ic_marker; 491 struct inpcontainerhead *ichead; 492 struct udp_mcast_arg arg; 493 struct inpcb *last; 494 int error; 495 496 /* 497 * Deliver a multicast or broadcast datagram to *all* sockets 498 * for which the local and remote addresses and ports match 499 * those of the incoming datagram. This allows more than 500 * one process to receive multi/broadcasts on the same port. 501 * (This really ought to be done for unicast datagrams as 502 * well, but that would cause problems with existing 503 * applications that open both address-specific sockets and 504 * a wildcard socket listening to the same port -- they would 505 * end up receiving duplicates of every unicast datagram. 506 * Those applications open the multiple sockets to overcome an 507 * inadequacy of the UDP socket interface, but for backwards 508 * compatibility we avoid the problem here rather than 509 * fixing the interface. Maybe 4.5BSD will remedy this?) 510 */ 511 512 /* 513 * Construct sockaddr format source address. 
514 */ 515 udp_in.sin_port = uh->uh_sport; 516 udp_in.sin_addr = ip->ip_src; 517 arg.udp_in = &udp_in; 518 /* 519 * Locate pcb(s) for datagram. 520 * (Algorithm copied from raw_intr().) 521 */ 522 last = NULL; 523 arg.iphlen = iphlen; 524 525 connhead = &pcbinfo->hashbase[ 526 INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport, 527 ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)]; 528 LIST_FOREACH(inp, connhead, inp_hash) { 529 #ifdef INET6 530 if (!INP_ISIPV4(inp)) 531 continue; 532 #endif 533 if (!in_hosteq(inp->inp_faddr, ip->ip_src) || 534 !in_hosteq(inp->inp_laddr, ip->ip_dst) || 535 inp->inp_fport != uh->uh_sport || 536 inp->inp_lport != uh->uh_dport) 537 continue; 538 539 arg.inp = inp; 540 arg.last = last; 541 arg.ip = ip; 542 arg.m = m; 543 544 error = udp_mcast_input(&arg); 545 if (error == ERESTART) 546 continue; 547 last = arg.last; 548 549 if (error == EJUSTRETURN) 550 goto done; 551 } 552 553 ichead = &pcbinfo->wildcardhashbase[ 554 INP_PCBWILDCARDHASH(uh->uh_dport, 555 pcbinfo->wildcardhashmask)]; 556 ic_marker = in_pcbcontainer_marker(mycpuid); 557 558 GET_PCBINFO_TOKEN(pcbinfo); 559 LIST_INSERT_HEAD(ichead, ic_marker, ic_list); 560 while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) { 561 LIST_REMOVE(ic_marker, ic_list); 562 LIST_INSERT_AFTER(ic, ic_marker, ic_list); 563 564 inp = ic->ic_inp; 565 if (inp->inp_flags & INP_PLACEMARKER) 566 continue; 567 #ifdef INET6 568 if (!INP_ISIPV4(inp)) 569 continue; 570 #endif 571 if (inp->inp_lport != uh->uh_dport) 572 continue; 573 if (inp->inp_laddr.s_addr != INADDR_ANY && 574 inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 575 continue; 576 577 arg.inp = inp; 578 arg.last = last; 579 arg.ip = ip; 580 arg.m = m; 581 582 error = udp_mcast_input(&arg); 583 if (error == ERESTART) 584 continue; 585 last = arg.last; 586 587 if (error == EJUSTRETURN) 588 break; 589 } 590 LIST_REMOVE(ic_marker, ic_list); 591 REL_PCBINFO_TOKEN(pcbinfo); 592 done: 593 if (last == NULL) { 594 /* 595 * No matching pcb found; discard 
datagram. 596 * (No need to send an ICMP Port Unreachable 597 * for a broadcast or multicast datgram.) 598 */ 599 udp_stat.udps_noportbcast++; 600 goto bad; 601 } 602 #ifdef IPSEC 603 /* check AH/ESP integrity. */ 604 if (ipsec4_in_reject_so(m, last->inp_socket)) { 605 ipsecstat.in_polvio++; 606 goto bad; 607 } 608 #endif /*IPSEC*/ 609 #ifdef FAST_IPSEC 610 /* check AH/ESP integrity. */ 611 if (ipsec4_in_reject(m, last)) 612 goto bad; 613 #endif /*FAST_IPSEC*/ 614 udp_append(last, ip, m, iphlen + sizeof(struct udphdr), 615 &udp_in); 616 return(IPPROTO_DONE); 617 } 618 /* 619 * Locate pcb for datagram. 620 */ 621 inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport, 622 ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif, 623 udp_reuseport_ext ? m : NULL); 624 if (inp == NULL) { 625 if (log_in_vain) { 626 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 627 628 log(LOG_INFO, 629 "Connection attempt to UDP %s:%d from %s:%d\n", 630 kinet_ntoa(ip->ip_dst, dst), ntohs(uh->uh_dport), 631 kinet_ntoa(ip->ip_src, src), ntohs(uh->uh_sport)); 632 } 633 udp_stat.udps_noport++; 634 if (m->m_flags & (M_BCAST | M_MCAST)) { 635 udp_stat.udps_noportbcast++; 636 goto bad; 637 } 638 if (blackhole) 639 goto bad; 640 #ifdef ICMP_BANDLIM 641 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 642 goto bad; 643 #endif 644 *ip = save_ip; 645 ip->ip_len += iphlen; 646 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 647 return(IPPROTO_DONE); 648 } 649 KASSERT(INP_ISIPV4(inp), ("not inet inpcb")); 650 #ifdef IPSEC 651 if (ipsec4_in_reject_so(m, inp->inp_socket)) { 652 ipsecstat.in_polvio++; 653 goto bad; 654 } 655 #endif /*IPSEC*/ 656 #ifdef FAST_IPSEC 657 if (ipsec4_in_reject(m, inp)) 658 goto bad; 659 #endif /*FAST_IPSEC*/ 660 /* 661 * Check the minimum TTL for socket. 662 */ 663 if (ip->ip_ttl < inp->inp_ip_minttl) 664 goto bad; 665 666 /* 667 * Construct sockaddr format source address. 668 * Stuff source address and datagram in user buffer. 
669 */ 670 udp_in.sin_port = uh->uh_sport; 671 udp_in.sin_addr = ip->ip_src; 672 if ((inp->inp_flags & INP_CONTROLOPTS) || 673 (inp->inp_socket->so_options & SO_TIMESTAMP)) 674 ip_savecontrol(inp, &opts, ip, m); 675 m_adj(m, iphlen + sizeof(struct udphdr)); 676 677 lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token); 678 if (ssb_appendaddr(&inp->inp_socket->so_rcv, 679 (struct sockaddr *)&udp_in, m, opts) == 0) { 680 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 681 udp_stat.udps_fullsock++; 682 goto bad; 683 } 684 lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token); 685 sorwakeup(inp->inp_socket); 686 return(IPPROTO_DONE); 687 bad: 688 m_freem(m); 689 if (opts) 690 m_freem(opts); 691 return(IPPROTO_DONE); 692 } 693 694 /* 695 * subroutine of udp_input(), mainly for source code readability. 696 * caller must properly init udp_ip6 and udp_in6 beforehand. 697 */ 698 static void 699 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 700 struct sockaddr_in *udp_in) 701 { 702 struct mbuf *opts = NULL; 703 int ret; 704 705 KASSERT(INP_ISIPV4(last), ("not inet inpcb")); 706 707 if (last->inp_flags & INP_CONTROLOPTS || 708 last->inp_socket->so_options & SO_TIMESTAMP) 709 ip_savecontrol(last, &opts, ip, n); 710 m_adj(n, off); 711 712 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 713 ret = ssb_appendaddr(&last->inp_socket->so_rcv, 714 (struct sockaddr *)udp_in, n, opts); 715 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 716 if (ret == 0) { 717 m_freem(n); 718 if (opts) 719 m_freem(opts); 720 udp_stat.udps_fullsock++; 721 } else { 722 sorwakeup(last->inp_socket); 723 } 724 } 725 726 /* 727 * Notify a udp user of an asynchronous error; 728 * just wake up so that he can collect error status. 
729 */ 730 void 731 udp_notify(struct inpcb *inp, int error) 732 { 733 inp->inp_socket->so_error = error; 734 sorwakeup(inp->inp_socket); 735 sowwakeup(inp->inp_socket); 736 } 737 738 struct netmsg_udp_notify { 739 struct netmsg_base base; 740 inp_notify_t nm_notify; 741 struct in_addr nm_faddr; 742 int nm_arg; 743 }; 744 745 static void 746 udp_notifyall_oncpu(netmsg_t msg) 747 { 748 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 749 int nextcpu, cpu = mycpuid; 750 751 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 752 753 nextcpu = cpu + 1; 754 if (nextcpu < ncpus2) 755 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 756 else 757 lwkt_replymsg(&nm->base.lmsg, 0); 758 } 759 760 inp_notify_t 761 udp_get_inpnotify(int cmd, const struct sockaddr *sa, 762 struct ip **ip0, int *cpuid) 763 { 764 struct in_addr faddr; 765 struct ip *ip = *ip0; 766 inp_notify_t notify = udp_notify; 767 768 faddr = ((const struct sockaddr_in *)sa)->sin_addr; 769 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 770 return NULL; 771 772 if (PRC_IS_REDIRECT(cmd)) { 773 ip = NULL; 774 notify = in_rtchange; 775 } else if (cmd == PRC_HOSTDEAD) { 776 ip = NULL; 777 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 778 return NULL; 779 } 780 781 if (cpuid != NULL) { 782 if (ip == NULL) { 783 /* Go through all CPUs */ 784 *cpuid = ncpus; 785 } else { 786 const struct udphdr *uh; 787 788 uh = (const struct udphdr *) 789 ((caddr_t)ip + (ip->ip_hl << 2)); 790 *cpuid = udp_addrcpu(faddr.s_addr, uh->uh_dport, 791 ip->ip_src.s_addr, uh->uh_sport); 792 } 793 } 794 795 *ip0 = ip; 796 return notify; 797 } 798 799 void 800 udp_ctlinput(netmsg_t msg) 801 { 802 struct sockaddr *sa = msg->ctlinput.nm_arg; 803 struct ip *ip = msg->ctlinput.nm_extra; 804 int cmd = msg->ctlinput.nm_cmd, cpuid; 805 inp_notify_t notify; 806 struct in_addr faddr; 807 808 notify = udp_get_inpnotify(cmd, sa, &ip, &cpuid); 809 if (notify == NULL) 810 goto 
done; 811 812 faddr = ((struct sockaddr_in *)sa)->sin_addr; 813 if (ip) { 814 const struct udphdr *uh; 815 struct inpcb *inp; 816 817 if (cpuid != mycpuid) 818 goto done; 819 820 uh = (const struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 821 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 822 ip->ip_src, uh->uh_sport, 0, NULL); 823 if (inp != NULL && inp->inp_socket != NULL) 824 notify(inp, inetctlerrmap[cmd]); 825 } else if (msg->ctlinput.nm_direct) { 826 if (cpuid != ncpus && cpuid != mycpuid) 827 goto done; 828 if (mycpuid >= ncpus2) 829 goto done; 830 831 in_pcbnotifyall(&udbinfo[mycpuid], faddr, inetctlerrmap[cmd], 832 notify); 833 } else { 834 struct netmsg_udp_notify *nm; 835 836 ASSERT_IN_NETISR(0); 837 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 838 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 839 0, udp_notifyall_oncpu); 840 nm->nm_faddr = faddr; 841 nm->nm_arg = inetctlerrmap[cmd]; 842 nm->nm_notify = notify; 843 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 844 } 845 done: 846 lwkt_replymsg(&msg->lmsg, 0); 847 } 848 849 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 850 in_pcblist_global_ncpus2, "S,xinpcb", "List of active UDP sockets"); 851 852 static int 853 udp_getcred(SYSCTL_HANDLER_ARGS) 854 { 855 struct sockaddr_in addrs[2]; 856 struct ucred cred0, *cred = NULL; 857 struct inpcb *inp; 858 int error, cpu, origcpu; 859 860 error = priv_check(req->td, PRIV_ROOT); 861 if (error) 862 return (error); 863 error = SYSCTL_IN(req, addrs, sizeof addrs); 864 if (error) 865 return (error); 866 867 origcpu = mycpuid; 868 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 869 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 870 871 lwkt_migratecpu(cpu); 872 873 inp = in_pcblookup_hash(&udbinfo[cpu], 874 addrs[1].sin_addr, addrs[1].sin_port, 875 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 876 if (inp == NULL || inp->inp_socket == NULL) { 877 error = ENOENT; 878 } else if 
(inp->inp_socket->so_cred != NULL) { 879 cred0 = *(inp->inp_socket->so_cred); 880 cred = &cred0; 881 } 882 883 lwkt_migratecpu(origcpu); 884 885 if (error) 886 return error; 887 888 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 889 } 890 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 891 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 892 893 static void 894 udp_send_redispatch(netmsg_t msg) 895 { 896 struct mbuf *m = msg->send.nm_m; 897 int pru_flags = msg->send.nm_flags; 898 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 899 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 900 int flags = msg->send.nm_priv; /* ip_output flags */ 901 int error; 902 903 logudp(redisp_ipout_beg, inp); 904 905 /* 906 * - Don't use inp route cache. It should only be used in the 907 * inp owner netisr. 908 * - Access to inp_moptions should be safe, since multicast UDP 909 * datagrams are redispatched to netisr0 and inp_moptions is 910 * changed only in netisr0. 
911 */ 912 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 913 if ((pru_flags & PRUS_NOREPLY) == 0) 914 lwkt_replymsg(&msg->send.base.lmsg, error); 915 916 if (m_opt != NULL) { 917 /* Free saved ip options, if any */ 918 m_freem(m_opt); 919 } 920 921 logudp(redisp_ipout_end, inp); 922 } 923 924 static void 925 udp_send(netmsg_t msg) 926 { 927 struct socket *so = msg->send.base.nm_so; 928 struct mbuf *m = msg->send.nm_m; 929 struct sockaddr *dstaddr = msg->send.nm_addr; 930 int pru_flags = msg->send.nm_flags; 931 struct inpcb *inp = so->so_pcb; 932 struct thread *td = msg->send.nm_td; 933 uint16_t hash; 934 int flags; 935 936 struct udpiphdr *ui; 937 int len = m->m_pkthdr.len; 938 struct sockaddr_in *sin; /* really is initialized before use */ 939 int error = 0, cpu; 940 941 KKASSERT(msg->send.nm_control == NULL); 942 943 logudp(send_beg, inp); 944 945 if (inp == NULL) { 946 error = EINVAL; 947 goto release; 948 } 949 950 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 951 error = EMSGSIZE; 952 goto release; 953 } 954 955 if (inp->inp_lport == 0) { /* unbound socket */ 956 boolean_t forwarded; 957 958 error = in_pcbbind(inp, NULL, td); 959 if (error) 960 goto release; 961 962 /* 963 * Need to call udp_send again, after this inpcb is 964 * inserted into wildcard hash table. 965 */ 966 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 967 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 968 if (forwarded) { 969 /* 970 * The message is further forwarded, so we are 971 * done here. 
972 */ 973 logudp(send_inswildcard, inp); 974 return; 975 } 976 } 977 978 if (dstaddr != NULL) { /* destination address specified */ 979 if (inp->inp_faddr.s_addr != INADDR_ANY) { 980 /* already connected */ 981 error = EISCONN; 982 goto release; 983 } 984 sin = (struct sockaddr_in *)dstaddr; 985 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 986 error = EAFNOSUPPORT; /* IPv6 only jail */ 987 goto release; 988 } 989 } else { 990 if (inp->inp_faddr.s_addr == INADDR_ANY) { 991 /* no destination specified and not already connected */ 992 error = ENOTCONN; 993 goto release; 994 } 995 sin = NULL; 996 } 997 998 /* 999 * Calculate data length and get a mbuf 1000 * for UDP and IP headers. 1001 */ 1002 M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT); 1003 if (m == NULL) { 1004 error = ENOBUFS; 1005 goto release; 1006 } 1007 1008 /* 1009 * Fill in mbuf with extended UDP header 1010 * and addresses and length put into network format. 1011 */ 1012 ui = mtod(m, struct udpiphdr *); 1013 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 1014 ui->ui_pr = IPPROTO_UDP; 1015 1016 /* 1017 * Set destination address. 1018 */ 1019 if (dstaddr != NULL) { /* use specified destination */ 1020 ui->ui_dst = sin->sin_addr; 1021 ui->ui_dport = sin->sin_port; 1022 } else { /* use connected destination */ 1023 ui->ui_dst = inp->inp_faddr; 1024 ui->ui_dport = inp->inp_fport; 1025 } 1026 1027 /* 1028 * Set source address. 1029 */ 1030 if (inp->inp_laddr.s_addr == INADDR_ANY || 1031 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 1032 struct sockaddr_in *if_sin; 1033 1034 if (dstaddr == NULL) { 1035 /* 1036 * connect() had (or should have) failed because 1037 * the interface had no IP address, but the 1038 * application proceeded to call send() anyways. 1039 */ 1040 error = ENOTCONN; 1041 goto release; 1042 } 1043 1044 /* Look up outgoing interface. 
*/ 1045 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1046 if (error) 1047 goto release; 1048 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1049 } else { 1050 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1051 } 1052 ui->ui_sport = inp->inp_lport; 1053 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1054 1055 /* 1056 * Release the original thread, since it is no longer used 1057 */ 1058 if (pru_flags & PRUS_HELDTD) { 1059 lwkt_rele(td); 1060 pru_flags &= ~PRUS_HELDTD; 1061 } 1062 /* 1063 * Free the dest address, since it is no longer needed 1064 */ 1065 if (pru_flags & PRUS_FREEADDR) { 1066 kfree(dstaddr, M_SONAME); 1067 pru_flags &= ~PRUS_FREEADDR; 1068 } 1069 1070 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1071 1072 /* 1073 * Set up checksum and output datagram. 1074 */ 1075 if (udpcksum) { 1076 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1077 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1078 m->m_pkthdr.csum_flags = CSUM_UDP; 1079 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1080 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1081 } else { 1082 ui->ui_sum = 0; 1083 } 1084 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1085 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1086 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1087 udp_stat.udps_opackets++; 1088 1089 flags = IP_DEBUGROUTE | 1090 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1091 if (pru_flags & PRUS_DONTROUTE) 1092 flags |= SO_DONTROUTE; 1093 1094 if (inp->inp_flags & INP_CONNECTED) { 1095 /* 1096 * For connected socket, this datagram has already 1097 * been in the correct netisr; no need to rehash. 
1098 */ 1099 KASSERT(inp->inp_flags & INP_HASH, ("inpcb has no hash")); 1100 m_sethash(m, inp->inp_hashval); 1101 goto sendit; 1102 } 1103 1104 hash = udp_addrhash(ui->ui_dst.s_addr, ui->ui_dport, 1105 ui->ui_src.s_addr, ui->ui_sport); 1106 m_sethash(m, hash); 1107 1108 cpu = netisr_hashcpu(hash); 1109 if (cpu != mycpuid) { 1110 struct mbuf *m_opt = NULL; 1111 struct netmsg_pru_send *smsg; 1112 struct lwkt_port *port = netisr_cpuport(cpu); 1113 1114 /* 1115 * Not on the CPU that matches this UDP datagram hash; 1116 * redispatch to the correct CPU to do the ip_output(). 1117 */ 1118 if (inp->inp_options != NULL) { 1119 /* 1120 * If there are ip options, then save a copy, 1121 * since accessing inp_options on other CPUs' 1122 * is not safe. 1123 * 1124 * XXX optimize this? 1125 */ 1126 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1127 M_WAITOK); 1128 } 1129 if ((pru_flags & PRUS_NOREPLY) == 0) { 1130 /* 1131 * Change some parts of the original netmsg and 1132 * forward it to the target netisr. 1133 * 1134 * NOTE: so_port MUST NOT be checked in the target 1135 * netisr. 1136 */ 1137 smsg = &msg->send; 1138 smsg->nm_priv = flags; /* ip_output flags */ 1139 smsg->nm_m = m; 1140 smsg->nm_control = m_opt; /* XXX save ipopt */ 1141 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1142 smsg->base.nm_dispatch = udp_send_redispatch; 1143 lwkt_forwardmsg(port, &smsg->base.lmsg); 1144 } else { 1145 /* 1146 * Recreate the netmsg, since the original mbuf 1147 * could have been changed. And send it to the 1148 * target netisr. 1149 * 1150 * NOTE: so_port MUST NOT be checked in the target 1151 * netisr. 
1152 */ 1153 smsg = &m->m_hdr.mh_sndmsg; 1154 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1155 MSGF_IGNSOPORT, udp_send_redispatch); 1156 smsg->nm_priv = flags; /* ip_output flags */ 1157 smsg->nm_flags = pru_flags; 1158 smsg->nm_m = m; 1159 smsg->nm_control = m_opt; /* XXX save ipopt */ 1160 lwkt_sendmsg(port, &smsg->base.lmsg); 1161 } 1162 1163 /* This UDP datagram is redispatched; done */ 1164 logudp(send_redisp, inp); 1165 return; 1166 } 1167 1168 sendit: 1169 logudp(send_ipout, inp); 1170 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1171 inp->inp_moptions, inp); 1172 m = NULL; 1173 1174 release: 1175 if (m != NULL) 1176 m_freem(m); 1177 1178 if (pru_flags & PRUS_HELDTD) 1179 lwkt_rele(td); 1180 if (pru_flags & PRUS_FREEADDR) 1181 kfree(dstaddr, M_SONAME); 1182 if ((pru_flags & PRUS_NOREPLY) == 0) 1183 lwkt_replymsg(&msg->send.base.lmsg, error); 1184 1185 logudp(send_end, inp); 1186 } 1187 1188 u_long udp_sendspace = 9216; /* really max datagram size */ 1189 /* 40 1K datagrams */ 1190 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1191 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1192 1193 u_long udp_recvspace = 40 * (1024 + 1194 #ifdef INET6 1195 sizeof(struct sockaddr_in6) 1196 #else 1197 sizeof(struct sockaddr_in) 1198 #endif 1199 ); 1200 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1201 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1202 1203 /* 1204 * This should never happen, since UDP socket does not support 1205 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 
1206 */ 1207 static void 1208 udp_abort(netmsg_t msg __unused) 1209 { 1210 panic("udp_abort is called"); 1211 } 1212 1213 static int 1214 udp_preattach(struct socket *so, int proto __unused, struct pru_attach_info *ai) 1215 { 1216 return soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1217 } 1218 1219 static void 1220 udp_attach(netmsg_t msg) 1221 { 1222 struct socket *so = msg->attach.base.nm_so; 1223 struct pru_attach_info *ai = msg->attach.nm_ai; 1224 struct inpcb *inp; 1225 int error; 1226 1227 KASSERT(so->so_pcb == NULL, ("udp socket attached")); 1228 1229 if (ai != NULL) { 1230 error = udp_preattach(so, 0 /* don't care */, ai); 1231 if (error) 1232 goto out; 1233 } else { 1234 /* Post attach; do nothing */ 1235 } 1236 1237 error = in_pcballoc(so, &udbinfo[mycpuid]); 1238 if (error) 1239 goto out; 1240 1241 inp = so->so_pcb; 1242 inp->inp_flags |= INP_DIRECT_DETACH; 1243 inp->inp_ip_ttl = ip_defttl; 1244 error = 0; 1245 out: 1246 lwkt_replymsg(&msg->attach.base.lmsg, error); 1247 } 1248 1249 static void 1250 udp_inswildcard_replymsg(netmsg_t msg) 1251 { 1252 lwkt_msg_t lmsg = &msg->lmsg; 1253 1254 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1255 udp_send(msg); 1256 /* msg is replied by udp_send() */ 1257 } else { 1258 lwkt_replymsg(lmsg, lmsg->ms_error); 1259 } 1260 } 1261 1262 static void 1263 udp_soreuseport_dispatch(netmsg_t msg) 1264 { 1265 /* This inpcb has already been in the wildcard hash. 
*/ 1266 in_pcblink_flags(msg->base.nm_so->so_pcb, &udbinfo[mycpuid], 0); 1267 udp_inswildcard_replymsg(msg); 1268 } 1269 1270 static void 1271 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1272 { 1273 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1274 } 1275 1276 static boolean_t 1277 udp_inswildcardhash_oncpu(struct inpcb *inp, struct netmsg_base *msg) 1278 { 1279 int cpu; 1280 1281 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1282 ("not on owner cpu")); 1283 1284 in_pcbinswildcardhash(inp); 1285 for (cpu = 0; cpu < ncpus2; ++cpu) { 1286 if (cpu == mycpuid) { 1287 /* 1288 * This inpcb has been inserted by the above 1289 * in_pcbinswildcardhash(). 1290 */ 1291 continue; 1292 } 1293 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1294 } 1295 1296 if (inp->inp_socket->so_options & SO_REUSEPORT) { 1297 /* 1298 * For SO_REUSEPORT socket, redistribute it based on its 1299 * local group index. 1300 */ 1301 cpu = inp->inp_lgrpindex & ncpus2_mask; 1302 if (cpu != mycpuid) { 1303 struct lwkt_port *port = netisr_cpuport(cpu); 1304 lwkt_msg_t lmsg = &msg->lmsg; 1305 1306 /* 1307 * We are moving the protocol processing port the 1308 * socket is on, we have to unlink here and re-link 1309 * on the target cpu (this inpcb is still left in 1310 * the wildcard hash). 
1311 */ 1312 in_pcbunlink_flags(inp, &udbinfo[mycpuid], 0); 1313 msg->nm_dispatch = udp_soreuseport_dispatch; 1314 1315 /* 1316 * See the related comment in tcp_usrreq.c 1317 * tcp_connect() 1318 */ 1319 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1320 lwkt_forwardmsg(port, lmsg); 1321 return TRUE; /* forwarded */ 1322 } 1323 } 1324 return FALSE; 1325 } 1326 1327 static void 1328 udp_inswildcardhash_dispatch(netmsg_t msg) 1329 { 1330 struct inpcb *inp = msg->base.nm_so->so_pcb; 1331 boolean_t forwarded; 1332 1333 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1334 KASSERT(udp_lportcpu(inp->inp_lport) == mycpuid, ("not target cpu")); 1335 1336 in_pcblink(inp, &udbinfo[mycpuid]); 1337 1338 forwarded = udp_inswildcardhash_oncpu(inp, &msg->base); 1339 if (forwarded) { 1340 /* The message is further forwarded, so we are done here. */ 1341 return; 1342 } 1343 udp_inswildcard_replymsg(msg); 1344 } 1345 1346 static boolean_t 1347 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1348 { 1349 lwkt_msg_t lmsg = &msg->lmsg; 1350 int cpu; 1351 1352 ASSERT_INP_NOTINHASH(inp); 1353 1354 /* This inpcb could no longer be directly detached */ 1355 inp->inp_flags &= ~INP_DIRECT_DETACH; 1356 1357 /* 1358 * Always clear the route cache, so we don't need to 1359 * worry about any owner CPU changes later. 1360 */ 1361 in_pcbresetroute(inp); 1362 1363 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1364 cpu = udp_lportcpu(inp->inp_lport); 1365 1366 lmsg->ms_error = error; 1367 if (cpu != mycpuid) { 1368 struct lwkt_port *port = netisr_cpuport(cpu); 1369 1370 /* 1371 * We are moving the protocol processing port the socket 1372 * is on, we have to unlink here and re-link on the 1373 * target cpu. 
1374 */ 1375 in_pcbunlink(inp, &udbinfo[mycpuid]); 1376 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1377 1378 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1379 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1380 lwkt_forwardmsg(port, lmsg); 1381 return TRUE; /* forwarded */ 1382 } 1383 1384 return udp_inswildcardhash_oncpu(inp, msg); 1385 } 1386 1387 static void 1388 udp_bind(netmsg_t msg) 1389 { 1390 struct socket *so = msg->bind.base.nm_so; 1391 struct inpcb *inp; 1392 int error; 1393 1394 inp = so->so_pcb; 1395 if (inp) { 1396 struct sockaddr *nam = msg->bind.nm_nam; 1397 struct thread *td = msg->bind.nm_td; 1398 struct sockaddr_in *sin; 1399 lwkt_port_t port; 1400 int cpu; 1401 1402 /* 1403 * Check "already bound" here (in_pcbbind() does the same 1404 * check though), so we don't forward a connected/bound 1405 * socket randomly which would panic in the following 1406 * in_pcbunlink(). 1407 */ 1408 if (inp->inp_lport != 0 || 1409 inp->inp_laddr.s_addr != INADDR_ANY) { 1410 error = EINVAL; /* already bound */ 1411 goto done; 1412 } 1413 1414 if (nam->sa_len != sizeof(*sin)) { 1415 error = EINVAL; 1416 goto done; 1417 } 1418 sin = (struct sockaddr_in *)nam; 1419 1420 cpu = udp_lportcpu(sin->sin_port); 1421 port = netisr_cpuport(cpu); 1422 1423 /* 1424 * See the related comment in tcp_usrreq.c tcp_usr_bind(). 1425 * The exception is that we use local port based netisr 1426 * to serialize in_pcbbind(). 1427 */ 1428 if (&curthread->td_msgport != port) { 1429 lwkt_msg_t lmsg = &msg->bind.base.lmsg; 1430 1431 KASSERT((msg->bind.nm_flags & PRUB_RELINK) == 0, 1432 ("already asked to relink")); 1433 1434 in_pcbunlink(so->so_pcb, &udbinfo[mycpuid]); 1435 msg->bind.nm_flags |= PRUB_RELINK; 1436 1437 /* 1438 * See the related comment in tcp_usrreq.c 1439 * tcp_connect(). 
1440 */ 1441 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1442 lwkt_forwardmsg(port, lmsg); 1443 /* msg invalid now */ 1444 return; 1445 } 1446 KASSERT(so->so_port == port, ("so_port is not netisr%d", cpu)); 1447 1448 if (msg->bind.nm_flags & PRUB_RELINK) { 1449 msg->bind.nm_flags &= ~PRUB_RELINK; 1450 in_pcblink(so->so_pcb, &udbinfo[mycpuid]); 1451 } 1452 KASSERT(inp->inp_pcbinfo == &udbinfo[cpu], 1453 ("pcbinfo is not udbinfo%d", cpu)); 1454 1455 error = in_pcbbind(inp, nam, td); 1456 if (error == 0) { 1457 boolean_t forwarded; 1458 1459 if (sin->sin_addr.s_addr != INADDR_ANY) 1460 inp->inp_flags |= INP_WASBOUND_NOTANY; 1461 1462 forwarded = udp_inswildcardhash(inp, 1463 &msg->bind.base, 0); 1464 if (forwarded) { 1465 /* 1466 * The message is further forwarded, so 1467 * we are done here. 1468 */ 1469 return; 1470 } 1471 } 1472 } else { 1473 error = EINVAL; 1474 } 1475 done: 1476 lwkt_replymsg(&msg->bind.base.lmsg, error); 1477 } 1478 1479 static int 1480 udp_preconnect(struct socket *so, const struct sockaddr *nam __unused, 1481 struct thread *td __unused) 1482 { 1483 sosetstate(so, SS_ISCONNECTED); /* XXX */ 1484 return 0; 1485 } 1486 1487 static void 1488 udp_connect(netmsg_t msg) 1489 { 1490 struct socket *so = msg->connect.base.nm_so; 1491 struct sockaddr *nam = msg->connect.nm_nam; 1492 struct thread *td = msg->connect.nm_td; 1493 struct inpcb *inp; 1494 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1495 struct sockaddr_in *if_sin; 1496 struct lwkt_port *port; 1497 uint16_t hash; 1498 int error; 1499 1500 KKASSERT(msg->connect.nm_m == NULL); 1501 1502 inp = so->so_pcb; 1503 if (inp == NULL) { 1504 error = EINVAL; 1505 goto out; 1506 } 1507 1508 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1509 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1510 in_pcblink(inp, &udbinfo[mycpuid]); 1511 } 1512 1513 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1514 error = EISCONN; 1515 goto out; 1516 } 1517 error = 0; 1518 1519 /* 1520 * Bind if we have to 1521 */ 1522 if 
(inp->inp_lport == 0) { 1523 error = in_pcbbind(inp, NULL, td); 1524 if (error) 1525 goto out; 1526 } 1527 1528 /* 1529 * Calculate the correct protocol processing thread. The connect 1530 * operation must run there. 1531 */ 1532 error = in_pcbladdr(inp, nam, &if_sin, td); 1533 if (error) 1534 goto out; 1535 if (!prison_remote_ip(td, nam)) { 1536 error = EAFNOSUPPORT; /* IPv6 only jail */ 1537 goto out; 1538 } 1539 1540 hash = udp_addrhash(sin->sin_addr.s_addr, sin->sin_port, 1541 inp->inp_laddr.s_addr != INADDR_ANY ? 1542 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1543 port = netisr_hashport(hash); 1544 if (port != &curthread->td_msgport) { 1545 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1546 int nm_flags = PRUC_RECONNECT; 1547 1548 /* 1549 * in_pcbladdr() may have allocated a route entry for us 1550 * on the current CPU, but we need a route entry on the 1551 * inpcb's owner CPU, so free it here. 1552 */ 1553 in_pcbresetroute(inp); 1554 1555 if (inp->inp_flags & INP_WILDCARD) { 1556 /* 1557 * Remove this inpcb from the wildcard hash before 1558 * the socket's msgport changes. 1559 */ 1560 udp_remwildcardhash(inp); 1561 } 1562 1563 if (so->so_orig_port == NULL) { 1564 /* 1565 * First time change protocol processing port. 1566 * Save the current port for synchronization upon 1567 * udp_detach. 1568 */ 1569 so->so_orig_port = &curthread->td_msgport; 1570 } else { 1571 /* 1572 * We have changed protocol processing port more 1573 * than once. We could not do direct detach 1574 * anymore, because we lose the track of the 1575 * original protocol processing ports to perform 1576 * synchronization upon udp_detach. This should 1577 * be rare though. 1578 */ 1579 inp->inp_flags &= ~INP_DIRECT_DETACH; 1580 } 1581 1582 /* 1583 * We are moving the protocol processing port the socket 1584 * is on, we have to unlink here and re-link on the 1585 * target cpu. 
1586 */ 1587 in_pcbunlink(inp, &udbinfo[mycpuid]); 1588 msg->connect.nm_flags |= nm_flags; 1589 1590 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1591 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1592 lwkt_forwardmsg(port, lmsg); 1593 /* msg invalid now */ 1594 return; 1595 } 1596 error = udp_connect_oncpu(inp, sin, if_sin, hash); 1597 out: 1598 if (msg->connect.nm_flags & PRUC_HELDTD) 1599 lwkt_rele(td); 1600 if (error && (msg->connect.nm_flags & PRUC_ASYNC)) { 1601 if (inp->inp_lport == 0) { 1602 /* 1603 * As long as we have the local port, it is fine 1604 * for connect to fail, e.g. disconnect. 1605 */ 1606 so->so_error = error; 1607 } 1608 soclrstate(so, SS_ISCONNECTED); 1609 /* 1610 * Wake up callers blocked on this socket to make sure 1611 * that they can see this error. 1612 * 1613 * NOTE: 1614 * sodisconnected() can't be used here, which bricks 1615 * sending and receiving. 1616 */ 1617 wakeup(&so->so_timeo); 1618 sowwakeup(so); 1619 sorwakeup(so); 1620 } 1621 if (error && inp != NULL && inp->inp_lport != 0 && 1622 (inp->inp_flags & INP_WILDCARD) == 0) { 1623 boolean_t forwarded; 1624 1625 /* Connect failed; put it to wildcard hash. */ 1626 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1627 error); 1628 if (forwarded) { 1629 /* 1630 * The message is further forwarded, so we are done 1631 * here. 1632 */ 1633 return; 1634 } 1635 } 1636 lwkt_replymsg(&msg->connect.base.lmsg, error); 1637 } 1638 1639 static void 1640 udp_remwildcardhash(struct inpcb *inp) 1641 { 1642 int cpu; 1643 1644 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1645 ("not on owner cpu")); 1646 1647 for (cpu = 0; cpu < ncpus2; ++cpu) { 1648 if (cpu == mycpuid) { 1649 /* 1650 * This inpcb will be removed by the later 1651 * in_pcbremwildcardhash(). 
1652 */ 1653 continue; 1654 } 1655 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1656 } 1657 in_pcbremwildcardhash(inp); 1658 } 1659 1660 static int 1661 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1662 struct sockaddr_in *if_sin, uint16_t hash) 1663 { 1664 struct socket *so = inp->inp_socket; 1665 struct inpcb *oinp; 1666 1667 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1668 sin->sin_addr, sin->sin_port, 1669 inp->inp_laddr.s_addr != INADDR_ANY ? 1670 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1671 if (oinp != NULL) 1672 return EADDRINUSE; 1673 1674 /* 1675 * No more errors can occur, finish adjusting the socket 1676 * and change the processing port to reflect the connected 1677 * socket. Once set we can no longer safely mess with the 1678 * socket. 1679 */ 1680 1681 if (inp->inp_flags & INP_WILDCARD) 1682 udp_remwildcardhash(inp); 1683 1684 if (inp->inp_laddr.s_addr == INADDR_ANY) 1685 inp->inp_laddr = if_sin->sin_addr; 1686 inp->inp_faddr = sin->sin_addr; 1687 inp->inp_fport = sin->sin_port; 1688 in_pcbinsconnhash(inp); 1689 1690 inp->inp_flags |= INP_HASH; 1691 inp->inp_hashval = hash; 1692 1693 soisconnected(so); 1694 1695 return 0; 1696 } 1697 1698 static void 1699 udp_detach2(struct socket *so) 1700 { 1701 in_pcbdetach(so->so_pcb); 1702 sodiscard(so); 1703 sofree(so); 1704 } 1705 1706 static void 1707 udp_detach_final_dispatch(netmsg_t msg) 1708 { 1709 udp_detach2(msg->base.nm_so); 1710 } 1711 1712 static void 1713 udp_detach_oncpu_dispatch(netmsg_t msg) 1714 { 1715 struct netmsg_base *clomsg = &msg->base; 1716 struct socket *so = clomsg->nm_so; 1717 struct inpcb *inp = so->so_pcb; 1718 struct thread *td = curthread; 1719 int nextcpu, cpuid = mycpuid; 1720 1721 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1722 1723 if (inp->inp_flags & INP_WILDCARD) { 1724 /* 1725 * This inp will be removed on the inp's 1726 * owner CPU later, so don't do it now. 
1727 */ 1728 if (&td->td_msgport != so->so_port) 1729 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1730 } 1731 1732 if (cpuid == 0) { 1733 /* 1734 * Free and clear multicast socket option, 1735 * which is only accessed in netisr0. 1736 */ 1737 ip_freemoptions(inp->inp_moptions); 1738 inp->inp_moptions = NULL; 1739 } 1740 1741 nextcpu = cpuid + 1; 1742 if (nextcpu < ncpus2) { 1743 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1744 } else { 1745 /* 1746 * No one could see this inpcb now; destroy this 1747 * inpcb in its owner netisr. 1748 */ 1749 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1750 udp_detach_final_dispatch); 1751 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1752 } 1753 } 1754 1755 static void 1756 udp_detach_syncorig_dispatch(netmsg_t msg) 1757 { 1758 struct netmsg_base *clomsg = &msg->base; 1759 struct socket *so = clomsg->nm_so; 1760 1761 /* 1762 * Original protocol processing port is synchronized; 1763 * destroy this inpcb in its owner netisr. 1764 */ 1765 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1766 udp_detach_final_dispatch); 1767 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1768 } 1769 1770 static void 1771 udp_detach(netmsg_t msg) 1772 { 1773 struct socket *so = msg->detach.base.nm_so; 1774 struct netmsg_base *clomsg; 1775 struct inpcb *inp; 1776 1777 inp = so->so_pcb; 1778 if (inp == NULL) { 1779 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1780 return; 1781 } 1782 1783 /* 1784 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1785 * sofree() later. 1786 */ 1787 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1788 1789 if (ncpus2 == 1) { 1790 /* Only one CPU, detach the inpcb directly. */ 1791 udp_detach2(so); 1792 return; 1793 } 1794 1795 /* 1796 * Remove this inpcb from the inpcb list first, so that 1797 * no one could find this inpcb from the inpcb list. 
1798 */ 1799 in_pcbofflist(inp); 1800 1801 /* 1802 * Remove this inpcb from the local port hash directly 1803 * here, so that its bound local port could be recycled 1804 * timely. 1805 */ 1806 in_pcbremporthash(inp); 1807 1808 if (inp->inp_flags & INP_DIRECT_DETACH) { 1809 /* 1810 * Direct detaching is allowed 1811 */ 1812 KASSERT((inp->inp_flags & INP_WILDCARD) == 0, 1813 ("in the wildcardhash")); 1814 KASSERT(inp->inp_moptions == NULL, ("has mcast options")); 1815 if (so->so_orig_port == NULL) { 1816 udp_detach2(so); 1817 } else { 1818 /* 1819 * Protocol processing port changed once, so 1820 * we need to make sure that there are nothing 1821 * left on the original protocol processing 1822 * port before we destroy this socket and inpcb. 1823 * This is more lightweight than going through 1824 * all UDP processing netisrs. 1825 */ 1826 clomsg = &so->so_clomsg; 1827 netmsg_init(clomsg, so, &netisr_apanic_rport, 1828 MSGF_IGNSOPORT, udp_detach_syncorig_dispatch); 1829 lwkt_sendmsg(so->so_orig_port, &clomsg->lmsg); 1830 } 1831 return; 1832 } 1833 1834 /* 1835 * Go through netisrs which process UDP to make sure 1836 * no one could find this inpcb anymore. 1837 */ 1838 clomsg = &so->so_clomsg; 1839 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1840 udp_detach_oncpu_dispatch); 1841 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1842 } 1843 1844 static void 1845 udp_disconnect(netmsg_t msg) 1846 { 1847 struct socket *so = msg->disconnect.base.nm_so; 1848 struct inpcb *inp; 1849 boolean_t forwarded; 1850 int error = 0; 1851 1852 inp = so->so_pcb; 1853 if (inp == NULL) { 1854 error = EINVAL; 1855 goto out; 1856 } 1857 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1858 error = ENOTCONN; 1859 goto out; 1860 } 1861 1862 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1863 1864 in_pcbdisconnect(inp); 1865 inp->inp_flags &= ~INP_HASH; 1866 1867 /* 1868 * Follow traditional BSD behavior and retain the local port 1869 * binding. 
But, fix the old misbehavior of overwriting any 1870 * previously bound local address. 1871 */ 1872 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1873 inp->inp_laddr.s_addr = INADDR_ANY; 1874 1875 if (so->so_state & SS_ISCLOSING) { 1876 /* 1877 * If this socket is being closed, there is no need 1878 * to put this socket back into wildcard hash table. 1879 */ 1880 error = 0; 1881 goto out; 1882 } 1883 1884 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1885 if (forwarded) { 1886 /* 1887 * The message is further forwarded, so we are done 1888 * here. 1889 */ 1890 return; 1891 } 1892 out: 1893 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1894 } 1895 1896 void 1897 udp_shutdown(netmsg_t msg) 1898 { 1899 struct socket *so = msg->shutdown.base.nm_so; 1900 struct inpcb *inp; 1901 int error; 1902 1903 inp = so->so_pcb; 1904 if (inp) { 1905 socantsendmore(so); 1906 error = 0; 1907 } else { 1908 error = EINVAL; 1909 } 1910 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1911 } 1912 1913 struct pr_usrreqs udp_usrreqs = { 1914 .pru_abort = udp_abort, 1915 .pru_accept = pr_generic_notsupp, 1916 .pru_attach = udp_attach, 1917 .pru_bind = udp_bind, 1918 .pru_connect = udp_connect, 1919 .pru_connect2 = pr_generic_notsupp, 1920 .pru_control = in_control_dispatch, 1921 .pru_detach = udp_detach, 1922 .pru_disconnect = udp_disconnect, 1923 .pru_listen = pr_generic_notsupp, 1924 .pru_peeraddr = in_setpeeraddr_dispatch, 1925 .pru_rcvd = pr_generic_notsupp, 1926 .pru_rcvoob = pr_generic_notsupp, 1927 .pru_send = udp_send, 1928 .pru_sense = pru_sense_null, 1929 .pru_shutdown = udp_shutdown, 1930 .pru_sockaddr = in_setsockaddr_dispatch, 1931 .pru_sosend = sosendudp, 1932 .pru_soreceive = soreceive, 1933 .pru_preconnect = udp_preconnect, 1934 .pru_preattach = udp_preattach 1935 }; 1936