1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 * 62 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 63 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/kernel.h> 72 #include <sys/malloc.h> 73 #include <sys/mbuf.h> 74 #include <sys/domain.h> 75 #include <sys/proc.h> 76 #include <sys/priv.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/socketvar.h> 80 #include <sys/sysctl.h> 81 #include <sys/syslog.h> 82 #include <sys/in_cksum.h> 83 #include <sys/ktr.h> 84 85 #include <sys/thread2.h> 86 #include <sys/socketvar2.h> 87 #include <sys/serialize.h> 88 89 #include <machine/stdarg.h> 90 91 #include <net/if.h> 92 #include <net/route.h> 93 #include <net/netmsg2.h> 94 #include <net/netisr2.h> 95 96 #include <netinet/in.h> 97 #include <netinet/in_systm.h> 98 #include <netinet/ip.h> 99 #ifdef INET6 100 #include <netinet/ip6.h> 101 #endif 102 #include <netinet/in_pcb.h> 103 #include <netinet/in_var.h> 104 #include <netinet/ip_var.h> 105 #ifdef INET6 106 #include <netinet6/ip6_var.h> 107 #endif 108 #include <netinet/ip_icmp.h> 109 #include <netinet/icmp_var.h> 110 #include <netinet/udp.h> 111 #include <netinet/udp_var.h> 112 113 #ifdef FAST_IPSEC 114 #include <netproto/ipsec/ipsec.h> 115 #endif 116 117 #ifdef IPSEC 118 #include <netinet6/ipsec.h> 119 #endif 120 121 #define MSGF_UDP_SEND MSGF_PROTO1 122 123 #define UDP_KTR_STRING "inp=%p" 124 #define UDP_KTR_ARGS struct inpcb *inp 125 126 #ifndef KTR_UDP 127 #define KTR_UDP KTR_ALL 128 #endif 129 130 KTR_INFO_MASTER(udp); 131 KTR_INFO(KTR_UDP, udp, send_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS); 132 KTR_INFO(KTR_UDP, udp, send_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS); 133 KTR_INFO(KTR_UDP, udp, send_ipout, 2, UDP_KTR_STRING, UDP_KTR_ARGS); 134 KTR_INFO(KTR_UDP, udp, redisp_ipout_beg, 3, UDP_KTR_STRING, UDP_KTR_ARGS); 135 KTR_INFO(KTR_UDP, udp, redisp_ipout_end, 4, UDP_KTR_STRING, UDP_KTR_ARGS); 
KTR_INFO(KTR_UDP, udp, send_redisp, 5, UDP_KTR_STRING, UDP_KTR_ARGS);
KTR_INFO(KTR_UDP, udp, send_inswildcard, 6, UDP_KTR_STRING, UDP_KTR_ARGS);

/* Emit the KTR trace point udp_<name> with the inpcb as its argument. */
#define logudp(name, inp)	KTR_LOG(udp_##name, inp)

/*
 * UDP protocol implementation.
 * Per RFC 768, August, 1980.
 */
#ifndef	COMPAT_42
static int	udpcksum = 1;
#else
static int	udpcksum = 0;		/* XXX */
#endif
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
    &udpcksum, 0, "Enable checksumming of UDP packets");

/* When non-zero, udp_input() logs datagrams for which no pcb was found. */
int	log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &log_in_vain, 0, "Log all incoming UDP packets");

/* When non-zero, udp_input() drops unmatched datagrams without an ICMP error. */
static int	blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/* When zero, check_multicast_membership() accepts every packet. */
static int	strict_mcast_mship = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
    &strict_mcast_mship, 0, "Only send multicast to member sockets");

int	udp_sosend_async = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW,
    &udp_sosend_async, 0, "UDP asynchronized pru_send");

int	udp_sosend_prepend = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW,
    &udp_sosend_prepend, 0,
    "Prepend enough space for proto and link header in pru_send");

/*
 * When non-zero, udp_input() hands the mbuf to in_pcblookup_pkthash();
 * otherwise it passes NULL in that position.
 */
static int udp_reuseport_ext = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW,
    &udp_reuseport_ext, 0, "SO_REUSEPORT extension");

/* Per-cpu UDP pcb tables; set up in udp_init() and indexed by cpu id. */
struct	inpcbinfo udbinfo[MAXCPU];

#ifndef UDBHASHSIZE
#define UDBHASHSIZE 16
#endif

/* Per-cpu UDP statistics; zeroed in udp_init(). */
struct	udpstat udpstat_percpu[MAXCPU] __cachealign;

#ifdef INET6
/*
 * Lazily built IPv6 views of the current datagram.  The *_init_done bit
 * records whether udp_append() has already filled the structure in, so the
 * conversion is done at most once per datagram.
 */
struct udp_in6 {
	struct sockaddr_in6	uin6_sin;
	u_char			uin6_init_done : 1;
};
struct udp_ip6 {
	struct ip6_hdr		uip6_ip6;
	u_char			uip6_init_done : 1;
};
#else
struct udp_in6;
struct udp_ip6;
#endif /* INET6 */

static void udp_append (struct inpcb *last, struct ip *ip,
		struct mbuf *n, int off, struct sockaddr_in *udp_in,
		struct udp_in6 *, struct udp_ip6 *);
#ifdef INET6
static void ip_2_ip6_hdr (struct ip6_hdr *ip6, struct ip *ip);
#endif

static int udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin,
    struct sockaddr_in *if_sin);

static boolean_t udp_inswildcardhash(struct inpcb *inp,
    struct netmsg_base *msg, int error);
static void udp_remwildcardhash(struct inpcb *inp);

/*
 * One-time protocol initialization: allocate ncpus2 port-info structures,
 * initialize one inpcbinfo (with its connection, wildcard and local-group
 * hash tables) per cpu below ncpus2, and zero the per-cpu statistics.
 */
void
udp_init(void)
{
	struct inpcbportinfo *portinfo;
	int cpu;

	portinfo = kmalloc_cachealign(sizeof(*portinfo) * ncpus2, M_PCB,
	    M_WAITOK);

	for (cpu = 0; cpu < ncpus2; cpu++) {
		struct inpcbinfo *uicb = &udbinfo[cpu];

		/*
		 * NOTE:
		 * UDP pcb list, wildcard hash table and localgroup hash
		 * table are shared.
		 */
		in_pcbinfo_init(uicb, cpu, TRUE);
		uicb->hashbase = hashinit(UDBHASHSIZE, M_PCB, &uicb->hashmask);

		in_pcbportinfo_init(&portinfo[cpu], UDBHASHSIZE, TRUE, cpu);
		/* Every cpu's pcbinfo points at the same portinfo array. */
		uicb->portinfo = portinfo;
		uicb->portinfo_mask = ncpus2_mask;

		uicb->wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB,
		    &uicb->wildcardhashmask);
		uicb->localgrphashbase = hashinit(UDBHASHSIZE, M_PCB,
		    &uicb->localgrphashmask);

		uicb->ipi_size = sizeof(struct inpcb);
	}

	/*
	 * Initialize UDP statistics counters for each CPU.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu)
		bzero(&udpstat_percpu[cpu], sizeof(struct udpstat));
}

/*
 * sysctl handler for net.inet.udp.stats: for each cpu in turn, copy that
 * cpu's udpstat out to the request and then read a replacement back in.
 * Stops at, and returns, the first error.
 */
static int
sysctl_udpstat(SYSCTL_HANDLER_ARGS)
{
	int cpu, error = 0;

	for (cpu = 0; cpu < ncpus; ++cpu) {
		if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu],
					sizeof(struct udpstat))))
			break;
		if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu],
				       sizeof(struct udpstat))))
			break;
	}

	return (error);
}
SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW),
    0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics");

/*
 * Check multicast packets to make sure they are only sent to sockets with
 * multicast memberships for the packet's destination address and arrival
 * interface.  Multicast packets to multicast-unaware sockets are also
 * disallowed.
 *
 * Returns 0 if the packet is acceptable, -1 if it is not.
 */
static __inline int
check_multicast_membership(const struct ip *ip, const struct inpcb *inp,
    const struct mbuf *m)
{
	const struct ip_moptions *mopt;
	int mshipno;

	/* Non-multicast destinations and the relaxed mode always pass. */
	if (strict_mcast_mship == 0 ||
	    !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
		return (0);
	}

	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
	    ("multicast input not in netisr0"));

	mopt = inp->inp_moptions;
	if (mopt == NULL)
		return (-1);
	/* Accept only if some membership matches both group and interface. */
	for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) {
		const struct in_multi *maddr = mopt->imo_membership[mshipno];

		if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr &&
		    m->m_pkthdr.rcvif == maddr->inm_ifp) {
			return (0);
		}
	}
	return (-1);
}

/*
 * Argument bundle for udp_mcast_input().  "inp" is the candidate pcb,
 * "last" is the previously accepted pcb (updated by the callee).
 */
struct udp_mcast_arg {
	struct inpcb		*inp;
	struct inpcb		*last;
	struct ip		*ip;
	struct mbuf		*m;
	int			iphlen;
	struct sockaddr_in	*udp_in;
#ifdef INET6
	struct udp_in6		*udp_in6;
	struct udp_ip6		*udp_ip6;
#endif
};

/*
 * Consider one candidate pcb for a multicast/broadcast datagram.
 *
 * If a previous pcb is pending in arg->last, a copy of the packet is
 * delivered to it (subject to the IPsec checks) and the candidate becomes
 * the new arg->last; the original mbuf is never consumed here.
 *
 * Returns ERESTART if the candidate fails the membership check (caller
 * moves on to the next pcb), EJUSTRETURN if the candidate has neither
 * SO_REUSEPORT nor SO_REUSEADDR set (caller stops searching), else 0.
 */
static int
udp_mcast_input(struct udp_mcast_arg *arg)
{
	struct inpcb *inp = arg->inp;
	struct inpcb *last = arg->last;
	struct ip *ip = arg->ip;
	struct mbuf *m = arg->m;

	if (check_multicast_membership(ip, inp, m) < 0)
		return ERESTART; /* caller continue */

	if (last != NULL) {
		struct mbuf *n;

#ifdef IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject_so(m, last->inp_socket))
			ipsecstat.in_polvio++;
			/* do not inject data to pcb */
		else
#endif /*IPSEC*/
#ifdef FAST_IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject(m, last))
			;
		else
#endif /*FAST_IPSEC*/
		if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL)
			udp_append(last, ip, n,
			    arg->iphlen + sizeof(struct udphdr),
			    arg->udp_in,
#ifdef INET6
			    arg->udp_in6, arg->udp_ip6
#else
			    NULL, NULL
#endif
			    );
	}
	arg->last = last = inp;

	/*
	 * Don't look for additional matches if this one does
	 * not have either the SO_REUSEPORT or SO_REUSEADDR
	 * socket options set.  This heuristic avoids searching
	 * through all pcbs in the common case of a non-shared
	 * port.  It assumes that an application will never
	 * clear these options after setting them.
	 */
	if (!(last->inp_socket->so_options &
	    (SO_REUSEPORT | SO_REUSEADDR)))
		return EJUSTRETURN; /* caller stop */
	return 0;
}

/*
 * UDP input routine.
 *
 * Takes ownership of *mp (which is set to NULL); "*offp" is used as the IP
 * header length.  Every path either queues the datagram on a socket, hands
 * it to icmp_error(), or frees it, and the function always returns
 * IPPROTO_DONE.
 */
int
udp_input(struct mbuf **mp, int *offp, int proto)
{
	struct sockaddr_in udp_in = { sizeof udp_in, AF_INET };
#ifdef INET6
	struct udp_in6 udp_in6 = {
		{ sizeof udp_in6.uin6_sin, AF_INET6 }, 0
	};
	struct udp_ip6 udp_ip6;
#endif

	int iphlen;
	struct ip *ip;
	struct udphdr *uh;
	struct inpcb *inp;
	struct mbuf *m;
	struct mbuf *opts = NULL;
	int len, off;
	struct ip save_ip;
	struct sockaddr *append_sa;
	struct inpcbinfo *pcbinfo = &udbinfo[mycpuid];

	off = *offp;
	m = *mp;
	*mp = NULL;

	iphlen = off;
	udp_stat.udps_ipackets++;

	/*
	 * Strip IP options, if any; should skip this,
	 * make available to user, and use on returned packets,
	 * but we don't yet have a way to check the checksum
	 * with options still present.
	 */
	if (iphlen > sizeof(struct ip)) {
		ip_stripoptions(m);
		iphlen = sizeof(struct ip);
	}

	/*
	 * IP and UDP headers are together in first mbuf.
	 * Already checked and pulled up in ip_demux().
	 */
	KASSERT(m->m_len >= iphlen + sizeof(struct udphdr),
	    ("UDP header not in one mbuf"));

	ip = mtod(m, struct ip *);
	uh = (struct udphdr *)((caddr_t)ip + iphlen);

	/* destination port of 0 is illegal, based on RFC768. */
	if (uh->uh_dport == 0)
		goto bad;

	/*
	 * Make mbuf data length reflect UDP length.
	 * If not enough data to reflect UDP length, drop.
	 */
	len = ntohs((u_short)uh->uh_ulen);
	if (ip->ip_len != len) {
		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
			udp_stat.udps_badlen++;
			goto bad;
		}
		/* Negative argument: trim the excess from the tail. */
		m_adj(m, len - ip->ip_len);
		/* ip->ip_len = len; */
	}
	/*
	 * Save a copy of the IP header in case we want restore it
	 * for sending an ICMP error message in response.
	 */
	save_ip = *ip;

	/*
	 * Checksum extended UDP header and data.
	 */
	if (uh->uh_sum) {
		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
			/* Use the checksum data already recorded in the mbuf. */
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				uh->uh_sum = m->m_pkthdr.csum_data;
			else
				uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htonl((u_short)len +
				    m->m_pkthdr.csum_data + IPPROTO_UDP));
			uh->uh_sum ^= 0xffff;
		} else {
			char b[9];

			/*
			 * Temporarily overlay the IP header with a pseudo
			 * header (saving the 9 bytes that get zeroed),
			 * checksum the packet, then restore the bytes.
			 */
			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
			bzero(((struct ipovly *)ip)->ih_x1, 9);
			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
			uh->uh_sum = in_cksum(m, len + sizeof(struct ip));
			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
		}
		if (uh->uh_sum) {
			udp_stat.udps_badsum++;
			m_freem(m);
			return(IPPROTO_DONE);
		}
	} else
		udp_stat.udps_nosum++;

	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
		struct inpcbhead *connhead;
		struct inpcontainer *ic, *ic_marker;
		struct inpcontainerhead *ichead;
		struct udp_mcast_arg arg;
		struct inpcb *last;
		int error;

		/*
		 * Deliver a multicast or broadcast datagram to *all* sockets
		 * for which the local and remote addresses and ports match
		 * those of the incoming datagram.  This allows more than
		 * one process to receive multi/broadcasts on the same port.
		 * (This really ought to be done for unicast datagrams as
		 * well, but that would cause problems with existing
		 * applications that open both address-specific sockets and
		 * a wildcard socket listening to the same port -- they would
		 * end up receiving duplicates of every unicast datagram.
		 * Those applications open the multiple sockets to overcome an
		 * inadequacy of the UDP socket interface, but for backwards
		 * compatibility we avoid the problem here rather than
		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
		 */

		/*
		 * Construct sockaddr format source address.
		 */
		udp_in.sin_port = uh->uh_sport;
		udp_in.sin_addr = ip->ip_src;
		arg.udp_in = &udp_in;
		/*
		 * Locate pcb(s) for datagram.
		 * (Algorithm copied from raw_intr().)
		 */
		last = NULL;
#ifdef INET6
		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
		arg.udp_in6 = &udp_in6;
		arg.udp_ip6 = &udp_ip6;
#endif
		arg.iphlen = iphlen;

		/* Pass 1: pcbs whose full 4-tuple matches the datagram. */
		connhead = &pcbinfo->hashbase[
		    INP_PCBCONNHASH(ip->ip_src.s_addr, uh->uh_sport,
		    ip->ip_dst.s_addr, uh->uh_dport, pcbinfo->hashmask)];
		LIST_FOREACH(inp, connhead, inp_hash) {
#ifdef INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif
			if (!in_hosteq(inp->inp_faddr, ip->ip_src) ||
			    !in_hosteq(inp->inp_laddr, ip->ip_dst) ||
			    inp->inp_fport != uh->uh_sport ||
			    inp->inp_lport != uh->uh_dport)
				continue;

			arg.inp = inp;
			arg.last = last;
			arg.ip = ip;
			arg.m = m;

			error = udp_mcast_input(&arg);
			if (error == ERESTART)
				continue;
			last = arg.last;

			if (error == EJUSTRETURN)
				goto done;
		}

		/*
		 * Pass 2: pcbs in the wildcard hash bucket for the
		 * destination port.  The list is walked by moving a
		 * per-cpu marker element one step past each entry visited.
		 */
		ichead = &pcbinfo->wildcardhashbase[
		    INP_PCBWILDCARDHASH(uh->uh_dport,
		    pcbinfo->wildcardhashmask)];
		ic_marker = in_pcbcontainer_marker(mycpuid);

		GET_PCBINFO_TOKEN(pcbinfo);
		LIST_INSERT_HEAD(ichead, ic_marker, ic_list);
		while ((ic = LIST_NEXT(ic_marker, ic_list)) != NULL) {
			LIST_REMOVE(ic_marker, ic_list);
			LIST_INSERT_AFTER(ic, ic_marker, ic_list);

			inp = ic->ic_inp;
			if (inp->inp_flags & INP_PLACEMARKER)
				continue;
#ifdef INET6
			if (!(inp->inp_vflag & INP_IPV4))
				continue;
#endif
			if (inp->inp_lport != uh->uh_dport)
				continue;
			if (inp->inp_laddr.s_addr != INADDR_ANY &&
			    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
				continue;

			arg.inp = inp;
			arg.last = last;
			arg.ip = ip;
			arg.m = m;

			error = udp_mcast_input(&arg);
			if (error == ERESTART)
				continue;
			last = arg.last;

			if (error == EJUSTRETURN)
				break;
		}
		LIST_REMOVE(ic_marker, ic_list);
		REL_PCBINFO_TOKEN(pcbinfo);
done:
		if (last == NULL) {
			/*
			 * No matching pcb found; discard datagram.
			 * (No need to send an ICMP Port Unreachable
			 * for a broadcast or multicast datagram.)
			 */
			udp_stat.udps_noportbcast++;
			goto bad;
		}
#ifdef IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject_so(m, last->inp_socket)) {
			ipsecstat.in_polvio++;
			goto bad;
		}
#endif /*IPSEC*/
#ifdef FAST_IPSEC
		/* check AH/ESP integrity. */
		if (ipsec4_in_reject(m, last))
			goto bad;
#endif /*FAST_IPSEC*/
		/* The final match receives the original mbuf, not a copy. */
		udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
		    &udp_in,
#ifdef INET6
		    &udp_in6, &udp_ip6
#else
		    NULL, NULL
#endif
		    );
		return(IPPROTO_DONE);
	}
	/*
	 * Locate pcb for datagram.
	 */
	inp = in_pcblookup_pkthash(pcbinfo, ip->ip_src, uh->uh_sport,
	    ip->ip_dst, uh->uh_dport, TRUE, m->m_pkthdr.rcvif,
	    udp_reuseport_ext ? m : NULL);
	if (inp == NULL) {
		if (log_in_vain) {
			char buf[sizeof "aaa.bbb.ccc.ddd"];

			/*
			 * The destination is copied into buf before
			 * inet_ntoa() is called again for the source.
			 */
			strcpy(buf, inet_ntoa(ip->ip_dst));
			log(LOG_INFO,
			    "Connection attempt to UDP %s:%d from %s:%d\n",
			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
			    ntohs(uh->uh_sport));
		}
		udp_stat.udps_noport++;
		if (m->m_flags & (M_BCAST | M_MCAST)) {
			udp_stat.udps_noportbcast++;
			goto bad;
		}
		if (blackhole)
			goto bad;
#ifdef ICMP_BANDLIM
		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
			goto bad;
#endif
		/* Restore the saved IP header before building the ICMP error. */
		*ip = save_ip;
		ip->ip_len += iphlen;
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
		return(IPPROTO_DONE);
	}
#ifdef IPSEC
	if (ipsec4_in_reject_so(m, inp->inp_socket)) {
		ipsecstat.in_polvio++;
		goto bad;
	}
#endif /*IPSEC*/
#ifdef FAST_IPSEC
	if (ipsec4_in_reject(m, inp))
		goto bad;
#endif /*FAST_IPSEC*/
	/*
	 * Check the minimum TTL for socket.
	 */
	if (ip->ip_ttl < inp->inp_ip_minttl)
		goto bad;

	/*
	 * Construct sockaddr format source address.
	 * Stuff source address and datagram in user buffer.
	 */
	udp_in.sin_port = uh->uh_sport;
	udp_in.sin_addr = ip->ip_src;
	if ((inp->inp_flags & INP_CONTROLOPTS) ||
	    (inp->inp_socket->so_options & SO_TIMESTAMP)) {
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6) {
			int savedflags;

			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
			/*
			 * INP_UNMAPPABLEOPTS is masked off only for the
			 * duration of the ip6_savecontrol() call.
			 */
			savedflags = inp->inp_flags;
			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
			ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m);
			inp->inp_flags = savedflags;
		} else
#endif
		ip_savecontrol(inp, &opts, ip, m);
	}
	/* Drop the IP and UDP headers; only the payload is queued. */
	m_adj(m, iphlen + sizeof(struct udphdr));
#ifdef INET6
	if (inp->inp_vflag & INP_IPV6) {
		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
		append_sa = (struct sockaddr *)&udp_in6;
	} else
#endif
		append_sa = (struct sockaddr *)&udp_in;

	lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token);
	if (ssb_appendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) {
		lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
		udp_stat.udps_fullsock++;
		goto bad;
	}
	lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
	sorwakeup(inp->inp_socket);
	return(IPPROTO_DONE);
bad:
	/* Common drop path: release the datagram and any control mbufs. */
	m_freem(m);
	if (opts)
		m_freem(opts);
	return(IPPROTO_DONE);
}

#ifdef INET6
/*
 * Build an IPv6 header in *ip6 from the IPv4 header *ip: payload length,
 * next header and hop limit are copied from the IPv4 fields, and both
 * addresses are stored with IPV6_ADDR_INT32_SMP in word 2 and the IPv4
 * address in word 3.  All other bytes are zero.
 */
static void
ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
{
	bzero(ip6, sizeof *ip6);

	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = ip->ip_len;
	ip6->ip6_nxt = ip->ip_p;
	ip6->ip6_hlim = ip->ip_ttl;
	ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] =
		IPV6_ADDR_INT32_SMP;
	ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
	ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
}
#endif

/*
 * subroutine of udp_input(), mainly for source code readability.
 * caller must properly init udp_ip6 and udp_in6 beforehand.
734 */ 735 static void 736 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, 737 struct sockaddr_in *udp_in, 738 struct udp_in6 *udp_in6, struct udp_ip6 *udp_ip6) 739 { 740 struct sockaddr *append_sa; 741 struct mbuf *opts = NULL; 742 int ret; 743 744 if (last->inp_flags & INP_CONTROLOPTS || 745 last->inp_socket->so_options & SO_TIMESTAMP) { 746 #ifdef INET6 747 if (last->inp_vflag & INP_IPV6) { 748 int savedflags; 749 750 if (udp_ip6->uip6_init_done == 0) { 751 ip_2_ip6_hdr(&udp_ip6->uip6_ip6, ip); 752 udp_ip6->uip6_init_done = 1; 753 } 754 savedflags = last->inp_flags; 755 last->inp_flags &= ~INP_UNMAPPABLEOPTS; 756 ip6_savecontrol(last, &opts, &udp_ip6->uip6_ip6, n); 757 last->inp_flags = savedflags; 758 } else 759 #endif 760 ip_savecontrol(last, &opts, ip, n); 761 } 762 #ifdef INET6 763 if (last->inp_vflag & INP_IPV6) { 764 if (udp_in6->uin6_init_done == 0) { 765 in6_sin_2_v4mapsin6(udp_in, &udp_in6->uin6_sin); 766 udp_in6->uin6_init_done = 1; 767 } 768 append_sa = (struct sockaddr *)&udp_in6->uin6_sin; 769 } else 770 #endif 771 append_sa = (struct sockaddr *)udp_in; 772 m_adj(n, off); 773 774 lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token); 775 ret = ssb_appendaddr(&last->inp_socket->so_rcv, append_sa, n, opts); 776 lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token); 777 if (ret == 0) { 778 m_freem(n); 779 if (opts) 780 m_freem(opts); 781 udp_stat.udps_fullsock++; 782 } else { 783 sorwakeup(last->inp_socket); 784 } 785 } 786 787 /* 788 * Notify a udp user of an asynchronous error; 789 * just wake up so that he can collect error status. 
790 */ 791 void 792 udp_notify(struct inpcb *inp, int error) 793 { 794 inp->inp_socket->so_error = error; 795 sorwakeup(inp->inp_socket); 796 sowwakeup(inp->inp_socket); 797 } 798 799 struct netmsg_udp_notify { 800 struct netmsg_base base; 801 void (*nm_notify)(struct inpcb *, int); 802 struct in_addr nm_faddr; 803 int nm_arg; 804 }; 805 806 static void 807 udp_notifyall_oncpu(netmsg_t msg) 808 { 809 struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg; 810 int nextcpu, cpu = mycpuid; 811 812 in_pcbnotifyall(&udbinfo[cpu], nm->nm_faddr, nm->nm_arg, nm->nm_notify); 813 814 nextcpu = cpu + 1; 815 if (nextcpu < ncpus2) 816 lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg); 817 else 818 lwkt_replymsg(&nm->base.lmsg, 0); 819 } 820 821 void 822 udp_ctlinput(netmsg_t msg) 823 { 824 struct sockaddr *sa = msg->ctlinput.nm_arg; 825 struct ip *ip = msg->ctlinput.nm_extra; 826 int cmd = msg->ctlinput.nm_cmd; 827 struct udphdr *uh; 828 void (*notify) (struct inpcb *, int) = udp_notify; 829 struct in_addr faddr; 830 struct inpcb *inp; 831 832 faddr = ((struct sockaddr_in *)sa)->sin_addr; 833 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 834 goto done; 835 836 if (PRC_IS_REDIRECT(cmd)) { 837 ip = NULL; 838 notify = in_rtchange; 839 } else if (cmd == PRC_HOSTDEAD) { 840 ip = NULL; 841 } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { 842 goto done; 843 } 844 845 if (ip) { 846 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 847 inp = in_pcblookup_hash(&udbinfo[mycpuid], faddr, uh->uh_dport, 848 ip->ip_src, uh->uh_sport, 0, NULL); 849 if (inp != NULL && inp->inp_socket != NULL) 850 (*notify)(inp, inetctlerrmap[cmd]); 851 } else { 852 struct netmsg_udp_notify *nm; 853 854 KKASSERT(&curthread->td_msgport == netisr_cpuport(0)); 855 nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT); 856 netmsg_init(&nm->base, NULL, &netisr_afree_rport, 857 0, udp_notifyall_oncpu); 858 nm->nm_faddr = faddr; 859 nm->nm_arg = inetctlerrmap[cmd]; 860 
nm->nm_notify = notify; 861 lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg); 862 } 863 done: 864 lwkt_replymsg(&msg->lmsg, 0); 865 } 866 867 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, udbinfo, 0, 868 in_pcblist_global_ncpus2, "S,xinpcb", "List of active UDP sockets"); 869 870 static int 871 udp_getcred(SYSCTL_HANDLER_ARGS) 872 { 873 struct sockaddr_in addrs[2]; 874 struct ucred cred0, *cred = NULL; 875 struct inpcb *inp; 876 int error, cpu, origcpu; 877 878 error = priv_check(req->td, PRIV_ROOT); 879 if (error) 880 return (error); 881 error = SYSCTL_IN(req, addrs, sizeof addrs); 882 if (error) 883 return (error); 884 885 origcpu = mycpuid; 886 cpu = udp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, 887 addrs[0].sin_addr.s_addr, addrs[0].sin_port); 888 889 lwkt_migratecpu(cpu); 890 891 inp = in_pcblookup_hash(&udbinfo[cpu], 892 addrs[1].sin_addr, addrs[1].sin_port, 893 addrs[0].sin_addr, addrs[0].sin_port, TRUE, NULL); 894 if (inp == NULL || inp->inp_socket == NULL) { 895 error = ENOENT; 896 } else if (inp->inp_socket->so_cred != NULL) { 897 cred0 = *(inp->inp_socket->so_cred); 898 cred = &cred0; 899 } 900 901 lwkt_migratecpu(origcpu); 902 903 if (error) 904 return error; 905 906 return SYSCTL_OUT(req, cred, sizeof(struct ucred)); 907 } 908 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 909 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 910 911 static void 912 udp_send_redispatch(netmsg_t msg) 913 { 914 struct mbuf *m = msg->send.nm_m; 915 int pru_flags = msg->send.nm_flags; 916 struct inpcb *inp = msg->send.base.nm_so->so_pcb; 917 struct mbuf *m_opt = msg->send.nm_control; /* XXX save ipopt */ 918 int flags = msg->send.nm_priv; /* ip_output flags */ 919 int error; 920 921 logudp(redisp_ipout_beg, inp); 922 923 /* 924 * - Don't use inp route cache. It should only be used in the 925 * inp owner netisr. 
926 * - Access to inp_moptions should be safe, since multicast UDP 927 * datagrams are redispatched to netisr0 and inp_moptions is 928 * changed only in netisr0. 929 */ 930 error = ip_output(m, m_opt, NULL, flags, inp->inp_moptions, inp); 931 if ((pru_flags & PRUS_NOREPLY) == 0) 932 lwkt_replymsg(&msg->send.base.lmsg, error); 933 934 if (m_opt != NULL) { 935 /* Free saved ip options, if any */ 936 m_freem(m_opt); 937 } 938 939 logudp(redisp_ipout_end, inp); 940 } 941 942 static void 943 udp_send(netmsg_t msg) 944 { 945 struct socket *so = msg->send.base.nm_so; 946 struct mbuf *m = msg->send.nm_m; 947 struct sockaddr *dstaddr = msg->send.nm_addr; 948 int pru_flags = msg->send.nm_flags; 949 struct inpcb *inp = so->so_pcb; 950 struct thread *td = msg->send.nm_td; 951 int flags; 952 953 struct udpiphdr *ui; 954 int len = m->m_pkthdr.len; 955 struct sockaddr_in *sin; /* really is initialized before use */ 956 int error = 0, cpu; 957 958 KKASSERT(msg->send.nm_control == NULL); 959 960 logudp(send_beg, inp); 961 962 if (inp == NULL) { 963 error = EINVAL; 964 goto release; 965 } 966 967 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 968 error = EMSGSIZE; 969 goto release; 970 } 971 972 if (inp->inp_lport == 0) { /* unbound socket */ 973 boolean_t forwarded; 974 975 error = in_pcbbind(inp, NULL, td); 976 if (error) 977 goto release; 978 979 /* 980 * Need to call udp_send again, after this inpcb is 981 * inserted into wildcard hash table. 982 */ 983 msg->send.base.lmsg.ms_flags |= MSGF_UDP_SEND; 984 forwarded = udp_inswildcardhash(inp, &msg->send.base, 0); 985 if (forwarded) { 986 /* 987 * The message is further forwarded, so we are 988 * done here. 
989 */ 990 logudp(send_inswildcard, inp); 991 return; 992 } 993 } 994 995 if (dstaddr != NULL) { /* destination address specified */ 996 if (inp->inp_faddr.s_addr != INADDR_ANY) { 997 /* already connected */ 998 error = EISCONN; 999 goto release; 1000 } 1001 sin = (struct sockaddr_in *)dstaddr; 1002 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 1003 error = EAFNOSUPPORT; /* IPv6 only jail */ 1004 goto release; 1005 } 1006 } else { 1007 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1008 /* no destination specified and not already connected */ 1009 error = ENOTCONN; 1010 goto release; 1011 } 1012 sin = NULL; 1013 } 1014 1015 /* 1016 * Calculate data length and get a mbuf 1017 * for UDP and IP headers. 1018 */ 1019 M_PREPEND(m, sizeof(struct udpiphdr), MB_DONTWAIT); 1020 if (m == NULL) { 1021 error = ENOBUFS; 1022 goto release; 1023 } 1024 1025 /* 1026 * Fill in mbuf with extended UDP header 1027 * and addresses and length put into network format. 1028 */ 1029 ui = mtod(m, struct udpiphdr *); 1030 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 1031 ui->ui_pr = IPPROTO_UDP; 1032 1033 /* 1034 * Set destination address. 1035 */ 1036 if (dstaddr != NULL) { /* use specified destination */ 1037 ui->ui_dst = sin->sin_addr; 1038 ui->ui_dport = sin->sin_port; 1039 } else { /* use connected destination */ 1040 ui->ui_dst = inp->inp_faddr; 1041 ui->ui_dport = inp->inp_fport; 1042 } 1043 1044 /* 1045 * Set source address. 1046 */ 1047 if (inp->inp_laddr.s_addr == INADDR_ANY || 1048 IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) { 1049 struct sockaddr_in *if_sin; 1050 1051 if (dstaddr == NULL) { 1052 /* 1053 * connect() had (or should have) failed because 1054 * the interface had no IP address, but the 1055 * application proceeded to call send() anyways. 1056 */ 1057 error = ENOTCONN; 1058 goto release; 1059 } 1060 1061 /* Look up outgoing interface. 
*/ 1062 error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1); 1063 if (error) 1064 goto release; 1065 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 1066 } else { 1067 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 1068 } 1069 ui->ui_sport = inp->inp_lport; 1070 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 1071 1072 /* 1073 * Release the original thread, since it is no longer used 1074 */ 1075 if (pru_flags & PRUS_HELDTD) { 1076 lwkt_rele(td); 1077 pru_flags &= ~PRUS_HELDTD; 1078 } 1079 /* 1080 * Free the dest address, since it is no longer needed 1081 */ 1082 if (pru_flags & PRUS_FREEADDR) { 1083 kfree(dstaddr, M_SONAME); 1084 pru_flags &= ~PRUS_FREEADDR; 1085 } 1086 1087 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 1088 1089 /* 1090 * Set up checksum and output datagram. 1091 */ 1092 if (udpcksum) { 1093 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 1094 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 1095 m->m_pkthdr.csum_flags = CSUM_UDP; 1096 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 1097 m->m_pkthdr.csum_thlen = sizeof(struct udphdr); 1098 } else { 1099 ui->ui_sum = 0; 1100 } 1101 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 1102 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 1103 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 1104 udp_stat.udps_opackets++; 1105 1106 flags = IP_DEBUGROUTE | 1107 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); 1108 if (pru_flags & PRUS_DONTROUTE) 1109 flags |= SO_DONTROUTE; 1110 1111 if (inp->inp_flags & INP_CONNECTED) { 1112 /* 1113 * For connected socket, this datagram has already 1114 * been in the correct netisr; no need to rehash. 
1115 */ 1116 goto sendit; 1117 } 1118 1119 cpu = udp_addrcpu(ui->ui_dst.s_addr, ui->ui_dport, 1120 ui->ui_src.s_addr, ui->ui_sport); 1121 if (cpu != mycpuid) { 1122 struct mbuf *m_opt = NULL; 1123 struct netmsg_pru_send *smsg; 1124 struct lwkt_port *port = netisr_cpuport(cpu); 1125 1126 /* 1127 * Not on the CPU that matches this UDP datagram hash; 1128 * redispatch to the correct CPU to do the ip_output(). 1129 */ 1130 if (inp->inp_options != NULL) { 1131 /* 1132 * If there are ip options, then save a copy, 1133 * since accessing inp_options on other CPUs' 1134 * is not safe. 1135 * 1136 * XXX optimize this? 1137 */ 1138 m_opt = m_copym(inp->inp_options, 0, M_COPYALL, 1139 MB_WAIT); 1140 } 1141 if ((pru_flags & PRUS_NOREPLY) == 0) { 1142 /* 1143 * Change some parts of the original netmsg and 1144 * forward it to the target netisr. 1145 * 1146 * NOTE: so_port MUST NOT be checked in the target 1147 * netisr. 1148 */ 1149 smsg = &msg->send; 1150 smsg->nm_priv = flags; /* ip_output flags */ 1151 smsg->nm_m = m; 1152 smsg->nm_control = m_opt; /* XXX save ipopt */ 1153 smsg->base.lmsg.ms_flags |= MSGF_IGNSOPORT; 1154 smsg->base.nm_dispatch = udp_send_redispatch; 1155 lwkt_forwardmsg(port, &smsg->base.lmsg); 1156 } else { 1157 /* 1158 * Recreate the netmsg, since the original mbuf 1159 * could have been changed. And send it to the 1160 * target netisr. 1161 * 1162 * NOTE: so_port MUST NOT be checked in the target 1163 * netisr. 
1164 */ 1165 smsg = &m->m_hdr.mh_sndmsg; 1166 netmsg_init(&smsg->base, so, &netisr_apanic_rport, 1167 MSGF_IGNSOPORT, udp_send_redispatch); 1168 smsg->nm_priv = flags; /* ip_output flags */ 1169 smsg->nm_flags = pru_flags; 1170 smsg->nm_m = m; 1171 smsg->nm_control = m_opt; /* XXX save ipopt */ 1172 lwkt_sendmsg(port, &smsg->base.lmsg); 1173 } 1174 1175 /* This UDP datagram is redispatched; done */ 1176 logudp(send_redisp, inp); 1177 return; 1178 } 1179 1180 sendit: 1181 logudp(send_ipout, inp); 1182 error = ip_output(m, inp->inp_options, &inp->inp_route, flags, 1183 inp->inp_moptions, inp); 1184 m = NULL; 1185 1186 release: 1187 if (m != NULL) 1188 m_freem(m); 1189 1190 if (pru_flags & PRUS_HELDTD) 1191 lwkt_rele(td); 1192 if (pru_flags & PRUS_FREEADDR) 1193 kfree(dstaddr, M_SONAME); 1194 if ((pru_flags & PRUS_NOREPLY) == 0) 1195 lwkt_replymsg(&msg->send.base.lmsg, error); 1196 1197 logudp(send_end, inp); 1198 } 1199 1200 u_long udp_sendspace = 9216; /* really max datagram size */ 1201 /* 40 1K datagrams */ 1202 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 1203 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 1204 1205 u_long udp_recvspace = 40 * (1024 + 1206 #ifdef INET6 1207 sizeof(struct sockaddr_in6) 1208 #else 1209 sizeof(struct sockaddr_in) 1210 #endif 1211 ); 1212 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1213 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 1214 1215 /* 1216 * This should never happen, since UDP socket does not support 1217 * connection acception (SO_ACCEPTCONN, i.e. listen(2)). 
1218 */ 1219 static void 1220 udp_abort(netmsg_t msg __unused) 1221 { 1222 panic("udp_abort is called"); 1223 } 1224 1225 static void 1226 udp_attach(netmsg_t msg) 1227 { 1228 struct socket *so = msg->attach.base.nm_so; 1229 struct pru_attach_info *ai = msg->attach.nm_ai; 1230 struct inpcb *inp; 1231 int error; 1232 1233 inp = so->so_pcb; 1234 if (inp != NULL) { 1235 error = EINVAL; 1236 goto out; 1237 } 1238 error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 1239 if (error) 1240 goto out; 1241 1242 error = in_pcballoc(so, &udbinfo[mycpuid]); 1243 if (error) 1244 goto out; 1245 1246 inp = (struct inpcb *)so->so_pcb; 1247 inp->inp_vflag |= INP_IPV4; 1248 inp->inp_ip_ttl = ip_defttl; 1249 error = 0; 1250 out: 1251 lwkt_replymsg(&msg->attach.base.lmsg, error); 1252 } 1253 1254 static boolean_t 1255 udp_inswildcardhash_oncpu(struct inpcb *inp) 1256 { 1257 int cpu; 1258 1259 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1260 ("not on owner cpu")); 1261 1262 in_pcbinswildcardhash(inp); 1263 for (cpu = 0; cpu < ncpus2; ++cpu) { 1264 if (cpu == mycpuid) { 1265 /* 1266 * This inpcb has been inserted by the above 1267 * in_pcbinswildcardhash(). 
1268 */ 1269 continue; 1270 } 1271 in_pcbinswildcardhash_oncpu(inp, &udbinfo[cpu]); 1272 } 1273 1274 /* TODO need to change port again, if SO_REUSEPORT */ 1275 return FALSE; 1276 } 1277 1278 static void 1279 udp_inswildcardhash_dispatch(netmsg_t msg) 1280 { 1281 struct inpcb *inp = msg->base.nm_so->so_pcb; 1282 lwkt_msg_t lmsg = &msg->base.lmsg; 1283 1284 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1285 KASSERT((ntohs(inp->inp_lport) & ncpus2_mask) == mycpuid, 1286 ("not target cpu")); 1287 1288 in_pcblink(inp, &udbinfo[mycpuid]); 1289 udp_inswildcardhash_oncpu(inp); 1290 1291 if (lmsg->ms_flags & MSGF_UDP_SEND) { 1292 udp_send(msg); 1293 /* msg is replied by udp_send() */ 1294 } else { 1295 lwkt_replymsg(lmsg, lmsg->ms_error); 1296 } 1297 } 1298 1299 static void 1300 udp_sosetport(struct lwkt_msg *msg, lwkt_port_t port) 1301 { 1302 sosetport(((struct netmsg_base *)msg)->nm_so, port); 1303 } 1304 1305 static boolean_t 1306 udp_inswildcardhash(struct inpcb *inp, struct netmsg_base *msg, int error) 1307 { 1308 struct route *ro = &inp->inp_route; 1309 lwkt_msg_t lmsg = &msg->lmsg; 1310 int cpu; 1311 1312 /* 1313 * Always clear the route cache, so we don't need to 1314 * worry about any owner CPU changes later. 1315 */ 1316 if (ro->ro_rt != NULL) 1317 RTFREE(ro->ro_rt); 1318 bzero(ro, sizeof(*ro)); 1319 1320 KASSERT(inp->inp_lport != 0, ("local port not set yet")); 1321 cpu = ntohs(inp->inp_lport) & ncpus2_mask; 1322 1323 lmsg->ms_error = error; 1324 if (cpu != mycpuid) { 1325 struct lwkt_port *port = netisr_cpuport(cpu); 1326 1327 /* 1328 * We are moving the protocol processing port the socket 1329 * is on, we have to unlink here and re-link on the 1330 * target cpu. 
1331 */ 1332 in_pcbunlink(inp, &udbinfo[mycpuid]); 1333 msg->nm_dispatch = udp_inswildcardhash_dispatch; 1334 1335 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1336 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1337 lwkt_forwardmsg(port, lmsg); 1338 return TRUE; /* forwarded */ 1339 } 1340 1341 udp_inswildcardhash_oncpu(inp); 1342 return FALSE; 1343 } 1344 1345 static void 1346 udp_bind(netmsg_t msg) 1347 { 1348 struct socket *so = msg->bind.base.nm_so; 1349 struct inpcb *inp; 1350 int error; 1351 1352 inp = so->so_pcb; 1353 if (inp) { 1354 struct sockaddr *nam = msg->bind.nm_nam; 1355 struct thread *td = msg->bind.nm_td; 1356 1357 error = in_pcbbind(inp, nam, td); 1358 if (error == 0) { 1359 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1360 boolean_t forwarded; 1361 1362 if (sin->sin_addr.s_addr != INADDR_ANY) 1363 inp->inp_flags |= INP_WASBOUND_NOTANY; 1364 1365 forwarded = udp_inswildcardhash(inp, 1366 &msg->bind.base, 0); 1367 if (forwarded) { 1368 /* 1369 * The message is further forwarded, so 1370 * we are done here. 
1371 */ 1372 return; 1373 } 1374 } 1375 } else { 1376 error = EINVAL; 1377 } 1378 lwkt_replymsg(&msg->bind.base.lmsg, error); 1379 } 1380 1381 static void 1382 udp_connect(netmsg_t msg) 1383 { 1384 struct socket *so = msg->connect.base.nm_so; 1385 struct sockaddr *nam = msg->connect.nm_nam; 1386 struct thread *td = msg->connect.nm_td; 1387 struct inpcb *inp; 1388 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1389 struct sockaddr_in *if_sin; 1390 struct lwkt_port *port; 1391 int error; 1392 1393 KKASSERT(msg->connect.nm_m == NULL); 1394 1395 inp = so->so_pcb; 1396 if (inp == NULL) { 1397 error = EINVAL; 1398 goto out; 1399 } 1400 1401 if (msg->connect.nm_flags & PRUC_RECONNECT) { 1402 msg->connect.nm_flags &= ~PRUC_RECONNECT; 1403 in_pcblink(inp, &udbinfo[mycpuid]); 1404 } 1405 1406 if (inp->inp_faddr.s_addr != INADDR_ANY) { 1407 error = EISCONN; 1408 goto out; 1409 } 1410 error = 0; 1411 1412 /* 1413 * Bind if we have to 1414 */ 1415 if (inp->inp_lport == 0 || 1416 (td->td_proc && td->td_proc->p_ucred->cr_prison != NULL && 1417 inp->inp_laddr.s_addr == INADDR_ANY)) { 1418 error = in_pcbbind(inp, NULL, td); 1419 if (error) 1420 goto out; 1421 } 1422 1423 /* 1424 * Calculate the correct protocol processing thread. The connect 1425 * operation must run there. 1426 */ 1427 error = in_pcbladdr(inp, nam, &if_sin, td); 1428 if (error) 1429 goto out; 1430 if (!prison_remote_ip(td, nam)) { 1431 error = EAFNOSUPPORT; /* IPv6 only jail */ 1432 goto out; 1433 } 1434 1435 port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port, 1436 inp->inp_laddr.s_addr != INADDR_ANY ? 
1437 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr, inp->inp_lport); 1438 if (port != &curthread->td_msgport) { 1439 struct route *ro = &inp->inp_route; 1440 lwkt_msg_t lmsg = &msg->connect.base.lmsg; 1441 int nm_flags = PRUC_RECONNECT; 1442 1443 /* 1444 * in_pcbladdr() may have allocated a route entry for us 1445 * on the current CPU, but we need a route entry on the 1446 * inpcb's owner CPU, so free it here. 1447 */ 1448 if (ro->ro_rt != NULL) 1449 RTFREE(ro->ro_rt); 1450 bzero(ro, sizeof(*ro)); 1451 1452 if (inp->inp_flags & INP_WILDCARD) { 1453 /* 1454 * Remove this inpcb from the wildcard hash before 1455 * the socket's msgport changes. 1456 */ 1457 udp_remwildcardhash(inp); 1458 } 1459 1460 /* 1461 * We are moving the protocol processing port the socket 1462 * is on, we have to unlink here and re-link on the 1463 * target cpu. 1464 */ 1465 in_pcbunlink(inp, &udbinfo[mycpuid]); 1466 msg->connect.nm_flags |= nm_flags; 1467 1468 /* See the related comment in tcp_usrreq.c tcp_connect() */ 1469 lwkt_setmsg_receipt(lmsg, udp_sosetport); 1470 lwkt_forwardmsg(port, lmsg); 1471 /* msg invalid now */ 1472 return; 1473 } 1474 error = udp_connect_oncpu(inp, sin, if_sin); 1475 out: 1476 if (error && inp != NULL && inp->inp_lport != 0 && 1477 (inp->inp_flags & INP_WILDCARD) == 0) { 1478 boolean_t forwarded; 1479 1480 /* Connect failed; put it to wildcard hash. */ 1481 forwarded = udp_inswildcardhash(inp, &msg->connect.base, 1482 error); 1483 if (forwarded) { 1484 /* 1485 * The message is further forwarded, so we are done 1486 * here. 1487 */ 1488 return; 1489 } 1490 } 1491 lwkt_replymsg(&msg->connect.base.lmsg, error); 1492 } 1493 1494 static void 1495 udp_remwildcardhash(struct inpcb *inp) 1496 { 1497 int cpu; 1498 1499 KASSERT(inp->inp_pcbinfo == &udbinfo[mycpuid], 1500 ("not on owner cpu")); 1501 1502 for (cpu = 0; cpu < ncpus2; ++cpu) { 1503 if (cpu == mycpuid) { 1504 /* 1505 * This inpcb will be removed by the later 1506 * in_pcbremwildcardhash(). 
1507 */ 1508 continue; 1509 } 1510 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpu]); 1511 } 1512 in_pcbremwildcardhash(inp); 1513 } 1514 1515 static int 1516 udp_connect_oncpu(struct inpcb *inp, struct sockaddr_in *sin, 1517 struct sockaddr_in *if_sin) 1518 { 1519 struct socket *so = inp->inp_socket; 1520 struct inpcb *oinp; 1521 1522 oinp = in_pcblookup_hash(inp->inp_pcbinfo, 1523 sin->sin_addr, sin->sin_port, 1524 inp->inp_laddr.s_addr != INADDR_ANY ? 1525 inp->inp_laddr : if_sin->sin_addr, inp->inp_lport, FALSE, NULL); 1526 if (oinp != NULL) 1527 return EADDRINUSE; 1528 1529 /* 1530 * No more errors can occur, finish adjusting the socket 1531 * and change the processing port to reflect the connected 1532 * socket. Once set we can no longer safely mess with the 1533 * socket. 1534 */ 1535 1536 if (inp->inp_flags & INP_WILDCARD) 1537 udp_remwildcardhash(inp); 1538 1539 if (inp->inp_laddr.s_addr == INADDR_ANY) 1540 inp->inp_laddr = if_sin->sin_addr; 1541 inp->inp_faddr = sin->sin_addr; 1542 inp->inp_fport = sin->sin_port; 1543 in_pcbinsconnhash(inp); 1544 1545 soisconnected(so); 1546 1547 return 0; 1548 } 1549 1550 static void 1551 udp_detach2(struct socket *so) 1552 { 1553 in_pcbdetach(so->so_pcb); 1554 sodiscard(so); 1555 sofree(so); 1556 } 1557 1558 static void 1559 udp_detach_final_dispatch(netmsg_t msg) 1560 { 1561 udp_detach2(msg->base.nm_so); 1562 } 1563 1564 static void 1565 udp_detach_oncpu_dispatch(netmsg_t msg) 1566 { 1567 struct netmsg_base *clomsg = &msg->base; 1568 struct socket *so = clomsg->nm_so; 1569 struct inpcb *inp = so->so_pcb; 1570 struct thread *td = curthread; 1571 int nextcpu, cpuid = mycpuid; 1572 1573 KASSERT(td->td_type == TD_TYPE_NETISR, ("not in netisr")); 1574 1575 if (inp->inp_flags & INP_WILDCARD) { 1576 /* 1577 * This inp will be removed on the inp's 1578 * owner CPU later, so don't do it now. 
1579 */ 1580 if (&td->td_msgport != so->so_port) 1581 in_pcbremwildcardhash_oncpu(inp, &udbinfo[cpuid]); 1582 } 1583 1584 if (cpuid == 0) { 1585 /* 1586 * Free and clear multicast socket option, 1587 * which is only accessed in netisr0. 1588 */ 1589 ip_freemoptions(inp->inp_moptions); 1590 inp->inp_moptions = NULL; 1591 } 1592 1593 nextcpu = cpuid + 1; 1594 if (nextcpu < ncpus2) { 1595 lwkt_forwardmsg(netisr_cpuport(nextcpu), &clomsg->lmsg); 1596 } else { 1597 /* 1598 * No one could see this inpcb now; destroy this 1599 * inpcb in its owner netisr. 1600 */ 1601 netmsg_init(clomsg, so, &netisr_apanic_rport, 0, 1602 udp_detach_final_dispatch); 1603 lwkt_sendmsg(so->so_port, &clomsg->lmsg); 1604 } 1605 } 1606 1607 static void 1608 udp_detach(netmsg_t msg) 1609 { 1610 struct socket *so = msg->detach.base.nm_so; 1611 struct netmsg_base *clomsg; 1612 struct inpcb *inp; 1613 1614 inp = so->so_pcb; 1615 if (inp == NULL) { 1616 lwkt_replymsg(&msg->detach.base.lmsg, EINVAL); 1617 return; 1618 } 1619 1620 /* 1621 * Reply EJUSTRETURN ASAP, we will call sodiscard() and 1622 * sofree() later. 1623 */ 1624 lwkt_replymsg(&msg->detach.base.lmsg, EJUSTRETURN); 1625 1626 if (ncpus == 1) { 1627 /* Only one CPU, detach the inpcb directly. */ 1628 udp_detach2(so); 1629 return; 1630 } 1631 1632 /* 1633 * Remove this inpcb from the inpcb list first, so that 1634 * no one could find this inpcb from the inpcb list. 1635 */ 1636 in_pcbofflist(inp); 1637 1638 /* 1639 * Go through netisrs which process UDP to make sure 1640 * no one could find this inpcb anymore. 
1641 */ 1642 clomsg = &so->so_clomsg; 1643 netmsg_init(clomsg, so, &netisr_apanic_rport, MSGF_IGNSOPORT, 1644 udp_detach_oncpu_dispatch); 1645 lwkt_sendmsg(netisr_cpuport(0), &clomsg->lmsg); 1646 } 1647 1648 static void 1649 udp_disconnect(netmsg_t msg) 1650 { 1651 struct socket *so = msg->disconnect.base.nm_so; 1652 struct inpcb *inp; 1653 boolean_t forwarded; 1654 int error = 0; 1655 1656 inp = so->so_pcb; 1657 if (inp == NULL) { 1658 error = EINVAL; 1659 goto out; 1660 } 1661 if (inp->inp_faddr.s_addr == INADDR_ANY) { 1662 error = ENOTCONN; 1663 goto out; 1664 } 1665 1666 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1667 1668 in_pcbdisconnect(inp); 1669 1670 /* 1671 * Follow traditional BSD behavior and retain the local port 1672 * binding. But, fix the old misbehavior of overwriting any 1673 * previously bound local address. 1674 */ 1675 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1676 inp->inp_laddr.s_addr = INADDR_ANY; 1677 1678 if (so->so_state & SS_ISCLOSING) { 1679 /* 1680 * If this socket is being closed, there is no need 1681 * to put this socket back into wildcard hash table. 1682 */ 1683 error = 0; 1684 goto out; 1685 } 1686 1687 forwarded = udp_inswildcardhash(inp, &msg->disconnect.base, 0); 1688 if (forwarded) { 1689 /* 1690 * The message is further forwarded, so we are done 1691 * here. 
1692 */ 1693 return; 1694 } 1695 out: 1696 lwkt_replymsg(&msg->disconnect.base.lmsg, error); 1697 } 1698 1699 void 1700 udp_shutdown(netmsg_t msg) 1701 { 1702 struct socket *so = msg->shutdown.base.nm_so; 1703 struct inpcb *inp; 1704 int error; 1705 1706 inp = so->so_pcb; 1707 if (inp) { 1708 socantsendmore(so); 1709 error = 0; 1710 } else { 1711 error = EINVAL; 1712 } 1713 lwkt_replymsg(&msg->shutdown.base.lmsg, error); 1714 } 1715 1716 struct pr_usrreqs udp_usrreqs = { 1717 .pru_abort = udp_abort, 1718 .pru_accept = pr_generic_notsupp, 1719 .pru_attach = udp_attach, 1720 .pru_bind = udp_bind, 1721 .pru_connect = udp_connect, 1722 .pru_connect2 = pr_generic_notsupp, 1723 .pru_control = in_control_dispatch, 1724 .pru_detach = udp_detach, 1725 .pru_disconnect = udp_disconnect, 1726 .pru_listen = pr_generic_notsupp, 1727 .pru_peeraddr = in_setpeeraddr_dispatch, 1728 .pru_rcvd = pr_generic_notsupp, 1729 .pru_rcvoob = pr_generic_notsupp, 1730 .pru_send = udp_send, 1731 .pru_sense = pru_sense_null, 1732 .pru_shutdown = udp_shutdown, 1733 .pru_sockaddr = in_setsockaddr_dispatch, 1734 .pru_sosend = sosendudp, 1735 .pru_soreceive = soreceive 1736 }; 1737