1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 67 * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $ 68 * $DragonFly: src/sys/netinet/udp_usrreq.c,v 1.47 2008/11/11 10:46:58 sephe Exp $ 69 */ 70 71 #include "opt_ipsec.h" 72 #include "opt_inet6.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/kernel.h> 77 #include <sys/malloc.h> 78 #include <sys/mbuf.h> 79 #include <sys/domain.h> 80 #include <sys/proc.h> 81 #include <sys/priv.h> 82 #include <sys/protosw.h> 83 #include <sys/socket.h> 84 #include <sys/socketvar.h> 85 #include <sys/sysctl.h> 86 #include <sys/syslog.h> 87 #include <sys/in_cksum.h> 88 89 #include <sys/thread2.h> 90 #include <sys/socketvar2.h> 91 92 #include <machine/stdarg.h> 93 94 #include <net/if.h> 95 #include <net/route.h> 96 #include <net/netmsg2.h> 97 98 #include <netinet/in.h> 99 #include <netinet/in_systm.h> 100 #include <netinet/ip.h> 101 #ifdef INET6 102 #include <netinet/ip6.h> 103 #endif 104 #include <netinet/in_pcb.h> 105 #include <netinet/in_var.h> 106 #include <netinet/ip_var.h> 107 #ifdef INET6 108 #include <netinet6/ip6_var.h> 109 #endif 110 #include <netinet/ip_icmp.h> 111 #include <netinet/icmp_var.h> 112 #include <netinet/udp.h> 113 #include <netinet/udp_var.h> 114 115 #ifdef FAST_IPSEC 116 #include <netproto/ipsec/ipsec.h> 117 #endif 118 119 #ifdef IPSEC 120 #include <netinet6/ipsec.h> 121 #endif 122 123 /* 124 * UDP protocol implementation. 125 * Per RFC 768, August, 1980. 126 */ 127 #ifndef COMPAT_42 128 static int udpcksum = 1; 129 #else 130 static int udpcksum = 0; /* XXX */ 131 #endif 132 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, 133 &udpcksum, 0, ""); 134 135 int log_in_vain = 0; 136 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 137 &log_in_vain, 0, "Log all incoming UDP packets"); 138 139 static int blackhole = 0; 140 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, 141 &blackhole, 0, "Do not send port unreachables for refused connects"); 142 143 static int strict_mcast_mship = 1; 144 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, 145 &strict_mcast_mship, 0, "Only send multicast to member sockets"); 146 147 struct inpcbinfo udbinfo; 148 149 #ifndef UDBHASHSIZE 150 #define UDBHASHSIZE 16 151 #endif 152 153 struct udpstat udpstat; /* from udp_var.h */ 154 SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW, 155 &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); 156 157 static struct sockaddr_in udp_in = { sizeof udp_in, AF_INET }; 158 #ifdef INET6 159 struct udp_in6 { 160 struct sockaddr_in6 uin6_sin; 161 u_char uin6_init_done : 1; 162 } udp_in6 = { 163 { sizeof udp_in6.uin6_sin, AF_INET6 }, 164 0 165 }; 166 struct udp_ip6 { 167 struct ip6_hdr uip6_ip6; 168 u_char uip6_init_done : 1; 169 } udp_ip6; 170 #endif /* INET6 */ 171 172 static void udp_append (struct inpcb *last, struct ip *ip, 173 struct mbuf *n, int off); 174 #ifdef INET6 175 static void ip_2_ip6_hdr (struct ip6_hdr *ip6, struct ip *ip); 176 #endif 177 178 static int udp_connect_oncpu(struct socket *so, struct thread *td, 179 struct sockaddr_in *sin, struct sockaddr_in *if_sin); 180 static int udp_detach (struct socket *so); 181 static int udp_output (struct inpcb *, struct mbuf *, struct sockaddr *, 182 struct mbuf *, struct thread *); 183 184 void 185 udp_init(void) 186 { 187 in_pcbinfo_init(&udbinfo); 188 udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); 189 udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, 190 &udbinfo.porthashmask); 191 udbinfo.wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB, 192 &udbinfo.wildcardhashmask); 193 udbinfo.ipi_size = sizeof(struct inpcb); 194 } 195 196 /* 197 * Check multicast packets to make sure they are only sent to sockets with 198 * multicast memberships for the packet's destination address and arrival 199 * interface. Multicast packets to multicast-unaware sockets are also 200 * disallowed. 201 * 202 * Returns 0 if the packet is acceptable, -1 if it is not. 203 */ 204 static __inline int 205 check_multicast_membership(struct ip *ip, struct inpcb *inp, struct mbuf *m) 206 { 207 int mshipno; 208 struct ip_moptions *mopt; 209 210 if (strict_mcast_mship == 0 || 211 !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 212 return (0); 213 } 214 mopt = inp->inp_moptions; 215 if (mopt == NULL) 216 return (-1); 217 for (mshipno = 0; mshipno <= mopt->imo_num_memberships; ++mshipno) { 218 struct in_multi *maddr = mopt->imo_membership[mshipno]; 219 220 if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr && 221 m->m_pkthdr.rcvif == maddr->inm_ifp) { 222 return (0); 223 } 224 } 225 return (-1); 226 } 227 228 void 229 udp_input(struct mbuf *m, ...) 230 { 231 int iphlen; 232 struct ip *ip; 233 struct udphdr *uh; 234 struct inpcb *inp; 235 struct mbuf *opts = NULL; 236 int len, off, proto; 237 struct ip save_ip; 238 struct sockaddr *append_sa; 239 __va_list ap; 240 241 __va_start(ap, m); 242 off = __va_arg(ap, int); 243 proto = __va_arg(ap, int); 244 __va_end(ap); 245 246 iphlen = off; 247 udpstat.udps_ipackets++; 248 249 /* 250 * Strip IP options, if any; should skip this, 251 * make available to user, and use on returned packets, 252 * but we don't yet have a way to check the checksum 253 * with options still present. 254 */ 255 if (iphlen > sizeof(struct ip)) { 256 ip_stripoptions(m); 257 iphlen = sizeof(struct ip); 258 } 259 260 /* 261 * IP and UDP headers are together in first mbuf. 262 * Already checked and pulled up in ip_demux(). 263 */ 264 KASSERT(m->m_len >= iphlen + sizeof(struct udphdr), 265 ("UDP header not in one mbuf")); 266 267 ip = mtod(m, struct ip *); 268 uh = (struct udphdr *)((caddr_t)ip + iphlen); 269 270 /* destination port of 0 is illegal, based on RFC768. */ 271 if (uh->uh_dport == 0) 272 goto bad; 273 274 /* 275 * Make mbuf data length reflect UDP length. 276 * If not enough data to reflect UDP length, drop. 277 */ 278 len = ntohs((u_short)uh->uh_ulen); 279 if (ip->ip_len != len) { 280 if (len > ip->ip_len || len < sizeof(struct udphdr)) { 281 udpstat.udps_badlen++; 282 goto bad; 283 } 284 m_adj(m, len - ip->ip_len); 285 /* ip->ip_len = len; */ 286 } 287 /* 288 * Save a copy of the IP header in case we want restore it 289 * for sending an ICMP error message in response. 290 */ 291 save_ip = *ip; 292 293 /* 294 * Checksum extended UDP header and data. 295 */ 296 if (uh->uh_sum) { 297 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 298 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) 299 uh->uh_sum = m->m_pkthdr.csum_data; 300 else 301 uh->uh_sum = in_pseudo(ip->ip_src.s_addr, 302 ip->ip_dst.s_addr, htonl((u_short)len + 303 m->m_pkthdr.csum_data + IPPROTO_UDP)); 304 uh->uh_sum ^= 0xffff; 305 } else { 306 char b[9]; 307 308 bcopy(((struct ipovly *)ip)->ih_x1, b, 9); 309 bzero(((struct ipovly *)ip)->ih_x1, 9); 310 ((struct ipovly *)ip)->ih_len = uh->uh_ulen; 311 uh->uh_sum = in_cksum(m, len + sizeof(struct ip)); 312 bcopy(b, ((struct ipovly *)ip)->ih_x1, 9); 313 } 314 if (uh->uh_sum) { 315 udpstat.udps_badsum++; 316 m_freem(m); 317 return; 318 } 319 } else 320 udpstat.udps_nosum++; 321 322 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || 323 in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { 324 struct inpcb *last; 325 326 /* 327 * Deliver a multicast or broadcast datagram to *all* sockets 328 * for which the local and remote addresses and ports match 329 * those of the incoming datagram. This allows more than 330 * one process to receive multi/broadcasts on the same port. 331 * (This really ought to be done for unicast datagrams as 332 * well, but that would cause problems with existing 333 * applications that open both address-specific sockets and 334 * a wildcard socket listening to the same port -- they would 335 * end up receiving duplicates of every unicast datagram. 336 * Those applications open the multiple sockets to overcome an 337 * inadequacy of the UDP socket interface, but for backwards 338 * compatibility we avoid the problem here rather than 339 * fixing the interface. Maybe 4.5BSD will remedy this?) 340 */ 341 342 /* 343 * Construct sockaddr format source address. 344 */ 345 udp_in.sin_port = uh->uh_sport; 346 udp_in.sin_addr = ip->ip_src; 347 /* 348 * Locate pcb(s) for datagram. 349 * (Algorithm copied from raw_intr().) 350 */ 351 last = NULL; 352 #ifdef INET6 353 udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0; 354 #endif 355 LIST_FOREACH(inp, &udbinfo.pcblisthead, inp_list) { 356 if (inp->inp_flags & INP_PLACEMARKER) 357 continue; 358 #ifdef INET6 359 if (!(inp->inp_vflag & INP_IPV4)) 360 continue; 361 #endif 362 if (inp->inp_lport != uh->uh_dport) 363 continue; 364 if (inp->inp_laddr.s_addr != INADDR_ANY) { 365 if (inp->inp_laddr.s_addr != 366 ip->ip_dst.s_addr) 367 continue; 368 } 369 if (inp->inp_faddr.s_addr != INADDR_ANY) { 370 if (inp->inp_faddr.s_addr != 371 ip->ip_src.s_addr || 372 inp->inp_fport != uh->uh_sport) 373 continue; 374 } 375 376 if (check_multicast_membership(ip, inp, m) < 0) 377 continue; 378 379 if (last != NULL) { 380 struct mbuf *n; 381 382 #ifdef IPSEC 383 /* check AH/ESP integrity. */ 384 if (ipsec4_in_reject_so(m, last->inp_socket)) 385 ipsecstat.in_polvio++; 386 /* do not inject data to pcb */ 387 else 388 #endif /*IPSEC*/ 389 #ifdef FAST_IPSEC 390 /* check AH/ESP integrity. */ 391 if (ipsec4_in_reject(m, last)) 392 ; 393 else 394 #endif /*FAST_IPSEC*/ 395 if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL) 396 udp_append(last, ip, n, 397 iphlen + 398 sizeof(struct udphdr)); 399 } 400 last = inp; 401 /* 402 * Don't look for additional matches if this one does 403 * not have either the SO_REUSEPORT or SO_REUSEADDR 404 * socket options set. This heuristic avoids searching 405 * through all pcbs in the common case of a non-shared 406 * port. It * assumes that an application will never 407 * clear these options after setting them. 408 */ 409 if (!(last->inp_socket->so_options & 410 (SO_REUSEPORT | SO_REUSEADDR))) 411 break; 412 } 413 414 if (last == NULL) { 415 /* 416 * No matching pcb found; discard datagram. 417 * (No need to send an ICMP Port Unreachable 418 * for a broadcast or multicast datgram.) 419 */ 420 udpstat.udps_noportbcast++; 421 goto bad; 422 } 423 #ifdef IPSEC 424 /* check AH/ESP integrity. */ 425 if (ipsec4_in_reject_so(m, last->inp_socket)) { 426 ipsecstat.in_polvio++; 427 goto bad; 428 } 429 #endif /*IPSEC*/ 430 #ifdef FAST_IPSEC 431 /* check AH/ESP integrity. */ 432 if (ipsec4_in_reject(m, last)) 433 goto bad; 434 #endif /*FAST_IPSEC*/ 435 udp_append(last, ip, m, iphlen + sizeof(struct udphdr)); 436 return; 437 } 438 /* 439 * Locate pcb for datagram. 440 */ 441 inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport, 442 ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); 443 if (inp == NULL) { 444 if (log_in_vain) { 445 char buf[sizeof "aaa.bbb.ccc.ddd"]; 446 447 strcpy(buf, inet_ntoa(ip->ip_dst)); 448 log(LOG_INFO, 449 "Connection attempt to UDP %s:%d from %s:%d\n", 450 buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), 451 ntohs(uh->uh_sport)); 452 } 453 udpstat.udps_noport++; 454 if (m->m_flags & (M_BCAST | M_MCAST)) { 455 udpstat.udps_noportbcast++; 456 goto bad; 457 } 458 if (blackhole) 459 goto bad; 460 #ifdef ICMP_BANDLIM 461 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) 462 goto bad; 463 #endif 464 *ip = save_ip; 465 ip->ip_len += iphlen; 466 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); 467 return; 468 } 469 #ifdef IPSEC 470 if (ipsec4_in_reject_so(m, inp->inp_socket)) { 471 ipsecstat.in_polvio++; 472 goto bad; 473 } 474 #endif /*IPSEC*/ 475 #ifdef FAST_IPSEC 476 if (ipsec4_in_reject(m, inp)) 477 goto bad; 478 #endif /*FAST_IPSEC*/ 479 /* 480 * Check the minimum TTL for socket. 481 */ 482 if (ip->ip_ttl < inp->inp_ip_minttl) 483 goto bad; 484 485 /* 486 * Construct sockaddr format source address. 487 * Stuff source address and datagram in user buffer. 488 */ 489 udp_in.sin_port = uh->uh_sport; 490 udp_in.sin_addr = ip->ip_src; 491 if ((inp->inp_flags & INP_CONTROLOPTS) || 492 (inp->inp_socket->so_options & SO_TIMESTAMP)) { 493 #ifdef INET6 494 if (inp->inp_vflag & INP_IPV6) { 495 int savedflags; 496 497 ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip); 498 savedflags = inp->inp_flags; 499 inp->inp_flags &= ~INP_UNMAPPABLEOPTS; 500 ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m); 501 inp->inp_flags = savedflags; 502 } else 503 #endif 504 ip_savecontrol(inp, &opts, ip, m); 505 } 506 m_adj(m, iphlen + sizeof(struct udphdr)); 507 #ifdef INET6 508 if (inp->inp_vflag & INP_IPV6) { 509 in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); 510 append_sa = (struct sockaddr *)&udp_in6; 511 } else 512 #endif 513 append_sa = (struct sockaddr *)&udp_in; 514 if (ssb_appendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) { 515 udpstat.udps_fullsock++; 516 goto bad; 517 } 518 sorwakeup(inp->inp_socket); 519 return; 520 bad: 521 m_freem(m); 522 if (opts) 523 m_freem(opts); 524 return; 525 } 526 527 #ifdef INET6 528 static void 529 ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip) 530 { 531 bzero(ip6, sizeof *ip6); 532 533 ip6->ip6_vfc = IPV6_VERSION; 534 ip6->ip6_plen = ip->ip_len; 535 ip6->ip6_nxt = ip->ip_p; 536 ip6->ip6_hlim = ip->ip_ttl; 537 ip6->ip6_src.s6_addr32[2] = ip6->ip6_dst.s6_addr32[2] = 538 IPV6_ADDR_INT32_SMP; 539 ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr; 540 ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr; 541 } 542 #endif 543 544 /* 545 * subroutine of udp_input(), mainly for source code readability. 546 * caller must properly init udp_ip6 and udp_in6 beforehand. 547 */ 548 static void 549 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off) 550 { 551 struct sockaddr *append_sa; 552 struct mbuf *opts = NULL; 553 554 if (last->inp_flags & INP_CONTROLOPTS || 555 last->inp_socket->so_options & SO_TIMESTAMP) { 556 #ifdef INET6 557 if (last->inp_vflag & INP_IPV6) { 558 int savedflags; 559 560 if (udp_ip6.uip6_init_done == 0) { 561 ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip); 562 udp_ip6.uip6_init_done = 1; 563 } 564 savedflags = last->inp_flags; 565 last->inp_flags &= ~INP_UNMAPPABLEOPTS; 566 ip6_savecontrol(last, &opts, &udp_ip6.uip6_ip6, n); 567 last->inp_flags = savedflags; 568 } else 569 #endif 570 ip_savecontrol(last, &opts, ip, n); 571 } 572 #ifdef INET6 573 if (last->inp_vflag & INP_IPV6) { 574 if (udp_in6.uin6_init_done == 0) { 575 in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); 576 udp_in6.uin6_init_done = 1; 577 } 578 append_sa = (struct sockaddr *)&udp_in6.uin6_sin; 579 } else 580 #endif 581 append_sa = (struct sockaddr *)&udp_in; 582 m_adj(n, off); 583 if (ssb_appendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) == 0) { 584 m_freem(n); 585 if (opts) 586 m_freem(opts); 587 udpstat.udps_fullsock++; 588 } else 589 sorwakeup(last->inp_socket); 590 } 591 592 /* 593 * Notify a udp user of an asynchronous error; 594 * just wake up so that he can collect error status. 595 */ 596 void 597 udp_notify(struct inpcb *inp, int error) 598 { 599 inp->inp_socket->so_error = error; 600 sorwakeup(inp->inp_socket); 601 sowwakeup(inp->inp_socket); 602 } 603 604 struct netmsg_udp_notify { 605 struct netmsg nm_nmsg; 606 void (*nm_notify)(struct inpcb *, int); 607 struct in_addr nm_faddr; 608 int nm_arg; 609 }; 610 611 static void 612 udp_notifyall_oncpu(struct netmsg *netmsg) 613 { 614 struct netmsg_udp_notify *nmsg = (struct netmsg_udp_notify *)netmsg; 615 int nextcpu; 616 617 in_pcbnotifyall(&udbinfo.pcblisthead, nmsg->nm_faddr, nmsg->nm_arg, 618 nmsg->nm_notify); 619 620 nextcpu = mycpuid + 1; 621 if (nextcpu < ncpus2) 622 lwkt_forwardmsg(cpu_portfn(nextcpu), &netmsg->nm_lmsg); 623 else 624 lwkt_replymsg(&netmsg->nm_lmsg, 0); 625 } 626 627 static void 628 udp_rtchange(struct inpcb *inp, int err) 629 { 630 #ifdef SMP 631 /* XXX Nuke this, once UDP inpcbs are CPU localized */ 632 if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->rt_cpuid == mycpuid) { 633 rtfree(inp->inp_route.ro_rt); 634 inp->inp_route.ro_rt = NULL; 635 /* 636 * A new route can be allocated the next time 637 * output is attempted. 638 */ 639 } 640 #else 641 in_rtchange(inp, err); 642 #endif 643 } 644 645 void 646 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) 647 { 648 struct ip *ip = vip; 649 struct udphdr *uh; 650 void (*notify) (struct inpcb *, int) = udp_notify; 651 struct in_addr faddr; 652 struct inpcb *inp; 653 654 faddr = ((struct sockaddr_in *)sa)->sin_addr; 655 if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) 656 return; 657 658 if (PRC_IS_REDIRECT(cmd)) { 659 ip = NULL; 660 notify = udp_rtchange; 661 } else if (cmd == PRC_HOSTDEAD) 662 ip = NULL; 663 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) 664 return; 665 if (ip) { 666 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 667 inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport, 668 ip->ip_src, uh->uh_sport, 0, NULL); 669 if (inp != NULL && inp->inp_socket != NULL) 670 (*notify)(inp, inetctlerrmap[cmd]); 671 } else if (PRC_IS_REDIRECT(cmd)) { 672 struct netmsg_udp_notify nmsg; 673 674 KKASSERT(&curthread->td_msgport == cpu_portfn(0)); 675 netmsg_init(&nmsg.nm_nmsg, NULL, &curthread->td_msgport, 676 0, udp_notifyall_oncpu); 677 nmsg.nm_faddr = faddr; 678 nmsg.nm_arg = inetctlerrmap[cmd]; 679 nmsg.nm_notify = notify; 680 681 lwkt_domsg(cpu_portfn(0), &nmsg.nm_nmsg.nm_lmsg, 0); 682 } else { 683 /* 684 * XXX We should forward msg upon PRC_HOSTHEAD and ip == NULL, 685 * once UDP inpcbs are CPU localized 686 */ 687 KKASSERT(&curthread->td_msgport == cpu_portfn(0)); 688 in_pcbnotifyall(&udbinfo.pcblisthead, faddr, inetctlerrmap[cmd], 689 notify); 690 } 691 } 692 693 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, &udbinfo, 0, 694 in_pcblist_global, "S,xinpcb", "List of active UDP sockets"); 695 696 static int 697 udp_getcred(SYSCTL_HANDLER_ARGS) 698 { 699 struct sockaddr_in addrs[2]; 700 struct inpcb *inp; 701 int error; 702 703 error = priv_check(req->td, PRIV_ROOT); 704 if (error) 705 return (error); 706 error = SYSCTL_IN(req, addrs, sizeof addrs); 707 if (error) 708 return (error); 709 inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port, 710 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); 711 if (inp == NULL || inp->inp_socket == NULL) { 712 error = ENOENT; 713 goto out; 714 } 715 error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); 716 out: 717 return (error); 718 } 719 720 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 721 0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection"); 722 723 static int 724 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *dstaddr, 725 struct mbuf *control, struct thread *td) 726 { 727 struct udpiphdr *ui; 728 int len = m->m_pkthdr.len; 729 struct sockaddr_in *sin; /* really is initialized before use */ 730 int error = 0, lport_any = 0; 731 732 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { 733 error = EMSGSIZE; 734 goto release; 735 } 736 737 if (inp->inp_lport == 0) { /* unbound socket */ 738 error = in_pcbbind(inp, NULL, td); 739 if (error) 740 goto release; 741 in_pcbinswildcardhash(inp); 742 lport_any = 1; 743 } 744 745 if (dstaddr != NULL) { /* destination address specified */ 746 if (inp->inp_faddr.s_addr != INADDR_ANY) { 747 /* already connected */ 748 error = EISCONN; 749 goto release; 750 } 751 sin = (struct sockaddr_in *)dstaddr; 752 if (!prison_remote_ip(td, (struct sockaddr *)&sin)) { 753 error = EAFNOSUPPORT; /* IPv6 only jail */ 754 goto release; 755 } 756 } else { 757 if (inp->inp_faddr.s_addr == INADDR_ANY) { 758 /* no destination specified and not already connected */ 759 error = ENOTCONN; 760 goto release; 761 } 762 sin = NULL; 763 } 764 765 /* 766 * Calculate data length and get a mbuf 767 * for UDP and IP headers. 768 */ 769 M_PREPEND(m, sizeof(struct udpiphdr), MB_DONTWAIT); 770 if (m == NULL) { 771 error = ENOBUFS; 772 goto release; 773 } 774 775 /* 776 * Fill in mbuf with extended UDP header 777 * and addresses and length put into network format. 778 */ 779 ui = mtod(m, struct udpiphdr *); 780 bzero(ui->ui_x1, sizeof ui->ui_x1); /* XXX still needed? */ 781 ui->ui_pr = IPPROTO_UDP; 782 783 /* 784 * Set destination address. 785 */ 786 if (dstaddr != NULL) { /* use specified destination */ 787 ui->ui_dst = sin->sin_addr; 788 ui->ui_dport = sin->sin_port; 789 } else { /* use connected destination */ 790 ui->ui_dst = inp->inp_faddr; 791 ui->ui_dport = inp->inp_fport; 792 } 793 794 /* 795 * Set source address. 796 */ 797 if (inp->inp_laddr.s_addr == INADDR_ANY) { 798 struct sockaddr_in *if_sin; 799 800 if (dstaddr == NULL) { 801 /* 802 * connect() had (or should have) failed because 803 * the interface had no IP address, but the 804 * application proceeded to call send() anyways. 805 */ 806 error = ENOTCONN; 807 goto release; 808 } 809 810 /* Look up outgoing interface. */ 811 if ((error = in_pcbladdr(inp, dstaddr, &if_sin, td))) 812 goto release; 813 ui->ui_src = if_sin->sin_addr; /* use address of interface */ 814 } else { 815 ui->ui_src = inp->inp_laddr; /* use non-null bound address */ 816 } 817 ui->ui_sport = inp->inp_lport; 818 KASSERT(inp->inp_lport != 0, ("inp lport should have been bound")); 819 820 ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); 821 822 /* 823 * Set up checksum and output datagram. 824 */ 825 if (udpcksum) { 826 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, 827 htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); 828 m->m_pkthdr.csum_flags = CSUM_UDP; 829 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); 830 } else { 831 ui->ui_sum = 0; 832 } 833 ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; 834 ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ 835 ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ 836 udpstat.udps_opackets++; 837 838 error = ip_output(m, inp->inp_options, &inp->inp_route, 839 (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)) | 840 IP_DEBUGROUTE, 841 inp->inp_moptions, inp); 842 843 /* 844 * If this is the first data gram sent on an unbound and unconnected 845 * UDP socket, lport will be changed in this function. If target 846 * CPU after this lport changing is no longer the current CPU, then 847 * free the route entry allocated on the current CPU. 848 */ 849 if (lport_any) { 850 if (udp_addrcpu(inp->inp_faddr.s_addr, inp->inp_fport, 851 inp->inp_laddr.s_addr, inp->inp_lport) != mycpuid) { 852 struct route *ro = &inp->inp_route; 853 854 if (ro->ro_rt != NULL) 855 RTFREE(ro->ro_rt); 856 bzero(ro, sizeof(*ro)); 857 } 858 } 859 return (error); 860 861 release: 862 m_freem(m); 863 return (error); 864 } 865 866 u_long udp_sendspace = 9216; /* really max datagram size */ 867 /* 40 1K datagrams */ 868 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, 869 &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); 870 871 u_long udp_recvspace = 40 * (1024 + 872 #ifdef INET6 873 sizeof(struct sockaddr_in6) 874 #else 875 sizeof(struct sockaddr_in) 876 #endif 877 ); 878 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 879 &udp_recvspace, 0, "Maximum incoming UDP datagram size"); 880 881 /* 882 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort() 883 * will sofree() it when we return. 884 */ 885 static int 886 udp_abort(struct socket *so) 887 { 888 struct inpcb *inp; 889 int error; 890 891 inp = so->so_pcb; 892 if (inp) { 893 soisdisconnected(so); 894 in_pcbdetach(inp); 895 error = 0; 896 } else { 897 error = EINVAL; 898 } 899 return error; 900 } 901 902 static int 903 udp_attach(struct socket *so, int proto, struct pru_attach_info *ai) 904 { 905 struct inpcb *inp; 906 int error; 907 908 inp = so->so_pcb; 909 if (inp != NULL) 910 return EINVAL; 911 912 error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit); 913 if (error) 914 return error; 915 error = in_pcballoc(so, &udbinfo); 916 if (error) 917 return error; 918 so->so_port = udp_soport_attach(so); 919 920 inp = (struct inpcb *)so->so_pcb; 921 inp->inp_vflag |= INP_IPV4; 922 inp->inp_ip_ttl = ip_defttl; 923 return 0; 924 } 925 926 static int 927 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 928 { 929 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 930 struct inpcb *inp; 931 int error; 932 933 inp = so->so_pcb; 934 if (inp == NULL) 935 return EINVAL; 936 error = in_pcbbind(inp, nam, td); 937 if (error == 0) { 938 if (sin->sin_addr.s_addr != INADDR_ANY) 939 inp->inp_flags |= INP_WASBOUND_NOTANY; 940 in_pcbinswildcardhash(inp); 941 } 942 return error; 943 } 944 945 #ifdef SMP 946 947 struct netmsg_udp_connect { 948 struct netmsg nm_netmsg; 949 struct socket *nm_so; 950 struct sockaddr_in *nm_sin; 951 struct sockaddr_in *nm_ifsin; 952 struct thread *nm_td; 953 }; 954 955 static void 956 udp_connect_handler(netmsg_t netmsg) 957 { 958 struct netmsg_udp_connect *msg = (void *)netmsg; 959 int error; 960 961 error = udp_connect_oncpu(msg->nm_so, msg->nm_td, 962 msg->nm_sin, msg->nm_ifsin); 963 lwkt_replymsg(&msg->nm_netmsg.nm_lmsg, error); 964 } 965 966 #endif 967 968 static int 969 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 970 { 971 struct inpcb *inp; 972 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 973 struct sockaddr_in *if_sin; 974 lwkt_port_t port; 975 int error; 976 977 inp = so->so_pcb; 978 if (inp == NULL) 979 return EINVAL; 980 if (inp->inp_faddr.s_addr != INADDR_ANY) 981 return EISCONN; 982 error = 0; 983 984 /* 985 * Bind if we have to 986 */ 987 if (td->td_proc && td->td_proc->p_ucred->cr_prison != NULL && 988 inp->inp_laddr.s_addr == INADDR_ANY) { 989 error = in_pcbbind(inp, NULL, td); 990 if (error) 991 return (error); 992 } 993 994 /* 995 * Calculate the correct protocol processing thread. The connect 996 * operation must run there. 997 */ 998 error = in_pcbladdr(inp, nam, &if_sin, td); 999 if (error) 1000 return(error); 1001 if (!prison_remote_ip(td, nam)) 1002 return(EAFNOSUPPORT); /* IPv6 only jail */ 1003 1004 port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port, 1005 inp->inp_laddr.s_addr, inp->inp_lport); 1006 #ifdef SMP 1007 if (port != &curthread->td_msgport) { 1008 struct netmsg_udp_connect msg; 1009 struct route *ro = &inp->inp_route; 1010 1011 /* 1012 * in_pcbladdr() may have allocated a route entry for us 1013 * on the current CPU, but we need a route entry on the 1014 * inpcb's owner CPU, so free it here. 1015 */ 1016 if (ro->ro_rt != NULL) 1017 RTFREE(ro->ro_rt); 1018 bzero(ro, sizeof(*ro)); 1019 1020 /* 1021 * NOTE: We haven't set so->so_port yet do not pass so 1022 * to netmsg_init() or it will be improperly forwarded. 1023 */ 1024 netmsg_init(&msg.nm_netmsg, NULL, &curthread->td_msgport, 1025 0, udp_connect_handler); 1026 msg.nm_so = so; 1027 msg.nm_sin = sin; 1028 msg.nm_ifsin = if_sin; 1029 msg.nm_td = td; 1030 error = lwkt_domsg(port, &msg.nm_netmsg.nm_lmsg, 0); 1031 } else { 1032 error = udp_connect_oncpu(so, td, sin, if_sin); 1033 } 1034 #else 1035 error = udp_connect_oncpu(so, td, sin, if_sin); 1036 #endif 1037 return (error); 1038 } 1039 1040 static int 1041 udp_connect_oncpu(struct socket *so, struct thread *td, 1042 struct sockaddr_in *sin, struct sockaddr_in *if_sin) 1043 { 1044 struct inpcb *inp; 1045 int error; 1046 1047 inp = so->so_pcb; 1048 if (inp->inp_flags & INP_WILDCARD) 1049 in_pcbremwildcardhash(inp); 1050 error = in_pcbconnect(inp, (struct sockaddr *)sin, td); 1051 1052 if (error == 0) { 1053 /* 1054 * No more errors can occur, finish adjusting the socket 1055 * and change the processing port to reflect the connected 1056 * socket. Once set we can no longer safely mess with the 1057 * socket. 1058 */ 1059 soisconnected(so); 1060 sosetport(so, &curthread->td_msgport); 1061 } else if (error == EAFNOSUPPORT) { /* connection dissolved */ 1062 /* 1063 * Follow traditional BSD behavior and retain 1064 * the local port binding. But, fix the old misbehavior 1065 * of overwriting any previously bound local address. 1066 */ 1067 if (!(inp->inp_flags & INP_WASBOUND_NOTANY)) 1068 inp->inp_laddr.s_addr = INADDR_ANY; 1069 in_pcbinswildcardhash(inp); 1070 } 1071 return error; 1072 } 1073 1074 static int 1075 udp_detach(struct socket *so) 1076 { 1077 struct inpcb *inp; 1078 1079 inp = so->so_pcb; 1080 if (inp == NULL) 1081 return EINVAL; 1082 in_pcbdetach(inp); 1083 return 0; 1084 } 1085 1086 static int 1087 udp_disconnect(struct socket *so) 1088 { 1089 struct route *ro; 1090 struct inpcb *inp; 1091 1092 inp = so->so_pcb; 1093 if (inp == NULL) 1094 return EINVAL; 1095 if (inp->inp_faddr.s_addr == INADDR_ANY) 1096 return ENOTCONN; 1097 1098 soreference(so); 1099 in_pcbdisconnect(inp); 1100 soclrstate(so, SS_ISCONNECTED); /* XXX */ 1101 sofree(so); 1102 1103 ro = &inp->inp_route; 1104 if (ro->ro_rt != NULL) 1105 RTFREE(ro->ro_rt); 1106 bzero(ro, sizeof(*ro)); 1107 1108 return 0; 1109 } 1110 1111 static int 1112 udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, 1113 struct mbuf *control, struct thread *td) 1114 { 1115 struct inpcb *inp; 1116 1117 inp = so->so_pcb; 1118 if (inp == NULL) { 1119 m_freem(m); 1120 return EINVAL; 1121 } 1122 return udp_output(inp, m, addr, control, td); 1123 } 1124 1125 int 1126 udp_shutdown(struct socket *so) 1127 { 1128 struct inpcb *inp; 1129 1130 inp = so->so_pcb; 1131 if (inp == NULL) 1132 return EINVAL; 1133 socantsendmore(so); 1134 return 0; 1135 } 1136 1137 struct pr_usrreqs udp_usrreqs = { 1138 .pru_abort = udp_abort, 1139 .pru_accept = pru_accept_notsupp, 1140 .pru_attach = udp_attach, 1141 .pru_bind = udp_bind, 1142 .pru_connect = udp_connect, 1143 .pru_connect2 = pru_connect2_notsupp, 1144 .pru_control = in_control, 1145 .pru_detach = udp_detach, 1146 .pru_disconnect = udp_disconnect, 1147 .pru_listen = pru_listen_notsupp, 1148 .pru_peeraddr = in_setpeeraddr, 1149 .pru_rcvd = pru_rcvd_notsupp, 1150 .pru_rcvoob = pru_rcvoob_notsupp, 1151 .pru_send = udp_send, 1152 .pru_sense = pru_sense_null, 1153 .pru_shutdown = udp_shutdown, 1154 .pru_sockaddr = in_setsockaddr, 1155 .pru_sosend = sosendudp, 1156 .pru_soreceive = soreceive 1157 }; 1158 1159