1 /* $NetBSD: ip_output.c,v 1.236 2015/04/03 07:55:18 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Public Access Networks Corporation ("Panix"). It was developed under 38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 59 * POSSIBILITY OF SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1982, 1986, 1988, 1990, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 91 */ 92 93 #include <sys/cdefs.h> 94 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.236 2015/04/03 07:55:18 ozaki-r Exp $"); 95 96 #include "opt_inet.h" 97 #include "opt_ipsec.h" 98 #include "opt_mrouting.h" 99 #include "opt_net_mpsafe.h" 100 101 #include <sys/param.h> 102 #include <sys/kmem.h> 103 #include <sys/mbuf.h> 104 #include <sys/protosw.h> 105 #include <sys/socket.h> 106 #include <sys/socketvar.h> 107 #include <sys/kauth.h> 108 #ifdef IPSEC 109 #include <sys/domain.h> 110 #endif 111 #include <sys/systm.h> 112 113 #include <net/if.h> 114 #include <net/route.h> 115 #include <net/pfil.h> 116 117 #include <netinet/in.h> 118 #include <netinet/in_systm.h> 119 #include <netinet/ip.h> 120 #include <netinet/in_pcb.h> 121 #include <netinet/in_var.h> 122 #include <netinet/ip_var.h> 123 #include <netinet/ip_private.h> 124 #include <netinet/in_offload.h> 125 #include <netinet/portalgo.h> 126 #include <netinet/udp.h> 127 128 #ifdef INET6 129 
#include <netinet6/ip6_var.h>
#endif

#ifdef MROUTING
#include <netinet/ip_mroute.h>
#endif

#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
#endif

static int ip_pcbopts(struct inpcb *, const struct sockopt *);
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void ip_mloopback(struct ifnet *, struct mbuf *,
    const struct sockaddr_in *);

extern pfil_head_t *inet_pfil_hook;			/* XXX */

int	ip_do_loopback_cksum = 0;

/*
 * IP output.  The packet in mbuf chain m contains a skeletal IP
 * header (with len, off, ttl, proto, tos, src, dst).
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 *
 * The variadic arguments are read in this fixed order:
 *	struct mbuf *opt	   IP options to insert, or NULL
 *	struct route *ro	   route cache to use; if NULL a local,
 *				   temporary one is used and released on exit
 *	int flags		   IP_* output flags (IP_FORWARDING,
 *				   IP_RAWOUTPUT, IP_ROUTETOIF,
 *				   IP_ALLOWBROADCAST, IP_MTUDISC,
 *				   IP_RETURNMTU, IP_NOIPNEWID, ...)
 *	struct ip_moptions *imo	   multicast options, or NULL
 *	struct socket *so	   originating socket; dereferenced here only
 *				   when IP_RETURNMTU is set, and handed to
 *				   ipsec4_output() when IPSEC is configured
 *
 * Returns 0 on success or an errno value (ENETUNREACH, EHOSTUNREACH,
 * EADDRNOTAVAIL, EACCES, EMSGSIZE, or whatever the IPsec, pfil,
 * fragmentation or interface output layers report).
 */
int
ip_output(struct mbuf *m0, ...)
{
	struct rtentry *rt;
	struct ip *ip;
	struct ifnet *ifp;
	struct mbuf *m = m0;
	int hlen = sizeof (struct ip);
	int len, error = 0;
	struct route iproute;
	const struct sockaddr_in *dst;
	struct in_ifaddr *ia;
	int isbroadcast;
	struct mbuf *opt;
	struct route *ro;
	int flags, sw_csum;
	u_long mtu;
	struct ip_moptions *imo;
	struct socket *so;
	va_list ap;
#ifdef IPSEC
	struct secpolicy *sp = NULL;
#endif
	bool natt_frag = false;
	bool rtmtu_nolock;
	union {
		struct sockaddr		dst;
		struct sockaddr_in	dst4;
	} u;
	struct sockaddr *rdst = &u.dst;	/* real IP destination, as opposed
					 * to the nexthop
					 */

	len = 0;
	va_start(ap, m0);
	opt = va_arg(ap, struct mbuf *);
	ro = va_arg(ap, struct route *);
	flags = va_arg(ap, int);
	imo = va_arg(ap, struct ip_moptions *);
	so = va_arg(ap, struct socket *);
	va_end(ap);

	MCLAIM(m, &ip_tx_mowner);

	/*
	 * Sanity: we need a packet header, no IPv6 checksum requests, and
	 * at most one of the TCPv4/UDPv4 checksum requests.
	 */
	KASSERT((m->m_flags & M_PKTHDR) != 0);
	KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) == 0);
	KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) !=
	    (M_CSUM_TCPv4|M_CSUM_UDPv4));

	if (opt) {
		m = ip_insertoptions(m, opt, &len);
		/* len stays 0 if ip_insertoptions() inserted nothing. */
		if (len >= sizeof(struct ip))
			hlen = len;
	}
	ip = mtod(m, struct ip *);

	/*
	 * Fill in IP header.
	 */
	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
		ip->ip_v = IPVERSION;
		ip->ip_off = htons(0);
		/* ip->ip_id filled in after we find out source ia */
		ip->ip_hl = hlen >> 2;
		IP_STATINC(IP_STAT_LOCALOUT);
	} else {
		/* Header was built by the caller; trust its length. */
		hlen = ip->ip_hl << 2;
	}

	/*
	 * Route packet.
	 */
	if (ro == NULL) {
		memset(&iproute, 0, sizeof(iproute));
		ro = &iproute;
	}
	sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
	dst = satocsin(rtcache_getdst(ro));

	/*
	 * If there is a cached route, check that it is to the same
	 * destination and is still up.  If not, free it and try again.
	 * The address family should also be checked in case of sharing
	 * the cache with IPv6.
	 */
	if (dst && (dst->sin_family != AF_INET ||
	    !in_hosteq(dst->sin_addr, ip->ip_dst)))
		rtcache_free(ro);

	if ((rt = rtcache_validate(ro)) == NULL &&
	    (rt = rtcache_update(ro, 1)) == NULL) {
		dst = &u.dst4;
		rtcache_setdst(ro, &u.dst);
	}

	/*
	 * If routing to interface only, short circuit routing lookup.
	 */
	if (flags & IP_ROUTETOIF) {
		if ((ia = ifatoia(ifa_ifwithladdr(sintocsa(dst)))) == NULL) {
			IP_STATINC(IP_STAT_NOROUTE);
			error = ENETUNREACH;
			goto bad;
		}
		ifp = ia->ia_ifp;
		mtu = ifp->if_mtu;
		ip->ip_ttl = 1;
		isbroadcast = in_broadcast(dst->sin_addr, ifp);
	} else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
	    ip->ip_dst.s_addr == INADDR_BROADCAST) &&
	    imo != NULL && imo->imo_multicast_ifp != NULL) {
		/* The caller pinned the outgoing multicast interface. */
		ifp = imo->imo_multicast_ifp;
		mtu = ifp->if_mtu;
		IFP_TO_IA(ifp, ia);
		isbroadcast = 0;
	} else {
		if (rt == NULL)
			rt = rtcache_init(ro);
		if (rt == NULL) {
			IP_STATINC(IP_STAT_NOROUTE);
			error = EHOSTUNREACH;
			goto bad;
		}
		ia = ifatoia(rt->rt_ifa);
		ifp = rt->rt_ifp;
		/* Prefer the route's MTU; fall back to the interface's. */
		if ((mtu = rt->rt_rmx.rmx_mtu) == 0)
			mtu = ifp->if_mtu;
		rt->rt_use++;
		if (rt->rt_flags & RTF_GATEWAY)
			dst = satosin(rt->rt_gateway);
		if (rt->rt_flags & RTF_HOST)
			isbroadcast = rt->rt_flags & RTF_BROADCAST;
		else
			isbroadcast = in_broadcast(dst->sin_addr, ifp);
	}
	/* True only when we have a route and its MTU metric is not locked. */
	rtmtu_nolock = rt && (rt->rt_rmx.rmx_locks & RTV_MTU) == 0;

	if (IN_MULTICAST(ip->ip_dst.s_addr) ||
	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
		bool inmgroup;

		m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
		    M_BCAST : M_MCAST;
		/*
		 * See if the caller provided any multicast options
		 */
		if (imo != NULL)
			ip->ip_ttl = imo->imo_multicast_ttl;
		else
			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;

		/*
		 * if we don't know the outgoing ifp yet, we can't generate
		 * output
		 */
		if (!ifp) {
			IP_STATINC(IP_STAT_NOROUTE);
			error = ENETUNREACH;
			goto bad;
		}

		/*
		 * If the packet is multicast or broadcast, confirm that
		 * the outgoing interface can transmit it.
		 */
		if (((m->m_flags & M_MCAST) &&
		     (ifp->if_flags & IFF_MULTICAST) == 0) ||
		    ((m->m_flags & M_BCAST) &&
		     (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0))  {
			IP_STATINC(IP_STAT_NOROUTE);
			error = ENETUNREACH;
			goto bad;
		}
		/*
		 * If source address not specified yet, use an address
		 * of outgoing interface.
		 */
		if (in_nullhost(ip->ip_src)) {
			struct in_ifaddr *xia;
			struct ifaddr *xifa;

			IFP_TO_IA(ifp, xia);
			if (!xia) {
				error = EADDRNOTAVAIL;
				goto bad;
			}
			xifa = &xia->ia_ifa;
			if (xifa->ifa_getifa != NULL) {
				xia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
			}
			ip->ip_src = xia->ia_addr.sin_addr;
		}

		inmgroup = in_multi_group(ip->ip_dst, ifp, flags);
		if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) {
			/*
			 * If we belong to the destination multicast group
			 * on the outgoing interface, and the caller did not
			 * forbid loopback, loop back a copy.
			 */
			ip_mloopback(ifp, m, &u.dst4);
		}
#ifdef MROUTING
		else {
			/*
			 * If we are acting as a multicast router, perform
			 * multicast forwarding as if the packet had just
			 * arrived on the interface to which we are about
			 * to send.  The multicast forwarding function
			 * recursively calls this function, using the
			 * IP_FORWARDING flag to prevent infinite recursion.
			 *
			 * Multicasts that are looped back by ip_mloopback(),
			 * above, will be forwarded by the ip_input() routine,
			 * if necessary.
			 */
			extern struct socket *ip_mrouter;

			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
				if (ip_mforward(m, ifp) != 0) {
					m_freem(m);
					goto done;
				}
			}
		}
#endif
		/*
		 * Multicasts with a time-to-live of zero may be looped-
		 * back, above, but must not be transmitted on a network.
		 * Also, multicasts addressed to the loopback interface
		 * are not sent -- the above call to ip_mloopback() will
		 * loop back a copy if this host actually belongs to the
		 * destination group on the loopback interface.
		 */
		if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
			m_freem(m);
			goto done;
		}
		goto sendit;
	}

	/*
	 * If source address not specified yet, use address
	 * of outgoing interface.
	 */
	if (in_nullhost(ip->ip_src)) {
		struct ifaddr *xifa;

		xifa = &ia->ia_ifa;
		if (xifa->ifa_getifa != NULL)
			ia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
		ip->ip_src = ia->ia_addr.sin_addr;
	}

	/*
	 * packets with Class-D address as source are not valid per
	 * RFC 1112
	 */
	if (IN_MULTICAST(ip->ip_src.s_addr)) {
		IP_STATINC(IP_STAT_ODROPPED);
		error = EADDRNOTAVAIL;
		goto bad;
	}

	/*
	 * Look for broadcast address and verify user is allowed to
	 * send such a packet.
	 */
	if (isbroadcast) {
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if ((flags & IP_ALLOWBROADCAST) == 0) {
			error = EACCES;
			goto bad;
		}
		/* don't allow broadcast messages to be fragmented */
		if (ntohs(ip->ip_len) > ifp->if_mtu) {
			error = EMSGSIZE;
			goto bad;
		}
		m->m_flags |= M_BCAST;
	} else
		m->m_flags &= ~M_BCAST;

sendit:
	/*
	 * Assign the IP identification, unless we are forwarding or the
	 * caller asked us to keep the existing id (IP_NOIPNEWID).
	 */
	if ((flags & (IP_FORWARDING|IP_NOIPNEWID)) == 0) {
		if (m->m_pkthdr.len < IP_MINFRAGSIZE) {
			ip->ip_id = 0;
		} else if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
			ip->ip_id = ip_newid(ia);
		} else {

			/*
			 * TSO capable interfaces (typically?) increment
			 * ip_id for each segment.
			 * "allocate" enough ids here to increase the chance
			 * for them to be unique.
			 *
			 * note that the following calculation is not
			 * needed to be precise.  wasting some ip_id is fine.
			 */

			unsigned int segsz = m->m_pkthdr.segsz;
			unsigned int datasz = ntohs(ip->ip_len) - hlen;
			unsigned int num = howmany(datasz, segsz);

			ip->ip_id = ip_newid_range(ia, num);
		}
	}

	/*
	 * If we're doing Path MTU Discovery, we need to set DF unless
	 * the route's MTU is locked.
	 */
	if ((flags & IP_MTUDISC) != 0 && rtmtu_nolock) {
		ip->ip_off |= htons(IP_DF);
	}

#ifdef IPSEC
	if (ipsec_used) {
		bool ipsec_done = false;

		/* Perform IPsec processing, if any. */
		error = ipsec4_output(m, so, flags, &sp, &mtu, &natt_frag,
		    &ipsec_done);
		if (error || ipsec_done)
			goto done;
	}
#endif

	/*
	 * Run through list of hooks for output packets.
	 */
	error = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_OUT);
	if (error)
		goto done;
	if (m == NULL)
		goto done;

	/* The hooks may have replaced the mbuf; reload header state. */
	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;

	/* Record the IP header length in the upper half of csum_data. */
	m->m_pkthdr.csum_data |= hlen << 16;

#if IFA_STATS
	/*
	 * search for the source address structure to
	 * maintain output statistics.
	 */
	INADDR_TO_IA(ip->ip_src, ia);
#endif

	/* Maybe skip checksums on loopback interfaces. */
	if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
		m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
	}
	/* Checksums requested that the interface cannot offload. */
	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
	/*
	 * If small enough for mtu of path, or if using TCP segmentation
	 * offload, can just send directly.
	 */
	if (ntohs(ip->ip_len) <= mtu ||
	    (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) {
		const struct sockaddr *sa;

#if IFA_STATS
		if (ia)
			ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
#endif
		/*
		 * Always initialize the sum to 0!  Some HW assisted
		 * checksumming requires this.
		 */
		ip->ip_sum = 0;

		if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
			/*
			 * Perform any checksums that the hardware can't do
			 * for us.
			 *
			 * XXX Does any hardware require the {th,uh}_sum
			 * XXX fields to be 0?
			 */
			if (sw_csum & M_CSUM_IPv4) {
				KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4));
				ip->ip_sum = in_cksum(m, hlen);
				m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
			}
			if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
				if (IN_NEED_CHECKSUM(ifp,
				    sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
					in_delayed_cksum(m);
				}
				m->m_pkthdr.csum_flags &=
				    ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
			}
		}

		/* Multicast goes to the real destination, not the nexthop. */
		sa = (m->m_flags & M_MCAST) ? sintocsa(rdst) : sintocsa(dst);
		if (__predict_true(
		    (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 ||
		    (ifp->if_capenable & IFCAP_TSOv4) != 0)) {
#ifndef NET_MPSAFE
			KERNEL_LOCK(1, NULL);
#endif
			error = (*ifp->if_output)(ifp, m, sa, rt);
#ifndef NET_MPSAFE
			KERNEL_UNLOCK_ONE(NULL);
#endif
		} else {
			/* TSO requested but not enabled on this interface. */
			error = ip_tso_output(ifp, m, sa, rt);
		}
		goto done;
	}

	/*
	 * We can't use HW checksumming if we're about
	 * to fragment the packet.
	 *
	 * XXX Some hardware can do this.
	 */
	if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
		if (IN_NEED_CHECKSUM(ifp,
		    m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
			in_delayed_cksum(m);
		}
		m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ntohs(ip->ip_off) & IP_DF) {
		if (flags & IP_RETURNMTU) {
			struct inpcb *inp;

			/* Report the MTU we hit back through the PCB. */
			KASSERT(so && solocked(so));
			inp = sotoinpcb(so);
			inp->inp_errormtu = mtu;
		}
		error = EMSGSIZE;
		IP_STATINC(IP_STAT_CANTFRAG);
		goto bad;
	}

	error = ip_fragment(m, ifp, mtu);
	if (error) {
		/* ip_fragment() already freed the chain on failure. */
		m = NULL;
		goto bad;
	}

	/*
	 * Send the fragments one by one.  After the first failure the
	 * remaining fragments are freed rather than sent.
	 */
	for (; m; m = m0) {
		m0 = m->m_nextpkt;
		m->m_nextpkt = 0;
		if (error) {
			m_freem(m);
			continue;
		}
#if IFA_STATS
		/*
		 * NOTE(review): `ip' still refers to the header loaded before
		 * ip_fragment() was called, not to the fragment being sent in
		 * this iteration -- confirm this accounting is intended.
		 */
		if (ia)
			ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
#endif
		/*
		 * If we get there, the packet has not been handled by
		 * IPsec whereas it should have. Now that it has been
		 * fragmented, re-inject it in ip_output so that IPsec
		 * processing can occur.
		 */
		if (natt_frag) {
			error = ip_output(m, opt, ro,
			    flags | IP_RAWOUTPUT | IP_NOIPNEWID,
			    imo, so);
		} else {
			KASSERT((m->m_pkthdr.csum_flags &
			    (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
#ifndef NET_MPSAFE
			KERNEL_LOCK(1, NULL);
#endif
			error = (*ifp->if_output)(ifp, m,
			    (m->m_flags & M_MCAST) ?
			    sintocsa(rdst) : sintocsa(dst), rt);
#ifndef NET_MPSAFE
			KERNEL_UNLOCK_ONE(NULL);
#endif
		}
	}
	if (error == 0) {
		IP_STATINC(IP_STAT_FRAGMENTED);
	}
done:
	/* Release the temporary route cache if the caller gave us none. */
	if (ro == &iproute) {
		rtcache_free(&iproute);
	}
#ifdef IPSEC
	if (sp) {
		KEY_FREESP(&sp);
	}
#endif
	return error;
bad:
	m_freem(m);
	goto done;
}

/*
 * Split the IP packet in m into fragments that fit within mtu.
 *
 * On success the original mbuf m becomes the first fragment and the
 * remaining fragments are linked to it, in order, through m_nextpkt.
 * The caller is responsible for sending (or freeing) every packet on
 * that list.
 *
 * ifp may be NULL; when it is non-NULL it is used to decide which
 * checksums must be computed in software and to verify that the
 * interface send queue has room for all fragments.
 *
 * Returns 0 on success.  On failure returns EMSGSIZE (fewer than 8
 * payload bytes would fit in a fragment) or ENOBUFS (mbuf shortage or
 * insufficient room in the send queue); in every failure case all mbufs,
 * including the original m, have been freed.
 */
int
ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
{
	struct ip *ip, *mhip;
	struct mbuf *m0;
	int len, hlen, off;
	int mhlen, firstlen;
	struct mbuf **mnext;
	int sw_csum = m->m_pkthdr.csum_flags;
	int fragments = 0;
	int s;
	int error = 0;

	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;
	if (ifp != NULL)
		sw_csum &= ~ifp->if_csum_flags_tx;

	/* Payload bytes per fragment, rounded down to a multiple of 8. */
	len = (mtu - hlen) &~ 7;
	if (len < 8) {
		m_freem(m);
		return (EMSGSIZE);
	}

	firstlen = len;
	mnext = &m->m_nextpkt;

	/*
	 * Loop through length of segment after first fragment,
	 * make new header and copy data of each part and link onto chain.
	 */
	m0 = m;
	mhlen = sizeof (struct ip);
	for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == 0) {
			error = ENOBUFS;
			IP_STATINC(IP_STAT_ODROPPED);
			goto sendorfree;
		}
		MCLAIM(m, m0->m_owner);
		*mnext = m;
		mnext = &m->m_nextpkt;
		m->m_data += max_linkhdr;
		mhip = mtod(m, struct ip *);
		*mhip = *ip;
		/* we must inherit MCAST and BCAST flags */
		m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
		if (hlen > sizeof (struct ip)) {
			/* Carry over only the options ip_optcopy() keeps. */
			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
			mhip->ip_hl = mhlen >> 2;
		}
		m->m_len = mhlen;
		/* Fragment offset, in host order until HTONS() below. */
		mhip->ip_off = ((off - hlen) >> 3) +
		    (ntohs(ip->ip_off) & ~IP_MF);
		if (ip->ip_off & htons(IP_MF))
			mhip->ip_off |= IP_MF;
		if (off + len >= ntohs(ip->ip_len))
			len = ntohs(ip->ip_len) - off;	/* last fragment */
		else
			mhip->ip_off |= IP_MF;
		HTONS(mhip->ip_off);
		mhip->ip_len = htons((u_int16_t)(len + mhlen));
		m->m_next = m_copym(m0, off, len, M_DONTWAIT);
		if (m->m_next == 0) {
			error = ENOBUFS;	/* ??? */
			IP_STATINC(IP_STAT_ODROPPED);
			goto sendorfree;
		}
		m->m_pkthdr.len = mhlen + len;
		m->m_pkthdr.rcvif = NULL;
		mhip->ip_sum = 0;
		KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
		if (sw_csum & M_CSUM_IPv4) {
			mhip->ip_sum = in_cksum(m, mhlen);
		} else {
			/*
			 * checksum is hw-offloaded or not necessary.
			 */
			m->m_pkthdr.csum_flags |=
			    m0->m_pkthdr.csum_flags & M_CSUM_IPv4;
			m->m_pkthdr.csum_data |= mhlen << 16;
			KASSERT(!(ifp != NULL &&
			    IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4))
			    || (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
		}
		IP_STATINC(IP_STAT_OFRAGMENTS);
		fragments++;
	}
	/*
	 * Update first fragment by trimming what's been copied out
	 * and updating header, then send each fragment (in order).
	 */
	m = m0;
	/* Negative count: m_adj() trims from the tail of the chain. */
	m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
	m->m_pkthdr.len = hlen + firstlen;
	ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
	ip->ip_off |= htons(IP_MF);
	ip->ip_sum = 0;
	if (sw_csum & M_CSUM_IPv4) {
		ip->ip_sum = in_cksum(m, hlen);
		m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
	} else {
		/*
		 * checksum is hw-offloaded or not necessary.
		 */
		KASSERT(!(ifp != NULL && IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4))
		    || (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
		KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >=
		    sizeof(struct ip));
	}
sendorfree:
	/*
	 * If there is no room for all the fragments, don't queue
	 * any of them.
	 */
	if (ifp != NULL) {
		s = splnet();
		if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
		    error == 0) {
			error = ENOBUFS;
			IP_STATINC(IP_STAT_ODROPPED);
			IFQ_INC_DROPS(&ifp->if_snd);
		}
		splx(s);
	}
	if (error) {
		/* Free the whole list, starting with the original packet. */
		for (m = m0; m; m = m0) {
			m0 = m->m_nextpkt;
			m->m_nextpkt = NULL;
			m_freem(m);
		}
	}
	return (error);
}

/*
 * Process a delayed payload checksum calculation.
802 */ 803 void 804 in_delayed_cksum(struct mbuf *m) 805 { 806 struct ip *ip; 807 u_int16_t csum, offset; 808 809 ip = mtod(m, struct ip *); 810 offset = ip->ip_hl << 2; 811 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset); 812 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0) 813 csum = 0xffff; 814 815 offset += M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data); 816 817 if ((offset + sizeof(u_int16_t)) > m->m_len) { 818 /* This happen when ip options were inserted 819 printf("in_delayed_cksum: pullup len %d off %d proto %d\n", 820 m->m_len, offset, ip->ip_p); 821 */ 822 m_copyback(m, offset, sizeof(csum), (void *) &csum); 823 } else 824 *(u_int16_t *)(mtod(m, char *) + offset) = csum; 825 } 826 827 /* 828 * Determine the maximum length of the options to be inserted; 829 * we would far rather allocate too much space rather than too little. 830 */ 831 832 u_int 833 ip_optlen(struct inpcb *inp) 834 { 835 struct mbuf *m = inp->inp_options; 836 837 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) { 838 return (m->m_len - offsetof(struct ipoption, ipopt_dst)); 839 } 840 return 0; 841 } 842 843 /* 844 * Insert IP options into preformed packet. 845 * Adjust IP destination as required for IP source routing, 846 * as indicated by a non-zero in_addr at the start of the options. 
847 */ 848 static struct mbuf * 849 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 850 { 851 struct ipoption *p = mtod(opt, struct ipoption *); 852 struct mbuf *n; 853 struct ip *ip = mtod(m, struct ip *); 854 unsigned optlen; 855 856 optlen = opt->m_len - sizeof(p->ipopt_dst); 857 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 858 return (m); /* XXX should fail */ 859 if (!in_nullhost(p->ipopt_dst)) 860 ip->ip_dst = p->ipopt_dst; 861 if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) { 862 MGETHDR(n, M_DONTWAIT, MT_HEADER); 863 if (n == 0) 864 return (m); 865 MCLAIM(n, m->m_owner); 866 M_MOVE_PKTHDR(n, m); 867 m->m_len -= sizeof(struct ip); 868 m->m_data += sizeof(struct ip); 869 n->m_next = m; 870 m = n; 871 m->m_len = optlen + sizeof(struct ip); 872 m->m_data += max_linkhdr; 873 bcopy((void *)ip, mtod(m, void *), sizeof(struct ip)); 874 } else { 875 m->m_data -= optlen; 876 m->m_len += optlen; 877 memmove(mtod(m, void *), ip, sizeof(struct ip)); 878 } 879 m->m_pkthdr.len += optlen; 880 ip = mtod(m, struct ip *); 881 bcopy((void *)p->ipopt_list, (void *)(ip + 1), (unsigned)optlen); 882 *phlen = sizeof(struct ip) + optlen; 883 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 884 return (m); 885 } 886 887 /* 888 * Copy options from ip to jp, 889 * omitting those not copied during fragmentation. 890 */ 891 int 892 ip_optcopy(struct ip *ip, struct ip *jp) 893 { 894 u_char *cp, *dp; 895 int opt, optlen, cnt; 896 897 cp = (u_char *)(ip + 1); 898 dp = (u_char *)(jp + 1); 899 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 900 for (; cnt > 0; cnt -= optlen, cp += optlen) { 901 opt = cp[0]; 902 if (opt == IPOPT_EOL) 903 break; 904 if (opt == IPOPT_NOP) { 905 /* Preserve for IP mcast tunnel's LSRR alignment. 
*/ 906 *dp++ = IPOPT_NOP; 907 optlen = 1; 908 continue; 909 } 910 911 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp)); 912 optlen = cp[IPOPT_OLEN]; 913 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen < cnt); 914 915 /* Invalid lengths should have been caught by ip_dooptions. */ 916 if (optlen > cnt) 917 optlen = cnt; 918 if (IPOPT_COPIED(opt)) { 919 bcopy((void *)cp, (void *)dp, (unsigned)optlen); 920 dp += optlen; 921 } 922 } 923 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 924 *dp++ = IPOPT_EOL; 925 return (optlen); 926 } 927 928 /* 929 * IP socket option processing. 930 */ 931 int 932 ip_ctloutput(int op, struct socket *so, struct sockopt *sopt) 933 { 934 struct inpcb *inp = sotoinpcb(so); 935 struct ip *ip = &inp->inp_ip; 936 int inpflags = inp->inp_flags; 937 int optval = 0, error = 0; 938 939 if (sopt->sopt_level != IPPROTO_IP) { 940 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) 941 return 0; 942 return ENOPROTOOPT; 943 } 944 945 switch (op) { 946 case PRCO_SETOPT: 947 switch (sopt->sopt_name) { 948 case IP_OPTIONS: 949 #ifdef notyet 950 case IP_RETOPTS: 951 #endif 952 error = ip_pcbopts(inp, sopt); 953 break; 954 955 case IP_TOS: 956 case IP_TTL: 957 case IP_MINTTL: 958 case IP_PKTINFO: 959 case IP_RECVOPTS: 960 case IP_RECVRETOPTS: 961 case IP_RECVDSTADDR: 962 case IP_RECVIF: 963 case IP_RECVPKTINFO: 964 case IP_RECVTTL: 965 error = sockopt_getint(sopt, &optval); 966 if (error) 967 break; 968 969 switch (sopt->sopt_name) { 970 case IP_TOS: 971 ip->ip_tos = optval; 972 break; 973 974 case IP_TTL: 975 ip->ip_ttl = optval; 976 break; 977 978 case IP_MINTTL: 979 if (optval > 0 && optval <= MAXTTL) 980 inp->inp_ip_minttl = optval; 981 else 982 error = EINVAL; 983 break; 984 #define OPTSET(bit) \ 985 if (optval) \ 986 inpflags |= bit; \ 987 else \ 988 inpflags &= ~bit; 989 990 case IP_PKTINFO: 991 OPTSET(INP_PKTINFO); 992 break; 993 994 case IP_RECVOPTS: 995 OPTSET(INP_RECVOPTS); 996 break; 997 998 case IP_RECVPKTINFO: 999 
OPTSET(INP_RECVPKTINFO); 1000 break; 1001 1002 case IP_RECVRETOPTS: 1003 OPTSET(INP_RECVRETOPTS); 1004 break; 1005 1006 case IP_RECVDSTADDR: 1007 OPTSET(INP_RECVDSTADDR); 1008 break; 1009 1010 case IP_RECVIF: 1011 OPTSET(INP_RECVIF); 1012 break; 1013 1014 case IP_RECVTTL: 1015 OPTSET(INP_RECVTTL); 1016 break; 1017 } 1018 break; 1019 #undef OPTSET 1020 1021 case IP_MULTICAST_IF: 1022 case IP_MULTICAST_TTL: 1023 case IP_MULTICAST_LOOP: 1024 case IP_ADD_MEMBERSHIP: 1025 case IP_DROP_MEMBERSHIP: 1026 error = ip_setmoptions(&inp->inp_moptions, sopt); 1027 break; 1028 1029 case IP_PORTRANGE: 1030 error = sockopt_getint(sopt, &optval); 1031 if (error) 1032 break; 1033 1034 switch (optval) { 1035 case IP_PORTRANGE_DEFAULT: 1036 case IP_PORTRANGE_HIGH: 1037 inpflags &= ~(INP_LOWPORT); 1038 break; 1039 1040 case IP_PORTRANGE_LOW: 1041 inpflags |= INP_LOWPORT; 1042 break; 1043 1044 default: 1045 error = EINVAL; 1046 break; 1047 } 1048 break; 1049 1050 case IP_PORTALGO: 1051 error = sockopt_getint(sopt, &optval); 1052 if (error) 1053 break; 1054 1055 error = portalgo_algo_index_select( 1056 (struct inpcb_hdr *)inp, optval); 1057 break; 1058 1059 #if defined(IPSEC) 1060 case IP_IPSEC_POLICY: 1061 if (ipsec_enabled) { 1062 error = ipsec4_set_policy(inp, sopt->sopt_name, 1063 sopt->sopt_data, sopt->sopt_size, 1064 curlwp->l_cred); 1065 break; 1066 } 1067 /*FALLTHROUGH*/ 1068 #endif /* IPSEC */ 1069 1070 default: 1071 error = ENOPROTOOPT; 1072 break; 1073 } 1074 break; 1075 1076 case PRCO_GETOPT: 1077 switch (sopt->sopt_name) { 1078 case IP_OPTIONS: 1079 case IP_RETOPTS: { 1080 struct mbuf *mopts = inp->inp_options; 1081 1082 if (mopts) { 1083 struct mbuf *m; 1084 1085 m = m_copym(mopts, 0, M_COPYALL, M_DONTWAIT); 1086 if (m == NULL) { 1087 error = ENOBUFS; 1088 break; 1089 } 1090 error = sockopt_setmbuf(sopt, m); 1091 } 1092 break; 1093 } 1094 case IP_PKTINFO: 1095 case IP_TOS: 1096 case IP_TTL: 1097 case IP_MINTTL: 1098 case IP_RECVOPTS: 1099 case IP_RECVRETOPTS: 1100 case 
IP_RECVDSTADDR: 1101 case IP_RECVIF: 1102 case IP_RECVPKTINFO: 1103 case IP_RECVTTL: 1104 case IP_ERRORMTU: 1105 switch (sopt->sopt_name) { 1106 case IP_TOS: 1107 optval = ip->ip_tos; 1108 break; 1109 1110 case IP_TTL: 1111 optval = ip->ip_ttl; 1112 break; 1113 1114 case IP_MINTTL: 1115 optval = inp->inp_ip_minttl; 1116 break; 1117 1118 case IP_ERRORMTU: 1119 optval = inp->inp_errormtu; 1120 break; 1121 1122 #define OPTBIT(bit) (inpflags & bit ? 1 : 0) 1123 1124 case IP_PKTINFO: 1125 optval = OPTBIT(INP_PKTINFO); 1126 break; 1127 1128 case IP_RECVOPTS: 1129 optval = OPTBIT(INP_RECVOPTS); 1130 break; 1131 1132 case IP_RECVPKTINFO: 1133 optval = OPTBIT(INP_RECVPKTINFO); 1134 break; 1135 1136 case IP_RECVRETOPTS: 1137 optval = OPTBIT(INP_RECVRETOPTS); 1138 break; 1139 1140 case IP_RECVDSTADDR: 1141 optval = OPTBIT(INP_RECVDSTADDR); 1142 break; 1143 1144 case IP_RECVIF: 1145 optval = OPTBIT(INP_RECVIF); 1146 break; 1147 1148 case IP_RECVTTL: 1149 optval = OPTBIT(INP_RECVTTL); 1150 break; 1151 } 1152 error = sockopt_setint(sopt, optval); 1153 break; 1154 1155 #if 0 /* defined(IPSEC) */ 1156 case IP_IPSEC_POLICY: 1157 { 1158 struct mbuf *m = NULL; 1159 1160 /* XXX this will return EINVAL as sopt is empty */ 1161 error = ipsec4_get_policy(inp, sopt->sopt_data, 1162 sopt->sopt_size, &m); 1163 if (error == 0) 1164 error = sockopt_setmbuf(sopt, m); 1165 break; 1166 } 1167 #endif /*IPSEC*/ 1168 1169 case IP_MULTICAST_IF: 1170 case IP_MULTICAST_TTL: 1171 case IP_MULTICAST_LOOP: 1172 case IP_ADD_MEMBERSHIP: 1173 case IP_DROP_MEMBERSHIP: 1174 error = ip_getmoptions(inp->inp_moptions, sopt); 1175 break; 1176 1177 case IP_PORTRANGE: 1178 if (inpflags & INP_LOWPORT) 1179 optval = IP_PORTRANGE_LOW; 1180 else 1181 optval = IP_PORTRANGE_DEFAULT; 1182 error = sockopt_setint(sopt, optval); 1183 break; 1184 1185 case IP_PORTALGO: 1186 optval = inp->inp_portalgo; 1187 error = sockopt_setint(sopt, optval); 1188 break; 1189 1190 default: 1191 error = ENOPROTOOPT; 1192 break; 1193 } 1194 
break; 1195 } 1196 1197 if (!error) { 1198 inp->inp_flags = inpflags; 1199 } 1200 return error; 1201 } 1202 1203 /* 1204 * Set up IP options in pcb for insertion in output packets. 1205 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1206 * with destination address if source routed. 1207 */ 1208 static int 1209 ip_pcbopts(struct inpcb *inp, const struct sockopt *sopt) 1210 { 1211 struct mbuf *m; 1212 const u_char *cp; 1213 u_char *dp; 1214 int cnt; 1215 1216 /* Turn off any old options. */ 1217 if (inp->inp_options) { 1218 m_free(inp->inp_options); 1219 } 1220 inp->inp_options = NULL; 1221 if ((cnt = sopt->sopt_size) == 0) { 1222 /* Only turning off any previous options. */ 1223 return 0; 1224 } 1225 cp = sopt->sopt_data; 1226 1227 #ifndef __vax__ 1228 if (cnt % sizeof(int32_t)) 1229 return (EINVAL); 1230 #endif 1231 1232 m = m_get(M_DONTWAIT, MT_SOOPTS); 1233 if (m == NULL) 1234 return (ENOBUFS); 1235 1236 dp = mtod(m, u_char *); 1237 memset(dp, 0, sizeof(struct in_addr)); 1238 dp += sizeof(struct in_addr); 1239 m->m_len = sizeof(struct in_addr); 1240 1241 /* 1242 * IP option list according to RFC791. Each option is of the form 1243 * 1244 * [optval] [olen] [(olen - 2) data bytes] 1245 * 1246 * We validate the list and copy options to an mbuf for prepending 1247 * to data packets. The IP first-hop destination address will be 1248 * stored before actual options and is zero if unset. 1249 */ 1250 while (cnt > 0) { 1251 uint8_t optval, olen, offset; 1252 1253 optval = cp[IPOPT_OPTVAL]; 1254 1255 if (optval == IPOPT_EOL || optval == IPOPT_NOP) { 1256 olen = 1; 1257 } else { 1258 if (cnt < IPOPT_OLEN + 1) 1259 goto bad; 1260 1261 olen = cp[IPOPT_OLEN]; 1262 if (olen < IPOPT_OLEN + 1 || olen > cnt) 1263 goto bad; 1264 } 1265 1266 if (optval == IPOPT_LSRR || optval == IPOPT_SSRR) { 1267 /* 1268 * user process specifies route as: 1269 * ->A->B->C->D 1270 * D must be our final destination (but we can't 1271 * check that since we may not have connected yet). 
1272 * A is first hop destination, which doesn't appear in 1273 * actual IP option, but is stored before the options. 1274 */ 1275 if (olen < IPOPT_OFFSET + 1 + sizeof(struct in_addr)) 1276 goto bad; 1277 1278 offset = cp[IPOPT_OFFSET]; 1279 memcpy(mtod(m, u_char *), cp + IPOPT_OFFSET + 1, 1280 sizeof(struct in_addr)); 1281 1282 cp += sizeof(struct in_addr); 1283 cnt -= sizeof(struct in_addr); 1284 olen -= sizeof(struct in_addr); 1285 1286 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1287 goto bad; 1288 1289 memcpy(dp, cp, olen); 1290 dp[IPOPT_OPTVAL] = optval; 1291 dp[IPOPT_OLEN] = olen; 1292 dp[IPOPT_OFFSET] = offset; 1293 break; 1294 } else { 1295 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1296 goto bad; 1297 1298 memcpy(dp, cp, olen); 1299 break; 1300 } 1301 1302 dp += olen; 1303 m->m_len += olen; 1304 1305 if (optval == IPOPT_EOL) 1306 break; 1307 1308 cp += olen; 1309 cnt -= olen; 1310 } 1311 1312 inp->inp_options = m; 1313 return 0; 1314 bad: 1315 (void)m_free(m); 1316 return EINVAL; 1317 } 1318 1319 /* 1320 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 
1321 */ 1322 static struct ifnet * 1323 ip_multicast_if(struct in_addr *a, int *ifindexp) 1324 { 1325 int ifindex; 1326 struct ifnet *ifp = NULL; 1327 struct in_ifaddr *ia; 1328 1329 if (ifindexp) 1330 *ifindexp = 0; 1331 if (ntohl(a->s_addr) >> 24 == 0) { 1332 ifindex = ntohl(a->s_addr) & 0xffffff; 1333 ifp = if_byindex(ifindex); 1334 if (!ifp) 1335 return NULL; 1336 if (ifindexp) 1337 *ifindexp = ifindex; 1338 } else { 1339 LIST_FOREACH(ia, &IN_IFADDR_HASH(a->s_addr), ia_hash) { 1340 if (in_hosteq(ia->ia_addr.sin_addr, *a) && 1341 (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) { 1342 ifp = ia->ia_ifp; 1343 break; 1344 } 1345 } 1346 } 1347 return ifp; 1348 } 1349 1350 static int 1351 ip_getoptval(const struct sockopt *sopt, u_int8_t *val, u_int maxval) 1352 { 1353 u_int tval; 1354 u_char cval; 1355 int error; 1356 1357 if (sopt == NULL) 1358 return EINVAL; 1359 1360 switch (sopt->sopt_size) { 1361 case sizeof(u_char): 1362 error = sockopt_get(sopt, &cval, sizeof(u_char)); 1363 tval = cval; 1364 break; 1365 1366 case sizeof(u_int): 1367 error = sockopt_get(sopt, &tval, sizeof(u_int)); 1368 break; 1369 1370 default: 1371 error = EINVAL; 1372 } 1373 1374 if (error) 1375 return error; 1376 1377 if (tval > maxval) 1378 return EINVAL; 1379 1380 *val = tval; 1381 return 0; 1382 } 1383 1384 static int 1385 ip_get_membership(const struct sockopt *sopt, struct ifnet **ifp, 1386 struct in_addr *ia, bool add) 1387 { 1388 int error; 1389 struct ip_mreq mreq; 1390 1391 error = sockopt_get(sopt, &mreq, sizeof(mreq)); 1392 if (error) 1393 return error; 1394 1395 if (!IN_MULTICAST(mreq.imr_multiaddr.s_addr)) 1396 return EINVAL; 1397 1398 memcpy(ia, &mreq.imr_multiaddr, sizeof(*ia)); 1399 1400 if (in_nullhost(mreq.imr_interface)) { 1401 union { 1402 struct sockaddr dst; 1403 struct sockaddr_in dst4; 1404 } u; 1405 struct route ro; 1406 1407 if (!add) { 1408 *ifp = NULL; 1409 return 0; 1410 } 1411 /* 1412 * If no interface address was provided, use the interface of 1413 * the route to 
the given multicast address. 1414 */ 1415 struct rtentry *rt; 1416 memset(&ro, 0, sizeof(ro)); 1417 1418 sockaddr_in_init(&u.dst4, ia, 0); 1419 rtcache_setdst(&ro, &u.dst); 1420 *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL; 1421 rtcache_free(&ro); 1422 } else { 1423 *ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1424 if (!add && *ifp == NULL) 1425 return EADDRNOTAVAIL; 1426 } 1427 return 0; 1428 } 1429 1430 /* 1431 * Add a multicast group membership. 1432 * Group must be a valid IP multicast address. 1433 */ 1434 static int 1435 ip_add_membership(struct ip_moptions *imo, const struct sockopt *sopt) 1436 { 1437 struct ifnet *ifp; 1438 struct in_addr ia; 1439 int i, error; 1440 1441 if (sopt->sopt_size == sizeof(struct ip_mreq)) 1442 error = ip_get_membership(sopt, &ifp, &ia, true); 1443 else 1444 #ifdef INET6 1445 error = ip6_get_membership(sopt, &ifp, &ia, sizeof(ia)); 1446 #else 1447 return EINVAL; 1448 #endif 1449 1450 if (error) 1451 return error; 1452 1453 /* 1454 * See if we found an interface, and confirm that it 1455 * supports multicast. 1456 */ 1457 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 1458 return EADDRNOTAVAIL; 1459 1460 /* 1461 * See if the membership already exists or if all the 1462 * membership slots are full. 1463 */ 1464 for (i = 0; i < imo->imo_num_memberships; ++i) { 1465 if (imo->imo_membership[i]->inm_ifp == ifp && 1466 in_hosteq(imo->imo_membership[i]->inm_addr, ia)) 1467 break; 1468 } 1469 if (i < imo->imo_num_memberships) 1470 return EADDRINUSE; 1471 1472 if (i == IP_MAX_MEMBERSHIPS) 1473 return ETOOMANYREFS; 1474 1475 /* 1476 * Everything looks good; add a new record to the multicast 1477 * address list for the given interface. 1478 */ 1479 if ((imo->imo_membership[i] = in_addmulti(&ia, ifp)) == NULL) 1480 return ENOBUFS; 1481 1482 ++imo->imo_num_memberships; 1483 return 0; 1484 } 1485 1486 /* 1487 * Drop a multicast group membership. 1488 * Group must be a valid IP multicast address. 
1489 */ 1490 static int 1491 ip_drop_membership(struct ip_moptions *imo, const struct sockopt *sopt) 1492 { 1493 struct in_addr ia; 1494 struct ifnet *ifp; 1495 int i, error; 1496 1497 if (sopt->sopt_size == sizeof(struct ip_mreq)) 1498 error = ip_get_membership(sopt, &ifp, &ia, false); 1499 else 1500 #ifdef INET6 1501 error = ip6_get_membership(sopt, &ifp, &ia, sizeof(ia)); 1502 #else 1503 return EINVAL; 1504 #endif 1505 1506 if (error) 1507 return error; 1508 1509 /* 1510 * Find the membership in the membership array. 1511 */ 1512 for (i = 0; i < imo->imo_num_memberships; ++i) { 1513 if ((ifp == NULL || 1514 imo->imo_membership[i]->inm_ifp == ifp) && 1515 in_hosteq(imo->imo_membership[i]->inm_addr, ia)) 1516 break; 1517 } 1518 if (i == imo->imo_num_memberships) 1519 return EADDRNOTAVAIL; 1520 1521 /* 1522 * Give up the multicast address record to which the 1523 * membership points. 1524 */ 1525 in_delmulti(imo->imo_membership[i]); 1526 1527 /* 1528 * Remove the gap in the membership array. 1529 */ 1530 for (++i; i < imo->imo_num_memberships; ++i) 1531 imo->imo_membership[i-1] = imo->imo_membership[i]; 1532 --imo->imo_num_memberships; 1533 return 0; 1534 } 1535 1536 /* 1537 * Set the IP multicast options in response to user setsockopt(). 1538 */ 1539 int 1540 ip_setmoptions(struct ip_moptions **pimo, const struct sockopt *sopt) 1541 { 1542 struct ip_moptions *imo = *pimo; 1543 struct in_addr addr; 1544 struct ifnet *ifp; 1545 int ifindex, error = 0; 1546 1547 if (!imo) { 1548 /* 1549 * No multicast option buffer attached to the pcb; 1550 * allocate one and initialize to default values. 
1551 */ 1552 imo = kmem_intr_alloc(sizeof(*imo), KM_NOSLEEP); 1553 if (imo == NULL) 1554 return ENOBUFS; 1555 1556 imo->imo_multicast_ifp = NULL; 1557 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1558 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1559 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1560 imo->imo_num_memberships = 0; 1561 *pimo = imo; 1562 } 1563 1564 switch (sopt->sopt_name) { 1565 case IP_MULTICAST_IF: 1566 /* 1567 * Select the interface for outgoing multicast packets. 1568 */ 1569 error = sockopt_get(sopt, &addr, sizeof(addr)); 1570 if (error) 1571 break; 1572 1573 /* 1574 * INADDR_ANY is used to remove a previous selection. 1575 * When no interface is selected, a default one is 1576 * chosen every time a multicast packet is sent. 1577 */ 1578 if (in_nullhost(addr)) { 1579 imo->imo_multicast_ifp = NULL; 1580 break; 1581 } 1582 /* 1583 * The selected interface is identified by its local 1584 * IP address. Find the interface and confirm that 1585 * it supports multicasting. 1586 */ 1587 ifp = ip_multicast_if(&addr, &ifindex); 1588 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1589 error = EADDRNOTAVAIL; 1590 break; 1591 } 1592 imo->imo_multicast_ifp = ifp; 1593 if (ifindex) 1594 imo->imo_multicast_addr = addr; 1595 else 1596 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1597 break; 1598 1599 case IP_MULTICAST_TTL: 1600 /* 1601 * Set the IP time-to-live for outgoing multicast packets. 1602 */ 1603 error = ip_getoptval(sopt, &imo->imo_multicast_ttl, MAXTTL); 1604 break; 1605 1606 case IP_MULTICAST_LOOP: 1607 /* 1608 * Set the loopback flag for outgoing multicast packets. 1609 * Must be zero or one. 
1610 */ 1611 error = ip_getoptval(sopt, &imo->imo_multicast_loop, 1); 1612 break; 1613 1614 case IP_ADD_MEMBERSHIP: /* IPV6_JOIN_GROUP */ 1615 error = ip_add_membership(imo, sopt); 1616 break; 1617 1618 case IP_DROP_MEMBERSHIP: /* IPV6_LEAVE_GROUP */ 1619 error = ip_drop_membership(imo, sopt); 1620 break; 1621 1622 default: 1623 error = EOPNOTSUPP; 1624 break; 1625 } 1626 1627 /* 1628 * If all options have default values, no need to keep the mbuf. 1629 */ 1630 if (imo->imo_multicast_ifp == NULL && 1631 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1632 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1633 imo->imo_num_memberships == 0) { 1634 kmem_free(imo, sizeof(*imo)); 1635 *pimo = NULL; 1636 } 1637 1638 return error; 1639 } 1640 1641 /* 1642 * Return the IP multicast options in response to user getsockopt(). 1643 */ 1644 int 1645 ip_getmoptions(struct ip_moptions *imo, struct sockopt *sopt) 1646 { 1647 struct in_addr addr; 1648 struct in_ifaddr *ia; 1649 uint8_t optval; 1650 int error = 0; 1651 1652 switch (sopt->sopt_name) { 1653 case IP_MULTICAST_IF: 1654 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1655 addr = zeroin_addr; 1656 else if (imo->imo_multicast_addr.s_addr) { 1657 /* return the value user has set */ 1658 addr = imo->imo_multicast_addr; 1659 } else { 1660 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1661 addr = ia ? ia->ia_addr.sin_addr : zeroin_addr; 1662 } 1663 error = sockopt_set(sopt, &addr, sizeof(addr)); 1664 break; 1665 1666 case IP_MULTICAST_TTL: 1667 optval = imo ? imo->imo_multicast_ttl 1668 : IP_DEFAULT_MULTICAST_TTL; 1669 1670 error = sockopt_set(sopt, &optval, sizeof(optval)); 1671 break; 1672 1673 case IP_MULTICAST_LOOP: 1674 optval = imo ? imo->imo_multicast_loop 1675 : IP_DEFAULT_MULTICAST_LOOP; 1676 1677 error = sockopt_set(sopt, &optval, sizeof(optval)); 1678 break; 1679 1680 default: 1681 error = EOPNOTSUPP; 1682 } 1683 1684 return error; 1685 } 1686 1687 /* 1688 * Discard the IP multicast options. 
1689 */ 1690 void 1691 ip_freemoptions(struct ip_moptions *imo) 1692 { 1693 int i; 1694 1695 if (imo != NULL) { 1696 for (i = 0; i < imo->imo_num_memberships; ++i) 1697 in_delmulti(imo->imo_membership[i]); 1698 kmem_free(imo, sizeof(*imo)); 1699 } 1700 } 1701 1702 /* 1703 * Routine called from ip_output() to loop back a copy of an IP multicast 1704 * packet to the input queue of a specified interface. Note that this 1705 * calls the output routine of the loopback "driver", but with an interface 1706 * pointer that might NOT be lo0ifp -- easier than replicating that code here. 1707 */ 1708 static void 1709 ip_mloopback(struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in *dst) 1710 { 1711 struct ip *ip; 1712 struct mbuf *copym; 1713 1714 copym = m_copypacket(m, M_DONTWAIT); 1715 if (copym != NULL 1716 && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip))) 1717 copym = m_pullup(copym, sizeof(struct ip)); 1718 if (copym == NULL) 1719 return; 1720 /* 1721 * We don't bother to fragment if the IP length is greater 1722 * than the interface's MTU. Can this possibly matter? 1723 */ 1724 ip = mtod(copym, struct ip *); 1725 1726 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 1727 in_delayed_cksum(copym); 1728 copym->m_pkthdr.csum_flags &= 1729 ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 1730 } 1731 1732 ip->ip_sum = 0; 1733 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1734 #ifndef NET_MPSAFE 1735 KERNEL_LOCK(1, NULL); 1736 #endif 1737 (void)looutput(ifp, copym, sintocsa(dst), NULL); 1738 #ifndef NET_MPSAFE 1739 KERNEL_UNLOCK_ONE(NULL); 1740 #endif 1741 } 1742