1 /* $NetBSD: ip_output.c,v 1.229 2014/05/30 01:39:03 christos Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Public Access Networks Corporation ("Panix"). It was developed under 38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 59 * POSSIBILITY OF SUCH DAMAGE. 60 */ 61 62 /* 63 * Copyright (c) 1982, 1986, 1988, 1990, 1993 64 * The Regents of the University of California. All rights reserved. 65 * 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * 1. Redistributions of source code must retain the above copyright 70 * notice, this list of conditions and the following disclaimer. 71 * 2. Redistributions in binary form must reproduce the above copyright 72 * notice, this list of conditions and the following disclaimer in the 73 * documentation and/or other materials provided with the distribution. 
74 * 3. Neither the name of the University nor the names of its contributors 75 * may be used to endorse or promote products derived from this software 76 * without specific prior written permission. 77 * 78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 88 * SUCH DAMAGE. 89 * 90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 91 */ 92 93 #include <sys/cdefs.h> 94 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.229 2014/05/30 01:39:03 christos Exp $"); 95 96 #include "opt_inet.h" 97 #include "opt_ipsec.h" 98 #include "opt_mrouting.h" 99 100 #include <sys/param.h> 101 #include <sys/kmem.h> 102 #include <sys/mbuf.h> 103 #include <sys/protosw.h> 104 #include <sys/socket.h> 105 #include <sys/socketvar.h> 106 #include <sys/kauth.h> 107 #ifdef IPSEC 108 #include <sys/domain.h> 109 #endif 110 #include <sys/systm.h> 111 112 #include <net/if.h> 113 #include <net/route.h> 114 #include <net/pfil.h> 115 116 #include <netinet/in.h> 117 #include <netinet/in_systm.h> 118 #include <netinet/ip.h> 119 #include <netinet/in_pcb.h> 120 #include <netinet/in_var.h> 121 #include <netinet/ip_var.h> 122 #include <netinet/ip_private.h> 123 #include <netinet/in_offload.h> 124 #include <netinet/portalgo.h> 125 #include <netinet/udp.h> 126 127 #ifdef MROUTING 128 #include <netinet/ip_mroute.h> 
#endif

#include <netipsec/ipsec.h>
#include <netipsec/key.h>

static int ip_pcbopts(struct inpcb *, const struct sockopt *);
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void ip_mloopback(struct ifnet *, struct mbuf *,
    const struct sockaddr_in *);
static int ip_setmoptions(struct inpcb *, const struct sockopt *);
static int ip_getmoptions(struct inpcb *, struct sockopt *);

extern pfil_head_t *inet_pfil_hook;			/* XXX */

int	ip_do_loopback_cksum = 0;

/*
 * IP output.  The packet in mbuf chain m contains a skeletal IP
 * header (with len, off, ttl, proto, tos, src, dst).
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 *
 * The variadic arguments are read in this order:
 *	struct mbuf *opt	 - IP options to insert, or NULL
 *	struct route *ro	 - route cache to use; if NULL a local
 *				   cache is used and released before return
 *	int flags		 - IP_* output flags tested below
 *	struct ip_moptions *imo	 - multicast options, or NULL
 *	struct socket *so	 - passed to IPsec and used for
 *				   IP_RETURNMTU
 *
 * Returns 0 on success or an errno value.
 */
int
ip_output(struct mbuf *m0, ...)
{
	struct rtentry *rt;
	struct ip *ip;
	struct ifnet *ifp;
	struct mbuf *m = m0;
	int hlen = sizeof (struct ip);
	int len, error = 0;
	struct route iproute;
	const struct sockaddr_in *dst;
	struct in_ifaddr *ia;
	struct ifaddr *xifa;
	struct mbuf *opt;
	struct route *ro;
	int flags, sw_csum;
	u_long mtu;
	struct ip_moptions *imo;
	struct socket *so;
	va_list ap;
#ifdef IPSEC
	struct secpolicy *sp = NULL;
#endif
	bool natt_frag = false;
	bool __unused done = false;
	union {
		struct sockaddr		dst;
		struct sockaddr_in	dst4;
	} u;
	struct sockaddr *rdst = &u.dst;	/* real IP destination, as opposed
					 * to the nexthop
					 */

	len = 0;
	va_start(ap, m0);
	opt = va_arg(ap, struct mbuf *);
	ro = va_arg(ap, struct route *);
	flags = va_arg(ap, int);
	imo = va_arg(ap, struct ip_moptions *);
	so = va_arg(ap, struct socket *);
	va_end(ap);

	MCLAIM(m, &ip_tx_mowner);

	KASSERT((m->m_flags & M_PKTHDR) != 0);
	KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) == 0);
	KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) !=
	    (M_CSUM_TCPv4|M_CSUM_UDPv4));

	if (opt) {
		m = ip_insertoptions(m, opt, &len);
		/*
		 * len is left at 0 when ip_insertoptions() could not
		 * insert the options; keep the plain header length then.
		 */
		if (len >= sizeof(struct ip))
			hlen = len;
	}
	ip = mtod(m, struct ip *);

	/*
	 * Fill in IP header.
	 */
	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
		ip->ip_v = IPVERSION;
		ip->ip_off = htons(0);
		/* ip->ip_id filled in after we find out source ia */
		ip->ip_hl = hlen >> 2;
		IP_STATINC(IP_STAT_LOCALOUT);
	} else {
		/* Header already complete; take its length from the packet. */
		hlen = ip->ip_hl << 2;
	}

	/*
	 * Route packet.
	 */
	memset(&iproute, 0, sizeof(iproute));
	if (ro == NULL)
		ro = &iproute;
	sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
	dst = satocsin(rtcache_getdst(ro));

	/*
	 * If there is a cached route, check that it is to the same
	 * destination and is still up.  If not, free it and try again.
	 * The address family should also be checked in case of sharing
	 * the cache with IPv6.
	 */
	if (dst && (dst->sin_family != AF_INET ||
	    !in_hosteq(dst->sin_addr, ip->ip_dst)))
		rtcache_free(ro);

	if ((rt = rtcache_validate(ro)) == NULL &&
	    (rt = rtcache_update(ro, 1)) == NULL) {
		dst = &u.dst4;
		rtcache_setdst(ro, &u.dst);
	}

	/*
	 * If routing to interface only, short circuit routing lookup.
	 */
	if (flags & IP_ROUTETOIF) {
		if ((ia = ifatoia(ifa_ifwithladdr(sintocsa(dst)))) == NULL) {
			IP_STATINC(IP_STAT_NOROUTE);
			error = ENETUNREACH;
			goto bad;
		}
		ifp = ia->ia_ifp;
		mtu = ifp->if_mtu;
		ip->ip_ttl = 1;
	} else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
	    ip->ip_dst.s_addr == INADDR_BROADCAST) &&
	    imo != NULL && imo->imo_multicast_ifp != NULL) {
		/* The caller pinned the outgoing multicast interface. */
		ifp = imo->imo_multicast_ifp;
		mtu = ifp->if_mtu;
		IFP_TO_IA(ifp, ia);
	} else {
		if (rt == NULL)
			rt = rtcache_init(ro);
		if (rt == NULL) {
			IP_STATINC(IP_STAT_NOROUTE);
			error = EHOSTUNREACH;
			goto bad;
		}
		ia = ifatoia(rt->rt_ifa);
		ifp = rt->rt_ifp;
		/* A route MTU of 0 means "unset": use the interface MTU. */
		if ((mtu = rt->rt_rmx.rmx_mtu) == 0)
			mtu = ifp->if_mtu;
		rt->rt_use++;
		/* From here on dst is the next hop, rdst the real target. */
		if (rt->rt_flags & RTF_GATEWAY)
			dst = satosin(rt->rt_gateway);
	}

	if (IN_MULTICAST(ip->ip_dst.s_addr) ||
	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
		bool inmgroup;

		m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
		    M_BCAST : M_MCAST;
		/*
		 * See if the caller provided any multicast options
		 */
		if (imo != NULL)
			ip->ip_ttl = imo->imo_multicast_ttl;
		else
			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;

		/*
		 * if we don't know the outgoing ifp yet, we can't generate
		 * output
		 */
		if (!ifp) {
			IP_STATINC(IP_STAT_NOROUTE);
			error = ENETUNREACH;
			goto bad;
		}

		/*
		 * If the packet is multicast or broadcast, confirm that
		 * the outgoing interface can transmit it.
		 */
		if (((m->m_flags & M_MCAST) &&
		     (ifp->if_flags & IFF_MULTICAST) == 0) ||
		    ((m->m_flags & M_BCAST) &&
		     (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) {
			IP_STATINC(IP_STAT_NOROUTE);
			error = ENETUNREACH;
			goto bad;
		}
		/*
		 * If source address not specified yet, use an address
		 * of outgoing interface.
		 */
		if (in_nullhost(ip->ip_src)) {
			struct in_ifaddr *xia;

			IFP_TO_IA(ifp, xia);
			if (!xia) {
				error = EADDRNOTAVAIL;
				goto bad;
			}
			xifa = &xia->ia_ifa;
			if (xifa->ifa_getifa != NULL) {
				xia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
			}
			ip->ip_src = xia->ia_addr.sin_addr;
		}

		inmgroup = in_multi_group(ip->ip_dst, ifp, flags);
		if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) {
			/*
			 * If we belong to the destination multicast group
			 * on the outgoing interface, and the caller did not
			 * forbid loopback, loop back a copy.
			 */
			ip_mloopback(ifp, m, &u.dst4);
		}
#ifdef MROUTING
		else {
			/*
			 * If we are acting as a multicast router, perform
			 * multicast forwarding as if the packet had just
			 * arrived on the interface to which we are about
			 * to send.  The multicast forwarding function
			 * recursively calls this function, using the
			 * IP_FORWARDING flag to prevent infinite recursion.
			 *
			 * Multicasts that are looped back by ip_mloopback(),
			 * above, will be forwarded by the ip_input() routine,
			 * if necessary.
			 */
			extern struct socket *ip_mrouter;

			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
				if (ip_mforward(m, ifp) != 0) {
					m_freem(m);
					goto done;
				}
			}
		}
#endif
		/*
		 * Multicasts with a time-to-live of zero may be looped-
		 * back, above, but must not be transmitted on a network.
		 * Also, multicasts addressed to the loopback interface
		 * are not sent -- the above call to ip_mloopback() will
		 * loop back a copy if this host actually belongs to the
		 * destination group on the loopback interface.
		 */
		if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
			m_freem(m);
			goto done;
		}

		goto sendit;
	}
	/*
	 * If source address not specified yet, use address
	 * of outgoing interface.
	 */
	if (in_nullhost(ip->ip_src)) {
		xifa = &ia->ia_ifa;
		if (xifa->ifa_getifa != NULL)
			ia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
		ip->ip_src = ia->ia_addr.sin_addr;
	}

	/*
	 * packets with Class-D address as source are not valid per
	 * RFC 1112
	 */
	if (IN_MULTICAST(ip->ip_src.s_addr)) {
		IP_STATINC(IP_STAT_ODROPPED);
		error = EADDRNOTAVAIL;
		goto bad;
	}

	/*
	 * Look for a broadcast address and
	 * verify that the user is allowed to send
	 * such a packet.
	 */
	if (in_broadcast(dst->sin_addr, ifp)) {
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if ((flags & IP_ALLOWBROADCAST) == 0) {
			error = EACCES;
			goto bad;
		}
		/* don't allow broadcast messages to be fragmented */
		if (ntohs(ip->ip_len) > ifp->if_mtu) {
			error = EMSGSIZE;
			goto bad;
		}
		m->m_flags |= M_BCAST;
	} else
		m->m_flags &= ~M_BCAST;

sendit:
	if ((flags & (IP_FORWARDING|IP_NOIPNEWID)) == 0) {
		if (m->m_pkthdr.len < IP_MINFRAGSIZE) {
			ip->ip_id = 0;
		} else if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
			ip->ip_id = ip_newid(ia);
		} else {

			/*
			 * TSO capable interfaces (typically?) increment
			 * ip_id for each segment.
			 * "allocate" enough ids here to increase the chance
			 * for them to be unique.
			 *
			 * note that the following calculation is not
			 * needed to be precise.  wasting some ip_id is fine.
			 */

			unsigned int segsz = m->m_pkthdr.segsz;
			unsigned int datasz = ntohs(ip->ip_len) - hlen;
			unsigned int num = howmany(datasz, segsz);

			ip->ip_id = ip_newid_range(ia, num);
		}
	}
	/*
	 * If we're doing Path MTU Discovery, we need to set DF unless
	 * the route's MTU is locked.
	 */
	if ((flags & IP_MTUDISC) != 0 && rt != NULL &&
	    (rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
		ip->ip_off |= htons(IP_DF);

#ifdef IPSEC
	if (ipsec_used) {
		/* Perform IPsec processing, if any. */
		error = ipsec4_output(m, so, flags, &sp, &mtu, &natt_frag,
		    &done);
		if (error || done)
			goto done;
	}
#endif

	/*
	 * Run through list of hooks for output packets.
	 */
	if ((error = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
		goto done;
	if (m == NULL)
		goto done;

	/* The hooks were handed &m; re-read the header from the new m. */
	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;

	/*
	 * Record the IP header length in the upper 16 bits of csum_data
	 * (ip_fragment() asserts on it via M_CSUM_DATA_IPv4_IPHL()).
	 */
	m->m_pkthdr.csum_data |= hlen << 16;

#if IFA_STATS
	/*
	 * search for the source address structure to
	 * maintain output statistics.
	 */
	INADDR_TO_IA(ip->ip_src, ia);
#endif

	/* Maybe skip checksums on loopback interfaces. */
	if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
		m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
	}
	/* Checksums requested on the packet that the interface won't do. */
	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
	/*
	 * If small enough for mtu of path, or if using TCP segmentation
	 * offload, can just send directly.
	 */
	if (ntohs(ip->ip_len) <= mtu ||
	    (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) {
#if IFA_STATS
		if (ia)
			ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
#endif
		/*
		 * Always initialize the sum to 0!  Some HW assisted
		 * checksumming requires this.
		 */
		ip->ip_sum = 0;

		if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
			/*
			 * Perform any checksums that the hardware can't do
			 * for us.
			 *
			 * XXX Does any hardware require the {th,uh}_sum
			 * XXX fields to be 0?
			 */
			if (sw_csum & M_CSUM_IPv4) {
				KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4));
				ip->ip_sum = in_cksum(m, hlen);
				m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
			}
			if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
				if (IN_NEED_CHECKSUM(ifp,
				    sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
					in_delayed_cksum(m);
				}
				m->m_pkthdr.csum_flags &=
				    ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
			}
		}

		/*
		 * Hand the packet to the driver directly unless it is a
		 * TSO packet and the interface has TSO disabled, in which
		 * case ip_tso_output() is used instead.
		 */
		if (__predict_true(
		    (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 ||
		    (ifp->if_capenable & IFCAP_TSOv4) != 0)) {
			KERNEL_LOCK(1, NULL);
			error =
			    (*ifp->if_output)(ifp, m,
				(m->m_flags & M_MCAST) ?
				    sintocsa(rdst) : sintocsa(dst),
				rt);
			KERNEL_UNLOCK_ONE(NULL);
		} else {
			error =
			    ip_tso_output(ifp, m,
				(m->m_flags & M_MCAST) ?
				    sintocsa(rdst) : sintocsa(dst),
				rt);
		}
		goto done;
	}

	/*
	 * We can't use HW checksumming if we're about
	 * to fragment the packet.
	 *
	 * XXX Some hardware can do this.
	 */
	if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
		if (IN_NEED_CHECKSUM(ifp,
		    m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
			in_delayed_cksum(m);
		}
		m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ntohs(ip->ip_off) & IP_DF) {
		if (flags & IP_RETURNMTU) {
			struct inpcb *inp;

			/* Report the offending MTU back through the pcb. */
			KASSERT(so && solocked(so));
			inp = sotoinpcb(so);
			inp->inp_errormtu = mtu;
		}
		error = EMSGSIZE;
		IP_STATINC(IP_STAT_CANTFRAG);
		goto bad;
	}

	error = ip_fragment(m, ifp, mtu);
	if (error) {
		/* ip_fragment() freed the chain itself; don't free it again. */
		m = NULL;
		goto bad;
	}

	/* Send the fragments in order; after a failure free the rest. */
	for (; m; m = m0) {
		m0 = m->m_nextpkt;
		m->m_nextpkt = 0;
		if (error == 0) {
#if IFA_STATS
			if (ia)
				ia->ia_ifa.ifa_data.ifad_outbytes +=
				    ntohs(ip->ip_len);
#endif
			/*
			 * If we get there, the packet has not been handled by
			 * IPsec whereas it should have. Now that it has been
			 * fragmented, re-inject it in ip_output so that IPsec
			 * processing can occur.
			 */
			if (natt_frag) {
				error = ip_output(m, opt, ro,
				    flags | IP_RAWOUTPUT | IP_NOIPNEWID,
				    imo, so);
			} else {
				KASSERT((m->m_pkthdr.csum_flags &
				    (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
				KERNEL_LOCK(1, NULL);
				error = (*ifp->if_output)(ifp, m,
				    (m->m_flags & M_MCAST) ?
				    sintocsa(rdst) : sintocsa(dst), rt);
				KERNEL_UNLOCK_ONE(NULL);
			}
		} else
			m_freem(m);
	}

	if (error == 0)
		IP_STATINC(IP_STAT_FRAGMENTED);
done:
	/* Only the local cache is released; a caller-supplied ro is kept. */
	rtcache_free(&iproute);
#ifdef IPSEC
	if (sp) {
		KEY_FREESP(&sp);
	}
#endif
	return error;
bad:
	m_freem(m);
	goto done;
}

/*
 * Split the packet m into fragments that each carry at most
 * ((mtu - header length) & ~7) bytes of payload.  The fragments are
 * left linked through m_nextpkt with the original mbuf, trimmed to
 * the first fragment, at the head; this function does not transmit
 * them.
 *
 * ifp may be NULL, in which case no checksum offload is assumed and
 * the send-queue room check is skipped.
 *
 * Returns 0 on success.  On failure every mbuf, including the
 * original, is freed and the result is EMSGSIZE (fewer than 8 payload
 * bytes fit per fragment) or ENOBUFS (mbuf allocation failed, or the
 * interface send queue has no room for all the fragments).
 */
int
ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
{
	struct ip *ip, *mhip;
	struct mbuf *m0;
	int len, hlen, off;
	int mhlen, firstlen;
	struct mbuf **mnext;
	int sw_csum = m->m_pkthdr.csum_flags;
	int fragments = 0;
	int s;
	int error = 0;

	ip = mtod(m, struct ip *);
	hlen = ip->ip_hl << 2;
	if (ifp != NULL)
		sw_csum &= ~ifp->if_csum_flags_tx;

	/* Payload per fragment, rounded down to a multiple of 8 bytes. */
	len = (mtu - hlen) &~ 7;
	if (len < 8) {
		m_freem(m);
		return (EMSGSIZE);
	}

	firstlen = len;
	mnext = &m->m_nextpkt;

	/*
	 * Loop through length of segment after first fragment,
	 * make new header and copy data of each part and link onto chain.
	 */
	m0 = m;
	mhlen = sizeof (struct ip);
	for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == 0) {
			error = ENOBUFS;
			IP_STATINC(IP_STAT_ODROPPED);
			goto sendorfree;
		}
		MCLAIM(m, m0->m_owner);
		*mnext = m;
		mnext = &m->m_nextpkt;
		m->m_data += max_linkhdr;
		mhip = mtod(m, struct ip *);
		*mhip = *ip;
		/* we must inherit MCAST and BCAST flags */
		m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
		if (hlen > sizeof (struct ip)) {
			/* Carry over only the options ip_optcopy() keeps. */
			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
			mhip->ip_hl = mhlen >> 2;
		}
		m->m_len = mhlen;
		/* Offset is built in host order and swapped by HTONS below. */
		mhip->ip_off = ((off - hlen) >> 3) +
		    (ntohs(ip->ip_off) & ~IP_MF);
		if (ip->ip_off & htons(IP_MF))
			mhip->ip_off |= IP_MF;
		if (off + len >= ntohs(ip->ip_len))
			len = ntohs(ip->ip_len) - off;
		else
			mhip->ip_off |= IP_MF;
		HTONS(mhip->ip_off);
		mhip->ip_len = htons((u_int16_t)(len + mhlen));
		m->m_next = m_copym(m0, off, len, M_DONTWAIT);
		if (m->m_next == 0) {
			error = ENOBUFS;	/* ??? */
			IP_STATINC(IP_STAT_ODROPPED);
			goto sendorfree;
		}
		m->m_pkthdr.len = mhlen + len;
		m->m_pkthdr.rcvif = NULL;
		mhip->ip_sum = 0;
		KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
		if (sw_csum & M_CSUM_IPv4) {
			mhip->ip_sum = in_cksum(m, mhlen);
		} else {
			/*
			 * checksum is hw-offloaded or not necessary.
			 */
			m->m_pkthdr.csum_flags |=
			    m0->m_pkthdr.csum_flags & M_CSUM_IPv4;
			m->m_pkthdr.csum_data |= mhlen << 16;
			KASSERT(!(ifp != NULL &&
			    IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4))
			    || (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
		}
		IP_STATINC(IP_STAT_OFRAGMENTS);
		fragments++;
	}
	/*
	 * Update first fragment by trimming what's been copied out
	 * and updating header.  The caller sends each fragment (in order).
	 */
	m = m0;
	/* The argument is negative, so m_adj() trims from the tail. */
	m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
	m->m_pkthdr.len = hlen + firstlen;
	ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
	ip->ip_off |= htons(IP_MF);
	ip->ip_sum = 0;
	if (sw_csum & M_CSUM_IPv4) {
		ip->ip_sum = in_cksum(m, hlen);
		m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
	} else {
		/*
		 * checksum is hw-offloaded or not necessary.
		 */
		KASSERT(!(ifp != NULL && IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4))
		    || (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
		KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >=
		    sizeof(struct ip));
	}
sendorfree:
	/*
	 * If there is no room for all the fragments, don't queue
	 * any of them.
	 */
	if (ifp != NULL) {
		s = splnet();
		if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
		    error == 0) {
			error = ENOBUFS;
			IP_STATINC(IP_STAT_ODROPPED);
			IFQ_INC_DROPS(&ifp->if_snd);
		}
		splx(s);
	}
	if (error) {
		/* Release the whole chain, starting with the original. */
		for (m = m0; m; m = m0) {
			m0 = m->m_nextpkt;
			m->m_nextpkt = NULL;
			m_freem(m);
		}
	}
	return (error);
}

/*
 * Process a delayed payload checksum calculation.
778 */ 779 void 780 in_delayed_cksum(struct mbuf *m) 781 { 782 struct ip *ip; 783 u_int16_t csum, offset; 784 785 ip = mtod(m, struct ip *); 786 offset = ip->ip_hl << 2; 787 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset); 788 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0) 789 csum = 0xffff; 790 791 offset += M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data); 792 793 if ((offset + sizeof(u_int16_t)) > m->m_len) { 794 /* This happen when ip options were inserted 795 printf("in_delayed_cksum: pullup len %d off %d proto %d\n", 796 m->m_len, offset, ip->ip_p); 797 */ 798 m_copyback(m, offset, sizeof(csum), (void *) &csum); 799 } else 800 *(u_int16_t *)(mtod(m, char *) + offset) = csum; 801 } 802 803 /* 804 * Determine the maximum length of the options to be inserted; 805 * we would far rather allocate too much space rather than too little. 806 */ 807 808 u_int 809 ip_optlen(struct inpcb *inp) 810 { 811 struct mbuf *m = inp->inp_options; 812 813 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) { 814 return (m->m_len - offsetof(struct ipoption, ipopt_dst)); 815 } 816 return 0; 817 } 818 819 /* 820 * Insert IP options into preformed packet. 821 * Adjust IP destination as required for IP source routing, 822 * as indicated by a non-zero in_addr at the start of the options. 
823 */ 824 static struct mbuf * 825 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 826 { 827 struct ipoption *p = mtod(opt, struct ipoption *); 828 struct mbuf *n; 829 struct ip *ip = mtod(m, struct ip *); 830 unsigned optlen; 831 832 optlen = opt->m_len - sizeof(p->ipopt_dst); 833 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 834 return (m); /* XXX should fail */ 835 if (!in_nullhost(p->ipopt_dst)) 836 ip->ip_dst = p->ipopt_dst; 837 if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) { 838 MGETHDR(n, M_DONTWAIT, MT_HEADER); 839 if (n == 0) 840 return (m); 841 MCLAIM(n, m->m_owner); 842 M_MOVE_PKTHDR(n, m); 843 m->m_len -= sizeof(struct ip); 844 m->m_data += sizeof(struct ip); 845 n->m_next = m; 846 m = n; 847 m->m_len = optlen + sizeof(struct ip); 848 m->m_data += max_linkhdr; 849 bcopy((void *)ip, mtod(m, void *), sizeof(struct ip)); 850 } else { 851 m->m_data -= optlen; 852 m->m_len += optlen; 853 memmove(mtod(m, void *), ip, sizeof(struct ip)); 854 } 855 m->m_pkthdr.len += optlen; 856 ip = mtod(m, struct ip *); 857 bcopy((void *)p->ipopt_list, (void *)(ip + 1), (unsigned)optlen); 858 *phlen = sizeof(struct ip) + optlen; 859 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 860 return (m); 861 } 862 863 /* 864 * Copy options from ip to jp, 865 * omitting those not copied during fragmentation. 866 */ 867 int 868 ip_optcopy(struct ip *ip, struct ip *jp) 869 { 870 u_char *cp, *dp; 871 int opt, optlen, cnt; 872 873 cp = (u_char *)(ip + 1); 874 dp = (u_char *)(jp + 1); 875 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 876 for (; cnt > 0; cnt -= optlen, cp += optlen) { 877 opt = cp[0]; 878 if (opt == IPOPT_EOL) 879 break; 880 if (opt == IPOPT_NOP) { 881 /* Preserve for IP mcast tunnel's LSRR alignment. 
*/ 882 *dp++ = IPOPT_NOP; 883 optlen = 1; 884 continue; 885 } 886 887 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp)); 888 optlen = cp[IPOPT_OLEN]; 889 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen < cnt); 890 891 /* Invalid lengths should have been caught by ip_dooptions. */ 892 if (optlen > cnt) 893 optlen = cnt; 894 if (IPOPT_COPIED(opt)) { 895 bcopy((void *)cp, (void *)dp, (unsigned)optlen); 896 dp += optlen; 897 } 898 } 899 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 900 *dp++ = IPOPT_EOL; 901 return (optlen); 902 } 903 904 /* 905 * IP socket option processing. 906 */ 907 int 908 ip_ctloutput(int op, struct socket *so, struct sockopt *sopt) 909 { 910 struct inpcb *inp = sotoinpcb(so); 911 struct ip *ip = &inp->inp_ip; 912 int inpflags = inp->inp_flags; 913 int optval = 0, error = 0; 914 915 if (sopt->sopt_level != IPPROTO_IP) { 916 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) 917 return 0; 918 return ENOPROTOOPT; 919 } 920 921 switch (op) { 922 case PRCO_SETOPT: 923 switch (sopt->sopt_name) { 924 case IP_OPTIONS: 925 #ifdef notyet 926 case IP_RETOPTS: 927 #endif 928 error = ip_pcbopts(inp, sopt); 929 break; 930 931 case IP_TOS: 932 case IP_TTL: 933 case IP_MINTTL: 934 case IP_PKTINFO: 935 case IP_RECVOPTS: 936 case IP_RECVRETOPTS: 937 case IP_RECVDSTADDR: 938 case IP_RECVIF: 939 case IP_RECVPKTINFO: 940 case IP_RECVTTL: 941 error = sockopt_getint(sopt, &optval); 942 if (error) 943 break; 944 945 switch (sopt->sopt_name) { 946 case IP_TOS: 947 ip->ip_tos = optval; 948 break; 949 950 case IP_TTL: 951 ip->ip_ttl = optval; 952 break; 953 954 case IP_MINTTL: 955 if (optval > 0 && optval <= MAXTTL) 956 inp->inp_ip_minttl = optval; 957 else 958 error = EINVAL; 959 break; 960 #define OPTSET(bit) \ 961 if (optval) \ 962 inpflags |= bit; \ 963 else \ 964 inpflags &= ~bit; 965 966 case IP_PKTINFO: 967 OPTSET(INP_PKTINFO); 968 break; 969 970 case IP_RECVOPTS: 971 OPTSET(INP_RECVOPTS); 972 break; 973 974 case IP_RECVPKTINFO: 975 
OPTSET(INP_RECVPKTINFO); 976 break; 977 978 case IP_RECVRETOPTS: 979 OPTSET(INP_RECVRETOPTS); 980 break; 981 982 case IP_RECVDSTADDR: 983 OPTSET(INP_RECVDSTADDR); 984 break; 985 986 case IP_RECVIF: 987 OPTSET(INP_RECVIF); 988 break; 989 990 case IP_RECVTTL: 991 OPTSET(INP_RECVTTL); 992 break; 993 } 994 break; 995 #undef OPTSET 996 997 case IP_MULTICAST_IF: 998 case IP_MULTICAST_TTL: 999 case IP_MULTICAST_LOOP: 1000 case IP_ADD_MEMBERSHIP: 1001 case IP_DROP_MEMBERSHIP: 1002 error = ip_setmoptions(inp, sopt); 1003 break; 1004 1005 case IP_PORTRANGE: 1006 error = sockopt_getint(sopt, &optval); 1007 if (error) 1008 break; 1009 1010 switch (optval) { 1011 case IP_PORTRANGE_DEFAULT: 1012 case IP_PORTRANGE_HIGH: 1013 inpflags &= ~(INP_LOWPORT); 1014 break; 1015 1016 case IP_PORTRANGE_LOW: 1017 inpflags |= INP_LOWPORT; 1018 break; 1019 1020 default: 1021 error = EINVAL; 1022 break; 1023 } 1024 break; 1025 1026 case IP_PORTALGO: 1027 error = sockopt_getint(sopt, &optval); 1028 if (error) 1029 break; 1030 1031 error = portalgo_algo_index_select( 1032 (struct inpcb_hdr *)inp, optval); 1033 break; 1034 1035 #if defined(IPSEC) 1036 case IP_IPSEC_POLICY: 1037 if (ipsec_enabled) { 1038 error = ipsec4_set_policy(inp, sopt->sopt_name, 1039 sopt->sopt_data, sopt->sopt_size, 1040 curlwp->l_cred); 1041 break; 1042 } 1043 /*FALLTHROUGH*/ 1044 #endif /* IPSEC */ 1045 1046 default: 1047 error = ENOPROTOOPT; 1048 break; 1049 } 1050 break; 1051 1052 case PRCO_GETOPT: 1053 switch (sopt->sopt_name) { 1054 case IP_OPTIONS: 1055 case IP_RETOPTS: { 1056 struct mbuf *mopts = inp->inp_options; 1057 1058 if (mopts) { 1059 struct mbuf *m; 1060 1061 m = m_copym(mopts, 0, M_COPYALL, M_DONTWAIT); 1062 if (m == NULL) { 1063 error = ENOBUFS; 1064 break; 1065 } 1066 error = sockopt_setmbuf(sopt, m); 1067 } 1068 break; 1069 } 1070 case IP_PKTINFO: 1071 case IP_TOS: 1072 case IP_TTL: 1073 case IP_MINTTL: 1074 case IP_RECVOPTS: 1075 case IP_RECVRETOPTS: 1076 case IP_RECVDSTADDR: 1077 case IP_RECVIF: 1078 
case IP_RECVPKTINFO: 1079 case IP_RECVTTL: 1080 case IP_ERRORMTU: 1081 switch (sopt->sopt_name) { 1082 case IP_TOS: 1083 optval = ip->ip_tos; 1084 break; 1085 1086 case IP_TTL: 1087 optval = ip->ip_ttl; 1088 break; 1089 1090 case IP_MINTTL: 1091 optval = inp->inp_ip_minttl; 1092 break; 1093 1094 case IP_ERRORMTU: 1095 optval = inp->inp_errormtu; 1096 break; 1097 1098 #define OPTBIT(bit) (inpflags & bit ? 1 : 0) 1099 1100 case IP_PKTINFO: 1101 optval = OPTBIT(INP_PKTINFO); 1102 break; 1103 1104 case IP_RECVOPTS: 1105 optval = OPTBIT(INP_RECVOPTS); 1106 break; 1107 1108 case IP_RECVPKTINFO: 1109 optval = OPTBIT(INP_RECVPKTINFO); 1110 break; 1111 1112 case IP_RECVRETOPTS: 1113 optval = OPTBIT(INP_RECVRETOPTS); 1114 break; 1115 1116 case IP_RECVDSTADDR: 1117 optval = OPTBIT(INP_RECVDSTADDR); 1118 break; 1119 1120 case IP_RECVIF: 1121 optval = OPTBIT(INP_RECVIF); 1122 break; 1123 1124 case IP_RECVTTL: 1125 optval = OPTBIT(INP_RECVTTL); 1126 break; 1127 } 1128 error = sockopt_setint(sopt, optval); 1129 break; 1130 1131 #if 0 /* defined(IPSEC) */ 1132 case IP_IPSEC_POLICY: 1133 { 1134 struct mbuf *m = NULL; 1135 1136 /* XXX this will return EINVAL as sopt is empty */ 1137 error = ipsec4_get_policy(inp, sopt->sopt_data, 1138 sopt->sopt_size, &m); 1139 if (error == 0) 1140 error = sockopt_setmbuf(sopt, m); 1141 break; 1142 } 1143 #endif /*IPSEC*/ 1144 1145 case IP_MULTICAST_IF: 1146 case IP_MULTICAST_TTL: 1147 case IP_MULTICAST_LOOP: 1148 case IP_ADD_MEMBERSHIP: 1149 case IP_DROP_MEMBERSHIP: 1150 error = ip_getmoptions(inp, sopt); 1151 break; 1152 1153 case IP_PORTRANGE: 1154 if (inpflags & INP_LOWPORT) 1155 optval = IP_PORTRANGE_LOW; 1156 else 1157 optval = IP_PORTRANGE_DEFAULT; 1158 error = sockopt_setint(sopt, optval); 1159 break; 1160 1161 case IP_PORTALGO: 1162 optval = inp->inp_portalgo; 1163 error = sockopt_setint(sopt, optval); 1164 break; 1165 1166 default: 1167 error = ENOPROTOOPT; 1168 break; 1169 } 1170 break; 1171 } 1172 1173 if (!error) { 1174 inp->inp_flags = 
inpflags; 1175 } 1176 return error; 1177 } 1178 1179 /* 1180 * Set up IP options in pcb for insertion in output packets. 1181 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1182 * with destination address if source routed. 1183 */ 1184 static int 1185 ip_pcbopts(struct inpcb *inp, const struct sockopt *sopt) 1186 { 1187 struct mbuf *m; 1188 const u_char *cp; 1189 u_char *dp; 1190 int cnt; 1191 1192 /* Turn off any old options. */ 1193 if (inp->inp_options) { 1194 m_free(inp->inp_options); 1195 } 1196 inp->inp_options = NULL; 1197 if ((cnt = sopt->sopt_size) == 0) { 1198 /* Only turning off any previous options. */ 1199 return 0; 1200 } 1201 cp = sopt->sopt_data; 1202 1203 #ifndef __vax__ 1204 if (cnt % sizeof(int32_t)) 1205 return (EINVAL); 1206 #endif 1207 1208 m = m_get(M_DONTWAIT, MT_SOOPTS); 1209 if (m == NULL) 1210 return (ENOBUFS); 1211 1212 dp = mtod(m, u_char *); 1213 memset(dp, 0, sizeof(struct in_addr)); 1214 dp += sizeof(struct in_addr); 1215 m->m_len = sizeof(struct in_addr); 1216 1217 /* 1218 * IP option list according to RFC791. Each option is of the form 1219 * 1220 * [optval] [olen] [(olen - 2) data bytes] 1221 * 1222 * We validate the list and copy options to an mbuf for prepending 1223 * to data packets. The IP first-hop destination address will be 1224 * stored before actual options and is zero if unset. 1225 */ 1226 while (cnt > 0) { 1227 uint8_t optval, olen, offset; 1228 1229 optval = cp[IPOPT_OPTVAL]; 1230 1231 if (optval == IPOPT_EOL || optval == IPOPT_NOP) { 1232 olen = 1; 1233 } else { 1234 if (cnt < IPOPT_OLEN + 1) 1235 goto bad; 1236 1237 olen = cp[IPOPT_OLEN]; 1238 if (olen < IPOPT_OLEN + 1 || olen > cnt) 1239 goto bad; 1240 } 1241 1242 if (optval == IPOPT_LSRR || optval == IPOPT_SSRR) { 1243 /* 1244 * user process specifies route as: 1245 * ->A->B->C->D 1246 * D must be our final destination (but we can't 1247 * check that since we may not have connected yet). 
1248 * A is first hop destination, which doesn't appear in 1249 * actual IP option, but is stored before the options. 1250 */ 1251 if (olen < IPOPT_OFFSET + 1 + sizeof(struct in_addr)) 1252 goto bad; 1253 1254 offset = cp[IPOPT_OFFSET]; 1255 memcpy(mtod(m, u_char *), cp + IPOPT_OFFSET + 1, 1256 sizeof(struct in_addr)); 1257 1258 cp += sizeof(struct in_addr); 1259 cnt -= sizeof(struct in_addr); 1260 olen -= sizeof(struct in_addr); 1261 1262 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1263 goto bad; 1264 1265 memcpy(dp, cp, olen); 1266 dp[IPOPT_OPTVAL] = optval; 1267 dp[IPOPT_OLEN] = olen; 1268 dp[IPOPT_OFFSET] = offset; 1269 break; 1270 } else { 1271 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1272 goto bad; 1273 1274 memcpy(dp, cp, olen); 1275 break; 1276 } 1277 1278 dp += olen; 1279 m->m_len += olen; 1280 1281 if (optval == IPOPT_EOL) 1282 break; 1283 1284 cp += olen; 1285 cnt -= olen; 1286 } 1287 1288 inp->inp_options = m; 1289 return 0; 1290 bad: 1291 (void)m_free(m); 1292 return EINVAL; 1293 } 1294 1295 /* 1296 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 
1297 */ 1298 static struct ifnet * 1299 ip_multicast_if(struct in_addr *a, int *ifindexp) 1300 { 1301 int ifindex; 1302 struct ifnet *ifp = NULL; 1303 struct in_ifaddr *ia; 1304 1305 if (ifindexp) 1306 *ifindexp = 0; 1307 if (ntohl(a->s_addr) >> 24 == 0) { 1308 ifindex = ntohl(a->s_addr) & 0xffffff; 1309 ifp = if_byindex(ifindex); 1310 if (!ifp) 1311 return NULL; 1312 if (ifindexp) 1313 *ifindexp = ifindex; 1314 } else { 1315 LIST_FOREACH(ia, &IN_IFADDR_HASH(a->s_addr), ia_hash) { 1316 if (in_hosteq(ia->ia_addr.sin_addr, *a) && 1317 (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) { 1318 ifp = ia->ia_ifp; 1319 break; 1320 } 1321 } 1322 } 1323 return ifp; 1324 } 1325 1326 static int 1327 ip_getoptval(const struct sockopt *sopt, u_int8_t *val, u_int maxval) 1328 { 1329 u_int tval; 1330 u_char cval; 1331 int error; 1332 1333 if (sopt == NULL) 1334 return EINVAL; 1335 1336 switch (sopt->sopt_size) { 1337 case sizeof(u_char): 1338 error = sockopt_get(sopt, &cval, sizeof(u_char)); 1339 tval = cval; 1340 break; 1341 1342 case sizeof(u_int): 1343 error = sockopt_get(sopt, &tval, sizeof(u_int)); 1344 break; 1345 1346 default: 1347 error = EINVAL; 1348 } 1349 1350 if (error) 1351 return error; 1352 1353 if (tval > maxval) 1354 return EINVAL; 1355 1356 *val = tval; 1357 return 0; 1358 } 1359 1360 /* 1361 * Set the IP multicast options in response to user setsockopt(). 1362 */ 1363 static int 1364 ip_setmoptions(struct inpcb *inp, const struct sockopt *sopt) 1365 { 1366 struct ip_moptions *imo = inp->inp_moptions; 1367 struct in_addr addr; 1368 struct ip_mreq lmreq, *mreq; 1369 struct ifnet *ifp; 1370 int i, ifindex, error = 0; 1371 1372 if (!imo) { 1373 /* 1374 * No multicast option buffer attached to the pcb; 1375 * allocate one and initialize to default values. 
1376 */ 1377 imo = kmem_intr_alloc(sizeof(*imo), KM_NOSLEEP); 1378 if (imo == NULL) 1379 return ENOBUFS; 1380 1381 imo->imo_multicast_ifp = NULL; 1382 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1383 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1384 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1385 imo->imo_num_memberships = 0; 1386 inp->inp_moptions = imo; 1387 } 1388 1389 switch (sopt->sopt_name) { 1390 case IP_MULTICAST_IF: 1391 /* 1392 * Select the interface for outgoing multicast packets. 1393 */ 1394 error = sockopt_get(sopt, &addr, sizeof(addr)); 1395 if (error) 1396 break; 1397 1398 /* 1399 * INADDR_ANY is used to remove a previous selection. 1400 * When no interface is selected, a default one is 1401 * chosen every time a multicast packet is sent. 1402 */ 1403 if (in_nullhost(addr)) { 1404 imo->imo_multicast_ifp = NULL; 1405 break; 1406 } 1407 /* 1408 * The selected interface is identified by its local 1409 * IP address. Find the interface and confirm that 1410 * it supports multicasting. 1411 */ 1412 ifp = ip_multicast_if(&addr, &ifindex); 1413 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1414 error = EADDRNOTAVAIL; 1415 break; 1416 } 1417 imo->imo_multicast_ifp = ifp; 1418 if (ifindex) 1419 imo->imo_multicast_addr = addr; 1420 else 1421 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1422 break; 1423 1424 case IP_MULTICAST_TTL: 1425 /* 1426 * Set the IP time-to-live for outgoing multicast packets. 1427 */ 1428 error = ip_getoptval(sopt, &imo->imo_multicast_ttl, MAXTTL); 1429 break; 1430 1431 case IP_MULTICAST_LOOP: 1432 /* 1433 * Set the loopback flag for outgoing multicast packets. 1434 * Must be zero or one. 1435 */ 1436 error = ip_getoptval(sopt, &imo->imo_multicast_loop, 1); 1437 break; 1438 1439 case IP_ADD_MEMBERSHIP: 1440 /* 1441 * Add a multicast group membership. 1442 * Group must be a valid IP multicast address. 
1443 */ 1444 error = sockopt_get(sopt, &lmreq, sizeof(lmreq)); 1445 if (error) 1446 break; 1447 1448 mreq = &lmreq; 1449 1450 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1451 error = EINVAL; 1452 break; 1453 } 1454 /* 1455 * If no interface address was provided, use the interface of 1456 * the route to the given multicast address. 1457 */ 1458 if (in_nullhost(mreq->imr_interface)) { 1459 struct rtentry *rt; 1460 union { 1461 struct sockaddr dst; 1462 struct sockaddr_in dst4; 1463 } u; 1464 struct route ro; 1465 1466 memset(&ro, 0, sizeof(ro)); 1467 1468 sockaddr_in_init(&u.dst4, &mreq->imr_multiaddr, 0); 1469 rtcache_setdst(&ro, &u.dst); 1470 ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp 1471 : NULL; 1472 rtcache_free(&ro); 1473 } else { 1474 ifp = ip_multicast_if(&mreq->imr_interface, NULL); 1475 } 1476 /* 1477 * See if we found an interface, and confirm that it 1478 * supports multicast. 1479 */ 1480 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1481 error = EADDRNOTAVAIL; 1482 break; 1483 } 1484 /* 1485 * See if the membership already exists or if all the 1486 * membership slots are full. 1487 */ 1488 for (i = 0; i < imo->imo_num_memberships; ++i) { 1489 if (imo->imo_membership[i]->inm_ifp == ifp && 1490 in_hosteq(imo->imo_membership[i]->inm_addr, 1491 mreq->imr_multiaddr)) 1492 break; 1493 } 1494 if (i < imo->imo_num_memberships) { 1495 error = EADDRINUSE; 1496 break; 1497 } 1498 if (i == IP_MAX_MEMBERSHIPS) { 1499 error = ETOOMANYREFS; 1500 break; 1501 } 1502 /* 1503 * Everything looks good; add a new record to the multicast 1504 * address list for the given interface. 1505 */ 1506 if ((imo->imo_membership[i] = 1507 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1508 error = ENOBUFS; 1509 break; 1510 } 1511 ++imo->imo_num_memberships; 1512 break; 1513 1514 case IP_DROP_MEMBERSHIP: 1515 /* 1516 * Drop a multicast group membership. 1517 * Group must be a valid IP multicast address. 
1518 */ 1519 error = sockopt_get(sopt, &lmreq, sizeof(lmreq)); 1520 if (error) 1521 break; 1522 1523 mreq = &lmreq; 1524 1525 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1526 error = EINVAL; 1527 break; 1528 } 1529 /* 1530 * If an interface address was specified, get a pointer 1531 * to its ifnet structure. 1532 */ 1533 if (in_nullhost(mreq->imr_interface)) 1534 ifp = NULL; 1535 else { 1536 ifp = ip_multicast_if(&mreq->imr_interface, NULL); 1537 if (ifp == NULL) { 1538 error = EADDRNOTAVAIL; 1539 break; 1540 } 1541 } 1542 /* 1543 * Find the membership in the membership array. 1544 */ 1545 for (i = 0; i < imo->imo_num_memberships; ++i) { 1546 if ((ifp == NULL || 1547 imo->imo_membership[i]->inm_ifp == ifp) && 1548 in_hosteq(imo->imo_membership[i]->inm_addr, 1549 mreq->imr_multiaddr)) 1550 break; 1551 } 1552 if (i == imo->imo_num_memberships) { 1553 error = EADDRNOTAVAIL; 1554 break; 1555 } 1556 /* 1557 * Give up the multicast address record to which the 1558 * membership points. 1559 */ 1560 in_delmulti(imo->imo_membership[i]); 1561 /* 1562 * Remove the gap in the membership array. 1563 */ 1564 for (++i; i < imo->imo_num_memberships; ++i) 1565 imo->imo_membership[i-1] = imo->imo_membership[i]; 1566 --imo->imo_num_memberships; 1567 break; 1568 1569 default: 1570 error = EOPNOTSUPP; 1571 break; 1572 } 1573 1574 /* 1575 * If all options have default values, no need to keep the mbuf. 1576 */ 1577 if (imo->imo_multicast_ifp == NULL && 1578 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1579 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1580 imo->imo_num_memberships == 0) { 1581 kmem_free(imo, sizeof(*imo)); 1582 inp->inp_moptions = NULL; 1583 } 1584 1585 return error; 1586 } 1587 1588 /* 1589 * Return the IP multicast options in response to user getsockopt(). 
1590 */ 1591 static int 1592 ip_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1593 { 1594 struct ip_moptions *imo = inp->inp_moptions; 1595 struct in_addr addr; 1596 struct in_ifaddr *ia; 1597 uint8_t optval; 1598 int error = 0; 1599 1600 switch (sopt->sopt_name) { 1601 case IP_MULTICAST_IF: 1602 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1603 addr = zeroin_addr; 1604 else if (imo->imo_multicast_addr.s_addr) { 1605 /* return the value user has set */ 1606 addr = imo->imo_multicast_addr; 1607 } else { 1608 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1609 addr = ia ? ia->ia_addr.sin_addr : zeroin_addr; 1610 } 1611 error = sockopt_set(sopt, &addr, sizeof(addr)); 1612 break; 1613 1614 case IP_MULTICAST_TTL: 1615 optval = imo ? imo->imo_multicast_ttl 1616 : IP_DEFAULT_MULTICAST_TTL; 1617 1618 error = sockopt_set(sopt, &optval, sizeof(optval)); 1619 break; 1620 1621 case IP_MULTICAST_LOOP: 1622 optval = imo ? imo->imo_multicast_loop 1623 : IP_DEFAULT_MULTICAST_LOOP; 1624 1625 error = sockopt_set(sopt, &optval, sizeof(optval)); 1626 break; 1627 1628 default: 1629 error = EOPNOTSUPP; 1630 } 1631 1632 return error; 1633 } 1634 1635 /* 1636 * Discard the IP multicast options. 1637 */ 1638 void 1639 ip_freemoptions(struct ip_moptions *imo) 1640 { 1641 int i; 1642 1643 if (imo != NULL) { 1644 for (i = 0; i < imo->imo_num_memberships; ++i) 1645 in_delmulti(imo->imo_membership[i]); 1646 kmem_free(imo, sizeof(*imo)); 1647 } 1648 } 1649 1650 /* 1651 * Routine called from ip_output() to loop back a copy of an IP multicast 1652 * packet to the input queue of a specified interface. Note that this 1653 * calls the output routine of the loopback "driver", but with an interface 1654 * pointer that might NOT be lo0ifp -- easier than replicating that code here. 
1655 */ 1656 static void 1657 ip_mloopback(struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in *dst) 1658 { 1659 struct ip *ip; 1660 struct mbuf *copym; 1661 1662 copym = m_copypacket(m, M_DONTWAIT); 1663 if (copym != NULL 1664 && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip))) 1665 copym = m_pullup(copym, sizeof(struct ip)); 1666 if (copym == NULL) 1667 return; 1668 /* 1669 * We don't bother to fragment if the IP length is greater 1670 * than the interface's MTU. Can this possibly matter? 1671 */ 1672 ip = mtod(copym, struct ip *); 1673 1674 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 1675 in_delayed_cksum(copym); 1676 copym->m_pkthdr.csum_flags &= 1677 ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 1678 } 1679 1680 ip->ip_sum = 0; 1681 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1682 (void)looutput(ifp, copym, sintocsa(dst), NULL); 1683 } 1684