1 /* $OpenBSD: ip_output.c,v 1.336 2017/02/09 15:19:32 jca Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_enc.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 #endif /* IPSEC */ 74 75 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 76 static __inline u_int16_t __attribute__((__unused__)) 77 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 78 void in_delayed_cksum(struct mbuf *); 79 80 #ifdef IPSEC 81 struct tdb * 82 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 83 int ipsecflowinfo); 84 int 85 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct ifnet *ifp, 86 struct route *ro); 87 #endif /* IPSEC */ 88 89 /* 90 * IP output. The packet in mbuf chain m contains a skeletal IP 91 * header (with len, off, ttl, proto, tos, src, dst). 92 * The mbuf chain containing the packet will be freed. 93 * The mbuf opt, if present, will not be freed. 94 */ 95 int 96 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 97 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 98 { 99 struct ip *ip; 100 struct ifnet *ifp = NULL; 101 struct mbuf *m = m0; 102 int hlen = sizeof (struct ip); 103 int len, error = 0; 104 struct route iproute; 105 struct sockaddr_in *dst; 106 struct tdb *tdb = NULL; 107 u_long mtu; 108 #if defined(MROUTING) 109 int rv; 110 #endif 111 112 NET_ASSERT_LOCKED(); 113 114 #ifdef IPSEC 115 if (inp && (inp->inp_flags & INP_IPV6) != 0) 116 panic("ip_output: IPv6 pcb is passed"); 117 #endif /* IPSEC */ 118 119 #ifdef DIAGNOSTIC 120 if ((m->m_flags & M_PKTHDR) == 0) 121 panic("ip_output no HDR"); 122 #endif 123 if (opt) { 124 m = ip_insertoptions(m, opt, &len); 125 hlen = len; 126 } 127 128 ip = mtod(m, struct ip *); 129 130 /* 131 * Fill in IP header. 132 */ 133 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 134 ip->ip_v = IPVERSION; 135 ip->ip_off &= htons(IP_DF); 136 ip->ip_id = htons(ip_randomid()); 137 ip->ip_hl = hlen >> 2; 138 ipstat_inc(ips_localout); 139 } else { 140 hlen = ip->ip_hl << 2; 141 } 142 143 /* 144 * We should not send traffic to 0/8 say both Stevens and RFCs 145 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 146 */ 147 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 148 error = ENETUNREACH; 149 goto bad; 150 } 151 152 #if NPF > 0 153 reroute: 154 #endif 155 156 /* 157 * Do a route lookup now in case we need the source address to 158 * do an SPD lookup in IPsec; for most packets, the source address 159 * is set at a higher level protocol. ICMPs and other packets 160 * though (e.g., traceroute) have a source address of zeroes. 161 */ 162 if (ro == NULL) { 163 ro = &iproute; 164 memset(ro, 0, sizeof(*ro)); 165 } 166 167 dst = satosin(&ro->ro_dst); 168 169 /* 170 * If there is a cached route, check that it is to the same 171 * destination and is still up. If not, free it and try again. 172 */ 173 if (!rtisvalid(ro->ro_rt) || 174 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 175 ro->ro_tableid != m->m_pkthdr.ph_rtableid) { 176 rtfree(ro->ro_rt); 177 ro->ro_rt = NULL; 178 } 179 180 if (ro->ro_rt == NULL) { 181 dst->sin_family = AF_INET; 182 dst->sin_len = sizeof(*dst); 183 dst->sin_addr = ip->ip_dst; 184 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 185 } 186 187 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 188 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 189 imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) { 190 191 mtu = ifp->if_mtu; 192 if (ip->ip_src.s_addr == INADDR_ANY) { 193 struct in_ifaddr *ia; 194 195 KERNEL_LOCK(); 196 IFP_TO_IA(ifp, ia); 197 if (ia != NULL) 198 ip->ip_src = ia->ia_addr.sin_addr; 199 KERNEL_UNLOCK(); 200 } 201 } else { 202 struct in_ifaddr *ia; 203 204 if (ro->ro_rt == NULL) 205 ro->ro_rt = rtalloc_mpath(&ro->ro_dst, 206 &ip->ip_src.s_addr, ro->ro_tableid); 207 208 if (ro->ro_rt == NULL) { 209 ipstat_inc(ips_noroute); 210 error = EHOSTUNREACH; 211 goto bad; 212 } 213 214 ia = ifatoia(ro->ro_rt->rt_ifa); 215 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) 216 ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid)); 217 else 218 ifp = if_get(ro->ro_rt->rt_ifidx); 219 if (ifp == NULL) { 220 error = EHOSTUNREACH; 221 goto bad; 222 } 223 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 224 mtu = ifp->if_mtu; 225 226 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 227 dst = satosin(ro->ro_rt->rt_gateway); 228 229 /* Set the source IP address */ 230 if (ip->ip_src.s_addr == INADDR_ANY && ia) 231 ip->ip_src = ia->ia_addr.sin_addr; 232 } 233 234 #ifdef IPSEC 235 if (ipsec_in_use || inp != NULL) { 236 KERNEL_LOCK(); 237 /* Do we have any pending SAs to apply ? */ 238 tdb = ip_output_ipsec_lookup(m, hlen, &error, inp, 239 ipsecflowinfo); 240 KERNEL_UNLOCK(); 241 if (error != 0) { 242 /* Should silently drop packet */ 243 if (error == -EINVAL) 244 error = 0; 245 m_freem(m); 246 goto done; 247 } 248 if (tdb != NULL) { 249 /* 250 * If it needs TCP/UDP hardware-checksumming, do the 251 * computation now. 252 */ 253 in_proto_cksum_out(m, NULL); 254 } 255 } 256 #endif /* IPSEC */ 257 258 if (IN_MULTICAST(ip->ip_dst.s_addr) || 259 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 260 261 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 262 M_BCAST : M_MCAST; 263 264 /* 265 * IP destination address is multicast. Make sure "dst" 266 * still points to the address in "ro". (It may have been 267 * changed to point to a gateway address, above.) 268 */ 269 dst = satosin(&ro->ro_dst); 270 271 /* 272 * See if the caller provided any multicast options 273 */ 274 if (imo != NULL) 275 ip->ip_ttl = imo->imo_ttl; 276 else 277 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 278 279 /* 280 * if we don't know the outgoing ifp yet, we can't generate 281 * output 282 */ 283 if (!ifp) { 284 ipstat_inc(ips_noroute); 285 error = EHOSTUNREACH; 286 goto bad; 287 } 288 289 /* 290 * Confirm that the outgoing interface supports multicast, 291 * but only if the packet actually is going out on that 292 * interface (i.e., no IPsec is applied). 293 */ 294 if ((((m->m_flags & M_MCAST) && 295 (ifp->if_flags & IFF_MULTICAST) == 0) || 296 ((m->m_flags & M_BCAST) && 297 (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) { 298 ipstat_inc(ips_noroute); 299 error = ENETUNREACH; 300 goto bad; 301 } 302 303 /* 304 * If source address not specified yet, use address 305 * of outgoing interface. 306 */ 307 if (ip->ip_src.s_addr == INADDR_ANY) { 308 struct in_ifaddr *ia; 309 310 KERNEL_LOCK(); 311 IFP_TO_IA(ifp, ia); 312 if (ia != NULL) 313 ip->ip_src = ia->ia_addr.sin_addr; 314 KERNEL_UNLOCK(); 315 } 316 317 if ((imo == NULL || imo->imo_loop) && 318 in_hasmulti(&ip->ip_dst, ifp)) { 319 /* 320 * If we belong to the destination multicast group 321 * on the outgoing interface, and the caller did not 322 * forbid loopback, loop back a copy. 323 * Can't defer TCP/UDP checksumming, do the 324 * computation now. 325 */ 326 in_proto_cksum_out(m, NULL); 327 ip_mloopback(ifp, m, dst); 328 } 329 #ifdef MROUTING 330 else { 331 /* 332 * If we are acting as a multicast router, perform 333 * multicast forwarding as if the packet had just 334 * arrived on the interface to which we are about 335 * to send. The multicast forwarding function 336 * recursively calls this function, using the 337 * IP_FORWARDING flag to prevent infinite recursion. 338 * 339 * Multicasts that are looped back by ip_mloopback(), 340 * above, will be forwarded by the ip_input() routine, 341 * if necessary. 342 */ 343 if (ipmforwarding && ip_mrouter[ifp->if_rdomain] && 344 (flags & IP_FORWARDING) == 0) { 345 KERNEL_LOCK(); 346 rv = ip_mforward(m, ifp); 347 KERNEL_UNLOCK(); 348 if (rv != 0) { 349 m_freem(m); 350 goto done; 351 } 352 } 353 } 354 #endif 355 /* 356 * Multicasts with a time-to-live of zero may be looped- 357 * back, above, but must not be transmitted on a network. 358 * Also, multicasts addressed to the loopback interface 359 * are not sent -- the above call to ip_mloopback() will 360 * loop back a copy if this host actually belongs to the 361 * destination group on the loopback interface. 362 */ 363 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 364 m_freem(m); 365 goto done; 366 } 367 368 goto sendit; 369 } 370 371 /* 372 * Look for broadcast address and verify user is allowed to send 373 * such a packet; if the packet is going in an IPsec tunnel, skip 374 * this check. 375 */ 376 if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) || 377 (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) { 378 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 379 error = EADDRNOTAVAIL; 380 goto bad; 381 } 382 if ((flags & IP_ALLOWBROADCAST) == 0) { 383 error = EACCES; 384 goto bad; 385 } 386 387 /* Don't allow broadcast messages to be fragmented */ 388 if (ntohs(ip->ip_len) > ifp->if_mtu) { 389 error = EMSGSIZE; 390 goto bad; 391 } 392 m->m_flags |= M_BCAST; 393 } else 394 m->m_flags &= ~M_BCAST; 395 396 sendit: 397 /* 398 * If we're doing Path MTU discovery, we need to set DF unless 399 * the route's MTU is locked. 400 */ 401 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 402 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 403 ip->ip_off |= htons(IP_DF); 404 405 #ifdef IPSEC 406 /* 407 * Check if the packet needs encapsulation. 408 */ 409 if (tdb != NULL) { 410 KERNEL_LOCK(); 411 /* Callee frees mbuf */ 412 error = ip_output_ipsec_send(tdb, m, ifp, ro); 413 KERNEL_UNLOCK(); 414 goto done; 415 } 416 #endif /* IPSEC */ 417 418 /* 419 * Packet filter 420 */ 421 #if NPF > 0 422 if (pf_test(AF_INET, PF_OUT, ifp, &m) != PF_PASS) { 423 error = EACCES; 424 m_freem(m); 425 goto done; 426 } 427 if (m == NULL) 428 goto done; 429 ip = mtod(m, struct ip *); 430 hlen = ip->ip_hl << 2; 431 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 432 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 433 /* already rerun the route lookup, go on */ 434 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 435 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 436 /* tag as generated to skip over pf_test on rerun */ 437 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 438 ro = NULL; 439 if_put(ifp); /* drop reference since target changed */ 440 ifp = NULL; 441 goto reroute; 442 } 443 #endif 444 in_proto_cksum_out(m, ifp); 445 446 #ifdef IPSEC 447 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 448 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 449 error = EHOSTUNREACH; 450 m_freem(m); 451 goto done; 452 } 453 #endif 454 455 /* 456 * If small enough for interface, can just send directly. 457 */ 458 if (ntohs(ip->ip_len) <= mtu) { 459 ip->ip_sum = 0; 460 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 461 (ifp->if_bridgeport == NULL)) 462 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 463 else { 464 ipstat_inc(ips_outswcsum); 465 ip->ip_sum = in_cksum(m, hlen); 466 } 467 468 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 469 goto done; 470 } 471 472 /* 473 * Too large for interface; fragment if possible. 474 * Must be able to put at least 8 bytes per fragment. 475 */ 476 if (ip->ip_off & htons(IP_DF)) { 477 #ifdef IPSEC 478 if (ip_mtudisc) 479 ipsec_adjust_mtu(m, ifp->if_mtu); 480 #endif 481 error = EMSGSIZE; 482 /* 483 * This case can happen if the user changed the MTU 484 * of an interface after enabling IP on it. Because 485 * most netifs don't keep track of routes pointing to 486 * them, there is no way for one to update all its 487 * routes when the MTU is changed. 488 */ 489 if (rtisvalid(ro->ro_rt) && 490 ISSET(ro->ro_rt->rt_flags, RTF_HOST) && 491 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 492 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 493 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 494 } 495 ipstat_inc(ips_cantfrag); 496 goto bad; 497 } 498 499 error = ip_fragment(m, ifp, mtu); 500 if (error) { 501 m = m0 = NULL; 502 goto bad; 503 } 504 505 for (; m; m = m0) { 506 m0 = m->m_nextpkt; 507 m->m_nextpkt = 0; 508 if (error == 0) 509 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 510 else 511 m_freem(m); 512 } 513 514 if (error == 0) 515 ipstat_inc(ips_fragmented); 516 517 done: 518 if (ro == &iproute && ro->ro_rt) 519 rtfree(ro->ro_rt); 520 if_put(ifp); 521 return (error); 522 bad: 523 m_freem(m0); 524 goto done; 525 } 526 527 #ifdef IPSEC 528 struct tdb * 529 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 530 int ipsecflowinfo) 531 { 532 struct m_tag *mtag; 533 struct tdb_ident *tdbi; 534 struct tdb *tdb; 535 536 /* Do we have any pending SAs to apply ? */ 537 tdb = ipsp_spd_lookup(m, AF_INET, hlen, error, IPSP_DIRECTION_OUT, 538 NULL, inp, ipsecflowinfo); 539 if (tdb == NULL) 540 return NULL; 541 /* Loop detection */ 542 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 543 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 544 continue; 545 tdbi = (struct tdb_ident *)(mtag + 1); 546 if (tdbi->spi == tdb->tdb_spi && 547 tdbi->proto == tdb->tdb_sproto && 548 tdbi->rdomain == tdb->tdb_rdomain && 549 !memcmp(&tdbi->dst, &tdb->tdb_dst, 550 sizeof(union sockaddr_union))) { 551 /* no IPsec needed */ 552 return NULL; 553 } 554 } 555 return tdb; 556 } 557 558 int 559 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct ifnet *ifp, 560 struct route *ro) 561 { 562 #if NPF > 0 563 struct ifnet *encif; 564 #endif 565 struct ip *ip; 566 567 #if NPF > 0 568 /* 569 * Packet filter 570 */ 571 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 572 pf_test(AF_INET, PF_OUT, encif, &m) != PF_PASS) { 573 m_freem(m); 574 return EACCES; 575 } 576 if (m == NULL) 577 return 0; 578 /* 579 * PF_TAG_REROUTE handling or not... 580 * Packet is entering IPsec so the routing is 581 * already overruled by the IPsec policy. 582 * Until now the change was not reconsidered. 583 * What's the behaviour? 584 */ 585 in_proto_cksum_out(m, encif); 586 #endif 587 588 /* Check if we are allowed to fragment */ 589 ip = mtod(m, struct ip *); 590 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 591 ntohs(ip->ip_len) > tdb->tdb_mtu && 592 tdb->tdb_mtutimeout > time_second) { 593 struct rtentry *rt = NULL; 594 int rt_mtucloned = 0; 595 int transportmode = 0; 596 597 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 598 (tdb->tdb_dst.sin.sin_addr.s_addr == ip->ip_dst.s_addr); 599 600 /* Find a host route to store the mtu in */ 601 if (ro != NULL) 602 rt = ro->ro_rt; 603 /* but don't add a PMTU route for transport mode SAs */ 604 if (transportmode) 605 rt = NULL; 606 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 607 rt = icmp_mtudisc_clone(ip->ip_dst, 608 m->m_pkthdr.ph_rtableid); 609 rt_mtucloned = 1; 610 } 611 DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__, 612 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned)); 613 if (rt != NULL) { 614 rt->rt_rmx.rmx_mtu = tdb->tdb_mtu; 615 if (ro && ro->ro_rt != NULL) { 616 rtfree(ro->ro_rt); 617 ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, 618 m->m_pkthdr.ph_rtableid); 619 } 620 if (rt_mtucloned) 621 rtfree(rt); 622 } 623 ipsec_adjust_mtu(m, tdb->tdb_mtu); 624 m_freem(m); 625 return EMSGSIZE; 626 } 627 628 /* 629 * Clear these -- they'll be set in the recursive invocation 630 * as needed. 631 */ 632 m->m_flags &= ~(M_MCAST | M_BCAST); 633 634 /* Callee frees mbuf */ 635 return ipsp_process_packet(m, tdb, AF_INET, 0); 636 } 637 #endif /* IPSEC */ 638 639 int 640 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 641 { 642 struct ip *ip, *mhip; 643 struct mbuf *m0; 644 int len, hlen, off; 645 int mhlen, firstlen; 646 struct mbuf **mnext; 647 int fragments = 0; 648 int error = 0; 649 650 ip = mtod(m, struct ip *); 651 hlen = ip->ip_hl << 2; 652 653 len = (mtu - hlen) &~ 7; 654 if (len < 8) { 655 m_freem(m); 656 return (EMSGSIZE); 657 } 658 659 /* 660 * If we are doing fragmentation, we can't defer TCP/UDP 661 * checksumming; compute the checksum and clear the flag. 662 */ 663 in_proto_cksum_out(m, NULL); 664 firstlen = len; 665 mnext = &m->m_nextpkt; 666 667 /* 668 * Loop through length of segment after first fragment, 669 * make new header and copy data of each part and link onto chain. 670 */ 671 m0 = m; 672 mhlen = sizeof (struct ip); 673 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 674 MGETHDR(m, M_DONTWAIT, MT_HEADER); 675 if (m == NULL) { 676 ipstat_inc(ips_odropped); 677 error = ENOBUFS; 678 goto sendorfree; 679 } 680 *mnext = m; 681 mnext = &m->m_nextpkt; 682 m->m_data += max_linkhdr; 683 mhip = mtod(m, struct ip *); 684 *mhip = *ip; 685 /* we must inherit MCAST/BCAST flags, routing table and prio */ 686 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 687 m->m_pkthdr.ph_rtableid = m0->m_pkthdr.ph_rtableid; 688 m->m_pkthdr.pf.prio = m0->m_pkthdr.pf.prio; 689 if (hlen > sizeof (struct ip)) { 690 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 691 mhip->ip_hl = mhlen >> 2; 692 } 693 m->m_len = mhlen; 694 mhip->ip_off = ((off - hlen) >> 3) + 695 (ntohs(ip->ip_off) & ~IP_MF); 696 if (ip->ip_off & htons(IP_MF)) 697 mhip->ip_off |= IP_MF; 698 if (off + len >= ntohs(ip->ip_len)) 699 len = ntohs(ip->ip_len) - off; 700 else 701 mhip->ip_off |= IP_MF; 702 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 703 m->m_next = m_copym(m0, off, len, M_NOWAIT); 704 if (m->m_next == 0) { 705 ipstat_inc(ips_odropped); 706 error = ENOBUFS; 707 goto sendorfree; 708 } 709 m->m_pkthdr.len = mhlen + len; 710 m->m_pkthdr.ph_ifidx = 0; 711 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 712 mhip->ip_sum = 0; 713 if ((ifp != NULL) && 714 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 715 (ifp->if_bridgeport == NULL)) 716 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 717 else { 718 ipstat_inc(ips_outswcsum); 719 mhip->ip_sum = in_cksum(m, mhlen); 720 } 721 ipstat_inc(ips_ofragments); 722 fragments++; 723 } 724 /* 725 * Update first fragment by trimming what's been copied out 726 * and updating header, then send each fragment (in order). 727 */ 728 m = m0; 729 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 730 m->m_pkthdr.len = hlen + firstlen; 731 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 732 ip->ip_off |= htons(IP_MF); 733 ip->ip_sum = 0; 734 if ((ifp != NULL) && 735 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 736 (ifp->if_bridgeport == NULL)) 737 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 738 else { 739 ipstat_inc(ips_outswcsum); 740 ip->ip_sum = in_cksum(m, hlen); 741 } 742 sendorfree: 743 if (error) { 744 for (m = m0; m; m = m0) { 745 m0 = m->m_nextpkt; 746 m->m_nextpkt = NULL; 747 m_freem(m); 748 } 749 } 750 751 return (error); 752 } 753 754 /* 755 * Insert IP options into preformed packet. 756 * Adjust IP destination as required for IP source routing, 757 * as indicated by a non-zero in_addr at the start of the options. 758 */ 759 struct mbuf * 760 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 761 { 762 struct ipoption *p = mtod(opt, struct ipoption *); 763 struct mbuf *n; 764 struct ip *ip = mtod(m, struct ip *); 765 unsigned int optlen; 766 767 optlen = opt->m_len - sizeof(p->ipopt_dst); 768 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 769 return (m); /* XXX should fail */ 770 if (p->ipopt_dst.s_addr) 771 ip->ip_dst = p->ipopt_dst; 772 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 773 MGETHDR(n, M_DONTWAIT, MT_HEADER); 774 if (n == NULL) 775 return (m); 776 M_MOVE_HDR(n, m); 777 n->m_pkthdr.len += optlen; 778 m->m_len -= sizeof(struct ip); 779 m->m_data += sizeof(struct ip); 780 n->m_next = m; 781 m = n; 782 m->m_len = optlen + sizeof(struct ip); 783 m->m_data += max_linkhdr; 784 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip)); 785 } else { 786 m->m_data -= optlen; 787 m->m_len += optlen; 788 m->m_pkthdr.len += optlen; 789 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 790 } 791 ip = mtod(m, struct ip *); 792 memcpy(ip + 1, p->ipopt_list, optlen); 793 *phlen = sizeof(struct ip) + optlen; 794 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 795 return (m); 796 } 797 798 /* 799 * Copy options from ip to jp, 800 * omitting those not copied during fragmentation. 801 */ 802 int 803 ip_optcopy(struct ip *ip, struct ip *jp) 804 { 805 u_char *cp, *dp; 806 int opt, optlen, cnt; 807 808 cp = (u_char *)(ip + 1); 809 dp = (u_char *)(jp + 1); 810 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 811 for (; cnt > 0; cnt -= optlen, cp += optlen) { 812 opt = cp[0]; 813 if (opt == IPOPT_EOL) 814 break; 815 if (opt == IPOPT_NOP) { 816 /* Preserve for IP mcast tunnel's LSRR alignment. */ 817 *dp++ = IPOPT_NOP; 818 optlen = 1; 819 continue; 820 } 821 #ifdef DIAGNOSTIC 822 if (cnt < IPOPT_OLEN + sizeof(*cp)) 823 panic("malformed IPv4 option passed to ip_optcopy"); 824 #endif 825 optlen = cp[IPOPT_OLEN]; 826 #ifdef DIAGNOSTIC 827 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 828 panic("malformed IPv4 option passed to ip_optcopy"); 829 #endif 830 /* bogus lengths should have been caught by ip_dooptions */ 831 if (optlen > cnt) 832 optlen = cnt; 833 if (IPOPT_COPIED(opt)) { 834 memcpy(dp, cp, optlen); 835 dp += optlen; 836 } 837 } 838 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 839 *dp++ = IPOPT_EOL; 840 return (optlen); 841 } 842 843 /* 844 * IP socket option processing. 845 */ 846 int 847 ip_ctloutput(int op, struct socket *so, int level, int optname, 848 struct mbuf *m) 849 { 850 struct inpcb *inp = sotoinpcb(so); 851 int optval = 0; 852 struct proc *p = curproc; /* XXX */ 853 int error = 0; 854 u_int rtid = 0; 855 856 if (level != IPPROTO_IP) { 857 error = EINVAL; 858 if (op == PRCO_SETOPT) 859 (void) m_free(m); 860 } else switch (op) { 861 case PRCO_SETOPT: 862 switch (optname) { 863 case IP_OPTIONS: 864 return (ip_pcbopts(&inp->inp_options, m)); 865 866 case IP_TOS: 867 case IP_TTL: 868 case IP_MINTTL: 869 case IP_RECVOPTS: 870 case IP_RECVRETOPTS: 871 case IP_RECVDSTADDR: 872 case IP_RECVIF: 873 case IP_RECVTTL: 874 case IP_RECVDSTPORT: 875 case IP_RECVRTABLE: 876 case IP_IPSECFLOWINFO: 877 if (m == NULL || m->m_len != sizeof(int)) 878 error = EINVAL; 879 else { 880 optval = *mtod(m, int *); 881 switch (optname) { 882 883 case IP_TOS: 884 inp->inp_ip.ip_tos = optval; 885 break; 886 887 case IP_TTL: 888 if (optval > 0 && optval <= MAXTTL) 889 inp->inp_ip.ip_ttl = optval; 890 else if (optval == -1) 891 inp->inp_ip.ip_ttl = ip_defttl; 892 else 893 error = EINVAL; 894 break; 895 896 case IP_MINTTL: 897 if (optval >= 0 && optval <= MAXTTL) 898 inp->inp_ip_minttl = optval; 899 else 900 error = EINVAL; 901 break; 902 #define OPTSET(bit) \ 903 if (optval) \ 904 inp->inp_flags |= bit; \ 905 else \ 906 inp->inp_flags &= ~bit; 907 908 case IP_RECVOPTS: 909 OPTSET(INP_RECVOPTS); 910 break; 911 912 case IP_RECVRETOPTS: 913 OPTSET(INP_RECVRETOPTS); 914 break; 915 916 case IP_RECVDSTADDR: 917 OPTSET(INP_RECVDSTADDR); 918 break; 919 case IP_RECVIF: 920 OPTSET(INP_RECVIF); 921 break; 922 case IP_RECVTTL: 923 OPTSET(INP_RECVTTL); 924 break; 925 case IP_RECVDSTPORT: 926 OPTSET(INP_RECVDSTPORT); 927 break; 928 case IP_RECVRTABLE: 929 OPTSET(INP_RECVRTABLE); 930 break; 931 case IP_IPSECFLOWINFO: 932 OPTSET(INP_IPSECFLOWINFO); 933 break; 934 } 935 } 936 break; 937 #undef OPTSET 938 939 case IP_MULTICAST_IF: 940 case IP_MULTICAST_TTL: 941 case IP_MULTICAST_LOOP: 942 case IP_ADD_MEMBERSHIP: 943 case IP_DROP_MEMBERSHIP: 944 error = ip_setmoptions(optname, &inp->inp_moptions, m, 945 inp->inp_rtableid); 946 break; 947 948 case IP_PORTRANGE: 949 if (m == NULL || m->m_len != sizeof(int)) 950 error = EINVAL; 951 else { 952 optval = *mtod(m, int *); 953 954 switch (optval) { 955 956 case IP_PORTRANGE_DEFAULT: 957 inp->inp_flags &= ~(INP_LOWPORT); 958 inp->inp_flags &= ~(INP_HIGHPORT); 959 break; 960 961 case IP_PORTRANGE_HIGH: 962 inp->inp_flags &= ~(INP_LOWPORT); 963 inp->inp_flags |= INP_HIGHPORT; 964 break; 965 966 case IP_PORTRANGE_LOW: 967 inp->inp_flags &= ~(INP_HIGHPORT); 968 inp->inp_flags |= INP_LOWPORT; 969 break; 970 971 default: 972 973 error = EINVAL; 974 break; 975 } 976 } 977 break; 978 case IP_AUTH_LEVEL: 979 case IP_ESP_TRANS_LEVEL: 980 case IP_ESP_NETWORK_LEVEL: 981 case IP_IPCOMP_LEVEL: 982 #ifndef IPSEC 983 error = EOPNOTSUPP; 984 #else 985 if (m == NULL || m->m_len != sizeof(int)) { 986 error = EINVAL; 987 break; 988 } 989 optval = *mtod(m, int *); 990 991 if (optval < IPSEC_LEVEL_BYPASS || 992 optval > IPSEC_LEVEL_UNIQUE) { 993 error = EINVAL; 994 break; 995 } 996 997 switch (optname) { 998 case IP_AUTH_LEVEL: 999 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1000 suser(p, 0)) { 1001 error = EACCES; 1002 break; 1003 } 1004 inp->inp_seclevel[SL_AUTH] = optval; 1005 break; 1006 1007 case IP_ESP_TRANS_LEVEL: 1008 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1009 suser(p, 0)) { 1010 error = EACCES; 1011 break; 1012 } 1013 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1014 break; 1015 1016 case IP_ESP_NETWORK_LEVEL: 1017 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1018 suser(p, 0)) { 1019 error = EACCES; 1020 break; 1021 } 1022 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1023 break; 1024 case IP_IPCOMP_LEVEL: 1025 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1026 suser(p, 0)) { 1027 error = EACCES; 1028 break; 1029 } 1030 inp->inp_seclevel[SL_IPCOMP] = optval; 1031 break; 1032 } 1033 #endif 1034 break; 1035 1036 case IP_IPSEC_LOCAL_ID: 1037 case IP_IPSEC_REMOTE_ID: 1038 error = EOPNOTSUPP; 1039 break; 1040 case SO_RTABLE: 1041 if (m == NULL || m->m_len < sizeof(u_int)) { 1042 error = EINVAL; 1043 break; 1044 } 1045 rtid = *mtod(m, u_int *); 1046 if (inp->inp_rtableid == rtid) 1047 break; 1048 /* needs privileges to switch when already set */ 1049 if (p->p_p->ps_rtableid != rtid && 1050 p->p_p->ps_rtableid != 0 && 1051 (error = suser(p, 0)) != 0) 1052 break; 1053 /* table must exist */ 1054 if (!rtable_exists(rtid)) { 1055 error = EINVAL; 1056 break; 1057 } 1058 if (inp->inp_lport) { 1059 error = EBUSY; 1060 break; 1061 } 1062 inp->inp_rtableid = rtid; 1063 in_pcbrehash(inp); 1064 break; 1065 case IP_PIPEX: 1066 if (m != NULL && m->m_len == sizeof(int)) 1067 inp->inp_pipex = *mtod(m, int *); 1068 else 1069 error = EINVAL; 1070 break; 1071 1072 default: 1073 error = ENOPROTOOPT; 1074 break; 1075 } 1076 m_free(m); 1077 break; 1078 1079 case PRCO_GETOPT: 1080 switch (optname) { 1081 case IP_OPTIONS: 1082 case IP_RETOPTS: 1083 if (inp->inp_options) { 1084 m->m_len = inp->inp_options->m_len; 1085 memcpy(mtod(m, caddr_t), 1086 mtod(inp->inp_options, caddr_t), m->m_len); 1087 } else 1088 m->m_len = 0; 1089 break; 1090 1091 case IP_TOS: 1092 case IP_TTL: 1093 case IP_MINTTL: 1094 case IP_RECVOPTS: 1095 case IP_RECVRETOPTS: 1096 case IP_RECVDSTADDR: 1097 case IP_RECVIF: 1098 case IP_RECVTTL: 1099 case IP_RECVDSTPORT: 1100 case IP_RECVRTABLE: 1101 case IP_IPSECFLOWINFO: 1102 case IP_IPDEFTTL: 1103 m->m_len = sizeof(int); 1104 switch (optname) { 1105 1106 case IP_TOS: 1107 optval = inp->inp_ip.ip_tos; 1108 break; 1109 1110 case IP_TTL: 1111 optval = inp->inp_ip.ip_ttl; 1112 break; 1113 1114 case IP_MINTTL: 1115 optval = inp->inp_ip_minttl; 1116 break; 1117 1118 case IP_IPDEFTTL: 1119 optval = ip_defttl; 1120 break; 1121 1122 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1123 1124 case IP_RECVOPTS: 1125 optval = OPTBIT(INP_RECVOPTS); 1126 break; 1127 1128 case IP_RECVRETOPTS: 1129 optval = OPTBIT(INP_RECVRETOPTS); 1130 break; 1131 1132 case IP_RECVDSTADDR: 1133 optval = OPTBIT(INP_RECVDSTADDR); 1134 break; 1135 case IP_RECVIF: 1136 optval = OPTBIT(INP_RECVIF); 1137 break; 1138 case IP_RECVTTL: 1139 optval = OPTBIT(INP_RECVTTL); 1140 break; 1141 case IP_RECVDSTPORT: 1142 optval = OPTBIT(INP_RECVDSTPORT); 1143 break; 1144 case IP_RECVRTABLE: 1145 optval = OPTBIT(INP_RECVRTABLE); 1146 break; 1147 case IP_IPSECFLOWINFO: 1148 optval = OPTBIT(INP_IPSECFLOWINFO); 1149 break; 1150 } 1151 *mtod(m, int *) = optval; 1152 break; 1153 1154 case IP_MULTICAST_IF: 1155 case IP_MULTICAST_TTL: 1156 case IP_MULTICAST_LOOP: 1157 case IP_ADD_MEMBERSHIP: 1158 case IP_DROP_MEMBERSHIP: 1159 error = ip_getmoptions(optname, inp->inp_moptions, m); 1160 break; 1161 1162 case IP_PORTRANGE: 1163 m->m_len = sizeof(int); 1164 1165 if (inp->inp_flags & INP_HIGHPORT) 1166 optval = IP_PORTRANGE_HIGH; 1167 else if (inp->inp_flags & INP_LOWPORT) 1168 optval = IP_PORTRANGE_LOW; 1169 else 1170 optval = 0; 1171 1172 *mtod(m, int *) = optval; 1173 break; 1174 1175 case IP_AUTH_LEVEL: 1176 case IP_ESP_TRANS_LEVEL: 1177 case IP_ESP_NETWORK_LEVEL: 1178 case IP_IPCOMP_LEVEL: 1179 #ifndef IPSEC 1180 m->m_len = sizeof(int); 1181 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1182 #else 1183 m->m_len = sizeof(int); 1184 switch (optname) { 1185 case IP_AUTH_LEVEL: 1186 optval = inp->inp_seclevel[SL_AUTH]; 1187 break; 1188 1189 case IP_ESP_TRANS_LEVEL: 1190 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1191 break; 1192 1193 case IP_ESP_NETWORK_LEVEL: 1194 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1195 break; 1196 case IP_IPCOMP_LEVEL: 1197 optval = inp->inp_seclevel[SL_IPCOMP]; 1198 break; 1199 } 1200 *mtod(m, int *) = optval; 1201 #endif 1202 break; 1203 case IP_IPSEC_LOCAL_ID: 1204 case IP_IPSEC_REMOTE_ID: 1205 error = EOPNOTSUPP; 1206 break; 1207 case SO_RTABLE: 1208 m->m_len = sizeof(u_int); 1209 *mtod(m, u_int *) = inp->inp_rtableid; 1210 break; 1211 case IP_PIPEX: 1212 m->m_len = sizeof(int); 1213 *mtod(m, int *) = inp->inp_pipex; 1214 break; 1215 default: 1216 error = ENOPROTOOPT; 1217 break; 1218 } 1219 break; 1220 } 1221 return (error); 1222 } 1223 1224 /* 1225 * Set up IP options in pcb for insertion in output packets. 1226 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1227 * with destination address if source routed. 1228 */ 1229 int 1230 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m) 1231 { 1232 int cnt, optlen; 1233 u_char *cp; 1234 u_char opt; 1235 1236 /* turn off any old options */ 1237 m_free(*pcbopt); 1238 *pcbopt = 0; 1239 if (m == NULL || m->m_len == 0) { 1240 /* 1241 * Only turning off any previous options. 1242 */ 1243 m_free(m); 1244 return (0); 1245 } 1246 1247 if (m->m_len % sizeof(int32_t)) 1248 goto bad; 1249 1250 /* 1251 * IP first-hop destination address will be stored before 1252 * actual options; move other options back 1253 * and clear it when none present. 1254 */ 1255 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1256 goto bad; 1257 cnt = m->m_len; 1258 m->m_len += sizeof(struct in_addr); 1259 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1260 memmove((caddr_t)cp, mtod(m, caddr_t), (unsigned)cnt); 1261 memset(mtod(m, caddr_t), 0, sizeof(struct in_addr)); 1262 1263 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1264 opt = cp[IPOPT_OPTVAL]; 1265 if (opt == IPOPT_EOL) 1266 break; 1267 if (opt == IPOPT_NOP) 1268 optlen = 1; 1269 else { 1270 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1271 goto bad; 1272 optlen = cp[IPOPT_OLEN]; 1273 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1274 goto bad; 1275 } 1276 switch (opt) { 1277 1278 default: 1279 break; 1280 1281 case IPOPT_LSRR: 1282 case IPOPT_SSRR: 1283 /* 1284 * user process specifies route as: 1285 * ->A->B->C->D 1286 * D must be our final destination (but we can't 1287 * check that since we may not have connected yet). 1288 * A is first hop destination, which doesn't appear in 1289 * actual IP option, but is stored before the options. 1290 */ 1291 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1292 goto bad; 1293 m->m_len -= sizeof(struct in_addr); 1294 cnt -= sizeof(struct in_addr); 1295 optlen -= sizeof(struct in_addr); 1296 cp[IPOPT_OLEN] = optlen; 1297 /* 1298 * Move first hop before start of options. 1299 */ 1300 memcpy(mtod(m, caddr_t), &cp[IPOPT_OFFSET+1], 1301 sizeof(struct in_addr)); 1302 /* 1303 * Then copy rest of options back 1304 * to close up the deleted entry. 1305 */ 1306 memmove((caddr_t)&cp[IPOPT_OFFSET+1], 1307 (caddr_t)(&cp[IPOPT_OFFSET+1] + 1308 sizeof(struct in_addr)), 1309 (unsigned)cnt - (IPOPT_OFFSET+1)); 1310 break; 1311 } 1312 } 1313 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1314 goto bad; 1315 *pcbopt = m; 1316 return (0); 1317 1318 bad: 1319 (void)m_free(m); 1320 return (EINVAL); 1321 } 1322 1323 /* 1324 * Set the IP multicast options in response to user setsockopt(). 1325 */ 1326 int 1327 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m, 1328 u_int rtableid) 1329 { 1330 struct in_addr addr; 1331 struct in_ifaddr *ia; 1332 struct ip_mreq *mreq; 1333 struct ifnet *ifp = NULL; 1334 struct ip_moptions *imo = *imop; 1335 struct in_multi **immp; 1336 struct rtentry *rt; 1337 struct sockaddr_in sin; 1338 int i, error = 0; 1339 u_char loop; 1340 1341 if (imo == NULL) { 1342 /* 1343 * No multicast option buffer attached to the pcb; 1344 * allocate one and initialize to default values. 1345 */ 1346 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO); 1347 immp = (struct in_multi **)malloc( 1348 (sizeof(*immp) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 1349 M_WAITOK|M_ZERO); 1350 *imop = imo; 1351 imo->imo_ifidx = 0; 1352 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 1353 imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP; 1354 imo->imo_num_memberships = 0; 1355 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1356 imo->imo_membership = immp; 1357 } 1358 1359 switch (optname) { 1360 1361 case IP_MULTICAST_IF: 1362 /* 1363 * Select the interface for outgoing multicast packets. 1364 */ 1365 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1366 error = EINVAL; 1367 break; 1368 } 1369 addr = *(mtod(m, struct in_addr *)); 1370 /* 1371 * INADDR_ANY is used to remove a previous selection. 1372 * When no interface is selected, a default one is 1373 * chosen every time a multicast packet is sent. 1374 */ 1375 if (addr.s_addr == INADDR_ANY) { 1376 imo->imo_ifidx = 0; 1377 break; 1378 } 1379 /* 1380 * The selected interface is identified by its local 1381 * IP address. Find the interface and confirm that 1382 * it supports multicasting. 1383 */ 1384 memset(&sin, 0, sizeof(sin)); 1385 sin.sin_len = sizeof(sin); 1386 sin.sin_family = AF_INET; 1387 sin.sin_addr = addr; 1388 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1389 if (ia == NULL || 1390 (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) { 1391 error = EADDRNOTAVAIL; 1392 break; 1393 } 1394 imo->imo_ifidx = ia->ia_ifp->if_index; 1395 break; 1396 1397 case IP_MULTICAST_TTL: 1398 /* 1399 * Set the IP time-to-live for outgoing multicast packets. 1400 */ 1401 if (m == NULL || m->m_len != 1) { 1402 error = EINVAL; 1403 break; 1404 } 1405 imo->imo_ttl = *(mtod(m, u_char *)); 1406 break; 1407 1408 case IP_MULTICAST_LOOP: 1409 /* 1410 * Set the loopback flag for outgoing multicast packets. 1411 * Must be zero or one. 1412 */ 1413 if (m == NULL || m->m_len != 1 || 1414 (loop = *(mtod(m, u_char *))) > 1) { 1415 error = EINVAL; 1416 break; 1417 } 1418 imo->imo_loop = loop; 1419 break; 1420 1421 case IP_ADD_MEMBERSHIP: 1422 /* 1423 * Add a multicast group membership. 1424 * Group must be a valid IP multicast address. 1425 */ 1426 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1427 error = EINVAL; 1428 break; 1429 } 1430 mreq = mtod(m, struct ip_mreq *); 1431 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1432 error = EINVAL; 1433 break; 1434 } 1435 /* 1436 * If no interface address was provided, use the interface of 1437 * the route to the given multicast address. 1438 */ 1439 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1440 memset(&sin, 0, sizeof(sin)); 1441 sin.sin_len = sizeof(sin); 1442 sin.sin_family = AF_INET; 1443 sin.sin_addr = mreq->imr_multiaddr; 1444 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 1445 if (!rtisvalid(rt)) { 1446 rtfree(rt); 1447 error = EADDRNOTAVAIL; 1448 break; 1449 } 1450 } else { 1451 memset(&sin, 0, sizeof(sin)); 1452 sin.sin_len = sizeof(sin); 1453 sin.sin_family = AF_INET; 1454 sin.sin_addr = mreq->imr_interface; 1455 rt = rtalloc(sintosa(&sin), 0, rtableid); 1456 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 1457 rtfree(rt); 1458 error = EADDRNOTAVAIL; 1459 break; 1460 } 1461 } 1462 ifp = if_get(rt->rt_ifidx); 1463 rtfree(rt); 1464 1465 /* 1466 * See if we found an interface, and confirm that it 1467 * supports multicast. 1468 */ 1469 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1470 error = EADDRNOTAVAIL; 1471 if_put(ifp); 1472 break; 1473 } 1474 /* 1475 * See if the membership already exists or if all the 1476 * membership slots are full. 1477 */ 1478 for (i = 0; i < imo->imo_num_memberships; ++i) { 1479 if (imo->imo_membership[i]->inm_ifidx 1480 == ifp->if_index && 1481 imo->imo_membership[i]->inm_addr.s_addr 1482 == mreq->imr_multiaddr.s_addr) 1483 break; 1484 } 1485 if (i < imo->imo_num_memberships) { 1486 error = EADDRINUSE; 1487 if_put(ifp); 1488 break; 1489 } 1490 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1491 struct in_multi **nmships, **omships; 1492 size_t newmax; 1493 /* 1494 * Resize the vector to next power-of-two minus 1. If the 1495 * size would exceed the maximum then we know we've really 1496 * run out of entries. Otherwise, we reallocate the vector. 1497 */ 1498 nmships = NULL; 1499 omships = imo->imo_membership; 1500 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1501 if (newmax <= IP_MAX_MEMBERSHIPS) { 1502 nmships = (struct in_multi **)malloc( 1503 sizeof(*nmships) * newmax, M_IPMOPTS, 1504 M_NOWAIT|M_ZERO); 1505 if (nmships != NULL) { 1506 memcpy(nmships, omships, 1507 sizeof(*omships) * 1508 imo->imo_max_memberships); 1509 free(omships, M_IPMOPTS, 1510 sizeof(*omships) * 1511 imo->imo_max_memberships); 1512 imo->imo_membership = nmships; 1513 imo->imo_max_memberships = newmax; 1514 } 1515 } 1516 if (nmships == NULL) { 1517 error = ENOBUFS; 1518 if_put(ifp); 1519 break; 1520 } 1521 } 1522 /* 1523 * Everything looks good; add a new record to the multicast 1524 * address list for the given interface. 1525 */ 1526 if ((imo->imo_membership[i] = 1527 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1528 error = ENOBUFS; 1529 if_put(ifp); 1530 break; 1531 } 1532 ++imo->imo_num_memberships; 1533 if_put(ifp); 1534 break; 1535 1536 case IP_DROP_MEMBERSHIP: 1537 /* 1538 * Drop a multicast group membership. 1539 * Group must be a valid IP multicast address. 1540 */ 1541 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1542 error = EINVAL; 1543 break; 1544 } 1545 mreq = mtod(m, struct ip_mreq *); 1546 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1547 error = EINVAL; 1548 break; 1549 } 1550 /* 1551 * If an interface address was specified, get a pointer 1552 * to its ifnet structure. 1553 */ 1554 if (mreq->imr_interface.s_addr == INADDR_ANY) 1555 ifp = NULL; 1556 else { 1557 memset(&sin, 0, sizeof(sin)); 1558 sin.sin_len = sizeof(sin); 1559 sin.sin_family = AF_INET; 1560 sin.sin_addr = mreq->imr_interface; 1561 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1562 if (ia == NULL) { 1563 error = EADDRNOTAVAIL; 1564 break; 1565 } 1566 ifp = ia->ia_ifp; 1567 } 1568 /* 1569 * Find the membership in the membership array. 1570 */ 1571 for (i = 0; i < imo->imo_num_memberships; ++i) { 1572 if ((ifp == NULL || 1573 imo->imo_membership[i]->inm_ifidx == 1574 ifp->if_index) && 1575 imo->imo_membership[i]->inm_addr.s_addr == 1576 mreq->imr_multiaddr.s_addr) 1577 break; 1578 } 1579 if (i == imo->imo_num_memberships) { 1580 error = EADDRNOTAVAIL; 1581 break; 1582 } 1583 /* 1584 * Give up the multicast address record to which the 1585 * membership points. 1586 */ 1587 in_delmulti(imo->imo_membership[i]); 1588 /* 1589 * Remove the gap in the membership array. 1590 */ 1591 for (++i; i < imo->imo_num_memberships; ++i) 1592 imo->imo_membership[i-1] = imo->imo_membership[i]; 1593 --imo->imo_num_memberships; 1594 break; 1595 1596 default: 1597 error = EOPNOTSUPP; 1598 break; 1599 } 1600 1601 /* 1602 * If all options have default values, no need to keep the data. 1603 */ 1604 if (imo->imo_ifidx == 0 && 1605 imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL && 1606 imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP && 1607 imo->imo_num_memberships == 0) { 1608 free(imo->imo_membership , M_IPMOPTS, 0); 1609 free(*imop, M_IPMOPTS, sizeof(**imop)); 1610 *imop = NULL; 1611 } 1612 1613 return (error); 1614 } 1615 1616 /* 1617 * Return the IP multicast options in response to user getsockopt(). 1618 */ 1619 int 1620 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf *m) 1621 { 1622 u_char *ttl; 1623 u_char *loop; 1624 struct in_addr *addr; 1625 struct in_ifaddr *ia; 1626 struct ifnet *ifp; 1627 1628 switch (optname) { 1629 1630 case IP_MULTICAST_IF: 1631 addr = mtod(m, struct in_addr *); 1632 m->m_len = sizeof(struct in_addr); 1633 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL) 1634 addr->s_addr = INADDR_ANY; 1635 else { 1636 IFP_TO_IA(ifp, ia); 1637 if_put(ifp); 1638 addr->s_addr = (ia == NULL) ? INADDR_ANY 1639 : ia->ia_addr.sin_addr.s_addr; 1640 } 1641 return (0); 1642 1643 case IP_MULTICAST_TTL: 1644 ttl = mtod(m, u_char *); 1645 m->m_len = 1; 1646 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1647 : imo->imo_ttl; 1648 return (0); 1649 1650 case IP_MULTICAST_LOOP: 1651 loop = mtod(m, u_char *); 1652 m->m_len = 1; 1653 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1654 : imo->imo_loop; 1655 return (0); 1656 1657 default: 1658 return (EOPNOTSUPP); 1659 } 1660 } 1661 1662 /* 1663 * Discard the IP multicast options. 1664 */ 1665 void 1666 ip_freemoptions(struct ip_moptions *imo) 1667 { 1668 int i; 1669 1670 if (imo != NULL) { 1671 for (i = 0; i < imo->imo_num_memberships; ++i) 1672 in_delmulti(imo->imo_membership[i]); 1673 free(imo->imo_membership, M_IPMOPTS, 0); 1674 free(imo, M_IPMOPTS, sizeof(*imo)); 1675 } 1676 } 1677 1678 /* 1679 * Routine called from ip_output() to loop back a copy of an IP multicast 1680 * packet to the input queue of a specified interface. 1681 */ 1682 void 1683 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1684 { 1685 struct ip *ip; 1686 struct mbuf *copym; 1687 1688 copym = m_dup_pkt(m, max_linkhdr, M_DONTWAIT); 1689 if (copym != NULL) { 1690 /* 1691 * We don't bother to fragment if the IP length is greater 1692 * than the interface's MTU. Can this possibly matter? 1693 */ 1694 ip = mtod(copym, struct ip *); 1695 ip->ip_sum = 0; 1696 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1697 if_input_local(ifp, copym, dst->sin_family); 1698 } 1699 } 1700 1701 /* 1702 * Compute significant parts of the IPv4 checksum pseudo-header 1703 * for use in a delayed TCP/UDP checksum calculation. 1704 */ 1705 static __inline u_int16_t __attribute__((__unused__)) 1706 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 1707 { 1708 u_int32_t sum; 1709 1710 sum = lenproto + 1711 (u_int16_t)(src >> 16) + 1712 (u_int16_t)(src /*& 0xffff*/) + 1713 (u_int16_t)(dst >> 16) + 1714 (u_int16_t)(dst /*& 0xffff*/); 1715 1716 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 1717 1718 if (sum > 0xffff) 1719 sum -= 0xffff; 1720 1721 return (sum); 1722 } 1723 1724 /* 1725 * Process a delayed payload checksum calculation. 1726 */ 1727 void 1728 in_delayed_cksum(struct mbuf *m) 1729 { 1730 struct ip *ip; 1731 u_int16_t csum, offset; 1732 1733 ip = mtod(m, struct ip *); 1734 offset = ip->ip_hl << 2; 1735 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1736 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1737 csum = 0xffff; 1738 1739 switch (ip->ip_p) { 1740 case IPPROTO_TCP: 1741 offset += offsetof(struct tcphdr, th_sum); 1742 break; 1743 1744 case IPPROTO_UDP: 1745 offset += offsetof(struct udphdr, uh_sum); 1746 break; 1747 1748 case IPPROTO_ICMP: 1749 offset += offsetof(struct icmp, icmp_cksum); 1750 break; 1751 1752 default: 1753 return; 1754 } 1755 1756 if ((offset + sizeof(u_int16_t)) > m->m_len) 1757 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1758 else 1759 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1760 } 1761 1762 void 1763 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 1764 { 1765 struct ip *ip = mtod(m, struct ip *); 1766 1767 /* some hw and in_delayed_cksum need the pseudo header cksum */ 1768 if (m->m_pkthdr.csum_flags & 1769 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 1770 u_int16_t csum = 0, offset; 1771 1772 offset = ip->ip_hl << 2; 1773 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 1774 csum = in_cksum_phdr(ip->ip_src.s_addr, 1775 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 1776 offset + ip->ip_p)); 1777 if (ip->ip_p == IPPROTO_TCP) 1778 offset += offsetof(struct tcphdr, th_sum); 1779 else if (ip->ip_p == IPPROTO_UDP) 1780 offset += offsetof(struct udphdr, uh_sum); 1781 else if (ip->ip_p == IPPROTO_ICMP) 1782 offset += offsetof(struct icmp, icmp_cksum); 1783 if ((offset + sizeof(u_int16_t)) > m->m_len) 1784 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1785 else 1786 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1787 } 1788 1789 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 1790 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 1791 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1792 tcpstat_inc(tcps_outswcsum); 1793 in_delayed_cksum(m); 1794 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 1795 } 1796 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 1797 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 1798 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1799 udpstat_inc(udps_outswcsum); 1800 in_delayed_cksum(m); 1801 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 1802 } 1803 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 1804 in_delayed_cksum(m); 1805 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 1806 } 1807 } 1808