1 /* $OpenBSD: ip_output.c,v 1.325 2016/07/01 18:28:58 jca Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_enc.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 #endif /* IPSEC */ 74 75 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 76 static __inline u_int16_t __attribute__((__unused__)) 77 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 78 void in_delayed_cksum(struct mbuf *); 79 80 #ifdef IPSEC 81 struct tdb * 82 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 83 int ipsecflowinfo); 84 int 85 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct ifnet *ifp, 86 struct route *ro); 87 #endif /* IPSEC */ 88 89 /* 90 * IP output. The packet in mbuf chain m contains a skeletal IP 91 * header (with len, off, ttl, proto, tos, src, dst). 92 * The mbuf chain containing the packet will be freed. 93 * The mbuf opt, if present, will not be freed. 94 */ 95 int 96 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 97 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 98 { 99 struct ip *ip; 100 struct ifnet *ifp = NULL; 101 struct mbuf *m = m0; 102 int hlen = sizeof (struct ip); 103 int len, error = 0; 104 struct route iproute; 105 struct sockaddr_in *dst; 106 struct tdb *tdb = NULL; 107 u_long mtu; 108 #if defined(MROUTING) 109 int rv; 110 #endif 111 112 #ifdef IPSEC 113 if (inp && (inp->inp_flags & INP_IPV6) != 0) 114 panic("ip_output: IPv6 pcb is passed"); 115 #endif /* IPSEC */ 116 117 #ifdef DIAGNOSTIC 118 if ((m->m_flags & M_PKTHDR) == 0) 119 panic("ip_output no HDR"); 120 #endif 121 if (opt) { 122 m = ip_insertoptions(m, opt, &len); 123 hlen = len; 124 } 125 126 ip = mtod(m, struct ip *); 127 128 /* 129 * Fill in IP header. 130 */ 131 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 132 ip->ip_v = IPVERSION; 133 ip->ip_off &= htons(IP_DF); 134 ip->ip_id = htons(ip_randomid()); 135 ip->ip_hl = hlen >> 2; 136 ipstat.ips_localout++; 137 } else { 138 hlen = ip->ip_hl << 2; 139 } 140 141 /* 142 * We should not send traffic to 0/8 say both Stevens and RFCs 143 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 144 */ 145 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 146 error = ENETUNREACH; 147 goto bad; 148 } 149 150 #if NPF > 0 151 reroute: 152 #endif 153 154 /* 155 * Do a route lookup now in case we need the source address to 156 * do an SPD lookup in IPsec; for most packets, the source address 157 * is set at a higher level protocol. ICMPs and other packets 158 * though (e.g., traceroute) have a source address of zeroes. 159 */ 160 if (ro == NULL) { 161 ro = &iproute; 162 memset(ro, 0, sizeof(*ro)); 163 } 164 165 dst = satosin(&ro->ro_dst); 166 167 /* 168 * If there is a cached route, check that it is to the same 169 * destination and is still up. If not, free it and try again. 170 */ 171 if (!rtisvalid(ro->ro_rt) || 172 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 173 ro->ro_tableid != m->m_pkthdr.ph_rtableid) { 174 rtfree(ro->ro_rt); 175 ro->ro_rt = NULL; 176 } 177 178 if (ro->ro_rt == NULL) { 179 dst->sin_family = AF_INET; 180 dst->sin_len = sizeof(*dst); 181 dst->sin_addr = ip->ip_dst; 182 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 183 } 184 185 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 186 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 187 imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) { 188 189 mtu = ifp->if_mtu; 190 if (ip->ip_src.s_addr == INADDR_ANY) { 191 struct in_ifaddr *ia; 192 193 KERNEL_LOCK(); 194 IFP_TO_IA(ifp, ia); 195 if (ia != NULL) 196 ip->ip_src = ia->ia_addr.sin_addr; 197 KERNEL_UNLOCK(); 198 } 199 } else { 200 struct in_ifaddr *ia; 201 202 if (ro->ro_rt == NULL) 203 ro->ro_rt = rtalloc_mpath(&ro->ro_dst, 204 &ip->ip_src.s_addr, ro->ro_tableid); 205 206 if (ro->ro_rt == NULL) { 207 ipstat.ips_noroute++; 208 error = EHOSTUNREACH; 209 goto bad; 210 } 211 212 ia = ifatoia(ro->ro_rt->rt_ifa); 213 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) 214 ifp = if_get(lo0ifidx); 215 else 216 ifp = if_get(ro->ro_rt->rt_ifidx); 217 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 218 mtu = ifp->if_mtu; 219 220 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 221 dst = satosin(ro->ro_rt->rt_gateway); 222 223 /* Set the source IP address */ 224 if (ip->ip_src.s_addr == INADDR_ANY && ia) 225 ip->ip_src = ia->ia_addr.sin_addr; 226 } 227 228 #ifdef IPSEC 229 if (ipsec_in_use || inp != NULL) { 230 KERNEL_LOCK(); 231 /* Do we have any pending SAs to apply ? */ 232 tdb = ip_output_ipsec_lookup(m, hlen, &error, inp, 233 ipsecflowinfo); 234 KERNEL_UNLOCK(); 235 if (error != 0) { 236 /* Should silently drop packet */ 237 if (error == -EINVAL) 238 error = 0; 239 m_freem(m); 240 goto done; 241 } 242 if (tdb != NULL) { 243 /* 244 * If it needs TCP/UDP hardware-checksumming, do the 245 * computation now. 246 */ 247 in_proto_cksum_out(m, NULL); 248 249 /* If it's not a multicast packet, try to fast-path */ 250 if (!IN_MULTICAST(ip->ip_dst.s_addr)) { 251 goto sendit; 252 } 253 } 254 } 255 #endif /* IPSEC */ 256 257 if (IN_MULTICAST(ip->ip_dst.s_addr) || 258 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 259 260 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 261 M_BCAST : M_MCAST; 262 263 /* 264 * IP destination address is multicast. Make sure "dst" 265 * still points to the address in "ro". (It may have been 266 * changed to point to a gateway address, above.) 267 */ 268 dst = satosin(&ro->ro_dst); 269 270 /* 271 * See if the caller provided any multicast options 272 */ 273 if (imo != NULL) 274 ip->ip_ttl = imo->imo_ttl; 275 else 276 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 277 278 /* 279 * if we don't know the outgoing ifp yet, we can't generate 280 * output 281 */ 282 if (!ifp) { 283 ipstat.ips_noroute++; 284 error = EHOSTUNREACH; 285 goto bad; 286 } 287 288 /* 289 * Confirm that the outgoing interface supports multicast, 290 * but only if the packet actually is going out on that 291 * interface (i.e., no IPsec is applied). 292 */ 293 if ((((m->m_flags & M_MCAST) && 294 (ifp->if_flags & IFF_MULTICAST) == 0) || 295 ((m->m_flags & M_BCAST) && 296 (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) { 297 ipstat.ips_noroute++; 298 error = ENETUNREACH; 299 goto bad; 300 } 301 302 /* 303 * If source address not specified yet, use address 304 * of outgoing interface. 305 */ 306 if (ip->ip_src.s_addr == INADDR_ANY) { 307 struct in_ifaddr *ia; 308 309 KERNEL_LOCK(); 310 IFP_TO_IA(ifp, ia); 311 if (ia != NULL) 312 ip->ip_src = ia->ia_addr.sin_addr; 313 KERNEL_UNLOCK(); 314 } 315 316 if ((imo == NULL || imo->imo_loop) && 317 in_hasmulti(&ip->ip_dst, ifp)) { 318 /* 319 * If we belong to the destination multicast group 320 * on the outgoing interface, and the caller did not 321 * forbid loopback, loop back a copy. 322 * Can't defer TCP/UDP checksumming, do the 323 * computation now. 324 */ 325 in_proto_cksum_out(m, NULL); 326 ip_mloopback(ifp, m, dst); 327 } 328 #ifdef MROUTING 329 else { 330 /* 331 * If we are acting as a multicast router, perform 332 * multicast forwarding as if the packet had just 333 * arrived on the interface to which we are about 334 * to send. The multicast forwarding function 335 * recursively calls this function, using the 336 * IP_FORWARDING flag to prevent infinite recursion. 337 * 338 * Multicasts that are looped back by ip_mloopback(), 339 * above, will be forwarded by the ip_input() routine, 340 * if necessary. 341 */ 342 if (ipmforwarding && ip_mrouter && 343 (flags & IP_FORWARDING) == 0) { 344 KERNEL_LOCK(); 345 rv = ip_mforward(m, ifp); 346 KERNEL_UNLOCK(); 347 if (rv != 0) { 348 m_freem(m); 349 goto done; 350 } 351 } 352 } 353 #endif 354 /* 355 * Multicasts with a time-to-live of zero may be looped- 356 * back, above, but must not be transmitted on a network. 357 * Also, multicasts addressed to the loopback interface 358 * are not sent -- the above call to ip_mloopback() will 359 * loop back a copy if this host actually belongs to the 360 * destination group on the loopback interface. 361 */ 362 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 363 m_freem(m); 364 goto done; 365 } 366 367 goto sendit; 368 } 369 370 /* 371 * Look for broadcast address and verify user is allowed to send 372 * such a packet; if the packet is going in an IPsec tunnel, skip 373 * this check. 374 */ 375 if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) || 376 (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) { 377 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 378 error = EADDRNOTAVAIL; 379 goto bad; 380 } 381 if ((flags & IP_ALLOWBROADCAST) == 0) { 382 error = EACCES; 383 goto bad; 384 } 385 386 /* Don't allow broadcast messages to be fragmented */ 387 if (ntohs(ip->ip_len) > ifp->if_mtu) { 388 error = EMSGSIZE; 389 goto bad; 390 } 391 m->m_flags |= M_BCAST; 392 } else 393 m->m_flags &= ~M_BCAST; 394 395 sendit: 396 /* 397 * If we're doing Path MTU discovery, we need to set DF unless 398 * the route's MTU is locked. 399 */ 400 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 401 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 402 ip->ip_off |= htons(IP_DF); 403 404 #ifdef IPSEC 405 /* 406 * Check if the packet needs encapsulation. 407 */ 408 if (tdb != NULL) { 409 KERNEL_LOCK(); 410 /* Callee frees mbuf */ 411 error = ip_output_ipsec_send(tdb, m, ifp, ro); 412 KERNEL_UNLOCK(); 413 goto done; 414 } 415 #endif /* IPSEC */ 416 417 /* 418 * Packet filter 419 */ 420 #if NPF > 0 421 if (pf_test(AF_INET, PF_OUT, ifp, &m) != PF_PASS) { 422 error = EACCES; 423 m_freem(m); 424 goto done; 425 } 426 if (m == NULL) 427 goto done; 428 ip = mtod(m, struct ip *); 429 hlen = ip->ip_hl << 2; 430 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 431 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 432 /* already rerun the route lookup, go on */ 433 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 434 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 435 /* tag as generated to skip over pf_test on rerun */ 436 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 437 ro = NULL; 438 if_put(ifp); /* drop reference since target changed */ 439 ifp = NULL; 440 goto reroute; 441 } 442 #endif 443 in_proto_cksum_out(m, ifp); 444 445 #ifdef IPSEC 446 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 447 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 448 error = EHOSTUNREACH; 449 m_freem(m); 450 goto done; 451 } 452 #endif 453 454 /* 455 * If small enough for interface, can just send directly. 456 */ 457 if (ntohs(ip->ip_len) <= mtu) { 458 ip->ip_sum = 0; 459 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 460 (ifp->if_bridgeport == NULL)) 461 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 462 else { 463 ipstat.ips_outswcsum++; 464 ip->ip_sum = in_cksum(m, hlen); 465 } 466 467 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 468 goto done; 469 } 470 471 /* 472 * Too large for interface; fragment if possible. 473 * Must be able to put at least 8 bytes per fragment. 474 */ 475 if (ip->ip_off & htons(IP_DF)) { 476 #ifdef IPSEC 477 if (ip_mtudisc) 478 ipsec_adjust_mtu(m, ifp->if_mtu); 479 #endif 480 error = EMSGSIZE; 481 /* 482 * This case can happen if the user changed the MTU 483 * of an interface after enabling IP on it. Because 484 * most netifs don't keep track of routes pointing to 485 * them, there is no way for one to update all its 486 * routes when the MTU is changed. 487 */ 488 if (rtisvalid(ro->ro_rt) && 489 ISSET(ro->ro_rt->rt_flags, RTF_HOST) && 490 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 491 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 492 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 493 } 494 ipstat.ips_cantfrag++; 495 goto bad; 496 } 497 498 error = ip_fragment(m, ifp, mtu); 499 if (error) { 500 m = m0 = NULL; 501 goto bad; 502 } 503 504 for (; m; m = m0) { 505 m0 = m->m_nextpkt; 506 m->m_nextpkt = 0; 507 if (error == 0) 508 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 509 else 510 m_freem(m); 511 } 512 513 if (error == 0) 514 ipstat.ips_fragmented++; 515 516 done: 517 if (ro == &iproute && ro->ro_rt) 518 rtfree(ro->ro_rt); 519 if_put(ifp); 520 return (error); 521 bad: 522 m_freem(m0); 523 goto done; 524 } 525 526 #ifdef IPSEC 527 struct tdb * 528 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 529 int ipsecflowinfo) 530 { 531 struct m_tag *mtag; 532 struct tdb_ident *tdbi; 533 struct tdb *tdb; 534 535 /* Do we have any pending SAs to apply ? */ 536 tdb = ipsp_spd_lookup(m, AF_INET, hlen, error, IPSP_DIRECTION_OUT, 537 NULL, inp, ipsecflowinfo); 538 if (tdb == NULL) 539 return NULL; 540 /* Loop detection */ 541 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 542 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 543 continue; 544 tdbi = (struct tdb_ident *)(mtag + 1); 545 if (tdbi->spi == tdb->tdb_spi && 546 tdbi->proto == tdb->tdb_sproto && 547 tdbi->rdomain == tdb->tdb_rdomain && 548 !memcmp(&tdbi->dst, &tdb->tdb_dst, 549 sizeof(union sockaddr_union))) { 550 /* no IPsec needed */ 551 return NULL; 552 } 553 } 554 return tdb; 555 } 556 557 int 558 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct ifnet *ifp, 559 struct route *ro) 560 { 561 #if NPF > 0 562 struct ifnet *encif; 563 #endif 564 struct ip *ip; 565 566 #if NPF > 0 567 /* 568 * Packet filter 569 */ 570 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 571 pf_test(AF_INET, PF_OUT, encif, &m) != PF_PASS) { 572 m_freem(m); 573 return EACCES; 574 } 575 if (m == NULL) 576 return 0; 577 /* 578 * PF_TAG_REROUTE handling or not... 579 * Packet is entering IPsec so the routing is 580 * already overruled by the IPsec policy. 581 * Until now the change was not reconsidered. 582 * What's the behaviour? 583 */ 584 in_proto_cksum_out(m, encif); 585 #endif 586 587 /* Check if we are allowed to fragment */ 588 ip = mtod(m, struct ip *); 589 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 590 ntohs(ip->ip_len) > tdb->tdb_mtu && 591 tdb->tdb_mtutimeout > time_second) { 592 struct rtentry *rt = NULL; 593 int rt_mtucloned = 0; 594 int transportmode = 0; 595 596 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 597 (tdb->tdb_dst.sin.sin_addr.s_addr == ip->ip_dst.s_addr); 598 599 /* Find a host route to store the mtu in */ 600 if (ro != NULL) 601 rt = ro->ro_rt; 602 /* but don't add a PMTU route for transport mode SAs */ 603 if (transportmode) 604 rt = NULL; 605 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 606 rt = icmp_mtudisc_clone(ip->ip_dst, 607 m->m_pkthdr.ph_rtableid); 608 rt_mtucloned = 1; 609 } 610 DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__, 611 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned)); 612 if (rt != NULL) { 613 rt->rt_rmx.rmx_mtu = tdb->tdb_mtu; 614 if (ro && ro->ro_rt != NULL) { 615 rtfree(ro->ro_rt); 616 ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, 617 m->m_pkthdr.ph_rtableid); 618 } 619 if (rt_mtucloned) 620 rtfree(rt); 621 } 622 ipsec_adjust_mtu(m, tdb->tdb_mtu); 623 m_freem(m); 624 return EMSGSIZE; 625 } 626 627 /* 628 * Clear these -- they'll be set in the recursive invocation 629 * as needed. 630 */ 631 m->m_flags &= ~(M_MCAST | M_BCAST); 632 633 /* Callee frees mbuf */ 634 return ipsp_process_packet(m, tdb, AF_INET, 0); 635 } 636 #endif /* IPSEC */ 637 638 int 639 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 640 { 641 struct ip *ip, *mhip; 642 struct mbuf *m0; 643 int len, hlen, off; 644 int mhlen, firstlen; 645 struct mbuf **mnext; 646 int fragments = 0; 647 int error = 0; 648 649 ip = mtod(m, struct ip *); 650 hlen = ip->ip_hl << 2; 651 652 len = (mtu - hlen) &~ 7; 653 if (len < 8) { 654 m_freem(m); 655 return (EMSGSIZE); 656 } 657 658 /* 659 * If we are doing fragmentation, we can't defer TCP/UDP 660 * checksumming; compute the checksum and clear the flag. 661 */ 662 in_proto_cksum_out(m, NULL); 663 firstlen = len; 664 mnext = &m->m_nextpkt; 665 666 /* 667 * Loop through length of segment after first fragment, 668 * make new header and copy data of each part and link onto chain. 669 */ 670 m0 = m; 671 mhlen = sizeof (struct ip); 672 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 673 MGETHDR(m, M_DONTWAIT, MT_HEADER); 674 if (m == NULL) { 675 ipstat.ips_odropped++; 676 error = ENOBUFS; 677 goto sendorfree; 678 } 679 *mnext = m; 680 mnext = &m->m_nextpkt; 681 m->m_data += max_linkhdr; 682 mhip = mtod(m, struct ip *); 683 *mhip = *ip; 684 /* we must inherit MCAST/BCAST flags, routing table and prio */ 685 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 686 m->m_pkthdr.ph_rtableid = m0->m_pkthdr.ph_rtableid; 687 m->m_pkthdr.pf.prio = m0->m_pkthdr.pf.prio; 688 if (hlen > sizeof (struct ip)) { 689 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 690 mhip->ip_hl = mhlen >> 2; 691 } 692 m->m_len = mhlen; 693 mhip->ip_off = ((off - hlen) >> 3) + 694 (ntohs(ip->ip_off) & ~IP_MF); 695 if (ip->ip_off & htons(IP_MF)) 696 mhip->ip_off |= IP_MF; 697 if (off + len >= ntohs(ip->ip_len)) 698 len = ntohs(ip->ip_len) - off; 699 else 700 mhip->ip_off |= IP_MF; 701 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 702 m->m_next = m_copym(m0, off, len, M_NOWAIT); 703 if (m->m_next == 0) { 704 ipstat.ips_odropped++; 705 error = ENOBUFS; 706 goto sendorfree; 707 } 708 m->m_pkthdr.len = mhlen + len; 709 m->m_pkthdr.ph_ifidx = 0; 710 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 711 mhip->ip_sum = 0; 712 if ((ifp != NULL) && 713 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 714 (ifp->if_bridgeport == NULL)) 715 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 716 else { 717 ipstat.ips_outswcsum++; 718 mhip->ip_sum = in_cksum(m, mhlen); 719 } 720 ipstat.ips_ofragments++; 721 fragments++; 722 } 723 /* 724 * Update first fragment by trimming what's been copied out 725 * and updating header, then send each fragment (in order). 726 */ 727 m = m0; 728 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 729 m->m_pkthdr.len = hlen + firstlen; 730 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 731 ip->ip_off |= htons(IP_MF); 732 ip->ip_sum = 0; 733 if ((ifp != NULL) && 734 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 735 (ifp->if_bridgeport == NULL)) 736 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 737 else { 738 ipstat.ips_outswcsum++; 739 ip->ip_sum = in_cksum(m, hlen); 740 } 741 sendorfree: 742 if (error) { 743 for (m = m0; m; m = m0) { 744 m0 = m->m_nextpkt; 745 m->m_nextpkt = NULL; 746 m_freem(m); 747 } 748 } 749 750 return (error); 751 } 752 753 /* 754 * Insert IP options into preformed packet. 755 * Adjust IP destination as required for IP source routing, 756 * as indicated by a non-zero in_addr at the start of the options. 757 */ 758 struct mbuf * 759 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 760 { 761 struct ipoption *p = mtod(opt, struct ipoption *); 762 struct mbuf *n; 763 struct ip *ip = mtod(m, struct ip *); 764 unsigned int optlen; 765 766 optlen = opt->m_len - sizeof(p->ipopt_dst); 767 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 768 return (m); /* XXX should fail */ 769 if (p->ipopt_dst.s_addr) 770 ip->ip_dst = p->ipopt_dst; 771 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 772 MGETHDR(n, M_DONTWAIT, MT_HEADER); 773 if (n == NULL) 774 return (m); 775 M_MOVE_HDR(n, m); 776 n->m_pkthdr.len += optlen; 777 m->m_len -= sizeof(struct ip); 778 m->m_data += sizeof(struct ip); 779 n->m_next = m; 780 m = n; 781 m->m_len = optlen + sizeof(struct ip); 782 m->m_data += max_linkhdr; 783 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip)); 784 } else { 785 m->m_data -= optlen; 786 m->m_len += optlen; 787 m->m_pkthdr.len += optlen; 788 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 789 } 790 ip = mtod(m, struct ip *); 791 memcpy(ip + 1, p->ipopt_list, optlen); 792 *phlen = sizeof(struct ip) + optlen; 793 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 794 return (m); 795 } 796 797 /* 798 * Copy options from ip to jp, 799 * omitting those not copied during fragmentation. 800 */ 801 int 802 ip_optcopy(struct ip *ip, struct ip *jp) 803 { 804 u_char *cp, *dp; 805 int opt, optlen, cnt; 806 807 cp = (u_char *)(ip + 1); 808 dp = (u_char *)(jp + 1); 809 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 810 for (; cnt > 0; cnt -= optlen, cp += optlen) { 811 opt = cp[0]; 812 if (opt == IPOPT_EOL) 813 break; 814 if (opt == IPOPT_NOP) { 815 /* Preserve for IP mcast tunnel's LSRR alignment. */ 816 *dp++ = IPOPT_NOP; 817 optlen = 1; 818 continue; 819 } 820 #ifdef DIAGNOSTIC 821 if (cnt < IPOPT_OLEN + sizeof(*cp)) 822 panic("malformed IPv4 option passed to ip_optcopy"); 823 #endif 824 optlen = cp[IPOPT_OLEN]; 825 #ifdef DIAGNOSTIC 826 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 827 panic("malformed IPv4 option passed to ip_optcopy"); 828 #endif 829 /* bogus lengths should have been caught by ip_dooptions */ 830 if (optlen > cnt) 831 optlen = cnt; 832 if (IPOPT_COPIED(opt)) { 833 memcpy(dp, cp, optlen); 834 dp += optlen; 835 } 836 } 837 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 838 *dp++ = IPOPT_EOL; 839 return (optlen); 840 } 841 842 /* 843 * IP socket option processing. 844 */ 845 int 846 ip_ctloutput(int op, struct socket *so, int level, int optname, 847 struct mbuf **mp) 848 { 849 struct inpcb *inp = sotoinpcb(so); 850 struct mbuf *m = *mp; 851 int optval = 0; 852 struct proc *p = curproc; /* XXX */ 853 int error = 0; 854 u_int rtid = 0; 855 856 if (level != IPPROTO_IP) { 857 error = EINVAL; 858 if (op == PRCO_SETOPT) 859 (void) m_free(*mp); 860 } else switch (op) { 861 case PRCO_SETOPT: 862 switch (optname) { 863 case IP_OPTIONS: 864 return (ip_pcbopts(&inp->inp_options, m)); 865 866 case IP_TOS: 867 case IP_TTL: 868 case IP_MINTTL: 869 case IP_RECVOPTS: 870 case IP_RECVRETOPTS: 871 case IP_RECVDSTADDR: 872 case IP_RECVIF: 873 case IP_RECVTTL: 874 case IP_RECVDSTPORT: 875 case IP_RECVRTABLE: 876 case IP_IPSECFLOWINFO: 877 if (m == NULL || m->m_len != sizeof(int)) 878 error = EINVAL; 879 else { 880 optval = *mtod(m, int *); 881 switch (optname) { 882 883 case IP_TOS: 884 inp->inp_ip.ip_tos = optval; 885 break; 886 887 case IP_TTL: 888 if (optval > 0 && optval <= MAXTTL) 889 inp->inp_ip.ip_ttl = optval; 890 else if (optval == -1) 891 inp->inp_ip.ip_ttl = ip_defttl; 892 else 893 error = EINVAL; 894 break; 895 896 case IP_MINTTL: 897 if (optval >= 0 && optval <= MAXTTL) 898 inp->inp_ip_minttl = optval; 899 else 900 error = EINVAL; 901 break; 902 #define OPTSET(bit) \ 903 if (optval) \ 904 inp->inp_flags |= bit; \ 905 else \ 906 inp->inp_flags &= ~bit; 907 908 case IP_RECVOPTS: 909 OPTSET(INP_RECVOPTS); 910 break; 911 912 case IP_RECVRETOPTS: 913 OPTSET(INP_RECVRETOPTS); 914 break; 915 916 case IP_RECVDSTADDR: 917 OPTSET(INP_RECVDSTADDR); 918 break; 919 case IP_RECVIF: 920 OPTSET(INP_RECVIF); 921 break; 922 case IP_RECVTTL: 923 OPTSET(INP_RECVTTL); 924 break; 925 case IP_RECVDSTPORT: 926 OPTSET(INP_RECVDSTPORT); 927 break; 928 case IP_RECVRTABLE: 929 OPTSET(INP_RECVRTABLE); 930 break; 931 case IP_IPSECFLOWINFO: 932 OPTSET(INP_IPSECFLOWINFO); 933 break; 934 } 935 } 936 break; 937 #undef OPTSET 938 939 case IP_MULTICAST_IF: 940 case IP_MULTICAST_TTL: 941 case IP_MULTICAST_LOOP: 942 case IP_ADD_MEMBERSHIP: 943 case IP_DROP_MEMBERSHIP: 944 error = ip_setmoptions(optname, &inp->inp_moptions, m, 945 inp->inp_rtableid); 946 break; 947 948 case IP_PORTRANGE: 949 if (m == NULL || m->m_len != sizeof(int)) 950 error = EINVAL; 951 else { 952 optval = *mtod(m, int *); 953 954 switch (optval) { 955 956 case IP_PORTRANGE_DEFAULT: 957 inp->inp_flags &= ~(INP_LOWPORT); 958 inp->inp_flags &= ~(INP_HIGHPORT); 959 break; 960 961 case IP_PORTRANGE_HIGH: 962 inp->inp_flags &= ~(INP_LOWPORT); 963 inp->inp_flags |= INP_HIGHPORT; 964 break; 965 966 case IP_PORTRANGE_LOW: 967 inp->inp_flags &= ~(INP_HIGHPORT); 968 inp->inp_flags |= INP_LOWPORT; 969 break; 970 971 default: 972 973 error = EINVAL; 974 break; 975 } 976 } 977 break; 978 case IP_AUTH_LEVEL: 979 case IP_ESP_TRANS_LEVEL: 980 case IP_ESP_NETWORK_LEVEL: 981 case IP_IPCOMP_LEVEL: 982 #ifndef IPSEC 983 error = EOPNOTSUPP; 984 #else 985 if (m == NULL || m->m_len != sizeof(int)) { 986 error = EINVAL; 987 break; 988 } 989 optval = *mtod(m, int *); 990 991 if (optval < IPSEC_LEVEL_BYPASS || 992 optval > IPSEC_LEVEL_UNIQUE) { 993 error = EINVAL; 994 break; 995 } 996 997 switch (optname) { 998 case IP_AUTH_LEVEL: 999 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1000 suser(p, 0)) { 1001 error = EACCES; 1002 break; 1003 } 1004 inp->inp_seclevel[SL_AUTH] = optval; 1005 break; 1006 1007 case IP_ESP_TRANS_LEVEL: 1008 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1009 suser(p, 0)) { 1010 error = EACCES; 1011 break; 1012 } 1013 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1014 break; 1015 1016 case IP_ESP_NETWORK_LEVEL: 1017 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1018 suser(p, 0)) { 1019 error = EACCES; 1020 break; 1021 } 1022 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1023 break; 1024 case IP_IPCOMP_LEVEL: 1025 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1026 suser(p, 0)) { 1027 error = EACCES; 1028 break; 1029 } 1030 inp->inp_seclevel[SL_IPCOMP] = optval; 1031 break; 1032 } 1033 #endif 1034 break; 1035 1036 case IP_IPSEC_LOCAL_ID: 1037 case IP_IPSEC_REMOTE_ID: 1038 error = EOPNOTSUPP; 1039 break; 1040 case SO_RTABLE: 1041 if (m == NULL || m->m_len < sizeof(u_int)) { 1042 error = EINVAL; 1043 break; 1044 } 1045 rtid = *mtod(m, u_int *); 1046 if (inp->inp_rtableid == rtid) 1047 break; 1048 /* needs privileges to switch when already set */ 1049 if (p->p_p->ps_rtableid != rtid && 1050 p->p_p->ps_rtableid != 0 && 1051 (error = suser(p, 0)) != 0) 1052 break; 1053 /* table must exist */ 1054 if (!rtable_exists(rtid)) { 1055 error = EINVAL; 1056 break; 1057 } 1058 if (inp->inp_lport) { 1059 error = EBUSY; 1060 break; 1061 } 1062 inp->inp_rtableid = rtid; 1063 in_pcbrehash(inp); 1064 break; 1065 case IP_PIPEX: 1066 if (m != NULL && m->m_len == sizeof(int)) 1067 inp->inp_pipex = *mtod(m, int *); 1068 else 1069 error = EINVAL; 1070 break; 1071 1072 default: 1073 error = ENOPROTOOPT; 1074 break; 1075 } 1076 if (m) 1077 (void)m_free(m); 1078 break; 1079 1080 case PRCO_GETOPT: 1081 switch (optname) { 1082 case IP_OPTIONS: 1083 case IP_RETOPTS: 1084 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1085 if (inp->inp_options) { 1086 m->m_len = inp->inp_options->m_len; 1087 memcpy(mtod(m, caddr_t), 1088 mtod(inp->inp_options, caddr_t), m->m_len); 1089 } else 1090 m->m_len = 0; 1091 break; 1092 1093 case IP_TOS: 1094 case IP_TTL: 1095 case IP_MINTTL: 1096 case IP_RECVOPTS: 1097 case IP_RECVRETOPTS: 1098 case IP_RECVDSTADDR: 1099 case IP_RECVIF: 1100 case IP_RECVTTL: 1101 case IP_RECVDSTPORT: 1102 case IP_RECVRTABLE: 1103 case IP_IPSECFLOWINFO: 1104 case IP_IPDEFTTL: 1105 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1106 m->m_len = sizeof(int); 1107 switch (optname) { 1108 1109 case IP_TOS: 1110 optval = inp->inp_ip.ip_tos; 1111 break; 1112 1113 case IP_TTL: 1114 optval = inp->inp_ip.ip_ttl; 1115 break; 1116 1117 case IP_MINTTL: 1118 optval = inp->inp_ip_minttl; 1119 break; 1120 1121 case IP_IPDEFTTL: 1122 optval = ip_defttl; 1123 break; 1124 1125 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1126 1127 case IP_RECVOPTS: 1128 optval = OPTBIT(INP_RECVOPTS); 1129 break; 1130 1131 case IP_RECVRETOPTS: 1132 optval = OPTBIT(INP_RECVRETOPTS); 1133 break; 1134 1135 case IP_RECVDSTADDR: 1136 optval = OPTBIT(INP_RECVDSTADDR); 1137 break; 1138 case IP_RECVIF: 1139 optval = OPTBIT(INP_RECVIF); 1140 break; 1141 case IP_RECVTTL: 1142 optval = OPTBIT(INP_RECVTTL); 1143 break; 1144 case IP_RECVDSTPORT: 1145 optval = OPTBIT(INP_RECVDSTPORT); 1146 break; 1147 case IP_RECVRTABLE: 1148 optval = OPTBIT(INP_RECVRTABLE); 1149 break; 1150 case IP_IPSECFLOWINFO: 1151 optval = OPTBIT(INP_IPSECFLOWINFO); 1152 break; 1153 } 1154 *mtod(m, int *) = optval; 1155 break; 1156 1157 case IP_MULTICAST_IF: 1158 case IP_MULTICAST_TTL: 1159 case IP_MULTICAST_LOOP: 1160 case IP_ADD_MEMBERSHIP: 1161 case IP_DROP_MEMBERSHIP: 1162 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1163 break; 1164 1165 case IP_PORTRANGE: 1166 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1167 m->m_len = sizeof(int); 1168 1169 if (inp->inp_flags & INP_HIGHPORT) 1170 optval = IP_PORTRANGE_HIGH; 1171 else if (inp->inp_flags & INP_LOWPORT) 1172 optval = IP_PORTRANGE_LOW; 1173 else 1174 optval = 0; 1175 1176 *mtod(m, int *) = optval; 1177 break; 1178 1179 case IP_AUTH_LEVEL: 1180 case IP_ESP_TRANS_LEVEL: 1181 case IP_ESP_NETWORK_LEVEL: 1182 case IP_IPCOMP_LEVEL: 1183 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1184 #ifndef IPSEC 1185 m->m_len = sizeof(int); 1186 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1187 #else 1188 m->m_len = sizeof(int); 1189 switch (optname) { 1190 case IP_AUTH_LEVEL: 1191 optval = inp->inp_seclevel[SL_AUTH]; 1192 break; 1193 1194 case IP_ESP_TRANS_LEVEL: 1195 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1196 break; 1197 1198 case IP_ESP_NETWORK_LEVEL: 1199 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1200 break; 1201 case IP_IPCOMP_LEVEL: 1202 optval = inp->inp_seclevel[SL_IPCOMP]; 1203 break; 1204 } 1205 *mtod(m, int *) = optval; 1206 #endif 1207 break; 1208 case IP_IPSEC_LOCAL_ID: 1209 case IP_IPSEC_REMOTE_ID: 1210 error = EOPNOTSUPP; 1211 break; 1212 case SO_RTABLE: 1213 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1214 m->m_len = sizeof(u_int); 1215 *mtod(m, u_int *) = inp->inp_rtableid; 1216 break; 1217 case IP_PIPEX: 1218 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1219 m->m_len = sizeof(int); 1220 *mtod(m, int *) = inp->inp_pipex; 1221 break; 1222 default: 1223 error = ENOPROTOOPT; 1224 break; 1225 } 1226 break; 1227 } 1228 return (error); 1229 } 1230 1231 /* 1232 * Set up IP options in pcb for insertion in output packets. 1233 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1234 * with destination address if source routed. 1235 */ 1236 int 1237 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m) 1238 { 1239 int cnt, optlen; 1240 u_char *cp; 1241 u_char opt; 1242 1243 /* turn off any old options */ 1244 if (*pcbopt) 1245 (void)m_free(*pcbopt); 1246 *pcbopt = 0; 1247 if (m == NULL || m->m_len == 0) { 1248 /* 1249 * Only turning off any previous options. 1250 */ 1251 if (m) 1252 (void)m_free(m); 1253 return (0); 1254 } 1255 1256 if (m->m_len % sizeof(int32_t)) 1257 goto bad; 1258 1259 /* 1260 * IP first-hop destination address will be stored before 1261 * actual options; move other options back 1262 * and clear it when none present. 1263 */ 1264 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1265 goto bad; 1266 cnt = m->m_len; 1267 m->m_len += sizeof(struct in_addr); 1268 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1269 memmove((caddr_t)cp, mtod(m, caddr_t), (unsigned)cnt); 1270 memset(mtod(m, caddr_t), 0, sizeof(struct in_addr)); 1271 1272 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1273 opt = cp[IPOPT_OPTVAL]; 1274 if (opt == IPOPT_EOL) 1275 break; 1276 if (opt == IPOPT_NOP) 1277 optlen = 1; 1278 else { 1279 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1280 goto bad; 1281 optlen = cp[IPOPT_OLEN]; 1282 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1283 goto bad; 1284 } 1285 switch (opt) { 1286 1287 default: 1288 break; 1289 1290 case IPOPT_LSRR: 1291 case IPOPT_SSRR: 1292 /* 1293 * user process specifies route as: 1294 * ->A->B->C->D 1295 * D must be our final destination (but we can't 1296 * check that since we may not have connected yet). 1297 * A is first hop destination, which doesn't appear in 1298 * actual IP option, but is stored before the options. 1299 */ 1300 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1301 goto bad; 1302 m->m_len -= sizeof(struct in_addr); 1303 cnt -= sizeof(struct in_addr); 1304 optlen -= sizeof(struct in_addr); 1305 cp[IPOPT_OLEN] = optlen; 1306 /* 1307 * Move first hop before start of options. 1308 */ 1309 memcpy(mtod(m, caddr_t), &cp[IPOPT_OFFSET+1], 1310 sizeof(struct in_addr)); 1311 /* 1312 * Then copy rest of options back 1313 * to close up the deleted entry. 1314 */ 1315 memmove((caddr_t)&cp[IPOPT_OFFSET+1], 1316 (caddr_t)(&cp[IPOPT_OFFSET+1] + 1317 sizeof(struct in_addr)), 1318 (unsigned)cnt - (IPOPT_OFFSET+1)); 1319 break; 1320 } 1321 } 1322 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1323 goto bad; 1324 *pcbopt = m; 1325 return (0); 1326 1327 bad: 1328 (void)m_free(m); 1329 return (EINVAL); 1330 } 1331 1332 /* 1333 * Set the IP multicast options in response to user setsockopt(). 1334 */ 1335 int 1336 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m, 1337 u_int rtableid) 1338 { 1339 struct in_addr addr; 1340 struct in_ifaddr *ia; 1341 struct ip_mreq *mreq; 1342 struct ifnet *ifp = NULL; 1343 struct ip_moptions *imo = *imop; 1344 struct in_multi **immp; 1345 struct rtentry *rt; 1346 struct sockaddr_in sin; 1347 int i, error = 0; 1348 u_char loop; 1349 1350 if (imo == NULL) { 1351 /* 1352 * No multicast option buffer attached to the pcb; 1353 * allocate one and initialize to default values. 1354 */ 1355 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO); 1356 immp = (struct in_multi **)malloc( 1357 (sizeof(*immp) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 1358 M_WAITOK|M_ZERO); 1359 *imop = imo; 1360 imo->imo_ifidx = 0; 1361 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 1362 imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP; 1363 imo->imo_num_memberships = 0; 1364 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1365 imo->imo_membership = immp; 1366 } 1367 1368 switch (optname) { 1369 1370 case IP_MULTICAST_IF: 1371 /* 1372 * Select the interface for outgoing multicast packets. 1373 */ 1374 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1375 error = EINVAL; 1376 break; 1377 } 1378 addr = *(mtod(m, struct in_addr *)); 1379 /* 1380 * INADDR_ANY is used to remove a previous selection. 1381 * When no interface is selected, a default one is 1382 * chosen every time a multicast packet is sent. 1383 */ 1384 if (addr.s_addr == INADDR_ANY) { 1385 imo->imo_ifidx = 0; 1386 break; 1387 } 1388 /* 1389 * The selected interface is identified by its local 1390 * IP address. Find the interface and confirm that 1391 * it supports multicasting. 1392 */ 1393 memset(&sin, 0, sizeof(sin)); 1394 sin.sin_len = sizeof(sin); 1395 sin.sin_family = AF_INET; 1396 sin.sin_addr = addr; 1397 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1398 if (ia == NULL || 1399 (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) { 1400 error = EADDRNOTAVAIL; 1401 break; 1402 } 1403 imo->imo_ifidx = ia->ia_ifp->if_index; 1404 break; 1405 1406 case IP_MULTICAST_TTL: 1407 /* 1408 * Set the IP time-to-live for outgoing multicast packets. 1409 */ 1410 if (m == NULL || m->m_len != 1) { 1411 error = EINVAL; 1412 break; 1413 } 1414 imo->imo_ttl = *(mtod(m, u_char *)); 1415 break; 1416 1417 case IP_MULTICAST_LOOP: 1418 /* 1419 * Set the loopback flag for outgoing multicast packets. 1420 * Must be zero or one. 1421 */ 1422 if (m == NULL || m->m_len != 1 || 1423 (loop = *(mtod(m, u_char *))) > 1) { 1424 error = EINVAL; 1425 break; 1426 } 1427 imo->imo_loop = loop; 1428 break; 1429 1430 case IP_ADD_MEMBERSHIP: 1431 /* 1432 * Add a multicast group membership. 1433 * Group must be a valid IP multicast address. 1434 */ 1435 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1436 error = EINVAL; 1437 break; 1438 } 1439 mreq = mtod(m, struct ip_mreq *); 1440 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1441 error = EINVAL; 1442 break; 1443 } 1444 /* 1445 * If no interface address was provided, use the interface of 1446 * the route to the given multicast address. 1447 */ 1448 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1449 memset(&sin, 0, sizeof(sin)); 1450 sin.sin_len = sizeof(sin); 1451 sin.sin_family = AF_INET; 1452 sin.sin_addr = mreq->imr_multiaddr; 1453 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 1454 if (!rtisvalid(rt)) { 1455 rtfree(rt); 1456 error = EADDRNOTAVAIL; 1457 break; 1458 } 1459 } else { 1460 memset(&sin, 0, sizeof(sin)); 1461 sin.sin_len = sizeof(sin); 1462 sin.sin_family = AF_INET; 1463 sin.sin_addr = mreq->imr_interface; 1464 rt = rtalloc(sintosa(&sin), 0, rtableid); 1465 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 1466 rtfree(rt); 1467 error = EADDRNOTAVAIL; 1468 break; 1469 } 1470 } 1471 ifp = if_get(rt->rt_ifidx); 1472 rtfree(rt); 1473 1474 /* 1475 * See if we found an interface, and confirm that it 1476 * supports multicast. 1477 */ 1478 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1479 error = EADDRNOTAVAIL; 1480 if_put(ifp); 1481 break; 1482 } 1483 /* 1484 * See if the membership already exists or if all the 1485 * membership slots are full. 1486 */ 1487 for (i = 0; i < imo->imo_num_memberships; ++i) { 1488 if (imo->imo_membership[i]->inm_ifidx 1489 == ifp->if_index && 1490 imo->imo_membership[i]->inm_addr.s_addr 1491 == mreq->imr_multiaddr.s_addr) 1492 break; 1493 } 1494 if (i < imo->imo_num_memberships) { 1495 error = EADDRINUSE; 1496 if_put(ifp); 1497 break; 1498 } 1499 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1500 struct in_multi **nmships, **omships; 1501 size_t newmax; 1502 /* 1503 * Resize the vector to next power-of-two minus 1. If the 1504 * size would exceed the maximum then we know we've really 1505 * run out of entries. Otherwise, we reallocate the vector. 1506 */ 1507 nmships = NULL; 1508 omships = imo->imo_membership; 1509 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1510 if (newmax <= IP_MAX_MEMBERSHIPS) { 1511 nmships = (struct in_multi **)malloc( 1512 sizeof(*nmships) * newmax, M_IPMOPTS, 1513 M_NOWAIT|M_ZERO); 1514 if (nmships != NULL) { 1515 memcpy(nmships, omships, 1516 sizeof(*omships) * 1517 imo->imo_max_memberships); 1518 free(omships, M_IPMOPTS, 1519 sizeof(*omships) * 1520 imo->imo_max_memberships); 1521 imo->imo_membership = nmships; 1522 imo->imo_max_memberships = newmax; 1523 } 1524 } 1525 if (nmships == NULL) { 1526 error = ENOBUFS; 1527 if_put(ifp); 1528 break; 1529 } 1530 } 1531 /* 1532 * Everything looks good; add a new record to the multicast 1533 * address list for the given interface. 1534 */ 1535 if ((imo->imo_membership[i] = 1536 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1537 error = ENOBUFS; 1538 if_put(ifp); 1539 break; 1540 } 1541 ++imo->imo_num_memberships; 1542 if_put(ifp); 1543 break; 1544 1545 case IP_DROP_MEMBERSHIP: 1546 /* 1547 * Drop a multicast group membership. 1548 * Group must be a valid IP multicast address. 1549 */ 1550 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1551 error = EINVAL; 1552 break; 1553 } 1554 mreq = mtod(m, struct ip_mreq *); 1555 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1556 error = EINVAL; 1557 break; 1558 } 1559 /* 1560 * If an interface address was specified, get a pointer 1561 * to its ifnet structure. 1562 */ 1563 if (mreq->imr_interface.s_addr == INADDR_ANY) 1564 ifp = NULL; 1565 else { 1566 memset(&sin, 0, sizeof(sin)); 1567 sin.sin_len = sizeof(sin); 1568 sin.sin_family = AF_INET; 1569 sin.sin_addr = mreq->imr_interface; 1570 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1571 if (ia == NULL) { 1572 error = EADDRNOTAVAIL; 1573 break; 1574 } 1575 ifp = ia->ia_ifp; 1576 } 1577 /* 1578 * Find the membership in the membership array. 1579 */ 1580 for (i = 0; i < imo->imo_num_memberships; ++i) { 1581 if ((ifp == NULL || 1582 imo->imo_membership[i]->inm_ifidx == 1583 ifp->if_index) && 1584 imo->imo_membership[i]->inm_addr.s_addr == 1585 mreq->imr_multiaddr.s_addr) 1586 break; 1587 } 1588 if (i == imo->imo_num_memberships) { 1589 error = EADDRNOTAVAIL; 1590 break; 1591 } 1592 /* 1593 * Give up the multicast address record to which the 1594 * membership points. 1595 */ 1596 in_delmulti(imo->imo_membership[i]); 1597 /* 1598 * Remove the gap in the membership array. 1599 */ 1600 for (++i; i < imo->imo_num_memberships; ++i) 1601 imo->imo_membership[i-1] = imo->imo_membership[i]; 1602 --imo->imo_num_memberships; 1603 break; 1604 1605 default: 1606 error = EOPNOTSUPP; 1607 break; 1608 } 1609 1610 /* 1611 * If all options have default values, no need to keep the data. 1612 */ 1613 if (imo->imo_ifidx == 0 && 1614 imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL && 1615 imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP && 1616 imo->imo_num_memberships == 0) { 1617 free(imo->imo_membership , M_IPMOPTS, 0); 1618 free(*imop, M_IPMOPTS, sizeof(**imop)); 1619 *imop = NULL; 1620 } 1621 1622 return (error); 1623 } 1624 1625 /* 1626 * Return the IP multicast options in response to user getsockopt(). 1627 */ 1628 int 1629 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf **mp) 1630 { 1631 u_char *ttl; 1632 u_char *loop; 1633 struct in_addr *addr; 1634 struct in_ifaddr *ia; 1635 struct ifnet *ifp; 1636 1637 *mp = m_get(M_WAIT, MT_SOOPTS); 1638 1639 switch (optname) { 1640 1641 case IP_MULTICAST_IF: 1642 addr = mtod(*mp, struct in_addr *); 1643 (*mp)->m_len = sizeof(struct in_addr); 1644 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL) 1645 addr->s_addr = INADDR_ANY; 1646 else { 1647 IFP_TO_IA(ifp, ia); 1648 if_put(ifp); 1649 addr->s_addr = (ia == NULL) ? INADDR_ANY 1650 : ia->ia_addr.sin_addr.s_addr; 1651 } 1652 return (0); 1653 1654 case IP_MULTICAST_TTL: 1655 ttl = mtod(*mp, u_char *); 1656 (*mp)->m_len = 1; 1657 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1658 : imo->imo_ttl; 1659 return (0); 1660 1661 case IP_MULTICAST_LOOP: 1662 loop = mtod(*mp, u_char *); 1663 (*mp)->m_len = 1; 1664 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1665 : imo->imo_loop; 1666 return (0); 1667 1668 default: 1669 return (EOPNOTSUPP); 1670 } 1671 } 1672 1673 /* 1674 * Discard the IP multicast options. 1675 */ 1676 void 1677 ip_freemoptions(struct ip_moptions *imo) 1678 { 1679 int i; 1680 1681 if (imo != NULL) { 1682 for (i = 0; i < imo->imo_num_memberships; ++i) 1683 in_delmulti(imo->imo_membership[i]); 1684 free(imo->imo_membership, M_IPMOPTS, 0); 1685 free(imo, M_IPMOPTS, sizeof(*imo)); 1686 } 1687 } 1688 1689 /* 1690 * Routine called from ip_output() to loop back a copy of an IP multicast 1691 * packet to the input queue of a specified interface. 1692 */ 1693 void 1694 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1695 { 1696 struct ip *ip; 1697 struct mbuf *copym; 1698 1699 copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1700 if (copym != NULL) { 1701 /* 1702 * We don't bother to fragment if the IP length is greater 1703 * than the interface's MTU. Can this possibly matter? 1704 */ 1705 ip = mtod(copym, struct ip *); 1706 ip->ip_sum = 0; 1707 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1708 if_input_local(ifp, copym, dst->sin_family); 1709 } 1710 } 1711 1712 /* 1713 * Compute significant parts of the IPv4 checksum pseudo-header 1714 * for use in a delayed TCP/UDP checksum calculation. 1715 */ 1716 static __inline u_int16_t __attribute__((__unused__)) 1717 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 1718 { 1719 u_int32_t sum; 1720 1721 sum = lenproto + 1722 (u_int16_t)(src >> 16) + 1723 (u_int16_t)(src /*& 0xffff*/) + 1724 (u_int16_t)(dst >> 16) + 1725 (u_int16_t)(dst /*& 0xffff*/); 1726 1727 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 1728 1729 if (sum > 0xffff) 1730 sum -= 0xffff; 1731 1732 return (sum); 1733 } 1734 1735 /* 1736 * Process a delayed payload checksum calculation. 1737 */ 1738 void 1739 in_delayed_cksum(struct mbuf *m) 1740 { 1741 struct ip *ip; 1742 u_int16_t csum, offset; 1743 1744 ip = mtod(m, struct ip *); 1745 offset = ip->ip_hl << 2; 1746 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1747 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1748 csum = 0xffff; 1749 1750 switch (ip->ip_p) { 1751 case IPPROTO_TCP: 1752 offset += offsetof(struct tcphdr, th_sum); 1753 break; 1754 1755 case IPPROTO_UDP: 1756 offset += offsetof(struct udphdr, uh_sum); 1757 break; 1758 1759 case IPPROTO_ICMP: 1760 offset += offsetof(struct icmp, icmp_cksum); 1761 break; 1762 1763 default: 1764 return; 1765 } 1766 1767 if ((offset + sizeof(u_int16_t)) > m->m_len) 1768 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1769 else 1770 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1771 } 1772 1773 void 1774 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 1775 { 1776 struct ip *ip = mtod(m, struct ip *); 1777 1778 /* some hw and in_delayed_cksum need the pseudo header cksum */ 1779 if (m->m_pkthdr.csum_flags & 1780 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 1781 u_int16_t csum = 0, offset; 1782 1783 offset = ip->ip_hl << 2; 1784 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 1785 csum = in_cksum_phdr(ip->ip_src.s_addr, 1786 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 1787 offset + ip->ip_p)); 1788 if (ip->ip_p == IPPROTO_TCP) 1789 offset += offsetof(struct tcphdr, th_sum); 1790 else if (ip->ip_p == IPPROTO_UDP) 1791 offset += offsetof(struct udphdr, uh_sum); 1792 else if (ip->ip_p == IPPROTO_ICMP) 1793 offset += offsetof(struct icmp, icmp_cksum); 1794 if ((offset + sizeof(u_int16_t)) > m->m_len) 1795 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1796 else 1797 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1798 } 1799 1800 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 1801 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 1802 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1803 tcpstat.tcps_outswcsum++; 1804 in_delayed_cksum(m); 1805 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 1806 } 1807 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 1808 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 1809 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1810 udpstat.udps_outswcsum++; 1811 in_delayed_cksum(m); 1812 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 1813 } 1814 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 1815 in_delayed_cksum(m); 1816 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 1817 } 1818 } 1819