1 /* $OpenBSD: ip_output.c,v 1.344 2017/11/22 12:28:49 bluhm Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_enc.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 #endif /* IPSEC */ 74 75 int ip_pcbopts(struct mbuf **, struct mbuf *); 76 int ip_setmoptions(int, struct ip_moptions **, struct mbuf *, u_int); 77 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 78 static __inline u_int16_t __attribute__((__unused__)) 79 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 80 void in_delayed_cksum(struct mbuf *); 81 82 #ifdef IPSEC 83 struct tdb * 84 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 85 int ipsecflowinfo); 86 int 87 ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int); 88 #endif /* IPSEC */ 89 90 /* 91 * IP output. The packet in mbuf chain m contains a skeletal IP 92 * header (with len, off, ttl, proto, tos, src, dst). 93 * The mbuf chain containing the packet will be freed. 94 * The mbuf opt, if present, will not be freed. 95 */ 96 int 97 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 98 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 99 { 100 struct ip *ip; 101 struct ifnet *ifp = NULL; 102 struct mbuf *m = m0; 103 int hlen = sizeof (struct ip); 104 int len, error = 0; 105 struct route iproute; 106 struct sockaddr_in *dst; 107 struct tdb *tdb = NULL; 108 u_long mtu; 109 #if defined(MROUTING) 110 int rv; 111 #endif 112 113 NET_ASSERT_LOCKED(); 114 115 #ifdef IPSEC 116 if (inp && (inp->inp_flags & INP_IPV6) != 0) 117 panic("ip_output: IPv6 pcb is passed"); 118 #endif /* IPSEC */ 119 120 #ifdef DIAGNOSTIC 121 if ((m->m_flags & M_PKTHDR) == 0) 122 panic("ip_output no HDR"); 123 #endif 124 if (opt) { 125 m = ip_insertoptions(m, opt, &len); 126 hlen = len; 127 } 128 129 ip = mtod(m, struct ip *); 130 131 /* 132 * Fill in IP header. 133 */ 134 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 135 ip->ip_v = IPVERSION; 136 ip->ip_off &= htons(IP_DF); 137 ip->ip_id = htons(ip_randomid()); 138 ip->ip_hl = hlen >> 2; 139 ipstat_inc(ips_localout); 140 } else { 141 hlen = ip->ip_hl << 2; 142 } 143 144 /* 145 * We should not send traffic to 0/8 say both Stevens and RFCs 146 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 147 */ 148 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 149 error = ENETUNREACH; 150 goto bad; 151 } 152 153 #if NPF > 0 154 reroute: 155 #endif 156 157 /* 158 * Do a route lookup now in case we need the source address to 159 * do an SPD lookup in IPsec; for most packets, the source address 160 * is set at a higher level protocol. ICMPs and other packets 161 * though (e.g., traceroute) have a source address of zeroes. 162 */ 163 if (ro == NULL) { 164 ro = &iproute; 165 memset(ro, 0, sizeof(*ro)); 166 } 167 168 dst = satosin(&ro->ro_dst); 169 170 /* 171 * If there is a cached route, check that it is to the same 172 * destination and is still up. If not, free it and try again. 173 */ 174 if (!rtisvalid(ro->ro_rt) || 175 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 176 ro->ro_tableid != m->m_pkthdr.ph_rtableid) { 177 rtfree(ro->ro_rt); 178 ro->ro_rt = NULL; 179 } 180 181 if (ro->ro_rt == NULL) { 182 dst->sin_family = AF_INET; 183 dst->sin_len = sizeof(*dst); 184 dst->sin_addr = ip->ip_dst; 185 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 186 } 187 188 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 189 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 190 imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) { 191 192 mtu = ifp->if_mtu; 193 if (ip->ip_src.s_addr == INADDR_ANY) { 194 struct in_ifaddr *ia; 195 196 IFP_TO_IA(ifp, ia); 197 if (ia != NULL) 198 ip->ip_src = ia->ia_addr.sin_addr; 199 } 200 } else { 201 struct in_ifaddr *ia; 202 203 if (ro->ro_rt == NULL) 204 ro->ro_rt = rtalloc_mpath(&ro->ro_dst, 205 &ip->ip_src.s_addr, ro->ro_tableid); 206 207 if (ro->ro_rt == NULL) { 208 ipstat_inc(ips_noroute); 209 error = EHOSTUNREACH; 210 goto bad; 211 } 212 213 ia = ifatoia(ro->ro_rt->rt_ifa); 214 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) 215 ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid)); 216 else 217 ifp = if_get(ro->ro_rt->rt_ifidx); 218 if (ifp == NULL) { 219 error = EHOSTUNREACH; 220 goto bad; 221 } 222 if ((mtu = ro->ro_rt->rt_mtu) == 0) 223 mtu = ifp->if_mtu; 224 225 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 226 dst = satosin(ro->ro_rt->rt_gateway); 227 228 /* Set the source IP address */ 229 if (ip->ip_src.s_addr == INADDR_ANY && ia) 230 ip->ip_src = ia->ia_addr.sin_addr; 231 } 232 233 #ifdef IPSEC 234 if (ipsec_in_use || inp != NULL) { 235 /* Do we have any pending SAs to apply ? */ 236 tdb = ip_output_ipsec_lookup(m, hlen, &error, inp, 237 ipsecflowinfo); 238 if (error != 0) { 239 /* Should silently drop packet */ 240 if (error == -EINVAL) 241 error = 0; 242 m_freem(m); 243 goto done; 244 } 245 if (tdb != NULL) { 246 /* 247 * If it needs TCP/UDP hardware-checksumming, do the 248 * computation now. 249 */ 250 in_proto_cksum_out(m, NULL); 251 } 252 } 253 #endif /* IPSEC */ 254 255 if (IN_MULTICAST(ip->ip_dst.s_addr) || 256 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 257 258 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 259 M_BCAST : M_MCAST; 260 261 /* 262 * IP destination address is multicast. Make sure "dst" 263 * still points to the address in "ro". (It may have been 264 * changed to point to a gateway address, above.) 265 */ 266 dst = satosin(&ro->ro_dst); 267 268 /* 269 * See if the caller provided any multicast options 270 */ 271 if (imo != NULL) 272 ip->ip_ttl = imo->imo_ttl; 273 else 274 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 275 276 /* 277 * if we don't know the outgoing ifp yet, we can't generate 278 * output 279 */ 280 if (!ifp) { 281 ipstat_inc(ips_noroute); 282 error = EHOSTUNREACH; 283 goto bad; 284 } 285 286 /* 287 * Confirm that the outgoing interface supports multicast, 288 * but only if the packet actually is going out on that 289 * interface (i.e., no IPsec is applied). 290 */ 291 if ((((m->m_flags & M_MCAST) && 292 (ifp->if_flags & IFF_MULTICAST) == 0) || 293 ((m->m_flags & M_BCAST) && 294 (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) { 295 ipstat_inc(ips_noroute); 296 error = ENETUNREACH; 297 goto bad; 298 } 299 300 /* 301 * If source address not specified yet, use address 302 * of outgoing interface. 303 */ 304 if (ip->ip_src.s_addr == INADDR_ANY) { 305 struct in_ifaddr *ia; 306 307 IFP_TO_IA(ifp, ia); 308 if (ia != NULL) 309 ip->ip_src = ia->ia_addr.sin_addr; 310 } 311 312 if ((imo == NULL || imo->imo_loop) && 313 in_hasmulti(&ip->ip_dst, ifp)) { 314 /* 315 * If we belong to the destination multicast group 316 * on the outgoing interface, and the caller did not 317 * forbid loopback, loop back a copy. 318 * Can't defer TCP/UDP checksumming, do the 319 * computation now. 320 */ 321 in_proto_cksum_out(m, NULL); 322 ip_mloopback(ifp, m, dst); 323 } 324 #ifdef MROUTING 325 else { 326 /* 327 * If we are acting as a multicast router, perform 328 * multicast forwarding as if the packet had just 329 * arrived on the interface to which we are about 330 * to send. The multicast forwarding function 331 * recursively calls this function, using the 332 * IP_FORWARDING flag to prevent infinite recursion. 333 * 334 * Multicasts that are looped back by ip_mloopback(), 335 * above, will be forwarded by the ip_input() routine, 336 * if necessary. 337 */ 338 if (ipmforwarding && ip_mrouter[ifp->if_rdomain] && 339 (flags & IP_FORWARDING) == 0) { 340 KERNEL_LOCK(); 341 rv = ip_mforward(m, ifp); 342 KERNEL_UNLOCK(); 343 if (rv != 0) { 344 m_freem(m); 345 goto done; 346 } 347 } 348 } 349 #endif 350 /* 351 * Multicasts with a time-to-live of zero may be looped- 352 * back, above, but must not be transmitted on a network. 353 * Also, multicasts addressed to the loopback interface 354 * are not sent -- the above call to ip_mloopback() will 355 * loop back a copy if this host actually belongs to the 356 * destination group on the loopback interface. 357 */ 358 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 359 m_freem(m); 360 goto done; 361 } 362 363 goto sendit; 364 } 365 366 /* 367 * Look for broadcast address and verify user is allowed to send 368 * such a packet; if the packet is going in an IPsec tunnel, skip 369 * this check. 370 */ 371 if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) || 372 (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) { 373 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 374 error = EADDRNOTAVAIL; 375 goto bad; 376 } 377 if ((flags & IP_ALLOWBROADCAST) == 0) { 378 error = EACCES; 379 goto bad; 380 } 381 382 /* Don't allow broadcast messages to be fragmented */ 383 if (ntohs(ip->ip_len) > ifp->if_mtu) { 384 error = EMSGSIZE; 385 goto bad; 386 } 387 m->m_flags |= M_BCAST; 388 } else 389 m->m_flags &= ~M_BCAST; 390 391 sendit: 392 /* 393 * If we're doing Path MTU discovery, we need to set DF unless 394 * the route's MTU is locked. 395 */ 396 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 397 (ro->ro_rt->rt_locks & RTV_MTU) == 0) 398 ip->ip_off |= htons(IP_DF); 399 400 #ifdef IPSEC 401 /* 402 * Check if the packet needs encapsulation. 403 */ 404 if (tdb != NULL) { 405 /* Callee frees mbuf */ 406 error = ip_output_ipsec_send(tdb, m, ro, 407 (flags & IP_FORWARDING) ? 1 : 0); 408 goto done; 409 } 410 #endif /* IPSEC */ 411 412 /* 413 * Packet filter 414 */ 415 #if NPF > 0 416 if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT, 417 ifp, &m) != PF_PASS) { 418 error = EACCES; 419 m_freem(m); 420 goto done; 421 } 422 if (m == NULL) 423 goto done; 424 ip = mtod(m, struct ip *); 425 hlen = ip->ip_hl << 2; 426 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 427 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 428 /* already rerun the route lookup, go on */ 429 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 430 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 431 /* tag as generated to skip over pf_test on rerun */ 432 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 433 ro = NULL; 434 if_put(ifp); /* drop reference since target changed */ 435 ifp = NULL; 436 goto reroute; 437 } 438 #endif 439 in_proto_cksum_out(m, ifp); 440 441 #ifdef IPSEC 442 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 443 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 444 error = EHOSTUNREACH; 445 m_freem(m); 446 goto done; 447 } 448 #endif 449 450 /* 451 * If small enough for interface, can just send directly. 452 */ 453 if (ntohs(ip->ip_len) <= mtu) { 454 ip->ip_sum = 0; 455 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 456 (ifp->if_bridgeport == NULL)) 457 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 458 else { 459 ipstat_inc(ips_outswcsum); 460 ip->ip_sum = in_cksum(m, hlen); 461 } 462 463 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 464 goto done; 465 } 466 467 /* 468 * Too large for interface; fragment if possible. 469 * Must be able to put at least 8 bytes per fragment. 470 */ 471 if (ip->ip_off & htons(IP_DF)) { 472 #ifdef IPSEC 473 if (ip_mtudisc) 474 ipsec_adjust_mtu(m, ifp->if_mtu); 475 #endif 476 error = EMSGSIZE; 477 /* 478 * This case can happen if the user changed the MTU 479 * of an interface after enabling IP on it. Because 480 * most netifs don't keep track of routes pointing to 481 * them, there is no way for one to update all its 482 * routes when the MTU is changed. 483 */ 484 if (rtisvalid(ro->ro_rt) && 485 ISSET(ro->ro_rt->rt_flags, RTF_HOST) && 486 !(ro->ro_rt->rt_locks & RTV_MTU) && 487 (ro->ro_rt->rt_mtu > ifp->if_mtu)) { 488 ro->ro_rt->rt_mtu = ifp->if_mtu; 489 } 490 ipstat_inc(ips_cantfrag); 491 goto bad; 492 } 493 494 error = ip_fragment(m, ifp, mtu); 495 if (error) { 496 m = m0 = NULL; 497 goto bad; 498 } 499 500 for (; m; m = m0) { 501 m0 = m->m_nextpkt; 502 m->m_nextpkt = 0; 503 if (error == 0) 504 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 505 else 506 m_freem(m); 507 } 508 509 if (error == 0) 510 ipstat_inc(ips_fragmented); 511 512 done: 513 if (ro == &iproute && ro->ro_rt) 514 rtfree(ro->ro_rt); 515 if_put(ifp); 516 return (error); 517 bad: 518 m_freem(m0); 519 goto done; 520 } 521 522 #ifdef IPSEC 523 struct tdb * 524 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 525 int ipsecflowinfo) 526 { 527 struct m_tag *mtag; 528 struct tdb_ident *tdbi; 529 struct tdb *tdb; 530 531 /* Do we have any pending SAs to apply ? */ 532 tdb = ipsp_spd_lookup(m, AF_INET, hlen, error, IPSP_DIRECTION_OUT, 533 NULL, inp, ipsecflowinfo); 534 if (tdb == NULL) 535 return NULL; 536 /* Loop detection */ 537 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 538 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 539 continue; 540 tdbi = (struct tdb_ident *)(mtag + 1); 541 if (tdbi->spi == tdb->tdb_spi && 542 tdbi->proto == tdb->tdb_sproto && 543 tdbi->rdomain == tdb->tdb_rdomain && 544 !memcmp(&tdbi->dst, &tdb->tdb_dst, 545 sizeof(union sockaddr_union))) { 546 /* no IPsec needed */ 547 return NULL; 548 } 549 } 550 return tdb; 551 } 552 553 int 554 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int fwd) 555 { 556 #if NPF > 0 557 struct ifnet *encif; 558 #endif 559 struct ip *ip; 560 561 #if NPF > 0 562 /* 563 * Packet filter 564 */ 565 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 566 pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) { 567 m_freem(m); 568 return EACCES; 569 } 570 if (m == NULL) 571 return 0; 572 /* 573 * PF_TAG_REROUTE handling or not... 574 * Packet is entering IPsec so the routing is 575 * already overruled by the IPsec policy. 576 * Until now the change was not reconsidered. 577 * What's the behaviour? 578 */ 579 in_proto_cksum_out(m, encif); 580 #endif 581 582 /* Check if we are allowed to fragment */ 583 ip = mtod(m, struct ip *); 584 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 585 ntohs(ip->ip_len) > tdb->tdb_mtu && 586 tdb->tdb_mtutimeout > time_second) { 587 struct rtentry *rt = NULL; 588 int rt_mtucloned = 0; 589 int transportmode = 0; 590 591 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 592 (tdb->tdb_dst.sin.sin_addr.s_addr == ip->ip_dst.s_addr); 593 594 /* Find a host route to store the mtu in */ 595 if (ro != NULL) 596 rt = ro->ro_rt; 597 /* but don't add a PMTU route for transport mode SAs */ 598 if (transportmode) 599 rt = NULL; 600 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 601 rt = icmp_mtudisc_clone(ip->ip_dst, 602 m->m_pkthdr.ph_rtableid); 603 rt_mtucloned = 1; 604 } 605 DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__, 606 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned)); 607 if (rt != NULL) { 608 rt->rt_mtu = tdb->tdb_mtu; 609 if (ro && ro->ro_rt != NULL) { 610 rtfree(ro->ro_rt); 611 ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, 612 m->m_pkthdr.ph_rtableid); 613 } 614 if (rt_mtucloned) 615 rtfree(rt); 616 } 617 ipsec_adjust_mtu(m, tdb->tdb_mtu); 618 m_freem(m); 619 return EMSGSIZE; 620 } 621 622 /* 623 * Clear these -- they'll be set in the recursive invocation 624 * as needed. 625 */ 626 m->m_flags &= ~(M_MCAST | M_BCAST); 627 628 /* Callee frees mbuf */ 629 return ipsp_process_packet(m, tdb, AF_INET, 0); 630 } 631 #endif /* IPSEC */ 632 633 int 634 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 635 { 636 struct ip *ip, *mhip; 637 struct mbuf *m0; 638 int len, hlen, off; 639 int mhlen, firstlen; 640 struct mbuf **mnext; 641 int fragments = 0; 642 int error = 0; 643 644 ip = mtod(m, struct ip *); 645 hlen = ip->ip_hl << 2; 646 647 len = (mtu - hlen) &~ 7; 648 if (len < 8) { 649 m_freem(m); 650 return (EMSGSIZE); 651 } 652 653 /* 654 * If we are doing fragmentation, we can't defer TCP/UDP 655 * checksumming; compute the checksum and clear the flag. 656 */ 657 in_proto_cksum_out(m, NULL); 658 firstlen = len; 659 mnext = &m->m_nextpkt; 660 661 /* 662 * Loop through length of segment after first fragment, 663 * make new header and copy data of each part and link onto chain. 664 */ 665 m0 = m; 666 mhlen = sizeof (struct ip); 667 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 668 MGETHDR(m, M_DONTWAIT, MT_HEADER); 669 if (m == NULL) { 670 ipstat_inc(ips_odropped); 671 error = ENOBUFS; 672 goto sendorfree; 673 } 674 *mnext = m; 675 mnext = &m->m_nextpkt; 676 m->m_data += max_linkhdr; 677 mhip = mtod(m, struct ip *); 678 *mhip = *ip; 679 /* we must inherit MCAST/BCAST flags, routing table and prio */ 680 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 681 m->m_pkthdr.ph_rtableid = m0->m_pkthdr.ph_rtableid; 682 m->m_pkthdr.pf.prio = m0->m_pkthdr.pf.prio; 683 if (hlen > sizeof (struct ip)) { 684 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 685 mhip->ip_hl = mhlen >> 2; 686 } 687 m->m_len = mhlen; 688 mhip->ip_off = ((off - hlen) >> 3) + 689 (ntohs(ip->ip_off) & ~IP_MF); 690 if (ip->ip_off & htons(IP_MF)) 691 mhip->ip_off |= IP_MF; 692 if (off + len >= ntohs(ip->ip_len)) 693 len = ntohs(ip->ip_len) - off; 694 else 695 mhip->ip_off |= IP_MF; 696 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 697 m->m_next = m_copym(m0, off, len, M_NOWAIT); 698 if (m->m_next == 0) { 699 ipstat_inc(ips_odropped); 700 error = ENOBUFS; 701 goto sendorfree; 702 } 703 m->m_pkthdr.len = mhlen + len; 704 m->m_pkthdr.ph_ifidx = 0; 705 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 706 mhip->ip_sum = 0; 707 if ((ifp != NULL) && 708 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 709 (ifp->if_bridgeport == NULL)) 710 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 711 else { 712 ipstat_inc(ips_outswcsum); 713 mhip->ip_sum = in_cksum(m, mhlen); 714 } 715 ipstat_inc(ips_ofragments); 716 fragments++; 717 } 718 /* 719 * Update first fragment by trimming what's been copied out 720 * and updating header, then send each fragment (in order). 721 */ 722 m = m0; 723 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 724 m->m_pkthdr.len = hlen + firstlen; 725 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 726 ip->ip_off |= htons(IP_MF); 727 ip->ip_sum = 0; 728 if ((ifp != NULL) && 729 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 730 (ifp->if_bridgeport == NULL)) 731 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 732 else { 733 ipstat_inc(ips_outswcsum); 734 ip->ip_sum = in_cksum(m, hlen); 735 } 736 sendorfree: 737 if (error) { 738 for (m = m0; m; m = m0) { 739 m0 = m->m_nextpkt; 740 m->m_nextpkt = NULL; 741 m_freem(m); 742 } 743 } 744 745 return (error); 746 } 747 748 /* 749 * Insert IP options into preformed packet. 750 * Adjust IP destination as required for IP source routing, 751 * as indicated by a non-zero in_addr at the start of the options. 752 */ 753 struct mbuf * 754 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 755 { 756 struct ipoption *p = mtod(opt, struct ipoption *); 757 struct mbuf *n; 758 struct ip *ip = mtod(m, struct ip *); 759 unsigned int optlen; 760 761 optlen = opt->m_len - sizeof(p->ipopt_dst); 762 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 763 return (m); /* XXX should fail */ 764 if (p->ipopt_dst.s_addr) 765 ip->ip_dst = p->ipopt_dst; 766 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 767 MGETHDR(n, M_DONTWAIT, MT_HEADER); 768 if (n == NULL) 769 return (m); 770 M_MOVE_HDR(n, m); 771 n->m_pkthdr.len += optlen; 772 m->m_len -= sizeof(struct ip); 773 m->m_data += sizeof(struct ip); 774 n->m_next = m; 775 m = n; 776 m->m_len = optlen + sizeof(struct ip); 777 m->m_data += max_linkhdr; 778 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip)); 779 } else { 780 m->m_data -= optlen; 781 m->m_len += optlen; 782 m->m_pkthdr.len += optlen; 783 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 784 } 785 ip = mtod(m, struct ip *); 786 memcpy(ip + 1, p->ipopt_list, optlen); 787 *phlen = sizeof(struct ip) + optlen; 788 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 789 return (m); 790 } 791 792 /* 793 * Copy options from ip to jp, 794 * omitting those not copied during fragmentation. 795 */ 796 int 797 ip_optcopy(struct ip *ip, struct ip *jp) 798 { 799 u_char *cp, *dp; 800 int opt, optlen, cnt; 801 802 cp = (u_char *)(ip + 1); 803 dp = (u_char *)(jp + 1); 804 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 805 for (; cnt > 0; cnt -= optlen, cp += optlen) { 806 opt = cp[0]; 807 if (opt == IPOPT_EOL) 808 break; 809 if (opt == IPOPT_NOP) { 810 /* Preserve for IP mcast tunnel's LSRR alignment. */ 811 *dp++ = IPOPT_NOP; 812 optlen = 1; 813 continue; 814 } 815 #ifdef DIAGNOSTIC 816 if (cnt < IPOPT_OLEN + sizeof(*cp)) 817 panic("malformed IPv4 option passed to ip_optcopy"); 818 #endif 819 optlen = cp[IPOPT_OLEN]; 820 #ifdef DIAGNOSTIC 821 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 822 panic("malformed IPv4 option passed to ip_optcopy"); 823 #endif 824 /* bogus lengths should have been caught by ip_dooptions */ 825 if (optlen > cnt) 826 optlen = cnt; 827 if (IPOPT_COPIED(opt)) { 828 memcpy(dp, cp, optlen); 829 dp += optlen; 830 } 831 } 832 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 833 *dp++ = IPOPT_EOL; 834 return (optlen); 835 } 836 837 /* 838 * IP socket option processing. 839 */ 840 int 841 ip_ctloutput(int op, struct socket *so, int level, int optname, 842 struct mbuf *m) 843 { 844 struct inpcb *inp = sotoinpcb(so); 845 int optval = 0; 846 struct proc *p = curproc; /* XXX */ 847 int error = 0; 848 u_int rtid = 0; 849 850 if (level != IPPROTO_IP) { 851 error = EINVAL; 852 } else switch (op) { 853 case PRCO_SETOPT: 854 switch (optname) { 855 case IP_OPTIONS: 856 return (ip_pcbopts(&inp->inp_options, m)); 857 858 case IP_TOS: 859 case IP_TTL: 860 case IP_MINTTL: 861 case IP_RECVOPTS: 862 case IP_RECVRETOPTS: 863 case IP_RECVDSTADDR: 864 case IP_RECVIF: 865 case IP_RECVTTL: 866 case IP_RECVDSTPORT: 867 case IP_RECVRTABLE: 868 case IP_IPSECFLOWINFO: 869 if (m == NULL || m->m_len != sizeof(int)) 870 error = EINVAL; 871 else { 872 optval = *mtod(m, int *); 873 switch (optname) { 874 875 case IP_TOS: 876 inp->inp_ip.ip_tos = optval; 877 break; 878 879 case IP_TTL: 880 if (optval > 0 && optval <= MAXTTL) 881 inp->inp_ip.ip_ttl = optval; 882 else if (optval == -1) 883 inp->inp_ip.ip_ttl = ip_defttl; 884 else 885 error = EINVAL; 886 break; 887 888 case IP_MINTTL: 889 if (optval >= 0 && optval <= MAXTTL) 890 inp->inp_ip_minttl = optval; 891 else 892 error = EINVAL; 893 break; 894 #define OPTSET(bit) \ 895 if (optval) \ 896 inp->inp_flags |= bit; \ 897 else \ 898 inp->inp_flags &= ~bit; 899 900 case IP_RECVOPTS: 901 OPTSET(INP_RECVOPTS); 902 break; 903 904 case IP_RECVRETOPTS: 905 OPTSET(INP_RECVRETOPTS); 906 break; 907 908 case IP_RECVDSTADDR: 909 OPTSET(INP_RECVDSTADDR); 910 break; 911 case IP_RECVIF: 912 OPTSET(INP_RECVIF); 913 break; 914 case IP_RECVTTL: 915 OPTSET(INP_RECVTTL); 916 break; 917 case IP_RECVDSTPORT: 918 OPTSET(INP_RECVDSTPORT); 919 break; 920 case IP_RECVRTABLE: 921 OPTSET(INP_RECVRTABLE); 922 break; 923 case IP_IPSECFLOWINFO: 924 OPTSET(INP_IPSECFLOWINFO); 925 break; 926 } 927 } 928 break; 929 #undef OPTSET 930 931 case IP_MULTICAST_IF: 932 case IP_MULTICAST_TTL: 933 case IP_MULTICAST_LOOP: 934 case IP_ADD_MEMBERSHIP: 935 case IP_DROP_MEMBERSHIP: 936 error = ip_setmoptions(optname, &inp->inp_moptions, m, 937 inp->inp_rtableid); 938 break; 939 940 case IP_PORTRANGE: 941 if (m == NULL || m->m_len != sizeof(int)) 942 error = EINVAL; 943 else { 944 optval = *mtod(m, int *); 945 946 switch (optval) { 947 948 case IP_PORTRANGE_DEFAULT: 949 inp->inp_flags &= ~(INP_LOWPORT); 950 inp->inp_flags &= ~(INP_HIGHPORT); 951 break; 952 953 case IP_PORTRANGE_HIGH: 954 inp->inp_flags &= ~(INP_LOWPORT); 955 inp->inp_flags |= INP_HIGHPORT; 956 break; 957 958 case IP_PORTRANGE_LOW: 959 inp->inp_flags &= ~(INP_HIGHPORT); 960 inp->inp_flags |= INP_LOWPORT; 961 break; 962 963 default: 964 965 error = EINVAL; 966 break; 967 } 968 } 969 break; 970 case IP_AUTH_LEVEL: 971 case IP_ESP_TRANS_LEVEL: 972 case IP_ESP_NETWORK_LEVEL: 973 case IP_IPCOMP_LEVEL: 974 #ifndef IPSEC 975 error = EOPNOTSUPP; 976 #else 977 if (m == NULL || m->m_len != sizeof(int)) { 978 error = EINVAL; 979 break; 980 } 981 optval = *mtod(m, int *); 982 983 if (optval < IPSEC_LEVEL_BYPASS || 984 optval > IPSEC_LEVEL_UNIQUE) { 985 error = EINVAL; 986 break; 987 } 988 989 switch (optname) { 990 case IP_AUTH_LEVEL: 991 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 992 suser(p, 0)) { 993 error = EACCES; 994 break; 995 } 996 inp->inp_seclevel[SL_AUTH] = optval; 997 break; 998 999 case IP_ESP_TRANS_LEVEL: 1000 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1001 suser(p, 0)) { 1002 error = EACCES; 1003 break; 1004 } 1005 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1006 break; 1007 1008 case IP_ESP_NETWORK_LEVEL: 1009 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1010 suser(p, 0)) { 1011 error = EACCES; 1012 break; 1013 } 1014 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1015 break; 1016 case IP_IPCOMP_LEVEL: 1017 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1018 suser(p, 0)) { 1019 error = EACCES; 1020 break; 1021 } 1022 inp->inp_seclevel[SL_IPCOMP] = optval; 1023 break; 1024 } 1025 #endif 1026 break; 1027 1028 case IP_IPSEC_LOCAL_ID: 1029 case IP_IPSEC_REMOTE_ID: 1030 error = EOPNOTSUPP; 1031 break; 1032 case SO_RTABLE: 1033 if (m == NULL || m->m_len < sizeof(u_int)) { 1034 error = EINVAL; 1035 break; 1036 } 1037 rtid = *mtod(m, u_int *); 1038 if (inp->inp_rtableid == rtid) 1039 break; 1040 /* needs privileges to switch when already set */ 1041 if (p->p_p->ps_rtableid != rtid && 1042 p->p_p->ps_rtableid != 0 && 1043 (error = suser(p, 0)) != 0) 1044 break; 1045 /* table must exist */ 1046 if (!rtable_exists(rtid)) { 1047 error = EINVAL; 1048 break; 1049 } 1050 if (inp->inp_lport) { 1051 error = EBUSY; 1052 break; 1053 } 1054 inp->inp_rtableid = rtid; 1055 in_pcbrehash(inp); 1056 break; 1057 case IP_PIPEX: 1058 if (m != NULL && m->m_len == sizeof(int)) 1059 inp->inp_pipex = *mtod(m, int *); 1060 else 1061 error = EINVAL; 1062 break; 1063 1064 default: 1065 error = ENOPROTOOPT; 1066 break; 1067 } 1068 break; 1069 1070 case PRCO_GETOPT: 1071 switch (optname) { 1072 case IP_OPTIONS: 1073 case IP_RETOPTS: 1074 if (inp->inp_options) { 1075 m->m_len = inp->inp_options->m_len; 1076 memcpy(mtod(m, caddr_t), 1077 mtod(inp->inp_options, caddr_t), m->m_len); 1078 } else 1079 m->m_len = 0; 1080 break; 1081 1082 case IP_TOS: 1083 case IP_TTL: 1084 case IP_MINTTL: 1085 case IP_RECVOPTS: 1086 case IP_RECVRETOPTS: 1087 case IP_RECVDSTADDR: 1088 case IP_RECVIF: 1089 case IP_RECVTTL: 1090 case IP_RECVDSTPORT: 1091 case IP_RECVRTABLE: 1092 case IP_IPSECFLOWINFO: 1093 case IP_IPDEFTTL: 1094 m->m_len = sizeof(int); 1095 switch (optname) { 1096 1097 case IP_TOS: 1098 optval = inp->inp_ip.ip_tos; 1099 break; 1100 1101 case IP_TTL: 1102 optval = inp->inp_ip.ip_ttl; 1103 break; 1104 1105 case IP_MINTTL: 1106 optval = inp->inp_ip_minttl; 1107 break; 1108 1109 case IP_IPDEFTTL: 1110 optval = ip_defttl; 1111 break; 1112 1113 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1114 1115 case IP_RECVOPTS: 1116 optval = OPTBIT(INP_RECVOPTS); 1117 break; 1118 1119 case IP_RECVRETOPTS: 1120 optval = OPTBIT(INP_RECVRETOPTS); 1121 break; 1122 1123 case IP_RECVDSTADDR: 1124 optval = OPTBIT(INP_RECVDSTADDR); 1125 break; 1126 case IP_RECVIF: 1127 optval = OPTBIT(INP_RECVIF); 1128 break; 1129 case IP_RECVTTL: 1130 optval = OPTBIT(INP_RECVTTL); 1131 break; 1132 case IP_RECVDSTPORT: 1133 optval = OPTBIT(INP_RECVDSTPORT); 1134 break; 1135 case IP_RECVRTABLE: 1136 optval = OPTBIT(INP_RECVRTABLE); 1137 break; 1138 case IP_IPSECFLOWINFO: 1139 optval = OPTBIT(INP_IPSECFLOWINFO); 1140 break; 1141 } 1142 *mtod(m, int *) = optval; 1143 break; 1144 1145 case IP_MULTICAST_IF: 1146 case IP_MULTICAST_TTL: 1147 case IP_MULTICAST_LOOP: 1148 case IP_ADD_MEMBERSHIP: 1149 case IP_DROP_MEMBERSHIP: 1150 error = ip_getmoptions(optname, inp->inp_moptions, m); 1151 break; 1152 1153 case IP_PORTRANGE: 1154 m->m_len = sizeof(int); 1155 1156 if (inp->inp_flags & INP_HIGHPORT) 1157 optval = IP_PORTRANGE_HIGH; 1158 else if (inp->inp_flags & INP_LOWPORT) 1159 optval = IP_PORTRANGE_LOW; 1160 else 1161 optval = 0; 1162 1163 *mtod(m, int *) = optval; 1164 break; 1165 1166 case IP_AUTH_LEVEL: 1167 case IP_ESP_TRANS_LEVEL: 1168 case IP_ESP_NETWORK_LEVEL: 1169 case IP_IPCOMP_LEVEL: 1170 #ifndef IPSEC 1171 m->m_len = sizeof(int); 1172 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1173 #else 1174 m->m_len = sizeof(int); 1175 switch (optname) { 1176 case IP_AUTH_LEVEL: 1177 optval = inp->inp_seclevel[SL_AUTH]; 1178 break; 1179 1180 case IP_ESP_TRANS_LEVEL: 1181 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1182 break; 1183 1184 case IP_ESP_NETWORK_LEVEL: 1185 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1186 break; 1187 case IP_IPCOMP_LEVEL: 1188 optval = inp->inp_seclevel[SL_IPCOMP]; 1189 break; 1190 } 1191 *mtod(m, int *) = optval; 1192 #endif 1193 break; 1194 case IP_IPSEC_LOCAL_ID: 1195 case IP_IPSEC_REMOTE_ID: 1196 error = EOPNOTSUPP; 1197 break; 1198 case SO_RTABLE: 1199 m->m_len = sizeof(u_int); 1200 *mtod(m, u_int *) = inp->inp_rtableid; 1201 break; 1202 case IP_PIPEX: 1203 m->m_len = sizeof(int); 1204 *mtod(m, int *) = inp->inp_pipex; 1205 break; 1206 default: 1207 error = ENOPROTOOPT; 1208 break; 1209 } 1210 break; 1211 } 1212 return (error); 1213 } 1214 1215 /* 1216 * Set up IP options in pcb for insertion in output packets. 1217 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1218 * with destination address if source routed. 1219 */ 1220 int 1221 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m) 1222 { 1223 int cnt, optlen; 1224 u_char *cp; 1225 u_char opt; 1226 1227 /* turn off any old options */ 1228 m_free(*pcbopt); 1229 *pcbopt = 0; 1230 if (m == NULL || m->m_len == 0) { 1231 /* 1232 * Only turning off any previous options. 1233 */ 1234 return (0); 1235 } 1236 1237 if (m->m_len % sizeof(int32_t)) 1238 return (EINVAL); 1239 1240 /* 1241 * IP first-hop destination address will be stored before 1242 * actual options; move other options back 1243 * and clear it when none present. 1244 */ 1245 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1246 return (EINVAL); 1247 cnt = m->m_len; 1248 m->m_len += sizeof(struct in_addr); 1249 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1250 memmove((caddr_t)cp, mtod(m, caddr_t), (unsigned)cnt); 1251 memset(mtod(m, caddr_t), 0, sizeof(struct in_addr)); 1252 1253 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1254 opt = cp[IPOPT_OPTVAL]; 1255 if (opt == IPOPT_EOL) 1256 break; 1257 if (opt == IPOPT_NOP) 1258 optlen = 1; 1259 else { 1260 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1261 return (EINVAL); 1262 optlen = cp[IPOPT_OLEN]; 1263 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1264 return (EINVAL); 1265 } 1266 switch (opt) { 1267 1268 default: 1269 break; 1270 1271 case IPOPT_LSRR: 1272 case IPOPT_SSRR: 1273 /* 1274 * user process specifies route as: 1275 * ->A->B->C->D 1276 * D must be our final destination (but we can't 1277 * check that since we may not have connected yet). 1278 * A is first hop destination, which doesn't appear in 1279 * actual IP option, but is stored before the options. 1280 */ 1281 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1282 return (EINVAL); 1283 m->m_len -= sizeof(struct in_addr); 1284 cnt -= sizeof(struct in_addr); 1285 optlen -= sizeof(struct in_addr); 1286 cp[IPOPT_OLEN] = optlen; 1287 /* 1288 * Move first hop before start of options. 1289 */ 1290 memcpy(mtod(m, caddr_t), &cp[IPOPT_OFFSET+1], 1291 sizeof(struct in_addr)); 1292 /* 1293 * Then copy rest of options back 1294 * to close up the deleted entry. 1295 */ 1296 memmove((caddr_t)&cp[IPOPT_OFFSET+1], 1297 (caddr_t)(&cp[IPOPT_OFFSET+1] + 1298 sizeof(struct in_addr)), 1299 (unsigned)cnt - (IPOPT_OFFSET+1)); 1300 break; 1301 } 1302 } 1303 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1304 return (EINVAL); 1305 *pcbopt = m_copym(m, 0, M_COPYALL, M_NOWAIT); 1306 if (*pcbopt == NULL) 1307 return (ENOBUFS); 1308 1309 return (0); 1310 } 1311 1312 /* 1313 * Set the IP multicast options in response to user setsockopt(). 1314 */ 1315 int 1316 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m, 1317 u_int rtableid) 1318 { 1319 struct in_addr addr; 1320 struct in_ifaddr *ia; 1321 struct ip_mreq *mreq; 1322 struct ifnet *ifp = NULL; 1323 struct ip_moptions *imo = *imop; 1324 struct in_multi **immp; 1325 struct rtentry *rt; 1326 struct sockaddr_in sin; 1327 int i, error = 0; 1328 u_char loop; 1329 1330 if (imo == NULL) { 1331 /* 1332 * No multicast option buffer attached to the pcb; 1333 * allocate one and initialize to default values. 1334 */ 1335 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO); 1336 immp = (struct in_multi **)malloc( 1337 (sizeof(*immp) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 1338 M_WAITOK|M_ZERO); 1339 *imop = imo; 1340 imo->imo_ifidx = 0; 1341 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 1342 imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP; 1343 imo->imo_num_memberships = 0; 1344 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1345 imo->imo_membership = immp; 1346 } 1347 1348 switch (optname) { 1349 1350 case IP_MULTICAST_IF: 1351 /* 1352 * Select the interface for outgoing multicast packets. 1353 */ 1354 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1355 error = EINVAL; 1356 break; 1357 } 1358 addr = *(mtod(m, struct in_addr *)); 1359 /* 1360 * INADDR_ANY is used to remove a previous selection. 1361 * When no interface is selected, a default one is 1362 * chosen every time a multicast packet is sent. 1363 */ 1364 if (addr.s_addr == INADDR_ANY) { 1365 imo->imo_ifidx = 0; 1366 break; 1367 } 1368 /* 1369 * The selected interface is identified by its local 1370 * IP address. Find the interface and confirm that 1371 * it supports multicasting. 1372 */ 1373 memset(&sin, 0, sizeof(sin)); 1374 sin.sin_len = sizeof(sin); 1375 sin.sin_family = AF_INET; 1376 sin.sin_addr = addr; 1377 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1378 if (ia == NULL || 1379 (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) { 1380 error = EADDRNOTAVAIL; 1381 break; 1382 } 1383 imo->imo_ifidx = ia->ia_ifp->if_index; 1384 break; 1385 1386 case IP_MULTICAST_TTL: 1387 /* 1388 * Set the IP time-to-live for outgoing multicast packets. 1389 */ 1390 if (m == NULL || m->m_len != 1) { 1391 error = EINVAL; 1392 break; 1393 } 1394 imo->imo_ttl = *(mtod(m, u_char *)); 1395 break; 1396 1397 case IP_MULTICAST_LOOP: 1398 /* 1399 * Set the loopback flag for outgoing multicast packets. 1400 * Must be zero or one. 1401 */ 1402 if (m == NULL || m->m_len != 1 || 1403 (loop = *(mtod(m, u_char *))) > 1) { 1404 error = EINVAL; 1405 break; 1406 } 1407 imo->imo_loop = loop; 1408 break; 1409 1410 case IP_ADD_MEMBERSHIP: 1411 /* 1412 * Add a multicast group membership. 1413 * Group must be a valid IP multicast address. 1414 */ 1415 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1416 error = EINVAL; 1417 break; 1418 } 1419 mreq = mtod(m, struct ip_mreq *); 1420 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1421 error = EINVAL; 1422 break; 1423 } 1424 /* 1425 * If no interface address was provided, use the interface of 1426 * the route to the given multicast address. 1427 */ 1428 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1429 memset(&sin, 0, sizeof(sin)); 1430 sin.sin_len = sizeof(sin); 1431 sin.sin_family = AF_INET; 1432 sin.sin_addr = mreq->imr_multiaddr; 1433 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 1434 if (!rtisvalid(rt)) { 1435 rtfree(rt); 1436 error = EADDRNOTAVAIL; 1437 break; 1438 } 1439 } else { 1440 memset(&sin, 0, sizeof(sin)); 1441 sin.sin_len = sizeof(sin); 1442 sin.sin_family = AF_INET; 1443 sin.sin_addr = mreq->imr_interface; 1444 rt = rtalloc(sintosa(&sin), 0, rtableid); 1445 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 1446 rtfree(rt); 1447 error = EADDRNOTAVAIL; 1448 break; 1449 } 1450 } 1451 ifp = if_get(rt->rt_ifidx); 1452 rtfree(rt); 1453 1454 /* 1455 * See if we found an interface, and confirm that it 1456 * supports multicast. 1457 */ 1458 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1459 error = EADDRNOTAVAIL; 1460 if_put(ifp); 1461 break; 1462 } 1463 /* 1464 * See if the membership already exists or if all the 1465 * membership slots are full. 1466 */ 1467 for (i = 0; i < imo->imo_num_memberships; ++i) { 1468 if (imo->imo_membership[i]->inm_ifidx 1469 == ifp->if_index && 1470 imo->imo_membership[i]->inm_addr.s_addr 1471 == mreq->imr_multiaddr.s_addr) 1472 break; 1473 } 1474 if (i < imo->imo_num_memberships) { 1475 error = EADDRINUSE; 1476 if_put(ifp); 1477 break; 1478 } 1479 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1480 struct in_multi **nmships, **omships; 1481 size_t newmax; 1482 /* 1483 * Resize the vector to next power-of-two minus 1. If the 1484 * size would exceed the maximum then we know we've really 1485 * run out of entries. Otherwise, we reallocate the vector. 1486 */ 1487 nmships = NULL; 1488 omships = imo->imo_membership; 1489 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1490 if (newmax <= IP_MAX_MEMBERSHIPS) { 1491 nmships = (struct in_multi **)mallocarray( 1492 newmax, sizeof(*nmships), M_IPMOPTS, 1493 M_NOWAIT|M_ZERO); 1494 if (nmships != NULL) { 1495 memcpy(nmships, omships, 1496 sizeof(*omships) * 1497 imo->imo_max_memberships); 1498 free(omships, M_IPMOPTS, 1499 sizeof(*omships) * 1500 imo->imo_max_memberships); 1501 imo->imo_membership = nmships; 1502 imo->imo_max_memberships = newmax; 1503 } 1504 } 1505 if (nmships == NULL) { 1506 error = ENOBUFS; 1507 if_put(ifp); 1508 break; 1509 } 1510 } 1511 /* 1512 * Everything looks good; add a new record to the multicast 1513 * address list for the given interface. 1514 */ 1515 if ((imo->imo_membership[i] = 1516 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1517 error = ENOBUFS; 1518 if_put(ifp); 1519 break; 1520 } 1521 ++imo->imo_num_memberships; 1522 if_put(ifp); 1523 break; 1524 1525 case IP_DROP_MEMBERSHIP: 1526 /* 1527 * Drop a multicast group membership. 1528 * Group must be a valid IP multicast address. 1529 */ 1530 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1531 error = EINVAL; 1532 break; 1533 } 1534 mreq = mtod(m, struct ip_mreq *); 1535 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1536 error = EINVAL; 1537 break; 1538 } 1539 /* 1540 * If an interface address was specified, get a pointer 1541 * to its ifnet structure. 1542 */ 1543 if (mreq->imr_interface.s_addr == INADDR_ANY) 1544 ifp = NULL; 1545 else { 1546 memset(&sin, 0, sizeof(sin)); 1547 sin.sin_len = sizeof(sin); 1548 sin.sin_family = AF_INET; 1549 sin.sin_addr = mreq->imr_interface; 1550 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1551 if (ia == NULL) { 1552 error = EADDRNOTAVAIL; 1553 break; 1554 } 1555 ifp = ia->ia_ifp; 1556 } 1557 /* 1558 * Find the membership in the membership array. 1559 */ 1560 for (i = 0; i < imo->imo_num_memberships; ++i) { 1561 if ((ifp == NULL || 1562 imo->imo_membership[i]->inm_ifidx == 1563 ifp->if_index) && 1564 imo->imo_membership[i]->inm_addr.s_addr == 1565 mreq->imr_multiaddr.s_addr) 1566 break; 1567 } 1568 if (i == imo->imo_num_memberships) { 1569 error = EADDRNOTAVAIL; 1570 break; 1571 } 1572 /* 1573 * Give up the multicast address record to which the 1574 * membership points. 1575 */ 1576 in_delmulti(imo->imo_membership[i]); 1577 /* 1578 * Remove the gap in the membership array. 1579 */ 1580 for (++i; i < imo->imo_num_memberships; ++i) 1581 imo->imo_membership[i-1] = imo->imo_membership[i]; 1582 --imo->imo_num_memberships; 1583 break; 1584 1585 default: 1586 error = EOPNOTSUPP; 1587 break; 1588 } 1589 1590 /* 1591 * If all options have default values, no need to keep the data. 1592 */ 1593 if (imo->imo_ifidx == 0 && 1594 imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL && 1595 imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP && 1596 imo->imo_num_memberships == 0) { 1597 free(imo->imo_membership , M_IPMOPTS, 0); 1598 free(*imop, M_IPMOPTS, sizeof(**imop)); 1599 *imop = NULL; 1600 } 1601 1602 return (error); 1603 } 1604 1605 /* 1606 * Return the IP multicast options in response to user getsockopt(). 1607 */ 1608 int 1609 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf *m) 1610 { 1611 u_char *ttl; 1612 u_char *loop; 1613 struct in_addr *addr; 1614 struct in_ifaddr *ia; 1615 struct ifnet *ifp; 1616 1617 switch (optname) { 1618 1619 case IP_MULTICAST_IF: 1620 addr = mtod(m, struct in_addr *); 1621 m->m_len = sizeof(struct in_addr); 1622 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL) 1623 addr->s_addr = INADDR_ANY; 1624 else { 1625 IFP_TO_IA(ifp, ia); 1626 if_put(ifp); 1627 addr->s_addr = (ia == NULL) ? INADDR_ANY 1628 : ia->ia_addr.sin_addr.s_addr; 1629 } 1630 return (0); 1631 1632 case IP_MULTICAST_TTL: 1633 ttl = mtod(m, u_char *); 1634 m->m_len = 1; 1635 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1636 : imo->imo_ttl; 1637 return (0); 1638 1639 case IP_MULTICAST_LOOP: 1640 loop = mtod(m, u_char *); 1641 m->m_len = 1; 1642 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1643 : imo->imo_loop; 1644 return (0); 1645 1646 default: 1647 return (EOPNOTSUPP); 1648 } 1649 } 1650 1651 /* 1652 * Discard the IP multicast options. 1653 */ 1654 void 1655 ip_freemoptions(struct ip_moptions *imo) 1656 { 1657 int i; 1658 1659 if (imo != NULL) { 1660 for (i = 0; i < imo->imo_num_memberships; ++i) 1661 in_delmulti(imo->imo_membership[i]); 1662 free(imo->imo_membership, M_IPMOPTS, 0); 1663 free(imo, M_IPMOPTS, sizeof(*imo)); 1664 } 1665 } 1666 1667 /* 1668 * Routine called from ip_output() to loop back a copy of an IP multicast 1669 * packet to the input queue of a specified interface. 1670 */ 1671 void 1672 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1673 { 1674 struct ip *ip; 1675 struct mbuf *copym; 1676 1677 copym = m_dup_pkt(m, max_linkhdr, M_DONTWAIT); 1678 if (copym != NULL) { 1679 /* 1680 * We don't bother to fragment if the IP length is greater 1681 * than the interface's MTU. Can this possibly matter? 1682 */ 1683 ip = mtod(copym, struct ip *); 1684 ip->ip_sum = 0; 1685 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1686 if_input_local(ifp, copym, dst->sin_family); 1687 } 1688 } 1689 1690 /* 1691 * Compute significant parts of the IPv4 checksum pseudo-header 1692 * for use in a delayed TCP/UDP checksum calculation. 1693 */ 1694 static __inline u_int16_t __attribute__((__unused__)) 1695 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 1696 { 1697 u_int32_t sum; 1698 1699 sum = lenproto + 1700 (u_int16_t)(src >> 16) + 1701 (u_int16_t)(src /*& 0xffff*/) + 1702 (u_int16_t)(dst >> 16) + 1703 (u_int16_t)(dst /*& 0xffff*/); 1704 1705 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 1706 1707 if (sum > 0xffff) 1708 sum -= 0xffff; 1709 1710 return (sum); 1711 } 1712 1713 /* 1714 * Process a delayed payload checksum calculation. 1715 */ 1716 void 1717 in_delayed_cksum(struct mbuf *m) 1718 { 1719 struct ip *ip; 1720 u_int16_t csum, offset; 1721 1722 ip = mtod(m, struct ip *); 1723 offset = ip->ip_hl << 2; 1724 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1725 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1726 csum = 0xffff; 1727 1728 switch (ip->ip_p) { 1729 case IPPROTO_TCP: 1730 offset += offsetof(struct tcphdr, th_sum); 1731 break; 1732 1733 case IPPROTO_UDP: 1734 offset += offsetof(struct udphdr, uh_sum); 1735 break; 1736 1737 case IPPROTO_ICMP: 1738 offset += offsetof(struct icmp, icmp_cksum); 1739 break; 1740 1741 default: 1742 return; 1743 } 1744 1745 if ((offset + sizeof(u_int16_t)) > m->m_len) 1746 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1747 else 1748 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1749 } 1750 1751 void 1752 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 1753 { 1754 struct ip *ip = mtod(m, struct ip *); 1755 1756 /* some hw and in_delayed_cksum need the pseudo header cksum */ 1757 if (m->m_pkthdr.csum_flags & 1758 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 1759 u_int16_t csum = 0, offset; 1760 1761 offset = ip->ip_hl << 2; 1762 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 1763 csum = in_cksum_phdr(ip->ip_src.s_addr, 1764 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 1765 offset + ip->ip_p)); 1766 if (ip->ip_p == IPPROTO_TCP) 1767 offset += offsetof(struct tcphdr, th_sum); 1768 else if (ip->ip_p == IPPROTO_UDP) 1769 offset += offsetof(struct udphdr, uh_sum); 1770 else if (ip->ip_p == IPPROTO_ICMP) 1771 offset += offsetof(struct icmp, icmp_cksum); 1772 if ((offset + sizeof(u_int16_t)) > m->m_len) 1773 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1774 else 1775 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1776 } 1777 1778 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 1779 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 1780 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1781 tcpstat_inc(tcps_outswcsum); 1782 in_delayed_cksum(m); 1783 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 1784 } 1785 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 1786 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 1787 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1788 udpstat_inc(udps_outswcsum); 1789 in_delayed_cksum(m); 1790 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 1791 } 1792 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 1793 in_delayed_cksum(m); 1794 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 1795 } 1796 } 1797