1 /* $OpenBSD: ip_output.c,v 1.265 2014/07/12 18:44:23 tedu Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_enc.h> 48 #include <net/route.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_systm.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 #endif /* IPSEC */ 74 75 struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 76 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 77 static __inline u_int16_t __attribute__((__unused__)) 78 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 79 void in_delayed_cksum(struct mbuf *); 80 81 /* 82 * IP output. The packet in mbuf chain m contains a skeletal IP 83 * header (with len, off, ttl, proto, tos, src, dst). 84 * The mbuf chain containing the packet will be freed. 85 * The mbuf opt, if present, will not be freed. 86 */ 87 int 88 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 89 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 90 { 91 struct ip *ip; 92 struct ifnet *ifp; 93 struct mbuf *m = m0; 94 int hlen = sizeof (struct ip); 95 int len, error = 0; 96 struct route iproute; 97 struct sockaddr_in *dst; 98 struct in_ifaddr *ia; 99 u_int8_t sproto = 0, donerouting = 0; 100 u_long mtu; 101 #ifdef IPSEC 102 u_int32_t icmp_mtu = 0; 103 union sockaddr_union sdst; 104 u_int32_t sspi; 105 struct m_tag *mtag; 106 struct tdb_ident *tdbi; 107 108 struct tdb *tdb; 109 #if NPF > 0 110 struct ifnet *encif; 111 #endif 112 #endif /* IPSEC */ 113 114 #ifdef IPSEC 115 if (inp && (inp->inp_flags & INP_IPV6) != 0) 116 panic("ip_output: IPv6 pcb is passed"); 117 #endif /* IPSEC */ 118 119 #ifdef DIAGNOSTIC 120 if ((m->m_flags & M_PKTHDR) == 0) 121 panic("ip_output no HDR"); 122 #endif 123 if (opt) { 124 m = ip_insertoptions(m, opt, &len); 125 hlen = len; 126 } 127 128 ip = mtod(m, struct ip *); 129 130 /* 131 * Fill in IP header. 132 */ 133 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 134 ip->ip_v = IPVERSION; 135 ip->ip_off &= htons(IP_DF); 136 ip->ip_id = htons(ip_randomid()); 137 ip->ip_hl = hlen >> 2; 138 ipstat.ips_localout++; 139 } else { 140 hlen = ip->ip_hl << 2; 141 } 142 143 /* 144 * We should not send traffic to 0/8 say both Stevens and RFCs 145 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 146 */ 147 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 148 error = ENETUNREACH; 149 goto bad; 150 } 151 152 /* 153 * If we're missing the IP source address, do a route lookup. We'll 154 * remember this result, in case we don't need to do any IPsec 155 * processing on the packet. We need the source address so we can 156 * do an SPD lookup in IPsec; for most packets, the source address 157 * is set at a higher level protocol. ICMPs and other packets 158 * though (e.g., traceroute) have a source address of zeroes. 159 */ 160 if (ip->ip_src.s_addr == INADDR_ANY) { 161 if (flags & IP_ROUTETOETHER) { 162 error = EINVAL; 163 goto bad; 164 } 165 donerouting = 1; 166 167 if (ro == 0) { 168 ro = &iproute; 169 memset(ro, 0, sizeof(*ro)); 170 } 171 172 dst = satosin(&ro->ro_dst); 173 174 /* 175 * If there is a cached route, check that it is to the same 176 * destination and is still up. If not, free it and try again. 177 */ 178 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 179 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 180 ro->ro_tableid != m->m_pkthdr.ph_rtableid)) { 181 RTFREE(ro->ro_rt); 182 ro->ro_rt = (struct rtentry *)0; 183 } 184 185 if (ro->ro_rt == 0) { 186 dst->sin_family = AF_INET; 187 dst->sin_len = sizeof(*dst); 188 dst->sin_addr = ip->ip_dst; 189 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 190 } 191 192 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 193 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 194 imo != NULL && imo->imo_multicast_ifp != NULL) { 195 ifp = imo->imo_multicast_ifp; 196 mtu = ifp->if_mtu; 197 IFP_TO_IA(ifp, ia); 198 } else { 199 if (ro->ro_rt == 0) 200 rtalloc_mpath(ro, NULL); 201 202 if (ro->ro_rt == 0) { 203 ipstat.ips_noroute++; 204 error = EHOSTUNREACH; 205 goto bad; 206 } 207 208 ia = ifatoia(ro->ro_rt->rt_ifa); 209 ifp = ro->ro_rt->rt_ifp; 210 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 211 mtu = ifp->if_mtu; 212 ro->ro_rt->rt_use++; 213 214 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 215 dst = satosin(ro->ro_rt->rt_gateway); 216 } 217 218 /* Set the source IP address */ 219 if (!IN_MULTICAST(ip->ip_dst.s_addr)) 220 ip->ip_src = ia->ia_addr.sin_addr; 221 } 222 223 #if NPF > 0 224 reroute: 225 #endif 226 227 #ifdef IPSEC 228 if (!ipsec_in_use && inp == NULL) 229 goto done_spd; 230 231 /* Do we have any pending SAs to apply ? */ 232 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 233 if (mtag != NULL) { 234 #ifdef DIAGNOSTIC 235 if (mtag->m_tag_len != sizeof (struct tdb_ident)) 236 panic("ip_output: tag of length %hu (should be %zu", 237 mtag->m_tag_len, sizeof (struct tdb_ident)); 238 #endif 239 tdbi = (struct tdb_ident *)(mtag + 1); 240 tdb = gettdb(tdbi->rdomain, 241 tdbi->spi, &tdbi->dst, tdbi->proto); 242 if (tdb == NULL) 243 error = -EINVAL; 244 m_tag_delete(m, mtag); 245 } 246 else 247 tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error, 248 IPSP_DIRECTION_OUT, NULL, inp, ipsecflowinfo); 249 250 if (tdb == NULL) { 251 if (error == 0) { 252 /* 253 * No IPsec processing required, we'll just send the 254 * packet out. 255 */ 256 sproto = 0; 257 258 /* Fall through to routing/multicast handling */ 259 } else { 260 /* 261 * -EINVAL is used to indicate that the packet should 262 * be silently dropped, typically because we've asked 263 * key management for an SA. 264 */ 265 if (error == -EINVAL) /* Should silently drop packet */ 266 error = 0; 267 268 m_freem(m); 269 goto done; 270 } 271 } else { 272 /* Loop detection */ 273 for (mtag = m_tag_first(m); mtag != NULL; 274 mtag = m_tag_next(m, mtag)) { 275 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 276 mtag->m_tag_id != 277 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 278 continue; 279 tdbi = (struct tdb_ident *)(mtag + 1); 280 if (tdbi->spi == tdb->tdb_spi && 281 tdbi->proto == tdb->tdb_sproto && 282 tdbi->rdomain == tdb->tdb_rdomain && 283 !memcmp(&tdbi->dst, &tdb->tdb_dst, 284 sizeof(union sockaddr_union))) { 285 sproto = 0; /* mark as no-IPsec-needed */ 286 goto done_spd; 287 } 288 } 289 290 /* We need to do IPsec */ 291 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); 292 sspi = tdb->tdb_spi; 293 sproto = tdb->tdb_sproto; 294 295 /* 296 * If it needs TCP/UDP hardware-checksumming, do the 297 * computation now. 298 */ 299 in_proto_cksum_out(m, NULL); 300 301 /* If it's not a multicast packet, try to fast-path */ 302 if (!IN_MULTICAST(ip->ip_dst.s_addr)) { 303 goto sendit; 304 } 305 } 306 307 /* Fall through to the routing/multicast handling code */ 308 done_spd: 309 #endif /* IPSEC */ 310 311 if (flags & IP_ROUTETOETHER) { 312 dst = satosin(&ro->ro_dst); 313 ifp = ro->ro_rt->rt_ifp; 314 mtu = ifp->if_mtu; 315 ro->ro_rt = NULL; 316 } else if (donerouting == 0) { 317 if (ro == 0) { 318 ro = &iproute; 319 memset(ro, 0, sizeof(*ro)); 320 } 321 322 dst = satosin(&ro->ro_dst); 323 324 /* 325 * If there is a cached route, check that it is to the same 326 * destination and is still up. If not, free it and try again. 327 */ 328 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 329 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 330 ro->ro_tableid != m->m_pkthdr.ph_rtableid)) { 331 RTFREE(ro->ro_rt); 332 ro->ro_rt = (struct rtentry *)0; 333 } 334 335 if (ro->ro_rt == 0) { 336 dst->sin_family = AF_INET; 337 dst->sin_len = sizeof(*dst); 338 dst->sin_addr = ip->ip_dst; 339 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 340 } 341 342 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 343 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 344 imo != NULL && imo->imo_multicast_ifp != NULL) { 345 ifp = imo->imo_multicast_ifp; 346 mtu = ifp->if_mtu; 347 IFP_TO_IA(ifp, ia); 348 } else { 349 if (ro->ro_rt == 0) 350 rtalloc_mpath(ro, &ip->ip_src.s_addr); 351 352 if (ro->ro_rt == 0) { 353 ipstat.ips_noroute++; 354 error = EHOSTUNREACH; 355 goto bad; 356 } 357 358 ia = ifatoia(ro->ro_rt->rt_ifa); 359 ifp = ro->ro_rt->rt_ifp; 360 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 361 mtu = ifp->if_mtu; 362 ro->ro_rt->rt_use++; 363 364 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 365 dst = satosin(ro->ro_rt->rt_gateway); 366 } 367 368 /* Set the source IP address */ 369 if (ip->ip_src.s_addr == INADDR_ANY) 370 ip->ip_src = ia->ia_addr.sin_addr; 371 } 372 373 if (IN_MULTICAST(ip->ip_dst.s_addr) || 374 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 375 struct in_multi *inm; 376 377 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 378 M_BCAST : M_MCAST; 379 380 /* 381 * IP destination address is multicast. Make sure "dst" 382 * still points to the address in "ro". (It may have been 383 * changed to point to a gateway address, above.) 384 */ 385 dst = satosin(&ro->ro_dst); 386 387 /* 388 * See if the caller provided any multicast options 389 */ 390 if (imo != NULL) 391 ip->ip_ttl = imo->imo_multicast_ttl; 392 else 393 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 394 395 /* 396 * if we don't know the outgoing ifp yet, we can't generate 397 * output 398 */ 399 if (!ifp) { 400 ipstat.ips_noroute++; 401 error = EHOSTUNREACH; 402 goto bad; 403 } 404 405 /* 406 * Confirm that the outgoing interface supports multicast, 407 * but only if the packet actually is going out on that 408 * interface (i.e., no IPsec is applied). 409 */ 410 if ((((m->m_flags & M_MCAST) && 411 (ifp->if_flags & IFF_MULTICAST) == 0) || 412 ((m->m_flags & M_BCAST) && 413 (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0)) { 414 ipstat.ips_noroute++; 415 error = ENETUNREACH; 416 goto bad; 417 } 418 419 /* 420 * If source address not specified yet, use address 421 * of outgoing interface. 422 */ 423 if (ip->ip_src.s_addr == INADDR_ANY) { 424 IFP_TO_IA(ifp, ia); 425 if (ia != NULL) 426 ip->ip_src = ia->ia_addr.sin_addr; 427 } 428 429 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 430 if (inm != NULL && 431 (imo == NULL || imo->imo_multicast_loop)) { 432 /* 433 * If we belong to the destination multicast group 434 * on the outgoing interface, and the caller did not 435 * forbid loopback, loop back a copy. 436 * Can't defer TCP/UDP checksumming, do the 437 * computation now. 438 */ 439 in_proto_cksum_out(m, NULL); 440 ip_mloopback(ifp, m, dst); 441 } 442 #ifdef MROUTING 443 else { 444 /* 445 * If we are acting as a multicast router, perform 446 * multicast forwarding as if the packet had just 447 * arrived on the interface to which we are about 448 * to send. The multicast forwarding function 449 * recursively calls this function, using the 450 * IP_FORWARDING flag to prevent infinite recursion. 451 * 452 * Multicasts that are looped back by ip_mloopback(), 453 * above, will be forwarded by the ip_input() routine, 454 * if necessary. 455 */ 456 if (ipmforwarding && ip_mrouter && 457 (flags & IP_FORWARDING) == 0) { 458 if (ip_mforward(m, ifp) != 0) { 459 m_freem(m); 460 goto done; 461 } 462 } 463 } 464 #endif 465 /* 466 * Multicasts with a time-to-live of zero may be looped- 467 * back, above, but must not be transmitted on a network. 468 * Also, multicasts addressed to the loopback interface 469 * are not sent -- the above call to ip_mloopback() will 470 * loop back a copy if this host actually belongs to the 471 * destination group on the loopback interface. 472 */ 473 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 474 m_freem(m); 475 goto done; 476 } 477 478 goto sendit; 479 } 480 481 /* 482 * Look for broadcast address and verify user is allowed to send 483 * such a packet; if the packet is going in an IPsec tunnel, skip 484 * this check. 485 */ 486 if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp, 487 m->m_pkthdr.ph_rtableid))) { 488 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 489 error = EADDRNOTAVAIL; 490 goto bad; 491 } 492 if ((flags & IP_ALLOWBROADCAST) == 0) { 493 error = EACCES; 494 goto bad; 495 } 496 497 /* Don't allow broadcast messages to be fragmented */ 498 if (ntohs(ip->ip_len) > ifp->if_mtu) { 499 error = EMSGSIZE; 500 goto bad; 501 } 502 m->m_flags |= M_BCAST; 503 } else 504 m->m_flags &= ~M_BCAST; 505 506 sendit: 507 /* 508 * If we're doing Path MTU discovery, we need to set DF unless 509 * the route's MTU is locked. 510 */ 511 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 512 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 513 ip->ip_off |= htons(IP_DF); 514 515 #ifdef IPSEC 516 /* 517 * Check if the packet needs encapsulation. 518 */ 519 if (sproto != 0) { 520 tdb = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid), 521 sspi, &sdst, sproto); 522 if (tdb == NULL) { 523 DPRINTF(("ip_output: unknown TDB")); 524 error = EHOSTUNREACH; 525 m_freem(m); 526 goto done; 527 } 528 529 /* 530 * Packet filter 531 */ 532 #if NPF > 0 533 if ((encif = enc_getif(tdb->tdb_rdomain, 534 tdb->tdb_tap)) == NULL || 535 pf_test(AF_INET, PF_OUT, encif, &m, NULL) != PF_PASS) { 536 error = EACCES; 537 m_freem(m); 538 goto done; 539 } 540 if (m == NULL) { 541 goto done; 542 } 543 ip = mtod(m, struct ip *); 544 hlen = ip->ip_hl << 2; 545 /* 546 * PF_TAG_REROUTE handling or not... 547 * Packet is entering IPsec so the routing is 548 * already overruled by the IPsec policy. 549 * Until now the change was not reconsidered. 550 * What's the behaviour? 551 */ 552 #endif 553 in_proto_cksum_out(m, encif); 554 555 /* Check if we are allowed to fragment */ 556 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 557 ntohs(ip->ip_len) > tdb->tdb_mtu && 558 tdb->tdb_mtutimeout > time_second) { 559 struct rtentry *rt = NULL; 560 int rt_mtucloned = 0; 561 int transportmode = 0; 562 563 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 564 (tdb->tdb_dst.sin.sin_addr.s_addr == 565 ip->ip_dst.s_addr); 566 icmp_mtu = tdb->tdb_mtu; 567 568 /* Find a host route to store the mtu in */ 569 if (ro != NULL) 570 rt = ro->ro_rt; 571 /* but don't add a PMTU route for transport mode SAs */ 572 if (transportmode) 573 rt = NULL; 574 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 575 rt = icmp_mtudisc_clone(ip->ip_dst, 576 m->m_pkthdr.ph_rtableid); 577 rt_mtucloned = 1; 578 } 579 DPRINTF(("ip_output: spi %08x mtu %d rt %p cloned %d\n", 580 ntohl(tdb->tdb_spi), icmp_mtu, rt, rt_mtucloned)); 581 if (rt != NULL) { 582 rt->rt_rmx.rmx_mtu = icmp_mtu; 583 if (ro && ro->ro_rt != NULL) { 584 RTFREE(ro->ro_rt); 585 ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, 586 m->m_pkthdr.ph_rtableid); 587 } 588 if (rt_mtucloned) 589 rtfree(rt); 590 } 591 error = EMSGSIZE; 592 goto bad; 593 } 594 595 /* 596 * Clear these -- they'll be set in the recursive invocation 597 * as needed. 598 */ 599 m->m_flags &= ~(M_MCAST | M_BCAST); 600 601 /* Callee frees mbuf */ 602 error = ipsp_process_packet(m, tdb, AF_INET, 0); 603 return error; /* Nothing more to be done */ 604 } 605 606 /* 607 * If we got here and IPsec crypto processing didn't happen, drop it. 608 */ 609 if (ipsec_in_use && (mtag = m_tag_find(m, 610 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) != NULL) { 611 /* Notify IPsec to do its own crypto. */ 612 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 613 m_freem(m); 614 error = EHOSTUNREACH; 615 goto done; 616 } 617 #endif /* IPSEC */ 618 619 /* 620 * Packet filter 621 */ 622 #if NPF > 0 623 if (pf_test(AF_INET, PF_OUT, ifp, &m, NULL) != PF_PASS) { 624 error = EHOSTUNREACH; 625 m_freem(m); 626 goto done; 627 } 628 if (m == NULL) 629 goto done; 630 ip = mtod(m, struct ip *); 631 hlen = ip->ip_hl << 2; 632 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 633 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 634 /* already rerun the route lookup, go on */ 635 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 636 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 637 /* tag as generated to skip over pf_test on rerun */ 638 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 639 ro = NULL; 640 donerouting = 0; 641 goto reroute; 642 } 643 #endif 644 in_proto_cksum_out(m, ifp); 645 646 #ifdef IPSEC 647 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 648 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 649 error = EHOSTUNREACH; 650 m_freem(m); 651 goto done; 652 } 653 #endif 654 655 /* 656 * If small enough for interface, can just send directly. 657 */ 658 if (ntohs(ip->ip_len) <= mtu) { 659 ip->ip_sum = 0; 660 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 661 (ifp->if_bridgeport == NULL)) 662 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 663 else { 664 ipstat.ips_outswcsum++; 665 ip->ip_sum = in_cksum(m, hlen); 666 } 667 668 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt); 669 goto done; 670 } 671 672 /* 673 * Too large for interface; fragment if possible. 674 * Must be able to put at least 8 bytes per fragment. 675 */ 676 if (ip->ip_off & htons(IP_DF)) { 677 #ifdef IPSEC 678 icmp_mtu = ifp->if_mtu; 679 #endif 680 error = EMSGSIZE; 681 /* 682 * This case can happen if the user changed the MTU 683 * of an interface after enabling IP on it. Because 684 * most netifs don't keep track of routes pointing to 685 * them, there is no way for one to update all its 686 * routes when the MTU is changed. 687 */ 688 if (ro->ro_rt != NULL && 689 (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 690 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 691 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 692 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 693 } 694 ipstat.ips_cantfrag++; 695 goto bad; 696 } 697 698 error = ip_fragment(m, ifp, mtu); 699 if (error) { 700 m = m0 = NULL; 701 goto bad; 702 } 703 704 for (; m; m = m0) { 705 m0 = m->m_nextpkt; 706 m->m_nextpkt = 0; 707 if (error == 0) 708 error = (*ifp->if_output)(ifp, m, sintosa(dst), 709 ro->ro_rt); 710 else 711 m_freem(m); 712 } 713 714 if (error == 0) 715 ipstat.ips_fragmented++; 716 717 done: 718 if (ro == &iproute && ro->ro_rt) 719 RTFREE(ro->ro_rt); 720 return (error); 721 bad: 722 #ifdef IPSEC 723 if (error == EMSGSIZE && ip_mtudisc && icmp_mtu != 0 && m != NULL) 724 ipsec_adjust_mtu(m, icmp_mtu); 725 #endif 726 m_freem(m0); 727 goto done; 728 } 729 730 int 731 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 732 { 733 struct ip *ip, *mhip; 734 struct mbuf *m0; 735 int len, hlen, off; 736 int mhlen, firstlen; 737 struct mbuf **mnext; 738 int fragments = 0; 739 int error = 0; 740 741 ip = mtod(m, struct ip *); 742 hlen = ip->ip_hl << 2; 743 744 len = (mtu - hlen) &~ 7; 745 if (len < 8) { 746 m_freem(m); 747 return (EMSGSIZE); 748 } 749 750 /* 751 * If we are doing fragmentation, we can't defer TCP/UDP 752 * checksumming; compute the checksum and clear the flag. 753 */ 754 in_proto_cksum_out(m, NULL); 755 firstlen = len; 756 mnext = &m->m_nextpkt; 757 758 /* 759 * Loop through length of segment after first fragment, 760 * make new header and copy data of each part and link onto chain. 761 */ 762 m0 = m; 763 mhlen = sizeof (struct ip); 764 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 765 MGETHDR(m, M_DONTWAIT, MT_HEADER); 766 if (m == 0) { 767 ipstat.ips_odropped++; 768 error = ENOBUFS; 769 goto sendorfree; 770 } 771 *mnext = m; 772 mnext = &m->m_nextpkt; 773 m->m_data += max_linkhdr; 774 mhip = mtod(m, struct ip *); 775 *mhip = *ip; 776 /* we must inherit MCAST and BCAST flags and routing table */ 777 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 778 m->m_pkthdr.ph_rtableid = m0->m_pkthdr.ph_rtableid; 779 if (hlen > sizeof (struct ip)) { 780 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 781 mhip->ip_hl = mhlen >> 2; 782 } 783 m->m_len = mhlen; 784 mhip->ip_off = ((off - hlen) >> 3) + 785 (ntohs(ip->ip_off) & ~IP_MF); 786 if (ip->ip_off & htons(IP_MF)) 787 mhip->ip_off |= IP_MF; 788 if (off + len >= ntohs(ip->ip_len)) 789 len = ntohs(ip->ip_len) - off; 790 else 791 mhip->ip_off |= IP_MF; 792 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 793 m->m_next = m_copy(m0, off, len); 794 if (m->m_next == 0) { 795 ipstat.ips_odropped++; 796 error = ENOBUFS; 797 goto sendorfree; 798 } 799 m->m_pkthdr.len = mhlen + len; 800 m->m_pkthdr.rcvif = (struct ifnet *)0; 801 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 802 mhip->ip_sum = 0; 803 if ((ifp != NULL) && 804 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 805 (ifp->if_bridgeport == NULL)) 806 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 807 else { 808 ipstat.ips_outswcsum++; 809 mhip->ip_sum = in_cksum(m, mhlen); 810 } 811 ipstat.ips_ofragments++; 812 fragments++; 813 } 814 /* 815 * Update first fragment by trimming what's been copied out 816 * and updating header, then send each fragment (in order). 817 */ 818 m = m0; 819 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 820 m->m_pkthdr.len = hlen + firstlen; 821 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 822 ip->ip_off |= htons(IP_MF); 823 ip->ip_sum = 0; 824 if ((ifp != NULL) && 825 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 826 (ifp->if_bridgeport == NULL)) 827 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 828 else { 829 ipstat.ips_outswcsum++; 830 ip->ip_sum = in_cksum(m, hlen); 831 } 832 sendorfree: 833 if (error) { 834 for (m = m0; m; m = m0) { 835 m0 = m->m_nextpkt; 836 m->m_nextpkt = NULL; 837 m_freem(m); 838 } 839 } 840 841 return (error); 842 } 843 844 /* 845 * Insert IP options into preformed packet. 846 * Adjust IP destination as required for IP source routing, 847 * as indicated by a non-zero in_addr at the start of the options. 848 */ 849 struct mbuf * 850 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 851 { 852 struct ipoption *p = mtod(opt, struct ipoption *); 853 struct mbuf *n; 854 struct ip *ip = mtod(m, struct ip *); 855 unsigned int optlen; 856 857 optlen = opt->m_len - sizeof(p->ipopt_dst); 858 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 859 return (m); /* XXX should fail */ 860 if (p->ipopt_dst.s_addr) 861 ip->ip_dst = p->ipopt_dst; 862 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 863 MGETHDR(n, M_DONTWAIT, MT_HEADER); 864 if (n == 0) 865 return (m); 866 M_MOVE_HDR(n, m); 867 n->m_pkthdr.len += optlen; 868 m->m_len -= sizeof(struct ip); 869 m->m_data += sizeof(struct ip); 870 n->m_next = m; 871 m = n; 872 m->m_len = optlen + sizeof(struct ip); 873 m->m_data += max_linkhdr; 874 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 875 } else { 876 m->m_data -= optlen; 877 m->m_len += optlen; 878 m->m_pkthdr.len += optlen; 879 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 880 } 881 ip = mtod(m, struct ip *); 882 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), optlen); 883 *phlen = sizeof(struct ip) + optlen; 884 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 885 return (m); 886 } 887 888 /* 889 * Copy options from ip to jp, 890 * omitting those not copied during fragmentation. 891 */ 892 int 893 ip_optcopy(struct ip *ip, struct ip *jp) 894 { 895 u_char *cp, *dp; 896 int opt, optlen, cnt; 897 898 cp = (u_char *)(ip + 1); 899 dp = (u_char *)(jp + 1); 900 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 901 for (; cnt > 0; cnt -= optlen, cp += optlen) { 902 opt = cp[0]; 903 if (opt == IPOPT_EOL) 904 break; 905 if (opt == IPOPT_NOP) { 906 /* Preserve for IP mcast tunnel's LSRR alignment. */ 907 *dp++ = IPOPT_NOP; 908 optlen = 1; 909 continue; 910 } 911 #ifdef DIAGNOSTIC 912 if (cnt < IPOPT_OLEN + sizeof(*cp)) 913 panic("malformed IPv4 option passed to ip_optcopy"); 914 #endif 915 optlen = cp[IPOPT_OLEN]; 916 #ifdef DIAGNOSTIC 917 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 918 panic("malformed IPv4 option passed to ip_optcopy"); 919 #endif 920 /* bogus lengths should have been caught by ip_dooptions */ 921 if (optlen > cnt) 922 optlen = cnt; 923 if (IPOPT_COPIED(opt)) { 924 bcopy((caddr_t)cp, (caddr_t)dp, optlen); 925 dp += optlen; 926 } 927 } 928 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 929 *dp++ = IPOPT_EOL; 930 return (optlen); 931 } 932 933 /* 934 * IP socket option processing. 935 */ 936 int 937 ip_ctloutput(int op, struct socket *so, int level, int optname, 938 struct mbuf **mp) 939 { 940 struct inpcb *inp = sotoinpcb(so); 941 struct mbuf *m = *mp; 942 int optval = 0; 943 struct proc *p = curproc; /* XXX */ 944 #ifdef IPSEC 945 struct ipsec_ref *ipr; 946 u_int16_t opt16val; 947 #endif 948 int error = 0; 949 u_int rtid = 0; 950 951 if (level != IPPROTO_IP) { 952 error = EINVAL; 953 if (op == PRCO_SETOPT && *mp) 954 (void) m_free(*mp); 955 } else switch (op) { 956 case PRCO_SETOPT: 957 switch (optname) { 958 case IP_OPTIONS: 959 return (ip_pcbopts(&inp->inp_options, m)); 960 961 case IP_TOS: 962 case IP_TTL: 963 case IP_MINTTL: 964 case IP_RECVOPTS: 965 case IP_RECVRETOPTS: 966 case IP_RECVDSTADDR: 967 case IP_RECVIF: 968 case IP_RECVTTL: 969 case IP_RECVDSTPORT: 970 case IP_RECVRTABLE: 971 case IP_IPSECFLOWINFO: 972 if (m == NULL || m->m_len != sizeof(int)) 973 error = EINVAL; 974 else { 975 optval = *mtod(m, int *); 976 switch (optname) { 977 978 case IP_TOS: 979 inp->inp_ip.ip_tos = optval; 980 break; 981 982 case IP_TTL: 983 if (optval > 0 && optval <= MAXTTL) 984 inp->inp_ip.ip_ttl = optval; 985 else 986 error = EINVAL; 987 break; 988 989 case IP_MINTTL: 990 if (optval > 0 && optval <= MAXTTL) 991 inp->inp_ip_minttl = optval; 992 else 993 error = EINVAL; 994 break; 995 #define OPTSET(bit) \ 996 if (optval) \ 997 inp->inp_flags |= bit; \ 998 else \ 999 inp->inp_flags &= ~bit; 1000 1001 case IP_RECVOPTS: 1002 OPTSET(INP_RECVOPTS); 1003 break; 1004 1005 case IP_RECVRETOPTS: 1006 OPTSET(INP_RECVRETOPTS); 1007 break; 1008 1009 case IP_RECVDSTADDR: 1010 OPTSET(INP_RECVDSTADDR); 1011 break; 1012 case IP_RECVIF: 1013 OPTSET(INP_RECVIF); 1014 break; 1015 case IP_RECVTTL: 1016 OPTSET(INP_RECVTTL); 1017 break; 1018 case IP_RECVDSTPORT: 1019 OPTSET(INP_RECVDSTPORT); 1020 break; 1021 case IP_RECVRTABLE: 1022 OPTSET(INP_RECVRTABLE); 1023 break; 1024 case IP_IPSECFLOWINFO: 1025 OPTSET(INP_IPSECFLOWINFO); 1026 break; 1027 } 1028 } 1029 break; 1030 #undef OPTSET 1031 1032 case IP_MULTICAST_IF: 1033 case IP_MULTICAST_TTL: 1034 case IP_MULTICAST_LOOP: 1035 case IP_ADD_MEMBERSHIP: 1036 case IP_DROP_MEMBERSHIP: 1037 error = ip_setmoptions(optname, &inp->inp_moptions, m, 1038 inp->inp_rtableid); 1039 break; 1040 1041 case IP_PORTRANGE: 1042 if (m == 0 || m->m_len != sizeof(int)) 1043 error = EINVAL; 1044 else { 1045 optval = *mtod(m, int *); 1046 1047 switch (optval) { 1048 1049 case IP_PORTRANGE_DEFAULT: 1050 inp->inp_flags &= ~(INP_LOWPORT); 1051 inp->inp_flags &= ~(INP_HIGHPORT); 1052 break; 1053 1054 case IP_PORTRANGE_HIGH: 1055 inp->inp_flags &= ~(INP_LOWPORT); 1056 inp->inp_flags |= INP_HIGHPORT; 1057 break; 1058 1059 case IP_PORTRANGE_LOW: 1060 inp->inp_flags &= ~(INP_HIGHPORT); 1061 inp->inp_flags |= INP_LOWPORT; 1062 break; 1063 1064 default: 1065 1066 error = EINVAL; 1067 break; 1068 } 1069 } 1070 break; 1071 case IP_AUTH_LEVEL: 1072 case IP_ESP_TRANS_LEVEL: 1073 case IP_ESP_NETWORK_LEVEL: 1074 case IP_IPCOMP_LEVEL: 1075 #ifndef IPSEC 1076 error = EOPNOTSUPP; 1077 #else 1078 if (m == 0 || m->m_len != sizeof(int)) { 1079 error = EINVAL; 1080 break; 1081 } 1082 optval = *mtod(m, int *); 1083 1084 if (optval < IPSEC_LEVEL_BYPASS || 1085 optval > IPSEC_LEVEL_UNIQUE) { 1086 error = EINVAL; 1087 break; 1088 } 1089 1090 /* Unlink cached output TDB to force a re-search */ 1091 if (inp->inp_tdb_out) { 1092 int s = splsoftnet(); 1093 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1094 inp, inp_tdb_out_next); 1095 splx(s); 1096 } 1097 1098 if (inp->inp_tdb_in) { 1099 int s = splsoftnet(); 1100 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1101 inp, inp_tdb_in_next); 1102 splx(s); 1103 } 1104 1105 switch (optname) { 1106 case IP_AUTH_LEVEL: 1107 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1108 suser(p, 0)) { 1109 error = EACCES; 1110 break; 1111 } 1112 inp->inp_seclevel[SL_AUTH] = optval; 1113 break; 1114 1115 case IP_ESP_TRANS_LEVEL: 1116 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1117 suser(p, 0)) { 1118 error = EACCES; 1119 break; 1120 } 1121 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1122 break; 1123 1124 case IP_ESP_NETWORK_LEVEL: 1125 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1126 suser(p, 0)) { 1127 error = EACCES; 1128 break; 1129 } 1130 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1131 break; 1132 case IP_IPCOMP_LEVEL: 1133 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1134 suser(p, 0)) { 1135 error = EACCES; 1136 break; 1137 } 1138 inp->inp_seclevel[SL_IPCOMP] = optval; 1139 break; 1140 } 1141 if (!error) 1142 inp->inp_secrequire = get_sa_require(inp); 1143 #endif 1144 break; 1145 1146 case IP_IPSEC_REMOTE_CRED: 1147 case IP_IPSEC_REMOTE_AUTH: 1148 /* Can't set the remote credential or key */ 1149 error = EOPNOTSUPP; 1150 break; 1151 1152 case IP_IPSEC_LOCAL_ID: 1153 case IP_IPSEC_REMOTE_ID: 1154 case IP_IPSEC_LOCAL_CRED: 1155 case IP_IPSEC_LOCAL_AUTH: 1156 #ifndef IPSEC 1157 error = EOPNOTSUPP; 1158 #else 1159 if (m == NULL || m->m_len < 2) { 1160 error = EINVAL; 1161 break; 1162 } 1163 1164 m_copydata(m, 0, 2, (caddr_t) &opt16val); 1165 1166 /* If the type is 0, then we cleanup and return */ 1167 if (opt16val == 0) { 1168 switch (optname) { 1169 case IP_IPSEC_LOCAL_ID: 1170 if (inp->inp_ipo != NULL && 1171 inp->inp_ipo->ipo_srcid != NULL) { 1172 ipsp_reffree(inp->inp_ipo->ipo_srcid); 1173 inp->inp_ipo->ipo_srcid = NULL; 1174 } 1175 break; 1176 1177 case IP_IPSEC_REMOTE_ID: 1178 if (inp->inp_ipo != NULL && 1179 inp->inp_ipo->ipo_dstid != NULL) { 1180 ipsp_reffree(inp->inp_ipo->ipo_dstid); 1181 inp->inp_ipo->ipo_dstid = NULL; 1182 } 1183 break; 1184 1185 case IP_IPSEC_LOCAL_CRED: 1186 if (inp->inp_ipo != NULL && 1187 inp->inp_ipo->ipo_local_cred != NULL) { 1188 ipsp_reffree(inp->inp_ipo->ipo_local_cred); 1189 inp->inp_ipo->ipo_local_cred = NULL; 1190 } 1191 break; 1192 1193 case IP_IPSEC_LOCAL_AUTH: 1194 if (inp->inp_ipo != NULL && 1195 inp->inp_ipo->ipo_local_auth != NULL) { 1196 ipsp_reffree(inp->inp_ipo->ipo_local_auth); 1197 inp->inp_ipo->ipo_local_auth = NULL; 1198 } 1199 break; 1200 } 1201 1202 error = 0; 1203 break; 1204 } 1205 1206 /* Can't have an empty payload */ 1207 if (m->m_len == 2) { 1208 error = EINVAL; 1209 break; 1210 } 1211 1212 /* Allocate if needed */ 1213 if (inp->inp_ipo == NULL) { 1214 inp->inp_ipo = ipsec_add_policy(inp, 1215 AF_INET, IPSP_DIRECTION_OUT); 1216 if (inp->inp_ipo == NULL) { 1217 error = ENOBUFS; 1218 break; 1219 } 1220 } 1221 1222 ipr = malloc(sizeof(struct ipsec_ref) + m->m_len - 2, 1223 M_CREDENTIALS, M_NOWAIT); 1224 if (ipr == NULL) { 1225 error = ENOBUFS; 1226 break; 1227 } 1228 1229 ipr->ref_count = 1; 1230 ipr->ref_malloctype = M_CREDENTIALS; 1231 ipr->ref_len = m->m_len - 2; 1232 ipr->ref_type = opt16val; 1233 m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1)); 1234 1235 switch (optname) { 1236 case IP_IPSEC_LOCAL_ID: 1237 /* Check valid types and NUL-termination */ 1238 if (ipr->ref_type < IPSP_IDENTITY_PREFIX || 1239 ipr->ref_type > IPSP_IDENTITY_CONNECTION || 1240 ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1241 free(ipr, M_CREDENTIALS, 0); 1242 error = EINVAL; 1243 } else { 1244 if (inp->inp_ipo->ipo_srcid != NULL) 1245 ipsp_reffree(inp->inp_ipo->ipo_srcid); 1246 inp->inp_ipo->ipo_srcid = ipr; 1247 } 1248 break; 1249 case IP_IPSEC_REMOTE_ID: 1250 /* Check valid types and NUL-termination */ 1251 if (ipr->ref_type < IPSP_IDENTITY_PREFIX || 1252 ipr->ref_type > IPSP_IDENTITY_CONNECTION || 1253 ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1254 free(ipr, M_CREDENTIALS, 0); 1255 error = EINVAL; 1256 } else { 1257 if (inp->inp_ipo->ipo_dstid != NULL) 1258 ipsp_reffree(inp->inp_ipo->ipo_dstid); 1259 inp->inp_ipo->ipo_dstid = ipr; 1260 } 1261 break; 1262 case IP_IPSEC_LOCAL_CRED: 1263 if (ipr->ref_type < IPSP_CRED_KEYNOTE || 1264 ipr->ref_type > IPSP_CRED_X509) { 1265 free(ipr, M_CREDENTIALS, 0); 1266 error = EINVAL; 1267 } else { 1268 if (inp->inp_ipo->ipo_local_cred != NULL) 1269 ipsp_reffree(inp->inp_ipo->ipo_local_cred); 1270 inp->inp_ipo->ipo_local_cred = ipr; 1271 } 1272 break; 1273 case IP_IPSEC_LOCAL_AUTH: 1274 if (ipr->ref_type < IPSP_AUTH_PASSPHRASE || 1275 ipr->ref_type > IPSP_AUTH_RSA) { 1276 free(ipr, M_CREDENTIALS, 0); 1277 error = EINVAL; 1278 } else { 1279 if (inp->inp_ipo->ipo_local_auth != NULL) 1280 ipsp_reffree(inp->inp_ipo->ipo_local_auth); 1281 inp->inp_ipo->ipo_local_auth = ipr; 1282 } 1283 break; 1284 } 1285 1286 /* Unlink cached output TDB to force a re-search */ 1287 if (inp->inp_tdb_out) { 1288 int s = splsoftnet(); 1289 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1290 inp, inp_tdb_out_next); 1291 splx(s); 1292 } 1293 1294 if (inp->inp_tdb_in) { 1295 int s = splsoftnet(); 1296 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1297 inp, inp_tdb_in_next); 1298 splx(s); 1299 } 1300 #endif 1301 break; 1302 case SO_RTABLE: 1303 if (m == NULL || m->m_len < sizeof(u_int)) { 1304 error = EINVAL; 1305 break; 1306 } 1307 rtid = *mtod(m, u_int *); 1308 if (inp->inp_rtableid == rtid) 1309 break; 1310 /* needs priviledges to switch when already set */ 1311 if (p->p_p->ps_rtableid != rtid && 1312 p->p_p->ps_rtableid != 0 && 1313 (error = suser(p, 0)) != 0) 1314 break; 1315 /* table must exist */ 1316 if (!rtable_exists(rtid)) { 1317 error = EINVAL; 1318 break; 1319 } 1320 inp->inp_rtableid = rtid; 1321 break; 1322 case IP_PIPEX: 1323 if (m != NULL && m->m_len == sizeof(int)) 1324 inp->inp_pipex = *mtod(m, int *); 1325 else 1326 error = EINVAL; 1327 break; 1328 1329 default: 1330 error = ENOPROTOOPT; 1331 break; 1332 } 1333 if (m) 1334 (void)m_free(m); 1335 break; 1336 1337 case PRCO_GETOPT: 1338 switch (optname) { 1339 case IP_OPTIONS: 1340 case IP_RETOPTS: 1341 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1342 if (inp->inp_options) { 1343 m->m_len = inp->inp_options->m_len; 1344 bcopy(mtod(inp->inp_options, caddr_t), 1345 mtod(m, caddr_t), m->m_len); 1346 } else 1347 m->m_len = 0; 1348 break; 1349 1350 case IP_TOS: 1351 case IP_TTL: 1352 case IP_MINTTL: 1353 case IP_RECVOPTS: 1354 case IP_RECVRETOPTS: 1355 case IP_RECVDSTADDR: 1356 case IP_RECVIF: 1357 case IP_RECVTTL: 1358 case IP_RECVDSTPORT: 1359 case IP_RECVRTABLE: 1360 case IP_IPSECFLOWINFO: 1361 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1362 m->m_len = sizeof(int); 1363 switch (optname) { 1364 1365 case IP_TOS: 1366 optval = inp->inp_ip.ip_tos; 1367 break; 1368 1369 case IP_TTL: 1370 optval = inp->inp_ip.ip_ttl; 1371 break; 1372 1373 case IP_MINTTL: 1374 optval = inp->inp_ip_minttl; 1375 break; 1376 1377 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1378 1379 case IP_RECVOPTS: 1380 optval = OPTBIT(INP_RECVOPTS); 1381 break; 1382 1383 case IP_RECVRETOPTS: 1384 optval = OPTBIT(INP_RECVRETOPTS); 1385 break; 1386 1387 case IP_RECVDSTADDR: 1388 optval = OPTBIT(INP_RECVDSTADDR); 1389 break; 1390 case IP_RECVIF: 1391 optval = OPTBIT(INP_RECVIF); 1392 break; 1393 case IP_RECVTTL: 1394 optval = OPTBIT(INP_RECVTTL); 1395 break; 1396 case IP_RECVDSTPORT: 1397 optval = OPTBIT(INP_RECVDSTPORT); 1398 break; 1399 case IP_RECVRTABLE: 1400 optval = OPTBIT(INP_RECVRTABLE); 1401 break; 1402 case IP_IPSECFLOWINFO: 1403 optval = OPTBIT(INP_IPSECFLOWINFO); 1404 break; 1405 } 1406 *mtod(m, int *) = optval; 1407 break; 1408 1409 case IP_MULTICAST_IF: 1410 case IP_MULTICAST_TTL: 1411 case IP_MULTICAST_LOOP: 1412 case IP_ADD_MEMBERSHIP: 1413 case IP_DROP_MEMBERSHIP: 1414 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1415 break; 1416 1417 case IP_PORTRANGE: 1418 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1419 m->m_len = sizeof(int); 1420 1421 if (inp->inp_flags & INP_HIGHPORT) 1422 optval = IP_PORTRANGE_HIGH; 1423 else if (inp->inp_flags & INP_LOWPORT) 1424 optval = IP_PORTRANGE_LOW; 1425 else 1426 optval = 0; 1427 1428 *mtod(m, int *) = optval; 1429 break; 1430 1431 case IP_AUTH_LEVEL: 1432 case IP_ESP_TRANS_LEVEL: 1433 case IP_ESP_NETWORK_LEVEL: 1434 case IP_IPCOMP_LEVEL: 1435 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1436 #ifndef IPSEC 1437 m->m_len = sizeof(int); 1438 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1439 #else 1440 m->m_len = sizeof(int); 1441 switch (optname) { 1442 case IP_AUTH_LEVEL: 1443 optval = inp->inp_seclevel[SL_AUTH]; 1444 break; 1445 1446 case IP_ESP_TRANS_LEVEL: 1447 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1448 break; 1449 1450 case IP_ESP_NETWORK_LEVEL: 1451 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1452 break; 1453 case IP_IPCOMP_LEVEL: 1454 optval = inp->inp_seclevel[SL_IPCOMP]; 1455 break; 1456 } 1457 *mtod(m, int *) = optval; 1458 #endif 1459 break; 1460 case IP_IPSEC_LOCAL_ID: 1461 case IP_IPSEC_REMOTE_ID: 1462 case IP_IPSEC_LOCAL_CRED: 1463 case IP_IPSEC_REMOTE_CRED: 1464 case IP_IPSEC_LOCAL_AUTH: 1465 case IP_IPSEC_REMOTE_AUTH: 1466 #ifndef IPSEC 1467 error = EOPNOTSUPP; 1468 #else 1469 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1470 m->m_len = sizeof(u_int16_t); 1471 ipr = NULL; 1472 switch (optname) { 1473 case IP_IPSEC_LOCAL_ID: 1474 if (inp->inp_ipo != NULL) 1475 ipr = inp->inp_ipo->ipo_srcid; 1476 opt16val = IPSP_IDENTITY_NONE; 1477 break; 1478 case IP_IPSEC_REMOTE_ID: 1479 if (inp->inp_ipo != NULL) 1480 ipr = inp->inp_ipo->ipo_dstid; 1481 opt16val = IPSP_IDENTITY_NONE; 1482 break; 1483 case IP_IPSEC_LOCAL_CRED: 1484 if (inp->inp_ipo != NULL) 1485 ipr = inp->inp_ipo->ipo_local_cred; 1486 opt16val = IPSP_CRED_NONE; 1487 break; 1488 case IP_IPSEC_REMOTE_CRED: 1489 ipr = inp->inp_ipsec_remotecred; 1490 opt16val = IPSP_CRED_NONE; 1491 break; 1492 case IP_IPSEC_LOCAL_AUTH: 1493 if (inp->inp_ipo != NULL) 1494 ipr = inp->inp_ipo->ipo_local_auth; 1495 opt16val = IPSP_AUTH_NONE; 1496 break; 1497 case IP_IPSEC_REMOTE_AUTH: 1498 ipr = inp->inp_ipsec_remoteauth; 1499 opt16val = IPSP_AUTH_NONE; 1500 break; 1501 } 1502 if (ipr == NULL) 1503 *mtod(m, u_int16_t *) = opt16val; 1504 else { 1505 size_t len; 1506 1507 len = m->m_len + ipr->ref_len; 1508 if (len > MCLBYTES) { 1509 m_free(m); 1510 error = EINVAL; 1511 break; 1512 } 1513 /* allocate mbuf cluster for larger option */ 1514 if (len > MLEN) { 1515 MCLGET(m, M_WAITOK); 1516 if ((m->m_flags & M_EXT) == 0) { 1517 m_free(m); 1518 error = ENOBUFS; 1519 break; 1520 } 1521 1522 } 1523 m->m_len = len; 1524 *mtod(m, u_int16_t *) = ipr->ref_type; 1525 m_copyback(m, sizeof(u_int16_t), ipr->ref_len, 1526 ipr + 1, M_NOWAIT); 1527 } 1528 #endif 1529 break; 1530 case SO_RTABLE: 1531 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1532 m->m_len = sizeof(u_int); 1533 *mtod(m, u_int *) = inp->inp_rtableid; 1534 break; 1535 case IP_PIPEX: 1536 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1537 m->m_len = sizeof(int); 1538 *mtod(m, int *) = inp->inp_pipex; 1539 break; 1540 default: 1541 error = ENOPROTOOPT; 1542 break; 1543 } 1544 break; 1545 } 1546 return (error); 1547 } 1548 1549 /* 1550 * Set up IP options in pcb for insertion in output packets. 1551 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1552 * with destination address if source routed. 1553 */ 1554 int 1555 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m) 1556 { 1557 int cnt, optlen; 1558 u_char *cp; 1559 u_char opt; 1560 1561 /* turn off any old options */ 1562 if (*pcbopt) 1563 (void)m_free(*pcbopt); 1564 *pcbopt = 0; 1565 if (m == (struct mbuf *)0 || m->m_len == 0) { 1566 /* 1567 * Only turning off any previous options. 1568 */ 1569 if (m) 1570 (void)m_free(m); 1571 return (0); 1572 } 1573 1574 if (m->m_len % sizeof(int32_t)) 1575 goto bad; 1576 1577 /* 1578 * IP first-hop destination address will be stored before 1579 * actual options; move other options back 1580 * and clear it when none present. 1581 */ 1582 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1583 goto bad; 1584 cnt = m->m_len; 1585 m->m_len += sizeof(struct in_addr); 1586 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1587 memmove((caddr_t)cp, mtod(m, caddr_t), (unsigned)cnt); 1588 memset(mtod(m, caddr_t), 0, sizeof(struct in_addr)); 1589 1590 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1591 opt = cp[IPOPT_OPTVAL]; 1592 if (opt == IPOPT_EOL) 1593 break; 1594 if (opt == IPOPT_NOP) 1595 optlen = 1; 1596 else { 1597 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1598 goto bad; 1599 optlen = cp[IPOPT_OLEN]; 1600 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1601 goto bad; 1602 } 1603 switch (opt) { 1604 1605 default: 1606 break; 1607 1608 case IPOPT_LSRR: 1609 case IPOPT_SSRR: 1610 /* 1611 * user process specifies route as: 1612 * ->A->B->C->D 1613 * D must be our final destination (but we can't 1614 * check that since we may not have connected yet). 1615 * A is first hop destination, which doesn't appear in 1616 * actual IP option, but is stored before the options. 1617 */ 1618 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1619 goto bad; 1620 m->m_len -= sizeof(struct in_addr); 1621 cnt -= sizeof(struct in_addr); 1622 optlen -= sizeof(struct in_addr); 1623 cp[IPOPT_OLEN] = optlen; 1624 /* 1625 * Move first hop before start of options. 1626 */ 1627 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1628 sizeof(struct in_addr)); 1629 /* 1630 * Then copy rest of options back 1631 * to close up the deleted entry. 1632 */ 1633 memmove((caddr_t)&cp[IPOPT_OFFSET+1], 1634 (caddr_t)(&cp[IPOPT_OFFSET+1] + 1635 sizeof(struct in_addr)), 1636 (unsigned)cnt - (IPOPT_OFFSET+1)); 1637 break; 1638 } 1639 } 1640 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1641 goto bad; 1642 *pcbopt = m; 1643 return (0); 1644 1645 bad: 1646 (void)m_free(m); 1647 return (EINVAL); 1648 } 1649 1650 /* 1651 * Set the IP multicast options in response to user setsockopt(). 1652 */ 1653 int 1654 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m, 1655 u_int rtableid) 1656 { 1657 int error = 0; 1658 u_char loop; 1659 int i; 1660 struct in_addr addr; 1661 struct in_ifaddr *ia; 1662 struct ip_mreq *mreq; 1663 struct ifnet *ifp = NULL; 1664 struct ip_moptions *imo = *imop; 1665 struct in_multi **immp; 1666 struct route ro; 1667 struct sockaddr_in *dst; 1668 1669 if (imo == NULL) { 1670 /* 1671 * No multicast option buffer attached to the pcb; 1672 * allocate one and initialize to default values. 1673 */ 1674 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, 1675 M_WAITOK|M_ZERO); 1676 immp = (struct in_multi **)malloc( 1677 (sizeof(*immp) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 1678 M_WAITOK|M_ZERO); 1679 *imop = imo; 1680 imo->imo_multicast_ifp = NULL; 1681 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1682 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1683 imo->imo_num_memberships = 0; 1684 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1685 imo->imo_membership = immp; 1686 } 1687 1688 switch (optname) { 1689 1690 case IP_MULTICAST_IF: 1691 /* 1692 * Select the interface for outgoing multicast packets. 1693 */ 1694 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1695 error = EINVAL; 1696 break; 1697 } 1698 addr = *(mtod(m, struct in_addr *)); 1699 /* 1700 * INADDR_ANY is used to remove a previous selection. 1701 * When no interface is selected, a default one is 1702 * chosen every time a multicast packet is sent. 1703 */ 1704 if (addr.s_addr == INADDR_ANY) { 1705 imo->imo_multicast_ifp = NULL; 1706 break; 1707 } 1708 /* 1709 * The selected interface is identified by its local 1710 * IP address. Find the interface and confirm that 1711 * it supports multicasting. 1712 */ 1713 ia = in_iawithaddr(addr, rtableid); 1714 if (ia) 1715 ifp = ia->ia_ifp; 1716 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1717 error = EADDRNOTAVAIL; 1718 break; 1719 } 1720 imo->imo_multicast_ifp = ifp; 1721 break; 1722 1723 case IP_MULTICAST_TTL: 1724 /* 1725 * Set the IP time-to-live for outgoing multicast packets. 1726 */ 1727 if (m == NULL || m->m_len != 1) { 1728 error = EINVAL; 1729 break; 1730 } 1731 imo->imo_multicast_ttl = *(mtod(m, u_char *)); 1732 break; 1733 1734 case IP_MULTICAST_LOOP: 1735 /* 1736 * Set the loopback flag for outgoing multicast packets. 1737 * Must be zero or one. 1738 */ 1739 if (m == NULL || m->m_len != 1 || 1740 (loop = *(mtod(m, u_char *))) > 1) { 1741 error = EINVAL; 1742 break; 1743 } 1744 imo->imo_multicast_loop = loop; 1745 break; 1746 1747 case IP_ADD_MEMBERSHIP: 1748 /* 1749 * Add a multicast group membership. 1750 * Group must be a valid IP multicast address. 1751 */ 1752 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1753 error = EINVAL; 1754 break; 1755 } 1756 mreq = mtod(m, struct ip_mreq *); 1757 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1758 error = EINVAL; 1759 break; 1760 } 1761 /* 1762 * If no interface address was provided, use the interface of 1763 * the route to the given multicast address. 1764 */ 1765 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1766 ro.ro_rt = NULL; 1767 dst = satosin(&ro.ro_dst); 1768 dst->sin_len = sizeof(*dst); 1769 dst->sin_family = AF_INET; 1770 dst->sin_addr = mreq->imr_multiaddr; 1771 if (!(ro.ro_rt && ro.ro_rt->rt_ifp && 1772 (ro.ro_rt->rt_flags & RTF_UP))) 1773 ro.ro_rt = rtalloc1(&ro.ro_dst, RT_REPORT, 1774 rtableid); 1775 if (ro.ro_rt == NULL) { 1776 error = EADDRNOTAVAIL; 1777 break; 1778 } 1779 ifp = ro.ro_rt->rt_ifp; 1780 rtfree(ro.ro_rt); 1781 } else { 1782 ia = in_iawithaddr(mreq->imr_interface, rtableid); 1783 if (ia) 1784 ifp = ia->ia_ifp; 1785 } 1786 /* 1787 * See if we found an interface, and confirm that it 1788 * supports multicast. 1789 */ 1790 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1791 error = EADDRNOTAVAIL; 1792 break; 1793 } 1794 /* 1795 * See if the membership already exists or if all the 1796 * membership slots are full. 1797 */ 1798 for (i = 0; i < imo->imo_num_memberships; ++i) { 1799 if (imo->imo_membership[i]->inm_ifidx 1800 == ifp->if_index && 1801 imo->imo_membership[i]->inm_addr.s_addr 1802 == mreq->imr_multiaddr.s_addr) 1803 break; 1804 } 1805 if (i < imo->imo_num_memberships) { 1806 error = EADDRINUSE; 1807 break; 1808 } 1809 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1810 struct in_multi **nmships, **omships; 1811 size_t newmax; 1812 /* 1813 * Resize the vector to next power-of-two minus 1. If the 1814 * size would exceed the maximum then we know we've really 1815 * run out of entries. Otherwise, we reallocate the vector. 1816 */ 1817 nmships = NULL; 1818 omships = imo->imo_membership; 1819 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1820 if (newmax <= IP_MAX_MEMBERSHIPS) { 1821 nmships = (struct in_multi **)malloc( 1822 sizeof(*nmships) * newmax, M_IPMOPTS, 1823 M_NOWAIT|M_ZERO); 1824 if (nmships != NULL) { 1825 bcopy(omships, nmships, 1826 sizeof(*omships) * 1827 imo->imo_max_memberships); 1828 free(omships, M_IPMOPTS, 0); 1829 imo->imo_membership = nmships; 1830 imo->imo_max_memberships = newmax; 1831 } 1832 } 1833 if (nmships == NULL) { 1834 error = ETOOMANYREFS; 1835 break; 1836 } 1837 } 1838 /* 1839 * Everything looks good; add a new record to the multicast 1840 * address list for the given interface. 1841 */ 1842 if ((imo->imo_membership[i] = 1843 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1844 error = ENOBUFS; 1845 break; 1846 } 1847 ++imo->imo_num_memberships; 1848 break; 1849 1850 case IP_DROP_MEMBERSHIP: 1851 /* 1852 * Drop a multicast group membership. 1853 * Group must be a valid IP multicast address. 1854 */ 1855 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1856 error = EINVAL; 1857 break; 1858 } 1859 mreq = mtod(m, struct ip_mreq *); 1860 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1861 error = EINVAL; 1862 break; 1863 } 1864 /* 1865 * If an interface address was specified, get a pointer 1866 * to its ifnet structure. 1867 */ 1868 if (mreq->imr_interface.s_addr == INADDR_ANY) 1869 ifp = NULL; 1870 else { 1871 ia = in_iawithaddr(mreq->imr_interface, rtableid); 1872 if (ia == NULL) { 1873 error = EADDRNOTAVAIL; 1874 break; 1875 } 1876 ifp = ia->ia_ifp; 1877 } 1878 /* 1879 * Find the membership in the membership array. 1880 */ 1881 for (i = 0; i < imo->imo_num_memberships; ++i) { 1882 if ((ifp == NULL || 1883 imo->imo_membership[i]->inm_ifidx == 1884 ifp->if_index) && 1885 imo->imo_membership[i]->inm_addr.s_addr == 1886 mreq->imr_multiaddr.s_addr) 1887 break; 1888 } 1889 if (i == imo->imo_num_memberships) { 1890 error = EADDRNOTAVAIL; 1891 break; 1892 } 1893 /* 1894 * Give up the multicast address record to which the 1895 * membership points. 1896 */ 1897 in_delmulti(imo->imo_membership[i]); 1898 /* 1899 * Remove the gap in the membership array. 1900 */ 1901 for (++i; i < imo->imo_num_memberships; ++i) 1902 imo->imo_membership[i-1] = imo->imo_membership[i]; 1903 --imo->imo_num_memberships; 1904 break; 1905 1906 default: 1907 error = EOPNOTSUPP; 1908 break; 1909 } 1910 1911 /* 1912 * If all options have default values, no need to keep the data. 1913 */ 1914 if (imo->imo_multicast_ifp == NULL && 1915 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1916 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1917 imo->imo_num_memberships == 0) { 1918 free(imo->imo_membership , M_IPMOPTS, 0); 1919 free(*imop, M_IPMOPTS, 0); 1920 *imop = NULL; 1921 } 1922 1923 return (error); 1924 } 1925 1926 /* 1927 * Return the IP multicast options in response to user getsockopt(). 1928 */ 1929 int 1930 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf **mp) 1931 { 1932 u_char *ttl; 1933 u_char *loop; 1934 struct in_addr *addr; 1935 struct in_ifaddr *ia; 1936 1937 *mp = m_get(M_WAIT, MT_SOOPTS); 1938 1939 switch (optname) { 1940 1941 case IP_MULTICAST_IF: 1942 addr = mtod(*mp, struct in_addr *); 1943 (*mp)->m_len = sizeof(struct in_addr); 1944 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1945 addr->s_addr = INADDR_ANY; 1946 else { 1947 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1948 addr->s_addr = (ia == NULL) ? INADDR_ANY 1949 : ia->ia_addr.sin_addr.s_addr; 1950 } 1951 return (0); 1952 1953 case IP_MULTICAST_TTL: 1954 ttl = mtod(*mp, u_char *); 1955 (*mp)->m_len = 1; 1956 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1957 : imo->imo_multicast_ttl; 1958 return (0); 1959 1960 case IP_MULTICAST_LOOP: 1961 loop = mtod(*mp, u_char *); 1962 (*mp)->m_len = 1; 1963 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1964 : imo->imo_multicast_loop; 1965 return (0); 1966 1967 default: 1968 return (EOPNOTSUPP); 1969 } 1970 } 1971 1972 /* 1973 * Discard the IP multicast options. 1974 */ 1975 void 1976 ip_freemoptions(struct ip_moptions *imo) 1977 { 1978 int i; 1979 1980 if (imo != NULL) { 1981 for (i = 0; i < imo->imo_num_memberships; ++i) 1982 in_delmulti(imo->imo_membership[i]); 1983 free(imo->imo_membership, M_IPMOPTS, 0); 1984 free(imo, M_IPMOPTS, 0); 1985 } 1986 } 1987 1988 /* 1989 * Routine called from ip_output() to loop back a copy of an IP multicast 1990 * packet to the input queue of a specified interface. Note that this 1991 * calls the output routine of the loopback "driver", but with an interface 1992 * pointer that might NOT be &loif -- easier than replicating that code here. 1993 */ 1994 void 1995 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1996 { 1997 struct ip *ip; 1998 struct mbuf *copym; 1999 2000 copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 2001 if (copym != NULL) { 2002 /* 2003 * We don't bother to fragment if the IP length is greater 2004 * than the interface's MTU. Can this possibly matter? 2005 */ 2006 ip = mtod(copym, struct ip *); 2007 ip->ip_sum = 0; 2008 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 2009 (void) looutput(ifp, copym, sintosa(dst), NULL); 2010 } 2011 } 2012 2013 /* 2014 * Compute significant parts of the IPv4 checksum pseudo-header 2015 * for use in a delayed TCP/UDP checksum calculation. 2016 */ 2017 static __inline u_int16_t __attribute__((__unused__)) 2018 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 2019 { 2020 u_int32_t sum; 2021 2022 sum = lenproto + 2023 (u_int16_t)(src >> 16) + 2024 (u_int16_t)(src /*& 0xffff*/) + 2025 (u_int16_t)(dst >> 16) + 2026 (u_int16_t)(dst /*& 0xffff*/); 2027 2028 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 2029 2030 if (sum > 0xffff) 2031 sum -= 0xffff; 2032 2033 return (sum); 2034 } 2035 2036 /* 2037 * Process a delayed payload checksum calculation. 2038 */ 2039 void 2040 in_delayed_cksum(struct mbuf *m) 2041 { 2042 struct ip *ip; 2043 u_int16_t csum, offset; 2044 2045 ip = mtod(m, struct ip *); 2046 offset = ip->ip_hl << 2; 2047 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 2048 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 2049 csum = 0xffff; 2050 2051 switch (ip->ip_p) { 2052 case IPPROTO_TCP: 2053 offset += offsetof(struct tcphdr, th_sum); 2054 break; 2055 2056 case IPPROTO_UDP: 2057 offset += offsetof(struct udphdr, uh_sum); 2058 break; 2059 2060 case IPPROTO_ICMP: 2061 offset += offsetof(struct icmp, icmp_cksum); 2062 break; 2063 2064 default: 2065 return; 2066 } 2067 2068 if ((offset + sizeof(u_int16_t)) > m->m_len) 2069 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 2070 else 2071 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 2072 } 2073 2074 void 2075 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 2076 { 2077 /* some hw and in_delayed_cksum need the pseudo header cksum */ 2078 if (m->m_pkthdr.csum_flags & 2079 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 2080 struct ip *ip; 2081 u_int16_t csum = 0, offset; 2082 2083 ip = mtod(m, struct ip *); 2084 offset = ip->ip_hl << 2; 2085 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 2086 csum = in_cksum_phdr(ip->ip_src.s_addr, 2087 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 2088 offset + ip->ip_p)); 2089 if (ip->ip_p == IPPROTO_TCP) 2090 offset += offsetof(struct tcphdr, th_sum); 2091 else if (ip->ip_p == IPPROTO_UDP) 2092 offset += offsetof(struct udphdr, uh_sum); 2093 else if (ip->ip_p == IPPROTO_ICMP) 2094 offset += offsetof(struct icmp, icmp_cksum); 2095 if ((offset + sizeof(u_int16_t)) > m->m_len) 2096 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 2097 else 2098 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 2099 } 2100 2101 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 2102 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 2103 ifp->if_bridgeport != NULL) { 2104 tcpstat.tcps_outswcsum++; 2105 in_delayed_cksum(m); 2106 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 2107 } 2108 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 2109 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 2110 ifp->if_bridgeport != NULL) { 2111 udpstat.udps_outswcsum++; 2112 in_delayed_cksum(m); 2113 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 2114 } 2115 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 2116 in_delayed_cksum(m); 2117 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 2118 } 2119 } 2120