1 /* $OpenBSD: ip_output.c,v 1.140 2001/11/26 16:50:26 jasoni Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include "pf.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/mbuf.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/proc.h> 48 #include <sys/kernel.h> 49 50 #include <net/if.h> 51 #include <net/if_enc.h> 52 #include <net/route.h> 53 54 #include <netinet/in.h> 55 #include <netinet/in_systm.h> 56 #include <netinet/ip.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_var.h> 59 #include <netinet/ip_var.h> 60 #include <netinet/ip_icmp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/udp.h> 63 #include <netinet/tcp_timer.h> 64 #include <netinet/tcp_var.h> 65 #include <netinet/udp_var.h> 66 67 #if NPF > 0 68 #include <net/pfvar.h> 69 #endif 70 71 #ifdef vax 72 #include <machine/mtpr.h> 73 #endif 74 75 #ifdef IPSEC 76 #ifdef ENCDEBUG 77 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 78 #else 79 #define DPRINTF(x) 80 #endif 81 82 extern u_int8_t get_sa_require __P((struct inpcb *)); 83 84 extern int ipsec_auth_default_level; 85 extern int ipsec_esp_trans_default_level; 86 extern int ipsec_esp_network_default_level; 87 extern int ipsec_ipcomp_default_level; 88 #endif /* IPSEC */ 89 90 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); 91 static void ip_mloopback 92 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *)); 93 94 /* 95 * IP output. The packet in mbuf chain m contains a skeletal IP 96 * header (with len, off, ttl, proto, tos, src, dst). 97 * The mbuf chain containing the packet will be freed. 98 * The mbuf opt, if present, will not be freed. 99 */ 100 int 101 #if __STDC__ 102 ip_output(struct mbuf *m0, ...) 103 #else 104 ip_output(m0, va_alist) 105 struct mbuf *m0; 106 va_dcl 107 #endif 108 { 109 register struct ip *ip, *mhip; 110 register struct ifnet *ifp; 111 struct mbuf *m = m0; 112 register int hlen = sizeof (struct ip); 113 int len, off, error = 0; 114 struct route iproute; 115 struct sockaddr_in *dst; 116 struct in_ifaddr *ia; 117 struct mbuf *opt; 118 struct route *ro; 119 int flags; 120 struct ip_moptions *imo; 121 va_list ap; 122 u_int8_t sproto = 0, donerouting = 0; 123 #ifdef IPSEC 124 u_int32_t icmp_mtu = 0; 125 union sockaddr_union sdst; 126 u_int32_t sspi; 127 struct m_tag *mtag; 128 struct tdb_ident *tdbi; 129 130 struct inpcb *inp; 131 struct tdb *tdb; 132 int s; 133 #endif /* IPSEC */ 134 135 va_start(ap, m0); 136 opt = va_arg(ap, struct mbuf *); 137 ro = va_arg(ap, struct route *); 138 flags = va_arg(ap, int); 139 imo = va_arg(ap, struct ip_moptions *); 140 #ifdef IPSEC 141 inp = va_arg(ap, struct inpcb *); 142 if (inp && (inp->inp_flags & INP_IPV6) != 0) 143 panic("ip_output: IPv6 pcb is passed"); 144 #endif /* IPSEC */ 145 va_end(ap); 146 147 #ifdef DIAGNOSTIC 148 if ((m->m_flags & M_PKTHDR) == 0) 149 panic("ip_output no HDR"); 150 #endif 151 if (opt) { 152 m = ip_insertoptions(m, opt, &len); 153 hlen = len; 154 } 155 156 ip = mtod(m, struct ip *); 157 158 /* 159 * Fill in IP header. 160 */ 161 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 162 ip->ip_v = IPVERSION; 163 ip->ip_off &= IP_DF; 164 ip->ip_id = htons(ip_randomid()); 165 ip->ip_hl = hlen >> 2; 166 ipstat.ips_localout++; 167 } else { 168 hlen = ip->ip_hl << 2; 169 } 170 171 /* 172 * If we're missing the IP source address, do a route lookup. We'll 173 * remember this result, in case we don't need to do any IPsec 174 * processing on the packet. We need the source address so we can 175 * do an SPD lookup in IPsec; for most packets, the source address 176 * is set at a higher level protocol. ICMPs and other packets 177 * though (e.g., traceroute) have a source address of zeroes. 178 */ 179 if (ip->ip_src.s_addr == INADDR_ANY) { 180 donerouting = 1; 181 182 if (ro == 0) { 183 ro = &iproute; 184 bzero((caddr_t)ro, sizeof (*ro)); 185 } 186 187 dst = satosin(&ro->ro_dst); 188 189 /* 190 * If there is a cached route, check that it is to the same 191 * destination and is still up. If not, free it and try again. 192 */ 193 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 194 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 195 RTFREE(ro->ro_rt); 196 ro->ro_rt = (struct rtentry *)0; 197 } 198 199 if (ro->ro_rt == 0) { 200 dst->sin_family = AF_INET; 201 dst->sin_len = sizeof(*dst); 202 dst->sin_addr = ip->ip_dst; 203 } 204 205 /* 206 * If routing to interface only, short-circuit routing lookup. 207 */ 208 if (flags & IP_ROUTETOIF) { 209 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 210 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 211 ipstat.ips_noroute++; 212 error = ENETUNREACH; 213 goto bad; 214 } 215 216 ifp = ia->ia_ifp; 217 ip->ip_ttl = 1; 218 } else { 219 if (ro->ro_rt == 0) 220 rtalloc(ro); 221 222 if (ro->ro_rt == 0) { 223 ipstat.ips_noroute++; 224 error = EHOSTUNREACH; 225 goto bad; 226 } 227 228 ia = ifatoia(ro->ro_rt->rt_ifa); 229 ifp = ro->ro_rt->rt_ifp; 230 ro->ro_rt->rt_use++; 231 232 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 233 dst = satosin(ro->ro_rt->rt_gateway); 234 } 235 236 /* Set the source IP address */ 237 if (!IN_MULTICAST(ip->ip_dst.s_addr)) 238 ip->ip_src = ia->ia_addr.sin_addr; 239 } 240 241 #ifdef IPSEC 242 /* 243 * splnet is chosen over spltdb because we are not allowed to 244 * lower the level, and udp_output calls us in splnet(). 245 */ 246 s = splnet(); 247 248 /* Do we have any pending SAs to apply ? */ 249 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 250 if (mtag != NULL) { 251 #ifdef DIAGNOSTIC 252 if (mtag->m_tag_len != sizeof (struct tdb_ident)) 253 panic("ip_output: tag of length %d (should be %d", 254 mtag->m_tag_len, sizeof (struct tdb_ident)); 255 #endif 256 tdbi = (struct tdb_ident *)(mtag + 1); 257 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); 258 if (tdb == NULL) 259 error = -EINVAL; 260 m_tag_delete(m, mtag); 261 } 262 else 263 tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error, 264 IPSP_DIRECTION_OUT, NULL, inp); 265 266 if (tdb == NULL) { 267 splx(s); 268 269 if (error == 0) { 270 /* 271 * No IPsec processing required, we'll just send the 272 * packet out. 273 */ 274 sproto = 0; 275 276 /* Fall through to routing/multicast handling */ 277 } else { 278 /* 279 * -EINVAL is used to indicate that the packet should 280 * be silently dropped, typically because we've asked 281 * key management for an SA. 282 */ 283 if (error == -EINVAL) /* Should silently drop packet */ 284 error = 0; 285 286 m_freem(m); 287 goto done; 288 } 289 } else { 290 /* Loop detection */ 291 for (mtag = m_tag_first(m); mtag != NULL; 292 mtag = m_tag_next(m, mtag)) { 293 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 294 mtag->m_tag_id != 295 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 296 continue; 297 tdbi = (struct tdb_ident *)(mtag + 1); 298 if (tdbi->spi == tdb->tdb_spi && 299 tdbi->proto == tdb->tdb_sproto && 300 !bcmp(&tdbi->dst, &tdb->tdb_dst, 301 sizeof(union sockaddr_union))) { 302 splx(s); 303 sproto = 0; /* mark as no-IPsec-needed */ 304 goto done_spd; 305 } 306 } 307 308 /* We need to do IPsec */ 309 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); 310 sspi = tdb->tdb_spi; 311 sproto = tdb->tdb_sproto; 312 splx(s); 313 314 /* 315 * If it needs TCP/UDP hardware-checksumming, do the 316 * computation now. 317 */ 318 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 319 in_delayed_cksum(m); 320 m->m_pkthdr.csum &= 321 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 322 } 323 324 /* If it's not a multicast packet, try to fast-path */ 325 if (!IN_MULTICAST(ip->ip_dst.s_addr)) { 326 goto sendit; 327 } 328 } 329 330 /* Fall through to the routing/multicast handling code */ 331 done_spd: 332 #endif /* IPSEC */ 333 334 if (donerouting == 0) { 335 if (ro == 0) { 336 ro = &iproute; 337 bzero((caddr_t)ro, sizeof (*ro)); 338 } 339 340 dst = satosin(&ro->ro_dst); 341 342 /* 343 * If there is a cached route, check that it is to the same 344 * destination and is still up. If not, free it and try again. 345 */ 346 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 347 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 348 RTFREE(ro->ro_rt); 349 ro->ro_rt = (struct rtentry *)0; 350 } 351 352 if (ro->ro_rt == 0) { 353 dst->sin_family = AF_INET; 354 dst->sin_len = sizeof(*dst); 355 dst->sin_addr = ip->ip_dst; 356 } 357 358 /* 359 * If routing to interface only, short-circuit routing lookup. 360 */ 361 if (flags & IP_ROUTETOIF) { 362 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 363 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 364 ipstat.ips_noroute++; 365 error = ENETUNREACH; 366 goto bad; 367 } 368 369 ifp = ia->ia_ifp; 370 ip->ip_ttl = 1; 371 } else { 372 if (ro->ro_rt == 0) 373 rtalloc(ro); 374 375 if (ro->ro_rt == 0) { 376 ipstat.ips_noroute++; 377 error = EHOSTUNREACH; 378 goto bad; 379 } 380 381 ia = ifatoia(ro->ro_rt->rt_ifa); 382 ifp = ro->ro_rt->rt_ifp; 383 ro->ro_rt->rt_use++; 384 385 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 386 dst = satosin(ro->ro_rt->rt_gateway); 387 } 388 389 /* Set the source IP address */ 390 if (ip->ip_src.s_addr == INADDR_ANY) 391 ip->ip_src = ia->ia_addr.sin_addr; 392 } 393 394 if (IN_MULTICAST(ip->ip_dst.s_addr) || 395 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 396 struct in_multi *inm; 397 398 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 399 M_BCAST : M_MCAST; 400 401 /* 402 * IP destination address is multicast. Make sure "dst" 403 * still points to the address in "ro". (It may have been 404 * changed to point to a gateway address, above.) 405 */ 406 dst = satosin(&ro->ro_dst); 407 408 /* 409 * See if the caller provided any multicast options 410 */ 411 if (imo != NULL) { 412 ip->ip_ttl = imo->imo_multicast_ttl; 413 if (imo->imo_multicast_ifp != NULL) 414 ifp = imo->imo_multicast_ifp; 415 } else 416 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 417 418 /* 419 * Confirm that the outgoing interface supports multicast, 420 * but only if the packet actually is going out on that 421 * interface (i.e., no IPsec is applied). 422 */ 423 if ((((m->m_flags & M_MCAST) && 424 (ifp->if_flags & IFF_MULTICAST) == 0) || 425 ((m->m_flags & M_BCAST) && 426 (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0)) { 427 ipstat.ips_noroute++; 428 error = ENETUNREACH; 429 goto bad; 430 } 431 432 /* 433 * If source address not specified yet, use address 434 * of outgoing interface. 435 */ 436 if (ip->ip_src.s_addr == INADDR_ANY) { 437 register struct in_ifaddr *ia; 438 439 for (ia = in_ifaddr.tqh_first; 440 ia; 441 ia = ia->ia_list.tqe_next) 442 if (ia->ia_ifp == ifp) { 443 ip->ip_src = ia->ia_addr.sin_addr; 444 break; 445 } 446 } 447 448 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 449 if (inm != NULL && 450 (imo == NULL || imo->imo_multicast_loop)) { 451 /* 452 * If we belong to the destination multicast group 453 * on the outgoing interface, and the caller did not 454 * forbid loopback, loop back a copy. 455 * Can't defer TCP/UDP checksumming, do the 456 * computation now. 457 */ 458 if (m->m_pkthdr.csum & 459 (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 460 in_delayed_cksum(m); 461 m->m_pkthdr.csum &= 462 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 463 } 464 ip_mloopback(ifp, m, dst); 465 } 466 #ifdef MROUTING 467 else { 468 /* 469 * If we are acting as a multicast router, perform 470 * multicast forwarding as if the packet had just 471 * arrived on the interface to which we are about 472 * to send. The multicast forwarding function 473 * recursively calls this function, using the 474 * IP_FORWARDING flag to prevent infinite recursion. 475 * 476 * Multicasts that are looped back by ip_mloopback(), 477 * above, will be forwarded by the ip_input() routine, 478 * if necessary. 479 */ 480 extern struct socket *ip_mrouter; 481 482 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 483 if (ip_mforward(m, ifp) != 0) { 484 m_freem(m); 485 goto done; 486 } 487 } 488 } 489 #endif 490 /* 491 * Multicasts with a time-to-live of zero may be looped- 492 * back, above, but must not be transmitted on a network. 493 * Also, multicasts addressed to the loopback interface 494 * are not sent -- the above call to ip_mloopback() will 495 * loop back a copy if this host actually belongs to the 496 * destination group on the loopback interface. 497 */ 498 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 499 m_freem(m); 500 goto done; 501 } 502 503 goto sendit; 504 } 505 506 /* 507 * Look for broadcast address and and verify user is allowed to send 508 * such a packet; if the packet is going in an IPsec tunnel, skip 509 * this check. 510 */ 511 if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp))) { 512 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 513 error = EADDRNOTAVAIL; 514 goto bad; 515 } 516 if ((flags & IP_ALLOWBROADCAST) == 0) { 517 error = EACCES; 518 goto bad; 519 } 520 521 /* Don't allow broadcast messages to be fragmented */ 522 if ((u_int16_t)ip->ip_len > ifp->if_mtu) { 523 error = EMSGSIZE; 524 goto bad; 525 } 526 m->m_flags |= M_BCAST; 527 } else 528 m->m_flags &= ~M_BCAST; 529 530 sendit: 531 /* 532 * If we're doing Path MTU discovery, we need to set DF unless 533 * the route's MTU is locked. 534 */ 535 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 536 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 537 ip->ip_off |= IP_DF; 538 539 #ifdef IPSEC 540 /* 541 * Check if the packet needs encapsulation. 542 */ 543 if (sproto != 0) { 544 s = splnet(); 545 546 /* 547 * Packet filter 548 */ 549 #if NPF > 0 550 551 if (pf_test(PF_OUT, &encif[0].sc_if, &m) != PF_PASS) { 552 error = EHOSTUNREACH; 553 splx(s); 554 m_freem(m); 555 goto done; 556 } 557 if (m == NULL) { 558 splx(s); 559 goto done; 560 } 561 ip = mtod(m, struct ip *); 562 hlen = ip->ip_hl << 2; 563 #endif 564 565 tdb = gettdb(sspi, &sdst, sproto); 566 if (tdb == NULL) { 567 error = EHOSTUNREACH; 568 splx(s); 569 m_freem(m); 570 goto done; 571 } 572 573 /* Latch to PCB */ 574 if (inp) 575 tdb_add_inp(tdb, inp, 0); 576 577 /* Check if we are allowed to fragment */ 578 if ((ip->ip_off & IP_DF) && tdb->tdb_mtu && 579 (u_int16_t)ip->ip_len > tdb->tdb_mtu && 580 tdb->tdb_mtutimeout > time.tv_sec) { 581 struct rtentry *rt = NULL; 582 583 icmp_mtu = tdb->tdb_mtu; 584 splx(s); 585 586 /* Find a host route to store the mtu in */ 587 if (ro != NULL) 588 rt = ro->ro_rt; 589 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 590 struct sockaddr_in dst = { 591 sizeof(struct sockaddr_in), AF_INET}; 592 dst.sin_addr = ip->ip_dst; 593 rt = icmp_mtudisc_clone((struct sockaddr *)&dst); 594 } 595 if (rt != NULL) { 596 rt->rt_rmx.rmx_mtu = icmp_mtu; 597 if (ro && ro->ro_rt != NULL) { 598 RTFREE(ro->ro_rt); 599 ro->ro_rt = (struct rtentry *) 0; 600 rtalloc(ro); 601 } 602 } 603 error = EMSGSIZE; 604 goto bad; 605 } 606 607 /* Massage the IP header for use by the IPsec code */ 608 ip->ip_len = htons((u_short) ip->ip_len); 609 ip->ip_off = htons((u_short) ip->ip_off); 610 611 /* 612 * Clear these -- they'll be set in the recursive invocation 613 * as needed. 614 */ 615 m->m_flags &= ~(M_MCAST | M_BCAST); 616 617 /* Callee frees mbuf */ 618 error = ipsp_process_packet(m, tdb, AF_INET, 0); 619 splx(s); 620 return error; /* Nothing more to be done */ 621 } 622 623 /* 624 * If deferred crypto processing is needed, check that the 625 * interface supports it. 626 */ 627 if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) 628 != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) { 629 /* Notify IPsec to do its own crypto. */ 630 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 631 m_freem(m); 632 error = EHOSTUNREACH; 633 goto done; 634 } 635 #endif /* IPSEC */ 636 637 /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */ 638 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) { 639 if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 640 ifp->if_bridge != NULL) { 641 in_delayed_cksum(m); 642 m->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */ 643 } 644 } else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) { 645 if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 646 ifp->if_bridge != NULL) { 647 in_delayed_cksum(m); 648 m->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */ 649 } 650 } 651 652 /* 653 * Packet filter 654 */ 655 #if NPF > 0 656 if (pf_test(PF_OUT, ifp, &m) != PF_PASS) { 657 error = EHOSTUNREACH; 658 m_freem(m); 659 goto done; 660 } 661 if (m == NULL) 662 goto done; 663 664 ip = mtod(m, struct ip *); 665 hlen = ip->ip_hl << 2; 666 #endif 667 668 /* 669 * If small enough for interface, can just send directly. 670 */ 671 if ((u_int16_t)ip->ip_len <= ifp->if_mtu) { 672 ip->ip_len = htons((u_int16_t)ip->ip_len); 673 ip->ip_off = htons((u_int16_t)ip->ip_off); 674 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 675 ifp->if_bridge == NULL) { 676 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 677 ipstat.ips_outhwcsum++; 678 } else { 679 ip->ip_sum = 0; 680 ip->ip_sum = in_cksum(m, hlen); 681 } 682 /* Update relevant hardware checksum stats for TCP/UDP */ 683 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) 684 tcpstat.tcps_outhwcsum++; 685 else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) 686 udpstat.udps_outhwcsum++; 687 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt); 688 goto done; 689 } 690 691 /* 692 * Too large for interface; fragment if possible. 693 * Must be able to put at least 8 bytes per fragment. 694 */ 695 if (ip->ip_off & IP_DF) { 696 #ifdef IPSEC 697 icmp_mtu = ifp->if_mtu; 698 #endif 699 error = EMSGSIZE; 700 /* 701 * This case can happen if the user changed the MTU 702 * of an interface after enabling IP on it. Because 703 * most netifs don't keep track of routes pointing to 704 * them, there is no way for one to update all its 705 * routes when the MTU is changed. 706 */ 707 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 708 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 709 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 710 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 711 } 712 ipstat.ips_cantfrag++; 713 goto bad; 714 } 715 len = (ifp->if_mtu - hlen) &~ 7; 716 if (len < 8) { 717 error = EMSGSIZE; 718 goto bad; 719 } 720 721 /* 722 * If we are doing fragmentation, we can't defer TCP/UDP 723 * checksumming; compute the checksum and clear the flag. 724 */ 725 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 726 in_delayed_cksum(m); 727 m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 728 } 729 730 { 731 int mhlen, firstlen = len; 732 struct mbuf **mnext = &m->m_nextpkt; 733 734 /* 735 * Loop through length of segment after first fragment, 736 * make new header and copy data of each part and link onto chain. 737 */ 738 m0 = m; 739 mhlen = sizeof (struct ip); 740 for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) { 741 MGETHDR(m, M_DONTWAIT, MT_HEADER); 742 if (m == 0) { 743 error = ENOBUFS; 744 ipstat.ips_odropped++; 745 goto sendorfree; 746 } 747 *mnext = m; 748 mnext = &m->m_nextpkt; 749 m->m_data += max_linkhdr; 750 mhip = mtod(m, struct ip *); 751 *mhip = *ip; 752 /* we must inherit MCAST and BCAST flags */ 753 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 754 if (hlen > sizeof (struct ip)) { 755 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 756 mhip->ip_hl = mhlen >> 2; 757 } 758 m->m_len = mhlen; 759 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); 760 if (ip->ip_off & IP_MF) 761 mhip->ip_off |= IP_MF; 762 if (off + len >= (u_int16_t)ip->ip_len) 763 len = (u_int16_t)ip->ip_len - off; 764 else 765 mhip->ip_off |= IP_MF; 766 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 767 m->m_next = m_copy(m0, off, len); 768 if (m->m_next == 0) { 769 error = ENOBUFS; /* ??? */ 770 ipstat.ips_odropped++; 771 goto sendorfree; 772 } 773 m->m_pkthdr.len = mhlen + len; 774 m->m_pkthdr.rcvif = (struct ifnet *)0; 775 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 776 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 777 ifp->if_bridge == NULL) { 778 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 779 ipstat.ips_outhwcsum++; 780 } else { 781 mhip->ip_sum = 0; 782 mhip->ip_sum = in_cksum(m, mhlen); 783 } 784 ipstat.ips_ofragments++; 785 } 786 /* 787 * Update first fragment by trimming what's been copied out 788 * and updating header, then send each fragment (in order). 789 */ 790 m = m0; 791 m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len); 792 m->m_pkthdr.len = hlen + firstlen; 793 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 794 ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF)); 795 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 796 ifp->if_bridge == NULL) { 797 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 798 ipstat.ips_outhwcsum++; 799 } else { 800 ip->ip_sum = 0; 801 ip->ip_sum = in_cksum(m, hlen); 802 } 803 sendorfree: 804 for (m = m0; m; m = m0) { 805 m0 = m->m_nextpkt; 806 m->m_nextpkt = 0; 807 if (error == 0) 808 error = (*ifp->if_output)(ifp, m, sintosa(dst), 809 ro->ro_rt); 810 else 811 m_freem(m); 812 } 813 814 if (error == 0) 815 ipstat.ips_fragmented++; 816 } 817 done: 818 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) 819 RTFREE(ro->ro_rt); 820 return (error); 821 bad: 822 #ifdef IPSEC 823 if (error == EMSGSIZE && icmp_mtu != 0) 824 ipsec_adjust_mtu(m, icmp_mtu); 825 #endif 826 m_freem(m0); 827 goto done; 828 } 829 830 /* 831 * Insert IP options into preformed packet. 832 * Adjust IP destination as required for IP source routing, 833 * as indicated by a non-zero in_addr at the start of the options. 834 */ 835 static struct mbuf * 836 ip_insertoptions(m, opt, phlen) 837 register struct mbuf *m; 838 struct mbuf *opt; 839 int *phlen; 840 { 841 register struct ipoption *p = mtod(opt, struct ipoption *); 842 struct mbuf *n; 843 register struct ip *ip = mtod(m, struct ip *); 844 unsigned optlen; 845 846 optlen = opt->m_len - sizeof(p->ipopt_dst); 847 if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET) 848 return (m); /* XXX should fail */ 849 if (p->ipopt_dst.s_addr) 850 ip->ip_dst = p->ipopt_dst; 851 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 852 MGETHDR(n, M_DONTWAIT, MT_HEADER); 853 if (n == 0) 854 return (m); 855 M_MOVE_HDR(n, m); 856 n->m_pkthdr.len += optlen; 857 m->m_len -= sizeof(struct ip); 858 m->m_data += sizeof(struct ip); 859 n->m_next = m; 860 m = n; 861 m->m_len = optlen + sizeof(struct ip); 862 m->m_data += max_linkhdr; 863 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 864 } else { 865 m->m_data -= optlen; 866 m->m_len += optlen; 867 m->m_pkthdr.len += optlen; 868 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 869 } 870 ip = mtod(m, struct ip *); 871 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen); 872 *phlen = sizeof(struct ip) + optlen; 873 ip->ip_len += optlen; 874 return (m); 875 } 876 877 /* 878 * Copy options from ip to jp, 879 * omitting those not copied during fragmentation. 880 */ 881 int 882 ip_optcopy(ip, jp) 883 struct ip *ip, *jp; 884 { 885 register u_char *cp, *dp; 886 int opt, optlen, cnt; 887 888 cp = (u_char *)(ip + 1); 889 dp = (u_char *)(jp + 1); 890 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 891 for (; cnt > 0; cnt -= optlen, cp += optlen) { 892 opt = cp[0]; 893 if (opt == IPOPT_EOL) 894 break; 895 if (opt == IPOPT_NOP) { 896 /* Preserve for IP mcast tunnel's LSRR alignment. */ 897 *dp++ = IPOPT_NOP; 898 optlen = 1; 899 continue; 900 } 901 #ifdef DIAGNOSTIC 902 if (cnt < IPOPT_OLEN + sizeof(*cp)) 903 panic("malformed IPv4 option passed to ip_optcopy"); 904 #endif 905 optlen = cp[IPOPT_OLEN]; 906 #ifdef DIAGNOSTIC 907 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 908 panic("malformed IPv4 option passed to ip_optcopy"); 909 #endif 910 /* bogus lengths should have been caught by ip_dooptions */ 911 if (optlen > cnt) 912 optlen = cnt; 913 if (IPOPT_COPIED(opt)) { 914 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen); 915 dp += optlen; 916 } 917 } 918 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 919 *dp++ = IPOPT_EOL; 920 return (optlen); 921 } 922 923 /* 924 * IP socket option processing. 925 */ 926 int 927 ip_ctloutput(op, so, level, optname, mp) 928 int op; 929 struct socket *so; 930 int level, optname; 931 struct mbuf **mp; 932 { 933 register struct inpcb *inp = sotoinpcb(so); 934 register struct mbuf *m = *mp; 935 register int optval = 0; 936 #ifdef IPSEC 937 struct proc *p = curproc; /* XXX */ 938 struct ipsec_ref *ipr; 939 u_int16_t opt16val; 940 #endif 941 int error = 0; 942 943 if (level != IPPROTO_IP) { 944 error = EINVAL; 945 if (op == PRCO_SETOPT && *mp) 946 (void) m_free(*mp); 947 } else switch (op) { 948 case PRCO_SETOPT: 949 switch (optname) { 950 case IP_OPTIONS: 951 #ifdef notyet 952 case IP_RETOPTS: 953 return (ip_pcbopts(optname, &inp->inp_options, m)); 954 #else 955 return (ip_pcbopts(&inp->inp_options, m)); 956 #endif 957 958 case IP_TOS: 959 case IP_TTL: 960 case IP_RECVOPTS: 961 case IP_RECVRETOPTS: 962 case IP_RECVDSTADDR: 963 if (m == NULL || m->m_len != sizeof(int)) 964 error = EINVAL; 965 else { 966 optval = *mtod(m, int *); 967 switch (optname) { 968 969 case IP_TOS: 970 inp->inp_ip.ip_tos = optval; 971 break; 972 973 case IP_TTL: 974 inp->inp_ip.ip_ttl = optval; 975 break; 976 #define OPTSET(bit) \ 977 if (optval) \ 978 inp->inp_flags |= bit; \ 979 else \ 980 inp->inp_flags &= ~bit; 981 982 case IP_RECVOPTS: 983 OPTSET(INP_RECVOPTS); 984 break; 985 986 case IP_RECVRETOPTS: 987 OPTSET(INP_RECVRETOPTS); 988 break; 989 990 case IP_RECVDSTADDR: 991 OPTSET(INP_RECVDSTADDR); 992 break; 993 } 994 } 995 break; 996 #undef OPTSET 997 998 case IP_MULTICAST_IF: 999 case IP_MULTICAST_TTL: 1000 case IP_MULTICAST_LOOP: 1001 case IP_ADD_MEMBERSHIP: 1002 case IP_DROP_MEMBERSHIP: 1003 error = ip_setmoptions(optname, &inp->inp_moptions, m); 1004 break; 1005 1006 case IP_PORTRANGE: 1007 if (m == 0 || m->m_len != sizeof(int)) 1008 error = EINVAL; 1009 else { 1010 optval = *mtod(m, int *); 1011 1012 switch (optval) { 1013 1014 case IP_PORTRANGE_DEFAULT: 1015 inp->inp_flags &= ~(INP_LOWPORT); 1016 inp->inp_flags &= ~(INP_HIGHPORT); 1017 break; 1018 1019 case IP_PORTRANGE_HIGH: 1020 inp->inp_flags &= ~(INP_LOWPORT); 1021 inp->inp_flags |= INP_HIGHPORT; 1022 break; 1023 1024 case IP_PORTRANGE_LOW: 1025 inp->inp_flags &= ~(INP_HIGHPORT); 1026 inp->inp_flags |= INP_LOWPORT; 1027 break; 1028 1029 default: 1030 1031 error = EINVAL; 1032 break; 1033 } 1034 } 1035 break; 1036 case IP_AUTH_LEVEL: 1037 case IP_ESP_TRANS_LEVEL: 1038 case IP_ESP_NETWORK_LEVEL: 1039 case IP_IPCOMP_LEVEL: 1040 #ifndef IPSEC 1041 error = EOPNOTSUPP; 1042 #else 1043 if (m == 0 || m->m_len != sizeof(int)) { 1044 error = EINVAL; 1045 break; 1046 } 1047 optval = *mtod(m, int *); 1048 1049 if (optval < IPSEC_LEVEL_BYPASS || 1050 optval > IPSEC_LEVEL_UNIQUE) { 1051 error = EINVAL; 1052 break; 1053 } 1054 1055 /* Unlink cached output TDB to force a re-search */ 1056 if (inp->inp_tdb_out) { 1057 int s = spltdb(); 1058 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1059 inp, inp_tdb_out_next); 1060 splx(s); 1061 } 1062 1063 if (inp->inp_tdb_in) { 1064 int s = spltdb(); 1065 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1066 inp, inp_tdb_in_next); 1067 splx(s); 1068 } 1069 1070 switch (optname) { 1071 case IP_AUTH_LEVEL: 1072 if (optval < ipsec_auth_default_level && 1073 suser(p->p_ucred, &p->p_acflag)) { 1074 error = EACCES; 1075 break; 1076 } 1077 inp->inp_seclevel[SL_AUTH] = optval; 1078 break; 1079 1080 case IP_ESP_TRANS_LEVEL: 1081 if (optval < ipsec_esp_trans_default_level && 1082 suser(p->p_ucred, &p->p_acflag)) { 1083 error = EACCES; 1084 break; 1085 } 1086 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1087 break; 1088 1089 case IP_ESP_NETWORK_LEVEL: 1090 if (optval < ipsec_esp_network_default_level && 1091 suser(p->p_ucred, &p->p_acflag)) { 1092 error = EACCES; 1093 break; 1094 } 1095 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1096 break; 1097 case IP_IPCOMP_LEVEL: 1098 if (optval < ipsec_ipcomp_default_level && 1099 suser(p->p_ucred, &p->p_acflag)) { 1100 error = EACCES; 1101 break; 1102 } 1103 inp->inp_seclevel[SL_IPCOMP] = optval; 1104 break; 1105 } 1106 if (!error) 1107 inp->inp_secrequire = get_sa_require(inp); 1108 #endif 1109 break; 1110 1111 case IP_IPSEC_REMOTE_CRED: 1112 case IP_IPSEC_REMOTE_AUTH: 1113 /* Can't set the remote credential or key */ 1114 error = EOPNOTSUPP; 1115 break; 1116 1117 case IP_IPSEC_LOCAL_ID: 1118 case IP_IPSEC_REMOTE_ID: 1119 case IP_IPSEC_LOCAL_CRED: 1120 case IP_IPSEC_LOCAL_AUTH: 1121 #ifndef IPSEC 1122 error = EOPNOTSUPP; 1123 #else 1124 if (m->m_len < 2) { 1125 error = EINVAL; 1126 break; 1127 } 1128 1129 m_copydata(m, 0, 2, (caddr_t) &opt16val); 1130 1131 /* If the type is 0, then we cleanup and return */ 1132 if (opt16val == 0) { 1133 switch (optname) { 1134 case IP_IPSEC_LOCAL_ID: 1135 if (inp->inp_ipsec_localid != NULL) 1136 ipsp_reffree(inp->inp_ipsec_localid); 1137 inp->inp_ipsec_localid = NULL; 1138 break; 1139 1140 case IP_IPSEC_REMOTE_ID: 1141 if (inp->inp_ipsec_remoteid != NULL) 1142 ipsp_reffree(inp->inp_ipsec_remoteid); 1143 inp->inp_ipsec_remoteid = NULL; 1144 break; 1145 1146 case IP_IPSEC_LOCAL_CRED: 1147 if (inp->inp_ipsec_localcred != NULL) 1148 ipsp_reffree(inp->inp_ipsec_localcred); 1149 inp->inp_ipsec_localcred = NULL; 1150 break; 1151 1152 case IP_IPSEC_LOCAL_AUTH: 1153 if (inp->inp_ipsec_localauth != NULL) 1154 ipsp_reffree(inp->inp_ipsec_localauth); 1155 inp->inp_ipsec_localauth = NULL; 1156 break; 1157 } 1158 1159 error = 0; 1160 break; 1161 } 1162 1163 /* Can't have an empty payload */ 1164 if (m->m_len == 2) { 1165 error = EINVAL; 1166 break; 1167 } 1168 1169 MALLOC(ipr, struct ipsec_ref *, 1170 sizeof(struct ipsec_ref) + m->m_len - 2, 1171 M_CREDENTIALS, M_NOWAIT); 1172 if (ipr == NULL) { 1173 error = ENOBUFS; 1174 break; 1175 } 1176 ipr->ref_count = 1; 1177 ipr->ref_malloctype = M_CREDENTIALS; 1178 ipr->ref_len = m->m_len - 2; 1179 ipr->ref_type = opt16val; 1180 m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1)); 1181 1182 switch (optname) { 1183 case IP_IPSEC_LOCAL_ID: 1184 /* Check valid types and NUL-termination */ 1185 if (ipr->ref_type < IPSP_IDENTITY_PREFIX || 1186 ipr->ref_type > IPSP_IDENTITY_CONNECTION || 1187 ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1188 FREE(ipr, M_CREDENTIALS); 1189 error = EINVAL; 1190 } else { 1191 if (inp->inp_ipsec_localid != NULL) 1192 ipsp_reffree(inp->inp_ipsec_localid); 1193 inp->inp_ipsec_localid = ipr; 1194 } 1195 break; 1196 case IP_IPSEC_REMOTE_ID: 1197 /* Check valid types and NUL-termination */ 1198 if (ipr->ref_type < IPSP_IDENTITY_PREFIX || 1199 ipr->ref_type > IPSP_IDENTITY_CONNECTION || 1200 ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1201 FREE(ipr, M_CREDENTIALS); 1202 error = EINVAL; 1203 } else { 1204 if (inp->inp_ipsec_remoteid != NULL) 1205 ipsp_reffree(inp->inp_ipsec_remoteid); 1206 inp->inp_ipsec_remoteid = ipr; 1207 } 1208 break; 1209 case IP_IPSEC_LOCAL_CRED: 1210 if (ipr->ref_type < IPSP_CRED_KEYNOTE || 1211 ipr->ref_type > IPSP_CRED_X509) { 1212 FREE(ipr, M_CREDENTIALS); 1213 error = EINVAL; 1214 } else { 1215 if (inp->inp_ipsec_localcred != NULL) 1216 ipsp_reffree(inp->inp_ipsec_localcred); 1217 inp->inp_ipsec_localcred = ipr; 1218 } 1219 break; 1220 case IP_IPSEC_LOCAL_AUTH: 1221 if (ipr->ref_type < IPSP_AUTH_PASSPHRASE || 1222 ipr->ref_type > IPSP_AUTH_RSA) { 1223 FREE(ipr, M_CREDENTIALS); 1224 error = EINVAL; 1225 } else { 1226 if (inp->inp_ipsec_localauth != NULL) 1227 ipsp_reffree(inp->inp_ipsec_localauth); 1228 inp->inp_ipsec_localauth = ipr; 1229 } 1230 break; 1231 } 1232 1233 /* Unlink cached output TDB to force a re-search */ 1234 if (inp->inp_tdb_out) { 1235 int s = spltdb(); 1236 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1237 inp, inp_tdb_out_next); 1238 splx(s); 1239 } 1240 1241 if (inp->inp_tdb_in) { 1242 int s = spltdb(); 1243 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1244 inp, inp_tdb_in_next); 1245 splx(s); 1246 } 1247 #endif 1248 break; 1249 default: 1250 error = ENOPROTOOPT; 1251 break; 1252 } 1253 if (m) 1254 (void)m_free(m); 1255 break; 1256 1257 case PRCO_GETOPT: 1258 switch (optname) { 1259 case IP_OPTIONS: 1260 case IP_RETOPTS: 1261 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1262 if (inp->inp_options) { 1263 m->m_len = inp->inp_options->m_len; 1264 bcopy(mtod(inp->inp_options, caddr_t), 1265 mtod(m, caddr_t), (unsigned)m->m_len); 1266 } else 1267 m->m_len = 0; 1268 break; 1269 1270 case IP_TOS: 1271 case IP_TTL: 1272 case IP_RECVOPTS: 1273 case IP_RECVRETOPTS: 1274 case IP_RECVDSTADDR: 1275 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1276 m->m_len = sizeof(int); 1277 switch (optname) { 1278 1279 case IP_TOS: 1280 optval = inp->inp_ip.ip_tos; 1281 break; 1282 1283 case IP_TTL: 1284 optval = inp->inp_ip.ip_ttl; 1285 break; 1286 1287 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1288 1289 case IP_RECVOPTS: 1290 optval = OPTBIT(INP_RECVOPTS); 1291 break; 1292 1293 case IP_RECVRETOPTS: 1294 optval = OPTBIT(INP_RECVRETOPTS); 1295 break; 1296 1297 case IP_RECVDSTADDR: 1298 optval = OPTBIT(INP_RECVDSTADDR); 1299 break; 1300 } 1301 *mtod(m, int *) = optval; 1302 break; 1303 1304 case IP_MULTICAST_IF: 1305 case IP_MULTICAST_TTL: 1306 case IP_MULTICAST_LOOP: 1307 case IP_ADD_MEMBERSHIP: 1308 case IP_DROP_MEMBERSHIP: 1309 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1310 break; 1311 1312 case IP_PORTRANGE: 1313 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1314 m->m_len = sizeof(int); 1315 1316 if (inp->inp_flags & INP_HIGHPORT) 1317 optval = IP_PORTRANGE_HIGH; 1318 else if (inp->inp_flags & INP_LOWPORT) 1319 optval = IP_PORTRANGE_LOW; 1320 else 1321 optval = 0; 1322 1323 *mtod(m, int *) = optval; 1324 break; 1325 1326 case IP_AUTH_LEVEL: 1327 case IP_ESP_TRANS_LEVEL: 1328 case IP_ESP_NETWORK_LEVEL: 1329 case IP_IPCOMP_LEVEL: 1330 #ifndef IPSEC 1331 m->m_len = sizeof(int); 1332 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1333 #else 1334 m->m_len = sizeof(int); 1335 switch (optname) { 1336 case IP_AUTH_LEVEL: 1337 optval = inp->inp_seclevel[SL_AUTH]; 1338 break; 1339 1340 case IP_ESP_TRANS_LEVEL: 1341 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1342 break; 1343 1344 case IP_ESP_NETWORK_LEVEL: 1345 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1346 break; 1347 case IP_IPCOMP_LEVEL: 1348 optval = inp->inp_seclevel[SL_IPCOMP]; 1349 break; 1350 } 1351 *mtod(m, int *) = optval; 1352 #endif 1353 break; 1354 case IP_IPSEC_LOCAL_ID: 1355 case IP_IPSEC_REMOTE_ID: 1356 case IP_IPSEC_LOCAL_CRED: 1357 case IP_IPSEC_REMOTE_CRED: 1358 case IP_IPSEC_LOCAL_AUTH: 1359 case IP_IPSEC_REMOTE_AUTH: 1360 #ifndef IPSEC 1361 error = EOPNOTSUPP; 1362 #else 1363 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1364 m->m_len = sizeof(u_int16_t); 1365 switch (optname) { 1366 case IP_IPSEC_LOCAL_ID: 1367 ipr = inp->inp_ipsec_localid; 1368 opt16val = IPSP_IDENTITY_NONE; 1369 break; 1370 case IP_IPSEC_REMOTE_ID: 1371 ipr = inp->inp_ipsec_remoteid; 1372 opt16val = IPSP_IDENTITY_NONE; 1373 break; 1374 case IP_IPSEC_LOCAL_CRED: 1375 ipr = inp->inp_ipsec_localcred; 1376 opt16val = IPSP_CRED_NONE; 1377 break; 1378 case IP_IPSEC_REMOTE_CRED: 1379 ipr = inp->inp_ipsec_remotecred; 1380 opt16val = IPSP_CRED_NONE; 1381 break; 1382 case IP_IPSEC_LOCAL_AUTH: 1383 ipr = inp->inp_ipsec_localauth; 1384 break; 1385 case IP_IPSEC_REMOTE_AUTH: 1386 ipr = inp->inp_ipsec_remoteauth; 1387 break; 1388 } 1389 if (ipr == NULL) 1390 *mtod(m, u_int16_t *) = opt16val; 1391 else { 1392 m->m_len += ipr->ref_len; 1393 *mtod(m, u_int16_t *) = ipr->ref_type; 1394 m_copyback(m, sizeof(u_int16_t), ipr->ref_len, 1395 (caddr_t)(ipr + 1)); 1396 } 1397 #endif 1398 break; 1399 default: 1400 error = ENOPROTOOPT; 1401 break; 1402 } 1403 break; 1404 } 1405 return (error); 1406 } 1407 1408 /* 1409 * Set up IP options in pcb for insertion in output packets. 1410 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1411 * with destination address if source routed. 1412 */ 1413 int 1414 #ifdef notyet 1415 ip_pcbopts(optname, pcbopt, m) 1416 int optname; 1417 #else 1418 ip_pcbopts(pcbopt, m) 1419 #endif 1420 struct mbuf **pcbopt; 1421 register struct mbuf *m; 1422 { 1423 register int cnt, optlen; 1424 register u_char *cp; 1425 u_char opt; 1426 1427 /* turn off any old options */ 1428 if (*pcbopt) 1429 (void)m_free(*pcbopt); 1430 *pcbopt = 0; 1431 if (m == (struct mbuf *)0 || m->m_len == 0) { 1432 /* 1433 * Only turning off any previous options. 1434 */ 1435 if (m) 1436 (void)m_free(m); 1437 return (0); 1438 } 1439 1440 #ifndef vax 1441 if (m->m_len % sizeof(int32_t)) 1442 goto bad; 1443 #endif 1444 /* 1445 * IP first-hop destination address will be stored before 1446 * actual options; move other options back 1447 * and clear it when none present. 1448 */ 1449 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1450 goto bad; 1451 cnt = m->m_len; 1452 m->m_len += sizeof(struct in_addr); 1453 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1454 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1455 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1456 1457 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1458 opt = cp[IPOPT_OPTVAL]; 1459 if (opt == IPOPT_EOL) 1460 break; 1461 if (opt == IPOPT_NOP) 1462 optlen = 1; 1463 else { 1464 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1465 goto bad; 1466 optlen = cp[IPOPT_OLEN]; 1467 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1468 goto bad; 1469 } 1470 switch (opt) { 1471 1472 default: 1473 break; 1474 1475 case IPOPT_LSRR: 1476 case IPOPT_SSRR: 1477 /* 1478 * user process specifies route as: 1479 * ->A->B->C->D 1480 * D must be our final destination (but we can't 1481 * check that since we may not have connected yet). 1482 * A is first hop destination, which doesn't appear in 1483 * actual IP option, but is stored before the options. 1484 */ 1485 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1486 goto bad; 1487 m->m_len -= sizeof(struct in_addr); 1488 cnt -= sizeof(struct in_addr); 1489 optlen -= sizeof(struct in_addr); 1490 cp[IPOPT_OLEN] = optlen; 1491 /* 1492 * Move first hop before start of options. 1493 */ 1494 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1495 sizeof(struct in_addr)); 1496 /* 1497 * Then copy rest of options back 1498 * to close up the deleted entry. 1499 */ 1500 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1501 sizeof(struct in_addr)), 1502 (caddr_t)&cp[IPOPT_OFFSET+1], 1503 (unsigned)cnt + sizeof(struct in_addr)); 1504 break; 1505 } 1506 } 1507 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1508 goto bad; 1509 *pcbopt = m; 1510 return (0); 1511 1512 bad: 1513 (void)m_free(m); 1514 return (EINVAL); 1515 } 1516 1517 /* 1518 * Set the IP multicast options in response to user setsockopt(). 1519 */ 1520 int 1521 ip_setmoptions(optname, imop, m) 1522 int optname; 1523 struct ip_moptions **imop; 1524 struct mbuf *m; 1525 { 1526 register int error = 0; 1527 u_char loop; 1528 register int i; 1529 struct in_addr addr; 1530 register struct ip_mreq *mreq; 1531 register struct ifnet *ifp; 1532 register struct ip_moptions *imo = *imop; 1533 struct route ro; 1534 register struct sockaddr_in *dst; 1535 1536 if (imo == NULL) { 1537 /* 1538 * No multicast option buffer attached to the pcb; 1539 * allocate one and initialize to default values. 1540 */ 1541 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, 1542 M_WAITOK); 1543 1544 *imop = imo; 1545 imo->imo_multicast_ifp = NULL; 1546 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1547 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1548 imo->imo_num_memberships = 0; 1549 } 1550 1551 switch (optname) { 1552 1553 case IP_MULTICAST_IF: 1554 /* 1555 * Select the interface for outgoing multicast packets. 1556 */ 1557 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1558 error = EINVAL; 1559 break; 1560 } 1561 addr = *(mtod(m, struct in_addr *)); 1562 /* 1563 * INADDR_ANY is used to remove a previous selection. 1564 * When no interface is selected, a default one is 1565 * chosen every time a multicast packet is sent. 1566 */ 1567 if (addr.s_addr == INADDR_ANY) { 1568 imo->imo_multicast_ifp = NULL; 1569 break; 1570 } 1571 /* 1572 * The selected interface is identified by its local 1573 * IP address. Find the interface and confirm that 1574 * it supports multicasting. 1575 */ 1576 INADDR_TO_IFP(addr, ifp); 1577 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1578 error = EADDRNOTAVAIL; 1579 break; 1580 } 1581 imo->imo_multicast_ifp = ifp; 1582 break; 1583 1584 case IP_MULTICAST_TTL: 1585 /* 1586 * Set the IP time-to-live for outgoing multicast packets. 1587 */ 1588 if (m == NULL || m->m_len != 1) { 1589 error = EINVAL; 1590 break; 1591 } 1592 imo->imo_multicast_ttl = *(mtod(m, u_char *)); 1593 break; 1594 1595 case IP_MULTICAST_LOOP: 1596 /* 1597 * Set the loopback flag for outgoing multicast packets. 1598 * Must be zero or one. 1599 */ 1600 if (m == NULL || m->m_len != 1 || 1601 (loop = *(mtod(m, u_char *))) > 1) { 1602 error = EINVAL; 1603 break; 1604 } 1605 imo->imo_multicast_loop = loop; 1606 break; 1607 1608 case IP_ADD_MEMBERSHIP: 1609 /* 1610 * Add a multicast group membership. 1611 * Group must be a valid IP multicast address. 1612 */ 1613 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1614 error = EINVAL; 1615 break; 1616 } 1617 mreq = mtod(m, struct ip_mreq *); 1618 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1619 error = EINVAL; 1620 break; 1621 } 1622 /* 1623 * If no interface address was provided, use the interface of 1624 * the route to the given multicast address. 1625 */ 1626 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1627 ro.ro_rt = NULL; 1628 dst = satosin(&ro.ro_dst); 1629 dst->sin_len = sizeof(*dst); 1630 dst->sin_family = AF_INET; 1631 dst->sin_addr = mreq->imr_multiaddr; 1632 rtalloc(&ro); 1633 if (ro.ro_rt == NULL) { 1634 error = EADDRNOTAVAIL; 1635 break; 1636 } 1637 ifp = ro.ro_rt->rt_ifp; 1638 rtfree(ro.ro_rt); 1639 } else { 1640 INADDR_TO_IFP(mreq->imr_interface, ifp); 1641 } 1642 /* 1643 * See if we found an interface, and confirm that it 1644 * supports multicast. 1645 */ 1646 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1647 error = EADDRNOTAVAIL; 1648 break; 1649 } 1650 /* 1651 * See if the membership already exists or if all the 1652 * membership slots are full. 1653 */ 1654 for (i = 0; i < imo->imo_num_memberships; ++i) { 1655 if (imo->imo_membership[i]->inm_ifp == ifp && 1656 imo->imo_membership[i]->inm_addr.s_addr 1657 == mreq->imr_multiaddr.s_addr) 1658 break; 1659 } 1660 if (i < imo->imo_num_memberships) { 1661 error = EADDRINUSE; 1662 break; 1663 } 1664 if (i == IP_MAX_MEMBERSHIPS) { 1665 error = ETOOMANYREFS; 1666 break; 1667 } 1668 /* 1669 * Everything looks good; add a new record to the multicast 1670 * address list for the given interface. 1671 */ 1672 if ((imo->imo_membership[i] = 1673 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1674 error = ENOBUFS; 1675 break; 1676 } 1677 ++imo->imo_num_memberships; 1678 break; 1679 1680 case IP_DROP_MEMBERSHIP: 1681 /* 1682 * Drop a multicast group membership. 1683 * Group must be a valid IP multicast address. 1684 */ 1685 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1686 error = EINVAL; 1687 break; 1688 } 1689 mreq = mtod(m, struct ip_mreq *); 1690 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1691 error = EINVAL; 1692 break; 1693 } 1694 /* 1695 * If an interface address was specified, get a pointer 1696 * to its ifnet structure. 1697 */ 1698 if (mreq->imr_interface.s_addr == INADDR_ANY) 1699 ifp = NULL; 1700 else { 1701 INADDR_TO_IFP(mreq->imr_interface, ifp); 1702 if (ifp == NULL) { 1703 error = EADDRNOTAVAIL; 1704 break; 1705 } 1706 } 1707 /* 1708 * Find the membership in the membership array. 1709 */ 1710 for (i = 0; i < imo->imo_num_memberships; ++i) { 1711 if ((ifp == NULL || 1712 imo->imo_membership[i]->inm_ifp == ifp) && 1713 imo->imo_membership[i]->inm_addr.s_addr == 1714 mreq->imr_multiaddr.s_addr) 1715 break; 1716 } 1717 if (i == imo->imo_num_memberships) { 1718 error = EADDRNOTAVAIL; 1719 break; 1720 } 1721 /* 1722 * Give up the multicast address record to which the 1723 * membership points. 1724 */ 1725 in_delmulti(imo->imo_membership[i]); 1726 /* 1727 * Remove the gap in the membership array. 1728 */ 1729 for (++i; i < imo->imo_num_memberships; ++i) 1730 imo->imo_membership[i-1] = imo->imo_membership[i]; 1731 --imo->imo_num_memberships; 1732 break; 1733 1734 default: 1735 error = EOPNOTSUPP; 1736 break; 1737 } 1738 1739 /* 1740 * If all options have default values, no need to keep the mbuf. 1741 */ 1742 if (imo->imo_multicast_ifp == NULL && 1743 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1744 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1745 imo->imo_num_memberships == 0) { 1746 free(*imop, M_IPMOPTS); 1747 *imop = NULL; 1748 } 1749 1750 return (error); 1751 } 1752 1753 /* 1754 * Return the IP multicast options in response to user getsockopt(). 1755 */ 1756 int 1757 ip_getmoptions(optname, imo, mp) 1758 int optname; 1759 register struct ip_moptions *imo; 1760 register struct mbuf **mp; 1761 { 1762 u_char *ttl; 1763 u_char *loop; 1764 struct in_addr *addr; 1765 struct in_ifaddr *ia; 1766 1767 *mp = m_get(M_WAIT, MT_SOOPTS); 1768 1769 switch (optname) { 1770 1771 case IP_MULTICAST_IF: 1772 addr = mtod(*mp, struct in_addr *); 1773 (*mp)->m_len = sizeof(struct in_addr); 1774 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1775 addr->s_addr = INADDR_ANY; 1776 else { 1777 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1778 addr->s_addr = (ia == NULL) ? INADDR_ANY 1779 : ia->ia_addr.sin_addr.s_addr; 1780 } 1781 return (0); 1782 1783 case IP_MULTICAST_TTL: 1784 ttl = mtod(*mp, u_char *); 1785 (*mp)->m_len = 1; 1786 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1787 : imo->imo_multicast_ttl; 1788 return (0); 1789 1790 case IP_MULTICAST_LOOP: 1791 loop = mtod(*mp, u_char *); 1792 (*mp)->m_len = 1; 1793 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1794 : imo->imo_multicast_loop; 1795 return (0); 1796 1797 default: 1798 return (EOPNOTSUPP); 1799 } 1800 } 1801 1802 /* 1803 * Discard the IP multicast options. 1804 */ 1805 void 1806 ip_freemoptions(imo) 1807 register struct ip_moptions *imo; 1808 { 1809 register int i; 1810 1811 if (imo != NULL) { 1812 for (i = 0; i < imo->imo_num_memberships; ++i) 1813 in_delmulti(imo->imo_membership[i]); 1814 free(imo, M_IPMOPTS); 1815 } 1816 } 1817 1818 /* 1819 * Routine called from ip_output() to loop back a copy of an IP multicast 1820 * packet to the input queue of a specified interface. Note that this 1821 * calls the output routine of the loopback "driver", but with an interface 1822 * pointer that might NOT be &loif -- easier than replicating that code here. 1823 */ 1824 static void 1825 ip_mloopback(ifp, m, dst) 1826 struct ifnet *ifp; 1827 register struct mbuf *m; 1828 register struct sockaddr_in *dst; 1829 { 1830 register struct ip *ip; 1831 struct mbuf *copym; 1832 1833 copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1834 if (copym != NULL) { 1835 /* 1836 * We don't bother to fragment if the IP length is greater 1837 * than the interface's MTU. Can this possibly matter? 1838 */ 1839 ip = mtod(copym, struct ip *); 1840 ip->ip_len = htons((u_int16_t)ip->ip_len); 1841 ip->ip_off = htons((u_int16_t)ip->ip_off); 1842 ip->ip_sum = 0; 1843 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1844 (void) looutput(ifp, copym, sintosa(dst), NULL); 1845 } 1846 } 1847 1848 /* 1849 * Process a delayed payload checksum calculation. 1850 */ 1851 void 1852 in_delayed_cksum(struct mbuf *m) 1853 { 1854 struct ip *ip; 1855 u_int16_t csum, offset; 1856 1857 ip = mtod(m, struct ip *); 1858 offset = ip->ip_hl << 2; 1859 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1860 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1861 csum = 0xffff; 1862 1863 switch (ip->ip_p) { 1864 case IPPROTO_TCP: 1865 offset += offsetof(struct tcphdr, th_sum); 1866 break; 1867 1868 case IPPROTO_UDP: 1869 offset += offsetof(struct udphdr, uh_sum); 1870 break; 1871 1872 default: 1873 return; 1874 } 1875 1876 if ((offset + sizeof(u_int16_t)) > m->m_len) 1877 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum); 1878 else 1879 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1880 } 1881