1 /* $OpenBSD: ip_output.c,v 1.137 2001/08/26 21:12:06 niklas Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include "pf.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/mbuf.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/proc.h> 48 #include <sys/kernel.h> 49 50 #include <net/if.h> 51 #include <net/if_enc.h> 52 #include <net/route.h> 53 54 #include <netinet/in.h> 55 #include <netinet/in_systm.h> 56 #include <netinet/ip.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_var.h> 59 #include <netinet/ip_var.h> 60 #include <netinet/ip_icmp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/udp.h> 63 #include <netinet/tcp_timer.h> 64 #include <netinet/tcp_var.h> 65 #include <netinet/udp_var.h> 66 67 #if NPF > 0 68 #include <net/pfvar.h> 69 #endif 70 71 #ifdef vax 72 #include <machine/mtpr.h> 73 #endif 74 75 #ifdef IPSEC 76 #ifdef ENCDEBUG 77 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 78 #else 79 #define DPRINTF(x) 80 #endif 81 82 extern u_int8_t get_sa_require __P((struct inpcb *)); 83 84 extern int ipsec_auth_default_level; 85 extern int ipsec_esp_trans_default_level; 86 extern int ipsec_esp_network_default_level; 87 extern int ipsec_ipcomp_default_level; 88 #endif /* IPSEC */ 89 90 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); 91 static void ip_mloopback 92 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *)); 93 94 /* 95 * IP output. The packet in mbuf chain m contains a skeletal IP 96 * header (with len, off, ttl, proto, tos, src, dst). 97 * The mbuf chain containing the packet will be freed. 98 * The mbuf opt, if present, will not be freed. 99 */ 100 int 101 #if __STDC__ 102 ip_output(struct mbuf *m0, ...) 103 #else 104 ip_output(m0, va_alist) 105 struct mbuf *m0; 106 va_dcl 107 #endif 108 { 109 register struct ip *ip, *mhip; 110 register struct ifnet *ifp; 111 struct mbuf *m = m0; 112 register int hlen = sizeof (struct ip); 113 int len, off, error = 0; 114 struct route iproute; 115 struct sockaddr_in *dst; 116 struct in_ifaddr *ia; 117 struct mbuf *opt; 118 struct route *ro; 119 int flags; 120 struct ip_moptions *imo; 121 va_list ap; 122 u_int8_t sproto = 0, donerouting = 0; 123 #ifdef IPSEC 124 u_int32_t icmp_mtu = 0; 125 union sockaddr_union sdst; 126 u_int32_t sspi; 127 struct m_tag *mtag; 128 struct tdb_ident *tdbi; 129 130 struct inpcb *inp; 131 struct tdb *tdb; 132 int s; 133 #endif /* IPSEC */ 134 135 va_start(ap, m0); 136 opt = va_arg(ap, struct mbuf *); 137 ro = va_arg(ap, struct route *); 138 flags = va_arg(ap, int); 139 imo = va_arg(ap, struct ip_moptions *); 140 #ifdef IPSEC 141 inp = va_arg(ap, struct inpcb *); 142 if (inp && (inp->inp_flags & INP_IPV6) != 0) 143 panic("ip_output: IPv6 pcb is passed"); 144 #endif /* IPSEC */ 145 va_end(ap); 146 147 #ifdef DIAGNOSTIC 148 if ((m->m_flags & M_PKTHDR) == 0) 149 panic("ip_output no HDR"); 150 #endif 151 if (opt) { 152 m = ip_insertoptions(m, opt, &len); 153 hlen = len; 154 } 155 156 ip = mtod(m, struct ip *); 157 158 /* 159 * Fill in IP header. 160 */ 161 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 162 ip->ip_v = IPVERSION; 163 ip->ip_off &= IP_DF; 164 ip->ip_id = htons(ip_randomid()); 165 ip->ip_hl = hlen >> 2; 166 ipstat.ips_localout++; 167 } else { 168 hlen = ip->ip_hl << 2; 169 } 170 171 /* 172 * If we're missing the IP source address, do a route lookup. We'll 173 * remember this result, in case we don't need to do any IPsec 174 * processing on the packet. We need the source address so we can 175 * do an SPD lookup in IPsec; for most packets, the source address 176 * is set at a higher level protocol. ICMPs and other packets 177 * though (e.g., traceroute) have a source address of zeroes. 178 */ 179 if (ip->ip_src.s_addr == INADDR_ANY) { 180 donerouting = 1; 181 182 if (ro == 0) { 183 ro = &iproute; 184 bzero((caddr_t)ro, sizeof (*ro)); 185 } 186 187 dst = satosin(&ro->ro_dst); 188 189 /* 190 * If there is a cached route, check that it is to the same 191 * destination and is still up. If not, free it and try again. 192 */ 193 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 194 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 195 RTFREE(ro->ro_rt); 196 ro->ro_rt = (struct rtentry *)0; 197 } 198 199 if (ro->ro_rt == 0) { 200 dst->sin_family = AF_INET; 201 dst->sin_len = sizeof(*dst); 202 dst->sin_addr = ip->ip_dst; 203 } 204 205 /* 206 * If routing to interface only, short-circuit routing lookup. 207 */ 208 if (flags & IP_ROUTETOIF) { 209 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 210 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 211 ipstat.ips_noroute++; 212 error = ENETUNREACH; 213 goto bad; 214 } 215 216 ifp = ia->ia_ifp; 217 ip->ip_ttl = 1; 218 } else { 219 if (ro->ro_rt == 0) 220 rtalloc(ro); 221 222 if (ro->ro_rt == 0) { 223 ipstat.ips_noroute++; 224 error = EHOSTUNREACH; 225 goto bad; 226 } 227 228 ia = ifatoia(ro->ro_rt->rt_ifa); 229 ifp = ro->ro_rt->rt_ifp; 230 ro->ro_rt->rt_use++; 231 232 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 233 dst = satosin(ro->ro_rt->rt_gateway); 234 } 235 236 /* Set the source IP address */ 237 if (!IN_MULTICAST(ip->ip_dst.s_addr)) 238 ip->ip_src = ia->ia_addr.sin_addr; 239 } 240 241 #ifdef IPSEC 242 /* 243 * splnet is chosen over spltdb because we are not allowed to 244 * lower the level, and udp_output calls us in splnet(). 245 */ 246 s = splnet(); 247 248 /* Do we have any pending SAs to apply ? */ 249 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 250 if (mtag != NULL) { 251 #ifdef DIAGNOSTIC 252 if (mtag->m_tag_len != sizeof (struct tdb_ident)) 253 panic("ip_output: tag of length %d (should be %d", 254 mtag->m_tag_len, sizeof (struct tdb_ident)); 255 #endif 256 tdbi = (struct tdb_ident *)(mtag + 1); 257 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); 258 if (tdb == NULL) 259 error = -EINVAL; 260 m_tag_delete(m, mtag); 261 } 262 else 263 tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error, 264 IPSP_DIRECTION_OUT, NULL, inp); 265 266 if (tdb == NULL) { 267 splx(s); 268 269 if (error == 0) { 270 /* 271 * No IPsec processing required, we'll just send the 272 * packet out. 273 */ 274 sproto = 0; 275 276 /* Fall through to routing/multicast handling */ 277 } else { 278 /* 279 * -EINVAL is used to indicate that the packet should 280 * be silently dropped, typically because we've asked 281 * key management for an SA. 282 */ 283 if (error == -EINVAL) /* Should silently drop packet */ 284 error = 0; 285 286 m_freem(m); 287 goto done; 288 } 289 } else { 290 /* Loop detection */ 291 for (mtag = m_tag_first(m); mtag != NULL; 292 mtag = m_tag_next(m, mtag)) { 293 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 294 mtag->m_tag_id != 295 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 296 continue; 297 tdbi = (struct tdb_ident *)(mtag + 1); 298 if (tdbi->spi == tdb->tdb_spi && 299 tdbi->proto == tdb->tdb_sproto && 300 !bcmp(&tdbi->dst, &tdb->tdb_dst, 301 sizeof(union sockaddr_union))) { 302 splx(s); 303 sproto = 0; /* mark as no-IPsec-needed */ 304 goto done_spd; 305 } 306 } 307 308 /* We need to do IPsec */ 309 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); 310 sspi = tdb->tdb_spi; 311 sproto = tdb->tdb_sproto; 312 splx(s); 313 314 /* 315 * If it needs TCP/UDP hardware-checksumming, do the 316 * computation now. 317 */ 318 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 319 in_delayed_cksum(m); 320 m->m_pkthdr.csum &= 321 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 322 } 323 324 /* If it's not a multicast packet, try to fast-path */ 325 if (!IN_MULTICAST(ip->ip_dst.s_addr)) { 326 goto sendit; 327 } 328 } 329 330 /* Fall through to the routing/multicast handling code */ 331 done_spd: 332 #endif /* IPSEC */ 333 334 if (donerouting == 0) { 335 if (ro == 0) { 336 ro = &iproute; 337 bzero((caddr_t)ro, sizeof (*ro)); 338 } 339 340 dst = satosin(&ro->ro_dst); 341 342 /* 343 * If there is a cached route, check that it is to the same 344 * destination and is still up. If not, free it and try again. 345 */ 346 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 347 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 348 RTFREE(ro->ro_rt); 349 ro->ro_rt = (struct rtentry *)0; 350 } 351 352 if (ro->ro_rt == 0) { 353 dst->sin_family = AF_INET; 354 dst->sin_len = sizeof(*dst); 355 dst->sin_addr = ip->ip_dst; 356 } 357 358 /* 359 * If routing to interface only, short-circuit routing lookup. 360 */ 361 if (flags & IP_ROUTETOIF) { 362 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 363 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 364 ipstat.ips_noroute++; 365 error = ENETUNREACH; 366 goto bad; 367 } 368 369 ifp = ia->ia_ifp; 370 ip->ip_ttl = 1; 371 } else { 372 if (ro->ro_rt == 0) 373 rtalloc(ro); 374 375 if (ro->ro_rt == 0) { 376 ipstat.ips_noroute++; 377 error = EHOSTUNREACH; 378 goto bad; 379 } 380 381 ia = ifatoia(ro->ro_rt->rt_ifa); 382 ifp = ro->ro_rt->rt_ifp; 383 ro->ro_rt->rt_use++; 384 385 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 386 dst = satosin(ro->ro_rt->rt_gateway); 387 } 388 389 /* Set the source IP address */ 390 if (ip->ip_src.s_addr == INADDR_ANY) 391 ip->ip_src = ia->ia_addr.sin_addr; 392 } 393 394 if (IN_MULTICAST(ip->ip_dst.s_addr) || 395 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 396 struct in_multi *inm; 397 398 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 399 M_BCAST : M_MCAST; 400 401 /* 402 * IP destination address is multicast. Make sure "dst" 403 * still points to the address in "ro". (It may have been 404 * changed to point to a gateway address, above.) 405 */ 406 dst = satosin(&ro->ro_dst); 407 408 /* 409 * See if the caller provided any multicast options 410 */ 411 if (imo != NULL) { 412 ip->ip_ttl = imo->imo_multicast_ttl; 413 if (imo->imo_multicast_ifp != NULL) 414 ifp = imo->imo_multicast_ifp; 415 } else 416 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 417 418 /* 419 * Confirm that the outgoing interface supports multicast, 420 * but only if the packet actually is going out on that 421 * interface (i.e., no IPsec is applied). 422 */ 423 if ((((m->m_flags & M_MCAST) && 424 (ifp->if_flags & IFF_MULTICAST) == 0) || 425 ((m->m_flags & M_BCAST) && 426 (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0)) { 427 ipstat.ips_noroute++; 428 error = ENETUNREACH; 429 goto bad; 430 } 431 432 /* 433 * If source address not specified yet, use address 434 * of outgoing interface. 435 */ 436 if (ip->ip_src.s_addr == INADDR_ANY) { 437 register struct in_ifaddr *ia; 438 439 for (ia = in_ifaddr.tqh_first; 440 ia; 441 ia = ia->ia_list.tqe_next) 442 if (ia->ia_ifp == ifp) { 443 ip->ip_src = ia->ia_addr.sin_addr; 444 break; 445 } 446 } 447 448 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 449 if (inm != NULL && 450 (imo == NULL || imo->imo_multicast_loop)) { 451 /* 452 * If we belong to the destination multicast group 453 * on the outgoing interface, and the caller did not 454 * forbid loopback, loop back a copy. 455 */ 456 ip_mloopback(ifp, m, dst); 457 } 458 #ifdef MROUTING 459 else { 460 /* 461 * If we are acting as a multicast router, perform 462 * multicast forwarding as if the packet had just 463 * arrived on the interface to which we are about 464 * to send. The multicast forwarding function 465 * recursively calls this function, using the 466 * IP_FORWARDING flag to prevent infinite recursion. 467 * 468 * Multicasts that are looped back by ip_mloopback(), 469 * above, will be forwarded by the ip_input() routine, 470 * if necessary. 471 */ 472 extern struct socket *ip_mrouter; 473 474 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 475 if (ip_mforward(m, ifp) != 0) { 476 m_freem(m); 477 goto done; 478 } 479 } 480 } 481 #endif 482 /* 483 * Multicasts with a time-to-live of zero may be looped- 484 * back, above, but must not be transmitted on a network. 485 * Also, multicasts addressed to the loopback interface 486 * are not sent -- the above call to ip_mloopback() will 487 * loop back a copy if this host actually belongs to the 488 * destination group on the loopback interface. 489 */ 490 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 491 m_freem(m); 492 goto done; 493 } 494 495 goto sendit; 496 } 497 498 /* 499 * Look for broadcast address and and verify user is allowed to send 500 * such a packet; if the packet is going in an IPsec tunnel, skip 501 * this check. 502 */ 503 if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp))) { 504 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 505 error = EADDRNOTAVAIL; 506 goto bad; 507 } 508 if ((flags & IP_ALLOWBROADCAST) == 0) { 509 error = EACCES; 510 goto bad; 511 } 512 513 /* Don't allow broadcast messages to be fragmented */ 514 if ((u_int16_t)ip->ip_len > ifp->if_mtu) { 515 error = EMSGSIZE; 516 goto bad; 517 } 518 m->m_flags |= M_BCAST; 519 } else 520 m->m_flags &= ~M_BCAST; 521 522 sendit: 523 /* 524 * If we're doing Path MTU discovery, we need to set DF unless 525 * the route's MTU is locked. 526 */ 527 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 528 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 529 ip->ip_off |= IP_DF; 530 531 #ifdef IPSEC 532 /* 533 * Check if the packet needs encapsulation. 534 */ 535 if (sproto != 0) { 536 s = splnet(); 537 538 /* 539 * Packet filter 540 */ 541 #if NPF > 0 542 543 if (pf_test(PF_OUT, &encif[0].sc_if, &m) != PF_PASS) { 544 error = EHOSTUNREACH; 545 splx(s); 546 m_freem(m); 547 goto done; 548 } 549 ip = mtod(m, struct ip *); 550 hlen = ip->ip_hl << 2; 551 #endif 552 553 tdb = gettdb(sspi, &sdst, sproto); 554 if (tdb == NULL) { 555 error = EHOSTUNREACH; 556 splx(s); 557 m_freem(m); 558 goto done; 559 } 560 561 /* Latch to PCB */ 562 if (inp) 563 tdb_add_inp(tdb, inp, 0); 564 565 /* Check if we are allowed to fragment */ 566 if ((ip->ip_off & IP_DF) && tdb->tdb_mtu && 567 (u_int16_t)ip->ip_len > tdb->tdb_mtu && 568 tdb->tdb_mtutimeout > time.tv_sec) { 569 struct rtentry *rt = NULL; 570 571 icmp_mtu = tdb->tdb_mtu; 572 splx(s); 573 574 /* Find a host route to store the mtu in */ 575 if (ro != NULL) 576 rt = ro->ro_rt; 577 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 578 struct sockaddr_in dst = { 579 sizeof(struct sockaddr_in), AF_INET}; 580 dst.sin_addr = ip->ip_dst; 581 rt = icmp_mtudisc_clone((struct sockaddr *)&dst); 582 } 583 if (rt != NULL) { 584 rt->rt_rmx.rmx_mtu = icmp_mtu; 585 if (ro && ro->ro_rt != NULL) { 586 RTFREE(ro->ro_rt); 587 ro->ro_rt = (struct rtentry *) 0; 588 rtalloc(ro); 589 } 590 } 591 error = EMSGSIZE; 592 goto bad; 593 } 594 595 /* Massage the IP header for use by the IPsec code */ 596 ip->ip_len = htons((u_short) ip->ip_len); 597 ip->ip_off = htons((u_short) ip->ip_off); 598 599 /* 600 * Clear these -- they'll be set in the recursive invocation 601 * as needed. 602 */ 603 m->m_flags &= ~(M_MCAST | M_BCAST); 604 605 /* Callee frees mbuf */ 606 error = ipsp_process_packet(m, tdb, AF_INET, 0); 607 splx(s); 608 return error; /* Nothing more to be done */ 609 } 610 611 /* 612 * If deferred crypto processing is needed, check that the 613 * interface supports it. 614 */ 615 if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) 616 != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) { 617 /* Notify IPsec to do its own crypto. */ 618 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 619 m_freem(m); 620 error = EHOSTUNREACH; 621 goto done; 622 } 623 #endif /* IPSEC */ 624 625 /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */ 626 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) { 627 if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 628 ifp->if_bridge != NULL) { 629 in_delayed_cksum(m); 630 m->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */ 631 } 632 } else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) { 633 if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 634 ifp->if_bridge != NULL) { 635 in_delayed_cksum(m); 636 m->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */ 637 } 638 } 639 640 /* 641 * Packet filter 642 */ 643 #if NPF > 0 644 if (pf_test(PF_OUT, ifp, &m) != PF_PASS) { 645 error = EHOSTUNREACH; 646 m_freem(m); 647 goto done; 648 } 649 ip = mtod(m, struct ip *); 650 hlen = ip->ip_hl << 2; 651 #endif 652 653 /* 654 * If small enough for interface, can just send directly. 655 */ 656 if ((u_int16_t)ip->ip_len <= ifp->if_mtu) { 657 ip->ip_len = htons((u_int16_t)ip->ip_len); 658 ip->ip_off = htons((u_int16_t)ip->ip_off); 659 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 660 ifp->if_bridge == NULL) { 661 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 662 ipstat.ips_outhwcsum++; 663 } else { 664 ip->ip_sum = 0; 665 ip->ip_sum = in_cksum(m, hlen); 666 } 667 /* Update relevant hardware checksum stats for TCP/UDP */ 668 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) 669 tcpstat.tcps_outhwcsum++; 670 else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) 671 udpstat.udps_outhwcsum++; 672 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt); 673 goto done; 674 } 675 676 /* 677 * Too large for interface; fragment if possible. 678 * Must be able to put at least 8 bytes per fragment. 679 */ 680 if (ip->ip_off & IP_DF) { 681 #ifdef IPSEC 682 icmp_mtu = ifp->if_mtu; 683 #endif 684 error = EMSGSIZE; 685 /* 686 * This case can happen if the user changed the MTU 687 * of an interface after enabling IP on it. Because 688 * most netifs don't keep track of routes pointing to 689 * them, there is no way for one to update all its 690 * routes when the MTU is changed. 691 */ 692 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 693 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 694 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 695 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 696 } 697 ipstat.ips_cantfrag++; 698 goto bad; 699 } 700 len = (ifp->if_mtu - hlen) &~ 7; 701 if (len < 8) { 702 error = EMSGSIZE; 703 goto bad; 704 } 705 706 /* 707 * If we are doing fragmentation, we can't defer TCP/UDP 708 * checksumming; compute the checksum and clear the flag. 709 */ 710 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 711 in_delayed_cksum(m); 712 m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 713 } 714 715 { 716 int mhlen, firstlen = len; 717 struct mbuf **mnext = &m->m_nextpkt; 718 719 /* 720 * Loop through length of segment after first fragment, 721 * make new header and copy data of each part and link onto chain. 722 */ 723 m0 = m; 724 mhlen = sizeof (struct ip); 725 for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) { 726 MGETHDR(m, M_DONTWAIT, MT_HEADER); 727 if (m == 0) { 728 error = ENOBUFS; 729 ipstat.ips_odropped++; 730 goto sendorfree; 731 } 732 *mnext = m; 733 mnext = &m->m_nextpkt; 734 m->m_data += max_linkhdr; 735 mhip = mtod(m, struct ip *); 736 *mhip = *ip; 737 /* we must inherit MCAST and BCAST flags */ 738 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 739 if (hlen > sizeof (struct ip)) { 740 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 741 mhip->ip_hl = mhlen >> 2; 742 } 743 m->m_len = mhlen; 744 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); 745 if (ip->ip_off & IP_MF) 746 mhip->ip_off |= IP_MF; 747 if (off + len >= (u_int16_t)ip->ip_len) 748 len = (u_int16_t)ip->ip_len - off; 749 else 750 mhip->ip_off |= IP_MF; 751 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 752 m->m_next = m_copy(m0, off, len); 753 if (m->m_next == 0) { 754 error = ENOBUFS; /* ??? */ 755 ipstat.ips_odropped++; 756 goto sendorfree; 757 } 758 m->m_pkthdr.len = mhlen + len; 759 m->m_pkthdr.rcvif = (struct ifnet *)0; 760 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 761 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 762 ifp->if_bridge == NULL) { 763 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 764 ipstat.ips_outhwcsum++; 765 } else { 766 mhip->ip_sum = 0; 767 mhip->ip_sum = in_cksum(m, mhlen); 768 } 769 ipstat.ips_ofragments++; 770 } 771 /* 772 * Update first fragment by trimming what's been copied out 773 * and updating header, then send each fragment (in order). 774 */ 775 m = m0; 776 m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len); 777 m->m_pkthdr.len = hlen + firstlen; 778 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 779 ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF)); 780 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 781 ifp->if_bridge == NULL) { 782 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 783 ipstat.ips_outhwcsum++; 784 } else { 785 ip->ip_sum = 0; 786 ip->ip_sum = in_cksum(m, hlen); 787 } 788 sendorfree: 789 for (m = m0; m; m = m0) { 790 m0 = m->m_nextpkt; 791 m->m_nextpkt = 0; 792 if (error == 0) 793 error = (*ifp->if_output)(ifp, m, sintosa(dst), 794 ro->ro_rt); 795 else 796 m_freem(m); 797 } 798 799 if (error == 0) 800 ipstat.ips_fragmented++; 801 } 802 done: 803 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) 804 RTFREE(ro->ro_rt); 805 return (error); 806 bad: 807 #ifdef IPSEC 808 if (error == EMSGSIZE && icmp_mtu != 0) 809 ipsec_adjust_mtu(m, icmp_mtu); 810 #endif 811 m_freem(m0); 812 goto done; 813 } 814 815 /* 816 * Insert IP options into preformed packet. 817 * Adjust IP destination as required for IP source routing, 818 * as indicated by a non-zero in_addr at the start of the options. 819 */ 820 static struct mbuf * 821 ip_insertoptions(m, opt, phlen) 822 register struct mbuf *m; 823 struct mbuf *opt; 824 int *phlen; 825 { 826 register struct ipoption *p = mtod(opt, struct ipoption *); 827 struct mbuf *n; 828 register struct ip *ip = mtod(m, struct ip *); 829 unsigned optlen; 830 831 optlen = opt->m_len - sizeof(p->ipopt_dst); 832 if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET) 833 return (m); /* XXX should fail */ 834 if (p->ipopt_dst.s_addr) 835 ip->ip_dst = p->ipopt_dst; 836 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 837 MGETHDR(n, M_DONTWAIT, MT_HEADER); 838 if (n == 0) 839 return (m); 840 M_MOVE_HDR(n, m); 841 n->m_pkthdr.len += optlen; 842 m->m_len -= sizeof(struct ip); 843 m->m_data += sizeof(struct ip); 844 n->m_next = m; 845 m = n; 846 m->m_len = optlen + sizeof(struct ip); 847 m->m_data += max_linkhdr; 848 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 849 } else { 850 m->m_data -= optlen; 851 m->m_len += optlen; 852 m->m_pkthdr.len += optlen; 853 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 854 } 855 ip = mtod(m, struct ip *); 856 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen); 857 *phlen = sizeof(struct ip) + optlen; 858 ip->ip_len += optlen; 859 return (m); 860 } 861 862 /* 863 * Copy options from ip to jp, 864 * omitting those not copied during fragmentation. 865 */ 866 int 867 ip_optcopy(ip, jp) 868 struct ip *ip, *jp; 869 { 870 register u_char *cp, *dp; 871 int opt, optlen, cnt; 872 873 cp = (u_char *)(ip + 1); 874 dp = (u_char *)(jp + 1); 875 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 876 for (; cnt > 0; cnt -= optlen, cp += optlen) { 877 opt = cp[0]; 878 if (opt == IPOPT_EOL) 879 break; 880 if (opt == IPOPT_NOP) { 881 /* Preserve for IP mcast tunnel's LSRR alignment. */ 882 *dp++ = IPOPT_NOP; 883 optlen = 1; 884 continue; 885 } 886 #ifdef DIAGNOSTIC 887 if (cnt < IPOPT_OLEN + sizeof(*cp)) 888 panic("malformed IPv4 option passed to ip_optcopy"); 889 #endif 890 optlen = cp[IPOPT_OLEN]; 891 #ifdef DIAGNOSTIC 892 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 893 panic("malformed IPv4 option passed to ip_optcopy"); 894 #endif 895 /* bogus lengths should have been caught by ip_dooptions */ 896 if (optlen > cnt) 897 optlen = cnt; 898 if (IPOPT_COPIED(opt)) { 899 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen); 900 dp += optlen; 901 } 902 } 903 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 904 *dp++ = IPOPT_EOL; 905 return (optlen); 906 } 907 908 /* 909 * IP socket option processing. 910 */ 911 int 912 ip_ctloutput(op, so, level, optname, mp) 913 int op; 914 struct socket *so; 915 int level, optname; 916 struct mbuf **mp; 917 { 918 register struct inpcb *inp = sotoinpcb(so); 919 register struct mbuf *m = *mp; 920 register int optval = 0; 921 #ifdef IPSEC 922 struct proc *p = curproc; /* XXX */ 923 struct ipsec_ref *ipr; 924 u_int16_t opt16val; 925 #endif 926 int error = 0; 927 928 if (level != IPPROTO_IP) { 929 error = EINVAL; 930 if (op == PRCO_SETOPT && *mp) 931 (void) m_free(*mp); 932 } else switch (op) { 933 case PRCO_SETOPT: 934 switch (optname) { 935 case IP_OPTIONS: 936 #ifdef notyet 937 case IP_RETOPTS: 938 return (ip_pcbopts(optname, &inp->inp_options, m)); 939 #else 940 return (ip_pcbopts(&inp->inp_options, m)); 941 #endif 942 943 case IP_TOS: 944 case IP_TTL: 945 case IP_RECVOPTS: 946 case IP_RECVRETOPTS: 947 case IP_RECVDSTADDR: 948 if (m == NULL || m->m_len != sizeof(int)) 949 error = EINVAL; 950 else { 951 optval = *mtod(m, int *); 952 switch (optname) { 953 954 case IP_TOS: 955 inp->inp_ip.ip_tos = optval; 956 break; 957 958 case IP_TTL: 959 inp->inp_ip.ip_ttl = optval; 960 break; 961 #define OPTSET(bit) \ 962 if (optval) \ 963 inp->inp_flags |= bit; \ 964 else \ 965 inp->inp_flags &= ~bit; 966 967 case IP_RECVOPTS: 968 OPTSET(INP_RECVOPTS); 969 break; 970 971 case IP_RECVRETOPTS: 972 OPTSET(INP_RECVRETOPTS); 973 break; 974 975 case IP_RECVDSTADDR: 976 OPTSET(INP_RECVDSTADDR); 977 break; 978 } 979 } 980 break; 981 #undef OPTSET 982 983 case IP_MULTICAST_IF: 984 case IP_MULTICAST_TTL: 985 case IP_MULTICAST_LOOP: 986 case IP_ADD_MEMBERSHIP: 987 case IP_DROP_MEMBERSHIP: 988 error = ip_setmoptions(optname, &inp->inp_moptions, m); 989 break; 990 991 case IP_PORTRANGE: 992 if (m == 0 || m->m_len != sizeof(int)) 993 error = EINVAL; 994 else { 995 optval = *mtod(m, int *); 996 997 switch (optval) { 998 999 case IP_PORTRANGE_DEFAULT: 1000 inp->inp_flags &= ~(INP_LOWPORT); 1001 inp->inp_flags &= ~(INP_HIGHPORT); 1002 break; 1003 1004 case IP_PORTRANGE_HIGH: 1005 inp->inp_flags &= ~(INP_LOWPORT); 1006 inp->inp_flags |= INP_HIGHPORT; 1007 break; 1008 1009 case IP_PORTRANGE_LOW: 1010 inp->inp_flags &= ~(INP_HIGHPORT); 1011 inp->inp_flags |= INP_LOWPORT; 1012 break; 1013 1014 default: 1015 1016 error = EINVAL; 1017 break; 1018 } 1019 } 1020 break; 1021 case IP_AUTH_LEVEL: 1022 case IP_ESP_TRANS_LEVEL: 1023 case IP_ESP_NETWORK_LEVEL: 1024 case IP_IPCOMP_LEVEL: 1025 #ifndef IPSEC 1026 error = EOPNOTSUPP; 1027 #else 1028 if (m == 0 || m->m_len != sizeof(int)) { 1029 error = EINVAL; 1030 break; 1031 } 1032 optval = *mtod(m, int *); 1033 1034 if (optval < IPSEC_LEVEL_BYPASS || 1035 optval > IPSEC_LEVEL_UNIQUE) { 1036 error = EINVAL; 1037 break; 1038 } 1039 1040 /* Unlink cached output TDB to force a re-search */ 1041 if (inp->inp_tdb_out) { 1042 int s = spltdb(); 1043 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1044 inp, inp_tdb_out_next); 1045 splx(s); 1046 } 1047 1048 if (inp->inp_tdb_in) { 1049 int s = spltdb(); 1050 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1051 inp, inp_tdb_in_next); 1052 splx(s); 1053 } 1054 1055 switch (optname) { 1056 case IP_AUTH_LEVEL: 1057 if (optval < ipsec_auth_default_level && 1058 suser(p->p_ucred, &p->p_acflag)) { 1059 error = EACCES; 1060 break; 1061 } 1062 inp->inp_seclevel[SL_AUTH] = optval; 1063 break; 1064 1065 case IP_ESP_TRANS_LEVEL: 1066 if (optval < ipsec_esp_trans_default_level && 1067 suser(p->p_ucred, &p->p_acflag)) { 1068 error = EACCES; 1069 break; 1070 } 1071 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1072 break; 1073 1074 case IP_ESP_NETWORK_LEVEL: 1075 if (optval < ipsec_esp_network_default_level && 1076 suser(p->p_ucred, &p->p_acflag)) { 1077 error = EACCES; 1078 break; 1079 } 1080 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1081 break; 1082 case IP_IPCOMP_LEVEL: 1083 if (optval < ipsec_ipcomp_default_level && 1084 suser(p->p_ucred, &p->p_acflag)) { 1085 error = EACCES; 1086 break; 1087 } 1088 inp->inp_seclevel[SL_IPCOMP] = optval; 1089 break; 1090 } 1091 if (!error) 1092 inp->inp_secrequire = get_sa_require(inp); 1093 #endif 1094 break; 1095 1096 case IP_IPSEC_REMOTE_CRED: 1097 case IP_IPSEC_REMOTE_AUTH: 1098 /* Can't set the remote credential or key */ 1099 error = EOPNOTSUPP; 1100 break; 1101 1102 case IP_IPSEC_LOCAL_ID: 1103 case IP_IPSEC_REMOTE_ID: 1104 case IP_IPSEC_LOCAL_CRED: 1105 case IP_IPSEC_LOCAL_AUTH: 1106 #ifndef IPSEC 1107 error = EOPNOTSUPP; 1108 #else 1109 if (m->m_len < 2) { 1110 error = EINVAL; 1111 break; 1112 } 1113 1114 m_copydata(m, 0, 2, (caddr_t) &opt16val); 1115 1116 /* If the type is 0, then we cleanup and return */ 1117 if (opt16val == 0) { 1118 switch (optname) { 1119 case IP_IPSEC_LOCAL_ID: 1120 if (inp->inp_ipsec_localid != NULL) 1121 ipsp_reffree(inp->inp_ipsec_localid); 1122 inp->inp_ipsec_localid = NULL; 1123 break; 1124 1125 case IP_IPSEC_REMOTE_ID: 1126 if (inp->inp_ipsec_remoteid != NULL) 1127 ipsp_reffree(inp->inp_ipsec_remoteid); 1128 inp->inp_ipsec_remoteid = NULL; 1129 break; 1130 1131 case IP_IPSEC_LOCAL_CRED: 1132 if (inp->inp_ipsec_localcred != NULL) 1133 ipsp_reffree(inp->inp_ipsec_localcred); 1134 inp->inp_ipsec_localcred = NULL; 1135 break; 1136 1137 case IP_IPSEC_LOCAL_AUTH: 1138 if (inp->inp_ipsec_localauth != NULL) 1139 ipsp_reffree(inp->inp_ipsec_localauth); 1140 inp->inp_ipsec_localauth = NULL; 1141 break; 1142 } 1143 1144 error = 0; 1145 break; 1146 } 1147 1148 /* Can't have an empty payload */ 1149 if (m->m_len == 2) { 1150 error = EINVAL; 1151 break; 1152 } 1153 1154 MALLOC(ipr, struct ipsec_ref *, 1155 sizeof(struct ipsec_ref) + m->m_len - 2, 1156 M_CREDENTIALS, M_NOWAIT); 1157 if (ipr == NULL) { 1158 error = ENOBUFS; 1159 break; 1160 } 1161 ipr->ref_count = 1; 1162 ipr->ref_malloctype = M_CREDENTIALS; 1163 ipr->ref_len = m->m_len - 2; 1164 ipr->ref_type = opt16val; 1165 m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1)); 1166 1167 switch (optname) { 1168 case IP_IPSEC_LOCAL_ID: 1169 /* Check valid types and NUL-termination */ 1170 if (ipr->ref_type < IPSP_IDENTITY_PREFIX 1171 || ipr->ref_type > IPSP_IDENTITY_CONNECTION 1172 || ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1173 FREE(ipr, M_CREDENTIALS); 1174 error = EINVAL; 1175 } else { 1176 if (inp->inp_ipsec_localid != NULL) 1177 ipsp_reffree(inp->inp_ipsec_localid); 1178 inp->inp_ipsec_localid = ipr; 1179 } 1180 break; 1181 case IP_IPSEC_REMOTE_ID: 1182 /* Check valid types and NUL-termination */ 1183 if (ipr->ref_type < IPSP_IDENTITY_PREFIX 1184 || ipr->ref_type > IPSP_IDENTITY_CONNECTION 1185 || ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1186 FREE(ipr, M_CREDENTIALS); 1187 error = EINVAL; 1188 } else { 1189 if (inp->inp_ipsec_remoteid != NULL) 1190 ipsp_reffree(inp->inp_ipsec_remoteid); 1191 inp->inp_ipsec_remoteid = ipr; 1192 } 1193 break; 1194 case IP_IPSEC_LOCAL_CRED: 1195 if (ipr->ref_type < IPSP_CRED_KEYNOTE || 1196 ipr->ref_type > IPSP_CRED_X509) { 1197 FREE(ipr, M_CREDENTIALS); 1198 error = EINVAL; 1199 } else { 1200 if (inp->inp_ipsec_localcred != NULL) 1201 ipsp_reffree(inp->inp_ipsec_localcred); 1202 inp->inp_ipsec_localcred = ipr; 1203 } 1204 break; 1205 case IP_IPSEC_LOCAL_AUTH: 1206 if (ipr->ref_type < IPSP_AUTH_PASSPHRASE || 1207 ipr->ref_type > IPSP_AUTH_RSA) { 1208 FREE(ipr, M_CREDENTIALS); 1209 error = EINVAL; 1210 } else { 1211 if (inp->inp_ipsec_localauth != NULL) 1212 ipsp_reffree(inp->inp_ipsec_localauth); 1213 inp->inp_ipsec_localauth = ipr; 1214 } 1215 break; 1216 } 1217 1218 /* Unlink cached output TDB to force a re-search */ 1219 if (inp->inp_tdb_out) { 1220 int s = spltdb(); 1221 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1222 inp, inp_tdb_out_next); 1223 splx(s); 1224 } 1225 1226 if (inp->inp_tdb_in) { 1227 int s = spltdb(); 1228 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1229 inp, inp_tdb_in_next); 1230 splx(s); 1231 } 1232 #endif 1233 break; 1234 default: 1235 error = ENOPROTOOPT; 1236 break; 1237 } 1238 if (m) 1239 (void)m_free(m); 1240 break; 1241 1242 case PRCO_GETOPT: 1243 switch (optname) { 1244 case IP_OPTIONS: 1245 case IP_RETOPTS: 1246 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1247 if (inp->inp_options) { 1248 m->m_len = inp->inp_options->m_len; 1249 bcopy(mtod(inp->inp_options, caddr_t), 1250 mtod(m, caddr_t), (unsigned)m->m_len); 1251 } else 1252 m->m_len = 0; 1253 break; 1254 1255 case IP_TOS: 1256 case IP_TTL: 1257 case IP_RECVOPTS: 1258 case IP_RECVRETOPTS: 1259 case IP_RECVDSTADDR: 1260 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1261 m->m_len = sizeof(int); 1262 switch (optname) { 1263 1264 case IP_TOS: 1265 optval = inp->inp_ip.ip_tos; 1266 break; 1267 1268 case IP_TTL: 1269 optval = inp->inp_ip.ip_ttl; 1270 break; 1271 1272 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1273 1274 case IP_RECVOPTS: 1275 optval = OPTBIT(INP_RECVOPTS); 1276 break; 1277 1278 case IP_RECVRETOPTS: 1279 optval = OPTBIT(INP_RECVRETOPTS); 1280 break; 1281 1282 case IP_RECVDSTADDR: 1283 optval = OPTBIT(INP_RECVDSTADDR); 1284 break; 1285 } 1286 *mtod(m, int *) = optval; 1287 break; 1288 1289 case IP_MULTICAST_IF: 1290 case IP_MULTICAST_TTL: 1291 case IP_MULTICAST_LOOP: 1292 case IP_ADD_MEMBERSHIP: 1293 case IP_DROP_MEMBERSHIP: 1294 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1295 break; 1296 1297 case IP_PORTRANGE: 1298 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1299 m->m_len = sizeof(int); 1300 1301 if (inp->inp_flags & INP_HIGHPORT) 1302 optval = IP_PORTRANGE_HIGH; 1303 else if (inp->inp_flags & INP_LOWPORT) 1304 optval = IP_PORTRANGE_LOW; 1305 else 1306 optval = 0; 1307 1308 *mtod(m, int *) = optval; 1309 break; 1310 1311 case IP_AUTH_LEVEL: 1312 case IP_ESP_TRANS_LEVEL: 1313 case IP_ESP_NETWORK_LEVEL: 1314 case IP_IPCOMP_LEVEL: 1315 #ifndef IPSEC 1316 m->m_len = sizeof(int); 1317 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1318 #else 1319 m->m_len = sizeof(int); 1320 switch (optname) { 1321 case IP_AUTH_LEVEL: 1322 optval = inp->inp_seclevel[SL_AUTH]; 1323 break; 1324 1325 case IP_ESP_TRANS_LEVEL: 1326 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1327 break; 1328 1329 case IP_ESP_NETWORK_LEVEL: 1330 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1331 break; 1332 case IP_IPCOMP_LEVEL: 1333 optval = inp->inp_seclevel[SL_IPCOMP]; 1334 break; 1335 } 1336 *mtod(m, int *) = optval; 1337 #endif 1338 break; 1339 case IP_IPSEC_LOCAL_ID: 1340 case IP_IPSEC_REMOTE_ID: 1341 case IP_IPSEC_LOCAL_CRED: 1342 case IP_IPSEC_REMOTE_CRED: 1343 case IP_IPSEC_LOCAL_AUTH: 1344 case IP_IPSEC_REMOTE_AUTH: 1345 #ifndef IPSEC 1346 error = EOPNOTSUPP; 1347 #else 1348 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1349 m->m_len = sizeof(u_int16_t); 1350 switch (optname) { 1351 case IP_IPSEC_LOCAL_ID: 1352 ipr = inp->inp_ipsec_localid; 1353 opt16val = IPSP_IDENTITY_NONE; 1354 break; 1355 case IP_IPSEC_REMOTE_ID: 1356 ipr = inp->inp_ipsec_remoteid; 1357 opt16val = IPSP_IDENTITY_NONE; 1358 break; 1359 case IP_IPSEC_LOCAL_CRED: 1360 ipr = inp->inp_ipsec_localcred; 1361 opt16val = IPSP_CRED_NONE; 1362 break; 1363 case IP_IPSEC_REMOTE_CRED: 1364 ipr = inp->inp_ipsec_remotecred; 1365 opt16val = IPSP_CRED_NONE; 1366 break; 1367 case IP_IPSEC_LOCAL_AUTH: 1368 ipr = inp->inp_ipsec_localauth; 1369 break; 1370 case IP_IPSEC_REMOTE_AUTH: 1371 ipr = inp->inp_ipsec_remoteauth; 1372 break; 1373 } 1374 if (ipr == NULL) 1375 *mtod(m, u_int16_t *) = opt16val; 1376 else { 1377 m->m_len += ipr->ref_len; 1378 *mtod(m, u_int16_t *) = ipr->ref_type; 1379 m_copyback(m, sizeof(u_int16_t), ipr->ref_len, 1380 (caddr_t)(ipr + 1)); 1381 } 1382 #endif 1383 break; 1384 default: 1385 error = ENOPROTOOPT; 1386 break; 1387 } 1388 break; 1389 } 1390 return (error); 1391 } 1392 1393 /* 1394 * Set up IP options in pcb for insertion in output packets. 1395 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1396 * with destination address if source routed. 1397 */ 1398 int 1399 #ifdef notyet 1400 ip_pcbopts(optname, pcbopt, m) 1401 int optname; 1402 #else 1403 ip_pcbopts(pcbopt, m) 1404 #endif 1405 struct mbuf **pcbopt; 1406 register struct mbuf *m; 1407 { 1408 register int cnt, optlen; 1409 register u_char *cp; 1410 u_char opt; 1411 1412 /* turn off any old options */ 1413 if (*pcbopt) 1414 (void)m_free(*pcbopt); 1415 *pcbopt = 0; 1416 if (m == (struct mbuf *)0 || m->m_len == 0) { 1417 /* 1418 * Only turning off any previous options. 1419 */ 1420 if (m) 1421 (void)m_free(m); 1422 return (0); 1423 } 1424 1425 #ifndef vax 1426 if (m->m_len % sizeof(int32_t)) 1427 goto bad; 1428 #endif 1429 /* 1430 * IP first-hop destination address will be stored before 1431 * actual options; move other options back 1432 * and clear it when none present. 1433 */ 1434 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1435 goto bad; 1436 cnt = m->m_len; 1437 m->m_len += sizeof(struct in_addr); 1438 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1439 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1440 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1441 1442 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1443 opt = cp[IPOPT_OPTVAL]; 1444 if (opt == IPOPT_EOL) 1445 break; 1446 if (opt == IPOPT_NOP) 1447 optlen = 1; 1448 else { 1449 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1450 goto bad; 1451 optlen = cp[IPOPT_OLEN]; 1452 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1453 goto bad; 1454 } 1455 switch (opt) { 1456 1457 default: 1458 break; 1459 1460 case IPOPT_LSRR: 1461 case IPOPT_SSRR: 1462 /* 1463 * user process specifies route as: 1464 * ->A->B->C->D 1465 * D must be our final destination (but we can't 1466 * check that since we may not have connected yet). 1467 * A is first hop destination, which doesn't appear in 1468 * actual IP option, but is stored before the options. 1469 */ 1470 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1471 goto bad; 1472 m->m_len -= sizeof(struct in_addr); 1473 cnt -= sizeof(struct in_addr); 1474 optlen -= sizeof(struct in_addr); 1475 cp[IPOPT_OLEN] = optlen; 1476 /* 1477 * Move first hop before start of options. 1478 */ 1479 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1480 sizeof(struct in_addr)); 1481 /* 1482 * Then copy rest of options back 1483 * to close up the deleted entry. 1484 */ 1485 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1486 sizeof(struct in_addr)), 1487 (caddr_t)&cp[IPOPT_OFFSET+1], 1488 (unsigned)cnt + sizeof(struct in_addr)); 1489 break; 1490 } 1491 } 1492 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1493 goto bad; 1494 *pcbopt = m; 1495 return (0); 1496 1497 bad: 1498 (void)m_free(m); 1499 return (EINVAL); 1500 } 1501 1502 /* 1503 * Set the IP multicast options in response to user setsockopt(). 1504 */ 1505 int 1506 ip_setmoptions(optname, imop, m) 1507 int optname; 1508 struct ip_moptions **imop; 1509 struct mbuf *m; 1510 { 1511 register int error = 0; 1512 u_char loop; 1513 register int i; 1514 struct in_addr addr; 1515 register struct ip_mreq *mreq; 1516 register struct ifnet *ifp; 1517 register struct ip_moptions *imo = *imop; 1518 struct route ro; 1519 register struct sockaddr_in *dst; 1520 1521 if (imo == NULL) { 1522 /* 1523 * No multicast option buffer attached to the pcb; 1524 * allocate one and initialize to default values. 1525 */ 1526 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, 1527 M_WAITOK); 1528 1529 *imop = imo; 1530 imo->imo_multicast_ifp = NULL; 1531 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1532 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1533 imo->imo_num_memberships = 0; 1534 } 1535 1536 switch (optname) { 1537 1538 case IP_MULTICAST_IF: 1539 /* 1540 * Select the interface for outgoing multicast packets. 1541 */ 1542 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1543 error = EINVAL; 1544 break; 1545 } 1546 addr = *(mtod(m, struct in_addr *)); 1547 /* 1548 * INADDR_ANY is used to remove a previous selection. 1549 * When no interface is selected, a default one is 1550 * chosen every time a multicast packet is sent. 1551 */ 1552 if (addr.s_addr == INADDR_ANY) { 1553 imo->imo_multicast_ifp = NULL; 1554 break; 1555 } 1556 /* 1557 * The selected interface is identified by its local 1558 * IP address. Find the interface and confirm that 1559 * it supports multicasting. 1560 */ 1561 INADDR_TO_IFP(addr, ifp); 1562 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1563 error = EADDRNOTAVAIL; 1564 break; 1565 } 1566 imo->imo_multicast_ifp = ifp; 1567 break; 1568 1569 case IP_MULTICAST_TTL: 1570 /* 1571 * Set the IP time-to-live for outgoing multicast packets. 1572 */ 1573 if (m == NULL || m->m_len != 1) { 1574 error = EINVAL; 1575 break; 1576 } 1577 imo->imo_multicast_ttl = *(mtod(m, u_char *)); 1578 break; 1579 1580 case IP_MULTICAST_LOOP: 1581 /* 1582 * Set the loopback flag for outgoing multicast packets. 1583 * Must be zero or one. 1584 */ 1585 if (m == NULL || m->m_len != 1 || 1586 (loop = *(mtod(m, u_char *))) > 1) { 1587 error = EINVAL; 1588 break; 1589 } 1590 imo->imo_multicast_loop = loop; 1591 break; 1592 1593 case IP_ADD_MEMBERSHIP: 1594 /* 1595 * Add a multicast group membership. 1596 * Group must be a valid IP multicast address. 1597 */ 1598 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1599 error = EINVAL; 1600 break; 1601 } 1602 mreq = mtod(m, struct ip_mreq *); 1603 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1604 error = EINVAL; 1605 break; 1606 } 1607 /* 1608 * If no interface address was provided, use the interface of 1609 * the route to the given multicast address. 1610 */ 1611 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1612 ro.ro_rt = NULL; 1613 dst = satosin(&ro.ro_dst); 1614 dst->sin_len = sizeof(*dst); 1615 dst->sin_family = AF_INET; 1616 dst->sin_addr = mreq->imr_multiaddr; 1617 rtalloc(&ro); 1618 if (ro.ro_rt == NULL) { 1619 error = EADDRNOTAVAIL; 1620 break; 1621 } 1622 ifp = ro.ro_rt->rt_ifp; 1623 rtfree(ro.ro_rt); 1624 } else { 1625 INADDR_TO_IFP(mreq->imr_interface, ifp); 1626 } 1627 /* 1628 * See if we found an interface, and confirm that it 1629 * supports multicast. 1630 */ 1631 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1632 error = EADDRNOTAVAIL; 1633 break; 1634 } 1635 /* 1636 * See if the membership already exists or if all the 1637 * membership slots are full. 1638 */ 1639 for (i = 0; i < imo->imo_num_memberships; ++i) { 1640 if (imo->imo_membership[i]->inm_ifp == ifp && 1641 imo->imo_membership[i]->inm_addr.s_addr 1642 == mreq->imr_multiaddr.s_addr) 1643 break; 1644 } 1645 if (i < imo->imo_num_memberships) { 1646 error = EADDRINUSE; 1647 break; 1648 } 1649 if (i == IP_MAX_MEMBERSHIPS) { 1650 error = ETOOMANYREFS; 1651 break; 1652 } 1653 /* 1654 * Everything looks good; add a new record to the multicast 1655 * address list for the given interface. 1656 */ 1657 if ((imo->imo_membership[i] = 1658 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1659 error = ENOBUFS; 1660 break; 1661 } 1662 ++imo->imo_num_memberships; 1663 break; 1664 1665 case IP_DROP_MEMBERSHIP: 1666 /* 1667 * Drop a multicast group membership. 1668 * Group must be a valid IP multicast address. 1669 */ 1670 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1671 error = EINVAL; 1672 break; 1673 } 1674 mreq = mtod(m, struct ip_mreq *); 1675 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1676 error = EINVAL; 1677 break; 1678 } 1679 /* 1680 * If an interface address was specified, get a pointer 1681 * to its ifnet structure. 1682 */ 1683 if (mreq->imr_interface.s_addr == INADDR_ANY) 1684 ifp = NULL; 1685 else { 1686 INADDR_TO_IFP(mreq->imr_interface, ifp); 1687 if (ifp == NULL) { 1688 error = EADDRNOTAVAIL; 1689 break; 1690 } 1691 } 1692 /* 1693 * Find the membership in the membership array. 1694 */ 1695 for (i = 0; i < imo->imo_num_memberships; ++i) { 1696 if ((ifp == NULL || 1697 imo->imo_membership[i]->inm_ifp == ifp) && 1698 imo->imo_membership[i]->inm_addr.s_addr == 1699 mreq->imr_multiaddr.s_addr) 1700 break; 1701 } 1702 if (i == imo->imo_num_memberships) { 1703 error = EADDRNOTAVAIL; 1704 break; 1705 } 1706 /* 1707 * Give up the multicast address record to which the 1708 * membership points. 1709 */ 1710 in_delmulti(imo->imo_membership[i]); 1711 /* 1712 * Remove the gap in the membership array. 1713 */ 1714 for (++i; i < imo->imo_num_memberships; ++i) 1715 imo->imo_membership[i-1] = imo->imo_membership[i]; 1716 --imo->imo_num_memberships; 1717 break; 1718 1719 default: 1720 error = EOPNOTSUPP; 1721 break; 1722 } 1723 1724 /* 1725 * If all options have default values, no need to keep the mbuf. 1726 */ 1727 if (imo->imo_multicast_ifp == NULL && 1728 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1729 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1730 imo->imo_num_memberships == 0) { 1731 free(*imop, M_IPMOPTS); 1732 *imop = NULL; 1733 } 1734 1735 return (error); 1736 } 1737 1738 /* 1739 * Return the IP multicast options in response to user getsockopt(). 1740 */ 1741 int 1742 ip_getmoptions(optname, imo, mp) 1743 int optname; 1744 register struct ip_moptions *imo; 1745 register struct mbuf **mp; 1746 { 1747 u_char *ttl; 1748 u_char *loop; 1749 struct in_addr *addr; 1750 struct in_ifaddr *ia; 1751 1752 *mp = m_get(M_WAIT, MT_SOOPTS); 1753 1754 switch (optname) { 1755 1756 case IP_MULTICAST_IF: 1757 addr = mtod(*mp, struct in_addr *); 1758 (*mp)->m_len = sizeof(struct in_addr); 1759 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1760 addr->s_addr = INADDR_ANY; 1761 else { 1762 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1763 addr->s_addr = (ia == NULL) ? INADDR_ANY 1764 : ia->ia_addr.sin_addr.s_addr; 1765 } 1766 return (0); 1767 1768 case IP_MULTICAST_TTL: 1769 ttl = mtod(*mp, u_char *); 1770 (*mp)->m_len = 1; 1771 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1772 : imo->imo_multicast_ttl; 1773 return (0); 1774 1775 case IP_MULTICAST_LOOP: 1776 loop = mtod(*mp, u_char *); 1777 (*mp)->m_len = 1; 1778 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1779 : imo->imo_multicast_loop; 1780 return (0); 1781 1782 default: 1783 return (EOPNOTSUPP); 1784 } 1785 } 1786 1787 /* 1788 * Discard the IP multicast options. 1789 */ 1790 void 1791 ip_freemoptions(imo) 1792 register struct ip_moptions *imo; 1793 { 1794 register int i; 1795 1796 if (imo != NULL) { 1797 for (i = 0; i < imo->imo_num_memberships; ++i) 1798 in_delmulti(imo->imo_membership[i]); 1799 free(imo, M_IPMOPTS); 1800 } 1801 } 1802 1803 /* 1804 * Routine called from ip_output() to loop back a copy of an IP multicast 1805 * packet to the input queue of a specified interface. Note that this 1806 * calls the output routine of the loopback "driver", but with an interface 1807 * pointer that might NOT be &loif -- easier than replicating that code here. 1808 */ 1809 static void 1810 ip_mloopback(ifp, m, dst) 1811 struct ifnet *ifp; 1812 register struct mbuf *m; 1813 register struct sockaddr_in *dst; 1814 { 1815 register struct ip *ip; 1816 struct mbuf *copym; 1817 1818 copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1819 if (copym != NULL) { 1820 /* 1821 * We don't bother to fragment if the IP length is greater 1822 * than the interface's MTU. Can this possibly matter? 1823 */ 1824 ip = mtod(copym, struct ip *); 1825 ip->ip_len = htons((u_int16_t)ip->ip_len); 1826 ip->ip_off = htons((u_int16_t)ip->ip_off); 1827 ip->ip_sum = 0; 1828 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1829 (void) looutput(ifp, copym, sintosa(dst), NULL); 1830 } 1831 } 1832 1833 /* 1834 * Process a delayed payload checksum calculation. 1835 */ 1836 void 1837 in_delayed_cksum(struct mbuf *m) 1838 { 1839 struct ip *ip; 1840 u_int16_t csum, offset; 1841 1842 ip = mtod(m, struct ip *); 1843 offset = ip->ip_hl << 2; 1844 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1845 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1846 csum = 0xffff; 1847 1848 switch (ip->ip_p) { 1849 case IPPROTO_TCP: 1850 offset += offsetof(struct tcphdr, th_sum); 1851 break; 1852 1853 case IPPROTO_UDP: 1854 offset += offsetof(struct udphdr, uh_sum); 1855 break; 1856 1857 default: 1858 return; 1859 } 1860 1861 if ((offset + sizeof(u_int16_t)) > m->m_len) 1862 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum); 1863 else 1864 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1865 } 1866