1 /* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 34 * $FreeBSD: src/sys/netinet/ip_input.c,v 1.130.2.52 2003/03/07 07:01:28 silby Exp $ 35 * $DragonFly: src/sys/netinet/ip_input.c,v 1.18 2004/04/13 00:14:01 hsu Exp $ 36 */ 37 38 #define _IP_VHL 39 40 #include "opt_bootp.h" 41 #include "opt_ipfw.h" 42 #include "opt_ipdn.h" 43 #include "opt_ipdivert.h" 44 #include "opt_ipfilter.h" 45 #include "opt_ipstealth.h" 46 #include "opt_ipsec.h" 47 #include "opt_pfil_hooks.h" 48 #include "opt_random_ip_id.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/mbuf.h> 53 #include <sys/malloc.h> 54 #include <sys/domain.h> 55 #include <sys/protosw.h> 56 #include <sys/socket.h> 57 #include <sys/time.h> 58 #include <sys/kernel.h> 59 #include <sys/syslog.h> 60 #include <sys/sysctl.h> 61 #include <sys/in_cksum.h> 62 63 #include <sys/thread2.h> 64 #include <sys/msgport2.h> 65 66 #include <net/if.h> 67 #include <net/if_types.h> 68 #include <net/if_var.h> 69 #include <net/if_dl.h> 70 #ifdef PFIL_HOOKS 71 #include <net/pfil.h> 72 #endif 73 #include <net/route.h> 74 #include <net/netisr.h> 75 #include <net/intrq.h> 76 77 #include <netinet/in.h> 78 #include <netinet/in_systm.h> 79 #include <netinet/in_var.h> 80 #include <netinet/ip.h> 81 #include <netinet/in_pcb.h> 82 #include <netinet/ip_var.h> 83 #include <netinet/ip_icmp.h> 84 85 #include <netinet/ipprotosw.h> 86 87 #include <sys/socketvar.h> 88 89 #include <net/ipfw/ip_fw.h> 90 #include <net/dummynet/ip_dummynet.h> 91 92 #ifdef IPSEC 93 #include <netinet6/ipsec.h> 94 #include <netproto/key/key.h> 95 #endif 96 97 #ifdef FAST_IPSEC 98 #include <netipsec/ipsec.h> 99 #include <netipsec/key.h> 100 #endif 101 102 int rsvp_on = 0; /* when non-zero, ip_input() delivers every IPPROTO_RSVP packet locally */ 103 static int ip_rsvp_on; 104 struct socket *ip_rsvpd; 105 106 int ipforwarding = 0; /* when zero, ip_input() frees packets that are not for this host instead of calling ip_forward() */ 107 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, 108 &ipforwarding, 0, "Enable IP forwarding between interfaces"); 109 110 static int ipsendredirects = 1; /* XXX */ 111
SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, 112 &ipsendredirects, 0, "Enable sending IP redirects"); 113 114 int ip_defttl = IPDEFTTL; 115 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, 116 &ip_defttl, 0, "Maximum TTL on IP packets"); 117 118 static int ip_dosourceroute = 0; 119 SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW, 120 &ip_dosourceroute, 0, "Enable forwarding source routed IP packets"); 121 122 static int ip_acceptsourceroute = 0; 123 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, 124 CTLFLAG_RW, &ip_acceptsourceroute, 0, 125 "Enable accepting source routed IP packets"); 126 127 static int ip_keepfaith = 0; /* when set, ip_input() accepts TCP and ICMP packets that arrive on an IFT_FAITH interface */ 128 SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, 129 &ip_keepfaith, 0, 130 "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); 131 132 static int nipq = 0; /* total # of reass queues; incremented by ip_reass() when it creates a queue */ 133 static int maxnipq; /* set to nmbclusters / 32 by ip_init(); 0 makes ip_input() drop every fragment */ 134 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW, 135 &maxnipq, 0, 136 "Maximum number of IPv4 fragment reassembly queue entries"); 137 138 static int maxfragsperpacket; /* set to 16 by ip_init() */ 139 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, 140 &maxfragsperpacket, 0, 141 "Maximum number of IPv4 fragments allowed per packet"); 142 143 static int ip_sendsourcequench = 0; 144 SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, 145 &ip_sendsourcequench, 0, 146 "Enable the transmission of source quench packets"); 147 148 /* 149 * XXX - Setting ip_checkinterface mostly implements the receive side of 150 * the Strong ES model described in RFC 1122, but since the routing table 151 * and transmit implementation do not implement the Strong ES model, 152 * setting this to 1 results in an odd hybrid. 153 * 154 * XXX - ip_checkinterface currently must be disabled if you use ipnat 155 * to translate the destination address to another local interface.
156 * 157 * XXX - ip_checkinterface must be disabled if you add IP aliases 158 * to the loopback interface instead of the interface where the 159 * packets for those addresses are received. 160 */ 161 static int ip_checkinterface = 0; 162 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, 163 &ip_checkinterface, 0, "Verify packet arrives on correct interface"); 164 165 #ifdef DIAGNOSTIC 166 static int ipprintfs = 0; 167 #endif 168 169 static struct ifqueue ipintrq; 170 static int ipqmaxlen = IFQ_MAXLEN; /* copied into ipintrq.ifq_maxlen by ip_init() */ 171 172 extern struct domain inetdomain; 173 extern struct ipprotosw inetsw[]; 174 u_char ip_protox[IPPROTO_MAX]; /* IP protocol number -> index into inetsw[]; filled in by ip_init() */ 175 struct in_ifaddrhead in_ifaddrhead; /* first inet address */ 176 struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ 177 u_long in_ifaddrhmask; /* mask for hash table */ 178 179 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, 180 &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); 181 SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, 182 &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue"); 183 184 struct ipstat ipstat; 185 SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, 186 &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); 187 188 /* Packet reassembly stuff */ 189 #define IPREASS_NHASH_LOG2 6 190 #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) 191 #define IPREASS_HMASK (IPREASS_NHASH - 1) 192 #define IPREASS_HASH(x,y) \ 193 (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) 194 195 static struct ipq ipq[IPREASS_NHASH]; /* reassembly hash buckets; ip_init() links each head to itself (empty circular list) */ 196 const int ipintrq_present = 1; 197 198 #ifdef IPCTL_DEFMTU 199 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 200 &ip_mtu, 0, "Default MTU"); 201 #endif 202 203 #ifdef IPSTEALTH 204 static int ipstealth = 0; /* when set, ip_input() defers non-routing option processing until the packet is known to be for us */ 205 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, &ipstealth, 0, ""); 206 #else 207 static const int ipstealth = 0; 208 #endif 209 210 211 /*
Firewall hooks */ 212 ip_fw_chk_t *ip_fw_chk_ptr; 213 int fw_enable = 1; /* ip_input() consults the firewall only when this is non-zero and IPFW_LOADED */ 214 int fw_one_pass = 1; 215 216 /* Dummynet hooks */ 217 ip_dn_io_t *ip_dn_io_ptr; 218 219 #ifdef PFIL_HOOKS 220 struct pfil_head inet_pfil_hook; 221 #endif 222 223 /* 224 * XXX this is ugly -- the following two global variables are 225 * used to store packet state while it travels through the stack. 226 * Note that the code even makes assumptions on the size and 227 * alignment of fields inside struct ip_srcrt so e.g. adding some 228 * fields will break the code. This needs to be fixed. 229 * 230 * We need to save the IP options in case a protocol wants to respond 231 * to an incoming packet over the same route if the packet got here 232 * using IP source routing. This allows connection establishment and 233 * maintenance when the remote end is on a network that is not known 234 * to us. 235 */ 236 static int ip_nhops = 0; /* reset to 0 by ip_input() before it processes IP options */ 237 238 static struct ip_srcrt { 239 struct in_addr dst; /* final destination */ 240 char nop; /* one NOP to align */ 241 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ 242 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; 243 } ip_srcrt; 244 245 static void save_rte (u_char *, struct in_addr); 246 static int ip_dooptions (struct mbuf *m, int, 247 struct sockaddr_in *next_hop); /* non-zero return: ip_input() stops and does not touch the packet again */ 248 static void ip_forward (struct mbuf *m, int srcrt, 249 struct sockaddr_in *next_hop); 250 static void ip_freef (struct ipq *); 251 static void ip_input_handler (struct netmsg *); /* registered for NETISR_IP by ip_init() */ 252 static struct mbuf *ip_reass (struct mbuf *, struct ipq *, 253 struct ipq *, u_int32_t *, u_int16_t *); 254 255 /* 256 * IP initialization: fill in IP protocol switch table. 257 * All protocols not implemented in kernel go to raw IP protocol handler.
258 */ 259 void 260 ip_init() 261 { 262 struct ipprotosw *pr; 263 int i; 264 265 TAILQ_INIT(&in_ifaddrhead); 266 in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask); 267 pr = (struct ipprotosw *)pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 268 if (pr == NULL) 269 panic("ip_init"); 270 for (i = 0; i < IPPROTO_MAX; i++) 271 ip_protox[i] = pr - inetsw; 272 for (pr = (struct ipprotosw *)inetdomain.dom_protosw; 273 pr < (struct ipprotosw *)inetdomain.dom_protoswNPROTOSW; pr++) 274 if (pr->pr_domain->dom_family == PF_INET && 275 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) 276 ip_protox[pr->pr_protocol] = pr - inetsw; 277 278 #ifdef PFIL_HOOKS 279 inet_pfil_hook.ph_type = PFIL_TYPE_AF; 280 inet_pfil_hook.ph_af = AF_INET; 281 if ((i = pfil_head_register(&inet_pfil_hook)) != 0) 282 printf("%s: WARNING: unable to register pfil hook, " 283 "error %d\n", __func__, i); 284 #endif 285 286 for (i = 0; i < IPREASS_NHASH; i++) 287 ipq[i].next = ipq[i].prev = &ipq[i]; 288 289 maxnipq = nmbclusters / 32; 290 maxfragsperpacket = 16; 291 292 #ifndef RANDOM_IP_ID 293 ip_id = time_second & 0xffff; 294 #endif 295 ipintrq.ifq_maxlen = ipqmaxlen; 296 297 netisr_register(NETISR_IP, ip_mport, ip_input_handler); 298 } 299 300 /* 301 * XXX watch out this one. It is perhaps used as a cache for 302 * the most recently used route ? it is cleared in in_addroute() 303 * when a new route is successfully created. 304 */ 305 struct route ipforward_rt; 306 static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; 307 308 /* Do transport protocol processing. */ 309 static void 310 transport_processing_oncpu(struct mbuf *m, int hlen, struct ip *ip, 311 struct sockaddr_in *nexthop) 312 { 313 /* 314 * Switch out to protocol's input routine. 
315 */ 316 if (nexthop && ip->ip_p == IPPROTO_TCP) { 317 /* TCP needs IPFORWARD info if available */ 318 struct m_hdr tag; 319 320 tag.mh_type = MT_TAG; 321 tag.mh_flags = PACKET_TAG_IPFORWARD; 322 tag.mh_data = (caddr_t)nexthop; 323 tag.mh_next = m; 324 325 (*inetsw[ip_protox[ip->ip_p]].pr_input) 326 ((struct mbuf *)&tag, hlen, ip->ip_p); 327 } else { 328 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen, ip->ip_p); 329 } 330 } 331 332 struct netmsg_transport_packet { 333 struct lwkt_msg nm_lmsg; 334 netisr_fn_t nm_handler; 335 struct mbuf *nm_mbuf; 336 int nm_hlen; 337 boolean_t nm_hasnexthop; 338 struct sockaddr_in nm_nexthop; 339 }; 340 341 static void 342 transport_processing_handler(struct netmsg *msg0) 343 { 344 struct netmsg_transport_packet *msg = 345 (struct netmsg_transport_packet *)msg0; 346 struct sockaddr_in *nexthop; 347 struct ip *ip; 348 349 ip = mtod(msg->nm_mbuf, struct ip *); 350 nexthop = msg->nm_hasnexthop ? &msg->nm_nexthop : NULL; 351 transport_processing_oncpu(msg->nm_mbuf, msg->nm_hlen, ip, nexthop); 352 lwkt_replymsg(&msg0->nm_lmsg, 0); 353 } 354 355 static void 356 ip_input_handler(struct netmsg *msg0) 357 { 358 struct mbuf *m = ((struct netmsg_packet *)msg0)->nm_packet; 359 360 ip_input(m); 361 lwkt_replymsg(&msg0->nm_lmsg, 0); 362 } 363 364 /* 365 * Ip input routine. Checksum and byte swap header. If fragmented 366 * try to reassemble. Process options. Pass to next level. 
367 */ 368 void 369 ip_input(struct mbuf *m) 370 { 371 struct ip *ip; 372 struct ipq *fp; 373 struct in_ifaddr *ia = NULL; 374 struct ifaddr *ifa; 375 int i, hlen, checkif; 376 u_short sum; 377 struct in_addr pkt_dst; 378 u_int32_t divert_info = 0; /* packet divert/tee info */ 379 struct ip_fw_args args; 380 boolean_t using_srcrt = FALSE; /* forward (by PFIL_HOOKS) */ 381 boolean_t needredispatch = FALSE; 382 #ifdef PFIL_HOOKS 383 struct in_addr odst; /* original dst address(NAT) */ 384 #endif 385 #ifdef FAST_IPSEC 386 struct m_tag *mtag; 387 struct tdb_ident *tdbi; 388 struct secpolicy *sp; 389 int s, error; 390 #endif 391 392 args.eh = NULL; 393 args.oif = NULL; 394 args.rule = NULL; 395 args.divert_rule = 0; /* divert cookie */ 396 args.next_hop = NULL; 397 398 /* Grab info from MT_TAG mbufs prepended to the chain. */ 399 for (; m && m->m_type == MT_TAG; m = m->m_next) { 400 switch(m->_m_tag_id) { 401 default: 402 printf("ip_input: unrecognised MT_TAG tag %d\n", 403 m->_m_tag_id); 404 break; 405 406 case PACKET_TAG_DUMMYNET: 407 args.rule = ((struct dn_pkt *)m)->rule; 408 break; 409 410 case PACKET_TAG_DIVERT: 411 args.divert_rule = (int)m->m_hdr.mh_data & 0xffff; 412 break; 413 414 case PACKET_TAG_IPFORWARD: 415 args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; 416 break; 417 } 418 } 419 420 KASSERT(m != NULL && (m->m_flags & M_PKTHDR) != 0, 421 ("ip_input: no HDR")); 422 423 if (args.rule) { /* dummynet already filtered us */ 424 ip = mtod(m, struct ip *); 425 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 426 goto iphack; 427 } 428 429 ipstat.ips_total++; 430 431 /* length checks already done in ip_demux() */ 432 KASSERT(m->m_len >= sizeof(ip), ("IP header not in one mbuf")); 433 434 ip = mtod(m, struct ip *); 435 436 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) { 437 ipstat.ips_badvers++; 438 goto bad; 439 } 440 441 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 442 /* length checks already done in ip_demux() */ 443 KASSERT(hlen >= sizeof(struct ip), ("IP header len too small")); 
444 KASSERT(m->m_len >= hlen, ("packet shorter than IP header length")); 445 446 /* 127/8 must not appear on wire - RFC1122 */ 447 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 448 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 449 if (!(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK)) { 450 ipstat.ips_badaddr++; 451 goto bad; 452 } 453 } 454 455 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 456 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 457 } else { 458 if (hlen == sizeof(struct ip)) { 459 sum = in_cksum_hdr(ip); 460 } else { 461 sum = in_cksum(m, hlen); 462 } 463 } 464 if (sum) { 465 ipstat.ips_badsum++; 466 goto bad; 467 } 468 469 /* 470 * Convert fields to host representation. 471 */ 472 ip->ip_len = ntohs(ip->ip_len); 473 if (ip->ip_len < hlen) { 474 ipstat.ips_badlen++; 475 goto bad; 476 } 477 ip->ip_off = ntohs(ip->ip_off); 478 479 /* 480 * Check that the amount of data in the buffers 481 * is as at least much as the IP header would have us expect. 482 * Trim mbufs if longer than we expect. 483 * Drop packet if shorter than we expect. 484 */ 485 if (m->m_pkthdr.len < ip->ip_len) { 486 ipstat.ips_tooshort++; 487 goto bad; 488 } 489 if (m->m_pkthdr.len > ip->ip_len) { 490 if (m->m_len == m->m_pkthdr.len) { 491 m->m_len = ip->ip_len; 492 m->m_pkthdr.len = ip->ip_len; 493 } else 494 m_adj(m, ip->ip_len - m->m_pkthdr.len); 495 } 496 #if defined(IPSEC) && !defined(IPSEC_FILTERGIF) 497 /* 498 * Bypass packet filtering for packets from a tunnel (gif). 499 */ 500 if (ipsec_gethist(m, NULL)) 501 goto pass; 502 #endif 503 504 /* 505 * IpHack's section. 506 * Right now when no processing on packet has done 507 * and it is still fresh out of network we do our black 508 * deals with it. 509 * - Firewall: deny/allow/divert 510 * - Xlate: translate packet's addr/port (NAT). 511 * - Pipe: pass pkt through dummynet. 512 * - Wrap: fake packet's addr/port <unimpl.> 513 * - Encapsulate: put it in another IP and send out. 
<unimp.> 514 */ 515 516 iphack: 517 518 #ifdef PFIL_HOOKS 519 /* 520 * Run through list of hooks for input packets. 521 * 522 * NB: Beware of the destination address changing (e.g. 523 * by NAT rewriting). When this happens, tell 524 * ip_forward to do the right thing. 525 */ 526 odst = ip->ip_dst; 527 if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN)) 528 return; 529 if (m == NULL) /* consumed by filter */ 530 return; 531 ip = mtod(m, struct ip *); 532 using_srcrt = (odst.s_addr != ip->ip_dst.s_addr); 533 #endif 534 535 if (fw_enable && IPFW_LOADED) { 536 /* 537 * If we've been forwarded from the output side, then 538 * skip the firewall a second time 539 */ 540 if (args.next_hop) 541 goto ours; 542 543 args.m = m; 544 i = ip_fw_chk_ptr(&args); 545 m = args.m; 546 547 if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */ 548 if (m) 549 m_freem(m); 550 return; 551 } 552 ip = mtod(m, struct ip *); /* just in case m changed */ 553 if (i == 0 && args.next_hop == NULL) /* common case */ 554 goto pass; 555 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG)) { 556 /* Send packet to the appropriate pipe */ 557 ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args); 558 return; 559 } 560 #ifdef IPDIVERT 561 if (i != 0 && !(i & IP_FW_PORT_DYNT_FLAG)) { 562 /* Divert or tee packet */ 563 divert_info = i; 564 goto ours; 565 } 566 #endif 567 if (i == 0 && args.next_hop != NULL) 568 goto pass; 569 /* 570 * if we get here, the packet must be dropped 571 */ 572 m_freem(m); 573 return; 574 } 575 pass: 576 577 /* 578 * Process options and, if not destined for us, 579 * ship it on. ip_dooptions returns 1 when an 580 * error was detected (causing an icmp message 581 * to be sent and the original packet to be freed). 
582 */ 583 ip_nhops = 0; /* for source routed packets */ 584 if (hlen > sizeof(struct ip) && ip_dooptions(m, 0, args.next_hop)) 585 return; 586 587 /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 588 * matter if it is destined to another node, or whether it is 589 * a multicast one, RSVP wants it! and prevents it from being forwarded 590 * anywhere else. Also checks if the rsvp daemon is running before 591 * grabbing the packet. 592 */ 593 if (rsvp_on && ip->ip_p == IPPROTO_RSVP) 594 goto ours; 595 596 /* 597 * Check our list of addresses, to see if the packet is for us. 598 * If we don't have any addresses, assume any unicast packet 599 * we receive might be for us (and let the upper layers deal 600 * with it). 601 */ 602 if (TAILQ_EMPTY(&in_ifaddrhead) && !(m->m_flags & (M_MCAST | M_BCAST))) 603 goto ours; 604 605 /* 606 * Cache the destination address of the packet; this may be 607 * changed by use of 'ipfw fwd'. 608 */ 609 pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; 610 611 /* 612 * Enable a consistency check between the destination address 613 * and the arrival interface for a unicast packet (the RFC 1122 614 * strong ES model) if IP forwarding is disabled and the packet 615 * is not locally generated and the packet is not subject to 616 * 'ipfw fwd'. 617 * 618 * XXX - Checking also should be disabled if the destination 619 * address is ipnat'ed to a different interface. 620 * 621 * XXX - Checking is incompatible with IP aliases added 622 * to the loopback interface instead of the interface where 623 * the packets are received. 624 */ 625 checkif = ip_checkinterface && 626 !ipforwarding && 627 m->m_pkthdr.rcvif != NULL && 628 !(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) && 629 (args.next_hop == NULL); 630 631 /* 632 * Check for exact addresses in the hash bucket. 
633 */ 634 LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) { 635 /* 636 * If the address matches, verify that the packet 637 * arrived via the correct interface if checking is 638 * enabled. 639 */ 640 if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && 641 (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) 642 goto ours; 643 } 644 /* 645 * Check for broadcast addresses. 646 * 647 * Only accept broadcast packets that arrive via the matching 648 * interface. Reception of forwarded directed broadcasts would 649 * be handled via ip_forward() and ether_output() with the loopback 650 * into the stack for SIMPLEX interfaces handled by ether_output(). 651 */ 652 if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 653 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 654 if (ifa->ifa_addr->sa_family != AF_INET) 655 continue; 656 ia = ifatoia(ifa); 657 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 658 pkt_dst.s_addr) 659 goto ours; 660 if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr) 661 goto ours; 662 #ifdef BOOTP_COMPAT 663 if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) 664 goto ours; 665 #endif 666 } 667 } 668 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 669 struct in_multi *inm; 670 if (ip_mrouter) { 671 /* 672 * If we are acting as a multicast router, all 673 * incoming multicast packets are passed to the 674 * kernel-level multicast forwarding function. 675 * The packet is returned (relatively) intact; if 676 * ip_mforward() returns a non-zero value, the packet 677 * must be discarded, else it may be accepted below. 678 */ 679 if (ip_mforward && 680 ip_mforward(ip, m->m_pkthdr.rcvif, m, NULL) != 0) { 681 ipstat.ips_cantforward++; 682 m_freem(m); 683 return; 684 } 685 686 /* 687 * The process-level routing daemon needs to receive 688 * all multicast IGMP packets, whether or not this 689 * host belongs to their destination groups. 
690 */ 691 if (ip->ip_p == IPPROTO_IGMP) 692 goto ours; 693 ipstat.ips_forward++; 694 } 695 /* 696 * See if we belong to the destination multicast group on the 697 * arrival interface. 698 */ 699 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); 700 if (inm == NULL) { 701 ipstat.ips_notmember++; 702 m_freem(m); 703 return; 704 } 705 goto ours; 706 } 707 if (ip->ip_dst.s_addr == INADDR_BROADCAST) 708 goto ours; 709 if (ip->ip_dst.s_addr == INADDR_ANY) 710 goto ours; 711 712 /* 713 * FAITH(Firewall Aided Internet Translator) 714 */ 715 if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { 716 if (ip_keepfaith) { 717 if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 718 goto ours; 719 } 720 m_freem(m); 721 return; 722 } 723 724 /* 725 * Not for us; forward if possible and desirable. 726 */ 727 if (!ipforwarding) { 728 ipstat.ips_cantforward++; 729 m_freem(m); 730 } else { 731 #ifdef IPSEC 732 /* 733 * Enforce inbound IPsec SPD. 734 */ 735 if (ipsec4_in_reject(m, NULL)) { 736 ipsecstat.in_polvio++; 737 goto bad; 738 } 739 #endif 740 #ifdef FAST_IPSEC 741 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); 742 s = splnet(); 743 if (mtag != NULL) { 744 tdbi = (struct tdb_ident *)(mtag + 1); 745 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); 746 } else { 747 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 748 IP_FORWARDING, &error); 749 } 750 if (sp == NULL) { /* NB: can happen if error */ 751 splx(s); 752 /*XXX error stat???*/ 753 DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ 754 goto bad; 755 } 756 757 /* 758 * Check security policy against packet attributes. 759 */ 760 error = ipsec_in_reject(sp, m); 761 KEY_FREESP(&sp); 762 splx(s); 763 if (error) { 764 ipstat.ips_cantforward++; 765 goto bad; 766 } 767 #endif 768 ip_forward(m, using_srcrt, args.next_hop); 769 } 770 return; 771 772 ours: 773 774 /* 775 * IPSTEALTH: Process non-routing options only 776 * if the packet is destined for us. 
777 */ 778 if (ipstealth && 779 hlen > sizeof(struct ip) && 780 ip_dooptions(m, 1, args.next_hop)) 781 return; 782 783 /* Count the packet in the ip address stats */ 784 if (ia != NULL) { 785 ia->ia_ifa.if_ipackets++; 786 ia->ia_ifa.if_ibytes += m->m_pkthdr.len; 787 } 788 789 /* 790 * If offset or IP_MF are set, must reassemble. 791 * Otherwise, nothing need be done. 792 * (We could look in the reassembly queue to see 793 * if the packet was previously fragmented, 794 * but it's not worth the time; just let them time out.) 795 */ 796 if (ip->ip_off & (IP_MF | IP_OFFMASK)) { 797 798 /* If maxnipq is 0, never accept fragments. */ 799 if (maxnipq == 0) { 800 ipstat.ips_fragments++; 801 ipstat.ips_fragdropped++; 802 goto bad; 803 } 804 805 sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); 806 /* 807 * Look for queue of fragments 808 * of this datagram. 809 */ 810 for (fp = ipq[sum].next; fp != &ipq[sum]; fp = fp->next) 811 if (ip->ip_id == fp->ipq_id && 812 ip->ip_src.s_addr == fp->ipq_src.s_addr && 813 ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 814 ip->ip_p == fp->ipq_p) 815 goto found; 816 817 fp = NULL; 818 819 /* 820 * Enforce upper bound on number of fragmented packets 821 * for which we attempt reassembly; 822 * If maxnipq is -1, accept all fragments without limitation. 823 */ 824 if ((nipq > maxnipq) && (maxnipq > 0)) { 825 /* 826 * drop something from the tail of the current queue 827 * before proceeding further 828 */ 829 if (ipq[sum].prev == &ipq[sum]) { /* gak */ 830 for (i = 0; i < IPREASS_NHASH; i++) { 831 if (ipq[i].prev != &ipq[i]) { 832 ipstat.ips_fragtimeout += 833 ipq[i].prev->ipq_nfrags; 834 ip_freef(ipq[i].prev); 835 break; 836 } 837 } 838 } else { 839 ipstat.ips_fragtimeout += 840 ipq[sum].prev->ipq_nfrags; 841 ip_freef(ipq[sum].prev); 842 } 843 } 844 found: 845 /* 846 * Adjust ip_len to not reflect header, 847 * convert offset of this to bytes. 
848 */ 849 ip->ip_len -= hlen; 850 if (ip->ip_off & IP_MF) { 851 /* 852 * Make sure that fragments have a data length 853 * that's a non-zero multiple of 8 bytes. 854 */ 855 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { 856 ipstat.ips_toosmall++; /* XXX */ 857 goto bad; 858 } 859 m->m_flags |= M_FRAG; 860 } else 861 m->m_flags &= ~M_FRAG; 862 ip->ip_off <<= 3; 863 864 /* 865 * Attempt reassembly; if it succeeds, proceed. 866 * ip_reass() will return a different mbuf, and update 867 * the divert info in divert_info and args.divert_rule. 868 */ 869 ipstat.ips_fragments++; 870 m->m_pkthdr.header = ip; 871 m = ip_reass(m, fp, &ipq[sum], &divert_info, &args.divert_rule); 872 if (m == NULL) 873 return; 874 ipstat.ips_reassembled++; 875 needredispatch = TRUE; 876 ip = mtod(m, struct ip *); 877 /* Get the header length of the reassembled packet */ 878 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 879 #ifdef IPDIVERT 880 /* Restore original checksum before diverting packet */ 881 if (divert_info != 0) { 882 ip->ip_len += hlen; 883 ip->ip_len = htons(ip->ip_len); 884 ip->ip_off = htons(ip->ip_off); 885 ip->ip_sum = 0; 886 if (hlen == sizeof(struct ip)) 887 ip->ip_sum = in_cksum_hdr(ip); 888 else 889 ip->ip_sum = in_cksum(m, hlen); 890 ip->ip_off = ntohs(ip->ip_off); 891 ip->ip_len = ntohs(ip->ip_len); 892 ip->ip_len -= hlen; 893 } 894 #endif 895 } else { 896 ip->ip_len -= hlen; 897 } 898 899 #ifdef IPDIVERT 900 /* 901 * Divert or tee packet to the divert protocol if required. 
902 */ 903 if (divert_info != 0) { 904 struct mbuf *clone = NULL; 905 906 /* Clone packet if we're doing a 'tee' */ 907 if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0) 908 clone = m_dup(m, M_DONTWAIT); 909 910 /* Restore packet header fields to original values */ 911 ip->ip_len += hlen; 912 ip->ip_len = htons(ip->ip_len); 913 ip->ip_off = htons(ip->ip_off); 914 915 /* Deliver packet to divert input routine */ 916 divert_packet(m, 1, divert_info & 0xffff, args.divert_rule); 917 ipstat.ips_delivered++; 918 919 /* If 'tee', continue with original packet */ 920 if (clone == NULL) 921 return; 922 m = clone; 923 ip = mtod(m, struct ip *); 924 ip->ip_len += hlen; 925 /* 926 * Jump backwards to complete processing of the 927 * packet. But first clear divert_info to avoid 928 * entering this block again. 929 * We do not need to clear args.divert_rule 930 * or args.next_hop as they will not be used. 931 */ 932 divert_info = 0; 933 goto pass; 934 } 935 #endif 936 937 #ifdef IPSEC 938 /* 939 * enforce IPsec policy checking if we are seeing last header. 940 * note that we do not visit this with protocols with pcb layer 941 * code - like udp/tcp/raw ip. 942 */ 943 if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) && 944 ipsec4_in_reject(m, NULL)) { 945 ipsecstat.in_polvio++; 946 goto bad; 947 } 948 #endif 949 #if FAST_IPSEC 950 /* 951 * enforce IPsec policy checking if we are seeing last header. 952 * note that we do not visit this with protocols with pcb layer 953 * code - like udp/tcp/raw ip. 954 */ 955 if (inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) { 956 /* 957 * Check if the packet has already had IPsec processing 958 * done. If so, then just pass it along. This tag gets 959 * set during AH, ESP, etc. input handling, before the 960 * packet is returned to the ip input queue for delivery. 
961 */ 962 mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); 963 s = splnet(); 964 if (mtag != NULL) { 965 tdbi = (struct tdb_ident *)(mtag + 1); 966 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); 967 } else { 968 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, 969 IP_FORWARDING, &error); 970 } 971 if (sp != NULL) { 972 /* 973 * Check security policy against packet attributes. 974 */ 975 error = ipsec_in_reject(sp, m); 976 KEY_FREESP(&sp); 977 } else { 978 /* XXX error stat??? */ 979 error = EINVAL; 980 DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ 981 goto bad; 982 } 983 splx(s); 984 if (error) 985 goto bad; 986 } 987 #endif /* FAST_IPSEC */ 988 989 ipstat.ips_delivered++; 990 if (needredispatch) { 991 struct netmsg_transport_packet *msg; 992 lwkt_port_t port; 993 994 msg = malloc(sizeof(struct netmsg_transport_packet), 995 M_LWKTMSG, M_NOWAIT); 996 if (!msg) 997 goto bad; 998 lwkt_initmsg_rp(&msg->nm_lmsg, &netisr_afree_rport, 999 CMD_NETMSG_ONCPU); 1000 msg->nm_handler = transport_processing_handler; 1001 msg->nm_mbuf = m; 1002 msg->nm_hlen = hlen; 1003 msg->nm_hasnexthop = (args.next_hop != NULL); 1004 if (msg->nm_hasnexthop) 1005 msg->nm_nexthop = *args.next_hop; /* structure copy */ 1006 1007 ip->ip_off = htons(ip->ip_off); 1008 ip->ip_len = htons(ip->ip_len); 1009 port = ip_mport(m); 1010 ip->ip_len = ntohs(ip->ip_len); 1011 ip->ip_off = ntohs(ip->ip_off); 1012 1013 lwkt_sendmsg(port, &msg->nm_lmsg); 1014 } else { 1015 transport_processing_oncpu(m, hlen, ip, args.next_hop); 1016 } 1017 return; 1018 1019 bad: 1020 m_freem(m); 1021 } 1022 1023 /* 1024 * Take incoming datagram fragment and try to reassemble it into 1025 * whole datagram. If a chain for reassembly of this datagram already 1026 * exists, then it is given as fp; otherwise have to make a chain. 1027 * 1028 * When IPDIVERT enabled, keep additional state with each packet that 1029 * tells us if we need to divert or tee the packet we're building. 
1030 * In particular, *divinfo includes the port and TEE flag, 1031 * *divert_rule is the number of the matching rule. 1032 */ 1033 1034 static struct mbuf * 1035 ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where, 1036 u_int32_t *divinfo, u_int16_t *divert_rule) 1037 { 1038 struct ip *ip = mtod(m, struct ip *); 1039 struct mbuf *p = NULL, *q, *nq; 1040 struct mbuf *t; 1041 int hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1042 int i, next; 1043 1044 /* 1045 * Presence of header sizes in mbufs 1046 * would confuse code below. 1047 */ 1048 m->m_data += hlen; 1049 m->m_len -= hlen; 1050 1051 /* 1052 * If first fragment to arrive, create a reassembly queue. 1053 */ 1054 if (fp == NULL) { 1055 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL) 1056 goto dropfrag; 1057 fp = mtod(t, struct ipq *); 1058 insque(fp, where); 1059 nipq++; 1060 fp->ipq_nfrags = 1; 1061 fp->ipq_ttl = IPFRAGTTL; 1062 fp->ipq_p = ip->ip_p; 1063 fp->ipq_id = ip->ip_id; 1064 fp->ipq_src = ip->ip_src; 1065 fp->ipq_dst = ip->ip_dst; 1066 fp->ipq_frags = m; 1067 m->m_nextpkt = NULL; 1068 #ifdef IPDIVERT 1069 fp->ipq_div_info = 0; 1070 fp->ipq_div_cookie = 0; 1071 #endif 1072 goto inserted; 1073 } else { 1074 fp->ipq_nfrags++; 1075 } 1076 1077 #define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) 1078 1079 /* 1080 * Find a segment which begins after this one does. 1081 */ 1082 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) 1083 if (GETIP(q)->ip_off > ip->ip_off) 1084 break; 1085 1086 /* 1087 * If there is a preceding segment, it may provide some of 1088 * our data already. If so, drop the data from the incoming 1089 * segment. If it provides all of our data, drop us, otherwise 1090 * stick new segment in the proper place. 1091 * 1092 * If some of the data is dropped from the the preceding 1093 * segment, then it's checksum is invalidated. 
1094 */ 1095 if (p) { 1096 i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off; 1097 if (i > 0) { 1098 if (i >= ip->ip_len) 1099 goto dropfrag; 1100 m_adj(m, i); 1101 m->m_pkthdr.csum_flags = 0; 1102 ip->ip_off += i; 1103 ip->ip_len -= i; 1104 } 1105 m->m_nextpkt = p->m_nextpkt; 1106 p->m_nextpkt = m; 1107 } else { 1108 m->m_nextpkt = fp->ipq_frags; 1109 fp->ipq_frags = m; 1110 } 1111 1112 /* 1113 * While we overlap succeeding segments trim them or, 1114 * if they are completely covered, dequeue them. 1115 */ 1116 for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; 1117 q = nq) { 1118 i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; 1119 if (i < GETIP(q)->ip_len) { 1120 GETIP(q)->ip_len -= i; 1121 GETIP(q)->ip_off += i; 1122 m_adj(q, i); 1123 q->m_pkthdr.csum_flags = 0; 1124 break; 1125 } 1126 nq = q->m_nextpkt; 1127 m->m_nextpkt = nq; 1128 ipstat.ips_fragdropped++; 1129 fp->ipq_nfrags--; 1130 m_freem(q); 1131 } 1132 1133 inserted: 1134 1135 #ifdef IPDIVERT 1136 /* 1137 * Transfer firewall instructions to the fragment structure. 1138 * Only trust info in the fragment at offset 0. 1139 */ 1140 if (ip->ip_off == 0) { 1141 fp->ipq_div_info = *divinfo; 1142 fp->ipq_div_cookie = *divert_rule; 1143 } 1144 *divinfo = 0; 1145 *divert_rule = 0; 1146 #endif 1147 1148 /* 1149 * Check for complete reassembly and perform frag per packet 1150 * limiting. 1151 * 1152 * Frag limiting is performed here so that the nth frag has 1153 * a chance to complete the packet before we drop the packet. 1154 * As a result, n+1 frags are actually allowed per packet, but 1155 * only n will ever be stored. (n = maxfragsperpacket.) 
1156 * 1157 */ 1158 next = 0; 1159 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { 1160 if (GETIP(q)->ip_off != next) { 1161 if (fp->ipq_nfrags > maxfragsperpacket) { 1162 ipstat.ips_fragdropped += fp->ipq_nfrags; 1163 ip_freef(fp); 1164 } 1165 return (NULL); 1166 } 1167 next += GETIP(q)->ip_len; 1168 } 1169 /* Make sure the last packet didn't have the IP_MF flag */ 1170 if (p->m_flags & M_FRAG) { 1171 if (fp->ipq_nfrags > maxfragsperpacket) { 1172 ipstat.ips_fragdropped += fp->ipq_nfrags; 1173 ip_freef(fp); 1174 } 1175 return (NULL); 1176 } 1177 1178 /* 1179 * Reassembly is complete. Make sure the packet is a sane size. 1180 */ 1181 q = fp->ipq_frags; 1182 ip = GETIP(q); 1183 if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) { 1184 ipstat.ips_toolong++; 1185 ipstat.ips_fragdropped += fp->ipq_nfrags; 1186 ip_freef(fp); 1187 return (NULL); 1188 } 1189 1190 /* 1191 * Concatenate fragments. 1192 */ 1193 m = q; 1194 t = m->m_next; 1195 m->m_next = NULL; 1196 m_cat(m, t); 1197 nq = q->m_nextpkt; 1198 q->m_nextpkt = NULL; 1199 for (q = nq; q != NULL; q = nq) { 1200 nq = q->m_nextpkt; 1201 q->m_nextpkt = NULL; 1202 m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; 1203 m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; 1204 m_cat(m, q); 1205 } 1206 1207 #ifdef IPDIVERT 1208 /* 1209 * Extract firewall instructions from the fragment structure. 1210 */ 1211 *divinfo = fp->ipq_div_info; 1212 *divert_rule = fp->ipq_div_cookie; 1213 #endif 1214 1215 /* 1216 * Create header for new ip packet by 1217 * modifying header of first packet; 1218 * dequeue and discard fragment reassembly header. 1219 * Make header visible. 
1220 */ 1221 ip->ip_len = next; 1222 ip->ip_src = fp->ipq_src; 1223 ip->ip_dst = fp->ipq_dst; 1224 remque(fp); 1225 nipq--; 1226 (void) m_free(dtom(fp)); 1227 m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2); 1228 m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2); 1229 /* some debugging cruft by sklower, below, will go away soon */ 1230 if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */ 1231 int plen = 0; 1232 1233 for (t = m; t; t = t->m_next) 1234 plen += t->m_len; 1235 m->m_pkthdr.len = plen; 1236 } 1237 return (m); 1238 1239 dropfrag: 1240 #ifdef IPDIVERT 1241 *divinfo = 0; 1242 *divert_rule = 0; 1243 #endif 1244 ipstat.ips_fragdropped++; 1245 if (fp != NULL) 1246 fp->ipq_nfrags--; 1247 m_freem(m); 1248 return (NULL); 1249 1250 #undef GETIP 1251 } 1252 1253 /* 1254 * Free a fragment reassembly header and all 1255 * associated datagrams. 1256 */ 1257 static void 1258 ip_freef(struct ipq *fp) 1259 { 1260 struct mbuf *q; 1261 1262 while (fp->ipq_frags) { 1263 q = fp->ipq_frags; 1264 fp->ipq_frags = q->m_nextpkt; 1265 m_freem(q); 1266 } 1267 remque(fp); 1268 (void) m_free(dtom(fp)); 1269 nipq--; 1270 } 1271 1272 /* 1273 * IP timer processing; 1274 * if a timer expires on a reassembly 1275 * queue, discard it. 1276 */ 1277 void 1278 ip_slowtimo() 1279 { 1280 struct ipq *fp; 1281 int s = splnet(); 1282 int i; 1283 1284 for (i = 0; i < IPREASS_NHASH; i++) { 1285 fp = ipq[i].next; 1286 if (fp == NULL) 1287 continue; 1288 while (fp != &ipq[i]) { 1289 --fp->ipq_ttl; 1290 fp = fp->next; 1291 if (fp->prev->ipq_ttl == 0) { 1292 ipstat.ips_fragtimeout += fp->prev->ipq_nfrags; 1293 ip_freef(fp->prev); 1294 } 1295 } 1296 } 1297 /* 1298 * If we are over the maximum number of fragments 1299 * (due to the limit being lowered), drain off 1300 * enough to get down to the new limit. 
1301 */ 1302 if (maxnipq >= 0 && nipq > maxnipq) { 1303 for (i = 0; i < IPREASS_NHASH; i++) { 1304 while (nipq > maxnipq && 1305 (ipq[i].next != &ipq[i])) { 1306 ipstat.ips_fragdropped += 1307 ipq[i].next->ipq_nfrags; 1308 ip_freef(ipq[i].next); 1309 } 1310 } 1311 } 1312 ipflow_slowtimo(); 1313 splx(s); 1314 } 1315 1316 /* 1317 * Drain off all datagram fragments. 1318 */ 1319 void 1320 ip_drain() 1321 { 1322 int i; 1323 1324 for (i = 0; i < IPREASS_NHASH; i++) { 1325 while (ipq[i].next != &ipq[i]) { 1326 ipstat.ips_fragdropped += ipq[i].next->ipq_nfrags; 1327 ip_freef(ipq[i].next); 1328 } 1329 } 1330 in_rtqdrain(); 1331 } 1332 1333 /* 1334 * Do option processing on a datagram, 1335 * possibly discarding it if bad options are encountered, 1336 * or forwarding it if source-routed. 1337 * The pass argument is used when operating in the IPSTEALTH 1338 * mode to tell what options to process: 1339 * [LS]SRR (pass 0) or the others (pass 1). 1340 * The reason for as many as two passes is that when doing IPSTEALTH, 1341 * non-routing options should be processed only if the packet is for us. 1342 * Returns 1 if packet has been forwarded/freed, 1343 * 0 if the packet should be processed further. 
1344 */ 1345 static int 1346 ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop) 1347 { 1348 struct ip *ip = mtod(m, struct ip *); 1349 u_char *cp; 1350 struct in_ifaddr *ia; 1351 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB; 1352 boolean_t forward = FALSE; 1353 struct in_addr *sin, dst; 1354 n_time ntime; 1355 1356 dst = ip->ip_dst; 1357 cp = (u_char *)(ip + 1); 1358 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip); 1359 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1360 opt = cp[IPOPT_OPTVAL]; 1361 if (opt == IPOPT_EOL) 1362 break; 1363 if (opt == IPOPT_NOP) 1364 optlen = 1; 1365 else { 1366 if (cnt < IPOPT_OLEN + sizeof(*cp)) { 1367 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1368 goto bad; 1369 } 1370 optlen = cp[IPOPT_OLEN]; 1371 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) { 1372 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1373 goto bad; 1374 } 1375 } 1376 switch (opt) { 1377 1378 default: 1379 break; 1380 1381 /* 1382 * Source routing with record. 1383 * Find interface with current destination address. 1384 * If none on this machine then drop if strictly routed, 1385 * or do nothing if loosely routed. 1386 * Record interface address and bring up next address 1387 * component. If strictly routed make sure next 1388 * address is on directly accessible net. 
1389 */ 1390 case IPOPT_LSRR: 1391 case IPOPT_SSRR: 1392 if (ipstealth && pass > 0) 1393 break; 1394 if (optlen < IPOPT_OFFSET + sizeof(*cp)) { 1395 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1396 goto bad; 1397 } 1398 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { 1399 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1400 goto bad; 1401 } 1402 ipaddr.sin_addr = ip->ip_dst; 1403 ia = (struct in_ifaddr *) 1404 ifa_ifwithaddr((struct sockaddr *)&ipaddr); 1405 if (ia == NULL) { 1406 if (opt == IPOPT_SSRR) { 1407 type = ICMP_UNREACH; 1408 code = ICMP_UNREACH_SRCFAIL; 1409 goto bad; 1410 } 1411 if (!ip_dosourceroute) 1412 goto nosourcerouting; 1413 /* 1414 * Loose routing, and not at next destination 1415 * yet; nothing to do except forward. 1416 */ 1417 break; 1418 } 1419 off--; /* 0 origin */ 1420 if (off > optlen - (int)sizeof(struct in_addr)) { 1421 /* 1422 * End of source route. Should be for us. 1423 */ 1424 if (!ip_acceptsourceroute) 1425 goto nosourcerouting; 1426 save_rte(cp, ip->ip_src); 1427 break; 1428 } 1429 if (ipstealth) 1430 goto dropit; 1431 if (!ip_dosourceroute) { 1432 if (ipforwarding) { 1433 char buf[16]; /* aaa.bbb.ccc.ddd\0 */ 1434 /* 1435 * Acting as a router, so generate ICMP 1436 */ 1437 nosourcerouting: 1438 strcpy(buf, inet_ntoa(ip->ip_dst)); 1439 log(LOG_WARNING, 1440 "attempted source route from %s to %s\n", 1441 inet_ntoa(ip->ip_src), buf); 1442 type = ICMP_UNREACH; 1443 code = ICMP_UNREACH_SRCFAIL; 1444 goto bad; 1445 } else { 1446 /* 1447 * Not acting as a router, 1448 * so silently drop. 
1449 */ 1450 dropit: 1451 ipstat.ips_cantforward++; 1452 m_freem(m); 1453 return (1); 1454 } 1455 } 1456 1457 /* 1458 * locate outgoing interface 1459 */ 1460 (void)memcpy(&ipaddr.sin_addr, cp + off, 1461 sizeof(ipaddr.sin_addr)); 1462 1463 if (opt == IPOPT_SSRR) { 1464 #define INA struct in_ifaddr * 1465 #define SA struct sockaddr * 1466 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) 1467 == NULL) 1468 ia = (INA)ifa_ifwithnet((SA)&ipaddr); 1469 } else 1470 ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt); 1471 if (ia == NULL) { 1472 type = ICMP_UNREACH; 1473 code = ICMP_UNREACH_SRCFAIL; 1474 goto bad; 1475 } 1476 ip->ip_dst = ipaddr.sin_addr; 1477 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), 1478 sizeof(struct in_addr)); 1479 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1480 /* 1481 * Let ip_intr's mcast routing check handle mcast pkts 1482 */ 1483 forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr)); 1484 break; 1485 1486 case IPOPT_RR: 1487 if (ipstealth && pass == 0) 1488 break; 1489 if (optlen < IPOPT_OFFSET + sizeof(*cp)) { 1490 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1491 goto bad; 1492 } 1493 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { 1494 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1495 goto bad; 1496 } 1497 /* 1498 * If no space remains, ignore. 1499 */ 1500 off--; /* 0 origin */ 1501 if (off > optlen - (int)sizeof(struct in_addr)) 1502 break; 1503 (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst, 1504 sizeof(ipaddr.sin_addr)); 1505 /* 1506 * locate outgoing interface; if we're the destination, 1507 * use the incoming interface (should be same). 
1508 */ 1509 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL && 1510 (ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt)) 1511 == NULL) { 1512 type = ICMP_UNREACH; 1513 code = ICMP_UNREACH_HOST; 1514 goto bad; 1515 } 1516 (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), 1517 sizeof(struct in_addr)); 1518 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1519 break; 1520 1521 case IPOPT_TS: 1522 if (ipstealth && pass == 0) 1523 break; 1524 code = cp - (u_char *)ip; 1525 if (optlen < 4 || optlen > 40) { 1526 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1527 goto bad; 1528 } 1529 if ((off = cp[IPOPT_OFFSET]) < 5) { 1530 code = &cp[IPOPT_OLEN] - (u_char *)ip; 1531 goto bad; 1532 } 1533 if (off > optlen - (int)sizeof(int32_t)) { 1534 cp[IPOPT_OFFSET + 1] += (1 << 4); 1535 if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) { 1536 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1537 goto bad; 1538 } 1539 break; 1540 } 1541 off--; /* 0 origin */ 1542 sin = (struct in_addr *)(cp + off); 1543 switch (cp[IPOPT_OFFSET + 1] & 0x0f) { 1544 1545 case IPOPT_TS_TSONLY: 1546 break; 1547 1548 case IPOPT_TS_TSANDADDR: 1549 if (off + sizeof(n_time) + 1550 sizeof(struct in_addr) > optlen) { 1551 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1552 goto bad; 1553 } 1554 ipaddr.sin_addr = dst; 1555 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr, 1556 m->m_pkthdr.rcvif); 1557 if (ia == NULL) 1558 continue; 1559 (void)memcpy(sin, &IA_SIN(ia)->sin_addr, 1560 sizeof(struct in_addr)); 1561 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1562 off += sizeof(struct in_addr); 1563 break; 1564 1565 case IPOPT_TS_PRESPEC: 1566 if (off + sizeof(n_time) + 1567 sizeof(struct in_addr) > optlen) { 1568 code = &cp[IPOPT_OFFSET] - (u_char *)ip; 1569 goto bad; 1570 } 1571 (void)memcpy(&ipaddr.sin_addr, sin, 1572 sizeof(struct in_addr)); 1573 if (ifa_ifwithaddr((SA)&ipaddr) == NULL) 1574 continue; 1575 cp[IPOPT_OFFSET] += sizeof(struct in_addr); 1576 off += sizeof(struct in_addr); 1577 break; 1578 1579 default: 1580 code = &cp[IPOPT_OFFSET + 1] - (u_char 
*)ip; 1581 goto bad; 1582 } 1583 ntime = iptime(); 1584 (void)memcpy(cp + off, &ntime, sizeof(n_time)); 1585 cp[IPOPT_OFFSET] += sizeof(n_time); 1586 } 1587 } 1588 if (forward && ipforwarding) { 1589 ip_forward(m, 1, next_hop); 1590 return (1); 1591 } 1592 return (0); 1593 bad: 1594 icmp_error(m, type, code, 0, NULL); 1595 ipstat.ips_badoptions++; 1596 return (1); 1597 } 1598 1599 /* 1600 * Given address of next destination (final or next hop), 1601 * return internet address info of interface to be used to get there. 1602 */ 1603 struct in_ifaddr * 1604 ip_rtaddr(struct in_addr dst, struct route *rt) 1605 { 1606 struct sockaddr_in *sin; 1607 1608 sin = (struct sockaddr_in *)&rt->ro_dst; 1609 1610 if (rt->ro_rt == NULL || dst.s_addr != sin->sin_addr.s_addr) { 1611 if (rt->ro_rt) { 1612 RTFREE(rt->ro_rt); 1613 rt->ro_rt = NULL; 1614 } 1615 sin->sin_family = AF_INET; 1616 sin->sin_len = sizeof(*sin); 1617 sin->sin_addr = dst; 1618 rtalloc_ign(rt, RTF_PRCLONING); 1619 } 1620 1621 if (rt->ro_rt == NULL) 1622 return (NULL); 1623 1624 return (ifatoia(rt->ro_rt->rt_ifa)); 1625 } 1626 1627 /* 1628 * Save incoming source route for use in replies, 1629 * to be picked up later by ip_srcroute if the receiver is interested. 1630 */ 1631 void 1632 save_rte(u_char *option, struct in_addr dst) 1633 { 1634 unsigned olen; 1635 1636 olen = option[IPOPT_OLEN]; 1637 #ifdef DIAGNOSTIC 1638 if (ipprintfs) 1639 printf("save_rte: olen %d\n", olen); 1640 #endif 1641 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst))) 1642 return; 1643 bcopy(option, ip_srcrt.srcopt, olen); 1644 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); 1645 ip_srcrt.dst = dst; 1646 } 1647 1648 /* 1649 * Retrieve incoming source route for use in replies, 1650 * in the same form used by setsockopt. 1651 * The first hop is placed before the options, will be removed later. 
1652 */ 1653 struct mbuf * 1654 ip_srcroute() 1655 { 1656 struct in_addr *p, *q; 1657 struct mbuf *m; 1658 1659 if (ip_nhops == 0) 1660 return (NULL); 1661 m = m_get(M_DONTWAIT, MT_HEADER); 1662 if (m == NULL) 1663 return (NULL); 1664 1665 #define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt)) 1666 1667 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */ 1668 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) + 1669 OPTSIZ; 1670 #ifdef DIAGNOSTIC 1671 if (ipprintfs) 1672 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len); 1673 #endif 1674 1675 /* 1676 * First save first hop for return route 1677 */ 1678 p = &ip_srcrt.route[ip_nhops - 1]; 1679 *(mtod(m, struct in_addr *)) = *p--; 1680 #ifdef DIAGNOSTIC 1681 if (ipprintfs) 1682 printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr)); 1683 #endif 1684 1685 /* 1686 * Copy option fields and padding (nop) to mbuf. 1687 */ 1688 ip_srcrt.nop = IPOPT_NOP; 1689 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF; 1690 (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr), &ip_srcrt.nop, 1691 OPTSIZ); 1692 q = (struct in_addr *)(mtod(m, caddr_t) + 1693 sizeof(struct in_addr) + OPTSIZ); 1694 #undef OPTSIZ 1695 /* 1696 * Record return path as an IP source route, 1697 * reversing the path (pointers are now aligned). 1698 */ 1699 while (p >= ip_srcrt.route) { 1700 #ifdef DIAGNOSTIC 1701 if (ipprintfs) 1702 printf(" %lx", ntohl(q->s_addr)); 1703 #endif 1704 *q++ = *p--; 1705 } 1706 /* 1707 * Last hop goes to final destination. 1708 */ 1709 *q = ip_srcrt.dst; 1710 #ifdef DIAGNOSTIC 1711 if (ipprintfs) 1712 printf(" %lx\n", ntohl(q->s_addr)); 1713 #endif 1714 return (m); 1715 } 1716 1717 /* 1718 * Strip out IP options. 
1719 */ 1720 void 1721 ip_stripoptions(struct mbuf *m) 1722 { 1723 int datalen; 1724 struct ip *ip = mtod(m, struct ip *); 1725 caddr_t opts; 1726 int optlen; 1727 1728 optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip); 1729 opts = (caddr_t)(ip + 1); 1730 datalen = m->m_len - (sizeof(struct ip) + optlen); 1731 bcopy(opts + optlen, opts, datalen); 1732 m->m_len -= optlen; 1733 if (m->m_flags & M_PKTHDR) 1734 m->m_pkthdr.len -= optlen; 1735 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2); 1736 } 1737 1738 u_char inetctlerrmap[PRC_NCMDS] = { 1739 0, 0, 0, 0, 1740 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 1741 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 1742 EMSGSIZE, EHOSTUNREACH, 0, 0, 1743 0, 0, 0, 0, 1744 ENOPROTOOPT, ECONNREFUSED 1745 }; 1746 1747 /* 1748 * Forward a packet. If some error occurs return the sender 1749 * an icmp packet. Note we can't always generate a meaningful 1750 * icmp message because icmp doesn't have a large enough repertoire 1751 * of codes and types. 1752 * 1753 * If not forwarding, just drop the packet. This could be confusing 1754 * if ipforwarding was zero but some routing protocol was advancing 1755 * us as a gateway to somewhere. However, we must let the routing 1756 * protocol deal with that. 1757 * 1758 * The using_srcrt parameter indicates whether the packet is being forwarded 1759 * via a source route. 1760 */ 1761 static void 1762 ip_forward(struct mbuf *m, int using_srcrt, struct sockaddr_in *next_hop) 1763 { 1764 struct ip *ip = mtod(m, struct ip *); 1765 struct sockaddr_in *sin; 1766 struct rtentry *rt; 1767 int error, type = 0, code = 0; 1768 struct mbuf *mcopy; 1769 n_long dest; 1770 struct in_addr pkt_dst; 1771 struct ifnet *destifp; 1772 struct m_hdr tag; 1773 #if defined(IPSEC) || defined(FAST_IPSEC) 1774 struct ifnet dummyifp; 1775 #endif 1776 1777 dest = 0; 1778 /* 1779 * Cache the destination address of the packet; this may be 1780 * changed by use of 'ipfw fwd'. 
1781 */ 1782 pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst; 1783 1784 #ifdef DIAGNOSTIC 1785 if (ipprintfs) 1786 printf("forward: src %lx dst %lx ttl %x\n", 1787 ip->ip_src.s_addr, pkt_dst.s_addr, ip->ip_ttl); 1788 #endif 1789 1790 if (m->m_flags & (M_BCAST | M_MCAST) || !in_canforward(pkt_dst)) { 1791 ipstat.ips_cantforward++; 1792 m_freem(m); 1793 return; 1794 } 1795 if (!ipstealth && ip->ip_ttl <= IPTTLDEC) { 1796 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, NULL); 1797 return; 1798 } 1799 1800 sin = (struct sockaddr_in *)&ipforward_rt.ro_dst; 1801 if ((rt = ipforward_rt.ro_rt) == NULL || 1802 pkt_dst.s_addr != sin->sin_addr.s_addr) { 1803 if (ipforward_rt.ro_rt) { 1804 RTFREE(ipforward_rt.ro_rt); 1805 ipforward_rt.ro_rt = NULL; 1806 } 1807 sin->sin_family = AF_INET; 1808 sin->sin_len = sizeof(*sin); 1809 sin->sin_addr = pkt_dst; 1810 1811 rtalloc_ign(&ipforward_rt, RTF_PRCLONING); 1812 if (ipforward_rt.ro_rt == NULL) { 1813 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 1814 NULL); 1815 return; 1816 } 1817 rt = ipforward_rt.ro_rt; 1818 } 1819 1820 /* 1821 * Save the IP header and at most 8 bytes of the payload, 1822 * in case we need to generate an ICMP message to the src. 1823 * 1824 * XXX this can be optimized a lot by saving the data in a local 1825 * buffer on the stack (72 bytes at most), and only allocating the 1826 * mbuf if really necessary. The vast majority of the packets 1827 * are forwarded without having to send an ICMP back (either 1828 * because unnecessary, or because rate limited), so we are 1829 * really we are wasting a lot of work here. 1830 * 1831 * We don't use m_copy() because it might return a reference 1832 * to a shared cluster. Both this function and ip_output() 1833 * assume exclusive access to the IP header in `m', so any 1834 * data in a cluster may change before we reach icmp_error(). 
1835 */ 1836 MGET(mcopy, M_DONTWAIT, m->m_type); 1837 if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) { 1838 /* 1839 * It's probably ok if the pkthdr dup fails (because 1840 * the deep copy of the tag chain failed), but for now 1841 * be conservative and just discard the copy since 1842 * code below may some day want the tags. 1843 */ 1844 m_free(mcopy); 1845 mcopy = NULL; 1846 } 1847 if (mcopy != NULL) { 1848 mcopy->m_len = imin((IP_VHL_HL(ip->ip_vhl) << 2) + 8, 1849 (int)ip->ip_len); 1850 m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1851 } 1852 1853 if (!ipstealth) 1854 ip->ip_ttl -= IPTTLDEC; 1855 1856 /* 1857 * If forwarding packet using same interface that it came in on, 1858 * perhaps should send a redirect to sender to shortcut a hop. 1859 * Only send redirect if source is sending directly to us, 1860 * and if packet was not source routed (or has any options). 1861 * Also, don't send redirect if forwarding using a default route 1862 * or a route modified by a redirect. 1863 */ 1864 if (rt->rt_ifp == m->m_pkthdr.rcvif && 1865 !(rt->rt_flags & (RTF_DYNAMIC | RTF_MODIFIED)) && 1866 satosin(rt_key(rt))->sin_addr.s_addr != INADDR_ANY && 1867 ipsendredirects && !using_srcrt && next_hop != NULL) { 1868 u_long src = ntohl(ip->ip_src.s_addr); 1869 1870 #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 1871 if (RTA(rt) != NULL && 1872 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1873 if (rt->rt_flags & RTF_GATEWAY) 1874 dest = satosin(rt->rt_gateway)->sin_addr.s_addr; 1875 else 1876 dest = pkt_dst.s_addr; 1877 /* 1878 * Router requirements says to only send 1879 * host redirects. 
1880 */ 1881 type = ICMP_REDIRECT; 1882 code = ICMP_REDIRECT_HOST; 1883 #ifdef DIAGNOSTIC 1884 if (ipprintfs) 1885 printf("redirect (%d) to %lx\n", code, dest); 1886 #endif 1887 } 1888 } 1889 1890 if (next_hop) { 1891 /* Pass IPFORWARD info if available */ 1892 1893 tag.mh_type = MT_TAG; 1894 tag.mh_flags = PACKET_TAG_IPFORWARD; 1895 tag.mh_data = (caddr_t)next_hop; 1896 tag.mh_next = m; 1897 m = (struct mbuf *)&tag; 1898 } 1899 1900 error = ip_output(m, NULL, &ipforward_rt, IP_FORWARDING, NULL, NULL); 1901 1902 if (error) 1903 ipstat.ips_cantforward++; 1904 else { 1905 ipstat.ips_forward++; 1906 if (type) 1907 ipstat.ips_redirectsent++; 1908 else { 1909 if (mcopy) { 1910 ipflow_create(&ipforward_rt, mcopy); 1911 m_freem(mcopy); 1912 } 1913 return; 1914 } 1915 } 1916 if (mcopy == NULL) 1917 return; 1918 destifp = NULL; 1919 1920 switch (error) { 1921 1922 case 0: /* forwarded, but need redirect */ 1923 /* type, code set above */ 1924 break; 1925 1926 case ENETUNREACH: /* shouldn't happen, checked above */ 1927 case EHOSTUNREACH: 1928 case ENETDOWN: 1929 case EHOSTDOWN: 1930 default: 1931 type = ICMP_UNREACH; 1932 code = ICMP_UNREACH_HOST; 1933 break; 1934 1935 case EMSGSIZE: 1936 type = ICMP_UNREACH; 1937 code = ICMP_UNREACH_NEEDFRAG; 1938 #ifdef IPSEC 1939 /* 1940 * If the packet is routed over IPsec tunnel, tell the 1941 * originator the tunnel MTU. 1942 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz 1943 * XXX quickhack!!! 1944 */ 1945 if (ipforward_rt.ro_rt) { 1946 struct secpolicy *sp = NULL; 1947 int ipsecerror; 1948 int ipsechdr; 1949 struct route *ro; 1950 1951 sp = ipsec4_getpolicybyaddr(mcopy, 1952 IPSEC_DIR_OUTBOUND, 1953 IP_FORWARDING, 1954 &ipsecerror); 1955 1956 if (sp == NULL) 1957 destifp = ipforward_rt.ro_rt->rt_ifp; 1958 else { 1959 /* count IPsec header size */ 1960 ipsechdr = ipsec4_hdrsiz(mcopy, 1961 IPSEC_DIR_OUTBOUND, 1962 NULL); 1963 1964 /* 1965 * find the correct route for outer IPv4 1966 * header, compute tunnel MTU. 
1967 * 1968 * XXX BUG ALERT 1969 * The "dummyifp" code relies upon the fact 1970 * that icmp_error() touches only ifp->if_mtu. 1971 */ 1972 /*XXX*/ 1973 destifp = NULL; 1974 if (sp->req != NULL 1975 && sp->req->sav != NULL 1976 && sp->req->sav->sah != NULL) { 1977 ro = &sp->req->sav->sah->sa_route; 1978 if (ro->ro_rt && ro->ro_rt->rt_ifp) { 1979 dummyifp.if_mtu = 1980 ro->ro_rt->rt_ifp->if_mtu; 1981 dummyifp.if_mtu -= ipsechdr; 1982 destifp = &dummyifp; 1983 } 1984 } 1985 1986 key_freesp(sp); 1987 } 1988 } 1989 #elif FAST_IPSEC 1990 /* 1991 * If the packet is routed over IPsec tunnel, tell the 1992 * originator the tunnel MTU. 1993 * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz 1994 * XXX quickhack!!! 1995 */ 1996 if (ipforward_rt.ro_rt) { 1997 struct secpolicy *sp = NULL; 1998 int ipsecerror; 1999 int ipsechdr; 2000 struct route *ro; 2001 2002 sp = ipsec_getpolicybyaddr(mcopy, 2003 IPSEC_DIR_OUTBOUND, 2004 IP_FORWARDING, 2005 &ipsecerror); 2006 2007 if (sp == NULL) 2008 destifp = ipforward_rt.ro_rt->rt_ifp; 2009 else { 2010 /* count IPsec header size */ 2011 ipsechdr = ipsec4_hdrsiz(mcopy, 2012 IPSEC_DIR_OUTBOUND, 2013 NULL); 2014 2015 /* 2016 * find the correct route for outer IPv4 2017 * header, compute tunnel MTU. 2018 * 2019 * XXX BUG ALERT 2020 * The "dummyifp" code relies upon the fact 2021 * that icmp_error() touches only ifp->if_mtu. 
2022 */ 2023 /*XXX*/ 2024 destifp = NULL; 2025 if (sp->req != NULL 2026 && sp->req->sav != NULL 2027 && sp->req->sav->sah != NULL) { 2028 ro = &sp->req->sav->sah->sa_route; 2029 if (ro->ro_rt && ro->ro_rt->rt_ifp) { 2030 dummyifp.if_mtu = 2031 ro->ro_rt->rt_ifp->if_mtu; 2032 dummyifp.if_mtu -= ipsechdr; 2033 destifp = &dummyifp; 2034 } 2035 } 2036 2037 KEY_FREESP(&sp); 2038 } 2039 } 2040 #else /* !IPSEC && !FAST_IPSEC */ 2041 if (ipforward_rt.ro_rt) 2042 destifp = ipforward_rt.ro_rt->rt_ifp; 2043 #endif /*IPSEC*/ 2044 ipstat.ips_cantfrag++; 2045 break; 2046 2047 case ENOBUFS: 2048 /* 2049 * A router should not generate ICMP_SOURCEQUENCH as 2050 * required in RFC1812 Requirements for IP Version 4 Routers. 2051 * Source quench could be a big problem under DoS attacks, 2052 * or if the underlying interface is rate-limited. 2053 * Those who need source quench packets may re-enable them 2054 * via the net.inet.ip.sendsourcequench sysctl. 2055 */ 2056 if (!ip_sendsourcequench) { 2057 m_freem(mcopy); 2058 return; 2059 } else { 2060 type = ICMP_SOURCEQUENCH; 2061 code = 0; 2062 } 2063 break; 2064 2065 case EACCES: /* ipfw denied packet */ 2066 m_freem(mcopy); 2067 return; 2068 } 2069 icmp_error(mcopy, type, code, dest, destifp); 2070 } 2071 2072 void 2073 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 2074 struct mbuf *m) 2075 { 2076 if (inp->inp_socket->so_options & SO_TIMESTAMP) { 2077 struct timeval tv; 2078 2079 microtime(&tv); 2080 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), 2081 SCM_TIMESTAMP, SOL_SOCKET); 2082 if (*mp) 2083 mp = &(*mp)->m_next; 2084 } 2085 if (inp->inp_flags & INP_RECVDSTADDR) { 2086 *mp = sbcreatecontrol((caddr_t) &ip->ip_dst, 2087 sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 2088 if (*mp) 2089 mp = &(*mp)->m_next; 2090 } 2091 #ifdef notyet 2092 /* XXX 2093 * Moving these out of udp_input() made them even more broken 2094 * than they already were. 
2095 */ 2096 /* options were tossed already */ 2097 if (inp->inp_flags & INP_RECVOPTS) { 2098 *mp = sbcreatecontrol((caddr_t) opts_deleted_above, 2099 sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 2100 if (*mp) 2101 mp = &(*mp)->m_next; 2102 } 2103 /* ip_srcroute doesn't do what we want here, need to fix */ 2104 if (inp->inp_flags & INP_RECVRETOPTS) { 2105 *mp = sbcreatecontrol((caddr_t) ip_srcroute(), 2106 sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 2107 if (*mp) 2108 mp = &(*mp)->m_next; 2109 } 2110 #endif 2111 if (inp->inp_flags & INP_RECVIF) { 2112 struct ifnet *ifp; 2113 struct sdlbuf { 2114 struct sockaddr_dl sdl; 2115 u_char pad[32]; 2116 } sdlbuf; 2117 struct sockaddr_dl *sdp; 2118 struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 2119 2120 if (((ifp = m->m_pkthdr.rcvif)) && 2121 ((ifp->if_index != 0) && (ifp->if_index <= if_index))) { 2122 sdp = (struct sockaddr_dl *) 2123 ifnet_addrs[ifp->if_index - 1]->ifa_addr; 2124 /* 2125 * Change our mind and don't try copy. 2126 */ 2127 if ((sdp->sdl_family != AF_LINK) || 2128 (sdp->sdl_len > sizeof(sdlbuf))) { 2129 goto makedummy; 2130 } 2131 bcopy(sdp, sdl2, sdp->sdl_len); 2132 } else { 2133 makedummy: 2134 sdl2->sdl_len = 2135 offsetof(struct sockaddr_dl, sdl_data[0]); 2136 sdl2->sdl_family = AF_LINK; 2137 sdl2->sdl_index = 0; 2138 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 2139 } 2140 *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len, 2141 IP_RECVIF, IPPROTO_IP); 2142 if (*mp) 2143 mp = &(*mp)->m_next; 2144 } 2145 } 2146 2147 /* 2148 * XXX these routines are called from the upper part of the kernel. 2149 * 2150 * They could also be moved to ip_mroute.c, since all the RSVP 2151 * handling is done there already. 
2152 */ 2153 int 2154 ip_rsvp_init(struct socket *so) 2155 { 2156 if (so->so_type != SOCK_RAW || 2157 so->so_proto->pr_protocol != IPPROTO_RSVP) 2158 return EOPNOTSUPP; 2159 2160 if (ip_rsvpd != NULL) 2161 return EADDRINUSE; 2162 2163 ip_rsvpd = so; 2164 /* 2165 * This may seem silly, but we need to be sure we don't over-increment 2166 * the RSVP counter, in case something slips up. 2167 */ 2168 if (!ip_rsvp_on) { 2169 ip_rsvp_on = 1; 2170 rsvp_on++; 2171 } 2172 2173 return 0; 2174 } 2175 2176 int 2177 ip_rsvp_done(void) 2178 { 2179 ip_rsvpd = NULL; 2180 /* 2181 * This may seem silly, but we need to be sure we don't over-decrement 2182 * the RSVP counter, in case something slips up. 2183 */ 2184 if (ip_rsvp_on) { 2185 ip_rsvp_on = 0; 2186 rsvp_on--; 2187 } 2188 return 0; 2189 } 2190 2191 void 2192 rsvp_input(struct mbuf *m, int off, int proto) /* XXX must fixup manually */ 2193 { 2194 if (rsvp_input_p) { /* call the real one if loaded */ 2195 rsvp_input_p(m, off, proto); 2196 return; 2197 } 2198 2199 /* Can still get packets with rsvp_on = 0 if there is a local member 2200 * of the group to which the RSVP packet is addressed. But in this 2201 * case we want to throw the packet away. 2202 */ 2203 2204 if (!rsvp_on) { 2205 m_freem(m); 2206 return; 2207 } 2208 2209 if (ip_rsvpd != NULL) { 2210 rip_input(m, off, proto); 2211 return; 2212 } 2213 /* Drop the packet */ 2214 m_freem(m); 2215 } 2216