/* $NetBSD: ip_icmp.c,v 1.91 2005/02/26 22:45:12 perry Exp $ */

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Public Access Networks Corporation ("Panix").  It was developed under
 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of Zembu Labs, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
99 * 100 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 101 */ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.91 2005/02/26 22:45:12 perry Exp $"); 105 106 #include "opt_ipsec.h" 107 108 #include <sys/param.h> 109 #include <sys/systm.h> 110 #include <sys/malloc.h> 111 #include <sys/mbuf.h> 112 #include <sys/protosw.h> 113 #include <sys/socket.h> 114 #include <sys/time.h> 115 #include <sys/kernel.h> 116 #include <sys/syslog.h> 117 #include <sys/sysctl.h> 118 119 #include <net/if.h> 120 #include <net/route.h> 121 122 #include <netinet/in.h> 123 #include <netinet/in_systm.h> 124 #include <netinet/in_var.h> 125 #include <netinet/ip.h> 126 #include <netinet/ip_icmp.h> 127 #include <netinet/ip_var.h> 128 #include <netinet/in_pcb.h> 129 #include <netinet/icmp_var.h> 130 131 #ifdef IPSEC 132 #include <netinet6/ipsec.h> 133 #include <netkey/key.h> 134 #endif 135 136 #ifdef FAST_IPSEC 137 #include <netipsec/ipsec.h> 138 #include <netipsec/key.h> 139 #endif /* FAST_IPSEC*/ 140 141 #include <machine/stdarg.h> 142 143 /* 144 * ICMP routines: error generation, receive packet processing, and 145 * routines to turnaround packets back to the originator, and 146 * host table maintenance routines. 147 */ 148 149 int icmpmaskrepl = 0; 150 #ifdef ICMPPRINTFS 151 int icmpprintfs = 0; 152 #endif 153 int icmpreturndatabytes = 8; 154 155 struct icmpstat icmpstat; 156 157 /* 158 * List of callbacks to notify when Path MTU changes are made. 
159 */ 160 struct icmp_mtudisc_callback { 161 LIST_ENTRY(icmp_mtudisc_callback) mc_list; 162 void (*mc_func)(struct in_addr); 163 }; 164 165 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks = 166 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks); 167 168 #if 0 169 static int ip_next_mtu(int, int); 170 #else 171 /*static*/ int ip_next_mtu(int, int); 172 #endif 173 174 extern int icmperrppslim; 175 static int icmperrpps_count = 0; 176 static struct timeval icmperrppslim_last; 177 static int icmp_rediraccept = 1; 178 static int icmp_redirtimeout = 600; 179 static struct rttimer_queue *icmp_redirect_timeout_q = NULL; 180 181 static void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *); 182 static void icmp_redirect_timeout(struct rtentry *, struct rttimer *); 183 184 static int icmp_ratelimit(const struct in_addr *, const int, const int); 185 186 187 void 188 icmp_init(void) 189 { 190 /* 191 * This is only useful if the user initializes redirtimeout to 192 * something other than zero. 193 */ 194 if (icmp_redirtimeout != 0) { 195 icmp_redirect_timeout_q = 196 rt_timer_queue_create(icmp_redirtimeout); 197 } 198 } 199 200 /* 201 * Register a Path MTU Discovery callback. 202 */ 203 void 204 icmp_mtudisc_callback_register(void (*func)(struct in_addr)) 205 { 206 struct icmp_mtudisc_callback *mc; 207 208 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL; 209 mc = LIST_NEXT(mc, mc_list)) { 210 if (mc->mc_func == func) 211 return; 212 } 213 214 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT); 215 if (mc == NULL) 216 panic("icmp_mtudisc_callback_register"); 217 218 mc->mc_func = func; 219 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list); 220 } 221 222 /* 223 * Generate an error packet of type error 224 * in response to bad packet ip. 
225 */ 226 void 227 icmp_error(struct mbuf *n, int type, int code, n_long dest, 228 struct ifnet *destifp) 229 { 230 struct ip *oip = mtod(n, struct ip *), *nip; 231 unsigned oiplen = oip->ip_hl << 2; 232 struct icmp *icp; 233 struct mbuf *m; 234 unsigned icmplen, mblen; 235 236 #ifdef ICMPPRINTFS 237 if (icmpprintfs) 238 printf("icmp_error(%x, %d, %d)\n", oip, type, code); 239 #endif 240 if (type != ICMP_REDIRECT) 241 icmpstat.icps_error++; 242 /* 243 * Don't send error if the original packet was encrypted. 244 * Don't send error if not the first fragment of message. 245 * Don't error if the old packet protocol was ICMP 246 * error message, only known informational types. 247 */ 248 if (n->m_flags & M_DECRYPTED) 249 goto freeit; 250 if (oip->ip_off &~ htons(IP_MF|IP_DF)) 251 goto freeit; 252 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 253 n->m_len >= oiplen + ICMP_MINLEN && 254 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) { 255 icmpstat.icps_oldicmp++; 256 goto freeit; 257 } 258 /* Don't send error in response to a multicast or broadcast packet */ 259 if (n->m_flags & (M_BCAST|M_MCAST)) 260 goto freeit; 261 262 /* 263 * First, do a rate limitation check. 264 */ 265 if (icmp_ratelimit(&oip->ip_src, type, code)) { 266 /* XXX stat */ 267 goto freeit; 268 } 269 270 /* 271 * Now, formulate icmp message 272 */ 273 icmplen = oiplen + min(icmpreturndatabytes, 274 ntohs(oip->ip_len) - oiplen); 275 /* 276 * Defend against mbuf chains shorter than oip->ip_len - oiplen: 277 */ 278 mblen = 0; 279 for (m = n; m && (mblen < icmplen); m = m->m_next) 280 mblen += m->m_len; 281 icmplen = min(mblen, icmplen); 282 283 /* 284 * As we are not required to return everything we have, 285 * we return whatever we can return at ease. 286 * 287 * Note that ICMP datagrams longer than 576 octets are out of spec 288 * according to RFC1812; the limit on icmpreturndatabytes below in 289 * icmp_sysctl will keep things below that limit. 
290 */ 291 292 KASSERT(ICMP_MINLEN <= MCLBYTES); 293 294 if (icmplen + ICMP_MINLEN > MCLBYTES) 295 icmplen = MCLBYTES - ICMP_MINLEN; 296 297 m = m_gethdr(M_DONTWAIT, MT_HEADER); 298 if (m && (icmplen + ICMP_MINLEN > MHLEN)) { 299 MCLGET(m, M_DONTWAIT); 300 if ((m->m_flags & M_EXT) == 0) { 301 m_freem(m); 302 m = NULL; 303 } 304 } 305 if (m == NULL) 306 goto freeit; 307 MCLAIM(m, n->m_owner); 308 m->m_len = icmplen + ICMP_MINLEN; 309 if ((m->m_flags & M_EXT) == 0) 310 MH_ALIGN(m, m->m_len); 311 icp = mtod(m, struct icmp *); 312 if ((u_int)type > ICMP_MAXTYPE) 313 panic("icmp_error"); 314 icmpstat.icps_outhist[type]++; 315 icp->icmp_type = type; 316 if (type == ICMP_REDIRECT) 317 icp->icmp_gwaddr.s_addr = dest; 318 else { 319 icp->icmp_void = 0; 320 /* 321 * The following assignments assume an overlay with the 322 * zeroed icmp_void field. 323 */ 324 if (type == ICMP_PARAMPROB) { 325 icp->icmp_pptr = code; 326 code = 0; 327 } else if (type == ICMP_UNREACH && 328 code == ICMP_UNREACH_NEEDFRAG && destifp) 329 icp->icmp_nextmtu = htons(destifp->if_mtu); 330 } 331 332 icp->icmp_code = code; 333 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); 334 nip = &icp->icmp_ip; 335 336 /* 337 * Now, copy old ip header (without options) 338 * in front of icmp message. 
339 */ 340 if (m->m_data - sizeof(struct ip) < m->m_pktdat) 341 panic("icmp len"); 342 m->m_data -= sizeof(struct ip); 343 m->m_len += sizeof(struct ip); 344 m->m_pkthdr.len = m->m_len; 345 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; 346 nip = mtod(m, struct ip *); 347 /* ip_v set in ip_output */ 348 nip->ip_hl = sizeof(struct ip) >> 2; 349 nip->ip_tos = 0; 350 nip->ip_len = htons(m->m_len); 351 /* ip_id set in ip_output */ 352 nip->ip_off = htons(0); 353 /* ip_ttl set in icmp_reflect */ 354 nip->ip_p = IPPROTO_ICMP; 355 nip->ip_src = oip->ip_src; 356 nip->ip_dst = oip->ip_dst; 357 icmp_reflect(m); 358 359 freeit: 360 m_freem(n); 361 } 362 363 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET }; 364 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET }; 365 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET }; 366 struct sockaddr_in icmpmask = { 8, 0 }; 367 368 /* 369 * Process a received ICMP message. 370 */ 371 void 372 icmp_input(struct mbuf *m, ...) 373 { 374 int proto; 375 struct icmp *icp; 376 struct ip *ip = mtod(m, struct ip *); 377 int icmplen; 378 int i; 379 struct in_ifaddr *ia; 380 void *(*ctlfunc)(int, struct sockaddr *, void *); 381 int code; 382 int hlen; 383 va_list ap; 384 struct rtentry *rt; 385 386 va_start(ap, m); 387 hlen = va_arg(ap, int); 388 proto = va_arg(ap, int); 389 va_end(ap); 390 391 /* 392 * Locate icmp structure in mbuf, and check 393 * that not corrupted and of at least minimum length. 
394 */ 395 icmplen = ntohs(ip->ip_len) - hlen; 396 #ifdef ICMPPRINTFS 397 if (icmpprintfs) 398 printf("icmp_input from %x to %x, len %d\n", 399 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), 400 icmplen); 401 #endif 402 if (icmplen < ICMP_MINLEN) { 403 icmpstat.icps_tooshort++; 404 goto freeit; 405 } 406 i = hlen + min(icmplen, ICMP_ADVLENMIN); 407 if (m->m_len < i && (m = m_pullup(m, i)) == 0) { 408 icmpstat.icps_tooshort++; 409 return; 410 } 411 ip = mtod(m, struct ip *); 412 m->m_len -= hlen; 413 m->m_data += hlen; 414 icp = mtod(m, struct icmp *); 415 /* Don't need to assert alignment, here. */ 416 if (in_cksum(m, icmplen)) { 417 icmpstat.icps_checksum++; 418 goto freeit; 419 } 420 m->m_len += hlen; 421 m->m_data -= hlen; 422 423 #ifdef ICMPPRINTFS 424 /* 425 * Message type specific processing. 426 */ 427 if (icmpprintfs) 428 printf("icmp_input, type %d code %d\n", icp->icmp_type, 429 icp->icmp_code); 430 #endif 431 if (icp->icmp_type > ICMP_MAXTYPE) 432 goto raw; 433 icmpstat.icps_inhist[icp->icmp_type]++; 434 code = icp->icmp_code; 435 switch (icp->icmp_type) { 436 437 case ICMP_UNREACH: 438 switch (code) { 439 case ICMP_UNREACH_NET: 440 case ICMP_UNREACH_HOST: 441 case ICMP_UNREACH_PROTOCOL: 442 case ICMP_UNREACH_PORT: 443 case ICMP_UNREACH_SRCFAIL: 444 code += PRC_UNREACH_NET; 445 break; 446 447 case ICMP_UNREACH_NEEDFRAG: 448 code = PRC_MSGSIZE; 449 break; 450 451 case ICMP_UNREACH_NET_UNKNOWN: 452 case ICMP_UNREACH_NET_PROHIB: 453 case ICMP_UNREACH_TOSNET: 454 code = PRC_UNREACH_NET; 455 break; 456 457 case ICMP_UNREACH_HOST_UNKNOWN: 458 case ICMP_UNREACH_ISOLATED: 459 case ICMP_UNREACH_HOST_PROHIB: 460 case ICMP_UNREACH_TOSHOST: 461 code = PRC_UNREACH_HOST; 462 break; 463 464 default: 465 goto badcode; 466 } 467 goto deliver; 468 469 case ICMP_TIMXCEED: 470 if (code > 1) 471 goto badcode; 472 code += PRC_TIMXCEED_INTRANS; 473 goto deliver; 474 475 case ICMP_PARAMPROB: 476 if (code > 1) 477 goto badcode; 478 code = PRC_PARAMPROB; 479 goto deliver; 
480 481 case ICMP_SOURCEQUENCH: 482 if (code) 483 goto badcode; 484 code = PRC_QUENCH; 485 goto deliver; 486 487 deliver: 488 /* 489 * Problem with datagram; advise higher level routines. 490 */ 491 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 492 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 493 icmpstat.icps_badlen++; 494 goto freeit; 495 } 496 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr)) 497 goto badcode; 498 #ifdef ICMPPRINTFS 499 if (icmpprintfs) 500 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 501 #endif 502 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 503 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; 504 if (ctlfunc) 505 (void) (*ctlfunc)(code, sintosa(&icmpsrc), 506 &icp->icmp_ip); 507 break; 508 509 badcode: 510 icmpstat.icps_badcode++; 511 break; 512 513 case ICMP_ECHO: 514 icp->icmp_type = ICMP_ECHOREPLY; 515 goto reflect; 516 517 case ICMP_TSTAMP: 518 if (icmplen < ICMP_TSLEN) { 519 icmpstat.icps_badlen++; 520 break; 521 } 522 icp->icmp_type = ICMP_TSTAMPREPLY; 523 icp->icmp_rtime = iptime(); 524 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 525 goto reflect; 526 527 case ICMP_MASKREQ: 528 if (icmpmaskrepl == 0) 529 break; 530 /* 531 * We are not able to respond with all ones broadcast 532 * unless we receive it over a point-to-point interface. 
533 */ 534 if (icmplen < ICMP_MASKLEN) { 535 icmpstat.icps_badlen++; 536 break; 537 } 538 if (ip->ip_dst.s_addr == INADDR_BROADCAST || 539 in_nullhost(ip->ip_dst)) 540 icmpdst.sin_addr = ip->ip_src; 541 else 542 icmpdst.sin_addr = ip->ip_dst; 543 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst), 544 m->m_pkthdr.rcvif)); 545 if (ia == 0) 546 break; 547 icp->icmp_type = ICMP_MASKREPLY; 548 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 549 if (in_nullhost(ip->ip_src)) { 550 if (ia->ia_ifp->if_flags & IFF_BROADCAST) 551 ip->ip_src = ia->ia_broadaddr.sin_addr; 552 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) 553 ip->ip_src = ia->ia_dstaddr.sin_addr; 554 } 555 reflect: 556 icmpstat.icps_reflect++; 557 icmpstat.icps_outhist[icp->icmp_type]++; 558 icmp_reflect(m); 559 return; 560 561 case ICMP_REDIRECT: 562 if (code > 3) 563 goto badcode; 564 if (icmp_rediraccept == 0) 565 goto freeit; 566 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 567 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 568 icmpstat.icps_badlen++; 569 break; 570 } 571 /* 572 * Short circuit routing redirects to force 573 * immediate change in the kernel's routing 574 * tables. The message is also handed to anyone 575 * listening on a raw socket (e.g. the routing 576 * daemon for use in updating its tables). 
577 */ 578 icmpgw.sin_addr = ip->ip_src; 579 icmpdst.sin_addr = icp->icmp_gwaddr; 580 #ifdef ICMPPRINTFS 581 if (icmpprintfs) 582 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst, 583 icp->icmp_gwaddr); 584 #endif 585 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 586 rt = NULL; 587 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst), 588 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, 589 sintosa(&icmpgw), (struct rtentry **)&rt); 590 if (rt != NULL && icmp_redirtimeout != 0) { 591 i = rt_timer_add(rt, icmp_redirect_timeout, 592 icmp_redirect_timeout_q); 593 if (i) 594 log(LOG_ERR, "ICMP: redirect failed to " 595 "register timeout for route to %x, " 596 "code %d\n", 597 icp->icmp_ip.ip_dst.s_addr, i); 598 } 599 if (rt != NULL) 600 rtfree(rt); 601 602 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc)); 603 #if defined(IPSEC) || defined(FAST_IPSEC) 604 key_sa_routechange((struct sockaddr *)&icmpsrc); 605 #endif 606 break; 607 608 /* 609 * No kernel processing for the following; 610 * just fall through to send to raw listener. 
611 */ 612 case ICMP_ECHOREPLY: 613 case ICMP_ROUTERADVERT: 614 case ICMP_ROUTERSOLICIT: 615 case ICMP_TSTAMPREPLY: 616 case ICMP_IREQREPLY: 617 case ICMP_MASKREPLY: 618 default: 619 break; 620 } 621 622 raw: 623 rip_input(m, hlen, proto); 624 return; 625 626 freeit: 627 m_freem(m); 628 return; 629 } 630 631 /* 632 * Reflect the ip packet back to the source 633 */ 634 void 635 icmp_reflect(struct mbuf *m) 636 { 637 struct ip *ip = mtod(m, struct ip *); 638 struct in_ifaddr *ia; 639 struct ifaddr *ifa; 640 struct sockaddr_in *sin = 0; 641 struct in_addr t; 642 struct mbuf *opts = 0; 643 int optlen = (ip->ip_hl << 2) - sizeof(struct ip); 644 645 if (!in_canforward(ip->ip_src) && 646 ((ip->ip_src.s_addr & IN_CLASSA_NET) != 647 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 648 m_freem(m); /* Bad return address */ 649 goto done; /* ip_output() will check for broadcast */ 650 } 651 t = ip->ip_dst; 652 ip->ip_dst = ip->ip_src; 653 /* 654 * If the incoming packet was addressed directly to us, use 655 * dst as the src for the reply. Otherwise (broadcast or 656 * anonymous), use an address which corresponds to the 657 * incoming interface, with a preference for the address which 658 * corresponds to the route to the destination of the ICMP. 
659 */ 660 661 /* Look for packet addressed to us */ 662 INADDR_TO_IA(t, ia); 663 664 /* look for packet sent to broadcast address */ 665 if (ia == NULL && m->m_pkthdr.rcvif && 666 (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) { 667 IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) { 668 if (ifa->ifa_addr->sa_family != AF_INET) 669 continue; 670 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) { 671 ia = ifatoia(ifa); 672 break; 673 } 674 } 675 } 676 677 if (ia) 678 sin = &ia->ia_addr; 679 680 icmpdst.sin_addr = t; 681 682 /* 683 * if the packet is addressed somewhere else, compute the 684 * source address for packets routed back to the source, and 685 * use that, if it's an address on the interface which 686 * received the packet 687 */ 688 if (sin == (struct sockaddr_in *)0 && m->m_pkthdr.rcvif) { 689 struct sockaddr_in sin_dst; 690 struct route icmproute; 691 int errornum; 692 693 sin_dst.sin_family = AF_INET; 694 sin_dst.sin_len = sizeof(struct sockaddr_in); 695 sin_dst.sin_addr = ip->ip_dst; 696 bzero(&icmproute, sizeof(icmproute)); 697 errornum = 0; 698 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum); 699 /* errornum is never used */ 700 if (icmproute.ro_rt) 701 RTFREE(icmproute.ro_rt); 702 /* check to make sure sin is a source address on rcvif */ 703 if (sin) { 704 t = sin->sin_addr; 705 sin = (struct sockaddr_in *)0; 706 INADDR_TO_IA(t, ia); 707 while (ia) { 708 if (ia->ia_ifp == m->m_pkthdr.rcvif) { 709 sin = &ia->ia_addr; 710 break; 711 } 712 NEXT_IA_WITH_SAME_ADDR(ia); 713 } 714 } 715 } 716 717 /* 718 * if it was not addressed to us, but the route doesn't go out 719 * the source interface, pick an address on the source 720 * interface. 
This can happen when routing is asymmetric, or 721 * when the incoming packet was encapsulated 722 */ 723 if (sin == (struct sockaddr_in *)0 && m->m_pkthdr.rcvif) { 724 IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) { 725 if (ifa->ifa_addr->sa_family != AF_INET) 726 continue; 727 sin = &(ifatoia(ifa)->ia_addr); 728 break; 729 } 730 } 731 732 /* 733 * The following happens if the packet was not addressed to us, 734 * and was received on an interface with no IP address: 735 * We find the first AF_INET address on the first non-loopback 736 * interface. 737 */ 738 if (sin == (struct sockaddr_in *)0) 739 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_list) { 740 if (ia->ia_ifp->if_flags & IFF_LOOPBACK) 741 continue; 742 sin = &ia->ia_addr; 743 break; 744 } 745 746 /* 747 * If we still didn't find an address, punt. We could have an 748 * interface up (and receiving packets) with no address. 749 */ 750 if (sin == (struct sockaddr_in *)0) { 751 m_freem(m); 752 goto done; 753 } 754 755 ip->ip_src = sin->sin_addr; 756 ip->ip_ttl = MAXTTL; 757 758 if (optlen > 0) { 759 u_char *cp; 760 int opt, cnt; 761 u_int len; 762 763 /* 764 * Retrieve any source routing from the incoming packet; 765 * add on any record-route or timestamp options. 
766 */ 767 cp = (u_char *) (ip + 1); 768 if ((opts = ip_srcroute()) == 0 && 769 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { 770 MCLAIM(opts, m->m_owner); 771 opts->m_len = sizeof(struct in_addr); 772 *mtod(opts, struct in_addr *) = zeroin_addr; 773 } 774 if (opts) { 775 #ifdef ICMPPRINTFS 776 if (icmpprintfs) 777 printf("icmp_reflect optlen %d rt %d => ", 778 optlen, opts->m_len); 779 #endif 780 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 781 opt = cp[IPOPT_OPTVAL]; 782 if (opt == IPOPT_EOL) 783 break; 784 if (opt == IPOPT_NOP) 785 len = 1; 786 else { 787 if (cnt < IPOPT_OLEN + sizeof(*cp)) 788 break; 789 len = cp[IPOPT_OLEN]; 790 if (len < IPOPT_OLEN + sizeof(*cp) || 791 len > cnt) 792 break; 793 } 794 /* 795 * Should check for overflow, but it "can't happen" 796 */ 797 if (opt == IPOPT_RR || opt == IPOPT_TS || 798 opt == IPOPT_SECURITY) { 799 bcopy((caddr_t)cp, 800 mtod(opts, caddr_t) + opts->m_len, len); 801 opts->m_len += len; 802 } 803 } 804 /* Terminate & pad, if necessary */ 805 if ((cnt = opts->m_len % 4) != 0) { 806 for (; cnt < 4; cnt++) { 807 *(mtod(opts, caddr_t) + opts->m_len) = 808 IPOPT_EOL; 809 opts->m_len++; 810 } 811 } 812 #ifdef ICMPPRINTFS 813 if (icmpprintfs) 814 printf("%d\n", opts->m_len); 815 #endif 816 } 817 /* 818 * Now strip out original options by copying rest of first 819 * mbuf's data back, and adjust the IP length. 820 */ 821 ip->ip_len = htons(ntohs(ip->ip_len) - optlen); 822 ip->ip_hl = sizeof(struct ip) >> 2; 823 m->m_len -= optlen; 824 if (m->m_flags & M_PKTHDR) 825 m->m_pkthdr.len -= optlen; 826 optlen += sizeof(struct ip); 827 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1), 828 (unsigned)(m->m_len - sizeof(struct ip))); 829 } 830 m_tag_delete_nonpersistent(m); 831 m->m_flags &= ~(M_BCAST|M_MCAST); 832 833 /* 834 * Clear any in-bound checksum flags for this packet. 
835 */ 836 if (m->m_flags & M_PKTHDR) 837 m->m_pkthdr.csum_flags = 0; 838 839 icmp_send(m, opts); 840 done: 841 if (opts) 842 (void)m_free(opts); 843 } 844 845 /* 846 * Send an icmp packet back to the ip level, 847 * after supplying a checksum. 848 */ 849 void 850 icmp_send(struct mbuf *m, struct mbuf *opts) 851 { 852 struct ip *ip = mtod(m, struct ip *); 853 int hlen; 854 struct icmp *icp; 855 856 hlen = ip->ip_hl << 2; 857 m->m_data += hlen; 858 m->m_len -= hlen; 859 icp = mtod(m, struct icmp *); 860 icp->icmp_cksum = 0; 861 icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen); 862 m->m_data -= hlen; 863 m->m_len += hlen; 864 #ifdef ICMPPRINTFS 865 if (icmpprintfs) 866 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src); 867 #endif 868 (void) ip_output(m, opts, NULL, 0, 869 (struct ip_moptions *)NULL, (struct socket *)NULL); 870 } 871 872 n_time 873 iptime(void) 874 { 875 struct timeval atv; 876 u_long t; 877 878 microtime(&atv); 879 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 880 return (htonl(t)); 881 } 882 883 /* 884 * sysctl helper routine for net.inet.icmp.returndatabytes. ensures 885 * that the new value is in the correct range. 886 */ 887 static int 888 sysctl_net_inet_icmp_returndatabytes(SYSCTLFN_ARGS) 889 { 890 int error, t; 891 struct sysctlnode node; 892 893 node = *rnode; 894 node.sysctl_data = &t; 895 t = icmpreturndatabytes; 896 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 897 if (error || newp == NULL) 898 return (error); 899 900 if (t < 8 || t > 512) 901 return (EINVAL); 902 icmpreturndatabytes = t; 903 904 return (0); 905 } 906 907 /* 908 * sysctl helper routine for net.inet.icmp.redirtimeout. ensures that 909 * the given value is not less than zero and then resets the timeout 910 * queue. 
911 */ 912 static int 913 sysctl_net_inet_icmp_redirtimeout(SYSCTLFN_ARGS) 914 { 915 int error, tmp; 916 struct sysctlnode node; 917 918 node = *rnode; 919 node.sysctl_data = &tmp; 920 tmp = icmp_redirtimeout; 921 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 922 if (error || newp == NULL) 923 return (error); 924 if (tmp < 0) 925 return (EINVAL); 926 icmp_redirtimeout = tmp; 927 928 /* 929 * was it a *defined* side-effect that anyone even *reading* 930 * this value causes these things to happen? 931 */ 932 if (icmp_redirect_timeout_q != NULL) { 933 if (icmp_redirtimeout == 0) { 934 rt_timer_queue_destroy(icmp_redirect_timeout_q, 935 TRUE); 936 icmp_redirect_timeout_q = NULL; 937 } else { 938 rt_timer_queue_change(icmp_redirect_timeout_q, 939 icmp_redirtimeout); 940 } 941 } else if (icmp_redirtimeout > 0) { 942 icmp_redirect_timeout_q = 943 rt_timer_queue_create(icmp_redirtimeout); 944 } 945 946 return (0); 947 } 948 949 SYSCTL_SETUP(sysctl_net_inet_icmp_setup, "sysctl net.inet.icmp subtree setup") 950 { 951 952 sysctl_createv(clog, 0, NULL, NULL, 953 CTLFLAG_PERMANENT, 954 CTLTYPE_NODE, "net", NULL, 955 NULL, 0, NULL, 0, 956 CTL_NET, CTL_EOL); 957 sysctl_createv(clog, 0, NULL, NULL, 958 CTLFLAG_PERMANENT, 959 CTLTYPE_NODE, "inet", NULL, 960 NULL, 0, NULL, 0, 961 CTL_NET, PF_INET, CTL_EOL); 962 sysctl_createv(clog, 0, NULL, NULL, 963 CTLFLAG_PERMANENT, 964 CTLTYPE_NODE, "icmp", 965 SYSCTL_DESCR("ICMPv4 related settings"), 966 NULL, 0, NULL, 0, 967 CTL_NET, PF_INET, IPPROTO_ICMP, CTL_EOL); 968 969 sysctl_createv(clog, 0, NULL, NULL, 970 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 971 CTLTYPE_INT, "maskrepl", 972 SYSCTL_DESCR("Respond to ICMP_MASKREQ messages"), 973 NULL, 0, &icmpmaskrepl, 0, 974 CTL_NET, PF_INET, IPPROTO_ICMP, 975 ICMPCTL_MASKREPL, CTL_EOL); 976 sysctl_createv(clog, 0, NULL, NULL, 977 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 978 CTLTYPE_INT, "returndatabytes", 979 SYSCTL_DESCR("Number of bytes to return in an ICMP " 980 "error message"), 981 
sysctl_net_inet_icmp_returndatabytes, 0, 982 &icmpreturndatabytes, 0, 983 CTL_NET, PF_INET, IPPROTO_ICMP, 984 ICMPCTL_RETURNDATABYTES, CTL_EOL); 985 sysctl_createv(clog, 0, NULL, NULL, 986 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 987 CTLTYPE_INT, "errppslimit", 988 SYSCTL_DESCR("Maximum number of outgoing ICMP error " 989 "messages per second"), 990 NULL, 0, &icmperrppslim, 0, 991 CTL_NET, PF_INET, IPPROTO_ICMP, 992 ICMPCTL_ERRPPSLIMIT, CTL_EOL); 993 sysctl_createv(clog, 0, NULL, NULL, 994 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 995 CTLTYPE_INT, "rediraccept", 996 SYSCTL_DESCR("Accept ICMP_REDIRECT messages"), 997 NULL, 0, &icmp_rediraccept, 0, 998 CTL_NET, PF_INET, IPPROTO_ICMP, 999 ICMPCTL_REDIRACCEPT, CTL_EOL); 1000 sysctl_createv(clog, 0, NULL, NULL, 1001 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1002 CTLTYPE_INT, "redirtimeout", 1003 SYSCTL_DESCR("Lifetime of ICMP_REDIRECT generated " 1004 "routes"), 1005 sysctl_net_inet_icmp_redirtimeout, 0, 1006 &icmp_redirtimeout, 0, 1007 CTL_NET, PF_INET, IPPROTO_ICMP, 1008 ICMPCTL_REDIRTIMEOUT, CTL_EOL); 1009 } 1010 1011 /* Table of common MTUs: */ 1012 1013 static const u_int mtu_table[] = { 1014 65535, 65280, 32000, 17914, 9180, 8166, 1015 4352, 2002, 1492, 1006, 508, 296, 68, 0 1016 }; 1017 1018 void 1019 icmp_mtudisc(struct icmp *icp, struct in_addr faddr) 1020 { 1021 struct icmp_mtudisc_callback *mc; 1022 struct sockaddr *dst = sintosa(&icmpsrc); 1023 struct rtentry *rt; 1024 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? 
IPv6 */ 1025 int error; 1026 1027 rt = rtalloc1(dst, 1); 1028 if (rt == 0) 1029 return; 1030 1031 /* If we didn't get a host route, allocate one */ 1032 1033 if ((rt->rt_flags & RTF_HOST) == 0) { 1034 struct rtentry *nrt; 1035 1036 error = rtrequest((int) RTM_ADD, dst, 1037 (struct sockaddr *) rt->rt_gateway, 1038 (struct sockaddr *) 0, 1039 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt); 1040 if (error) { 1041 rtfree(rt); 1042 return; 1043 } 1044 nrt->rt_rmx = rt->rt_rmx; 1045 rtfree(rt); 1046 rt = nrt; 1047 } 1048 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q); 1049 if (error) { 1050 rtfree(rt); 1051 return; 1052 } 1053 1054 if (mtu == 0) { 1055 int i = 0; 1056 1057 mtu = ntohs(icp->icmp_ip.ip_len); 1058 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */ 1059 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0) 1060 mtu -= (icp->icmp_ip.ip_hl << 2); 1061 1062 /* If we still can't guess a value, try the route */ 1063 1064 if (mtu == 0) { 1065 mtu = rt->rt_rmx.rmx_mtu; 1066 1067 /* If no route mtu, default to the interface mtu */ 1068 1069 if (mtu == 0) 1070 mtu = rt->rt_ifp->if_mtu; 1071 } 1072 1073 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++) 1074 if (mtu > mtu_table[i]) { 1075 mtu = mtu_table[i]; 1076 break; 1077 } 1078 } 1079 1080 /* 1081 * XXX: RTV_MTU is overloaded, since the admin can set it 1082 * to turn off PMTU for a route, and the kernel can 1083 * set it to indicate a serious problem with PMTU 1084 * on a route. We should be using a separate flag 1085 * for the kernel to indicate this. 1086 */ 1087 1088 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) { 1089 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu) 1090 rt->rt_rmx.rmx_locks |= RTV_MTU; 1091 else if (rt->rt_rmx.rmx_mtu > mtu || 1092 rt->rt_rmx.rmx_mtu == 0) { 1093 icmpstat.icps_pmtuchg++; 1094 rt->rt_rmx.rmx_mtu = mtu; 1095 } 1096 } 1097 1098 if (rt) 1099 rtfree(rt); 1100 1101 /* 1102 * Notify protocols that the MTU for this destination 1103 * has changed. 
1104 */ 1105 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL; 1106 mc = LIST_NEXT(mc, mc_list)) 1107 (*mc->mc_func)(faddr); 1108 } 1109 1110 /* 1111 * Return the next larger or smaller MTU plateau (table from RFC 1191) 1112 * given current value MTU. If DIR is less than zero, a larger plateau 1113 * is returned; otherwise, a smaller value is returned. 1114 */ 1115 int 1116 ip_next_mtu(int mtu, int dir) /* XXX */ 1117 { 1118 int i; 1119 1120 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) { 1121 if (mtu >= mtu_table[i]) 1122 break; 1123 } 1124 1125 if (dir < 0) { 1126 if (i == 0) { 1127 return 0; 1128 } else { 1129 return mtu_table[i - 1]; 1130 } 1131 } else { 1132 if (mtu_table[i] == 0) { 1133 return 0; 1134 } else if (mtu > mtu_table[i]) { 1135 return mtu_table[i]; 1136 } else { 1137 return mtu_table[i + 1]; 1138 } 1139 } 1140 } 1141 1142 static void 1143 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r) 1144 { 1145 if (rt == NULL) 1146 panic("icmp_mtudisc_timeout: bad route to timeout"); 1147 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == 1148 (RTF_DYNAMIC | RTF_HOST)) { 1149 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt), 1150 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); 1151 } else { 1152 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) { 1153 rt->rt_rmx.rmx_mtu = 0; 1154 } 1155 } 1156 } 1157 1158 static void 1159 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r) 1160 { 1161 if (rt == NULL) 1162 panic("icmp_redirect_timeout: bad route to timeout"); 1163 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) == 1164 (RTF_DYNAMIC | RTF_HOST)) { 1165 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt), 1166 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); 1167 } 1168 } 1169 1170 /* 1171 * Perform rate limit check. 1172 * Returns 0 if it is okay to send the icmp packet. 1173 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate 1174 * limitation. 
1175 * 1176 * XXX per-destination/type check necessary? 1177 */ 1178 /* "type" and "code" are not used at this moment */ 1179 static int 1180 icmp_ratelimit(const struct in_addr *dst, const int type, const int code) 1181 { 1182 1183 /* PPS limit */ 1184 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count, 1185 icmperrppslim)) { 1186 /* The packet is subject to rate limit */ 1187 return 1; 1188 } 1189 1190 /* okay to send */ 1191 return 0; 1192 } 1193