1 /* $OpenBSD: ip_icmp.c,v 1.198 2025/01/03 21:27:40 bluhm Exp $ */ 2 /* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include "carp.h" 72 #include "pf.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/mbuf.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/sysctl.h> 80 81 #include <net/if.h> 82 #include <net/if_var.h> 83 #include <net/route.h> 84 85 #include <netinet/in.h> 86 #include <netinet/in_systm.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip.h> 89 #include <netinet/ip_icmp.h> 90 #include <netinet/ip_var.h> 91 #include <netinet/icmp_var.h> 92 93 #if NCARP > 0 94 #include <net/if_types.h> 95 #include <netinet/ip_carp.h> 96 #endif 97 98 #if NPF > 0 99 #include <net/pfvar.h> 100 #endif 101 102 /* 103 * ICMP routines: error generation, receive packet processing, and 104 * routines to turnaround packets back to the originator, and 105 * host table maintenance routines. 106 */ 107 108 /* 109 * Locks used to protect data: 110 * a atomic 111 */ 112 113 #ifdef ICMPPRINTFS 114 int icmpprintfs = 0; /* Settable from ddb */ 115 #endif 116 117 /* values controllable via sysctl */ 118 int icmpmaskrepl = 0; /* [a] */ 119 int icmpbmcastecho = 0; /* [a] */ 120 int icmptstamprepl = 1; /* [a] */ 121 int icmperrppslim = 100; /* [a] */ 122 int icmp_rediraccept = 0; /* [a] */ 123 int icmp_redirtimeout = 10 * 60; 124 125 static int icmperrpps_count = 0; 126 static struct timeval icmperrppslim_last; 127 128 struct rttimer_queue ip_mtudisc_timeout_q; 129 struct rttimer_queue icmp_redirect_timeout_q; 130 struct cpumem *icmpcounters; 131 132 const struct sysctl_bounded_args icmpctl_vars[] = { 133 { ICMPCTL_MASKREPL, &icmpmaskrepl, 0, 1 }, 134 { ICMPCTL_BMCASTECHO, &icmpbmcastecho, 0, 1 }, 135 { ICMPCTL_ERRPPSLIMIT, &icmperrppslim, -1, INT_MAX }, 136 { ICMPCTL_REDIRACCEPT, &icmp_rediraccept, 0, 1 }, 137 { ICMPCTL_TSTAMPREPL, &icmptstamprepl, 0, 1 }, 138 }; 139 140 141 void icmp_mtudisc_timeout(struct rtentry *, u_int); 142 int icmp_ratelimit(const struct in_addr *, const int, const int); 143 int icmp_input_if(struct ifnet *, struct mbuf **, int *, int, int); 144 int icmp_sysctl_icmpstat(void *, size_t *, void *); 145 146 void 147 icmp_init(void) 148 { 149 rt_timer_queue_init(&ip_mtudisc_timeout_q, ip_mtudisc_timeout, 150 &icmp_mtudisc_timeout); 151 rt_timer_queue_init(&icmp_redirect_timeout_q, icmp_redirtimeout, 152 NULL); 153 icmpcounters = counters_alloc(icps_ncounters); 154 } 155 156 struct mbuf * 157 icmp_do_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 158 { 159 struct ip *oip = mtod(n, struct ip *), *nip; 160 unsigned oiplen = oip->ip_hl << 2; 161 struct icmp *icp; 162 struct mbuf *m; 163 unsigned icmplen, mblen; 164 165 #ifdef ICMPPRINTFS 166 if (icmpprintfs) 167 printf("icmp_error(%x, %d, %d)\n", oip, type, code); 168 #endif 169 if (type != ICMP_REDIRECT) 170 icmpstat_inc(icps_error); 171 /* 172 * Don't send error if not the first fragment of message. 173 * Don't error if the old packet protocol was ICMP 174 * error message, only known informational types. 175 */ 176 if (oip->ip_off & htons(IP_OFFMASK)) 177 goto freeit; 178 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 179 n->m_len >= oiplen + ICMP_MINLEN && 180 !ICMP_INFOTYPE(((struct icmp *) 181 ((caddr_t)oip + oiplen))->icmp_type)) { 182 icmpstat_inc(icps_oldicmp); 183 goto freeit; 184 } 185 /* Don't send error in response to a multicast or broadcast packet */ 186 if (n->m_flags & (M_BCAST|M_MCAST)) 187 goto freeit; 188 189 /* 190 * First, do a rate limitation check. 191 */ 192 if (icmp_ratelimit(&oip->ip_src, type, code)) { 193 icmpstat_inc(icps_toofreq); 194 goto freeit; 195 } 196 197 /* 198 * Now, formulate icmp message 199 */ 200 icmplen = oiplen + min(8, ntohs(oip->ip_len)); 201 /* 202 * Defend against mbuf chains shorter than oip->ip_len: 203 */ 204 mblen = 0; 205 for (m = n; m && (mblen < icmplen); m = m->m_next) 206 mblen += m->m_len; 207 icmplen = min(mblen, icmplen); 208 209 /* 210 * As we are not required to return everything we have, 211 * we return whatever we can return at ease. 212 * 213 * Note that ICMP datagrams longer than 576 octets are out of spec 214 * according to RFC1812; 215 */ 216 217 KASSERT(ICMP_MINLEN + sizeof (struct ip) <= MCLBYTES); 218 219 if (sizeof (struct ip) + icmplen + ICMP_MINLEN > MCLBYTES) 220 icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip); 221 222 m = m_gethdr(M_DONTWAIT, MT_HEADER); 223 if (m && ((sizeof (struct ip) + icmplen + ICMP_MINLEN + 224 sizeof(long) - 1) &~ (sizeof(long) - 1)) > MHLEN) { 225 MCLGET(m, M_DONTWAIT); 226 if ((m->m_flags & M_EXT) == 0) { 227 m_freem(m); 228 m = NULL; 229 } 230 } 231 if (m == NULL) 232 goto freeit; 233 /* keep in same rtable and preserve other pkthdr bits */ 234 m->m_pkthdr.ph_rtableid = n->m_pkthdr.ph_rtableid; 235 m->m_pkthdr.ph_ifidx = n->m_pkthdr.ph_ifidx; 236 /* move PF_GENERATED to new packet, if existent XXX preserve more? */ 237 if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED) 238 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 239 m->m_pkthdr.len = m->m_len = icmplen + ICMP_MINLEN; 240 m_align(m, m->m_len); 241 icp = mtod(m, struct icmp *); 242 if ((u_int)type > ICMP_MAXTYPE) 243 panic("icmp_error"); 244 icmpstat_inc(icps_outhist + type); 245 icp->icmp_type = type; 246 if (type == ICMP_REDIRECT) 247 icp->icmp_gwaddr.s_addr = dest; 248 else { 249 icp->icmp_void = 0; 250 /* 251 * The following assignments assume an overlay with the 252 * zeroed icmp_void field. 253 */ 254 if (type == ICMP_PARAMPROB) { 255 icp->icmp_pptr = code; 256 code = 0; 257 } else if (type == ICMP_UNREACH && 258 code == ICMP_UNREACH_NEEDFRAG && destmtu) 259 icp->icmp_nextmtu = htons(destmtu); 260 } 261 262 icp->icmp_code = code; 263 m_copydata(n, 0, icmplen, &icp->icmp_ip); 264 265 /* 266 * Now, copy old ip header (without options) 267 * in front of icmp message. 268 */ 269 m = m_prepend(m, sizeof(struct ip), M_DONTWAIT); 270 if (m == NULL) 271 goto freeit; 272 nip = mtod(m, struct ip *); 273 /* ip_v set in ip_output */ 274 nip->ip_hl = sizeof(struct ip) >> 2; 275 nip->ip_tos = 0; 276 nip->ip_len = htons(m->m_len); 277 /* ip_id set in ip_output */ 278 nip->ip_off = 0; 279 /* ip_ttl set in icmp_reflect */ 280 nip->ip_p = IPPROTO_ICMP; 281 nip->ip_src = oip->ip_src; 282 nip->ip_dst = oip->ip_dst; 283 284 m_freem(n); 285 return (m); 286 287 freeit: 288 m_freem(n); 289 return (NULL); 290 } 291 292 /* 293 * Generate an error packet of type error 294 * in response to bad packet ip. 295 * 296 * The ip packet inside has ip_off and ip_len in host byte order. 297 */ 298 void 299 icmp_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 300 { 301 struct mbuf *m; 302 303 m = icmp_do_error(n, type, code, dest, destmtu); 304 if (m != NULL) 305 if (!icmp_reflect(m, NULL, NULL)) 306 icmp_send(m, NULL); 307 } 308 309 /* 310 * Process a received ICMP message. 311 */ 312 int 313 icmp_input(struct mbuf **mp, int *offp, int proto, int af) 314 { 315 struct ifnet *ifp; 316 317 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 318 if (ifp == NULL) { 319 m_freemp(mp); 320 return IPPROTO_DONE; 321 } 322 323 proto = icmp_input_if(ifp, mp, offp, proto, af); 324 if_put(ifp); 325 return proto; 326 } 327 328 int 329 icmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af) 330 { 331 struct mbuf *m = *mp; 332 int hlen = *offp; 333 struct icmp *icp; 334 struct ip *ip = mtod(m, struct ip *); 335 struct sockaddr_in sin; 336 int icmplen, i, code; 337 struct in_ifaddr *ia; 338 void (*ctlfunc)(int, struct sockaddr *, u_int, void *); 339 struct mbuf *opts; 340 341 /* 342 * Locate icmp structure in mbuf, and check 343 * that not corrupted and of at least minimum length. 344 */ 345 icmplen = ntohs(ip->ip_len) - hlen; 346 #ifdef ICMPPRINTFS 347 if (icmpprintfs) { 348 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN]; 349 350 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst)); 351 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src)); 352 353 printf("icmp_input from %s to %s, len %d\n", src, dst, icmplen); 354 } 355 #endif 356 if (icmplen < ICMP_MINLEN) { 357 icmpstat_inc(icps_tooshort); 358 goto freeit; 359 } 360 i = hlen + min(icmplen, ICMP_ADVLENMAX); 361 if ((m = *mp = m_pullup(m, i)) == NULL) { 362 icmpstat_inc(icps_tooshort); 363 return IPPROTO_DONE; 364 } 365 ip = mtod(m, struct ip *); 366 if (in4_cksum(m, 0, hlen, icmplen)) { 367 icmpstat_inc(icps_checksum); 368 goto freeit; 369 } 370 371 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 372 #ifdef ICMPPRINTFS 373 /* 374 * Message type specific processing. 375 */ 376 if (icmpprintfs) 377 printf("icmp_input, type %d code %d\n", icp->icmp_type, 378 icp->icmp_code); 379 #endif 380 if (icp->icmp_type > ICMP_MAXTYPE) 381 goto raw; 382 #if NPF > 0 383 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 384 switch (icp->icmp_type) { 385 /* 386 * As pf_icmp_mapping() considers redirects belonging to a 387 * diverted connection, we must include it here. 388 */ 389 case ICMP_REDIRECT: 390 /* FALLTHROUGH */ 391 /* 392 * These ICMP types map to other connections. They must be 393 * delivered to pr_ctlinput() also for diverted connections. 394 */ 395 case ICMP_UNREACH: 396 case ICMP_TIMXCEED: 397 case ICMP_PARAMPROB: 398 case ICMP_SOURCEQUENCH: 399 /* 400 * Do not use the divert-to property of the TCP or UDP 401 * rule when doing the PCB lookup for the raw socket. 402 */ 403 m->m_pkthdr.pf.flags &=~ PF_TAG_DIVERTED; 404 break; 405 default: 406 goto raw; 407 } 408 } 409 #endif /* NPF */ 410 icmpstat_inc(icps_inhist + icp->icmp_type); 411 code = icp->icmp_code; 412 switch (icp->icmp_type) { 413 414 case ICMP_UNREACH: 415 switch (code) { 416 case ICMP_UNREACH_NET: 417 case ICMP_UNREACH_HOST: 418 case ICMP_UNREACH_PROTOCOL: 419 case ICMP_UNREACH_PORT: 420 case ICMP_UNREACH_SRCFAIL: 421 code += PRC_UNREACH_NET; 422 break; 423 424 case ICMP_UNREACH_NEEDFRAG: 425 code = PRC_MSGSIZE; 426 break; 427 428 case ICMP_UNREACH_NET_UNKNOWN: 429 case ICMP_UNREACH_NET_PROHIB: 430 case ICMP_UNREACH_TOSNET: 431 code = PRC_UNREACH_NET; 432 break; 433 434 case ICMP_UNREACH_HOST_UNKNOWN: 435 case ICMP_UNREACH_ISOLATED: 436 case ICMP_UNREACH_HOST_PROHIB: 437 case ICMP_UNREACH_TOSHOST: 438 case ICMP_UNREACH_FILTER_PROHIB: 439 case ICMP_UNREACH_HOST_PRECEDENCE: 440 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 441 code = PRC_UNREACH_HOST; 442 break; 443 444 default: 445 goto badcode; 446 } 447 goto deliver; 448 449 case ICMP_TIMXCEED: 450 if (code > 1) 451 goto badcode; 452 code += PRC_TIMXCEED_INTRANS; 453 goto deliver; 454 455 case ICMP_PARAMPROB: 456 if (code > 1) 457 goto badcode; 458 code = PRC_PARAMPROB; 459 goto deliver; 460 461 case ICMP_SOURCEQUENCH: 462 if (code) 463 goto badcode; 464 code = PRC_QUENCH; 465 deliver: 466 /* 467 * Problem with datagram; advise higher level routines. 468 */ 469 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 470 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 471 icmpstat_inc(icps_badlen); 472 goto freeit; 473 } 474 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr)) 475 goto badcode; 476 #ifdef INET6 477 /* Get more contiguous data for a v6 in v4 ICMP message. */ 478 if (icp->icmp_ip.ip_p == IPPROTO_IPV6) { 479 if (icmplen < ICMP_V6ADVLENMIN || 480 icmplen < ICMP_V6ADVLEN(icp)) { 481 icmpstat_inc(icps_badlen); 482 goto freeit; 483 } 484 } 485 #endif /* INET6 */ 486 #ifdef ICMPPRINTFS 487 if (icmpprintfs) 488 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 489 #endif 490 memset(&sin, 0, sizeof(sin)); 491 sin.sin_family = AF_INET; 492 sin.sin_len = sizeof(struct sockaddr_in); 493 sin.sin_addr = icp->icmp_ip.ip_dst; 494 #if NCARP > 0 495 if (carp_lsdrop(ifp, m, AF_INET, &sin.sin_addr.s_addr, 496 &ip->ip_dst.s_addr, 1)) 497 goto freeit; 498 #endif 499 /* 500 * XXX if the packet contains [IPv4 AH TCP], we can't make a 501 * notification to TCP layer. 502 */ 503 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; 504 if (ctlfunc) 505 (*ctlfunc)(code, sintosa(&sin), m->m_pkthdr.ph_rtableid, 506 &icp->icmp_ip); 507 break; 508 509 badcode: 510 icmpstat_inc(icps_badcode); 511 break; 512 513 case ICMP_ECHO: 514 if (atomic_load_int(&icmpbmcastecho) == 0 && 515 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 516 icmpstat_inc(icps_bmcastecho); 517 break; 518 } 519 icp->icmp_type = ICMP_ECHOREPLY; 520 goto reflect; 521 522 case ICMP_TSTAMP: 523 if (atomic_load_int(&icmptstamprepl) == 0) 524 break; 525 526 if (atomic_load_int(&icmpbmcastecho) == 0 && 527 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 528 icmpstat_inc(icps_bmcastecho); 529 break; 530 } 531 if (icmplen < ICMP_TSLEN) { 532 icmpstat_inc(icps_badlen); 533 break; 534 } 535 icp->icmp_type = ICMP_TSTAMPREPLY; 536 icp->icmp_rtime = iptime(); 537 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 538 goto reflect; 539 540 case ICMP_MASKREQ: 541 if (atomic_load_int(&icmpmaskrepl) == 0) 542 break; 543 if (icmplen < ICMP_MASKLEN) { 544 icmpstat_inc(icps_badlen); 545 break; 546 } 547 /* 548 * We are not able to respond with all ones broadcast 549 * unless we receive it over a point-to-point interface. 550 */ 551 memset(&sin, 0, sizeof(sin)); 552 sin.sin_family = AF_INET; 553 sin.sin_len = sizeof(struct sockaddr_in); 554 if (ip->ip_dst.s_addr == INADDR_BROADCAST || 555 ip->ip_dst.s_addr == INADDR_ANY) 556 sin.sin_addr = ip->ip_src; 557 else 558 sin.sin_addr = ip->ip_dst; 559 if (ifp == NULL) 560 break; 561 ia = ifatoia(ifaof_ifpforaddr(sintosa(&sin), ifp)); 562 if (ia == NULL) 563 break; 564 icp->icmp_type = ICMP_MASKREPLY; 565 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 566 if (ip->ip_src.s_addr == 0) { 567 if (ifp->if_flags & IFF_BROADCAST) { 568 if (ia->ia_broadaddr.sin_addr.s_addr) 569 ip->ip_src = ia->ia_broadaddr.sin_addr; 570 else 571 ip->ip_src.s_addr = INADDR_BROADCAST; 572 } 573 else if (ifp->if_flags & IFF_POINTOPOINT) 574 ip->ip_src = ia->ia_dstaddr.sin_addr; 575 } 576 reflect: 577 #if NCARP > 0 578 if (carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr, 579 &ip->ip_dst.s_addr, 1)) 580 goto freeit; 581 #endif 582 icmpstat_inc(icps_reflect); 583 icmpstat_inc(icps_outhist + icp->icmp_type); 584 if (!icmp_reflect(m, &opts, NULL)) { 585 icmp_send(m, opts); 586 m_free(opts); 587 } 588 return IPPROTO_DONE; 589 590 case ICMP_REDIRECT: 591 { 592 struct sockaddr_in sdst; 593 struct sockaddr_in sgw; 594 struct sockaddr_in ssrc; 595 struct rtentry *newrt = NULL; 596 int i_am_router = (atomic_load_int(&ip_forwarding) != 0); 597 598 if (atomic_load_int(&icmp_rediraccept) == 0 || i_am_router) 599 goto freeit; 600 if (code > 3) 601 goto badcode; 602 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 603 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 604 icmpstat_inc(icps_badlen); 605 break; 606 } 607 /* 608 * Short circuit routing redirects to force 609 * immediate change in the kernel's routing 610 * tables. The message is also handed to anyone 611 * listening on a raw socket (e.g. the routing 612 * daemon for use in updating its tables). 613 */ 614 memset(&sdst, 0, sizeof(sdst)); 615 memset(&sgw, 0, sizeof(sgw)); 616 memset(&ssrc, 0, sizeof(ssrc)); 617 sdst.sin_family = sgw.sin_family = ssrc.sin_family = AF_INET; 618 sdst.sin_len = sgw.sin_len = ssrc.sin_len = sizeof(sdst); 619 memcpy(&sdst.sin_addr, &icp->icmp_ip.ip_dst, 620 sizeof(sdst.sin_addr)); 621 memcpy(&sgw.sin_addr, &icp->icmp_gwaddr, 622 sizeof(sgw.sin_addr)); 623 memcpy(&ssrc.sin_addr, &ip->ip_src, 624 sizeof(ssrc.sin_addr)); 625 626 #ifdef ICMPPRINTFS 627 if (icmpprintfs) { 628 char gw[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 629 630 inet_ntop(AF_INET, &icp->icmp_gwaddr, gw, sizeof(gw)); 631 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, 632 dst, sizeof(dst)); 633 634 printf("redirect dst %s to %s\n", dst, gw); 635 } 636 #endif 637 638 #if NCARP > 0 639 if (carp_lsdrop(ifp, m, AF_INET, &sdst.sin_addr.s_addr, 640 &ip->ip_dst.s_addr, 1)) 641 goto freeit; 642 #endif 643 rtredirect(sintosa(&sdst), sintosa(&sgw), 644 sintosa(&ssrc), &newrt, m->m_pkthdr.ph_rtableid); 645 if (newrt != NULL && icmp_redirtimeout > 0) { 646 rt_timer_add(newrt, &icmp_redirect_timeout_q, 647 m->m_pkthdr.ph_rtableid); 648 } 649 rtfree(newrt); 650 pfctlinput(PRC_REDIRECT_HOST, sintosa(&sdst)); 651 break; 652 } 653 /* 654 * No kernel processing for the following; 655 * just fall through to send to raw listener. 656 */ 657 case ICMP_ECHOREPLY: 658 case ICMP_ROUTERADVERT: 659 case ICMP_ROUTERSOLICIT: 660 case ICMP_TSTAMPREPLY: 661 case ICMP_IREQREPLY: 662 case ICMP_MASKREPLY: 663 case ICMP_TRACEROUTE: 664 case ICMP_DATACONVERR: 665 case ICMP_MOBILE_REDIRECT: 666 case ICMP_IPV6_WHEREAREYOU: 667 case ICMP_IPV6_IAMHERE: 668 case ICMP_MOBILE_REGREQUEST: 669 case ICMP_MOBILE_REGREPLY: 670 case ICMP_PHOTURIS: 671 default: 672 break; 673 } 674 675 raw: 676 return rip_input(mp, offp, proto, af); 677 678 freeit: 679 m_freem(m); 680 return IPPROTO_DONE; 681 } 682 683 /* 684 * Reflect the ip packet back to the source 685 */ 686 int 687 icmp_reflect(struct mbuf *m, struct mbuf **op, struct in_ifaddr *ia) 688 { 689 struct ip *ip = mtod(m, struct ip *); 690 struct mbuf *opts = NULL; 691 struct sockaddr_in sin; 692 struct rtentry *rt = NULL; 693 int optlen = (ip->ip_hl << 2) - sizeof(struct ip); 694 u_int rtableid; 695 u_int8_t pfflags; 696 697 if (!in_canforward(ip->ip_src) && 698 ((ip->ip_src.s_addr & IN_CLASSA_NET) != 699 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 700 m_freem(m); /* Bad return address */ 701 return (EHOSTUNREACH); 702 } 703 704 if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) { 705 m_freem(m); 706 return (ELOOP); 707 } 708 rtableid = m->m_pkthdr.ph_rtableid; 709 pfflags = m->m_pkthdr.pf.flags; 710 m_resethdr(m); 711 m->m_pkthdr.ph_rtableid = rtableid; 712 m->m_pkthdr.pf.flags = pfflags & PF_TAG_GENERATED; 713 714 /* 715 * If the incoming packet was addressed directly to us, 716 * use dst as the src for the reply. For broadcast, use 717 * the address which corresponds to the incoming interface. 718 */ 719 if (ia == NULL) { 720 memset(&sin, 0, sizeof(sin)); 721 sin.sin_len = sizeof(sin); 722 sin.sin_family = AF_INET; 723 sin.sin_addr = ip->ip_dst; 724 725 rt = rtalloc(sintosa(&sin), 0, rtableid); 726 if (rtisvalid(rt) && 727 ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) 728 ia = ifatoia(rt->rt_ifa); 729 } 730 731 /* 732 * The following happens if the packet was not addressed to us. 733 * Use the new source address and do a route lookup. If it fails 734 * drop the packet as there is no path to the host. 735 */ 736 if (ia == NULL) { 737 rtfree(rt); 738 739 memset(&sin, 0, sizeof(sin)); 740 sin.sin_len = sizeof(sin); 741 sin.sin_family = AF_INET; 742 sin.sin_addr = ip->ip_src; 743 744 /* keep packet in the original virtual instance */ 745 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 746 if (rt == NULL) { 747 ipstat_inc(ips_noroute); 748 m_freem(m); 749 return (EHOSTUNREACH); 750 } 751 752 ia = ifatoia(rt->rt_ifa); 753 } 754 755 ip->ip_dst = ip->ip_src; 756 ip->ip_ttl = MAXTTL; 757 758 /* It is safe to dereference ``ia'' iff ``rt'' is valid. */ 759 ip->ip_src = ia->ia_addr.sin_addr; 760 rtfree(rt); 761 762 if (optlen > 0) { 763 u_char *cp; 764 int opt, cnt; 765 u_int len; 766 767 /* 768 * Retrieve any source routing from the incoming packet; 769 * add on any record-route or timestamp options. 770 */ 771 cp = (u_char *) (ip + 1); 772 if (op && (opts = ip_srcroute(m)) == NULL && 773 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { 774 opts->m_len = sizeof(struct in_addr); 775 mtod(opts, struct in_addr *)->s_addr = 0; 776 } 777 if (op && opts) { 778 #ifdef ICMPPRINTFS 779 if (icmpprintfs) 780 printf("icmp_reflect optlen %d rt %d => ", 781 optlen, opts->m_len); 782 #endif 783 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 784 opt = cp[IPOPT_OPTVAL]; 785 if (opt == IPOPT_EOL) 786 break; 787 if (opt == IPOPT_NOP) 788 len = 1; 789 else { 790 if (cnt < IPOPT_OLEN + sizeof(*cp)) 791 break; 792 len = cp[IPOPT_OLEN]; 793 if (len < IPOPT_OLEN + sizeof(*cp) || 794 len > cnt) 795 break; 796 } 797 /* 798 * Should check for overflow, but it 799 * "can't happen" 800 */ 801 if (opt == IPOPT_RR || opt == IPOPT_TS || 802 opt == IPOPT_SECURITY) { 803 memcpy(mtod(opts, caddr_t) + 804 opts->m_len, cp, len); 805 opts->m_len += len; 806 } 807 } 808 /* Terminate & pad, if necessary */ 809 if ((cnt = opts->m_len % 4) != 0) 810 for (; cnt < 4; cnt++) { 811 *(mtod(opts, caddr_t) + opts->m_len) = 812 IPOPT_EOL; 813 opts->m_len++; 814 } 815 #ifdef ICMPPRINTFS 816 if (icmpprintfs) 817 printf("%d\n", opts->m_len); 818 #endif 819 } 820 ip_stripoptions(m); 821 } 822 m->m_flags &= ~(M_BCAST|M_MCAST); 823 if (op) 824 *op = opts; 825 826 return (0); 827 } 828 829 /* 830 * Send an icmp packet back to the ip level 831 */ 832 void 833 icmp_send(struct mbuf *m, struct mbuf *opts) 834 { 835 struct ip *ip = mtod(m, struct ip *); 836 int hlen; 837 struct icmp *icp; 838 839 hlen = ip->ip_hl << 2; 840 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 841 icp->icmp_cksum = 0; 842 m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT; 843 #ifdef ICMPPRINTFS 844 if (icmpprintfs) { 845 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN]; 846 847 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst)); 848 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src)); 849 850 printf("icmp_send dst %s src %s\n", dst, src); 851 } 852 #endif 853 /* 854 * ip_send() cannot handle IP options properly. So in case we have 855 * options fill out the IP header here and use ip_send_raw() instead. 856 */ 857 if (opts != NULL) { 858 m = ip_insertoptions(m, opts, &hlen); 859 ip = mtod(m, struct ip *); 860 ip->ip_hl = (hlen >> 2); 861 ip->ip_v = IPVERSION; 862 ip->ip_off &= htons(IP_DF); 863 ip->ip_id = htons(ip_randomid()); 864 ipstat_inc(ips_localout); 865 ip_send_raw(m); 866 } else 867 ip_send(m); 868 } 869 870 u_int32_t 871 iptime(void) 872 { 873 struct timeval atv; 874 u_long t; 875 876 microtime(&atv); 877 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 878 return (htonl(t)); 879 } 880 881 int 882 icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 883 size_t newlen) 884 { 885 int error; 886 887 /* All sysctl names at this level are terminal. */ 888 if (namelen != 1) 889 return (ENOTDIR); 890 891 switch (name[0]) { 892 case ICMPCTL_REDIRTIMEOUT: { 893 size_t savelen = *oldlenp; 894 895 if ((error = sysctl_vslock(oldp, savelen))) 896 break; 897 NET_LOCK(); 898 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, 899 &icmp_redirtimeout, 0, INT_MAX); 900 rt_timer_queue_change(&icmp_redirect_timeout_q, 901 icmp_redirtimeout); 902 NET_UNLOCK(); 903 sysctl_vsunlock(oldp, savelen); 904 break; 905 } 906 case ICMPCTL_STATS: 907 error = icmp_sysctl_icmpstat(oldp, oldlenp, newp); 908 break; 909 910 default: 911 error = sysctl_bounded_arr(icmpctl_vars, nitems(icmpctl_vars), 912 name, namelen, oldp, oldlenp, newp, newlen); 913 break; 914 } 915 916 return (error); 917 } 918 919 int 920 icmp_sysctl_icmpstat(void *oldp, size_t *oldlenp, void *newp) 921 { 922 uint64_t counters[icps_ncounters]; 923 struct icmpstat icmpstat; 924 u_long *words = (u_long *)&icmpstat; 925 int i; 926 927 CTASSERT(sizeof(icmpstat) == (nitems(counters) * sizeof(u_long))); 928 memset(&icmpstat, 0, sizeof icmpstat); 929 counters_read(icmpcounters, counters, nitems(counters), NULL); 930 931 for (i = 0; i < nitems(counters); i++) 932 words[i] = (u_long)counters[i]; 933 934 return (sysctl_rdstruct(oldp, oldlenp, newp, 935 &icmpstat, sizeof(icmpstat))); 936 } 937 938 struct rtentry * 939 icmp_mtudisc_clone(struct in_addr dst, u_int rtableid, int ipsec) 940 { 941 struct sockaddr_in sin; 942 struct rtentry *rt; 943 int error; 944 945 memset(&sin, 0, sizeof(sin)); 946 sin.sin_family = AF_INET; 947 sin.sin_len = sizeof(sin); 948 sin.sin_addr = dst; 949 950 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 951 952 /* Check if the route is actually usable */ 953 if (!rtisvalid(rt)) 954 goto bad; 955 /* IPsec needs the route only for PMTU, it can use reject for that */ 956 if (!ipsec && (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE))) 957 goto bad; 958 959 /* 960 * No PMTU for local routes and permanent neighbors, 961 * ARP and NDP use the same expire timer as the route. 962 */ 963 if (ISSET(rt->rt_flags, RTF_LOCAL) || 964 (ISSET(rt->rt_flags, RTF_LLINFO) && rt->rt_expire == 0)) 965 goto bad; 966 967 /* If we didn't get a host route, allocate one */ 968 if ((rt->rt_flags & RTF_HOST) == 0) { 969 struct rtentry *nrt; 970 struct rt_addrinfo info; 971 struct sockaddr_rtlabel sa_rl; 972 973 memset(&info, 0, sizeof(info)); 974 info.rti_ifa = rt->rt_ifa; 975 info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC; 976 info.rti_info[RTAX_DST] = sintosa(&sin); 977 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 978 info.rti_info[RTAX_LABEL] = 979 rtlabel_id2sa(rt->rt_labelid, &sa_rl); 980 981 error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt, 982 rtableid); 983 if (error) 984 goto bad; 985 nrt->rt_rmx = rt->rt_rmx; 986 rtfree(rt); 987 rt = nrt; 988 rtm_send(rt, RTM_ADD, 0, rtableid); 989 } 990 error = rt_timer_add(rt, &ip_mtudisc_timeout_q, rtableid); 991 if (error) 992 goto bad; 993 994 return (rt); 995 bad: 996 rtfree(rt); 997 return (NULL); 998 } 999 1000 /* Table of common MTUs: */ 1001 static const u_short mtu_table[] = { 1002 65535, 65280, 32000, 17914, 9180, 8166, 1003 4352, 2002, 1492, 1006, 508, 296, 68, 0 1004 }; 1005 1006 void 1007 icmp_mtudisc(struct icmp *icp, u_int rtableid) 1008 { 1009 struct rtentry *rt; 1010 struct ifnet *ifp; 1011 u_int rtmtu; 1012 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */ 1013 1014 rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0); 1015 if (rt == NULL) 1016 return; 1017 1018 ifp = if_get(rt->rt_ifidx); 1019 if (ifp == NULL) { 1020 rtfree(rt); 1021 return; 1022 } 1023 1024 rtmtu = atomic_load_int(&rt->rt_mtu); 1025 if (mtu == 0) { 1026 int i = 0; 1027 1028 mtu = ntohs(icp->icmp_ip.ip_len); 1029 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */ 1030 if (mtu > rtmtu && rtmtu != 0) 1031 mtu -= (icp->icmp_ip.ip_hl << 2); 1032 1033 /* If we still can't guess a value, try the route */ 1034 if (mtu == 0) { 1035 mtu = rtmtu; 1036 1037 /* If no route mtu, default to the interface mtu */ 1038 1039 if (mtu == 0) 1040 mtu = ifp->if_mtu; 1041 } 1042 1043 for (i = 0; i < nitems(mtu_table); i++) 1044 if (mtu > mtu_table[i]) { 1045 mtu = mtu_table[i]; 1046 break; 1047 } 1048 } 1049 1050 /* 1051 * XXX: RTV_MTU is overloaded, since the admin can set it 1052 * to turn off PMTU for a route, and the kernel can 1053 * set it to indicate a serious problem with PMTU 1054 * on a route. We should be using a separate flag 1055 * for the kernel to indicate this. 1056 */ 1057 if ((rt->rt_locks & RTV_MTU) == 0) { 1058 if (mtu < 296 || mtu > ifp->if_mtu) 1059 rt->rt_locks |= RTV_MTU; 1060 else if (rtmtu > mtu || rtmtu == 0) 1061 atomic_cas_uint(&rt->rt_mtu, rtmtu, mtu); 1062 } 1063 1064 if_put(ifp); 1065 rtfree(rt); 1066 } 1067 1068 void 1069 icmp_mtudisc_timeout(struct rtentry *rt, u_int rtableid) 1070 { 1071 struct ifnet *ifp; 1072 1073 NET_ASSERT_LOCKED(); 1074 1075 ifp = if_get(rt->rt_ifidx); 1076 if (ifp == NULL) 1077 return; 1078 1079 if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { 1080 void (*ctlfunc)(int, struct sockaddr *, u_int, void *); 1081 struct sockaddr_in sin; 1082 1083 sin = *satosin(rt_key(rt)); 1084 1085 rtdeletemsg(rt, ifp, rtableid); 1086 1087 /* Notify TCP layer of increased Path MTU estimate */ 1088 ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput; 1089 if (ctlfunc) 1090 (*ctlfunc)(PRC_MTUINC, sintosa(&sin), 1091 rtableid, NULL); 1092 } else { 1093 if ((rt->rt_locks & RTV_MTU) == 0) 1094 atomic_store_int(&rt->rt_mtu, 0); 1095 } 1096 1097 if_put(ifp); 1098 } 1099 1100 /* 1101 * Perform rate limit check. 1102 * Returns 0 if it is okay to send the icmp packet. 1103 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate 1104 * limitation. 1105 * 1106 * XXX per-destination/type check necessary? 1107 */ 1108 int 1109 icmp_ratelimit(const struct in_addr *dst, const int type, const int code) 1110 { 1111 int icmperrppslim_local = atomic_load_int(&icmperrppslim); 1112 /* PPS limit */ 1113 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count, 1114 icmperrppslim_local)) 1115 return 1; /* The packet is subject to rate limit */ 1116 return 0; /* okay to send */ 1117 } 1118 1119 int 1120 icmp_do_exthdr(struct mbuf *m, u_int16_t class, u_int8_t ctype, void *buf, 1121 size_t len) 1122 { 1123 struct ip *ip = mtod(m, struct ip *); 1124 int hlen, off; 1125 struct mbuf *n; 1126 struct icmp *icp; 1127 struct icmp_ext_hdr *ieh; 1128 struct { 1129 struct icmp_ext_hdr ieh; 1130 struct icmp_ext_obj_hdr ieo; 1131 } hdr; 1132 1133 hlen = ip->ip_hl << 2; 1134 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 1135 if (icp->icmp_type != ICMP_TIMXCEED && icp->icmp_type != ICMP_UNREACH && 1136 icp->icmp_type != ICMP_PARAMPROB) 1137 /* exthdr not supported */ 1138 return (0); 1139 1140 if (icp->icmp_length != 0) 1141 /* exthdr already present, giving up */ 1142 return (0); 1143 1144 /* the actual offset starts after the common ICMP header */ 1145 hlen += ICMP_MINLEN; 1146 /* exthdr must start on a word boundary */ 1147 off = roundup(ntohs(ip->ip_len) - hlen, sizeof(u_int32_t)); 1148 /* ... and at an offset of ICMP_EXT_OFFSET or bigger */ 1149 off = max(off, ICMP_EXT_OFFSET); 1150 icp->icmp_length = off / sizeof(u_int32_t); 1151 1152 memset(&hdr, 0, sizeof(hdr)); 1153 hdr.ieh.ieh_version = ICMP_EXT_HDR_VERSION; 1154 hdr.ieo.ieo_length = htons(sizeof(struct icmp_ext_obj_hdr) + len); 1155 hdr.ieo.ieo_cnum = class; 1156 hdr.ieo.ieo_ctype = ctype; 1157 1158 if (m_copyback(m, hlen + off, sizeof(hdr), &hdr, M_NOWAIT) || 1159 m_copyback(m, hlen + off + sizeof(hdr), len, buf, M_NOWAIT)) { 1160 m_freem(m); 1161 return (ENOBUFS); 1162 } 1163 1164 /* calculate checksum */ 1165 n = m_getptr(m, hlen + off, &off); 1166 if (n == NULL) 1167 panic("icmp_do_exthdr: m_getptr failure"); 1168 ieh = (struct icmp_ext_hdr *)(mtod(n, caddr_t) + off); 1169 ieh->ieh_cksum = in4_cksum(n, 0, off, sizeof(hdr) + len); 1170 1171 ip->ip_len = htons(m->m_pkthdr.len); 1172 1173 return (0); 1174 } 1175