1 /* $OpenBSD: ip_ipip.c,v 1.105 2024/08/22 10:58:31 mvs Exp $ */ 2 /* 3 * The authors of this code are John Ioannidis (ji@tla.org), 4 * Angelos D. Keromytis (kermit@csd.uch.gr) and 5 * Niels Provos (provos@physnet.uni-hamburg.de). 6 * 7 * The original version of this code was written by John Ioannidis 8 * for BSD/OS in Athens, Greece, in November 1995. 9 * 10 * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, 11 * by Angelos D. Keromytis. 12 * 13 * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis 14 * and Niels Provos. 15 * 16 * Additional features in 1999 by Angelos D. Keromytis. 17 * 18 * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, 19 * Angelos D. Keromytis and Niels Provos. 20 * Copyright (c) 2001, Angelos D. Keromytis. 21 * 22 * Permission to use, copy, and modify this software with or without fee 23 * is hereby granted, provided that this entire notice is included in 24 * all copies of any software which is or includes a copy or 25 * modification of this software. 26 * You may use this code under the GNU public license if you so wish. Please 27 * contribute changes back to the authors under this freer than GPL license 28 * so that we may further the use of strong encryption without limitations to 29 * all. 30 * 31 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR 32 * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY 33 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE 34 * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR 35 * PURPOSE. 36 */ 37 38 /* 39 * IP-inside-IP processing 40 */ 41 42 #include "bpfilter.h" 43 #include "gif.h" 44 #include "pf.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/mbuf.h> 49 #include <sys/socket.h> 50 #include <sys/sysctl.h> 51 52 #include <net/if.h> 53 #include <net/if_types.h> 54 #include <net/if_var.h> 55 #include <net/route.h> 56 #include <net/netisr.h> 57 #include <net/bpf.h> 58 59 #include <netinet/in.h> 60 #include <netinet/ip.h> 61 #include <netinet/in_pcb.h> 62 #include <netinet/ip_var.h> 63 #include <netinet6/ip6_var.h> 64 #include <netinet/ip_ecn.h> 65 #include <netinet/ip_ipip.h> 66 67 #ifdef MROUTING 68 #include <netinet/ip_mroute.h> 69 #endif 70 71 #if NPF > 0 72 #include <net/pfvar.h> 73 #endif 74 75 /* 76 * Locks used to protect data: 77 * a atomic 78 */ 79 80 #ifdef ENCDEBUG 81 #define DPRINTF(fmt, args...) \ 82 do { \ 83 if (encdebug) \ 84 printf("%s: " fmt "\n", __func__, ## args); \ 85 } while (0) 86 #else 87 #define DPRINTF(fmt, args...) \ 88 do { } while (0) 89 #endif 90 91 /* 92 * We can control the acceptance of IP4 packets by altering the sysctl 93 * net.inet.ipip.allow value. Zero means drop them, all else is acceptance. 94 */ 95 int ipip_allow = 0; /* [a] */ 96 97 struct cpumem *ipipcounters; 98 99 void 100 ipip_init(void) 101 { 102 ipipcounters = counters_alloc(ipips_ncounters); 103 } 104 105 /* 106 * Really only a wrapper for ipip_input_if(), for use with pr_input. 107 */ 108 int 109 ipip_input(struct mbuf **mp, int *offp, int nxt, int af) 110 { 111 struct ifnet *ifp; 112 int ipip_allow_local = atomic_load_int(&ipip_allow); 113 114 /* If we do not accept IP-in-IP explicitly, drop. */ 115 if (ipip_allow_local == 0 && ((*mp)->m_flags & (M_AUTH|M_CONF)) == 0) { 116 DPRINTF("dropped due to policy"); 117 ipipstat_inc(ipips_pdrops); 118 m_freemp(mp); 119 return IPPROTO_DONE; 120 } 121 122 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 123 if (ifp == NULL) { 124 m_freemp(mp); 125 return IPPROTO_DONE; 126 } 127 nxt = ipip_input_if(mp, offp, nxt, af, ipip_allow_local, ifp); 128 if_put(ifp); 129 130 return nxt; 131 } 132 133 /* 134 * ipip_input gets called when we receive an IP{46} encapsulated packet, 135 * either because we got it at a real interface, or because AH or ESP 136 * were being used in tunnel mode (in which case the ph_ifidx element 137 * will contain the index of the encX interface associated with the 138 * tunnel. 139 */ 140 141 int 142 ipip_input_if(struct mbuf **mp, int *offp, int proto, int oaf, int allow, 143 struct ifnet *ifp) 144 { 145 struct mbuf *m = *mp; 146 struct sockaddr_in *sin; 147 struct ip *ip; 148 #ifdef INET6 149 struct sockaddr_in6 *sin6; 150 struct ip6_hdr *ip6; 151 #endif 152 int mode, hlen; 153 u_int8_t itos, otos; 154 sa_family_t iaf; 155 156 ipipstat_inc(ipips_ipackets); 157 158 switch (oaf) { 159 case AF_INET: 160 hlen = sizeof(struct ip); 161 break; 162 #ifdef INET6 163 case AF_INET6: 164 hlen = sizeof(struct ip6_hdr); 165 break; 166 #endif 167 default: 168 unhandled_af(oaf); 169 } 170 171 /* Bring the IP header in the first mbuf, if not there already */ 172 if (m->m_len < hlen) { 173 if ((m = *mp = m_pullup(m, hlen)) == NULL) { 174 DPRINTF("m_pullup() failed"); 175 ipipstat_inc(ipips_hdrops); 176 goto bad; 177 } 178 } 179 180 /* Keep outer ecn field. */ 181 switch (oaf) { 182 case AF_INET: 183 ip = mtod(m, struct ip *); 184 otos = ip->ip_tos; 185 break; 186 #ifdef INET6 187 case AF_INET6: 188 ip6 = mtod(m, struct ip6_hdr *); 189 otos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 190 break; 191 #endif 192 } 193 194 /* Remove outer IP header */ 195 KASSERT(*offp > 0); 196 m_adj(m, *offp); 197 *offp = 0; 198 ip = NULL; 199 #ifdef INET6 200 ip6 = NULL; 201 #endif 202 203 switch (proto) { 204 case IPPROTO_IPV4: 205 hlen = sizeof(struct ip); 206 break; 207 208 #ifdef INET6 209 case IPPROTO_IPV6: 210 hlen = sizeof(struct ip6_hdr); 211 break; 212 #endif 213 default: 214 ipipstat_inc(ipips_family); 215 goto bad; 216 } 217 218 /* Sanity check */ 219 if (m->m_pkthdr.len < hlen) { 220 ipipstat_inc(ipips_hdrops); 221 goto bad; 222 } 223 224 /* 225 * Bring the inner header into the first mbuf, if not there already. 226 */ 227 if (m->m_len < hlen) { 228 if ((m = *mp = m_pullup(m, hlen)) == NULL) { 229 DPRINTF("m_pullup() failed"); 230 ipipstat_inc(ipips_hdrops); 231 goto bad; 232 } 233 } 234 235 /* 236 * RFC 1853 specifies that the inner TTL should not be touched on 237 * decapsulation. There's no reason this comment should be here, but 238 * this is as good as any a position. 239 */ 240 241 /* Some sanity checks in the inner IP header */ 242 switch (proto) { 243 case IPPROTO_IPV4: 244 iaf = AF_INET; 245 ip = mtod(m, struct ip *); 246 hlen = ip->ip_hl << 2; 247 if (m->m_pkthdr.len < hlen) { 248 ipipstat_inc(ipips_hdrops); 249 goto bad; 250 } 251 itos = ip->ip_tos; 252 mode = m->m_flags & (M_AUTH|M_CONF) ? 253 ECN_ALLOWED_IPSEC : ECN_ALLOWED; 254 if (!ip_ecn_egress(mode, &otos, &itos)) { 255 DPRINTF("ip_ecn_egress() failed"); 256 ipipstat_inc(ipips_pdrops); 257 goto bad; 258 } 259 /* re-calculate the checksum if ip_tos was changed */ 260 if (itos != ip->ip_tos) 261 ip_tos_patch(ip, itos); 262 break; 263 #ifdef INET6 264 case IPPROTO_IPV6: 265 iaf = AF_INET6; 266 ip6 = mtod(m, struct ip6_hdr *); 267 itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 268 if (!ip_ecn_egress(ECN_ALLOWED, &otos, &itos)) { 269 DPRINTF("ip_ecn_egress() failed"); 270 ipipstat_inc(ipips_pdrops); 271 goto bad; 272 } 273 ip6->ip6_flow &= ~htonl(0xff << 20); 274 ip6->ip6_flow |= htonl((u_int32_t) itos << 20); 275 break; 276 #endif 277 } 278 279 /* Check for local address spoofing. */ 280 if (!(ifp->if_flags & IFF_LOOPBACK) && allow != 2) { 281 struct sockaddr_storage ss; 282 struct rtentry *rt; 283 284 memset(&ss, 0, sizeof(ss)); 285 286 if (ip) { 287 sin = (struct sockaddr_in *)&ss; 288 sin->sin_family = AF_INET; 289 sin->sin_len = sizeof(*sin); 290 sin->sin_addr = ip->ip_src; 291 #ifdef INET6 292 } else if (ip6) { 293 sin6 = (struct sockaddr_in6 *)&ss; 294 sin6->sin6_family = AF_INET6; 295 sin6->sin6_len = sizeof(*sin6); 296 sin6->sin6_addr = ip6->ip6_src; 297 #endif /* INET6 */ 298 } 299 rt = rtalloc(sstosa(&ss), 0, m->m_pkthdr.ph_rtableid); 300 if ((rt != NULL) && (rt->rt_flags & RTF_LOCAL)) { 301 ipipstat_inc(ipips_spoof); 302 rtfree(rt); 303 goto bad; 304 } 305 rtfree(rt); 306 } 307 308 /* Statistics */ 309 ipipstat_add(ipips_ibytes, m->m_pkthdr.len - hlen); 310 311 #if NBPFILTER > 0 && NGIF > 0 312 if (ifp->if_type == IFT_GIF && ifp->if_bpf != NULL) 313 bpf_mtap_af(ifp->if_bpf, iaf, m, BPF_DIRECTION_IN); 314 #endif 315 #if NPF > 0 316 pf_pkt_addr_changed(m); 317 #endif 318 319 /* 320 * Interface pointer stays the same; if no IPsec processing has 321 * been done (or will be done), this will point to a normal 322 * interface. Otherwise, it'll point to an enc interface, which 323 * will allow a packet filter to distinguish between secure and 324 * untrusted packets. 325 */ 326 327 switch (proto) { 328 case IPPROTO_IPV4: 329 return ip_input_if(mp, offp, proto, oaf, ifp); 330 #ifdef INET6 331 case IPPROTO_IPV6: 332 return ip6_input_if(mp, offp, proto, oaf, ifp); 333 #endif 334 } 335 bad: 336 m_freemp(mp); 337 return IPPROTO_DONE; 338 } 339 340 int 341 ipip_output(struct mbuf **mp, struct tdb *tdb) 342 { 343 struct mbuf *m = *mp; 344 u_int8_t tp, otos, itos; 345 u_int64_t obytes; 346 struct ip *ipo; 347 #ifdef INET6 348 struct ip6_hdr *ip6, *ip6o; 349 #endif /* INET6 */ 350 #ifdef ENCDEBUG 351 char buf[INET6_ADDRSTRLEN]; 352 #endif 353 int error; 354 355 /* XXX Deal with empty TDB source/destination addresses. */ 356 357 m_copydata(m, 0, 1, &tp); 358 tp = (tp >> 4) & 0xff; /* Get the IP version number. */ 359 360 switch (tdb->tdb_dst.sa.sa_family) { 361 case AF_INET: 362 if (tdb->tdb_src.sa.sa_family != AF_INET || 363 tdb->tdb_src.sin.sin_addr.s_addr == INADDR_ANY || 364 tdb->tdb_dst.sin.sin_addr.s_addr == INADDR_ANY) { 365 366 DPRINTF("unspecified tunnel endpoint address " 367 "in SA %s/%08x", 368 ipsp_address(&tdb->tdb_dst, buf, sizeof(buf)), 369 ntohl(tdb->tdb_spi)); 370 371 ipipstat_inc(ipips_unspec); 372 error = EINVAL; 373 goto drop; 374 } 375 376 M_PREPEND(*mp, sizeof(struct ip), M_DONTWAIT); 377 if (*mp == NULL) { 378 DPRINTF("M_PREPEND failed"); 379 ipipstat_inc(ipips_hdrops); 380 error = ENOBUFS; 381 goto drop; 382 } 383 m = *mp; 384 385 ipo = mtod(m, struct ip *); 386 387 ipo->ip_v = IPVERSION; 388 ipo->ip_hl = 5; 389 ipo->ip_len = htons(m->m_pkthdr.len); 390 ipo->ip_ttl = ip_defttl; 391 ipo->ip_sum = 0; 392 ipo->ip_src = tdb->tdb_src.sin.sin_addr; 393 ipo->ip_dst = tdb->tdb_dst.sin.sin_addr; 394 395 /* 396 * We do the htons() to prevent snoopers from determining our 397 * endianness. 398 */ 399 ipo->ip_id = htons(ip_randomid()); 400 401 /* If the inner protocol is IP... */ 402 if (tp == IPVERSION) { 403 /* Save ECN notification */ 404 m_copydata(m, sizeof(struct ip) + 405 offsetof(struct ip, ip_tos), 406 sizeof(u_int8_t), (caddr_t) &itos); 407 408 ipo->ip_p = IPPROTO_IPIP; 409 410 /* 411 * We should be keeping tunnel soft-state and 412 * send back ICMPs if needed. 413 */ 414 m_copydata(m, sizeof(struct ip) + 415 offsetof(struct ip, ip_off), 416 sizeof(u_int16_t), (caddr_t) &ipo->ip_off); 417 ipo->ip_off = ntohs(ipo->ip_off); 418 ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK); 419 ipo->ip_off = htons(ipo->ip_off); 420 } 421 #ifdef INET6 422 else if (tp == (IPV6_VERSION >> 4)) { 423 u_int32_t itos32; 424 425 /* Save ECN notification. */ 426 m_copydata(m, sizeof(struct ip) + 427 offsetof(struct ip6_hdr, ip6_flow), 428 sizeof(u_int32_t), (caddr_t) &itos32); 429 itos = ntohl(itos32) >> 20; 430 ipo->ip_p = IPPROTO_IPV6; 431 ipo->ip_off = 0; 432 } 433 #endif /* INET6 */ 434 else { 435 ipipstat_inc(ipips_family); 436 error = EAFNOSUPPORT; 437 goto drop; 438 } 439 440 otos = 0; 441 ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); 442 ipo->ip_tos = otos; 443 444 obytes = m->m_pkthdr.len - sizeof(struct ip); 445 if (tdb->tdb_xform->xf_type == XF_IP4) 446 tdb->tdb_cur_bytes += obytes; 447 break; 448 449 #ifdef INET6 450 case AF_INET6: 451 if (IN6_IS_ADDR_UNSPECIFIED(&tdb->tdb_dst.sin6.sin6_addr) || 452 tdb->tdb_src.sa.sa_family != AF_INET6 || 453 IN6_IS_ADDR_UNSPECIFIED(&tdb->tdb_src.sin6.sin6_addr)) { 454 455 DPRINTF("unspecified tunnel endpoint address " 456 "in SA %s/%08x", 457 ipsp_address(&tdb->tdb_dst, buf, sizeof(buf)), 458 ntohl(tdb->tdb_spi)); 459 460 ipipstat_inc(ipips_unspec); 461 error = EINVAL; 462 goto drop; 463 } 464 465 /* If the inner protocol is IPv6, clear link local scope */ 466 if (tp == (IPV6_VERSION >> 4)) { 467 /* scoped address handling */ 468 ip6 = mtod(m, struct ip6_hdr *); 469 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 470 ip6->ip6_src.s6_addr16[1] = 0; 471 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 472 ip6->ip6_dst.s6_addr16[1] = 0; 473 } 474 475 M_PREPEND(*mp, sizeof(struct ip6_hdr), M_DONTWAIT); 476 if (*mp == NULL) { 477 DPRINTF("M_PREPEND failed"); 478 ipipstat_inc(ipips_hdrops); 479 error = ENOBUFS; 480 goto drop; 481 } 482 m = *mp; 483 484 /* Initialize IPv6 header */ 485 ip6o = mtod(m, struct ip6_hdr *); 486 ip6o->ip6_flow = 0; 487 ip6o->ip6_vfc &= ~IPV6_VERSION_MASK; 488 ip6o->ip6_vfc |= IPV6_VERSION; 489 ip6o->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6o)); 490 ip6o->ip6_hlim = ip6_defhlim; 491 in6_embedscope(&ip6o->ip6_src, &tdb->tdb_src.sin6, NULL, NULL); 492 in6_embedscope(&ip6o->ip6_dst, &tdb->tdb_dst.sin6, NULL, NULL); 493 494 if (tp == IPVERSION) { 495 /* Save ECN notification */ 496 m_copydata(m, sizeof(struct ip6_hdr) + 497 offsetof(struct ip, ip_tos), sizeof(u_int8_t), 498 (caddr_t) &itos); 499 500 /* This is really IPVERSION. */ 501 ip6o->ip6_nxt = IPPROTO_IPIP; 502 } 503 else 504 if (tp == (IPV6_VERSION >> 4)) { 505 u_int32_t itos32; 506 507 /* Save ECN notification. */ 508 m_copydata(m, sizeof(struct ip6_hdr) + 509 offsetof(struct ip6_hdr, ip6_flow), 510 sizeof(u_int32_t), (caddr_t) &itos32); 511 itos = ntohl(itos32) >> 20; 512 513 ip6o->ip6_nxt = IPPROTO_IPV6; 514 } else { 515 ipipstat_inc(ipips_family); 516 error = EAFNOSUPPORT; 517 goto drop; 518 } 519 520 otos = 0; 521 ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); 522 ip6o->ip6_flow |= htonl((u_int32_t) otos << 20); 523 524 obytes = m->m_pkthdr.len - sizeof(struct ip6_hdr); 525 if (tdb->tdb_xform->xf_type == XF_IP4) 526 tdb->tdb_cur_bytes += obytes; 527 break; 528 #endif /* INET6 */ 529 530 default: 531 DPRINTF("unsupported protocol family %d", 532 tdb->tdb_dst.sa.sa_family); 533 ipipstat_inc(ipips_family); 534 error = EPFNOSUPPORT; 535 goto drop; 536 } 537 538 ipipstat_pkt(ipips_opackets, ipips_obytes, obytes); 539 return 0; 540 541 drop: 542 m_freemp(mp); 543 return error; 544 } 545 546 #ifdef IPSEC 547 int 548 ipe4_attach(void) 549 { 550 return 0; 551 } 552 553 int 554 ipe4_init(struct tdb *tdbp, const struct xformsw *xsp, struct ipsecinit *ii) 555 { 556 tdbp->tdb_xform = xsp; 557 return 0; 558 } 559 560 int 561 ipe4_zeroize(struct tdb *tdbp) 562 { 563 return 0; 564 } 565 566 int 567 ipe4_input(struct mbuf **mp, struct tdb *tdb, int hlen, int proto) 568 { 569 /* This is a rather serious mistake, so no conditional printing. */ 570 printf("%s: should never be called\n", __func__); 571 m_freemp(mp); 572 return EINVAL; 573 } 574 #endif /* IPSEC */ 575 576 int 577 ipip_sysctl_ipipstat(void *oldp, size_t *oldlenp, void *newp) 578 { 579 struct ipipstat ipipstat; 580 581 CTASSERT(sizeof(ipipstat) == (ipips_ncounters * sizeof(uint64_t))); 582 memset(&ipipstat, 0, sizeof ipipstat); 583 counters_read(ipipcounters, (uint64_t *)&ipipstat, ipips_ncounters, 584 NULL); 585 return (sysctl_rdstruct(oldp, oldlenp, newp, 586 &ipipstat, sizeof(ipipstat))); 587 } 588 589 int 590 ipip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 591 size_t newlen) 592 { 593 /* All sysctl names at this level are terminal. */ 594 if (namelen != 1) 595 return (ENOTDIR); 596 597 switch (name[0]) { 598 case IPIPCTL_ALLOW: 599 return (sysctl_int_bounded(oldp, oldlenp, newp, newlen, 600 &ipip_allow, 0, 2)); 601 case IPIPCTL_STATS: 602 return (ipip_sysctl_ipipstat(oldp, oldlenp, newp)); 603 default: 604 return (ENOPROTOOPT); 605 } 606 /* NOTREACHED */ 607 } 608