1 /* $NetBSD: npf_inet.c,v 1.23 2013/08/23 10:37:03 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2012 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Various protocol related helper routines. 34 * 35 * This layer manipulates npf_cache_t structure i.e. caches requested headers 36 * and stores which information was cached in the information bit field. 37 * It is also responsibility of this layer to update or invalidate the cache 38 * on rewrites (e.g. by translation routines). 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.23 2013/08/23 10:37:03 rmind Exp $"); 43 44 #include <sys/param.h> 45 #include <sys/types.h> 46 47 #include <net/pfil.h> 48 #include <net/if.h> 49 #include <net/ethertypes.h> 50 #include <net/if_ether.h> 51 52 #include <netinet/in_systm.h> 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip6.h> 56 #include <netinet/tcp.h> 57 #include <netinet/udp.h> 58 #include <netinet/ip_icmp.h> 59 60 #include "npf_impl.h" 61 62 /* 63 * npf_fixup{16,32}_cksum: update IPv4 checksum. 64 */ 65 66 uint16_t 67 npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum) 68 { 69 uint32_t sum; 70 71 /* 72 * RFC 1624: 73 * HC' = ~(~HC + ~m + m') 74 */ 75 sum = ~ntohs(cksum) & 0xffff; 76 sum += (~ntohs(odatum) & 0xffff) + ntohs(ndatum); 77 sum = (sum >> 16) + (sum & 0xffff); 78 sum += (sum >> 16); 79 80 return htons(~sum & 0xffff); 81 } 82 83 uint16_t 84 npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum) 85 { 86 87 cksum = npf_fixup16_cksum(cksum, odatum & 0xffff, ndatum & 0xffff); 88 cksum = npf_fixup16_cksum(cksum, odatum >> 16, ndatum >> 16); 89 return cksum; 90 } 91 92 /* 93 * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6. 94 */ 95 uint16_t 96 npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr, 97 const npf_addr_t *naddr) 98 { 99 const uint32_t *oip32 = (const uint32_t *)oaddr; 100 const uint32_t *nip32 = (const uint32_t *)naddr; 101 102 KASSERT(sz % sizeof(uint32_t) == 0); 103 do { 104 cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++); 105 sz -= sizeof(uint32_t); 106 } while (sz); 107 108 return cksum; 109 } 110 111 /* 112 * npf_addr_sum: provide IP address as a summed (if needed) 32-bit integer. 113 * Note: used for hash function. 114 */ 115 uint32_t 116 npf_addr_sum(const int sz, const npf_addr_t *a1, const npf_addr_t *a2) 117 { 118 uint32_t mix = 0; 119 int i; 120 121 KASSERT(sz > 0 && a1 != NULL && a2 != NULL); 122 123 for (i = 0; i < (sz >> 2); i++) { 124 mix += a1->s6_addr32[i]; 125 mix += a2->s6_addr32[i]; 126 } 127 return mix; 128 } 129 130 /* 131 * npf_addr_mask: apply the mask to a given address and store the result. 132 */ 133 void 134 npf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask, 135 const int alen, npf_addr_t *out) 136 { 137 const int nwords = alen >> 2; 138 uint_fast8_t length = mask; 139 140 /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */ 141 KASSERT(length <= NPF_MAX_NETMASK); 142 143 for (int i = 0; i < nwords; i++) { 144 uint32_t wordmask; 145 146 if (length >= 32) { 147 wordmask = htonl(0xffffffff); 148 length -= 32; 149 } else if (length) { 150 wordmask = htonl(0xffffffff << (32 - length)); 151 length = 0; 152 } else { 153 wordmask = 0; 154 } 155 out->s6_addr32[i] = addr->s6_addr32[i] & wordmask; 156 } 157 } 158 159 /* 160 * npf_addr_cmp: compare two addresses, either IPv4 or IPv6. 161 * 162 * => Return 0 if equal and negative/positive if less/greater accordingly. 163 * => Ignore the mask, if NPF_NO_NETMASK is specified. 164 */ 165 int 166 npf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1, 167 const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen) 168 { 169 npf_addr_t realaddr1, realaddr2; 170 171 if (mask1 != NPF_NO_NETMASK) { 172 npf_addr_mask(addr1, mask1, alen, &realaddr1); 173 addr1 = &realaddr1; 174 } 175 if (mask2 != NPF_NO_NETMASK) { 176 npf_addr_mask(addr2, mask2, alen, &realaddr2); 177 addr2 = &realaddr2; 178 } 179 return memcmp(addr1, addr2, alen); 180 } 181 182 /* 183 * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length. 184 * 185 * => Returns all values in host byte-order. 186 */ 187 int 188 npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win) 189 { 190 const struct tcphdr *th = npc->npc_l4.tcp; 191 u_int thlen; 192 193 KASSERT(npf_iscached(npc, NPC_TCP)); 194 195 *seq = ntohl(th->th_seq); 196 *ack = ntohl(th->th_ack); 197 *win = (uint32_t)ntohs(th->th_win); 198 thlen = th->th_off << 2; 199 200 if (npf_iscached(npc, NPC_IP4)) { 201 const struct ip *ip = npc->npc_ip.v4; 202 return ntohs(ip->ip_len) - npc->npc_hlen - thlen; 203 } else if (npf_iscached(npc, NPC_IP6)) { 204 const struct ip6_hdr *ip6 = npc->npc_ip.v6; 205 return ntohs(ip6->ip6_plen) - thlen; 206 } 207 return 0; 208 } 209 210 /* 211 * npf_fetch_tcpopts: parse and return TCP options. 212 */ 213 bool 214 npf_fetch_tcpopts(npf_cache_t *npc, nbuf_t *nbuf, uint16_t *mss, int *wscale) 215 { 216 const struct tcphdr *th = npc->npc_l4.tcp; 217 int topts_len, step; 218 void *nptr; 219 uint8_t val; 220 bool ok; 221 222 KASSERT(npf_iscached(npc, NPC_IP46)); 223 KASSERT(npf_iscached(npc, NPC_TCP)); 224 225 /* Determine if there are any TCP options, get their length. */ 226 topts_len = (th->th_off << 2) - sizeof(struct tcphdr); 227 if (topts_len <= 0) { 228 /* No options. */ 229 return false; 230 } 231 KASSERT(topts_len <= MAX_TCPOPTLEN); 232 233 /* First step: IP and TCP header up to options. */ 234 step = npc->npc_hlen + sizeof(struct tcphdr); 235 nbuf_reset(nbuf); 236 next: 237 if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) { 238 ok = false; 239 goto done; 240 } 241 val = *(uint8_t *)nptr; 242 243 switch (val) { 244 case TCPOPT_EOL: 245 /* Done. */ 246 ok = true; 247 goto done; 248 case TCPOPT_NOP: 249 topts_len--; 250 step = 1; 251 break; 252 case TCPOPT_MAXSEG: 253 if ((nptr = nbuf_advance(nbuf, 2, 2)) == NULL) { 254 ok = false; 255 goto done; 256 } 257 if (mss) { 258 if (*mss) { 259 memcpy(nptr, mss, sizeof(uint16_t)); 260 } else { 261 memcpy(mss, nptr, sizeof(uint16_t)); 262 } 263 } 264 topts_len -= TCPOLEN_MAXSEG; 265 step = 2; 266 break; 267 case TCPOPT_WINDOW: 268 /* TCP Window Scaling (RFC 1323). */ 269 if ((nptr = nbuf_advance(nbuf, 2, 1)) == NULL) { 270 ok = false; 271 goto done; 272 } 273 val = *(uint8_t *)nptr; 274 *wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val; 275 topts_len -= TCPOLEN_WINDOW; 276 step = 1; 277 break; 278 default: 279 if ((nptr = nbuf_advance(nbuf, 1, 1)) == NULL) { 280 ok = false; 281 goto done; 282 } 283 val = *(uint8_t *)nptr; 284 if (val < 2 || val > topts_len) { 285 ok = false; 286 goto done; 287 } 288 topts_len -= val; 289 step = val - 1; 290 } 291 292 /* Any options left? */ 293 if (__predict_true(topts_len > 0)) { 294 goto next; 295 } 296 ok = true; 297 done: 298 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 299 npf_recache(npc, nbuf); 300 } 301 return ok; 302 } 303 304 static int 305 npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf) 306 { 307 const void *nptr = nbuf_dataptr(nbuf); 308 const uint8_t ver = *(const uint8_t *)nptr; 309 int flags = 0; 310 311 switch (ver >> 4) { 312 case IPVERSION: { 313 struct ip *ip; 314 315 ip = nbuf_ensure_contig(nbuf, sizeof(struct ip)); 316 if (ip == NULL) { 317 return 0; 318 } 319 320 /* Check header length and fragment offset. */ 321 if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) { 322 return 0; 323 } 324 if (ip->ip_off & ~htons(IP_DF | IP_RF)) { 325 /* Note fragmentation. */ 326 flags |= NPC_IPFRAG; 327 } 328 329 /* Cache: layer 3 - IPv4. */ 330 npc->npc_alen = sizeof(struct in_addr); 331 npc->npc_srcip = (npf_addr_t *)&ip->ip_src; 332 npc->npc_dstip = (npf_addr_t *)&ip->ip_dst; 333 npc->npc_hlen = ip->ip_hl << 2; 334 npc->npc_proto = ip->ip_p; 335 336 npc->npc_ip.v4 = ip; 337 flags |= NPC_IP4; 338 break; 339 } 340 341 case (IPV6_VERSION >> 4): { 342 struct ip6_hdr *ip6; 343 struct ip6_ext *ip6e; 344 size_t off, hlen; 345 346 ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr)); 347 if (ip6 == NULL) { 348 return 0; 349 } 350 351 /* Set initial next-protocol value. */ 352 hlen = sizeof(struct ip6_hdr); 353 npc->npc_proto = ip6->ip6_nxt; 354 npc->npc_hlen = hlen; 355 356 /* 357 * Advance by the length of the current header. 358 */ 359 off = nbuf_offset(nbuf); 360 while (nbuf_advance(nbuf, hlen, 0) != NULL) { 361 ip6e = nbuf_ensure_contig(nbuf, sizeof(*ip6e)); 362 if (ip6e == NULL) { 363 return 0; 364 } 365 366 /* 367 * Determine whether we are going to continue. 368 */ 369 switch (npc->npc_proto) { 370 case IPPROTO_HOPOPTS: 371 case IPPROTO_DSTOPTS: 372 case IPPROTO_ROUTING: 373 hlen = (ip6e->ip6e_len + 1) << 3; 374 break; 375 case IPPROTO_FRAGMENT: 376 hlen = sizeof(struct ip6_frag); 377 flags |= NPC_IPFRAG; 378 break; 379 case IPPROTO_AH: 380 hlen = (ip6e->ip6e_len + 2) << 2; 381 break; 382 default: 383 hlen = 0; 384 break; 385 } 386 387 if (!hlen) { 388 break; 389 } 390 npc->npc_proto = ip6e->ip6e_nxt; 391 npc->npc_hlen += hlen; 392 } 393 394 /* 395 * Re-fetch the header pointers (nbufs might have been 396 * reallocated). Restore the original offset (if any). 397 */ 398 nbuf_reset(nbuf); 399 ip6 = nbuf_dataptr(nbuf); 400 if (off) { 401 nbuf_advance(nbuf, off, 0); 402 } 403 404 /* Cache: layer 3 - IPv6. */ 405 npc->npc_alen = sizeof(struct in6_addr); 406 npc->npc_srcip = (npf_addr_t *)&ip6->ip6_src; 407 npc->npc_dstip = (npf_addr_t *)&ip6->ip6_dst; 408 409 npc->npc_ip.v6 = ip6; 410 flags |= NPC_IP6; 411 break; 412 } 413 default: 414 break; 415 } 416 return flags; 417 } 418 419 /* 420 * npf_cache_all: general routine to cache all relevant IP (v4 or v6) 421 * and TCP, UDP or ICMP headers. 422 * 423 * => nbuf offset shall be set accordingly. 424 */ 425 int 426 npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf) 427 { 428 int flags, l4flags; 429 u_int hlen; 430 431 /* 432 * This routine is a main point where the references are cached, 433 * therefore clear the flag as we reset. 434 */ 435 again: 436 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 437 438 /* 439 * First, cache the L3 header (IPv4 or IPv6). If IP packet is 440 * fragmented, then we cannot look into L4. 441 */ 442 flags = npf_cache_ip(npc, nbuf); 443 if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0) { 444 nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 445 npc->npc_info |= flags; 446 return flags; 447 } 448 hlen = npc->npc_hlen; 449 450 switch (npc->npc_proto) { 451 case IPPROTO_TCP: 452 /* Cache: layer 4 - TCP. */ 453 npc->npc_l4.tcp = nbuf_advance(nbuf, hlen, 454 sizeof(struct tcphdr)); 455 l4flags = NPC_LAYER4 | NPC_TCP; 456 break; 457 case IPPROTO_UDP: 458 /* Cache: layer 4 - UDP. */ 459 npc->npc_l4.udp = nbuf_advance(nbuf, hlen, 460 sizeof(struct udphdr)); 461 l4flags = NPC_LAYER4 | NPC_UDP; 462 break; 463 case IPPROTO_ICMP: 464 /* Cache: layer 4 - ICMPv4. */ 465 npc->npc_l4.icmp = nbuf_advance(nbuf, hlen, 466 offsetof(struct icmp, icmp_void)); 467 l4flags = NPC_LAYER4 | NPC_ICMP; 468 break; 469 case IPPROTO_ICMPV6: 470 /* Cache: layer 4 - ICMPv6. */ 471 npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen, 472 offsetof(struct icmp6_hdr, icmp6_data32)); 473 l4flags = NPC_LAYER4 | NPC_ICMP; 474 break; 475 default: 476 l4flags = 0; 477 break; 478 } 479 480 if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 481 goto again; 482 } 483 484 /* Add the L4 flags if nbuf_advance() succeeded. */ 485 if (l4flags && npc->npc_l4.hdr) { 486 flags |= l4flags; 487 } 488 npc->npc_info |= flags; 489 return flags; 490 } 491 492 void 493 npf_recache(npf_cache_t *npc, nbuf_t *nbuf) 494 { 495 const int mflags __unused = npc->npc_info & (NPC_IP46 | NPC_LAYER4); 496 int flags; 497 498 nbuf_reset(nbuf); 499 npc->npc_info = 0; 500 flags = npf_cache_all(npc, nbuf); 501 KASSERT((flags & mflags) == mflags); 502 KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0); 503 } 504 505 /* 506 * npf_rwrip: rewrite required IP address. 507 */ 508 bool 509 npf_rwrip(const npf_cache_t *npc, int di, const npf_addr_t *addr) 510 { 511 npf_addr_t *oaddr; 512 513 KASSERT(npf_iscached(npc, NPC_IP46)); 514 515 /* 516 * Rewrite source address if outgoing and destination if incoming. 517 */ 518 oaddr = (di == PFIL_OUT) ? npc->npc_srcip : npc->npc_dstip; 519 memcpy(oaddr, addr, npc->npc_alen); 520 return true; 521 } 522 523 /* 524 * npf_rwrport: rewrite required TCP/UDP port. 525 */ 526 bool 527 npf_rwrport(const npf_cache_t *npc, int di, const in_port_t port) 528 { 529 const int proto = npc->npc_proto; 530 in_port_t *oport; 531 532 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 533 KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP); 534 535 /* Get the offset and store the port in it. */ 536 if (proto == IPPROTO_TCP) { 537 struct tcphdr *th = npc->npc_l4.tcp; 538 oport = (di == PFIL_OUT) ? &th->th_sport : &th->th_dport; 539 } else { 540 struct udphdr *uh = npc->npc_l4.udp; 541 oport = (di == PFIL_OUT) ? &uh->uh_sport : &uh->uh_dport; 542 } 543 memcpy(oport, &port, sizeof(in_port_t)); 544 return true; 545 } 546 547 /* 548 * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum. 549 */ 550 bool 551 npf_rwrcksum(const npf_cache_t *npc, const int di, 552 const npf_addr_t *addr, const in_port_t port) 553 { 554 const int proto = npc->npc_proto; 555 const int alen = npc->npc_alen; 556 npf_addr_t *oaddr; 557 uint16_t *ocksum; 558 in_port_t oport; 559 560 KASSERT(npf_iscached(npc, NPC_LAYER4)); 561 oaddr = (di == PFIL_OUT) ? npc->npc_srcip : npc->npc_dstip; 562 563 if (npf_iscached(npc, NPC_IP4)) { 564 struct ip *ip = npc->npc_ip.v4; 565 uint16_t ipsum = ip->ip_sum; 566 567 /* Recalculate IPv4 checksum and rewrite. */ 568 ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr); 569 } else { 570 /* No checksum for IPv6. */ 571 KASSERT(npf_iscached(npc, NPC_IP6)); 572 } 573 574 /* Nothing else to do for ICMP. */ 575 if (proto == IPPROTO_ICMP) { 576 return true; 577 } 578 KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 579 580 /* 581 * Calculate TCP/UDP checksum: 582 * - Skip if UDP and the current checksum is zero. 583 * - Fixup the IP address change. 584 * - Fixup the port change, if required (non-zero). 585 */ 586 if (proto == IPPROTO_TCP) { 587 struct tcphdr *th = npc->npc_l4.tcp; 588 589 ocksum = &th->th_sum; 590 oport = (di == PFIL_OUT) ? th->th_sport : th->th_dport; 591 } else { 592 struct udphdr *uh = npc->npc_l4.udp; 593 594 KASSERT(proto == IPPROTO_UDP); 595 ocksum = &uh->uh_sum; 596 if (*ocksum == 0) { 597 /* No need to update. */ 598 return true; 599 } 600 oport = (di == PFIL_OUT) ? uh->uh_sport : uh->uh_dport; 601 } 602 603 uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr); 604 if (port) { 605 cksum = npf_fixup16_cksum(cksum, oport, port); 606 } 607 608 /* Rewrite TCP/UDP checksum. */ 609 memcpy(ocksum, &cksum, sizeof(uint16_t)); 610 return true; 611 } 612 613 #if defined(DDB) || defined(_NPF_TESTING) 614 615 void 616 npf_addr_dump(const npf_addr_t *addr) 617 { 618 printf("IP[%x:%x:%x:%x]\n", 619 addr->s6_addr32[0], addr->s6_addr32[1], 620 addr->s6_addr32[2], addr->s6_addr32[3]); 621 } 622 623 #endif 624