/*-
 * Copyright (c) 2010-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 *
 * Overview
 *
 *	Each NPF rule is compiled into a BPF micro-program.  There is a
 *	BPF byte-code fragment for each higher-level filtering construct,
 *	e.g. to match the L4 protocol, an IP/mask, etc.  The generation
 *	process combines multiple BPF byte-code fragments into one program.
 *
 * Basic case
 *
 *	Consider a basic case where all filters should match.  They are
 *	expressed as a logical conjunction, e.g.:
 *
 *		A and B and C and D
 *
 *	Each test (filter) criterion can be evaluated to true (match) or
 *	false (no match) and the logic is as follows:
 *
 *	- If the value is true, then jump to the "next" test (offset 0).
 *
 *	- If the value is false, then jump to the JUMP_MAGIC value (0xff).
 *	  This "magic" value indicates that the jump will have to be
 *	  patched at a later stage.
 *
 *	Once all byte-code fragments are combined into one, there are two
 *	additional steps:
 *
 *	- Two instructions are appended at the end of the program: "return
 *	  success" followed by "return failure".
 *
 *	- All jumps with the JUMP_MAGIC value are patched to point to the
 *	  "return failure" instruction.
 *
 *	Therefore, if all filter criteria match, the first appended
 *	instruction ("return success") is reached, indicating a successful
 *	match of the rule.  Otherwise, if any criterion does not match,
 *	the failure path is taken and the rule does not match.
 *
 * Grouping
 *
 *	Filters can have groups, which have the effect of a logical
 *	disjunction, e.g.:
 *
 *		A and B and (C or D)
 *
 *	In such a case, the logic inside the group has to be inverted,
 *	i.e. the jump values swapped.  If the test value is true, then
 *	jump out of the group; if false, then jump "next".  At the end of
 *	the group, an additional failure path is appended and the
 *	JUMP_MAGIC uses within the group are patched to jump past the
 *	said path.
 */
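/*
 * Illustration
 *
 *	As a rough sketch only (the exact byte-code depends on the
 *	individual fragments emitted below), the criteria
 *	"A and B and (C or D)" compile to a program of the following shape:
 *
 *		A: on match fall through to B, otherwise jump to the
 *		   final "return failure"
 *		B: on match fall through to C, otherwise jump to the
 *		   final "return failure"
 *		C: on match jump past the group failure path, otherwise
 *		   fall through to D
 *		D: on match jump past the group failure path, otherwise
 *		   fall through
 *		return failure	<- group fall-through: neither C nor D matched
 *		return success	<- reached once every criterion has matched
 *		return failure	<- target of the patched JUMP_MAGIC jumps
 */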
#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.16 2020/05/30 14:16:56 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#define __FAVOR_BSD
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated, i.e. it stores
 * something other than the L4 header offset.  Generally, when BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4_PROTO	0x02
#define	X_EQ_L4OFF		0x04

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	unsigned		nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/*
	 * Indicators of whether we are inside a group and whether this
	 * group implements inverted logic.
	 *
	 * The current group offset (counted in BPF instructions)
	 * and the block number at the start of the group.
	 */
	unsigned		ingroup;
	bool			invert;
	unsigned		goff;
	unsigned		gblock;

	/* Track inversion (excl. mark). */
	uint32_t		invflags;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64 byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)

#ifndef IPV6_VERSION
#define	IPV6_VERSION		0x60
#endif

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;
		bool seen_magic = false;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (BPF_OP(insn->code) == BPF_JA) {
			/*
			 * BPF_JA can be used to jump to the failure path.
			 * If we are swapping i.e. inside the group, then
			 * jump "next"; groups have a failure path appended
			 * at their end.
			 */
			if (insn->k == JUMP_MAGIC) {
				insn->k = swap ? 0 : fail_off;
			}
			continue;
		}

		/*
		 * Fixup the "magic" value.  Swap only the "magic" jumps.
		 */
		if (insn->jt == JUMP_MAGIC) {
			insn->jt = fail_off;
			seen_magic = true;
		}
		if (insn->jf == JUMP_MAGIC) {
			insn->jf = fail_off;
			seen_magic = true;
		}

		if (seen_magic && swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

static void
add_bmarks(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	add_bmarks(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* No instructions (optimised out). */
	if (!bp->bf_len)
		return NULL;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}

/*
 * npfctl_bpf_group_enter: begin a logical group.  It merely uses logical
 * disjunction (OR) for comparisons within the group.
 */
void
npfctl_bpf_group_enter(npf_bpf_t *ctx, bool invert)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->invert = invert;
	ctx->ingroup++;
}

void
npfctl_bpf_group_exit(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	assert(ctx->ingroup);
	ctx->ingroup--;

	/* If there are no blocks or only one - nothing to do. */
	if (!ctx->invert && (ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * If inverting, then prepend a jump over the failure return
	 * below: if nothing in the group matched, execution falls
	 * through to this jump and skips the failure path; on a match,
	 * the swapped jumps take the failure path.
	 */
	if (ctx->invert) {
		struct bpf_insn insns_ret[] = {
			BPF_STMT(BPF_JMP+BPF_JA, 1),
		};
		add_insns(ctx, insns_ret, __arraycount(insns_ret));
	}

	/*
	 * Append a failure return as a fall-through i.e. if there is
	 * no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust jump offsets: on match - jump outside the group i.e.
	 * to the current offset.  Otherwise, jump to the next instruction
	 * which would lead to the fall-through code above if none matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}

static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, unsigned flags)
{
	unsigned ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * The memory store is populated with:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		const bool ingroup = ctx->ingroup != 0;
		const bool invert = ctx->invert;

		/*
		 * L3 block cannot be inserted in the middle of a group.
		 * In fact, it never is.  Check and start the group after.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_group_exit(ctx);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version specified, check for non-zero.
		 */
		struct bpf_insn insns_af[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
		};
		add_insns(ctx, insns_af, __arraycount(insns_af));
		ctx->flags |= FETCHED_L3;
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			add_bmarks(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group_enter(ctx, invert);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- IP header length */
		struct bpf_insn insns_hlen[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
		ctx->flags |= X_EQ_L4OFF;
	}
}

static void
bm_invert_checkpoint(npf_bpf_t *ctx, const unsigned opts)
{
	uint32_t bm = 0;

	if (ctx->ingroup && ctx->invert) {
		const unsigned seen = ctx->invflags;

		if ((opts & MATCH_SRC) != 0 && (seen & MATCH_SRC) == 0) {
			bm = BM_SRC_NEG;
		}
		if ((opts & MATCH_DST) != 0 && (seen & MATCH_DST) == 0) {
			bm = BM_DST_NEG;
		}
		ctx->invflags |= opts & (MATCH_SRC | MATCH_DST);
	}
	if (bm) {
		uint32_t mwords[] = { bm, 0 };
		add_bmarks(ctx, mwords, sizeof(mwords));
	}
}

/*
 * npfctl_bpf_ipver: match the IP version.
 */
void
npfctl_bpf_ipver(npf_bpf_t *ctx, sa_family_t af)
{
	fetch_l3(ctx, af, 0);
}

/*
 * npfctl_bpf_proto: code block to match IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, unsigned proto)
{
	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
	ctx->flags |= CHECKED_L4_PROTO;
}

/*
 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
 *
 * => IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, unsigned opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	unsigned nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;

	/* CAUTION: BPF operates in host byte-order. */
	for (unsigned i = 0; i < nwords; i++) {
		const unsigned woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR : BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_ports: code block to match TCP/UDP port range.
 *
 * => Port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, unsigned opts, in_port_t from, in_port_t to)
{
	const unsigned sport_off = offsetof(struct udphdr, uh_sport);
	const unsigned dport_off = offsetof(struct udphdr, uh_dport);
	unsigned off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));
	assert(ctx->flags & CHECKED_L4_PROTO);

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, 1),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, 0, 1),
			BPF_STMT(BPF_JMP+BPF_JA, JUMP_MAGIC),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_tcpfl: code block to match TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
{
	const unsigned tcpfl_off = offsetof(struct tcphdr, th_flags);
	const bool usingmask = tf_mask != tf;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if ((ctx->flags & CHECKED_L4_PROTO) == 0) {
		const unsigned jf = usingmask ? 3 : 2;
		assert(ctx->ingroup == 0);

		/*
		 * A <- L4 protocol; A == TCP?  If not, jump out.
		 *
		 * Note: the TCP flag matching might be without 'proto tcp'
		 * when using a plain 'stateful' rule.  In such a case it
		 * also handles other protocols, hence no strict TCP check.
		 */
		struct bpf_insn insns_tcp[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
		};
		add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
	}

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (usingmask) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_icmp: code block to match ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(ctx->flags & CHECKED_L4_PROTO);
	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match source/destination IP address
 * against the NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, unsigned opts, unsigned tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE : BM_DST_TABLE, 1, tid };
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}
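
/*
 * Usage sketch
 *
 *	Illustrative only: an expected calling sequence for criteria
 *	roughly equivalent to "proto tcp, source 10.0.0.0/24, destination
 *	port 80 or 443", where "addr" stands for a caller-prepared
 *	npf_addr_t holding 10.0.0.0 in network byte order:
 *
 *		npf_bpf_t *bc = npfctl_bpf_create();
 *
 *		npfctl_bpf_proto(bc, IPPROTO_TCP);
 *		npfctl_bpf_cidr(bc, MATCH_SRC, AF_INET, &addr, 24);
 *
 *		npfctl_bpf_group_enter(bc, false);
 *		npfctl_bpf_ports(bc, MATCH_DST, htons(80), htons(80));
 *		npfctl_bpf_ports(bc, MATCH_DST, htons(443), htons(443));
 *		npfctl_bpf_group_exit(bc);
 *
 *		struct bpf_program *bp = npfctl_bpf_complete(bc);
 *
 *	The byte-code (bp->bf_insns, bp->bf_len) and the marks returned
 *	by npfctl_bpf_bmarks() are consumed before npfctl_bpf_destroy(bc)
 *	releases them.
 */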