/*	$NetBSD: npf_bpf_comp.c,v 1.2 2013/11/05 01:50:30 rmind Exp $	*/

/*-
 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.2 2013/11/05 01:50:30 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated, i.e. when it
 * stores something other than the L4 header offset.  Generally, this
 * happens when BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	X_EQ_L4OFF		0x02

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	u_int			nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/* The current group offset and block number. */
	bool			ingroup;
	u_int			goff;
	u_int			gblock;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64-byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)
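/*
 * For illustration: ALLOC_ROUND(1) == 64, ALLOC_ROUND(64) == 64 and
 * ALLOC_ROUND(65) == 128, i.e. requests are rounded up to the next
 * 64-byte boundary, so repeated add_insns() calls grow the buffer in
 * chunks rather than on every small code fragment.
 */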
npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
		if (insn->jt == JUMP_MAGIC)
			insn->jt = fail_off;
		if (insn->jf == JUMP_MAGIC)
			insn->jf = fail_off;
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

static void
done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	done_raw_block(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}

/*
 * npfctl_bpf_group: begin a logical group.  It merely uses logical
 * disjunction (OR) for compares within the group.
 */
void
npfctl_bpf_group(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->ingroup = true;
}

void
npfctl_bpf_endgroup(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	/* If there are no blocks, or only one, then nothing to do. */
	if ((ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * Append a failure return as a fall-through, i.e. for the case
	 * where there is no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust the jump offsets: on match, jump outside the group, i.e.
	 * to the current offset.  Otherwise, jump to the next instruction,
	 * eventually reaching the fall-through code above if nothing in
	 * the group matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}
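/*
 * Illustrative sketch (not the literal byte-code): a group of two
 * single-port blocks compiles roughly to the following, after
 * npfctl_bpf_endgroup() has swapped and fixed up the jump offsets:
 *
 *	A <- port; A == port-1 ?  jump out of the group : fall through
 *	A <- port; A == port-2 ?  jump out of the group : fall through
 *	ret #0	(failure, if nothing in the group matched)
 */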
static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags)
{
	u_int ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * Fetch the L3 information.  The coprocessor populates the
	 * following words in the scratch memory store:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		bool ingroup = ctx->ingroup;

		/*
		 * The L3 block cannot be inserted in the middle of a group.
		 * In fact, it never is.  Check and restart the group after.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_endgroup(ctx);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version is specified, check for non-zero.
		 */
		if ((ctx->flags & FETCHED_L3) == 0) {
			struct bpf_insn insns_l3[] = {
				BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_L3),
				BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
			};
			add_insns(ctx, insns_l3, __arraycount(insns_l3));
			ctx->flags |= FETCHED_L3;
		} else {
			/* The IP version is already fetched in BPF_MW_IPVER. */
			struct bpf_insn insns_af[] = {
				BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
				BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
			};
			add_insns(ctx, insns_af, __arraycount(insns_af));
		}
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			done_raw_block(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group(ctx);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- L4 header offset */
		struct bpf_insn insns_l4off[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_l4off, __arraycount(insns_l4off));
		ctx->flags |= X_EQ_L4OFF;
	}
}
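/*
 * For example, the first fetch_l3(ctx, AF_INET, 0) call emits roughly:
 *
 *	A <- IP version, via the NPF_COP_L3 coprocessor call
 *	     (which also fills in the BPF_MW_* scratch words)
 *	A == 4 ?  fall through : jump to the failure return
 *
 * Any subsequent call reloads the already-fetched version from
 * BPF_MW_IPVER instead of invoking the coprocessor again.
 */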
/*
 * npfctl_bpf_proto: code block to match the IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto)
{
	assert(af != AF_UNSPEC || proto != -1);

	/* Note: fails if the IP version does not match. */
	fetch_l3(ctx, af, 0);
	if (proto == -1) {
		return;
	}

	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_cidr: code block to match an IPv4 or IPv6 CIDR.
 *
 * => The IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	u_int nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure the address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;

	/* CAUTION: BPF operates in host byte-order. */
	for (u_int i = 0; i < nwords; i++) {
		const u_int woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask covers a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			/*
			 * Derive the mask within this 32-bit word.  Note:
			 * shifting by (32 - length) here; shifting by the
			 * remaining prefix length of the whole address
			 * would exceed the word width for IPv6.
			 */
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the remaining words. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR : BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	done_block(ctx, mwords, sizeof(mwords));
}
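/*
 * Masking example: matching on 10.0.0.0/8 loads the 32-bit address
 * word and compares (A & 0xff000000) == 0x0a000000; an IPv6 /64
 * compares the first two words in full and skips the remaining two,
 * since the mask is exhausted by then.
 */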
/*
 * npfctl_bpf_ports: code block to match a TCP/UDP port range.
 *
 * => The port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to)
{
	const u_int sport_off = offsetof(struct udphdr, uh_sport);
	const u_int dport_off = offsetof(struct udphdr, uh_dport);
	u_int off;

	/* The TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- L4 header offset */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, JUMP_MAGIC),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, JUMP_MAGIC, 0),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}
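/*
 * Illustrative byte-code for a destination port range of 8000-8010
 * (values shown in the host byte-order):
 *
 *	X <- L4 offset;  A <- half-word at X + 2  (destination port)
 *	A >= 8000 ?  fall through : jump to failure
 *	A >  8010 ?  jump to failure : fall through
 */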
/*
 * npfctl_bpf_tcpfl: code block to match the TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
{
	const u_int tcpfl_off = offsetof(struct tcphdr, th_flags);

	/* X <- L4 header offset */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (tf_mask != tf) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_icmp: code block to match the ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- L4 header offset */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match the source/destination IP
 * address against the NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE : BM_DST_TABLE, 1, tid };
	done_block(ctx, mwords, sizeof(mwords));
}
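/*
 * Hypothetical usage sketch for the API above (the actual callers
 * live in the npfctl rule parser), matching "proto tcp to port 80":
 *
 *	npf_bpf_t *bc = npfctl_bpf_create();
 *	npfctl_bpf_proto(bc, AF_INET, IPPROTO_TCP);
 *	npfctl_bpf_ports(bc, MATCH_DST, htons(80), htons(80));
 *	struct bpf_program *bp = npfctl_bpf_complete(bc);
 *	... use bp->bf_insns and bp->bf_len, then npfctl_bpf_destroy(bc) ...
 */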