/*-
 * Copyright (c) 2010-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 *
 * Overview
 *
 *	Each NPF rule is compiled into a BPF micro-program.  There is a
 *	BPF byte-code fragment for each piece of higher-level filtering
 *	logic, e.g. to match the L4 protocol, IP/mask, etc.  The generation
 *	process combines multiple BPF byte-code fragments into one program.
 *
 * Basic case
 *
 *	Consider a basic case where all filters should match.  They are
 *	expressed as a logical conjunction, e.g.:
 *
 *		A and B and C and D
 *
 *	Each test (filter) criterion can be evaluated to true (match) or
 *	false (no match) and the logic is as follows:
 *
 *	- If the value is true, then jump to the "next" test (offset 0).
 *
 *	- If the value is false, then jump to the JUMP_MAGIC value (0xff).
 *	This "magic" value indicates a jump that will have to be patched
 *	at a later stage.
 *
 *	Once all byte-code fragments are combined into one program, there
 *	are two additional steps:
 *
 *	- Two instructions are appended at the end of the program: "return
 *	success" followed by "return failure".
 *
 *	- All jumps with the JUMP_MAGIC value are patched to point to the
 *	"return failure" instruction.
 *
 *	Therefore, if all filter criteria match, the "return success"
 *	instruction is reached, indicating a successful match of the rule.
 *	Otherwise, if any of the criteria does not match, the failure path
 *	is taken and the rule does not match.
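 *
 *	As an illustrative sketch (not the verbatim generator output),
 *	a two-criteria conjunction is laid out roughly as follows, with
 *	"A-value" and "B-value" standing for whatever the fragments
 *	compare against:
 *
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, A-value, 0, JUMP_MAGIC)
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, B-value, 0, JUMP_MAGIC)
 *		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS)
 *		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE)
 *
 *	where fixup_jumps() rewrites each JUMP_MAGIC into the relative
 *	offset of the "return failure" instruction.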
 *
 * Grouping
 *
 *	Filters can have groups, which have the effect of a logical
 *	disjunction, e.g.:
 *
 *		A and B and (C or D)
 *
 *	In such a case, the logic inside the group has to be inverted,
 *	i.e. the jump values swapped.  If the test value is true, then
 *	jump out of the group; if false, then jump to the "next" test.
 *	At the end of the group, an additional failure path is appended
 *	and the JUMP_MAGIC uses within the group are patched to jump
 *	past that path.
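 *
 *	Illustrative sketch of the "(C or D)" group above, after
 *	npfctl_bpf_group_exit() has appended the group's failure path
 *	and fixup_jumps() has swapped the jump values:
 *
 *		C test: true => jump past the group, false => next
 *		D test: true => jump past the group, false => next
 *		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE)
 *
 *	i.e. if neither C nor D matches, the fall-through hits the
 *	group's own "return failure"; any match continues with the
 *	fragments following the group.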
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.16 2020/05/30 14:16:56 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#define	__FAVOR_BSD
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated i.e. it stores
 * something other than L4 header offset.  Generally, when BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4_PROTO	0x02
#define	X_EQ_L4OFF		0x04

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	unsigned		nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/*
	 * Indicators of whether we are inside a group and whether this
	 * group implements inverted logic.
	 *
	 * The current group offset (counted in BPF instructions)
	 * and the block number at the start of the group.
	 */
	unsigned		ingroup;
	bool			invert;
	unsigned		goff;
	unsigned		gblock;

	/* Track inversion (excl. mark). */
	uint32_t		invflags;

	/* BPF marks, the allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64 byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)
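/* E.g. ALLOC_ROUND(65) rounds up to 128, while ALLOC_ROUND(64) stays 64. */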

#ifndef IPV6_VERSION
#define	IPV6_VERSION		0x60
#endif

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;
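		/*
		 * Note: a conditional jump of 'fail_off' from instruction
		 * 'i' lands at index 'end' + 1, i.e. just past the
		 * instruction at 'end' (BPF advances the PC by 1 + offset).
		 */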
		bool seen_magic = false;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (BPF_OP(insn->code) == BPF_JA) {
			/*
			 * BPF_JA can be used to jump to the failure path.
			 * If we are swapping i.e. inside the group, then
			 * jump "next"; groups have a failure path appended
			 * at their end.
			 */
			if (insn->k == JUMP_MAGIC) {
				insn->k = swap ? 0 : fail_off;
			}
			continue;
		}

		/*
		 * Fixup the "magic" value.  Swap only the "magic" jumps.
		 */

		if (insn->jt == JUMP_MAGIC) {
			insn->jt = fail_off;
			seen_magic = true;
		}
		if (insn->jf == JUMP_MAGIC) {
			insn->jf = fail_off;
			seen_magic = true;
		}

		if (seen_magic && swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

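/*
 * add_bmarks: append a block description, expressed as a tuple of
 * 32-bit words: { mark, argument count, arguments ... }, e.g.
 * { BM_PROTO, 1, proto } as emitted by npfctl_bpf_proto().
 */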
static void
add_bmarks(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	add_bmarks(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* No instructions (optimised out). */
	if (!bp->bf_len)
		return NULL;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}
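
/*
 * Illustrative sketch (not built): a minimal example of how these
 * primitives are typically driven, e.g. for a rule along the lines of
 * "proto tcp to port 80".  The rule and the port value are assumptions
 * for illustration only.
 */
#if 0
static struct bpf_program *
example_compile(void)
{
	npf_bpf_t *bc = npfctl_bpf_create();

	/* Emit the per-criterion fragments. */
	npfctl_bpf_proto(bc, IPPROTO_TCP);
	npfctl_bpf_ports(bc, MATCH_DST, htons(80), htons(80));

	/*
	 * Append the "return success"/"return failure" pair and patch
	 * the JUMP_MAGIC jumps; may return NULL if nothing was emitted.
	 * The caller eventually releases everything via npfctl_bpf_destroy().
	 */
	return npfctl_bpf_complete(bc);
}
#endif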

/*
 * npfctl_bpf_group_enter: begin a logical group.  It merely uses logical
 * disjunction (OR) for comparisons within the group.
 */
void
npfctl_bpf_group_enter(npf_bpf_t *ctx, bool invert)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->invert = invert;
	ctx->ingroup++;
}

void
npfctl_bpf_group_exit(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	assert(ctx->ingroup);
	ctx->ingroup--;

	/* If there are no blocks, or only one, then nothing to do. */
	if (!ctx->invert && (ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * If inverting, then insert a jump over the failure return
	 * appended below: the fall-through (no match within the group)
	 * skips the failure path, while the patched jumps on a match
	 * land on it.
	 */
	if (ctx->invert) {
		struct bpf_insn insns_ret[] = {
			BPF_STMT(BPF_JMP+BPF_JA, 1),
		};
		add_insns(ctx, insns_ret, __arraycount(insns_ret));
	}

	/*
	 * Append a failure return as the fall-through, i.e. taken if
	 * there is no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust the jump offsets: on a match, jump outside the group,
	 * i.e. just past the current offset.  Otherwise, jump to the
	 * next instruction, which leads to the fall-through code above
	 * if nothing in the group matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}

static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, unsigned flags)
{
	unsigned ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * The memory store is populated with:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		const bool ingroup = ctx->ingroup != 0;
		const bool invert = ctx->invert;

		/*
		 * The L3 block cannot be inserted in the middle of a group
		 * (in practice, it never is).  Assert that the group is
		 * still empty, close it, and re-open it after the L3 block.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_group_exit(ctx);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version is specified, check for non-zero.
		 */
		struct bpf_insn insns_af[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
		};
		add_insns(ctx, insns_af, __arraycount(insns_af));
		ctx->flags |= FETCHED_L3;
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			add_bmarks(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group_enter(ctx, invert);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- IP header length */
		struct bpf_insn insns_hlen[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
		ctx->flags |= X_EQ_L4OFF;
	}
}

static void
bm_invert_checkpoint(npf_bpf_t *ctx, const unsigned opts)
{
	uint32_t bm = 0;

	if (ctx->ingroup && ctx->invert) {
		const unsigned seen = ctx->invflags;

		if ((opts & MATCH_SRC) != 0 && (seen & MATCH_SRC) == 0) {
			bm = BM_SRC_NEG;
		}
		if ((opts & MATCH_DST) != 0 && (seen & MATCH_DST) == 0) {
			bm = BM_DST_NEG;
		}
		ctx->invflags |= opts & (MATCH_SRC | MATCH_DST);
	}
	if (bm) {
		uint32_t mwords[] = { bm, 0 };
		add_bmarks(ctx, mwords, sizeof(mwords));
	}
}

/*
 * npfctl_bpf_ipver: match the IP version.
 */
void
npfctl_bpf_ipver(npf_bpf_t *ctx, sa_family_t af)
{
	fetch_l3(ctx, af, 0);
}

/*
 * npfctl_bpf_proto: code block to match IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, unsigned proto)
{
	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
	ctx->flags |= CHECKED_L4_PROTO;
}

/*
 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
 *
 * => IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, unsigned opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	unsigned nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;
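
	/*
	 * For example (illustrative): with an IPv6 /68 prefix, words 0
	 * and 1 are compared in full, word 2 is masked with 0xf0000000
	 * (the remaining 4 bits) and word 3 is skipped entirely.
	 */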

	/* CAUTION: BPF operates in host byte-order. */
	for (unsigned i = 0; i < nwords; i++) {
		const unsigned woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR : BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_ports: code block to match TCP/UDP port range.
 *
 * => Port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, unsigned opts, in_port_t from, in_port_t to)
{
	const unsigned sport_off = offsetof(struct udphdr, uh_sport);
	const unsigned dport_off = offsetof(struct udphdr, uh_dport);
	unsigned off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));
	assert(ctx->flags & CHECKED_L4_PROTO);

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
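		/*
		 * Reject if the port is below 'from' (JGE false branch)
		 * or above 'to' (JGT true branch): both paths land on
		 * the BPF_JA failure jump below; a port within the range
		 * skips over it.
		 */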
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, 1),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, 0, 1),
			BPF_STMT(BPF_JMP+BPF_JA, JUMP_MAGIC),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_tcpfl: code block to match TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
{
	const unsigned tcpfl_off = offsetof(struct tcphdr, th_flags);
	const bool usingmask = tf_mask != tf;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if ((ctx->flags & CHECKED_L4_PROTO) == 0) {
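		/*
		 * If the packet is not TCP, jump past the flag fetch,
		 * the optional mask and the comparison below, i.e. over
		 * 2 or 3 instructions.
		 */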
		const unsigned jf = usingmask ? 3 : 2;
		assert(ctx->ingroup == 0);

		/*
		 * A <- L4 protocol; A == TCP?  If not, jump out.
		 *
		 * Note: the TCP flag matching might appear without a
		 * 'proto tcp' filter, e.g. in a plain 'stateful' rule.
		 * Such a rule also handles other protocols, hence there
		 * is no strict TCP check here (non-TCP packets simply
		 * skip the flag comparison).
		 */
		struct bpf_insn insns_tcp[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
		};
		add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
	}

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (usingmask) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_icmp: code block to match the ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(ctx->flags & CHECKED_L4_PROTO);
	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match the source/destination IP
 * address against the NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, unsigned opts, unsigned tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

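	/*
	 * Ask the NPF BPF coprocessor to look the address up in table
	 * 'tid' (the top bit of the argument selects the source versus
	 * the destination address).  A zero result means no match, in
	 * which case the failure path is taken.
	 */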
	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE : BM_DST_TABLE, 1, tid };
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}
753