/*-
 * Copyright (c) 2010-2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 *
 * Overview
 *
 *	Each NPF rule is compiled into a BPF micro-program.  There is a
 *	BPF byte-code fragment for each piece of higher-level filtering
 *	logic, e.g. to match the L4 protocol, IP/mask, etc.  The generation
 *	process combines multiple BPF byte-code fragments into one program.
 *
 * Basic case
 *
 *	Consider a basic case where all filters should match.  They are
 *	expressed as a logical conjunction, e.g.:
 *
 *		A and B and C and D
 *
 *	Each test (filter) criterion evaluates to either true (match) or
 *	false (no match) and the logic is as follows:
 *
 *	- If the value is true, then jump to the "next" test (offset 0).
 *
 *	- If the value is false, then jump to the JUMP_MAGIC value (0xff).
 *	This "magic" value indicates a jump that will have to be patched
 *	at a later stage.
 *
 *	Once all byte-code fragments are combined into one program, two
 *	additional steps are performed:
 *
 *	- Two instructions are appended at the end of the program: "return
 *	success" followed by "return failure".
 *
 *	- All jumps with the JUMP_MAGIC value are patched to point to the
 *	"return failure" instruction.
 *
 *	Therefore, if all filter criteria match, the "return success"
 *	instruction is reached, indicating a successful match of the rule.
 *	Otherwise, if any of the criteria does not match, the failure path
 *	is taken and the rule does not match.
 *
 * Grouping
 *
 *	Filters can have groups, which have the meaning of logical
 *	disjunction, e.g.:
 *
 *		A and B and (C or D)
 *
 *	In such a case, the logic inside the group has to be inverted,
 *	i.e. the jump values swapped.  If the test value is true, then
 *	jump out of the group; if false, then jump "next".  At the end of
 *	the group, an additional failure path is appended and the
 *	JUMP_MAGIC jumps within the group are patched to jump past the
 *	said path.
 */
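
/*
 * Illustrative sketch (an approximation, not literal generator output):
 * for the expression "A and B and (C or D)", the completed program has
 * roughly the following shape:
 *
 *	A:	jeq ...		jt=next,  jf=<fail>
 *	B:	jeq ...		jt=next,  jf=<fail>
 *	C:	jeq ...		jt=<out>, jf=next
 *	D:	jeq ...		jt=<out>, jf=next
 *		ret #NPF_BPF_FAILURE	(group fall-through: neither C nor D)
 *	<out>:	ret #NPF_BPF_SUCCESS
 *	<fail>:	ret #NPF_BPF_FAILURE
 *
 * The <fail> jumps are the former JUMP_MAGIC values patched by
 * npfctl_bpf_complete(); the <out> jumps are patched by
 * npfctl_bpf_group_exit() to land just past the group's failure path.
 */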

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.15 2019/08/25 13:21:03 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#define	__FAVOR_BSD
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated i.e. it stores
 * something other than L4 header offset.  Generally, when BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4		0x02
#define	X_EQ_L4OFF		0x04

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	u_int			nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/*
	 * The current group offset (counted in BPF instructions)
	 * and block number at the start of the group.
	 */
	bool			ingroup;
	u_int			goff;
	u_int			gblock;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64 byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)

#ifndef IPV6_VERSION
#define	IPV6_VERSION		0x60
#endif

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;
		bool seen_magic = false;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (BPF_OP(insn->code) == BPF_JA) {
			/*
			 * BPF_JA can be used to jump to the failure path.
			 * If we are swapping i.e. inside the group, then
			 * jump "next"; groups have a failure path appended
			 * at their end.
			 */
			if (insn->k == JUMP_MAGIC) {
				insn->k = swap ? 0 : fail_off;
			}
			continue;
		}

		/*
		 * Fixup the "magic" value.  Swap only the "magic" jumps.
		 */

		if (insn->jt == JUMP_MAGIC) {
			insn->jt = fail_off;
			seen_magic = true;
		}
		if (insn->jf == JUMP_MAGIC) {
			insn->jf = fail_off;
			seen_magic = true;
		}

		if (seen_magic && swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

static void
done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	done_raw_block(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* No instructions (optimised out). */
	if (!bp->bf_len)
		return NULL;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}
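
/*
 * Illustrative usage sketch (not part of the original source): a
 * hypothetical caller combining the builder routines in this file into
 * a complete micro-program.  The real callers live in the npfctl rule
 * generation code; this fragment is compiled out.
 */
#if 0
static struct bpf_program *
example_build_rule(void)
{
	npf_bpf_t *bc = npfctl_bpf_create();

	/* Match IPv4 TCP packets with destination port 80. */
	npfctl_bpf_proto(bc, AF_INET, IPPROTO_TCP);
	npfctl_bpf_ports(bc, MATCH_DST, htons(80), htons(80));

	/*
	 * Append the return paths and patch the JUMP_MAGIC jumps;
	 * npfctl_bpf_destroy() releases everything once the program
	 * and the marks have been consumed.
	 */
	return npfctl_bpf_complete(bc);
}
#endif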

/*
 * npfctl_bpf_group_enter: begin a logical group.  The blocks emitted
 * within the group are combined using logical disjunction (OR).
 */
void
npfctl_bpf_group_enter(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->ingroup = true;
}

void
npfctl_bpf_group_exit(npf_bpf_t *ctx, bool invert)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	/* If there are no blocks or only one - nothing to do. */
	if (!invert && (ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * If inverting, then prepend a jump over the failure return
	 * appended below.  It is reached only on fall-through, i.e. when
	 * nothing within the group matched; a match within the group
	 * lands on the failure return instead.
	 */
	if (invert) {
		struct bpf_insn insns_ret[] = {
			BPF_STMT(BPF_JMP+BPF_JA, 1),
		};
		add_insns(ctx, insns_ret, __arraycount(insns_ret));
	}

	/*
	 * Append a failure return as the fall-through path, i.e. taken
	 * if there is no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust the jump offsets: on match, jump outside the group, i.e.
	 * just past the failure path appended above (or onto it, if
	 * inverting).  Otherwise, jump to the next test, which leads to
	 * the fall-through code above if none matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}
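
/*
 * Illustrative sketch (hypothetical caller, compiled out): expressing
 * "IPv4 TCP and (dport 80 or dport 443)" with the group interface.
 */
#if 0
static void
example_match_either_port(npf_bpf_t *bc)
{
	npfctl_bpf_proto(bc, AF_INET, IPPROTO_TCP);

	/* Either of the two destination ports may match. */
	npfctl_bpf_group_enter(bc);
	npfctl_bpf_ports(bc, MATCH_DST, htons(80), htons(80));
	npfctl_bpf_ports(bc, MATCH_DST, htons(443), htons(443));
	npfctl_bpf_group_exit(bc, false);
}
#endif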

static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags)
{
	u_int ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * The memory store is populated with:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		bool ingroup = ctx->ingroup;

		/*
		 * L3 block cannot be inserted in the middle of a group.
		 * In fact, it never is.  Check and start the group after.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_group_exit(ctx, false);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version specified, check for non-zero.
		 */
		struct bpf_insn insns_af[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
		};
		add_insns(ctx, insns_af, __arraycount(insns_af));
		ctx->flags |= FETCHED_L3;
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			done_raw_block(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group_enter(ctx);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- IP header length */
		struct bpf_insn insns_hlen[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
		ctx->flags |= X_EQ_L4OFF;
	}
}

/*
 * npfctl_bpf_proto: code block to match IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto)
{
	assert(af != AF_UNSPEC || proto != -1);

	/* Note: fails if IP version does not match. */
	fetch_l3(ctx, af, 0);
	if (proto == -1) {
		return;
	}

	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
	ctx->flags |= CHECKED_L4;
}

/*
 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
 *
 * => IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	u_int nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;

	/* CAUTION: BPF operates in host byte-order. */
	for (u_int i = 0; i < nwords; i++) {
		const u_int woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR : BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	done_block(ctx, mwords, sizeof(mwords));
}
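
/*
 * Worked example (illustrative): for an IPv6 /56 prefix, the loop above
 * compares the first address word with no mask applied (56 - 32 = 24
 * bits remain), masks the second word with
 * 0xffffffff << (32 - 24) == 0xffffff00 before comparing it, and then
 * stops, since the remaining mask length has reached zero and the last
 * two words need not be checked.
 */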

/*
 * npfctl_bpf_ports: code block to match TCP/UDP port range.
 *
 * => Port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to)
{
	const u_int sport_off = offsetof(struct udphdr, uh_sport);
	const u_int dport_off = offsetof(struct udphdr, uh_dport);
	u_int off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));
	assert(ctx->flags & CHECKED_L4);

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, 1),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, 0, 1),
			BPF_STMT(BPF_JMP+BPF_JA, JUMP_MAGIC),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		opts & MATCH_SRC ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}
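
/*
 * Illustrative trace of the range fragment above, for the range
 * [1000, 2000]: a port of 80 fails the JGE test and jumps onto the
 * BPF_JA failure stub; a port of 3000 passes JGE but satisfies the
 * JGT test (port > to) and also reaches the stub; a port of 1500
 * passes JGE, fails JGT and skips past the stub, continuing with the
 * next block.
 */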

/*
 * npfctl_bpf_tcpfl: code block to match TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask, bool checktcp)
{
	const u_int tcpfl_off = offsetof(struct tcphdr, th_flags);
	const bool usingmask = tf_mask != tf;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
	if (checktcp) {
		const u_int jf = usingmask ? 3 : 2;
		assert(ctx->ingroup == false);

		/* A <- L4 protocol; A == TCP?  If not, jump out. */
		struct bpf_insn insns_tcp[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
		};
		add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
	} else {
		assert(ctx->flags & CHECKED_L4);
	}

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (usingmask) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_icmp: code block to match ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(ctx->flags & CHECKED_L4);
	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match source/destination IP address
 * against NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE : BM_DST_TABLE, 1, tid };
	done_block(ctx, mwords, sizeof(mwords));
}