xref: /netbsd-src/sys/net/bpfjit.c (revision 19ef5b5b0bcb90f63509df6e78769de1b57c2758)
1 /*	$NetBSD: bpfjit.c,v 1.8 2014/05/22 13:35:45 alnsn Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2014 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.8 2014/05/22 13:35:45 alnsn Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.8 2014/05/22 13:35:45 alnsn Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <assert.h>
44 #define BJ_ASSERT(c) assert(c)
45 #else
46 #define BJ_ASSERT(c) KASSERT(c)
47 #endif
48 
49 #ifndef _KERNEL
50 #include <stdlib.h>
51 #define BJ_ALLOC(sz) malloc(sz)
52 #define BJ_FREE(p, sz) free(p)
53 #else
54 #include <sys/kmem.h>
55 #define BJ_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
56 #define BJ_FREE(p, sz) kmem_free(p, sz)
57 #endif
58 
59 #ifndef _KERNEL
60 #include <limits.h>
61 #include <stdbool.h>
62 #include <stddef.h>
63 #include <stdint.h>
64 #else
65 #include <sys/atomic.h>
66 #include <sys/module.h>
67 #endif
68 
69 #define	__BPF_PRIVATE
70 #include <net/bpf.h>
71 #include <net/bpfjit.h>
72 #include <sljitLir.h>
73 
74 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
75 #include <stdio.h> /* for stderr */
76 #endif
77 
78 /*
79  * Permanent register assignments.
80  */
81 #define BJ_BUF		SLJIT_SAVED_REG1
82 #define BJ_WIRELEN	SLJIT_SAVED_REG2
83 #define BJ_BUFLEN	SLJIT_SAVED_REG3
84 #define BJ_AREG		SLJIT_TEMPORARY_REG1
85 #define BJ_TMP1REG	SLJIT_TEMPORARY_REG2
86 #define BJ_TMP2REG	SLJIT_TEMPORARY_REG3
87 #define BJ_XREG		SLJIT_TEMPORARY_EREG1
88 #define BJ_TMP3REG	SLJIT_TEMPORARY_EREG2
89 
90 typedef unsigned int bpfjit_init_mask_t;
91 #define BJ_INIT_NOBITS  0u
92 #define BJ_INIT_MBIT(k) (1u << (k))
93 #define BJ_INIT_MMASK   (BJ_INIT_MBIT(BPF_MEMWORDS) - 1u)
94 #define BJ_INIT_ABIT    BJ_INIT_MBIT(BPF_MEMWORDS)
95 #define BJ_INIT_XBIT    BJ_INIT_MBIT(BPF_MEMWORDS + 1)
96 
97 typedef uint32_t bpfjit_abc_length_t;
98 #define MAX_ABC_LENGTH UINT32_MAX
99 
100 struct bpfjit_stack
101 {
102 	uint32_t mem[BPF_MEMWORDS];
103 #ifdef _KERNEL
104 	void *tmp;
105 #endif
106 };
107 
108 /*
109  * Data for BPF_JMP instruction.
110  * Forward declaration for struct bpfjit_jump.
111  */
112 struct bpfjit_jump_data;
113 
114 /*
115  * Node of bjumps list.
116  */
117 struct bpfjit_jump {
118 	struct sljit_jump *sjump;
119 	SLIST_ENTRY(bpfjit_jump) entries;
120 	struct bpfjit_jump_data *jdata;
121 };
122 
123 /*
124  * Data for BPF_JMP instruction.
125  */
126 struct bpfjit_jump_data {
127 	/*
128 	 * These entries make up bjumps list:
129 	 * jtf[0] - when coming from jt path,
130 	 * jtf[1] - when coming from jf path.
131 	 */
132 	struct bpfjit_jump jtf[2];
133 	/*
134 	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
135 	 */
136 	bpfjit_abc_length_t abc_length;
137 	/*
138 	 * Length checked by the last out-of-bounds check.
139 	 */
140 	bpfjit_abc_length_t checked_length;
141 };
142 
143 /*
144  * Data for "read from packet" instructions.
145  * See also read_pkt_insn() function below.
146  */
147 struct bpfjit_read_pkt_data {
148 	/*
149 	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
150 	 */
151 	bpfjit_abc_length_t abc_length;
152 	/*
153 	 * If positive, emit "if (buflen < check_length) return 0"
154 	 * out-of-bounds check.
155 	 * We assume that buflen is never equal to UINT32_MAX (otherwise,
156 	 * we'd need a special bool variable to emit unconditional "return 0").
157 	 */
158 	bpfjit_abc_length_t check_length;
159 };
160 
161 /*
162  * Additional (optimization-related) data for bpf_insn.
163  */
164 struct bpfjit_insn_data {
165 	/* List of jumps to this insn. */
166 	SLIST_HEAD(, bpfjit_jump) bjumps;
167 
168 	union {
169 		struct bpfjit_jump_data     jdata;
170 		struct bpfjit_read_pkt_data rdata;
171 	} u;
172 
173 	bpfjit_init_mask_t invalid;
174 	bool unreachable;
175 };
176 
177 #ifdef _KERNEL
178 
179 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
180 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
181 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
182 
183 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
184 
185 static int
186 bpfjit_modcmd(modcmd_t cmd, void *arg)
187 {
188 
189 	switch (cmd) {
190 	case MODULE_CMD_INIT:
191 		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
192 		membar_producer();
193 		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
194 		membar_producer();
195 		return 0;
196 
197 	case MODULE_CMD_FINI:
198 		return EOPNOTSUPP;
199 
200 	default:
201 		return ENOTTY;
202 	}
203 }
204 #endif
205 
206 static uint32_t
207 read_width(const struct bpf_insn *pc)
208 {
209 
210 	switch (BPF_SIZE(pc->code)) {
211 	case BPF_W:
212 		return 4;
213 	case BPF_H:
214 		return 2;
215 	case BPF_B:
216 		return 1;
217 	default:
218 		BJ_ASSERT(false);
219 		return 0;
220 	}
221 }
222 
/*
 * Double the capacity of the jumps array.  On success, *jumps points
 * at the enlarged copy and *size holds the new capacity.  Returns
 * false (leaving the array untouched) when doubling would overflow
 * or when allocation fails.
 */
static bool
grow_jumps(struct sljit_jump ***jumps, size_t *size)
{
	const size_t elemsz = sizeof(struct sljit_jump *);
	const size_t oldcap = *size;
	const size_t newcap = 2 * oldcap;
	struct sljit_jump **grown;

	/* Guard both the doubling and the byte-count multiply. */
	if (newcap < oldcap || newcap > SIZE_MAX / elemsz)
		return false;

	grown = BJ_ALLOC(newcap * elemsz);
	if (grown == NULL)
		return false;

	memcpy(grown, *jumps, oldcap * elemsz);
	BJ_FREE(*jumps, oldcap * elemsz);

	*jumps = grown;
	*size = newcap;
	return true;
}
245 
/*
 * Append a jump to the jumps array, growing the array when it is
 * full.  Returns false if the array could not be enlarged.
 */
static bool
append_jump(struct sljit_jump *jump, struct sljit_jump ***jumps,
    size_t *size, size_t *max_size)
{
	if (*size == *max_size) {
		if (!grow_jumps(jumps, max_size))
			return false;
	}

	(*jumps)[*size] = jump;
	(*size)++;
	return true;
}
256 
257 /*
258  * Generate code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
259  */
260 static int
261 emit_read8(struct sljit_compiler* compiler, uint32_t k)
262 {
263 
264 	return sljit_emit_op1(compiler,
265 	    SLJIT_MOV_UB,
266 	    BJ_AREG, 0,
267 	    SLJIT_MEM1(BJ_BUF), k);
268 }
269 
270 /*
271  * Generate code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
272  */
static int
emit_read16(struct sljit_compiler* compiler, uint32_t k)
{
	int status;

	/*
	 * Assemble a big-endian (network order) 16bit value from two
	 * byte loads: A = (buf[k] << 8) + buf[k+1].
	 */

	/* tmp1 = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	return status;
}
311 
312 /*
313  * Generate code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
314  */
static int
emit_read32(struct sljit_compiler* compiler, uint32_t k)
{
	int status;

	/*
	 * Assemble a big-endian (network order) 32bit value from four
	 * byte loads:
	 * A = (buf[k] << 24) + (buf[k+1] << 16) + (buf[k+2] << 8) + buf[k+3].
	 * tmp1 is used for the bytes at k and k+2, tmp2 for the byte
	 * at k+1; the byte at k+3 is accumulated directly in A.
	 */

	/* tmp1 = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP2REG, 0,
	    SLJIT_MEM1(BJ_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+3]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k+3);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 24; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 24);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = buf[k+2]; tmp1 is free again after the shift above. */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k+2);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = tmp2 << 16; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP2REG, 0,
	    BJ_TMP2REG, 0,
	    SLJIT_IMM, 16);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp2; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP2REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	return status;
}
405 
406 #ifdef _KERNEL
407 /*
408  * Generate m_xword/m_xhalf/m_xbyte call.
409  *
410  * pc is one of:
411  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
412  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
413  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
414  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
415  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
416  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
417  * BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
418  *
419  * The dst variable should be
420  *  - BJ_AREG when emitting code for BPF_LD instructions,
421  *  - BJ_XREG or any of BJ_TMP[1-3]REG registers when emitting
422  *    code for BPF_MSH instruction.
423  */
424 static int
425 emit_xcall(struct sljit_compiler* compiler, const struct bpf_insn *pc,
426     int dst, sljit_w dstw, struct sljit_jump **ret0_jump,
427     uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
428 {
429 #if BJ_XREG == SLJIT_RETURN_REG   || \
430     BJ_XREG == SLJIT_TEMPORARY_REG1 || \
431     BJ_XREG == SLJIT_TEMPORARY_REG2 || \
432     BJ_XREG == SLJIT_TEMPORARY_REG3
433 #error "Not supported assignment of registers."
434 #endif
435 	int status;
436 
437 	/*
438 	 * The third argument of fn is an address on stack.
439 	 */
440 	const int arg3_offset = offsetof(struct bpfjit_stack, tmp);
441 
442 	if (BPF_CLASS(pc->code) == BPF_LDX) {
443 		/* save A */
444 		status = sljit_emit_op1(compiler,
445 		    SLJIT_MOV,
446 		    BJ_TMP3REG, 0,
447 		    BJ_AREG, 0);
448 		if (status != SLJIT_SUCCESS)
449 			return status;
450 	}
451 
452 	/*
453 	 * Prepare registers for fn(buf, k, &err) call.
454 	 */
455 	status = sljit_emit_op1(compiler,
456 	    SLJIT_MOV,
457 	    SLJIT_TEMPORARY_REG1, 0,
458 	    BJ_BUF, 0);
459 	if (status != SLJIT_SUCCESS)
460 		return status;
461 
462 	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
463 		status = sljit_emit_op2(compiler,
464 		    SLJIT_ADD,
465 		    SLJIT_TEMPORARY_REG2, 0,
466 		    BJ_XREG, 0,
467 		    SLJIT_IMM, (uint32_t)pc->k);
468 	} else {
469 		status = sljit_emit_op1(compiler,
470 		    SLJIT_MOV,
471 		    SLJIT_TEMPORARY_REG2, 0,
472 		    SLJIT_IMM, (uint32_t)pc->k);
473 	}
474 
475 	if (status != SLJIT_SUCCESS)
476 		return status;
477 
478 	status = sljit_get_local_base(compiler,
479 	    SLJIT_TEMPORARY_REG3, 0, arg3_offset);
480 	if (status != SLJIT_SUCCESS)
481 		return status;
482 
483 	/* fn(buf, k, &err); */
484 	status = sljit_emit_ijump(compiler,
485 	    SLJIT_CALL3,
486 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
487 
488 	if (dst != SLJIT_RETURN_REG) {
489 		/* move return value to dst */
490 		status = sljit_emit_op1(compiler,
491 		    SLJIT_MOV,
492 		    dst, dstw,
493 		    SLJIT_RETURN_REG, 0);
494 		if (status != SLJIT_SUCCESS)
495 			return status;
496 	}
497 
498 	if (BPF_CLASS(pc->code) == BPF_LDX) {
499 		/* restore A */
500 		status = sljit_emit_op1(compiler,
501 		    SLJIT_MOV,
502 		    BJ_AREG, 0,
503 		    BJ_TMP3REG, 0);
504 		if (status != SLJIT_SUCCESS)
505 			return status;
506 	}
507 
508 	/* tmp3 = *err; */
509 	status = sljit_emit_op1(compiler,
510 	    SLJIT_MOV_UI,
511 	    SLJIT_TEMPORARY_REG3, 0,
512 	    SLJIT_MEM1(SLJIT_LOCALS_REG), arg3_offset);
513 	if (status != SLJIT_SUCCESS)
514 		return status;
515 
516 	/* if (tmp3 != 0) return 0; */
517 	*ret0_jump = sljit_emit_cmp(compiler,
518 	    SLJIT_C_NOT_EQUAL,
519 	    SLJIT_TEMPORARY_REG3, 0,
520 	    SLJIT_IMM, 0);
521 	if (*ret0_jump == NULL)
522 		return SLJIT_ERR_ALLOC_FAILED;
523 
524 	return status;
525 }
526 #endif
527 
528 /*
529  * Generate code for
530  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
531  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
532  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
533  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
534  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
535  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
536  */
static int
emit_pkt_read(struct sljit_compiler* compiler,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status = 0; /* XXX gcc 4.1 */
	uint32_t width;
	struct sljit_jump *jump;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *over_mchain_jump;
	/* The caller performed a bounds check iff it passed a jump. */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * If the caller did not already branch to the mchain handler,
	 * do so here when buflen == 0 (the m_x*() helpers are used
	 * for that case).
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	width = read_width(pc);

	if (BPF_MODE(pc->code) == BPF_IND) {
		/*
		 * Bounds check for the X-relative read: the access is
		 * in range iff X <= buflen - (k + width); computing
		 * the right-hand side first avoids overflowing X + k.
		 */
		/* tmp1 = buflen - (pc->k + width); */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_TMP1REG, 0,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, k + width);
		if (status != SLJIT_SUCCESS)
			return status;

		/* buf += X; */
		status = sljit_emit_op2(compiler,
		    SLJIT_ADD,
		    BJ_BUF, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* if (tmp1 < X) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_LESS,
		    BJ_TMP1REG, 0,
		    BJ_XREG, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Emit the actual load of 1, 2 or 4 bytes at offset k. */
	switch (width) {
	case 4:
		status = emit_read32(compiler, k);
		break;
	case 2:
		status = emit_read16(compiler, k);
		break;
	case 1:
		status = emit_read8(compiler, k);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* buf -= X; undo the earlier adjustment. */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_BUF, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

#ifdef _KERNEL
	/* The fast path is done; skip over the mchain handler. */
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/*
		 * We arrived here from a caller's bounds check, so the
		 * mchain path is only valid when buflen == 0.
		 */
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Read from the mbuf chain via the matching m_x*() helper. */
	switch (width) {
	case 4:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xword);
		break;
	case 2:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xhalf);
		break;
	case 1:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xbyte);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	/* jump is emit_xcall's "helper reported an error" branch. */
	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
670 
671 /*
672  * Generate code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
673  */
static int
emit_msh(struct sljit_compiler* compiler,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *jump, *over_mchain_jump;
	/* The caller performed a bounds check iff it passed a jump. */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * If the caller did not already branch to the mchain handler,
	 * do so here when buflen == 0 (m_xbyte is used for that case).
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	/* tmp1 = buf[k] */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2; result goes straight into the X register */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;

#ifdef _KERNEL
	/* The fast path is done; skip over the mchain handler. */
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/*
		 * We arrived here from a caller's bounds check, so the
		 * mchain path is only valid when buflen == 0.
		 */
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* tmp1 = m_xbyte(buf, k, &err); emit_xcall preserves A. */
	status = emit_xcall(compiler, pc, BJ_TMP1REG, 0, &jump, &m_xbyte);
	if (status != SLJIT_SUCCESS)
		return status;

	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;


	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
781 
782 static int
783 emit_pow2_division(struct sljit_compiler* compiler, uint32_t k)
784 {
785 	int shift = 0;
786 	int status = SLJIT_SUCCESS;
787 
788 	while (k > 1) {
789 		k >>= 1;
790 		shift++;
791 	}
792 
793 	BJ_ASSERT(k == 1 && shift < 32);
794 
795 	if (shift != 0) {
796 		status = sljit_emit_op2(compiler,
797 		    SLJIT_LSHR|SLJIT_INT_OP,
798 		    BJ_AREG, 0,
799 		    BJ_AREG, 0,
800 		    SLJIT_IMM, shift);
801 	}
802 
803 	return status;
804 }
805 
806 #if !defined(BPFJIT_USE_UDIV)
807 static sljit_uw
808 divide(sljit_uw x, sljit_uw y)
809 {
810 
811 	return (uint32_t)x / (uint32_t)y;
812 }
813 #endif
814 
815 /*
816  * Generate A = A / div.
817  * divt,divw are either SLJIT_IMM,pc->k or BJ_XREG,0.
818  */
819 static int
820 emit_division(struct sljit_compiler* compiler, int divt, sljit_w divw)
821 {
822 	int status;
823 
824 #if BJ_XREG == SLJIT_RETURN_REG   || \
825     BJ_XREG == SLJIT_TEMPORARY_REG1 || \
826     BJ_XREG == SLJIT_TEMPORARY_REG2 || \
827     BJ_AREG == SLJIT_TEMPORARY_REG2
828 #error "Not supported assignment of registers."
829 #endif
830 
831 #if BJ_AREG != SLJIT_TEMPORARY_REG1
832 	status = sljit_emit_op1(compiler,
833 	    SLJIT_MOV,
834 	    SLJIT_TEMPORARY_REG1, 0,
835 	    BJ_AREG, 0);
836 	if (status != SLJIT_SUCCESS)
837 		return status;
838 #endif
839 
840 	status = sljit_emit_op1(compiler,
841 	    SLJIT_MOV,
842 	    SLJIT_TEMPORARY_REG2, 0,
843 	    divt, divw);
844 	if (status != SLJIT_SUCCESS)
845 		return status;
846 
847 #if defined(BPFJIT_USE_UDIV)
848 	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);
849 
850 #if BJ_AREG != SLJIT_TEMPORARY_REG1
851 	status = sljit_emit_op1(compiler,
852 	    SLJIT_MOV,
853 	    BJ_AREG, 0,
854 	    SLJIT_TEMPORARY_REG1, 0);
855 	if (status != SLJIT_SUCCESS)
856 		return status;
857 #endif
858 #else
859 	status = sljit_emit_ijump(compiler,
860 	    SLJIT_CALL2,
861 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(divide));
862 
863 #if BJ_AREG != SLJIT_RETURN_REG
864 	status = sljit_emit_op1(compiler,
865 	    SLJIT_MOV,
866 	    BJ_AREG, 0,
867 	    SLJIT_RETURN_REG, 0);
868 	if (status != SLJIT_SUCCESS)
869 		return status;
870 #endif
871 #endif
872 
873 	return status;
874 }
875 
876 /*
877  * Return true if pc is a "read from packet" instruction.
878  * If length is not NULL and return value is true, *length will
879  * be set to a safe length required to read a packet.
880  */
881 static bool
882 read_pkt_insn(const struct bpf_insn *pc, bpfjit_abc_length_t *length)
883 {
884 	bool rv;
885 	bpfjit_abc_length_t width;
886 
887 	switch (BPF_CLASS(pc->code)) {
888 	default:
889 		rv = false;
890 		break;
891 
892 	case BPF_LD:
893 		rv = BPF_MODE(pc->code) == BPF_ABS ||
894 		     BPF_MODE(pc->code) == BPF_IND;
895 		if (rv)
896 			width = read_width(pc);
897 		break;
898 
899 	case BPF_LDX:
900 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
901 		width = 1;
902 		break;
903 	}
904 
905 	if (rv && length != NULL) {
906 		*length = (pc->k > UINT32_MAX - width) ?
907 		    UINT32_MAX : pc->k + width;
908 	}
909 
910 	return rv;
911 }
912 
913 static void
914 optimize_init(struct bpfjit_insn_data *insn_dat, size_t insn_count)
915 {
916 	size_t i;
917 
918 	for (i = 0; i < insn_count; i++) {
919 		SLIST_INIT(&insn_dat[i].bjumps);
920 		insn_dat[i].invalid = BJ_INIT_NOBITS;
921 	}
922 }
923 
924 /*
925  * The function divides instructions into blocks. Destination of a jump
926  * instruction starts a new block. BPF_RET and BPF_JMP instructions
927  * terminate a block. Blocks are linear, that is, there are no jumps out
928  * from the middle of a block and there are no jumps in to the middle of
929  * a block.
930  *
931  * The function also sets bits in *initmask for memwords that
932  * need to be initialized to zero. Note that this set should be empty
933  * for any valid kernel filter program.
934  */
static bool
optimize_pass1(const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count,
    bpfjit_init_mask_t *initmask, int *nscratches)
{
	struct bpfjit_jump *jtf;
	size_t i;
	uint32_t jt, jf;
	bpfjit_init_mask_t invalid; /* borrowed from bpf_filter() */
	bool unreachable;

	/*
	 * nscratches counts how many of the scratch registers defined
	 * at the top of this file the program needs; it starts at 2
	 * and is raised to 3 (BJ_TMP2REG), 4 (BJ_XREG) or 5
	 * (BJ_TMP3REG) when instructions that use those registers are
	 * seen below.
	 */
	*nscratches = 2;
	*initmask = BJ_INIT_NOBITS;

	unreachable = false;
	invalid = ~BJ_INIT_NOBITS;

	for (i = 0; i < insn_count; i++) {
		/* A jump target always starts a reachable block. */
		if (!SLIST_EMPTY(&insn_dat[i].bjumps))
			unreachable = false;
		insn_dat[i].unreachable = unreachable;

		if (unreachable)
			continue;

		/* Merge "uninitialized" bits flowing in from jumps. */
		invalid |= insn_dat[i].invalid;

		switch (BPF_CLASS(insns[i].code)) {
		case BPF_RET:
			/* Returning A requires A to be initialized. */
			if (BPF_RVAL(insns[i].code) == BPF_A)
				*initmask |= invalid & BJ_INIT_ABIT;

			/* Fallthrough code after a return is dead. */
			unreachable = true;
			continue;

		case BPF_LD:
			if (BPF_MODE(insns[i].code) == BPF_IND ||
			    BPF_MODE(insns[i].code) == BPF_ABS) {
				if (BPF_MODE(insns[i].code) == BPF_IND &&
				    *nscratches < 4) {
					/* uses BJ_XREG */
					*nscratches = 4;
				}
				if (*nscratches < 3 &&
				    read_width(&insns[i]) == 4) {
					/* uses BJ_TMP2REG */
					*nscratches = 3;
				}
			}

			/* BPF_IND reads at offset X + k: X is an input. */
			if (BPF_MODE(insns[i].code) == BPF_IND)
				*initmask |= invalid & BJ_INIT_XBIT;

			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < BPF_MEMWORDS) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			/* A is (re)defined by every load. */
			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_LDX:
#if defined(_KERNEL)
			/* uses BJ_TMP3REG */
			*nscratches = 5;
#endif
			/* uses BJ_XREG */
			if (*nscratches < 4)
				*nscratches = 4;

			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < BPF_MEMWORDS) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			/* X is (re)defined. */
			invalid &= ~BJ_INIT_XBIT;
			continue;

		case BPF_ST:
			/* Storing A reads it ... */
			*initmask |= invalid & BJ_INIT_ABIT;

			/* ... and defines memword k. */
			if ((uint32_t)insns[i].k < BPF_MEMWORDS)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_STX:
			/* uses BJ_XREG */
			if (*nscratches < 4)
				*nscratches = 4;

			/* Storing X reads it and defines memword k. */
			*initmask |= invalid & BJ_INIT_XBIT;

			if ((uint32_t)insns[i].k < BPF_MEMWORDS)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_ALU:
			/* Every ALU op reads A ... */
			*initmask |= invalid & BJ_INIT_ABIT;

			/* ... and BPF_X-sourced ops also read X. */
			if (insns[i].code != (BPF_ALU|BPF_NEG) &&
			    BPF_SRC(insns[i].code) == BPF_X) {
				*initmask |= invalid & BJ_INIT_XBIT;
				/* uses BJ_XREG */
				if (*nscratches < 4)
					*nscratches = 4;

			}

			/* The result redefines A. */
			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(insns[i].code)) {
			case BPF_TAX: // X <- A
				/* uses BJ_XREG */
				if (*nscratches < 4)
					*nscratches = 4;

				*initmask |= invalid & BJ_INIT_ABIT;
				invalid &= ~BJ_INIT_XBIT;
				continue;

			case BPF_TXA: // A <- X
				/* uses BJ_XREG */
				if (*nscratches < 4)
					*nscratches = 4;

				*initmask |= invalid & BJ_INIT_XBIT;
				invalid &= ~BJ_INIT_ABIT;
				continue;
			}

			continue;

		case BPF_JMP:
			/* Initialize abc_length for ABC pass. */
			insn_dat[i].u.jdata.abc_length = MAX_ABC_LENGTH;

			if (BPF_OP(insns[i].code) == BPF_JA) {
				jt = jf = insns[i].k;
			} else {
				jt = insns[i].jt;
				jf = insns[i].jf;
			}

			/*
			 * Reject jumps past the last instruction;
			 * jt and jf are unsigned, so this also covers
			 * any would-be backward jump.
			 */
			if (jt >= insn_count - (i + 1) ||
			    jf >= insn_count - (i + 1)) {
				return false;
			}

			/* Both branches jump away: the next insn is dead. */
			if (jt > 0 && jf > 0)
				unreachable = true;

			jt += i + 1;
			jf += i + 1;

			jtf = insn_dat[i].u.jdata.jtf;

			/* Register this jump with both target insns. */
			jtf[0].sjump = NULL;
			jtf[0].jdata = &insn_dat[i].u.jdata;
			SLIST_INSERT_HEAD(&insn_dat[jt].bjumps,
			    &jtf[0], entries);

			if (jf != jt) {
				jtf[1].sjump = NULL;
				jtf[1].jdata = &insn_dat[i].u.jdata;
				SLIST_INSERT_HEAD(&insn_dat[jf].bjumps,
				    &jtf[1], entries);
			}

			/* Propagate the invalid mask to both targets. */
			insn_dat[jf].invalid |= invalid;
			insn_dat[jt].invalid |= invalid;
			invalid = 0;

			continue;
		}
	}

	return true;
}
1117 
1118 /*
1119  * Array Bounds Check Elimination (ABC) pass.
1120  */
1121 static void
1122 optimize_pass2(const struct bpf_insn *insns,
1123     struct bpfjit_insn_data *insn_dat, size_t insn_count)
1124 {
1125 	struct bpfjit_jump *jmp;
1126 	const struct bpf_insn *pc;
1127 	struct bpfjit_insn_data *pd;
1128 	size_t i;
1129 	bpfjit_abc_length_t length, abc_length = 0;
1130 
1131 	for (i = insn_count; i != 0; i--) {
1132 		pc = &insns[i-1];
1133 		pd = &insn_dat[i-1];
1134 
1135 		if (pd->unreachable)
1136 			continue;
1137 
1138 		switch (BPF_CLASS(pc->code)) {
1139 		case BPF_RET:
1140 			abc_length = 0;
1141 			break;
1142 
1143 		case BPF_JMP:
1144 			abc_length = pd->u.jdata.abc_length;
1145 			break;
1146 
1147 		default:
1148 			if (read_pkt_insn(pc, &length)) {
1149 				if (abc_length < length)
1150 					abc_length = length;
1151 				pd->u.rdata.abc_length = abc_length;
1152 			}
1153 			break;
1154 		}
1155 
1156 		SLIST_FOREACH(jmp, &pd->bjumps, entries) {
1157 			if (jmp->jdata->abc_length > abc_length)
1158 				jmp->jdata->abc_length = abc_length;
1159 		}
1160 	}
1161 }
1162 
1163 static void
1164 optimize_pass3(const struct bpf_insn *insns,
1165     struct bpfjit_insn_data *insn_dat, size_t insn_count)
1166 {
1167 	struct bpfjit_jump *jmp;
1168 	size_t i;
1169 	bpfjit_abc_length_t checked_length = 0;
1170 
1171 	for (i = 0; i < insn_count; i++) {
1172 		if (insn_dat[i].unreachable)
1173 			continue;
1174 
1175 		SLIST_FOREACH(jmp, &insn_dat[i].bjumps, entries) {
1176 			if (jmp->jdata->checked_length < checked_length)
1177 				checked_length = jmp->jdata->checked_length;
1178 		}
1179 
1180 		if (BPF_CLASS(insns[i].code) == BPF_JMP) {
1181 			insn_dat[i].u.jdata.checked_length = checked_length;
1182 		} else if (read_pkt_insn(&insns[i], NULL)) {
1183 			struct bpfjit_read_pkt_data *rdata =
1184 			    &insn_dat[i].u.rdata;
1185 			rdata->check_length = 0;
1186 			if (checked_length < rdata->abc_length) {
1187 				checked_length = rdata->abc_length;
1188 				rdata->check_length = checked_length;
1189 			}
1190 		}
1191 	}
1192 }
1193 
1194 static bool
1195 optimize(const struct bpf_insn *insns,
1196     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1197     bpfjit_init_mask_t *initmask, int *nscratches)
1198 {
1199 
1200 	optimize_init(insn_dat, insn_count);
1201 
1202 	if (!optimize_pass1(insns, insn_dat, insn_count,
1203 	    initmask, nscratches)) {
1204 		return false;
1205 	}
1206 
1207 	optimize_pass2(insns, insn_dat, insn_count);
1208 	optimize_pass3(insns, insn_dat, insn_count);
1209 
1210 	return true;
1211 }
1212 
1213 /*
1214  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1215  */
1216 static int
1217 bpf_alu_to_sljit_op(const struct bpf_insn *pc)
1218 {
1219 
1220 	/*
1221 	 * Note: all supported 64bit arches have 32bit multiply
1222 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1223 	 */
1224 	switch (BPF_OP(pc->code)) {
1225 	case BPF_ADD: return SLJIT_ADD;
1226 	case BPF_SUB: return SLJIT_SUB;
1227 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1228 	case BPF_OR:  return SLJIT_OR;
1229 	case BPF_AND: return SLJIT_AND;
1230 	case BPF_LSH: return SLJIT_SHL;
1231 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
1232 	default:
1233 		BJ_ASSERT(false);
1234 		return 0;
1235 	}
1236 }
1237 
1238 /*
1239  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1240  */
1241 static int
1242 bpf_jmp_to_sljit_cond(const struct bpf_insn *pc, bool negate)
1243 {
1244 	/*
1245 	 * Note: all supported 64bit arches have 32bit comparison
1246 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1247 	 */
1248 	int rv = SLJIT_INT_OP;
1249 
1250 	switch (BPF_OP(pc->code)) {
1251 	case BPF_JGT:
1252 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1253 		break;
1254 	case BPF_JGE:
1255 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1256 		break;
1257 	case BPF_JEQ:
1258 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1259 		break;
1260 	case BPF_JSET:
1261 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1262 		break;
1263 	default:
1264 		BJ_ASSERT(false);
1265 	}
1266 
1267 	return rv;
1268 }
1269 
1270 /*
1271  * Convert BPF_K and BPF_X to sljit register.
1272  */
1273 static int
1274 kx_to_reg(const struct bpf_insn *pc)
1275 {
1276 
1277 	switch (BPF_SRC(pc->code)) {
1278 	case BPF_K: return SLJIT_IMM;
1279 	case BPF_X: return BJ_XREG;
1280 	default:
1281 		BJ_ASSERT(false);
1282 		return 0;
1283 	}
1284 }
1285 
1286 static sljit_w
1287 kx_to_reg_arg(const struct bpf_insn *pc)
1288 {
1289 
1290 	switch (BPF_SRC(pc->code)) {
1291 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1292 	case BPF_X: return 0;               /* BJ_XREG, 0,      */
1293 	default:
1294 		BJ_ASSERT(false);
1295 		return 0;
1296 	}
1297 }
1298 
/*
 * Compile a BPF program into native code using sljit.
 *
 * insns/insn_count describe the program; bc is accepted for interface
 * compatibility but is not referenced in this revision.  Returns a
 * pointer to the generated function, or NULL on any failure (invalid
 * program, allocation failure, or sljit error).  The returned code must
 * eventually be released with bpfjit_free_code().
 */
bpfjit_func_t
bpfjit_generate_code(bpf_ctx_t *bc, struct bpf_insn *insns, size_t insn_count)
{
	void *rv;
	struct sljit_compiler *compiler;

	size_t i;
	int status;
	int branching, negate;
	unsigned int rval, mode, src;

	/* optimization related */
	bpfjit_init_mask_t initmask;	/* A/X/memwords that need zeroing */
	int nscratches;

	/* a list of jumps to out-of-bound return from a generated function */
	struct sljit_jump **ret0;
	size_t ret0_size, ret0_maxsize;

	const struct bpf_insn *pc;
	struct bpfjit_insn_data *insn_dat;

	/* for local use */
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct bpfjit_jump *bjump, *jtf;

	struct sljit_jump *to_mchain_jump;

	uint32_t jt, jf;

	rv = NULL;
	compiler = NULL;
	insn_dat = NULL;
	ret0 = NULL;

	/* Reject empty programs and guard the multiplication below. */
	if (insn_count == 0 || insn_count > SIZE_MAX / sizeof(insn_dat[0]))
		goto fail;

	insn_dat = BJ_ALLOC(insn_count * sizeof(insn_dat[0]));
	if (insn_dat == NULL)
		goto fail;

	/* Validate the program and compute per-insn metadata. */
	if (!optimize(insns, insn_dat, insn_count,
	    &initmask, &nscratches)) {
		goto fail;
	}

#if defined(_KERNEL)
	/* bpf_filter() checks initialization of memwords. */
	BJ_ASSERT((initmask & BJ_INIT_MMASK) == 0);
#endif

	/* Initial capacity for ret0; append_jump() grows it on demand. */
	ret0_size = 0;
	ret0_maxsize = 64;
	ret0 = BJ_ALLOC(ret0_maxsize * sizeof(ret0[0]));
	if (ret0 == NULL)
		goto fail;

	compiler = sljit_create_compiler();
	if (compiler == NULL)
		goto fail;

#if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
	sljit_compiler_verbose(compiler, stderr);
#endif

	/* Prologue: 3 arguments, nscratches temporaries, 3 saved regs. */
	status = sljit_emit_enter(compiler,
	    3, nscratches, 3, sizeof(struct bpfjit_stack));
	if (status != SLJIT_SUCCESS)
		goto fail;

	/* Zero only the memwords that may be read before being written. */
	for (i = 0; i < BPF_MEMWORDS; i++) {
		if (initmask & BJ_INIT_MBIT(i)) {
			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    offsetof(struct bpfjit_stack, mem) +
			        i * sizeof(uint32_t),
			    SLJIT_IMM, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;
		}
	}

	if (initmask & BJ_INIT_ABIT) {
		/* A = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_AREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (initmask & BJ_INIT_XBIT) {
		/* X = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_XREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	/* Main loop: translate one BPF instruction per iteration. */
	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		to_mchain_jump = NULL;

		/*
		 * Resolve jumps to the current insn.
		 */
		label = NULL;
		SLIST_FOREACH(bjump, &insn_dat[i].bjumps, entries) {
			if (bjump->sjump != NULL) {
				if (label == NULL)
					label = sljit_emit_label(compiler);
				if (label == NULL)
					goto fail;
				sljit_set_label(bjump->sjump, label);
			}
		}

		if (read_pkt_insn(&insns[i], NULL) &&
		    insn_dat[i].u.rdata.check_length > 0) {
			/* if (buflen < check_length) return 0; */
			jump = sljit_emit_cmp(compiler,
			    SLJIT_C_LESS,
			    BJ_BUFLEN, 0,
			    SLJIT_IMM,
			    insn_dat[i].u.rdata.check_length);
			if (jump == NULL)
				goto fail;
#ifdef _KERNEL
			/*
			 * NOTE(review): in the kernel this jump is handed to
			 * emit_pkt_read()/emit_msh() below instead of the
			 * ret0 list - presumably a slow path reading from
			 * the mbuf chain; confirm against those helpers.
			 */
			to_mchain_jump = jump;
#else
			if (!append_jump(jump, &ret0,
			    &ret0_size, &ret0_maxsize))
				goto fail;
#endif
		}

		pc = &insns[i];
		switch (BPF_CLASS(pc->code)) {

		default:
			goto fail;

		case BPF_LD:
			/* BPF_LD+BPF_IMM          A <- k */
			if (pc->code == (BPF_LD|BPF_IMM)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_MEM          A <- M[k] */
			if (pc->code == (BPF_LD|BPF_MEM)) {
				if ((uint32_t)pc->k >= BPF_MEMWORDS)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_AREG, 0,
				    SLJIT_MEM1(SLJIT_LOCALS_REG),
				    offsetof(struct bpfjit_stack, mem) +
				        pc->k * sizeof(uint32_t));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    BJ_WIRELEN, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* Remaining loads must be packet reads. */
			mode = BPF_MODE(pc->code);
			if (mode != BPF_ABS && mode != BPF_IND)
				goto fail;

			status = emit_pkt_read(compiler, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_LDX:
			mode = BPF_MODE(pc->code);

			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
			if (mode == BPF_IMM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_XREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
			if (mode == BPF_LEN) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_XREG, 0,
				    BJ_WIRELEN, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
			if (mode == BPF_MEM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				if ((uint32_t)pc->k >= BPF_MEMWORDS)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_XREG, 0,
				    SLJIT_MEM1(SLJIT_LOCALS_REG),
				    offsetof(struct bpfjit_stack, mem) +
				        pc->k * sizeof(uint32_t));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
				goto fail;

			status = emit_msh(compiler, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ST:
			/* M[k] <- A; k must be a valid memword index. */
			if (pc->code != BPF_ST ||
			    (uint32_t)pc->k >= BPF_MEMWORDS) {
				goto fail;
			}

			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    offsetof(struct bpfjit_stack, mem) +
			        pc->k * sizeof(uint32_t),
			    BJ_AREG, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_STX:
			/* M[k] <- X; k must be a valid memword index. */
			if (pc->code != BPF_STX ||
			    (uint32_t)pc->k >= BPF_MEMWORDS) {
				goto fail;
			}

			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    offsetof(struct bpfjit_stack, mem) +
			        pc->k * sizeof(uint32_t),
			    BJ_XREG, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ALU:
			/* BPF_NEG has no BPF_K/BPF_X variant. */
			if (pc->code == (BPF_ALU|BPF_NEG)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_NEG,
				    BJ_AREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* All ALU ops except BPF_DIV map 1:1 to sljit. */
			if (BPF_OP(pc->code) != BPF_DIV) {
				status = sljit_emit_op2(compiler,
				    bpf_alu_to_sljit_op(pc),
				    BJ_AREG, 0,
				    BJ_AREG, 0,
				    kx_to_reg(pc), kx_to_reg_arg(pc));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_DIV */

			src = BPF_SRC(pc->code);
			if (src != BPF_X && src != BPF_K)
				goto fail;

			/* division by zero? */
			if (src == BPF_X) {
				/* Runtime check: X == 0 returns 0. */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_EQUAL|SLJIT_INT_OP,
				    BJ_XREG, 0,
				    SLJIT_IMM, 0);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (pc->k == 0) {
				/* Constant zero divisor always returns 0. */
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			}

			if (src == BPF_X) {
				status = emit_division(compiler, BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			} else if (pc->k != 0) {
				/* Power-of-two divisors become shifts. */
				if (pc->k & (pc->k - 1)) {
				    status = emit_division(compiler,
				        SLJIT_IMM, (uint32_t)pc->k);
				} else {
				    status = emit_pow2_division(compiler,
				        (uint32_t)pc->k);
				}
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_JMP:
			/* BPF_JA is unconditional: both targets equal k. */
			if (BPF_OP(pc->code) == BPF_JA) {
				jt = jf = pc->k;
			} else {
				jt = pc->jt;
				jf = pc->jf;
			}

			/*
			 * negate: emit the inverted condition when the
			 * true branch falls through (jt == 0).
			 * branching: no conditional jump is needed when
			 * both targets coincide.
			 */
			negate = (jt == 0) ? 1 : 0;
			branching = (jt == jf) ? 0 : 1;
			jtf = insn_dat[i].u.jdata.jtf;

			if (branching) {
				if (BPF_OP(pc->code) != BPF_JSET) {
					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
				} else {
					/* JSET: test (A & operand) != 0. */
					status = sljit_emit_op2(compiler,
					    SLJIT_AND,
					    BJ_TMP1REG, 0,
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
					if (status != SLJIT_SUCCESS)
						goto fail;

					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_TMP1REG, 0,
					    SLJIT_IMM, 0);
				}

				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[negate].sjump == NULL);
				jtf[negate].sjump = jump;
			}

			/*
			 * Emit an unconditional jump when neither branch
			 * falls through to the next instruction.
			 */
			if (!branching || (jt != 0 && jf != 0)) {
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[branching].sjump == NULL);
				jtf[branching].sjump = jump;
			}

			continue;

		case BPF_RET:
			rval = BPF_RVAL(pc->code);
			/* BPF_RET+BPF_X is not a valid return variant. */
			if (rval == BPF_X)
				goto fail;

			/* BPF_RET+BPF_K    accept k bytes */
			if (rval == BPF_K) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			/* BPF_RET+BPF_A    accept A bytes */
			if (rval == BPF_A) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(pc->code)) {
			case BPF_TAX:
				/* X <- A */
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_XREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_TXA:
				/* A <- X */
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			goto fail;
		} /* switch */
	} /* main loop */

	BJ_ASSERT(ret0_size <= ret0_maxsize);

	/* Point all collected out-of-bounds jumps at a "return 0" stub. */
	if (ret0_size > 0) {
		label = sljit_emit_label(compiler);
		if (label == NULL)
			goto fail;
		for (i = 0; i < ret0_size; i++)
			sljit_set_label(ret0[i], label);
	}

	status = sljit_emit_return(compiler,
	    SLJIT_MOV_UI,
	    SLJIT_IMM, 0);
	if (status != SLJIT_SUCCESS)
		goto fail;

	rv = sljit_generate_code(compiler);

fail:
	/* Common cleanup; rv is still NULL unless generation succeeded. */
	if (compiler != NULL)
		sljit_free_compiler(compiler);

	if (insn_dat != NULL)
		BJ_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));

	if (ret0 != NULL)
		BJ_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));

	return (bpfjit_func_t)rv;
}
1796 
1797 void
1798 bpfjit_free_code(bpfjit_func_t code)
1799 {
1800 
1801 	sljit_free_code((void *)code);
1802 }
1803