xref: /netbsd-src/sys/net/bpfjit.c (revision defe44bfc70cfd76ef8dcd678b6e86ba3d7d684c)
1 /*	$NetBSD: bpfjit.c,v 1.3 2013/09/20 23:19:52 rmind Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2012 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.3 2013/09/20 23:19:52 rmind Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.3 2013/09/20 23:19:52 rmind Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <stdlib.h>
44 #include <assert.h>
45 #define BPFJIT_ALLOC(sz) malloc(sz)
46 #define BPFJIT_FREE(p, sz) free(p)
47 #define BPFJIT_ASSERT(c) assert(c)
48 #else
49 #include <sys/kmem.h>
50 #define BPFJIT_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
51 #define BPFJIT_FREE(p, sz) kmem_free(p, sz)
52 #define BPFJIT_ASSERT(c) KASSERT(c)
53 #endif
54 
55 #ifndef _KERNEL
56 #include <limits.h>
57 #include <stdio.h>
58 #include <stdbool.h>
59 #include <stddef.h>
60 #include <stdint.h>
61 #else
62 #include <sys/atomic.h>
63 #include <sys/module.h>
64 #endif
65 
66 #include <net/bpfjit.h>
67 #include <sljitLir.h>
68 
69 #define BPFJIT_A	SLJIT_TEMPORARY_REG1
70 #define BPFJIT_X	SLJIT_TEMPORARY_EREG1
71 #define BPFJIT_TMP1	SLJIT_TEMPORARY_REG2
72 #define BPFJIT_TMP2	SLJIT_TEMPORARY_REG3
73 #define BPFJIT_BUF	SLJIT_SAVED_REG1
74 #define BPFJIT_WIRELEN	SLJIT_SAVED_REG2
75 #define BPFJIT_BUFLEN	SLJIT_SAVED_REG3
76 #define BPFJIT_KERN_TMP SLJIT_TEMPORARY_EREG2
77 
78 /*
79  * Flags for bpfjit_optimization_hints().
80  */
81 #define BPFJIT_INIT_X 0x10000
82 #define BPFJIT_INIT_A 0x20000
83 
84 /*
85  * Node of bj_jumps list.
86  */
struct bpfjit_jump {
	struct sljit_jump *bj_jump;	/* sljit jump to patch once the target label is emitted */
	SLIST_ENTRY(bpfjit_jump) bj_entries;	/* link in the target insn's bj_jumps list */
	uint32_t bj_safe_length;	/* safe packet read length on the jump's source path */
};
92 
93 /*
94  * Data for BPF_JMP instruction.
95  */
struct bpfjit_jump_data {
	/*
	 * These entries make up bj_jumps list:
	 * bj_jtf[0] - when coming from jt path,
	 * bj_jtf[1] - when coming from jf path.
	 * Only bj_jtf[0] is used when jt == jf (e.g. BPF_JA).
	 */
	struct bpfjit_jump bj_jtf[2];
};
104 
105 /*
106  * Data for "read from packet" instructions.
107  * See also read_pkt_insn() function below.
108  */
struct bpfjit_read_pkt_data {
	/*
	 * If positive, emit "if (buflen < bj_check_length) return 0".
	 * A value of 0 means no length check is emitted for this insn
	 * (an earlier insn in the same linear block already covers it).
	 * We assume that buflen is never equal to UINT32_MAX (otherwise,
	 * we need a special bool variable to emit unconditional "return 0").
	 */
	uint32_t bj_check_length;
};
117 
118 /*
119  * Additional (optimization-related) data for bpf_insn.
120  */
struct bpfjit_insn_data {
	/* List of jumps to this insn. */
	SLIST_HEAD(, bpfjit_jump) bj_jumps;

	union {
		struct bpfjit_jump_data     bj_jdata; /* valid for BPF_JMP insns */
		struct bpfjit_read_pkt_data bj_rdata; /* valid for packet-read insns */
	} bj_aux;

	/* True if the insn can never be reached; set by optimize(). */
	bool bj_unreachable;
};
132 
133 #ifdef _KERNEL
134 
135 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
136 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
137 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
138 
139 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
140 
static int
bpfjit_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		/*
		 * Publish the two ops with a producer barrier between
		 * the stores: bj_free_code must be globally visible
		 * before bj_generate_code, so a reader that observes a
		 * non-NULL bj_generate_code can also safely call
		 * bj_free_code.
		 */
		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
		membar_producer();
		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
		membar_producer();
		return 0;

	case MODULE_CMD_FINI:
		/*
		 * Unloading is refused.  NOTE(review): presumably
		 * because generated code may still be referenced by
		 * active filters — confirm before changing.
		 */
		return EOPNOTSUPP;

	default:
		return ENOTTY;
	}
}
160 #endif
161 
162 static uint32_t
163 read_width(struct bpf_insn *pc)
164 {
165 
166 	switch (BPF_SIZE(pc->code)) {
167 	case BPF_W:
168 		return 4;
169 	case BPF_H:
170 		return 2;
171 	case BPF_B:
172 		return 1;
173 	default:
174 		BPFJIT_ASSERT(false);
175 		return 0;
176 	}
177 }
178 
179 /*
180  * Get offset of M[k] on the stack.
181  */
static size_t
mem_local_offset(uint32_t k, unsigned int minm)
{
	/* Each scratch memory word M[k] occupies one 32bit stack slot. */
	const size_t slot_off = (k - minm) * sizeof(uint32_t);

#ifdef _KERNEL
	/*
	 * The first 4 bytes of the frame are reserved for the third
	 * argument of m_xword/m_xhalf/m_xbyte; M[] starts after it.
	 */
	return sizeof(uint32_t) + slot_off;
#else
	return slot_off;
#endif
}
196 
197 /*
198  * Generate code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
199  */
static int
emit_read8(struct sljit_compiler* compiler, uint32_t k)
{

	/* A = buf[k]; single unsigned-byte load, zero-extended. */
	return sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_A, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k);
}
209 
210 /*
211  * Generate code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
212  */
static int
emit_read16(struct sljit_compiler* compiler, uint32_t k)
{
	int status;

	/*
	 * Load two bytes and combine them in network (big-endian)
	 * order: A = (buf[k] << 8) + buf[k+1].
	 */

	/* tmp1 = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_TMP1, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_A, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BPFJIT_TMP1, 0,
	    BPFJIT_TMP1, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BPFJIT_A, 0,
	    BPFJIT_A, 0,
	    BPFJIT_TMP1, 0);
	return status;
}
251 
252 /*
253  * Generate code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
254  */
static int
emit_read32(struct sljit_compiler* compiler, uint32_t k)
{
	int status;

	/*
	 * Load four bytes and combine them in network (big-endian)
	 * order:
	 *   A = (buf[k]<<24) + (buf[k+1]<<16) + (buf[k+2]<<8) + buf[k+3].
	 * tmp1 is reused for the k and k+2 bytes; tmp2 holds the k+1
	 * byte; A starts as the k+3 byte and accumulates the sum.
	 */

	/* tmp1 = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_TMP1, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_TMP2, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+3]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_A, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k+3);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 24; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BPFJIT_TMP1, 0,
	    BPFJIT_TMP1, 0,
	    SLJIT_IMM, 24);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BPFJIT_A, 0,
	    BPFJIT_A, 0,
	    BPFJIT_TMP1, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = buf[k+2]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_TMP1, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k+2);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = tmp2 << 16; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BPFJIT_TMP2, 0,
	    BPFJIT_TMP2, 0,
	    SLJIT_IMM, 16);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp2; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BPFJIT_A, 0,
	    BPFJIT_A, 0,
	    BPFJIT_TMP2, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BPFJIT_TMP1, 0,
	    BPFJIT_TMP1, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BPFJIT_A, 0,
	    BPFJIT_A, 0,
	    BPFJIT_TMP1, 0);
	return status;
}
345 
346 #ifdef _KERNEL
347 /*
348  * Generate m_xword/m_xhalf/m_xbyte call.
349  *
350  * pc is one of:
351  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
352  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
353  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
354  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
355  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
356  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
357  * BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
358  *
359  * dst must be BPFJIT_A for BPF_LD instructions and BPFJIT_X
 360  * or any of BPFJIT_TMP* registers for BPF_MSH instruction.
361  */
362 static int
363 emit_xcall(struct sljit_compiler* compiler, struct bpf_insn *pc,
364     int dst, sljit_w dstw, struct sljit_jump **ret0_jump,
365     uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
366 {
367 #if BPFJIT_X != SLJIT_TEMPORARY_EREG1 || \
368     BPFJIT_X == SLJIT_RETURN_REG
369 #error "Not supported assignment of registers."
370 #endif
371 	int status;
372 
373 	/*
374 	 * The third argument of fn is an address on stack.
375 	 */
376 	const int arg3_offset = 0;
377 
378 	if (BPF_CLASS(pc->code) == BPF_LDX) {
379 		/* save A */
380 		status = sljit_emit_op1(compiler,
381 		    SLJIT_MOV,
382 		    BPFJIT_KERN_TMP, 0,
383 		    BPFJIT_A, 0);
384 		if (status != SLJIT_SUCCESS)
385 			return status;
386 	}
387 
388 	/*
389 	 * Prepare registers for fn(buf, k, &err) call.
390 	 */
391 	status = sljit_emit_op1(compiler,
392 	    SLJIT_MOV,
393 	    SLJIT_TEMPORARY_REG1, 0,
394 	    BPFJIT_BUF, 0);
395 	if (status != SLJIT_SUCCESS)
396 		return status;
397 
398 	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
399 		status = sljit_emit_op2(compiler,
400 		    SLJIT_ADD,
401 		    SLJIT_TEMPORARY_REG2, 0,
402 		    BPFJIT_X, 0,
403 		    SLJIT_IMM, (uint32_t)pc->k);
404 	} else {
405 		status = sljit_emit_op1(compiler,
406 		    SLJIT_MOV,
407 		    SLJIT_TEMPORARY_REG2, 0,
408 		    SLJIT_IMM, (uint32_t)pc->k);
409 	}
410 
411 	if (status != SLJIT_SUCCESS)
412 		return status;
413 
414 	status = sljit_get_local_base(compiler,
415 	    SLJIT_TEMPORARY_REG3, 0, arg3_offset);
416 	if (status != SLJIT_SUCCESS)
417 		return status;
418 
419 	/* fn(buf, k, &err); */
420 	status = sljit_emit_ijump(compiler,
421 	    SLJIT_CALL3,
422 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
423 
424 	if (BPF_CLASS(pc->code) == BPF_LDX) {
425 
426 		/* move return value to dst */
427 		BPFJIT_ASSERT(dst != SLJIT_RETURN_REG);
428 		status = sljit_emit_op1(compiler,
429 		    SLJIT_MOV,
430 		    dst, dstw,
431 		    SLJIT_RETURN_REG, 0);
432 		if (status != SLJIT_SUCCESS)
433 			return status;
434 
435 		/* restore A */
436 		status = sljit_emit_op1(compiler,
437 		    SLJIT_MOV,
438 		    BPFJIT_A, 0,
439 		    BPFJIT_KERN_TMP, 0);
440 		if (status != SLJIT_SUCCESS)
441 			return status;
442 
443 	} else if (dst != SLJIT_RETURN_REG) {
444 		status = sljit_emit_op1(compiler,
445 		    SLJIT_MOV,
446 		    dst, dstw,
447 		    SLJIT_RETURN_REG, 0);
448 		if (status != SLJIT_SUCCESS)
449 			return status;
450 	}
451 
452 	/* tmp3 = *err; */
453 	status = sljit_emit_op1(compiler,
454 	    SLJIT_MOV_UI,
455 	    SLJIT_TEMPORARY_REG3, 0,
456 	    SLJIT_MEM1(SLJIT_LOCALS_REG), arg3_offset);
457 	if (status != SLJIT_SUCCESS)
458 		return status;
459 
460 	/* if (tmp3 != 0) return 0; */
461 	*ret0_jump = sljit_emit_cmp(compiler,
462 	    SLJIT_C_NOT_EQUAL,
463 	    SLJIT_TEMPORARY_REG3, 0,
464 	    SLJIT_IMM, 0);
465 	if (*ret0_jump == NULL)
466 		return SLJIT_ERR_ALLOC_FAILED;
467 
468 	return status;
469 }
470 #endif
471 
472 /*
473  * Generate code for
474  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
475  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
476  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
477  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
478  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
479  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
480  */
static int
emit_pkt_read(struct sljit_compiler* compiler,
    struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump **ret0, size_t *ret0_size)
{
	int status;
	uint32_t width;
	struct sljit_jump *jump;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *over_mchain_jump;
	/*
	 * If the caller passed its own buflen==0 jump, the mchain
	 * handler below must re-test buflen itself.
	 */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * buflen == 0 diverts to the mchain handler below, which reads
	 * the packet through m_xword/m_xhalf/m_xbyte instead of direct
	 * buffer loads.
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BPFJIT_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	width = read_width(pc);

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* tmp1 = buflen - (pc->k + width); */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BPFJIT_TMP1, 0,
		    BPFJIT_BUFLEN, 0,
		    SLJIT_IMM, k + width);
		if (status != SLJIT_SUCCESS)
			return status;

		/* buf += X; temporarily rebase for the X+k read */
		status = sljit_emit_op2(compiler,
		    SLJIT_ADD,
		    BPFJIT_BUF, 0,
		    BPFJIT_BUF, 0,
		    BPFJIT_X, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* if (tmp1 < X) return 0; out-of-bounds read */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_LESS,
		    BPFJIT_TMP1, 0,
		    BPFJIT_X, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		ret0[(*ret0_size)++] = jump;
	}

	/* Direct (contiguous buffer) load of the requested width. */
	switch (width) {
	case 4:
		status = emit_read32(compiler, k);
		break;
	case 2:
		status = emit_read16(compiler, k);
		break;
	case 1:
		status = emit_read8(compiler, k);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* buf -= X; restore the base pointer */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BPFJIT_BUF, 0,
		    BPFJIT_BUF, 0,
		    BPFJIT_X, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BPFJIT_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		ret0[(*ret0_size)++] = jump;
	}

	/* Read via the m_x* helper that matches the load width. */
	switch (width) {
	case 4:
		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xword);
		break;
	case 2:
		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xhalf);
		break;
	case 1:
		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xbyte);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	/* The helper's error path also returns 0. */
	ret0[(*ret0_size)++] = jump;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
611 
612 /*
613  * Generate code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
614  */
static int
emit_msh(struct sljit_compiler* compiler,
    struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump **ret0, size_t *ret0_size)
{
	int status;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *jump, *over_mchain_jump;
	/*
	 * If the caller passed its own buflen==0 jump, the mchain
	 * handler below must re-test buflen itself.
	 */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/* buflen == 0 diverts to the mchain handler below. */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BPFJIT_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	/* tmp1 = buf[k] */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BPFJIT_TMP1, 0,
	    SLJIT_MEM1(BPFJIT_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BPFJIT_TMP1, 0,
	    BPFJIT_TMP1, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BPFJIT_X, 0,
	    BPFJIT_TMP1, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BPFJIT_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		ret0[(*ret0_size)++] = jump;
	}

	/* tmp1 = m_xbyte(buf, k, &err); error path returns 0 */
	status = emit_xcall(compiler, pc, BPFJIT_TMP1, 0, &jump, &m_xbyte);
	if (status != SLJIT_SUCCESS)
		return status;
	ret0[(*ret0_size)++] = jump;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BPFJIT_TMP1, 0,
	    BPFJIT_TMP1, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BPFJIT_X, 0,
	    BPFJIT_TMP1, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;


	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
719 
720 static int
721 emit_pow2_division(struct sljit_compiler* compiler, uint32_t k)
722 {
723 	int shift = 0;
724 	int status = SLJIT_SUCCESS;
725 
726 	while (k > 1) {
727 		k >>= 1;
728 		shift++;
729 	}
730 
731 	BPFJIT_ASSERT(k == 1 && shift < 32);
732 
733 	if (shift != 0) {
734 		status = sljit_emit_op2(compiler,
735 		    SLJIT_LSHR|SLJIT_INT_OP,
736 		    BPFJIT_A, 0,
737 		    BPFJIT_A, 0,
738 		    SLJIT_IMM, shift);
739 	}
740 
741 	return status;
742 }
743 
744 #if !defined(BPFJIT_USE_UDIV)
745 static sljit_uw
746 divide(sljit_uw x, sljit_uw y)
747 {
748 
749 	return (uint32_t)x / (uint32_t)y;
750 }
751 #endif
752 
753 /*
754  * Generate A = A / div.
755  * divt,divw are either SLJIT_IMM,pc->k or BPFJIT_X,0.
756  */
static int
emit_division(struct sljit_compiler* compiler, int divt, sljit_w divw)
{
	int status;

#if BPFJIT_X == SLJIT_TEMPORARY_REG1 || \
    BPFJIT_X == SLJIT_RETURN_REG     || \
    BPFJIT_X == SLJIT_TEMPORARY_REG2 || \
    BPFJIT_A == SLJIT_TEMPORARY_REG2
#error "Not supported assignment of registers."
#endif

	/* Move the dividend (A) into the first argument register. */
#if BPFJIT_A != SLJIT_TEMPORARY_REG1
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_TEMPORARY_REG1, 0,
	    BPFJIT_A, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	/* Move the divisor (SLJIT_IMM,k or BPFJIT_X,0) into REG2. */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_TEMPORARY_REG2, 0,
	    divt, divw);
	if (status != SLJIT_SUCCESS)
		return status;

#if defined(BPFJIT_USE_UDIV)
	/*
	 * Hardware unsigned divide.  NOTE(review): this status is not
	 * checked before the next emit overwrites it; sljit records
	 * errors in the compiler, but an explicit check would match
	 * the rest of this file — confirm and fix separately.
	 */
	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);

#if BPFJIT_A != SLJIT_TEMPORARY_REG1
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BPFJIT_A, 0,
	    SLJIT_TEMPORARY_REG1, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif
#else
	/* Call the out-of-line divide() helper instead. */
	status = sljit_emit_ijump(compiler,
	    SLJIT_CALL2,
	    SLJIT_IMM, SLJIT_FUNC_OFFSET(divide));

#if BPFJIT_A != SLJIT_RETURN_REG
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BPFJIT_A, 0,
	    SLJIT_RETURN_REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif
#endif

	return status;
}
813 
814 /*
815  * Count BPF_RET instructions.
816  */
817 static size_t
818 count_returns(struct bpf_insn *insns, size_t insn_count)
819 {
820 	size_t i;
821 	size_t rv;
822 
823 	rv = 0;
824 	for (i = 0; i < insn_count; i++) {
825 		if (BPF_CLASS(insns[i].code) == BPF_RET)
826 			rv++;
827 	}
828 
829 	return rv;
830 }
831 
832 /*
833  * Return true if pc is a "read from packet" instruction.
834  * If length is not NULL and return value is true, *length will
835  * be set to a safe length required to read a packet.
836  */
837 static bool
838 read_pkt_insn(struct bpf_insn *pc, uint32_t *length)
839 {
840 	bool rv;
841 	uint32_t width;
842 
843 	switch (BPF_CLASS(pc->code)) {
844 	default:
845 		rv = false;
846 		break;
847 
848 	case BPF_LD:
849 		rv = BPF_MODE(pc->code) == BPF_ABS ||
850 		     BPF_MODE(pc->code) == BPF_IND;
851 		if (rv)
852 			width = read_width(pc);
853 		break;
854 
855 	case BPF_LDX:
856 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
857 		width = 1;
858 		break;
859 	}
860 
861 	if (rv && length != NULL) {
862 		*length = (pc->k > UINT32_MAX - width) ?
863 		    UINT32_MAX : pc->k + width;
864 	}
865 
866 	return rv;
867 }
868 
869 /*
870  * Set bj_check_length for all "read from packet" instructions
871  * in a linear block of instructions [from, to).
872  */
873 static void
874 set_check_length(struct bpf_insn *insns, struct bpfjit_insn_data *insn_dat,
875     size_t from, size_t to, uint32_t length)
876 {
877 
878 	for (; from < to; from++) {
879 		if (read_pkt_insn(&insns[from], NULL)) {
880 			insn_dat[from].bj_aux.bj_rdata.bj_check_length = length;
881 			length = 0;
882 		}
883 	}
884 }
885 
886 /*
887  * The function divides instructions into blocks. Destination of a jump
888  * instruction starts a new block. BPF_RET and BPF_JMP instructions
889  * terminate a block. Blocks are linear, that is, there are no jumps out
890  * from the middle of a block and there are no jumps in to the middle of
891  * a block.
892  * If a block has one or more "read from packet" instructions,
893  * bj_check_length will be set to one value for the whole block and that
894  * value will be equal to the greatest value of safe lengths of "read from
895  * packet" instructions inside the block.
896  */
static int
optimize(struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	size_t i;
	size_t first_read;	/* first packet read in the current block */
	bool unreachable;
	uint32_t jt, jf;
	uint32_t length, safe_length;
	struct bpfjit_jump *jmp, *jtf;

	for (i = 0; i < insn_count; i++)
		SLIST_INIT(&insn_dat[i].bj_jumps);

	safe_length = 0;
	unreachable = false;
	first_read = SIZE_MAX;

	for (i = 0; i < insn_count; i++) {

		/* An incoming jump starts a new linear block. */
		if (!SLIST_EMPTY(&insn_dat[i].bj_jumps)) {
			unreachable = false;

			/* Flush the previous block's check length. */
			set_check_length(insns, insn_dat,
			    first_read, i, safe_length);
			first_read = SIZE_MAX;

			/*
			 * The new block's safe length is the minimum
			 * over all paths that can reach it.
			 */
			safe_length = UINT32_MAX;
			SLIST_FOREACH(jmp, &insn_dat[i].bj_jumps, bj_entries) {
				if (jmp->bj_safe_length < safe_length)
					safe_length = jmp->bj_safe_length;
			}
		}

		insn_dat[i].bj_unreachable = unreachable;
		if (unreachable)
			continue;

		if (read_pkt_insn(&insns[i], &length)) {
			if (first_read == SIZE_MAX)
				first_read = i;
			/* Track the largest read length in the block. */
			if (length > safe_length)
				safe_length = length;
		}

		switch (BPF_CLASS(insns[i].code)) {
		case BPF_RET:
			/* Fall-through code after BPF_RET is dead. */
			unreachable = true;
			continue;

		case BPF_JMP:
			if (insns[i].code == (BPF_JMP|BPF_JA)) {
				jt = jf = insns[i].k;
			} else {
				jt = insns[i].jt;
				jf = insns[i].jf;
			}

			/* Reject jumps past the end of the program. */
			if (jt >= insn_count - (i + 1) ||
			    jf >= insn_count - (i + 1)) {
				return -1;
			}

			/* Both branches skip the next insn: it's dead. */
			if (jt > 0 && jf > 0)
				unreachable = true;

			/* Register this jump with both target insns. */
			jtf = insn_dat[i].bj_aux.bj_jdata.bj_jtf;

			jtf[0].bj_jump = NULL;
			jtf[0].bj_safe_length = safe_length;
			SLIST_INSERT_HEAD(&insn_dat[i + 1 + jt].bj_jumps,
			    &jtf[0], bj_entries);

			if (jf != jt) {
				jtf[1].bj_jump = NULL;
				jtf[1].bj_safe_length = safe_length;
				SLIST_INSERT_HEAD(&insn_dat[i + 1 + jf].bj_jumps,
				    &jtf[1], bj_entries);
			}

			continue;
		}
	}

	/* Flush the final block. */
	set_check_length(insns, insn_dat, first_read, insn_count, safe_length);

	return 0;
}
985 
986 /*
987  * Count out-of-bounds and division by zero jumps.
988  *
989  * insn_dat should be initialized by optimize().
990  */
991 static size_t
992 get_ret0_size(struct bpf_insn *insns, struct bpfjit_insn_data *insn_dat,
993     size_t insn_count)
994 {
995 	size_t rv = 0;
996 	size_t i;
997 
998 	for (i = 0; i < insn_count; i++) {
999 
1000 		if (read_pkt_insn(&insns[i], NULL)) {
1001 			if (insn_dat[i].bj_aux.bj_rdata.bj_check_length > 0)
1002 				rv++;
1003 #ifdef _KERNEL
1004 			rv++;
1005 #endif
1006 		}
1007 
1008 		if (insns[i].code == (BPF_LD|BPF_IND|BPF_B) ||
1009 		    insns[i].code == (BPF_LD|BPF_IND|BPF_H) ||
1010 		    insns[i].code == (BPF_LD|BPF_IND|BPF_W)) {
1011 			rv++;
1012 		}
1013 
1014 		if (insns[i].code == (BPF_ALU|BPF_DIV|BPF_X))
1015 			rv++;
1016 
1017 		if (insns[i].code == (BPF_ALU|BPF_DIV|BPF_K) &&
1018 		    insns[i].k == 0) {
1019 			rv++;
1020 		}
1021 	}
1022 
1023 	return rv;
1024 }
1025 
1026 /*
1027  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1028  */
1029 static int
1030 bpf_alu_to_sljit_op(struct bpf_insn *pc)
1031 {
1032 
1033 	/*
1034 	 * Note: all supported 64bit arches have 32bit multiply
1035 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1036 	 */
1037 	switch (BPF_OP(pc->code)) {
1038 	case BPF_ADD: return SLJIT_ADD;
1039 	case BPF_SUB: return SLJIT_SUB;
1040 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1041 	case BPF_OR:  return SLJIT_OR;
1042 	case BPF_AND: return SLJIT_AND;
1043 	case BPF_LSH: return SLJIT_SHL;
1044 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
1045 	default:
1046 		BPFJIT_ASSERT(false);
1047 		return 0;
1048 	}
1049 }
1050 
1051 /*
1052  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1053  */
1054 static int
1055 bpf_jmp_to_sljit_cond(struct bpf_insn *pc, bool negate)
1056 {
1057 	/*
1058 	 * Note: all supported 64bit arches have 32bit comparison
1059 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1060 	 */
1061 	int rv = SLJIT_INT_OP;
1062 
1063 	switch (BPF_OP(pc->code)) {
1064 	case BPF_JGT:
1065 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1066 		break;
1067 	case BPF_JGE:
1068 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1069 		break;
1070 	case BPF_JEQ:
1071 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1072 		break;
1073 	case BPF_JSET:
1074 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1075 		break;
1076 	default:
1077 		BPFJIT_ASSERT(false);
1078 	}
1079 
1080 	return rv;
1081 }
1082 
1083 static unsigned int
1084 bpfjit_optimization_hints(struct bpf_insn *insns, size_t insn_count)
1085 {
1086 	unsigned int rv = BPFJIT_INIT_A;
1087 	struct bpf_insn *pc;
1088 	unsigned int minm, maxm;
1089 
1090 	BPFJIT_ASSERT(BPF_MEMWORDS - 1 <= 0xff);
1091 
1092 	maxm = 0;
1093 	minm = BPF_MEMWORDS - 1;
1094 
1095 	for (pc = insns; pc != insns + insn_count; pc++) {
1096 		switch (BPF_CLASS(pc->code)) {
1097 		case BPF_LD:
1098 			if (BPF_MODE(pc->code) == BPF_IND)
1099 				rv |= BPFJIT_INIT_X;
1100 			if (BPF_MODE(pc->code) == BPF_MEM &&
1101 			    (uint32_t)pc->k < BPF_MEMWORDS) {
1102 				if (pc->k > maxm)
1103 					maxm = pc->k;
1104 				if (pc->k < minm)
1105 					minm = pc->k;
1106 			}
1107 			continue;
1108 		case BPF_LDX:
1109 			rv |= BPFJIT_INIT_X;
1110 			if (BPF_MODE(pc->code) == BPF_MEM &&
1111 			    (uint32_t)pc->k < BPF_MEMWORDS) {
1112 				if (pc->k > maxm)
1113 					maxm = pc->k;
1114 				if (pc->k < minm)
1115 					minm = pc->k;
1116 			}
1117 			continue;
1118 		case BPF_ST:
1119 			if ((uint32_t)pc->k < BPF_MEMWORDS) {
1120 				if (pc->k > maxm)
1121 					maxm = pc->k;
1122 				if (pc->k < minm)
1123 					minm = pc->k;
1124 			}
1125 			continue;
1126 		case BPF_STX:
1127 			rv |= BPFJIT_INIT_X;
1128 			if ((uint32_t)pc->k < BPF_MEMWORDS) {
1129 				if (pc->k > maxm)
1130 					maxm = pc->k;
1131 				if (pc->k < minm)
1132 					minm = pc->k;
1133 			}
1134 			continue;
1135 		case BPF_ALU:
1136 			if (pc->code == (BPF_ALU|BPF_NEG))
1137 				continue;
1138 			if (BPF_SRC(pc->code) == BPF_X)
1139 				rv |= BPFJIT_INIT_X;
1140 			continue;
1141 		case BPF_JMP:
1142 			if (pc->code == (BPF_JMP|BPF_JA))
1143 				continue;
1144 			if (BPF_SRC(pc->code) == BPF_X)
1145 				rv |= BPFJIT_INIT_X;
1146 			continue;
1147 		case BPF_RET:
1148 			continue;
1149 		case BPF_MISC:
1150 			rv |= BPFJIT_INIT_X;
1151 			continue;
1152 		default:
1153 			BPFJIT_ASSERT(false);
1154 		}
1155 	}
1156 
1157 	return rv | (maxm << 8) | minm;
1158 }
1159 
1160 /*
1161  * Convert BPF_K and BPF_X to sljit register.
1162  */
1163 static int
1164 kx_to_reg(struct bpf_insn *pc)
1165 {
1166 
1167 	switch (BPF_SRC(pc->code)) {
1168 	case BPF_K: return SLJIT_IMM;
1169 	case BPF_X: return BPFJIT_X;
1170 	default:
1171 		BPFJIT_ASSERT(false);
1172 		return 0;
1173 	}
1174 }
1175 
1176 static sljit_w
1177 kx_to_reg_arg(struct bpf_insn *pc)
1178 {
1179 
1180 	switch (BPF_SRC(pc->code)) {
1181 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1182 	case BPF_X: return 0;               /* BPFJIT_X, 0,      */
1183 	default:
1184 		BPFJIT_ASSERT(false);
1185 		return 0;
1186 	}
1187 }
1188 
/*
 * bpfjit_generate_code --
 *	JIT-compile an array of BPF instructions into a native filter
 *	function.  Returns NULL on failure (invalid program, allocation
 *	failure, or sljit error); on success, a function pointer to code
 *	that must eventually be released with bpfjit_free_code().
 */
bpfjit_function_t
bpfjit_generate_code(struct bpf_insn *insns, size_t insn_count)
{
	void *rv;
	size_t i;
	int status;
	int branching, negate;
	unsigned int rval, mode, src;
	int ntmp;
	unsigned int locals_size;
	unsigned int minm, maxm; /* min/max k for M[k] */
	size_t mem_locals_start; /* start of M[] array */
	unsigned int opts;
	struct bpf_insn *pc;
	struct sljit_compiler* compiler;

	/* a list of jumps to a normal return from a generated function */
	struct sljit_jump **returns;
	size_t returns_size, returns_maxsize;

	/* a list of jumps to out-of-bound return from a generated function */
	struct sljit_jump **ret0;
	size_t ret0_size = 0, ret0_maxsize = 0;

	struct bpfjit_insn_data *insn_dat;

	/* for local use */
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct bpfjit_jump *bjump, *jtf;

	struct sljit_jump *to_mchain_jump;

	uint32_t jt, jf;

	rv = NULL;
	compiler = NULL;
	insn_dat = NULL;
	returns = NULL;
	ret0 = NULL;

	/*
	 * Unpack the optimization hints: the low byte carries the
	 * minimum and the next byte the maximum k used with M[k]
	 * (packed by bpfjit_optimization_hints()).
	 */
	opts = bpfjit_optimization_hints(insns, insn_count);
	minm = opts & 0xff;
	maxm = (opts >> 8) & 0xff;
	mem_locals_start = mem_local_offset(0, 0);
	/* minm > maxm means the program never touches M[]. */
	locals_size = (minm <= maxm) ?
	    mem_local_offset(maxm + 1, minm) : mem_locals_start;

	ntmp = 4;
#ifdef _KERNEL
	ntmp += 1; /* for BPFJIT_KERN_TMP */
#endif

	/* A program with no return instructions is rejected. */
	returns_maxsize = count_returns(insns, insn_count);
	if (returns_maxsize  == 0)
		goto fail;

	/* Per-instruction metadata (jump lists, read-length checks). */
	insn_dat = BPFJIT_ALLOC(insn_count * sizeof(insn_dat[0]));
	if (insn_dat == NULL)
		goto fail;

	if (optimize(insns, insn_dat, insn_count) < 0)
		goto fail;

	/*
	 * Pre-size the ret0 jump array; the main loop fills it and the
	 * assertion after the loop checks that exactly this many
	 * entries were consumed.
	 */
	ret0_size = 0;
	ret0_maxsize = get_ret0_size(insns, insn_dat, insn_count);
	if (ret0_maxsize > 0) {
		ret0 = BPFJIT_ALLOC(ret0_maxsize * sizeof(ret0[0]));
		if (ret0 == NULL)
			goto fail;
	}

	returns_size = 0;
	returns = BPFJIT_ALLOC(returns_maxsize * sizeof(returns[0]));
	if (returns == NULL)
		goto fail;

	compiler = sljit_create_compiler();
	if (compiler == NULL)
		goto fail;

#if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
	sljit_compiler_verbose(compiler, stderr);
#endif

	/* Prologue: 3 arguments, ntmp temporaries, locals_size locals. */
	status = sljit_emit_enter(compiler, 3, ntmp, 3, locals_size);
	if (status != SLJIT_SUCCESS)
		goto fail;

	/* Zero-initialize the M[] scratch words in the local area. */
	for (i = mem_locals_start; i < locals_size; i+= sizeof(uint32_t)) {
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_UI,
		    SLJIT_MEM1(SLJIT_LOCALS_REG), i,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (opts & BPFJIT_INIT_A) {
		/* A = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BPFJIT_A, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (opts & BPFJIT_INIT_X) {
		/* X = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BPFJIT_X, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	/* Main loop: emit native code for each reachable instruction. */
	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].bj_unreachable)
			continue;

		to_mchain_jump = NULL;

		/*
		 * Resolve jumps to the current insn.
		 */
		label = NULL;
		SLIST_FOREACH(bjump, &insn_dat[i].bj_jumps, bj_entries) {
			if (bjump->bj_jump != NULL) {
				if (label == NULL)
					label = sljit_emit_label(compiler);
				if (label == NULL)
					goto fail;
				sljit_set_label(bjump->bj_jump, label);
			}
		}

		/*
		 * For packet-reading insns, guard the access with a
		 * buffer-length check computed by the optimizer.
		 */
		if (read_pkt_insn(&insns[i], NULL) &&
		    insn_dat[i].bj_aux.bj_rdata.bj_check_length > 0) {
			/* if (buflen < bj_check_length) return 0; */
			jump = sljit_emit_cmp(compiler,
			    SLJIT_C_LESS,
			    BPFJIT_BUFLEN, 0,
			    SLJIT_IMM,
			    insn_dat[i].bj_aux.bj_rdata.bj_check_length);
			if (jump == NULL)
		  		goto fail;
#ifdef _KERNEL
			/* in kernel, short reads fall back to the mbuf chain */
			to_mchain_jump = jump;
#else
			ret0[ret0_size++] = jump;
#endif
		}

		pc = &insns[i];
		switch (BPF_CLASS(pc->code)) {

		default:
			goto fail;

		case BPF_LD:
			/* BPF_LD+BPF_IMM          A <- k */
			if (pc->code == (BPF_LD|BPF_IMM)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BPFJIT_A, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_MEM          A <- M[k] */
			if (pc->code == (BPF_LD|BPF_MEM)) {
				/* reject k outside the M[] range seen by the hints pass */
				if (pc->k < minm || pc->k > maxm)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BPFJIT_A, 0,
				    SLJIT_MEM1(SLJIT_LOCALS_REG),
				    mem_local_offset(pc->k, minm));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BPFJIT_A, 0,
				    BPFJIT_WIRELEN, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* remaining BPF_LD forms must be packet reads */
			mode = BPF_MODE(pc->code);
			if (mode != BPF_ABS && mode != BPF_IND)
				goto fail;

			status = emit_pkt_read(compiler, pc,
			    to_mchain_jump, ret0, &ret0_size);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_LDX:
			mode = BPF_MODE(pc->code);

			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
			if (mode == BPF_IMM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BPFJIT_X, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
			if (mode == BPF_LEN) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BPFJIT_X, 0,
				    BPFJIT_WIRELEN, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
			if (mode == BPF_MEM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				if (pc->k < minm || pc->k > maxm)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BPFJIT_X, 0,
				    SLJIT_MEM1(SLJIT_LOCALS_REG),
				    mem_local_offset(pc->k, minm));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
				goto fail;

			status = emit_msh(compiler, pc,
			    to_mchain_jump, ret0, &ret0_size);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ST:
			/* BPF_ST    M[k] <- A */
			if (pc->code != BPF_ST || pc->k < minm || pc->k > maxm)
				goto fail;

			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    mem_local_offset(pc->k, minm),
			    BPFJIT_A, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_STX:
			/* BPF_STX    M[k] <- X */
			if (pc->code != BPF_STX || pc->k < minm || pc->k > maxm)
				goto fail;

			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    mem_local_offset(pc->k, minm),
			    BPFJIT_X, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ALU:

			/* BPF_ALU+BPF_NEG    A <- -A */
			if (pc->code == (BPF_ALU|BPF_NEG)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_NEG,
				    BPFJIT_A, 0,
				    BPFJIT_A, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* all ALU ops except division map to one sljit op2 */
			if (BPF_OP(pc->code) != BPF_DIV) {
				status = sljit_emit_op2(compiler,
				    bpf_alu_to_sljit_op(pc),
				    BPFJIT_A, 0,
				    BPFJIT_A, 0,
				    kx_to_reg(pc), kx_to_reg_arg(pc));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_DIV */

			src = BPF_SRC(pc->code);
			if (src != BPF_X && src != BPF_K)
				goto fail;

			/* division by zero? */
			if (src == BPF_X) {
				/* runtime check: X == 0 jumps to the ret0 epilogue */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_EQUAL|SLJIT_INT_OP,
				    BPFJIT_X, 0,
				    SLJIT_IMM, 0);
				if (jump == NULL)
					goto fail;
				ret0[ret0_size++] = jump;
			} else if (pc->k == 0) {
				/* constant zero divisor: unconditionally return 0 */
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				ret0[ret0_size++] = jump;
			}

			if (src == BPF_X) {
				status = emit_division(compiler, BPFJIT_X, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			} else if (pc->k != 0) {
				/* power-of-two constants take the shift path */
				if (pc->k & (pc->k - 1)) {
				    status = emit_division(compiler,
				        SLJIT_IMM, (uint32_t)pc->k);
				} else {
    				    status = emit_pow2_division(compiler,
				        (uint32_t)pc->k);
				}
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_JMP:

			if (pc->code == (BPF_JMP|BPF_JA)) {
				jt = jf = pc->k;
			} else {
				jt = pc->jt;
				jf = pc->jf;
			}

			/*
			 * negate: emit the inverted condition so the
			 * fall-through path is the zero-offset branch.
			 * branching: false for BPF_JA-like jumps where
			 * both targets coincide.
			 */
			negate = (jt == 0) ? 1 : 0;
			branching = (jt == jf) ? 0 : 1;
			jtf = insn_dat[i].bj_aux.bj_jdata.bj_jtf;

			if (branching) {
				if (BPF_OP(pc->code) != BPF_JSET) {
					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BPFJIT_A, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
				} else {
					/* BPF_JSET: test (A & operand) != 0 */
					status = sljit_emit_op2(compiler,
					    SLJIT_AND,
					    BPFJIT_TMP1, 0,
					    BPFJIT_A, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
					if (status != SLJIT_SUCCESS)
						goto fail;

					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BPFJIT_TMP1, 0,
					    SLJIT_IMM, 0);
				}

				if (jump == NULL)
					goto fail;

				BPFJIT_ASSERT(jtf[negate].bj_jump == NULL);
				jtf[negate].bj_jump = jump;
			}

			/* unconditional jump when neither target falls through */
			if (!branching || (jt != 0 && jf != 0)) {
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;

				BPFJIT_ASSERT(jtf[branching].bj_jump == NULL);
				jtf[branching].bj_jump = jump;
			}

			continue;

		case BPF_RET:

			rval = BPF_RVAL(pc->code);
			if (rval == BPF_X)
				goto fail;

			/* BPF_RET+BPF_K    accept k bytes */
			if (rval == BPF_K) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BPFJIT_A, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			/* BPF_RET+BPF_A    accept A bytes */
			if (rval == BPF_A) {
#if BPFJIT_A != SLJIT_RETURN_REG
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    SLJIT_RETURN_REG, 0,
				    BPFJIT_A, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
#endif
			}

			/*
			 * Save a jump to a normal return. If the program
			 * ends with BPF_RET, no jump is needed because
			 * the normal return is generated right after the
			 * last instruction.
			 */
			if (i != insn_count - 1) {
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				returns[returns_size++] = jump;
			}

			continue;

		case BPF_MISC:

			/* BPF_MISC+BPF_TAX    X <- A */
			if (pc->code == (BPF_MISC|BPF_TAX)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BPFJIT_X, 0,
				    BPFJIT_A, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_MISC+BPF_TXA    A <- X */
			if (pc->code == (BPF_MISC|BPF_TXA)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BPFJIT_A, 0,
				    BPFJIT_X, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			goto fail;
		} /* switch */
	} /* main loop */

	/* the pre-sized jump arrays must agree with what was emitted */
	BPFJIT_ASSERT(ret0_size == ret0_maxsize);
	BPFJIT_ASSERT(returns_size <= returns_maxsize);

	/* Epilogue: bind all normal-return jumps to a common label. */
	if (returns_size > 0) {
		label = sljit_emit_label(compiler);
		if (label == NULL)
			goto fail;
		for (i = 0; i < returns_size; i++)
			sljit_set_label(returns[i], label);
	}

	/* normal return: return A */
	status = sljit_emit_return(compiler,
	    SLJIT_MOV_UI,
	    BPFJIT_A, 0);
	if (status != SLJIT_SUCCESS)
		goto fail;

	/* out-of-bound return: bind all ret0 jumps here and return 0 */
	if (ret0_size > 0) {
		label = sljit_emit_label(compiler);
		if (label == NULL)
			goto fail;

		for (i = 0; i < ret0_size; i++)
			sljit_set_label(ret0[i], label);

		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    SLJIT_RETURN_REG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;

		status = sljit_emit_return(compiler,
		    SLJIT_MOV_UI,
		    SLJIT_RETURN_REG, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	rv = sljit_generate_code(compiler);

fail:
	/* common cleanup path; rv is still NULL on any failure */
	if (compiler != NULL)
		sljit_free_compiler(compiler);

	if (insn_dat != NULL)
		BPFJIT_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));

	if (returns != NULL)
		BPFJIT_FREE(returns, returns_maxsize * sizeof(returns[0]));

	if (ret0 != NULL)
		BPFJIT_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));

	return (bpfjit_function_t)rv;
}
1732 
1733 void
1734 bpfjit_free_code(bpfjit_function_t code)
1735 {
1736 
1737 	sljit_free_code((void *)code);
1738 }
1739