xref: /netbsd-src/sys/net/bpfjit.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: bpfjit.c,v 1.6 2013/12/15 21:18:01 pooka Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2012 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.6 2013/12/15 21:18:01 pooka Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.6 2013/12/15 21:18:01 pooka Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <stdlib.h>
44 #include <assert.h>
45 #define BPFJIT_ALLOC(sz) malloc(sz)
46 #define BPFJIT_FREE(p, sz) free(p)
47 #define BPFJIT_ASSERT(c) assert(c)
48 #else
49 #include <sys/kmem.h>
50 #define BPFJIT_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
51 #define BPFJIT_FREE(p, sz) kmem_free(p, sz)
52 #define BPFJIT_ASSERT(c) KASSERT(c)
53 #endif
54 
55 #ifndef _KERNEL
56 #include <limits.h>
57 #include <stdio.h>
58 #include <stdbool.h>
59 #include <stddef.h>
60 #include <stdint.h>
61 #else
62 #include <sys/atomic.h>
63 #include <sys/module.h>
64 #endif
65 
66 #define	__BPF_PRIVATE
67 #include <net/bpf.h>
68 #include <net/bpfjit.h>
69 #include <sljitLir.h>
70 
71 #define BPFJIT_A	SLJIT_TEMPORARY_REG1
72 #define BPFJIT_X	SLJIT_TEMPORARY_EREG1
73 #define BPFJIT_TMP1	SLJIT_TEMPORARY_REG2
74 #define BPFJIT_TMP2	SLJIT_TEMPORARY_REG3
75 #define BPFJIT_BUF	SLJIT_SAVED_REG1
76 #define BPFJIT_WIRELEN	SLJIT_SAVED_REG2
77 #define BPFJIT_BUFLEN	SLJIT_SAVED_REG3
78 #define BPFJIT_KERN_TMP SLJIT_TEMPORARY_EREG2
79 
80 /*
81  * Flags for bpfjit_optimization_hints().
82  */
83 #define BPFJIT_INIT_X 0x10000
84 #define BPFJIT_INIT_A 0x20000
85 
86 /*
87  * Node of bj_jumps list.
88  */
89 struct bpfjit_jump {
90 	struct sljit_jump *bj_jump;
91 	SLIST_ENTRY(bpfjit_jump) bj_entries;
92 	uint32_t bj_safe_length;
93 };
94 
95 /*
96  * Data for BPF_JMP instruction.
97  */
98 struct bpfjit_jump_data {
99 	/*
100 	 * These entries make up bj_jumps list:
101 	 * bj_jtf[0] - when coming from jt path,
102 	 * bj_jtf[1] - when coming from jf path.
103 	 */
104 	struct bpfjit_jump bj_jtf[2];
105 };
106 
107 /*
108  * Data for "read from packet" instructions.
109  * See also read_pkt_insn() function below.
110  */
111 struct bpfjit_read_pkt_data {
112 	/*
113 	 * If positive, emit "if (buflen < bj_check_length) return 0".
114 	 * We assume that buflen is never equal to UINT32_MAX (otherwise,
115 	 * we need a special bool variable to emit unconditional "return 0").
116 	 */
117 	uint32_t bj_check_length;
118 };
119 
120 /*
121  * Additional (optimization-related) data for bpf_insn.
122  */
123 struct bpfjit_insn_data {
124 	/* List of jumps to this insn. */
125 	SLIST_HEAD(, bpfjit_jump) bj_jumps;
126 
127 	union {
128 		struct bpfjit_jump_data     bj_jdata;
129 		struct bpfjit_read_pkt_data bj_rdata;
130 	} bj_aux;
131 
132 	bool bj_unreachable;
133 };
134 
135 #ifdef _KERNEL
136 
137 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
138 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
139 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
140 
141 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
142 
143 static int
144 bpfjit_modcmd(modcmd_t cmd, void *arg)
145 {
146 
147 	switch (cmd) {
148 	case MODULE_CMD_INIT:
149 		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
150 		membar_producer();
151 		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
152 		membar_producer();
153 		return 0;
154 
155 	case MODULE_CMD_FINI:
156 		return EOPNOTSUPP;
157 
158 	default:
159 		return ENOTTY;
160 	}
161 }
162 #endif
163 
164 static uint32_t
165 read_width(struct bpf_insn *pc)
166 {
167 
168 	switch (BPF_SIZE(pc->code)) {
169 	case BPF_W:
170 		return 4;
171 	case BPF_H:
172 		return 2;
173 	case BPF_B:
174 		return 1;
175 	default:
176 		BPFJIT_ASSERT(false);
177 		return 0;
178 	}
179 }
180 
181 /*
182  * Get offset of M[k] on the stack.
183  */
184 static size_t
185 mem_local_offset(uint32_t k, unsigned int minm)
186 {
187 	size_t moff = (k - minm) * sizeof(uint32_t);
188 
189 #ifdef _KERNEL
190 	/*
191 	 * 4 bytes for the third argument of m_xword/m_xhalf/m_xbyte.
192 	 */
193 	return sizeof(uint32_t) + moff;
194 #else
195 	return moff;
196 #endif
197 }
198 
199 /*
200  * Generate code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
201  */
202 static int
203 emit_read8(struct sljit_compiler* compiler, uint32_t k)
204 {
205 
206 	return sljit_emit_op1(compiler,
207 	    SLJIT_MOV_UB,
208 	    BPFJIT_A, 0,
209 	    SLJIT_MEM1(BPFJIT_BUF), k);
210 }
211 
212 /*
213  * Generate code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
214  */
215 static int
216 emit_read16(struct sljit_compiler* compiler, uint32_t k)
217 {
218 	int status;
219 
220 	/* tmp1 = buf[k]; */
221 	status = sljit_emit_op1(compiler,
222 	    SLJIT_MOV_UB,
223 	    BPFJIT_TMP1, 0,
224 	    SLJIT_MEM1(BPFJIT_BUF), k);
225 	if (status != SLJIT_SUCCESS)
226 		return status;
227 
228 	/* A = buf[k+1]; */
229 	status = sljit_emit_op1(compiler,
230 	    SLJIT_MOV_UB,
231 	    BPFJIT_A, 0,
232 	    SLJIT_MEM1(BPFJIT_BUF), k+1);
233 	if (status != SLJIT_SUCCESS)
234 		return status;
235 
236 	/* tmp1 = tmp1 << 8; */
237 	status = sljit_emit_op2(compiler,
238 	    SLJIT_SHL,
239 	    BPFJIT_TMP1, 0,
240 	    BPFJIT_TMP1, 0,
241 	    SLJIT_IMM, 8);
242 	if (status != SLJIT_SUCCESS)
243 		return status;
244 
245 	/* A = A + tmp1; */
246 	status = sljit_emit_op2(compiler,
247 	    SLJIT_ADD,
248 	    BPFJIT_A, 0,
249 	    BPFJIT_A, 0,
250 	    BPFJIT_TMP1, 0);
251 	return status;
252 }
253 
254 /*
255  * Generate code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
256  */
257 static int
258 emit_read32(struct sljit_compiler* compiler, uint32_t k)
259 {
260 	int status;
261 
262 	/* tmp1 = buf[k]; */
263 	status = sljit_emit_op1(compiler,
264 	    SLJIT_MOV_UB,
265 	    BPFJIT_TMP1, 0,
266 	    SLJIT_MEM1(BPFJIT_BUF), k);
267 	if (status != SLJIT_SUCCESS)
268 		return status;
269 
270 	/* tmp2 = buf[k+1]; */
271 	status = sljit_emit_op1(compiler,
272 	    SLJIT_MOV_UB,
273 	    BPFJIT_TMP2, 0,
274 	    SLJIT_MEM1(BPFJIT_BUF), k+1);
275 	if (status != SLJIT_SUCCESS)
276 		return status;
277 
278 	/* A = buf[k+3]; */
279 	status = sljit_emit_op1(compiler,
280 	    SLJIT_MOV_UB,
281 	    BPFJIT_A, 0,
282 	    SLJIT_MEM1(BPFJIT_BUF), k+3);
283 	if (status != SLJIT_SUCCESS)
284 		return status;
285 
286 	/* tmp1 = tmp1 << 24; */
287 	status = sljit_emit_op2(compiler,
288 	    SLJIT_SHL,
289 	    BPFJIT_TMP1, 0,
290 	    BPFJIT_TMP1, 0,
291 	    SLJIT_IMM, 24);
292 	if (status != SLJIT_SUCCESS)
293 		return status;
294 
295 	/* A = A + tmp1; */
296 	status = sljit_emit_op2(compiler,
297 	    SLJIT_ADD,
298 	    BPFJIT_A, 0,
299 	    BPFJIT_A, 0,
300 	    BPFJIT_TMP1, 0);
301 	if (status != SLJIT_SUCCESS)
302 		return status;
303 
304 	/* tmp1 = buf[k+2]; */
305 	status = sljit_emit_op1(compiler,
306 	    SLJIT_MOV_UB,
307 	    BPFJIT_TMP1, 0,
308 	    SLJIT_MEM1(BPFJIT_BUF), k+2);
309 	if (status != SLJIT_SUCCESS)
310 		return status;
311 
312 	/* tmp2 = tmp2 << 16; */
313 	status = sljit_emit_op2(compiler,
314 	    SLJIT_SHL,
315 	    BPFJIT_TMP2, 0,
316 	    BPFJIT_TMP2, 0,
317 	    SLJIT_IMM, 16);
318 	if (status != SLJIT_SUCCESS)
319 		return status;
320 
321 	/* A = A + tmp2; */
322 	status = sljit_emit_op2(compiler,
323 	    SLJIT_ADD,
324 	    BPFJIT_A, 0,
325 	    BPFJIT_A, 0,
326 	    BPFJIT_TMP2, 0);
327 	if (status != SLJIT_SUCCESS)
328 		return status;
329 
330 	/* tmp1 = tmp1 << 8; */
331 	status = sljit_emit_op2(compiler,
332 	    SLJIT_SHL,
333 	    BPFJIT_TMP1, 0,
334 	    BPFJIT_TMP1, 0,
335 	    SLJIT_IMM, 8);
336 	if (status != SLJIT_SUCCESS)
337 		return status;
338 
339 	/* A = A + tmp1; */
340 	status = sljit_emit_op2(compiler,
341 	    SLJIT_ADD,
342 	    BPFJIT_A, 0,
343 	    BPFJIT_A, 0,
344 	    BPFJIT_TMP1, 0);
345 	return status;
346 }
347 
348 #ifdef _KERNEL
349 /*
350  * Generate m_xword/m_xhalf/m_xbyte call.
351  *
352  * pc is one of:
353  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
354  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
355  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
356  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
357  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
358  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
359  * BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
360  *
361  * dst must be BPFJIT_A for BPF_LD instructions and BPFJIT_X
362  * or any of BPFJIT_TMP* registrers for BPF_MSH instruction.
363  */
364 static int
365 emit_xcall(struct sljit_compiler* compiler, struct bpf_insn *pc,
366     int dst, sljit_w dstw, struct sljit_jump **ret0_jump,
367     uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
368 {
369 #if BPFJIT_X != SLJIT_TEMPORARY_EREG1 || \
370     BPFJIT_X == SLJIT_RETURN_REG
371 #error "Not supported assignment of registers."
372 #endif
373 	int status;
374 
375 	/*
376 	 * The third argument of fn is an address on stack.
377 	 */
378 	const int arg3_offset = 0;
379 
380 	if (BPF_CLASS(pc->code) == BPF_LDX) {
381 		/* save A */
382 		status = sljit_emit_op1(compiler,
383 		    SLJIT_MOV,
384 		    BPFJIT_KERN_TMP, 0,
385 		    BPFJIT_A, 0);
386 		if (status != SLJIT_SUCCESS)
387 			return status;
388 	}
389 
390 	/*
391 	 * Prepare registers for fn(buf, k, &err) call.
392 	 */
393 	status = sljit_emit_op1(compiler,
394 	    SLJIT_MOV,
395 	    SLJIT_TEMPORARY_REG1, 0,
396 	    BPFJIT_BUF, 0);
397 	if (status != SLJIT_SUCCESS)
398 		return status;
399 
400 	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
401 		status = sljit_emit_op2(compiler,
402 		    SLJIT_ADD,
403 		    SLJIT_TEMPORARY_REG2, 0,
404 		    BPFJIT_X, 0,
405 		    SLJIT_IMM, (uint32_t)pc->k);
406 	} else {
407 		status = sljit_emit_op1(compiler,
408 		    SLJIT_MOV,
409 		    SLJIT_TEMPORARY_REG2, 0,
410 		    SLJIT_IMM, (uint32_t)pc->k);
411 	}
412 
413 	if (status != SLJIT_SUCCESS)
414 		return status;
415 
416 	status = sljit_get_local_base(compiler,
417 	    SLJIT_TEMPORARY_REG3, 0, arg3_offset);
418 	if (status != SLJIT_SUCCESS)
419 		return status;
420 
421 	/* fn(buf, k, &err); */
422 	status = sljit_emit_ijump(compiler,
423 	    SLJIT_CALL3,
424 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
425 
426 	if (BPF_CLASS(pc->code) == BPF_LDX) {
427 
428 		/* move return value to dst */
429 		BPFJIT_ASSERT(dst != SLJIT_RETURN_REG);
430 		status = sljit_emit_op1(compiler,
431 		    SLJIT_MOV,
432 		    dst, dstw,
433 		    SLJIT_RETURN_REG, 0);
434 		if (status != SLJIT_SUCCESS)
435 			return status;
436 
437 		/* restore A */
438 		status = sljit_emit_op1(compiler,
439 		    SLJIT_MOV,
440 		    BPFJIT_A, 0,
441 		    BPFJIT_KERN_TMP, 0);
442 		if (status != SLJIT_SUCCESS)
443 			return status;
444 
445 	} else if (dst != SLJIT_RETURN_REG) {
446 		status = sljit_emit_op1(compiler,
447 		    SLJIT_MOV,
448 		    dst, dstw,
449 		    SLJIT_RETURN_REG, 0);
450 		if (status != SLJIT_SUCCESS)
451 			return status;
452 	}
453 
454 	/* tmp3 = *err; */
455 	status = sljit_emit_op1(compiler,
456 	    SLJIT_MOV_UI,
457 	    SLJIT_TEMPORARY_REG3, 0,
458 	    SLJIT_MEM1(SLJIT_LOCALS_REG), arg3_offset);
459 	if (status != SLJIT_SUCCESS)
460 		return status;
461 
462 	/* if (tmp3 != 0) return 0; */
463 	*ret0_jump = sljit_emit_cmp(compiler,
464 	    SLJIT_C_NOT_EQUAL,
465 	    SLJIT_TEMPORARY_REG3, 0,
466 	    SLJIT_IMM, 0);
467 	if (*ret0_jump == NULL)
468 		return SLJIT_ERR_ALLOC_FAILED;
469 
470 	return status;
471 }
472 #endif
473 
474 /*
475  * Generate code for
476  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
477  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
478  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
479  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
480  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
481  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
482  */
483 static int
484 emit_pkt_read(struct sljit_compiler* compiler,
485     struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
486     struct sljit_jump **ret0, size_t *ret0_size)
487 {
488 	int status = 0; /* XXX gcc 4.1 */
489 	uint32_t width;
490 	struct sljit_jump *jump;
491 #ifdef _KERNEL
492 	struct sljit_label *label;
493 	struct sljit_jump *over_mchain_jump;
494 	const bool check_zero_buflen = (to_mchain_jump != NULL);
495 #endif
496 	const uint32_t k = pc->k;
497 
498 #ifdef _KERNEL
499 	if (to_mchain_jump == NULL) {
500 		to_mchain_jump = sljit_emit_cmp(compiler,
501 		    SLJIT_C_EQUAL,
502 		    BPFJIT_BUFLEN, 0,
503 		    SLJIT_IMM, 0);
504 		if (to_mchain_jump == NULL)
505   			return SLJIT_ERR_ALLOC_FAILED;
506 	}
507 #endif
508 
509 	width = read_width(pc);
510 
511 	if (BPF_MODE(pc->code) == BPF_IND) {
512 		/* tmp1 = buflen - (pc->k + width); */
513 		status = sljit_emit_op2(compiler,
514 		    SLJIT_SUB,
515 		    BPFJIT_TMP1, 0,
516 		    BPFJIT_BUFLEN, 0,
517 		    SLJIT_IMM, k + width);
518 		if (status != SLJIT_SUCCESS)
519 			return status;
520 
521 		/* buf += X; */
522 		status = sljit_emit_op2(compiler,
523 		    SLJIT_ADD,
524 		    BPFJIT_BUF, 0,
525 		    BPFJIT_BUF, 0,
526 		    BPFJIT_X, 0);
527 		if (status != SLJIT_SUCCESS)
528 			return status;
529 
530 		/* if (tmp1 < X) return 0; */
531 		jump = sljit_emit_cmp(compiler,
532 		    SLJIT_C_LESS,
533 		    BPFJIT_TMP1, 0,
534 		    BPFJIT_X, 0);
535 		if (jump == NULL)
536   			return SLJIT_ERR_ALLOC_FAILED;
537 		ret0[(*ret0_size)++] = jump;
538 	}
539 
540 	switch (width) {
541 	case 4:
542 		status = emit_read32(compiler, k);
543 		break;
544 	case 2:
545 		status = emit_read16(compiler, k);
546 		break;
547 	case 1:
548 		status = emit_read8(compiler, k);
549 		break;
550 	}
551 
552 	if (status != SLJIT_SUCCESS)
553 		return status;
554 
555 	if (BPF_MODE(pc->code) == BPF_IND) {
556 		/* buf -= X; */
557 		status = sljit_emit_op2(compiler,
558 		    SLJIT_SUB,
559 		    BPFJIT_BUF, 0,
560 		    BPFJIT_BUF, 0,
561 		    BPFJIT_X, 0);
562 		if (status != SLJIT_SUCCESS)
563 			return status;
564 	}
565 
566 #ifdef _KERNEL
567 	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
568 	if (over_mchain_jump == NULL)
569   		return SLJIT_ERR_ALLOC_FAILED;
570 
571 	/* entry point to mchain handler */
572 	label = sljit_emit_label(compiler);
573 	if (label == NULL)
574   		return SLJIT_ERR_ALLOC_FAILED;
575 	sljit_set_label(to_mchain_jump, label);
576 
577 	if (check_zero_buflen) {
578 		/* if (buflen != 0) return 0; */
579 		jump = sljit_emit_cmp(compiler,
580 		    SLJIT_C_NOT_EQUAL,
581 		    BPFJIT_BUFLEN, 0,
582 		    SLJIT_IMM, 0);
583 		if (jump == NULL)
584 			return SLJIT_ERR_ALLOC_FAILED;
585 		ret0[(*ret0_size)++] = jump;
586 	}
587 
588 	switch (width) {
589 	case 4:
590 		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xword);
591 		break;
592 	case 2:
593 		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xhalf);
594 		break;
595 	case 1:
596 		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xbyte);
597 		break;
598 	}
599 
600 	if (status != SLJIT_SUCCESS)
601 		return status;
602 
603 	ret0[(*ret0_size)++] = jump;
604 
605 	label = sljit_emit_label(compiler);
606 	if (label == NULL)
607 		return SLJIT_ERR_ALLOC_FAILED;
608 	sljit_set_label(over_mchain_jump, label);
609 #endif
610 
611 	return status;
612 }
613 
614 /*
615  * Generate code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
616  */
617 static int
618 emit_msh(struct sljit_compiler* compiler,
619     struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
620     struct sljit_jump **ret0, size_t *ret0_size)
621 {
622 	int status;
623 #ifdef _KERNEL
624 	struct sljit_label *label;
625 	struct sljit_jump *jump, *over_mchain_jump;
626 	const bool check_zero_buflen = (to_mchain_jump != NULL);
627 #endif
628 	const uint32_t k = pc->k;
629 
630 #ifdef _KERNEL
631 	if (to_mchain_jump == NULL) {
632 		to_mchain_jump = sljit_emit_cmp(compiler,
633 		    SLJIT_C_EQUAL,
634 		    BPFJIT_BUFLEN, 0,
635 		    SLJIT_IMM, 0);
636 		if (to_mchain_jump == NULL)
637  			return SLJIT_ERR_ALLOC_FAILED;
638 	}
639 #endif
640 
641 	/* tmp1 = buf[k] */
642 	status = sljit_emit_op1(compiler,
643 	    SLJIT_MOV_UB,
644 	    BPFJIT_TMP1, 0,
645 	    SLJIT_MEM1(BPFJIT_BUF), k);
646 	if (status != SLJIT_SUCCESS)
647 		return status;
648 
649 	/* tmp1 &= 0xf */
650 	status = sljit_emit_op2(compiler,
651 	    SLJIT_AND,
652 	    BPFJIT_TMP1, 0,
653 	    BPFJIT_TMP1, 0,
654 	    SLJIT_IMM, 0xf);
655 	if (status != SLJIT_SUCCESS)
656 		return status;
657 
658 	/* tmp1 = tmp1 << 2 */
659 	status = sljit_emit_op2(compiler,
660 	    SLJIT_SHL,
661 	    BPFJIT_X, 0,
662 	    BPFJIT_TMP1, 0,
663 	    SLJIT_IMM, 2);
664 	if (status != SLJIT_SUCCESS)
665 		return status;
666 
667 #ifdef _KERNEL
668 	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
669 	if (over_mchain_jump == NULL)
670 		return SLJIT_ERR_ALLOC_FAILED;
671 
672 	/* entry point to mchain handler */
673 	label = sljit_emit_label(compiler);
674 	if (label == NULL)
675 		return SLJIT_ERR_ALLOC_FAILED;
676 	sljit_set_label(to_mchain_jump, label);
677 
678 	if (check_zero_buflen) {
679 		/* if (buflen != 0) return 0; */
680 		jump = sljit_emit_cmp(compiler,
681 		    SLJIT_C_NOT_EQUAL,
682 		    BPFJIT_BUFLEN, 0,
683 		    SLJIT_IMM, 0);
684 		if (jump == NULL)
685   			return SLJIT_ERR_ALLOC_FAILED;
686 		ret0[(*ret0_size)++] = jump;
687 	}
688 
689 	status = emit_xcall(compiler, pc, BPFJIT_TMP1, 0, &jump, &m_xbyte);
690 	if (status != SLJIT_SUCCESS)
691 		return status;
692 	ret0[(*ret0_size)++] = jump;
693 
694 	/* tmp1 &= 0xf */
695 	status = sljit_emit_op2(compiler,
696 	    SLJIT_AND,
697 	    BPFJIT_TMP1, 0,
698 	    BPFJIT_TMP1, 0,
699 	    SLJIT_IMM, 0xf);
700 	if (status != SLJIT_SUCCESS)
701 		return status;
702 
703 	/* tmp1 = tmp1 << 2 */
704 	status = sljit_emit_op2(compiler,
705 	    SLJIT_SHL,
706 	    BPFJIT_X, 0,
707 	    BPFJIT_TMP1, 0,
708 	    SLJIT_IMM, 2);
709 	if (status != SLJIT_SUCCESS)
710 		return status;
711 
712 
713 	label = sljit_emit_label(compiler);
714 	if (label == NULL)
715 		return SLJIT_ERR_ALLOC_FAILED;
716 	sljit_set_label(over_mchain_jump, label);
717 #endif
718 
719 	return status;
720 }
721 
722 static int
723 emit_pow2_division(struct sljit_compiler* compiler, uint32_t k)
724 {
725 	int shift = 0;
726 	int status = SLJIT_SUCCESS;
727 
728 	while (k > 1) {
729 		k >>= 1;
730 		shift++;
731 	}
732 
733 	BPFJIT_ASSERT(k == 1 && shift < 32);
734 
735 	if (shift != 0) {
736 		status = sljit_emit_op2(compiler,
737 		    SLJIT_LSHR|SLJIT_INT_OP,
738 		    BPFJIT_A, 0,
739 		    BPFJIT_A, 0,
740 		    SLJIT_IMM, shift);
741 	}
742 
743 	return status;
744 }
745 
746 #if !defined(BPFJIT_USE_UDIV)
747 static sljit_uw
748 divide(sljit_uw x, sljit_uw y)
749 {
750 
751 	return (uint32_t)x / (uint32_t)y;
752 }
753 #endif
754 
755 /*
756  * Generate A = A / div.
757  * divt,divw are either SLJIT_IMM,pc->k or BPFJIT_X,0.
758  */
759 static int
760 emit_division(struct sljit_compiler* compiler, int divt, sljit_w divw)
761 {
762 	int status;
763 
764 #if BPFJIT_X == SLJIT_TEMPORARY_REG1 || \
765     BPFJIT_X == SLJIT_RETURN_REG     || \
766     BPFJIT_X == SLJIT_TEMPORARY_REG2 || \
767     BPFJIT_A == SLJIT_TEMPORARY_REG2
768 #error "Not supported assignment of registers."
769 #endif
770 
771 #if BPFJIT_A != SLJIT_TEMPORARY_REG1
772 	status = sljit_emit_op1(compiler,
773 	    SLJIT_MOV,
774 	    SLJIT_TEMPORARY_REG1, 0,
775 	    BPFJIT_A, 0);
776 	if (status != SLJIT_SUCCESS)
777 		return status;
778 #endif
779 
780 	status = sljit_emit_op1(compiler,
781 	    SLJIT_MOV,
782 	    SLJIT_TEMPORARY_REG2, 0,
783 	    divt, divw);
784 	if (status != SLJIT_SUCCESS)
785 		return status;
786 
787 #if defined(BPFJIT_USE_UDIV)
788 	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);
789 
790 #if BPFJIT_A != SLJIT_TEMPORARY_REG1
791 	status = sljit_emit_op1(compiler,
792 	    SLJIT_MOV,
793 	    BPFJIT_A, 0,
794 	    SLJIT_TEMPORARY_REG1, 0);
795 	if (status != SLJIT_SUCCESS)
796 		return status;
797 #endif
798 #else
799 	status = sljit_emit_ijump(compiler,
800 	    SLJIT_CALL2,
801 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(divide));
802 
803 #if BPFJIT_A != SLJIT_RETURN_REG
804 	status = sljit_emit_op1(compiler,
805 	    SLJIT_MOV,
806 	    BPFJIT_A, 0,
807 	    SLJIT_RETURN_REG, 0);
808 	if (status != SLJIT_SUCCESS)
809 		return status;
810 #endif
811 #endif
812 
813 	return status;
814 }
815 
816 /*
817  * Count BPF_RET instructions.
818  */
819 static size_t
820 count_returns(struct bpf_insn *insns, size_t insn_count)
821 {
822 	size_t i;
823 	size_t rv;
824 
825 	rv = 0;
826 	for (i = 0; i < insn_count; i++) {
827 		if (BPF_CLASS(insns[i].code) == BPF_RET)
828 			rv++;
829 	}
830 
831 	return rv;
832 }
833 
834 /*
835  * Return true if pc is a "read from packet" instruction.
836  * If length is not NULL and return value is true, *length will
837  * be set to a safe length required to read a packet.
838  */
839 static bool
840 read_pkt_insn(struct bpf_insn *pc, uint32_t *length)
841 {
842 	bool rv;
843 	uint32_t width;
844 
845 	switch (BPF_CLASS(pc->code)) {
846 	default:
847 		rv = false;
848 		break;
849 
850 	case BPF_LD:
851 		rv = BPF_MODE(pc->code) == BPF_ABS ||
852 		     BPF_MODE(pc->code) == BPF_IND;
853 		if (rv)
854 			width = read_width(pc);
855 		break;
856 
857 	case BPF_LDX:
858 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
859 		width = 1;
860 		break;
861 	}
862 
863 	if (rv && length != NULL) {
864 		*length = (pc->k > UINT32_MAX - width) ?
865 		    UINT32_MAX : pc->k + width;
866 	}
867 
868 	return rv;
869 }
870 
871 /*
872  * Set bj_check_length for all "read from packet" instructions
873  * in a linear block of instructions [from, to).
874  */
875 static void
876 set_check_length(struct bpf_insn *insns, struct bpfjit_insn_data *insn_dat,
877     size_t from, size_t to, uint32_t length)
878 {
879 
880 	for (; from < to; from++) {
881 		if (read_pkt_insn(&insns[from], NULL)) {
882 			insn_dat[from].bj_aux.bj_rdata.bj_check_length = length;
883 			length = 0;
884 		}
885 	}
886 }
887 
888 /*
889  * The function divides instructions into blocks. Destination of a jump
890  * instruction starts a new block. BPF_RET and BPF_JMP instructions
891  * terminate a block. Blocks are linear, that is, there are no jumps out
892  * from the middle of a block and there are no jumps in to the middle of
893  * a block.
894  * If a block has one or more "read from packet" instructions,
895  * bj_check_length will be set to one value for the whole block and that
896  * value will be equal to the greatest value of safe lengths of "read from
897  * packet" instructions inside the block.
898  */
899 static int
900 optimize(struct bpf_insn *insns,
901     struct bpfjit_insn_data *insn_dat, size_t insn_count)
902 {
903 	size_t i;
904 	size_t first_read;
905 	bool unreachable;
906 	uint32_t jt, jf;
907 	uint32_t length, safe_length;
908 	struct bpfjit_jump *jmp, *jtf;
909 
910 	for (i = 0; i < insn_count; i++)
911 		SLIST_INIT(&insn_dat[i].bj_jumps);
912 
913 	safe_length = 0;
914 	unreachable = false;
915 	first_read = SIZE_MAX;
916 
917 	for (i = 0; i < insn_count; i++) {
918 
919 		if (!SLIST_EMPTY(&insn_dat[i].bj_jumps)) {
920 			unreachable = false;
921 
922 			set_check_length(insns, insn_dat,
923 			    first_read, i, safe_length);
924 			first_read = SIZE_MAX;
925 
926 			safe_length = UINT32_MAX;
927 			SLIST_FOREACH(jmp, &insn_dat[i].bj_jumps, bj_entries) {
928 				if (jmp->bj_safe_length < safe_length)
929 					safe_length = jmp->bj_safe_length;
930 			}
931 		}
932 
933 		insn_dat[i].bj_unreachable = unreachable;
934 		if (unreachable)
935 			continue;
936 
937 		if (read_pkt_insn(&insns[i], &length)) {
938 			if (first_read == SIZE_MAX)
939 				first_read = i;
940 			if (length > safe_length)
941 				safe_length = length;
942 		}
943 
944 		switch (BPF_CLASS(insns[i].code)) {
945 		case BPF_RET:
946 			unreachable = true;
947 			continue;
948 
949 		case BPF_JMP:
950 			if (insns[i].code == (BPF_JMP|BPF_JA)) {
951 				jt = jf = insns[i].k;
952 			} else {
953 				jt = insns[i].jt;
954 				jf = insns[i].jf;
955 			}
956 
957 			if (jt >= insn_count - (i + 1) ||
958 			    jf >= insn_count - (i + 1)) {
959 				return -1;
960 			}
961 
962 			if (jt > 0 && jf > 0)
963 				unreachable = true;
964 
965 			jtf = insn_dat[i].bj_aux.bj_jdata.bj_jtf;
966 
967 			jtf[0].bj_jump = NULL;
968 			jtf[0].bj_safe_length = safe_length;
969 			SLIST_INSERT_HEAD(&insn_dat[i + 1 + jt].bj_jumps,
970 			    &jtf[0], bj_entries);
971 
972 			if (jf != jt) {
973 				jtf[1].bj_jump = NULL;
974 				jtf[1].bj_safe_length = safe_length;
975 				SLIST_INSERT_HEAD(&insn_dat[i + 1 + jf].bj_jumps,
976 				    &jtf[1], bj_entries);
977 			}
978 
979 			continue;
980 		}
981 	}
982 
983 	set_check_length(insns, insn_dat, first_read, insn_count, safe_length);
984 
985 	return 0;
986 }
987 
988 /*
989  * Count out-of-bounds and division by zero jumps.
990  *
991  * insn_dat should be initialized by optimize().
992  */
993 static size_t
994 get_ret0_size(struct bpf_insn *insns, struct bpfjit_insn_data *insn_dat,
995     size_t insn_count)
996 {
997 	size_t rv = 0;
998 	size_t i;
999 
1000 	for (i = 0; i < insn_count; i++) {
1001 
1002 		if (read_pkt_insn(&insns[i], NULL)) {
1003 			if (insn_dat[i].bj_aux.bj_rdata.bj_check_length > 0)
1004 				rv++;
1005 #ifdef _KERNEL
1006 			rv++;
1007 #endif
1008 		}
1009 
1010 		if (insns[i].code == (BPF_LD|BPF_IND|BPF_B) ||
1011 		    insns[i].code == (BPF_LD|BPF_IND|BPF_H) ||
1012 		    insns[i].code == (BPF_LD|BPF_IND|BPF_W)) {
1013 			rv++;
1014 		}
1015 
1016 		if (insns[i].code == (BPF_ALU|BPF_DIV|BPF_X))
1017 			rv++;
1018 
1019 		if (insns[i].code == (BPF_ALU|BPF_DIV|BPF_K) &&
1020 		    insns[i].k == 0) {
1021 			rv++;
1022 		}
1023 	}
1024 
1025 	return rv;
1026 }
1027 
1028 /*
1029  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1030  */
1031 static int
1032 bpf_alu_to_sljit_op(struct bpf_insn *pc)
1033 {
1034 
1035 	/*
1036 	 * Note: all supported 64bit arches have 32bit multiply
1037 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1038 	 */
1039 	switch (BPF_OP(pc->code)) {
1040 	case BPF_ADD: return SLJIT_ADD;
1041 	case BPF_SUB: return SLJIT_SUB;
1042 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1043 	case BPF_OR:  return SLJIT_OR;
1044 	case BPF_AND: return SLJIT_AND;
1045 	case BPF_LSH: return SLJIT_SHL;
1046 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
1047 	default:
1048 		BPFJIT_ASSERT(false);
1049 		return 0;
1050 	}
1051 }
1052 
1053 /*
1054  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1055  */
1056 static int
1057 bpf_jmp_to_sljit_cond(struct bpf_insn *pc, bool negate)
1058 {
1059 	/*
1060 	 * Note: all supported 64bit arches have 32bit comparison
1061 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1062 	 */
1063 	int rv = SLJIT_INT_OP;
1064 
1065 	switch (BPF_OP(pc->code)) {
1066 	case BPF_JGT:
1067 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1068 		break;
1069 	case BPF_JGE:
1070 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1071 		break;
1072 	case BPF_JEQ:
1073 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1074 		break;
1075 	case BPF_JSET:
1076 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1077 		break;
1078 	default:
1079 		BPFJIT_ASSERT(false);
1080 	}
1081 
1082 	return rv;
1083 }
1084 
1085 static unsigned int
1086 bpfjit_optimization_hints(struct bpf_insn *insns, size_t insn_count)
1087 {
1088 	unsigned int rv = BPFJIT_INIT_A;
1089 	struct bpf_insn *pc;
1090 	unsigned int minm, maxm;
1091 
1092 	BPFJIT_ASSERT(BPF_MEMWORDS - 1 <= 0xff);
1093 
1094 	maxm = 0;
1095 	minm = BPF_MEMWORDS - 1;
1096 
1097 	for (pc = insns; pc != insns + insn_count; pc++) {
1098 		switch (BPF_CLASS(pc->code)) {
1099 		case BPF_LD:
1100 			if (BPF_MODE(pc->code) == BPF_IND)
1101 				rv |= BPFJIT_INIT_X;
1102 			if (BPF_MODE(pc->code) == BPF_MEM &&
1103 			    (uint32_t)pc->k < BPF_MEMWORDS) {
1104 				if (pc->k > maxm)
1105 					maxm = pc->k;
1106 				if (pc->k < minm)
1107 					minm = pc->k;
1108 			}
1109 			continue;
1110 		case BPF_LDX:
1111 			rv |= BPFJIT_INIT_X;
1112 			if (BPF_MODE(pc->code) == BPF_MEM &&
1113 			    (uint32_t)pc->k < BPF_MEMWORDS) {
1114 				if (pc->k > maxm)
1115 					maxm = pc->k;
1116 				if (pc->k < minm)
1117 					minm = pc->k;
1118 			}
1119 			continue;
1120 		case BPF_ST:
1121 			if ((uint32_t)pc->k < BPF_MEMWORDS) {
1122 				if (pc->k > maxm)
1123 					maxm = pc->k;
1124 				if (pc->k < minm)
1125 					minm = pc->k;
1126 			}
1127 			continue;
1128 		case BPF_STX:
1129 			rv |= BPFJIT_INIT_X;
1130 			if ((uint32_t)pc->k < BPF_MEMWORDS) {
1131 				if (pc->k > maxm)
1132 					maxm = pc->k;
1133 				if (pc->k < minm)
1134 					minm = pc->k;
1135 			}
1136 			continue;
1137 		case BPF_ALU:
1138 			if (pc->code == (BPF_ALU|BPF_NEG))
1139 				continue;
1140 			if (BPF_SRC(pc->code) == BPF_X)
1141 				rv |= BPFJIT_INIT_X;
1142 			continue;
1143 		case BPF_JMP:
1144 			if (pc->code == (BPF_JMP|BPF_JA))
1145 				continue;
1146 			if (BPF_SRC(pc->code) == BPF_X)
1147 				rv |= BPFJIT_INIT_X;
1148 			continue;
1149 		case BPF_RET:
1150 			continue;
1151 		case BPF_MISC:
1152 			rv |= BPFJIT_INIT_X;
1153 			continue;
1154 		default:
1155 			BPFJIT_ASSERT(false);
1156 		}
1157 	}
1158 
1159 	return rv | (maxm << 8) | minm;
1160 }
1161 
1162 /*
1163  * Convert BPF_K and BPF_X to sljit register.
1164  */
1165 static int
1166 kx_to_reg(struct bpf_insn *pc)
1167 {
1168 
1169 	switch (BPF_SRC(pc->code)) {
1170 	case BPF_K: return SLJIT_IMM;
1171 	case BPF_X: return BPFJIT_X;
1172 	default:
1173 		BPFJIT_ASSERT(false);
1174 		return 0;
1175 	}
1176 }
1177 
1178 static sljit_w
1179 kx_to_reg_arg(struct bpf_insn *pc)
1180 {
1181 
1182 	switch (BPF_SRC(pc->code)) {
1183 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1184 	case BPF_X: return 0;               /* BPFJIT_X, 0,      */
1185 	default:
1186 		BPFJIT_ASSERT(false);
1187 		return 0;
1188 	}
1189 }
1190 
1191 bpfjit_func_t
1192 bpfjit_generate_code(bpf_ctx_t *bc, struct bpf_insn *insns, size_t insn_count)
1193 {
1194 	void *rv;
1195 	size_t i;
1196 	int status;
1197 	int branching, negate;
1198 	unsigned int rval, mode, src;
1199 	int ntmp;
1200 	unsigned int locals_size;
1201 	unsigned int minm, maxm; /* min/max k for M[k] */
1202 	size_t mem_locals_start; /* start of M[] array */
1203 	unsigned int opts;
1204 	struct bpf_insn *pc;
1205 	struct sljit_compiler* compiler;
1206 
1207 	/* a list of jumps to a normal return from a generated function */
1208 	struct sljit_jump **returns;
1209 	size_t returns_size, returns_maxsize;
1210 
1211 	/* a list of jumps to out-of-bound return from a generated function */
1212 	struct sljit_jump **ret0;
1213 	size_t ret0_size = 0, ret0_maxsize = 0;
1214 
1215 	struct bpfjit_insn_data *insn_dat;
1216 
1217 	/* for local use */
1218 	struct sljit_label *label;
1219 	struct sljit_jump *jump;
1220 	struct bpfjit_jump *bjump, *jtf;
1221 
1222 	struct sljit_jump *to_mchain_jump;
1223 
1224 	uint32_t jt, jf;
1225 
1226 	rv = NULL;
1227 	compiler = NULL;
1228 	insn_dat = NULL;
1229 	returns = NULL;
1230 	ret0 = NULL;
1231 
1232 	opts = bpfjit_optimization_hints(insns, insn_count);
1233 	minm = opts & 0xff;
1234 	maxm = (opts >> 8) & 0xff;
1235 	mem_locals_start = mem_local_offset(0, 0);
1236 	locals_size = (minm <= maxm) ?
1237 	    mem_local_offset(maxm + 1, minm) : mem_locals_start;
1238 
1239 	ntmp = 4;
1240 #ifdef _KERNEL
1241 	ntmp += 1; /* for BPFJIT_KERN_TMP */
1242 #endif
1243 
1244 	returns_maxsize = count_returns(insns, insn_count);
1245 	if (returns_maxsize  == 0)
1246 		goto fail;
1247 
1248 	insn_dat = BPFJIT_ALLOC(insn_count * sizeof(insn_dat[0]));
1249 	if (insn_dat == NULL)
1250 		goto fail;
1251 
1252 	if (optimize(insns, insn_dat, insn_count) < 0)
1253 		goto fail;
1254 
1255 	ret0_size = 0;
1256 	ret0_maxsize = get_ret0_size(insns, insn_dat, insn_count);
1257 	if (ret0_maxsize > 0) {
1258 		ret0 = BPFJIT_ALLOC(ret0_maxsize * sizeof(ret0[0]));
1259 		if (ret0 == NULL)
1260 			goto fail;
1261 	}
1262 
1263 	returns_size = 0;
1264 	returns = BPFJIT_ALLOC(returns_maxsize * sizeof(returns[0]));
1265 	if (returns == NULL)
1266 		goto fail;
1267 
1268 	compiler = sljit_create_compiler();
1269 	if (compiler == NULL)
1270 		goto fail;
1271 
1272 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
1273 	sljit_compiler_verbose(compiler, stderr);
1274 #endif
1275 
1276 	status = sljit_emit_enter(compiler, 3, ntmp, 3, locals_size);
1277 	if (status != SLJIT_SUCCESS)
1278 		goto fail;
1279 
1280 	for (i = mem_locals_start; i < locals_size; i+= sizeof(uint32_t)) {
1281 		status = sljit_emit_op1(compiler,
1282 		    SLJIT_MOV_UI,
1283 		    SLJIT_MEM1(SLJIT_LOCALS_REG), i,
1284 		    SLJIT_IMM, 0);
1285 		if (status != SLJIT_SUCCESS)
1286 			goto fail;
1287 	}
1288 
1289 	if (opts & BPFJIT_INIT_A) {
1290 		/* A = 0; */
1291 		status = sljit_emit_op1(compiler,
1292 		    SLJIT_MOV,
1293 		    BPFJIT_A, 0,
1294 		    SLJIT_IMM, 0);
1295 		if (status != SLJIT_SUCCESS)
1296 			goto fail;
1297 	}
1298 
1299 	if (opts & BPFJIT_INIT_X) {
1300 		/* X = 0; */
1301 		status = sljit_emit_op1(compiler,
1302 		    SLJIT_MOV,
1303 		    BPFJIT_X, 0,
1304 		    SLJIT_IMM, 0);
1305 		if (status != SLJIT_SUCCESS)
1306 			goto fail;
1307 	}
1308 
1309 	for (i = 0; i < insn_count; i++) {
1310 		if (insn_dat[i].bj_unreachable)
1311 			continue;
1312 
1313 		to_mchain_jump = NULL;
1314 
1315 		/*
1316 		 * Resolve jumps to the current insn.
1317 		 */
1318 		label = NULL;
1319 		SLIST_FOREACH(bjump, &insn_dat[i].bj_jumps, bj_entries) {
1320 			if (bjump->bj_jump != NULL) {
1321 				if (label == NULL)
1322 					label = sljit_emit_label(compiler);
1323 				if (label == NULL)
1324 					goto fail;
1325 				sljit_set_label(bjump->bj_jump, label);
1326 			}
1327 		}
1328 
1329 		if (read_pkt_insn(&insns[i], NULL) &&
1330 		    insn_dat[i].bj_aux.bj_rdata.bj_check_length > 0) {
1331 			/* if (buflen < bj_check_length) return 0; */
1332 			jump = sljit_emit_cmp(compiler,
1333 			    SLJIT_C_LESS,
1334 			    BPFJIT_BUFLEN, 0,
1335 			    SLJIT_IMM,
1336 			    insn_dat[i].bj_aux.bj_rdata.bj_check_length);
1337 			if (jump == NULL)
1338 		  		goto fail;
1339 #ifdef _KERNEL
1340 			to_mchain_jump = jump;
1341 #else
1342 			ret0[ret0_size++] = jump;
1343 #endif
1344 		}
1345 
1346 		pc = &insns[i];
1347 		switch (BPF_CLASS(pc->code)) {
1348 
1349 		default:
1350 			goto fail;
1351 
1352 		case BPF_LD:
1353 			/* BPF_LD+BPF_IMM          A <- k */
1354 			if (pc->code == (BPF_LD|BPF_IMM)) {
1355 				status = sljit_emit_op1(compiler,
1356 				    SLJIT_MOV,
1357 				    BPFJIT_A, 0,
1358 				    SLJIT_IMM, (uint32_t)pc->k);
1359 				if (status != SLJIT_SUCCESS)
1360 					goto fail;
1361 
1362 				continue;
1363 			}
1364 
1365 			/* BPF_LD+BPF_MEM          A <- M[k] */
1366 			if (pc->code == (BPF_LD|BPF_MEM)) {
1367 				if (pc->k < minm || pc->k > maxm)
1368 					goto fail;
1369 				status = sljit_emit_op1(compiler,
1370 				    SLJIT_MOV_UI,
1371 				    BPFJIT_A, 0,
1372 				    SLJIT_MEM1(SLJIT_LOCALS_REG),
1373 				    mem_local_offset(pc->k, minm));
1374 				if (status != SLJIT_SUCCESS)
1375 					goto fail;
1376 
1377 				continue;
1378 			}
1379 
1380 			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
1381 			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
1382 				status = sljit_emit_op1(compiler,
1383 				    SLJIT_MOV,
1384 				    BPFJIT_A, 0,
1385 				    BPFJIT_WIRELEN, 0);
1386 				if (status != SLJIT_SUCCESS)
1387 					goto fail;
1388 
1389 				continue;
1390 			}
1391 
1392 			mode = BPF_MODE(pc->code);
1393 			if (mode != BPF_ABS && mode != BPF_IND)
1394 				goto fail;
1395 
1396 			status = emit_pkt_read(compiler, pc,
1397 			    to_mchain_jump, ret0, &ret0_size);
1398 			if (status != SLJIT_SUCCESS)
1399 				goto fail;
1400 
1401 			continue;
1402 
1403 		case BPF_LDX:
1404 			mode = BPF_MODE(pc->code);
1405 
1406 			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
1407 			if (mode == BPF_IMM) {
1408 				if (BPF_SIZE(pc->code) != BPF_W)
1409 					goto fail;
1410 				status = sljit_emit_op1(compiler,
1411 				    SLJIT_MOV,
1412 				    BPFJIT_X, 0,
1413 				    SLJIT_IMM, (uint32_t)pc->k);
1414 				if (status != SLJIT_SUCCESS)
1415 					goto fail;
1416 
1417 				continue;
1418 			}
1419 
1420 			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
1421 			if (mode == BPF_LEN) {
1422 				if (BPF_SIZE(pc->code) != BPF_W)
1423 					goto fail;
1424 				status = sljit_emit_op1(compiler,
1425 				    SLJIT_MOV,
1426 				    BPFJIT_X, 0,
1427 				    BPFJIT_WIRELEN, 0);
1428 				if (status != SLJIT_SUCCESS)
1429 					goto fail;
1430 
1431 				continue;
1432 			}
1433 
1434 			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
1435 			if (mode == BPF_MEM) {
1436 				if (BPF_SIZE(pc->code) != BPF_W)
1437 					goto fail;
1438 				if (pc->k < minm || pc->k > maxm)
1439 					goto fail;
1440 				status = sljit_emit_op1(compiler,
1441 				    SLJIT_MOV_UI,
1442 				    BPFJIT_X, 0,
1443 				    SLJIT_MEM1(SLJIT_LOCALS_REG),
1444 				    mem_local_offset(pc->k, minm));
1445 				if (status != SLJIT_SUCCESS)
1446 					goto fail;
1447 
1448 				continue;
1449 			}
1450 
1451 			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
1452 			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
1453 				goto fail;
1454 
1455 			status = emit_msh(compiler, pc,
1456 			    to_mchain_jump, ret0, &ret0_size);
1457 			if (status != SLJIT_SUCCESS)
1458 				goto fail;
1459 
1460 			continue;
1461 
1462 		case BPF_ST:
1463 			if (pc->code != BPF_ST || pc->k < minm || pc->k > maxm)
1464 				goto fail;
1465 
1466 			status = sljit_emit_op1(compiler,
1467 			    SLJIT_MOV_UI,
1468 			    SLJIT_MEM1(SLJIT_LOCALS_REG),
1469 			    mem_local_offset(pc->k, minm),
1470 			    BPFJIT_A, 0);
1471 			if (status != SLJIT_SUCCESS)
1472 				goto fail;
1473 
1474 			continue;
1475 
1476 		case BPF_STX:
1477 			if (pc->code != BPF_STX || pc->k < minm || pc->k > maxm)
1478 				goto fail;
1479 
1480 			status = sljit_emit_op1(compiler,
1481 			    SLJIT_MOV_UI,
1482 			    SLJIT_MEM1(SLJIT_LOCALS_REG),
1483 			    mem_local_offset(pc->k, minm),
1484 			    BPFJIT_X, 0);
1485 			if (status != SLJIT_SUCCESS)
1486 				goto fail;
1487 
1488 			continue;
1489 
1490 		case BPF_ALU:
1491 
1492 			if (pc->code == (BPF_ALU|BPF_NEG)) {
1493 				status = sljit_emit_op1(compiler,
1494 				    SLJIT_NEG,
1495 				    BPFJIT_A, 0,
1496 				    BPFJIT_A, 0);
1497 				if (status != SLJIT_SUCCESS)
1498 					goto fail;
1499 
1500 				continue;
1501 			}
1502 
1503 			if (BPF_OP(pc->code) != BPF_DIV) {
1504 				status = sljit_emit_op2(compiler,
1505 				    bpf_alu_to_sljit_op(pc),
1506 				    BPFJIT_A, 0,
1507 				    BPFJIT_A, 0,
1508 				    kx_to_reg(pc), kx_to_reg_arg(pc));
1509 				if (status != SLJIT_SUCCESS)
1510 					goto fail;
1511 
1512 				continue;
1513 			}
1514 
1515 			/* BPF_DIV */
1516 
1517 			src = BPF_SRC(pc->code);
1518 			if (src != BPF_X && src != BPF_K)
1519 				goto fail;
1520 
1521 			/* division by zero? */
1522 			if (src == BPF_X) {
1523 				jump = sljit_emit_cmp(compiler,
1524 				    SLJIT_C_EQUAL|SLJIT_INT_OP,
1525 				    BPFJIT_X, 0,
1526 				    SLJIT_IMM, 0);
1527 				if (jump == NULL)
1528 					goto fail;
1529 				ret0[ret0_size++] = jump;
1530 			} else if (pc->k == 0) {
1531 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
1532 				if (jump == NULL)
1533 					goto fail;
1534 				ret0[ret0_size++] = jump;
1535 			}
1536 
1537 			if (src == BPF_X) {
1538 				status = emit_division(compiler, BPFJIT_X, 0);
1539 				if (status != SLJIT_SUCCESS)
1540 					goto fail;
1541 			} else if (pc->k != 0) {
1542 				if (pc->k & (pc->k - 1)) {
1543 				    status = emit_division(compiler,
1544 				        SLJIT_IMM, (uint32_t)pc->k);
1545 				} else {
1546     				    status = emit_pow2_division(compiler,
1547 				        (uint32_t)pc->k);
1548 				}
1549 				if (status != SLJIT_SUCCESS)
1550 					goto fail;
1551 			}
1552 
1553 			continue;
1554 
1555 		case BPF_JMP:
1556 
1557 			if (pc->code == (BPF_JMP|BPF_JA)) {
1558 				jt = jf = pc->k;
1559 			} else {
1560 				jt = pc->jt;
1561 				jf = pc->jf;
1562 			}
1563 
1564 			negate = (jt == 0) ? 1 : 0;
1565 			branching = (jt == jf) ? 0 : 1;
1566 			jtf = insn_dat[i].bj_aux.bj_jdata.bj_jtf;
1567 
1568 			if (branching) {
1569 				if (BPF_OP(pc->code) != BPF_JSET) {
1570 					jump = sljit_emit_cmp(compiler,
1571 					    bpf_jmp_to_sljit_cond(pc, negate),
1572 					    BPFJIT_A, 0,
1573 					    kx_to_reg(pc), kx_to_reg_arg(pc));
1574 				} else {
1575 					status = sljit_emit_op2(compiler,
1576 					    SLJIT_AND,
1577 					    BPFJIT_TMP1, 0,
1578 					    BPFJIT_A, 0,
1579 					    kx_to_reg(pc), kx_to_reg_arg(pc));
1580 					if (status != SLJIT_SUCCESS)
1581 						goto fail;
1582 
1583 					jump = sljit_emit_cmp(compiler,
1584 					    bpf_jmp_to_sljit_cond(pc, negate),
1585 					    BPFJIT_TMP1, 0,
1586 					    SLJIT_IMM, 0);
1587 				}
1588 
1589 				if (jump == NULL)
1590 					goto fail;
1591 
1592 				BPFJIT_ASSERT(jtf[negate].bj_jump == NULL);
1593 				jtf[negate].bj_jump = jump;
1594 			}
1595 
1596 			if (!branching || (jt != 0 && jf != 0)) {
1597 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
1598 				if (jump == NULL)
1599 					goto fail;
1600 
1601 				BPFJIT_ASSERT(jtf[branching].bj_jump == NULL);
1602 				jtf[branching].bj_jump = jump;
1603 			}
1604 
1605 			continue;
1606 
1607 		case BPF_RET:
1608 
1609 			rval = BPF_RVAL(pc->code);
1610 			if (rval == BPF_X)
1611 				goto fail;
1612 
1613 			/* BPF_RET+BPF_K    accept k bytes */
1614 			if (rval == BPF_K) {
1615 				status = sljit_emit_op1(compiler,
1616 				    SLJIT_MOV,
1617 				    BPFJIT_A, 0,
1618 				    SLJIT_IMM, (uint32_t)pc->k);
1619 				if (status != SLJIT_SUCCESS)
1620 					goto fail;
1621 			}
1622 
1623 			/* BPF_RET+BPF_A    accept A bytes */
1624 			if (rval == BPF_A) {
1625 #if BPFJIT_A != SLJIT_RETURN_REG
1626 				status = sljit_emit_op1(compiler,
1627 				    SLJIT_MOV,
1628 				    SLJIT_RETURN_REG, 0,
1629 				    BPFJIT_A, 0);
1630 				if (status != SLJIT_SUCCESS)
1631 					goto fail;
1632 #endif
1633 			}
1634 
1635 			/*
1636 			 * Save a jump to a normal return. If the program
1637 			 * ends with BPF_RET, no jump is needed because
1638 			 * the normal return is generated right after the
1639 			 * last instruction.
1640 			 */
1641 			if (i != insn_count - 1) {
1642 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
1643 				if (jump == NULL)
1644 					goto fail;
1645 				returns[returns_size++] = jump;
1646 			}
1647 
1648 			continue;
1649 
1650 		case BPF_MISC:
1651 
1652 			if (pc->code == (BPF_MISC|BPF_TAX)) {
1653 				status = sljit_emit_op1(compiler,
1654 				    SLJIT_MOV_UI,
1655 				    BPFJIT_X, 0,
1656 				    BPFJIT_A, 0);
1657 				if (status != SLJIT_SUCCESS)
1658 					goto fail;
1659 
1660 				continue;
1661 			}
1662 
1663 			if (pc->code == (BPF_MISC|BPF_TXA)) {
1664 				status = sljit_emit_op1(compiler,
1665 				    SLJIT_MOV,
1666 				    BPFJIT_A, 0,
1667 				    BPFJIT_X, 0);
1668 				if (status != SLJIT_SUCCESS)
1669 					goto fail;
1670 
1671 				continue;
1672 			}
1673 
1674 			goto fail;
1675 		} /* switch */
1676 	} /* main loop */
1677 
1678 	BPFJIT_ASSERT(ret0_size == ret0_maxsize);
1679 	BPFJIT_ASSERT(returns_size <= returns_maxsize);
1680 
1681 	if (returns_size > 0) {
1682 		label = sljit_emit_label(compiler);
1683 		if (label == NULL)
1684 			goto fail;
1685 		for (i = 0; i < returns_size; i++)
1686 			sljit_set_label(returns[i], label);
1687 	}
1688 
1689 	status = sljit_emit_return(compiler,
1690 	    SLJIT_MOV_UI,
1691 	    BPFJIT_A, 0);
1692 	if (status != SLJIT_SUCCESS)
1693 		goto fail;
1694 
1695 	if (ret0_size > 0) {
1696 		label = sljit_emit_label(compiler);
1697 		if (label == NULL)
1698 			goto fail;
1699 
1700 		for (i = 0; i < ret0_size; i++)
1701 			sljit_set_label(ret0[i], label);
1702 
1703 		status = sljit_emit_op1(compiler,
1704 		    SLJIT_MOV,
1705 		    SLJIT_RETURN_REG, 0,
1706 		    SLJIT_IMM, 0);
1707 		if (status != SLJIT_SUCCESS)
1708 			goto fail;
1709 
1710 		status = sljit_emit_return(compiler,
1711 		    SLJIT_MOV_UI,
1712 		    SLJIT_RETURN_REG, 0);
1713 		if (status != SLJIT_SUCCESS)
1714 			goto fail;
1715 	}
1716 
1717 	rv = sljit_generate_code(compiler);
1718 
1719 fail:
1720 	if (compiler != NULL)
1721 		sljit_free_compiler(compiler);
1722 
1723 	if (insn_dat != NULL)
1724 		BPFJIT_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));
1725 
1726 	if (returns != NULL)
1727 		BPFJIT_FREE(returns, returns_maxsize * sizeof(returns[0]));
1728 
1729 	if (ret0 != NULL)
1730 		BPFJIT_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));
1731 
1732 	return (bpfjit_func_t)rv;
1733 }
1734 
1735 void
1736 bpfjit_free_code(bpfjit_func_t code)
1737 {
1738 	sljit_free_code((void *)code);
1739 }
1740