xref: /netbsd-src/sys/net/bpfjit.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: bpfjit.c,v 1.21 2014/07/05 11:13:13 alnsn Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2014 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.21 2014/07/05 11:13:13 alnsn Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.21 2014/07/05 11:13:13 alnsn Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <assert.h>
44 #define BJ_ASSERT(c) assert(c)
45 #else
46 #define BJ_ASSERT(c) KASSERT(c)
47 #endif
48 
49 #ifndef _KERNEL
50 #include <stdlib.h>
51 #define BJ_ALLOC(sz) malloc(sz)
52 #define BJ_FREE(p, sz) free(p)
53 #else
54 #include <sys/kmem.h>
55 #define BJ_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
56 #define BJ_FREE(p, sz) kmem_free(p, sz)
57 #endif
58 
59 #ifndef _KERNEL
60 #include <limits.h>
61 #include <stdbool.h>
62 #include <stddef.h>
63 #include <stdint.h>
64 #else
65 #include <sys/atomic.h>
66 #include <sys/module.h>
67 #endif
68 
69 #define	__BPF_PRIVATE
70 #include <net/bpf.h>
71 #include <net/bpfjit.h>
72 #include <sljitLir.h>
73 
74 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
75 #include <stdio.h> /* for stderr */
76 #endif
77 
78 /*
79  * Arguments of generated bpfjit_func_t.
80  * The first argument is reassigned upon entry
81  * to a more frequently used buf argument.
82  */
83 #define BJ_CTX_ARG	SLJIT_SAVED_REG1
84 #define BJ_ARGS		SLJIT_SAVED_REG2
85 
86 /*
87  * Permanent register assignments.
88  */
89 #define BJ_BUF		SLJIT_SAVED_REG1
90 //#define BJ_ARGS	SLJIT_SAVED_REG2
91 #define BJ_BUFLEN	SLJIT_SAVED_REG3
92 #define BJ_AREG		SLJIT_SCRATCH_REG1
93 #define BJ_TMP1REG	SLJIT_SCRATCH_REG2
94 #define BJ_TMP2REG	SLJIT_SCRATCH_REG3
95 #define BJ_XREG		SLJIT_TEMPORARY_EREG1
96 #define BJ_TMP3REG	SLJIT_TEMPORARY_EREG2
97 
98 /*
99  * EREG registers can't be used for indirect calls, reuse BJ_BUF and
100  * BJ_BUFLEN registers. They can be easily restored from BJ_ARGS.
101  */
102 #define BJ_COPF_PTR	SLJIT_SAVED_REG1
103 #define BJ_COPF_IDX	SLJIT_SAVED_REG3
104 
105 #ifdef _KERNEL
106 #define MAX_MEMWORDS BPF_MAX_MEMWORDS
107 #else
108 #define MAX_MEMWORDS BPF_MEMWORDS
109 #endif
110 
111 #define BJ_INIT_NOBITS  ((bpf_memword_init_t)0)
112 #define BJ_INIT_MBIT(k) BPF_MEMWORD_INIT(k)
113 #define BJ_INIT_ABIT    BJ_INIT_MBIT(MAX_MEMWORDS)
114 #define BJ_INIT_XBIT    BJ_INIT_MBIT(MAX_MEMWORDS + 1)
115 
116 /*
117  * Get a number of memwords and external memwords from a bpf_ctx object.
118  */
119 #define GET_EXTWORDS(bc) ((bc) ? (bc)->extwords : 0)
120 #define GET_MEMWORDS(bc) (GET_EXTWORDS(bc) ? GET_EXTWORDS(bc) : BPF_MEMWORDS)
121 
122 /*
123  * Optimization hints.
124  */
125 typedef unsigned int bpfjit_hint_t;
126 #define BJ_HINT_LDW  0x01 /* 32-bit packet read  */
127 #define BJ_HINT_IND  0x02 /* packet read at a variable offset */
128 #define BJ_HINT_COP  0x04 /* BPF_COP or BPF_COPX instruction  */
129 #define BJ_HINT_XREG 0x08 /* BJ_XREG is needed   */
130 #define BJ_HINT_LDX  0x10 /* BPF_LDX instruction */
131 
132 /*
133  * Datatype for Array Bounds Check Elimination (ABC) pass.
134  */
135 typedef uint64_t bpfjit_abc_length_t;
136 #define MAX_ABC_LENGTH (UINT32_MAX + UINT64_C(4)) /* max. width is 4 */
137 
138 struct bpfjit_stack
139 {
140 	bpf_ctx_t *ctx;
141 	uint32_t *extmem; /* pointer to external memory store */
142 #ifdef _KERNEL
143 	int err; /* 3rd argument for m_xword/m_xhalf/m_xbyte function call */
144 #endif
145 	uint32_t mem[BPF_MEMWORDS]; /* internal memory store */
146 };
147 
148 /*
149  * Data for BPF_JMP instruction.
150  * Forward declaration for struct bpfjit_jump.
151  */
152 struct bpfjit_jump_data;
153 
154 /*
155  * Node of bjumps list.
156  */
157 struct bpfjit_jump {
158 	struct sljit_jump *sjump;
159 	SLIST_ENTRY(bpfjit_jump) entries;
160 	struct bpfjit_jump_data *jdata;
161 };
162 
163 /*
164  * Data for BPF_JMP instruction.
165  */
166 struct bpfjit_jump_data {
167 	/*
168 	 * These entries make up bjumps list:
169 	 * jtf[0] - when coming from jt path,
170 	 * jtf[1] - when coming from jf path.
171 	 */
172 	struct bpfjit_jump jtf[2];
173 	/*
174 	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
175 	 */
176 	bpfjit_abc_length_t abc_length;
177 	/*
178 	 * Length checked by the last out-of-bounds check.
179 	 */
180 	bpfjit_abc_length_t checked_length;
181 };
182 
183 /*
184  * Data for "read from packet" instructions.
185  * See also read_pkt_insn() function below.
186  */
187 struct bpfjit_read_pkt_data {
188 	/*
189 	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
190 	 */
191 	bpfjit_abc_length_t abc_length;
192 	/*
193 	 * If positive, emit "if (buflen < check_length) return 0"
194 	 * out-of-bounds check.
195 	 * Values greater than UINT32_MAX generate unconditional "return 0".
196 	 */
197 	bpfjit_abc_length_t check_length;
198 };
199 
200 /*
201  * Additional (optimization-related) data for bpf_insn.
202  */
203 struct bpfjit_insn_data {
204 	/* List of jumps to this insn. */
205 	SLIST_HEAD(, bpfjit_jump) bjumps;
206 
207 	union {
208 		struct bpfjit_jump_data     jdata;
209 		struct bpfjit_read_pkt_data rdata;
210 	} u;
211 
212 	bpf_memword_init_t invalid;
213 	bool unreachable;
214 };
215 
216 #ifdef _KERNEL
217 
218 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
219 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
220 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
221 
222 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
223 
/*
 * Module command handler: publish the jit entry points in
 * bpfjit_module_ops on init.
 *
 * bj_free_code is stored before bj_generate_code, with a
 * membar_producer() after each store, so that a reader which
 * observes a non-NULL bj_generate_code also sees a valid
 * bj_free_code.
 */
static int
bpfjit_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
		membar_producer();
		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
		membar_producer();
		return 0;

	case MODULE_CMD_FINI:
		/*
		 * Unloading is refused; generated code may still be
		 * referenced — NOTE(review): presumably, confirm.
		 */
		return EOPNOTSUPP;

	default:
		return ENOTTY;
	}
}
243 #endif
244 
245 /*
246  * Return a number of scratch registers to pass
247  * to sljit_emit_enter() function.
248  */
249 static sljit_si
250 nscratches(bpfjit_hint_t hints)
251 {
252 	sljit_si rv = 2;
253 
254 	if (hints & BJ_HINT_LDW)
255 		rv = 3; /* uses BJ_TMP2REG */
256 
257 	if (hints & BJ_HINT_COP)
258 		rv = 3; /* calls copfunc with three arguments */
259 
260 	if (hints & BJ_HINT_XREG)
261 		rv = 4; /* uses BJ_XREG */
262 
263 #ifdef _KERNEL
264 	if (hints & BJ_HINT_LDX)
265 		rv = 5; /* uses BJ_TMP3REG */
266 #endif
267 
268 	return rv;
269 }
270 
271 static uint32_t
272 read_width(const struct bpf_insn *pc)
273 {
274 
275 	switch (BPF_SIZE(pc->code)) {
276 	case BPF_W:
277 		return 4;
278 	case BPF_H:
279 		return 2;
280 	case BPF_B:
281 		return 1;
282 	default:
283 		BJ_ASSERT(false);
284 		return 0;
285 	}
286 }
287 
288 /*
289  * Copy buf and buflen members of bpf_args from BJ_ARGS
290  * pointer to BJ_BUF and BJ_BUFLEN registers.
291  */
static int
load_buf_buflen(struct sljit_compiler *compiler)
{
	int status;

	/* BJ_BUF = args->pkt; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_P,
	    BJ_BUF, 0,
	    SLJIT_MEM1(BJ_ARGS),
	    offsetof(struct bpf_args, pkt));
	if (status != SLJIT_SUCCESS)
		return status;

	/* BJ_BUFLEN = args->buflen; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV, /* size_t source */
	    BJ_BUFLEN, 0,
	    SLJIT_MEM1(BJ_ARGS),
	    offsetof(struct bpf_args, buflen));

	return status;
}
313 
/*
 * Double the capacity of a dynamically sized array of jumps.
 * On failure the array is left untouched and false is returned.
 */
static bool
grow_jumps(struct sljit_jump ***jumps, size_t *size)
{
	struct sljit_jump **grown;
	const size_t elemsz = sizeof(struct sljit_jump *);
	const size_t oldcnt = *size;
	const size_t newcnt = 2 * oldcnt;

	/* Refuse to grow on counter or byte-size overflow. */
	if (newcnt < oldcnt || newcnt > SIZE_MAX / elemsz)
		return false;

	grown = BJ_ALLOC(newcnt * elemsz);
	if (grown == NULL)
		return false;

	memcpy(grown, *jumps, oldcnt * elemsz);
	BJ_FREE(*jumps, oldcnt * elemsz);

	*jumps = grown;
	*size = newcnt;
	return true;
}
336 
/*
 * Append a jump to the jumps array, growing it first when full.
 */
static bool
append_jump(struct sljit_jump *jump, struct sljit_jump ***jumps,
    size_t *size, size_t *max_size)
{

	if (*size == *max_size) {
		if (!grow_jumps(jumps, max_size))
			return false;
	}

	(*jumps)[*size] = jump;
	(*size)++;
	return true;
}
347 
348 /*
349  * Generate code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
350  */
static int
emit_read8(struct sljit_compiler *compiler, uint32_t k)
{

	/* A = buf[k]; zero-extended byte load. */
	return sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
}
360 
361 /*
362  * Generate code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
363  */
static int
emit_read16(struct sljit_compiler *compiler, uint32_t k)
{
	int status;

	/*
	 * Assemble A = (buf[k] << 8) | buf[k+1], i.e. a halfword in
	 * network byte order, from two single-byte loads.
	 */

	/* tmp1 = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	return status;
}
402 
403 /*
404  * Generate code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
405  */
static int
emit_read32(struct sljit_compiler *compiler, uint32_t k)
{
	int status;

	/*
	 * Assemble a word in network byte order from four byte loads:
	 * A = buf[k]<<24 | buf[k+1]<<16 | buf[k+2]<<8 | buf[k+3].
	 * Loads and shifts are interleaved across tmp1/tmp2 to avoid
	 * serializing every step on a single register.
	 */

	/* tmp1 = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP2REG, 0,
	    SLJIT_MEM1(BJ_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+3]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k+3);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 24; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 24);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = buf[k+2]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k+2);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = tmp2 << 16; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP2REG, 0,
	    BJ_TMP2REG, 0,
	    SLJIT_IMM, 16);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp2; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP2REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	return status;
}
496 
497 #ifdef _KERNEL
498 /*
499  * Generate m_xword/m_xhalf/m_xbyte call.
500  *
501  * pc is one of:
502  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
503  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
504  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
505  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
506  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
507  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
508  * BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
509  *
510  * The dst variable should be
511  *  - BJ_AREG when emitting code for BPF_LD instructions,
512  *  - BJ_XREG or any of BJ_TMP[1-3]REG registers when emitting
513  *    code for BPF_MSH instruction.
514  */
515 static int
516 emit_xcall(struct sljit_compiler *compiler, const struct bpf_insn *pc,
517     int dst, sljit_sw dstw, struct sljit_jump **ret0_jump,
518     uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
519 {
520 #if BJ_XREG == SLJIT_RETURN_REG   || \
521     BJ_XREG == SLJIT_SCRATCH_REG1 || \
522     BJ_XREG == SLJIT_SCRATCH_REG2 || \
523     BJ_XREG == SLJIT_SCRATCH_REG3
524 #error "Not supported assignment of registers."
525 #endif
526 	int status;
527 
528 	if (BPF_CLASS(pc->code) == BPF_LDX) {
529 		/* save A */
530 		status = sljit_emit_op1(compiler,
531 		    SLJIT_MOV,
532 		    BJ_TMP3REG, 0,
533 		    BJ_AREG, 0);
534 		if (status != SLJIT_SUCCESS)
535 			return status;
536 	}
537 
538 	/*
539 	 * Prepare registers for fn(buf, k, &err) call.
540 	 */
541 	status = sljit_emit_op1(compiler,
542 	    SLJIT_MOV,
543 	    SLJIT_SCRATCH_REG1, 0,
544 	    BJ_BUF, 0);
545 	if (status != SLJIT_SUCCESS)
546 		return status;
547 
548 	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
549 		status = sljit_emit_op2(compiler,
550 		    SLJIT_ADD,
551 		    SLJIT_SCRATCH_REG2, 0,
552 		    BJ_XREG, 0,
553 		    SLJIT_IMM, (uint32_t)pc->k);
554 	} else {
555 		status = sljit_emit_op1(compiler,
556 		    SLJIT_MOV,
557 		    SLJIT_SCRATCH_REG2, 0,
558 		    SLJIT_IMM, (uint32_t)pc->k);
559 	}
560 
561 	if (status != SLJIT_SUCCESS)
562 		return status;
563 
564 	/*
565 	 * The third argument of fn is an address on stack.
566 	 */
567 	status = sljit_get_local_base(compiler,
568 	    SLJIT_SCRATCH_REG3, 0,
569 	    offsetof(struct bpfjit_stack, err));
570 	if (status != SLJIT_SUCCESS)
571 		return status;
572 
573 	/* fn(buf, k, &err); */
574 	status = sljit_emit_ijump(compiler,
575 	    SLJIT_CALL3,
576 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
577 
578 	if (dst != SLJIT_RETURN_REG) {
579 		/* move return value to dst */
580 		status = sljit_emit_op1(compiler,
581 		    SLJIT_MOV,
582 		    dst, dstw,
583 		    SLJIT_RETURN_REG, 0);
584 		if (status != SLJIT_SUCCESS)
585 			return status;
586 	}
587 
588 	if (BPF_CLASS(pc->code) == BPF_LDX) {
589 		/* restore A */
590 		status = sljit_emit_op1(compiler,
591 		    SLJIT_MOV,
592 		    BJ_AREG, 0,
593 		    BJ_TMP3REG, 0);
594 		if (status != SLJIT_SUCCESS)
595 			return status;
596 	}
597 
598 	/* tmp3 = *err; */
599 	status = sljit_emit_op1(compiler,
600 	    SLJIT_MOV_UI,
601 	    SLJIT_SCRATCH_REG3, 0,
602 	    SLJIT_MEM1(SLJIT_LOCALS_REG),
603 	    offsetof(struct bpfjit_stack, err));
604 	if (status != SLJIT_SUCCESS)
605 		return status;
606 
607 	/* if (tmp3 != 0) return 0; */
608 	*ret0_jump = sljit_emit_cmp(compiler,
609 	    SLJIT_C_NOT_EQUAL,
610 	    SLJIT_SCRATCH_REG3, 0,
611 	    SLJIT_IMM, 0);
612 	if (*ret0_jump == NULL)
613 		return SLJIT_ERR_ALLOC_FAILED;
614 
615 	return status;
616 }
617 #endif
618 
619 /*
620  * Emit code for BPF_COP and BPF_COPX instructions.
621  */
static int
emit_cop(struct sljit_compiler *compiler, const bpf_ctx_t *bc,
    const struct bpf_insn *pc, struct sljit_jump **ret0_jump)
{
#if BJ_XREG == SLJIT_RETURN_REG   || \
    BJ_XREG == SLJIT_SCRATCH_REG1 || \
    BJ_XREG == SLJIT_SCRATCH_REG2 || \
    BJ_XREG == SLJIT_SCRATCH_REG3 || \
    BJ_COPF_PTR == BJ_ARGS        || \
    BJ_COPF_IDX	== BJ_ARGS
#error "Not supported assignment of registers."
#endif

	struct sljit_jump *jump;
	int status;

	jump = NULL;

	BJ_ASSERT(bc != NULL && bc->copfuncs != NULL);

	/*
	 * NOTE: *ret0_jump is only set for BPF_COPX (the bounds check
	 * below); for BPF_COP the caller gets no ret0 jump from here.
	 */
	if (BPF_MISCOP(pc->code) == BPF_COPX) {
		/* if (X >= bc->nfuncs) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_GREATER_EQUAL,
		    BJ_XREG, 0,
		    SLJIT_IMM, bc->nfuncs);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}

	if (jump != NULL)
		*ret0_jump = jump;

	/*
	 * Copy bpf_copfunc_t arguments to registers:
	 * REG1 = ctx, REG2 = args, REG3 = A.
	 */
#if BJ_AREG != SLJIT_SCRATCH_REG3
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UI,
	    SLJIT_SCRATCH_REG3, 0,
	    BJ_AREG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_P,
	    SLJIT_SCRATCH_REG1, 0,
	    SLJIT_MEM1(SLJIT_LOCALS_REG),
	    offsetof(struct bpfjit_stack, ctx));
	if (status != SLJIT_SUCCESS)
		return status;

	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_P,
	    SLJIT_SCRATCH_REG2, 0,
	    BJ_ARGS, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	if (BPF_MISCOP(pc->code) == BPF_COP) {
		/* Direct call: the coprocessor function is known at
		 * compile time. */
		status = sljit_emit_ijump(compiler,
		    SLJIT_CALL3,
		    SLJIT_IMM, SLJIT_FUNC_OFFSET(bc->copfuncs[pc->k]));
		if (status != SLJIT_SUCCESS)
			return status;
	} else if (BPF_MISCOP(pc->code) == BPF_COPX) {
		/* load ctx->copfuncs */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_COPF_PTR, 0,
		    SLJIT_MEM1(SLJIT_SCRATCH_REG1),
		    offsetof(struct bpf_ctx, copfuncs));
		if (status != SLJIT_SUCCESS)
			return status;

		/*
		 * Load X to a register that can be used for
		 * memory addressing.
		 */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_COPF_IDX, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* Indirect call: copfuncs[X]. */
		status = sljit_emit_ijump(compiler,
		    SLJIT_CALL3,
		    SLJIT_MEM2(BJ_COPF_PTR, BJ_COPF_IDX),
		    SLJIT_WORD_SHIFT);
		if (status != SLJIT_SUCCESS)
			return status;

		/*
		 * BJ_COPF_PTR/BJ_COPF_IDX alias BJ_BUF/BJ_BUFLEN
		 * (see the register assignments at the top of the
		 * file), so the indirect call above clobbered them;
		 * reload both from BJ_ARGS.
		 */
		status = load_buf_buflen(compiler);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/* A = return value of the coprocessor function. */
#if BJ_AREG != SLJIT_RETURN_REG
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BJ_AREG, 0,
	    SLJIT_RETURN_REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	return status;
}
732 
733 /*
734  * Generate code for
735  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
736  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
737  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
738  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
739  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
740  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
741  */
static int
emit_pkt_read(struct sljit_compiler *compiler,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status = 0; /* XXX gcc 4.1 */
	uint32_t width;
	struct sljit_jump *jump;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *over_mchain_jump;
	/*
	 * If the caller already emitted a buflen test (non-NULL
	 * to_mchain_jump), the mchain handler must re-check buflen
	 * because control may arrive there with buflen != 0.
	 */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * buflen == 0 means the packet is an mbuf chain rather than
	 * a contiguous buffer; branch to the mchain handler below.
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	width = read_width(pc);

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* tmp1 = buflen - (pc->k + width); */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_TMP1REG, 0,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, k + width);
		if (status != SLJIT_SUCCESS)
			return status;

		/* buf += X; */
		status = sljit_emit_op2(compiler,
		    SLJIT_ADD,
		    BJ_BUF, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* if (tmp1 < X) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_LESS,
		    BJ_TMP1REG, 0,
		    BJ_XREG, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		/* Collect "return 0" jumps for the caller to resolve. */
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	switch (width) {
	case 4:
		status = emit_read32(compiler, k);
		break;
	case 2:
		status = emit_read16(compiler, k);
		break;
	case 1:
		status = emit_read8(compiler, k);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* buf -= X; undo the temporary base adjustment. */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_BUF, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

#ifdef _KERNEL
	/* Fast path done; skip over the mchain handler. */
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Read from the mbuf chain via the m_x* helper functions. */
	switch (width) {
	case 4:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xword);
		break;
	case 2:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xhalf);
		break;
	case 1:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xbyte);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
875 
876 static int
877 emit_memload(struct sljit_compiler *compiler,
878     sljit_si dst, uint32_t k, size_t extwords)
879 {
880 	int status;
881 	sljit_si src;
882 	sljit_sw srcw;
883 
884 	srcw = k * sizeof(uint32_t);
885 
886 	if (extwords == 0) {
887 		src = SLJIT_MEM1(SLJIT_LOCALS_REG);
888 		srcw += offsetof(struct bpfjit_stack, mem);
889 	} else {
890 		/* copy extmem pointer to the tmp1 register */
891 		status = sljit_emit_op1(compiler,
892 		    SLJIT_MOV_P,
893 		    BJ_TMP1REG, 0,
894 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
895 		    offsetof(struct bpfjit_stack, extmem));
896 		if (status != SLJIT_SUCCESS)
897 			return status;
898 		src = SLJIT_MEM1(BJ_TMP1REG);
899 	}
900 
901 	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, 0, src, srcw);
902 }
903 
904 static int
905 emit_memstore(struct sljit_compiler *compiler,
906     sljit_si src, uint32_t k, size_t extwords)
907 {
908 	int status;
909 	sljit_si dst;
910 	sljit_sw dstw;
911 
912 	dstw = k * sizeof(uint32_t);
913 
914 	if (extwords == 0) {
915 		dst = SLJIT_MEM1(SLJIT_LOCALS_REG);
916 		dstw += offsetof(struct bpfjit_stack, mem);
917 	} else {
918 		/* copy extmem pointer to the tmp1 register */
919 		status = sljit_emit_op1(compiler,
920 		    SLJIT_MOV_P,
921 		    BJ_TMP1REG, 0,
922 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
923 		    offsetof(struct bpfjit_stack, extmem));
924 		if (status != SLJIT_SUCCESS)
925 			return status;
926 		dst = SLJIT_MEM1(BJ_TMP1REG);
927 	}
928 
929 	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, dstw, src, 0);
930 }
931 
932 /*
933  * Generate code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
934  */
static int
emit_msh(struct sljit_compiler *compiler,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *jump, *over_mchain_jump;
	/*
	 * If the caller already emitted a buflen test (non-NULL
	 * to_mchain_jump), the mchain handler must re-check buflen.
	 */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * buflen == 0 means the packet is an mbuf chain; branch to
	 * the mchain handler below.
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	/* tmp1 = buf[k] */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2; note the destination is BJ_XREG, not tmp1 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;

#ifdef _KERNEL
	/* Fast path done; skip over the mchain handler. */
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* tmp1 = byte k read from the mbuf chain */
	status = emit_xcall(compiler, pc, BJ_TMP1REG, 0, &jump, &m_xbyte);
	if (status != SLJIT_SUCCESS)
		return status;

	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;


	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
1042 
1043 static int
1044 emit_pow2_division(struct sljit_compiler *compiler, uint32_t k)
1045 {
1046 	int shift = 0;
1047 	int status = SLJIT_SUCCESS;
1048 
1049 	while (k > 1) {
1050 		k >>= 1;
1051 		shift++;
1052 	}
1053 
1054 	BJ_ASSERT(k == 1 && shift < 32);
1055 
1056 	if (shift != 0) {
1057 		status = sljit_emit_op2(compiler,
1058 		    SLJIT_LSHR|SLJIT_INT_OP,
1059 		    BJ_AREG, 0,
1060 		    BJ_AREG, 0,
1061 		    SLJIT_IMM, shift);
1062 	}
1063 
1064 	return status;
1065 }
1066 
1067 #if !defined(BPFJIT_USE_UDIV)
1068 static sljit_uw
1069 divide(sljit_uw x, sljit_uw y)
1070 {
1071 
1072 	return (uint32_t)x / (uint32_t)y;
1073 }
1074 #endif
1075 
1076 /*
1077  * Generate A = A / div.
1078  * divt,divw are either SLJIT_IMM,pc->k or BJ_XREG,0.
1079  */
1080 static int
1081 emit_division(struct sljit_compiler *compiler, int divt, sljit_sw divw)
1082 {
1083 	int status;
1084 
1085 #if BJ_XREG == SLJIT_RETURN_REG   || \
1086     BJ_XREG == SLJIT_SCRATCH_REG1 || \
1087     BJ_XREG == SLJIT_SCRATCH_REG2 || \
1088     BJ_AREG == SLJIT_SCRATCH_REG2
1089 #error "Not supported assignment of registers."
1090 #endif
1091 
1092 #if BJ_AREG != SLJIT_SCRATCH_REG1
1093 	status = sljit_emit_op1(compiler,
1094 	    SLJIT_MOV,
1095 	    SLJIT_SCRATCH_REG1, 0,
1096 	    BJ_AREG, 0);
1097 	if (status != SLJIT_SUCCESS)
1098 		return status;
1099 #endif
1100 
1101 	status = sljit_emit_op1(compiler,
1102 	    SLJIT_MOV,
1103 	    SLJIT_SCRATCH_REG2, 0,
1104 	    divt, divw);
1105 	if (status != SLJIT_SUCCESS)
1106 		return status;
1107 
1108 #if defined(BPFJIT_USE_UDIV)
1109 	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);
1110 
1111 #if BJ_AREG != SLJIT_SCRATCH_REG1
1112 	status = sljit_emit_op1(compiler,
1113 	    SLJIT_MOV,
1114 	    BJ_AREG, 0,
1115 	    SLJIT_SCRATCH_REG1, 0);
1116 	if (status != SLJIT_SUCCESS)
1117 		return status;
1118 #endif
1119 #else
1120 	status = sljit_emit_ijump(compiler,
1121 	    SLJIT_CALL2,
1122 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(divide));
1123 
1124 #if BJ_AREG != SLJIT_RETURN_REG
1125 	status = sljit_emit_op1(compiler,
1126 	    SLJIT_MOV,
1127 	    BJ_AREG, 0,
1128 	    SLJIT_RETURN_REG, 0);
1129 	if (status != SLJIT_SUCCESS)
1130 		return status;
1131 #endif
1132 #endif
1133 
1134 	return status;
1135 }
1136 
1137 /*
1138  * Return true if pc is a "read from packet" instruction.
1139  * If length is not NULL and return value is true, *length will
1140  * be set to a safe length required to read a packet.
1141  */
1142 static bool
1143 read_pkt_insn(const struct bpf_insn *pc, bpfjit_abc_length_t *length)
1144 {
1145 	bool rv;
1146 	bpfjit_abc_length_t width;
1147 
1148 	switch (BPF_CLASS(pc->code)) {
1149 	default:
1150 		rv = false;
1151 		break;
1152 
1153 	case BPF_LD:
1154 		rv = BPF_MODE(pc->code) == BPF_ABS ||
1155 		     BPF_MODE(pc->code) == BPF_IND;
1156 		if (rv)
1157 			width = read_width(pc);
1158 		break;
1159 
1160 	case BPF_LDX:
1161 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
1162 		width = 1;
1163 		break;
1164 	}
1165 
1166 	if (rv && length != NULL) {
1167 		/*
1168 		 * Values greater than UINT32_MAX will generate
1169 		 * unconditional "return 0".
1170 		 */
1171 		*length = (uint32_t)pc->k + width;
1172 	}
1173 
1174 	return rv;
1175 }
1176 
1177 static void
1178 optimize_init(struct bpfjit_insn_data *insn_dat, size_t insn_count)
1179 {
1180 	size_t i;
1181 
1182 	for (i = 0; i < insn_count; i++) {
1183 		SLIST_INIT(&insn_dat[i].bjumps);
1184 		insn_dat[i].invalid = BJ_INIT_NOBITS;
1185 	}
1186 }
1187 
1188 /*
1189  * The function divides instructions into blocks. Destination of a jump
1190  * instruction starts a new block. BPF_RET and BPF_JMP instructions
1191  * terminate a block. Blocks are linear, that is, there are no jumps out
1192  * from the middle of a block and there are no jumps in to the middle of
1193  * a block.
1194  *
1195  * The function also sets bits in *initmask for memwords that
1196  * need to be initialized to zero. Note that this set should be empty
1197  * for any valid kernel filter program.
1198  */
static bool
optimize_pass1(const bpf_ctx_t *bc, const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count,
    bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
{
	struct bpfjit_jump *jtf;
	size_t i;
	uint32_t jt, jf;
	bpfjit_abc_length_t length;
	bpf_memword_init_t invalid; /* borrowed from bpf_filter() */
	bool unreachable;

	const size_t memwords = GET_MEMWORDS(bc);

	*hints = 0;
	*initmask = BJ_INIT_NOBITS;

	unreachable = false;
	/* Initially every memword (and A and X) is uninitialized. */
	invalid = ~BJ_INIT_NOBITS;

	for (i = 0; i < insn_count; i++) {
		/* Any jump landing on this insn makes it reachable again. */
		if (!SLIST_EMPTY(&insn_dat[i].bjumps))
			unreachable = false;
		insn_dat[i].unreachable = unreachable;

		if (unreachable)
			continue;

		/* Merge invalid-sets flowing in from jumps to this insn. */
		invalid |= insn_dat[i].invalid;

		/*
		 * A read past UINT32_MAX always fails; code after it in
		 * the same block can never execute.
		 */
		if (read_pkt_insn(&insns[i], &length) && length > UINT32_MAX)
			unreachable = true;

		switch (BPF_CLASS(insns[i].code)) {
		case BPF_RET:
			/* Returning A requires A to be initialized. */
			if (BPF_RVAL(insns[i].code) == BPF_A)
				*initmask |= invalid & BJ_INIT_ABIT;

			unreachable = true;
			continue;

		case BPF_LD:
			/* 32bit packet loads need the LDW helper. */
			if ((BPF_MODE(insns[i].code) == BPF_IND ||
			    BPF_MODE(insns[i].code) == BPF_ABS) &&
			    read_width(&insns[i]) == 4) {
				*hints |= BJ_HINT_LDW;
			}

			/* BPF_IND reads X, so X must be initialized. */
			if (BPF_MODE(insns[i].code) == BPF_IND) {
				*hints |= BJ_HINT_XREG | BJ_HINT_IND;
				*initmask |= invalid & BJ_INIT_XBIT;
			}

			/* Loading M[k] requires M[k] to be initialized. */
			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < memwords) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			/* A is defined after any load. */
			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_LDX:
			*hints |= BJ_HINT_XREG | BJ_HINT_LDX;

			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < memwords) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			/* X is defined after any LDX. */
			invalid &= ~BJ_INIT_XBIT;
			continue;

		case BPF_ST:
			/* Storing A requires A to be initialized. */
			*initmask |= invalid & BJ_INIT_ABIT;

			if ((uint32_t)insns[i].k < memwords)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_STX:
			*hints |= BJ_HINT_XREG;
			*initmask |= invalid & BJ_INIT_XBIT;

			if ((uint32_t)insns[i].k < memwords)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_ALU:
			/* ALU ops read A (BPF_NEG included). */
			*initmask |= invalid & BJ_INIT_ABIT;

			/* Binary ops with BPF_X source also read X. */
			if (insns[i].code != (BPF_ALU|BPF_NEG) &&
			    BPF_SRC(insns[i].code) == BPF_X) {
				*hints |= BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_XBIT;
			}

			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(insns[i].code)) {
			case BPF_TAX: // X <- A
				*hints |= BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_ABIT;
				invalid &= ~BJ_INIT_XBIT;
				continue;

			case BPF_TXA: // A <- X
				*hints |= BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_XBIT;
				invalid &= ~BJ_INIT_ABIT;
				continue;

			case BPF_COPX:
				*hints |= BJ_HINT_XREG;
				/* FALLTHROUGH */

			case BPF_COP:
				/* Coprocessor calls read and define A. */
				*hints |= BJ_HINT_COP;
				*initmask |= invalid & BJ_INIT_ABIT;
				invalid &= ~BJ_INIT_ABIT;
				continue;
			}

			continue;

		case BPF_JMP:
			/* Initialize abc_length for ABC pass. */
			insn_dat[i].u.jdata.abc_length = MAX_ABC_LENGTH;

			if (BPF_OP(insns[i].code) == BPF_JA) {
				jt = jf = insns[i].k;
			} else {
				jt = insns[i].jt;
				jf = insns[i].jf;
			}

			/* Reject jumps past the end of the program. */
			if (jt >= insn_count - (i + 1) ||
			    jf >= insn_count - (i + 1)) {
				return false;
			}

			/*
			 * Both targets skip the next insn, so fall-through
			 * code is dead until the next jump destination.
			 */
			if (jt > 0 && jf > 0)
				unreachable = true;

			/* Convert relative offsets to absolute indices. */
			jt += i + 1;
			jf += i + 1;

			jtf = insn_dat[i].u.jdata.jtf;

			/* Register this jump with both destination insns. */
			jtf[0].sjump = NULL;
			jtf[0].jdata = &insn_dat[i].u.jdata;
			SLIST_INSERT_HEAD(&insn_dat[jt].bjumps,
			    &jtf[0], entries);

			if (jf != jt) {
				jtf[1].sjump = NULL;
				jtf[1].jdata = &insn_dat[i].u.jdata;
				SLIST_INSERT_HEAD(&insn_dat[jf].bjumps,
				    &jtf[1], entries);
			}

			/* Propagate the invalid-set to both destinations. */
			insn_dat[jf].invalid |= invalid;
			insn_dat[jt].invalid |= invalid;
			invalid = 0;

			continue;
		}
	}

	return true;
}
1373 
1374 /*
1375  * Array Bounds Check Elimination (ABC) pass.
1376  */
static void
optimize_pass2(const bpf_ctx_t *bc, const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	struct bpfjit_jump *jmp;
	const struct bpf_insn *pc;
	struct bpfjit_insn_data *pd;
	size_t i;
	bpfjit_abc_length_t length, abc_length = 0;

	const size_t extwords = GET_EXTWORDS(bc);

	/*
	 * Walk instructions backwards, tracking in abc_length the
	 * largest packet read that is guaranteed to happen on every
	 * path from here to a program exit.
	 */
	for (i = insn_count; i != 0; i--) {
		pc = &insns[i-1];
		pd = &insn_dat[i-1];

		if (pd->unreachable)
			continue;

		switch (BPF_CLASS(pc->code)) {
		case BPF_RET:
			/*
			 * It's quite common for bpf programs to
			 * check packet bytes in increasing order
			 * and return zero if bytes don't match
			 * specified criterion. Such programs disable
			 * ABC optimization completely because for
			 * every jump there is a branch with no read
			 * instruction.
			 * With no side effects, BPF_STMT(BPF_RET+BPF_K, 0)
			 * is indistinguishable from out-of-bound load.
			 * Therefore, abc_length can be set to
			 * MAX_ABC_LENGTH and enable ABC for many
			 * bpf programs.
			 * If this optimization encounters any
			 * instruction with a side effect, it will
			 * reset abc_length.
			 */
			if (BPF_RVAL(pc->code) == BPF_K && pc->k == 0)
				abc_length = MAX_ABC_LENGTH;
			else
				abc_length = 0;
			break;

		case BPF_MISC:
			if (BPF_MISCOP(pc->code) == BPF_COP ||
			    BPF_MISCOP(pc->code) == BPF_COPX) {
				/* COP instructions can have side effects. */
				abc_length = 0;
			}
			break;

		case BPF_ST:
		case BPF_STX:
			if (extwords != 0) {
				/* Write to memory is visible after a call. */
				abc_length = 0;
			}
			break;

		case BPF_JMP:
			/* Restart from the value merged in from targets. */
			abc_length = pd->u.jdata.abc_length;
			break;

		default:
			if (read_pkt_insn(pc, &length)) {
				/* A larger read subsumes smaller ones. */
				if (abc_length < length)
					abc_length = length;
				pd->u.rdata.abc_length = abc_length;
			}
			break;
		}

		/*
		 * Meet over incoming jumps: a jump source may only
		 * assume the minimum of its targets' abc_lengths.
		 */
		SLIST_FOREACH(jmp, &pd->bjumps, entries) {
			if (jmp->jdata->abc_length > abc_length)
				jmp->jdata->abc_length = abc_length;
		}
	}
}
1456 
/*
 * Forward pass: compute check_length for every packet-read insn.
 * checked_length is the number of bytes already verified to be in
 * the buffer on all paths reaching the current insn; a read only
 * needs an explicit bounds check when its abc_length exceeds it.
 */
static void
optimize_pass3(const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	struct bpfjit_jump *jmp;
	size_t i;
	bpfjit_abc_length_t checked_length = 0;

	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/* Meet over incoming jumps: keep the smallest guarantee. */
		SLIST_FOREACH(jmp, &insn_dat[i].bjumps, entries) {
			if (jmp->jdata->checked_length < checked_length)
				checked_length = jmp->jdata->checked_length;
		}

		if (BPF_CLASS(insns[i].code) == BPF_JMP) {
			/* Record the guarantee carried along this jump. */
			insn_dat[i].u.jdata.checked_length = checked_length;
		} else if (read_pkt_insn(&insns[i], NULL)) {
			struct bpfjit_read_pkt_data *rdata =
			    &insn_dat[i].u.rdata;
			rdata->check_length = 0;
			/* Emit a check only when it extends the guarantee. */
			if (checked_length < rdata->abc_length) {
				checked_length = rdata->abc_length;
				rdata->check_length = checked_length;
			}
		}
	}
}
1487 
1488 static bool
1489 optimize(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1490     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1491     bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
1492 {
1493 
1494 	optimize_init(insn_dat, insn_count);
1495 
1496 	if (!optimize_pass1(bc, insns, insn_dat, insn_count, initmask, hints))
1497 		return false;
1498 
1499 	optimize_pass2(bc, insns, insn_dat, insn_count);
1500 	optimize_pass3(insns, insn_dat, insn_count);
1501 
1502 	return true;
1503 }
1504 
1505 /*
1506  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1507  */
1508 static int
1509 bpf_alu_to_sljit_op(const struct bpf_insn *pc)
1510 {
1511 
1512 	/*
1513 	 * Note: all supported 64bit arches have 32bit multiply
1514 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1515 	 */
1516 	switch (BPF_OP(pc->code)) {
1517 	case BPF_ADD: return SLJIT_ADD;
1518 	case BPF_SUB: return SLJIT_SUB;
1519 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1520 	case BPF_OR:  return SLJIT_OR;
1521 	case BPF_AND: return SLJIT_AND;
1522 	case BPF_LSH: return SLJIT_SHL;
1523 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
1524 	default:
1525 		BJ_ASSERT(false);
1526 		return 0;
1527 	}
1528 }
1529 
1530 /*
1531  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1532  */
1533 static int
1534 bpf_jmp_to_sljit_cond(const struct bpf_insn *pc, bool negate)
1535 {
1536 	/*
1537 	 * Note: all supported 64bit arches have 32bit comparison
1538 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1539 	 */
1540 	int rv = SLJIT_INT_OP;
1541 
1542 	switch (BPF_OP(pc->code)) {
1543 	case BPF_JGT:
1544 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1545 		break;
1546 	case BPF_JGE:
1547 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1548 		break;
1549 	case BPF_JEQ:
1550 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1551 		break;
1552 	case BPF_JSET:
1553 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1554 		break;
1555 	default:
1556 		BJ_ASSERT(false);
1557 	}
1558 
1559 	return rv;
1560 }
1561 
1562 /*
1563  * Convert BPF_K and BPF_X to sljit register.
1564  */
1565 static int
1566 kx_to_reg(const struct bpf_insn *pc)
1567 {
1568 
1569 	switch (BPF_SRC(pc->code)) {
1570 	case BPF_K: return SLJIT_IMM;
1571 	case BPF_X: return BJ_XREG;
1572 	default:
1573 		BJ_ASSERT(false);
1574 		return 0;
1575 	}
1576 }
1577 
1578 static sljit_sw
1579 kx_to_reg_arg(const struct bpf_insn *pc)
1580 {
1581 
1582 	switch (BPF_SRC(pc->code)) {
1583 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1584 	case BPF_X: return 0;               /* BJ_XREG, 0,      */
1585 	default:
1586 		BJ_ASSERT(false);
1587 		return 0;
1588 	}
1589 }
1590 
/*
 * Translate the BPF program into sljit instructions.
 * Returns true on success; on failure the caller is expected to
 * discard the compiler.
 */
static bool
generate_insn_code(struct sljit_compiler *compiler, const bpf_ctx_t *bc,
    const struct bpf_insn *insns, struct bpfjit_insn_data *insn_dat,
    size_t insn_count)
{
	/* a list of jumps to out-of-bound return from a generated function */
	struct sljit_jump **ret0;
	size_t ret0_size, ret0_maxsize;

	struct sljit_jump *jump;
	struct sljit_label *label;
	const struct bpf_insn *pc;
	struct bpfjit_jump *bjump, *jtf;
	struct sljit_jump *to_mchain_jump;

	size_t i;
	int status;
	int branching, negate;
	unsigned int rval, mode, src;
	uint32_t jt, jf;

	bool unconditional_ret;
	bool rv;

	const size_t extwords = GET_EXTWORDS(bc);
	const size_t memwords = GET_MEMWORDS(bc);

	ret0 = NULL;
	rv = false;

	/* Start with room for 64 "return 0" jumps; grown by append_jump(). */
	ret0_size = 0;
	ret0_maxsize = 64;
	ret0 = BJ_ALLOC(ret0_maxsize * sizeof(ret0[0]));
	if (ret0 == NULL)
		goto fail;

	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/*
		 * Resolve jumps to the current insn.
		 */
		label = NULL;
		SLIST_FOREACH(bjump, &insn_dat[i].bjumps, entries) {
			if (bjump->sjump != NULL) {
				/* Emit one shared label for all jumps here. */
				if (label == NULL)
					label = sljit_emit_label(compiler);
				if (label == NULL)
					goto fail;
				sljit_set_label(bjump->sjump, label);
			}
		}

		to_mchain_jump = NULL;
		unconditional_ret = false;

		/* Emit the bounds check computed by the ABC passes. */
		if (read_pkt_insn(&insns[i], NULL)) {
			if (insn_dat[i].u.rdata.check_length > UINT32_MAX) {
				/* Jump to "return 0" unconditionally. */
				unconditional_ret = true;
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (insn_dat[i].u.rdata.check_length > 0) {
				/* if (buflen < check_length) return 0; */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_LESS,
				    BJ_BUFLEN, 0,
				    SLJIT_IMM,
				    insn_dat[i].u.rdata.check_length);
				if (jump == NULL)
					goto fail;
#ifdef _KERNEL
				/*
				 * In the kernel a short buffer may continue
				 * in an mbuf chain; defer to the read insn.
				 */
				to_mchain_jump = jump;
#else
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
#endif
			}
		}

		pc = &insns[i];
		switch (BPF_CLASS(pc->code)) {

		default:
			goto fail;

		case BPF_LD:
			/* BPF_LD+BPF_IMM          A <- k */
			if (pc->code == (BPF_LD|BPF_IMM)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_MEM          A <- M[k] */
			if (pc->code == (BPF_LD|BPF_MEM)) {
				if ((uint32_t)pc->k >= memwords)
					goto fail;
				status = emit_memload(compiler,
				    BJ_AREG, pc->k, extwords);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV, /* size_t source */
				    BJ_AREG, 0,
				    SLJIT_MEM1(BJ_ARGS),
				    offsetof(struct bpf_args, wirelen));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			mode = BPF_MODE(pc->code);
			if (mode != BPF_ABS && mode != BPF_IND)
				goto fail;

			/* Packet is too short; "return 0" already emitted. */
			if (unconditional_ret)
				continue;

			status = emit_pkt_read(compiler, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_LDX:
			mode = BPF_MODE(pc->code);

			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
			if (mode == BPF_IMM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_XREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
			if (mode == BPF_LEN) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV, /* size_t source */
				    BJ_XREG, 0,
				    SLJIT_MEM1(BJ_ARGS),
				    offsetof(struct bpf_args, wirelen));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
			if (mode == BPF_MEM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				if ((uint32_t)pc->k >= memwords)
					goto fail;
				status = emit_memload(compiler,
				    BJ_XREG, pc->k, extwords);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
				goto fail;

			/* Packet is too short; "return 0" already emitted. */
			if (unconditional_ret)
				continue;

			status = emit_msh(compiler, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ST:
			/* M[k] <- A */
			if (pc->code != BPF_ST ||
			    (uint32_t)pc->k >= memwords) {
				goto fail;
			}

			status = emit_memstore(compiler,
			    BJ_AREG, pc->k, extwords);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_STX:
			/* M[k] <- X */
			if (pc->code != BPF_STX ||
			    (uint32_t)pc->k >= memwords) {
				goto fail;
			}

			status = emit_memstore(compiler,
			    BJ_XREG, pc->k, extwords);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ALU:
			if (pc->code == (BPF_ALU|BPF_NEG)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_NEG,
				    BJ_AREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			if (BPF_OP(pc->code) != BPF_DIV) {
				status = sljit_emit_op2(compiler,
				    bpf_alu_to_sljit_op(pc),
				    BJ_AREG, 0,
				    BJ_AREG, 0,
				    kx_to_reg(pc), kx_to_reg_arg(pc));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_DIV */

			src = BPF_SRC(pc->code);
			if (src != BPF_X && src != BPF_K)
				goto fail;

			/* division by zero? */
			if (src == BPF_X) {
				/* Runtime check: X == 0 -> return 0. */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_EQUAL|SLJIT_INT_OP,
				    BJ_XREG, 0,
				    SLJIT_IMM, 0);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (pc->k == 0) {
				/* Constant zero divisor: always return 0. */
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			}

			if (src == BPF_X) {
				status = emit_division(compiler, BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			} else if (pc->k != 0) {
				/* Power-of-two divisors become shifts. */
				if (pc->k & (pc->k - 1)) {
				    status = emit_division(compiler,
				        SLJIT_IMM, (uint32_t)pc->k);
				} else {
				    status = emit_pow2_division(compiler,
				        (uint32_t)pc->k);
				}
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_JMP:
			if (BPF_OP(pc->code) == BPF_JA) {
				jt = jf = pc->k;
			} else {
				jt = pc->jt;
				jf = pc->jf;
			}

			/*
			 * With jt == 0 the true branch falls through, so
			 * emit a jump on the negated condition instead.
			 */
			negate = (jt == 0) ? 1 : 0;
			branching = (jt == jf) ? 0 : 1;
			jtf = insn_dat[i].u.jdata.jtf;

			if (branching) {
				if (BPF_OP(pc->code) != BPF_JSET) {
					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
				} else {
					/* JSET: test (A & operand) != 0. */
					status = sljit_emit_op2(compiler,
					    SLJIT_AND,
					    BJ_TMP1REG, 0,
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
					if (status != SLJIT_SUCCESS)
						goto fail;

					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_TMP1REG, 0,
					    SLJIT_IMM, 0);
				}

				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[negate].sjump == NULL);
				jtf[negate].sjump = jump;
			}

			/*
			 * Emit an unconditional jump when neither branch
			 * falls through (or for BPF_JA).
			 */
			if (!branching || (jt != 0 && jf != 0)) {
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[branching].sjump == NULL);
				jtf[branching].sjump = jump;
			}

			continue;

		case BPF_RET:
			rval = BPF_RVAL(pc->code);
			if (rval == BPF_X)
				goto fail;

			/* BPF_RET+BPF_K    accept k bytes */
			if (rval == BPF_K) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			/* BPF_RET+BPF_A    accept A bytes */
			if (rval == BPF_A) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(pc->code)) {
			case BPF_TAX:
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_XREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_TXA:
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_COP:
			case BPF_COPX:
				/* Coprocessor calls need a context. */
				if (bc == NULL || bc->copfuncs == NULL)
					goto fail;
				if (BPF_MISCOP(pc->code) == BPF_COP &&
				    (uint32_t)pc->k >= bc->nfuncs) {
					goto fail;
				}

				jump = NULL;
				status = emit_cop(compiler, bc, pc, &jump);
				if (status != SLJIT_SUCCESS)
					goto fail;

				if (jump != NULL && !append_jump(jump,
				    &ret0, &ret0_size, &ret0_maxsize))
					goto fail;

				continue;
			}

			goto fail;
		} /* switch */
	} /* main loop */

	BJ_ASSERT(ret0_size <= ret0_maxsize);

	/* Point all collected "return 0" jumps at a common epilogue. */
	if (ret0_size > 0) {
		label = sljit_emit_label(compiler);
		if (label == NULL)
			goto fail;
		for (i = 0; i < ret0_size; i++)
			sljit_set_label(ret0[i], label);
	}

	rv = true;

fail:
	if (ret0 != NULL)
		BJ_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));

	return rv;
}
2030 
/*
 * Compile a BPF program into native code.
 * Returns a callable function pointer, or NULL on failure.
 * The caller must release the result with bpfjit_free_code().
 */
bpfjit_func_t
bpfjit_generate_code(const bpf_ctx_t *bc,
    const struct bpf_insn *insns, size_t insn_count)
{
	void *rv;
	struct sljit_compiler *compiler;

	size_t i;
	int status;

	/* optimization related */
	bpf_memword_init_t initmask;
	bpfjit_hint_t hints;

	/* memory store location for initial zero initialization */
	sljit_si mem_reg;
	sljit_sw mem_off;

	struct bpfjit_insn_data *insn_dat;

	const size_t extwords = GET_EXTWORDS(bc);
	const size_t memwords = GET_MEMWORDS(bc);
	const bpf_memword_init_t preinited = extwords ? bc->preinited : 0;

	rv = NULL;
	compiler = NULL;
	insn_dat = NULL;

	if (memwords > MAX_MEMWORDS)
		goto fail;

	/* Also guards the multiplication in BJ_ALLOC below. */
	if (insn_count == 0 || insn_count > SIZE_MAX / sizeof(insn_dat[0]))
		goto fail;

	insn_dat = BJ_ALLOC(insn_count * sizeof(insn_dat[0]));
	if (insn_dat == NULL)
		goto fail;

	if (!optimize(bc, insns, insn_dat, insn_count, &initmask, &hints))
		goto fail;

	compiler = sljit_create_compiler();
	if (compiler == NULL)
		goto fail;

#if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
	sljit_compiler_verbose(compiler, stderr);
#endif

	/* Function prologue: 2 args, 3 saved regs, local stack frame. */
	status = sljit_emit_enter(compiler,
	    2, nscratches(hints), 3, sizeof(struct bpfjit_stack));
	if (status != SLJIT_SUCCESS)
		goto fail;

	if (hints & BJ_HINT_COP) {
		/* save ctx argument */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, ctx),
		    BJ_CTX_ARG, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (extwords == 0) {
		/* Memwords live in the generated function's own frame. */
		mem_reg = SLJIT_MEM1(SLJIT_LOCALS_REG);
		mem_off = offsetof(struct bpfjit_stack, mem);
	} else {
		/* copy "mem" argument from bpf_args to bpfjit_stack */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_TMP1REG, 0,
		    SLJIT_MEM1(BJ_ARGS), offsetof(struct bpf_args, mem));
		if (status != SLJIT_SUCCESS)
			goto fail;

		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, extmem),
		    BJ_TMP1REG, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;

		mem_reg = SLJIT_MEM1(BJ_TMP1REG);
		mem_off = 0;
	}

	/*
	 * Exclude pre-initialised external memory words but keep
	 * initialization statuses of A and X registers in case
	 * bc->preinited wrongly sets those two bits.
	 */
	initmask &= ~preinited | BJ_INIT_ABIT | BJ_INIT_XBIT;

#if defined(_KERNEL)
	/* bpf_filter() checks initialization of memwords. */
	BJ_ASSERT((initmask & (BJ_INIT_MBIT(memwords) - 1)) == 0);
#endif
	for (i = 0; i < memwords; i++) {
		if (initmask & BJ_INIT_MBIT(i)) {
			/* M[i] = 0; */
			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    mem_reg, mem_off + i * sizeof(uint32_t),
			    SLJIT_IMM, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;
		}
	}

	if (initmask & BJ_INIT_ABIT) {
		/* A = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_AREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (initmask & BJ_INIT_XBIT) {
		/* X = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_XREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	status = load_buf_buflen(compiler);
	if (status != SLJIT_SUCCESS)
		goto fail;

	if (!generate_insn_code(compiler, bc, insns, insn_dat, insn_count))
		goto fail;

	/* Fall-through off the end of the program returns 0. */
	status = sljit_emit_return(compiler,
	    SLJIT_MOV_UI,
	    SLJIT_IMM, 0);
	if (status != SLJIT_SUCCESS)
		goto fail;

	rv = sljit_generate_code(compiler);

fail:
	if (compiler != NULL)
		sljit_free_compiler(compiler);

	if (insn_dat != NULL)
		BJ_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));

	return (bpfjit_func_t)rv;
}
2187 
/*
 * Release native code previously returned by bpfjit_generate_code().
 */
void
bpfjit_free_code(bpfjit_func_t code)
{

	sljit_free_code((void *)code);
}
2194