xref: /netbsd-src/sys/net/bpfjit.c (revision f89f6560d453f5e37386cc7938c072d2f528b9fa)
1 /*	$NetBSD: bpfjit.c,v 1.43 2015/02/14 21:32:46 alnsn Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2015 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.43 2015/02/14 21:32:46 alnsn Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.43 2015/02/14 21:32:46 alnsn Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <assert.h>
44 #define BJ_ASSERT(c) assert(c)
45 #else
46 #define BJ_ASSERT(c) KASSERT(c)
47 #endif
48 
49 #ifndef _KERNEL
50 #include <stdlib.h>
51 #define BJ_ALLOC(sz) malloc(sz)
52 #define BJ_FREE(p, sz) free(p)
53 #else
54 #include <sys/kmem.h>
55 #define BJ_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
56 #define BJ_FREE(p, sz) kmem_free(p, sz)
57 #endif
58 
59 #ifndef _KERNEL
60 #include <limits.h>
61 #include <stdbool.h>
62 #include <stddef.h>
63 #include <stdint.h>
64 #else
65 #include <sys/atomic.h>
66 #include <sys/module.h>
67 #endif
68 
69 #define	__BPF_PRIVATE
70 #include <net/bpf.h>
71 #include <net/bpfjit.h>
72 #include <sljitLir.h>
73 
74 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
75 #include <stdio.h> /* for stderr */
76 #endif
77 
78 /*
79  * Arguments of generated bpfjit_func_t.
80  * The first argument is reassigned upon entry
81  * to a more frequently used buf argument.
82  */
83 #define BJ_CTX_ARG	SLJIT_SAVED_REG1
84 #define BJ_ARGS		SLJIT_SAVED_REG2
85 
86 /*
87  * Permanent register assignments.
88  */
89 #define BJ_BUF		SLJIT_SAVED_REG1
90 //#define BJ_ARGS	SLJIT_SAVED_REG2
91 #define BJ_BUFLEN	SLJIT_SAVED_REG3
92 #define BJ_AREG		SLJIT_SCRATCH_REG1
93 #define BJ_TMP1REG	SLJIT_SCRATCH_REG2
94 #define BJ_TMP2REG	SLJIT_SCRATCH_REG3
95 #define BJ_XREG		SLJIT_TEMPORARY_EREG1
96 #define BJ_TMP3REG	SLJIT_TEMPORARY_EREG2
97 
98 #ifdef _KERNEL
99 #define MAX_MEMWORDS BPF_MAX_MEMWORDS
100 #else
101 #define MAX_MEMWORDS BPF_MEMWORDS
102 #endif
103 
104 #define BJ_INIT_NOBITS  ((bpf_memword_init_t)0)
105 #define BJ_INIT_MBIT(k) BPF_MEMWORD_INIT(k)
106 #define BJ_INIT_ABIT    BJ_INIT_MBIT(MAX_MEMWORDS)
107 #define BJ_INIT_XBIT    BJ_INIT_MBIT(MAX_MEMWORDS + 1)
108 
109 /*
110  * Get a number of memwords and external memwords from a bpf_ctx object.
111  */
112 #define GET_EXTWORDS(bc) ((bc) ? (bc)->extwords : 0)
113 #define GET_MEMWORDS(bc) (GET_EXTWORDS(bc) ? GET_EXTWORDS(bc) : BPF_MEMWORDS)
114 
115 /*
116  * Optimization hints.
117  */
118 typedef unsigned int bpfjit_hint_t;
119 #define BJ_HINT_ABS  0x01 /* packet read at absolute offset   */
120 #define BJ_HINT_IND  0x02 /* packet read at variable offset   */
121 #define BJ_HINT_MSH  0x04 /* BPF_MSH instruction              */
122 #define BJ_HINT_COP  0x08 /* BPF_COP or BPF_COPX instruction  */
123 #define BJ_HINT_COPX 0x10 /* BPF_COPX instruction             */
124 #define BJ_HINT_XREG 0x20 /* BJ_XREG is needed                */
125 #define BJ_HINT_LDX  0x40 /* BPF_LDX instruction              */
126 #define BJ_HINT_PKT  (BJ_HINT_ABS|BJ_HINT_IND|BJ_HINT_MSH)
127 
128 /*
129  * Datatype for Array Bounds Check Elimination (ABC) pass.
130  */
131 typedef uint64_t bpfjit_abc_length_t;
132 #define MAX_ABC_LENGTH (UINT32_MAX + UINT64_C(4)) /* max. width is 4 */
133 
/*
 * On-stack frame of the generated code.  Members are accessed from
 * emitted code via SLJIT_MEM1(SLJIT_LOCALS_REG) + offsetof(...).
 */
struct bpfjit_stack
{
	bpf_ctx_t *ctx;
	uint32_t *extmem; /* pointer to external memory store */
	uint32_t reg; /* saved A or X register */
#ifdef _KERNEL
	int err; /* 3rd argument for m_xword/m_xhalf/m_xbyte function call */
#endif
	uint32_t mem[BPF_MEMWORDS]; /* internal memory store */
};
144 
145 /*
146  * Data for BPF_JMP instruction.
147  * Forward declaration for struct bpfjit_jump.
148  */
149 struct bpfjit_jump_data;
150 
/*
 * Node of bjumps list.
 */
struct bpfjit_jump {
	struct sljit_jump *sjump;         /* unresolved sljit jump to patch */
	SLIST_ENTRY(bpfjit_jump) entries; /* linkage in bpfjit_insn_data.bjumps */
	struct bpfjit_jump_data *jdata;   /* back pointer to owning jump data */
};
159 
/*
 * Data for BPF_JMP instruction.
 */
struct bpfjit_jump_data {
	/*
	 * These entries make up bjumps list:
	 * jtf[0] - when coming from jt path,
	 * jtf[1] - when coming from jf path.
	 */
	struct bpfjit_jump jtf[2];
	/*
	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
	 * Lengths appear to be byte counts (max. width of a read is 4);
	 * see MAX_ABC_LENGTH above.
	 */
	bpfjit_abc_length_t abc_length;
	/*
	 * Length checked by the last out-of-bounds check.
	 */
	bpfjit_abc_length_t checked_length;
};
179 
/*
 * Data for "read from packet" instructions.
 * See also read_pkt_insn() function below.
 */
struct bpfjit_read_pkt_data {
	/*
	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
	 */
	bpfjit_abc_length_t abc_length;
	/*
	 * If positive, emit "if (buflen < check_length) return 0"
	 * out-of-bounds check.
	 * Values greater than UINT32_MAX generate unconditional "return 0".
	 */
	bpfjit_abc_length_t check_length;
};
196 
/*
 * Additional (optimization-related) data for bpf_insn.
 */
struct bpfjit_insn_data {
	/* List of jumps to this insn. */
	SLIST_HEAD(, bpfjit_jump) bjumps;

	union {
		struct bpfjit_jump_data     jdata; /* for BPF_JMP insns */
		struct bpfjit_read_pkt_data rdata; /* for packet-read insns */
	} u;

	/*
	 * Bitmask over BJ_INIT_MBIT/BJ_INIT_ABIT/BJ_INIT_XBIT —
	 * presumably the set of words not yet initialized when this
	 * insn is reached; confirm against the optimization pass.
	 */
	bpf_memword_init_t invalid;
	/* Set when the insn can never execute. */
	bool unreachable;
};
212 
213 #ifdef _KERNEL
214 
215 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
216 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
217 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
218 
219 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
220 
/*
 * Kernel module control entry point.
 *
 * On MODULE_CMD_INIT the two function pointers of bpfjit_module_ops
 * are published with a membar_producer() after each store, so that
 * bj_free_code is globally visible no later than bj_generate_code
 * (NOTE(review): consumers presumably key off a non-NULL
 * bj_generate_code — confirm against the bpf hook-up code).
 * Unloading is refused (EOPNOTSUPP); unknown commands return ENOTTY.
 */
static int
bpfjit_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
		membar_producer();
		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
		membar_producer();
		return 0;

	case MODULE_CMD_FINI:
		return EOPNOTSUPP;

	default:
		return ENOTTY;
	}
}
240 #endif
241 
242 /*
243  * Return a number of scratch registers to pass
244  * to sljit_emit_enter() function.
245  */
246 static sljit_si
247 nscratches(bpfjit_hint_t hints)
248 {
249 	sljit_si rv = 2;
250 
251 #ifdef _KERNEL
252 	if (hints & BJ_HINT_PKT)
253 		rv = 3; /* xcall with three arguments */
254 #endif
255 
256 	if (hints & BJ_HINT_IND)
257 		rv = 3; /* uses BJ_TMP2REG */
258 
259 	if (hints & BJ_HINT_COP)
260 		rv = 3; /* calls copfunc with three arguments */
261 
262 	if (hints & BJ_HINT_XREG)
263 		rv = 4; /* uses BJ_XREG */
264 
265 #ifdef _KERNEL
266 	if (hints & BJ_HINT_LDX)
267 		rv = 5; /* uses BJ_TMP3REG */
268 #endif
269 
270 	if (hints & BJ_HINT_COPX)
271 		rv = 5; /* uses BJ_TMP3REG */
272 
273 	return rv;
274 }
275 
276 /*
277  * Return a number of saved registers to pass
278  * to sljit_emit_enter() function.
279  */
280 static sljit_si
281 nsaveds(bpfjit_hint_t hints)
282 {
283 	sljit_si rv = 3;
284 
285 	return rv;
286 }
287 
288 static uint32_t
289 read_width(const struct bpf_insn *pc)
290 {
291 
292 	switch (BPF_SIZE(pc->code)) {
293 	case BPF_W: return 4;
294 	case BPF_H: return 2;
295 	case BPF_B: return 1;
296 	default:    return 0;
297 	}
298 }
299 
/*
 * Copy buf and buflen members of bpf_args from BJ_ARGS
 * pointer to BJ_BUF and BJ_BUFLEN registers.
 *
 * Returns an sljit status code (SLJIT_SUCCESS on success).
 */
static int
load_buf_buflen(struct sljit_compiler *compiler)
{
	int status;

	/* BJ_BUF = args->pkt; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_P,
	    BJ_BUF, 0,
	    SLJIT_MEM1(BJ_ARGS),
	    offsetof(struct bpf_args, pkt));
	if (status != SLJIT_SUCCESS)
		return status;

	/* BJ_BUFLEN = args->buflen; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV, /* size_t source */
	    BJ_BUFLEN, 0,
	    SLJIT_MEM1(BJ_ARGS),
	    offsetof(struct bpf_args, buflen));

	return status;
}
325 
/*
 * Double the capacity of a jumps array.  On success *jumps and *size
 * are updated and true is returned; on overflow or allocation failure
 * the original array is left untouched and false is returned.
 */
static bool
grow_jumps(struct sljit_jump ***jumps, size_t *size)
{
	const size_t elemsz = sizeof(struct sljit_jump *);
	const size_t oldcnt = *size;
	const size_t newcnt = 2 * oldcnt;
	struct sljit_jump **tmp;

	/* Reject overflow of both the element count and the byte size. */
	if (newcnt < oldcnt || newcnt > SIZE_MAX / elemsz)
		return false;

	tmp = BJ_ALLOC(newcnt * elemsz);
	if (tmp == NULL)
		return false;

	memcpy(tmp, *jumps, oldcnt * elemsz);
	BJ_FREE(*jumps, oldcnt * elemsz);

	*jumps = tmp;
	*size = newcnt;
	return true;
}
348 
/*
 * Append a jump to a growable jumps array, doubling its capacity
 * first when full.  Returns false if the array could not be grown.
 */
static bool
append_jump(struct sljit_jump *jump, struct sljit_jump ***jumps,
    size_t *size, size_t *max_size)
{

	if (*size == *max_size) {
		if (!grow_jumps(jumps, max_size))
			return false;
	}

	(*jumps)[*size] = jump;
	(*size)++;
	return true;
}
359 
/*
 * Emit code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
 *
 * A single unsigned byte load from src+k into BJ_AREG; returns the
 * sljit status of the emitted operation.
 */
static int
emit_read8(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
{

	return sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(src), k);
}
372 
/*
 * Emit code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
 *
 * Two byte loads combined in big-endian (network) order:
 * A = (buf[k] << 8) + buf[k+1].
 */
static int
emit_read16(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
{
	int status;

	/* The second byte is read at k+1, which must not wrap around. */
	BJ_ASSERT(k <= UINT32_MAX - 1);

	/* A = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(src), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(src), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	return status;
}
416 
417 /*
418  * Emit code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
419  */
420 static int
421 emit_read32(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
422 {
423 	int status;
424 
425 	BJ_ASSERT(k <= UINT32_MAX - 3);
426 
427 	/* A = buf[k]; */
428 	status = sljit_emit_op1(compiler,
429 	    SLJIT_MOV_UB,
430 	    BJ_AREG, 0,
431 	    SLJIT_MEM1(src), k);
432 	if (status != SLJIT_SUCCESS)
433 		return status;
434 
435 	/* tmp1 = buf[k+1]; */
436 	status = sljit_emit_op1(compiler,
437 	    SLJIT_MOV_UB,
438 	    BJ_TMP1REG, 0,
439 	    SLJIT_MEM1(src), k+1);
440 	if (status != SLJIT_SUCCESS)
441 		return status;
442 
443 	/* A = A << 8; */
444 	status = sljit_emit_op2(compiler,
445 	    SLJIT_SHL,
446 	    BJ_AREG, 0,
447 	    BJ_AREG, 0,
448 	    SLJIT_IMM, 8);
449 	if (status != SLJIT_SUCCESS)
450 		return status;
451 
452 	/* A = A + tmp1; */
453 	status = sljit_emit_op2(compiler,
454 	    SLJIT_ADD,
455 	    BJ_AREG, 0,
456 	    BJ_AREG, 0,
457 	    BJ_TMP1REG, 0);
458 	if (status != SLJIT_SUCCESS)
459 		return status;
460 
461 	/* tmp1 = buf[k+2]; */
462 	status = sljit_emit_op1(compiler,
463 	    SLJIT_MOV_UB,
464 	    BJ_TMP1REG, 0,
465 	    SLJIT_MEM1(src), k+2);
466 	if (status != SLJIT_SUCCESS)
467 		return status;
468 
469 	/* A = A << 8; */
470 	status = sljit_emit_op2(compiler,
471 	    SLJIT_SHL,
472 	    BJ_AREG, 0,
473 	    BJ_AREG, 0,
474 	    SLJIT_IMM, 8);
475 	if (status != SLJIT_SUCCESS)
476 		return status;
477 
478 	/* A = A + tmp1; */
479 	status = sljit_emit_op2(compiler,
480 	    SLJIT_ADD,
481 	    BJ_AREG, 0,
482 	    BJ_AREG, 0,
483 	    BJ_TMP1REG, 0);
484 	if (status != SLJIT_SUCCESS)
485 		return status;
486 
487 	/* tmp1 = buf[k+3]; */
488 	status = sljit_emit_op1(compiler,
489 	    SLJIT_MOV_UB,
490 	    BJ_TMP1REG, 0,
491 	    SLJIT_MEM1(src), k+3);
492 	if (status != SLJIT_SUCCESS)
493 		return status;
494 
495 	/* A = A << 8; */
496 	status = sljit_emit_op2(compiler,
497 	    SLJIT_SHL,
498 	    BJ_AREG, 0,
499 	    BJ_AREG, 0,
500 	    SLJIT_IMM, 8);
501 	if (status != SLJIT_SUCCESS)
502 		return status;
503 
504 	/* A = A + tmp1; */
505 	status = sljit_emit_op2(compiler,
506 	    SLJIT_ADD,
507 	    BJ_AREG, 0,
508 	    BJ_AREG, 0,
509 	    BJ_TMP1REG, 0);
510 	return status;
511 }
512 
513 #ifdef _KERNEL
/*
 * Emit code for m_xword/m_xhalf/m_xbyte call.
 *
 * @pc BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
 *     BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
 *     BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
 *     BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
 *     BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
 *     BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
 *     BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
 *
 * @dst  register receiving the value returned by fn
 * @ret0 growable array collecting every jump that must be patched to
 *       the "return 0" exit; jumps emitted here are appended to it
 * @fn   kernel helper reading from the mbuf chain, called as
 *       fn(mbuf, k, &err) with &err pointing into bpfjit_stack
 *
 * The live A or X register is spilled to bpfjit_stack.reg around the
 * call and reloaded afterwards.
 */
static int
emit_xcall(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const struct bpf_insn *pc, int dst, struct sljit_jump ***ret0,
    size_t *ret0_size, size_t *ret0_maxsize,
    uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
{
#if BJ_XREG == SLJIT_RETURN_REG   || \
    BJ_XREG == SLJIT_SCRATCH_REG1 || \
    BJ_XREG == SLJIT_SCRATCH_REG2 || \
    BJ_XREG == SLJIT_SCRATCH_REG3
#error "Not supported assignment of registers."
#endif
	struct sljit_jump *jump;
	sljit_si save_reg;
	int status;

	/* BPF_MSH (class BPF_LDX) clobbers X, so A is the live register. */
	save_reg = (BPF_CLASS(pc->code) == BPF_LDX) ? BJ_AREG : BJ_XREG;

	if (save_reg == BJ_AREG || (hints & BJ_HINT_XREG)) {
		/* save A or X */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_UI, /* uint32_t destination */
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, reg),
		    save_reg, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/*
	 * Prepare registers for fn(mbuf, k, &err) call.
	 */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_SCRATCH_REG1, 0,
	    BJ_BUF, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
		if (pc->k == 0) {
			/* k = X; */
			status = sljit_emit_op1(compiler,
			    SLJIT_MOV,
			    SLJIT_SCRATCH_REG2, 0,
			    BJ_XREG, 0);
			if (status != SLJIT_SUCCESS)
				return status;
		} else {
			/* if (X > UINT32_MAX - pc->k) return 0; */
			jump = sljit_emit_cmp(compiler,
			    SLJIT_C_GREATER,
			    BJ_XREG, 0,
			    SLJIT_IMM, UINT32_MAX - pc->k);
			if (jump == NULL)
				return SLJIT_ERR_ALLOC_FAILED;
			if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
				return SLJIT_ERR_ALLOC_FAILED;

			/* k = X + pc->k; */
			status = sljit_emit_op2(compiler,
			    SLJIT_ADD,
			    SLJIT_SCRATCH_REG2, 0,
			    BJ_XREG, 0,
			    SLJIT_IMM, (uint32_t)pc->k);
			if (status != SLJIT_SUCCESS)
				return status;
		}
	} else {
		/* k = pc->k */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    SLJIT_SCRATCH_REG2, 0,
		    SLJIT_IMM, (uint32_t)pc->k);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/*
	 * The third argument of fn is an address on stack.
	 */
	status = sljit_get_local_base(compiler,
	    SLJIT_SCRATCH_REG3, 0,
	    offsetof(struct bpfjit_stack, err));
	if (status != SLJIT_SUCCESS)
		return status;

	/* fn(buf, k, &err); */
	status = sljit_emit_ijump(compiler,
	    SLJIT_CALL3,
	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
	if (status != SLJIT_SUCCESS)
		return status;

	if (dst != SLJIT_RETURN_REG) {
		/* move return value to dst */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    dst, 0,
		    SLJIT_RETURN_REG, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/* if (*err != 0) return 0; */
	jump = sljit_emit_cmp(compiler,
	    SLJIT_C_NOT_EQUAL|SLJIT_INT_OP,
	    SLJIT_MEM1(SLJIT_LOCALS_REG),
	    offsetof(struct bpfjit_stack, err),
	    SLJIT_IMM, 0);
	if (jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	if (save_reg == BJ_AREG || (hints & BJ_HINT_XREG)) {
		/* restore A or X */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_UI, /* uint32_t source */
		    save_reg, 0,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, reg));
		if (status != SLJIT_SUCCESS)
			return status;
	}

	return SLJIT_SUCCESS;
}
654 #endif
655 
/*
 * Emit code for BPF_COP and BPF_COPX instructions.
 *
 * For BPF_COP the coprocessor function is known at compile time
 * (bc->copfuncs[pc->k]); for BPF_COPX it is looked up at run time by
 * the X register, with an "X >= bc->nfuncs -> return 0" guard whose
 * jump is appended to the ret0 array.  The function is then called as
 * copfunc(ctx, args, A); its return value becomes the new A.
 * X is spilled to bpfjit_stack.reg around the call when BJ_HINT_LDX
 * says it is live.
 */
static int
emit_cop(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const bpf_ctx_t *bc, const struct bpf_insn *pc,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
#if BJ_XREG    == SLJIT_RETURN_REG   || \
    BJ_XREG    == SLJIT_SCRATCH_REG1 || \
    BJ_XREG    == SLJIT_SCRATCH_REG2 || \
    BJ_XREG    == SLJIT_SCRATCH_REG3 || \
    BJ_TMP3REG == SLJIT_SCRATCH_REG1 || \
    BJ_TMP3REG == SLJIT_SCRATCH_REG2 || \
    BJ_TMP3REG == SLJIT_SCRATCH_REG3
#error "Not supported assignment of registers."
#endif

	struct sljit_jump *jump;
	sljit_si call_reg;
	sljit_sw call_off;
	int status;

	BJ_ASSERT(bc != NULL && bc->copfuncs != NULL);

	if (hints & BJ_HINT_LDX) {
		/* save X */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_UI, /* uint32_t destination */
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, reg),
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	if (BPF_MISCOP(pc->code) == BPF_COP) {
		/* Direct call: target known at compile time. */
		call_reg = SLJIT_IMM;
		call_off = SLJIT_FUNC_OFFSET(bc->copfuncs[pc->k]);
	} else {
		/* if (X >= bc->nfuncs) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_GREATER_EQUAL,
		    BJ_XREG, 0,
		    SLJIT_IMM, bc->nfuncs);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;

		/* tmp1 = ctx; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_TMP1REG, 0,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, ctx));
		if (status != SLJIT_SUCCESS)
			return status;

		/* tmp1 = ctx->copfuncs; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_TMP1REG, 0,
		    SLJIT_MEM1(BJ_TMP1REG),
		    offsetof(struct bpf_ctx, copfuncs));
		if (status != SLJIT_SUCCESS)
			return status;

		/* tmp2 = X; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_TMP2REG, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* tmp3 = ctx->copfuncs[tmp2]; */
		call_reg = BJ_TMP3REG;
		call_off = 0;
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    call_reg, call_off,
		    SLJIT_MEM2(BJ_TMP1REG, BJ_TMP2REG),
		    SLJIT_WORD_SHIFT);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/*
	 * Copy bpf_copfunc_t arguments to registers.
	 */
#if BJ_AREG != SLJIT_SCRATCH_REG3
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UI,
	    SLJIT_SCRATCH_REG3, 0,
	    BJ_AREG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_P,
	    SLJIT_SCRATCH_REG1, 0,
	    SLJIT_MEM1(SLJIT_LOCALS_REG),
	    offsetof(struct bpfjit_stack, ctx));
	if (status != SLJIT_SUCCESS)
		return status;

	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_P,
	    SLJIT_SCRATCH_REG2, 0,
	    BJ_ARGS, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	status = sljit_emit_ijump(compiler,
	    SLJIT_CALL3, call_reg, call_off);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = copfunc(...); */
#if BJ_AREG != SLJIT_RETURN_REG
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BJ_AREG, 0,
	    SLJIT_RETURN_REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	if (hints & BJ_HINT_LDX) {
		/* restore X */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_UI, /* uint32_t source */
		    BJ_XREG, 0,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, reg));
		if (status != SLJIT_SUCCESS)
			return status;
	}

	return SLJIT_SUCCESS;
}
798 
/*
 * Generate code for
 * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
 * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
 * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
 * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
 * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
 * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
 *
 * @to_mchain_jump  in-kernel: an already-emitted jump taken when the
 *                  read must go through the mbuf-chain helpers; when
 *                  NULL, a "buflen == 0" test is emitted here instead
 * @ret0            growable array collecting jumps to the "return 0"
 *                  exit; bounds-check failures are appended to it
 */
static int
emit_pkt_read(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status = SLJIT_ERR_ALLOC_FAILED;
	uint32_t width;
	sljit_si ld_reg;
	struct sljit_jump *jump;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *over_mchain_jump;
	/* Caller already tested buflen iff it passed a jump in. */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	if (to_mchain_jump == NULL) {
		/* buflen == 0 means the packet is an mbuf chain. */
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	ld_reg = BJ_BUF;
	width = read_width(pc);
	if (width == 0)
		return SLJIT_ERR_ALLOC_FAILED;

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* tmp1 = buflen - (pc->k + width); */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_TMP1REG, 0,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, k + width);
		if (status != SLJIT_SUCCESS)
			return status;

		/* ld_reg = buf + X; */
		ld_reg = BJ_TMP2REG;
		status = sljit_emit_op2(compiler,
		    SLJIT_ADD,
		    ld_reg, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* if (tmp1 < X) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_LESS,
		    BJ_TMP1REG, 0,
		    BJ_XREG, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/*
	 * Don't emit wrapped-around reads. They're dead code but
	 * dead code elimination logic isn't smart enough to figure
	 * it out.
	 */
	if (k <= UINT32_MAX - width + 1) {
		switch (width) {
		case 4:
			status = emit_read32(compiler, ld_reg, k);
			break;
		case 2:
			status = emit_read16(compiler, ld_reg, k);
			break;
		case 1:
			status = emit_read8(compiler, ld_reg, k);
			break;
		}

		if (status != SLJIT_SUCCESS)
			return status;
	}

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Read from the mbuf chain with the matching helper. */
	switch (width) {
	case 4:
		status = emit_xcall(compiler, hints, pc, BJ_AREG,
		    ret0, ret0_size, ret0_maxsize, &m_xword);
		break;
	case 2:
		status = emit_xcall(compiler, hints, pc, BJ_AREG,
		    ret0, ret0_size, ret0_maxsize, &m_xhalf);
		break;
	case 1:
		status = emit_xcall(compiler, hints, pc, BJ_AREG,
		    ret0, ret0_size, ret0_maxsize, &m_xbyte);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return SLJIT_SUCCESS;
}
942 
/*
 * Emit code for a memword load: dst <- M[k].
 *
 * With no external memwords the word lives in the on-stack
 * bpfjit_stack.mem array; otherwise it is read through the extmem
 * pointer saved on the stack (clobbers BJ_TMP1REG).
 */
static int
emit_memload(struct sljit_compiler *compiler,
    sljit_si dst, uint32_t k, size_t extwords)
{
	int status;
	sljit_si src;
	sljit_sw srcw;

	srcw = k * sizeof(uint32_t);

	if (extwords == 0) {
		src = SLJIT_MEM1(SLJIT_LOCALS_REG);
		srcw += offsetof(struct bpfjit_stack, mem);
	} else {
		/* copy extmem pointer to the tmp1 register */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_TMP1REG, 0,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, extmem));
		if (status != SLJIT_SUCCESS)
			return status;
		src = SLJIT_MEM1(BJ_TMP1REG);
	}

	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, 0, src, srcw);
}
970 
/*
 * Emit code for a memword store: M[k] <- src.
 *
 * Mirror image of emit_memload(): targets the on-stack mem array or,
 * when external memwords are in use, the extmem store (clobbers
 * BJ_TMP1REG).
 */
static int
emit_memstore(struct sljit_compiler *compiler,
    sljit_si src, uint32_t k, size_t extwords)
{
	int status;
	sljit_si dst;
	sljit_sw dstw;

	dstw = k * sizeof(uint32_t);

	if (extwords == 0) {
		dst = SLJIT_MEM1(SLJIT_LOCALS_REG);
		dstw += offsetof(struct bpfjit_stack, mem);
	} else {
		/* copy extmem pointer to the tmp1 register */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_TMP1REG, 0,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, extmem));
		if (status != SLJIT_SUCCESS)
			return status;
		dst = SLJIT_MEM1(BJ_TMP1REG);
	}

	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, dstw, src, 0);
}
998 
/*
 * Emit code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
 *
 * @to_mchain_jump  in-kernel: an already-emitted jump taken when the
 *                  byte must be fetched from the mbuf chain; when
 *                  NULL, a "buflen == 0" test is emitted here instead
 * @ret0            growable array collecting jumps to the "return 0"
 *                  exit
 */
static int
emit_msh(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *jump, *over_mchain_jump;
	/* Caller already tested buflen iff it passed a jump in. */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	if (to_mchain_jump == NULL) {
		/* buflen == 0 means the packet is an mbuf chain. */
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	/* tmp1 = buf[k] */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* tmp1 = m_xbyte(mbuf, k, &err); */
	status = emit_xcall(compiler, hints, pc, BJ_TMP1REG,
	    ret0, ret0_size, ret0_maxsize, &m_xbyte);
	if (status != SLJIT_SUCCESS)
		return status;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;

	return SLJIT_SUCCESS;
}
1088 
1089 /*
1090  * Emit code for A = A / k or A = A % k when k is a power of 2.
1091  * @pc BPF_DIV or BPF_MOD instruction.
1092  */
1093 static int
1094 emit_pow2_moddiv(struct sljit_compiler *compiler, const struct bpf_insn *pc)
1095 {
1096 	uint32_t k = pc->k;
1097 	int status = SLJIT_SUCCESS;
1098 
1099 	BJ_ASSERT(k != 0 && (k & (k - 1)) == 0);
1100 
1101 	if (BPF_OP(pc->code) == BPF_MOD) {
1102 		status = sljit_emit_op2(compiler,
1103 		    SLJIT_AND,
1104 		    BJ_AREG, 0,
1105 		    BJ_AREG, 0,
1106 		    SLJIT_IMM, k - 1);
1107 	} else {
1108 		int shift = 0;
1109 
1110 		/*
1111 		 * Do shift = __builtin_ctz(k).
1112 		 * The loop is slower, but that's ok.
1113 		 */
1114 		while (k > 1) {
1115 			k >>= 1;
1116 			shift++;
1117 		}
1118 
1119 		if (shift != 0) {
1120 			status = sljit_emit_op2(compiler,
1121 			    SLJIT_LSHR|SLJIT_INT_OP,
1122 			    BJ_AREG, 0,
1123 			    BJ_AREG, 0,
1124 			    SLJIT_IMM, shift);
1125 		}
1126 	}
1127 
1128 	return status;
1129 }
1130 
1131 #if !defined(BPFJIT_USE_UDIV)
1132 static sljit_uw
1133 divide(sljit_uw x, sljit_uw y)
1134 {
1135 
1136 	return (uint32_t)x / (uint32_t)y;
1137 }
1138 
1139 static sljit_uw
1140 modulus(sljit_uw x, sljit_uw y)
1141 {
1142 
1143 	return (uint32_t)x % (uint32_t)y;
1144 }
1145 #endif
1146 
/*
 * Emit code for A = A / div or A = A % div.
 * @pc BPF_DIV or BPF_MOD instruction.
 *
 * The divisor is either the X register (BPF_X) or the immediate pc->k
 * (BPF_K).  With BPFJIT_USE_UDIV the native SLJIT_UDIV op is emitted
 * (quotient in SCRATCH_REG1, remainder in SCRATCH_REG2); otherwise
 * the divide()/modulus() helpers above are called.
 * NOTE(review): a zero divisor is presumably rejected before this is
 * emitted (k == 0 at validation time, X == 0 by a generated check) —
 * confirm against the code-generation pass.
 */
static int
emit_moddiv(struct sljit_compiler *compiler, const struct bpf_insn *pc)
{
	int status;
	const bool xdiv = BPF_OP(pc->code) == BPF_DIV;
	const bool xreg = BPF_SRC(pc->code) == BPF_X;

#if BJ_XREG == SLJIT_RETURN_REG   || \
    BJ_XREG == SLJIT_SCRATCH_REG1 || \
    BJ_XREG == SLJIT_SCRATCH_REG2 || \
    BJ_AREG == SLJIT_SCRATCH_REG2
#error "Not supported assignment of registers."
#endif

	/* Dividend (A) goes to the first scratch register. */
#if BJ_AREG != SLJIT_SCRATCH_REG1
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_SCRATCH_REG1, 0,
	    BJ_AREG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	/* Divisor (X or pc->k) goes to the second scratch register. */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_SCRATCH_REG2, 0,
	    xreg ? BJ_XREG : SLJIT_IMM,
	    xreg ? 0 : (uint32_t)pc->k);
	if (status != SLJIT_SUCCESS)
		return status;

#if defined(BPFJIT_USE_UDIV)
	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);

	if (BPF_OP(pc->code) == BPF_DIV) {
#if BJ_AREG != SLJIT_SCRATCH_REG1
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_AREG, 0,
		    SLJIT_SCRATCH_REG1, 0);
#endif
	} else {
#if BJ_AREG != SLJIT_SCRATCH_REG2
		/* Remainder is in SLJIT_SCRATCH_REG2. */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_AREG, 0,
		    SLJIT_SCRATCH_REG2, 0);
#endif
	}

	if (status != SLJIT_SUCCESS)
		return status;
#else
	/* A = divide(A, divisor) or modulus(A, divisor); */
	status = sljit_emit_ijump(compiler,
	    SLJIT_CALL2,
	    SLJIT_IMM, xdiv ? SLJIT_FUNC_OFFSET(divide) :
		SLJIT_FUNC_OFFSET(modulus));

#if BJ_AREG != SLJIT_RETURN_REG
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BJ_AREG, 0,
	    SLJIT_RETURN_REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif
#endif

	return status;
}
1222 
1223 /*
1224  * Return true if pc is a "read from packet" instruction.
1225  * If length is not NULL and return value is true, *length will
1226  * be set to a safe length required to read a packet.
1227  */
1228 static bool
1229 read_pkt_insn(const struct bpf_insn *pc, bpfjit_abc_length_t *length)
1230 {
1231 	bool rv;
1232 	bpfjit_abc_length_t width = 0; /* XXXuninit */
1233 
1234 	switch (BPF_CLASS(pc->code)) {
1235 	default:
1236 		rv = false;
1237 		break;
1238 
1239 	case BPF_LD:
1240 		rv = BPF_MODE(pc->code) == BPF_ABS ||
1241 		     BPF_MODE(pc->code) == BPF_IND;
1242 		if (rv) {
1243 			width = read_width(pc);
1244 			rv = (width != 0);
1245 		}
1246 		break;
1247 
1248 	case BPF_LDX:
1249 		rv = BPF_MODE(pc->code) == BPF_MSH &&
1250 		     BPF_SIZE(pc->code) == BPF_B;
1251 		width = 1;
1252 		break;
1253 	}
1254 
1255 	if (rv && length != NULL) {
1256 		/*
1257 		 * Values greater than UINT32_MAX will generate
1258 		 * unconditional "return 0".
1259 		 */
1260 		*length = (uint32_t)pc->k + width;
1261 	}
1262 
1263 	return rv;
1264 }
1265 
1266 static void
1267 optimize_init(struct bpfjit_insn_data *insn_dat, size_t insn_count)
1268 {
1269 	size_t i;
1270 
1271 	for (i = 0; i < insn_count; i++) {
1272 		SLIST_INIT(&insn_dat[i].bjumps);
1273 		insn_dat[i].invalid = BJ_INIT_NOBITS;
1274 	}
1275 }
1276 
/*
 * The function divides instructions into blocks. Destination of a jump
 * instruction starts a new block. BPF_RET and BPF_JMP instructions
 * terminate a block. Blocks are linear, that is, there are no jumps out
 * from the middle of a block and there are no jumps in to the middle of
 * a block.
 *
 * The function also sets bits in *initmask for memwords that
 * need to be initialized to zero. Note that this set should be empty
 * for any valid kernel filter program.
 *
 * Returns false if a jump target is out of range; true otherwise.
 * As side effects it marks unreachable instructions, collects code
 * generation hints in *hints and links backward-jump records so later
 * passes can walk jumps to each instruction.
 */
static bool
optimize_pass1(const bpf_ctx_t *bc, const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count,
    bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
{
	struct bpfjit_jump *jtf;
	size_t i;
	uint32_t jt, jf;
	bpfjit_abc_length_t length;
	bpf_memword_init_t invalid; /* borrowed from bpf_filter() */
	bool unreachable;

	const size_t memwords = GET_MEMWORDS(bc);

	*hints = 0;
	*initmask = BJ_INIT_NOBITS;

	unreachable = false;
	/* Initially every memword and both registers are uninitialized. */
	invalid = ~BJ_INIT_NOBITS;

	for (i = 0; i < insn_count; i++) {
		/* An instruction that is a jump target is reachable. */
		if (!SLIST_EMPTY(&insn_dat[i].bjumps))
			unreachable = false;
		insn_dat[i].unreachable = unreachable;

		if (unreachable)
			continue;

		invalid |= insn_dat[i].invalid;

		/* A read past UINT32_MAX always fails; what follows is dead. */
		if (read_pkt_insn(&insns[i], &length) && length > UINT32_MAX)
			unreachable = true;

		switch (BPF_CLASS(insns[i].code)) {
		case BPF_RET:
			/* Returning A reads A. */
			if (BPF_RVAL(insns[i].code) == BPF_A)
				*initmask |= invalid & BJ_INIT_ABIT;

			unreachable = true;
			continue;

		case BPF_LD:
			if (BPF_MODE(insns[i].code) == BPF_ABS)
				*hints |= BJ_HINT_ABS;

			if (BPF_MODE(insns[i].code) == BPF_IND) {
				*hints |= BJ_HINT_IND | BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_XBIT;
			}

			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < memwords) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			/* A is written by every BPF_LD. */
			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_LDX:
			*hints |= BJ_HINT_XREG | BJ_HINT_LDX;

			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < memwords) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			if (BPF_MODE(insns[i].code) == BPF_MSH &&
			    BPF_SIZE(insns[i].code) == BPF_B) {
				*hints |= BJ_HINT_MSH;
			}

			/* X is written by every BPF_LDX. */
			invalid &= ~BJ_INIT_XBIT;
			continue;

		case BPF_ST:
			/* M[k] = A reads A and writes M[k]. */
			*initmask |= invalid & BJ_INIT_ABIT;

			if ((uint32_t)insns[i].k < memwords)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_STX:
			/* M[k] = X reads X and writes M[k]. */
			*hints |= BJ_HINT_XREG;
			*initmask |= invalid & BJ_INIT_XBIT;

			if ((uint32_t)insns[i].k < memwords)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_ALU:
			*initmask |= invalid & BJ_INIT_ABIT;

			if (insns[i].code != (BPF_ALU|BPF_NEG) &&
			    BPF_SRC(insns[i].code) == BPF_X) {
				*hints |= BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_XBIT;
			}

			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(insns[i].code)) {
			case BPF_TAX: // X <- A
				*hints |= BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_ABIT;
				invalid &= ~BJ_INIT_XBIT;
				continue;

			case BPF_TXA: // A <- X
				*hints |= BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_XBIT;
				invalid &= ~BJ_INIT_ABIT;
				continue;

			case BPF_COPX:
				*hints |= BJ_HINT_XREG | BJ_HINT_COPX;
				/* FALLTHROUGH */

			case BPF_COP:
				*hints |= BJ_HINT_COP;
				*initmask |= invalid & BJ_INIT_ABIT;
				invalid &= ~BJ_INIT_ABIT;
				continue;
			}

			continue;

		case BPF_JMP:
			/* Initialize abc_length for ABC pass. */
			insn_dat[i].u.jdata.abc_length = MAX_ABC_LENGTH;

			*initmask |= invalid & BJ_INIT_ABIT;

			if (BPF_SRC(insns[i].code) == BPF_X) {
				*hints |= BJ_HINT_XREG;
				*initmask |= invalid & BJ_INIT_XBIT;
			}

			if (BPF_OP(insns[i].code) == BPF_JA) {
				jt = jf = insns[i].k;
			} else {
				jt = insns[i].jt;
				jf = insns[i].jf;
			}

			/* Reject jumps beyond the end of the program. */
			if (jt >= insn_count - (i + 1) ||
			    jf >= insn_count - (i + 1)) {
				return false;
			}

			/*
			 * If both branches jump forward, the next
			 * instruction can only be reached by a jump.
			 */
			if (jt > 0 && jf > 0)
				unreachable = true;

			jt += i + 1;
			jf += i + 1;

			jtf = insn_dat[i].u.jdata.jtf;

			/* Register this jump with both of its targets. */
			jtf[0].jdata = &insn_dat[i].u.jdata;
			SLIST_INSERT_HEAD(&insn_dat[jt].bjumps,
			    &jtf[0], entries);

			if (jf != jt) {
				jtf[1].jdata = &insn_dat[i].u.jdata;
				SLIST_INSERT_HEAD(&insn_dat[jf].bjumps,
				    &jtf[1], entries);
			}

			/* Pass the invalid set on to both jump targets. */
			insn_dat[jf].invalid |= invalid;
			insn_dat[jt].invalid |= invalid;
			invalid = 0;

			continue;
		}
	}

	return true;
}
1469 
/*
 * Array Bounds Check Elimination (ABC) pass.
 *
 * Walks the program backwards propagating abc_length: the largest
 * packet length that is known to be required by some later read on
 * every path.  A later pass uses these values to merge bounds checks
 * of consecutive packet reads into a single check.
 */
static void
optimize_pass2(const bpf_ctx_t *bc, const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	struct bpfjit_jump *jmp;
	const struct bpf_insn *pc;
	struct bpfjit_insn_data *pd;
	size_t i;
	bpfjit_abc_length_t length, abc_length = 0;

	const size_t extwords = GET_EXTWORDS(bc);

	/* Iterate backwards, from the last instruction to the first. */
	for (i = insn_count; i != 0; i--) {
		pc = &insns[i-1];
		pd = &insn_dat[i-1];

		if (pd->unreachable)
			continue;

		switch (BPF_CLASS(pc->code)) {
		case BPF_RET:
			/*
			 * It's quite common for bpf programs to
			 * check packet bytes in increasing order
			 * and return zero if bytes don't match
			 * specified critetion. Such programs disable
			 * ABC optimization completely because for
			 * every jump there is a branch with no read
			 * instruction.
			 * With no side effects, BPF_STMT(BPF_RET+BPF_K, 0)
			 * is indistinguishable from out-of-bound load.
			 * Therefore, abc_length can be set to
			 * MAX_ABC_LENGTH and enable ABC for many
			 * bpf programs.
			 * If this optimization encounters any
			 * instruction with a side effect, it will
			 * reset abc_length.
			 */
			if (BPF_RVAL(pc->code) == BPF_K && pc->k == 0)
				abc_length = MAX_ABC_LENGTH;
			else
				abc_length = 0;
			break;

		case BPF_MISC:
			if (BPF_MISCOP(pc->code) == BPF_COP ||
			    BPF_MISCOP(pc->code) == BPF_COPX) {
				/* COP instructions can have side effects. */
				abc_length = 0;
			}
			break;

		case BPF_ST:
		case BPF_STX:
			if (extwords != 0) {
				/* Write to memory is visible after a call. */
				abc_length = 0;
			}
			break;

		case BPF_JMP:
			/* Resume from the value merged in by jump targets. */
			abc_length = pd->u.jdata.abc_length;
			break;

		default:
			/* Packet reads extend the required length. */
			if (read_pkt_insn(pc, &length)) {
				if (abc_length < length)
					abc_length = length;
				pd->u.rdata.abc_length = abc_length;
			}
			break;
		}

		/* Propagate to every jump that targets this instruction. */
		SLIST_FOREACH(jmp, &pd->bjumps, entries) {
			if (jmp->jdata->abc_length > abc_length)
				jmp->jdata->abc_length = abc_length;
		}
	}
}
1552 
/*
 * Forward pass that turns abc_length values from pass2 into actual
 * check_length values: a packet read instruction only needs a new
 * bounds check when its abc_length exceeds the length already
 * checked on the paths leading to it.
 */
static void
optimize_pass3(const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	struct bpfjit_jump *jmp;
	size_t i;
	bpfjit_abc_length_t checked_length = 0;

	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/* Take the minimum checked length over incoming jumps. */
		SLIST_FOREACH(jmp, &insn_dat[i].bjumps, entries) {
			if (jmp->jdata->checked_length < checked_length)
				checked_length = jmp->jdata->checked_length;
		}

		if (BPF_CLASS(insns[i].code) == BPF_JMP) {
			/* Record what's checked when taking this jump. */
			insn_dat[i].u.jdata.checked_length = checked_length;
		} else if (read_pkt_insn(&insns[i], NULL)) {
			struct bpfjit_read_pkt_data *rdata =
			    &insn_dat[i].u.rdata;
			rdata->check_length = 0;
			if (checked_length < rdata->abc_length) {
				/* Emit a check covering abc_length bytes. */
				checked_length = rdata->abc_length;
				rdata->check_length = checked_length;
			}
		}
	}
}
1583 
1584 static bool
1585 optimize(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1586     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1587     bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
1588 {
1589 
1590 	optimize_init(insn_dat, insn_count);
1591 
1592 	if (!optimize_pass1(bc, insns, insn_dat, insn_count, initmask, hints))
1593 		return false;
1594 
1595 	optimize_pass2(bc, insns, insn_dat, insn_count);
1596 	optimize_pass3(insns, insn_dat, insn_count);
1597 
1598 	return true;
1599 }
1600 
1601 /*
1602  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1603  */
1604 static int
1605 bpf_alu_to_sljit_op(const struct bpf_insn *pc)
1606 {
1607 	const int bad = SLJIT_UNUSED;
1608 	const uint32_t k = pc->k;
1609 
1610 	/*
1611 	 * Note: all supported 64bit arches have 32bit multiply
1612 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1613 	 */
1614 	switch (BPF_OP(pc->code)) {
1615 	case BPF_ADD: return SLJIT_ADD;
1616 	case BPF_SUB: return SLJIT_SUB;
1617 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1618 	case BPF_OR:  return SLJIT_OR;
1619 	case BPF_XOR: return SLJIT_XOR;
1620 	case BPF_AND: return SLJIT_AND;
1621 	case BPF_LSH: return (k > 31) ? bad : SLJIT_SHL;
1622 	case BPF_RSH: return (k > 31) ? bad : SLJIT_LSHR|SLJIT_INT_OP;
1623 	default:
1624 		return bad;
1625 	}
1626 }
1627 
1628 /*
1629  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1630  */
1631 static int
1632 bpf_jmp_to_sljit_cond(const struct bpf_insn *pc, bool negate)
1633 {
1634 	/*
1635 	 * Note: all supported 64bit arches have 32bit comparison
1636 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1637 	 */
1638 	int rv = SLJIT_INT_OP;
1639 
1640 	switch (BPF_OP(pc->code)) {
1641 	case BPF_JGT:
1642 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1643 		break;
1644 	case BPF_JGE:
1645 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1646 		break;
1647 	case BPF_JEQ:
1648 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1649 		break;
1650 	case BPF_JSET:
1651 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1652 		break;
1653 	default:
1654 		BJ_ASSERT(false);
1655 	}
1656 
1657 	return rv;
1658 }
1659 
1660 /*
1661  * Convert BPF_K and BPF_X to sljit register.
1662  */
1663 static int
1664 kx_to_reg(const struct bpf_insn *pc)
1665 {
1666 
1667 	switch (BPF_SRC(pc->code)) {
1668 	case BPF_K: return SLJIT_IMM;
1669 	case BPF_X: return BJ_XREG;
1670 	default:
1671 		BJ_ASSERT(false);
1672 		return 0;
1673 	}
1674 }
1675 
1676 static sljit_sw
1677 kx_to_reg_arg(const struct bpf_insn *pc)
1678 {
1679 
1680 	switch (BPF_SRC(pc->code)) {
1681 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1682 	case BPF_X: return 0;               /* BJ_XREG, 0,      */
1683 	default:
1684 		BJ_ASSERT(false);
1685 		return 0;
1686 	}
1687 }
1688 
/*
 * Generate sljit code for every reachable instruction of a bpf
 * program.  Jumps to the shared "return 0" epilogue are collected in
 * the ret0 array and resolved at the end.  Returns false on any
 * emission or allocation failure.
 */
static bool
generate_insn_code(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const bpf_ctx_t *bc, const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	/* a list of jumps to out-of-bound return from a generated function */
	struct sljit_jump **ret0;
	size_t ret0_size, ret0_maxsize;

	struct sljit_jump *jump;
	struct sljit_label *label;
	const struct bpf_insn *pc;
	struct bpfjit_jump *bjump, *jtf;
	struct sljit_jump *to_mchain_jump;

	size_t i;
	int status;
	int branching, negate;
	unsigned int rval, mode, src, op;
	uint32_t jt, jf;

	bool unconditional_ret;
	bool rv;

	const size_t extwords = GET_EXTWORDS(bc);
	const size_t memwords = GET_MEMWORDS(bc);

	ret0 = NULL;
	rv = false;

	ret0_size = 0;
	ret0_maxsize = 64;
	ret0 = BJ_ALLOC(ret0_maxsize * sizeof(ret0[0]));
	if (ret0 == NULL)
		goto fail;

	/* reset sjump members of jdata */
	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable ||
		    BPF_CLASS(insns[i].code) != BPF_JMP) {
			continue;
		}

		jtf = insn_dat[i].u.jdata.jtf;
		jtf[0].sjump = jtf[1].sjump = NULL;
	}

	/* main loop */
	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/*
		 * Resolve jumps to the current insn.
		 */
		label = NULL;
		SLIST_FOREACH(bjump, &insn_dat[i].bjumps, entries) {
			if (bjump->sjump != NULL) {
				if (label == NULL)
					label = sljit_emit_label(compiler);
				if (label == NULL)
					goto fail;
				sljit_set_label(bjump->sjump, label);
			}
		}

		to_mchain_jump = NULL;
		unconditional_ret = false;

		/*
		 * Emit the bounds check computed by the ABC passes
		 * before any instruction that reads the packet.
		 */
		if (read_pkt_insn(&insns[i], NULL)) {
			if (insn_dat[i].u.rdata.check_length > UINT32_MAX) {
				/* Jump to "return 0" unconditionally. */
				unconditional_ret = true;
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (insn_dat[i].u.rdata.check_length > 0) {
				/* if (buflen < check_length) return 0; */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_LESS,
				    BJ_BUFLEN, 0,
				    SLJIT_IMM,
				    insn_dat[i].u.rdata.check_length);
				if (jump == NULL)
					goto fail;
#ifdef _KERNEL
				to_mchain_jump = jump;
#else
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
#endif
			}
		}

		pc = &insns[i];
		switch (BPF_CLASS(pc->code)) {

		default:
			goto fail;

		case BPF_LD:
			/* BPF_LD+BPF_IMM          A <- k */
			if (pc->code == (BPF_LD|BPF_IMM)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_MEM          A <- M[k] */
			if (pc->code == (BPF_LD|BPF_MEM)) {
				if ((uint32_t)pc->k >= memwords)
					goto fail;
				status = emit_memload(compiler,
				    BJ_AREG, pc->k, extwords);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV, /* size_t source */
				    BJ_AREG, 0,
				    SLJIT_MEM1(BJ_ARGS),
				    offsetof(struct bpf_args, wirelen));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			mode = BPF_MODE(pc->code);
			if (mode != BPF_ABS && mode != BPF_IND)
				goto fail;

			/* The read always fails; its code was not emitted. */
			if (unconditional_ret)
				continue;

			status = emit_pkt_read(compiler, hints, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_LDX:
			mode = BPF_MODE(pc->code);

			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
			if (mode == BPF_IMM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_XREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
			if (mode == BPF_LEN) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV, /* size_t source */
				    BJ_XREG, 0,
				    SLJIT_MEM1(BJ_ARGS),
				    offsetof(struct bpf_args, wirelen));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
			if (mode == BPF_MEM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				if ((uint32_t)pc->k >= memwords)
					goto fail;
				status = emit_memload(compiler,
				    BJ_XREG, pc->k, extwords);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
				goto fail;

			if (unconditional_ret)
				continue;

			status = emit_msh(compiler, hints, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ST:
			if (pc->code != BPF_ST ||
			    (uint32_t)pc->k >= memwords) {
				goto fail;
			}

			status = emit_memstore(compiler,
			    BJ_AREG, pc->k, extwords);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_STX:
			if (pc->code != BPF_STX ||
			    (uint32_t)pc->k >= memwords) {
				goto fail;
			}

			status = emit_memstore(compiler,
			    BJ_XREG, pc->k, extwords);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ALU:
			if (pc->code == (BPF_ALU|BPF_NEG)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_NEG,
				    BJ_AREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			op = BPF_OP(pc->code);
			if (op != BPF_DIV && op != BPF_MOD) {
				const int op2 = bpf_alu_to_sljit_op(pc);

				if (op2 == SLJIT_UNUSED)
					goto fail;
				status = sljit_emit_op2(compiler,
				    op2, BJ_AREG, 0, BJ_AREG, 0,
				    kx_to_reg(pc), kx_to_reg_arg(pc));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_DIV/BPF_MOD */

			src = BPF_SRC(pc->code);
			if (src != BPF_X && src != BPF_K)
				goto fail;

			/* division by zero? */
			if (src == BPF_X) {
				/* if (X == 0) return 0; */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_EQUAL|SLJIT_INT_OP,
				    BJ_XREG, 0,
				    SLJIT_IMM, 0);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (pc->k == 0) {
				/* Constant zero divisor: return 0 always. */
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			}

			if (src == BPF_X) {
				status = emit_moddiv(compiler, pc);
				if (status != SLJIT_SUCCESS)
					goto fail;
			} else if (pc->k != 0) {
				/* Powers of two get the cheap and/shift form. */
				if (pc->k & (pc->k - 1)) {
					status = emit_moddiv(compiler, pc);
				} else {
					status = emit_pow2_moddiv(compiler, pc);
				}
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_JMP:
			op = BPF_OP(pc->code);
			if (op == BPF_JA) {
				jt = jf = pc->k;
			} else {
				jt = pc->jt;
				jf = pc->jf;
			}

			/* Emit the negated condition if jt falls through. */
			negate = (jt == 0) ? 1 : 0;
			branching = (jt == jf) ? 0 : 1;
			jtf = insn_dat[i].u.jdata.jtf;

			if (branching) {
				if (op != BPF_JSET) {
					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
				} else {
					/* BPF_JSET: test A & (k or X). */
					status = sljit_emit_op2(compiler,
					    SLJIT_AND,
					    BJ_TMP1REG, 0,
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
					if (status != SLJIT_SUCCESS)
						goto fail;

					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_TMP1REG, 0,
					    SLJIT_IMM, 0);
				}

				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[negate].sjump == NULL);
				jtf[negate].sjump = jump;
			}

			/* Neither branch falls through: emit a plain jump. */
			if (!branching || (jt != 0 && jf != 0)) {
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[branching].sjump == NULL);
				jtf[branching].sjump = jump;
			}

			continue;

		case BPF_RET:
			rval = BPF_RVAL(pc->code);
			if (rval == BPF_X)
				goto fail;

			/* BPF_RET+BPF_K    accept k bytes */
			if (rval == BPF_K) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			/* BPF_RET+BPF_A    accept A bytes */
			if (rval == BPF_A) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(pc->code)) {
			case BPF_TAX:
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_XREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_TXA:
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_COP:
			case BPF_COPX:
				if (bc == NULL || bc->copfuncs == NULL)
					goto fail;
				if (BPF_MISCOP(pc->code) == BPF_COP &&
				    (uint32_t)pc->k >= bc->nfuncs) {
					goto fail;
				}

				status = emit_cop(compiler, hints, bc, pc,
				    &ret0, &ret0_size, &ret0_maxsize);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			goto fail;
		} /* switch */
	} /* main loop */

	BJ_ASSERT(ret0_size <= ret0_maxsize);

	/* Resolve the collected "return 0" jumps to a shared epilogue. */
	if (ret0_size > 0) {
		label = sljit_emit_label(compiler);
		if (label == NULL)
			goto fail;
		for (i = 0; i < ret0_size; i++)
			sljit_set_label(ret0[i], label);
	}

	status = sljit_emit_return(compiler,
	    SLJIT_MOV_UI,
	    SLJIT_IMM, 0);
	if (status != SLJIT_SUCCESS)
		goto fail;

	rv = true;

fail:
	if (ret0 != NULL)
		BJ_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));

	return rv;
}
2144 
/*
 * Compile a bpf program into native code.
 * @bc may be NULL; it supplies external memwords and coprocessor
 * functions.  Returns NULL on invalid programs or on any allocation
 * or code-generation failure.  The returned code must be released
 * with bpfjit_free_code().
 */
bpfjit_func_t
bpfjit_generate_code(const bpf_ctx_t *bc,
    const struct bpf_insn *insns, size_t insn_count)
{
	void *rv;
	struct sljit_compiler *compiler;

	size_t i;
	int status;

	/* optimization related */
	bpf_memword_init_t initmask;
	bpfjit_hint_t hints;

	/* memory store location for initial zero initialization */
	sljit_si mem_reg;
	sljit_sw mem_off;

	struct bpfjit_insn_data *insn_dat;

	const size_t extwords = GET_EXTWORDS(bc);
	const size_t memwords = GET_MEMWORDS(bc);
	const bpf_memword_init_t preinited = extwords ? bc->preinited : 0;

	rv = NULL;
	compiler = NULL;
	insn_dat = NULL;

	if (memwords > MAX_MEMWORDS)
		goto fail;

	/* Guard the multiplication in the allocation below. */
	if (insn_count == 0 || insn_count > SIZE_MAX / sizeof(insn_dat[0]))
		goto fail;

	insn_dat = BJ_ALLOC(insn_count * sizeof(insn_dat[0]));
	if (insn_dat == NULL)
		goto fail;

	if (!optimize(bc, insns, insn_dat, insn_count, &initmask, &hints))
		goto fail;

	compiler = sljit_create_compiler();
	if (compiler == NULL)
		goto fail;

#if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
	sljit_compiler_verbose(compiler, stderr);
#endif

	/* Function prologue: 2 arguments, registers sized by the hints. */
	status = sljit_emit_enter(compiler,
	    2, nscratches(hints), nsaveds(hints), sizeof(struct bpfjit_stack));
	if (status != SLJIT_SUCCESS)
		goto fail;

	if (hints & BJ_HINT_COP) {
		/* save ctx argument */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, ctx),
		    BJ_CTX_ARG, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (extwords == 0) {
		/* Memwords live on the stack of the generated function. */
		mem_reg = SLJIT_MEM1(SLJIT_LOCALS_REG);
		mem_off = offsetof(struct bpfjit_stack, mem);
	} else {
		/* copy "mem" argument from bpf_args to bpfjit_stack */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_TMP1REG, 0,
		    SLJIT_MEM1(BJ_ARGS), offsetof(struct bpf_args, mem));
		if (status != SLJIT_SUCCESS)
			goto fail;

		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, extmem),
		    BJ_TMP1REG, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;

		mem_reg = SLJIT_MEM1(BJ_TMP1REG);
		mem_off = 0;
	}

	/*
	 * Exclude pre-initialised external memory words but keep
	 * initialization statuses of A and X registers in case
	 * bc->preinited wrongly sets those two bits.
	 */
	initmask &= ~preinited | BJ_INIT_ABIT | BJ_INIT_XBIT;

#if defined(_KERNEL)
	/* bpf_filter() checks initialization of memwords. */
	BJ_ASSERT((initmask & (BJ_INIT_MBIT(memwords) - 1)) == 0);
#endif
	for (i = 0; i < memwords; i++) {
		if (initmask & BJ_INIT_MBIT(i)) {
			/* M[i] = 0; */
			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    mem_reg, mem_off + i * sizeof(uint32_t),
			    SLJIT_IMM, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;
		}
	}

	if (initmask & BJ_INIT_ABIT) {
		/* A = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_AREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (initmask & BJ_INIT_XBIT) {
		/* X = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_XREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	status = load_buf_buflen(compiler);
	if (status != SLJIT_SUCCESS)
		goto fail;

	if (!generate_insn_code(compiler, hints,
	    bc, insns, insn_dat, insn_count)) {
		goto fail;
	}

	rv = sljit_generate_code(compiler);

fail:
	if (compiler != NULL)
		sljit_free_compiler(compiler);

	if (insn_dat != NULL)
		BJ_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));

	return (bpfjit_func_t)rv;
}
2297 
2298 void
2299 bpfjit_free_code(bpfjit_func_t code)
2300 {
2301 
2302 	sljit_free_code((void *)code);
2303 }
2304