xref: /netbsd-src/sys/net/bpfjit.c (revision f14316bcbc544b96a93e884bc5c2b15fd60e22ae)
1 /*	$NetBSD: bpfjit.c,v 1.32 2014/07/26 11:23:46 alnsn Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2014 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.32 2014/07/26 11:23:46 alnsn Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.32 2014/07/26 11:23:46 alnsn Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <assert.h>
44 #define BJ_ASSERT(c) assert(c)
45 #else
46 #define BJ_ASSERT(c) KASSERT(c)
47 #endif
48 
49 #ifndef _KERNEL
50 #include <stdlib.h>
51 #define BJ_ALLOC(sz) malloc(sz)
52 #define BJ_FREE(p, sz) free(p)
53 #else
54 #include <sys/kmem.h>
55 #define BJ_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
56 #define BJ_FREE(p, sz) kmem_free(p, sz)
57 #endif
58 
59 #ifndef _KERNEL
60 #include <limits.h>
61 #include <stdbool.h>
62 #include <stddef.h>
63 #include <stdint.h>
64 #else
65 #include <sys/atomic.h>
66 #include <sys/module.h>
67 #endif
68 
69 #define	__BPF_PRIVATE
70 #include <net/bpf.h>
71 #include <net/bpfjit.h>
72 #include <sljitLir.h>
73 
74 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
75 #include <stdio.h> /* for stderr */
76 #endif
77 
78 /*
79  * Arguments of generated bpfjit_func_t.
80  * The first argument is reassigned upon entry
81  * to a more frequently used buf argument.
82  */
83 #define BJ_CTX_ARG	SLJIT_SAVED_REG1
84 #define BJ_ARGS		SLJIT_SAVED_REG2
85 
86 /*
87  * Permanent register assignments.
88  */
#define BJ_BUF		SLJIT_SAVED_REG1
/* BJ_ARGS (SLJIT_SAVED_REG2, defined above) stays valid; SAVED_REG1 is
 * reused as BJ_BUF after the prologue copies buf out of the args. */
91 #define BJ_BUFLEN	SLJIT_SAVED_REG3
92 #define BJ_AREG		SLJIT_SCRATCH_REG1
93 #define BJ_TMP1REG	SLJIT_SCRATCH_REG2
94 #define BJ_TMP2REG	SLJIT_SCRATCH_REG3
95 #define BJ_XREG		SLJIT_TEMPORARY_EREG1
96 #define BJ_TMP3REG	SLJIT_TEMPORARY_EREG2
97 
98 #ifdef _KERNEL
99 #define MAX_MEMWORDS BPF_MAX_MEMWORDS
100 #else
101 #define MAX_MEMWORDS BPF_MEMWORDS
102 #endif
103 
104 #define BJ_INIT_NOBITS  ((bpf_memword_init_t)0)
105 #define BJ_INIT_MBIT(k) BPF_MEMWORD_INIT(k)
106 #define BJ_INIT_ABIT    BJ_INIT_MBIT(MAX_MEMWORDS)
107 #define BJ_INIT_XBIT    BJ_INIT_MBIT(MAX_MEMWORDS + 1)
108 
109 /*
110  * Get a number of memwords and external memwords from a bpf_ctx object.
111  */
112 #define GET_EXTWORDS(bc) ((bc) ? (bc)->extwords : 0)
113 #define GET_MEMWORDS(bc) (GET_EXTWORDS(bc) ? GET_EXTWORDS(bc) : BPF_MEMWORDS)
114 
115 /*
116  * Optimization hints.
117  */
118 typedef unsigned int bpfjit_hint_t;
119 #define BJ_HINT_ABS  0x01 /* packet read at absolute offset   */
120 #define BJ_HINT_IND  0x02 /* packet read at variable offset   */
121 #define BJ_HINT_MSH  0x04 /* BPF_MSH instruction              */
122 #define BJ_HINT_COP  0x08 /* BPF_COP or BPF_COPX instruction  */
123 #define BJ_HINT_COPX 0x10 /* BPF_COPX instruction             */
124 #define BJ_HINT_XREG 0x20 /* BJ_XREG is needed                */
125 #define BJ_HINT_LDX  0x40 /* BPF_LDX instruction              */
126 #define BJ_HINT_PKT  (BJ_HINT_ABS|BJ_HINT_IND|BJ_HINT_MSH)
127 
128 /*
129  * Datatype for Array Bounds Check Elimination (ABC) pass.
130  */
131 typedef uint64_t bpfjit_abc_length_t;
132 #define MAX_ABC_LENGTH (UINT32_MAX + UINT64_C(4)) /* max. width is 4 */
133 
/*
 * Layout of the on-stack scratch area of the generated function.
 * Generated code addresses these fields via SLJIT_LOCALS_REG and
 * offsetof(struct bpfjit_stack, ...).
 */
struct bpfjit_stack
{
	bpf_ctx_t *ctx;   /* bpf_ctx pointer, read back for BPF_COP/BPF_COPX calls */
	uint32_t *extmem; /* pointer to external memory store */
	uint32_t reg; /* saved A or X register */
#ifdef _KERNEL
	int err; /* 3rd argument for m_xword/m_xhalf/m_xbyte function call */
#endif
	uint32_t mem[BPF_MEMWORDS]; /* internal memory store */
};
144 
145 /*
146  * Data for BPF_JMP instruction.
147  * Forward declaration for struct bpfjit_jump.
148  */
149 struct bpfjit_jump_data;
150 
151 /*
152  * Node of bjumps list.
153  */
struct bpfjit_jump {
	struct sljit_jump *sjump; /* emitted sljit jump awaiting its target label */
	SLIST_ENTRY(bpfjit_jump) entries; /* linkage in target insn's bjumps list */
	struct bpfjit_jump_data *jdata; /* data of the originating BPF_JMP (see jtf[]) */
};
159 
160 /*
161  * Data for BPF_JMP instruction.
162  */
struct bpfjit_jump_data {
	/*
	 * These entries make up bjumps list:
	 * jtf[0] - when coming from jt path,
	 * jtf[1] - when coming from jf path.
	 */
	struct bpfjit_jump jtf[2];
	/*
	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
	 */
	bpfjit_abc_length_t abc_length;
	/*
	 * Length checked by the last out-of-bounds check.
	 */
	bpfjit_abc_length_t checked_length;
};
179 
180 /*
181  * Data for "read from packet" instructions.
182  * See also read_pkt_insn() function below.
183  */
struct bpfjit_read_pkt_data {
	/*
	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
	 */
	bpfjit_abc_length_t abc_length;
	/*
	 * If positive, emit "if (buflen < check_length) return 0"
	 * out-of-bounds check.
	 * Values greater than UINT32_MAX generate unconditional "return 0".
	 */
	bpfjit_abc_length_t check_length;
};
196 
197 /*
198  * Additional (optimization-related) data for bpf_insn.
199  */
struct bpfjit_insn_data {
	/* List of jumps to this insn. */
	SLIST_HEAD(, bpfjit_jump) bjumps;

	union {
		struct bpfjit_jump_data     jdata; /* for BPF_JMP instructions */
		struct bpfjit_read_pkt_data rdata; /* for packet-read instructions */
	} u;

	/* Bitmask (BJ_INIT_* bits) of values not yet initialized here. */
	bpf_memword_init_t invalid;
	/* True if this insn cannot be reached; presumably skipped by codegen. */
	bool unreachable;
};
212 
213 #ifdef _KERNEL
214 
215 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
216 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
217 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
218 
219 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
220 
/*
 * Kernel module control: publish/unpublish the JIT entry points.
 */
static int
bpfjit_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		/*
		 * Publish bj_free_code before bj_generate_code; the
		 * membar_producer() calls order the stores so a consumer
		 * that observes bj_generate_code also sees bj_free_code.
		 */
		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
		membar_producer();
		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
		membar_producer();
		return 0;

	case MODULE_CMD_FINI:
		/* Unloading is not supported. */
		return EOPNOTSUPP;

	default:
		return ENOTTY;
	}
}
240 #endif
241 
242 /*
243  * Return a number of scratch registers to pass
244  * to sljit_emit_enter() function.
245  */
static sljit_si
nscratches(bpfjit_hint_t hints)
{
	sljit_si rv = 2; /* BJ_AREG and BJ_TMP1REG are always used */

	/*
	 * Checks are ordered by increasing register count, so rv ends
	 * up as the maximum requirement over all hints that are set.
	 */
#ifdef _KERNEL
	if (hints & BJ_HINT_PKT)
		rv = 3; /* xcall with three arguments */
#endif

	if (hints & BJ_HINT_IND)
		rv = 3; /* uses BJ_TMP2REG */

	if (hints & BJ_HINT_COP)
		rv = 3; /* calls copfunc with three arguments */

	if (hints & BJ_HINT_XREG)
		rv = 4; /* uses BJ_XREG */

#ifdef _KERNEL
	if (hints & BJ_HINT_LDX)
		rv = 5; /* uses BJ_TMP3REG */
#endif

	if (hints & BJ_HINT_COPX)
		rv = 5; /* uses BJ_TMP3REG */

	return rv;
}
275 
276 /*
277  * Return a number of saved registers to pass
278  * to sljit_emit_enter() function.
279  */
280 static sljit_si
281 nsaveds(bpfjit_hint_t hints)
282 {
283 	sljit_si rv = 3;
284 
285 	return rv;
286 }
287 
288 static uint32_t
289 read_width(const struct bpf_insn *pc)
290 {
291 
292 	switch (BPF_SIZE(pc->code)) {
293 	case BPF_W:
294 		return 4;
295 	case BPF_H:
296 		return 2;
297 	case BPF_B:
298 		return 1;
299 	default:
300 		BJ_ASSERT(false);
301 		return 0;
302 	}
303 }
304 
305 /*
306  * Copy buf and buflen members of bpf_args from BJ_ARGS
307  * pointer to BJ_BUF and BJ_BUFLEN registers.
308  */
309 static int
310 load_buf_buflen(struct sljit_compiler *compiler)
311 {
312 	int status;
313 
314 	status = sljit_emit_op1(compiler,
315 	    SLJIT_MOV_P,
316 	    BJ_BUF, 0,
317 	    SLJIT_MEM1(BJ_ARGS),
318 	    offsetof(struct bpf_args, pkt));
319 	if (status != SLJIT_SUCCESS)
320 		return status;
321 
322 	status = sljit_emit_op1(compiler,
323 	    SLJIT_MOV, /* size_t source */
324 	    BJ_BUFLEN, 0,
325 	    SLJIT_MEM1(BJ_ARGS),
326 	    offsetof(struct bpf_args, buflen));
327 
328 	return status;
329 }
330 
331 static bool
332 grow_jumps(struct sljit_jump ***jumps, size_t *size)
333 {
334 	struct sljit_jump **newptr;
335 	const size_t elemsz = sizeof(struct sljit_jump *);
336 	size_t old_size = *size;
337 	size_t new_size = 2 * old_size;
338 
339 	if (new_size < old_size || new_size > SIZE_MAX / elemsz)
340 		return false;
341 
342 	newptr = BJ_ALLOC(new_size * elemsz);
343 	if (newptr == NULL)
344 		return false;
345 
346 	memcpy(newptr, *jumps, old_size * elemsz);
347 	BJ_FREE(*jumps, old_size * elemsz);
348 
349 	*jumps = newptr;
350 	*size = new_size;
351 	return true;
352 }
353 
354 static bool
355 append_jump(struct sljit_jump *jump, struct sljit_jump ***jumps,
356     size_t *size, size_t *max_size)
357 {
358 	if (*size == *max_size && !grow_jumps(jumps, max_size))
359 		return false;
360 
361 	(*jumps)[(*size)++] = jump;
362 	return true;
363 }
364 
365 /*
366  * Emit code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
367  */
368 static int
369 emit_read8(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
370 {
371 
372 	return sljit_emit_op1(compiler,
373 	    SLJIT_MOV_UB,
374 	    BJ_AREG, 0,
375 	    SLJIT_MEM1(src), k);
376 }
377 
378 /*
379  * Emit code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
380  */
381 static int
382 emit_read16(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
383 {
384 	int status;
385 
386 	BJ_ASSERT(k <= UINT32_MAX - 1);
387 
388 	/* A = buf[k]; */
389 	status = sljit_emit_op1(compiler,
390 	    SLJIT_MOV_UB,
391 	    BJ_AREG, 0,
392 	    SLJIT_MEM1(src), k);
393 	if (status != SLJIT_SUCCESS)
394 		return status;
395 
396 	/* tmp1 = buf[k+1]; */
397 	status = sljit_emit_op1(compiler,
398 	    SLJIT_MOV_UB,
399 	    BJ_TMP1REG, 0,
400 	    SLJIT_MEM1(src), k+1);
401 	if (status != SLJIT_SUCCESS)
402 		return status;
403 
404 	/* A = A << 8; */
405 	status = sljit_emit_op2(compiler,
406 	    SLJIT_SHL,
407 	    BJ_AREG, 0,
408 	    BJ_AREG, 0,
409 	    SLJIT_IMM, 8);
410 	if (status != SLJIT_SUCCESS)
411 		return status;
412 
413 	/* A = A + tmp1; */
414 	status = sljit_emit_op2(compiler,
415 	    SLJIT_ADD,
416 	    BJ_AREG, 0,
417 	    BJ_AREG, 0,
418 	    BJ_TMP1REG, 0);
419 	return status;
420 }
421 
422 /*
423  * Emit code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
424  */
425 static int
426 emit_read32(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
427 {
428 	int status;
429 
430 	BJ_ASSERT(k <= UINT32_MAX - 3);
431 
432 	/* A = buf[k]; */
433 	status = sljit_emit_op1(compiler,
434 	    SLJIT_MOV_UB,
435 	    BJ_AREG, 0,
436 	    SLJIT_MEM1(src), k);
437 	if (status != SLJIT_SUCCESS)
438 		return status;
439 
440 	/* tmp1 = buf[k+1]; */
441 	status = sljit_emit_op1(compiler,
442 	    SLJIT_MOV_UB,
443 	    BJ_TMP1REG, 0,
444 	    SLJIT_MEM1(src), k+1);
445 	if (status != SLJIT_SUCCESS)
446 		return status;
447 
448 	/* A = A << 8; */
449 	status = sljit_emit_op2(compiler,
450 	    SLJIT_SHL,
451 	    BJ_AREG, 0,
452 	    BJ_AREG, 0,
453 	    SLJIT_IMM, 8);
454 	if (status != SLJIT_SUCCESS)
455 		return status;
456 
457 	/* A = A + tmp1; */
458 	status = sljit_emit_op2(compiler,
459 	    SLJIT_ADD,
460 	    BJ_AREG, 0,
461 	    BJ_AREG, 0,
462 	    BJ_TMP1REG, 0);
463 	if (status != SLJIT_SUCCESS)
464 		return status;
465 
466 	/* tmp1 = buf[k+2]; */
467 	status = sljit_emit_op1(compiler,
468 	    SLJIT_MOV_UB,
469 	    BJ_TMP1REG, 0,
470 	    SLJIT_MEM1(src), k+2);
471 	if (status != SLJIT_SUCCESS)
472 		return status;
473 
474 	/* A = A << 8; */
475 	status = sljit_emit_op2(compiler,
476 	    SLJIT_SHL,
477 	    BJ_AREG, 0,
478 	    BJ_AREG, 0,
479 	    SLJIT_IMM, 8);
480 	if (status != SLJIT_SUCCESS)
481 		return status;
482 
483 	/* A = A + tmp1; */
484 	status = sljit_emit_op2(compiler,
485 	    SLJIT_ADD,
486 	    BJ_AREG, 0,
487 	    BJ_AREG, 0,
488 	    BJ_TMP1REG, 0);
489 	if (status != SLJIT_SUCCESS)
490 		return status;
491 
492 	/* tmp1 = buf[k+3]; */
493 	status = sljit_emit_op1(compiler,
494 	    SLJIT_MOV_UB,
495 	    BJ_TMP1REG, 0,
496 	    SLJIT_MEM1(src), k+3);
497 	if (status != SLJIT_SUCCESS)
498 		return status;
499 
500 	/* A = A << 8; */
501 	status = sljit_emit_op2(compiler,
502 	    SLJIT_SHL,
503 	    BJ_AREG, 0,
504 	    BJ_AREG, 0,
505 	    SLJIT_IMM, 8);
506 	if (status != SLJIT_SUCCESS)
507 		return status;
508 
509 	/* A = A + tmp1; */
510 	status = sljit_emit_op2(compiler,
511 	    SLJIT_ADD,
512 	    BJ_AREG, 0,
513 	    BJ_AREG, 0,
514 	    BJ_TMP1REG, 0);
515 	return status;
516 }
517 
518 #ifdef _KERNEL
519 /*
520  * Emit code for m_xword/m_xhalf/m_xbyte call.
521  *
522  * @pc BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
523  *     BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
524  *     BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
525  *     BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
526  *     BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
527  *     BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
528  *     BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
529  */
static int
emit_xcall(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const struct bpf_insn *pc, int dst, struct sljit_jump ***ret0,
    size_t *ret0_size, size_t *ret0_maxsize,
    uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
{
#if BJ_XREG == SLJIT_RETURN_REG   || \
    BJ_XREG == SLJIT_SCRATCH_REG1 || \
    BJ_XREG == SLJIT_SCRATCH_REG2 || \
    BJ_XREG == SLJIT_SCRATCH_REG3
#error "Not supported assignment of registers."
#endif
	struct sljit_jump *jump;
	sljit_si save_reg;
	int status;

	/*
	 * BPF_LDX (BPF_MSH) loads into X, so A must survive the call;
	 * every other opcode here loads into A, so X must survive
	 * (when it is live at all, per BJ_HINT_XREG).
	 */
	save_reg = (BPF_CLASS(pc->code) == BPF_LDX) ? BJ_AREG : BJ_XREG;

	if (save_reg == BJ_AREG || (hints & BJ_HINT_XREG)) {
		/* save A or X */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_UI, /* uint32_t destination */
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, reg),
		    save_reg, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/*
	 * Prepare registers for fn(mbuf, k, &err) call.
	 */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_SCRATCH_REG1, 0,
	    BJ_BUF, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
		if (pc->k == 0) {
			/* k = X; */
			status = sljit_emit_op1(compiler,
			    SLJIT_MOV,
			    SLJIT_SCRATCH_REG2, 0,
			    BJ_XREG, 0);
			if (status != SLJIT_SUCCESS)
				return status;
		} else {
			/*
			 * Guard X + pc->k against 32-bit overflow; the
			 * jump is collected in ret0 and later bound to
			 * the common "return 0" exit.
			 */
			/* if (X > UINT32_MAX - pc->k) return 0; */
			jump = sljit_emit_cmp(compiler,
			    SLJIT_C_GREATER,
			    BJ_XREG, 0,
			    SLJIT_IMM, UINT32_MAX - pc->k);
			if (jump == NULL)
				return SLJIT_ERR_ALLOC_FAILED;
			if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
				return SLJIT_ERR_ALLOC_FAILED;

			/* k = X + pc->k; */
			status = sljit_emit_op2(compiler,
			    SLJIT_ADD,
			    SLJIT_SCRATCH_REG2, 0,
			    BJ_XREG, 0,
			    SLJIT_IMM, (uint32_t)pc->k);
			if (status != SLJIT_SUCCESS)
				return status;
		}
	} else {
		/* k = pc->k */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    SLJIT_SCRATCH_REG2, 0,
		    SLJIT_IMM, (uint32_t)pc->k);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/*
	 * The third argument of fn is an address on stack.
	 */
	status = sljit_get_local_base(compiler,
	    SLJIT_SCRATCH_REG3, 0,
	    offsetof(struct bpfjit_stack, err));
	if (status != SLJIT_SUCCESS)
		return status;

	/* fn(buf, k, &err); */
	status = sljit_emit_ijump(compiler,
	    SLJIT_CALL3,
	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
	if (status != SLJIT_SUCCESS)
		return status;

	if (dst != SLJIT_RETURN_REG) {
		/* move return value to dst */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    dst, 0,
		    SLJIT_RETURN_REG, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

	/* if (*err != 0) return 0; */
	jump = sljit_emit_cmp(compiler,
	    SLJIT_C_NOT_EQUAL|SLJIT_INT_OP,
	    SLJIT_MEM1(SLJIT_LOCALS_REG),
	    offsetof(struct bpfjit_stack, err),
	    SLJIT_IMM, 0);
	if (jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	if (save_reg == BJ_AREG || (hints & BJ_HINT_XREG)) {
		/* restore A or X */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_UI, /* uint32_t source */
		    save_reg, 0,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, reg));
		if (status != SLJIT_SUCCESS)
			return status;
	}

	return SLJIT_SUCCESS;
}
659 #endif
660 
661 /*
662  * Emit code for BPF_COP and BPF_COPX instructions.
663  */
664 static int
665 emit_cop(struct sljit_compiler *compiler, bpfjit_hint_t hints,
666     const bpf_ctx_t *bc, const struct bpf_insn *pc,
667     struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
668 {
669 #if BJ_XREG    == SLJIT_RETURN_REG   || \
670     BJ_XREG    == SLJIT_SCRATCH_REG1 || \
671     BJ_XREG    == SLJIT_SCRATCH_REG2 || \
672     BJ_XREG    == SLJIT_SCRATCH_REG3 || \
673     BJ_TMP3REG == SLJIT_SCRATCH_REG1 || \
674     BJ_TMP3REG == SLJIT_SCRATCH_REG2 || \
675     BJ_TMP3REG == SLJIT_SCRATCH_REG3
676 #error "Not supported assignment of registers."
677 #endif
678 
679 	struct sljit_jump *jump;
680 	sljit_si call_reg;
681 	sljit_sw call_off;
682 	int status;
683 
684 	BJ_ASSERT(bc != NULL && bc->copfuncs != NULL);
685 
686 	if (hints & BJ_HINT_LDX) {
687 		/* save X */
688 		status = sljit_emit_op1(compiler,
689 		    SLJIT_MOV_UI, /* uint32_t destination */
690 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
691 		    offsetof(struct bpfjit_stack, reg),
692 		    BJ_XREG, 0);
693 		if (status != SLJIT_SUCCESS)
694 			return status;
695 	}
696 
697 	if (BPF_MISCOP(pc->code) == BPF_COP) {
698 		call_reg = SLJIT_IMM;
699 		call_off = SLJIT_FUNC_OFFSET(bc->copfuncs[pc->k]);
700 	} else {
701 		/* if (X >= bc->nfuncs) return 0; */
702 		jump = sljit_emit_cmp(compiler,
703 		    SLJIT_C_GREATER_EQUAL,
704 		    BJ_XREG, 0,
705 		    SLJIT_IMM, bc->nfuncs);
706 		if (jump == NULL)
707 			return SLJIT_ERR_ALLOC_FAILED;
708 		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
709 			return SLJIT_ERR_ALLOC_FAILED;
710 
711 		/* tmp1 = ctx; */
712 		status = sljit_emit_op1(compiler,
713 		    SLJIT_MOV_P,
714 		    BJ_TMP1REG, 0,
715 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
716 		    offsetof(struct bpfjit_stack, ctx));
717 		if (status != SLJIT_SUCCESS)
718 			return status;
719 
720 		/* tmp1 = ctx->copfuncs; */
721 		status = sljit_emit_op1(compiler,
722 		    SLJIT_MOV_P,
723 		    BJ_TMP1REG, 0,
724 		    SLJIT_MEM1(BJ_TMP1REG),
725 		    offsetof(struct bpf_ctx, copfuncs));
726 		if (status != SLJIT_SUCCESS)
727 			return status;
728 
729 		/* tmp2 = X; */
730 		status = sljit_emit_op1(compiler,
731 		    SLJIT_MOV,
732 		    BJ_TMP2REG, 0,
733 		    BJ_XREG, 0);
734 		if (status != SLJIT_SUCCESS)
735 			return status;
736 
737 		/* tmp3 = ctx->copfuncs[tmp2]; */
738 		call_reg = BJ_TMP3REG;
739 		call_off = 0;
740 		status = sljit_emit_op1(compiler,
741 		    SLJIT_MOV_P,
742 		    call_reg, call_off,
743 		    SLJIT_MEM2(BJ_TMP1REG, BJ_TMP2REG),
744 		    SLJIT_WORD_SHIFT);
745 		if (status != SLJIT_SUCCESS)
746 			return status;
747 	}
748 
749 	/*
750 	 * Copy bpf_copfunc_t arguments to registers.
751 	 */
752 #if BJ_AREG != SLJIT_SCRATCH_REG3
753 	status = sljit_emit_op1(compiler,
754 	    SLJIT_MOV_UI,
755 	    SLJIT_SCRATCH_REG3, 0,
756 	    BJ_AREG, 0);
757 	if (status != SLJIT_SUCCESS)
758 		return status;
759 #endif
760 
761 	status = sljit_emit_op1(compiler,
762 	    SLJIT_MOV_P,
763 	    SLJIT_SCRATCH_REG1, 0,
764 	    SLJIT_MEM1(SLJIT_LOCALS_REG),
765 	    offsetof(struct bpfjit_stack, ctx));
766 	if (status != SLJIT_SUCCESS)
767 		return status;
768 
769 	status = sljit_emit_op1(compiler,
770 	    SLJIT_MOV_P,
771 	    SLJIT_SCRATCH_REG2, 0,
772 	    BJ_ARGS, 0);
773 	if (status != SLJIT_SUCCESS)
774 		return status;
775 
776 	status = sljit_emit_ijump(compiler,
777 	    SLJIT_CALL3, call_reg, call_off);
778 	if (status != SLJIT_SUCCESS)
779 		return status;
780 
781 #if BJ_AREG != SLJIT_RETURN_REG
782 	status = sljit_emit_op1(compiler,
783 	    SLJIT_MOV,
784 	    BJ_AREG, 0,
785 	    SLJIT_RETURN_REG, 0);
786 	if (status != SLJIT_SUCCESS)
787 		return status;
788 #endif
789 
790 	if (hints & BJ_HINT_LDX) {
791 		/* restore X */
792 		status = sljit_emit_op1(compiler,
793 		    SLJIT_MOV_UI, /* uint32_t source */
794 		    BJ_XREG, 0,
795 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
796 		    offsetof(struct bpfjit_stack, reg));
797 		if (status != SLJIT_SUCCESS)
798 			return status;
799 	}
800 
801 	return SLJIT_SUCCESS;
802 }
803 
804 /*
805  * Generate code for
806  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
807  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
808  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
809  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
810  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
811  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
812  */
static int
emit_pkt_read(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status = SLJIT_ERR_ALLOC_FAILED;
	uint32_t width;
	sljit_si ld_reg;
	struct sljit_jump *jump;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *over_mchain_jump;
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * If the caller did not pass a pending jump, emit our own:
	 * buflen == 0 diverts to the mchain handler below, which reads
	 * the packet through m_xword/m_xhalf/m_xbyte instead of BJ_BUF.
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	ld_reg = BJ_BUF;
	width = read_width(pc);

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* tmp1 = buflen - (pc->k + width); */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_TMP1REG, 0,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, k + width);
		if (status != SLJIT_SUCCESS)
			return status;

		/* ld_reg = buf + X; */
		ld_reg = BJ_TMP2REG;
		status = sljit_emit_op2(compiler,
		    SLJIT_ADD,
		    ld_reg, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* if (tmp1 < X) return 0; (out-of-bounds read) */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_LESS,
		    BJ_TMP1REG, 0,
		    BJ_XREG, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Inline load from contiguous memory at [ld_reg + k]. */
	switch (width) {
	case 4:
		status = emit_read32(compiler, ld_reg, k);
		break;
	case 2:
		status = emit_read16(compiler, ld_reg, k);
		break;
	case 1:
		status = emit_read8(compiler, ld_reg, k);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/*
		 * The caller's jump was taken for a different reason;
		 * re-test buflen here.
		 */
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Same load, but through the matching m_x* helper call. */
	switch (width) {
	case 4:
		status = emit_xcall(compiler, hints, pc, BJ_AREG,
		    ret0, ret0_size, ret0_maxsize, &m_xword);
		break;
	case 2:
		status = emit_xcall(compiler, hints, pc, BJ_AREG,
		    ret0, ret0_size, ret0_maxsize, &m_xhalf);
		break;
	case 1:
		status = emit_xcall(compiler, hints, pc, BJ_AREG,
		    ret0, ret0_size, ret0_maxsize, &m_xbyte);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return SLJIT_SUCCESS;
}
938 
939 static int
940 emit_memload(struct sljit_compiler *compiler,
941     sljit_si dst, uint32_t k, size_t extwords)
942 {
943 	int status;
944 	sljit_si src;
945 	sljit_sw srcw;
946 
947 	srcw = k * sizeof(uint32_t);
948 
949 	if (extwords == 0) {
950 		src = SLJIT_MEM1(SLJIT_LOCALS_REG);
951 		srcw += offsetof(struct bpfjit_stack, mem);
952 	} else {
953 		/* copy extmem pointer to the tmp1 register */
954 		status = sljit_emit_op1(compiler,
955 		    SLJIT_MOV_P,
956 		    BJ_TMP1REG, 0,
957 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
958 		    offsetof(struct bpfjit_stack, extmem));
959 		if (status != SLJIT_SUCCESS)
960 			return status;
961 		src = SLJIT_MEM1(BJ_TMP1REG);
962 	}
963 
964 	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, 0, src, srcw);
965 }
966 
967 static int
968 emit_memstore(struct sljit_compiler *compiler,
969     sljit_si src, uint32_t k, size_t extwords)
970 {
971 	int status;
972 	sljit_si dst;
973 	sljit_sw dstw;
974 
975 	dstw = k * sizeof(uint32_t);
976 
977 	if (extwords == 0) {
978 		dst = SLJIT_MEM1(SLJIT_LOCALS_REG);
979 		dstw += offsetof(struct bpfjit_stack, mem);
980 	} else {
981 		/* copy extmem pointer to the tmp1 register */
982 		status = sljit_emit_op1(compiler,
983 		    SLJIT_MOV_P,
984 		    BJ_TMP1REG, 0,
985 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
986 		    offsetof(struct bpfjit_stack, extmem));
987 		if (status != SLJIT_SUCCESS)
988 			return status;
989 		dst = SLJIT_MEM1(BJ_TMP1REG);
990 	}
991 
992 	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, dstw, src, 0);
993 }
994 
995 /*
996  * Emit code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
997  */
static int
emit_msh(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *jump, *over_mchain_jump;
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * buflen == 0 diverts to the mchain handler below, which loads
	 * the byte with m_xbyte() instead of a direct read from BJ_BUF.
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	/* tmp1 = buf[k] */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/*
		 * The caller's jump was taken for a different reason;
		 * re-test buflen here.
		 */
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* tmp1 = m_xbyte(mbuf, k, &err); */
	status = emit_xcall(compiler, hints, pc, BJ_TMP1REG,
	    ret0, ret0_size, ret0_maxsize, &m_xbyte);
	if (status != SLJIT_SUCCESS)
		return status;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 (i.e. X = 4 * (P[k:1] & 0xf)) */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;

	return SLJIT_SUCCESS;
}
1084 
1085 static int
1086 emit_pow2_division(struct sljit_compiler *compiler, uint32_t k)
1087 {
1088 	int shift = 0;
1089 	int status = SLJIT_SUCCESS;
1090 
1091 	while (k > 1) {
1092 		k >>= 1;
1093 		shift++;
1094 	}
1095 
1096 	BJ_ASSERT(k == 1 && shift < 32);
1097 
1098 	if (shift != 0) {
1099 		status = sljit_emit_op2(compiler,
1100 		    SLJIT_LSHR|SLJIT_INT_OP,
1101 		    BJ_AREG, 0,
1102 		    BJ_AREG, 0,
1103 		    SLJIT_IMM, shift);
1104 	}
1105 
1106 	return status;
1107 }
1108 
1109 #if !defined(BPFJIT_USE_UDIV)
1110 static sljit_uw
1111 divide(sljit_uw x, sljit_uw y)
1112 {
1113 
1114 	return (uint32_t)x / (uint32_t)y;
1115 }
1116 #endif
1117 
1118 /*
1119  * Emit code for A = A / div.
1120  * divt,divw are either SLJIT_IMM,pc->k or BJ_XREG,0.
1121  */
static int
emit_division(struct sljit_compiler *compiler, int divt, sljit_sw divw)
{
	int status;

#if BJ_XREG == SLJIT_RETURN_REG   || \
    BJ_XREG == SLJIT_SCRATCH_REG1 || \
    BJ_XREG == SLJIT_SCRATCH_REG2 || \
    BJ_AREG == SLJIT_SCRATCH_REG2
#error "Not supported assignment of registers."
#endif

	/* Dividend goes to SCRATCH_REG1 (both UDIV and the call use it). */
#if BJ_AREG != SLJIT_SCRATCH_REG1
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_SCRATCH_REG1, 0,
	    BJ_AREG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	/* Divisor goes to SCRATCH_REG2. */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_SCRATCH_REG2, 0,
	    divt, divw);
	if (status != SLJIT_SUCCESS)
		return status;

#if defined(BPFJIT_USE_UDIV)
	/*
	 * NOTE(review): status is overwritten below without a check;
	 * presumably a failed sljit emit leaves the compiler in an
	 * error state that later calls also report — confirm against
	 * the sljit API before relying on it.
	 */
	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);

#if BJ_AREG != SLJIT_SCRATCH_REG1
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BJ_AREG, 0,
	    SLJIT_SCRATCH_REG1, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif
#else
	/* No native divide opcode: call the C helper divide(). */
	status = sljit_emit_ijump(compiler,
	    SLJIT_CALL2,
	    SLJIT_IMM, SLJIT_FUNC_OFFSET(divide));

#if BJ_AREG != SLJIT_RETURN_REG
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BJ_AREG, 0,
	    SLJIT_RETURN_REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif
#endif

	return status;
}
1178 
1179 /*
1180  * Return true if pc is a "read from packet" instruction.
1181  * If length is not NULL and return value is true, *length will
1182  * be set to a safe length required to read a packet.
1183  */
1184 static bool
1185 read_pkt_insn(const struct bpf_insn *pc, bpfjit_abc_length_t *length)
1186 {
1187 	bool rv;
1188 	bpfjit_abc_length_t width;
1189 
1190 	switch (BPF_CLASS(pc->code)) {
1191 	default:
1192 		rv = false;
1193 		break;
1194 
1195 	case BPF_LD:
1196 		rv = BPF_MODE(pc->code) == BPF_ABS ||
1197 		     BPF_MODE(pc->code) == BPF_IND;
1198 		if (rv)
1199 			width = read_width(pc);
1200 		break;
1201 
1202 	case BPF_LDX:
1203 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
1204 		width = 1;
1205 		break;
1206 	}
1207 
1208 	if (rv && length != NULL) {
1209 		/*
1210 		 * Values greater than UINT32_MAX will generate
1211 		 * unconditional "return 0".
1212 		 */
1213 		*length = (uint32_t)pc->k + width;
1214 	}
1215 
1216 	return rv;
1217 }
1218 
1219 static void
1220 optimize_init(struct bpfjit_insn_data *insn_dat, size_t insn_count)
1221 {
1222 	size_t i;
1223 
1224 	for (i = 0; i < insn_count; i++) {
1225 		SLIST_INIT(&insn_dat[i].bjumps);
1226 		insn_dat[i].invalid = BJ_INIT_NOBITS;
1227 	}
1228 }
1229 
1230 /*
1231  * The function divides instructions into blocks. Destination of a jump
1232  * instruction starts a new block. BPF_RET and BPF_JMP instructions
1233  * terminate a block. Blocks are linear, that is, there are no jumps out
1234  * from the middle of a block and there are no jumps in to the middle of
1235  * a block.
1236  *
1237  * The function also sets bits in *initmask for memwords that
1238  * need to be initialized to zero. Note that this set should be empty
1239  * for any valid kernel filter program.
1240  */
1241 static bool
1242 optimize_pass1(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1243     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1244     bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
1245 {
1246 	struct bpfjit_jump *jtf;
1247 	size_t i;
1248 	uint32_t jt, jf;
1249 	bpfjit_abc_length_t length;
1250 	bpf_memword_init_t invalid; /* borrowed from bpf_filter() */
1251 	bool unreachable;
1252 
1253 	const size_t memwords = GET_MEMWORDS(bc);
1254 
1255 	*hints = 0;
1256 	*initmask = BJ_INIT_NOBITS;
1257 
1258 	unreachable = false;
1259 	invalid = ~BJ_INIT_NOBITS;
1260 
1261 	for (i = 0; i < insn_count; i++) {
1262 		if (!SLIST_EMPTY(&insn_dat[i].bjumps))
1263 			unreachable = false;
1264 		insn_dat[i].unreachable = unreachable;
1265 
1266 		if (unreachable)
1267 			continue;
1268 
1269 		invalid |= insn_dat[i].invalid;
1270 
1271 		if (read_pkt_insn(&insns[i], &length) && length > UINT32_MAX)
1272 			unreachable = true;
1273 
1274 		switch (BPF_CLASS(insns[i].code)) {
1275 		case BPF_RET:
1276 			if (BPF_RVAL(insns[i].code) == BPF_A)
1277 				*initmask |= invalid & BJ_INIT_ABIT;
1278 
1279 			unreachable = true;
1280 			continue;
1281 
1282 		case BPF_LD:
1283 			if (BPF_MODE(insns[i].code) == BPF_ABS)
1284 				*hints |= BJ_HINT_ABS;
1285 
1286 			if (BPF_MODE(insns[i].code) == BPF_IND) {
1287 				*hints |= BJ_HINT_IND | BJ_HINT_XREG;
1288 				*initmask |= invalid & BJ_INIT_XBIT;
1289 			}
1290 
1291 			if (BPF_MODE(insns[i].code) == BPF_MEM &&
1292 			    (uint32_t)insns[i].k < memwords) {
1293 				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
1294 			}
1295 
1296 			invalid &= ~BJ_INIT_ABIT;
1297 			continue;
1298 
1299 		case BPF_LDX:
1300 			*hints |= BJ_HINT_XREG | BJ_HINT_LDX;
1301 
1302 			if (BPF_MODE(insns[i].code) == BPF_MEM &&
1303 			    (uint32_t)insns[i].k < memwords) {
1304 				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
1305 			}
1306 
1307 			if (BPF_MODE(insns[i].code) == BPF_MSH &&
1308 			    BPF_SIZE(insns[i].code) == BPF_B) {
1309 				*hints |= BJ_HINT_MSH;
1310 			}
1311 
1312 			invalid &= ~BJ_INIT_XBIT;
1313 			continue;
1314 
1315 		case BPF_ST:
1316 			*initmask |= invalid & BJ_INIT_ABIT;
1317 
1318 			if ((uint32_t)insns[i].k < memwords)
1319 				invalid &= ~BJ_INIT_MBIT(insns[i].k);
1320 
1321 			continue;
1322 
1323 		case BPF_STX:
1324 			*hints |= BJ_HINT_XREG;
1325 			*initmask |= invalid & BJ_INIT_XBIT;
1326 
1327 			if ((uint32_t)insns[i].k < memwords)
1328 				invalid &= ~BJ_INIT_MBIT(insns[i].k);
1329 
1330 			continue;
1331 
1332 		case BPF_ALU:
1333 			*initmask |= invalid & BJ_INIT_ABIT;
1334 
1335 			if (insns[i].code != (BPF_ALU|BPF_NEG) &&
1336 			    BPF_SRC(insns[i].code) == BPF_X) {
1337 				*hints |= BJ_HINT_XREG;
1338 				*initmask |= invalid & BJ_INIT_XBIT;
1339 			}
1340 
1341 			invalid &= ~BJ_INIT_ABIT;
1342 			continue;
1343 
1344 		case BPF_MISC:
1345 			switch (BPF_MISCOP(insns[i].code)) {
1346 			case BPF_TAX: // X <- A
1347 				*hints |= BJ_HINT_XREG;
1348 				*initmask |= invalid & BJ_INIT_ABIT;
1349 				invalid &= ~BJ_INIT_XBIT;
1350 				continue;
1351 
1352 			case BPF_TXA: // A <- X
1353 				*hints |= BJ_HINT_XREG;
1354 				*initmask |= invalid & BJ_INIT_XBIT;
1355 				invalid &= ~BJ_INIT_ABIT;
1356 				continue;
1357 
1358 			case BPF_COPX:
1359 				*hints |= BJ_HINT_XREG | BJ_HINT_COPX;
1360 				/* FALLTHROUGH */
1361 
1362 			case BPF_COP:
1363 				*hints |= BJ_HINT_COP;
1364 				*initmask |= invalid & BJ_INIT_ABIT;
1365 				invalid &= ~BJ_INIT_ABIT;
1366 				continue;
1367 			}
1368 
1369 			continue;
1370 
1371 		case BPF_JMP:
1372 			/* Initialize abc_length for ABC pass. */
1373 			insn_dat[i].u.jdata.abc_length = MAX_ABC_LENGTH;
1374 
1375 			if (BPF_OP(insns[i].code) == BPF_JA) {
1376 				jt = jf = insns[i].k;
1377 			} else {
1378 				jt = insns[i].jt;
1379 				jf = insns[i].jf;
1380 			}
1381 
1382 			if (jt >= insn_count - (i + 1) ||
1383 			    jf >= insn_count - (i + 1)) {
1384 				return false;
1385 			}
1386 
1387 			if (jt > 0 && jf > 0)
1388 				unreachable = true;
1389 
1390 			jt += i + 1;
1391 			jf += i + 1;
1392 
1393 			jtf = insn_dat[i].u.jdata.jtf;
1394 
1395 			jtf[0].jdata = &insn_dat[i].u.jdata;
1396 			SLIST_INSERT_HEAD(&insn_dat[jt].bjumps,
1397 			    &jtf[0], entries);
1398 
1399 			if (jf != jt) {
1400 				jtf[1].jdata = &insn_dat[i].u.jdata;
1401 				SLIST_INSERT_HEAD(&insn_dat[jf].bjumps,
1402 				    &jtf[1], entries);
1403 			}
1404 
1405 			insn_dat[jf].invalid |= invalid;
1406 			insn_dat[jt].invalid |= invalid;
1407 			invalid = 0;
1408 
1409 			continue;
1410 		}
1411 	}
1412 
1413 	return true;
1414 }
1415 
1416 /*
1417  * Array Bounds Check Elimination (ABC) pass.
1418  */
1419 static void
1420 optimize_pass2(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1421     struct bpfjit_insn_data *insn_dat, size_t insn_count)
1422 {
1423 	struct bpfjit_jump *jmp;
1424 	const struct bpf_insn *pc;
1425 	struct bpfjit_insn_data *pd;
1426 	size_t i;
1427 	bpfjit_abc_length_t length, abc_length = 0;
1428 
1429 	const size_t extwords = GET_EXTWORDS(bc);
1430 
1431 	for (i = insn_count; i != 0; i--) {
1432 		pc = &insns[i-1];
1433 		pd = &insn_dat[i-1];
1434 
1435 		if (pd->unreachable)
1436 			continue;
1437 
1438 		switch (BPF_CLASS(pc->code)) {
1439 		case BPF_RET:
1440 			/*
1441 			 * It's quite common for bpf programs to
1442 			 * check packet bytes in increasing order
1443 			 * and return zero if bytes don't match
1444 			 * specified critetion. Such programs disable
1445 			 * ABC optimization completely because for
1446 			 * every jump there is a branch with no read
1447 			 * instruction.
1448 			 * With no side effects, BPF_STMT(BPF_RET+BPF_K, 0)
1449 			 * is indistinguishable from out-of-bound load.
1450 			 * Therefore, abc_length can be set to
1451 			 * MAX_ABC_LENGTH and enable ABC for many
1452 			 * bpf programs.
1453 			 * If this optimization encounters any
1454 			 * instruction with a side effect, it will
1455 			 * reset abc_length.
1456 			 */
1457 			if (BPF_RVAL(pc->code) == BPF_K && pc->k == 0)
1458 				abc_length = MAX_ABC_LENGTH;
1459 			else
1460 				abc_length = 0;
1461 			break;
1462 
1463 		case BPF_MISC:
1464 			if (BPF_MISCOP(pc->code) == BPF_COP ||
1465 			    BPF_MISCOP(pc->code) == BPF_COPX) {
1466 				/* COP instructions can have side effects. */
1467 				abc_length = 0;
1468 			}
1469 			break;
1470 
1471 		case BPF_ST:
1472 		case BPF_STX:
1473 			if (extwords != 0) {
1474 				/* Write to memory is visible after a call. */
1475 				abc_length = 0;
1476 			}
1477 			break;
1478 
1479 		case BPF_JMP:
1480 			abc_length = pd->u.jdata.abc_length;
1481 			break;
1482 
1483 		default:
1484 			if (read_pkt_insn(pc, &length)) {
1485 				if (abc_length < length)
1486 					abc_length = length;
1487 				pd->u.rdata.abc_length = abc_length;
1488 			}
1489 			break;
1490 		}
1491 
1492 		SLIST_FOREACH(jmp, &pd->bjumps, entries) {
1493 			if (jmp->jdata->abc_length > abc_length)
1494 				jmp->jdata->abc_length = abc_length;
1495 		}
1496 	}
1497 }
1498 
/*
 * Forward pass that converts abc_length values computed by pass 2
 * into check_length values: the extra buffer-length check each read
 * instruction must actually emit. A read emits a check only when the
 * length already verified on every path (checked_length) is smaller
 * than its abc_length.
 */
static void
optimize_pass3(const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	struct bpfjit_jump *jmp;
	size_t i;
	bpfjit_abc_length_t checked_length = 0;

	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/* At a join point, only the minimum over all incoming
		 * jumps is guaranteed to have been checked. */
		SLIST_FOREACH(jmp, &insn_dat[i].bjumps, entries) {
			if (jmp->jdata->checked_length < checked_length)
				checked_length = jmp->jdata->checked_length;
		}

		if (BPF_CLASS(insns[i].code) == BPF_JMP) {
			/* Record what both branch targets can rely on. */
			insn_dat[i].u.jdata.checked_length = checked_length;
		} else if (read_pkt_insn(&insns[i], NULL)) {
			struct bpfjit_read_pkt_data *rdata =
			    &insn_dat[i].u.rdata;
			rdata->check_length = 0;
			if (checked_length < rdata->abc_length) {
				/* Emit one check covering abc_length. */
				checked_length = rdata->abc_length;
				rdata->check_length = checked_length;
			}
		}
	}
}
1529 
1530 static bool
1531 optimize(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1532     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1533     bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
1534 {
1535 
1536 	optimize_init(insn_dat, insn_count);
1537 
1538 	if (!optimize_pass1(bc, insns, insn_dat, insn_count, initmask, hints))
1539 		return false;
1540 
1541 	optimize_pass2(bc, insns, insn_dat, insn_count);
1542 	optimize_pass3(insns, insn_dat, insn_count);
1543 
1544 	return true;
1545 }
1546 
1547 /*
1548  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1549  */
1550 static int
1551 bpf_alu_to_sljit_op(const struct bpf_insn *pc)
1552 {
1553 
1554 	/*
1555 	 * Note: all supported 64bit arches have 32bit multiply
1556 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1557 	 */
1558 	switch (BPF_OP(pc->code)) {
1559 	case BPF_ADD: return SLJIT_ADD;
1560 	case BPF_SUB: return SLJIT_SUB;
1561 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1562 	case BPF_OR:  return SLJIT_OR;
1563 	case BPF_AND: return SLJIT_AND;
1564 	case BPF_LSH: return SLJIT_SHL;
1565 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
1566 	default:
1567 		BJ_ASSERT(false);
1568 		return 0;
1569 	}
1570 }
1571 
1572 /*
1573  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1574  */
1575 static int
1576 bpf_jmp_to_sljit_cond(const struct bpf_insn *pc, bool negate)
1577 {
1578 	/*
1579 	 * Note: all supported 64bit arches have 32bit comparison
1580 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1581 	 */
1582 	int rv = SLJIT_INT_OP;
1583 
1584 	switch (BPF_OP(pc->code)) {
1585 	case BPF_JGT:
1586 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1587 		break;
1588 	case BPF_JGE:
1589 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1590 		break;
1591 	case BPF_JEQ:
1592 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1593 		break;
1594 	case BPF_JSET:
1595 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1596 		break;
1597 	default:
1598 		BJ_ASSERT(false);
1599 	}
1600 
1601 	return rv;
1602 }
1603 
1604 /*
1605  * Convert BPF_K and BPF_X to sljit register.
1606  */
1607 static int
1608 kx_to_reg(const struct bpf_insn *pc)
1609 {
1610 
1611 	switch (BPF_SRC(pc->code)) {
1612 	case BPF_K: return SLJIT_IMM;
1613 	case BPF_X: return BJ_XREG;
1614 	default:
1615 		BJ_ASSERT(false);
1616 		return 0;
1617 	}
1618 }
1619 
1620 static sljit_sw
1621 kx_to_reg_arg(const struct bpf_insn *pc)
1622 {
1623 
1624 	switch (BPF_SRC(pc->code)) {
1625 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1626 	case BPF_X: return 0;               /* BJ_XREG, 0,      */
1627 	default:
1628 		BJ_ASSERT(false);
1629 		return 0;
1630 	}
1631 }
1632 
/*
 * Translate the optimized BPF program into sljit intermediate code.
 * Emits one code fragment per reachable instruction, resolves intra-
 * program jumps, and collects all "return 0" jumps in the ret0 array
 * so they can be pointed at a single epilogue label.
 * Returns true on success; on failure the caller discards the compiler.
 */
static bool
generate_insn_code(struct sljit_compiler *compiler, bpfjit_hint_t hints,
    const bpf_ctx_t *bc, const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	/* a list of jumps to out-of-bound return from a generated function */
	struct sljit_jump **ret0;
	size_t ret0_size, ret0_maxsize;

	struct sljit_jump *jump;
	struct sljit_label *label;
	const struct bpf_insn *pc;
	struct bpfjit_jump *bjump, *jtf;
	struct sljit_jump *to_mchain_jump;

	size_t i;
	int status;
	int branching, negate;
	unsigned int rval, mode, src;
	uint32_t jt, jf;

	bool unconditional_ret;
	bool rv;

	const size_t extwords = GET_EXTWORDS(bc);
	const size_t memwords = GET_MEMWORDS(bc);

	ret0 = NULL;
	rv = false;

	/* ret0 grows on demand via append_jump(). */
	ret0_size = 0;
	ret0_maxsize = 64;
	ret0 = BJ_ALLOC(ret0_maxsize * sizeof(ret0[0]));
	if (ret0 == NULL)
		goto fail;

	/* reset sjump members of jdata */
	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable ||
		    BPF_CLASS(insns[i].code) != BPF_JMP) {
			continue;
		}

		jtf = insn_dat[i].u.jdata.jtf;
		jtf[0].sjump = jtf[1].sjump = NULL;
	}

	/* main loop */
	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/*
		 * Resolve jumps to the current insn.
		 */
		label = NULL;
		SLIST_FOREACH(bjump, &insn_dat[i].bjumps, entries) {
			if (bjump->sjump != NULL) {
				if (label == NULL)
					label = sljit_emit_label(compiler);
				if (label == NULL)
					goto fail;
				sljit_set_label(bjump->sjump, label);
			}
		}

		to_mchain_jump = NULL;
		unconditional_ret = false;

		/* Emit the bounds check computed by the ABC passes. */
		if (read_pkt_insn(&insns[i], NULL)) {
			if (insn_dat[i].u.rdata.check_length > UINT32_MAX) {
				/* Jump to "return 0" unconditionally. */
				unconditional_ret = true;
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (insn_dat[i].u.rdata.check_length > 0) {
				/* if (buflen < check_length) return 0; */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_LESS,
				    BJ_BUFLEN, 0,
				    SLJIT_IMM,
				    insn_dat[i].u.rdata.check_length);
				if (jump == NULL)
					goto fail;
#ifdef _KERNEL
				/* In-kernel, short buffers fall back to
				 * the mbuf-chain read path. */
				to_mchain_jump = jump;
#else
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
#endif
			}
		}

		pc = &insns[i];
		switch (BPF_CLASS(pc->code)) {

		default:
			goto fail;

		case BPF_LD:
			/* BPF_LD+BPF_IMM          A <- k */
			if (pc->code == (BPF_LD|BPF_IMM)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_MEM          A <- M[k] */
			if (pc->code == (BPF_LD|BPF_MEM)) {
				if ((uint32_t)pc->k >= memwords)
					goto fail;
				status = emit_memload(compiler,
				    BJ_AREG, pc->k, extwords);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV, /* size_t source */
				    BJ_AREG, 0,
				    SLJIT_MEM1(BJ_ARGS),
				    offsetof(struct bpf_args, wirelen));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			mode = BPF_MODE(pc->code);
			if (mode != BPF_ABS && mode != BPF_IND)
				goto fail;

			/* The read can never succeed: code was emitted
			 * above to jump straight to "return 0". */
			if (unconditional_ret)
				continue;

			status = emit_pkt_read(compiler, hints, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_LDX:
			mode = BPF_MODE(pc->code);

			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
			if (mode == BPF_IMM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_XREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
			if (mode == BPF_LEN) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV, /* size_t source */
				    BJ_XREG, 0,
				    SLJIT_MEM1(BJ_ARGS),
				    offsetof(struct bpf_args, wirelen));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
			if (mode == BPF_MEM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				if ((uint32_t)pc->k >= memwords)
					goto fail;
				status = emit_memload(compiler,
				    BJ_XREG, pc->k, extwords);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
				goto fail;

			if (unconditional_ret)
				continue;

			status = emit_msh(compiler, hints, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ST:
			if (pc->code != BPF_ST ||
			    (uint32_t)pc->k >= memwords) {
				goto fail;
			}

			status = emit_memstore(compiler,
			    BJ_AREG, pc->k, extwords);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_STX:
			if (pc->code != BPF_STX ||
			    (uint32_t)pc->k >= memwords) {
				goto fail;
			}

			status = emit_memstore(compiler,
			    BJ_XREG, pc->k, extwords);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ALU:
			if (pc->code == (BPF_ALU|BPF_NEG)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_NEG,
				    BJ_AREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			if (BPF_OP(pc->code) != BPF_DIV) {
				status = sljit_emit_op2(compiler,
				    bpf_alu_to_sljit_op(pc),
				    BJ_AREG, 0,
				    BJ_AREG, 0,
				    kx_to_reg(pc), kx_to_reg_arg(pc));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_DIV */

			src = BPF_SRC(pc->code);
			if (src != BPF_X && src != BPF_K)
				goto fail;

			/* division by zero? */
			if (src == BPF_X) {
				/* Runtime check: X == 0 means return 0. */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_EQUAL|SLJIT_INT_OP,
				    BJ_XREG, 0,
				    SLJIT_IMM, 0);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (pc->k == 0) {
				/* Constant zero divisor: always return 0. */
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			}

			if (src == BPF_X) {
				status = emit_division(compiler, BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			} else if (pc->k != 0) {
				/* Power-of-two divisors become shifts. */
				if (pc->k & (pc->k - 1)) {
				    status = emit_division(compiler,
				        SLJIT_IMM, (uint32_t)pc->k);
				} else {
				    status = emit_pow2_division(compiler,
				        (uint32_t)pc->k);
				}
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_JMP:
			if (BPF_OP(pc->code) == BPF_JA) {
				jt = jf = pc->k;
			} else {
				jt = pc->jt;
				jf = pc->jf;
			}

			/* jt == 0 falls through; emit the negated test. */
			negate = (jt == 0) ? 1 : 0;
			branching = (jt == jf) ? 0 : 1;
			jtf = insn_dat[i].u.jdata.jtf;

			if (branching) {
				if (BPF_OP(pc->code) != BPF_JSET) {
					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
				} else {
					/* JSET: test (A & operand) != 0. */
					status = sljit_emit_op2(compiler,
					    SLJIT_AND,
					    BJ_TMP1REG, 0,
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
					if (status != SLJIT_SUCCESS)
						goto fail;

					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_TMP1REG, 0,
					    SLJIT_IMM, 0);
				}

				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[negate].sjump == NULL);
				jtf[negate].sjump = jump;
			}

			/* Unconditional jump when neither target falls
			 * through (or for BPF_JA). */
			if (!branching || (jt != 0 && jf != 0)) {
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[branching].sjump == NULL);
				jtf[branching].sjump = jump;
			}

			continue;

		case BPF_RET:
			rval = BPF_RVAL(pc->code);
			if (rval == BPF_X)
				goto fail;

			/* BPF_RET+BPF_K    accept k bytes */
			if (rval == BPF_K) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			/* BPF_RET+BPF_A    accept A bytes */
			if (rval == BPF_A) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(pc->code)) {
			case BPF_TAX:
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_XREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_TXA:
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_COP:
			case BPF_COPX:
				if (bc == NULL || bc->copfuncs == NULL)
					goto fail;
				if (BPF_MISCOP(pc->code) == BPF_COP &&
				    (uint32_t)pc->k >= bc->nfuncs) {
					goto fail;
				}

				status = emit_cop(compiler, hints, bc, pc,
				    &ret0, &ret0_size, &ret0_maxsize);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			goto fail;
		} /* switch */
	} /* main loop */

	BJ_ASSERT(ret0_size <= ret0_maxsize);

	/* Point all collected "return 0" jumps at one epilogue. */
	if (ret0_size > 0) {
		label = sljit_emit_label(compiler);
		if (label == NULL)
			goto fail;
		for (i = 0; i < ret0_size; i++)
			sljit_set_label(ret0[i], label);
	}

	status = sljit_emit_return(compiler,
	    SLJIT_MOV_UI,
	    SLJIT_IMM, 0);
	if (status != SLJIT_SUCCESS)
		goto fail;

	rv = true;

fail:
	if (ret0 != NULL)
		BJ_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));

	return rv;
}
2086 
/*
 * Compile a BPF program into native code.
 * Validates and optimizes the program, emits the function prologue
 * (zero-initialization of memwords and the A/X registers as required
 * by the optimizer), generates the body and returns a callable
 * function pointer, or NULL on any failure.
 */
bpfjit_func_t
bpfjit_generate_code(const bpf_ctx_t *bc,
    const struct bpf_insn *insns, size_t insn_count)
{
	void *rv;
	struct sljit_compiler *compiler;

	size_t i;
	int status;

	/* optimization related */
	bpf_memword_init_t initmask;
	bpfjit_hint_t hints;

	/* memory store location for initial zero initialization */
	sljit_si mem_reg;
	sljit_sw mem_off;

	struct bpfjit_insn_data *insn_dat;

	const size_t extwords = GET_EXTWORDS(bc);
	const size_t memwords = GET_MEMWORDS(bc);
	const bpf_memword_init_t preinited = extwords ? bc->preinited : 0;

	rv = NULL;
	compiler = NULL;
	insn_dat = NULL;

	if (memwords > MAX_MEMWORDS)
		goto fail;

	/* Guard the multiplication in the allocation below. */
	if (insn_count == 0 || insn_count > SIZE_MAX / sizeof(insn_dat[0]))
		goto fail;

	insn_dat = BJ_ALLOC(insn_count * sizeof(insn_dat[0]));
	if (insn_dat == NULL)
		goto fail;

	if (!optimize(bc, insns, insn_dat, insn_count, &initmask, &hints))
		goto fail;

	compiler = sljit_create_compiler();
	if (compiler == NULL)
		goto fail;

#if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
	sljit_compiler_verbose(compiler, stderr);
#endif

	/* Prologue: 2 arguments, registers sized by the hint set. */
	status = sljit_emit_enter(compiler,
	    2, nscratches(hints), nsaveds(hints), sizeof(struct bpfjit_stack));
	if (status != SLJIT_SUCCESS)
		goto fail;

	if (hints & BJ_HINT_COP) {
		/* save ctx argument */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, ctx),
		    BJ_CTX_ARG, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (extwords == 0) {
		/* Memwords live on the stack of the generated function. */
		mem_reg = SLJIT_MEM1(SLJIT_LOCALS_REG);
		mem_off = offsetof(struct bpfjit_stack, mem);
	} else {
		/* copy "mem" argument from bpf_args to bpfjit_stack */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    BJ_TMP1REG, 0,
		    SLJIT_MEM1(BJ_ARGS), offsetof(struct bpf_args, mem));
		if (status != SLJIT_SUCCESS)
			goto fail;

		status = sljit_emit_op1(compiler,
		    SLJIT_MOV_P,
		    SLJIT_MEM1(SLJIT_LOCALS_REG),
		    offsetof(struct bpfjit_stack, extmem),
		    BJ_TMP1REG, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;

		mem_reg = SLJIT_MEM1(BJ_TMP1REG);
		mem_off = 0;
	}

	/*
	 * Exclude pre-initialised external memory words but keep
	 * initialization statuses of A and X registers in case
	 * bc->preinited wrongly sets those two bits.
	 */
	initmask &= ~preinited | BJ_INIT_ABIT | BJ_INIT_XBIT;

#if defined(_KERNEL)
	/* bpf_filter() checks initialization of memwords. */
	BJ_ASSERT((initmask & (BJ_INIT_MBIT(memwords) - 1)) == 0);
#endif
	/* Zero every memword the program may read before writing. */
	for (i = 0; i < memwords; i++) {
		if (initmask & BJ_INIT_MBIT(i)) {
			/* M[i] = 0; */
			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    mem_reg, mem_off + i * sizeof(uint32_t),
			    SLJIT_IMM, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;
		}
	}

	if (initmask & BJ_INIT_ABIT) {
		/* A = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_AREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (initmask & BJ_INIT_XBIT) {
		/* X = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_XREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	/* Cache buf and buflen in registers for the body. */
	status = load_buf_buflen(compiler);
	if (status != SLJIT_SUCCESS)
		goto fail;

	if (!generate_insn_code(compiler, hints,
	    bc, insns, insn_dat, insn_count)) {
		goto fail;
	}

	rv = sljit_generate_code(compiler);

fail:
	if (compiler != NULL)
		sljit_free_compiler(compiler);

	if (insn_dat != NULL)
		BJ_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));

	return (bpfjit_func_t)rv;
}
2239 
2240 void
2241 bpfjit_free_code(bpfjit_func_t code)
2242 {
2243 
2244 	sljit_free_code((void *)code);
2245 }
2246