xref: /netbsd-src/sys/net/bpfjit.c (revision aad9773e38ed2370a628a6416e098f9008fc10a7)
1 /*	$NetBSD: bpfjit.c,v 1.36 2014/11/20 20:31:22 alnsn Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2014 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.36 2014/11/20 20:31:22 alnsn Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.36 2014/11/20 20:31:22 alnsn Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <assert.h>
44 #define BJ_ASSERT(c) assert(c)
45 #else
46 #define BJ_ASSERT(c) KASSERT(c)
47 #endif
48 
49 #ifndef _KERNEL
50 #include <stdlib.h>
51 #define BJ_ALLOC(sz) malloc(sz)
52 #define BJ_FREE(p, sz) free(p)
53 #else
54 #include <sys/kmem.h>
55 #define BJ_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
56 #define BJ_FREE(p, sz) kmem_free(p, sz)
57 #endif
58 
59 #ifndef _KERNEL
60 #include <limits.h>
61 #include <stdbool.h>
62 #include <stddef.h>
63 #include <stdint.h>
64 #else
65 #include <sys/atomic.h>
66 #include <sys/module.h>
67 #endif
68 
69 #define	__BPF_PRIVATE
70 #include <net/bpf.h>
71 #include <net/bpfjit.h>
72 #include <sljitLir.h>
73 
74 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
75 #include <stdio.h> /* for stderr */
76 #endif
77 
78 /*
79  * Arguments of generated bpfjit_func_t.
80  * The first argument is reassigned upon entry
81  * to a more frequently used buf argument.
82  */
83 #define BJ_CTX_ARG	SLJIT_SAVED_REG1
84 #define BJ_ARGS		SLJIT_SAVED_REG2
85 
86 /*
87  * Permanent register assignments.
88  */
89 #define BJ_BUF		SLJIT_SAVED_REG1
90 //#define BJ_ARGS	SLJIT_SAVED_REG2
91 #define BJ_BUFLEN	SLJIT_SAVED_REG3
92 #define BJ_AREG		SLJIT_SCRATCH_REG1
93 #define BJ_TMP1REG	SLJIT_SCRATCH_REG2
94 #define BJ_TMP2REG	SLJIT_SCRATCH_REG3
95 #define BJ_XREG		SLJIT_TEMPORARY_EREG1
96 #define BJ_TMP3REG	SLJIT_TEMPORARY_EREG2
97 
98 #ifdef _KERNEL
99 #define MAX_MEMWORDS BPF_MAX_MEMWORDS
100 #else
101 #define MAX_MEMWORDS BPF_MEMWORDS
102 #endif
103 
104 #define BJ_INIT_NOBITS  ((bpf_memword_init_t)0)
105 #define BJ_INIT_MBIT(k) BPF_MEMWORD_INIT(k)
106 #define BJ_INIT_ABIT    BJ_INIT_MBIT(MAX_MEMWORDS)
107 #define BJ_INIT_XBIT    BJ_INIT_MBIT(MAX_MEMWORDS + 1)
108 
109 /*
110  * Get a number of memwords and external memwords from a bpf_ctx object.
111  */
112 #define GET_EXTWORDS(bc) ((bc) ? (bc)->extwords : 0)
113 #define GET_MEMWORDS(bc) (GET_EXTWORDS(bc) ? GET_EXTWORDS(bc) : BPF_MEMWORDS)
114 
115 /*
116  * Optimization hints.
117  */
118 typedef unsigned int bpfjit_hint_t;
119 #define BJ_HINT_ABS  0x01 /* packet read at absolute offset   */
120 #define BJ_HINT_IND  0x02 /* packet read at variable offset   */
121 #define BJ_HINT_MSH  0x04 /* BPF_MSH instruction              */
122 #define BJ_HINT_COP  0x08 /* BPF_COP or BPF_COPX instruction  */
123 #define BJ_HINT_COPX 0x10 /* BPF_COPX instruction             */
124 #define BJ_HINT_XREG 0x20 /* BJ_XREG is needed                */
125 #define BJ_HINT_LDX  0x40 /* BPF_LDX instruction              */
126 #define BJ_HINT_PKT  (BJ_HINT_ABS|BJ_HINT_IND|BJ_HINT_MSH)
127 
128 /*
129  * Datatype for Array Bounds Check Elimination (ABC) pass.
130  */
131 typedef uint64_t bpfjit_abc_length_t;
132 #define MAX_ABC_LENGTH (UINT32_MAX + UINT64_C(4)) /* max. width is 4 */
133 
134 struct bpfjit_stack
135 {
136 	bpf_ctx_t *ctx;
137 	uint32_t *extmem; /* pointer to external memory store */
138 	uint32_t reg; /* saved A or X register */
139 #ifdef _KERNEL
140 	int err; /* 3rd argument for m_xword/m_xhalf/m_xbyte function call */
141 #endif
142 	uint32_t mem[BPF_MEMWORDS]; /* internal memory store */
143 };
144 
145 /*
146  * Data for BPF_JMP instruction.
147  * Forward declaration for struct bpfjit_jump.
148  */
149 struct bpfjit_jump_data;
150 
151 /*
152  * Node of bjumps list.
153  */
154 struct bpfjit_jump {
155 	struct sljit_jump *sjump;
156 	SLIST_ENTRY(bpfjit_jump) entries;
157 	struct bpfjit_jump_data *jdata;
158 };
159 
160 /*
161  * Data for BPF_JMP instruction.
162  */
163 struct bpfjit_jump_data {
164 	/*
165 	 * These entries make up bjumps list:
166 	 * jtf[0] - when coming from jt path,
167 	 * jtf[1] - when coming from jf path.
168 	 */
169 	struct bpfjit_jump jtf[2];
170 	/*
171 	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
172 	 */
173 	bpfjit_abc_length_t abc_length;
174 	/*
175 	 * Length checked by the last out-of-bounds check.
176 	 */
177 	bpfjit_abc_length_t checked_length;
178 };
179 
180 /*
181  * Data for "read from packet" instructions.
182  * See also read_pkt_insn() function below.
183  */
184 struct bpfjit_read_pkt_data {
185 	/*
186 	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
187 	 */
188 	bpfjit_abc_length_t abc_length;
189 	/*
190 	 * If positive, emit "if (buflen < check_length) return 0"
191 	 * out-of-bounds check.
192 	 * Values greater than UINT32_MAX generate unconditional "return 0".
193 	 */
194 	bpfjit_abc_length_t check_length;
195 };
196 
197 /*
198  * Additional (optimization-related) data for bpf_insn.
199  */
200 struct bpfjit_insn_data {
201 	/* List of jumps to this insn. */
202 	SLIST_HEAD(, bpfjit_jump) bjumps;
203 
204 	union {
205 		struct bpfjit_jump_data     jdata;
206 		struct bpfjit_read_pkt_data rdata;
207 	} u;
208 
209 	bpf_memword_init_t invalid;
210 	bool unreachable;
211 };
212 
213 #ifdef _KERNEL
214 
215 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
216 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
217 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
218 
219 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
220 
221 static int
222 bpfjit_modcmd(modcmd_t cmd, void *arg)
223 {
224 
225 	switch (cmd) {
226 	case MODULE_CMD_INIT:
227 		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
228 		membar_producer();
229 		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
230 		membar_producer();
231 		return 0;
232 
233 	case MODULE_CMD_FINI:
234 		return EOPNOTSUPP;
235 
236 	default:
237 		return ENOTTY;
238 	}
239 }
240 #endif
241 
242 /*
243  * Return a number of scratch registers to pass
244  * to sljit_emit_enter() function.
245  */
246 static sljit_si
247 nscratches(bpfjit_hint_t hints)
248 {
249 	sljit_si rv = 2;
250 
251 #ifdef _KERNEL
252 	if (hints & BJ_HINT_PKT)
253 		rv = 3; /* xcall with three arguments */
254 #endif
255 
256 	if (hints & BJ_HINT_IND)
257 		rv = 3; /* uses BJ_TMP2REG */
258 
259 	if (hints & BJ_HINT_COP)
260 		rv = 3; /* calls copfunc with three arguments */
261 
262 	if (hints & BJ_HINT_XREG)
263 		rv = 4; /* uses BJ_XREG */
264 
265 #ifdef _KERNEL
266 	if (hints & BJ_HINT_LDX)
267 		rv = 5; /* uses BJ_TMP3REG */
268 #endif
269 
270 	if (hints & BJ_HINT_COPX)
271 		rv = 5; /* uses BJ_TMP3REG */
272 
273 	return rv;
274 }
275 
276 /*
277  * Return a number of saved registers to pass
278  * to sljit_emit_enter() function.
279  */
280 static sljit_si
281 nsaveds(bpfjit_hint_t hints)
282 {
283 	sljit_si rv = 3;
284 
285 	return rv;
286 }
287 
288 static uint32_t
289 read_width(const struct bpf_insn *pc)
290 {
291 
292 	switch (BPF_SIZE(pc->code)) {
293 	case BPF_W:
294 		return 4;
295 	case BPF_H:
296 		return 2;
297 	case BPF_B:
298 		return 1;
299 	default:
300 		BJ_ASSERT(false);
301 		return 0;
302 	}
303 }
304 
305 /*
306  * Copy buf and buflen members of bpf_args from BJ_ARGS
307  * pointer to BJ_BUF and BJ_BUFLEN registers.
308  */
309 static int
310 load_buf_buflen(struct sljit_compiler *compiler)
311 {
312 	int status;
313 
314 	status = sljit_emit_op1(compiler,
315 	    SLJIT_MOV_P,
316 	    BJ_BUF, 0,
317 	    SLJIT_MEM1(BJ_ARGS),
318 	    offsetof(struct bpf_args, pkt));
319 	if (status != SLJIT_SUCCESS)
320 		return status;
321 
322 	status = sljit_emit_op1(compiler,
323 	    SLJIT_MOV, /* size_t source */
324 	    BJ_BUFLEN, 0,
325 	    SLJIT_MEM1(BJ_ARGS),
326 	    offsetof(struct bpf_args, buflen));
327 
328 	return status;
329 }
330 
331 static bool
332 grow_jumps(struct sljit_jump ***jumps, size_t *size)
333 {
334 	struct sljit_jump **newptr;
335 	const size_t elemsz = sizeof(struct sljit_jump *);
336 	size_t old_size = *size;
337 	size_t new_size = 2 * old_size;
338 
339 	if (new_size < old_size || new_size > SIZE_MAX / elemsz)
340 		return false;
341 
342 	newptr = BJ_ALLOC(new_size * elemsz);
343 	if (newptr == NULL)
344 		return false;
345 
346 	memcpy(newptr, *jumps, old_size * elemsz);
347 	BJ_FREE(*jumps, old_size * elemsz);
348 
349 	*jumps = newptr;
350 	*size = new_size;
351 	return true;
352 }
353 
354 static bool
355 append_jump(struct sljit_jump *jump, struct sljit_jump ***jumps,
356     size_t *size, size_t *max_size)
357 {
358 	if (*size == *max_size && !grow_jumps(jumps, max_size))
359 		return false;
360 
361 	(*jumps)[(*size)++] = jump;
362 	return true;
363 }
364 
365 /*
366  * Emit code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
367  */
368 static int
369 emit_read8(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
370 {
371 
372 	return sljit_emit_op1(compiler,
373 	    SLJIT_MOV_UB,
374 	    BJ_AREG, 0,
375 	    SLJIT_MEM1(src), k);
376 }
377 
378 /*
379  * Emit code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
380  */
381 static int
382 emit_read16(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
383 {
384 	int status;
385 
386 	BJ_ASSERT(k <= UINT32_MAX - 1);
387 
388 	/* A = buf[k]; */
389 	status = sljit_emit_op1(compiler,
390 	    SLJIT_MOV_UB,
391 	    BJ_AREG, 0,
392 	    SLJIT_MEM1(src), k);
393 	if (status != SLJIT_SUCCESS)
394 		return status;
395 
396 	/* tmp1 = buf[k+1]; */
397 	status = sljit_emit_op1(compiler,
398 	    SLJIT_MOV_UB,
399 	    BJ_TMP1REG, 0,
400 	    SLJIT_MEM1(src), k+1);
401 	if (status != SLJIT_SUCCESS)
402 		return status;
403 
404 	/* A = A << 8; */
405 	status = sljit_emit_op2(compiler,
406 	    SLJIT_SHL,
407 	    BJ_AREG, 0,
408 	    BJ_AREG, 0,
409 	    SLJIT_IMM, 8);
410 	if (status != SLJIT_SUCCESS)
411 		return status;
412 
413 	/* A = A + tmp1; */
414 	status = sljit_emit_op2(compiler,
415 	    SLJIT_ADD,
416 	    BJ_AREG, 0,
417 	    BJ_AREG, 0,
418 	    BJ_TMP1REG, 0);
419 	return status;
420 }
421 
422 /*
423  * Emit code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
424  */
425 static int
426 emit_read32(struct sljit_compiler *compiler, sljit_si src, uint32_t k)
427 {
428 	int status;
429 
430 	BJ_ASSERT(k <= UINT32_MAX - 3);
431 
432 	/* A = buf[k]; */
433 	status = sljit_emit_op1(compiler,
434 	    SLJIT_MOV_UB,
435 	    BJ_AREG, 0,
436 	    SLJIT_MEM1(src), k);
437 	if (status != SLJIT_SUCCESS)
438 		return status;
439 
440 	/* tmp1 = buf[k+1]; */
441 	status = sljit_emit_op1(compiler,
442 	    SLJIT_MOV_UB,
443 	    BJ_TMP1REG, 0,
444 	    SLJIT_MEM1(src), k+1);
445 	if (status != SLJIT_SUCCESS)
446 		return status;
447 
448 	/* A = A << 8; */
449 	status = sljit_emit_op2(compiler,
450 	    SLJIT_SHL,
451 	    BJ_AREG, 0,
452 	    BJ_AREG, 0,
453 	    SLJIT_IMM, 8);
454 	if (status != SLJIT_SUCCESS)
455 		return status;
456 
457 	/* A = A + tmp1; */
458 	status = sljit_emit_op2(compiler,
459 	    SLJIT_ADD,
460 	    BJ_AREG, 0,
461 	    BJ_AREG, 0,
462 	    BJ_TMP1REG, 0);
463 	if (status != SLJIT_SUCCESS)
464 		return status;
465 
466 	/* tmp1 = buf[k+2]; */
467 	status = sljit_emit_op1(compiler,
468 	    SLJIT_MOV_UB,
469 	    BJ_TMP1REG, 0,
470 	    SLJIT_MEM1(src), k+2);
471 	if (status != SLJIT_SUCCESS)
472 		return status;
473 
474 	/* A = A << 8; */
475 	status = sljit_emit_op2(compiler,
476 	    SLJIT_SHL,
477 	    BJ_AREG, 0,
478 	    BJ_AREG, 0,
479 	    SLJIT_IMM, 8);
480 	if (status != SLJIT_SUCCESS)
481 		return status;
482 
483 	/* A = A + tmp1; */
484 	status = sljit_emit_op2(compiler,
485 	    SLJIT_ADD,
486 	    BJ_AREG, 0,
487 	    BJ_AREG, 0,
488 	    BJ_TMP1REG, 0);
489 	if (status != SLJIT_SUCCESS)
490 		return status;
491 
492 	/* tmp1 = buf[k+3]; */
493 	status = sljit_emit_op1(compiler,
494 	    SLJIT_MOV_UB,
495 	    BJ_TMP1REG, 0,
496 	    SLJIT_MEM1(src), k+3);
497 	if (status != SLJIT_SUCCESS)
498 		return status;
499 
500 	/* A = A << 8; */
501 	status = sljit_emit_op2(compiler,
502 	    SLJIT_SHL,
503 	    BJ_AREG, 0,
504 	    BJ_AREG, 0,
505 	    SLJIT_IMM, 8);
506 	if (status != SLJIT_SUCCESS)
507 		return status;
508 
509 	/* A = A + tmp1; */
510 	status = sljit_emit_op2(compiler,
511 	    SLJIT_ADD,
512 	    BJ_AREG, 0,
513 	    BJ_AREG, 0,
514 	    BJ_TMP1REG, 0);
515 	return status;
516 }
517 
518 #ifdef _KERNEL
519 /*
520  * Emit code for m_xword/m_xhalf/m_xbyte call.
521  *
522  * @pc BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
523  *     BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
524  *     BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
525  *     BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
526  *     BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
527  *     BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
528  *     BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
529  */
530 static int
531 emit_xcall(struct sljit_compiler *compiler, bpfjit_hint_t hints,
532     const struct bpf_insn *pc, int dst, struct sljit_jump ***ret0,
533     size_t *ret0_size, size_t *ret0_maxsize,
534     uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
535 {
536 #if BJ_XREG == SLJIT_RETURN_REG   || \
537     BJ_XREG == SLJIT_SCRATCH_REG1 || \
538     BJ_XREG == SLJIT_SCRATCH_REG2 || \
539     BJ_XREG == SLJIT_SCRATCH_REG3
540 #error "Not supported assignment of registers."
541 #endif
542 	struct sljit_jump *jump;
543 	sljit_si save_reg;
544 	int status;
545 
546 	save_reg = (BPF_CLASS(pc->code) == BPF_LDX) ? BJ_AREG : BJ_XREG;
547 
548 	if (save_reg == BJ_AREG || (hints & BJ_HINT_XREG)) {
549 		/* save A or X */
550 		status = sljit_emit_op1(compiler,
551 		    SLJIT_MOV_UI, /* uint32_t destination */
552 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
553 		    offsetof(struct bpfjit_stack, reg),
554 		    save_reg, 0);
555 		if (status != SLJIT_SUCCESS)
556 			return status;
557 	}
558 
559 	/*
560 	 * Prepare registers for fn(mbuf, k, &err) call.
561 	 */
562 	status = sljit_emit_op1(compiler,
563 	    SLJIT_MOV,
564 	    SLJIT_SCRATCH_REG1, 0,
565 	    BJ_BUF, 0);
566 	if (status != SLJIT_SUCCESS)
567 		return status;
568 
569 	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
570 		if (pc->k == 0) {
571 			/* k = X; */
572 			status = sljit_emit_op1(compiler,
573 			    SLJIT_MOV,
574 			    SLJIT_SCRATCH_REG2, 0,
575 			    BJ_XREG, 0);
576 			if (status != SLJIT_SUCCESS)
577 				return status;
578 		} else {
579 			/* if (X > UINT32_MAX - pc->k) return 0; */
580 			jump = sljit_emit_cmp(compiler,
581 			    SLJIT_C_GREATER,
582 			    BJ_XREG, 0,
583 			    SLJIT_IMM, UINT32_MAX - pc->k);
584 			if (jump == NULL)
585 				return SLJIT_ERR_ALLOC_FAILED;
586 			if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
587 				return SLJIT_ERR_ALLOC_FAILED;
588 
589 			/* k = X + pc->k; */
590 			status = sljit_emit_op2(compiler,
591 			    SLJIT_ADD,
592 			    SLJIT_SCRATCH_REG2, 0,
593 			    BJ_XREG, 0,
594 			    SLJIT_IMM, (uint32_t)pc->k);
595 			if (status != SLJIT_SUCCESS)
596 				return status;
597 		}
598 	} else {
599 		/* k = pc->k */
600 		status = sljit_emit_op1(compiler,
601 		    SLJIT_MOV,
602 		    SLJIT_SCRATCH_REG2, 0,
603 		    SLJIT_IMM, (uint32_t)pc->k);
604 		if (status != SLJIT_SUCCESS)
605 			return status;
606 	}
607 
608 	/*
609 	 * The third argument of fn is an address on stack.
610 	 */
611 	status = sljit_get_local_base(compiler,
612 	    SLJIT_SCRATCH_REG3, 0,
613 	    offsetof(struct bpfjit_stack, err));
614 	if (status != SLJIT_SUCCESS)
615 		return status;
616 
617 	/* fn(buf, k, &err); */
618 	status = sljit_emit_ijump(compiler,
619 	    SLJIT_CALL3,
620 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
621 	if (status != SLJIT_SUCCESS)
622 		return status;
623 
624 	if (dst != SLJIT_RETURN_REG) {
625 		/* move return value to dst */
626 		status = sljit_emit_op1(compiler,
627 		    SLJIT_MOV,
628 		    dst, 0,
629 		    SLJIT_RETURN_REG, 0);
630 		if (status != SLJIT_SUCCESS)
631 			return status;
632 	}
633 
634 	/* if (*err != 0) return 0; */
635 	jump = sljit_emit_cmp(compiler,
636 	    SLJIT_C_NOT_EQUAL|SLJIT_INT_OP,
637 	    SLJIT_MEM1(SLJIT_LOCALS_REG),
638 	    offsetof(struct bpfjit_stack, err),
639 	    SLJIT_IMM, 0);
640 	if (jump == NULL)
641 		return SLJIT_ERR_ALLOC_FAILED;
642 
643 	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
644 		return SLJIT_ERR_ALLOC_FAILED;
645 
646 	if (save_reg == BJ_AREG || (hints & BJ_HINT_XREG)) {
647 		/* restore A or X */
648 		status = sljit_emit_op1(compiler,
649 		    SLJIT_MOV_UI, /* uint32_t source */
650 		    save_reg, 0,
651 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
652 		    offsetof(struct bpfjit_stack, reg));
653 		if (status != SLJIT_SUCCESS)
654 			return status;
655 	}
656 
657 	return SLJIT_SUCCESS;
658 }
659 #endif
660 
661 /*
662  * Emit code for BPF_COP and BPF_COPX instructions.
663  */
664 static int
665 emit_cop(struct sljit_compiler *compiler, bpfjit_hint_t hints,
666     const bpf_ctx_t *bc, const struct bpf_insn *pc,
667     struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
668 {
669 #if BJ_XREG    == SLJIT_RETURN_REG   || \
670     BJ_XREG    == SLJIT_SCRATCH_REG1 || \
671     BJ_XREG    == SLJIT_SCRATCH_REG2 || \
672     BJ_XREG    == SLJIT_SCRATCH_REG3 || \
673     BJ_TMP3REG == SLJIT_SCRATCH_REG1 || \
674     BJ_TMP3REG == SLJIT_SCRATCH_REG2 || \
675     BJ_TMP3REG == SLJIT_SCRATCH_REG3
676 #error "Not supported assignment of registers."
677 #endif
678 
679 	struct sljit_jump *jump;
680 	sljit_si call_reg;
681 	sljit_sw call_off;
682 	int status;
683 
684 	BJ_ASSERT(bc != NULL && bc->copfuncs != NULL);
685 
686 	if (hints & BJ_HINT_LDX) {
687 		/* save X */
688 		status = sljit_emit_op1(compiler,
689 		    SLJIT_MOV_UI, /* uint32_t destination */
690 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
691 		    offsetof(struct bpfjit_stack, reg),
692 		    BJ_XREG, 0);
693 		if (status != SLJIT_SUCCESS)
694 			return status;
695 	}
696 
697 	if (BPF_MISCOP(pc->code) == BPF_COP) {
698 		call_reg = SLJIT_IMM;
699 		call_off = SLJIT_FUNC_OFFSET(bc->copfuncs[pc->k]);
700 	} else {
701 		/* if (X >= bc->nfuncs) return 0; */
702 		jump = sljit_emit_cmp(compiler,
703 		    SLJIT_C_GREATER_EQUAL,
704 		    BJ_XREG, 0,
705 		    SLJIT_IMM, bc->nfuncs);
706 		if (jump == NULL)
707 			return SLJIT_ERR_ALLOC_FAILED;
708 		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
709 			return SLJIT_ERR_ALLOC_FAILED;
710 
711 		/* tmp1 = ctx; */
712 		status = sljit_emit_op1(compiler,
713 		    SLJIT_MOV_P,
714 		    BJ_TMP1REG, 0,
715 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
716 		    offsetof(struct bpfjit_stack, ctx));
717 		if (status != SLJIT_SUCCESS)
718 			return status;
719 
720 		/* tmp1 = ctx->copfuncs; */
721 		status = sljit_emit_op1(compiler,
722 		    SLJIT_MOV_P,
723 		    BJ_TMP1REG, 0,
724 		    SLJIT_MEM1(BJ_TMP1REG),
725 		    offsetof(struct bpf_ctx, copfuncs));
726 		if (status != SLJIT_SUCCESS)
727 			return status;
728 
729 		/* tmp2 = X; */
730 		status = sljit_emit_op1(compiler,
731 		    SLJIT_MOV,
732 		    BJ_TMP2REG, 0,
733 		    BJ_XREG, 0);
734 		if (status != SLJIT_SUCCESS)
735 			return status;
736 
737 		/* tmp3 = ctx->copfuncs[tmp2]; */
738 		call_reg = BJ_TMP3REG;
739 		call_off = 0;
740 		status = sljit_emit_op1(compiler,
741 		    SLJIT_MOV_P,
742 		    call_reg, call_off,
743 		    SLJIT_MEM2(BJ_TMP1REG, BJ_TMP2REG),
744 		    SLJIT_WORD_SHIFT);
745 		if (status != SLJIT_SUCCESS)
746 			return status;
747 	}
748 
749 	/*
750 	 * Copy bpf_copfunc_t arguments to registers.
751 	 */
752 #if BJ_AREG != SLJIT_SCRATCH_REG3
753 	status = sljit_emit_op1(compiler,
754 	    SLJIT_MOV_UI,
755 	    SLJIT_SCRATCH_REG3, 0,
756 	    BJ_AREG, 0);
757 	if (status != SLJIT_SUCCESS)
758 		return status;
759 #endif
760 
761 	status = sljit_emit_op1(compiler,
762 	    SLJIT_MOV_P,
763 	    SLJIT_SCRATCH_REG1, 0,
764 	    SLJIT_MEM1(SLJIT_LOCALS_REG),
765 	    offsetof(struct bpfjit_stack, ctx));
766 	if (status != SLJIT_SUCCESS)
767 		return status;
768 
769 	status = sljit_emit_op1(compiler,
770 	    SLJIT_MOV_P,
771 	    SLJIT_SCRATCH_REG2, 0,
772 	    BJ_ARGS, 0);
773 	if (status != SLJIT_SUCCESS)
774 		return status;
775 
776 	status = sljit_emit_ijump(compiler,
777 	    SLJIT_CALL3, call_reg, call_off);
778 	if (status != SLJIT_SUCCESS)
779 		return status;
780 
781 #if BJ_AREG != SLJIT_RETURN_REG
782 	status = sljit_emit_op1(compiler,
783 	    SLJIT_MOV,
784 	    BJ_AREG, 0,
785 	    SLJIT_RETURN_REG, 0);
786 	if (status != SLJIT_SUCCESS)
787 		return status;
788 #endif
789 
790 	if (hints & BJ_HINT_LDX) {
791 		/* restore X */
792 		status = sljit_emit_op1(compiler,
793 		    SLJIT_MOV_UI, /* uint32_t source */
794 		    BJ_XREG, 0,
795 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
796 		    offsetof(struct bpfjit_stack, reg));
797 		if (status != SLJIT_SUCCESS)
798 			return status;
799 	}
800 
801 	return SLJIT_SUCCESS;
802 }
803 
804 /*
805  * Generate code for
806  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
807  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
808  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
809  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
810  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
811  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
812  */
813 static int
814 emit_pkt_read(struct sljit_compiler *compiler, bpfjit_hint_t hints,
815     const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
816     struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
817 {
818 	int status = SLJIT_ERR_ALLOC_FAILED;
819 	uint32_t width;
820 	sljit_si ld_reg;
821 	struct sljit_jump *jump;
822 #ifdef _KERNEL
823 	struct sljit_label *label;
824 	struct sljit_jump *over_mchain_jump;
825 	const bool check_zero_buflen = (to_mchain_jump != NULL);
826 #endif
827 	const uint32_t k = pc->k;
828 
829 #ifdef _KERNEL
830 	if (to_mchain_jump == NULL) {
831 		to_mchain_jump = sljit_emit_cmp(compiler,
832 		    SLJIT_C_EQUAL,
833 		    BJ_BUFLEN, 0,
834 		    SLJIT_IMM, 0);
835 		if (to_mchain_jump == NULL)
836 			return SLJIT_ERR_ALLOC_FAILED;
837 	}
838 #endif
839 
840 	ld_reg = BJ_BUF;
841 	width = read_width(pc);
842 
843 	if (BPF_MODE(pc->code) == BPF_IND) {
844 		/* tmp1 = buflen - (pc->k + width); */
845 		status = sljit_emit_op2(compiler,
846 		    SLJIT_SUB,
847 		    BJ_TMP1REG, 0,
848 		    BJ_BUFLEN, 0,
849 		    SLJIT_IMM, k + width);
850 		if (status != SLJIT_SUCCESS)
851 			return status;
852 
853 		/* ld_reg = buf + X; */
854 		ld_reg = BJ_TMP2REG;
855 		status = sljit_emit_op2(compiler,
856 		    SLJIT_ADD,
857 		    ld_reg, 0,
858 		    BJ_BUF, 0,
859 		    BJ_XREG, 0);
860 		if (status != SLJIT_SUCCESS)
861 			return status;
862 
863 		/* if (tmp1 < X) return 0; */
864 		jump = sljit_emit_cmp(compiler,
865 		    SLJIT_C_LESS,
866 		    BJ_TMP1REG, 0,
867 		    BJ_XREG, 0);
868 		if (jump == NULL)
869 			return SLJIT_ERR_ALLOC_FAILED;
870 		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
871 			return SLJIT_ERR_ALLOC_FAILED;
872 	}
873 
874 	switch (width) {
875 	case 4:
876 		status = emit_read32(compiler, ld_reg, k);
877 		break;
878 	case 2:
879 		status = emit_read16(compiler, ld_reg, k);
880 		break;
881 	case 1:
882 		status = emit_read8(compiler, ld_reg, k);
883 		break;
884 	}
885 
886 	if (status != SLJIT_SUCCESS)
887 		return status;
888 
889 #ifdef _KERNEL
890 	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
891 	if (over_mchain_jump == NULL)
892 		return SLJIT_ERR_ALLOC_FAILED;
893 
894 	/* entry point to mchain handler */
895 	label = sljit_emit_label(compiler);
896 	if (label == NULL)
897 		return SLJIT_ERR_ALLOC_FAILED;
898 	sljit_set_label(to_mchain_jump, label);
899 
900 	if (check_zero_buflen) {
901 		/* if (buflen != 0) return 0; */
902 		jump = sljit_emit_cmp(compiler,
903 		    SLJIT_C_NOT_EQUAL,
904 		    BJ_BUFLEN, 0,
905 		    SLJIT_IMM, 0);
906 		if (jump == NULL)
907 			return SLJIT_ERR_ALLOC_FAILED;
908 		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
909 			return SLJIT_ERR_ALLOC_FAILED;
910 	}
911 
912 	switch (width) {
913 	case 4:
914 		status = emit_xcall(compiler, hints, pc, BJ_AREG,
915 		    ret0, ret0_size, ret0_maxsize, &m_xword);
916 		break;
917 	case 2:
918 		status = emit_xcall(compiler, hints, pc, BJ_AREG,
919 		    ret0, ret0_size, ret0_maxsize, &m_xhalf);
920 		break;
921 	case 1:
922 		status = emit_xcall(compiler, hints, pc, BJ_AREG,
923 		    ret0, ret0_size, ret0_maxsize, &m_xbyte);
924 		break;
925 	}
926 
927 	if (status != SLJIT_SUCCESS)
928 		return status;
929 
930 	label = sljit_emit_label(compiler);
931 	if (label == NULL)
932 		return SLJIT_ERR_ALLOC_FAILED;
933 	sljit_set_label(over_mchain_jump, label);
934 #endif
935 
936 	return SLJIT_SUCCESS;
937 }
938 
939 static int
940 emit_memload(struct sljit_compiler *compiler,
941     sljit_si dst, uint32_t k, size_t extwords)
942 {
943 	int status;
944 	sljit_si src;
945 	sljit_sw srcw;
946 
947 	srcw = k * sizeof(uint32_t);
948 
949 	if (extwords == 0) {
950 		src = SLJIT_MEM1(SLJIT_LOCALS_REG);
951 		srcw += offsetof(struct bpfjit_stack, mem);
952 	} else {
953 		/* copy extmem pointer to the tmp1 register */
954 		status = sljit_emit_op1(compiler,
955 		    SLJIT_MOV_P,
956 		    BJ_TMP1REG, 0,
957 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
958 		    offsetof(struct bpfjit_stack, extmem));
959 		if (status != SLJIT_SUCCESS)
960 			return status;
961 		src = SLJIT_MEM1(BJ_TMP1REG);
962 	}
963 
964 	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, 0, src, srcw);
965 }
966 
967 static int
968 emit_memstore(struct sljit_compiler *compiler,
969     sljit_si src, uint32_t k, size_t extwords)
970 {
971 	int status;
972 	sljit_si dst;
973 	sljit_sw dstw;
974 
975 	dstw = k * sizeof(uint32_t);
976 
977 	if (extwords == 0) {
978 		dst = SLJIT_MEM1(SLJIT_LOCALS_REG);
979 		dstw += offsetof(struct bpfjit_stack, mem);
980 	} else {
981 		/* copy extmem pointer to the tmp1 register */
982 		status = sljit_emit_op1(compiler,
983 		    SLJIT_MOV_P,
984 		    BJ_TMP1REG, 0,
985 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
986 		    offsetof(struct bpfjit_stack, extmem));
987 		if (status != SLJIT_SUCCESS)
988 			return status;
989 		dst = SLJIT_MEM1(BJ_TMP1REG);
990 	}
991 
992 	return sljit_emit_op1(compiler, SLJIT_MOV_UI, dst, dstw, src, 0);
993 }
994 
995 /*
996  * Emit code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
997  */
998 static int
999 emit_msh(struct sljit_compiler *compiler, bpfjit_hint_t hints,
1000     const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
1001     struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
1002 {
1003 	int status;
1004 #ifdef _KERNEL
1005 	struct sljit_label *label;
1006 	struct sljit_jump *jump, *over_mchain_jump;
1007 	const bool check_zero_buflen = (to_mchain_jump != NULL);
1008 #endif
1009 	const uint32_t k = pc->k;
1010 
1011 #ifdef _KERNEL
1012 	if (to_mchain_jump == NULL) {
1013 		to_mchain_jump = sljit_emit_cmp(compiler,
1014 		    SLJIT_C_EQUAL,
1015 		    BJ_BUFLEN, 0,
1016 		    SLJIT_IMM, 0);
1017 		if (to_mchain_jump == NULL)
1018 			return SLJIT_ERR_ALLOC_FAILED;
1019 	}
1020 #endif
1021 
1022 	/* tmp1 = buf[k] */
1023 	status = sljit_emit_op1(compiler,
1024 	    SLJIT_MOV_UB,
1025 	    BJ_TMP1REG, 0,
1026 	    SLJIT_MEM1(BJ_BUF), k);
1027 	if (status != SLJIT_SUCCESS)
1028 		return status;
1029 
1030 #ifdef _KERNEL
1031 	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
1032 	if (over_mchain_jump == NULL)
1033 		return SLJIT_ERR_ALLOC_FAILED;
1034 
1035 	/* entry point to mchain handler */
1036 	label = sljit_emit_label(compiler);
1037 	if (label == NULL)
1038 		return SLJIT_ERR_ALLOC_FAILED;
1039 	sljit_set_label(to_mchain_jump, label);
1040 
1041 	if (check_zero_buflen) {
1042 		/* if (buflen != 0) return 0; */
1043 		jump = sljit_emit_cmp(compiler,
1044 		    SLJIT_C_NOT_EQUAL,
1045 		    BJ_BUFLEN, 0,
1046 		    SLJIT_IMM, 0);
1047 		if (jump == NULL)
1048 			return SLJIT_ERR_ALLOC_FAILED;
1049 		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
1050 			return SLJIT_ERR_ALLOC_FAILED;
1051 	}
1052 
1053 	status = emit_xcall(compiler, hints, pc, BJ_TMP1REG,
1054 	    ret0, ret0_size, ret0_maxsize, &m_xbyte);
1055 	if (status != SLJIT_SUCCESS)
1056 		return status;
1057 
1058 	label = sljit_emit_label(compiler);
1059 	if (label == NULL)
1060 		return SLJIT_ERR_ALLOC_FAILED;
1061 	sljit_set_label(over_mchain_jump, label);
1062 #endif
1063 
1064 	/* tmp1 &= 0xf */
1065 	status = sljit_emit_op2(compiler,
1066 	    SLJIT_AND,
1067 	    BJ_TMP1REG, 0,
1068 	    BJ_TMP1REG, 0,
1069 	    SLJIT_IMM, 0xf);
1070 	if (status != SLJIT_SUCCESS)
1071 		return status;
1072 
1073 	/* X = tmp1 << 2 */
1074 	status = sljit_emit_op2(compiler,
1075 	    SLJIT_SHL,
1076 	    BJ_XREG, 0,
1077 	    BJ_TMP1REG, 0,
1078 	    SLJIT_IMM, 2);
1079 	if (status != SLJIT_SUCCESS)
1080 		return status;
1081 
1082 	return SLJIT_SUCCESS;
1083 }
1084 
1085 /*
1086  * Emit code for A = A / k or A = A % k when k is a power of 2.
1087  * @pc BPF_DIV or BPF_MOD instruction.
1088  */
1089 static int
1090 emit_pow2_moddiv(struct sljit_compiler *compiler, const struct bpf_insn *pc)
1091 {
1092 	uint32_t k = pc->k;
1093 	int status = SLJIT_SUCCESS;
1094 
1095 	BJ_ASSERT(k != 0 && (k & (k - 1)) == 0);
1096 
1097 	if (BPF_OP(pc->code) == BPF_MOD) {
1098 		status = sljit_emit_op2(compiler,
1099 		    SLJIT_AND,
1100 		    BJ_AREG, 0,
1101 		    BJ_AREG, 0,
1102 		    SLJIT_IMM, k - 1);
1103 	} else {
1104 		int shift = 0;
1105 
1106 		/*
1107 		 * Do shift = __builtin_ctz(k).
1108 		 * The loop is slower, but that's ok.
1109 		 */
1110 		while (k > 1) {
1111 			k >>= 1;
1112 			shift++;
1113 		}
1114 
1115 		if (shift != 0) {
1116 			status = sljit_emit_op2(compiler,
1117 			    SLJIT_LSHR|SLJIT_INT_OP,
1118 			    BJ_AREG, 0,
1119 			    BJ_AREG, 0,
1120 			    SLJIT_IMM, shift);
1121 		}
1122 	}
1123 
1124 	return status;
1125 }
1126 
1127 #if !defined(BPFJIT_USE_UDIV)
1128 static sljit_uw
1129 divide(sljit_uw x, sljit_uw y)
1130 {
1131 
1132 	return (uint32_t)x / (uint32_t)y;
1133 }
1134 
1135 static sljit_uw
1136 modulus(sljit_uw x, sljit_uw y)
1137 {
1138 
1139 	return (uint32_t)x % (uint32_t)y;
1140 }
1141 #endif
1142 
1143 /*
1144  * Emit code for A = A / div or A = A % div.
1145  * @pc BPF_DIV or BPF_MOD instruction.
1146  */
1147 static int
1148 emit_moddiv(struct sljit_compiler *compiler, const struct bpf_insn *pc)
1149 {
1150 	int status;
1151 	const bool div = BPF_OP(pc->code) == BPF_DIV;
1152 	const bool xreg = BPF_SRC(pc->code) == BPF_X;
1153 
1154 #if BJ_XREG == SLJIT_RETURN_REG   || \
1155     BJ_XREG == SLJIT_SCRATCH_REG1 || \
1156     BJ_XREG == SLJIT_SCRATCH_REG2 || \
1157     BJ_AREG == SLJIT_SCRATCH_REG2
1158 #error "Not supported assignment of registers."
1159 #endif
1160 
1161 #if BJ_AREG != SLJIT_SCRATCH_REG1
1162 	status = sljit_emit_op1(compiler,
1163 	    SLJIT_MOV,
1164 	    SLJIT_SCRATCH_REG1, 0,
1165 	    BJ_AREG, 0);
1166 	if (status != SLJIT_SUCCESS)
1167 		return status;
1168 #endif
1169 
1170 	status = sljit_emit_op1(compiler,
1171 	    SLJIT_MOV,
1172 	    SLJIT_SCRATCH_REG2, 0,
1173 	    xreg ? BJ_XREG : SLJIT_IMM,
1174 	    xreg ? 0 : (uint32_t)pc->k);
1175 	if (status != SLJIT_SUCCESS)
1176 		return status;
1177 
1178 #if defined(BPFJIT_USE_UDIV)
1179 	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);
1180 
1181 	if (BPF_OP(pc->code) == BPF_DIV) {
1182 #if BJ_AREG != SLJIT_SCRATCH_REG1
1183 		status = sljit_emit_op1(compiler,
1184 		    SLJIT_MOV,
1185 		    BJ_AREG, 0,
1186 		    SLJIT_SCRATCH_REG1, 0);
1187 #endif
1188 	} else {
1189 #if BJ_AREG != SLJIT_SCRATCH_REG2
1190 		/* Remainder is in SLJIT_SCRATCH_REG2. */
1191 		status = sljit_emit_op1(compiler,
1192 		    SLJIT_MOV,
1193 		    BJ_AREG, 0,
1194 		    SLJIT_SCRATCH_REG2, 0);
1195 #endif
1196 	}
1197 
1198 	if (status != SLJIT_SUCCESS)
1199 		return status;
1200 #else
1201 	status = sljit_emit_ijump(compiler,
1202 	    SLJIT_CALL2,
1203 	    SLJIT_IMM, div ? SLJIT_FUNC_OFFSET(divide) :
1204 		SLJIT_FUNC_OFFSET(modulus));
1205 
1206 #if BJ_AREG != SLJIT_RETURN_REG
1207 	status = sljit_emit_op1(compiler,
1208 	    SLJIT_MOV,
1209 	    BJ_AREG, 0,
1210 	    SLJIT_RETURN_REG, 0);
1211 	if (status != SLJIT_SUCCESS)
1212 		return status;
1213 #endif
1214 #endif
1215 
1216 	return status;
1217 }
1218 
1219 /*
1220  * Return true if pc is a "read from packet" instruction.
1221  * If length is not NULL and return value is true, *length will
1222  * be set to a safe length required to read a packet.
1223  */
1224 static bool
1225 read_pkt_insn(const struct bpf_insn *pc, bpfjit_abc_length_t *length)
1226 {
1227 	bool rv;
1228 	bpfjit_abc_length_t width;
1229 
1230 	switch (BPF_CLASS(pc->code)) {
1231 	default:
1232 		rv = false;
1233 		break;
1234 
1235 	case BPF_LD:
1236 		rv = BPF_MODE(pc->code) == BPF_ABS ||
1237 		     BPF_MODE(pc->code) == BPF_IND;
1238 		if (rv)
1239 			width = read_width(pc);
1240 		break;
1241 
1242 	case BPF_LDX:
1243 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
1244 		width = 1;
1245 		break;
1246 	}
1247 
1248 	if (rv && length != NULL) {
1249 		/*
1250 		 * Values greater than UINT32_MAX will generate
1251 		 * unconditional "return 0".
1252 		 */
1253 		*length = (uint32_t)pc->k + width;
1254 	}
1255 
1256 	return rv;
1257 }
1258 
1259 static void
1260 optimize_init(struct bpfjit_insn_data *insn_dat, size_t insn_count)
1261 {
1262 	size_t i;
1263 
1264 	for (i = 0; i < insn_count; i++) {
1265 		SLIST_INIT(&insn_dat[i].bjumps);
1266 		insn_dat[i].invalid = BJ_INIT_NOBITS;
1267 	}
1268 }
1269 
1270 /*
1271  * The function divides instructions into blocks. Destination of a jump
1272  * instruction starts a new block. BPF_RET and BPF_JMP instructions
1273  * terminate a block. Blocks are linear, that is, there are no jumps out
1274  * from the middle of a block and there are no jumps in to the middle of
1275  * a block.
1276  *
1277  * The function also sets bits in *initmask for memwords that
1278  * need to be initialized to zero. Note that this set should be empty
1279  * for any valid kernel filter program.
1280  */
1281 static bool
1282 optimize_pass1(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1283     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1284     bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
1285 {
1286 	struct bpfjit_jump *jtf;
1287 	size_t i;
1288 	uint32_t jt, jf;
1289 	bpfjit_abc_length_t length;
1290 	bpf_memword_init_t invalid; /* borrowed from bpf_filter() */
1291 	bool unreachable;
1292 
1293 	const size_t memwords = GET_MEMWORDS(bc);
1294 
1295 	*hints = 0;
1296 	*initmask = BJ_INIT_NOBITS;
1297 
1298 	unreachable = false;
1299 	invalid = ~BJ_INIT_NOBITS;
1300 
1301 	for (i = 0; i < insn_count; i++) {
1302 		if (!SLIST_EMPTY(&insn_dat[i].bjumps))
1303 			unreachable = false;
1304 		insn_dat[i].unreachable = unreachable;
1305 
1306 		if (unreachable)
1307 			continue;
1308 
1309 		invalid |= insn_dat[i].invalid;
1310 
1311 		if (read_pkt_insn(&insns[i], &length) && length > UINT32_MAX)
1312 			unreachable = true;
1313 
1314 		switch (BPF_CLASS(insns[i].code)) {
1315 		case BPF_RET:
1316 			if (BPF_RVAL(insns[i].code) == BPF_A)
1317 				*initmask |= invalid & BJ_INIT_ABIT;
1318 
1319 			unreachable = true;
1320 			continue;
1321 
1322 		case BPF_LD:
1323 			if (BPF_MODE(insns[i].code) == BPF_ABS)
1324 				*hints |= BJ_HINT_ABS;
1325 
1326 			if (BPF_MODE(insns[i].code) == BPF_IND) {
1327 				*hints |= BJ_HINT_IND | BJ_HINT_XREG;
1328 				*initmask |= invalid & BJ_INIT_XBIT;
1329 			}
1330 
1331 			if (BPF_MODE(insns[i].code) == BPF_MEM &&
1332 			    (uint32_t)insns[i].k < memwords) {
1333 				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
1334 			}
1335 
1336 			invalid &= ~BJ_INIT_ABIT;
1337 			continue;
1338 
1339 		case BPF_LDX:
1340 			*hints |= BJ_HINT_XREG | BJ_HINT_LDX;
1341 
1342 			if (BPF_MODE(insns[i].code) == BPF_MEM &&
1343 			    (uint32_t)insns[i].k < memwords) {
1344 				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
1345 			}
1346 
1347 			if (BPF_MODE(insns[i].code) == BPF_MSH &&
1348 			    BPF_SIZE(insns[i].code) == BPF_B) {
1349 				*hints |= BJ_HINT_MSH;
1350 			}
1351 
1352 			invalid &= ~BJ_INIT_XBIT;
1353 			continue;
1354 
1355 		case BPF_ST:
1356 			*initmask |= invalid & BJ_INIT_ABIT;
1357 
1358 			if ((uint32_t)insns[i].k < memwords)
1359 				invalid &= ~BJ_INIT_MBIT(insns[i].k);
1360 
1361 			continue;
1362 
1363 		case BPF_STX:
1364 			*hints |= BJ_HINT_XREG;
1365 			*initmask |= invalid & BJ_INIT_XBIT;
1366 
1367 			if ((uint32_t)insns[i].k < memwords)
1368 				invalid &= ~BJ_INIT_MBIT(insns[i].k);
1369 
1370 			continue;
1371 
1372 		case BPF_ALU:
1373 			*initmask |= invalid & BJ_INIT_ABIT;
1374 
1375 			if (insns[i].code != (BPF_ALU|BPF_NEG) &&
1376 			    BPF_SRC(insns[i].code) == BPF_X) {
1377 				*hints |= BJ_HINT_XREG;
1378 				*initmask |= invalid & BJ_INIT_XBIT;
1379 			}
1380 
1381 			invalid &= ~BJ_INIT_ABIT;
1382 			continue;
1383 
1384 		case BPF_MISC:
1385 			switch (BPF_MISCOP(insns[i].code)) {
1386 			case BPF_TAX: // X <- A
1387 				*hints |= BJ_HINT_XREG;
1388 				*initmask |= invalid & BJ_INIT_ABIT;
1389 				invalid &= ~BJ_INIT_XBIT;
1390 				continue;
1391 
1392 			case BPF_TXA: // A <- X
1393 				*hints |= BJ_HINT_XREG;
1394 				*initmask |= invalid & BJ_INIT_XBIT;
1395 				invalid &= ~BJ_INIT_ABIT;
1396 				continue;
1397 
1398 			case BPF_COPX:
1399 				*hints |= BJ_HINT_XREG | BJ_HINT_COPX;
1400 				/* FALLTHROUGH */
1401 
1402 			case BPF_COP:
1403 				*hints |= BJ_HINT_COP;
1404 				*initmask |= invalid & BJ_INIT_ABIT;
1405 				invalid &= ~BJ_INIT_ABIT;
1406 				continue;
1407 			}
1408 
1409 			continue;
1410 
1411 		case BPF_JMP:
1412 			/* Initialize abc_length for ABC pass. */
1413 			insn_dat[i].u.jdata.abc_length = MAX_ABC_LENGTH;
1414 
1415 			if (BPF_OP(insns[i].code) == BPF_JA) {
1416 				jt = jf = insns[i].k;
1417 			} else {
1418 				jt = insns[i].jt;
1419 				jf = insns[i].jf;
1420 			}
1421 
1422 			if (jt >= insn_count - (i + 1) ||
1423 			    jf >= insn_count - (i + 1)) {
1424 				return false;
1425 			}
1426 
1427 			if (jt > 0 && jf > 0)
1428 				unreachable = true;
1429 
1430 			jt += i + 1;
1431 			jf += i + 1;
1432 
1433 			jtf = insn_dat[i].u.jdata.jtf;
1434 
1435 			jtf[0].jdata = &insn_dat[i].u.jdata;
1436 			SLIST_INSERT_HEAD(&insn_dat[jt].bjumps,
1437 			    &jtf[0], entries);
1438 
1439 			if (jf != jt) {
1440 				jtf[1].jdata = &insn_dat[i].u.jdata;
1441 				SLIST_INSERT_HEAD(&insn_dat[jf].bjumps,
1442 				    &jtf[1], entries);
1443 			}
1444 
1445 			insn_dat[jf].invalid |= invalid;
1446 			insn_dat[jt].invalid |= invalid;
1447 			invalid = 0;
1448 
1449 			continue;
1450 		}
1451 	}
1452 
1453 	return true;
1454 }
1455 
1456 /*
1457  * Array Bounds Check Elimination (ABC) pass.
1458  */
1459 static void
1460 optimize_pass2(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1461     struct bpfjit_insn_data *insn_dat, size_t insn_count)
1462 {
1463 	struct bpfjit_jump *jmp;
1464 	const struct bpf_insn *pc;
1465 	struct bpfjit_insn_data *pd;
1466 	size_t i;
1467 	bpfjit_abc_length_t length, abc_length = 0;
1468 
1469 	const size_t extwords = GET_EXTWORDS(bc);
1470 
1471 	for (i = insn_count; i != 0; i--) {
1472 		pc = &insns[i-1];
1473 		pd = &insn_dat[i-1];
1474 
1475 		if (pd->unreachable)
1476 			continue;
1477 
1478 		switch (BPF_CLASS(pc->code)) {
1479 		case BPF_RET:
1480 			/*
1481 			 * It's quite common for bpf programs to
1482 			 * check packet bytes in increasing order
1483 			 * and return zero if bytes don't match
1484 			 * specified critetion. Such programs disable
1485 			 * ABC optimization completely because for
1486 			 * every jump there is a branch with no read
1487 			 * instruction.
1488 			 * With no side effects, BPF_STMT(BPF_RET+BPF_K, 0)
1489 			 * is indistinguishable from out-of-bound load.
1490 			 * Therefore, abc_length can be set to
1491 			 * MAX_ABC_LENGTH and enable ABC for many
1492 			 * bpf programs.
1493 			 * If this optimization encounters any
1494 			 * instruction with a side effect, it will
1495 			 * reset abc_length.
1496 			 */
1497 			if (BPF_RVAL(pc->code) == BPF_K && pc->k == 0)
1498 				abc_length = MAX_ABC_LENGTH;
1499 			else
1500 				abc_length = 0;
1501 			break;
1502 
1503 		case BPF_MISC:
1504 			if (BPF_MISCOP(pc->code) == BPF_COP ||
1505 			    BPF_MISCOP(pc->code) == BPF_COPX) {
1506 				/* COP instructions can have side effects. */
1507 				abc_length = 0;
1508 			}
1509 			break;
1510 
1511 		case BPF_ST:
1512 		case BPF_STX:
1513 			if (extwords != 0) {
1514 				/* Write to memory is visible after a call. */
1515 				abc_length = 0;
1516 			}
1517 			break;
1518 
1519 		case BPF_JMP:
1520 			abc_length = pd->u.jdata.abc_length;
1521 			break;
1522 
1523 		default:
1524 			if (read_pkt_insn(pc, &length)) {
1525 				if (abc_length < length)
1526 					abc_length = length;
1527 				pd->u.rdata.abc_length = abc_length;
1528 			}
1529 			break;
1530 		}
1531 
1532 		SLIST_FOREACH(jmp, &pd->bjumps, entries) {
1533 			if (jmp->jdata->abc_length > abc_length)
1534 				jmp->jdata->abc_length = abc_length;
1535 		}
1536 	}
1537 }
1538 
1539 static void
1540 optimize_pass3(const struct bpf_insn *insns,
1541     struct bpfjit_insn_data *insn_dat, size_t insn_count)
1542 {
1543 	struct bpfjit_jump *jmp;
1544 	size_t i;
1545 	bpfjit_abc_length_t checked_length = 0;
1546 
1547 	for (i = 0; i < insn_count; i++) {
1548 		if (insn_dat[i].unreachable)
1549 			continue;
1550 
1551 		SLIST_FOREACH(jmp, &insn_dat[i].bjumps, entries) {
1552 			if (jmp->jdata->checked_length < checked_length)
1553 				checked_length = jmp->jdata->checked_length;
1554 		}
1555 
1556 		if (BPF_CLASS(insns[i].code) == BPF_JMP) {
1557 			insn_dat[i].u.jdata.checked_length = checked_length;
1558 		} else if (read_pkt_insn(&insns[i], NULL)) {
1559 			struct bpfjit_read_pkt_data *rdata =
1560 			    &insn_dat[i].u.rdata;
1561 			rdata->check_length = 0;
1562 			if (checked_length < rdata->abc_length) {
1563 				checked_length = rdata->abc_length;
1564 				rdata->check_length = checked_length;
1565 			}
1566 		}
1567 	}
1568 }
1569 
1570 static bool
1571 optimize(const bpf_ctx_t *bc, const struct bpf_insn *insns,
1572     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1573     bpf_memword_init_t *initmask, bpfjit_hint_t *hints)
1574 {
1575 
1576 	optimize_init(insn_dat, insn_count);
1577 
1578 	if (!optimize_pass1(bc, insns, insn_dat, insn_count, initmask, hints))
1579 		return false;
1580 
1581 	optimize_pass2(bc, insns, insn_dat, insn_count);
1582 	optimize_pass3(insns, insn_dat, insn_count);
1583 
1584 	return true;
1585 }
1586 
1587 /*
1588  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1589  */
1590 static int
1591 bpf_alu_to_sljit_op(const struct bpf_insn *pc)
1592 {
1593 
1594 	/*
1595 	 * Note: all supported 64bit arches have 32bit multiply
1596 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1597 	 */
1598 	switch (BPF_OP(pc->code)) {
1599 	case BPF_ADD: return SLJIT_ADD;
1600 	case BPF_SUB: return SLJIT_SUB;
1601 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1602 	case BPF_OR:  return SLJIT_OR;
1603 	case BPF_XOR: return SLJIT_XOR;
1604 	case BPF_AND: return SLJIT_AND;
1605 	case BPF_LSH: return SLJIT_SHL;
1606 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
1607 	default:
1608 		BJ_ASSERT(false);
1609 		return 0;
1610 	}
1611 }
1612 
1613 /*
1614  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1615  */
1616 static int
1617 bpf_jmp_to_sljit_cond(const struct bpf_insn *pc, bool negate)
1618 {
1619 	/*
1620 	 * Note: all supported 64bit arches have 32bit comparison
1621 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1622 	 */
1623 	int rv = SLJIT_INT_OP;
1624 
1625 	switch (BPF_OP(pc->code)) {
1626 	case BPF_JGT:
1627 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1628 		break;
1629 	case BPF_JGE:
1630 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1631 		break;
1632 	case BPF_JEQ:
1633 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1634 		break;
1635 	case BPF_JSET:
1636 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1637 		break;
1638 	default:
1639 		BJ_ASSERT(false);
1640 	}
1641 
1642 	return rv;
1643 }
1644 
1645 /*
1646  * Convert BPF_K and BPF_X to sljit register.
1647  */
1648 static int
1649 kx_to_reg(const struct bpf_insn *pc)
1650 {
1651 
1652 	switch (BPF_SRC(pc->code)) {
1653 	case BPF_K: return SLJIT_IMM;
1654 	case BPF_X: return BJ_XREG;
1655 	default:
1656 		BJ_ASSERT(false);
1657 		return 0;
1658 	}
1659 }
1660 
1661 static sljit_sw
1662 kx_to_reg_arg(const struct bpf_insn *pc)
1663 {
1664 
1665 	switch (BPF_SRC(pc->code)) {
1666 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1667 	case BPF_X: return 0;               /* BJ_XREG, 0,      */
1668 	default:
1669 		BJ_ASSERT(false);
1670 		return 0;
1671 	}
1672 }
1673 
1674 static bool
1675 generate_insn_code(struct sljit_compiler *compiler, bpfjit_hint_t hints,
1676     const bpf_ctx_t *bc, const struct bpf_insn *insns,
1677     struct bpfjit_insn_data *insn_dat, size_t insn_count)
1678 {
1679 	/* a list of jumps to out-of-bound return from a generated function */
1680 	struct sljit_jump **ret0;
1681 	size_t ret0_size, ret0_maxsize;
1682 
1683 	struct sljit_jump *jump;
1684 	struct sljit_label *label;
1685 	const struct bpf_insn *pc;
1686 	struct bpfjit_jump *bjump, *jtf;
1687 	struct sljit_jump *to_mchain_jump;
1688 
1689 	size_t i;
1690 	int status;
1691 	int branching, negate;
1692 	unsigned int rval, mode, src, op;
1693 	uint32_t jt, jf;
1694 
1695 	bool unconditional_ret;
1696 	bool rv;
1697 
1698 	const size_t extwords = GET_EXTWORDS(bc);
1699 	const size_t memwords = GET_MEMWORDS(bc);
1700 
1701 	ret0 = NULL;
1702 	rv = false;
1703 
1704 	ret0_size = 0;
1705 	ret0_maxsize = 64;
1706 	ret0 = BJ_ALLOC(ret0_maxsize * sizeof(ret0[0]));
1707 	if (ret0 == NULL)
1708 		goto fail;
1709 
1710 	/* reset sjump members of jdata */
1711 	for (i = 0; i < insn_count; i++) {
1712 		if (insn_dat[i].unreachable ||
1713 		    BPF_CLASS(insns[i].code) != BPF_JMP) {
1714 			continue;
1715 		}
1716 
1717 		jtf = insn_dat[i].u.jdata.jtf;
1718 		jtf[0].sjump = jtf[1].sjump = NULL;
1719 	}
1720 
1721 	/* main loop */
1722 	for (i = 0; i < insn_count; i++) {
1723 		if (insn_dat[i].unreachable)
1724 			continue;
1725 
1726 		/*
1727 		 * Resolve jumps to the current insn.
1728 		 */
1729 		label = NULL;
1730 		SLIST_FOREACH(bjump, &insn_dat[i].bjumps, entries) {
1731 			if (bjump->sjump != NULL) {
1732 				if (label == NULL)
1733 					label = sljit_emit_label(compiler);
1734 				if (label == NULL)
1735 					goto fail;
1736 				sljit_set_label(bjump->sjump, label);
1737 			}
1738 		}
1739 
1740 		to_mchain_jump = NULL;
1741 		unconditional_ret = false;
1742 
1743 		if (read_pkt_insn(&insns[i], NULL)) {
1744 			if (insn_dat[i].u.rdata.check_length > UINT32_MAX) {
1745 				/* Jump to "return 0" unconditionally. */
1746 				unconditional_ret = true;
1747 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
1748 				if (jump == NULL)
1749 					goto fail;
1750 				if (!append_jump(jump, &ret0,
1751 				    &ret0_size, &ret0_maxsize))
1752 					goto fail;
1753 			} else if (insn_dat[i].u.rdata.check_length > 0) {
1754 				/* if (buflen < check_length) return 0; */
1755 				jump = sljit_emit_cmp(compiler,
1756 				    SLJIT_C_LESS,
1757 				    BJ_BUFLEN, 0,
1758 				    SLJIT_IMM,
1759 				    insn_dat[i].u.rdata.check_length);
1760 				if (jump == NULL)
1761 					goto fail;
1762 #ifdef _KERNEL
1763 				to_mchain_jump = jump;
1764 #else
1765 				if (!append_jump(jump, &ret0,
1766 				    &ret0_size, &ret0_maxsize))
1767 					goto fail;
1768 #endif
1769 			}
1770 		}
1771 
1772 		pc = &insns[i];
1773 		switch (BPF_CLASS(pc->code)) {
1774 
1775 		default:
1776 			goto fail;
1777 
1778 		case BPF_LD:
1779 			/* BPF_LD+BPF_IMM          A <- k */
1780 			if (pc->code == (BPF_LD|BPF_IMM)) {
1781 				status = sljit_emit_op1(compiler,
1782 				    SLJIT_MOV,
1783 				    BJ_AREG, 0,
1784 				    SLJIT_IMM, (uint32_t)pc->k);
1785 				if (status != SLJIT_SUCCESS)
1786 					goto fail;
1787 
1788 				continue;
1789 			}
1790 
1791 			/* BPF_LD+BPF_MEM          A <- M[k] */
1792 			if (pc->code == (BPF_LD|BPF_MEM)) {
1793 				if ((uint32_t)pc->k >= memwords)
1794 					goto fail;
1795 				status = emit_memload(compiler,
1796 				    BJ_AREG, pc->k, extwords);
1797 				if (status != SLJIT_SUCCESS)
1798 					goto fail;
1799 
1800 				continue;
1801 			}
1802 
1803 			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
1804 			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
1805 				status = sljit_emit_op1(compiler,
1806 				    SLJIT_MOV, /* size_t source */
1807 				    BJ_AREG, 0,
1808 				    SLJIT_MEM1(BJ_ARGS),
1809 				    offsetof(struct bpf_args, wirelen));
1810 				if (status != SLJIT_SUCCESS)
1811 					goto fail;
1812 
1813 				continue;
1814 			}
1815 
1816 			mode = BPF_MODE(pc->code);
1817 			if (mode != BPF_ABS && mode != BPF_IND)
1818 				goto fail;
1819 
1820 			if (unconditional_ret)
1821 				continue;
1822 
1823 			status = emit_pkt_read(compiler, hints, pc,
1824 			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
1825 			if (status != SLJIT_SUCCESS)
1826 				goto fail;
1827 
1828 			continue;
1829 
1830 		case BPF_LDX:
1831 			mode = BPF_MODE(pc->code);
1832 
1833 			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
1834 			if (mode == BPF_IMM) {
1835 				if (BPF_SIZE(pc->code) != BPF_W)
1836 					goto fail;
1837 				status = sljit_emit_op1(compiler,
1838 				    SLJIT_MOV,
1839 				    BJ_XREG, 0,
1840 				    SLJIT_IMM, (uint32_t)pc->k);
1841 				if (status != SLJIT_SUCCESS)
1842 					goto fail;
1843 
1844 				continue;
1845 			}
1846 
1847 			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
1848 			if (mode == BPF_LEN) {
1849 				if (BPF_SIZE(pc->code) != BPF_W)
1850 					goto fail;
1851 				status = sljit_emit_op1(compiler,
1852 				    SLJIT_MOV, /* size_t source */
1853 				    BJ_XREG, 0,
1854 				    SLJIT_MEM1(BJ_ARGS),
1855 				    offsetof(struct bpf_args, wirelen));
1856 				if (status != SLJIT_SUCCESS)
1857 					goto fail;
1858 
1859 				continue;
1860 			}
1861 
1862 			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
1863 			if (mode == BPF_MEM) {
1864 				if (BPF_SIZE(pc->code) != BPF_W)
1865 					goto fail;
1866 				if ((uint32_t)pc->k >= memwords)
1867 					goto fail;
1868 				status = emit_memload(compiler,
1869 				    BJ_XREG, pc->k, extwords);
1870 				if (status != SLJIT_SUCCESS)
1871 					goto fail;
1872 
1873 				continue;
1874 			}
1875 
1876 			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
1877 			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
1878 				goto fail;
1879 
1880 			if (unconditional_ret)
1881 				continue;
1882 
1883 			status = emit_msh(compiler, hints, pc,
1884 			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
1885 			if (status != SLJIT_SUCCESS)
1886 				goto fail;
1887 
1888 			continue;
1889 
1890 		case BPF_ST:
1891 			if (pc->code != BPF_ST ||
1892 			    (uint32_t)pc->k >= memwords) {
1893 				goto fail;
1894 			}
1895 
1896 			status = emit_memstore(compiler,
1897 			    BJ_AREG, pc->k, extwords);
1898 			if (status != SLJIT_SUCCESS)
1899 				goto fail;
1900 
1901 			continue;
1902 
1903 		case BPF_STX:
1904 			if (pc->code != BPF_STX ||
1905 			    (uint32_t)pc->k >= memwords) {
1906 				goto fail;
1907 			}
1908 
1909 			status = emit_memstore(compiler,
1910 			    BJ_XREG, pc->k, extwords);
1911 			if (status != SLJIT_SUCCESS)
1912 				goto fail;
1913 
1914 			continue;
1915 
1916 		case BPF_ALU:
1917 			if (pc->code == (BPF_ALU|BPF_NEG)) {
1918 				status = sljit_emit_op1(compiler,
1919 				    SLJIT_NEG,
1920 				    BJ_AREG, 0,
1921 				    BJ_AREG, 0);
1922 				if (status != SLJIT_SUCCESS)
1923 					goto fail;
1924 
1925 				continue;
1926 			}
1927 
1928 			op = BPF_OP(pc->code);
1929 			if (op != BPF_DIV && op != BPF_MOD) {
1930 				status = sljit_emit_op2(compiler,
1931 				    bpf_alu_to_sljit_op(pc),
1932 				    BJ_AREG, 0,
1933 				    BJ_AREG, 0,
1934 				    kx_to_reg(pc), kx_to_reg_arg(pc));
1935 				if (status != SLJIT_SUCCESS)
1936 					goto fail;
1937 
1938 				continue;
1939 			}
1940 
1941 			/* BPF_DIV/BPF_MOD */
1942 
1943 			src = BPF_SRC(pc->code);
1944 			if (src != BPF_X && src != BPF_K)
1945 				goto fail;
1946 
1947 			/* division by zero? */
1948 			if (src == BPF_X) {
1949 				jump = sljit_emit_cmp(compiler,
1950 				    SLJIT_C_EQUAL|SLJIT_INT_OP,
1951 				    BJ_XREG, 0,
1952 				    SLJIT_IMM, 0);
1953 				if (jump == NULL)
1954 					goto fail;
1955 				if (!append_jump(jump, &ret0,
1956 				    &ret0_size, &ret0_maxsize))
1957 					goto fail;
1958 			} else if (pc->k == 0) {
1959 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
1960 				if (jump == NULL)
1961 					goto fail;
1962 				if (!append_jump(jump, &ret0,
1963 				    &ret0_size, &ret0_maxsize))
1964 					goto fail;
1965 			}
1966 
1967 			if (src == BPF_X) {
1968 				status = emit_moddiv(compiler, pc);
1969 				if (status != SLJIT_SUCCESS)
1970 					goto fail;
1971 			} else if (pc->k != 0) {
1972 				if (pc->k & (pc->k - 1)) {
1973 					status = emit_moddiv(compiler, pc);
1974 				} else {
1975 					status = emit_pow2_moddiv(compiler, pc);
1976 				}
1977 				if (status != SLJIT_SUCCESS)
1978 					goto fail;
1979 			}
1980 
1981 			continue;
1982 
1983 		case BPF_JMP:
1984 			op = BPF_OP(pc->code);
1985 			if (op == BPF_JA) {
1986 				jt = jf = pc->k;
1987 			} else {
1988 				jt = pc->jt;
1989 				jf = pc->jf;
1990 			}
1991 
1992 			negate = (jt == 0) ? 1 : 0;
1993 			branching = (jt == jf) ? 0 : 1;
1994 			jtf = insn_dat[i].u.jdata.jtf;
1995 
1996 			if (branching) {
1997 				if (op != BPF_JSET) {
1998 					jump = sljit_emit_cmp(compiler,
1999 					    bpf_jmp_to_sljit_cond(pc, negate),
2000 					    BJ_AREG, 0,
2001 					    kx_to_reg(pc), kx_to_reg_arg(pc));
2002 				} else {
2003 					status = sljit_emit_op2(compiler,
2004 					    SLJIT_AND,
2005 					    BJ_TMP1REG, 0,
2006 					    BJ_AREG, 0,
2007 					    kx_to_reg(pc), kx_to_reg_arg(pc));
2008 					if (status != SLJIT_SUCCESS)
2009 						goto fail;
2010 
2011 					jump = sljit_emit_cmp(compiler,
2012 					    bpf_jmp_to_sljit_cond(pc, negate),
2013 					    BJ_TMP1REG, 0,
2014 					    SLJIT_IMM, 0);
2015 				}
2016 
2017 				if (jump == NULL)
2018 					goto fail;
2019 
2020 				BJ_ASSERT(jtf[negate].sjump == NULL);
2021 				jtf[negate].sjump = jump;
2022 			}
2023 
2024 			if (!branching || (jt != 0 && jf != 0)) {
2025 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
2026 				if (jump == NULL)
2027 					goto fail;
2028 
2029 				BJ_ASSERT(jtf[branching].sjump == NULL);
2030 				jtf[branching].sjump = jump;
2031 			}
2032 
2033 			continue;
2034 
2035 		case BPF_RET:
2036 			rval = BPF_RVAL(pc->code);
2037 			if (rval == BPF_X)
2038 				goto fail;
2039 
2040 			/* BPF_RET+BPF_K    accept k bytes */
2041 			if (rval == BPF_K) {
2042 				status = sljit_emit_return(compiler,
2043 				    SLJIT_MOV_UI,
2044 				    SLJIT_IMM, (uint32_t)pc->k);
2045 				if (status != SLJIT_SUCCESS)
2046 					goto fail;
2047 			}
2048 
2049 			/* BPF_RET+BPF_A    accept A bytes */
2050 			if (rval == BPF_A) {
2051 				status = sljit_emit_return(compiler,
2052 				    SLJIT_MOV_UI,
2053 				    BJ_AREG, 0);
2054 				if (status != SLJIT_SUCCESS)
2055 					goto fail;
2056 			}
2057 
2058 			continue;
2059 
2060 		case BPF_MISC:
2061 			switch (BPF_MISCOP(pc->code)) {
2062 			case BPF_TAX:
2063 				status = sljit_emit_op1(compiler,
2064 				    SLJIT_MOV_UI,
2065 				    BJ_XREG, 0,
2066 				    BJ_AREG, 0);
2067 				if (status != SLJIT_SUCCESS)
2068 					goto fail;
2069 
2070 				continue;
2071 
2072 			case BPF_TXA:
2073 				status = sljit_emit_op1(compiler,
2074 				    SLJIT_MOV,
2075 				    BJ_AREG, 0,
2076 				    BJ_XREG, 0);
2077 				if (status != SLJIT_SUCCESS)
2078 					goto fail;
2079 
2080 				continue;
2081 
2082 			case BPF_COP:
2083 			case BPF_COPX:
2084 				if (bc == NULL || bc->copfuncs == NULL)
2085 					goto fail;
2086 				if (BPF_MISCOP(pc->code) == BPF_COP &&
2087 				    (uint32_t)pc->k >= bc->nfuncs) {
2088 					goto fail;
2089 				}
2090 
2091 				status = emit_cop(compiler, hints, bc, pc,
2092 				    &ret0, &ret0_size, &ret0_maxsize);
2093 				if (status != SLJIT_SUCCESS)
2094 					goto fail;
2095 
2096 				continue;
2097 			}
2098 
2099 			goto fail;
2100 		} /* switch */
2101 	} /* main loop */
2102 
2103 	BJ_ASSERT(ret0_size <= ret0_maxsize);
2104 
2105 	if (ret0_size > 0) {
2106 		label = sljit_emit_label(compiler);
2107 		if (label == NULL)
2108 			goto fail;
2109 		for (i = 0; i < ret0_size; i++)
2110 			sljit_set_label(ret0[i], label);
2111 	}
2112 
2113 	status = sljit_emit_return(compiler,
2114 	    SLJIT_MOV_UI,
2115 	    SLJIT_IMM, 0);
2116 	if (status != SLJIT_SUCCESS)
2117 		goto fail;
2118 
2119 	rv = true;
2120 
2121 fail:
2122 	if (ret0 != NULL)
2123 		BJ_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));
2124 
2125 	return rv;
2126 }
2127 
2128 bpfjit_func_t
2129 bpfjit_generate_code(const bpf_ctx_t *bc,
2130     const struct bpf_insn *insns, size_t insn_count)
2131 {
2132 	void *rv;
2133 	struct sljit_compiler *compiler;
2134 
2135 	size_t i;
2136 	int status;
2137 
2138 	/* optimization related */
2139 	bpf_memword_init_t initmask;
2140 	bpfjit_hint_t hints;
2141 
2142 	/* memory store location for initial zero initialization */
2143 	sljit_si mem_reg;
2144 	sljit_sw mem_off;
2145 
2146 	struct bpfjit_insn_data *insn_dat;
2147 
2148 	const size_t extwords = GET_EXTWORDS(bc);
2149 	const size_t memwords = GET_MEMWORDS(bc);
2150 	const bpf_memword_init_t preinited = extwords ? bc->preinited : 0;
2151 
2152 	rv = NULL;
2153 	compiler = NULL;
2154 	insn_dat = NULL;
2155 
2156 	if (memwords > MAX_MEMWORDS)
2157 		goto fail;
2158 
2159 	if (insn_count == 0 || insn_count > SIZE_MAX / sizeof(insn_dat[0]))
2160 		goto fail;
2161 
2162 	insn_dat = BJ_ALLOC(insn_count * sizeof(insn_dat[0]));
2163 	if (insn_dat == NULL)
2164 		goto fail;
2165 
2166 	if (!optimize(bc, insns, insn_dat, insn_count, &initmask, &hints))
2167 		goto fail;
2168 
2169 	compiler = sljit_create_compiler();
2170 	if (compiler == NULL)
2171 		goto fail;
2172 
2173 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
2174 	sljit_compiler_verbose(compiler, stderr);
2175 #endif
2176 
2177 	status = sljit_emit_enter(compiler,
2178 	    2, nscratches(hints), nsaveds(hints), sizeof(struct bpfjit_stack));
2179 	if (status != SLJIT_SUCCESS)
2180 		goto fail;
2181 
2182 	if (hints & BJ_HINT_COP) {
2183 		/* save ctx argument */
2184 		status = sljit_emit_op1(compiler,
2185 		    SLJIT_MOV_P,
2186 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
2187 		    offsetof(struct bpfjit_stack, ctx),
2188 		    BJ_CTX_ARG, 0);
2189 		if (status != SLJIT_SUCCESS)
2190 			goto fail;
2191 	}
2192 
2193 	if (extwords == 0) {
2194 		mem_reg = SLJIT_MEM1(SLJIT_LOCALS_REG);
2195 		mem_off = offsetof(struct bpfjit_stack, mem);
2196 	} else {
2197 		/* copy "mem" argument from bpf_args to bpfjit_stack */
2198 		status = sljit_emit_op1(compiler,
2199 		    SLJIT_MOV_P,
2200 		    BJ_TMP1REG, 0,
2201 		    SLJIT_MEM1(BJ_ARGS), offsetof(struct bpf_args, mem));
2202 		if (status != SLJIT_SUCCESS)
2203 			goto fail;
2204 
2205 		status = sljit_emit_op1(compiler,
2206 		    SLJIT_MOV_P,
2207 		    SLJIT_MEM1(SLJIT_LOCALS_REG),
2208 		    offsetof(struct bpfjit_stack, extmem),
2209 		    BJ_TMP1REG, 0);
2210 		if (status != SLJIT_SUCCESS)
2211 			goto fail;
2212 
2213 		mem_reg = SLJIT_MEM1(BJ_TMP1REG);
2214 		mem_off = 0;
2215 	}
2216 
2217 	/*
2218 	 * Exclude pre-initialised external memory words but keep
2219 	 * initialization statuses of A and X registers in case
2220 	 * bc->preinited wrongly sets those two bits.
2221 	 */
2222 	initmask &= ~preinited | BJ_INIT_ABIT | BJ_INIT_XBIT;
2223 
2224 #if defined(_KERNEL)
2225 	/* bpf_filter() checks initialization of memwords. */
2226 	BJ_ASSERT((initmask & (BJ_INIT_MBIT(memwords) - 1)) == 0);
2227 #endif
2228 	for (i = 0; i < memwords; i++) {
2229 		if (initmask & BJ_INIT_MBIT(i)) {
2230 			/* M[i] = 0; */
2231 			status = sljit_emit_op1(compiler,
2232 			    SLJIT_MOV_UI,
2233 			    mem_reg, mem_off + i * sizeof(uint32_t),
2234 			    SLJIT_IMM, 0);
2235 			if (status != SLJIT_SUCCESS)
2236 				goto fail;
2237 		}
2238 	}
2239 
2240 	if (initmask & BJ_INIT_ABIT) {
2241 		/* A = 0; */
2242 		status = sljit_emit_op1(compiler,
2243 		    SLJIT_MOV,
2244 		    BJ_AREG, 0,
2245 		    SLJIT_IMM, 0);
2246 		if (status != SLJIT_SUCCESS)
2247 			goto fail;
2248 	}
2249 
2250 	if (initmask & BJ_INIT_XBIT) {
2251 		/* X = 0; */
2252 		status = sljit_emit_op1(compiler,
2253 		    SLJIT_MOV,
2254 		    BJ_XREG, 0,
2255 		    SLJIT_IMM, 0);
2256 		if (status != SLJIT_SUCCESS)
2257 			goto fail;
2258 	}
2259 
2260 	status = load_buf_buflen(compiler);
2261 	if (status != SLJIT_SUCCESS)
2262 		goto fail;
2263 
2264 	if (!generate_insn_code(compiler, hints,
2265 	    bc, insns, insn_dat, insn_count)) {
2266 		goto fail;
2267 	}
2268 
2269 	rv = sljit_generate_code(compiler);
2270 
2271 fail:
2272 	if (compiler != NULL)
2273 		sljit_free_compiler(compiler);
2274 
2275 	if (insn_dat != NULL)
2276 		BJ_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));
2277 
2278 	return (bpfjit_func_t)rv;
2279 }
2280 
2281 void
2282 bpfjit_free_code(bpfjit_func_t code)
2283 {
2284 
2285 	sljit_free_code((void *)code);
2286 }
2287