xref: /netbsd-src/sys/net/bpfjit.c (revision 2c6fc41c810f5088457889d00eba558e8bc74d9e)
1 /*	$NetBSD: bpfjit.c,v 1.11 2014/05/23 22:04:09 alnsn Exp $	*/
2 
3 /*-
4  * Copyright (c) 2011-2014 Alexander Nasonov.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifdef _KERNEL
34 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.11 2014/05/23 22:04:09 alnsn Exp $");
35 #else
36 __RCSID("$NetBSD: bpfjit.c,v 1.11 2014/05/23 22:04:09 alnsn Exp $");
37 #endif
38 
39 #include <sys/types.h>
40 #include <sys/queue.h>
41 
42 #ifndef _KERNEL
43 #include <assert.h>
44 #define BJ_ASSERT(c) assert(c)
45 #else
46 #define BJ_ASSERT(c) KASSERT(c)
47 #endif
48 
49 #ifndef _KERNEL
50 #include <stdlib.h>
51 #define BJ_ALLOC(sz) malloc(sz)
52 #define BJ_FREE(p, sz) free(p)
53 #else
54 #include <sys/kmem.h>
55 #define BJ_ALLOC(sz) kmem_alloc(sz, KM_SLEEP)
56 #define BJ_FREE(p, sz) kmem_free(p, sz)
57 #endif
58 
59 #ifndef _KERNEL
60 #include <limits.h>
61 #include <stdbool.h>
62 #include <stddef.h>
63 #include <stdint.h>
64 #else
65 #include <sys/atomic.h>
66 #include <sys/module.h>
67 #endif
68 
69 #define	__BPF_PRIVATE
70 #include <net/bpf.h>
71 #include <net/bpfjit.h>
72 #include <sljitLir.h>
73 
74 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
75 #include <stdio.h> /* for stderr */
76 #endif
77 
78 /*
79  * Permanent register assignments.
80  */
81 #define BJ_BUF		SLJIT_SAVED_REG1
82 #define BJ_WIRELEN	SLJIT_SAVED_REG2
83 #define BJ_BUFLEN	SLJIT_SAVED_REG3
84 #define BJ_AREG		SLJIT_TEMPORARY_REG1
85 #define BJ_TMP1REG	SLJIT_TEMPORARY_REG2
86 #define BJ_TMP2REG	SLJIT_TEMPORARY_REG3
87 #define BJ_XREG		SLJIT_TEMPORARY_EREG1
88 #define BJ_TMP3REG	SLJIT_TEMPORARY_EREG2
89 
90 typedef unsigned int bpfjit_init_mask_t;
91 #define BJ_INIT_NOBITS  0u
92 #define BJ_INIT_MBIT(k) (1u << (k))
93 #define BJ_INIT_MMASK   (BJ_INIT_MBIT(BPF_MEMWORDS) - 1u)
94 #define BJ_INIT_ABIT    BJ_INIT_MBIT(BPF_MEMWORDS)
95 #define BJ_INIT_XBIT    BJ_INIT_MBIT(BPF_MEMWORDS + 1)
96 
97 /*
98  * Datatype for Array Bounds Check Elimination (ABC) pass.
99  */
100 typedef uint64_t bpfjit_abc_length_t;
101 #define MAX_ABC_LENGTH (UINT32_MAX + UINT64_C(4)) /* max. width is 4 */
102 
/*
 * Layout of the per-invocation scratch area addressed relative to
 * SLJIT_LOCALS_REG.
 */
struct bpfjit_stack
{
	uint32_t mem[BPF_MEMWORDS];	/* BPF scratch memory store M[] */
#ifdef _KERNEL
	void *tmp;	/* slot whose address is passed as &err to m_xword/m_xhalf/m_xbyte */
#endif
};
110 
111 /*
112  * Data for BPF_JMP instruction.
113  * Forward declaration for struct bpfjit_jump.
114  */
115 struct bpfjit_jump_data;
116 
117 /*
118  * Node of bjumps list.
119  */
struct bpfjit_jump {
	/* Generated sljit jump; optimize_pass1() initializes it to NULL. */
	struct sljit_jump *sjump;
	/* Linkage in the destination instruction's bjumps list. */
	SLIST_ENTRY(bpfjit_jump) entries;
	/* Back pointer to the jump data of the owning BPF_JMP insn. */
	struct bpfjit_jump_data *jdata;
};
125 
126 /*
127  * Data for BPF_JMP instruction.
128  */
struct bpfjit_jump_data {
	/*
	 * These entries make up bjumps list:
	 * jtf[0] - when coming from jt path,
	 * jtf[1] - when coming from jf path.
	 * Each used entry is linked into the destination instruction's
	 * bjumps list by optimize_pass1().
	 */
	struct bpfjit_jump jtf[2];
	/*
	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
	 */
	bpfjit_abc_length_t abc_length;
	/*
	 * Length checked by the last out-of-bounds check.
	 */
	bpfjit_abc_length_t checked_length;
};
145 
146 /*
147  * Data for "read from packet" instructions.
148  * See also read_pkt_insn() function below.
149  */
struct bpfjit_read_pkt_data {
	/*
	 * Length calculated by Array Bounds Check Elimination (ABC) pass.
	 * Computed as (uint32_t)k + width of the read, i.e. the number of
	 * bytes from the start of the packet that must be present.
	 */
	bpfjit_abc_length_t abc_length;
	/*
	 * If positive, emit "if (buflen < check_length) return 0"
	 * out-of-bounds check.
	 * Values greater than UINT32_MAX generate unconditional "return 0".
	 */
	bpfjit_abc_length_t check_length;
};
162 
163 /*
164  * Additional (optimization-related) data for bpf_insn.
165  */
struct bpfjit_insn_data {
	/* List of jumps to this insn. */
	SLIST_HEAD(, bpfjit_jump) bjumps;

	union {
		struct bpfjit_jump_data     jdata; /* valid for BPF_JMP insns */
		struct bpfjit_read_pkt_data rdata; /* valid for packet reads */
	} u;

	/* Bitmask of A/X/M[] values possibly uninitialized on entry here. */
	bpfjit_init_mask_t invalid;
	/* Set by optimize_pass1() when no path reaches this insn. */
	bool unreachable;
};
178 
179 #ifdef _KERNEL
180 
181 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
182 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
183 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
184 
185 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
186 
static int
bpfjit_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		/*
		 * Publish bj_free_code before bj_generate_code with a
		 * producer barrier in between, so that a consumer that
		 * observes a non-NULL bj_generate_code is guaranteed to
		 * also see bj_free_code.
		 */
		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
		membar_producer();
		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
		membar_producer();
		return 0;

	case MODULE_CMD_FINI:
		/* Unloading is not supported once the ops are published. */
		return EOPNOTSUPP;

	default:
		return ENOTTY;
	}
}
206 #endif
207 
208 static uint32_t
209 read_width(const struct bpf_insn *pc)
210 {
211 
212 	switch (BPF_SIZE(pc->code)) {
213 	case BPF_W:
214 		return 4;
215 	case BPF_H:
216 		return 2;
217 	case BPF_B:
218 		return 1;
219 	default:
220 		BJ_ASSERT(false);
221 		return 0;
222 	}
223 }
224 
/*
 * Double the capacity of the jumps array.
 *
 * Returns false when the doubled size would overflow or when
 * allocation fails; in both cases the original array is left intact.
 */
static bool
grow_jumps(struct sljit_jump ***jumps, size_t *size)
{
	const size_t elemsz = sizeof(struct sljit_jump *);
	const size_t oldcnt = *size;
	const size_t newcnt = 2 * oldcnt;
	struct sljit_jump **newarr;

	/* Reject arithmetic overflow of either the count or the bytes. */
	if (newcnt < oldcnt || newcnt > SIZE_MAX / elemsz)
		return false;

	newarr = BJ_ALLOC(newcnt * elemsz);
	if (newarr == NULL)
		return false;

	memcpy(newarr, *jumps, oldcnt * elemsz);
	BJ_FREE(*jumps, oldcnt * elemsz);

	*jumps = newarr;
	*size = newcnt;

	return true;
}
247 
/*
 * Append a jump to the array, growing it first when it is full.
 * Returns false when the array could not be grown.
 */
static bool
append_jump(struct sljit_jump *jump, struct sljit_jump ***jumps,
    size_t *size, size_t *max_size)
{

	if (*size == *max_size) {
		if (!grow_jumps(jumps, max_size))
			return false;
	}

	(*jumps)[*size] = jump;
	*size += 1;
	return true;
}
258 
259 /*
260  * Generate code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
261  */
static int
emit_read8(struct sljit_compiler* compiler, uint32_t k)
{

	/* A = buf[k]; single zero-extended byte load. */
	return sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
}
271 
272 /*
273  * Generate code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
274  */
static int
emit_read16(struct sljit_compiler* compiler, uint32_t k)
{
	int status;

	/* tmp1 = buf[k]; (high byte of the big-endian halfword) */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+1]; (low byte) */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1;  i.e. A = (buf[k] << 8) + buf[k+1] */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	return status;
}
313 
314 /*
315  * Generate code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
316  */
static int
emit_read32(struct sljit_compiler* compiler, uint32_t k)
{
	int status;

	/*
	 * Assemble the big-endian word byte by byte:
	 * A = (buf[k] << 24) + (buf[k+1] << 16) + (buf[k+2] << 8) + buf[k+3].
	 */

	/* tmp1 = buf[k]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = buf[k+1]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP2REG, 0,
	    SLJIT_MEM1(BJ_BUF), k+1);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = buf[k+3]; */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_AREG, 0,
	    SLJIT_MEM1(BJ_BUF), k+3);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 24; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 24);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = buf[k+2]; (tmp1 is free again after the shift above) */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k+2);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp2 = tmp2 << 16; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP2REG, 0,
	    BJ_TMP2REG, 0,
	    SLJIT_IMM, 16);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp2; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP2REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 = tmp1 << 8; */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 8);
	if (status != SLJIT_SUCCESS)
		return status;

	/* A = A + tmp1; */
	status = sljit_emit_op2(compiler,
	    SLJIT_ADD,
	    BJ_AREG, 0,
	    BJ_AREG, 0,
	    BJ_TMP1REG, 0);
	return status;
}
407 
408 #ifdef _KERNEL
409 /*
410  * Generate m_xword/m_xhalf/m_xbyte call.
411  *
412  * pc is one of:
413  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
414  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
415  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
416  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
417  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
418  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
419  * BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
420  *
421  * The dst variable should be
422  *  - BJ_AREG when emitting code for BPF_LD instructions,
423  *  - BJ_XREG or any of BJ_TMP[1-3]REG registers when emitting
424  *    code for BPF_MSH instruction.
425  */
426 static int
427 emit_xcall(struct sljit_compiler* compiler, const struct bpf_insn *pc,
428     int dst, sljit_w dstw, struct sljit_jump **ret0_jump,
429     uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
430 {
431 #if BJ_XREG == SLJIT_RETURN_REG   || \
432     BJ_XREG == SLJIT_TEMPORARY_REG1 || \
433     BJ_XREG == SLJIT_TEMPORARY_REG2 || \
434     BJ_XREG == SLJIT_TEMPORARY_REG3
435 #error "Not supported assignment of registers."
436 #endif
437 	int status;
438 
439 	/*
440 	 * The third argument of fn is an address on stack.
441 	 */
442 	const int arg3_offset = offsetof(struct bpfjit_stack, tmp);
443 
444 	if (BPF_CLASS(pc->code) == BPF_LDX) {
445 		/* save A */
446 		status = sljit_emit_op1(compiler,
447 		    SLJIT_MOV,
448 		    BJ_TMP3REG, 0,
449 		    BJ_AREG, 0);
450 		if (status != SLJIT_SUCCESS)
451 			return status;
452 	}
453 
454 	/*
455 	 * Prepare registers for fn(buf, k, &err) call.
456 	 */
457 	status = sljit_emit_op1(compiler,
458 	    SLJIT_MOV,
459 	    SLJIT_TEMPORARY_REG1, 0,
460 	    BJ_BUF, 0);
461 	if (status != SLJIT_SUCCESS)
462 		return status;
463 
464 	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
465 		status = sljit_emit_op2(compiler,
466 		    SLJIT_ADD,
467 		    SLJIT_TEMPORARY_REG2, 0,
468 		    BJ_XREG, 0,
469 		    SLJIT_IMM, (uint32_t)pc->k);
470 	} else {
471 		status = sljit_emit_op1(compiler,
472 		    SLJIT_MOV,
473 		    SLJIT_TEMPORARY_REG2, 0,
474 		    SLJIT_IMM, (uint32_t)pc->k);
475 	}
476 
477 	if (status != SLJIT_SUCCESS)
478 		return status;
479 
480 	status = sljit_get_local_base(compiler,
481 	    SLJIT_TEMPORARY_REG3, 0, arg3_offset);
482 	if (status != SLJIT_SUCCESS)
483 		return status;
484 
485 	/* fn(buf, k, &err); */
486 	status = sljit_emit_ijump(compiler,
487 	    SLJIT_CALL3,
488 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
489 
490 	if (dst != SLJIT_RETURN_REG) {
491 		/* move return value to dst */
492 		status = sljit_emit_op1(compiler,
493 		    SLJIT_MOV,
494 		    dst, dstw,
495 		    SLJIT_RETURN_REG, 0);
496 		if (status != SLJIT_SUCCESS)
497 			return status;
498 	}
499 
500 	if (BPF_CLASS(pc->code) == BPF_LDX) {
501 		/* restore A */
502 		status = sljit_emit_op1(compiler,
503 		    SLJIT_MOV,
504 		    BJ_AREG, 0,
505 		    BJ_TMP3REG, 0);
506 		if (status != SLJIT_SUCCESS)
507 			return status;
508 	}
509 
510 	/* tmp3 = *err; */
511 	status = sljit_emit_op1(compiler,
512 	    SLJIT_MOV_UI,
513 	    SLJIT_TEMPORARY_REG3, 0,
514 	    SLJIT_MEM1(SLJIT_LOCALS_REG), arg3_offset);
515 	if (status != SLJIT_SUCCESS)
516 		return status;
517 
518 	/* if (tmp3 != 0) return 0; */
519 	*ret0_jump = sljit_emit_cmp(compiler,
520 	    SLJIT_C_NOT_EQUAL,
521 	    SLJIT_TEMPORARY_REG3, 0,
522 	    SLJIT_IMM, 0);
523 	if (*ret0_jump == NULL)
524 		return SLJIT_ERR_ALLOC_FAILED;
525 
526 	return status;
527 }
528 #endif
529 
530 /*
531  * Generate code for
532  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
533  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
534  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
535  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
536  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
537  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
538  */
static int
emit_pkt_read(struct sljit_compiler* compiler,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status = 0; /* XXX gcc 4.1 */
	uint32_t width;
	struct sljit_jump *jump;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *over_mchain_jump;
	/* An externally supplied jump means no buflen check was emitted yet. */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * In the kernel, buflen == 0 means the packet is an mbuf chain;
	 * branch to the mchain handler emitted further below.
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	width = read_width(pc);

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* tmp1 = buflen - (pc->k + width); */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_TMP1REG, 0,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, k + width);
		if (status != SLJIT_SUCCESS)
			return status;

		/* buf += X; */
		status = sljit_emit_op2(compiler,
		    SLJIT_ADD,
		    BJ_BUF, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;

		/* if (tmp1 < X) return 0; (bounds check for X + k + width) */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_LESS,
		    BJ_TMP1REG, 0,
		    BJ_XREG, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Emit the contiguous-buffer load of the requested width. */
	switch (width) {
	case 4:
		status = emit_read32(compiler, k);
		break;
	case 2:
		status = emit_read16(compiler, k);
		break;
	case 1:
		status = emit_read8(compiler, k);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	if (BPF_MODE(pc->code) == BPF_IND) {
		/* buf -= X; (undo the adjustment made above) */
		status = sljit_emit_op2(compiler,
		    SLJIT_SUB,
		    BJ_BUF, 0,
		    BJ_BUF, 0,
		    BJ_XREG, 0);
		if (status != SLJIT_SUCCESS)
			return status;
	}

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* Call the matching mbuf-chain read helper. */
	switch (width) {
	case 4:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xword);
		break;
	case 2:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xhalf);
		break;
	case 1:
		status = emit_xcall(compiler, pc, BJ_AREG, 0, &jump, &m_xbyte);
		break;
	}

	if (status != SLJIT_SUCCESS)
		return status;

	/* The helper's error jump also returns 0. */
	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
672 
673 /*
674  * Generate code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
675  */
static int
emit_msh(struct sljit_compiler* compiler,
    const struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    struct sljit_jump ***ret0, size_t *ret0_size, size_t *ret0_maxsize)
{
	int status;
#ifdef _KERNEL
	struct sljit_label *label;
	struct sljit_jump *jump, *over_mchain_jump;
	/* An externally supplied jump means no buflen check was emitted yet. */
	const bool check_zero_buflen = (to_mchain_jump != NULL);
#endif
	const uint32_t k = pc->k;

#ifdef _KERNEL
	/*
	 * In the kernel, buflen == 0 means the packet is an mbuf chain;
	 * branch to the mchain handler emitted further below.
	 */
	if (to_mchain_jump == NULL) {
		to_mchain_jump = sljit_emit_cmp(compiler,
		    SLJIT_C_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (to_mchain_jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
	}
#endif

	/* tmp1 = buf[k] */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV_UB,
	    BJ_TMP1REG, 0,
	    SLJIT_MEM1(BJ_BUF), k);
	if (status != SLJIT_SUCCESS)
		return status;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;

#ifdef _KERNEL
	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
	if (over_mchain_jump == NULL)
		return SLJIT_ERR_ALLOC_FAILED;

	/* entry point to mchain handler */
	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(to_mchain_jump, label);

	if (check_zero_buflen) {
		/* if (buflen != 0) return 0; */
		jump = sljit_emit_cmp(compiler,
		    SLJIT_C_NOT_EQUAL,
		    BJ_BUFLEN, 0,
		    SLJIT_IMM, 0);
		if (jump == NULL)
			return SLJIT_ERR_ALLOC_FAILED;
		if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
			return SLJIT_ERR_ALLOC_FAILED;
	}

	/* tmp1 = m_xbyte(buf, k, &err); */
	status = emit_xcall(compiler, pc, BJ_TMP1REG, 0, &jump, &m_xbyte);
	if (status != SLJIT_SUCCESS)
		return status;

	/* The helper's error jump returns 0. */
	if (!append_jump(jump, ret0, ret0_size, ret0_maxsize))
		return SLJIT_ERR_ALLOC_FAILED;

	/* tmp1 &= 0xf */
	status = sljit_emit_op2(compiler,
	    SLJIT_AND,
	    BJ_TMP1REG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 0xf);
	if (status != SLJIT_SUCCESS)
		return status;

	/* X = tmp1 << 2 */
	status = sljit_emit_op2(compiler,
	    SLJIT_SHL,
	    BJ_XREG, 0,
	    BJ_TMP1REG, 0,
	    SLJIT_IMM, 2);
	if (status != SLJIT_SUCCESS)
		return status;


	label = sljit_emit_label(compiler);
	if (label == NULL)
		return SLJIT_ERR_ALLOC_FAILED;
	sljit_set_label(over_mchain_jump, label);
#endif

	return status;
}
783 
784 static int
785 emit_pow2_division(struct sljit_compiler* compiler, uint32_t k)
786 {
787 	int shift = 0;
788 	int status = SLJIT_SUCCESS;
789 
790 	while (k > 1) {
791 		k >>= 1;
792 		shift++;
793 	}
794 
795 	BJ_ASSERT(k == 1 && shift < 32);
796 
797 	if (shift != 0) {
798 		status = sljit_emit_op2(compiler,
799 		    SLJIT_LSHR|SLJIT_INT_OP,
800 		    BJ_AREG, 0,
801 		    BJ_AREG, 0,
802 		    SLJIT_IMM, shift);
803 	}
804 
805 	return status;
806 }
807 
808 #if !defined(BPFJIT_USE_UDIV)
809 static sljit_uw
810 divide(sljit_uw x, sljit_uw y)
811 {
812 
813 	return (uint32_t)x / (uint32_t)y;
814 }
815 #endif
816 
817 /*
818  * Generate A = A / div.
819  * divt,divw are either SLJIT_IMM,pc->k or BJ_XREG,0.
820  */
static int
emit_division(struct sljit_compiler* compiler, int divt, sljit_w divw)
{
	int status;

#if BJ_XREG == SLJIT_RETURN_REG   || \
    BJ_XREG == SLJIT_TEMPORARY_REG1 || \
    BJ_XREG == SLJIT_TEMPORARY_REG2 || \
    BJ_AREG == SLJIT_TEMPORARY_REG2
#error "Not supported assignment of registers."
#endif

	/* Move the dividend (A) into the first argument register. */
#if BJ_AREG != SLJIT_TEMPORARY_REG1
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_TEMPORARY_REG1, 0,
	    BJ_AREG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif

	/* Move the divisor (immediate k or X) into the second register. */
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    SLJIT_TEMPORARY_REG2, 0,
	    divt, divw);
	if (status != SLJIT_SUCCESS)
		return status;

#if defined(BPFJIT_USE_UDIV)
	/* Native unsigned divide; quotient ends up in TEMPORARY_REG1. */
	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);

#if BJ_AREG != SLJIT_TEMPORARY_REG1
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BJ_AREG, 0,
	    SLJIT_TEMPORARY_REG1, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif
#else
	/* Call the divide() helper; quotient is in SLJIT_RETURN_REG. */
	status = sljit_emit_ijump(compiler,
	    SLJIT_CALL2,
	    SLJIT_IMM, SLJIT_FUNC_OFFSET(divide));

#if BJ_AREG != SLJIT_RETURN_REG
	status = sljit_emit_op1(compiler,
	    SLJIT_MOV,
	    BJ_AREG, 0,
	    SLJIT_RETURN_REG, 0);
	if (status != SLJIT_SUCCESS)
		return status;
#endif
#endif

	return status;
}
877 
878 /*
879  * Return true if pc is a "read from packet" instruction.
880  * If length is not NULL and return value is true, *length will
881  * be set to a safe length required to read a packet.
882  */
883 static bool
884 read_pkt_insn(const struct bpf_insn *pc, bpfjit_abc_length_t *length)
885 {
886 	bool rv;
887 	bpfjit_abc_length_t width;
888 
889 	switch (BPF_CLASS(pc->code)) {
890 	default:
891 		rv = false;
892 		break;
893 
894 	case BPF_LD:
895 		rv = BPF_MODE(pc->code) == BPF_ABS ||
896 		     BPF_MODE(pc->code) == BPF_IND;
897 		if (rv)
898 			width = read_width(pc);
899 		break;
900 
901 	case BPF_LDX:
902 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
903 		width = 1;
904 		break;
905 	}
906 
907 	if (rv && length != NULL) {
908 		/*
909 		 * Values greater than UINT32_MAX will generate
910 		 * unconditional "return 0".
911 		 */
912 		*length = (uint32_t)pc->k + width;
913 	}
914 
915 	return rv;
916 }
917 
918 static void
919 optimize_init(struct bpfjit_insn_data *insn_dat, size_t insn_count)
920 {
921 	size_t i;
922 
923 	for (i = 0; i < insn_count; i++) {
924 		SLIST_INIT(&insn_dat[i].bjumps);
925 		insn_dat[i].invalid = BJ_INIT_NOBITS;
926 	}
927 }
928 
929 /*
930  * The function divides instructions into blocks. Destination of a jump
931  * instruction starts a new block. BPF_RET and BPF_JMP instructions
932  * terminate a block. Blocks are linear, that is, there are no jumps out
933  * from the middle of a block and there are no jumps in to the middle of
934  * a block.
935  *
936  * The function also sets bits in *initmask for memwords that
937  * need to be initialized to zero. Note that this set should be empty
938  * for any valid kernel filter program.
939  */
static bool
optimize_pass1(const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count,
    bpfjit_init_mask_t *initmask, int *nscratches)
{
	struct bpfjit_jump *jtf;
	size_t i;
	uint32_t jt, jf;
	bpfjit_abc_length_t length;
	bpfjit_init_mask_t invalid; /* borrowed from bpf_filter() */
	bool unreachable;

	/*
	 * Baseline scratch register count; bumped below to 3/4/5 when
	 * BJ_TMP2REG, BJ_XREG or (kernel only) BJ_TMP3REG are needed.
	 */
	*nscratches = 2;
	*initmask = BJ_INIT_NOBITS;

	unreachable = false;
	/* At program entry every A/X/M[] value is uninitialized. */
	invalid = ~BJ_INIT_NOBITS;

	for (i = 0; i < insn_count; i++) {
		/* A jump target starts a new (reachable) block. */
		if (!SLIST_EMPTY(&insn_dat[i].bjumps))
			unreachable = false;
		insn_dat[i].unreachable = unreachable;

		if (unreachable)
			continue;

		/* Merge in the invalid sets propagated by earlier jumps. */
		invalid |= insn_dat[i].invalid;

		/*
		 * A read past UINT32_MAX can never succeed; code after it
		 * is dead (codegen emits unconditional "return 0" for it).
		 */
		if (read_pkt_insn(&insns[i], &length) && length > UINT32_MAX)
			unreachable = true;

		switch (BPF_CLASS(insns[i].code)) {
		case BPF_RET:
			if (BPF_RVAL(insns[i].code) == BPF_A)
				*initmask |= invalid & BJ_INIT_ABIT;

			unreachable = true;
			continue;

		case BPF_LD:
			if (BPF_MODE(insns[i].code) == BPF_IND ||
			    BPF_MODE(insns[i].code) == BPF_ABS) {
				if (BPF_MODE(insns[i].code) == BPF_IND &&
				    *nscratches < 4) {
					/* uses BJ_XREG */
					*nscratches = 4;
				}
				if (*nscratches < 3 &&
				    read_width(&insns[i]) == 4) {
					/* uses BJ_TMP2REG */
					*nscratches = 3;
				}
			}

			if (BPF_MODE(insns[i].code) == BPF_IND)
				*initmask |= invalid & BJ_INIT_XBIT;

			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < BPF_MEMWORDS) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			/* A is written by every BPF_LD. */
			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_LDX:
#if defined(_KERNEL)
			/* uses BJ_TMP3REG */
			*nscratches = 5;
#endif
			/* uses BJ_XREG */
			if (*nscratches < 4)
				*nscratches = 4;

			if (BPF_MODE(insns[i].code) == BPF_MEM &&
			    (uint32_t)insns[i].k < BPF_MEMWORDS) {
				*initmask |= invalid & BJ_INIT_MBIT(insns[i].k);
			}

			/* X is written by every BPF_LDX. */
			invalid &= ~BJ_INIT_XBIT;
			continue;

		case BPF_ST:
			*initmask |= invalid & BJ_INIT_ABIT;

			if ((uint32_t)insns[i].k < BPF_MEMWORDS)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_STX:
			/* uses BJ_XREG */
			if (*nscratches < 4)
				*nscratches = 4;

			*initmask |= invalid & BJ_INIT_XBIT;

			if ((uint32_t)insns[i].k < BPF_MEMWORDS)
				invalid &= ~BJ_INIT_MBIT(insns[i].k);

			continue;

		case BPF_ALU:
			*initmask |= invalid & BJ_INIT_ABIT;

			if (insns[i].code != (BPF_ALU|BPF_NEG) &&
			    BPF_SRC(insns[i].code) == BPF_X) {
				*initmask |= invalid & BJ_INIT_XBIT;
				/* uses BJ_XREG */
				if (*nscratches < 4)
					*nscratches = 4;

			}

			invalid &= ~BJ_INIT_ABIT;
			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(insns[i].code)) {
			case BPF_TAX: // X <- A
				/* uses BJ_XREG */
				if (*nscratches < 4)
					*nscratches = 4;

				*initmask |= invalid & BJ_INIT_ABIT;
				invalid &= ~BJ_INIT_XBIT;
				continue;

			case BPF_TXA: // A <- X
				/* uses BJ_XREG */
				if (*nscratches < 4)
					*nscratches = 4;

				*initmask |= invalid & BJ_INIT_XBIT;
				invalid &= ~BJ_INIT_ABIT;
				continue;
			}

			continue;

		case BPF_JMP:
			/* Initialize abc_length for ABC pass. */
			insn_dat[i].u.jdata.abc_length = MAX_ABC_LENGTH;

			if (BPF_OP(insns[i].code) == BPF_JA) {
				jt = jf = insns[i].k;
			} else {
				jt = insns[i].jt;
				jf = insns[i].jf;
			}

			/*
			 * Reject jumps past the end of the program; this
			 * also rejects a BPF_JMP as the last instruction.
			 */
			if (jt >= insn_count - (i + 1) ||
			    jf >= insn_count - (i + 1)) {
				return false;
			}

			/*
			 * If neither branch falls through, the next insn
			 * is reachable only as a jump target.
			 */
			if (jt > 0 && jf > 0)
				unreachable = true;

			jt += i + 1;
			jf += i + 1;

			jtf = insn_dat[i].u.jdata.jtf;

			jtf[0].sjump = NULL;
			jtf[0].jdata = &insn_dat[i].u.jdata;
			SLIST_INSERT_HEAD(&insn_dat[jt].bjumps,
			    &jtf[0], entries);

			if (jf != jt) {
				jtf[1].sjump = NULL;
				jtf[1].jdata = &insn_dat[i].u.jdata;
				SLIST_INSERT_HEAD(&insn_dat[jf].bjumps,
				    &jtf[1], entries);
			}

			/*
			 * Hand the current invalid set to both targets;
			 * it will be re-merged from insn_dat[].invalid
			 * when those instructions are visited.
			 */
			insn_dat[jf].invalid |= invalid;
			insn_dat[jt].invalid |= invalid;
			invalid = 0;

			continue;
		}
	}

	return true;
}
1126 
1127 /*
1128  * Array Bounds Check Elimination (ABC) pass.
1129  */
static void
optimize_pass2(const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	struct bpfjit_jump *jmp;
	const struct bpf_insn *pc;
	struct bpfjit_insn_data *pd;
	size_t i;
	bpfjit_abc_length_t length, abc_length = 0;

	/*
	 * Walk the program backwards.  At each point, abc_length is a
	 * packet length whose out-of-bounds check can safely be hoisted
	 * to this point: every path from here either reads that much
	 * data or (for BPF_RET+BPF_K 0) is indistinguishable from an
	 * out-of-bounds load.
	 */
	for (i = insn_count; i != 0; i--) {
		pc = &insns[i-1];
		pd = &insn_dat[i-1];

		if (pd->unreachable)
			continue;

		switch (BPF_CLASS(pc->code)) {
		case BPF_RET:
			/*
			 * It's quite common for bpf programs to
			 * check packet bytes in increasing order
			 * and return zero if bytes don't match
			 * specified critetion. Such programs disable
			 * ABC optimization completely because for
			 * every jump there is a branch with no read
			 * instruction.
			 * With no side effects, BPF_RET+BPF_K 0 is
			 * indistinguishable from out-of-bound load.
			 * Therefore, abc_length can be set to
			 * MAX_ABC_LENGTH and enable ABC for many
			 * bpf programs.
			 * If this optimization pass encounters any
			 * instruction with a side effect, it will
			 * reset abc_length.
			 */
			if (BPF_RVAL(pc->code) == BPF_K && pc->k == 0)
				abc_length = MAX_ABC_LENGTH;
			else
				abc_length = 0;
			break;

		case BPF_JMP:
			/* Take the value merged from both branch targets. */
			abc_length = pd->u.jdata.abc_length;
			break;

		default:
			if (read_pkt_insn(pc, &length)) {
				/* A read extends the hoistable length. */
				if (abc_length < length)
					abc_length = length;
				pd->u.rdata.abc_length = abc_length;
			}
			break;
		}

		/*
		 * Propagate to jumps targeting this insn: a jump source
		 * may only assume the minimum over all of its targets.
		 */
		SLIST_FOREACH(jmp, &pd->bjumps, entries) {
			if (jmp->jdata->abc_length > abc_length)
				jmp->jdata->abc_length = abc_length;
		}
	}
}
1191 
/*
 * Forward pass: decide where out-of-bounds checks are emitted.
 * checked_length tracks the packet length already verified on the
 * current path; a read insn only emits a new check (check_length > 0)
 * when its abc_length exceeds what was already checked.
 */
static void
optimize_pass3(const struct bpf_insn *insns,
    struct bpfjit_insn_data *insn_dat, size_t insn_count)
{
	struct bpfjit_jump *jmp;
	size_t i;
	bpfjit_abc_length_t checked_length = 0;

	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/*
		 * At a merge point only the minimum checked length over
		 * all incoming jumps is guaranteed.
		 */
		SLIST_FOREACH(jmp, &insn_dat[i].bjumps, entries) {
			if (jmp->jdata->checked_length < checked_length)
				checked_length = jmp->jdata->checked_length;
		}

		if (BPF_CLASS(insns[i].code) == BPF_JMP) {
			/* Record what both branches may rely upon. */
			insn_dat[i].u.jdata.checked_length = checked_length;
		} else if (read_pkt_insn(&insns[i], NULL)) {
			struct bpfjit_read_pkt_data *rdata =
			    &insn_dat[i].u.rdata;
			rdata->check_length = 0;
			if (checked_length < rdata->abc_length) {
				checked_length = rdata->abc_length;
				rdata->check_length = checked_length;
			}
		}
	}
}
1222 
1223 static bool
1224 optimize(const struct bpf_insn *insns,
1225     struct bpfjit_insn_data *insn_dat, size_t insn_count,
1226     bpfjit_init_mask_t *initmask, int *nscratches)
1227 {
1228 
1229 	optimize_init(insn_dat, insn_count);
1230 
1231 	if (!optimize_pass1(insns, insn_dat, insn_count,
1232 	    initmask, nscratches)) {
1233 		return false;
1234 	}
1235 
1236 	optimize_pass2(insns, insn_dat, insn_count);
1237 	optimize_pass3(insns, insn_dat, insn_count);
1238 
1239 	return true;
1240 }
1241 
1242 /*
1243  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
1244  */
1245 static int
1246 bpf_alu_to_sljit_op(const struct bpf_insn *pc)
1247 {
1248 
1249 	/*
1250 	 * Note: all supported 64bit arches have 32bit multiply
1251 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
1252 	 */
1253 	switch (BPF_OP(pc->code)) {
1254 	case BPF_ADD: return SLJIT_ADD;
1255 	case BPF_SUB: return SLJIT_SUB;
1256 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
1257 	case BPF_OR:  return SLJIT_OR;
1258 	case BPF_AND: return SLJIT_AND;
1259 	case BPF_LSH: return SLJIT_SHL;
1260 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
1261 	default:
1262 		BJ_ASSERT(false);
1263 		return 0;
1264 	}
1265 }
1266 
1267 /*
1268  * Convert BPF_JMP operations except BPF_JA to sljit condition.
1269  */
1270 static int
1271 bpf_jmp_to_sljit_cond(const struct bpf_insn *pc, bool negate)
1272 {
1273 	/*
1274 	 * Note: all supported 64bit arches have 32bit comparison
1275 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
1276 	 */
1277 	int rv = SLJIT_INT_OP;
1278 
1279 	switch (BPF_OP(pc->code)) {
1280 	case BPF_JGT:
1281 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
1282 		break;
1283 	case BPF_JGE:
1284 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
1285 		break;
1286 	case BPF_JEQ:
1287 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
1288 		break;
1289 	case BPF_JSET:
1290 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
1291 		break;
1292 	default:
1293 		BJ_ASSERT(false);
1294 	}
1295 
1296 	return rv;
1297 }
1298 
1299 /*
1300  * Convert BPF_K and BPF_X to sljit register.
1301  */
1302 static int
1303 kx_to_reg(const struct bpf_insn *pc)
1304 {
1305 
1306 	switch (BPF_SRC(pc->code)) {
1307 	case BPF_K: return SLJIT_IMM;
1308 	case BPF_X: return BJ_XREG;
1309 	default:
1310 		BJ_ASSERT(false);
1311 		return 0;
1312 	}
1313 }
1314 
1315 static sljit_w
1316 kx_to_reg_arg(const struct bpf_insn *pc)
1317 {
1318 
1319 	switch (BPF_SRC(pc->code)) {
1320 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
1321 	case BPF_X: return 0;               /* BJ_XREG, 0,      */
1322 	default:
1323 		BJ_ASSERT(false);
1324 		return 0;
1325 	}
1326 }
1327 
/*
 * JIT-compile a BPF program into native code using sljit.
 *
 * Validates and optimizes insns[0..insn_count-1], then translates each
 * reachable instruction.  Returns the generated function, or NULL when
 * the program is invalid or any allocation/emission step fails.  The
 * result must be released with bpfjit_free_code().
 *
 * NOTE(review): the bc argument is not referenced anywhere in this
 * function in this revision.
 */
bpfjit_func_t
bpfjit_generate_code(bpf_ctx_t *bc, struct bpf_insn *insns, size_t insn_count)
{
	void *rv;
	struct sljit_compiler *compiler;

	size_t i;
	int status;
	int branching, negate;
	unsigned int rval, mode, src;

	/* optimization related */
	bpfjit_init_mask_t initmask;	/* A/X/memwords that must be zeroed */
	int nscratches;

	/* a list of jumps to out-of-bound return from a generated function */
	struct sljit_jump **ret0;
	size_t ret0_size, ret0_maxsize;

	const struct bpf_insn *pc;
	struct bpfjit_insn_data *insn_dat;	/* per-insn optimizer results */

	/* for local use */
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct bpfjit_jump *bjump, *jtf;

	struct sljit_jump *to_mchain_jump;
	bool unconditional_ret;

	uint32_t jt, jf;

	rv = NULL;
	compiler = NULL;
	insn_dat = NULL;
	ret0 = NULL;

	/* Reject empty programs and counts that would overflow the allocation. */
	if (insn_count == 0 || insn_count > SIZE_MAX / sizeof(insn_dat[0]))
		goto fail;

	insn_dat = BJ_ALLOC(insn_count * sizeof(insn_dat[0]));
	if (insn_dat == NULL)
		goto fail;

	/* Run all optimization passes; fails for invalid programs. */
	if (!optimize(insns, insn_dat, insn_count,
	    &initmask, &nscratches)) {
		goto fail;
	}

#if defined(_KERNEL)
	/* bpf_filter() checks initialization of memwords. */
	BJ_ASSERT((initmask & BJ_INIT_MMASK) == 0);
#endif

	/* Start with room for 64 pending "return 0" jumps; grown on demand. */
	ret0_size = 0;
	ret0_maxsize = 64;
	ret0 = BJ_ALLOC(ret0_maxsize * sizeof(ret0[0]));
	if (ret0 == NULL)
		goto fail;

	compiler = sljit_create_compiler();
	if (compiler == NULL)
		goto fail;

#if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
	sljit_compiler_verbose(compiler, stderr);
#endif

	/*
	 * Function prologue.  NOTE(review): argument order assumed to be
	 * (args, temporaries, saveds, local_size) -- confirm against the
	 * bundled sljit version.
	 */
	status = sljit_emit_enter(compiler,
	    3, nscratches, 3, sizeof(struct bpfjit_stack));
	if (status != SLJIT_SUCCESS)
		goto fail;

	/* Zero only the memwords flagged by the optimizer in initmask. */
	for (i = 0; i < BPF_MEMWORDS; i++) {
		if (initmask & BJ_INIT_MBIT(i)) {
			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    offsetof(struct bpfjit_stack, mem) +
			        i * sizeof(uint32_t),
			    SLJIT_IMM, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;
		}
	}

	if (initmask & BJ_INIT_ABIT) {
		/* A = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_AREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	if (initmask & BJ_INIT_XBIT) {
		/* X = 0; */
		status = sljit_emit_op1(compiler,
		    SLJIT_MOV,
		    BJ_XREG, 0,
		    SLJIT_IMM, 0);
		if (status != SLJIT_SUCCESS)
			goto fail;
	}

	/* Main translation loop: one iteration per BPF instruction. */
	for (i = 0; i < insn_count; i++) {
		if (insn_dat[i].unreachable)
			continue;

		/*
		 * Resolve jumps to the current insn.
		 */
		label = NULL;
		SLIST_FOREACH(bjump, &insn_dat[i].bjumps, entries) {
			if (bjump->sjump != NULL) {
				/* Emit at most one label per target insn. */
				if (label == NULL)
					label = sljit_emit_label(compiler);
				if (label == NULL)
					goto fail;
				sljit_set_label(bjump->sjump, label);
			}
		}

		to_mchain_jump = NULL;
		unconditional_ret = false;

		/*
		 * Emit the bounds check computed by the ABC optimizer
		 * before any instruction that reads packet bytes.
		 */
		if (read_pkt_insn(&insns[i], NULL)) {
			if (insn_dat[i].u.rdata.check_length > UINT32_MAX) {
				/* Jump to "return 0" unconditionally. */
				unconditional_ret = true;
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (insn_dat[i].u.rdata.check_length > 0) {
				/* if (buflen < check_length) return 0; */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_LESS,
				    BJ_BUFLEN, 0,
				    SLJIT_IMM,
				    insn_dat[i].u.rdata.check_length);
				if (jump == NULL)
					goto fail;
#ifdef _KERNEL
				/*
				 * In-kernel: a failed check falls back to
				 * the mbuf-chain read path instead of
				 * returning 0 immediately.
				 */
				to_mchain_jump = jump;
#else
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
#endif
			}
		}

		pc = &insns[i];
		switch (BPF_CLASS(pc->code)) {

		default:
			goto fail;

		case BPF_LD:
			/* BPF_LD+BPF_IMM          A <- k */
			if (pc->code == (BPF_LD|BPF_IMM)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_MEM          A <- M[k] */
			if (pc->code == (BPF_LD|BPF_MEM)) {
				if ((uint32_t)pc->k >= BPF_MEMWORDS)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_AREG, 0,
				    SLJIT_MEM1(SLJIT_LOCALS_REG),
				    offsetof(struct bpfjit_stack, mem) +
				        pc->k * sizeof(uint32_t));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    BJ_WIRELEN, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* Only packet reads (BPF_ABS/BPF_IND) remain valid. */
			mode = BPF_MODE(pc->code);
			if (mode != BPF_ABS && mode != BPF_IND)
				goto fail;

			/* The read is provably out of bounds; skip it. */
			if (unconditional_ret)
				continue;

			status = emit_pkt_read(compiler, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_LDX:
			mode = BPF_MODE(pc->code);

			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
			if (mode == BPF_IMM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_XREG, 0,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
			if (mode == BPF_LEN) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_XREG, 0,
				    BJ_WIRELEN, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
			if (mode == BPF_MEM) {
				if (BPF_SIZE(pc->code) != BPF_W)
					goto fail;
				if ((uint32_t)pc->k >= BPF_MEMWORDS)
					goto fail;
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_XREG, 0,
				    SLJIT_MEM1(SLJIT_LOCALS_REG),
				    offsetof(struct bpfjit_stack, mem) +
				        pc->k * sizeof(uint32_t));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
				goto fail;

			/* The read is provably out of bounds; skip it. */
			if (unconditional_ret)
				continue;

			status = emit_msh(compiler, pc,
			    to_mchain_jump, &ret0, &ret0_size, &ret0_maxsize);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ST:
			/* M[k] <- A */
			if (pc->code != BPF_ST ||
			    (uint32_t)pc->k >= BPF_MEMWORDS) {
				goto fail;
			}

			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    offsetof(struct bpfjit_stack, mem) +
			        pc->k * sizeof(uint32_t),
			    BJ_AREG, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_STX:
			/* M[k] <- X */
			if (pc->code != BPF_STX ||
			    (uint32_t)pc->k >= BPF_MEMWORDS) {
				goto fail;
			}

			status = sljit_emit_op1(compiler,
			    SLJIT_MOV_UI,
			    SLJIT_MEM1(SLJIT_LOCALS_REG),
			    offsetof(struct bpfjit_stack, mem) +
			        pc->k * sizeof(uint32_t),
			    BJ_XREG, 0);
			if (status != SLJIT_SUCCESS)
				goto fail;

			continue;

		case BPF_ALU:
			/* A <- -A */
			if (pc->code == (BPF_ALU|BPF_NEG)) {
				status = sljit_emit_op1(compiler,
				    SLJIT_NEG,
				    BJ_AREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* All ALU ops except NEG and DIV map 1:1 to sljit. */
			if (BPF_OP(pc->code) != BPF_DIV) {
				status = sljit_emit_op2(compiler,
				    bpf_alu_to_sljit_op(pc),
				    BJ_AREG, 0,
				    BJ_AREG, 0,
				    kx_to_reg(pc), kx_to_reg_arg(pc));
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			/* BPF_DIV */

			src = BPF_SRC(pc->code);
			if (src != BPF_X && src != BPF_K)
				goto fail;

			/* division by zero? */
			if (src == BPF_X) {
				/* Runtime check: X == 0 means return 0. */
				jump = sljit_emit_cmp(compiler,
				    SLJIT_C_EQUAL|SLJIT_INT_OP,
				    BJ_XREG, 0,
				    SLJIT_IMM, 0);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			} else if (pc->k == 0) {
				/* Constant zero divisor: always return 0. */
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;
				if (!append_jump(jump, &ret0,
				    &ret0_size, &ret0_maxsize))
					goto fail;
			}

			if (src == BPF_X) {
				status = emit_division(compiler, BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			} else if (pc->k != 0) {
				if (pc->k & (pc->k - 1)) {
				    status = emit_division(compiler,
				        SLJIT_IMM, (uint32_t)pc->k);
				} else {
				    /* k is a power of two: divide by shifting. */
				    status = emit_pow2_division(compiler,
				        (uint32_t)pc->k);
				}
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_JMP:
			/* BPF_JA uses k for both targets. */
			if (BPF_OP(pc->code) == BPF_JA) {
				jt = jf = pc->k;
			} else {
				jt = pc->jt;
				jf = pc->jf;
			}

			/*
			 * negate: the true target is the fall-through, so
			 * emit the inverted condition and jump on false.
			 * branching: no conditional jump needed if both
			 * targets coincide.
			 */
			negate = (jt == 0) ? 1 : 0;
			branching = (jt == jf) ? 0 : 1;
			/* jtf: per-insn pair of pending jump descriptors. */
			jtf = insn_dat[i].u.jdata.jtf;

			if (branching) {
				if (BPF_OP(pc->code) != BPF_JSET) {
					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
				} else {
					/* JSET: test (A & operand) != 0. */
					status = sljit_emit_op2(compiler,
					    SLJIT_AND,
					    BJ_TMP1REG, 0,
					    BJ_AREG, 0,
					    kx_to_reg(pc), kx_to_reg_arg(pc));
					if (status != SLJIT_SUCCESS)
						goto fail;

					jump = sljit_emit_cmp(compiler,
					    bpf_jmp_to_sljit_cond(pc, negate),
					    BJ_TMP1REG, 0,
					    SLJIT_IMM, 0);
				}

				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[negate].sjump == NULL);
				jtf[negate].sjump = jump;
			}

			/* Unconditional jump when neither target falls through. */
			if (!branching || (jt != 0 && jf != 0)) {
				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
				if (jump == NULL)
					goto fail;

				BJ_ASSERT(jtf[branching].sjump == NULL);
				jtf[branching].sjump = jump;
			}

			continue;

		case BPF_RET:
			rval = BPF_RVAL(pc->code);
			/* BPF_RET+BPF_X is not a valid BPF instruction. */
			if (rval == BPF_X)
				goto fail;

			/* BPF_RET+BPF_K    accept k bytes */
			if (rval == BPF_K) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    SLJIT_IMM, (uint32_t)pc->k);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			/* BPF_RET+BPF_A    accept A bytes */
			if (rval == BPF_A) {
				status = sljit_emit_return(compiler,
				    SLJIT_MOV_UI,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;
			}

			continue;

		case BPF_MISC:
			switch (BPF_MISCOP(pc->code)) {
			case BPF_TAX:
				/* X <- A */
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV_UI,
				    BJ_XREG, 0,
				    BJ_AREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;

			case BPF_TXA:
				/* A <- X */
				status = sljit_emit_op1(compiler,
				    SLJIT_MOV,
				    BJ_AREG, 0,
				    BJ_XREG, 0);
				if (status != SLJIT_SUCCESS)
					goto fail;

				continue;
			}

			goto fail;
		} /* switch */
	} /* main loop */

	BJ_ASSERT(ret0_size <= ret0_maxsize);

	/* Bind every collected "return 0" jump to a common epilogue. */
	if (ret0_size > 0) {
		label = sljit_emit_label(compiler);
		if (label == NULL)
			goto fail;
		for (i = 0; i < ret0_size; i++)
			sljit_set_label(ret0[i], label);
	}

	status = sljit_emit_return(compiler,
	    SLJIT_MOV_UI,
	    SLJIT_IMM, 0);
	if (status != SLJIT_SUCCESS)
		goto fail;

	rv = sljit_generate_code(compiler);

fail:
	/* Common cleanup for both success and failure; rv selects the result. */
	if (compiler != NULL)
		sljit_free_compiler(compiler);

	if (insn_dat != NULL)
		BJ_FREE(insn_dat, insn_count * sizeof(insn_dat[0]));

	if (ret0 != NULL)
		BJ_FREE(ret0, ret0_maxsize * sizeof(ret0[0]));

	return (bpfjit_func_t)rv;
}
1843 
1844 void
1845 bpfjit_free_code(bpfjit_func_t code)
1846 {
1847 
1848 	sljit_free_code((void *)code);
1849 }
1850