xref: /dpdk/lib/bpf/bpf_jit_arm64.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2019 Marvell International Ltd.
3  */
4 
5 #include <errno.h>
6 #include <stdbool.h>
7 
8 #include <rte_common.h>
9 #include <rte_byteorder.h>
10 
11 #include "bpf_impl.h"
12 
13 #define A64_REG_MASK(r)		((r) & 0x1f)
14 #define A64_INVALID_OP_CODE	(0xffffffff)
15 
16 #define TMP_REG_1		(EBPF_REG_10 + 1)
17 #define TMP_REG_2		(EBPF_REG_10 + 2)
18 #define TMP_REG_3		(EBPF_REG_10 + 3)
19 
20 #define EBPF_FP			(EBPF_REG_10)
21 #define EBPF_OP_GET(op)		(BPF_OP(op) >> 4)
22 
23 #define A64_R(x)		x
24 #define A64_FP			29
25 #define A64_LR			30
26 #define A64_SP			31
27 #define A64_ZR			31
28 
29 #define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~val) >> (n)))
30 #define mask_imm(n, val) ((val) & ((1 << (n)) - 1))
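/*
 * Illustration of the immediate helpers above (assumes the usual
 * two's-complement int arithmetic these macros are used with):
 * check_imm(12, 4095) == 0 while check_imm(12, 4096) != 0, i.e. the value no
 * longer fits a 12-bit field; negative values are tested via their
 * complement, so check_imm(19, -4) == 0.
 * mask_imm(12, -4) == 0xffc, the low 12 bits that end up in the encoding.
 */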
31 
32 struct ebpf_a64_map {
33 	uint32_t off; /* eBPF to arm64 insn offset mapping for jump */
34 	uint8_t off_to_b; /* Offset to branch instruction delta */
35 };
36 
37 struct a64_jit_ctx {
38 	size_t stack_sz;          /* Stack size */
39 	uint32_t *ins;            /* ARM64 instructions. NULL if first pass */
40 	struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */
41 	uint32_t idx;             /* Current instruction index */
42 	uint32_t program_start;   /* Program start index, just after the prologue */
43 	uint32_t program_sz;      /* Program size. Found in first pass */
44 	uint8_t foundcall;        /* Found EBPF_CALL class code in eBPF pgm */
45 };
46 
47 static int
48 check_immr_imms(bool is64, uint8_t immr, uint8_t imms)
49 {
50 	const unsigned int width = is64 ? 64 : 32;
51 
52 	if (immr >= width || imms >= width)
53 		return 1;
54 
55 	return 0;
56 }
57 
58 static int
59 check_mov_hw(bool is64, const uint8_t val)
60 {
61 	if (val == 16 || val == 0)
62 		return 0;
63 	else if (is64 && val != 64 && val != 48 && val != 32)
64 		return 1;
65 
66 	return 0;
67 }
68 
69 static int
70 check_ls_sz(uint8_t sz)
71 {
72 	if (sz == BPF_B || sz == BPF_H || sz == BPF_W || sz == EBPF_DW)
73 		return 0;
74 
75 	return 1;
76 }
77 
78 static int
79 check_reg(uint8_t r)
80 {
81 	return (r > 31) ? 1 : 0;
82 }
83 
84 static int
85 is_first_pass(struct a64_jit_ctx *ctx)
86 {
87 	return (ctx->ins == NULL);
88 }
89 
90 static int
91 check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit)
92 {
93 	uint32_t idx;
94 
95 	if (is_first_pass(ctx))
96 		return 0;
97 
98 	for (idx = 0; idx < limit; idx++) {
99 		if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) {
100 			RTE_BPF_LOG(ERR,
101 				"%s: invalid opcode at %u;\n", __func__, idx);
102 			return -EINVAL;
103 		}
104 	}
105 	return 0;
106 }
107 
108 static int
109 jump_offset_init(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
110 {
111 	uint32_t i;
112 
113 	ctx->map = malloc(bpf->prm.nb_ins * sizeof(ctx->map[0]));
114 	if (ctx->map == NULL)
115 		return -ENOMEM;
116 
117 	/* Fill with fake offsets */
118 	for (i = 0; i != bpf->prm.nb_ins; i++) {
119 		ctx->map[i].off = INT32_MAX;
120 		ctx->map[i].off_to_b = 0;
121 	}
122 	return 0;
123 }
124 
125 static void
126 jump_offset_fini(struct a64_jit_ctx *ctx)
127 {
128 	free(ctx->map);
129 }
130 
131 static void
132 jump_offset_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx)
133 {
134 	if (is_first_pass(ctx))
135 		ctx->map[ebpf_idx].off = ctx->idx;
136 }
137 
138 static void
139 jump_offset_to_branch_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx)
140 {
141 	if (is_first_pass(ctx))
142 		ctx->map[ebpf_idx].off_to_b = ctx->idx - ctx->map[ebpf_idx].off;
143 
144 }
145 
146 static int32_t
147 jump_offset_get(struct a64_jit_ctx *ctx, uint32_t from, int16_t offset)
148 {
149 	int32_t a64_from, a64_to;
150 
151 	a64_from = ctx->map[from].off +  ctx->map[from].off_to_b;
152 	a64_to = ctx->map[from + offset + 1].off;
153 
154 	if (a64_to == INT32_MAX)
155 		return a64_to;
156 
157 	return a64_to - a64_from;
158 }
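/*
 * How the jump map works (derived from the two-pass flow below): during the
 * first pass map[i].off records, for every eBPF instruction i, the index of
 * the first arm64 instruction emitted for it, and off_to_b records how many
 * arm64 instructions precede the actual branch (e.g. the mov/cmp emitted
 * before a conditional jump). jump_offset_get() then returns the branch
 * displacement in arm64 instructions. Forward targets still hold INT32_MAX
 * during the first pass; the bogus displacement is harmless there because
 * nothing is stored yet, and the map is complete by the second pass.
 */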
159 
160 enum a64_cond_e {
161 	A64_EQ = 0x0, /* == */
162 	A64_NE = 0x1, /* != */
163 	A64_CS = 0x2, /* Unsigned >= */
164 	A64_CC = 0x3, /* Unsigned < */
165 	A64_MI = 0x4, /* < 0 */
166 	A64_PL = 0x5, /* >= 0 */
167 	A64_VS = 0x6, /* Overflow */
168 	A64_VC = 0x7, /* No overflow */
169 	A64_HI = 0x8, /* Unsigned > */
170 	A64_LS = 0x9, /* Unsigned <= */
171 	A64_GE = 0xa, /* Signed >= */
172 	A64_LT = 0xb, /* Signed < */
173 	A64_GT = 0xc, /* Signed > */
174 	A64_LE = 0xd, /* Signed <= */
175 	A64_AL = 0xe, /* Always */
176 };
177 
178 static int
179 check_cond(uint8_t cond)
180 {
181 	return (cond >= A64_AL) ? 1 : 0;
182 }
183 
184 static uint8_t
185 ebpf_to_a64_cond(uint8_t op)
186 {
187 	switch (BPF_OP(op)) {
188 	case BPF_JEQ:
189 		return A64_EQ;
190 	case BPF_JGT:
191 		return A64_HI;
192 	case EBPF_JLT:
193 		return A64_CC;
194 	case BPF_JGE:
195 		return A64_CS;
196 	case EBPF_JLE:
197 		return A64_LS;
198 	case BPF_JSET:
199 	case EBPF_JNE:
200 		return A64_NE;
201 	case EBPF_JSGT:
202 		return A64_GT;
203 	case EBPF_JSLT:
204 		return A64_LT;
205 	case EBPF_JSGE:
206 		return A64_GE;
207 	case EBPF_JSLE:
208 		return A64_LE;
209 	default:
210 		return UINT8_MAX;
211 	}
212 }
213 
214 /* Emit an instruction */
215 static inline void
216 emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error)
217 {
218 	if (error)
219 		insn = A64_INVALID_OP_CODE;
220 
221 	if (ctx->ins)
222 		ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn);
223 
224 	ctx->idx++;
225 }
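/*
 * On the first pass ctx->ins is NULL, so emit_insn() only advances the
 * instruction count; on the second pass it stores the little-endian encoding.
 * Encoding errors are recorded as A64_INVALID_OP_CODE and reported later by
 * check_invalid_args().
 */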
226 
227 static void
228 emit_ret(struct a64_jit_ctx *ctx)
229 {
230 	emit_insn(ctx, 0xd65f03c0, 0);
231 }
232 
233 static void
234 emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd,
235 		 uint8_t rn, int16_t imm12)
236 {
237 	uint32_t insn, imm;
238 
239 	imm = mask_imm(12, imm12);
240 	insn = (!!is64) << 31;
241 	insn |= (!!sub) << 30;
242 	insn |= 0x11000000;
243 	insn |= rd;
244 	insn |= rn << 5;
245 	insn |= imm << 10;
246 
247 	emit_insn(ctx, insn,
248 		  check_reg(rd) || check_reg(rn) || check_imm(12, imm12));
249 }
250 
251 static void
252 emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
253 {
254 	emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12);
255 }
256 
257 static void
258 emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
259 {
260 	emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12);
261 }
262 
263 static void
264 emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn)
265 {
266 	emit_add_sub_imm(ctx, is64, 0, rd, rn, 0);
267 }
268 
269 static void
270 emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn)
271 {
272 	emit_mov(ctx, 1, rd, rn);
273 }
274 
275 static void
276 emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn,
277 		bool push, bool load, bool pre_index)
278 {
279 	uint32_t insn;
280 
281 	insn = (!!load) << 22;
282 	insn |= (!!pre_index) << 24;
283 	insn |= 0xa8800000;
284 	insn |= rt;
285 	insn |= rn << 5;
286 	insn |= rt2 << 10;
287 	if (push)
288 		insn |= 0x7e << 15; /* 0x7e means -2 with imm7 */
289 	else
290 		insn |= 0x2 << 15;
291 
292 	emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2));
293 
294 }
295 
296 /* Emit stp rt, rt2, [sp, #-16]! */
297 static void
298 emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
299 {
300 	emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1);
301 }
302 
303 /* Emit ldp rt, rt2, [sp, #16] */
304 static void
305 emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
306 {
307 	emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0);
308 }
309 
310 #define A64_MOVN 0
311 #define A64_MOVZ 2
312 #define A64_MOVK 3
313 static void
314 mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type,
315 	uint16_t imm16, uint8_t shift)
316 {
317 	uint32_t insn;
318 
319 	insn = (!!is64) << 31;
320 	insn |= type << 29;
321 	insn |= 0x25 << 23;
322 	insn |= (shift/16) << 21;
323 	insn |= imm16 << 5;
324 	insn |= rd;
325 
326 	emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift));
327 }
328 
329 static void
330 emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val)
331 {
332 	uint16_t upper = val >> 16;
333 	uint16_t lower = val & 0xffff;
334 
335 	/* Positive number */
336 	if ((val & 1UL << 31) == 0) {
337 		mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0);
338 		if (upper)
339 			mov_imm(ctx, is64, rd, A64_MOVK, upper, 16);
340 	} else { /* Negative number */
341 		if (upper == 0xffff) {
342 			mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0);
343 		} else {
344 			mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16);
345 			if (lower != 0xffff)
346 				mov_imm(ctx, is64, rd, A64_MOVK, lower, 0);
347 		}
348 	}
349 }
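/*
 * Worked example for the 32-bit case above: val = 0x12345678 becomes
 *	movz rd, #0x5678
 *	movk rd, #0x1234, lsl #16
 * while a negative value whose upper half is all ones, e.g. 0xfffffffe,
 * collapses to a single "movn rd, #0x0001", which the CPU expands back to
 * 0xfffffffe.
 */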
350 
351 static int
352 u16_blocks_weight(const uint64_t val, bool one)
353 {
354 	return (((val >>  0) & 0xffff) == (one ? 0xffff : 0x0000)) +
355 	       (((val >> 16) & 0xffff) == (one ? 0xffff : 0x0000)) +
356 	       (((val >> 32) & 0xffff) == (one ? 0xffff : 0x0000)) +
357 	       (((val >> 48) & 0xffff) == (one ? 0xffff : 0x0000));
358 }
359 
360 static void
361 emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val)
362 {
363 	uint64_t nval = ~val;
364 	int movn, sr;
365 
366 	if (is64 == 0)
367 		return emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff));
368 
369 	/* Find MOVN or MOVZ first */
370 	movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
371 	/* Find shift right value */
372 	sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1;
373 	sr = RTE_ALIGN_FLOOR(sr, 16);
374 	sr = RTE_MAX(sr, 0);
375 
376 	if (movn)
377 		mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr);
378 	else
379 		mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr);
380 
381 	sr -= 16;
382 	while (sr >= 0) {
383 		if (((val >> sr) & 0xffff) != (movn ? 0xffff : 0x0000))
384 			mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr);
385 		sr -= 16;
386 	}
387 }
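/*
 * Worked 64-bit example: 0xffffffffffff1234 has three all-ones 16-bit blocks,
 * so MOVN is chosen; ~val = 0xedcb, the shift is 0, and the single
 * "movn rd, #0xedcb" reproduces the value. A value such as 0x00120000deadbeef
 * instead starts with movz at its highest non-zero block
 * (movz rd, #0x12, lsl #48) and fills the remaining non-zero blocks with
 * movk, skipping the all-zero one.
 */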
388 
389 static void
390 emit_ls(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn, uint8_t rm,
391 	bool load)
392 {
393 	uint32_t insn;
394 
395 	insn = 0x1c1 << 21;
396 	if (load)
397 		insn |= 1 << 22;
398 	if (sz == BPF_B)
399 		insn |= 0 << 30;
400 	else if (sz == BPF_H)
401 		insn |= 1 << 30;
402 	else if (sz == BPF_W)
403 		insn |= 2 << 30;
404 	else if (sz == EBPF_DW)
405 		insn |= 3 << 30;
406 
407 	insn |= rm << 16;
408 	insn |= 0x1a << 10; /* LSL and S = 0 */
409 	insn |= rn << 5;
410 	insn |= rt;
411 
412 	emit_insn(ctx, insn, check_reg(rt) || check_reg(rn) || check_reg(rm) ||
413 		  check_ls_sz(sz));
414 }
415 
416 static void
417 emit_str(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
418 	 uint8_t rm)
419 {
420 	emit_ls(ctx, sz, rt, rn, rm, 0);
421 }
422 
423 static void
424 emit_ldr(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
425 	 uint8_t rm)
426 {
427 	emit_ls(ctx, sz, rt, rn, rm, 1);
428 }
429 
430 #define A64_ADD 0x58
431 #define A64_SUB 0x258
432 static void
433 emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
434 	     uint8_t rm, uint16_t op)
435 {
436 	uint32_t insn;
437 
438 	insn = (!!is64) << 31;
439 	insn |= op << 21; /* shift == 0 */
440 	insn |= rm << 16;
441 	insn |= rn << 5;
442 	insn |= rd;
443 
444 	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
445 }
446 
447 static void
448 emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
449 {
450 	emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD);
451 }
452 
453 static void
454 emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
455 {
456 	emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB);
457 }
458 
459 static void
460 emit_neg(struct a64_jit_ctx *ctx, bool is64, uint8_t rd)
461 {
462 	emit_add_sub(ctx, is64, rd, A64_ZR, rd, A64_SUB);
463 }
464 
465 static void
466 emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
467 {
468 	uint32_t insn;
469 
470 	insn = (!!is64) << 31;
471 	insn |= 0xd8 << 21;
472 	insn |= rm << 16;
473 	insn |= A64_ZR << 10;
474 	insn |= rd << 5;
475 	insn |= rd;
476 
477 	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
478 }
479 
480 #define A64_UDIV 0x2
481 #define A64_LSLV 0x8
482 #define A64_LSRV 0x9
483 #define A64_ASRV 0xA
484 static void
485 emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
486 			  uint8_t rn, uint8_t rm, uint16_t op)
487 
488 {
489 	uint32_t insn;
490 
491 	insn = (!!is64) << 31;
492 	insn |= 0xd6 << 21;
493 	insn |= rm << 16;
494 	insn |= op << 10;
495 	insn |= rn << 5;
496 	insn |= rd;
497 
498 	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
499 }
500 
501 static void
502 emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
503 {
504 	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV);
505 }
506 
507 static void
508 emit_lslv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
509 {
510 	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSLV);
511 }
512 
513 static void
514 emit_lsrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
515 {
516 	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSRV);
517 }
518 
519 static void
520 emit_asrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
521 {
522 	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_ASRV);
523 }
524 
525 #define A64_UBFM 0x2
526 #define A64_SBFM 0x0
527 static void
528 emit_bitfield(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
529 	      uint8_t immr, uint8_t imms, uint16_t op)
530 
531 {
532 	uint32_t insn;
533 
534 	insn = (!!is64) << 31;
535 	if (insn)
536 		insn |= 1 << 22; /* Set N bit when is64 is set */
537 	insn |= op << 29;
538 	insn |= 0x26 << 23;
539 	insn |= immr << 16;
540 	insn |= imms << 10;
541 	insn |= rn << 5;
542 	insn |= rd;
543 
544 	emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) ||
545 		  check_immr_imms(is64, immr, imms));
546 }
547 static void
548 emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
549 {
550 	const unsigned int width = is64 ? 64 : 32;
551 	uint8_t imms, immr;
552 
553 	immr = (width - imm) & (width - 1);
554 	imms = width - 1 - imm;
555 
556 	emit_bitfield(ctx, is64, rd, rd, immr, imms, A64_UBFM);
557 }
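/*
 * Example of the UBFM/SBFM aliasing used here: emit_lsl(ctx, 1, rd, 8)
 * produces "ubfm rd, rd, #56, #55" (immr = 64 - 8, imms = 63 - 8), which is
 * the canonical encoding of "lsl rd, rd, #8". emit_lsr() and emit_asr() below
 * rely on the LSR/ASR aliases in the same way, with imms fixed to 63 or 31.
 */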
558 
559 static void
560 emit_lsr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
561 {
562 	emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_UBFM);
563 }
564 
565 static void
566 emit_asr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
567 {
568 	emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_SBFM);
569 }
570 
571 #define A64_AND 0
572 #define A64_OR 1
573 #define A64_XOR 2
574 static void
575 emit_logical(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
576 	     uint8_t rm, uint16_t op)
577 {
578 	uint32_t insn;
579 
580 	insn = (!!is64) << 31;
581 	insn |= op << 29;
582 	insn |= 0x50 << 21;
583 	insn |= rm << 16;
584 	insn |= rd << 5;
585 	insn |= rd;
586 
587 	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
588 }
589 
590 static void
591 emit_or(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
592 {
593 	emit_logical(ctx, is64, rd, rm, A64_OR);
594 }
595 
596 static void
597 emit_and(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
598 {
599 	emit_logical(ctx, is64, rd, rm, A64_AND);
600 }
601 
602 static void
603 emit_xor(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
604 {
605 	emit_logical(ctx, is64, rd, rm, A64_XOR);
606 }
607 
608 static void
609 emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
610 	  uint8_t rm, uint8_t ra)
611 {
612 	uint32_t insn;
613 
614 	insn = (!!is64) << 31;
615 	insn |= 0xd8 << 21;
616 	insn |= rm << 16;
617 	insn |= 0x1 << 15;
618 	insn |= ra << 10;
619 	insn |= rn << 5;
620 	insn |= rd;
621 
622 	emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) ||
623 		  check_reg(ra));
624 }
625 
626 static void
627 emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd,
628 	 uint8_t rm)
629 {
630 	emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV);
631 	emit_msub(ctx, is64, rd, tmp, rm, rd);
632 }
633 
634 static void
635 emit_blr(struct a64_jit_ctx *ctx, uint8_t rn)
636 {
637 	uint32_t insn;
638 
639 	insn = 0xd63f0000;
640 	insn |= rn << 5;
641 
642 	emit_insn(ctx, insn, check_reg(rn));
643 }
644 
645 static void
646 emit_zero_extend(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
647 {
648 	switch (imm) {
649 	case 16:
650 		/* Zero-extend 16 bits into 64 bits */
651 		emit_bitfield(ctx, 1, rd, rd, 0, 15, A64_UBFM);
652 		break;
653 	case 32:
654 		/* Zero-extend 32 bits into 64 bits */
655 		emit_bitfield(ctx, 1, rd, rd, 0, 31, A64_UBFM);
656 		break;
657 	case 64:
658 		break;
659 	default:
660 		/* Generate error */
661 		emit_insn(ctx, 0, 1);
662 	}
663 }
664 
665 static void
666 emit_rev(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
667 {
668 	uint32_t insn;
669 
670 	insn = 0xdac00000;
671 	insn |= rd << 5;
672 	insn |= rd;
673 
674 	switch (imm) {
675 	case 16:
676 		insn |= 1 << 10;
677 		emit_insn(ctx, insn, check_reg(rd));
678 		emit_zero_extend(ctx, rd, 16);
679 		break;
680 	case 32:
681 		insn |= 2 << 10;
682 		emit_insn(ctx, insn, check_reg(rd));
683 		/* Upper 32 bits already cleared */
684 		break;
685 	case 64:
686 		insn |= 3 << 10;
687 		emit_insn(ctx, insn, check_reg(rd));
688 		break;
689 	default:
690 		/* Generate error */
691 		emit_insn(ctx, insn, 1);
692 	}
693 }
694 
695 static int
696 is_be(void)
697 {
698 #if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
699 	return 1;
700 #else
701 	return 0;
702 #endif
703 }
704 
705 static void
706 emit_be(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
707 {
708 	if (is_be())
709 		emit_zero_extend(ctx, rd, imm);
710 	else
711 		emit_rev(ctx, rd, imm);
712 }
713 
714 static void
715 emit_le(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
716 {
717 	if (is_be())
718 		emit_rev(ctx, rd, imm);
719 	else
720 		emit_zero_extend(ctx, rd, imm);
721 }
722 
723 static uint8_t
724 ebpf_to_a64_reg(struct a64_jit_ctx *ctx, uint8_t reg)
725 {
726 	const uint32_t ebpf2a64_has_call[] = {
727 		/* Map A64 R7 register as EBPF return register */
728 		[EBPF_REG_0] = A64_R(7),
729 		/* Map A64 argument registers as EBPF argument registers */
730 		[EBPF_REG_1] = A64_R(0),
731 		[EBPF_REG_2] = A64_R(1),
732 		[EBPF_REG_3] = A64_R(2),
733 		[EBPF_REG_4] = A64_R(3),
734 		[EBPF_REG_5] = A64_R(4),
735 		/* Map A64 callee-saved registers as EBPF callee-saved registers */
736 		[EBPF_REG_6] = A64_R(19),
737 		[EBPF_REG_7] = A64_R(20),
738 		[EBPF_REG_8] = A64_R(21),
739 		[EBPF_REG_9] = A64_R(22),
740 		[EBPF_FP]    = A64_R(25),
741 		/* Map A64 scratch registers as temporary storage */
742 		[TMP_REG_1] = A64_R(9),
743 		[TMP_REG_2] = A64_R(10),
744 		[TMP_REG_3] = A64_R(11),
745 	};
746 
747 	const uint32_t ebpf2a64_no_call[] = {
748 		/* Map A64 R7 register as EBPF return register */
749 		[EBPF_REG_0] = A64_R(7),
750 		/* Map A64 argument registers as EBPF argument registers */
751 		[EBPF_REG_1] = A64_R(0),
752 		[EBPF_REG_2] = A64_R(1),
753 		[EBPF_REG_3] = A64_R(2),
754 		[EBPF_REG_4] = A64_R(3),
755 		[EBPF_REG_5] = A64_R(4),
756 		/*
757 		 * The eBPF program does not contain the EBPF_CALL opcode, so
758 		 * map A64 scratch registers as eBPF callee-saved registers.
759 		 */
760 		[EBPF_REG_6] = A64_R(9),
761 		[EBPF_REG_7] = A64_R(10),
762 		[EBPF_REG_8] = A64_R(11),
763 		[EBPF_REG_9] = A64_R(12),
764 		/* Map A64 FP register as EBPF FP register */
765 		[EBPF_FP]    = A64_FP,
766 		/* Map remaining A64 scratch registers as temporary storage */
767 		[TMP_REG_1] = A64_R(13),
768 		[TMP_REG_2] = A64_R(14),
769 		[TMP_REG_3] = A64_R(15),
770 	};
771 
772 	if (ctx->foundcall)
773 		return ebpf2a64_has_call[reg];
774 	else
775 		return ebpf2a64_no_call[reg];
776 }
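/*
 * Two register maps are kept on purpose: when the program contains no
 * EBPF_CALL instruction there is no helper call that could clobber arm64
 * caller-saved registers, so eBPF R6-R9 can simply be backed by scratch
 * registers and the prologue does not have to spill the callee-saved
 * registers R19-R22/R25 (compare emit_prologue_no_call() with
 * emit_prologue_has_call() below).
 */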
777 
778 /*
779  * Procedure call standard for the arm64
780  * -------------------------------------
781  * R0..R7  - Parameter/result registers
782  * R8      - Indirect result location register
783  * R9..R15 - Scratch registers
784  * R15     - Platform Register
785  * R16     - First intra-procedure-call scratch register
786  * R17     - Second intra-procedure-call temporary register
787  * R19-R28 - Callee saved registers
788  * R29     - Frame pointer
789  * R30     - Link register
790  * R31     - Stack pointer
791  */
792 static void
793 emit_prologue_has_call(struct a64_jit_ctx *ctx)
794 {
795 	uint8_t r6, r7, r8, r9, fp;
796 
797 	r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
798 	r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
799 	r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
800 	r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
801 	fp = ebpf_to_a64_reg(ctx, EBPF_FP);
802 
803 	/*
804 	 * eBPF prog stack layout
805 	 *
806 	 *                               high
807 	 *       eBPF prologue       0:+-----+ <= original A64_SP
808 	 *                             |FP/LR|
809 	 *                         -16:+-----+ <= current A64_FP
810 	 *    Callee saved registers   | ... |
811 	 *             EBPF_FP =>  -64:+-----+
812 	 *                             |     |
813 	 *       eBPF prog stack       | ... |
814 	 *                             |     |
815 	 * (EBPF_FP - bpf->stack_sz)=> +-----+
816 	 * Pad for A64_SP 16B alignment| PAD |
817 	 * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
818 	 *                             |     |
819 	 *                             | ... | Function call stack
820 	 *                             |     |
821 	 *                             +-----+
822 	 *                              low
823 	 */
824 	emit_stack_push(ctx, A64_FP, A64_LR);
825 	emit_mov_64(ctx, A64_FP, A64_SP);
826 	emit_stack_push(ctx, r6, r7);
827 	emit_stack_push(ctx, r8, r9);
828 	/*
829 	 * There is no requirement to save A64_R(28) on the stack. It is done
830 	 * here because A64_SP needs to be 16B aligned and STR vs STP
831 	 * typically takes the same number of cycles.
832 	 */
833 	emit_stack_push(ctx, fp, A64_R(28));
834 	emit_mov_64(ctx, fp, A64_SP);
835 	if (ctx->stack_sz)
836 		emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
837 }
838 
839 static void
840 emit_epilogue_has_call(struct a64_jit_ctx *ctx)
841 {
842 	uint8_t r6, r7, r8, r9, fp, r0;
843 
844 	r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
845 	r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
846 	r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
847 	r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
848 	fp = ebpf_to_a64_reg(ctx, EBPF_FP);
849 	r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
850 
851 	if (ctx->stack_sz)
852 		emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
853 	emit_stack_pop(ctx, fp, A64_R(28));
854 	emit_stack_pop(ctx, r8, r9);
855 	emit_stack_pop(ctx, r6, r7);
856 	emit_stack_pop(ctx, A64_FP, A64_LR);
857 	emit_mov_64(ctx, A64_R(0), r0);
858 	emit_ret(ctx);
859 }
860 
861 static void
862 emit_prologue_no_call(struct a64_jit_ctx *ctx)
863 {
864 	/*
865 	 * eBPF prog stack layout without EBPF_CALL opcode
866 	 *
867 	 *                               high
868 	 *    eBPF prologue(EBPF_FP) 0:+-----+ <= original A64_SP/current A64_FP
869 	 *                             |     |
870 	 *                             | ... |
871 	 *            eBPF prog stack  |     |
872 	 *                             |     |
873 	 * (EBPF_FP - bpf->stack_sz)=> +-----+
874 	 * Pad for A64_SP 16B alignment| PAD |
875 	 * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
876 	 *                             |     |
877 	 *                             | ... | Function call stack
878 	 *                             |     |
879 	 *                             +-----+
880 	 *                              low
881 	 */
882 	if (ctx->stack_sz) {
883 		emit_mov_64(ctx, A64_FP, A64_SP);
884 		emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
885 	}
886 }
887 
888 static void
889 emit_epilogue_no_call(struct a64_jit_ctx *ctx)
890 {
891 	if (ctx->stack_sz)
892 		emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
893 	emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0));
894 	emit_ret(ctx);
895 }
896 
897 static void
898 emit_prologue(struct a64_jit_ctx *ctx)
899 {
900 	if (ctx->foundcall)
901 		emit_prologue_has_call(ctx);
902 	else
903 		emit_prologue_no_call(ctx);
904 
905 	ctx->program_start = ctx->idx;
906 }
907 
908 static void
909 emit_epilogue(struct a64_jit_ctx *ctx)
910 {
911 	ctx->program_sz = ctx->idx - ctx->program_start;
912 
913 	if (ctx->foundcall)
914 		emit_epilogue_has_call(ctx);
915 	else
916 		emit_epilogue_no_call(ctx);
917 }
918 
919 static void
920 emit_call(struct a64_jit_ctx *ctx, uint8_t tmp, void *func)
921 {
922 	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
923 
924 	emit_mov_imm(ctx, 1, tmp, (uint64_t)func);
925 	emit_blr(ctx, tmp);
926 	emit_mov_64(ctx, r0, A64_R(0));
927 }
928 
929 static void
930 emit_cbnz(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, int32_t imm19)
931 {
932 	uint32_t insn, imm;
933 
934 	imm = mask_imm(19, imm19);
935 	insn = (!!is64) << 31;
936 	insn |= 0x35 << 24;
937 	insn |= imm << 5;
938 	insn |= rt;
939 
940 	emit_insn(ctx, insn, check_reg(rt) || check_imm(19, imm19));
941 }
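/*
 * The cbnz/b/b.cond immediates in this file are expressed in units of one
 * 32-bit instruction (the hardware scales the signed field by 4), which is
 * why small literal offsets such as 3 or -3 are used to jump over or back to
 * nearby emitted instructions.
 */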
942 
943 static void
944 emit_b(struct a64_jit_ctx *ctx, int32_t imm26)
945 {
946 	uint32_t insn, imm;
947 
948 	imm = mask_imm(26, imm26);
949 	insn = 0x5 << 26;
950 	insn |= imm;
951 
952 	emit_insn(ctx, insn, check_imm(26, imm26));
953 }
954 
955 static void
956 emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src)
957 {
958 	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
959 	uint16_t jump_to_epilogue;
960 
961 	emit_cbnz(ctx, is64, src, 3);
962 	emit_mov_imm(ctx, is64, r0, 0);
963 	jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx;
964 	emit_b(ctx, jump_to_epilogue);
965 }
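/*
 * The literal offset 3 above relies on emit_mov_imm() emitting exactly one
 * instruction for the value 0: cbnz skips the "mov r0, #0" and the branch to
 * the epilogue, falling through to the following divide/modulo when the
 * divisor is non-zero. The branch target is the epilogue start,
 * program_start + program_sz, as recorded on the first pass.
 */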
966 
967 static void
968 emit_stadd(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rn)
969 {
970 	uint32_t insn;
971 
972 	insn = 0xb820001f;
973 	insn |= (!!is64) << 30;
974 	insn |= rs << 16;
975 	insn |= rn << 5;
976 
977 	emit_insn(ctx, insn, check_reg(rs) || check_reg(rn));
978 }
979 
980 static void
981 emit_ldxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, uint8_t rn)
982 {
983 	uint32_t insn;
984 
985 	insn = 0x885f7c00;
986 	insn |= (!!is64) << 30;
987 	insn |= rn << 5;
988 	insn |= rt;
989 
990 	emit_insn(ctx, insn, check_reg(rt) || check_reg(rn));
991 }
992 
993 static void
994 emit_stxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rt,
995 	  uint8_t rn)
996 {
997 	uint32_t insn;
998 
999 	insn = 0x88007c00;
1000 	insn |= (!!is64) << 30;
1001 	insn |= rs << 16;
1002 	insn |= rn << 5;
1003 	insn |= rt;
1004 
1005 	emit_insn(ctx, insn, check_reg(rs) || check_reg(rt) || check_reg(rn));
1006 }
1007 
1008 static int
1009 has_atomics(void)
1010 {
1011 	int rc = 0;
1012 
1013 #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
1014 	rc = 1;
1015 #endif
1016 	return rc;
1017 }
1018 
1019 static void
1020 emit_xadd(struct a64_jit_ctx *ctx, uint8_t op, uint8_t tmp1, uint8_t tmp2,
1021 	  uint8_t tmp3, uint8_t dst, int16_t off, uint8_t src)
1022 {
1023 	bool is64 = (BPF_SIZE(op) == EBPF_DW);
1024 	uint8_t rn;
1025 
1026 	if (off) {
1027 		emit_mov_imm(ctx, 1, tmp1, off);
1028 		emit_add(ctx, 1, tmp1, dst);
1029 		rn = tmp1;
1030 	} else {
1031 		rn = dst;
1032 	}
1033 
1034 	if (has_atomics()) {
1035 		emit_stadd(ctx, is64, src, rn);
1036 	} else {
1037 		emit_ldxr(ctx, is64, tmp2, rn);
1038 		emit_add(ctx, is64, tmp2, src);
1039 		emit_stxr(ctx, is64, tmp3, tmp2, rn);
1040 		emit_cbnz(ctx, is64, tmp3, -3);
1041 	}
1042 }
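/*
 * With LSE atomics (has_atomics()) a single stadd performs the atomic add in
 * memory; otherwise a classic load/store-exclusive loop is emitted and the
 * trailing cbnz with offset -3 branches back three instructions to the ldxr
 * to retry whenever the store-exclusive fails.
 */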
1043 
1044 #define A64_CMP 0x6b00000f
1045 #define A64_TST 0x6a00000f
1046 static void
1047 emit_cmp_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm,
1048 	     uint32_t opc)
1049 {
1050 	uint32_t insn;
1051 
1052 	insn = opc;
1053 	insn |= (!!is64) << 31;
1054 	insn |= rm << 16;
1055 	insn |= rn << 5;
1056 
1057 	emit_insn(ctx, insn, check_reg(rn) || check_reg(rm));
1058 }
1059 
1060 static void
1061 emit_cmp(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm)
1062 {
1063 	emit_cmp_tst(ctx, is64, rn, rm, A64_CMP);
1064 }
1065 
1066 static void
1067 emit_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm)
1068 {
1069 	emit_cmp_tst(ctx, is64, rn, rm, A64_TST);
1070 }
1071 
1072 static void
1073 emit_b_cond(struct a64_jit_ctx *ctx, uint8_t cond, int32_t imm19)
1074 {
1075 	uint32_t insn, imm;
1076 
1077 	imm = mask_imm(19, imm19);
1078 	insn = 0x15 << 26;
1079 	insn |= imm << 5;
1080 	insn |= cond;
1081 
1082 	emit_insn(ctx, insn, check_cond(cond) || check_imm(19, imm19));
1083 }
1084 
1085 static void
1086 emit_branch(struct a64_jit_ctx *ctx, uint8_t op, uint32_t i, int16_t off)
1087 {
1088 	jump_offset_to_branch_update(ctx, i);
1089 	emit_b_cond(ctx, ebpf_to_a64_cond(op), jump_offset_get(ctx, i, off));
1090 }
1091 
1092 static void
1093 check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
1094 {
1095 	const struct ebpf_insn *ins;
1096 	uint8_t op;
1097 	uint32_t i;
1098 
1099 	for (i = 0; i != bpf->prm.nb_ins; i++) {
1100 		ins = bpf->prm.ins + i;
1101 		op = ins->code;
1102 
1103 		switch (op) {
1104 		/* Call imm */
1105 		case (BPF_JMP | EBPF_CALL):
1106 			ctx->foundcall = 1;
1107 			return;
1108 		}
1109 	}
1110 }
1111 
1112 /*
1113  * Walk through the eBPF instructions and translate them to arm64 code.
1114  */
1115 static int
1116 emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
1117 {
1118 	uint8_t op, dst, src, tmp1, tmp2, tmp3;
1119 	const struct ebpf_insn *ins;
1120 	uint64_t u64;
1121 	int16_t off;
1122 	int32_t imm;
1123 	uint32_t i;
1124 	bool is64;
1125 	int rc;
1126 
1127 	/* Reset context fields */
1128 	ctx->idx = 0;
1129 	/* arm64 SP must be aligned to 16 */
1130 	ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16);
1131 	tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
1132 	tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2);
1133 	tmp3 = ebpf_to_a64_reg(ctx, TMP_REG_3);
1134 
1135 	emit_prologue(ctx);
1136 
1137 	for (i = 0; i != bpf->prm.nb_ins; i++) {
1138 
1139 		jump_offset_update(ctx, i);
1140 		ins = bpf->prm.ins + i;
1141 		op = ins->code;
1142 		off = ins->off;
1143 		imm = ins->imm;
1144 
1145 		dst = ebpf_to_a64_reg(ctx, ins->dst_reg);
1146 		src = ebpf_to_a64_reg(ctx, ins->src_reg);
1147 		is64 = (BPF_CLASS(op) == EBPF_ALU64);
1148 
1149 		switch (op) {
1150 		/* dst = src */
1151 		case (BPF_ALU | EBPF_MOV | BPF_X):
1152 		case (EBPF_ALU64 | EBPF_MOV | BPF_X):
1153 			emit_mov(ctx, is64, dst, src);
1154 			break;
1155 		/* dst = imm */
1156 		case (BPF_ALU | EBPF_MOV | BPF_K):
1157 		case (EBPF_ALU64 | EBPF_MOV | BPF_K):
1158 			emit_mov_imm(ctx, is64, dst, imm);
1159 			break;
1160 		/* dst += src */
1161 		case (BPF_ALU | BPF_ADD | BPF_X):
1162 		case (EBPF_ALU64 | BPF_ADD | BPF_X):
1163 			emit_add(ctx, is64, dst, src);
1164 			break;
1165 		/* dst += imm */
1166 		case (BPF_ALU | BPF_ADD | BPF_K):
1167 		case (EBPF_ALU64 | BPF_ADD | BPF_K):
1168 			emit_mov_imm(ctx, is64, tmp1, imm);
1169 			emit_add(ctx, is64, dst, tmp1);
1170 			break;
1171 		/* dst -= src */
1172 		case (BPF_ALU | BPF_SUB | BPF_X):
1173 		case (EBPF_ALU64 | BPF_SUB | BPF_X):
1174 			emit_sub(ctx, is64, dst, src);
1175 			break;
1176 		/* dst -= imm */
1177 		case (BPF_ALU | BPF_SUB | BPF_K):
1178 		case (EBPF_ALU64 | BPF_SUB | BPF_K):
1179 			emit_mov_imm(ctx, is64, tmp1, imm);
1180 			emit_sub(ctx, is64, dst, tmp1);
1181 			break;
1182 		/* dst *= src */
1183 		case (BPF_ALU | BPF_MUL | BPF_X):
1184 		case (EBPF_ALU64 | BPF_MUL | BPF_X):
1185 			emit_mul(ctx, is64, dst, src);
1186 			break;
1187 		/* dst *= imm */
1188 		case (BPF_ALU | BPF_MUL | BPF_K):
1189 		case (EBPF_ALU64 | BPF_MUL | BPF_K):
1190 			emit_mov_imm(ctx, is64, tmp1, imm);
1191 			emit_mul(ctx, is64, dst, tmp1);
1192 			break;
1193 		/* dst /= src */
1194 		case (BPF_ALU | BPF_DIV | BPF_X):
1195 		case (EBPF_ALU64 | BPF_DIV | BPF_X):
1196 			emit_return_zero_if_src_zero(ctx, is64, src);
1197 			emit_div(ctx, is64, dst, src);
1198 			break;
1199 		/* dst /= imm */
1200 		case (BPF_ALU | BPF_DIV | BPF_K):
1201 		case (EBPF_ALU64 | BPF_DIV | BPF_K):
1202 			emit_mov_imm(ctx, is64, tmp1, imm);
1203 			emit_div(ctx, is64, dst, tmp1);
1204 			break;
1205 		/* dst %= src */
1206 		case (BPF_ALU | BPF_MOD | BPF_X):
1207 		case (EBPF_ALU64 | BPF_MOD | BPF_X):
1208 			emit_return_zero_if_src_zero(ctx, is64, src);
1209 			emit_mod(ctx, is64, tmp1, dst, src);
1210 			break;
1211 		/* dst %= imm */
1212 		case (BPF_ALU | BPF_MOD | BPF_K):
1213 		case (EBPF_ALU64 | BPF_MOD | BPF_K):
1214 			emit_mov_imm(ctx, is64, tmp1, imm);
1215 			emit_mod(ctx, is64, tmp2, dst, tmp1);
1216 			break;
1217 		/* dst |= src */
1218 		case (BPF_ALU | BPF_OR | BPF_X):
1219 		case (EBPF_ALU64 | BPF_OR | BPF_X):
1220 			emit_or(ctx, is64, dst, src);
1221 			break;
1222 		/* dst |= imm */
1223 		case (BPF_ALU | BPF_OR | BPF_K):
1224 		case (EBPF_ALU64 | BPF_OR | BPF_K):
1225 			emit_mov_imm(ctx, is64, tmp1, imm);
1226 			emit_or(ctx, is64, dst, tmp1);
1227 			break;
1228 		/* dst &= src */
1229 		case (BPF_ALU | BPF_AND | BPF_X):
1230 		case (EBPF_ALU64 | BPF_AND | BPF_X):
1231 			emit_and(ctx, is64, dst, src);
1232 			break;
1233 		/* dst &= imm */
1234 		case (BPF_ALU | BPF_AND | BPF_K):
1235 		case (EBPF_ALU64 | BPF_AND | BPF_K):
1236 			emit_mov_imm(ctx, is64, tmp1, imm);
1237 			emit_and(ctx, is64, dst, tmp1);
1238 			break;
1239 		/* dst ^= src */
1240 		case (BPF_ALU | BPF_XOR | BPF_X):
1241 		case (EBPF_ALU64 | BPF_XOR | BPF_X):
1242 			emit_xor(ctx, is64, dst, src);
1243 			break;
1244 		/* dst ^= imm */
1245 		case (BPF_ALU | BPF_XOR | BPF_K):
1246 		case (EBPF_ALU64 | BPF_XOR | BPF_K):
1247 			emit_mov_imm(ctx, is64, tmp1, imm);
1248 			emit_xor(ctx, is64, dst, tmp1);
1249 			break;
1250 		/* dst = -dst */
1251 		case (BPF_ALU | BPF_NEG):
1252 		case (EBPF_ALU64 | BPF_NEG):
1253 			emit_neg(ctx, is64, dst);
1254 			break;
1255 		/* dst <<= src */
1256 		case BPF_ALU | BPF_LSH | BPF_X:
1257 		case EBPF_ALU64 | BPF_LSH | BPF_X:
1258 			emit_lslv(ctx, is64, dst, src);
1259 			break;
1260 		/* dst <<= imm */
1261 		case BPF_ALU | BPF_LSH | BPF_K:
1262 		case EBPF_ALU64 | BPF_LSH | BPF_K:
1263 			emit_lsl(ctx, is64, dst, imm);
1264 			break;
1265 		/* dst >>= src */
1266 		case BPF_ALU | BPF_RSH | BPF_X:
1267 		case EBPF_ALU64 | BPF_RSH | BPF_X:
1268 			emit_lsrv(ctx, is64, dst, src);
1269 			break;
1270 		/* dst >>= imm */
1271 		case BPF_ALU | BPF_RSH | BPF_K:
1272 		case EBPF_ALU64 | BPF_RSH | BPF_K:
1273 			emit_lsr(ctx, is64, dst, imm);
1274 			break;
1275 		/* dst >>= src (arithmetic) */
1276 		case BPF_ALU | EBPF_ARSH | BPF_X:
1277 		case EBPF_ALU64 | EBPF_ARSH | BPF_X:
1278 			emit_asrv(ctx, is64, dst, src);
1279 			break;
1280 		/* dst >>= imm (arithmetic) */
1281 		case BPF_ALU | EBPF_ARSH | BPF_K:
1282 		case EBPF_ALU64 | EBPF_ARSH | BPF_K:
1283 			emit_asr(ctx, is64, dst, imm);
1284 			break;
1285 		/* dst = be##imm(dst) */
1286 		case (BPF_ALU | EBPF_END | EBPF_TO_BE):
1287 			emit_be(ctx, dst, imm);
1288 			break;
1289 		/* dst = le##imm(dst) */
1290 		case (BPF_ALU | EBPF_END | EBPF_TO_LE):
1291 			emit_le(ctx, dst, imm);
1292 			break;
1293 		/* dst = *(size *) (src + off) */
1294 		case (BPF_LDX | BPF_MEM | BPF_B):
1295 		case (BPF_LDX | BPF_MEM | BPF_H):
1296 		case (BPF_LDX | BPF_MEM | BPF_W):
1297 		case (BPF_LDX | BPF_MEM | EBPF_DW):
1298 			emit_mov_imm(ctx, 1, tmp1, off);
1299 			emit_ldr(ctx, BPF_SIZE(op), dst, src, tmp1);
1300 			break;
1301 		/* dst = imm64 */
1302 		case (BPF_LD | BPF_IMM | EBPF_DW):
1303 			u64 = ((uint64_t)ins[1].imm << 32) | (uint32_t)imm;
1304 			emit_mov_imm(ctx, 1, dst, u64);
1305 			i++;
1306 			break;
1307 		/* *(size *)(dst + off) = src */
1308 		case (BPF_STX | BPF_MEM | BPF_B):
1309 		case (BPF_STX | BPF_MEM | BPF_H):
1310 		case (BPF_STX | BPF_MEM | BPF_W):
1311 		case (BPF_STX | BPF_MEM | EBPF_DW):
1312 			emit_mov_imm(ctx, 1, tmp1, off);
1313 			emit_str(ctx, BPF_SIZE(op), src, dst, tmp1);
1314 			break;
1315 		/* *(size *)(dst + off) = imm */
1316 		case (BPF_ST | BPF_MEM | BPF_B):
1317 		case (BPF_ST | BPF_MEM | BPF_H):
1318 		case (BPF_ST | BPF_MEM | BPF_W):
1319 		case (BPF_ST | BPF_MEM | EBPF_DW):
1320 			emit_mov_imm(ctx, 1, tmp1, imm);
1321 			emit_mov_imm(ctx, 1, tmp2, off);
1322 			emit_str(ctx, BPF_SIZE(op), tmp1, dst, tmp2);
1323 			break;
1324 		/* STX XADD: lock *(size *)(dst + off) += src */
1325 		case (BPF_STX | EBPF_XADD | BPF_W):
1326 		case (BPF_STX | EBPF_XADD | EBPF_DW):
1327 			emit_xadd(ctx, op, tmp1, tmp2, tmp3, dst, off, src);
1328 			break;
1329 		/* PC += off */
1330 		case (BPF_JMP | BPF_JA):
1331 			emit_b(ctx, jump_offset_get(ctx, i, off));
1332 			break;
1333 		/* PC += off if dst COND imm */
1334 		case (BPF_JMP | BPF_JEQ | BPF_K):
1335 		case (BPF_JMP | EBPF_JNE | BPF_K):
1336 		case (BPF_JMP | BPF_JGT | BPF_K):
1337 		case (BPF_JMP | EBPF_JLT | BPF_K):
1338 		case (BPF_JMP | BPF_JGE | BPF_K):
1339 		case (BPF_JMP | EBPF_JLE | BPF_K):
1340 		case (BPF_JMP | EBPF_JSGT | BPF_K):
1341 		case (BPF_JMP | EBPF_JSLT | BPF_K):
1342 		case (BPF_JMP | EBPF_JSGE | BPF_K):
1343 		case (BPF_JMP | EBPF_JSLE | BPF_K):
1344 			emit_mov_imm(ctx, 1, tmp1, imm);
1345 			emit_cmp(ctx, 1, dst, tmp1);
1346 			emit_branch(ctx, op, i, off);
1347 			break;
1348 		case (BPF_JMP | BPF_JSET | BPF_K):
1349 			emit_mov_imm(ctx, 1, tmp1, imm);
1350 			emit_tst(ctx, 1, dst, tmp1);
1351 			emit_branch(ctx, op, i, off);
1352 			break;
1353 		/* PC += off if dst COND src */
1354 		case (BPF_JMP | BPF_JEQ | BPF_X):
1355 		case (BPF_JMP | EBPF_JNE | BPF_X):
1356 		case (BPF_JMP | BPF_JGT | BPF_X):
1357 		case (BPF_JMP | EBPF_JLT | BPF_X):
1358 		case (BPF_JMP | BPF_JGE | BPF_X):
1359 		case (BPF_JMP | EBPF_JLE | BPF_X):
1360 		case (BPF_JMP | EBPF_JSGT | BPF_X):
1361 		case (BPF_JMP | EBPF_JSLT | BPF_X):
1362 		case (BPF_JMP | EBPF_JSGE | BPF_X):
1363 		case (BPF_JMP | EBPF_JSLE | BPF_X):
1364 			emit_cmp(ctx, 1, dst, src);
1365 			emit_branch(ctx, op, i, off);
1366 			break;
1367 		case (BPF_JMP | BPF_JSET | BPF_X):
1368 			emit_tst(ctx, 1, dst, src);
1369 			emit_branch(ctx, op, i, off);
1370 			break;
1371 		/* Call imm */
1372 		case (BPF_JMP | EBPF_CALL):
1373 			emit_call(ctx, tmp1, bpf->prm.xsym[ins->imm].func.val);
1374 			break;
1375 		/* Return r0 */
1376 		case (BPF_JMP | EBPF_EXIT):
1377 			emit_epilogue(ctx);
1378 			break;
1379 		default:
1380 			RTE_BPF_LOG(ERR,
1381 				"%s(%p): invalid opcode %#x at pc: %u;\n",
1382 				__func__, bpf, ins->code, i);
1383 			return -EINVAL;
1384 		}
1385 	}
1386 	rc = check_invalid_args(ctx, ctx->idx);
1387 
1388 	return rc;
1389 }
1390 
1391 /*
1392  * Produce a native ISA version of the given BPF code.
1393  */
1394 int
1395 bpf_jit_arm64(struct rte_bpf *bpf)
1396 {
1397 	struct a64_jit_ctx ctx;
1398 	size_t size;
1399 	int rc;
1400 
1401 	/* Init JIT context */
1402 	memset(&ctx, 0, sizeof(ctx));
1403 
1404 	/* Allocate and initialize the eBPF to a64 insn offset map used for jumps */
1405 	rc = jump_offset_init(&ctx, bpf);
1406 	if (rc)
1407 		goto error;
1408 
1409 	/* Check whether the eBPF program has an EBPF_CALL class opcode */
1410 	check_program_has_call(&ctx, bpf);
1411 
1412 	/* First pass to calculate total code size and valid jump offsets */
1413 	rc = emit(&ctx, bpf);
1414 	if (rc)
1415 		goto finish;
1416 
1417 	size = ctx.idx * sizeof(uint32_t);
1418 	/* Allocate JIT program memory */
1419 	ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE,
1420 			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1421 	if (ctx.ins == MAP_FAILED) {
1422 		rc = -ENOMEM;
1423 		goto finish;
1424 	}
1425 
1426 	/* Second pass to generate code */
1427 	rc = emit(&ctx, bpf);
1428 	if (rc)
1429 		goto munmap;
1430 
1431 	rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0;
1432 	if (rc) {
1433 		rc = -errno;
1434 		goto munmap;
1435 	}
1436 
1437 	/* Flush the icache */
1438 	__builtin___clear_cache((char *)ctx.ins, (char *)(ctx.ins + ctx.idx));
1439 
1440 	bpf->jit.func = (void *)ctx.ins;
1441 	bpf->jit.sz = size;
1442 
1443 	goto finish;
1444 
1445 munmap:
1446 	munmap(ctx.ins, size);
1447 finish:
1448 	jump_offset_fini(&ctx);
1449 error:
1450 	return rc;
1451 }
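/*
 * Minimal usage sketch (hypothetical caller code; "prm" and "pkt" are
 * placeholders). Applications normally reach this JIT indirectly through
 * rte_bpf_load(), which attempts the JIT while loading, and then run the
 * generated code fetched via rte_bpf_get_jit() instead of the interpreter:
 *
 *	struct rte_bpf *bpf = rte_bpf_load(&prm);
 *	struct rte_bpf_jit jit;
 *
 *	if (bpf != NULL && rte_bpf_get_jit(bpf, &jit) == 0 && jit.func != NULL)
 *		rc = jit.func(pkt);
 *	rte_bpf_destroy(bpf);
 */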
1452