/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 Microsoft Corporation
 *
 * Based on bpf_convert_filter() in the Linux kernel sources
 * and filter2xdp.
 *
 * Licensed as BSD with permission of the original authors.
 * Copyright (C) 2017 Tobias Klauser
 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 */

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <rte_common.h>
#include <rte_bpf.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_errno.h>

#include <pcap/pcap.h>
#include <pcap/bpf.h>

#include "bpf_impl.h"
#include "bpf_def.h"

#ifndef BPF_MAXINSNS
#define BPF_MAXINSNS	4096
#endif

/*
 * Linux socket filter uses negative absolute offsets to
 * reference ancillary data.
 */
#define SKF_AD_OFF		(-0x1000)
#define SKF_AD_PROTOCOL		0
#define SKF_AD_PKTTYPE		4
#define SKF_AD_IFINDEX		8
#define SKF_AD_NLATTR		12
#define SKF_AD_NLATTR_NEST	16
#define SKF_AD_MARK		20
#define SKF_AD_QUEUE		24
#define SKF_AD_HATYPE		28
#define SKF_AD_RXHASH		32
#define SKF_AD_CPU		36
#define SKF_AD_ALU_XOR_X	40
#define SKF_AD_VLAN_TAG		44
#define SKF_AD_VLAN_TAG_PRESENT	48
#define SKF_AD_PAY_OFFSET	52
#define SKF_AD_RANDOM		56
#define SKF_AD_VLAN_TPID	60
#define SKF_AD_MAX		64

/* ArgX, context and stack frame pointer register positions. Note,
 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
 * calls in BPF_CALL instruction.
 */
#define BPF_REG_ARG1	EBPF_REG_1
#define BPF_REG_ARG2	EBPF_REG_2
#define BPF_REG_ARG3	EBPF_REG_3
#define BPF_REG_ARG4	EBPF_REG_4
#define BPF_REG_ARG5	EBPF_REG_5
#define BPF_REG_CTX	EBPF_REG_6
#define BPF_REG_FP	EBPF_REG_10

/* Additional register mappings for converted user programs. */
#define BPF_REG_A	EBPF_REG_0
#define BPF_REG_X	EBPF_REG_7
#define BPF_REG_TMP	EBPF_REG_8

/* Helper macros for filter block array initializers. */
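/*
 * Illustrative note (not part of the converter itself): each initializer
 * below builds one struct ebpf_insn. For example, the prologue instruction
 * emitted later in this file,
 *
 *	BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1)
 *
 * expands to
 *
 *	(struct ebpf_insn) {
 *		.code = EBPF_ALU64 | EBPF_MOV | BPF_X,
 *		.dst_reg = EBPF_REG_6,
 *		.src_reg = EBPF_REG_1,
 *		.off = 0,
 *		.imm = 0 }
 *
 * i.e. the 64-bit register move "r6 = r1".
 */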

/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */

#define EBPF_ALU64_REG(OP, DST, SRC) \
	((struct ebpf_insn) { \
		.code = EBPF_ALU64 | BPF_OP(OP) | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

#define BPF_ALU32_REG(OP, DST, SRC) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | BPF_OP(OP) | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */

#define BPF_ALU32_IMM(OP, DST, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
		.dst_reg = DST, \
		.src_reg = 0, \
		.off = 0, \
		.imm = IMM })

/* Short form of mov, dst_reg = src_reg */

#define BPF_MOV64_REG(DST, SRC) \
	((struct ebpf_insn) { \
		.code = EBPF_ALU64 | EBPF_MOV | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

#define BPF_MOV32_REG(DST, SRC) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | EBPF_MOV | BPF_X, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = 0 })

/* Short form of mov, dst_reg = imm32 */

#define BPF_MOV32_IMM(DST, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | EBPF_MOV | BPF_K, \
		.dst_reg = DST, \
		.src_reg = 0, \
		.off = 0, \
		.imm = IMM })

/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */

#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_ALU | EBPF_MOV | BPF_SRC(TYPE), \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = 0, \
		.imm = IMM })

/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */

#define BPF_LD_ABS(SIZE, IMM) \
	((struct ebpf_insn) { \
		.code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
		.dst_reg = 0, \
		.src_reg = 0, \
		.off = 0, \
		.imm = IMM })

/* Memory load, dst_reg = *(uint *) (src_reg + off16) */

#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
	((struct ebpf_insn) { \
		.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = OFF, \
		.imm = 0 })

/* Memory store, *(uint *) (dst_reg + off16) = src_reg */

#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
	((struct ebpf_insn) { \
		.code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = OFF, \
		.imm = 0 })

/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */

#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
	((struct ebpf_insn) { \
		.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
		.dst_reg = DST, \
		.src_reg = 0, \
		.off = OFF, \
		.imm = IMM })

/* Raw code statement block */

#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
	((struct ebpf_insn) { \
		.code = CODE, \
		.dst_reg = DST, \
		.src_reg = SRC, \
		.off = OFF, \
		.imm = IMM })

/* Program exit */

#define BPF_EXIT_INSN() \
	((struct ebpf_insn) { \
		.code = BPF_JMP | EBPF_EXIT, \
		.dst_reg = 0, \
		.src_reg = 0, \
		.off = 0, \
		.imm = 0 })

/*
 * Placeholder to convert BPF extensions like length and VLAN tag,
 * if and when DPDK BPF supports them.
 */
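/*
 * Illustrative example: a Linux socket filter can read packet metadata with
 * classic BPF absolute loads at negative offsets, e.g. a hand-written filter
 * may fetch the VLAN tag with a BPF_LD | BPF_ABS instruction whose k equals
 * SKF_AD_OFF + SKF_AD_VLAN_TAG. DPDK BPF has no equivalent ancillary data,
 * so convert_bpf_load() below reports such loads as unsupported and the
 * conversion fails.
 */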
static bool convert_bpf_load(const struct bpf_insn *fp,
			     struct ebpf_insn **new_insnp __rte_unused)
{
	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
	case SKF_AD_OFF + SKF_AD_PKTTYPE:
	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
	case SKF_AD_OFF + SKF_AD_MARK:
	case SKF_AD_OFF + SKF_AD_RXHASH:
	case SKF_AD_OFF + SKF_AD_QUEUE:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* Linux has special negative offsets to access meta-data. */
		RTE_BPF_LOG_LINE(ERR,
			"rte_bpf_convert: socket offset %d not supported",
			fp->k - SKF_AD_OFF);
		return true;
	default:
		return false;
	}
}

static int bpf_convert_filter(const struct bpf_insn *prog, size_t len,
			      struct ebpf_insn *new_prog, uint32_t *new_len)
{
	unsigned int pass = 0;
	size_t new_flen = 0, target, i;
	struct ebpf_insn *new_insn;
	const struct bpf_insn *fp;
	int *addrs = NULL;
	uint8_t bpf_src;

	if (len > BPF_MAXINSNS) {
		RTE_BPF_LOG_LINE(ERR, "%s: cBPF program too long (%zu insns)",
				 __func__, len);
		return -EINVAL;
	}

	/* On second pass, allocate the new program */
	if (new_prog) {
		addrs = calloc(len, sizeof(*addrs));
		if (addrs == NULL)
			return -ENOMEM;
	}

do_pass:
	new_insn = new_prog;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_insn) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
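		/* Note: the two XORs above plus the CTX move below form the
		 * three-instruction prologue that the length-only pass
		 * (new_prog == NULL) accounts for with "new_insn += 3" in the
		 * else branch.
		 */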
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct ebpf_insn tmp_insns[6] = { };
		struct ebpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - new_prog;

		switch (fp->code) {
		/* Absolute loads are how classic BPF accesses skb */
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
			if (convert_bpf_load(fp, &insn))
				goto err;

			*insn = BPF_RAW_INSN(fp->code, 0, 0, 0, fp->k);
			break;

		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_X:
			/* For cBPF, don't cause floating point exception */
			*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
			*insn++ = BPF_JMP_IMM(EBPF_JNE, BPF_REG_X, 0, 2);
			*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
			*insn++ = BPF_EXIT_INSN();
			/* fallthrough */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* All arithmetic insns map as-is. */
			insn->code = fp->code;
			insn->dst_reg = BPF_REG_A;
			bpf_src = BPF_SRC(fp->code);
			insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			insn->off = 0;
			insn->imm = fp->k;
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len)					\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | EBPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as EBPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K) {
				*insn++ = BPF_MOV32_RAW(BPF_K, EBPF_REG_0,
							0, fp->k);
			}
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = mbuf->len or X = mbuf->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			/* BPF_ABS/BPF_IND implicitly expect mbuf ptr in R6 */

			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct rte_mbuf, pkt_len));
			break;

		/* Unknown instruction. */
		default:
			RTE_BPF_LOG_LINE(ERR, "%s: Unknown instruction!: %#x",
					 __func__, fp->code);
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - new_prog;
		return 0;
	}

	pass++;
	if ((ptrdiff_t)new_flen != new_insn - new_prog) {
		new_flen = new_insn - new_prog;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	free(addrs);
	assert(*new_len == new_flen);

	return 0;
err:
	free(addrs);
	return -1;
}

struct rte_bpf_prm *
rte_bpf_convert(const struct bpf_program *prog)
{
	struct rte_bpf_prm *prm = NULL;
	struct ebpf_insn *ebpf = NULL;
	uint32_t ebpf_len = 0;
	int ret;

	if (prog == NULL) {
		RTE_BPF_LOG_LINE(ERR, "%s: NULL program", __func__);
		rte_errno = EINVAL;
		return NULL;
	}

	/* 1st pass: calculate the eBPF program length */
	ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, NULL, &ebpf_len);
	if (ret < 0) {
		RTE_BPF_LOG_LINE(ERR, "%s: cannot get eBPF length", __func__);
		rte_errno = -ret;
		return NULL;
	}

	RTE_BPF_LOG_LINE(DEBUG, "%s: prog len cBPF=%u -> eBPF=%u",
			 __func__, prog->bf_len, ebpf_len);

	prm = rte_zmalloc("bpf_filter",
			  sizeof(*prm) + ebpf_len * sizeof(*ebpf), 0);
	if (prm == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	/* The eBPF instructions in this case are right after the header */
	ebpf = (void *)(prm + 1);

	/* 2nd pass: remap cBPF to eBPF instructions */
	ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, ebpf, &ebpf_len);
	if (ret < 0) {
		RTE_BPF_LOG_LINE(ERR, "%s: cannot convert cBPF to eBPF", __func__);
		rte_free(prm);
		rte_errno = -ret;
		return NULL;
	}

	prm->ins = ebpf;
	prm->nb_ins = ebpf_len;

	/* Classic BPF programs use mbufs */
	prm->prog_arg.type = RTE_BPF_ARG_PTR_MBUF;
	prm->prog_arg.size = sizeof(struct rte_mbuf);

	return prm;
}
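/*
 * Typical usage (illustrative sketch only; the pcap handle "pcap", the
 * filter string and the error handling below are placeholders, not part of
 * this file):
 *
 *	struct bpf_program bf;
 *	struct rte_bpf_prm *prm;
 *
 *	if (pcap_compile(pcap, &bf, "tcp port 80", 1,
 *			 PCAP_NETMASK_UNKNOWN) == 0) {
 *		prm = rte_bpf_convert(&bf);
 *		pcap_freecode(&bf);
 *		if (prm != NULL) {
 *			struct rte_bpf *bpf = rte_bpf_load(prm);
 *			... run the program with rte_bpf_exec(), then
 *			    rte_bpf_destroy(bpf) and rte_free(prm) when done ...
 *		}
 *	}
 */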