1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2021 Microsoft Corporation 3 * 4 * Based on bpf_convert_filter() in the Linux kernel sources 5 * and filter2xdp. 6 * 7 * Licensed as BSD with permission original authors. 8 * Copyright (C) 2017 Tobias Klauser 9 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com 10 */ 11 12 #include <assert.h> 13 #include <errno.h> 14 #include <stdbool.h> 15 #include <stddef.h> 16 #include <stdint.h> 17 #include <stdlib.h> 18 #include <string.h> 19 20 #include <rte_common.h> 21 #include <rte_bpf.h> 22 #include <rte_log.h> 23 #include <rte_malloc.h> 24 #include <rte_errno.h> 25 26 /* Workaround name conflicts with libpcap */ 27 #define bpf_validate(f, len) bpf_validate_libpcap(f, len) 28 #include <pcap/pcap.h> 29 #include <pcap/bpf.h> 30 #undef bpf_validate 31 32 #include "bpf_impl.h" 33 #include "bpf_def.h" 34 35 #ifndef BPF_MAXINSNS 36 #define BPF_MAXINSNS 4096 37 #endif 38 39 /* 40 * Linux socket filter uses negative absolute offsets to 41 * reference ancillary data. 42 */ 43 #define SKF_AD_OFF (-0x1000) 44 #define SKF_AD_PROTOCOL 0 45 #define SKF_AD_PKTTYPE 4 46 #define SKF_AD_IFINDEX 8 47 #define SKF_AD_NLATTR 12 48 #define SKF_AD_NLATTR_NEST 16 49 #define SKF_AD_MARK 20 50 #define SKF_AD_QUEUE 24 51 #define SKF_AD_HATYPE 28 52 #define SKF_AD_RXHASH 32 53 #define SKF_AD_CPU 36 54 #define SKF_AD_ALU_XOR_X 40 55 #define SKF_AD_VLAN_TAG 44 56 #define SKF_AD_VLAN_TAG_PRESENT 48 57 #define SKF_AD_PAY_OFFSET 52 58 #define SKF_AD_RANDOM 56 59 #define SKF_AD_VLAN_TPID 60 60 #define SKF_AD_MAX 64 61 62 /* ArgX, context and stack frame pointer register positions. Note, 63 * Arg1, Arg2, Arg3, etc are used as argument mappings of function 64 * calls in BPF_CALL instruction. 65 */ 66 #define BPF_REG_ARG1 EBPF_REG_1 67 #define BPF_REG_ARG2 EBPF_REG_2 68 #define BPF_REG_ARG3 EBPF_REG_3 69 #define BPF_REG_ARG4 EBPF_REG_4 70 #define BPF_REG_ARG5 EBPF_REG_5 71 #define BPF_REG_CTX EBPF_REG_6 72 #define BPF_REG_FP EBPF_REG_10 73 74 /* Additional register mappings for converted user programs. */ 75 #define BPF_REG_A EBPF_REG_0 76 #define BPF_REG_X EBPF_REG_7 77 #define BPF_REG_TMP EBPF_REG_8 78 79 /* Helper macros for filter block array initializers. */ 80 81 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ 82 83 #define EBPF_ALU64_REG(OP, DST, SRC) \ 84 ((struct ebpf_insn) { \ 85 .code = EBPF_ALU64 | BPF_OP(OP) | BPF_X, \ 86 .dst_reg = DST, \ 87 .src_reg = SRC, \ 88 .off = 0, \ 89 .imm = 0 }) 90 91 #define BPF_ALU32_REG(OP, DST, SRC) \ 92 ((struct ebpf_insn) { \ 93 .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ 94 .dst_reg = DST, \ 95 .src_reg = SRC, \ 96 .off = 0, \ 97 .imm = 0 }) 98 99 /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ 100 101 #define BPF_ALU32_IMM(OP, DST, IMM) \ 102 ((struct ebpf_insn) { \ 103 .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ 104 .dst_reg = DST, \ 105 .src_reg = 0, \ 106 .off = 0, \ 107 .imm = IMM }) 108 109 /* Short form of mov, dst_reg = src_reg */ 110 111 #define BPF_MOV64_REG(DST, SRC) \ 112 ((struct ebpf_insn) { \ 113 .code = EBPF_ALU64 | EBPF_MOV | BPF_X, \ 114 .dst_reg = DST, \ 115 .src_reg = SRC, \ 116 .off = 0, \ 117 .imm = 0 }) 118 119 #define BPF_MOV32_REG(DST, SRC) \ 120 ((struct ebpf_insn) { \ 121 .code = BPF_ALU | EBPF_MOV | BPF_X, \ 122 .dst_reg = DST, \ 123 .src_reg = SRC, \ 124 .off = 0, \ 125 .imm = 0 }) 126 127 /* Short form of mov, dst_reg = imm32 */ 128 129 #define BPF_MOV32_IMM(DST, IMM) \ 130 ((struct ebpf_insn) { \ 131 .code = BPF_ALU | EBPF_MOV | BPF_K, \ 132 .dst_reg = DST, \ 133 .src_reg = 0, \ 134 .off = 0, \ 135 .imm = IMM }) 136 137 /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ 138 139 #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ 140 ((struct ebpf_insn) { \ 141 .code = BPF_ALU | EBPF_MOV | BPF_SRC(TYPE), \ 142 .dst_reg = DST, \ 143 .src_reg = SRC, \ 144 .off = 0, \ 145 .imm = IMM }) 146 147 /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ 148 149 #define BPF_LD_ABS(SIZE, IMM) \ 150 ((struct ebpf_insn) { \ 151 .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ 152 .dst_reg = 0, \ 153 .src_reg = 0, \ 154 .off = 0, \ 155 .imm = IMM }) 156 157 /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ 158 159 #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ 160 ((struct ebpf_insn) { \ 161 .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ 162 .dst_reg = DST, \ 163 .src_reg = SRC, \ 164 .off = OFF, \ 165 .imm = 0 }) 166 167 /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ 168 169 #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ 170 ((struct ebpf_insn) { \ 171 .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ 172 .dst_reg = DST, \ 173 .src_reg = SRC, \ 174 .off = OFF, \ 175 .imm = 0 }) 176 177 /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ 178 179 #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ 180 ((struct ebpf_insn) { \ 181 .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ 182 .dst_reg = DST, \ 183 .src_reg = 0, \ 184 .off = OFF, \ 185 .imm = IMM }) 186 187 /* Raw code statement block */ 188 189 #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ 190 ((struct ebpf_insn) { \ 191 .code = CODE, \ 192 .dst_reg = DST, \ 193 .src_reg = SRC, \ 194 .off = OFF, \ 195 .imm = IMM }) 196 197 /* Program exit */ 198 199 #define BPF_EXIT_INSN() \ 200 ((struct ebpf_insn) { \ 201 .code = BPF_JMP | EBPF_EXIT, \ 202 .dst_reg = 0, \ 203 .src_reg = 0, \ 204 .off = 0, \ 205 .imm = 0 }) 206 207 /* 208 * Placeholder to convert BPF extensions like length and VLAN tag 209 * If and when DPDK BPF supports them. 210 */ 211 static bool convert_bpf_load(const struct bpf_insn *fp, 212 struct ebpf_insn **new_insnp __rte_unused) 213 { 214 switch (fp->k) { 215 case SKF_AD_OFF + SKF_AD_PROTOCOL: 216 case SKF_AD_OFF + SKF_AD_PKTTYPE: 217 case SKF_AD_OFF + SKF_AD_IFINDEX: 218 case SKF_AD_OFF + SKF_AD_HATYPE: 219 case SKF_AD_OFF + SKF_AD_MARK: 220 case SKF_AD_OFF + SKF_AD_RXHASH: 221 case SKF_AD_OFF + SKF_AD_QUEUE: 222 case SKF_AD_OFF + SKF_AD_VLAN_TAG: 223 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: 224 case SKF_AD_OFF + SKF_AD_VLAN_TPID: 225 case SKF_AD_OFF + SKF_AD_PAY_OFFSET: 226 case SKF_AD_OFF + SKF_AD_NLATTR: 227 case SKF_AD_OFF + SKF_AD_NLATTR_NEST: 228 case SKF_AD_OFF + SKF_AD_CPU: 229 case SKF_AD_OFF + SKF_AD_RANDOM: 230 case SKF_AD_OFF + SKF_AD_ALU_XOR_X: 231 /* Linux has special negative offsets to access meta-data. */ 232 RTE_BPF_LOG(ERR, 233 "rte_bpf_convert: socket offset %d not supported\n", 234 fp->k - SKF_AD_OFF); 235 return true; 236 default: 237 return false; 238 } 239 } 240 241 static int bpf_convert_filter(const struct bpf_insn *prog, size_t len, 242 struct ebpf_insn *new_prog, uint32_t *new_len) 243 { 244 unsigned int pass = 0; 245 size_t new_flen = 0, target, i; 246 struct ebpf_insn *new_insn; 247 const struct bpf_insn *fp; 248 int *addrs = NULL; 249 uint8_t bpf_src; 250 251 if (len > BPF_MAXINSNS) { 252 RTE_BPF_LOG(ERR, "%s: cBPF program too long (%zu insns)\n", 253 __func__, len); 254 return -EINVAL; 255 } 256 257 /* On second pass, allocate the new program */ 258 if (new_prog) { 259 addrs = calloc(len, sizeof(*addrs)); 260 if (addrs == NULL) 261 return -ENOMEM; 262 } 263 264 do_pass: 265 new_insn = new_prog; 266 fp = prog; 267 268 /* Classic BPF related prologue emission. */ 269 if (new_insn) { 270 /* Classic BPF expects A and X to be reset first. These need 271 * to be guaranteed to be the first two instructions. 272 */ 273 *new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); 274 *new_insn++ = EBPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); 275 276 /* All programs must keep CTX in callee saved BPF_REG_CTX. 277 * In eBPF case it's done by the compiler, here we need to 278 * do this ourself. Initial CTX is present in BPF_REG_ARG1. 279 */ 280 *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); 281 } else { 282 new_insn += 3; 283 } 284 285 for (i = 0; i < len; fp++, i++) { 286 struct ebpf_insn tmp_insns[6] = { }; 287 struct ebpf_insn *insn = tmp_insns; 288 289 if (addrs) 290 addrs[i] = new_insn - new_prog; 291 292 switch (fp->code) { 293 /* Absolute loads are how classic BPF accesses skb */ 294 case BPF_LD | BPF_ABS | BPF_W: 295 case BPF_LD | BPF_ABS | BPF_H: 296 case BPF_LD | BPF_ABS | BPF_B: 297 if (convert_bpf_load(fp, &insn)) 298 goto err; 299 300 *insn = BPF_RAW_INSN(fp->code, 0, 0, 0, fp->k); 301 break; 302 303 case BPF_ALU | BPF_DIV | BPF_X: 304 case BPF_ALU | BPF_MOD | BPF_X: 305 /* For cBPF, don't cause floating point exception */ 306 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); 307 *insn++ = BPF_JMP_IMM(EBPF_JNE, BPF_REG_X, 0, 2); 308 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); 309 *insn++ = BPF_EXIT_INSN(); 310 /* fallthrough */ 311 case BPF_ALU | BPF_ADD | BPF_X: 312 case BPF_ALU | BPF_ADD | BPF_K: 313 case BPF_ALU | BPF_SUB | BPF_X: 314 case BPF_ALU | BPF_SUB | BPF_K: 315 case BPF_ALU | BPF_AND | BPF_X: 316 case BPF_ALU | BPF_AND | BPF_K: 317 case BPF_ALU | BPF_OR | BPF_X: 318 case BPF_ALU | BPF_OR | BPF_K: 319 case BPF_ALU | BPF_LSH | BPF_X: 320 case BPF_ALU | BPF_LSH | BPF_K: 321 case BPF_ALU | BPF_RSH | BPF_X: 322 case BPF_ALU | BPF_RSH | BPF_K: 323 case BPF_ALU | BPF_XOR | BPF_X: 324 case BPF_ALU | BPF_XOR | BPF_K: 325 case BPF_ALU | BPF_MUL | BPF_X: 326 case BPF_ALU | BPF_MUL | BPF_K: 327 case BPF_ALU | BPF_DIV | BPF_K: 328 case BPF_ALU | BPF_MOD | BPF_K: 329 case BPF_ALU | BPF_NEG: 330 case BPF_LD | BPF_IND | BPF_W: 331 case BPF_LD | BPF_IND | BPF_H: 332 case BPF_LD | BPF_IND | BPF_B: 333 /* All arithmetic insns map as-is. */ 334 insn->code = fp->code; 335 insn->dst_reg = BPF_REG_A; 336 bpf_src = BPF_SRC(fp->code); 337 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0; 338 insn->off = 0; 339 insn->imm = fp->k; 340 break; 341 342 /* Jump transformation cannot use BPF block macros 343 * everywhere as offset calculation and target updates 344 * require a bit more work than the rest, i.e. jump 345 * opcodes map as-is, but offsets need adjustment. 346 */ 347 348 #define BPF_EMIT_JMP \ 349 do { \ 350 if (target >= len) \ 351 goto err; \ 352 insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ 353 /* Adjust pc relative offset for 2nd or 3rd insn. */ \ 354 insn->off -= insn - tmp_insns; \ 355 } while (0) 356 357 case BPF_JMP | BPF_JA: 358 target = i + fp->k + 1; 359 insn->code = fp->code; 360 BPF_EMIT_JMP; 361 break; 362 363 case BPF_JMP | BPF_JEQ | BPF_K: 364 case BPF_JMP | BPF_JEQ | BPF_X: 365 case BPF_JMP | BPF_JSET | BPF_K: 366 case BPF_JMP | BPF_JSET | BPF_X: 367 case BPF_JMP | BPF_JGT | BPF_K: 368 case BPF_JMP | BPF_JGT | BPF_X: 369 case BPF_JMP | BPF_JGE | BPF_K: 370 case BPF_JMP | BPF_JGE | BPF_X: 371 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) { 372 /* BPF immediates are signed, zero extend 373 * immediate into tmp register and use it 374 * in compare insn. 375 */ 376 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); 377 378 insn->dst_reg = BPF_REG_A; 379 insn->src_reg = BPF_REG_TMP; 380 bpf_src = BPF_X; 381 } else { 382 insn->dst_reg = BPF_REG_A; 383 insn->imm = fp->k; 384 bpf_src = BPF_SRC(fp->code); 385 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0; 386 } 387 388 /* Common case where 'jump_false' is next insn. */ 389 if (fp->jf == 0) { 390 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; 391 target = i + fp->jt + 1; 392 BPF_EMIT_JMP; 393 break; 394 } 395 396 /* Convert JEQ into JNE when 'jump_true' is next insn. */ 397 if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { 398 insn->code = BPF_JMP | EBPF_JNE | bpf_src; 399 target = i + fp->jf + 1; 400 BPF_EMIT_JMP; 401 break; 402 } 403 404 /* Other jumps are mapped into two insns: Jxx and JA. */ 405 target = i + fp->jt + 1; 406 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; 407 BPF_EMIT_JMP; 408 insn++; 409 410 insn->code = BPF_JMP | BPF_JA; 411 target = i + fp->jf + 1; 412 BPF_EMIT_JMP; 413 break; 414 415 /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ 416 case BPF_LDX | BPF_MSH | BPF_B: 417 /* tmp = A */ 418 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); 419 /* A = BPF_R0 = *(u8 *) (skb->data + K) */ 420 *insn++ = BPF_LD_ABS(BPF_B, fp->k); 421 /* A &= 0xf */ 422 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); 423 /* A <<= 2 */ 424 *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); 425 /* X = A */ 426 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); 427 /* A = tmp */ 428 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); 429 break; 430 431 /* RET_K is remaped into 2 insns. RET_A case doesn't need an 432 * extra mov as EBPF_REG_0 is already mapped into BPF_REG_A. 433 */ 434 case BPF_RET | BPF_A: 435 case BPF_RET | BPF_K: 436 if (BPF_RVAL(fp->code) == BPF_K) { 437 *insn++ = BPF_MOV32_RAW(BPF_K, EBPF_REG_0, 438 0, fp->k); 439 } 440 *insn = BPF_EXIT_INSN(); 441 break; 442 443 /* Store to stack. */ 444 case BPF_ST: 445 case BPF_STX: 446 *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == 447 BPF_ST ? BPF_REG_A : BPF_REG_X, 448 -(BPF_MEMWORDS - fp->k) * 4); 449 break; 450 451 /* Load from stack. */ 452 case BPF_LD | BPF_MEM: 453 case BPF_LDX | BPF_MEM: 454 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? 455 BPF_REG_A : BPF_REG_X, BPF_REG_FP, 456 -(BPF_MEMWORDS - fp->k) * 4); 457 break; 458 459 /* A = K or X = K */ 460 case BPF_LD | BPF_IMM: 461 case BPF_LDX | BPF_IMM: 462 *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ? 463 BPF_REG_A : BPF_REG_X, fp->k); 464 break; 465 466 /* X = A */ 467 case BPF_MISC | BPF_TAX: 468 *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); 469 break; 470 471 /* A = X */ 472 case BPF_MISC | BPF_TXA: 473 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X); 474 break; 475 476 /* A = mbuf->len or X = mbuf->len */ 477 case BPF_LD | BPF_W | BPF_LEN: 478 case BPF_LDX | BPF_W | BPF_LEN: 479 /* BPF_ABS/BPF_IND implicitly expect mbuf ptr in R6 */ 480 481 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? 482 BPF_REG_A : BPF_REG_X, BPF_REG_CTX, 483 offsetof(struct rte_mbuf, pkt_len)); 484 break; 485 486 /* Unknown instruction. */ 487 default: 488 RTE_BPF_LOG(ERR, "%s: Unknown instruction!: %#x\n", 489 __func__, fp->code); 490 goto err; 491 } 492 493 insn++; 494 if (new_prog) 495 memcpy(new_insn, tmp_insns, 496 sizeof(*insn) * (insn - tmp_insns)); 497 new_insn += insn - tmp_insns; 498 } 499 500 if (!new_prog) { 501 /* Only calculating new length. */ 502 *new_len = new_insn - new_prog; 503 return 0; 504 } 505 506 pass++; 507 if ((ptrdiff_t)new_flen != new_insn - new_prog) { 508 new_flen = new_insn - new_prog; 509 if (pass > 2) 510 goto err; 511 goto do_pass; 512 } 513 514 free(addrs); 515 assert(*new_len == new_flen); 516 517 return 0; 518 err: 519 free(addrs); 520 return -1; 521 } 522 523 struct rte_bpf_prm * 524 rte_bpf_convert(const struct bpf_program *prog) 525 { 526 struct rte_bpf_prm *prm = NULL; 527 struct ebpf_insn *ebpf = NULL; 528 uint32_t ebpf_len = 0; 529 int ret; 530 531 if (prog == NULL) { 532 RTE_BPF_LOG(ERR, "%s: NULL program\n", __func__); 533 rte_errno = EINVAL; 534 return NULL; 535 } 536 537 /* 1st pass: calculate the eBPF program length */ 538 ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, NULL, &ebpf_len); 539 if (ret < 0) { 540 RTE_BPF_LOG(ERR, "%s: cannot get eBPF length\n", __func__); 541 rte_errno = -ret; 542 return NULL; 543 } 544 545 RTE_BPF_LOG(DEBUG, "%s: prog len cBPF=%u -> eBPF=%u\n", 546 __func__, prog->bf_len, ebpf_len); 547 548 prm = rte_zmalloc("bpf_filter", 549 sizeof(*prm) + ebpf_len * sizeof(*ebpf), 0); 550 if (prm == NULL) { 551 rte_errno = ENOMEM; 552 return NULL; 553 } 554 555 /* The EPBF instructions in this case are right after the header */ 556 ebpf = (void *)(prm + 1); 557 558 /* 2nd pass: remap cBPF to eBPF instructions */ 559 ret = bpf_convert_filter(prog->bf_insns, prog->bf_len, ebpf, &ebpf_len); 560 if (ret < 0) { 561 RTE_BPF_LOG(ERR, "%s: cannot convert cBPF to eBPF\n", __func__); 562 free(prm); 563 rte_errno = -ret; 564 return NULL; 565 } 566 567 prm->ins = ebpf; 568 prm->nb_ins = ebpf_len; 569 570 /* Classic BPF programs use mbufs */ 571 prm->prog_arg.type = RTE_BPF_ARG_PTR_MBUF; 572 prm->prog_arg.size = sizeof(struct rte_mbuf); 573 574 return prm; 575 } 576