/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

#include <sys/queue.h>

#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include <rte_bpf_ethdev.h>
#include "bpf_impl.h"

/*
 * information about installed BPF rx/tx callback
 */

struct bpf_eth_cbi {
	/* used by both data & control path */
	uint32_t use;	/* usage counter */
	const struct rte_eth_rxtx_callback *cb;	/* callback handle */
	struct rte_bpf *bpf;
	struct rte_bpf_jit jit;
	/* used by control path only */
	LIST_ENTRY(bpf_eth_cbi) link;
	uint16_t port;
	uint16_t queue;
} __rte_cache_aligned;

/*
 * Odd number means that callback is used by datapath.
 * Even number means that callback is not used by datapath.
 */
#define BPF_ETH_CBI_INUSE	1

/*
 * List to manage RX/TX installed callbacks.
 */
LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);

enum {
	BPF_ETH_RX,
	BPF_ETH_TX,
	BPF_ETH_NUM,
};

/*
 * information about all installed BPF rx/tx callbacks
 */
struct bpf_eth_cbh {
	rte_spinlock_t lock;
	struct bpf_eth_cbi_list list;
	uint32_t type;
};

static struct bpf_eth_cbh rx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_RX,
};

static struct bpf_eth_cbh tx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_TX,
};

/*
 * Marks given callback as used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
{
	cbi->use++;
	/* make sure no store/load reordering could happen */
	rte_smp_mb();
}

/*
 * Marks given callback as not used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
{
	/* make sure all previous loads are completed */
	rte_smp_rmb();
	cbi->use++;
}
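/*
 * Illustrative sketch (editor's note, not part of the upstream logic):
 * the 'use' counter above acts as a lightweight quiescence marker.
 * A datapath thread brackets every burst it filters:
 *
 *	bpf_eth_cbi_inuse(cbi);		// counter becomes odd: burst in flight
 *	// ... run the BPF filter over the RX/TX burst ...
 *	bpf_eth_cbi_unuse(cbi);		// counter becomes even: idle again
 *
 * The control path (bpf_eth_cbi_wait() below) samples the counter; an odd
 * value means a burst is still being processed, so it spins until the value
 * changes before tearing the callback down.
 */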
/*
 * Waits till datapath finished using given callback.
 */
static void
bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
{
	uint32_t puse;

	/* make sure all previous loads and stores are completed */
	rte_smp_mb();

	puse = cbi->use;

	/* in use, busy wait till current RX/TX iteration is finished */
	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
		RTE_WAIT_UNTIL_MASKED((uint32_t *)(uintptr_t)&cbi->use,
			UINT32_MAX, !=, puse, __ATOMIC_RELAXED);
	}
}

static void
bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
{
	bc->bpf = NULL;
	memset(&bc->jit, 0, sizeof(bc->jit));
}

static struct bpf_eth_cbi *
bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	LIST_FOREACH(cbi, &cbh->list, link) {
		if (cbi->port == port && cbi->queue == queue)
			break;
	}
	return cbi;
}

static struct bpf_eth_cbi *
bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	/* return an existing one */
	cbi = bpf_eth_cbh_find(cbh, port, queue);
	if (cbi != NULL)
		return cbi;

	cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
	if (cbi != NULL) {
		cbi->port = port;
		cbi->queue = queue;
		LIST_INSERT_HEAD(&cbh->list, cbi, link);
	}
	return cbi;
}

/*
 * BPF packet processing routines.
 */

static inline uint32_t
apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
	uint32_t drop)
{
	uint32_t i, j, k;
	struct rte_mbuf *dr[num];

	for (i = 0, j = 0, k = 0; i != num; i++) {

		/* filter matches */
		if (rc[i] != 0)
			mb[j++] = mb[i];
		/* no match */
		else
			dr[k++] = mb[i];
	}

	if (drop != 0) {
		/* free filtered out mbufs */
		for (i = 0; i != k; i++)
			rte_pktmbuf_free(dr[i]);
	} else {
		/* copy filtered out mbufs beyond good ones */
		for (i = 0; i != k; i++)
			mb[j + i] = dr[i];
	}

	return j;
}

static inline uint32_t
pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint32_t i;
	void *dp[num];
	uint64_t rc[num];

	for (i = 0; i != num; i++)
		dp[i] = rte_pktmbuf_mtod(mb[i], void *);

	rte_bpf_exec_burst(bpf, dp, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	void *dp;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		dp = rte_pktmbuf_mtod(mb[i], void *);
		rc[i] = jit->func(dp);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

static inline uint32_t
pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint64_t rc[num];

	rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		rc[i] = jit->func(mb[i]);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

/*
 * RX/TX callbacks for raw data bpf.
 */

static uint16_t
bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;

	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

/*
 * RX/TX callbacks for mbuf.
 */

static uint16_t
bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static rte_rx_callback_fn
select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_rx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_rx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_rx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_rx_callback_mb_vm;

	return NULL;
}

static rte_tx_callback_fn
select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_tx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_tx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_tx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_tx_callback_mb_vm;

	return NULL;
}

/*
 * Helper function to perform BPF unload for given port/queue.
 * We have to introduce extra complexity (and a possible slowdown) here,
 * as right now there is no safe generic way to remove an RX/TX callback
 * while IO is active.
 * We still don't free the memory allocated for the callback handle itself;
 * again, right now there is no safe way to do that without stopping RX/TX
 * on the given port/queue first.
 */
static void
bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
{
	/* mark this cbi as empty */
	bc->cb = NULL;
	rte_smp_mb();

	/* make sure datapath doesn't use bpf anymore, then destroy bpf */
	bpf_eth_cbi_wait(bc);
	rte_bpf_destroy(bc->bpf);
	bpf_eth_cbi_cleanup(bc);
}

static void
bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *bc;

	bc = bpf_eth_cbh_find(cbh, port, queue);
	if (bc == NULL || bc->cb == NULL)
		return;

	if (cbh->type == BPF_ETH_RX)
		rte_eth_remove_rx_callback(port, queue, bc->cb);
	else
		rte_eth_remove_tx_callback(port, queue, bc->cb);

	bpf_eth_cbi_unload(bc);
}

void
rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

void
rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

static int
bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbi *bc;
	struct rte_bpf *bpf;
	rte_rx_callback_fn frx;
	rte_tx_callback_fn ftx;
	struct rte_bpf_jit jit;

	frx = NULL;
	ftx = NULL;

	if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
			queue >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	if (cbh->type == BPF_ETH_RX)
		frx = select_rx_callback(prm->prog_arg.type, flags);
	else
		ftx = select_tx_callback(prm->prog_arg.type, flags);

	if (frx == NULL && ftx == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
			__func__, port, queue);
		return -EINVAL;
	}

	bpf = rte_bpf_elf_load(prm, fname, sname);
	if (bpf == NULL)
		return -rte_errno;

	rte_bpf_get_jit(bpf, &jit);

	if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
			__func__, port, queue);
		rte_bpf_destroy(bpf);
		return -ENOTSUP;
	}

	/* setup/update global callback info */
	bc = bpf_eth_cbh_add(cbh, port, queue);
	if (bc == NULL) {
		/* don't leak the freshly loaded bpf object on failure */
		rte_bpf_destroy(bpf);
		return -ENOMEM;
	}

	/* remove old one, if any */
	if (bc->cb != NULL)
		bpf_eth_unload(cbh, port, queue);

	bc->bpf = bpf;
	bc->jit = jit;

	if (cbh->type == BPF_ETH_RX)
		bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
	else
		bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);

	if (bc->cb == NULL) {
		rc = -rte_errno;
		rte_bpf_destroy(bpf);
		bpf_eth_cbi_cleanup(bc);
	} else
		rc = 0;

	return rc;
}

int
rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}

int
rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}
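/*
 * Usage sketch (editor's note, illustrative only, not part of this file):
 * a typical application would install a JIT-ed RX filter from an ELF object
 * and remove it later.  The file name "filter.o", the section name ".text",
 * the 'prm' initialisation and the port_id/queue_id/handle_error() names
 * below are placeholders, not values required by the API.
 *
 *	struct rte_bpf_prm prm = {
 *		.prog_arg = {
 *			.type = RTE_BPF_ARG_PTR,
 *			.size = RTE_MBUF_DEFAULT_BUF_SIZE,
 *		},
 *	};
 *
 *	if (rte_bpf_eth_rx_elf_load(port_id, queue_id, &prm,
 *			"filter.o", ".text", RTE_BPF_ETH_F_JIT) != 0)
 *		handle_error();
 *	...
 *	rte_bpf_eth_rx_unload(port_id, queue_id);
 */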