/* dpdk/lib/bpf/bpf_pkt.c (revision 69f9d8aa357d2299e057b7e335f340e20a0c5e7e) */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include <rte_bpf_ethdev.h>
#include "bpf_impl.h"

/*
 * information about installed BPF rx/tx callback
 */

struct bpf_eth_cbi {
	/* used by both data & control path */
	uint32_t use;    /* usage counter */
	const struct rte_eth_rxtx_callback *cb;  /* callback handle */
	struct rte_bpf *bpf;
	struct rte_bpf_jit jit;
	/* used by control path only */
	LIST_ENTRY(bpf_eth_cbi) link;
	uint16_t port;
	uint16_t queue;
} __rte_cache_aligned;

/*
 * Odd number means that callback is used by datapath.
 * Even number means that callback is not used by datapath.
 */
#define BPF_ETH_CBI_INUSE  1

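/*
 * Illustrative sketch (not part of the build) of how the counter above is
 * used to synchronize data and control path; it assumes a single control
 * path writer per callback (enforced below by the per-list spinlock):
 *
 *	datapath, per burst:              control path, on unload:
 *		cbi->use++;  -> odd           bc->cb = NULL;
 *		run BPF over the burst        bpf_eth_cbi_wait(bc);
 *		cbi->use++;  -> even          rte_bpf_destroy(bc->bpf);
 *
 * bpf_eth_cbi_wait() returns once the counter is even or differs from the
 * value it sampled, i.e. once any burst that could still reference the old
 * BPF program has finished.
 */
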
/*
 * List to manage RX/TX installed callbacks.
 */
LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);

enum {
	BPF_ETH_RX,
	BPF_ETH_TX,
	BPF_ETH_NUM,
};

/*
 * information about all installed BPF rx/tx callbacks
 */
struct bpf_eth_cbh {
	rte_spinlock_t lock;
	struct bpf_eth_cbi_list list;
	uint32_t type;
};

static struct bpf_eth_cbh rx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_RX,
};

static struct bpf_eth_cbh tx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_TX,
};

/*
 * Marks given callback as used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
{
	cbi->use++;
	/* make sure no store/load reordering could happen */
	rte_smp_mb();
}

/*
 * Marks given callback as not used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
{
	/* make sure all previous loads are completed */
	rte_smp_rmb();
	cbi->use++;
}

/*
 * Waits till datapath has finished using the given callback.
 */
static void
bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
{
	uint32_t puse;

	/* make sure all previous loads and stores are completed */
	rte_smp_mb();

	puse = cbi->use;

	/* in use, busy wait till current RX/TX iteration is finished */
	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
		RTE_WAIT_UNTIL_MASKED((uint32_t *)(uintptr_t)&cbi->use,
			UINT32_MAX, !=, puse, __ATOMIC_RELAXED);
	}
}

static void
bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
{
	bc->bpf = NULL;
	memset(&bc->jit, 0, sizeof(bc->jit));
}

static struct bpf_eth_cbi *
bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	LIST_FOREACH(cbi, &cbh->list, link) {
		if (cbi->port == port && cbi->queue == queue)
			break;
	}
	return cbi;
}

static struct bpf_eth_cbi *
bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	/* return an existing one */
	cbi = bpf_eth_cbh_find(cbh, port, queue);
	if (cbi != NULL)
		return cbi;

	cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
	if (cbi != NULL) {
		cbi->port = port;
		cbi->queue = queue;
		LIST_INSERT_HEAD(&cbh->list, cbi, link);
	}
	return cbi;
}

/*
 * BPF packet processing routines.
 */

static inline uint32_t
apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
	uint32_t drop)
{
	uint32_t i, j, k;
	struct rte_mbuf *dr[num];

	for (i = 0, j = 0, k = 0; i != num; i++) {

		/* filter matches */
		if (rc[i] != 0)
			mb[j++] = mb[i];
		/* no match */
		else
			dr[k++] = mb[i];
	}

	if (drop != 0) {
		/* free filtered out mbufs */
		for (i = 0; i != k; i++)
			rte_pktmbuf_free(dr[i]);
	} else {
		/* copy filtered out mbufs beyond good ones */
		for (i = 0; i != k; i++)
			mb[j + i] = dr[i];
	}

	return j;
}
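
/*
 * Worked example (illustrative only): for a burst of four mbufs
 * {m0, m1, m2, m3} with BPF results rc = {1, 0, 7, 0}, apply_filter()
 * compacts the matching packets to the front, so mb[] starts with
 * {m0, m2} and the return value is 2.  With drop != 0 the two
 * non-matching mbufs are freed; with drop == 0 they are kept after the
 * matching ones, i.e. mb[] ends up as {m0, m2, m1, m3}.
 */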

static inline uint32_t
pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint32_t i;
	void *dp[num];
	uint64_t rc[num];

	for (i = 0; i != num; i++)
		dp[i] = rte_pktmbuf_mtod(mb[i], void *);

	rte_bpf_exec_burst(bpf, dp, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	void *dp;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		dp = rte_pktmbuf_mtod(mb[i], void *);
		rc[i] = jit->func(dp);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

static inline uint32_t
pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint64_t rc[num];

	rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		rc[i] = jit->func(mb[i]);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

/*
 * RX/TX callbacks for BPF programs that take a pointer to the raw packet
 * data (RTE_BPF_ARG_PTR).
 */

static uint16_t
bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;

	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

/*
 * RX/TX callbacks for BPF programs that take a pointer to the mbuf itself
 * (RTE_BPF_ARG_PTR_MBUF).
 */

static uint16_t
bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static rte_rx_callback_fn
select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_rx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_rx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_rx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_rx_callback_mb_vm;

	return NULL;
}

static rte_tx_callback_fn
select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_tx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_tx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_tx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_tx_callback_mb_vm;

	return NULL;
}
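
/*
 * Illustrative example (not part of the build): with a program whose
 * argument type is RTE_BPF_ARG_PTR,
 *	select_rx_callback(RTE_BPF_ARG_PTR, RTE_BPF_ETH_F_JIT)
 * picks bpf_rx_callback_jit, while the same type without RTE_BPF_ETH_F_JIT
 * picks the interpreted bpf_rx_callback_vm; the RTE_BPF_ARG_PTR_MBUF
 * variants map to the *_mb_* callbacks in the same way.
 */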

/*
 * Helper function to perform BPF unload for given port/queue.
 * We have to introduce extra complexity (and a possible slowdown) here,
 * as right now there is no safe generic way to remove an RX/TX callback
 * while IO is active.
 * We still don't free the memory allocated for the callback handle itself,
 * as right now there is no safe way to do that without first stopping RX/TX
 * on the given port/queue.
 */
static void
bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
{
	/* mark this cbi as empty */
	bc->cb = NULL;
	rte_smp_mb();

	/* make sure datapath doesn't use bpf anymore, then destroy bpf */
	bpf_eth_cbi_wait(bc);
	rte_bpf_destroy(bc->bpf);
	bpf_eth_cbi_cleanup(bc);
}

static void
bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *bc;

	bc = bpf_eth_cbh_find(cbh, port, queue);
	if (bc == NULL || bc->cb == NULL)
		return;

	if (cbh->type == BPF_ETH_RX)
		rte_eth_remove_rx_callback(port, queue, bc->cb);
	else
		rte_eth_remove_tx_callback(port, queue, bc->cb);

	bpf_eth_cbi_unload(bc);
}


void
rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

void
rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

static int
bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbi *bc;
	struct rte_bpf *bpf;
	rte_rx_callback_fn frx;
	rte_tx_callback_fn ftx;
	struct rte_bpf_jit jit;

	frx = NULL;
	ftx = NULL;

	if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
			queue >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	if (cbh->type == BPF_ETH_RX)
		frx = select_rx_callback(prm->prog_arg.type, flags);
	else
		ftx = select_tx_callback(prm->prog_arg.type, flags);

	if (frx == NULL && ftx == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
			__func__, port, queue);
		return -EINVAL;
	}

	bpf = rte_bpf_elf_load(prm, fname, sname);
	if (bpf == NULL)
		return -rte_errno;

	rte_bpf_get_jit(bpf, &jit);

	if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
			__func__, port, queue);
		rte_bpf_destroy(bpf);
		return -ENOTSUP;
	}

	/* setup/update global callback info */
	bc = bpf_eth_cbh_add(cbh, port, queue);
	if (bc == NULL) {
		/* don't leak the loaded program if we can't track it */
		rte_bpf_destroy(bpf);
		return -ENOMEM;
	}

	/* remove old one, if any */
	if (bc->cb != NULL)
		bpf_eth_unload(cbh, port, queue);

	bc->bpf = bpf;
	bc->jit = jit;

	if (cbh->type == BPF_ETH_RX)
		bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
	else
		bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);

	if (bc->cb == NULL) {
		rc = -rte_errno;
		rte_bpf_destroy(bpf);
		bpf_eth_cbi_cleanup(bc);
	} else
		rc = 0;

	return rc;
}

int
rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}

int
rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}
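
/*
 * Usage sketch (illustrative only, not part of this file): an application
 * would typically load a BPF program onto an RX queue and later remove it
 * roughly as below; "flt.o" and ".text" are placeholder ELF file/section
 * names and other rte_bpf_prm fields are omitted for brevity.
 *
 *	static int
 *	attach_rx_filter(uint16_t port, uint16_t queue)
 *	{
 *		struct rte_bpf_prm prm;
 *
 *		memset(&prm, 0, sizeof(prm));
 *		prm.prog_arg.type = RTE_BPF_ARG_PTR;
 *
 *		return rte_bpf_eth_rx_elf_load(port, queue, &prm,
 *			"flt.o", ".text", RTE_BPF_ETH_F_JIT);
 *	}
 *
 *	...
 *	rte_bpf_eth_rx_unload(port, queue);
 */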