xref: /dpdk/lib/bpf/bpf_pkt.c (revision 30a1de105a5f40d77b344a891c4a68f79e815c43)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

#include <sys/queue.h>

#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include <rte_bpf_ethdev.h>
#include "bpf_impl.h"

/*
 * information about installed BPF rx/tx callback
 */

struct bpf_eth_cbi {
	/* used by both data & control path */
	uint32_t use;    /* usage counter */
	const struct rte_eth_rxtx_callback *cb;  /* callback handle */
	struct rte_bpf *bpf;
	struct rte_bpf_jit jit;
	/* used by control path only */
	LIST_ENTRY(bpf_eth_cbi) link;
	uint16_t port;
	uint16_t queue;
} __rte_cache_aligned;

/*
 * Odd number means that callback is used by datapath.
 * Even number means that callback is not used by datapath.
 */
#define BPF_ETH_CBI_INUSE  1
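
/*
 * Illustrative note: the "use" counter starts at 0 (even, i.e. idle).
 * Each datapath burst bumps it once in bpf_eth_cbi_inuse() (e.g. 0 -> 1,
 * odd == busy) and once more in bpf_eth_cbi_unuse() (e.g. 1 -> 2,
 * even == idle again).  The control path samples the counter in
 * bpf_eth_cbi_wait(); an odd value means a burst is in flight, so it
 * spins until the value changes.
 */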

/*
 * List to manage RX/TX installed callbacks.
 */
LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);

enum {
	BPF_ETH_RX,
	BPF_ETH_TX,
	BPF_ETH_NUM,
};

/*
 * information about all installed BPF rx/tx callbacks
 */
struct bpf_eth_cbh {
	rte_spinlock_t lock;
	struct bpf_eth_cbi_list list;
	uint32_t type;
};

static struct bpf_eth_cbh rx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_RX,
};

static struct bpf_eth_cbh tx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_TX,
};

/*
 * Marks given callback as used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
{
	cbi->use++;
	/* make sure no store/load reordering could happen */
	rte_smp_mb();
}

/*
 * Marks given callback as not used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
{
	/* make sure all previous loads are completed */
	rte_smp_rmb();
	cbi->use++;
}

/*
 * Waits until datapath has finished using the given callback.
 */
static void
bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
{
	uint32_t puse;

	/* make sure all previous loads and stores are completed */
	rte_smp_mb();

	puse = cbi->use;

	/* in use, busy wait until current RX/TX iteration is finished */
	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
		RTE_WAIT_UNTIL_MASKED((uint32_t *)(uintptr_t)&cbi->use,
			UINT32_MAX, !=, puse, __ATOMIC_RELAXED);
	}
}

static void
bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
{
	bc->bpf = NULL;
	memset(&bc->jit, 0, sizeof(bc->jit));
}

static struct bpf_eth_cbi *
bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	LIST_FOREACH(cbi, &cbh->list, link) {
		if (cbi->port == port && cbi->queue == queue)
			break;
	}
	return cbi;
}

static struct bpf_eth_cbi *
bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	/* return an existing one */
	cbi = bpf_eth_cbh_find(cbh, port, queue);
	if (cbi != NULL)
		return cbi;

	cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
	if (cbi != NULL) {
		cbi->port = port;
		cbi->queue = queue;
		LIST_INSERT_HEAD(&cbh->list, cbi, link);
	}
	return cbi;
}

/*
 * BPF packet processing routines.
 */

static inline uint32_t
apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
	uint32_t drop)
{
	uint32_t i, j, k;
	struct rte_mbuf *dr[num];

	for (i = 0, j = 0, k = 0; i != num; i++) {

		/* filter matches */
		if (rc[i] != 0)
			mb[j++] = mb[i];
		/* no match */
		else
			dr[k++] = mb[i];
	}

	if (drop != 0) {
		/* free filtered out mbufs */
		for (i = 0; i != k; i++)
			rte_pktmbuf_free(dr[i]);
	} else {
		/* copy filtered out mbufs beyond good ones */
		for (i = 0; i != k; i++)
			mb[j + i] = dr[i];
	}

	return j;
}
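
/*
 * Illustrative example: with num == 4 and rc == {1, 0, 1, 0}, apply_filter()
 * compacts the burst so the matching mbufs occupy mb[0..1] and returns 2.
 * With drop != 0 (RX path) the two non-matching mbufs are freed; with
 * drop == 0 (TX path) they are parked at mb[2..3], beyond the returned
 * count, so the caller still owns them.
 */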

static inline uint32_t
pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint32_t i;
	void *dp[num];
	uint64_t rc[num];

	for (i = 0; i != num; i++)
		dp[i] = rte_pktmbuf_mtod(mb[i], void *);

	rte_bpf_exec_burst(bpf, dp, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	void *dp;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		dp = rte_pktmbuf_mtod(mb[i], void *);
		rc[i] = jit->func(dp);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

static inline uint32_t
pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint64_t rc[num];

	rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		rc[i] = jit->func(mb[i]);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

/*
 * RX/TX callbacks for raw data bpf.
 */

static uint16_t
bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;

	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

/*
 * RX/TX callbacks for mbuf.
 */

static uint16_t
bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static rte_rx_callback_fn
select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_rx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_rx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_rx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_rx_callback_mb_vm;

	return NULL;
}

static rte_tx_callback_fn
select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_tx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_tx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_tx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_tx_callback_mb_vm;

	return NULL;
}
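
/*
 * Summary (illustrative): the callback variant is chosen by the program's
 * argument type and the load flags.  RTE_BPF_ARG_PTR programs receive the
 * packet data pointer (rte_pktmbuf_mtod), RTE_BPF_ARG_PTR_MBUF programs
 * receive the mbuf itself; with RTE_BPF_ETH_F_JIT the generated native code
 * (jit->func) is invoked directly, otherwise the byte code is interpreted
 * via rte_bpf_exec_burst().
 */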

/*
 * Helper function to perform BPF unload for given port/queue.
 * It has to introduce extra complexity (and a possible slowdown) here,
 * as right now there is no safe generic way to remove an RX/TX callback
 * while IO is active.
 * Note that the memory allocated for the callback handle itself is still
 * not freed, as right now there is no safe way to do that without first
 * stopping RX/TX on the given port/queue.
 */
static void
bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
{
	/* mark this cbi as empty */
	bc->cb = NULL;
	rte_smp_mb();

	/* make sure datapath doesn't use bpf anymore, then destroy bpf */
	bpf_eth_cbi_wait(bc);
	rte_bpf_destroy(bc->bpf);
	bpf_eth_cbi_cleanup(bc);
}

static void
bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *bc;

	bc = bpf_eth_cbh_find(cbh, port, queue);
	if (bc == NULL || bc->cb == NULL)
		return;

	if (cbh->type == BPF_ETH_RX)
		rte_eth_remove_rx_callback(port, queue, bc->cb);
	else
		rte_eth_remove_tx_callback(port, queue, bc->cb);

	bpf_eth_cbi_unload(bc);
}


void
rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

void
rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

static int
bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbi *bc;
	struct rte_bpf *bpf;
	rte_rx_callback_fn frx;
	rte_tx_callback_fn ftx;
	struct rte_bpf_jit jit;

	frx = NULL;
	ftx = NULL;

	if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
			queue >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	if (cbh->type == BPF_ETH_RX)
		frx = select_rx_callback(prm->prog_arg.type, flags);
	else
		ftx = select_tx_callback(prm->prog_arg.type, flags);

	if (frx == NULL && ftx == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
			__func__, port, queue);
		return -EINVAL;
	}

	bpf = rte_bpf_elf_load(prm, fname, sname);
	if (bpf == NULL)
		return -rte_errno;

	rte_bpf_get_jit(bpf, &jit);

	if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
			__func__, port, queue);
		rte_bpf_destroy(bpf);
		return -ENOTSUP;
	}

	/* setup/update global callback info */
	bc = bpf_eth_cbh_add(cbh, port, queue);
	if (bc == NULL) {
		/* don't leak the loaded program on this error path */
		rte_bpf_destroy(bpf);
		return -ENOMEM;
	}

	/* remove old one, if any */
	if (bc->cb != NULL)
		bpf_eth_unload(cbh, port, queue);

	bc->bpf = bpf;
	bc->jit = jit;

	if (cbh->type == BPF_ETH_RX)
		bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
	else
		bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);

	if (bc->cb == NULL) {
		rc = -rte_errno;
		rte_bpf_destroy(bpf);
		bpf_eth_cbi_cleanup(bc);
	} else
		rc = 0;

	return rc;
}

int
rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}
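
/*
 * Usage sketch (illustrative only; the file name, section name and sizes
 * below are placeholders, not taken from these sources):
 *
 *	struct rte_bpf_prm prm;
 *
 *	memset(&prm, 0, sizeof(prm));
 *	prm.prog_arg.type = RTE_BPF_ARG_PTR;
 *	prm.prog_arg.size = buf_size;   <- max packet data the program reads
 *
 *	rc = rte_bpf_eth_rx_elf_load(port_id, queue_id, &prm,
 *		"filter.o", ".text", RTE_BPF_ETH_F_JIT);
 *	...
 *	rte_bpf_eth_rx_unload(port_id, queue_id);
 *
 * prm.xsym/prm.nb_xsym would additionally describe any external symbols
 * the program references.
 */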

int
rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}