xref: /dpdk/lib/bpf/bpf_pkt.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

#include <rte_bpf_ethdev.h>
#include "bpf_impl.h"

/*
 * information about installed BPF rx/tx callback
 */

struct bpf_eth_cbi {
	/* used by both data & control path */
	uint32_t use;    /* usage counter */
	const struct rte_eth_rxtx_callback *cb;  /* callback handle */
	struct rte_bpf *bpf;
	struct rte_bpf_jit jit;
	/* used by control path only */
	LIST_ENTRY(bpf_eth_cbi) link;
	uint16_t port;
	uint16_t queue;
} __rte_cache_aligned;

/*
 * Odd number means that callback is used by datapath.
 * Even number means that callback is not used by datapath.
 */
#define BPF_ETH_CBI_INUSE  1
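/*
 * The datapath increments 'use' on entry to a callback (making it odd)
 * and again on exit (making it even). The control path samples the
 * counter and, if it is odd, spins until the value changes, which
 * guarantees that the in-flight RX/TX burst that might still reference
 * the old BPF program has completed (see bpf_eth_cbi_wait() below).
 */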

/*
 * List to manage RX/TX installed callbacks.
 */
LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);

enum {
	BPF_ETH_RX,
	BPF_ETH_TX,
	BPF_ETH_NUM,
};

/*
 * information about all installed BPF rx/tx callbacks
 */
struct bpf_eth_cbh {
	rte_spinlock_t lock;
	struct bpf_eth_cbi_list list;
	uint32_t type;
};

static struct bpf_eth_cbh rx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_RX,
};

static struct bpf_eth_cbh tx_cbh = {
	.lock = RTE_SPINLOCK_INITIALIZER,
	.list = LIST_HEAD_INITIALIZER(list),
	.type = BPF_ETH_TX,
};

/*
 * Marks given callback as used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
{
	cbi->use++;
	/* make sure no store/load reordering could happen */
	rte_smp_mb();
}

/*
 * Marks given callback as not used by datapath.
 */
static __rte_always_inline void
bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
{
	/* make sure all previous loads are completed */
	rte_smp_rmb();
	cbi->use++;
}

/*
 * Waits till datapath finishes using given callback.
 */
static void
bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
{
	uint32_t nuse, puse;

	/* make sure all previous loads and stores are completed */
	rte_smp_mb();

	puse = cbi->use;

	/* in use, busy wait till current RX/TX iteration is finished */
	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
		do {
			rte_pause();
			rte_compiler_barrier();
			nuse = cbi->use;
		} while (nuse == puse);
	}
}

static void
bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
{
	bc->bpf = NULL;
	memset(&bc->jit, 0, sizeof(bc->jit));
}

static struct bpf_eth_cbi *
bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	LIST_FOREACH(cbi, &cbh->list, link) {
		if (cbi->port == port && cbi->queue == queue)
			break;
	}
	return cbi;
}

static struct bpf_eth_cbi *
bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *cbi;

	/* return an existing one */
	cbi = bpf_eth_cbh_find(cbh, port, queue);
	if (cbi != NULL)
		return cbi;

	cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
	if (cbi != NULL) {
		cbi->port = port;
		cbi->queue = queue;
		LIST_INSERT_HEAD(&cbh->list, cbi, link);
	}
	return cbi;
}

/*
 * BPF packet processing routines.
 */

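/*
 * Compacts the burst according to the per-packet BPF return codes:
 * packets with a non-zero rc[] are kept at the front of mb[]; packets
 * with a zero rc[] are either freed (drop != 0, the RX case) or moved
 * behind the accepted ones in mb[] without being freed (drop == 0, the
 * TX case). Returns the number of accepted packets.
 */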
static inline uint32_t
apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
	uint32_t drop)
{
	uint32_t i, j, k;
	struct rte_mbuf *dr[num];

	for (i = 0, j = 0, k = 0; i != num; i++) {

		/* filter matches */
		if (rc[i] != 0)
			mb[j++] = mb[i];
		/* no match */
		else
			dr[k++] = mb[i];
	}

	if (drop != 0) {
		/* free filtered out mbufs */
		for (i = 0; i != k; i++)
			rte_pktmbuf_free(dr[i]);
	} else {
		/* copy filtered out mbufs beyond good ones */
		for (i = 0; i != k; i++)
			mb[j + i] = dr[i];
	}

	return j;
}

static inline uint32_t
pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint32_t i;
	void *dp[num];
	uint64_t rc[num];

	for (i = 0; i != num; i++)
		dp[i] = rte_pktmbuf_mtod(mb[i], void *);

	rte_bpf_exec_burst(bpf, dp, rc, num);
	return apply_filter(mb, rc, num, drop);
}

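/*
 * The JIT flavours below call the generated native code directly, one
 * packet at a time, and count how many packets returned zero (reject),
 * so apply_filter() can be skipped entirely when every packet passes.
 */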
static inline uint32_t
pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	void *dp;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		dp = rte_pktmbuf_mtod(mb[i], void *);
		rc[i] = jit->func(dp);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

static inline uint32_t
pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
	uint32_t drop)
{
	uint64_t rc[num];

	rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
	return apply_filter(mb, rc, num, drop);
}

static inline uint32_t
pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
	uint32_t num, uint32_t drop)
{
	uint32_t i, n;
	uint64_t rc[num];

	n = 0;
	for (i = 0; i != num; i++) {
		rc[i] = jit->func(mb[i]);
		n += (rc[i] == 0);
	}

	if (n != 0)
		num = apply_filter(mb, rc, num, drop);

	return num;
}

/*
 * RX/TX callbacks for raw data bpf.
 */

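/*
 * The "raw data" callbacks hand the BPF program a pointer to the packet
 * data (rte_pktmbuf_mtod(), RTE_BPF_ARG_PTR); the "mbuf" callbacks
 * further below pass the struct rte_mbuf pointer itself
 * (RTE_BPF_ARG_PTR_MBUF). RX callbacks invoke the filters with drop == 1,
 * so rejected packets are freed; TX callbacks use drop == 0 and only trim
 * the burst without freeing rejected mbufs.
 */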
static uint16_t
bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;

	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

/*
 * RX/TX callbacks for mbuf.
 */

static uint16_t
bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

static uint16_t
bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
{
	struct bpf_eth_cbi *cbi;
	uint16_t rc;

	cbi = user_param;
	bpf_eth_cbi_inuse(cbi);
	rc = (cbi->cb != NULL) ?
		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
		nb_pkts;
	bpf_eth_cbi_unuse(cbi);
	return rc;
}

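/*
 * Picks the callback flavour that matches the BPF program argument type
 * (raw packet data vs. mbuf) and the JIT flag; returns NULL when the
 * combination is not supported.
 */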
static rte_rx_callback_fn
select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_rx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_rx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_rx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_rx_callback_mb_vm;

	return NULL;
}

static rte_tx_callback_fn
select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
{
	if (flags & RTE_BPF_ETH_F_JIT) {
		if (type == RTE_BPF_ARG_PTR)
			return bpf_tx_callback_jit;
		else if (type == RTE_BPF_ARG_PTR_MBUF)
			return bpf_tx_callback_mb_jit;
	} else if (type == RTE_BPF_ARG_PTR)
		return bpf_tx_callback_vm;
	else if (type == RTE_BPF_ARG_PTR_MBUF)
		return bpf_tx_callback_mb_vm;

	return NULL;
}

/*
 * Helper function to perform BPF unload for given port/queue.
 * We have to introduce extra complexity (and a possible slowdown) here,
 * as right now there is no safe generic way to remove an RX/TX callback
 * while IO is active.
 * We still don't free the memory allocated for the callback handle itself;
 * again, right now there is no safe way to do that without first stopping
 * RX/TX on the given port/queue.
 */
static void
bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
{
	/* mark this cbi as empty */
	bc->cb = NULL;
	rte_smp_mb();

	/* make sure datapath doesn't use bpf anymore, then destroy bpf */
	bpf_eth_cbi_wait(bc);
	rte_bpf_destroy(bc->bpf);
	bpf_eth_cbi_cleanup(bc);
}

static void
bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbi *bc;

	bc = bpf_eth_cbh_find(cbh, port, queue);
	if (bc == NULL || bc->cb == NULL)
		return;

	if (cbh->type == BPF_ETH_RX)
		rte_eth_remove_rx_callback(port, queue, bc->cb);
	else
		rte_eth_remove_tx_callback(port, queue, bc->cb);

	bpf_eth_cbi_unload(bc);
}


void
rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

void
rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
{
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	bpf_eth_unload(cbh, port, queue);
	rte_spinlock_unlock(&cbh->lock);
}

static int
bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbi *bc;
	struct rte_bpf *bpf;
	rte_rx_callback_fn frx;
	rte_tx_callback_fn ftx;
	struct rte_bpf_jit jit;

	frx = NULL;
	ftx = NULL;

	if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
			queue >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	if (cbh->type == BPF_ETH_RX)
		frx = select_rx_callback(prm->prog_arg.type, flags);
	else
		ftx = select_tx_callback(prm->prog_arg.type, flags);

	if (frx == NULL && ftx == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
			__func__, port, queue);
		return -EINVAL;
	}

	bpf = rte_bpf_elf_load(prm, fname, sname);
	if (bpf == NULL)
		return -rte_errno;

	rte_bpf_get_jit(bpf, &jit);

	if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
		RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
			__func__, port, queue);
		rte_bpf_destroy(bpf);
		return -ENOTSUP;
	}

	/* setup/update global callback info */
	bc = bpf_eth_cbh_add(cbh, port, queue);
	if (bc == NULL)
		return -ENOMEM;

	/* remove old one, if any */
	if (bc->cb != NULL)
		bpf_eth_unload(cbh, port, queue);

	bc->bpf = bpf;
	bc->jit = jit;

	if (cbh->type == BPF_ETH_RX)
		bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
	else
		bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);

	if (bc->cb == NULL) {
		rc = -rte_errno;
		rte_bpf_destroy(bpf);
		bpf_eth_cbi_cleanup(bc);
	} else
		rc = 0;

	return rc;
}

int
rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &rx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}

int
rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
	uint32_t flags)
{
	int32_t rc;
	struct bpf_eth_cbh *cbh;

	cbh = &tx_cbh;
	rte_spinlock_lock(&cbh->lock);
	rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
	rte_spinlock_unlock(&cbh->lock);

	return rc;
}
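
/*
 * Usage sketch (illustrative only, not part of this file): how an
 * application might attach a JIT-ed BPF filter from an ELF file to an
 * RX queue and later detach it. The file name, section name and error
 * handling are hypothetical, and other rte_bpf_prm fields (argument
 * size, xsym table, etc.) are omitted for brevity.
 *
 *	struct rte_bpf_prm prm = {
 *		.prog_arg = {
 *			.type = RTE_BPF_ARG_PTR,	// program sees raw packet data
 *		},
 *	};
 *
 *	// RX: packets for which the program returns 0 are dropped
 *	if (rte_bpf_eth_rx_elf_load(port_id, queue_id, &prm,
 *			"filter.o", ".text", RTE_BPF_ETH_F_JIT) != 0)
 *		rte_exit(EXIT_FAILURE, "cannot load BPF filter\n");
 *
 *	// ... run traffic ...
 *
 *	rte_bpf_eth_rx_unload(port_id, queue_id);
 */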