xref: /dpdk/lib/mbuf/rte_mbuf_dyn.c (revision 02d36ef6a9528e0f4a3403956e66bcea5fadbf8c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 6WIND S.A.
3  */
4 
5 #include <sys/queue.h>
6 #include <stdint.h>
7 #include <limits.h>
8 
9 #include <rte_common.h>
10 #include <rte_eal.h>
11 #include <rte_eal_memconfig.h>
12 #include <rte_tailq.h>
13 #include <rte_errno.h>
14 #include <rte_malloc.h>
15 #include <rte_string_fns.h>
16 #include <rte_bitops.h>
17 #include <rte_mbuf.h>
18 #include <rte_mbuf_dyn.h>
19 
20 #define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
21 
/* An element of the dynamic field list: the user-supplied registration
 * parameters plus the offset that was reserved in struct rte_mbuf.
 */
struct mbuf_dynfield_elt {
	struct rte_mbuf_dynfield params; /**< copy of registration params */
	size_t offset;                   /**< reserved offset in the mbuf */
};
TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
27 
/* Tailq of all registered dynamic fields, shared across primary and
 * secondary processes through the EAL tailq mechanism.
 */
static struct rte_tailq_elem mbuf_dynfield_tailq = {
	.name = "RTE_MBUF_DYNFIELD",
};
EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
32 
/* An element of the dynamic flag list: the user-supplied registration
 * parameters plus the bit number that was reserved in mbuf->ol_flags.
 */
struct mbuf_dynflag_elt {
	struct rte_mbuf_dynflag params; /**< copy of registration params */
	unsigned int bitnum;            /**< reserved bit in ol_flags */
};
TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
38 
/* Tailq of all registered dynamic flags, shared across primary and
 * secondary processes through the EAL tailq mechanism.
 */
static struct rte_tailq_elem mbuf_dynflag_tailq = {
	.name = "RTE_MBUF_DYNFLAG",
};
EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
43 
/* Layout of the shared memzone describing which mbuf bytes and which
 * ol_flags bits are still available for dynamic registration.
 */
struct mbuf_dyn_shm {
	/**
	 * For each mbuf byte, free_space[i] != 0 if space is free.
	 * The value is the size of the biggest aligned element that
	 * can fit in the zone.
	 */
	uint8_t free_space[sizeof(struct rte_mbuf)];
	/** Bitfield of available flags. */
	uint64_t free_flags;
};
/* Pointer to the shared memzone; NULL until init_shared_mem() runs. */
static struct mbuf_dyn_shm *shm;
55 
/* Set the value of free_space[] according to the size and alignment of
 * the free areas. This helps to select the best place when reserving a
 * dynamic field. Assume tailq is locked.
 */
static void
process_score(void)
{
	size_t off, align, size, i;

	/* first, erase previous info: reset every free byte's score to 1 */
	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
		if (shm->free_space[i])
			shm->free_space[i] = 1;
	}

	off = 0;
	while (off < sizeof(struct rte_mbuf)) {
		/* get the size of the free zone */
		for (size = 0; (off + size) < sizeof(struct rte_mbuf) &&
			     shm->free_space[off + size]; size++)
			;
		if (size == 0) {
			/* byte at 'off' is occupied, skip it */
			off++;
			continue;
		}

		/* get the alignment of biggest object that can fit in
		 * the zone at this offset.
		 */
		for (align = 1;
		     (off % (align << 1)) == 0 && (align << 1) <= size;
		     align <<= 1)
			;

		/* save it in free_space[], keeping the best score seen */
		for (i = off; i < off + align; i++)
			shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);

		/* rescore the rest of the zone from the next sub-offset */
		off += align;
	}
}
97 
/* Mark the area occupied by a mbuf field as available in the shm.
 * Must be a macro: offsetof()/sizeof() need the literal field name.
 */
#define mark_free(field)						\
	memset(&shm->free_space[offsetof(struct rte_mbuf, field)],	\
		1, sizeof(((struct rte_mbuf *)0)->field))
102 
/* Allocate and initialize the shared memory. Assume tailq is locked.
 * Return 0 on success, -1 on failure (memzone reservation or lookup
 * failed). On success the global 'shm' pointer is valid.
 */
static int
init_shared_mem(void)
{
	const struct rte_memzone *mz;
	uint64_t mask;

	/* the primary process creates the memzone; secondaries attach
	 * to it by name
	 */
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
						sizeof(struct mbuf_dyn_shm),
						SOCKET_ID_ANY, 0,
						RTE_CACHE_LINE_SIZE);
	} else {
		mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
	}
	if (mz == NULL) {
		RTE_LOG(ERR, MBUF, "Failed to get mbuf dyn shared memory\n");
		return -1;
	}

	shm = mz->addr;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		/* init free_space, keep it sync'd with
		 * rte_mbuf_dynfield_copy().
		 */
		memset(shm, 0, sizeof(*shm));
		mark_free(dynfield1);
#if !RTE_IOVA_AS_PA
		/* dynfield2 only exists when IOVA-as-PA is disabled */
		mark_free(dynfield2);
#endif

		/* init free_flags: set one bit per free ol_flags bit */
		for (mask = RTE_MBUF_F_FIRST_FREE; mask <= RTE_MBUF_F_LAST_FREE; mask <<= 1)
			shm->free_flags |= mask;

		process_score();
	}

	return 0;
}
144 
145 /* check if this offset can be used */
146 static int
147 check_offset(size_t offset, size_t size, size_t align)
148 {
149 	size_t i;
150 
151 	if ((offset & (align - 1)) != 0)
152 		return -1;
153 	if (offset + size > sizeof(struct rte_mbuf))
154 		return -1;
155 
156 	for (i = 0; i < size; i++) {
157 		if (!shm->free_space[i + offset])
158 			return -1;
159 	}
160 
161 	return 0;
162 }
163 
164 /* assume tailq is locked */
165 static struct mbuf_dynfield_elt *
166 __mbuf_dynfield_lookup(const char *name)
167 {
168 	struct mbuf_dynfield_list *mbuf_dynfield_list;
169 	struct mbuf_dynfield_elt *mbuf_dynfield;
170 	struct rte_tailq_entry *te;
171 
172 	mbuf_dynfield_list = RTE_TAILQ_CAST(
173 		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
174 
175 	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
176 		mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
177 		if (strcmp(name, mbuf_dynfield->params.name) == 0)
178 			break;
179 	}
180 
181 	if (te == NULL || mbuf_dynfield == NULL) {
182 		rte_errno = ENOENT;
183 		return NULL;
184 	}
185 
186 	return mbuf_dynfield;
187 }
188 
189 int
190 rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
191 {
192 	struct mbuf_dynfield_elt *mbuf_dynfield;
193 
194 	rte_mcfg_tailq_read_lock();
195 	if (shm == NULL && init_shared_mem() < 0)
196 		mbuf_dynfield = NULL;
197 	else
198 		mbuf_dynfield = __mbuf_dynfield_lookup(name);
199 	rte_mcfg_tailq_read_unlock();
200 
201 	if (mbuf_dynfield == NULL)
202 		return -1;
203 
204 	if (params != NULL)
205 		memcpy(params, &mbuf_dynfield->params, sizeof(*params));
206 
207 	return mbuf_dynfield->offset;
208 }
209 
210 static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
211 		const struct rte_mbuf_dynfield *params2)
212 {
213 	if (strcmp(params1->name, params2->name))
214 		return -1;
215 	if (params1->size != params2->size)
216 		return -1;
217 	if (params1->align != params2->align)
218 		return -1;
219 	if (params1->flags != params2->flags)
220 		return -1;
221 	return 0;
222 }
223 
/* Reserve room in the mbuf for a dynamic field, at offset 'req' (or at
 * any suitable offset when req == SIZE_MAX). Return the offset on
 * success, -1 on error with rte_errno set. Assume tailq is locked.
 */
static int
__rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
				size_t req)
{
	struct mbuf_dynfield_list *mbuf_dynfield_list;
	struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
	struct rte_tailq_entry *te = NULL;
	unsigned int best_zone = UINT_MAX;
	size_t i, offset;
	int ret;

	if (shm == NULL && init_shared_mem() < 0)
		return -1;

	/* registration is idempotent: an existing field with the same
	 * parameters and a compatible offset is returned as-is
	 */
	mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
	if (mbuf_dynfield != NULL) {
		if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
			rte_errno = EEXIST;
			return -1;
		}
		if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
			rte_errno = EEXIST;
			return -1;
		}
		return mbuf_dynfield->offset;
	}

	/* only the primary process may modify the shared layout */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		rte_errno = EPERM;
		return -1;
	}

	if (req == SIZE_MAX) {
		/* Find the best place to put this field: we search the
		 * lowest value of shm->free_space[offset]: the zones
		 * containing room for larger fields are kept for later.
		 */
		for (offset = 0;
		     offset < sizeof(struct rte_mbuf);
		     offset++) {
			if (check_offset(offset, params->size,
						params->align) == 0 &&
					shm->free_space[offset] < best_zone) {
				best_zone = shm->free_space[offset];
				req = offset;
			}
		}
		if (req == SIZE_MAX) {
			rte_errno = ENOENT;
			return -1;
		}
	} else {
		if (check_offset(req, params->size, params->align) < 0) {
			rte_errno = EBUSY;
			return -1;
		}
	}

	offset = req;
	mbuf_dynfield_list = RTE_TAILQ_CAST(
		mbuf_dynfield_tailq.head, mbuf_dynfield_list);

	te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
	if (mbuf_dynfield == NULL) {
		rte_free(te);
		rte_errno = ENOMEM;
		return -1;
	}

	/* strlcpy() is used here only to validate the name length; the
	 * name itself is copied by the memcpy() of the params below
	 */
	ret = strlcpy(mbuf_dynfield->params.name, params->name,
		sizeof(mbuf_dynfield->params.name));
	if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
		rte_errno = ENAMETOOLONG;
		rte_free(mbuf_dynfield);
		rte_free(te);
		return -1;
	}
	memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
	mbuf_dynfield->offset = offset;
	te->data = mbuf_dynfield;

	TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);

	/* mark the reserved bytes as used, then rescore the free zones */
	for (i = offset; i < offset + params->size; i++)
		shm->free_space[i] = 0;
	process_score();

	RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zd\n",
		params->name, params->size, params->align, params->flags,
		offset);

	return offset;
}
324 
325 int
326 rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
327 				size_t req)
328 {
329 	int ret;
330 
331 	if (params->size >= sizeof(struct rte_mbuf)) {
332 		rte_errno = EINVAL;
333 		return -1;
334 	}
335 	if (!rte_is_power_of_2(params->align)) {
336 		rte_errno = EINVAL;
337 		return -1;
338 	}
339 	if (params->flags != 0) {
340 		rte_errno = EINVAL;
341 		return -1;
342 	}
343 
344 	rte_mcfg_tailq_write_lock();
345 	ret = __rte_mbuf_dynfield_register_offset(params, req);
346 	rte_mcfg_tailq_write_unlock();
347 
348 	return ret;
349 }
350 
/* Register a dynamic field, letting the library choose the offset
 * (SIZE_MAX means "any suitable location").
 */
int
rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
{
	int offset = rte_mbuf_dynfield_register_offset(params, SIZE_MAX);

	return offset;
}
356 
357 /* assume tailq is locked */
358 static struct mbuf_dynflag_elt *
359 __mbuf_dynflag_lookup(const char *name)
360 {
361 	struct mbuf_dynflag_list *mbuf_dynflag_list;
362 	struct mbuf_dynflag_elt *mbuf_dynflag;
363 	struct rte_tailq_entry *te;
364 
365 	mbuf_dynflag_list = RTE_TAILQ_CAST(
366 		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
367 
368 	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
369 		mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
370 		if (strncmp(name, mbuf_dynflag->params.name,
371 				RTE_MBUF_DYN_NAMESIZE) == 0)
372 			break;
373 	}
374 
375 	if (te == NULL) {
376 		rte_errno = ENOENT;
377 		return NULL;
378 	}
379 
380 	return mbuf_dynflag;
381 }
382 
383 int
384 rte_mbuf_dynflag_lookup(const char *name,
385 			struct rte_mbuf_dynflag *params)
386 {
387 	struct mbuf_dynflag_elt *mbuf_dynflag;
388 
389 	rte_mcfg_tailq_read_lock();
390 	if (shm == NULL && init_shared_mem() < 0)
391 		mbuf_dynflag = NULL;
392 	else
393 		mbuf_dynflag = __mbuf_dynflag_lookup(name);
394 	rte_mcfg_tailq_read_unlock();
395 
396 	if (mbuf_dynflag == NULL)
397 		return -1;
398 
399 	if (params != NULL)
400 		memcpy(params, &mbuf_dynflag->params, sizeof(*params));
401 
402 	return mbuf_dynflag->bitnum;
403 }
404 
405 static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
406 		const struct rte_mbuf_dynflag *params2)
407 {
408 	if (strcmp(params1->name, params2->name))
409 		return -1;
410 	if (params1->flags != params2->flags)
411 		return -1;
412 	return 0;
413 }
414 
/* Reserve a bit in mbuf->ol_flags, at bit number 'req' (or at any free
 * bit when req == UINT_MAX). Return the bit number on success, -1 on
 * error with rte_errno set. Assume tailq is locked.
 */
static int
__rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
				unsigned int req)
{
	struct mbuf_dynflag_list *mbuf_dynflag_list;
	struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
	struct rte_tailq_entry *te = NULL;
	unsigned int bitnum;
	int ret;

	if (shm == NULL && init_shared_mem() < 0)
		return -1;

	/* registration is idempotent: an existing flag with the same
	 * parameters and a compatible bit number is returned as-is
	 */
	mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
	if (mbuf_dynflag != NULL) {
		if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
			rte_errno = EEXIST;
			return -1;
		}
		if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
			rte_errno = EEXIST;
			return -1;
		}
		return mbuf_dynflag->bitnum;
	}

	/* only the primary process may modify the shared layout */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		rte_errno = EPERM;
		return -1;
	}

	if (req == UINT_MAX) {
		if (shm->free_flags == 0) {
			rte_errno = ENOENT;
			return -1;
		}
		/* pick the lowest free bit */
		bitnum = rte_bsf64(shm->free_flags);
	} else {
		if ((shm->free_flags & (1ULL << req)) == 0) {
			rte_errno = EBUSY;
			return -1;
		}
		bitnum = req;
	}

	mbuf_dynflag_list = RTE_TAILQ_CAST(
		mbuf_dynflag_tailq.head, mbuf_dynflag_list);

	te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
	if (te == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
	if (mbuf_dynflag == NULL) {
		rte_free(te);
		rte_errno = ENOMEM;
		return -1;
	}

	/* the strlcpy() return value also validates the name length */
	ret = strlcpy(mbuf_dynflag->params.name, params->name,
		sizeof(mbuf_dynflag->params.name));
	if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
		rte_free(mbuf_dynflag);
		rte_free(te);
		rte_errno = ENAMETOOLONG;
		return -1;
	}
	mbuf_dynflag->bitnum = bitnum;
	te->data = mbuf_dynflag;

	TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);

	/* mark the bit as used in the shared map */
	shm->free_flags &= ~(1ULL << bitnum);

	RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
		params->name, params->flags, bitnum);

	return bitnum;
}
497 
498 int
499 rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
500 				unsigned int req)
501 {
502 	int ret;
503 
504 	if (params->flags != 0) {
505 		rte_errno = EINVAL;
506 		return -1;
507 	}
508 	if (req >= RTE_SIZEOF_FIELD(struct rte_mbuf, ol_flags) * CHAR_BIT &&
509 			req != UINT_MAX) {
510 		rte_errno = EINVAL;
511 		return -1;
512 	}
513 
514 	rte_mcfg_tailq_write_lock();
515 	ret = __rte_mbuf_dynflag_register_bitnum(params, req);
516 	rte_mcfg_tailq_write_unlock();
517 
518 	return ret;
519 }
520 
/* Register a dynamic flag, letting the library choose the bit number
 * (UINT_MAX means "any free bit").
 */
int
rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
{
	int bitnum = rte_mbuf_dynflag_register_bitnum(params, UINT_MAX);

	return bitnum;
}
526 
/* Dump the registered dynamic fields and flags, plus the free-space
 * and free-bit maps, to the given stream. Takes the tailq write lock
 * because init_shared_mem() may have to create the memzone.
 */
void rte_mbuf_dyn_dump(FILE *out)
{
	struct mbuf_dynfield_list *mbuf_dynfield_list;
	struct mbuf_dynfield_elt *dynfield;
	struct mbuf_dynflag_list *mbuf_dynflag_list;
	struct mbuf_dynflag_elt *dynflag;
	struct rte_tailq_entry *te;
	size_t i;

	rte_mcfg_tailq_write_lock();
	if (shm == NULL && init_shared_mem() < 0) {
		rte_mcfg_tailq_write_unlock();
		return;
	}

	/* one line per registered field */
	fprintf(out, "Reserved fields:\n");
	mbuf_dynfield_list = RTE_TAILQ_CAST(
		mbuf_dynfield_tailq.head, mbuf_dynfield_list);
	TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
		dynfield = (struct mbuf_dynfield_elt *)te->data;
		fprintf(out, "  name=%s offset=%zd size=%zd align=%zd flags=%x\n",
			dynfield->params.name, dynfield->offset,
			dynfield->params.size, dynfield->params.align,
			dynfield->params.flags);
	}
	/* one line per registered flag */
	fprintf(out, "Reserved flags:\n");
	mbuf_dynflag_list = RTE_TAILQ_CAST(
		mbuf_dynflag_tailq.head, mbuf_dynflag_list);
	TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
		dynflag = (struct mbuf_dynflag_elt *)te->data;
		fprintf(out, "  name=%s bitnum=%u flags=%x\n",
			dynflag->params.name, dynflag->bitnum,
			dynflag->params.flags);
	}
	/* hex map of free_space[], 8 bytes per output line */
	fprintf(out, "Free space in mbuf (0 = occupied, value = free zone alignment):\n");
	for (i = 0; i < sizeof(struct rte_mbuf); i++) {
		if ((i % 8) == 0)
			fprintf(out, "  %4.4zx: ", i);
		fprintf(out, "%2.2x%s", shm->free_space[i],
			(i % 8 != 7) ? " " : "\n");
	}
	/* bit map of free_flags, 8 bits per output line */
	fprintf(out, "Free bit in mbuf->ol_flags (0 = occupied, 1 = free):\n");
	for (i = 0; i < sizeof(uint64_t) * CHAR_BIT; i++) {
		if ((i % 8) == 0)
			fprintf(out, "  %4.4zx: ", i);
		fprintf(out, "%1.1x%s", (shm->free_flags & (1ULL << i)) ? 1 : 0,
			(i % 8 != 7) ? " " : "\n");
	}

	rte_mcfg_tailq_write_unlock();
}
578 
579 static int
580 rte_mbuf_dyn_timestamp_register(int *field_offset, uint64_t *flag,
581 		const char *direction, const char *flag_name)
582 {
583 	static const struct rte_mbuf_dynfield field_desc = {
584 		.name = RTE_MBUF_DYNFIELD_TIMESTAMP_NAME,
585 		.size = sizeof(rte_mbuf_timestamp_t),
586 		.align = __alignof__(rte_mbuf_timestamp_t),
587 	};
588 	struct rte_mbuf_dynflag flag_desc = {};
589 	int offset;
590 
591 	offset = rte_mbuf_dynfield_register(&field_desc);
592 	if (offset < 0) {
593 		RTE_LOG(ERR, MBUF,
594 			"Failed to register mbuf field for timestamp\n");
595 		return -1;
596 	}
597 	if (field_offset != NULL)
598 		*field_offset = offset;
599 
600 	strlcpy(flag_desc.name, flag_name, sizeof(flag_desc.name));
601 	offset = rte_mbuf_dynflag_register(&flag_desc);
602 	if (offset < 0) {
603 		RTE_LOG(ERR, MBUF,
604 			"Failed to register mbuf flag for %s timestamp\n",
605 			direction);
606 		return -1;
607 	}
608 	if (flag != NULL)
609 		*flag = RTE_BIT64(offset);
610 
611 	return 0;
612 }
613 
614 int
615 rte_mbuf_dyn_rx_timestamp_register(int *field_offset, uint64_t *rx_flag)
616 {
617 	return rte_mbuf_dyn_timestamp_register(field_offset, rx_flag,
618 			"Rx", RTE_MBUF_DYNFLAG_RX_TIMESTAMP_NAME);
619 }
620 
621 int
622 rte_mbuf_dyn_tx_timestamp_register(int *field_offset, uint64_t *tx_flag)
623 {
624 	return rte_mbuf_dyn_timestamp_register(field_offset, tx_flag,
625 			"Tx", RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME);
626 }
627