xref: /dpdk/lib/table/rte_swx_table_learner.c (revision c56185fc183fc0532d2f03aaf04bbf0989ea91a5)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4 #include <string.h>
5 #include <stdio.h>
6 #include <errno.h>
7 
8 #include <rte_common.h>
9 #include <rte_cycles.h>
10 #include <rte_prefetch.h>
11 #include <rte_jhash.h>
12 #include <rte_hash_crc.h>
13 
14 #include "rte_swx_keycmp.h"
15 #include "rte_swx_table_learner.h"
16 
17 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES
18 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1
19 #endif
20 
21 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE
22 
23 #include <rte_malloc.h>
24 
25 static void *
26 env_calloc(size_t size, size_t alignment, int numa_node)
27 {
28 	return rte_zmalloc_socket(NULL, size, alignment, numa_node);
29 }
30 
31 static void
32 env_free(void *start, size_t size __rte_unused)
33 {
34 	rte_free(start);
35 }
36 
37 #else
38 
39 #include <numa.h>
40 
41 static void *
42 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node)
43 {
44 	void *start;
45 
46 	if (numa_available() == -1)
47 		return NULL;
48 
49 	start = numa_alloc_onnode(size, numa_node);
50 	if (!start)
51 		return NULL;
52 
53 	memset(start, 0, size);
54 	return start;
55 }
56 
57 static void
58 env_free(void *start, size_t size)
59 {
60 	if ((numa_available() == -1) || !start)
61 		return;
62 
63 	numa_free(start, size);
64 }
65 
66 #endif
67 
68 static void
69 table_keycpy(void *dst, void *src, uint32_t n_bytes)
70 {
71 	memcpy(dst, src, n_bytes);
72 }
73 
74 #define TABLE_KEYS_PER_BUCKET 4
75 #define TABLE_KEYS_PER_BUCKET_LOG2 2
76 
77 #define TABLE_BUCKET_USEFUL_SIZE \
78 	(TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint8_t)))
79 
80 #define TABLE_BUCKET_PAD_SIZE \
81 	(RTE_CACHE_LINE_SIZE - TABLE_BUCKET_USEFUL_SIZE)
82 
83 struct table_bucket {
84 	uint32_t time[TABLE_KEYS_PER_BUCKET];
85 	uint32_t sig[TABLE_KEYS_PER_BUCKET];
86 	uint8_t key_timeout_id[TABLE_KEYS_PER_BUCKET];
87 	uint8_t pad[TABLE_BUCKET_PAD_SIZE];
88 	uint8_t key[];
89 };
90 
91 struct table_params {
92 	/* The real key size. Must be non-zero. */
93 	size_t key_size;
94 
95 	/* The key size upgrated to the next power of 2. */
96 	size_t key_size_pow2;
97 
98 	/* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
99 	size_t key_size_log2;
100 
101 	/* The key offset within the key buffer. */
102 	size_t key_offset;
103 
104 	/* The real action data size. */
105 	size_t action_data_size;
106 
107 	/* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the
108 	 * next power of 2.
109 	 */
110 	size_t data_size_pow2;
111 
112 	/* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */
113 	size_t data_size_log2;
114 
115 	/* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */
116 	size_t n_buckets;
117 
118 	/* Bucket mask. Purpose: replace modulo with bitmask and operation. */
119 	size_t bucket_mask;
120 
121 	/* Total number of key bytes in the bucket, including the key padding bytes. There are
122 	 * (key_size_pow2 - key_size) padding bytes for each key in the bucket.
123 	 */
124 	size_t bucket_key_all_size;
125 
126 	/* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */
127 	size_t bucket_size;
128 
129 	/* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */
130 	size_t bucket_size_log2;
131 
132 	/* Hash function. */
133 	rte_swx_hash_func_t hash_func;
134 
135 	/* Key comparison function. */
136 	rte_swx_keycmp_func_t keycmp_func;
137 
138 	/* Set of all possible key timeout values measured in CPU clock cycles. */
139 	uint64_t key_timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX];
140 
141 	/* Number of key timeout values. */
142 	uint32_t n_key_timeouts;
143 
144 	/* Total memory size. */
145 	size_t total_size;
146 };
147 
148 struct table {
149 	/* Table parameters. */
150 	struct table_params params;
151 
152 	/* Table buckets. */
153 	uint8_t buckets[];
154 } __rte_cache_aligned;
155 
156 /* The timeout (in cycles) is stored in the table as a 32-bit value by truncating its least
157  * significant 32 bits. Therefore, to make sure the time is always advancing when adding the timeout
158  * value on top of the current time, the minimum timeout value is 1^32 cycles, which is 2 seconds on
159  * a 2 GHz CPU.
160  */
161 static uint64_t
162 timeout_convert(uint32_t timeout_in_seconds)
163 {
164 	uint64_t timeout_in_cycles = timeout_in_seconds * rte_get_tsc_hz();
165 
166 	if (!(timeout_in_cycles >> 32))
167 		timeout_in_cycles = 1LLU << 32;
168 
169 	return timeout_in_cycles;
170 }
171 
172 static int
173 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params)
174 {
175 	uint32_t i;
176 
177 	/* Check input parameters. */
178 	if (!params ||
179 	    !params->key_size ||
180 	    !params->n_keys_max ||
181 	    (params->n_keys_max > 1U << 31) ||
182 	    !params->key_timeout ||
183 	    !params->n_key_timeouts ||
184 	    (params->n_key_timeouts > RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX))
185 		return -EINVAL;
186 
187 	if (params->key_mask0) {
188 		for (i = 0; i < params->key_size; i++)
189 			if (params->key_mask0[i] != 0xFF)
190 				break;
191 
192 		if (i < params->key_size)
193 			return -EINVAL;
194 	}
195 
196 	for (i = 0; i < params->n_key_timeouts; i++)
197 		if (!params->key_timeout[i])
198 			return -EINVAL;
199 
200 	/* Key. */
201 	p->key_size = params->key_size;
202 
203 	p->key_size_pow2 = rte_align64pow2(p->key_size);
204 
205 	p->key_size_log2 = rte_ctz64(p->key_size_pow2);
206 
207 	p->key_offset = params->key_offset;
208 
209 	/* Data. */
210 	p->action_data_size = params->action_data_size;
211 
212 	p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);
213 
214 	p->data_size_log2 = rte_ctz64(p->data_size_pow2);
215 
216 	/* Buckets. */
217 	p->n_buckets = rte_align32pow2(params->n_keys_max);
218 
219 	p->bucket_mask = p->n_buckets - 1;
220 
221 	p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2;
222 
223 	p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) +
224 					 p->bucket_key_all_size +
225 					 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);
226 
227 	p->bucket_size_log2 = rte_ctz64(p->bucket_size);
228 
229 	p->hash_func = params->hash_func ? params->hash_func : rte_hash_crc;
230 
231 	p->keycmp_func = rte_swx_keycmp_func_get(params->key_size);
232 
233 	/* Timeout. */
234 	for (i = 0; i < params->n_key_timeouts; i++)
235 		p->key_timeout[i] = timeout_convert(params->key_timeout[i]);
236 
237 	p->n_key_timeouts = rte_align32pow2(params->n_key_timeouts);
238 
239 	for ( ; i < p->n_key_timeouts; i++)
240 		p->key_timeout[i] = p->key_timeout[0];
241 
242 	/* Total size. */
243 	p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size;
244 
245 	return 0;
246 }
247 
248 static inline struct table_bucket *
249 table_bucket_get(struct table *t, size_t bucket_id)
250 {
251 	return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2];
252 }
253 
254 static inline uint8_t *
255 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
256 {
257 	return &b->key[bucket_key_pos << t->params.key_size_log2];
258 }
259 
260 static inline uint64_t *
261 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
262 {
263 	return (uint64_t *)&b->key[t->params.bucket_key_all_size +
264 				   (bucket_key_pos << t->params.data_size_log2)];
265 }
266 
267 static inline size_t
268 table_entry_id_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
269 {
270 	size_t bucket_id = ((uint8_t *)b - t->buckets) >> t->params.bucket_size_log2;
271 
272 	return (bucket_id << TABLE_KEYS_PER_BUCKET_LOG2) + bucket_key_pos;
273 }
274 
275 uint64_t
276 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params)
277 {
278 	struct table_params p;
279 	int status;
280 
281 	status = table_params_get(&p, params);
282 
283 	return status ? 0 : p.total_size;
284 }
285 
286 void *
287 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node)
288 {
289 	struct table_params p;
290 	struct table *t;
291 	int status;
292 
293 	/* Check and process the input parameters. */
294 	status = table_params_get(&p, params);
295 	if (status)
296 		return NULL;
297 
298 	/* Memory allocation. */
299 	t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node);
300 	if (!t)
301 		return NULL;
302 
303 	/* Memory initialization. */
304 	memcpy(&t->params, &p, sizeof(struct table_params));
305 
306 	return t;
307 }
308 
309 void
310 rte_swx_table_learner_free(void *table)
311 {
312 	struct table *t = table;
313 
314 	if (!t)
315 		return;
316 
317 	env_free(t, t->params.total_size);
318 }
319 
320 int
321 rte_swx_table_learner_timeout_update(void *table,
322 				     uint32_t key_timeout_id,
323 				     uint32_t key_timeout)
324 {
325 	struct table *t = table;
326 
327 	if (!t ||
328 	    (key_timeout_id >= t->params.n_key_timeouts) ||
329 	    !key_timeout)
330 		return -EINVAL;
331 
332 	t->params.key_timeout[key_timeout_id] = timeout_convert(key_timeout);
333 
334 	return 0;
335 }
336 
337 struct mailbox {
338 	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
339 	struct table_bucket *bucket;
340 
341 	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
342 	uint32_t input_sig;
343 
344 	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
345 	uint8_t *input_key;
346 
347 	/* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */
348 	uint32_t hit;
349 
350 	/* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */
351 	size_t bucket_key_pos;
352 
353 	/* State. */
354 	int state;
355 };
356 
357 uint64_t
358 rte_swx_table_learner_mailbox_size_get(void)
359 {
360 	return sizeof(struct mailbox);
361 }
362 
363 int
364 rte_swx_table_learner_lookup(void *table,
365 			     void *mailbox,
366 			     uint64_t input_time,
367 			     uint8_t **key,
368 			     uint64_t *action_id,
369 			     uint8_t **action_data,
370 			     size_t *entry_id,
371 			     int *hit)
372 {
373 	struct table *t = table;
374 	struct mailbox *m = mailbox;
375 
376 	switch (m->state) {
377 	case 0: {
378 		uint8_t *input_key;
379 		struct table_bucket *b;
380 		size_t bucket_id;
381 		uint32_t input_sig;
382 
383 		input_key = &(*key)[t->params.key_offset];
384 		input_sig = t->params.hash_func(input_key, t->params.key_size, 0);
385 		bucket_id = input_sig & t->params.bucket_mask;
386 		b = table_bucket_get(t, bucket_id);
387 
388 		rte_prefetch0(b);
389 		rte_prefetch0(&b->key[0]);
390 		rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]);
391 
392 		m->bucket = b;
393 		m->input_key = input_key;
394 		m->input_sig = input_sig | 1;
395 		m->state = 1;
396 		return 0;
397 	}
398 
399 	case 1: {
400 		struct table_bucket *b = m->bucket;
401 		uint32_t i;
402 
403 		/* Search the input key through the bucket keys. */
404 		for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
405 			uint64_t time = b->time[i];
406 			uint32_t sig = b->sig[i];
407 			uint8_t *key = table_bucket_key_get(t, b, i);
408 
409 			time <<= 32;
410 
411 			if ((time > input_time) &&
412 			    (sig == m->input_sig) &&
413 			    t->params.keycmp_func(key, m->input_key, t->params.key_size)) {
414 				uint64_t *data = table_bucket_data_get(t, b, i);
415 
416 				/* Hit. */
417 				rte_prefetch0(data);
418 
419 				m->hit = 1;
420 				m->bucket_key_pos = i;
421 				m->state = 0;
422 
423 				*action_id = data[0];
424 				*action_data = (uint8_t *)&data[1];
425 				*entry_id = table_entry_id_get(t, b, i);
426 				*hit = 1;
427 				return 1;
428 			}
429 		}
430 
431 		/* Miss. */
432 		m->hit = 0;
433 		m->state = 0;
434 
435 		*hit = 0;
436 		return 1;
437 	}
438 
439 	default:
440 		/* This state should never be reached. Miss. */
441 		m->hit = 0;
442 		m->state = 0;
443 
444 		*hit = 0;
445 		return 1;
446 	}
447 }
448 
449 void
450 rte_swx_table_learner_rearm(void *table,
451 			    void *mailbox,
452 			    uint64_t input_time)
453 {
454 	struct table *t = table;
455 	struct mailbox *m = mailbox;
456 	struct table_bucket *b;
457 	size_t bucket_key_pos;
458 	uint64_t key_timeout;
459 	uint32_t key_timeout_id;
460 
461 	if (!m->hit)
462 		return;
463 
464 	b = m->bucket;
465 	bucket_key_pos = m->bucket_key_pos;
466 
467 	key_timeout_id = b->key_timeout_id[bucket_key_pos];
468 	key_timeout = t->params.key_timeout[key_timeout_id];
469 	b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
470 }
471 
472 void
473 rte_swx_table_learner_rearm_new(void *table,
474 				void *mailbox,
475 				uint64_t input_time,
476 				uint32_t key_timeout_id)
477 {
478 	struct table *t = table;
479 	struct mailbox *m = mailbox;
480 	struct table_bucket *b;
481 	size_t bucket_key_pos;
482 	uint64_t key_timeout;
483 
484 	if (!m->hit)
485 		return;
486 
487 	b = m->bucket;
488 	bucket_key_pos = m->bucket_key_pos;
489 
490 	key_timeout_id &= t->params.n_key_timeouts - 1;
491 	key_timeout = t->params.key_timeout[key_timeout_id];
492 	b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
493 	b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id;
494 }
495 
496 uint32_t
497 rte_swx_table_learner_add(void *table,
498 			  void *mailbox,
499 			  uint64_t input_time,
500 			  uint64_t action_id,
501 			  uint8_t *action_data,
502 			  uint32_t key_timeout_id)
503 {
504 	struct table *t = table;
505 	struct mailbox *m = mailbox;
506 	struct table_bucket *b = m->bucket;
507 	uint64_t key_timeout;
508 	uint32_t i;
509 
510 	/* Adjust the key timeout ID to fit the valid range. */
511 	key_timeout_id &= t->params.n_key_timeouts - 1;
512 	key_timeout = t->params.key_timeout[key_timeout_id];
513 
514 	/* Lookup hit: The following bucket fields need to be updated:
515 	 * - key (key, sig): NO (already correctly set).
516 	 * - key timeout (key_timeout_id, time): YES.
517 	 * - key data (data): YES.
518 	 */
519 	if (m->hit) {
520 		size_t bucket_key_pos = m->bucket_key_pos;
521 		uint64_t *data = table_bucket_data_get(t, b, bucket_key_pos);
522 
523 		/* Install the key timeout. */
524 		b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
525 		b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id;
526 
527 		/* Install the key data. */
528 		data[0] = action_id;
529 		if (t->params.action_data_size && action_data)
530 			memcpy(&data[1], action_data, t->params.action_data_size);
531 
532 		return 0;
533 	}
534 
535 	/* Lookup miss: Search for a free position in the current bucket and install the key. */
536 	for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
537 		uint64_t time = b->time[i];
538 
539 		time <<= 32;
540 
541 		/* Free position: Either there was never a key installed here, so the key time is
542 		 * set to zero (the init value), which is always less than the current time, or this
543 		 * position was used before, but the key expired (the key time is in the past).
544 		 */
545 		if (time < input_time) {
546 			uint8_t *key = table_bucket_key_get(t, b, i);
547 			uint64_t *data = table_bucket_data_get(t, b, i);
548 
549 			/* Install the key and the key timeout. */
550 			b->time[i] = (input_time + key_timeout) >> 32;
551 			b->sig[i] = m->input_sig;
552 			b->key_timeout_id[i] = (uint8_t)key_timeout_id;
553 			table_keycpy(key, m->input_key, t->params.key_size);
554 
555 			/* Install the key data. */
556 			data[0] = action_id;
557 			if (t->params.action_data_size && action_data)
558 				memcpy(&data[1], action_data, t->params.action_data_size);
559 
560 			/* Mailbox. */
561 			m->hit = 1;
562 			m->bucket_key_pos = i;
563 
564 			return 0;
565 		}
566 	}
567 
568 	/* Bucket full. */
569 	return 1;
570 }
571 
572 void
573 rte_swx_table_learner_delete(void *table __rte_unused,
574 			     void *mailbox)
575 {
576 	struct mailbox *m = mailbox;
577 
578 	if (m->hit) {
579 		struct table_bucket *b = m->bucket;
580 
581 		/* Expire the key. */
582 		b->time[m->bucket_key_pos] = 0;
583 
584 		/* Mailbox. */
585 		m->hit = 0;
586 	}
587 }
588