xref: /dpdk/lib/table/rte_swx_table_learner.c (revision 30a1de105a5f40d77b344a891c4a68f79e815c43)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4 #include <string.h>
5 #include <stdio.h>
6 #include <errno.h>
7 
8 #include <rte_common.h>
9 #include <rte_cycles.h>
10 #include <rte_prefetch.h>
11 
12 #include "rte_swx_table_learner.h"
13 
14 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES
15 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1
16 #endif
17 
18 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE
19 
20 #include <rte_malloc.h>
21 
22 static void *
23 env_calloc(size_t size, size_t alignment, int numa_node)
24 {
25 	return rte_zmalloc_socket(NULL, size, alignment, numa_node);
26 }
27 
28 static void
29 env_free(void *start, size_t size __rte_unused)
30 {
31 	rte_free(start);
32 }
33 
34 #else
35 
36 #include <numa.h>
37 
38 static void *
39 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node)
40 {
41 	void *start;
42 
43 	if (numa_available() == -1)
44 		return NULL;
45 
46 	start = numa_alloc_onnode(size, numa_node);
47 	if (!start)
48 		return NULL;
49 
50 	memset(start, 0, size);
51 	return start;
52 }
53 
54 static void
55 env_free(void *start, size_t size)
56 {
57 	if ((numa_available() == -1) || !start)
58 		return;
59 
60 	numa_free(start, size);
61 }
62 
63 #endif
64 
65 #if defined(RTE_ARCH_X86_64)
66 
67 #include <x86intrin.h>
68 
69 #define crc32_u64(crc, v) _mm_crc32_u64(crc, v)
70 
71 #else
72 
73 static inline uint64_t
74 crc32_u64_generic(uint64_t crc, uint64_t value)
75 {
76 	int i;
77 
78 	crc = (crc & 0xFFFFFFFFLLU) ^ value;
79 	for (i = 63; i >= 0; i--) {
80 		uint64_t mask;
81 
82 		mask = -(crc & 1LLU);
83 		crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask);
84 	}
85 
86 	return crc;
87 }
88 
89 #define crc32_u64(crc, v) crc32_u64_generic(crc, v)
90 
91 #endif
92 
93 /* Key size needs to be one of: 8, 16, 32 or 64. */
94 static inline uint32_t
95 hash(void *key, void *key_mask, uint32_t key_size, uint32_t seed)
96 {
97 	uint64_t *k = key;
98 	uint64_t *m = key_mask;
99 	uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
100 
101 	switch (key_size) {
102 	case 8:
103 		crc0 = crc32_u64(seed, k[0] & m[0]);
104 		return crc0;
105 
106 	case 16:
107 		k0 = k[0] & m[0];
108 
109 		crc0 = crc32_u64(k0, seed);
110 		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);
111 
112 		crc0 ^= crc1;
113 
114 		return crc0;
115 
116 	case 32:
117 		k0 = k[0] & m[0];
118 		k2 = k[2] & m[2];
119 
120 		crc0 = crc32_u64(k0, seed);
121 		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);
122 
123 		crc2 = crc32_u64(k2, k[3] & m[3]);
124 		crc3 = k2 >> 32;
125 
126 		crc0 = crc32_u64(crc0, crc1);
127 		crc1 = crc32_u64(crc2, crc3);
128 
129 		crc0 ^= crc1;
130 
131 		return crc0;
132 
133 	case 64:
134 		k0 = k[0] & m[0];
135 		k2 = k[2] & m[2];
136 		k5 = k[5] & m[5];
137 
138 		crc0 = crc32_u64(k0, seed);
139 		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);
140 
141 		crc2 = crc32_u64(k2, k[3] & m[3]);
142 		crc3 = crc32_u64(k2 >> 32, k[4] & m[4]);
143 
144 		crc4 = crc32_u64(k5, k[6] & m[6]);
145 		crc5 = crc32_u64(k5 >> 32, k[7] & m[7]);
146 
147 		crc0 = crc32_u64(crc0, (crc1 << 32) ^ crc2);
148 		crc1 = crc32_u64(crc3, (crc4 << 32) ^ crc5);
149 
150 		crc0 ^= crc1;
151 
152 		return crc0;
153 
154 	default:
155 		crc0 = 0;
156 		return crc0;
157 	}
158 }
159 
160 /*
161  * Return: 0 = Keys are NOT equal; 1 = Keys are equal.
162  */
163 static inline uint32_t
164 table_keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes)
165 {
166 	uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
167 
168 	switch (n_bytes) {
169 	case 8: {
170 		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
171 		uint32_t result = 1;
172 
173 		if (xor0)
174 			result = 0;
175 		return result;
176 	}
177 
178 	case 16: {
179 		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
180 		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
181 		uint64_t or = xor0 | xor1;
182 		uint32_t result = 1;
183 
184 		if (or)
185 			result = 0;
186 		return result;
187 	}
188 
189 	case 32: {
190 		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
191 		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
192 		uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
193 		uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
194 		uint64_t or = (xor0 | xor1) | (xor2 | xor3);
195 		uint32_t result = 1;
196 
197 		if (or)
198 			result = 0;
199 		return result;
200 	}
201 
202 	case 64: {
203 		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
204 		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
205 		uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
206 		uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
207 		uint64_t xor4 = a64[4] ^ (b64[4] & b_mask64[4]);
208 		uint64_t xor5 = a64[5] ^ (b64[5] & b_mask64[5]);
209 		uint64_t xor6 = a64[6] ^ (b64[6] & b_mask64[6]);
210 		uint64_t xor7 = a64[7] ^ (b64[7] & b_mask64[7]);
211 		uint64_t or = ((xor0 | xor1) | (xor2 | xor3)) |
212 			      ((xor4 | xor5) | (xor6 | xor7));
213 		uint32_t result = 1;
214 
215 		if (or)
216 			result = 0;
217 		return result;
218 	}
219 
220 	default: {
221 		uint32_t i;
222 
223 		for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
224 			if (a64[i] != (b64[i] & b_mask64[i]))
225 				return 0;
226 		return 1;
227 	}
228 	}
229 }
230 
231 #define TABLE_KEYS_PER_BUCKET 4
232 
233 #define TABLE_BUCKET_PAD_SIZE \
234 	(RTE_CACHE_LINE_SIZE - TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t)))
235 
236 struct table_bucket {
237 	uint32_t time[TABLE_KEYS_PER_BUCKET];
238 	uint32_t sig[TABLE_KEYS_PER_BUCKET];
239 	uint8_t pad[TABLE_BUCKET_PAD_SIZE];
240 	uint8_t key[0];
241 };
242 
243 struct table_params {
244 	/* The real key size. Must be non-zero. */
245 	size_t key_size;
246 
247 	/* They key size upgrated to the next power of 2. This used for hash generation (in
248 	 * increments of 8 bytes, from 8 to 64 bytes) and for run-time key comparison. This is why
249 	 * key sizes bigger than 64 bytes are not allowed.
250 	 */
251 	size_t key_size_pow2;
252 
253 	/* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
254 	size_t key_size_log2;
255 
256 	/* The key offset within the key buffer. */
257 	size_t key_offset;
258 
259 	/* The real action data size. */
260 	size_t action_data_size;
261 
262 	/* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the
263 	 * next power of 2.
264 	 */
265 	size_t data_size_pow2;
266 
267 	/* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */
268 	size_t data_size_log2;
269 
270 	/* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */
271 	size_t n_buckets;
272 
273 	/* Bucket mask. Purpose: replace modulo with bitmask and operation. */
274 	size_t bucket_mask;
275 
276 	/* Total number of key bytes in the bucket, including the key padding bytes. There are
277 	 * (key_size_pow2 - key_size) padding bytes for each key in the bucket.
278 	 */
279 	size_t bucket_key_all_size;
280 
281 	/* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */
282 	size_t bucket_size;
283 
284 	/* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */
285 	size_t bucket_size_log2;
286 
287 	/* Timeout in CPU clock cycles. */
288 	uint64_t key_timeout;
289 
290 	/* Total memory size. */
291 	size_t total_size;
292 };
293 
294 struct table {
295 	/* Table parameters. */
296 	struct table_params params;
297 
298 	/* Key mask. Array of *key_size* bytes. */
299 	uint8_t key_mask0[RTE_CACHE_LINE_SIZE];
300 
301 	/* Table buckets. */
302 	uint8_t buckets[0];
303 } __rte_cache_aligned;
304 
305 static int
306 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params)
307 {
308 	/* Check input parameters. */
309 	if (!params ||
310 	    !params->key_size ||
311 	    (params->key_size > 64) ||
312 	    !params->n_keys_max ||
313 	    (params->n_keys_max > 1U << 31) ||
314 	    !params->key_timeout)
315 		return -EINVAL;
316 
317 	/* Key. */
318 	p->key_size = params->key_size;
319 
320 	p->key_size_pow2 = rte_align64pow2(p->key_size);
321 	if (p->key_size_pow2 < 8)
322 		p->key_size_pow2 = 8;
323 
324 	p->key_size_log2 = __builtin_ctzll(p->key_size_pow2);
325 
326 	p->key_offset = params->key_offset;
327 
328 	/* Data. */
329 	p->action_data_size = params->action_data_size;
330 
331 	p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);
332 
333 	p->data_size_log2 = __builtin_ctzll(p->data_size_pow2);
334 
335 	/* Buckets. */
336 	p->n_buckets = rte_align32pow2(params->n_keys_max);
337 
338 	p->bucket_mask = p->n_buckets - 1;
339 
340 	p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2;
341 
342 	p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) +
343 					 p->bucket_key_all_size +
344 					 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);
345 
346 	p->bucket_size_log2 = __builtin_ctzll(p->bucket_size);
347 
348 	/* Timeout. */
349 	p->key_timeout = params->key_timeout * rte_get_tsc_hz();
350 
351 	/* Total size. */
352 	p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size;
353 
354 	return 0;
355 }
356 
357 static inline struct table_bucket *
358 table_bucket_get(struct table *t, size_t bucket_id)
359 {
360 	return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2];
361 }
362 
363 static inline uint8_t *
364 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
365 {
366 	return &b->key[bucket_key_pos << t->params.key_size_log2];
367 }
368 
369 static inline uint64_t *
370 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
371 {
372 	return (uint64_t *)&b->key[t->params.bucket_key_all_size +
373 				   (bucket_key_pos << t->params.data_size_log2)];
374 }
375 
376 uint64_t
377 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params)
378 {
379 	struct table_params p;
380 	int status;
381 
382 	status = table_params_get(&p, params);
383 
384 	return status ? 0 : p.total_size;
385 }
386 
387 void *
388 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node)
389 {
390 	struct table_params p;
391 	struct table *t;
392 	int status;
393 
394 	/* Check and process the input parameters. */
395 	status = table_params_get(&p, params);
396 	if (status)
397 		return NULL;
398 
399 	/* Memory allocation. */
400 	t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node);
401 	if (!t)
402 		return NULL;
403 
404 	/* Memory initialization. */
405 	memcpy(&t->params, &p, sizeof(struct table_params));
406 
407 	if (params->key_mask0)
408 		memcpy(t->key_mask0, params->key_mask0, params->key_size);
409 	else
410 		memset(t->key_mask0, 0xFF, params->key_size);
411 
412 	return t;
413 }
414 
415 void
416 rte_swx_table_learner_free(void *table)
417 {
418 	struct table *t = table;
419 
420 	if (!t)
421 		return;
422 
423 	env_free(t, t->params.total_size);
424 }
425 
426 struct mailbox {
427 	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
428 	struct table_bucket *bucket;
429 
430 	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
431 	uint32_t input_sig;
432 
433 	/* Writer: lookup state 1. Reader(s): add(). */
434 	uint8_t *input_key;
435 
436 	/* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */
437 	uint32_t hit;
438 
439 	/* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */
440 	size_t bucket_key_pos;
441 
442 	/* State. */
443 	int state;
444 };
445 
446 uint64_t
447 rte_swx_table_learner_mailbox_size_get(void)
448 {
449 	return sizeof(struct mailbox);
450 }
451 
452 int
453 rte_swx_table_learner_lookup(void *table,
454 			     void *mailbox,
455 			     uint64_t input_time,
456 			     uint8_t **key,
457 			     uint64_t *action_id,
458 			     uint8_t **action_data,
459 			     int *hit)
460 {
461 	struct table *t = table;
462 	struct mailbox *m = mailbox;
463 
464 	switch (m->state) {
465 	case 0: {
466 		uint8_t *input_key;
467 		struct table_bucket *b;
468 		size_t bucket_id;
469 		uint32_t input_sig;
470 
471 		input_key = &(*key)[t->params.key_offset];
472 		input_sig = hash(input_key, t->key_mask0, t->params.key_size_pow2, 0);
473 		bucket_id = input_sig & t->params.bucket_mask;
474 		b = table_bucket_get(t, bucket_id);
475 
476 		rte_prefetch0(b);
477 		rte_prefetch0(&b->key[0]);
478 		rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]);
479 
480 		m->bucket = b;
481 		m->input_key = input_key;
482 		m->input_sig = input_sig | 1;
483 		m->state = 1;
484 		return 0;
485 	}
486 
487 	case 1: {
488 		struct table_bucket *b = m->bucket;
489 		uint32_t i;
490 
491 		/* Search the input key through the bucket keys. */
492 		for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
493 			uint64_t time = b->time[i];
494 			uint32_t sig = b->sig[i];
495 			uint8_t *key = table_bucket_key_get(t, b, i);
496 			uint32_t key_size_pow2 = t->params.key_size_pow2;
497 
498 			time <<= 32;
499 
500 			if ((time > input_time) &&
501 			    (sig == m->input_sig) &&
502 			    table_keycmp(key, m->input_key, t->key_mask0, key_size_pow2)) {
503 				uint64_t *data = table_bucket_data_get(t, b, i);
504 
505 				/* Hit. */
506 				rte_prefetch0(data);
507 
508 				b->time[i] = (input_time + t->params.key_timeout) >> 32;
509 
510 				m->hit = 1;
511 				m->bucket_key_pos = i;
512 				m->state = 0;
513 
514 				*action_id = data[0];
515 				*action_data = (uint8_t *)&data[1];
516 				*hit = 1;
517 				return 1;
518 			}
519 		}
520 
521 		/* Miss. */
522 		m->hit = 0;
523 		m->state = 0;
524 
525 		*hit = 0;
526 		return 1;
527 	}
528 
529 	default:
530 		/* This state should never be reached. Miss. */
531 		m->hit = 0;
532 		m->state = 0;
533 
534 		*hit = 0;
535 		return 1;
536 	}
537 }
538 
539 uint32_t
540 rte_swx_table_learner_add(void *table,
541 			  void *mailbox,
542 			  uint64_t input_time,
543 			  uint64_t action_id,
544 			  uint8_t *action_data)
545 {
546 	struct table *t = table;
547 	struct mailbox *m = mailbox;
548 	struct table_bucket *b = m->bucket;
549 	uint32_t i;
550 
551 	/* Lookup hit: The key, key signature and key time are already properly configured (the key
552 	 * time was bumped by lookup), only the key data need to be updated.
553 	 */
554 	if (m->hit) {
555 		uint64_t *data = table_bucket_data_get(t, b, m->bucket_key_pos);
556 
557 		/* Install the key data. */
558 		data[0] = action_id;
559 		if (t->params.action_data_size && action_data)
560 			memcpy(&data[1], action_data, t->params.action_data_size);
561 
562 		return 0;
563 	}
564 
565 	/* Lookup miss: Search for a free position in the current bucket and install the key. */
566 	for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
567 		uint64_t time = b->time[i];
568 
569 		time <<= 32;
570 
571 		/* Free position: Either there was never a key installed here, so the key time is
572 		 * set to zero (the init value), which is always less than the current time, or this
573 		 * position was used before, but the key expired (the key time is in the past).
574 		 */
575 		if (time < input_time) {
576 			uint8_t *key = table_bucket_key_get(t, b, i);
577 			uint64_t *data = table_bucket_data_get(t, b, i);
578 
579 			/* Install the key. */
580 			b->time[i] = (input_time + t->params.key_timeout) >> 32;
581 			b->sig[i] = m->input_sig;
582 			memcpy(key, m->input_key, t->params.key_size);
583 
584 			/* Install the key data. */
585 			data[0] = action_id;
586 			if (t->params.action_data_size && action_data)
587 				memcpy(&data[1], action_data, t->params.action_data_size);
588 
589 			/* Mailbox. */
590 			m->hit = 1;
591 			m->bucket_key_pos = i;
592 
593 			return 0;
594 		}
595 	}
596 
597 	/* Bucket full. */
598 	return 1;
599 }
600 
601 void
602 rte_swx_table_learner_delete(void *table __rte_unused,
603 			     void *mailbox)
604 {
605 	struct mailbox *m = mailbox;
606 
607 	if (m->hit) {
608 		struct table_bucket *b = m->bucket;
609 
610 		/* Expire the key. */
611 		b->time[m->bucket_key_pos] = 0;
612 
613 		/* Mailbox. */
614 		m->hit = 0;
615 	}
616 }
617