xref: /dpdk/lib/table/rte_swx_table_learner.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdio.h>
7 #include <errno.h>
8 
9 #include <rte_common.h>
10 #include <rte_cycles.h>
11 #include <rte_prefetch.h>
12 
13 #include "rte_swx_table_learner.h"
14 
#ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES
#define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1
#endif

/* NOTE(review): the guard below tests the *selector* table macro
 * (RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE), while the macro defined just above
 * (RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES) is never referenced anywhere in this file. This looks
 * like a copy-paste from rte_swx_table_selector.c — confirm which build flag is actually meant
 * to select the libnuma (huge pages) allocation path for the learner table.
 */
#ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE
20 
21 #include <rte_malloc.h>
22 
/* Allocate a zero-initialized, *alignment*-aligned memory block of *size* bytes on the given
 * NUMA node, using the DPDK heap allocator. Returns NULL on failure.
 */
static void *
env_calloc(size_t size, size_t alignment, int numa_node)
{
	void *mem = rte_zmalloc_socket(NULL, size, alignment, numa_node);

	return mem;
}
28 
/* Free a block previously allocated with env_calloc(). The *size* argument is only needed by
 * the libnuma variant of this function, hence unused here.
 */
static void
env_free(void *start, size_t size __rte_unused)
{
	rte_free(start);
}
34 
35 #else
36 
37 #include <numa.h>
38 
39 static void *
40 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node)
41 {
42 	void *start;
43 
44 	if (numa_available() == -1)
45 		return NULL;
46 
47 	start = numa_alloc_onnode(size, numa_node);
48 	if (!start)
49 		return NULL;
50 
51 	memset(start, 0, size);
52 	return start;
53 }
54 
/* Free a block previously allocated with env_calloc(). libnuma requires the original
 * allocation size to be passed back on free. NULL is a no-op.
 */
static void
env_free(void *start, size_t size)
{
	if (numa_available() == -1)
		return;

	if (!start)
		return;

	numa_free(start, size);
}
63 
64 #endif
65 
66 #if defined(RTE_ARCH_X86_64)
67 
68 #include <x86intrin.h>
69 
70 #define crc32_u64(crc, v) _mm_crc32_u64(crc, v)
71 
72 #else
73 
/* Software fallback for the x86 CRC32-C instruction: fold the 64-bit *value* into the 32-bit
 * running *crc* using the bit-reflected Castagnoli polynomial (0x82F63B78), one bit at a time.
 * The result always fits in 32 bits.
 */
static inline uint64_t
crc32_u64_generic(uint64_t crc, uint64_t value)
{
	uint64_t rem = (crc & 0xFFFFFFFFLLU) ^ value;
	int bit;

	for (bit = 0; bit < 64; bit++)
		rem = (rem >> 1LLU) ^ (0x82F63B78LLU & -(rem & 1LLU));

	return rem;
}
89 
90 #define crc32_u64(crc, v) crc32_u64_generic(crc, v)
91 
92 #endif
93 
/* Compute the 32-bit signature of the masked input key using CRC32-C as the mixing primitive.
 *
 * key: Input key, read as an array of 64-bit words (assumes the buffer is at least *key_size*
 *      bytes and suitably aligned for 64-bit loads — TODO confirm alignment guarantee at the
 *      call sites).
 * key_mask: Per-64-bit-word AND-mask applied to the key before hashing.
 * key_size: Key size in bytes. Needs to be one of: 8, 16, 32 or 64 (i.e. key_size_pow2);
 *           any other value yields 0.
 * seed: Hash seed.
 */
static inline uint32_t
hash(void *key, void *key_mask, uint32_t key_size, uint32_t seed)
{
	uint64_t *k = key;
	uint64_t *m = key_mask;
	uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;

	switch (key_size) {
	case 8:
		/* Single 8-byte word: one CRC step. */
		crc0 = crc32_u64(seed, k[0] & m[0]);
		return crc0;

	case 16:
		k0 = k[0] & m[0];

		/* Two CRC lanes, XOR-folded into the final signature. */
		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc0 ^= crc1;

		return crc0;

	case 32:
		k0 = k[0] & m[0];
		k2 = k[2] & m[2];

		/* Four CRC lanes, reduced pairwise, then XOR-folded. */
		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc2 = crc32_u64(k2, k[3] & m[3]);
		crc3 = k2 >> 32;

		crc0 = crc32_u64(crc0, crc1);
		crc1 = crc32_u64(crc2, crc3);

		crc0 ^= crc1;

		return crc0;

	case 64:
		k0 = k[0] & m[0];
		k2 = k[2] & m[2];
		k5 = k[5] & m[5];

		/* Six CRC lanes, reduced in two trees, then XOR-folded. */
		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc2 = crc32_u64(k2, k[3] & m[3]);
		crc3 = crc32_u64(k2 >> 32, k[4] & m[4]);

		crc4 = crc32_u64(k5, k[6] & m[6]);
		crc5 = crc32_u64(k5 >> 32, k[7] & m[7]);

		crc0 = crc32_u64(crc0, (crc1 << 32) ^ crc2);
		crc1 = crc32_u64(crc3, (crc4 << 32) ^ crc5);

		crc0 ^= crc1;

		return crc0;

	default:
		/* Unsupported key size. */
		crc0 = 0;
		return crc0;
	}
}
160 
/* Compare key *a* against key *b* AND-ed with *b_mask*, all read as arrays of 64-bit words.
 * The fixed sizes 8/16/32/64 are unrolled; any other size falls back to a word loop over
 * n_bytes / 8 words.
 *
 * Return: 0 = Keys are NOT equal; 1 = Keys are equal.
 */
static inline uint32_t
table_keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes)
{
	uint64_t *x = a, *y = b, *mask = b_mask;
	uint64_t diff;

	switch (n_bytes) {
	case 8:
		diff = x[0] ^ (y[0] & mask[0]);

		return diff ? 0 : 1;

	case 16:
		diff = (x[0] ^ (y[0] & mask[0])) |
		       (x[1] ^ (y[1] & mask[1]));

		return diff ? 0 : 1;

	case 32:
		diff = (x[0] ^ (y[0] & mask[0])) |
		       (x[1] ^ (y[1] & mask[1])) |
		       (x[2] ^ (y[2] & mask[2])) |
		       (x[3] ^ (y[3] & mask[3]));

		return diff ? 0 : 1;

	case 64:
		diff = (x[0] ^ (y[0] & mask[0])) |
		       (x[1] ^ (y[1] & mask[1])) |
		       (x[2] ^ (y[2] & mask[2])) |
		       (x[3] ^ (y[3] & mask[3])) |
		       (x[4] ^ (y[4] & mask[4])) |
		       (x[5] ^ (y[5] & mask[5])) |
		       (x[6] ^ (y[6] & mask[6])) |
		       (x[7] ^ (y[7] & mask[7]));

		return diff ? 0 : 1;

	default: {
		uint32_t i;

		for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
			if (x[i] != (y[i] & mask[i]))
				return 0;

		return 1;
	}
	}
}
231 
/* Number of key slots per hash table bucket. */
#define TABLE_KEYS_PER_BUCKET 4

/* Padding that rounds the bucket header (the time[] and sig[] arrays) up to exactly one cache
 * line.
 */
#define TABLE_BUCKET_PAD_SIZE \
	(RTE_CACHE_LINE_SIZE - TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t)))

/* Hash table bucket: a one-cache-line header followed by the key area (TABLE_KEYS_PER_BUCKET
 * keys of key_size_pow2 bytes each) and then the data area (TABLE_KEYS_PER_BUCKET entries of
 * data_size_pow2 bytes each); see table_bucket_key_get() and table_bucket_data_get().
 */
struct table_bucket {
	/* Per-slot expiration time, stored as the upper 32 bits of the TSC expiration
	 * timestamp. Zero (the init value) means the slot was never used or was deleted.
	 */
	uint32_t time[TABLE_KEYS_PER_BUCKET];

	/* Per-slot key signature, as produced by the lookup (LSB always set). */
	uint32_t sig[TABLE_KEYS_PER_BUCKET];

	/* Pad the header to a full cache line. */
	uint8_t pad[TABLE_BUCKET_PAD_SIZE];

	/* Variable-size key and data areas start here. */
	uint8_t key[0];
};
243 
/* Internal (pre-computed) table parameters, derived from the user-supplied
 * struct rte_swx_table_learner_params by table_params_get().
 */
struct table_params {
	/* The real key size. Must be non-zero. */
	size_t key_size;

	/* The key size upgraded to the next power of 2. This is used for hash generation (in
	 * increments of 8 bytes, from 8 to 64 bytes) and for run-time key comparison. This is why
	 * key sizes bigger than 64 bytes are not allowed.
	 */
	size_t key_size_pow2;

	/* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
	size_t key_size_log2;

	/* The key offset within the key buffer. */
	size_t key_offset;

	/* The real action data size. */
	size_t action_data_size;

	/* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the
	 * next power of 2.
	 */
	size_t data_size_pow2;

	/* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */
	size_t data_size_log2;

	/* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */
	size_t n_buckets;

	/* Bucket mask. Purpose: replace modulo with bitmask and operation. */
	size_t bucket_mask;

	/* Total number of key bytes in the bucket, including the key padding bytes. There are
	 * (key_size_pow2 - key_size) padding bytes for each key in the bucket.
	 */
	size_t bucket_key_all_size;

	/* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */
	size_t bucket_size;

	/* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */
	size_t bucket_size_log2;

	/* Timeout in CPU clock cycles. */
	uint64_t key_timeout;

	/* Total memory size. */
	size_t total_size;
};
294 
/* Learner table instance: parameters, key mask and the bucket array, allocated as one
 * contiguous, cache-aligned memory block of params.total_size bytes.
 */
struct table {
	/* Table parameters. */
	struct table_params params;

	/* Key mask. Only the first *key_size* bytes are meaningful (key_size <= 64, which is
	 * also RTE_CACHE_LINE_SIZE on the supported targets — TODO confirm for 128-byte cache
	 * line platforms).
	 */
	uint8_t key_mask0[RTE_CACHE_LINE_SIZE];

	/* Table buckets: n_buckets buckets of bucket_size bytes each. */
	uint8_t buckets[0];
} __rte_cache_aligned;
305 
306 static int
307 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params)
308 {
309 	/* Check input parameters. */
310 	if (!params ||
311 	    !params->key_size ||
312 	    (params->key_size > 64) ||
313 	    !params->n_keys_max ||
314 	    (params->n_keys_max > 1U << 31) ||
315 	    !params->key_timeout)
316 		return -EINVAL;
317 
318 	/* Key. */
319 	p->key_size = params->key_size;
320 
321 	p->key_size_pow2 = rte_align64pow2(p->key_size);
322 	if (p->key_size_pow2 < 8)
323 		p->key_size_pow2 = 8;
324 
325 	p->key_size_log2 = __builtin_ctzll(p->key_size_pow2);
326 
327 	p->key_offset = params->key_offset;
328 
329 	/* Data. */
330 	p->action_data_size = params->action_data_size;
331 
332 	p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);
333 
334 	p->data_size_log2 = __builtin_ctzll(p->data_size_pow2);
335 
336 	/* Buckets. */
337 	p->n_buckets = rte_align32pow2(params->n_keys_max);
338 
339 	p->bucket_mask = p->n_buckets - 1;
340 
341 	p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2;
342 
343 	p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) +
344 					 p->bucket_key_all_size +
345 					 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);
346 
347 	p->bucket_size_log2 = __builtin_ctzll(p->bucket_size);
348 
349 	/* Timeout. */
350 	p->key_timeout = params->key_timeout * rte_get_tsc_hz();
351 
352 	/* Total size. */
353 	p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size;
354 
355 	return 0;
356 }
357 
358 static inline struct table_bucket *
359 table_bucket_get(struct table *t, size_t bucket_id)
360 {
361 	return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2];
362 }
363 
364 static inline uint8_t *
365 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
366 {
367 	return &b->key[bucket_key_pos << t->params.key_size_log2];
368 }
369 
370 static inline uint64_t *
371 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
372 {
373 	return (uint64_t *)&b->key[t->params.bucket_key_all_size +
374 				   (bucket_key_pos << t->params.data_size_log2)];
375 }
376 
377 uint64_t
378 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params)
379 {
380 	struct table_params p;
381 	int status;
382 
383 	status = table_params_get(&p, params);
384 
385 	return status ? 0 : p.total_size;
386 }
387 
388 void *
389 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node)
390 {
391 	struct table_params p;
392 	struct table *t;
393 	int status;
394 
395 	/* Check and process the input parameters. */
396 	status = table_params_get(&p, params);
397 	if (status)
398 		return NULL;
399 
400 	/* Memory allocation. */
401 	t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node);
402 	if (!t)
403 		return NULL;
404 
405 	/* Memory initialization. */
406 	memcpy(&t->params, &p, sizeof(struct table_params));
407 
408 	if (params->key_mask0)
409 		memcpy(t->key_mask0, params->key_mask0, params->key_size);
410 	else
411 		memset(t->key_mask0, 0xFF, params->key_size);
412 
413 	return t;
414 }
415 
416 void
417 rte_swx_table_learner_free(void *table)
418 {
419 	struct table *t = table;
420 
421 	if (!t)
422 		return;
423 
424 	env_free(t, t->params.total_size);
425 }
426 
/* Per-thread lookup context. The two-state lookup machine saves its intermediate results here
 * between the two lookup calls, and add()/delete() operate on the outcome of the latest
 * completed lookup on the same mailbox.
 */
struct mailbox {
	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
	struct table_bucket *bucket;

	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
	uint32_t input_sig;

	/* Writer: lookup state 1. Reader(s): add(). */
	uint8_t *input_key;

	/* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */
	uint32_t hit;

	/* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */
	size_t bucket_key_pos;

	/* State: 0 = ready for a new lookup; 1 = first lookup call done, key compare pending. */
	int state;
};
446 
/* Return the size (in bytes) of the per-thread mailbox the caller must allocate and pass to the
 * lookup/add/delete operations.
 */
uint64_t
rte_swx_table_learner_mailbox_size_get(void)
{
	return sizeof(struct mailbox);
}
452 
/* Table lookup, split into a two-call state machine to hide memory latency:
 *
 * -- Call 1 (state 0): compute the key signature and the bucket ID, prefetch the bucket, save
 *    the context in the mailbox and return 0 (lookup not complete yet).
 *
 * -- Call 2 (state 1): compare the input key against the non-expired bucket keys and return 1
 *    (lookup complete), with *hit*, *action_id* and *action_data* set accordingly. A hit also
 *    pushes the key expiration time *key_timeout* cycles into the future.
 *
 * input_time is the current TSC timestamp.
 */
int
rte_swx_table_learner_lookup(void *table,
			     void *mailbox,
			     uint64_t input_time,
			     uint8_t **key,
			     uint64_t *action_id,
			     uint8_t **action_data,
			     int *hit)
{
	struct table *t = table;
	struct mailbox *m = mailbox;

	switch (m->state) {
	case 0: {
		uint8_t *input_key;
		struct table_bucket *b;
		size_t bucket_id;
		uint32_t input_sig;

		/* Hash the masked key and map the signature to a bucket. */
		input_key = &(*key)[t->params.key_offset];
		input_sig = hash(input_key, t->key_mask0, t->params.key_size_pow2, 0);
		bucket_id = input_sig & t->params.bucket_mask;
		b = table_bucket_get(t, bucket_id);

		/* Prefetch the bucket header and the start of the key area for call 2. */
		rte_prefetch0(b);
		rte_prefetch0(&b->key[0]);
		rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]);

		/* Save the context for call 2 and for a possible subsequent add(). The LSB of
		 * the signature is forced to 1, so stored signatures are never zero —
		 * NOTE(review): presumably to distinguish them from the zero-initialized sig[]
		 * of never-used slots; confirm.
		 */
		m->bucket = b;
		m->input_key = input_key;
		m->input_sig = input_sig | 1;
		m->state = 1;
		return 0;
	}

	case 1: {
		struct table_bucket *b = m->bucket;
		uint32_t i;

		/* Search the input key through the bucket keys. */
		for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
			uint64_t time = b->time[i];
			uint32_t sig = b->sig[i];
			uint8_t *key = table_bucket_key_get(t, b, i);
			uint32_t key_size_pow2 = t->params.key_size_pow2;

			/* b->time[] holds only the upper 32 bits of the expiration timestamp;
			 * reconstruct the (truncated) 64-bit value before comparing.
			 */
			time <<= 32;

			/* A slot matches only if it has not expired (time > input_time), the
			 * signatures agree, and the full masked keys agree.
			 */
			if ((time > input_time) &&
			    (sig == m->input_sig) &&
			    table_keycmp(key, m->input_key, t->key_mask0, key_size_pow2)) {
				uint64_t *data = table_bucket_data_get(t, b, i);

				/* Hit. */
				rte_prefetch0(data);

				/* Refresh the key expiration time. */
				b->time[i] = (input_time + t->params.key_timeout) >> 32;

				/* Record the hit for add()/delete() and re-arm for the next
				 * lookup.
				 */
				m->hit = 1;
				m->bucket_key_pos = i;
				m->state = 0;

				/* data[0] is the action ID; the action data follows it. */
				*action_id = data[0];
				*action_data = (uint8_t *)&data[1];
				*hit = 1;
				return 1;
			}
		}

		/* Miss. */
		m->hit = 0;
		m->state = 0;

		*hit = 0;
		return 1;
	}

	default:
		/* This state should never be reached. Miss. */
		m->hit = 0;
		m->state = 0;

		*hit = 0;
		return 1;
	}
}
539 
/* Learn (install) the key saved in the mailbox by the latest completed lookup on this mailbox,
 * together with the given action ID and action data. On a preceding lookup hit, only the data
 * is updated in place; on a miss, a free/expired slot in the same bucket is claimed.
 *
 * Returns 0 on success, or 1 when the bucket is full (the key was not added).
 */
uint32_t
rte_swx_table_learner_add(void *table,
			  void *mailbox,
			  uint64_t input_time,
			  uint64_t action_id,
			  uint8_t *action_data)
{
	struct table *t = table;
	struct mailbox *m = mailbox;
	struct table_bucket *b = m->bucket;
	uint32_t i;

	/* Lookup hit: The key, key signature and key time are already properly configured (the key
	 * time was bumped by lookup), only the key data need to be updated.
	 */
	if (m->hit) {
		uint64_t *data = table_bucket_data_get(t, b, m->bucket_key_pos);

		/* Install the key data. */
		data[0] = action_id;
		if (t->params.action_data_size && action_data)
			memcpy(&data[1], action_data, t->params.action_data_size);

		return 0;
	}

	/* Lookup miss: Search for a free position in the current bucket and install the key. */
	for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
		uint64_t time = b->time[i];

		/* Reconstruct the 64-bit expiration timestamp from its stored upper 32 bits. */
		time <<= 32;

		/* Free position: Either there was never a key installed here, so the key time is
		 * set to zero (the init value), which is always less than the current time, or this
		 * position was used before, but the key expired (the key time is in the past).
		 */
		if (time < input_time) {
			uint8_t *key = table_bucket_key_get(t, b, i);
			uint64_t *data = table_bucket_data_get(t, b, i);

			/* Install the key. */
			b->time[i] = (input_time + t->params.key_timeout) >> 32;
			b->sig[i] = m->input_sig;
			memcpy(key, m->input_key, t->params.key_size);

			/* Install the key data. */
			data[0] = action_id;
			if (t->params.action_data_size && action_data)
				memcpy(&data[1], action_data, t->params.action_data_size);

			/* Mailbox: the key is now present, so a following delete() on this
			 * mailbox targets this slot.
			 */
			m->hit = 1;
			m->bucket_key_pos = i;

			return 0;
		}
	}

	/* Bucket full. */
	return 1;
}
601 
602 void
603 rte_swx_table_learner_delete(void *table __rte_unused,
604 			     void *mailbox)
605 {
606 	struct mailbox *m = mailbox;
607 
608 	if (m->hit) {
609 		struct table_bucket *b = m->bucket;
610 
611 		/* Expire the key. */
612 		b->time[m->bucket_key_pos] = 0;
613 
614 		/* Mailbox. */
615 		m->hit = 0;
616 	}
617 }
618