1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
3 */
4 #include <string.h>
5 #include <stdio.h>
6 #include <errno.h>
7
8 #include <rte_common.h>
9 #include <rte_cycles.h>
10 #include <rte_prefetch.h>
11 #include <rte_jhash.h>
12 #include <rte_hash_crc.h>
13
14 #include "rte_swx_keycmp.h"
15 #include "rte_swx_table_learner.h"
16
17 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES
18 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1
19 #endif
20
21 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE
22
23 #include <rte_malloc.h>
24
25 static void *
env_calloc(size_t size,size_t alignment,int numa_node)26 env_calloc(size_t size, size_t alignment, int numa_node)
27 {
28 return rte_zmalloc_socket(NULL, size, alignment, numa_node);
29 }
30
31 static void
env_free(void * start,size_t size __rte_unused)32 env_free(void *start, size_t size __rte_unused)
33 {
34 rte_free(start);
35 }
36
37 #else
38
39 #include <numa.h>
40
41 static void *
env_calloc(size_t size,size_t alignment __rte_unused,int numa_node)42 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node)
43 {
44 void *start;
45
46 if (numa_available() == -1)
47 return NULL;
48
49 start = numa_alloc_onnode(size, numa_node);
50 if (!start)
51 return NULL;
52
53 memset(start, 0, size);
54 return start;
55 }
56
57 static void
env_free(void * start,size_t size)58 env_free(void *start, size_t size)
59 {
60 if ((numa_available() == -1) || !start)
61 return;
62
63 numa_free(start, size);
64 }
65
66 #endif
67
68 static void
table_keycpy(void * dst,void * src,uint32_t n_bytes)69 table_keycpy(void *dst, void *src, uint32_t n_bytes)
70 {
71 memcpy(dst, src, n_bytes);
72 }
73
74 #define TABLE_KEYS_PER_BUCKET 4
75 #define TABLE_KEYS_PER_BUCKET_LOG2 2
76
77 #define TABLE_BUCKET_USEFUL_SIZE \
78 (TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint8_t)))
79
80 #define TABLE_BUCKET_PAD_SIZE \
81 (RTE_CACHE_LINE_SIZE - TABLE_BUCKET_USEFUL_SIZE)
82
83 struct table_bucket {
84 uint32_t time[TABLE_KEYS_PER_BUCKET];
85 uint32_t sig[TABLE_KEYS_PER_BUCKET];
86 uint8_t key_timeout_id[TABLE_KEYS_PER_BUCKET];
87 uint8_t pad[TABLE_BUCKET_PAD_SIZE];
88 uint8_t key[];
89 };
90
91 struct table_params {
92 /* The real key size. Must be non-zero. */
93 size_t key_size;
94
95 /* The key size upgrated to the next power of 2. */
96 size_t key_size_pow2;
97
98 /* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
99 size_t key_size_log2;
100
101 /* The key offset within the key buffer. */
102 size_t key_offset;
103
104 /* The real action data size. */
105 size_t action_data_size;
106
107 /* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the
108 * next power of 2.
109 */
110 size_t data_size_pow2;
111
112 /* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */
113 size_t data_size_log2;
114
115 /* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */
116 size_t n_buckets;
117
118 /* Bucket mask. Purpose: replace modulo with bitmask and operation. */
119 size_t bucket_mask;
120
121 /* Total number of key bytes in the bucket, including the key padding bytes. There are
122 * (key_size_pow2 - key_size) padding bytes for each key in the bucket.
123 */
124 size_t bucket_key_all_size;
125
126 /* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */
127 size_t bucket_size;
128
129 /* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */
130 size_t bucket_size_log2;
131
132 /* Hash function. */
133 rte_swx_hash_func_t hash_func;
134
135 /* Key comparison function. */
136 rte_swx_keycmp_func_t keycmp_func;
137
138 /* Set of all possible key timeout values measured in CPU clock cycles. */
139 uint64_t key_timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX];
140
141 /* Number of key timeout values. */
142 uint32_t n_key_timeouts;
143
144 /* Total memory size. */
145 size_t total_size;
146 };
147
148 struct __rte_cache_aligned table {
149 /* Table parameters. */
150 struct table_params params;
151
152 /* Table buckets. */
153 uint8_t buckets[];
154 };
155
156 /* The timeout (in cycles) is stored in the table as a 32-bit value by truncating its least
157 * significant 32 bits. Therefore, to make sure the time is always advancing when adding the timeout
158 * value on top of the current time, the minimum timeout value is 1^32 cycles, which is 2 seconds on
159 * a 2 GHz CPU.
160 */
161 static uint64_t
timeout_convert(uint32_t timeout_in_seconds)162 timeout_convert(uint32_t timeout_in_seconds)
163 {
164 uint64_t timeout_in_cycles = timeout_in_seconds * rte_get_tsc_hz();
165
166 if (!(timeout_in_cycles >> 32))
167 timeout_in_cycles = 1LLU << 32;
168
169 return timeout_in_cycles;
170 }
171
172 static int
table_params_get(struct table_params * p,struct rte_swx_table_learner_params * params)173 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params)
174 {
175 uint32_t i;
176
177 /* Check input parameters. */
178 if (!params ||
179 !params->key_size ||
180 !params->n_keys_max ||
181 (params->n_keys_max > 1U << 31) ||
182 !params->key_timeout ||
183 !params->n_key_timeouts ||
184 (params->n_key_timeouts > RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX))
185 return -EINVAL;
186
187 if (params->key_mask0) {
188 for (i = 0; i < params->key_size; i++)
189 if (params->key_mask0[i] != 0xFF)
190 break;
191
192 if (i < params->key_size)
193 return -EINVAL;
194 }
195
196 for (i = 0; i < params->n_key_timeouts; i++)
197 if (!params->key_timeout[i])
198 return -EINVAL;
199
200 /* Key. */
201 p->key_size = params->key_size;
202
203 p->key_size_pow2 = rte_align64pow2(p->key_size);
204
205 p->key_size_log2 = rte_ctz64(p->key_size_pow2);
206
207 p->key_offset = params->key_offset;
208
209 /* Data. */
210 p->action_data_size = params->action_data_size;
211
212 p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);
213
214 p->data_size_log2 = rte_ctz64(p->data_size_pow2);
215
216 /* Buckets. */
217 p->n_buckets = rte_align32pow2(params->n_keys_max);
218
219 p->bucket_mask = p->n_buckets - 1;
220
221 p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2;
222
223 p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) +
224 p->bucket_key_all_size +
225 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);
226
227 p->bucket_size_log2 = rte_ctz64(p->bucket_size);
228
229 p->hash_func = params->hash_func ? params->hash_func : rte_hash_crc;
230
231 p->keycmp_func = rte_swx_keycmp_func_get(params->key_size);
232
233 /* Timeout. */
234 for (i = 0; i < params->n_key_timeouts; i++)
235 p->key_timeout[i] = timeout_convert(params->key_timeout[i]);
236
237 p->n_key_timeouts = rte_align32pow2(params->n_key_timeouts);
238
239 for ( ; i < p->n_key_timeouts; i++)
240 p->key_timeout[i] = p->key_timeout[0];
241
242 /* Total size. */
243 p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size;
244
245 return 0;
246 }
247
248 static inline struct table_bucket *
table_bucket_get(struct table * t,size_t bucket_id)249 table_bucket_get(struct table *t, size_t bucket_id)
250 {
251 return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2];
252 }
253
254 static inline uint8_t *
table_bucket_key_get(struct table * t,struct table_bucket * b,size_t bucket_key_pos)255 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
256 {
257 return &b->key[bucket_key_pos << t->params.key_size_log2];
258 }
259
260 static inline uint64_t *
table_bucket_data_get(struct table * t,struct table_bucket * b,size_t bucket_key_pos)261 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
262 {
263 return (uint64_t *)&b->key[t->params.bucket_key_all_size +
264 (bucket_key_pos << t->params.data_size_log2)];
265 }
266
267 static inline size_t
table_entry_id_get(struct table * t,struct table_bucket * b,size_t bucket_key_pos)268 table_entry_id_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
269 {
270 size_t bucket_id = ((uint8_t *)b - t->buckets) >> t->params.bucket_size_log2;
271
272 return (bucket_id << TABLE_KEYS_PER_BUCKET_LOG2) + bucket_key_pos;
273 }
274
275 uint64_t
rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params * params)276 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params)
277 {
278 struct table_params p;
279 int status;
280
281 status = table_params_get(&p, params);
282
283 return status ? 0 : p.total_size;
284 }
285
286 void *
rte_swx_table_learner_create(struct rte_swx_table_learner_params * params,int numa_node)287 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node)
288 {
289 struct table_params p;
290 struct table *t;
291 int status;
292
293 /* Check and process the input parameters. */
294 status = table_params_get(&p, params);
295 if (status)
296 return NULL;
297
298 /* Memory allocation. */
299 t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node);
300 if (!t)
301 return NULL;
302
303 /* Memory initialization. */
304 memcpy(&t->params, &p, sizeof(struct table_params));
305
306 return t;
307 }
308
309 void
rte_swx_table_learner_free(void * table)310 rte_swx_table_learner_free(void *table)
311 {
312 struct table *t = table;
313
314 if (!t)
315 return;
316
317 env_free(t, t->params.total_size);
318 }
319
320 int
rte_swx_table_learner_timeout_update(void * table,uint32_t key_timeout_id,uint32_t key_timeout)321 rte_swx_table_learner_timeout_update(void *table,
322 uint32_t key_timeout_id,
323 uint32_t key_timeout)
324 {
325 struct table *t = table;
326
327 if (!t ||
328 (key_timeout_id >= t->params.n_key_timeouts) ||
329 !key_timeout)
330 return -EINVAL;
331
332 t->params.key_timeout[key_timeout_id] = timeout_convert(key_timeout);
333
334 return 0;
335 }
336
337 struct mailbox {
338 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
339 struct table_bucket *bucket;
340
341 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
342 uint32_t input_sig;
343
344 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
345 uint8_t *input_key;
346
347 /* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */
348 uint32_t hit;
349
350 /* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */
351 size_t bucket_key_pos;
352
353 /* State. */
354 int state;
355 };
356
357 uint64_t
rte_swx_table_learner_mailbox_size_get(void)358 rte_swx_table_learner_mailbox_size_get(void)
359 {
360 return sizeof(struct mailbox);
361 }
362
363 int
rte_swx_table_learner_lookup(void * table,void * mailbox,uint64_t input_time,uint8_t ** key,uint64_t * action_id,uint8_t ** action_data,size_t * entry_id,int * hit)364 rte_swx_table_learner_lookup(void *table,
365 void *mailbox,
366 uint64_t input_time,
367 uint8_t **key,
368 uint64_t *action_id,
369 uint8_t **action_data,
370 size_t *entry_id,
371 int *hit)
372 {
373 struct table *t = table;
374 struct mailbox *m = mailbox;
375
376 switch (m->state) {
377 case 0: {
378 uint8_t *input_key;
379 struct table_bucket *b;
380 size_t bucket_id;
381 uint32_t input_sig;
382
383 input_key = &(*key)[t->params.key_offset];
384 input_sig = t->params.hash_func(input_key, t->params.key_size, 0);
385 bucket_id = input_sig & t->params.bucket_mask;
386 b = table_bucket_get(t, bucket_id);
387
388 rte_prefetch0(b);
389 rte_prefetch0(&b->key[0]);
390 rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]);
391
392 m->bucket = b;
393 m->input_key = input_key;
394 m->input_sig = input_sig | 1;
395 m->state = 1;
396 return 0;
397 }
398
399 case 1: {
400 struct table_bucket *b = m->bucket;
401 uint32_t i;
402
403 /* Search the input key through the bucket keys. */
404 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
405 uint64_t time = b->time[i];
406 uint32_t sig = b->sig[i];
407 uint8_t *key = table_bucket_key_get(t, b, i);
408
409 time <<= 32;
410
411 if ((time > input_time) &&
412 (sig == m->input_sig) &&
413 t->params.keycmp_func(key, m->input_key, t->params.key_size)) {
414 uint64_t *data = table_bucket_data_get(t, b, i);
415
416 /* Hit. */
417 rte_prefetch0(data);
418
419 m->hit = 1;
420 m->bucket_key_pos = i;
421 m->state = 0;
422
423 *action_id = data[0];
424 *action_data = (uint8_t *)&data[1];
425 *entry_id = table_entry_id_get(t, b, i);
426 *hit = 1;
427 return 1;
428 }
429 }
430
431 /* Miss. */
432 m->hit = 0;
433 m->state = 0;
434
435 *hit = 0;
436 return 1;
437 }
438
439 default:
440 /* This state should never be reached. Miss. */
441 m->hit = 0;
442 m->state = 0;
443
444 *hit = 0;
445 return 1;
446 }
447 }
448
449 void
rte_swx_table_learner_rearm(void * table,void * mailbox,uint64_t input_time)450 rte_swx_table_learner_rearm(void *table,
451 void *mailbox,
452 uint64_t input_time)
453 {
454 struct table *t = table;
455 struct mailbox *m = mailbox;
456 struct table_bucket *b;
457 size_t bucket_key_pos;
458 uint64_t key_timeout;
459 uint32_t key_timeout_id;
460
461 if (!m->hit)
462 return;
463
464 b = m->bucket;
465 bucket_key_pos = m->bucket_key_pos;
466
467 key_timeout_id = b->key_timeout_id[bucket_key_pos];
468 key_timeout = t->params.key_timeout[key_timeout_id];
469 b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
470 }
471
472 void
rte_swx_table_learner_rearm_new(void * table,void * mailbox,uint64_t input_time,uint32_t key_timeout_id)473 rte_swx_table_learner_rearm_new(void *table,
474 void *mailbox,
475 uint64_t input_time,
476 uint32_t key_timeout_id)
477 {
478 struct table *t = table;
479 struct mailbox *m = mailbox;
480 struct table_bucket *b;
481 size_t bucket_key_pos;
482 uint64_t key_timeout;
483
484 if (!m->hit)
485 return;
486
487 b = m->bucket;
488 bucket_key_pos = m->bucket_key_pos;
489
490 key_timeout_id &= t->params.n_key_timeouts - 1;
491 key_timeout = t->params.key_timeout[key_timeout_id];
492 b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
493 b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id;
494 }
495
496 uint32_t
rte_swx_table_learner_add(void * table,void * mailbox,uint64_t input_time,uint64_t action_id,uint8_t * action_data,uint32_t key_timeout_id)497 rte_swx_table_learner_add(void *table,
498 void *mailbox,
499 uint64_t input_time,
500 uint64_t action_id,
501 uint8_t *action_data,
502 uint32_t key_timeout_id)
503 {
504 struct table *t = table;
505 struct mailbox *m = mailbox;
506 struct table_bucket *b = m->bucket;
507 uint64_t key_timeout;
508 uint32_t i;
509
510 /* Adjust the key timeout ID to fit the valid range. */
511 key_timeout_id &= t->params.n_key_timeouts - 1;
512 key_timeout = t->params.key_timeout[key_timeout_id];
513
514 /* Lookup hit: The following bucket fields need to be updated:
515 * - key (key, sig): NO (already correctly set).
516 * - key timeout (key_timeout_id, time): YES.
517 * - key data (data): YES.
518 */
519 if (m->hit) {
520 size_t bucket_key_pos = m->bucket_key_pos;
521 uint64_t *data = table_bucket_data_get(t, b, bucket_key_pos);
522
523 /* Install the key timeout. */
524 b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
525 b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id;
526
527 /* Install the key data. */
528 data[0] = action_id;
529 if (t->params.action_data_size && action_data)
530 memcpy(&data[1], action_data, t->params.action_data_size);
531
532 return 0;
533 }
534
535 /* Lookup miss: Search for a free position in the current bucket and install the key. */
536 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
537 uint64_t time = b->time[i];
538
539 time <<= 32;
540
541 /* Free position: Either there was never a key installed here, so the key time is
542 * set to zero (the init value), which is always less than the current time, or this
543 * position was used before, but the key expired (the key time is in the past).
544 */
545 if (time < input_time) {
546 uint8_t *key = table_bucket_key_get(t, b, i);
547 uint64_t *data = table_bucket_data_get(t, b, i);
548
549 /* Install the key and the key timeout. */
550 b->time[i] = (input_time + key_timeout) >> 32;
551 b->sig[i] = m->input_sig;
552 b->key_timeout_id[i] = (uint8_t)key_timeout_id;
553 table_keycpy(key, m->input_key, t->params.key_size);
554
555 /* Install the key data. */
556 data[0] = action_id;
557 if (t->params.action_data_size && action_data)
558 memcpy(&data[1], action_data, t->params.action_data_size);
559
560 /* Mailbox. */
561 m->hit = 1;
562 m->bucket_key_pos = i;
563
564 return 0;
565 }
566 }
567
568 /* Bucket full. */
569 return 1;
570 }
571
572 void
rte_swx_table_learner_delete(void * table __rte_unused,void * mailbox)573 rte_swx_table_learner_delete(void *table __rte_unused,
574 void *mailbox)
575 {
576 struct mailbox *m = mailbox;
577
578 if (m->hit) {
579 struct table_bucket *b = m->bucket;
580
581 /* Expire the key. */
582 b->time[m->bucket_key_pos] = 0;
583
584 /* Mailbox. */
585 m->hit = 0;
586 }
587 }
588