1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020 Intel Corporation 3 */ 4 #include <string.h> 5 #include <stdio.h> 6 #include <errno.h> 7 8 #include <rte_common.h> 9 #include <rte_cycles.h> 10 #include <rte_prefetch.h> 11 #include <rte_jhash.h> 12 #include <rte_hash_crc.h> 13 14 #include "rte_swx_keycmp.h" 15 #include "rte_swx_table_learner.h" 16 17 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 18 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1 19 #endif 20 21 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE 22 23 #include <rte_malloc.h> 24 25 static void * 26 env_calloc(size_t size, size_t alignment, int numa_node) 27 { 28 return rte_zmalloc_socket(NULL, size, alignment, numa_node); 29 } 30 31 static void 32 env_free(void *start, size_t size __rte_unused) 33 { 34 rte_free(start); 35 } 36 37 #else 38 39 #include <numa.h> 40 41 static void * 42 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node) 43 { 44 void *start; 45 46 if (numa_available() == -1) 47 return NULL; 48 49 start = numa_alloc_onnode(size, numa_node); 50 if (!start) 51 return NULL; 52 53 memset(start, 0, size); 54 return start; 55 } 56 57 static void 58 env_free(void *start, size_t size) 59 { 60 if ((numa_available() == -1) || !start) 61 return; 62 63 numa_free(start, size); 64 } 65 66 #endif 67 68 static void 69 table_keycpy(void *dst, void *src, uint32_t n_bytes) 70 { 71 memcpy(dst, src, n_bytes); 72 } 73 74 #define TABLE_KEYS_PER_BUCKET 4 75 #define TABLE_KEYS_PER_BUCKET_LOG2 2 76 77 #define TABLE_BUCKET_USEFUL_SIZE \ 78 (TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint8_t))) 79 80 #define TABLE_BUCKET_PAD_SIZE \ 81 (RTE_CACHE_LINE_SIZE - TABLE_BUCKET_USEFUL_SIZE) 82 83 struct table_bucket { 84 uint32_t time[TABLE_KEYS_PER_BUCKET]; 85 uint32_t sig[TABLE_KEYS_PER_BUCKET]; 86 uint8_t key_timeout_id[TABLE_KEYS_PER_BUCKET]; 87 uint8_t pad[TABLE_BUCKET_PAD_SIZE]; 88 uint8_t key[]; 89 }; 90 91 struct table_params { 92 /* The real key size. Must be non-zero. */ 93 size_t key_size; 94 95 /* The key size upgrated to the next power of 2. */ 96 size_t key_size_pow2; 97 98 /* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */ 99 size_t key_size_log2; 100 101 /* The key offset within the key buffer. */ 102 size_t key_offset; 103 104 /* The real action data size. */ 105 size_t action_data_size; 106 107 /* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the 108 * next power of 2. 109 */ 110 size_t data_size_pow2; 111 112 /* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */ 113 size_t data_size_log2; 114 115 /* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */ 116 size_t n_buckets; 117 118 /* Bucket mask. Purpose: replace modulo with bitmask and operation. */ 119 size_t bucket_mask; 120 121 /* Total number of key bytes in the bucket, including the key padding bytes. There are 122 * (key_size_pow2 - key_size) padding bytes for each key in the bucket. 123 */ 124 size_t bucket_key_all_size; 125 126 /* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */ 127 size_t bucket_size; 128 129 /* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */ 130 size_t bucket_size_log2; 131 132 /* Hash function. */ 133 rte_swx_hash_func_t hash_func; 134 135 /* Key comparison function. */ 136 rte_swx_keycmp_func_t keycmp_func; 137 138 /* Set of all possible key timeout values measured in CPU clock cycles. */ 139 uint64_t key_timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX]; 140 141 /* Number of key timeout values. */ 142 uint32_t n_key_timeouts; 143 144 /* Total memory size. */ 145 size_t total_size; 146 }; 147 148 struct table { 149 /* Table parameters. */ 150 struct table_params params; 151 152 /* Table buckets. */ 153 uint8_t buckets[]; 154 } __rte_cache_aligned; 155 156 /* The timeout (in cycles) is stored in the table as a 32-bit value by truncating its least 157 * significant 32 bits. Therefore, to make sure the time is always advancing when adding the timeout 158 * value on top of the current time, the minimum timeout value is 1^32 cycles, which is 2 seconds on 159 * a 2 GHz CPU. 160 */ 161 static uint64_t 162 timeout_convert(uint32_t timeout_in_seconds) 163 { 164 uint64_t timeout_in_cycles = timeout_in_seconds * rte_get_tsc_hz(); 165 166 if (!(timeout_in_cycles >> 32)) 167 timeout_in_cycles = 1LLU << 32; 168 169 return timeout_in_cycles; 170 } 171 172 static int 173 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params) 174 { 175 uint32_t i; 176 177 /* Check input parameters. */ 178 if (!params || 179 !params->key_size || 180 !params->n_keys_max || 181 (params->n_keys_max > 1U << 31) || 182 !params->key_timeout || 183 !params->n_key_timeouts || 184 (params->n_key_timeouts > RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX)) 185 return -EINVAL; 186 187 if (params->key_mask0) { 188 for (i = 0; i < params->key_size; i++) 189 if (params->key_mask0[i] != 0xFF) 190 break; 191 192 if (i < params->key_size) 193 return -EINVAL; 194 } 195 196 for (i = 0; i < params->n_key_timeouts; i++) 197 if (!params->key_timeout[i]) 198 return -EINVAL; 199 200 /* Key. */ 201 p->key_size = params->key_size; 202 203 p->key_size_pow2 = rte_align64pow2(p->key_size); 204 205 p->key_size_log2 = rte_ctz64(p->key_size_pow2); 206 207 p->key_offset = params->key_offset; 208 209 /* Data. */ 210 p->action_data_size = params->action_data_size; 211 212 p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size); 213 214 p->data_size_log2 = rte_ctz64(p->data_size_pow2); 215 216 /* Buckets. */ 217 p->n_buckets = rte_align32pow2(params->n_keys_max); 218 219 p->bucket_mask = p->n_buckets - 1; 220 221 p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2; 222 223 p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) + 224 p->bucket_key_all_size + 225 TABLE_KEYS_PER_BUCKET * p->data_size_pow2); 226 227 p->bucket_size_log2 = rte_ctz64(p->bucket_size); 228 229 p->hash_func = params->hash_func ? params->hash_func : rte_hash_crc; 230 231 p->keycmp_func = rte_swx_keycmp_func_get(params->key_size); 232 233 /* Timeout. */ 234 for (i = 0; i < params->n_key_timeouts; i++) 235 p->key_timeout[i] = timeout_convert(params->key_timeout[i]); 236 237 p->n_key_timeouts = rte_align32pow2(params->n_key_timeouts); 238 239 for ( ; i < p->n_key_timeouts; i++) 240 p->key_timeout[i] = p->key_timeout[0]; 241 242 /* Total size. */ 243 p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size; 244 245 return 0; 246 } 247 248 static inline struct table_bucket * 249 table_bucket_get(struct table *t, size_t bucket_id) 250 { 251 return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2]; 252 } 253 254 static inline uint8_t * 255 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos) 256 { 257 return &b->key[bucket_key_pos << t->params.key_size_log2]; 258 } 259 260 static inline uint64_t * 261 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos) 262 { 263 return (uint64_t *)&b->key[t->params.bucket_key_all_size + 264 (bucket_key_pos << t->params.data_size_log2)]; 265 } 266 267 static inline size_t 268 table_entry_id_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos) 269 { 270 size_t bucket_id = ((uint8_t *)b - t->buckets) >> t->params.bucket_size_log2; 271 272 return (bucket_id << TABLE_KEYS_PER_BUCKET_LOG2) + bucket_key_pos; 273 } 274 275 uint64_t 276 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params) 277 { 278 struct table_params p; 279 int status; 280 281 status = table_params_get(&p, params); 282 283 return status ? 0 : p.total_size; 284 } 285 286 void * 287 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node) 288 { 289 struct table_params p; 290 struct table *t; 291 int status; 292 293 /* Check and process the input parameters. */ 294 status = table_params_get(&p, params); 295 if (status) 296 return NULL; 297 298 /* Memory allocation. */ 299 t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node); 300 if (!t) 301 return NULL; 302 303 /* Memory initialization. */ 304 memcpy(&t->params, &p, sizeof(struct table_params)); 305 306 return t; 307 } 308 309 void 310 rte_swx_table_learner_free(void *table) 311 { 312 struct table *t = table; 313 314 if (!t) 315 return; 316 317 env_free(t, t->params.total_size); 318 } 319 320 int 321 rte_swx_table_learner_timeout_update(void *table, 322 uint32_t key_timeout_id, 323 uint32_t key_timeout) 324 { 325 struct table *t = table; 326 327 if (!t || 328 (key_timeout_id >= t->params.n_key_timeouts) || 329 !key_timeout) 330 return -EINVAL; 331 332 t->params.key_timeout[key_timeout_id] = timeout_convert(key_timeout); 333 334 return 0; 335 } 336 337 struct mailbox { 338 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */ 339 struct table_bucket *bucket; 340 341 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */ 342 uint32_t input_sig; 343 344 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */ 345 uint8_t *input_key; 346 347 /* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */ 348 uint32_t hit; 349 350 /* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */ 351 size_t bucket_key_pos; 352 353 /* State. */ 354 int state; 355 }; 356 357 uint64_t 358 rte_swx_table_learner_mailbox_size_get(void) 359 { 360 return sizeof(struct mailbox); 361 } 362 363 int 364 rte_swx_table_learner_lookup(void *table, 365 void *mailbox, 366 uint64_t input_time, 367 uint8_t **key, 368 uint64_t *action_id, 369 uint8_t **action_data, 370 size_t *entry_id, 371 int *hit) 372 { 373 struct table *t = table; 374 struct mailbox *m = mailbox; 375 376 switch (m->state) { 377 case 0: { 378 uint8_t *input_key; 379 struct table_bucket *b; 380 size_t bucket_id; 381 uint32_t input_sig; 382 383 input_key = &(*key)[t->params.key_offset]; 384 input_sig = t->params.hash_func(input_key, t->params.key_size, 0); 385 bucket_id = input_sig & t->params.bucket_mask; 386 b = table_bucket_get(t, bucket_id); 387 388 rte_prefetch0(b); 389 rte_prefetch0(&b->key[0]); 390 rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]); 391 392 m->bucket = b; 393 m->input_key = input_key; 394 m->input_sig = input_sig | 1; 395 m->state = 1; 396 return 0; 397 } 398 399 case 1: { 400 struct table_bucket *b = m->bucket; 401 uint32_t i; 402 403 /* Search the input key through the bucket keys. */ 404 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) { 405 uint64_t time = b->time[i]; 406 uint32_t sig = b->sig[i]; 407 uint8_t *key = table_bucket_key_get(t, b, i); 408 409 time <<= 32; 410 411 if ((time > input_time) && 412 (sig == m->input_sig) && 413 t->params.keycmp_func(key, m->input_key, t->params.key_size)) { 414 uint64_t *data = table_bucket_data_get(t, b, i); 415 416 /* Hit. */ 417 rte_prefetch0(data); 418 419 m->hit = 1; 420 m->bucket_key_pos = i; 421 m->state = 0; 422 423 *action_id = data[0]; 424 *action_data = (uint8_t *)&data[1]; 425 *entry_id = table_entry_id_get(t, b, i); 426 *hit = 1; 427 return 1; 428 } 429 } 430 431 /* Miss. */ 432 m->hit = 0; 433 m->state = 0; 434 435 *hit = 0; 436 return 1; 437 } 438 439 default: 440 /* This state should never be reached. Miss. */ 441 m->hit = 0; 442 m->state = 0; 443 444 *hit = 0; 445 return 1; 446 } 447 } 448 449 void 450 rte_swx_table_learner_rearm(void *table, 451 void *mailbox, 452 uint64_t input_time) 453 { 454 struct table *t = table; 455 struct mailbox *m = mailbox; 456 struct table_bucket *b; 457 size_t bucket_key_pos; 458 uint64_t key_timeout; 459 uint32_t key_timeout_id; 460 461 if (!m->hit) 462 return; 463 464 b = m->bucket; 465 bucket_key_pos = m->bucket_key_pos; 466 467 key_timeout_id = b->key_timeout_id[bucket_key_pos]; 468 key_timeout = t->params.key_timeout[key_timeout_id]; 469 b->time[bucket_key_pos] = (input_time + key_timeout) >> 32; 470 } 471 472 void 473 rte_swx_table_learner_rearm_new(void *table, 474 void *mailbox, 475 uint64_t input_time, 476 uint32_t key_timeout_id) 477 { 478 struct table *t = table; 479 struct mailbox *m = mailbox; 480 struct table_bucket *b; 481 size_t bucket_key_pos; 482 uint64_t key_timeout; 483 484 if (!m->hit) 485 return; 486 487 b = m->bucket; 488 bucket_key_pos = m->bucket_key_pos; 489 490 key_timeout_id &= t->params.n_key_timeouts - 1; 491 key_timeout = t->params.key_timeout[key_timeout_id]; 492 b->time[bucket_key_pos] = (input_time + key_timeout) >> 32; 493 b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id; 494 } 495 496 uint32_t 497 rte_swx_table_learner_add(void *table, 498 void *mailbox, 499 uint64_t input_time, 500 uint64_t action_id, 501 uint8_t *action_data, 502 uint32_t key_timeout_id) 503 { 504 struct table *t = table; 505 struct mailbox *m = mailbox; 506 struct table_bucket *b = m->bucket; 507 uint64_t key_timeout; 508 uint32_t i; 509 510 /* Adjust the key timeout ID to fit the valid range. */ 511 key_timeout_id &= t->params.n_key_timeouts - 1; 512 key_timeout = t->params.key_timeout[key_timeout_id]; 513 514 /* Lookup hit: The following bucket fields need to be updated: 515 * - key (key, sig): NO (already correctly set). 516 * - key timeout (key_timeout_id, time): YES. 517 * - key data (data): YES. 518 */ 519 if (m->hit) { 520 size_t bucket_key_pos = m->bucket_key_pos; 521 uint64_t *data = table_bucket_data_get(t, b, bucket_key_pos); 522 523 /* Install the key timeout. */ 524 b->time[bucket_key_pos] = (input_time + key_timeout) >> 32; 525 b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id; 526 527 /* Install the key data. */ 528 data[0] = action_id; 529 if (t->params.action_data_size && action_data) 530 memcpy(&data[1], action_data, t->params.action_data_size); 531 532 return 0; 533 } 534 535 /* Lookup miss: Search for a free position in the current bucket and install the key. */ 536 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) { 537 uint64_t time = b->time[i]; 538 539 time <<= 32; 540 541 /* Free position: Either there was never a key installed here, so the key time is 542 * set to zero (the init value), which is always less than the current time, or this 543 * position was used before, but the key expired (the key time is in the past). 544 */ 545 if (time < input_time) { 546 uint8_t *key = table_bucket_key_get(t, b, i); 547 uint64_t *data = table_bucket_data_get(t, b, i); 548 549 /* Install the key and the key timeout. */ 550 b->time[i] = (input_time + key_timeout) >> 32; 551 b->sig[i] = m->input_sig; 552 b->key_timeout_id[i] = (uint8_t)key_timeout_id; 553 table_keycpy(key, m->input_key, t->params.key_size); 554 555 /* Install the key data. */ 556 data[0] = action_id; 557 if (t->params.action_data_size && action_data) 558 memcpy(&data[1], action_data, t->params.action_data_size); 559 560 /* Mailbox. */ 561 m->hit = 1; 562 m->bucket_key_pos = i; 563 564 return 0; 565 } 566 } 567 568 /* Bucket full. */ 569 return 1; 570 } 571 572 void 573 rte_swx_table_learner_delete(void *table __rte_unused, 574 void *mailbox) 575 { 576 struct mailbox *m = mailbox; 577 578 if (m->hit) { 579 struct table_bucket *b = m->bucket; 580 581 /* Expire the key. */ 582 b->time[m->bucket_key_pos] = 0; 583 584 /* Mailbox. */ 585 m->hit = 0; 586 } 587 } 588