/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */
#include <string.h>
#include <stdio.h>
#include <errno.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>

#include "rte_swx_table_learner.h"

#ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES
#define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1
#endif

#if RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES

#include <rte_malloc.h>

static void *
env_calloc(size_t size, size_t alignment, int numa_node)
{
	return rte_zmalloc_socket(NULL, size, alignment, numa_node);
}

static void
env_free(void *start, size_t size __rte_unused)
{
	rte_free(start);
}

#else

#include <numa.h>

static void *
env_calloc(size_t size, size_t alignment __rte_unused, int numa_node)
{
	void *start;

	if (numa_available() == -1)
		return NULL;

	start = numa_alloc_onnode(size, numa_node);
	if (!start)
		return NULL;

	memset(start, 0, size);
	return start;
}

static void
env_free(void *start, size_t size)
{
	if ((numa_available() == -1) || !start)
		return;

	numa_free(start, size);
}

#endif

#if defined(RTE_ARCH_X86_64)

#include <x86intrin.h>

#define crc32_u64(crc, v) _mm_crc32_u64(crc, v)

#else

static inline uint64_t
crc32_u64_generic(uint64_t crc, uint64_t value)
{
	int i;

	crc = (crc & 0xFFFFFFFFLLU) ^ value;
	for (i = 63; i >= 0; i--) {
		uint64_t mask;

		mask = -(crc & 1LLU);
		crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask);
	}

	return crc;
}

#define crc32_u64(crc, v) crc32_u64_generic(crc, v)

#endif

/* Key size needs to be one of: 8, 16, 32 or 64. */
static inline uint32_t
hash(void *key, void *key_mask, uint32_t key_size, uint32_t seed)
{
	uint64_t *k = key;
	uint64_t *m = key_mask;
	uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;

	switch (key_size) {
	case 8:
		crc0 = crc32_u64(seed, k[0] & m[0]);
		return crc0;

	case 16:
		k0 = k[0] & m[0];

		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc0 ^= crc1;

		return crc0;

	case 32:
		k0 = k[0] & m[0];
		k2 = k[2] & m[2];

		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc2 = crc32_u64(k2, k[3] & m[3]);
		crc3 = k2 >> 32;

		crc0 = crc32_u64(crc0, crc1);
		crc1 = crc32_u64(crc2, crc3);

		crc0 ^= crc1;

		return crc0;

	case 64:
		k0 = k[0] & m[0];
		k2 = k[2] & m[2];
		k5 = k[5] & m[5];

		crc0 = crc32_u64(k0, seed);
		crc1 = crc32_u64(k0 >> 32, k[1] & m[1]);

		crc2 = crc32_u64(k2, k[3] & m[3]);
		crc3 = crc32_u64(k2 >> 32, k[4] & m[4]);

		crc4 = crc32_u64(k5, k[6] & m[6]);
		crc5 = crc32_u64(k5 >> 32, k[7] & m[7]);

		crc0 = crc32_u64(crc0, (crc1 << 32) ^ crc2);
		crc1 = crc32_u64(crc3, (crc4 << 32) ^ crc5);

		crc0 ^= crc1;

		return crc0;

	default:
		crc0 = 0;
		return crc0;
	}
}

/* n_bytes needs to be a multiple of 8 bytes. */
static void
table_keycpy(void *dst, void *src, void *src_mask, uint32_t n_bytes)
{
	uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
	uint32_t i;

	for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
		dst64[i] = src64[i] & src_mask64[i];
}

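/* Note on masking: table_keycpy() above and table_keycmp() below both operate on the key in
 * masked form, i.e. the key mask is applied to the source key on copy and to the lookup key on
 * compare, so bucket keys are stored and matched with the mask already applied. Because the mask
 * buffer (key_mask0 in struct table) is zero-initialized and only its first key_size bytes are
 * set, the padding bytes between key_size and key_size_pow2 are masked to zero on both sides of
 * the comparison. For example, a 10-byte key is processed as two 8-byte words (key_size_pow2 is
 * 16), with its last 6 bytes zeroed by the mask.
 */
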
/*
 * Return: 0 = Keys are NOT equal; 1 = Keys are equal.
 */
static inline uint32_t
table_keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes)
{
	uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;

	switch (n_bytes) {
	case 8: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint32_t result = 1;

		if (xor0)
			result = 0;
		return result;
	}

	case 16: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
		uint64_t or = xor0 | xor1;
		uint32_t result = 1;

		if (or)
			result = 0;
		return result;
	}

	case 32: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
		uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
		uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
		uint64_t or = (xor0 | xor1) | (xor2 | xor3);
		uint32_t result = 1;

		if (or)
			result = 0;
		return result;
	}

	case 64: {
		uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]);
		uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]);
		uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]);
		uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]);
		uint64_t xor4 = a64[4] ^ (b64[4] & b_mask64[4]);
		uint64_t xor5 = a64[5] ^ (b64[5] & b_mask64[5]);
		uint64_t xor6 = a64[6] ^ (b64[6] & b_mask64[6]);
		uint64_t xor7 = a64[7] ^ (b64[7] & b_mask64[7]);
		uint64_t or = ((xor0 | xor1) | (xor2 | xor3)) |
			      ((xor4 | xor5) | (xor6 | xor7));
		uint32_t result = 1;

		if (or)
			result = 0;
		return result;
	}

	default: {
		uint32_t i;

		for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
			if (a64[i] != (b64[i] & b_mask64[i]))
				return 0;
		return 1;
	}
	}
}

#define TABLE_KEYS_PER_BUCKET 4

#define TABLE_BUCKET_USEFUL_SIZE \
	(TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint8_t)))

#define TABLE_BUCKET_PAD_SIZE \
	(RTE_CACHE_LINE_SIZE - TABLE_BUCKET_USEFUL_SIZE)

struct table_bucket {
	uint32_t time[TABLE_KEYS_PER_BUCKET];
	uint32_t sig[TABLE_KEYS_PER_BUCKET];
	uint8_t key_timeout_id[TABLE_KEYS_PER_BUCKET];
	uint8_t pad[TABLE_BUCKET_PAD_SIZE];
	uint8_t key[];
};

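/* Bucket layout: each bucket holds up to TABLE_KEYS_PER_BUCKET keys that hash to the same bucket
 * ID. The bucket header above (the time, sig and key_timeout_id arrays, 36 bytes in total) is
 * padded to exactly one cache line; the flexible key[] array that follows holds the
 * TABLE_KEYS_PER_BUCKET keys (each padded to key_size_pow2 bytes) and then the
 * TABLE_KEYS_PER_BUCKET data entries (the 8-byte action_id plus the action data, each padded to
 * data_size_pow2 bytes), as addressed by table_bucket_key_get() and table_bucket_data_get()
 * below. The time field stores the upper 32 bits of the key expiration time in CPU cycles; a slot
 * whose expiration time is not in the future (including the initial value of zero) is treated as
 * free by rte_swx_table_learner_add().
 */
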
struct table_params {
	/* The real key size. Must be non-zero. */
	size_t key_size;

	/* The key size upgraded to the next power of 2. This is used for hash generation (in
	 * increments of 8 bytes, from 8 to 64 bytes) and for run-time key comparison. This is why
	 * key sizes bigger than 64 bytes are not allowed.
	 */
	size_t key_size_pow2;

	/* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
	size_t key_size_log2;

	/* The key offset within the key buffer. */
	size_t key_offset;

	/* The real action data size. */
	size_t action_data_size;

	/* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to
	 * the next power of 2.
	 */
	size_t data_size_pow2;

	/* log2(data_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */
	size_t data_size_log2;

	/* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */
	size_t n_buckets;

	/* Bucket mask. Purpose: replace the modulo operation with a bitwise AND. */
	size_t bucket_mask;

	/* Total number of key bytes in the bucket, including the key padding bytes. There are
	 * (key_size_pow2 - key_size) padding bytes for each key in the bucket.
	 */
	size_t bucket_key_all_size;

	/* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 numbers. */
	size_t bucket_size;

	/* log2(bucket_size). Purpose: avoid multiplication with non-power-of-2 numbers. */
	size_t bucket_size_log2;

	/* Set of all possible key timeout values measured in CPU clock cycles. */
	uint64_t key_timeout[RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX];

	/* Number of key timeout values. */
	uint32_t n_key_timeouts;

	/* Total memory size. */
	size_t total_size;
};

struct table {
	/* Table parameters. */
	struct table_params params;

	/* Key mask. Only the first *key_size* bytes are relevant, the rest are zero. */
	uint8_t key_mask0[RTE_CACHE_LINE_SIZE];

	/* Table buckets. */
	uint8_t buckets[];
} __rte_cache_aligned;

/* The timeout (in cycles) is stored in the table as a 32-bit value by truncating its least
 * significant 32 bits. Therefore, to make sure the time is always advancing when adding the
 * timeout value on top of the current time, the minimum timeout value is 2^32 cycles, which is
 * about 2 seconds on a 2 GHz CPU.
 */
static uint64_t
timeout_convert(uint32_t timeout_in_seconds)
{
	uint64_t timeout_in_cycles = timeout_in_seconds * rte_get_tsc_hz();

	if (!(timeout_in_cycles >> 32))
		timeout_in_cycles = 1LLU << 32;

	return timeout_in_cycles;
}

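/* Example for timeout_convert() above (illustrative figures): with a 2.5 GHz TSC, a 60-second
 * timeout converts to 150e9 cycles, while the enforced minimum of 2^32 cycles corresponds to
 * roughly 1.7 seconds. Since only the upper 32 bits of the expiration time are kept in the
 * bucket, key expiration is tracked with a granularity of 2^32 cycles.
 */
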
static int
table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params)
{
	uint32_t i;

	/* Check input parameters. */
	if (!params ||
	    !params->key_size ||
	    (params->key_size > 64) ||
	    !params->n_keys_max ||
	    (params->n_keys_max > 1U << 31) ||
	    !params->key_timeout ||
	    !params->n_key_timeouts ||
	    (params->n_key_timeouts > RTE_SWX_TABLE_LEARNER_N_KEY_TIMEOUTS_MAX))
		return -EINVAL;

	for (i = 0; i < params->n_key_timeouts; i++)
		if (!params->key_timeout[i])
			return -EINVAL;

	/* Key. */
	p->key_size = params->key_size;

	p->key_size_pow2 = rte_align64pow2(p->key_size);
	if (p->key_size_pow2 < 8)
		p->key_size_pow2 = 8;

	p->key_size_log2 = __builtin_ctzll(p->key_size_pow2);

	p->key_offset = params->key_offset;

	/* Data. */
	p->action_data_size = params->action_data_size;

	p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size);

	p->data_size_log2 = __builtin_ctzll(p->data_size_pow2);

	/* Buckets. */
	p->n_buckets = rte_align32pow2(params->n_keys_max);

	p->bucket_mask = p->n_buckets - 1;

	p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2;

	p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) +
					 p->bucket_key_all_size +
					 TABLE_KEYS_PER_BUCKET * p->data_size_pow2);

	p->bucket_size_log2 = __builtin_ctzll(p->bucket_size);

	/* Timeout. */
	for (i = 0; i < params->n_key_timeouts; i++)
		p->key_timeout[i] = timeout_convert(params->key_timeout[i]);

	p->n_key_timeouts = rte_align32pow2(params->n_key_timeouts);

	for ( ; i < p->n_key_timeouts; i++)
		p->key_timeout[i] = p->key_timeout[0];

	/* Total size. */
	p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size;

	return 0;
}

static inline struct table_bucket *
table_bucket_get(struct table *t, size_t bucket_id)
{
	return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2];
}

static inline uint8_t *
table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
{
	return &b->key[bucket_key_pos << t->params.key_size_log2];
}

static inline uint64_t *
table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos)
{
	return (uint64_t *)&b->key[t->params.bucket_key_all_size +
				   (bucket_key_pos << t->params.data_size_log2)];
}

uint64_t
rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params)
{
	struct table_params p;
	int status;

	status = table_params_get(&p, params);

	return status ? 0 : p.total_size;
}

void *
rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node)
{
	struct table_params p;
	struct table *t;
	int status;

	/* Check and process the input parameters. */
	status = table_params_get(&p, params);
	if (status)
		return NULL;

	/* Memory allocation. */
	t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node);
	if (!t)
		return NULL;

	/* Memory initialization. */
	memcpy(&t->params, &p, sizeof(struct table_params));

	if (params->key_mask0)
		memcpy(t->key_mask0, params->key_mask0, params->key_size);
	else
		memset(t->key_mask0, 0xFF, params->key_size);

	return t;
}

void
rte_swx_table_learner_free(void *table)
{
	struct table *t = table;

	if (!t)
		return;

	env_free(t, t->params.total_size);
}

int
rte_swx_table_learner_timeout_update(void *table,
				     uint32_t key_timeout_id,
				     uint32_t key_timeout)
{
	struct table *t = table;

	if (!t ||
	    (key_timeout_id >= t->params.n_key_timeouts) ||
	    !key_timeout)
		return -EINVAL;

	t->params.key_timeout[key_timeout_id] = timeout_convert(key_timeout);

	return 0;
}

struct mailbox {
	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
	struct table_bucket *bucket;

	/* Writer: lookup state 0. Reader(s): lookup state 1, add(). */
	uint32_t input_sig;

	/* Writer: lookup state 1. Reader(s): add(). */
	uint8_t *input_key;

	/* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */
	uint32_t hit;

	/* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */
	size_t bucket_key_pos;

	/* State. */
	int state;
};

uint64_t
rte_swx_table_learner_mailbox_size_get(void)
{
	return sizeof(struct mailbox);
}

int
rte_swx_table_learner_lookup(void *table,
			     void *mailbox,
			     uint64_t input_time,
			     uint8_t **key,
			     uint64_t *action_id,
			     uint8_t **action_data,
			     int *hit)
{
	struct table *t = table;
	struct mailbox *m = mailbox;

	switch (m->state) {
	case 0: {
		uint8_t *input_key;
		struct table_bucket *b;
		size_t bucket_id;
		uint32_t input_sig;

		input_key = &(*key)[t->params.key_offset];
		input_sig = hash(input_key, t->key_mask0, t->params.key_size_pow2, 0);
		bucket_id = input_sig & t->params.bucket_mask;
		b = table_bucket_get(t, bucket_id);

		rte_prefetch0(b);
		rte_prefetch0(&b->key[0]);
		rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]);

		m->bucket = b;
		m->input_key = input_key;
		m->input_sig = input_sig | 1;
		m->state = 1;
		return 0;
	}

	case 1: {
		struct table_bucket *b = m->bucket;
		uint32_t i;

		/* Look for the input key among the bucket keys. */
		for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
			uint64_t time = b->time[i];
			uint32_t sig = b->sig[i];
			uint8_t *key = table_bucket_key_get(t, b, i);
			uint32_t key_size_pow2 = t->params.key_size_pow2;

			time <<= 32;

			if ((time > input_time) &&
			    (sig == m->input_sig) &&
			    table_keycmp(key, m->input_key, t->key_mask0, key_size_pow2)) {
				uint64_t *data = table_bucket_data_get(t, b, i);

				/* Hit. */
				rte_prefetch0(data);

				m->hit = 1;
				m->bucket_key_pos = i;
				m->state = 0;

				*action_id = data[0];
				*action_data = (uint8_t *)&data[1];
				*hit = 1;
				return 1;
			}
		}

		/* Miss. */
		m->hit = 0;
		m->state = 0;

		*hit = 0;
		return 1;
	}

	default:
		/* This state should never be reached. Miss. */
		m->hit = 0;
		m->state = 0;

		*hit = 0;
		return 1;
	}
}

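/* A matched key remains valid for as long as its expiration time (time << 32) is in the future.
 * rte_swx_table_learner_rearm() pushes the expiration time forward using the timeout currently
 * associated with the key (its stored key_timeout_id), while rte_swx_table_learner_rearm_new()
 * additionally moves the key to a different timeout ID. Both are no-ops unless the latest lookup
 * on this mailbox was a hit.
 */
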
void
rte_swx_table_learner_rearm(void *table,
			    void *mailbox,
			    uint64_t input_time)
{
	struct table *t = table;
	struct mailbox *m = mailbox;
	struct table_bucket *b;
	size_t bucket_key_pos;
	uint64_t key_timeout;
	uint32_t key_timeout_id;

	if (!m->hit)
		return;

	b = m->bucket;
	bucket_key_pos = m->bucket_key_pos;

	key_timeout_id = b->key_timeout_id[bucket_key_pos];
	key_timeout = t->params.key_timeout[key_timeout_id];
	b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
}

void
rte_swx_table_learner_rearm_new(void *table,
				void *mailbox,
				uint64_t input_time,
				uint32_t key_timeout_id)
{
	struct table *t = table;
	struct mailbox *m = mailbox;
	struct table_bucket *b;
	size_t bucket_key_pos;
	uint64_t key_timeout;

	if (!m->hit)
		return;

	b = m->bucket;
	bucket_key_pos = m->bucket_key_pos;

	key_timeout_id &= t->params.n_key_timeouts - 1;
	key_timeout = t->params.key_timeout[key_timeout_id];
	b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
	b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id;
}

uint32_t
rte_swx_table_learner_add(void *table,
			  void *mailbox,
			  uint64_t input_time,
			  uint64_t action_id,
			  uint8_t *action_data,
			  uint32_t key_timeout_id)
{
	struct table *t = table;
	struct mailbox *m = mailbox;
	struct table_bucket *b = m->bucket;
	uint64_t key_timeout;
	uint32_t i;

	/* Adjust the key timeout ID to fit the valid range. */
	key_timeout_id &= t->params.n_key_timeouts - 1;
	key_timeout = t->params.key_timeout[key_timeout_id];

	/* Lookup hit: The following bucket fields need to be updated:
	 * - key (key, sig): NO (already correctly set).
	 * - key timeout (key_timeout_id, time): YES.
	 * - key data (data): YES.
	 */
	if (m->hit) {
		size_t bucket_key_pos = m->bucket_key_pos;
		uint64_t *data = table_bucket_data_get(t, b, bucket_key_pos);

		/* Install the key timeout. */
		b->time[bucket_key_pos] = (input_time + key_timeout) >> 32;
		b->key_timeout_id[bucket_key_pos] = (uint8_t)key_timeout_id;

		/* Install the key data. */
		data[0] = action_id;
		if (t->params.action_data_size && action_data)
			memcpy(&data[1], action_data, t->params.action_data_size);

		return 0;
	}

	/* Lookup miss: Search for a free position in the current bucket and install the key. */
	for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) {
		uint64_t time = b->time[i];

		time <<= 32;

		/* Free position: Either there was never a key installed here, so the key time is
		 * set to zero (the init value), which is always less than the current time, or
		 * this position was used before, but the key expired (the key time is in the
		 * past).
		 */
		if (time < input_time) {
			uint8_t *key = table_bucket_key_get(t, b, i);
			uint64_t *data = table_bucket_data_get(t, b, i);

			/* Install the key and the key timeout. */
			b->time[i] = (input_time + key_timeout) >> 32;
			b->sig[i] = m->input_sig;
			b->key_timeout_id[i] = (uint8_t)key_timeout_id;
			table_keycpy(key, m->input_key, t->key_mask0, t->params.key_size_pow2);

			/* Install the key data. */
			data[0] = action_id;
			if (t->params.action_data_size && action_data)
				memcpy(&data[1], action_data, t->params.action_data_size);

			/* Mailbox. */
			m->hit = 1;
			m->bucket_key_pos = i;

			return 0;
		}
	}

	/* Bucket full. */
	return 1;
}

void
rte_swx_table_learner_delete(void *table __rte_unused,
			     void *mailbox)
{
	struct mailbox *m = mailbox;

	if (m->hit) {
		struct table_bucket *b = m->bucket;

		/* Expire the key. */
		b->time[m->bucket_key_pos] = 0;

		/* Mailbox. */
		m->hit = 0;
	}
}

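/* Illustrative call sequence (a minimal sketch, not prescribed by this file): assume a table "t"
 * created with rte_swx_table_learner_create(), a per-thread mailbox "m" of the size reported by
 * rte_swx_table_learner_mailbox_size_get(), the current time "now" in CPU cycles (e.g. from
 * rte_get_tsc_cycles()), a key buffer pointer "k", plus caller-supplied "new_action_id",
 * "new_action_data" and "timeout_id" for keys to be learned:
 *
 *	uint64_t action_id;
 *	uint8_t *action_data;
 *	int hit;
 *
 *	while (!rte_swx_table_learner_lookup(t, m, now, &k, &action_id, &action_data, &hit))
 *		;
 *
 *	if (hit)
 *		rte_swx_table_learner_rearm(t, m, now);
 *	else
 *		(void)rte_swx_table_learner_add(t, m, now, new_action_id, new_action_data,
 *						timeout_id);
 *
 * In DPDK this table is normally driven by the SWX pipeline rather than called directly.
 */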