1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020 Intel Corporation 3 */ 4 #include <stdlib.h> 5 #include <string.h> 6 #include <stdio.h> 7 #include <errno.h> 8 9 #include <rte_common.h> 10 #include <rte_cycles.h> 11 #include <rte_prefetch.h> 12 13 #include "rte_swx_table_learner.h" 14 15 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 16 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1 17 #endif 18 19 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE 20 21 #include <rte_malloc.h> 22 23 static void * 24 env_calloc(size_t size, size_t alignment, int numa_node) 25 { 26 return rte_zmalloc_socket(NULL, size, alignment, numa_node); 27 } 28 29 static void 30 env_free(void *start, size_t size __rte_unused) 31 { 32 rte_free(start); 33 } 34 35 #else 36 37 #include <numa.h> 38 39 static void * 40 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node) 41 { 42 void *start; 43 44 if (numa_available() == -1) 45 return NULL; 46 47 start = numa_alloc_onnode(size, numa_node); 48 if (!start) 49 return NULL; 50 51 memset(start, 0, size); 52 return start; 53 } 54 55 static void 56 env_free(void *start, size_t size) 57 { 58 if ((numa_available() == -1) || !start) 59 return; 60 61 numa_free(start, size); 62 } 63 64 #endif 65 66 #if defined(RTE_ARCH_X86_64) 67 68 #include <x86intrin.h> 69 70 #define crc32_u64(crc, v) _mm_crc32_u64(crc, v) 71 72 #else 73 74 static inline uint64_t 75 crc32_u64_generic(uint64_t crc, uint64_t value) 76 { 77 int i; 78 79 crc = (crc & 0xFFFFFFFFLLU) ^ value; 80 for (i = 63; i >= 0; i--) { 81 uint64_t mask; 82 83 mask = -(crc & 1LLU); 84 crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask); 85 } 86 87 return crc; 88 } 89 90 #define crc32_u64(crc, v) crc32_u64_generic(crc, v) 91 92 #endif 93 94 /* Key size needs to be one of: 8, 16, 32 or 64. */ 95 static inline uint32_t 96 hash(void *key, void *key_mask, uint32_t key_size, uint32_t seed) 97 { 98 uint64_t *k = key; 99 uint64_t *m = key_mask; 100 uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; 101 102 switch (key_size) { 103 case 8: 104 crc0 = crc32_u64(seed, k[0] & m[0]); 105 return crc0; 106 107 case 16: 108 k0 = k[0] & m[0]; 109 110 crc0 = crc32_u64(k0, seed); 111 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]); 112 113 crc0 ^= crc1; 114 115 return crc0; 116 117 case 32: 118 k0 = k[0] & m[0]; 119 k2 = k[2] & m[2]; 120 121 crc0 = crc32_u64(k0, seed); 122 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]); 123 124 crc2 = crc32_u64(k2, k[3] & m[3]); 125 crc3 = k2 >> 32; 126 127 crc0 = crc32_u64(crc0, crc1); 128 crc1 = crc32_u64(crc2, crc3); 129 130 crc0 ^= crc1; 131 132 return crc0; 133 134 case 64: 135 k0 = k[0] & m[0]; 136 k2 = k[2] & m[2]; 137 k5 = k[5] & m[5]; 138 139 crc0 = crc32_u64(k0, seed); 140 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]); 141 142 crc2 = crc32_u64(k2, k[3] & m[3]); 143 crc3 = crc32_u64(k2 >> 32, k[4] & m[4]); 144 145 crc4 = crc32_u64(k5, k[6] & m[6]); 146 crc5 = crc32_u64(k5 >> 32, k[7] & m[7]); 147 148 crc0 = crc32_u64(crc0, (crc1 << 32) ^ crc2); 149 crc1 = crc32_u64(crc3, (crc4 << 32) ^ crc5); 150 151 crc0 ^= crc1; 152 153 return crc0; 154 155 default: 156 crc0 = 0; 157 return crc0; 158 } 159 } 160 161 /* 162 * Return: 0 = Keys are NOT equal; 1 = Keys are equal. 163 */ 164 static inline uint32_t 165 table_keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes) 166 { 167 uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask; 168 169 switch (n_bytes) { 170 case 8: { 171 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 172 uint32_t result = 1; 173 174 if (xor0) 175 result = 0; 176 return result; 177 } 178 179 case 16: { 180 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 181 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]); 182 uint64_t or = xor0 | xor1; 183 uint32_t result = 1; 184 185 if (or) 186 result = 0; 187 return result; 188 } 189 190 case 32: { 191 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 192 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]); 193 uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]); 194 uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]); 195 uint64_t or = (xor0 | xor1) | (xor2 | xor3); 196 uint32_t result = 1; 197 198 if (or) 199 result = 0; 200 return result; 201 } 202 203 case 64: { 204 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 205 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]); 206 uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]); 207 uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]); 208 uint64_t xor4 = a64[4] ^ (b64[4] & b_mask64[4]); 209 uint64_t xor5 = a64[5] ^ (b64[5] & b_mask64[5]); 210 uint64_t xor6 = a64[6] ^ (b64[6] & b_mask64[6]); 211 uint64_t xor7 = a64[7] ^ (b64[7] & b_mask64[7]); 212 uint64_t or = ((xor0 | xor1) | (xor2 | xor3)) | 213 ((xor4 | xor5) | (xor6 | xor7)); 214 uint32_t result = 1; 215 216 if (or) 217 result = 0; 218 return result; 219 } 220 221 default: { 222 uint32_t i; 223 224 for (i = 0; i < n_bytes / sizeof(uint64_t); i++) 225 if (a64[i] != (b64[i] & b_mask64[i])) 226 return 0; 227 return 1; 228 } 229 } 230 } 231 232 #define TABLE_KEYS_PER_BUCKET 4 233 234 #define TABLE_BUCKET_PAD_SIZE \ 235 (RTE_CACHE_LINE_SIZE - TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t))) 236 237 struct table_bucket { 238 uint32_t time[TABLE_KEYS_PER_BUCKET]; 239 uint32_t sig[TABLE_KEYS_PER_BUCKET]; 240 uint8_t pad[TABLE_BUCKET_PAD_SIZE]; 241 uint8_t key[0]; 242 }; 243 244 struct table_params { 245 /* The real key size. Must be non-zero. */ 246 size_t key_size; 247 248 /* They key size upgrated to the next power of 2. This used for hash generation (in 249 * increments of 8 bytes, from 8 to 64 bytes) and for run-time key comparison. This is why 250 * key sizes bigger than 64 bytes are not allowed. 251 */ 252 size_t key_size_pow2; 253 254 /* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */ 255 size_t key_size_log2; 256 257 /* The key offset within the key buffer. */ 258 size_t key_offset; 259 260 /* The real action data size. */ 261 size_t action_data_size; 262 263 /* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the 264 * next power of 2. 265 */ 266 size_t data_size_pow2; 267 268 /* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */ 269 size_t data_size_log2; 270 271 /* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */ 272 size_t n_buckets; 273 274 /* Bucket mask. Purpose: replace modulo with bitmask and operation. */ 275 size_t bucket_mask; 276 277 /* Total number of key bytes in the bucket, including the key padding bytes. There are 278 * (key_size_pow2 - key_size) padding bytes for each key in the bucket. 279 */ 280 size_t bucket_key_all_size; 281 282 /* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */ 283 size_t bucket_size; 284 285 /* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */ 286 size_t bucket_size_log2; 287 288 /* Timeout in CPU clock cycles. */ 289 uint64_t key_timeout; 290 291 /* Total memory size. */ 292 size_t total_size; 293 }; 294 295 struct table { 296 /* Table parameters. */ 297 struct table_params params; 298 299 /* Key mask. Array of *key_size* bytes. */ 300 uint8_t key_mask0[RTE_CACHE_LINE_SIZE]; 301 302 /* Table buckets. */ 303 uint8_t buckets[0]; 304 } __rte_cache_aligned; 305 306 static int 307 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params) 308 { 309 /* Check input parameters. */ 310 if (!params || 311 !params->key_size || 312 (params->key_size > 64) || 313 !params->n_keys_max || 314 (params->n_keys_max > 1U << 31) || 315 !params->key_timeout) 316 return -EINVAL; 317 318 /* Key. */ 319 p->key_size = params->key_size; 320 321 p->key_size_pow2 = rte_align64pow2(p->key_size); 322 if (p->key_size_pow2 < 8) 323 p->key_size_pow2 = 8; 324 325 p->key_size_log2 = __builtin_ctzll(p->key_size_pow2); 326 327 p->key_offset = params->key_offset; 328 329 /* Data. */ 330 p->action_data_size = params->action_data_size; 331 332 p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size); 333 334 p->data_size_log2 = __builtin_ctzll(p->data_size_pow2); 335 336 /* Buckets. */ 337 p->n_buckets = rte_align32pow2(params->n_keys_max); 338 339 p->bucket_mask = p->n_buckets - 1; 340 341 p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2; 342 343 p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) + 344 p->bucket_key_all_size + 345 TABLE_KEYS_PER_BUCKET * p->data_size_pow2); 346 347 p->bucket_size_log2 = __builtin_ctzll(p->bucket_size); 348 349 /* Timeout. */ 350 p->key_timeout = params->key_timeout * rte_get_tsc_hz(); 351 352 /* Total size. */ 353 p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size; 354 355 return 0; 356 } 357 358 static inline struct table_bucket * 359 table_bucket_get(struct table *t, size_t bucket_id) 360 { 361 return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2]; 362 } 363 364 static inline uint8_t * 365 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos) 366 { 367 return &b->key[bucket_key_pos << t->params.key_size_log2]; 368 } 369 370 static inline uint64_t * 371 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos) 372 { 373 return (uint64_t *)&b->key[t->params.bucket_key_all_size + 374 (bucket_key_pos << t->params.data_size_log2)]; 375 } 376 377 uint64_t 378 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params) 379 { 380 struct table_params p; 381 int status; 382 383 status = table_params_get(&p, params); 384 385 return status ? 0 : p.total_size; 386 } 387 388 void * 389 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node) 390 { 391 struct table_params p; 392 struct table *t; 393 int status; 394 395 /* Check and process the input parameters. */ 396 status = table_params_get(&p, params); 397 if (status) 398 return NULL; 399 400 /* Memory allocation. */ 401 t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node); 402 if (!t) 403 return NULL; 404 405 /* Memory initialization. */ 406 memcpy(&t->params, &p, sizeof(struct table_params)); 407 408 if (params->key_mask0) 409 memcpy(t->key_mask0, params->key_mask0, params->key_size); 410 else 411 memset(t->key_mask0, 0xFF, params->key_size); 412 413 return t; 414 } 415 416 void 417 rte_swx_table_learner_free(void *table) 418 { 419 struct table *t = table; 420 421 if (!t) 422 return; 423 424 env_free(t, t->params.total_size); 425 } 426 427 struct mailbox { 428 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */ 429 struct table_bucket *bucket; 430 431 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */ 432 uint32_t input_sig; 433 434 /* Writer: lookup state 1. Reader(s): add(). */ 435 uint8_t *input_key; 436 437 /* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */ 438 uint32_t hit; 439 440 /* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */ 441 size_t bucket_key_pos; 442 443 /* State. */ 444 int state; 445 }; 446 447 uint64_t 448 rte_swx_table_learner_mailbox_size_get(void) 449 { 450 return sizeof(struct mailbox); 451 } 452 453 int 454 rte_swx_table_learner_lookup(void *table, 455 void *mailbox, 456 uint64_t input_time, 457 uint8_t **key, 458 uint64_t *action_id, 459 uint8_t **action_data, 460 int *hit) 461 { 462 struct table *t = table; 463 struct mailbox *m = mailbox; 464 465 switch (m->state) { 466 case 0: { 467 uint8_t *input_key; 468 struct table_bucket *b; 469 size_t bucket_id; 470 uint32_t input_sig; 471 472 input_key = &(*key)[t->params.key_offset]; 473 input_sig = hash(input_key, t->key_mask0, t->params.key_size_pow2, 0); 474 bucket_id = input_sig & t->params.bucket_mask; 475 b = table_bucket_get(t, bucket_id); 476 477 rte_prefetch0(b); 478 rte_prefetch0(&b->key[0]); 479 rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]); 480 481 m->bucket = b; 482 m->input_key = input_key; 483 m->input_sig = input_sig | 1; 484 m->state = 1; 485 return 0; 486 } 487 488 case 1: { 489 struct table_bucket *b = m->bucket; 490 uint32_t i; 491 492 /* Search the input key through the bucket keys. */ 493 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) { 494 uint64_t time = b->time[i]; 495 uint32_t sig = b->sig[i]; 496 uint8_t *key = table_bucket_key_get(t, b, i); 497 uint32_t key_size_pow2 = t->params.key_size_pow2; 498 499 time <<= 32; 500 501 if ((time > input_time) && 502 (sig == m->input_sig) && 503 table_keycmp(key, m->input_key, t->key_mask0, key_size_pow2)) { 504 uint64_t *data = table_bucket_data_get(t, b, i); 505 506 /* Hit. */ 507 rte_prefetch0(data); 508 509 b->time[i] = (input_time + t->params.key_timeout) >> 32; 510 511 m->hit = 1; 512 m->bucket_key_pos = i; 513 m->state = 0; 514 515 *action_id = data[0]; 516 *action_data = (uint8_t *)&data[1]; 517 *hit = 1; 518 return 1; 519 } 520 } 521 522 /* Miss. */ 523 m->hit = 0; 524 m->state = 0; 525 526 *hit = 0; 527 return 1; 528 } 529 530 default: 531 /* This state should never be reached. Miss. */ 532 m->hit = 0; 533 m->state = 0; 534 535 *hit = 0; 536 return 1; 537 } 538 } 539 540 uint32_t 541 rte_swx_table_learner_add(void *table, 542 void *mailbox, 543 uint64_t input_time, 544 uint64_t action_id, 545 uint8_t *action_data) 546 { 547 struct table *t = table; 548 struct mailbox *m = mailbox; 549 struct table_bucket *b = m->bucket; 550 uint32_t i; 551 552 /* Lookup hit: The key, key signature and key time are already properly configured (the key 553 * time was bumped by lookup), only the key data need to be updated. 554 */ 555 if (m->hit) { 556 uint64_t *data = table_bucket_data_get(t, b, m->bucket_key_pos); 557 558 /* Install the key data. */ 559 data[0] = action_id; 560 if (t->params.action_data_size && action_data) 561 memcpy(&data[1], action_data, t->params.action_data_size); 562 563 return 0; 564 } 565 566 /* Lookup miss: Search for a free position in the current bucket and install the key. */ 567 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) { 568 uint64_t time = b->time[i]; 569 570 time <<= 32; 571 572 /* Free position: Either there was never a key installed here, so the key time is 573 * set to zero (the init value), which is always less than the current time, or this 574 * position was used before, but the key expired (the key time is in the past). 575 */ 576 if (time < input_time) { 577 uint8_t *key = table_bucket_key_get(t, b, i); 578 uint64_t *data = table_bucket_data_get(t, b, i); 579 580 /* Install the key. */ 581 b->time[i] = (input_time + t->params.key_timeout) >> 32; 582 b->sig[i] = m->input_sig; 583 memcpy(key, m->input_key, t->params.key_size); 584 585 /* Install the key data. */ 586 data[0] = action_id; 587 if (t->params.action_data_size && action_data) 588 memcpy(&data[1], action_data, t->params.action_data_size); 589 590 /* Mailbox. */ 591 m->hit = 1; 592 m->bucket_key_pos = i; 593 594 return 0; 595 } 596 } 597 598 /* Bucket full. */ 599 return 1; 600 } 601 602 void 603 rte_swx_table_learner_delete(void *table __rte_unused, 604 void *mailbox) 605 { 606 struct mailbox *m = mailbox; 607 608 if (m->hit) { 609 struct table_bucket *b = m->bucket; 610 611 /* Expire the key. */ 612 b->time[m->bucket_key_pos] = 0; 613 614 /* Mailbox. */ 615 m->hit = 0; 616 } 617 } 618