1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020 Intel Corporation 3 */ 4 #include <string.h> 5 #include <stdio.h> 6 #include <errno.h> 7 8 #include <rte_common.h> 9 #include <rte_cycles.h> 10 #include <rte_prefetch.h> 11 12 #include "rte_swx_table_learner.h" 13 14 #ifndef RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 15 #define RTE_SWX_TABLE_LEARNER_USE_HUGE_PAGES 1 16 #endif 17 18 #ifndef RTE_SWX_TABLE_SELECTOR_HUGE_PAGES_DISABLE 19 20 #include <rte_malloc.h> 21 22 static void * 23 env_calloc(size_t size, size_t alignment, int numa_node) 24 { 25 return rte_zmalloc_socket(NULL, size, alignment, numa_node); 26 } 27 28 static void 29 env_free(void *start, size_t size __rte_unused) 30 { 31 rte_free(start); 32 } 33 34 #else 35 36 #include <numa.h> 37 38 static void * 39 env_calloc(size_t size, size_t alignment __rte_unused, int numa_node) 40 { 41 void *start; 42 43 if (numa_available() == -1) 44 return NULL; 45 46 start = numa_alloc_onnode(size, numa_node); 47 if (!start) 48 return NULL; 49 50 memset(start, 0, size); 51 return start; 52 } 53 54 static void 55 env_free(void *start, size_t size) 56 { 57 if ((numa_available() == -1) || !start) 58 return; 59 60 numa_free(start, size); 61 } 62 63 #endif 64 65 #if defined(RTE_ARCH_X86_64) 66 67 #include <x86intrin.h> 68 69 #define crc32_u64(crc, v) _mm_crc32_u64(crc, v) 70 71 #else 72 73 static inline uint64_t 74 crc32_u64_generic(uint64_t crc, uint64_t value) 75 { 76 int i; 77 78 crc = (crc & 0xFFFFFFFFLLU) ^ value; 79 for (i = 63; i >= 0; i--) { 80 uint64_t mask; 81 82 mask = -(crc & 1LLU); 83 crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask); 84 } 85 86 return crc; 87 } 88 89 #define crc32_u64(crc, v) crc32_u64_generic(crc, v) 90 91 #endif 92 93 /* Key size needs to be one of: 8, 16, 32 or 64. */ 94 static inline uint32_t 95 hash(void *key, void *key_mask, uint32_t key_size, uint32_t seed) 96 { 97 uint64_t *k = key; 98 uint64_t *m = key_mask; 99 uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; 100 101 switch (key_size) { 102 case 8: 103 crc0 = crc32_u64(seed, k[0] & m[0]); 104 return crc0; 105 106 case 16: 107 k0 = k[0] & m[0]; 108 109 crc0 = crc32_u64(k0, seed); 110 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]); 111 112 crc0 ^= crc1; 113 114 return crc0; 115 116 case 32: 117 k0 = k[0] & m[0]; 118 k2 = k[2] & m[2]; 119 120 crc0 = crc32_u64(k0, seed); 121 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]); 122 123 crc2 = crc32_u64(k2, k[3] & m[3]); 124 crc3 = k2 >> 32; 125 126 crc0 = crc32_u64(crc0, crc1); 127 crc1 = crc32_u64(crc2, crc3); 128 129 crc0 ^= crc1; 130 131 return crc0; 132 133 case 64: 134 k0 = k[0] & m[0]; 135 k2 = k[2] & m[2]; 136 k5 = k[5] & m[5]; 137 138 crc0 = crc32_u64(k0, seed); 139 crc1 = crc32_u64(k0 >> 32, k[1] & m[1]); 140 141 crc2 = crc32_u64(k2, k[3] & m[3]); 142 crc3 = crc32_u64(k2 >> 32, k[4] & m[4]); 143 144 crc4 = crc32_u64(k5, k[6] & m[6]); 145 crc5 = crc32_u64(k5 >> 32, k[7] & m[7]); 146 147 crc0 = crc32_u64(crc0, (crc1 << 32) ^ crc2); 148 crc1 = crc32_u64(crc3, (crc4 << 32) ^ crc5); 149 150 crc0 ^= crc1; 151 152 return crc0; 153 154 default: 155 crc0 = 0; 156 return crc0; 157 } 158 } 159 160 /* 161 * Return: 0 = Keys are NOT equal; 1 = Keys are equal. 162 */ 163 static inline uint32_t 164 table_keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes) 165 { 166 uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask; 167 168 switch (n_bytes) { 169 case 8: { 170 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 171 uint32_t result = 1; 172 173 if (xor0) 174 result = 0; 175 return result; 176 } 177 178 case 16: { 179 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 180 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]); 181 uint64_t or = xor0 | xor1; 182 uint32_t result = 1; 183 184 if (or) 185 result = 0; 186 return result; 187 } 188 189 case 32: { 190 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 191 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]); 192 uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]); 193 uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]); 194 uint64_t or = (xor0 | xor1) | (xor2 | xor3); 195 uint32_t result = 1; 196 197 if (or) 198 result = 0; 199 return result; 200 } 201 202 case 64: { 203 uint64_t xor0 = a64[0] ^ (b64[0] & b_mask64[0]); 204 uint64_t xor1 = a64[1] ^ (b64[1] & b_mask64[1]); 205 uint64_t xor2 = a64[2] ^ (b64[2] & b_mask64[2]); 206 uint64_t xor3 = a64[3] ^ (b64[3] & b_mask64[3]); 207 uint64_t xor4 = a64[4] ^ (b64[4] & b_mask64[4]); 208 uint64_t xor5 = a64[5] ^ (b64[5] & b_mask64[5]); 209 uint64_t xor6 = a64[6] ^ (b64[6] & b_mask64[6]); 210 uint64_t xor7 = a64[7] ^ (b64[7] & b_mask64[7]); 211 uint64_t or = ((xor0 | xor1) | (xor2 | xor3)) | 212 ((xor4 | xor5) | (xor6 | xor7)); 213 uint32_t result = 1; 214 215 if (or) 216 result = 0; 217 return result; 218 } 219 220 default: { 221 uint32_t i; 222 223 for (i = 0; i < n_bytes / sizeof(uint64_t); i++) 224 if (a64[i] != (b64[i] & b_mask64[i])) 225 return 0; 226 return 1; 227 } 228 } 229 } 230 231 #define TABLE_KEYS_PER_BUCKET 4 232 233 #define TABLE_BUCKET_PAD_SIZE \ 234 (RTE_CACHE_LINE_SIZE - TABLE_KEYS_PER_BUCKET * (sizeof(uint32_t) + sizeof(uint32_t))) 235 236 struct table_bucket { 237 uint32_t time[TABLE_KEYS_PER_BUCKET]; 238 uint32_t sig[TABLE_KEYS_PER_BUCKET]; 239 uint8_t pad[TABLE_BUCKET_PAD_SIZE]; 240 uint8_t key[0]; 241 }; 242 243 struct table_params { 244 /* The real key size. Must be non-zero. */ 245 size_t key_size; 246 247 /* They key size upgrated to the next power of 2. This used for hash generation (in 248 * increments of 8 bytes, from 8 to 64 bytes) and for run-time key comparison. This is why 249 * key sizes bigger than 64 bytes are not allowed. 250 */ 251 size_t key_size_pow2; 252 253 /* log2(key_size_pow2). Purpose: avoid multiplication with non-power-of-2 numbers. */ 254 size_t key_size_log2; 255 256 /* The key offset within the key buffer. */ 257 size_t key_offset; 258 259 /* The real action data size. */ 260 size_t action_data_size; 261 262 /* The data size, i.e. the 8-byte action_id field plus the action data size, upgraded to the 263 * next power of 2. 264 */ 265 size_t data_size_pow2; 266 267 /* log2(data_size_pow2). Purpose: avoid multiplication with non-power of 2 numbers. */ 268 size_t data_size_log2; 269 270 /* Number of buckets. Must be a power of 2 to avoid modulo with non-power-of-2 numbers. */ 271 size_t n_buckets; 272 273 /* Bucket mask. Purpose: replace modulo with bitmask and operation. */ 274 size_t bucket_mask; 275 276 /* Total number of key bytes in the bucket, including the key padding bytes. There are 277 * (key_size_pow2 - key_size) padding bytes for each key in the bucket. 278 */ 279 size_t bucket_key_all_size; 280 281 /* Bucket size. Must be a power of 2 to avoid multiplication with non-power-of-2 number. */ 282 size_t bucket_size; 283 284 /* log2(bucket_size). Purpose: avoid multiplication with non-power of 2 numbers. */ 285 size_t bucket_size_log2; 286 287 /* Timeout in CPU clock cycles. */ 288 uint64_t key_timeout; 289 290 /* Total memory size. */ 291 size_t total_size; 292 }; 293 294 struct table { 295 /* Table parameters. */ 296 struct table_params params; 297 298 /* Key mask. Array of *key_size* bytes. */ 299 uint8_t key_mask0[RTE_CACHE_LINE_SIZE]; 300 301 /* Table buckets. */ 302 uint8_t buckets[0]; 303 } __rte_cache_aligned; 304 305 static int 306 table_params_get(struct table_params *p, struct rte_swx_table_learner_params *params) 307 { 308 /* Check input parameters. */ 309 if (!params || 310 !params->key_size || 311 (params->key_size > 64) || 312 !params->n_keys_max || 313 (params->n_keys_max > 1U << 31) || 314 !params->key_timeout) 315 return -EINVAL; 316 317 /* Key. */ 318 p->key_size = params->key_size; 319 320 p->key_size_pow2 = rte_align64pow2(p->key_size); 321 if (p->key_size_pow2 < 8) 322 p->key_size_pow2 = 8; 323 324 p->key_size_log2 = __builtin_ctzll(p->key_size_pow2); 325 326 p->key_offset = params->key_offset; 327 328 /* Data. */ 329 p->action_data_size = params->action_data_size; 330 331 p->data_size_pow2 = rte_align64pow2(sizeof(uint64_t) + p->action_data_size); 332 333 p->data_size_log2 = __builtin_ctzll(p->data_size_pow2); 334 335 /* Buckets. */ 336 p->n_buckets = rte_align32pow2(params->n_keys_max); 337 338 p->bucket_mask = p->n_buckets - 1; 339 340 p->bucket_key_all_size = TABLE_KEYS_PER_BUCKET * p->key_size_pow2; 341 342 p->bucket_size = rte_align64pow2(sizeof(struct table_bucket) + 343 p->bucket_key_all_size + 344 TABLE_KEYS_PER_BUCKET * p->data_size_pow2); 345 346 p->bucket_size_log2 = __builtin_ctzll(p->bucket_size); 347 348 /* Timeout. */ 349 p->key_timeout = params->key_timeout * rte_get_tsc_hz(); 350 351 /* Total size. */ 352 p->total_size = sizeof(struct table) + p->n_buckets * p->bucket_size; 353 354 return 0; 355 } 356 357 static inline struct table_bucket * 358 table_bucket_get(struct table *t, size_t bucket_id) 359 { 360 return (struct table_bucket *)&t->buckets[bucket_id << t->params.bucket_size_log2]; 361 } 362 363 static inline uint8_t * 364 table_bucket_key_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos) 365 { 366 return &b->key[bucket_key_pos << t->params.key_size_log2]; 367 } 368 369 static inline uint64_t * 370 table_bucket_data_get(struct table *t, struct table_bucket *b, size_t bucket_key_pos) 371 { 372 return (uint64_t *)&b->key[t->params.bucket_key_all_size + 373 (bucket_key_pos << t->params.data_size_log2)]; 374 } 375 376 uint64_t 377 rte_swx_table_learner_footprint_get(struct rte_swx_table_learner_params *params) 378 { 379 struct table_params p; 380 int status; 381 382 status = table_params_get(&p, params); 383 384 return status ? 0 : p.total_size; 385 } 386 387 void * 388 rte_swx_table_learner_create(struct rte_swx_table_learner_params *params, int numa_node) 389 { 390 struct table_params p; 391 struct table *t; 392 int status; 393 394 /* Check and process the input parameters. */ 395 status = table_params_get(&p, params); 396 if (status) 397 return NULL; 398 399 /* Memory allocation. */ 400 t = env_calloc(p.total_size, RTE_CACHE_LINE_SIZE, numa_node); 401 if (!t) 402 return NULL; 403 404 /* Memory initialization. */ 405 memcpy(&t->params, &p, sizeof(struct table_params)); 406 407 if (params->key_mask0) 408 memcpy(t->key_mask0, params->key_mask0, params->key_size); 409 else 410 memset(t->key_mask0, 0xFF, params->key_size); 411 412 return t; 413 } 414 415 void 416 rte_swx_table_learner_free(void *table) 417 { 418 struct table *t = table; 419 420 if (!t) 421 return; 422 423 env_free(t, t->params.total_size); 424 } 425 426 struct mailbox { 427 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */ 428 struct table_bucket *bucket; 429 430 /* Writer: lookup state 0. Reader(s): lookup state 1, add(). */ 431 uint32_t input_sig; 432 433 /* Writer: lookup state 1. Reader(s): add(). */ 434 uint8_t *input_key; 435 436 /* Writer: lookup state 1. Reader(s): add(). Values: 0 = miss; 1 = hit. */ 437 uint32_t hit; 438 439 /* Writer: lookup state 1. Reader(s): add(). Valid only when hit is non-zero. */ 440 size_t bucket_key_pos; 441 442 /* State. */ 443 int state; 444 }; 445 446 uint64_t 447 rte_swx_table_learner_mailbox_size_get(void) 448 { 449 return sizeof(struct mailbox); 450 } 451 452 int 453 rte_swx_table_learner_lookup(void *table, 454 void *mailbox, 455 uint64_t input_time, 456 uint8_t **key, 457 uint64_t *action_id, 458 uint8_t **action_data, 459 int *hit) 460 { 461 struct table *t = table; 462 struct mailbox *m = mailbox; 463 464 switch (m->state) { 465 case 0: { 466 uint8_t *input_key; 467 struct table_bucket *b; 468 size_t bucket_id; 469 uint32_t input_sig; 470 471 input_key = &(*key)[t->params.key_offset]; 472 input_sig = hash(input_key, t->key_mask0, t->params.key_size_pow2, 0); 473 bucket_id = input_sig & t->params.bucket_mask; 474 b = table_bucket_get(t, bucket_id); 475 476 rte_prefetch0(b); 477 rte_prefetch0(&b->key[0]); 478 rte_prefetch0(&b->key[RTE_CACHE_LINE_SIZE]); 479 480 m->bucket = b; 481 m->input_key = input_key; 482 m->input_sig = input_sig | 1; 483 m->state = 1; 484 return 0; 485 } 486 487 case 1: { 488 struct table_bucket *b = m->bucket; 489 uint32_t i; 490 491 /* Search the input key through the bucket keys. */ 492 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) { 493 uint64_t time = b->time[i]; 494 uint32_t sig = b->sig[i]; 495 uint8_t *key = table_bucket_key_get(t, b, i); 496 uint32_t key_size_pow2 = t->params.key_size_pow2; 497 498 time <<= 32; 499 500 if ((time > input_time) && 501 (sig == m->input_sig) && 502 table_keycmp(key, m->input_key, t->key_mask0, key_size_pow2)) { 503 uint64_t *data = table_bucket_data_get(t, b, i); 504 505 /* Hit. */ 506 rte_prefetch0(data); 507 508 b->time[i] = (input_time + t->params.key_timeout) >> 32; 509 510 m->hit = 1; 511 m->bucket_key_pos = i; 512 m->state = 0; 513 514 *action_id = data[0]; 515 *action_data = (uint8_t *)&data[1]; 516 *hit = 1; 517 return 1; 518 } 519 } 520 521 /* Miss. */ 522 m->hit = 0; 523 m->state = 0; 524 525 *hit = 0; 526 return 1; 527 } 528 529 default: 530 /* This state should never be reached. Miss. */ 531 m->hit = 0; 532 m->state = 0; 533 534 *hit = 0; 535 return 1; 536 } 537 } 538 539 uint32_t 540 rte_swx_table_learner_add(void *table, 541 void *mailbox, 542 uint64_t input_time, 543 uint64_t action_id, 544 uint8_t *action_data) 545 { 546 struct table *t = table; 547 struct mailbox *m = mailbox; 548 struct table_bucket *b = m->bucket; 549 uint32_t i; 550 551 /* Lookup hit: The key, key signature and key time are already properly configured (the key 552 * time was bumped by lookup), only the key data need to be updated. 553 */ 554 if (m->hit) { 555 uint64_t *data = table_bucket_data_get(t, b, m->bucket_key_pos); 556 557 /* Install the key data. */ 558 data[0] = action_id; 559 if (t->params.action_data_size && action_data) 560 memcpy(&data[1], action_data, t->params.action_data_size); 561 562 return 0; 563 } 564 565 /* Lookup miss: Search for a free position in the current bucket and install the key. */ 566 for (i = 0; i < TABLE_KEYS_PER_BUCKET; i++) { 567 uint64_t time = b->time[i]; 568 569 time <<= 32; 570 571 /* Free position: Either there was never a key installed here, so the key time is 572 * set to zero (the init value), which is always less than the current time, or this 573 * position was used before, but the key expired (the key time is in the past). 574 */ 575 if (time < input_time) { 576 uint8_t *key = table_bucket_key_get(t, b, i); 577 uint64_t *data = table_bucket_data_get(t, b, i); 578 579 /* Install the key. */ 580 b->time[i] = (input_time + t->params.key_timeout) >> 32; 581 b->sig[i] = m->input_sig; 582 memcpy(key, m->input_key, t->params.key_size); 583 584 /* Install the key data. */ 585 data[0] = action_id; 586 if (t->params.action_data_size && action_data) 587 memcpy(&data[1], action_data, t->params.action_data_size); 588 589 /* Mailbox. */ 590 m->hit = 1; 591 m->bucket_key_pos = i; 592 593 return 0; 594 } 595 } 596 597 /* Bucket full. */ 598 return 1; 599 } 600 601 void 602 rte_swx_table_learner_delete(void *table __rte_unused, 603 void *mailbox) 604 { 605 struct mailbox *m = mailbox; 606 607 if (m->hit) { 608 struct table_bucket *b = m->bucket; 609 610 /* Expire the key. */ 611 b->time[m->bucket_key_pos] = 0; 612 613 /* Mailbox. */ 614 m->hit = 0; 615 } 616 } 617