1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
3 * Copyright(c) 2020 Arm Limited
4 */
5
6 #include <string.h>
7 #include <stdint.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <sys/queue.h>
11
12 #include <rte_log.h>
13 #include <rte_common.h>
14 #include <rte_malloc.h>
15 #include <rte_eal_memconfig.h>
16 #include <rte_string_fns.h>
17 #include <rte_errno.h>
18 #include <rte_tailq.h>
19
20 #include "rte_lpm.h"
21 #include "lpm_log.h"
22
23 RTE_LOG_REGISTER_DEFAULT(lpm_logtype, INFO);
24
25 TAILQ_HEAD(rte_lpm_list, rte_tailq_entry);
26
27 static struct rte_tailq_elem rte_lpm_tailq = {
28 .name = "RTE_LPM",
29 };
30 EAL_REGISTER_TAILQ(rte_lpm_tailq)
31
32 #define MAX_DEPTH_TBL24 24
33
34 enum valid_flag {
35 INVALID = 0,
36 VALID
37 };
38
39 /** @internal Rule structure. */
40 struct rte_lpm_rule {
41 uint32_t ip; /**< Rule IP address. */
42 uint32_t next_hop; /**< Rule next hop. */
43 };
44
45 /** @internal Contains metadata about the rules table. */
46 struct rte_lpm_rule_info {
47 uint32_t used_rules; /**< Used rules so far. */
48 uint32_t first_rule; /**< Indexes the first rule of a given depth. */
49 };
50
51 /** @internal LPM structure. */
52 struct __rte_lpm {
53 /* Exposed LPM data. */
54 struct rte_lpm lpm;
55
56 /* LPM metadata. */
57 char name[RTE_LPM_NAMESIZE]; /**< Name of the lpm. */
58 uint32_t max_rules; /**< Max. balanced rules per lpm. */
59 uint32_t number_tbl8s; /**< Number of tbl8s. */
60 /** Rule info table. */
61 struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH];
62 struct rte_lpm_rule *rules_tbl; /**< LPM rules. */
63
64 /* RCU config. */
65 struct rte_rcu_qsbr *v; /* RCU QSBR variable. */
66 enum rte_lpm_qsbr_mode rcu_mode;/* Blocking, defer queue. */
67 struct rte_rcu_qsbr_dq *dq; /* RCU QSBR defer queue. */
68 };
69
70 /* Macro to enable/disable run-time checks. */
71 #if defined(RTE_LIBRTE_LPM_DEBUG)
72 #include <rte_debug.h>
73 #define VERIFY_DEPTH(depth) do { \
74 if ((depth == 0) || (depth > RTE_LPM_MAX_DEPTH)) \
75 rte_panic("LPM: Invalid depth (%u) at line %d", \
76 (unsigned)(depth), __LINE__); \
77 } while (0)
78 #else
79 #define VERIFY_DEPTH(depth)
80 #endif
81
82 /*
83 * Converts a given depth value to its corresponding mask value.
84 *
85 * depth (IN) : range = 1 - 32
86 * mask (OUT) : 32bit mask
87 */
88 static uint32_t __rte_pure
89 depth_to_mask(uint8_t depth)
90 {
91 VERIFY_DEPTH(depth);
92
93 /* To calculate the mask, start with a 1 in the most significant bit and
94 * shift right arithmetically so the vacated high bits are filled with 1's.
95 */
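/*
 * e.g. depth 8 -> 0xFF000000, depth 24 -> 0xFFFFFF00 and depth 32 ->
 * 0xFFFFFFFF (the arithmetic shift keeps the top bit set).
 */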
96 return (int)0x80000000 >> (depth - 1);
97 }
98
99 /*
100 * Converts given depth value to its corresponding range value.
101 */
102 static uint32_t __rte_pure
103 depth_to_range(uint8_t depth)
104 {
105 VERIFY_DEPTH(depth);
106
107 /*
108 * Calculate tbl24 range. (Note: 2^depth = 1 << depth)
109 */
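/*
 * e.g. depth 16 -> 256 tbl24 entries, depth 24 -> 1 tbl24 entry,
 * depth 28 -> 16 tbl8 entries, depth 32 -> 1 tbl8 entry.
 */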
110 if (depth <= MAX_DEPTH_TBL24)
111 return 1 << (MAX_DEPTH_TBL24 - depth);
112
113 /* Else if depth is greater than 24 */
114 return 1 << (RTE_LPM_MAX_DEPTH - depth);
115 }
116
117 /*
118 * Find an existing lpm table and return a pointer to it.
119 */
120 struct rte_lpm *
121 rte_lpm_find_existing(const char *name)
122 {
123 struct __rte_lpm *i_lpm = NULL;
124 struct rte_tailq_entry *te;
125 struct rte_lpm_list *lpm_list;
126
127 lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
128
129 rte_mcfg_tailq_read_lock();
130 TAILQ_FOREACH(te, lpm_list, next) {
131 i_lpm = te->data;
132 if (strncmp(name, i_lpm->name, RTE_LPM_NAMESIZE) == 0)
133 break;
134 }
135 rte_mcfg_tailq_read_unlock();
136
137 if (te == NULL) {
138 rte_errno = ENOENT;
139 return NULL;
140 }
141
142 return &i_lpm->lpm;
143 }
144
145 /*
146 * Allocates memory for LPM object
147 */
148 struct rte_lpm *
149 rte_lpm_create(const char *name, int socket_id,
150 const struct rte_lpm_config *config)
151 {
152 char mem_name[RTE_LPM_NAMESIZE];
153 struct __rte_lpm *i_lpm;
154 struct rte_lpm *lpm = NULL;
155 struct rte_tailq_entry *te;
156 uint32_t mem_size, rules_size, tbl8s_size;
157 struct rte_lpm_list *lpm_list;
158
159 lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
160
161 RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl_entry) != 4);
162
163 /* Check user arguments. */
164 if ((name == NULL) || (socket_id < -1) || (config->max_rules == 0)
165 || config->number_tbl8s > RTE_LPM_MAX_TBL8_NUM_GROUPS) {
166 rte_errno = EINVAL;
167 return NULL;
168 }
169
170 snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
171
172 rte_mcfg_tailq_write_lock();
173
174 /* Ensure no LPM with this name already exists. */
175 TAILQ_FOREACH(te, lpm_list, next) {
176 i_lpm = te->data;
177 if (strncmp(name, i_lpm->name, RTE_LPM_NAMESIZE) == 0)
178 break;
179 }
180
181 if (te != NULL) {
182 rte_errno = EEXIST;
183 goto exit;
184 }
185
186 /* Determine the amount of memory to allocate. */
187 mem_size = sizeof(*i_lpm);
188 rules_size = sizeof(struct rte_lpm_rule) * config->max_rules;
189 tbl8s_size = sizeof(struct rte_lpm_tbl_entry) *
190 RTE_LPM_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s;
191
192 /* allocate tailq entry */
193 te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0);
194 if (te == NULL) {
195 LPM_LOG(ERR, "Failed to allocate tailq entry");
196 rte_errno = ENOMEM;
197 goto exit;
198 }
199
200 /* Allocate memory to store the LPM data structures. */
201 i_lpm = rte_zmalloc_socket(mem_name, mem_size,
202 RTE_CACHE_LINE_SIZE, socket_id);
203 if (i_lpm == NULL) {
204 LPM_LOG(ERR, "LPM memory allocation failed");
205 rte_free(te);
206 rte_errno = ENOMEM;
207 goto exit;
208 }
209
210 i_lpm->rules_tbl = rte_zmalloc_socket(NULL,
211 (size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id);
212
213 if (i_lpm->rules_tbl == NULL) {
214 LPM_LOG(ERR, "LPM rules_tbl memory allocation failed");
215 rte_free(i_lpm);
216 i_lpm = NULL;
217 rte_free(te);
218 rte_errno = ENOMEM;
219 goto exit;
220 }
221
222 i_lpm->lpm.tbl8 = rte_zmalloc_socket(NULL,
223 (size_t)tbl8s_size, RTE_CACHE_LINE_SIZE, socket_id);
224
225 if (i_lpm->lpm.tbl8 == NULL) {
226 LPM_LOG(ERR, "LPM tbl8 memory allocation failed");
227 rte_free(i_lpm->rules_tbl);
228 rte_free(i_lpm);
229 i_lpm = NULL;
230 rte_free(te);
231 rte_errno = ENOMEM;
232 goto exit;
233 }
234
235 /* Save user arguments. */
236 i_lpm->max_rules = config->max_rules;
237 i_lpm->number_tbl8s = config->number_tbl8s;
238 strlcpy(i_lpm->name, name, sizeof(i_lpm->name));
239
240 te->data = i_lpm;
241 lpm = &i_lpm->lpm;
242
243 TAILQ_INSERT_TAIL(lpm_list, te, next);
244
245 exit:
246 rte_mcfg_tailq_write_unlock();
247
248 return lpm;
249 }
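/*
 * Illustrative use of rte_lpm_create() above -- a sketch only, with assumed
 * sizes and a hypothetical table name:
 *
 *	struct rte_lpm_config config = {
 *		.max_rules = 1024,
 *		.number_tbl8s = 256,
 *		.flags = 0,
 *	};
 *	struct rte_lpm *lpm = rte_lpm_create("example", SOCKET_ID_ANY, &config);
 *
 *	if (lpm == NULL && rte_errno == EEXIST)
 *		lpm = rte_lpm_find_existing("example");
 */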
250
251 /*
252 * Deallocates memory for given LPM table.
253 */
254 void
255 rte_lpm_free(struct rte_lpm *lpm)
256 {
257 struct rte_lpm_list *lpm_list;
258 struct rte_tailq_entry *te;
259 struct __rte_lpm *i_lpm;
260
261 /* Check user arguments. */
262 if (lpm == NULL)
263 return;
264 i_lpm = container_of(lpm, struct __rte_lpm, lpm);
265
266 lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
267
268 rte_mcfg_tailq_write_lock();
269
270 /* find our tailq entry */
271 TAILQ_FOREACH(te, lpm_list, next) {
272 if (te->data == (void *)i_lpm)
273 break;
274 }
275 if (te != NULL)
276 TAILQ_REMOVE(lpm_list, te, next);
277
278 rte_mcfg_tailq_write_unlock();
279
280 if (i_lpm->dq != NULL)
281 rte_rcu_qsbr_dq_delete(i_lpm->dq);
282 rte_free(i_lpm->lpm.tbl8);
283 rte_free(i_lpm->rules_tbl);
284 rte_free(i_lpm);
285 rte_free(te);
286 }
287
288 static void
289 __lpm_rcu_qsbr_free_resource(void *p, void *data, unsigned int n)
290 {
291 struct rte_lpm_tbl_entry *tbl8 = ((struct __rte_lpm *)p)->lpm.tbl8;
292 struct rte_lpm_tbl_entry zero_tbl8_entry = {0};
293 uint32_t tbl8_group_index = *(uint32_t *)data;
294
295 RTE_SET_USED(n);
296 /* Set tbl8 group invalid */
297 __atomic_store(&tbl8[tbl8_group_index], &zero_tbl8_entry,
298 __ATOMIC_RELAXED);
299 }
300
301 /* Associate QSBR variable with an LPM object.
302 */
303 int
304 rte_lpm_rcu_qsbr_add(struct rte_lpm *lpm, struct rte_lpm_rcu_config *cfg)
305 {
306 struct rte_rcu_qsbr_dq_parameters params = {0};
307 char rcu_dq_name[RTE_RCU_QSBR_DQ_NAMESIZE];
308 struct __rte_lpm *i_lpm;
309
310 if (lpm == NULL || cfg == NULL) {
311 rte_errno = EINVAL;
312 return 1;
313 }
314
315 i_lpm = container_of(lpm, struct __rte_lpm, lpm);
316 if (i_lpm->v != NULL) {
317 rte_errno = EEXIST;
318 return 1;
319 }
320
321 if (cfg->mode == RTE_LPM_QSBR_MODE_SYNC) {
322 /* No other things to do. */
323 } else if (cfg->mode == RTE_LPM_QSBR_MODE_DQ) {
324 /* Init QSBR defer queue. */
325 snprintf(rcu_dq_name, sizeof(rcu_dq_name),
326 "LPM_RCU_%s", i_lpm->name);
327 params.name = rcu_dq_name;
328 params.size = cfg->dq_size;
329 if (params.size == 0)
330 params.size = i_lpm->number_tbl8s;
331 params.trigger_reclaim_limit = cfg->reclaim_thd;
332 params.max_reclaim_size = cfg->reclaim_max;
333 if (params.max_reclaim_size == 0)
334 params.max_reclaim_size = RTE_LPM_RCU_DQ_RECLAIM_MAX;
335 params.esize = sizeof(uint32_t); /* tbl8 group index */
336 params.free_fn = __lpm_rcu_qsbr_free_resource;
337 params.p = i_lpm;
338 params.v = cfg->v;
339 i_lpm->dq = rte_rcu_qsbr_dq_create(&params);
340 if (i_lpm->dq == NULL) {
341 LPM_LOG(ERR, "LPM defer queue creation failed");
342 return 1;
343 }
344 } else {
345 rte_errno = EINVAL;
346 return 1;
347 }
348 i_lpm->rcu_mode = cfg->mode;
349 i_lpm->v = cfg->v;
350
351 return 0;
352 }
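/*
 * Illustrative configuration for rte_lpm_rcu_qsbr_add() above -- a sketch
 * that assumes a QSBR variable "v" already sized with
 * rte_rcu_qsbr_get_memsize() and initialised with rte_rcu_qsbr_init().
 * Leaving dq_size and reclaim_max at 0 selects the defaults applied above
 * (number_tbl8s and RTE_LPM_RCU_DQ_RECLAIM_MAX):
 *
 *	struct rte_lpm_rcu_config rcu_cfg = {
 *		.v = v,
 *		.mode = RTE_LPM_QSBR_MODE_DQ,
 *		.dq_size = 0,
 *		.reclaim_thd = 0,
 *		.reclaim_max = 0,
 *	};
 *
 *	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0)
 *		rte_panic("cannot attach RCU to LPM\n");
 */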
353
354 /*
355 * Adds a rule to the rule table.
356 *
357 * NOTE: The rule table is split into 32 groups. Each group contains rules that
358 * apply to a specific prefix depth (i.e. group 1 contains rules that apply to
359 * prefixes with a depth of 1, and so on). In the code below (depth - 1) is used
360 * as the group index because, although the depth range is 1 - 32, groups are
361 * indexed from 0 - 31.
362 * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
363 */
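/*
 * Example: with three /8 rules and two /16 rules the groups sit back to back
 * in rules_tbl: rule_info[7] has first_rule 0 and used_rules 3, while
 * rule_info[15] has first_rule 3 and used_rules 2.  Adding a fourth /8 rule
 * moves the first /16 rule from index 3 to index 5, bumps
 * rule_info[15].first_rule to 4 and stores the new rule at index 3.
 */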
364 static int32_t
365 rule_add(struct __rte_lpm *i_lpm, uint32_t ip_masked, uint8_t depth,
366 uint32_t next_hop)
367 {
368 uint32_t rule_gindex, rule_index, last_rule;
369 int i;
370
371 VERIFY_DEPTH(depth);
372
373 /* Scan through rule group to see if rule already exists. */
374 if (i_lpm->rule_info[depth - 1].used_rules > 0) {
375
376 /* rule_gindex stands for rule group index. */
377 rule_gindex = i_lpm->rule_info[depth - 1].first_rule;
378 /* Initialise rule_index to point to start of rule group. */
379 rule_index = rule_gindex;
380 /* Last rule = Last used rule in this rule group. */
381 last_rule = rule_gindex + i_lpm->rule_info[depth - 1].used_rules;
382
383 for (; rule_index < last_rule; rule_index++) {
384
385 /* If rule already exists update next hop and return. */
386 if (i_lpm->rules_tbl[rule_index].ip == ip_masked) {
387
388 if (i_lpm->rules_tbl[rule_index].next_hop
389 == next_hop)
390 return -EEXIST;
391 i_lpm->rules_tbl[rule_index].next_hop = next_hop;
392
393 return rule_index;
394 }
395 }
396
397 if (rule_index == i_lpm->max_rules)
398 return -ENOSPC;
399 } else {
400 /* Calculate the position in which the rule will be stored. */
401 rule_index = 0;
402
403 for (i = depth - 1; i > 0; i--) {
404 if (i_lpm->rule_info[i - 1].used_rules > 0) {
405 rule_index = i_lpm->rule_info[i - 1].first_rule
406 + i_lpm->rule_info[i - 1].used_rules;
407 break;
408 }
409 }
410 if (rule_index == i_lpm->max_rules)
411 return -ENOSPC;
412
413 i_lpm->rule_info[depth - 1].first_rule = rule_index;
414 }
415
416 /* Make room for the new rule in the array. */
417 for (i = RTE_LPM_MAX_DEPTH; i > depth; i--) {
418 if (i_lpm->rule_info[i - 1].first_rule
419 + i_lpm->rule_info[i - 1].used_rules == i_lpm->max_rules)
420 return -ENOSPC;
421
422 if (i_lpm->rule_info[i - 1].used_rules > 0) {
423 i_lpm->rules_tbl[i_lpm->rule_info[i - 1].first_rule
424 + i_lpm->rule_info[i - 1].used_rules]
425 = i_lpm->rules_tbl[i_lpm->rule_info[i - 1].first_rule];
426 i_lpm->rule_info[i - 1].first_rule++;
427 }
428 }
429
430 /* Add the new rule. */
431 i_lpm->rules_tbl[rule_index].ip = ip_masked;
432 i_lpm->rules_tbl[rule_index].next_hop = next_hop;
433
434 /* Increment the used rules counter for this rule group. */
435 i_lpm->rule_info[depth - 1].used_rules++;
436
437 return rule_index;
438 }
439
440 /*
441 * Delete a rule from the rule table.
442 * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
443 */
444 static void
445 rule_delete(struct __rte_lpm *i_lpm, int32_t rule_index, uint8_t depth)
446 {
447 int i;
448
449 VERIFY_DEPTH(depth);
450
451 i_lpm->rules_tbl[rule_index] =
452 i_lpm->rules_tbl[i_lpm->rule_info[depth - 1].first_rule
453 + i_lpm->rule_info[depth - 1].used_rules - 1];
454
455 for (i = depth; i < RTE_LPM_MAX_DEPTH; i++) {
456 if (i_lpm->rule_info[i].used_rules > 0) {
457 i_lpm->rules_tbl[i_lpm->rule_info[i].first_rule - 1] =
458 i_lpm->rules_tbl[i_lpm->rule_info[i].first_rule
459 + i_lpm->rule_info[i].used_rules - 1];
460 i_lpm->rule_info[i].first_rule--;
461 }
462 }
463
464 i_lpm->rule_info[depth - 1].used_rules--;
465 }
466
467 /*
468 * Finds a rule in rule table.
469 * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
470 */
471 static int32_t
472 rule_find(struct __rte_lpm *i_lpm, uint32_t ip_masked, uint8_t depth)
473 {
474 uint32_t rule_gindex, last_rule, rule_index;
475
476 VERIFY_DEPTH(depth);
477
478 rule_gindex = i_lpm->rule_info[depth - 1].first_rule;
479 last_rule = rule_gindex + i_lpm->rule_info[depth - 1].used_rules;
480
481 /* Scan used rules at given depth to find rule. */
482 for (rule_index = rule_gindex; rule_index < last_rule; rule_index++) {
483 /* If rule is found return the rule index. */
484 if (i_lpm->rules_tbl[rule_index].ip == ip_masked)
485 return rule_index;
486 }
487
488 /* If rule is not found return -EINVAL. */
489 return -EINVAL;
490 }
491
492 /*
493 * Find, clean and allocate a tbl8.
494 */
495 static int32_t
496 _tbl8_alloc(struct __rte_lpm *i_lpm)
497 {
498 uint32_t group_idx; /* tbl8 group index. */
499 struct rte_lpm_tbl_entry *tbl8_entry;
500
501 /* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group. */
502 for (group_idx = 0; group_idx < i_lpm->number_tbl8s; group_idx++) {
503 tbl8_entry = &i_lpm->lpm.tbl8[group_idx *
504 RTE_LPM_TBL8_GROUP_NUM_ENTRIES];
505 /* If a free tbl8 group is found clean it and set as VALID. */
506 if (!tbl8_entry->valid_group) {
507 struct rte_lpm_tbl_entry new_tbl8_entry = {
508 .next_hop = 0,
509 .valid = INVALID,
510 .depth = 0,
511 .valid_group = VALID,
512 };
513
514 memset(&tbl8_entry[0], 0,
515 RTE_LPM_TBL8_GROUP_NUM_ENTRIES *
516 sizeof(tbl8_entry[0]));
517
518 __atomic_store(tbl8_entry, &new_tbl8_entry,
519 __ATOMIC_RELAXED);
520
521 /* Return group index for allocated tbl8 group. */
522 return group_idx;
523 }
524 }
525
526 /* If there are no tbl8 groups free then return error. */
527 return -ENOSPC;
528 }
529
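/*
 * tbl8_alloc() below wraps the linear scan in _tbl8_alloc(): when the scan
 * finds no free group and an RCU defer queue is configured, it reclaims a
 * deferred group and retries the scan once.
 */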
530 static int32_t
531 tbl8_alloc(struct __rte_lpm *i_lpm)
532 {
533 int32_t group_idx; /* tbl8 group index. */
534
535 group_idx = _tbl8_alloc(i_lpm);
536 if (group_idx == -ENOSPC && i_lpm->dq != NULL) {
537 /* If there are no tbl8 groups try to reclaim one. */
538 if (rte_rcu_qsbr_dq_reclaim(i_lpm->dq, 1,
539 NULL, NULL, NULL) == 0)
540 group_idx = _tbl8_alloc(i_lpm);
541 }
542
543 return group_idx;
544 }
545
546 static int32_t
547 tbl8_free(struct __rte_lpm *i_lpm, uint32_t tbl8_group_start)
548 {
549 struct rte_lpm_tbl_entry zero_tbl8_entry = {0};
550 int status;
551
552 if (i_lpm->v == NULL) {
553 /* Set tbl8 group invalid. */
554 __atomic_store(&i_lpm->lpm.tbl8[tbl8_group_start], &zero_tbl8_entry,
555 __ATOMIC_RELAXED);
556 } else if (i_lpm->rcu_mode == RTE_LPM_QSBR_MODE_SYNC) {
557 /* Wait for quiescent state change. */
558 rte_rcu_qsbr_synchronize(i_lpm->v,
559 RTE_QSBR_THRID_INVALID);
560 /* Set tbl8 group invalid. */
561 __atomic_store(&i_lpm->lpm.tbl8[tbl8_group_start], &zero_tbl8_entry,
562 __ATOMIC_RELAXED);
563 } else if (i_lpm->rcu_mode == RTE_LPM_QSBR_MODE_DQ) {
564 /* Push into QSBR defer queue. */
565 status = rte_rcu_qsbr_dq_enqueue(i_lpm->dq,
566 (void *)&tbl8_group_start);
567 if (status == 1) {
568 LPM_LOG(ERR, "Failed to push QSBR FIFO");
569 return -rte_errno;
570 }
571 }
572
573 return 0;
574 }
575
576 static __rte_noinline int32_t
577 add_depth_small(struct __rte_lpm *i_lpm, uint32_t ip, uint8_t depth,
578 uint32_t next_hop)
579 {
580 #define group_idx next_hop
581 uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j;
582
583 /* Calculate the index into Table24. */
584 tbl24_index = ip >> 8;
585 tbl24_range = depth_to_range(depth);
586
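/*
 * e.g. for 10.1.0.0/16 the loop below covers tbl24[0x0A0100] through
 * tbl24[0x0A01FF] (tbl24_index 0x0A0100, tbl24_range 256).
 */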
587 for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
588 /*
589 * Overwrite the entry if the tbl24 entry is invalid, or if it is
590 * valid, non-extended and no deeper than the new rule.
591 */
592 if (!i_lpm->lpm.tbl24[i].valid || (i_lpm->lpm.tbl24[i].valid_group == 0 &&
593 i_lpm->lpm.tbl24[i].depth <= depth)) {
594
595 struct rte_lpm_tbl_entry new_tbl24_entry = {
596 .next_hop = next_hop,
597 .valid = VALID,
598 .valid_group = 0,
599 .depth = depth,
600 };
601
602 /* Setting tbl24 entry in one go to avoid race
603 * conditions
604 */
605 __atomic_store(&i_lpm->lpm.tbl24[i], &new_tbl24_entry,
606 __ATOMIC_RELEASE);
607
608 continue;
609 }
610
611 if (i_lpm->lpm.tbl24[i].valid_group == 1) {
612 /* If tbl24 entry is valid and extended calculate the
613 * index into tbl8.
614 */
615 tbl8_index = i_lpm->lpm.tbl24[i].group_idx *
616 RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
617 tbl8_group_end = tbl8_index +
618 RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
619
620 for (j = tbl8_index; j < tbl8_group_end; j++) {
621 if (!i_lpm->lpm.tbl8[j].valid ||
622 i_lpm->lpm.tbl8[j].depth <= depth) {
623 struct rte_lpm_tbl_entry
624 new_tbl8_entry = {
625 .valid = VALID,
626 .valid_group = VALID,
627 .depth = depth,
628 .next_hop = next_hop,
629 };
630
631 /*
632 * Setting tbl8 entry in one go to avoid
633 * race conditions
634 */
635 __atomic_store(&i_lpm->lpm.tbl8[j],
636 &new_tbl8_entry,
637 __ATOMIC_RELAXED);
638
639 continue;
640 }
641 }
642 }
643 }
644 #undef group_idx
645 return 0;
646 }
647
648 static __rte_noinline int32_t
649 add_depth_big(struct __rte_lpm *i_lpm, uint32_t ip_masked, uint8_t depth,
650 uint32_t next_hop)
651 {
652 #define group_idx next_hop
653 uint32_t tbl24_index;
654 int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index,
655 tbl8_range, i;
656
657 tbl24_index = (ip_masked >> 8);
658 tbl8_range = depth_to_range(depth);
659
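/*
 * e.g. for 10.1.1.16/28 there is a single tbl24 entry (the one for
 * 10.1.1.0/24) and tbl8_range is 16, so 16 tbl8 entries starting at
 * offset 0x10 within the chosen group are written below.
 */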
660 if (!i_lpm->lpm.tbl24[tbl24_index].valid) {
661 /* Search for a free tbl8 group. */
662 tbl8_group_index = tbl8_alloc(i_lpm);
663
664 /* Check tbl8 allocation was successful. */
665 if (tbl8_group_index < 0) {
666 return tbl8_group_index;
667 }
668
669 /* Find index into tbl8 and range. */
670 tbl8_index = (tbl8_group_index *
671 RTE_LPM_TBL8_GROUP_NUM_ENTRIES) +
672 (ip_masked & 0xFF);
673
674 /* Set tbl8 entry. */
675 for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
676 struct rte_lpm_tbl_entry new_tbl8_entry = {
677 .valid = VALID,
678 .depth = depth,
679 .valid_group = i_lpm->lpm.tbl8[i].valid_group,
680 .next_hop = next_hop,
681 };
682 __atomic_store(&i_lpm->lpm.tbl8[i], &new_tbl8_entry,
683 __ATOMIC_RELAXED);
684 }
685
686 /*
687 * Update tbl24 entry to point to new tbl8 entry. Note: The
688 * ext_flag and tbl8_index need to be updated simultaneously,
689 * so assign whole structure in one go
690 */
691
692 struct rte_lpm_tbl_entry new_tbl24_entry = {
693 .group_idx = tbl8_group_index,
694 .valid = VALID,
695 .valid_group = 1,
696 .depth = 0,
697 };
698
699 /* The tbl24 entry must be written only after the
700 * tbl8 entries are written.
701 */
702 __atomic_store(&i_lpm->lpm.tbl24[tbl24_index], &new_tbl24_entry,
703 __ATOMIC_RELEASE);
704
705 } /* If the entry is valid but not extended, expand it into a new tbl8 group. */
706 else if (i_lpm->lpm.tbl24[tbl24_index].valid_group == 0) {
707 /* Search for free tbl8 group. */
708 tbl8_group_index = tbl8_alloc(i_lpm);
709
710 if (tbl8_group_index < 0) {
711 return tbl8_group_index;
712 }
713
714 tbl8_group_start = tbl8_group_index *
715 RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
716 tbl8_group_end = tbl8_group_start +
717 RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
718
719 /* Populate new tbl8 with tbl24 value. */
720 for (i = tbl8_group_start; i < tbl8_group_end; i++) {
721 struct rte_lpm_tbl_entry new_tbl8_entry = {
722 .valid = VALID,
723 .depth = i_lpm->lpm.tbl24[tbl24_index].depth,
724 .valid_group = i_lpm->lpm.tbl8[i].valid_group,
725 .next_hop = i_lpm->lpm.tbl24[tbl24_index].next_hop,
726 };
727 __atomic_store(&i_lpm->lpm.tbl8[i], &new_tbl8_entry,
728 __ATOMIC_RELAXED);
729 }
730
731 tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
732
733 /* Insert new rule into the tbl8 entry. */
734 for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
735 struct rte_lpm_tbl_entry new_tbl8_entry = {
736 .valid = VALID,
737 .depth = depth,
738 .valid_group = i_lpm->lpm.tbl8[i].valid_group,
739 .next_hop = next_hop,
740 };
741 __atomic_store(&i_lpm->lpm.tbl8[i], &new_tbl8_entry,
742 __ATOMIC_RELAXED);
743 }
744
745 /*
746 * Update tbl24 entry to point to new tbl8 entry. Note: The
747 * ext_flag and tbl8_index need to be updated simultaneously,
748 * so assign whole structure in one go.
749 */
750
751 struct rte_lpm_tbl_entry new_tbl24_entry = {
752 .group_idx = tbl8_group_index,
753 .valid = VALID,
754 .valid_group = 1,
755 .depth = 0,
756 };
757
758 /* The tbl24 entry must be written only after the
759 * tbl8 entries are written.
760 */
761 __atomic_store(&i_lpm->lpm.tbl24[tbl24_index], &new_tbl24_entry,
762 __ATOMIC_RELEASE);
763
764 } else { /*
765 * If the entry is valid and extended, calculate the index into tbl8.
766 */
767 tbl8_group_index = i_lpm->lpm.tbl24[tbl24_index].group_idx;
768 tbl8_group_start = tbl8_group_index *
769 RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
770 tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
771
772 for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
773
774 if (!i_lpm->lpm.tbl8[i].valid ||
775 i_lpm->lpm.tbl8[i].depth <= depth) {
776 struct rte_lpm_tbl_entry new_tbl8_entry = {
777 .valid = VALID,
778 .depth = depth,
779 .next_hop = next_hop,
780 .valid_group = i_lpm->lpm.tbl8[i].valid_group,
781 };
782
783 /*
784 * Setting tbl8 entry in one go to avoid race
785 * condition
786 */
787 __atomic_store(&i_lpm->lpm.tbl8[i], &new_tbl8_entry,
788 __ATOMIC_RELAXED);
789
790 continue;
791 }
792 }
793 }
794 #undef group_idx
795 return 0;
796 }
797
798 /*
799 * Add a route
800 */
801 int
802 rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
803 uint32_t next_hop)
804 {
805 int32_t rule_index, status = 0;
806 struct __rte_lpm *i_lpm;
807 uint32_t ip_masked;
808
809 /* Check user arguments. */
810 if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
811 return -EINVAL;
812
813 i_lpm = container_of(lpm, struct __rte_lpm, lpm);
814 ip_masked = ip & depth_to_mask(depth);
815
816 /* Add the rule to the rule table. */
817 rule_index = rule_add(i_lpm, ip_masked, depth, next_hop);
818
819 /* Skip the table entry update if the rule is identical to the
820 * one already in the rules table.
821 */
822 if (rule_index == -EEXIST)
823 return 0;
824
825 /* If there is no space available for the new rule, return an error. */
826 if (rule_index < 0) {
827 return rule_index;
828 }
829
830 if (depth <= MAX_DEPTH_TBL24) {
831 status = add_depth_small(i_lpm, ip_masked, depth, next_hop);
832 } else { /* If depth > MAX_DEPTH_TBL24 */
833 status = add_depth_big(i_lpm, ip_masked, depth, next_hop);
834
835 /*
836 * If add fails due to exhaustion of tbl8 extensions delete
837 * rule that was added to rule table.
838 */
839 if (status < 0) {
840 rule_delete(i_lpm, rule_index, depth);
841
842 return status;
843 }
844 }
845
846 return 0;
847 }
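/*
 * Illustrative use of rte_lpm_add() above -- a sketch with a hypothetical
 * table handle and next hop.  Addresses are in host byte order, so
 * (10U << 24) | (1U << 16) | (1U << 8) is 10.1.1.0; any address inside
 * 10.1.1.0/24, e.g. 10.1.1.42, then resolves to the configured next hop:
 *
 *	uint32_t ip = (10U << 24) | (1U << 16) | (1U << 8);
 *	uint32_t nh;
 *
 *	if (rte_lpm_add(lpm, ip, 24, 7) == 0 &&
 *	    rte_lpm_lookup(lpm, ip | 42, &nh) == 0)
 *		assert(nh == 7);
 */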
848
849 /*
850 * Look for a rule in the high-level rules table
851 */
852 int
853 rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
854 uint32_t *next_hop)
855 {
856 struct __rte_lpm *i_lpm;
857 uint32_t ip_masked;
858 int32_t rule_index;
859
860 /* Check user arguments. */
861 if ((lpm == NULL) ||
862 (next_hop == NULL) ||
863 (depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
864 return -EINVAL;
865
866 /* Look for the rule using rule_find. */
867 i_lpm = container_of(lpm, struct __rte_lpm, lpm);
868 ip_masked = ip & depth_to_mask(depth);
869 rule_index = rule_find(i_lpm, ip_masked, depth);
870
871 if (rule_index >= 0) {
872 *next_hop = i_lpm->rules_tbl[rule_index].next_hop;
873 return 1;
874 }
875
876 /* If rule is not found return 0. */
877 return 0;
878 }
879
880 static int32_t
881 find_previous_rule(struct __rte_lpm *i_lpm, uint32_t ip, uint8_t depth,
882 uint8_t *sub_rule_depth)
883 {
884 int32_t rule_index;
885 uint32_t ip_masked;
886 uint8_t prev_depth;
887
888 for (prev_depth = (uint8_t)(depth - 1); prev_depth > 0; prev_depth--) {
889 ip_masked = ip & depth_to_mask(prev_depth);
890
891 rule_index = rule_find(i_lpm, ip_masked, prev_depth);
892
893 if (rule_index >= 0) {
894 *sub_rule_depth = prev_depth;
895 return rule_index;
896 }
897 }
898
899 return -1;
900 }
901
902 static int32_t
903 delete_depth_small(struct __rte_lpm *i_lpm, uint32_t ip_masked,
904 uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
905 {
906 #define group_idx next_hop
907 uint32_t tbl24_range, tbl24_index, tbl8_group_index, tbl8_index, i, j;
908
909 /* Calculate the range and index into Table24. */
910 tbl24_range = depth_to_range(depth);
911 tbl24_index = (ip_masked >> 8);
912 struct rte_lpm_tbl_entry zero_tbl24_entry = {0};
913
914 /*
915 * First check sub_rule_index. A negative value means there is no
916 * replacement rule; a non-negative value indexes the replacement rule.
917 */
918 if (sub_rule_index < 0) {
919 /*
920 * If no replacement rule exists then invalidate entries
921 * associated with this rule.
922 */
923 for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
924
925 if (i_lpm->lpm.tbl24[i].valid_group == 0 &&
926 i_lpm->lpm.tbl24[i].depth <= depth) {
927 __atomic_store(&i_lpm->lpm.tbl24[i],
928 &zero_tbl24_entry, __ATOMIC_RELEASE);
929 } else if (i_lpm->lpm.tbl24[i].valid_group == 1) {
930 /*
931 * If TBL24 entry is extended, then there has
932 * to be a rule with depth >= 25 in the
933 * associated TBL8 group.
934 */
935
936 tbl8_group_index = i_lpm->lpm.tbl24[i].group_idx;
937 tbl8_index = tbl8_group_index *
938 RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
939
940 for (j = tbl8_index; j < (tbl8_index +
941 RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
942
943 if (i_lpm->lpm.tbl8[j].depth <= depth)
944 i_lpm->lpm.tbl8[j].valid = INVALID;
945 }
946 }
947 }
948 } else {
949 /*
950 * If a replacement rule exists then modify entries
951 * associated with this rule.
952 */
953
954 struct rte_lpm_tbl_entry new_tbl24_entry = {
955 .next_hop = i_lpm->rules_tbl[sub_rule_index].next_hop,
956 .valid = VALID,
957 .valid_group = 0,
958 .depth = sub_rule_depth,
959 };
960
961 struct rte_lpm_tbl_entry new_tbl8_entry = {
962 .valid = VALID,
963 .valid_group = VALID,
964 .depth = sub_rule_depth,
965 .next_hop = i_lpm->rules_tbl
966 [sub_rule_index].next_hop,
967 };
968
969 for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
970
971 if (i_lpm->lpm.tbl24[i].valid_group == 0 &&
972 i_lpm->lpm.tbl24[i].depth <= depth) {
973 __atomic_store(&i_lpm->lpm.tbl24[i], &new_tbl24_entry,
974 __ATOMIC_RELEASE);
975 } else if (i_lpm->lpm.tbl24[i].valid_group == 1) {
976 /*
977 * If TBL24 entry is extended, then there has
978 * to be a rule with depth >= 25 in the
979 * associated TBL8 group.
980 */
981
982 tbl8_group_index = i_lpm->lpm.tbl24[i].group_idx;
983 tbl8_index = tbl8_group_index *
984 RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
985
986 for (j = tbl8_index; j < (tbl8_index +
987 RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
988
989 if (i_lpm->lpm.tbl8[j].depth <= depth)
990 __atomic_store(&i_lpm->lpm.tbl8[j],
991 &new_tbl8_entry,
992 __ATOMIC_RELAXED);
993 }
994 }
995 }
996 }
997 #undef group_idx
998 return 0;
999 }
1000
1001 /*
1002 * Checks if table 8 group can be recycled.
1003 *
1004 * Return of -EEXIST means tbl8 is in use and thus can not be recycled.
1005 * Return of -EINVAL means tbl8 is empty and thus can be recycled
1006 * Return of value > -1 means tbl8 is in use but has all the same values and
1007 * thus can be recycled
1008 */
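/*
 * For example, a group whose 256 entries all hold the same route of depth
 * <= 24 can be collapsed back into its parent tbl24 entry, while a group
 * with no valid entries at all is simply freed.
 */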
1009 static int32_t
1010 tbl8_recycle_check(struct rte_lpm_tbl_entry *tbl8,
1011 uint32_t tbl8_group_start)
1012 {
1013 uint32_t tbl8_group_end, i;
1014 tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
1015
1016 /*
1017 * Check the first entry of the given tbl8. If it is invalid we know
1018 * this tbl8 group does not hold a rule of depth <= MAX_DEPTH_TBL24
1019 * (such a rule would cover every entry in the group) and therefore the
1020 * group cannot be collapsed into a single tbl24 entry.
1021 */
1022 if (tbl8[tbl8_group_start].valid) {
1023 /*
1024 * If the first entry is valid, check whether its depth is no greater
1025 * than 24 and, if so, verify that the remaining entries all share
1026 * that depth.
1027 */
1028 if (tbl8[tbl8_group_start].depth <= MAX_DEPTH_TBL24) {
1029 for (i = (tbl8_group_start + 1); i < tbl8_group_end;
1030 i++) {
1031
1032 if (tbl8[i].depth !=
1033 tbl8[tbl8_group_start].depth) {
1034
1035 return -EEXIST;
1036 }
1037 }
1038 /* If all entries are the same, return the tbl8 index. */
1039 return tbl8_group_start;
1040 }
1041
1042 return -EEXIST;
1043 }
1044 /*
1045 * If the first entry is invalid check if the rest of the entries in
1046 * the tbl8 are invalid.
1047 */
1048 for (i = (tbl8_group_start + 1); i < tbl8_group_end; i++) {
1049 if (tbl8[i].valid)
1050 return -EEXIST;
1051 }
1052 /* If no valid entries are found then return -EINVAL. */
1053 return -EINVAL;
1054 }
1055
1056 static int32_t
1057 delete_depth_big(struct __rte_lpm *i_lpm, uint32_t ip_masked,
1058 uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
1059 {
1060 #define group_idx next_hop
1061 uint32_t tbl24_index, tbl8_group_index, tbl8_group_start, tbl8_index,
1062 tbl8_range, i;
1063 int32_t tbl8_recycle_index, status = 0;
1064
1065 /*
1066 * Calculate the index into tbl24 and range. Note: All depths larger
1067 * than MAX_DEPTH_TBL24 are associated with only one tbl24 entry.
1068 */
1069 tbl24_index = ip_masked >> 8;
1070
1071 /* Calculate the index into tbl8 and range. */
1072 tbl8_group_index = i_lpm->lpm.tbl24[tbl24_index].group_idx;
1073 tbl8_group_start = tbl8_group_index * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
1074 tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
1075 tbl8_range = depth_to_range(depth);
1076
1077 if (sub_rule_index < 0) {
1078 /*
1079 * Loop through the range of entries on tbl8 for which the
1080 * rule_to_delete must be removed or modified.
1081 */
1082 for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
1083 if (i_lpm->lpm.tbl8[i].depth <= depth)
1084 i_lpm->lpm.tbl8[i].valid = INVALID;
1085 }
1086 } else {
1087 /* Set new tbl8 entry. */
1088 struct rte_lpm_tbl_entry new_tbl8_entry = {
1089 .valid = VALID,
1090 .depth = sub_rule_depth,
1091 .valid_group = i_lpm->lpm.tbl8[tbl8_group_start].valid_group,
1092 .next_hop = i_lpm->rules_tbl[sub_rule_index].next_hop,
1093 };
1094
1095 /*
1096 * Loop through the range of entries on tbl8 for which the
1097 * rule_to_delete must be modified.
1098 */
1099 for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
1100 if (i_lpm->lpm.tbl8[i].depth <= depth)
1101 __atomic_store(&i_lpm->lpm.tbl8[i], &new_tbl8_entry,
1102 __ATOMIC_RELAXED);
1103 }
1104 }
1105
1106 /*
1107 * Check if there are any valid entries in this tbl8 group. If all
1108 * tbl8 entries are invalid we can free the tbl8 and invalidate the
1109 * associated tbl24 entry.
1110 */
1111
1112 tbl8_recycle_index = tbl8_recycle_check(i_lpm->lpm.tbl8, tbl8_group_start);
1113
1114 if (tbl8_recycle_index == -EINVAL) {
1115 /* Set tbl24 before freeing tbl8 to avoid race condition.
1116 * Prevent the free of the tbl8 group from hoisting.
1117 */
1118 i_lpm->lpm.tbl24[tbl24_index].valid = 0;
1119 rte_atomic_thread_fence(rte_memory_order_release);
1120 status = tbl8_free(i_lpm, tbl8_group_start);
1121 } else if (tbl8_recycle_index > -1) {
1122 /* Update tbl24 entry. */
1123 struct rte_lpm_tbl_entry new_tbl24_entry = {
1124 .next_hop = i_lpm->lpm.tbl8[tbl8_recycle_index].next_hop,
1125 .valid = VALID,
1126 .valid_group = 0,
1127 .depth = i_lpm->lpm.tbl8[tbl8_recycle_index].depth,
1128 };
1129
1130 /* Set tbl24 before freeing tbl8 to avoid race condition.
1131 * Prevent the free of the tbl8 group from hoisting.
1132 */
1133 __atomic_store(&i_lpm->lpm.tbl24[tbl24_index], &new_tbl24_entry,
1134 __ATOMIC_RELAXED);
1135 rte_atomic_thread_fence(rte_memory_order_release);
1136 status = tbl8_free(i_lpm, tbl8_group_start);
1137 }
1138 #undef group_idx
1139 return status;
1140 }
1141
1142 /*
1143 * Deletes a rule
1144 */
1145 int
1146 rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, uint8_t depth)
1147 {
1148 int32_t rule_to_delete_index, sub_rule_index;
1149 struct __rte_lpm *i_lpm;
1150 uint32_t ip_masked;
1151 uint8_t sub_rule_depth;
1152 /*
1153 * Check input arguments. Note: the IP address is an unsigned 32-bit
1154 * integer, so any value is acceptable and it need not be checked.
1155 */
1156 if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) {
1157 return -EINVAL;
1158 }
1159
1160 i_lpm = container_of(lpm, struct __rte_lpm, lpm);
1161 ip_masked = ip & depth_to_mask(depth);
1162
1163 /*
1164 * Find the index of the input rule, that needs to be deleted, in the
1165 * rule table.
1166 */
1167 rule_to_delete_index = rule_find(i_lpm, ip_masked, depth);
1168
1169 /*
1170 * Check if rule_to_delete_index was found. If no rule was found the
1171 * function rule_find returns -EINVAL.
1172 */
1173 if (rule_to_delete_index < 0)
1174 return -EINVAL;
1175
1176 /* Delete the rule from the rule table. */
1177 rule_delete(i_lpm, rule_to_delete_index, depth);
1178
1179 /*
1180 * Find rule to replace the rule_to_delete. If there is no rule to
1181 * replace the rule_to_delete we return -1 and invalidate the table
1182 * entries associated with this rule.
1183 */
1184 sub_rule_depth = 0;
1185 sub_rule_index = find_previous_rule(i_lpm, ip, depth, &sub_rule_depth);
1186
1187 /*
1188 * If the input depth value is less than 25 use function
1189 * delete_depth_small otherwise use delete_depth_big.
1190 */
1191 if (depth <= MAX_DEPTH_TBL24) {
1192 return delete_depth_small(i_lpm, ip_masked, depth,
1193 sub_rule_index, sub_rule_depth);
1194 } else { /* If depth > MAX_DEPTH_TBL24 */
1195 return delete_depth_big(i_lpm, ip_masked, depth, sub_rule_index,
1196 sub_rule_depth);
1197 }
1198 }
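/*
 * Illustrative teardown for the delete paths above -- a sketch with a
 * hypothetical handle and route: rte_lpm_delete() removes a single route,
 * rte_lpm_delete_all() wipes every rule and table entry, and
 * rte_lpm_free() finally releases the table itself.
 *
 *	rte_lpm_delete(lpm, ip, 24);
 *	rte_lpm_delete_all(lpm);
 *	rte_lpm_free(lpm);
 */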
1199
1200 /*
1201 * Delete all rules from the LPM table.
1202 */
1203 void
1204 rte_lpm_delete_all(struct rte_lpm *lpm)
1205 {
1206 struct __rte_lpm *i_lpm;
1207
1208 i_lpm = container_of(lpm, struct __rte_lpm, lpm);
1209 /* Zero rule information. */
1210 memset(i_lpm->rule_info, 0, sizeof(i_lpm->rule_info));
1211
1212 /* Zero tbl24. */
1213 memset(i_lpm->lpm.tbl24, 0, sizeof(i_lpm->lpm.tbl24));
1214
1215 /* Zero tbl8. */
1216 memset(i_lpm->lpm.tbl8, 0, sizeof(i_lpm->lpm.tbl8[0])
1217 * RTE_LPM_TBL8_GROUP_NUM_ENTRIES * i_lpm->number_tbl8s);
1218
1219 /* Delete all rules from the rules table. */
1220 memset(i_lpm->rules_tbl, 0, sizeof(i_lpm->rules_tbl[0]) * i_lpm->max_rules);
1221 }
1222