1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <stdalign.h> 7 #include <stdint.h> 8 #include <string.h> 9 #include <stdbool.h> 10 #include <sys/queue.h> 11 12 #include <rte_common.h> 13 #include <rte_ether.h> 14 #include <ethdev_driver.h> 15 #include <rte_eal_paging.h> 16 #include <rte_flow.h> 17 #include <rte_cycles.h> 18 #include <rte_flow_driver.h> 19 #include <rte_malloc.h> 20 #include <rte_ip.h> 21 22 #include <mlx5_glue.h> 23 #include <mlx5_devx_cmds.h> 24 #include <mlx5_prm.h> 25 #include <mlx5_malloc.h> 26 27 #include "mlx5_defs.h" 28 #include "mlx5.h" 29 #include "mlx5_flow.h" 30 #include "mlx5_flow_os.h" 31 #include "mlx5_rx.h" 32 #include "mlx5_tx.h" 33 #include "mlx5_common_os.h" 34 #include "rte_pmd_mlx5.h" 35 36 struct tunnel_default_miss_ctx { 37 uint16_t *queue; 38 __extension__ 39 union { 40 struct rte_flow_action_rss action_rss; 41 struct rte_flow_action_queue miss_queue; 42 struct rte_flow_action_jump miss_jump; 43 uint8_t raw[0]; 44 }; 45 }; 46 47 static int 48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev, 49 struct rte_flow *flow, 50 const struct rte_flow_attr *attr, 51 const struct rte_flow_action *app_actions, 52 uint32_t flow_idx, 53 const struct mlx5_flow_tunnel *tunnel, 54 struct tunnel_default_miss_ctx *ctx, 55 struct rte_flow_error *error); 56 static struct mlx5_flow_tunnel * 57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id); 58 static void 59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel); 60 static uint32_t 61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev, 62 const struct mlx5_flow_tunnel *tunnel, 63 uint32_t group, uint32_t *table, 64 struct rte_flow_error *error); 65 66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void); 67 static void mlx5_flow_pop_thread_workspace(void); 68 69 70 /** Device flow drivers. */ 71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; 72 73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops; 74 75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = { 76 [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops, 77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H) 78 [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops, 79 #endif 80 [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops, 81 [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops 82 }; 83 84 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */ 85 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \ 86 (const int []){ \ 87 __VA_ARGS__, 0, \ 88 } 89 90 /** Node object of input graph for mlx5_flow_expand_rss(). */ 91 struct mlx5_flow_expand_node { 92 const int *const next; 93 /**< 94 * List of next node indexes. Index 0 is interpreted as a terminator. 95 */ 96 const enum rte_flow_item_type type; 97 /**< Pattern item type of current node. */ 98 uint64_t rss_types; 99 /**< 100 * RSS types bit-field associated with this node 101 * (see ETH_RSS_* definitions). 102 */ 103 uint8_t optional; 104 /**< optional expand field. Default 0 to expand, 1 not go deeper. */ 105 }; 106 107 /** Object returned by mlx5_flow_expand_rss(). */ 108 struct mlx5_flow_expand_rss { 109 uint32_t entries; 110 /**< Number of entries @p patterns and @p priorities. */ 111 struct { 112 struct rte_flow_item *pattern; /**< Expanded pattern array. */ 113 uint32_t priority; /**< Priority offset for each expansion. 
*/ 114 } entry[]; 115 }; 116 117 static void 118 mlx5_dbg__print_pattern(const struct rte_flow_item *item); 119 120 static bool 121 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item) 122 { 123 switch (item->type) { 124 case RTE_FLOW_ITEM_TYPE_ETH: 125 case RTE_FLOW_ITEM_TYPE_VLAN: 126 case RTE_FLOW_ITEM_TYPE_IPV4: 127 case RTE_FLOW_ITEM_TYPE_IPV6: 128 case RTE_FLOW_ITEM_TYPE_UDP: 129 case RTE_FLOW_ITEM_TYPE_TCP: 130 case RTE_FLOW_ITEM_TYPE_VXLAN: 131 case RTE_FLOW_ITEM_TYPE_NVGRE: 132 case RTE_FLOW_ITEM_TYPE_GRE: 133 case RTE_FLOW_ITEM_TYPE_GENEVE: 134 case RTE_FLOW_ITEM_TYPE_MPLS: 135 return true; 136 default: 137 break; 138 } 139 return false; 140 } 141 142 static enum rte_flow_item_type 143 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) 144 { 145 enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID; 146 uint16_t ether_type = 0; 147 uint16_t ether_type_m; 148 uint8_t ip_next_proto = 0; 149 uint8_t ip_next_proto_m; 150 151 if (item == NULL || item->spec == NULL) 152 return ret; 153 switch (item->type) { 154 case RTE_FLOW_ITEM_TYPE_ETH: 155 if (item->mask) 156 ether_type_m = ((const struct rte_flow_item_eth *) 157 (item->mask))->type; 158 else 159 ether_type_m = rte_flow_item_eth_mask.type; 160 if (ether_type_m != RTE_BE16(0xFFFF)) 161 break; 162 ether_type = ((const struct rte_flow_item_eth *) 163 (item->spec))->type; 164 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 165 ret = RTE_FLOW_ITEM_TYPE_IPV4; 166 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 167 ret = RTE_FLOW_ITEM_TYPE_IPV6; 168 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 169 ret = RTE_FLOW_ITEM_TYPE_VLAN; 170 else 171 ret = RTE_FLOW_ITEM_TYPE_END; 172 break; 173 case RTE_FLOW_ITEM_TYPE_VLAN: 174 if (item->mask) 175 ether_type_m = ((const struct rte_flow_item_vlan *) 176 (item->mask))->inner_type; 177 else 178 ether_type_m = rte_flow_item_vlan_mask.inner_type; 179 if (ether_type_m != RTE_BE16(0xFFFF)) 180 break; 181 ether_type = ((const struct rte_flow_item_vlan *) 182 (item->spec))->inner_type; 183 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 184 ret = RTE_FLOW_ITEM_TYPE_IPV4; 185 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 186 ret = RTE_FLOW_ITEM_TYPE_IPV6; 187 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 188 ret = RTE_FLOW_ITEM_TYPE_VLAN; 189 else 190 ret = RTE_FLOW_ITEM_TYPE_END; 191 break; 192 case RTE_FLOW_ITEM_TYPE_IPV4: 193 if (item->mask) 194 ip_next_proto_m = ((const struct rte_flow_item_ipv4 *) 195 (item->mask))->hdr.next_proto_id; 196 else 197 ip_next_proto_m = 198 rte_flow_item_ipv4_mask.hdr.next_proto_id; 199 if (ip_next_proto_m != 0xFF) 200 break; 201 ip_next_proto = ((const struct rte_flow_item_ipv4 *) 202 (item->spec))->hdr.next_proto_id; 203 if (ip_next_proto == IPPROTO_UDP) 204 ret = RTE_FLOW_ITEM_TYPE_UDP; 205 else if (ip_next_proto == IPPROTO_TCP) 206 ret = RTE_FLOW_ITEM_TYPE_TCP; 207 else if (ip_next_proto == IPPROTO_IP) 208 ret = RTE_FLOW_ITEM_TYPE_IPV4; 209 else if (ip_next_proto == IPPROTO_IPV6) 210 ret = RTE_FLOW_ITEM_TYPE_IPV6; 211 else 212 ret = RTE_FLOW_ITEM_TYPE_END; 213 break; 214 case RTE_FLOW_ITEM_TYPE_IPV6: 215 if (item->mask) 216 ip_next_proto_m = ((const struct rte_flow_item_ipv6 *) 217 (item->mask))->hdr.proto; 218 else 219 ip_next_proto_m = 220 rte_flow_item_ipv6_mask.hdr.proto; 221 if (ip_next_proto_m != 0xFF) 222 break; 223 ip_next_proto = ((const struct rte_flow_item_ipv6 *) 224 (item->spec))->hdr.proto; 225 if (ip_next_proto == IPPROTO_UDP) 226 ret = 
RTE_FLOW_ITEM_TYPE_UDP; 227 else if (ip_next_proto == IPPROTO_TCP) 228 ret = RTE_FLOW_ITEM_TYPE_TCP; 229 else if (ip_next_proto == IPPROTO_IP) 230 ret = RTE_FLOW_ITEM_TYPE_IPV4; 231 else if (ip_next_proto == IPPROTO_IPV6) 232 ret = RTE_FLOW_ITEM_TYPE_IPV6; 233 else 234 ret = RTE_FLOW_ITEM_TYPE_END; 235 break; 236 default: 237 ret = RTE_FLOW_ITEM_TYPE_VOID; 238 break; 239 } 240 return ret; 241 } 242 243 #define MLX5_RSS_EXP_ELT_N 16 244 245 /** 246 * Expand RSS flows into several possible flows according to the RSS hash 247 * fields requested and the driver capabilities. 248 * 249 * @param[out] buf 250 * Buffer to store the result expansion. 251 * @param[in] size 252 * Buffer size in bytes. If 0, @p buf can be NULL. 253 * @param[in] pattern 254 * User flow pattern. 255 * @param[in] types 256 * RSS types to expand (see ETH_RSS_* definitions). 257 * @param[in] graph 258 * Input graph to expand @p pattern according to @p types. 259 * @param[in] graph_root_index 260 * Index of root node in @p graph, typically 0. 261 * 262 * @return 263 * A positive value representing the size of @p buf in bytes regardless of 264 * @p size on success, a negative errno value otherwise and rte_errno is 265 * set, the following errors are defined: 266 * 267 * -E2BIG: graph-depth @p graph is too deep. 268 * -EINVAL: @p size has not enough space for expanded pattern. 269 */ 270 static int 271 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, 272 const struct rte_flow_item *pattern, uint64_t types, 273 const struct mlx5_flow_expand_node graph[], 274 int graph_root_index) 275 { 276 const struct rte_flow_item *item; 277 const struct mlx5_flow_expand_node *node = &graph[graph_root_index]; 278 const int *next_node; 279 const int *stack[MLX5_RSS_EXP_ELT_N]; 280 int stack_pos = 0; 281 struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N]; 282 unsigned int i; 283 size_t lsize; 284 size_t user_pattern_size = 0; 285 void *addr = NULL; 286 const struct mlx5_flow_expand_node *next = NULL; 287 struct rte_flow_item missed_item; 288 int missed = 0; 289 int elt = 0; 290 const struct rte_flow_item *last_item = NULL; 291 292 memset(&missed_item, 0, sizeof(missed_item)); 293 lsize = offsetof(struct mlx5_flow_expand_rss, entry) + 294 MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]); 295 if (lsize > size) 296 return -EINVAL; 297 buf->entry[0].priority = 0; 298 buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N]; 299 buf->entries = 0; 300 addr = buf->entry[0].pattern; 301 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 302 if (!mlx5_flow_is_rss_expandable_item(item)) { 303 user_pattern_size += sizeof(*item); 304 continue; 305 } 306 last_item = item; 307 for (i = 0; node->next && node->next[i]; ++i) { 308 next = &graph[node->next[i]]; 309 if (next->type == item->type) 310 break; 311 } 312 if (next) 313 node = next; 314 user_pattern_size += sizeof(*item); 315 } 316 user_pattern_size += sizeof(*item); /* Handle END item. */ 317 lsize += user_pattern_size; 318 if (lsize > size) 319 return -EINVAL; 320 /* Copy the user pattern in the first entry of the buffer. */ 321 rte_memcpy(addr, pattern, user_pattern_size); 322 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 323 buf->entries = 1; 324 /* Start expanding. */ 325 memset(flow_items, 0, sizeof(flow_items)); 326 user_pattern_size -= sizeof(*item); 327 /* 328 * Check if the last valid item has spec set, need complete pattern, 329 * and the pattern can be used for expansion. 
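	 * For example, with the user pattern "eth / ipv4 / end" and UDP/TCP
	 * RSS types requested, no completion item is needed and the loop
	 * below adds "eth / ipv4 / udp / end" and "eth / ipv4 / tcp / end"
	 * expansions on top of the original pattern.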
330 */ 331 missed_item.type = mlx5_flow_expand_rss_item_complete(last_item); 332 if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) { 333 /* Item type END indicates expansion is not required. */ 334 return lsize; 335 } 336 if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) { 337 next = NULL; 338 missed = 1; 339 for (i = 0; node->next && node->next[i]; ++i) { 340 next = &graph[node->next[i]]; 341 if (next->type == missed_item.type) { 342 flow_items[0].type = missed_item.type; 343 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; 344 break; 345 } 346 next = NULL; 347 } 348 } 349 if (next && missed) { 350 elt = 2; /* missed item + item end. */ 351 node = next; 352 lsize += elt * sizeof(*item) + user_pattern_size; 353 if (lsize > size) 354 return -EINVAL; 355 if (node->rss_types & types) { 356 buf->entry[buf->entries].priority = 1; 357 buf->entry[buf->entries].pattern = addr; 358 buf->entries++; 359 rte_memcpy(addr, buf->entry[0].pattern, 360 user_pattern_size); 361 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 362 rte_memcpy(addr, flow_items, elt * sizeof(*item)); 363 addr = (void *)(((uintptr_t)addr) + 364 elt * sizeof(*item)); 365 } 366 } 367 memset(flow_items, 0, sizeof(flow_items)); 368 next_node = node->next; 369 stack[stack_pos] = next_node; 370 node = next_node ? &graph[*next_node] : NULL; 371 while (node) { 372 flow_items[stack_pos].type = node->type; 373 if (node->rss_types & types) { 374 size_t n; 375 /* 376 * compute the number of items to copy from the 377 * expansion and copy it. 378 * When the stack_pos is 0, there are 1 element in it, 379 * plus the addition END item. 380 */ 381 elt = stack_pos + 2; 382 flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END; 383 lsize += elt * sizeof(*item) + user_pattern_size; 384 if (lsize > size) 385 return -EINVAL; 386 n = elt * sizeof(*item); 387 buf->entry[buf->entries].priority = 388 stack_pos + 1 + missed; 389 buf->entry[buf->entries].pattern = addr; 390 buf->entries++; 391 rte_memcpy(addr, buf->entry[0].pattern, 392 user_pattern_size); 393 addr = (void *)(((uintptr_t)addr) + 394 user_pattern_size); 395 rte_memcpy(addr, &missed_item, 396 missed * sizeof(*item)); 397 addr = (void *)(((uintptr_t)addr) + 398 missed * sizeof(*item)); 399 rte_memcpy(addr, flow_items, n); 400 addr = (void *)(((uintptr_t)addr) + n); 401 } 402 /* Go deeper. */ 403 if (!node->optional && node->next) { 404 next_node = node->next; 405 if (stack_pos++ == MLX5_RSS_EXP_ELT_N) { 406 rte_errno = E2BIG; 407 return -rte_errno; 408 } 409 stack[stack_pos] = next_node; 410 } else if (*(next_node + 1)) { 411 /* Follow up with the next possibility. */ 412 ++next_node; 413 } else { 414 /* Move to the next path. */ 415 if (stack_pos) 416 next_node = stack[--stack_pos]; 417 next_node++; 418 stack[stack_pos] = next_node; 419 } 420 node = *next_node ? 
&graph[*next_node] : NULL; 421 }; 422 return lsize; 423 } 424 425 enum mlx5_expansion { 426 MLX5_EXPANSION_ROOT, 427 MLX5_EXPANSION_ROOT_OUTER, 428 MLX5_EXPANSION_ROOT_ETH_VLAN, 429 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, 430 MLX5_EXPANSION_OUTER_ETH, 431 MLX5_EXPANSION_OUTER_ETH_VLAN, 432 MLX5_EXPANSION_OUTER_VLAN, 433 MLX5_EXPANSION_OUTER_IPV4, 434 MLX5_EXPANSION_OUTER_IPV4_UDP, 435 MLX5_EXPANSION_OUTER_IPV4_TCP, 436 MLX5_EXPANSION_OUTER_IPV6, 437 MLX5_EXPANSION_OUTER_IPV6_UDP, 438 MLX5_EXPANSION_OUTER_IPV6_TCP, 439 MLX5_EXPANSION_VXLAN, 440 MLX5_EXPANSION_VXLAN_GPE, 441 MLX5_EXPANSION_GRE, 442 MLX5_EXPANSION_NVGRE, 443 MLX5_EXPANSION_GRE_KEY, 444 MLX5_EXPANSION_MPLS, 445 MLX5_EXPANSION_ETH, 446 MLX5_EXPANSION_ETH_VLAN, 447 MLX5_EXPANSION_VLAN, 448 MLX5_EXPANSION_IPV4, 449 MLX5_EXPANSION_IPV4_UDP, 450 MLX5_EXPANSION_IPV4_TCP, 451 MLX5_EXPANSION_IPV6, 452 MLX5_EXPANSION_IPV6_UDP, 453 MLX5_EXPANSION_IPV6_TCP, 454 }; 455 456 /** Supported expansion of items. */ 457 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { 458 [MLX5_EXPANSION_ROOT] = { 459 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 460 MLX5_EXPANSION_IPV4, 461 MLX5_EXPANSION_IPV6), 462 .type = RTE_FLOW_ITEM_TYPE_END, 463 }, 464 [MLX5_EXPANSION_ROOT_OUTER] = { 465 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 466 MLX5_EXPANSION_OUTER_IPV4, 467 MLX5_EXPANSION_OUTER_IPV6), 468 .type = RTE_FLOW_ITEM_TYPE_END, 469 }, 470 [MLX5_EXPANSION_ROOT_ETH_VLAN] = { 471 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), 472 .type = RTE_FLOW_ITEM_TYPE_END, 473 }, 474 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { 475 .next = MLX5_FLOW_EXPAND_RSS_NEXT 476 (MLX5_EXPANSION_OUTER_ETH_VLAN), 477 .type = RTE_FLOW_ITEM_TYPE_END, 478 }, 479 [MLX5_EXPANSION_OUTER_ETH] = { 480 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 481 MLX5_EXPANSION_OUTER_IPV6), 482 .type = RTE_FLOW_ITEM_TYPE_ETH, 483 .rss_types = 0, 484 }, 485 [MLX5_EXPANSION_OUTER_ETH_VLAN] = { 486 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), 487 .type = RTE_FLOW_ITEM_TYPE_ETH, 488 .rss_types = 0, 489 }, 490 [MLX5_EXPANSION_OUTER_VLAN] = { 491 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 492 MLX5_EXPANSION_OUTER_IPV6), 493 .type = RTE_FLOW_ITEM_TYPE_VLAN, 494 }, 495 [MLX5_EXPANSION_OUTER_IPV4] = { 496 .next = MLX5_FLOW_EXPAND_RSS_NEXT 497 (MLX5_EXPANSION_OUTER_IPV4_UDP, 498 MLX5_EXPANSION_OUTER_IPV4_TCP, 499 MLX5_EXPANSION_GRE, 500 MLX5_EXPANSION_NVGRE, 501 MLX5_EXPANSION_IPV4, 502 MLX5_EXPANSION_IPV6), 503 .type = RTE_FLOW_ITEM_TYPE_IPV4, 504 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 505 ETH_RSS_NONFRAG_IPV4_OTHER, 506 }, 507 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 508 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 509 MLX5_EXPANSION_VXLAN_GPE, 510 MLX5_EXPANSION_MPLS), 511 .type = RTE_FLOW_ITEM_TYPE_UDP, 512 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 513 }, 514 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 515 .type = RTE_FLOW_ITEM_TYPE_TCP, 516 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 517 }, 518 [MLX5_EXPANSION_OUTER_IPV6] = { 519 .next = MLX5_FLOW_EXPAND_RSS_NEXT 520 (MLX5_EXPANSION_OUTER_IPV6_UDP, 521 MLX5_EXPANSION_OUTER_IPV6_TCP, 522 MLX5_EXPANSION_IPV4, 523 MLX5_EXPANSION_IPV6, 524 MLX5_EXPANSION_GRE, 525 MLX5_EXPANSION_NVGRE), 526 .type = RTE_FLOW_ITEM_TYPE_IPV6, 527 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 528 ETH_RSS_NONFRAG_IPV6_OTHER, 529 }, 530 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 531 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 532 MLX5_EXPANSION_VXLAN_GPE, 533 
MLX5_EXPANSION_MPLS), 534 .type = RTE_FLOW_ITEM_TYPE_UDP, 535 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 536 }, 537 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 538 .type = RTE_FLOW_ITEM_TYPE_TCP, 539 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 540 }, 541 [MLX5_EXPANSION_VXLAN] = { 542 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 543 MLX5_EXPANSION_IPV4, 544 MLX5_EXPANSION_IPV6), 545 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 546 }, 547 [MLX5_EXPANSION_VXLAN_GPE] = { 548 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 549 MLX5_EXPANSION_IPV4, 550 MLX5_EXPANSION_IPV6), 551 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 552 }, 553 [MLX5_EXPANSION_GRE] = { 554 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 555 MLX5_EXPANSION_IPV6, 556 MLX5_EXPANSION_GRE_KEY, 557 MLX5_EXPANSION_MPLS), 558 .type = RTE_FLOW_ITEM_TYPE_GRE, 559 }, 560 [MLX5_EXPANSION_GRE_KEY] = { 561 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 562 MLX5_EXPANSION_IPV6, 563 MLX5_EXPANSION_MPLS), 564 .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, 565 .optional = 1, 566 }, 567 [MLX5_EXPANSION_NVGRE] = { 568 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH), 569 .type = RTE_FLOW_ITEM_TYPE_NVGRE, 570 }, 571 [MLX5_EXPANSION_MPLS] = { 572 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 573 MLX5_EXPANSION_IPV6, 574 MLX5_EXPANSION_ETH), 575 .type = RTE_FLOW_ITEM_TYPE_MPLS, 576 }, 577 [MLX5_EXPANSION_ETH] = { 578 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 579 MLX5_EXPANSION_IPV6), 580 .type = RTE_FLOW_ITEM_TYPE_ETH, 581 }, 582 [MLX5_EXPANSION_ETH_VLAN] = { 583 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), 584 .type = RTE_FLOW_ITEM_TYPE_ETH, 585 }, 586 [MLX5_EXPANSION_VLAN] = { 587 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 588 MLX5_EXPANSION_IPV6), 589 .type = RTE_FLOW_ITEM_TYPE_VLAN, 590 }, 591 [MLX5_EXPANSION_IPV4] = { 592 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 593 MLX5_EXPANSION_IPV4_TCP), 594 .type = RTE_FLOW_ITEM_TYPE_IPV4, 595 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 596 ETH_RSS_NONFRAG_IPV4_OTHER, 597 }, 598 [MLX5_EXPANSION_IPV4_UDP] = { 599 .type = RTE_FLOW_ITEM_TYPE_UDP, 600 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 601 }, 602 [MLX5_EXPANSION_IPV4_TCP] = { 603 .type = RTE_FLOW_ITEM_TYPE_TCP, 604 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 605 }, 606 [MLX5_EXPANSION_IPV6] = { 607 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 608 MLX5_EXPANSION_IPV6_TCP), 609 .type = RTE_FLOW_ITEM_TYPE_IPV6, 610 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 611 ETH_RSS_NONFRAG_IPV6_OTHER, 612 }, 613 [MLX5_EXPANSION_IPV6_UDP] = { 614 .type = RTE_FLOW_ITEM_TYPE_UDP, 615 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 616 }, 617 [MLX5_EXPANSION_IPV6_TCP] = { 618 .type = RTE_FLOW_ITEM_TYPE_TCP, 619 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 620 }, 621 }; 622 623 static struct rte_flow_action_handle * 624 mlx5_action_handle_create(struct rte_eth_dev *dev, 625 const struct rte_flow_indir_action_conf *conf, 626 const struct rte_flow_action *action, 627 struct rte_flow_error *error); 628 static int mlx5_action_handle_destroy 629 (struct rte_eth_dev *dev, 630 struct rte_flow_action_handle *handle, 631 struct rte_flow_error *error); 632 static int mlx5_action_handle_update 633 (struct rte_eth_dev *dev, 634 struct rte_flow_action_handle *handle, 635 const void *update, 636 struct rte_flow_error *error); 637 static int mlx5_action_handle_query 638 (struct rte_eth_dev *dev, 639 const struct rte_flow_action_handle *handle, 640 void *data, 641 struct rte_flow_error *error); 642 static int 643 
mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
		    struct rte_flow_tunnel *app_tunnel,
		    struct rte_flow_action **actions,
		    uint32_t *num_of_actions,
		    struct rte_flow_error *error);
static int
mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
		       struct rte_flow_tunnel *app_tunnel,
		       struct rte_flow_item **items,
		       uint32_t *num_of_items,
		       struct rte_flow_error *error);
static int
mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
			      struct rte_flow_item *pmd_items,
			      uint32_t num_items, struct rte_flow_error *err);
static int
mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
				struct rte_flow_action *pmd_actions,
				uint32_t num_actions,
				struct rte_flow_error *err);
static int
mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
				  struct rte_mbuf *m,
				  struct rte_flow_restore_info *info,
				  struct rte_flow_error *err);

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_aged_flows = mlx5_flow_get_aged_flows,
	.action_handle_create = mlx5_action_handle_create,
	.action_handle_destroy = mlx5_action_handle_destroy,
	.action_handle_update = mlx5_action_handle_update,
	.action_handle_query = mlx5_action_handle_query,
	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
	.tunnel_match = mlx5_flow_tunnel_match,
	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
	.tunnel_item_release = mlx5_flow_tunnel_item_release,
	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any failure.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
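 *
 * A minimal usage sketch (MLX5_APP_TAG and tag index 0 are example
 * arguments only); on failure the negative value is an errno and the
 * error structure is filled in:
 *
 * @code
 * struct rte_flow_error err;
 * int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, &err);
 *
 * if (reg < 0)
 *         return reg;
 * @endcode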
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_ID:
		/*
		 * If meter color and meter id share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
	case MLX5_ASO_FLOW_HIT:
	case MLX5_ASO_CONNTRACK:
		/* All features use the same REG_C. */
		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is only used in the meter
		 * suffix sub-flow when a meter is present, so it is safe to
		 * share the same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If a meter is enabled, it engages a register for color
		 * match and flow match. If the meter color match does not use
		 * REG_C_2, the REG_C_x used by the meter color match must be
		 * skipped.
		 * If no meter is enabled, all available registers can be used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (uint32_t)(REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than 2.
		 * Take care not to conflict with the meter color REG_C_x.
		 * If the available index REG_C_y >= REG_C_x, skip the
		 * color register.
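		 * For example (hypothetical layout): if the meter color match
		 * uses REG_C_3, start_reg is REG_C_2 and a tag id whose
		 * candidate register from flow_mreg_c[] is REG_C_3 or above
		 * is shifted to the next available register.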
850 */ 851 if (skip_mtr_reg && config->flow_mreg_c 852 [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { 853 if (id >= (uint32_t)(REG_C_7 - start_reg)) 854 return rte_flow_error_set(error, EINVAL, 855 RTE_FLOW_ERROR_TYPE_ITEM, 856 NULL, "invalid tag id"); 857 if (config->flow_mreg_c 858 [id + 1 + start_reg - REG_C_0] != REG_NON) 859 return config->flow_mreg_c 860 [id + 1 + start_reg - REG_C_0]; 861 return rte_flow_error_set(error, ENOTSUP, 862 RTE_FLOW_ERROR_TYPE_ITEM, 863 NULL, "unsupported tag id"); 864 } 865 return config->flow_mreg_c[id + start_reg - REG_C_0]; 866 } 867 MLX5_ASSERT(false); 868 return rte_flow_error_set(error, EINVAL, 869 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 870 NULL, "invalid feature name"); 871 } 872 873 /** 874 * Check extensive flow metadata register support. 875 * 876 * @param dev 877 * Pointer to rte_eth_dev structure. 878 * 879 * @return 880 * True if device supports extensive flow metadata register, otherwise false. 881 */ 882 bool 883 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev) 884 { 885 struct mlx5_priv *priv = dev->data->dev_private; 886 struct mlx5_dev_config *config = &priv->config; 887 888 /* 889 * Having available reg_c can be regarded inclusively as supporting 890 * extensive flow metadata register, which could mean, 891 * - metadata register copy action by modify header. 892 * - 16 modify header actions is supported. 893 * - reg_c's are preserved across different domain (FDB and NIC) on 894 * packet loopback by flow lookup miss. 895 */ 896 return config->flow_mreg_c[2] != REG_NON; 897 } 898 899 /** 900 * Get the lowest priority. 901 * 902 * @param[in] dev 903 * Pointer to the Ethernet device structure. 904 * @param[in] attributes 905 * Pointer to device flow rule attributes. 906 * 907 * @return 908 * The value of lowest priority of flow. 909 */ 910 uint32_t 911 mlx5_get_lowest_priority(struct rte_eth_dev *dev, 912 const struct rte_flow_attr *attr) 913 { 914 struct mlx5_priv *priv = dev->data->dev_private; 915 916 if (!attr->group && !attr->transfer) 917 return priv->config.flow_prio - 2; 918 return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1; 919 } 920 921 /** 922 * Calculate matcher priority of the flow. 923 * 924 * @param[in] dev 925 * Pointer to the Ethernet device structure. 926 * @param[in] attr 927 * Pointer to device flow rule attributes. 928 * @param[in] subpriority 929 * The priority based on the items. 930 * @return 931 * The matcher priority of the flow. 932 */ 933 uint16_t 934 mlx5_get_matcher_priority(struct rte_eth_dev *dev, 935 const struct rte_flow_attr *attr, 936 uint32_t subpriority) 937 { 938 uint16_t priority = (uint16_t)attr->priority; 939 struct mlx5_priv *priv = dev->data->dev_private; 940 941 if (!attr->group && !attr->transfer) { 942 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) 943 priority = priv->config.flow_prio - 1; 944 return mlx5_os_flow_adjust_priority(dev, priority, subpriority); 945 } 946 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) 947 priority = MLX5_NON_ROOT_FLOW_MAX_PRIO; 948 return priority * 3 + subpriority; 949 } 950 951 /** 952 * Verify the @p item specifications (spec, last, mask) are compatible with the 953 * NIC capabilities. 954 * 955 * @param[in] item 956 * Item specification. 957 * @param[in] mask 958 * @p item->mask or flow default bit-masks. 959 * @param[in] nic_mask 960 * Bit-masks covering supported fields by the NIC to compare with user mask. 961 * @param[in] size 962 * Bit-masks size in bytes. 
 * @param[in] range_accepted
 *   True if range of values is accepted for specific fields, false otherwise.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  bool range_accepted,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last && !range_accepted) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the RSS descriptor of the mlx5_flow.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}

/**
 * Lookup and set the ptype in the data Rx part. Only a single ptype can be
 * used: if several tunnel rules are used on this queue, the tunnel ptype
 * will be cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_ind_table_obj *ind_tbl = NULL;
	unsigned int i;

	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
		struct mlx5_hrxq *hrxq;

		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
				      dev_handle->rix_hrxq);
		if (hrxq)
			ind_tbl = hrxq->ind_table;
	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
		struct mlx5_shared_action_rss *shared_rss;

		shared_rss = mlx5_ipool_get
			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
			 dev_handle->rix_srss);
		if (shared_rss)
			ind_tbl = shared_rss->ind_tbl;
	}
	if (!ind_tbl)
		return;
	for (i = 0; i != ind_tbl->queues_n; ++i) {
		int idx = ind_tbl->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must be always enabled (metadata may arrive
		 * from another port - not from local flows only).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
1179 */ 1180 static void 1181 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, 1182 struct mlx5_flow_handle *dev_handle) 1183 { 1184 struct mlx5_priv *priv = dev->data->dev_private; 1185 const int mark = dev_handle->mark; 1186 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 1187 struct mlx5_ind_table_obj *ind_tbl = NULL; 1188 unsigned int i; 1189 1190 if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) { 1191 struct mlx5_hrxq *hrxq; 1192 1193 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 1194 dev_handle->rix_hrxq); 1195 if (hrxq) 1196 ind_tbl = hrxq->ind_table; 1197 } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) { 1198 struct mlx5_shared_action_rss *shared_rss; 1199 1200 shared_rss = mlx5_ipool_get 1201 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], 1202 dev_handle->rix_srss); 1203 if (shared_rss) 1204 ind_tbl = shared_rss->ind_tbl; 1205 } 1206 if (!ind_tbl) 1207 return; 1208 MLX5_ASSERT(dev->data->dev_started); 1209 for (i = 0; i != ind_tbl->queues_n; ++i) { 1210 int idx = ind_tbl->queues[i]; 1211 struct mlx5_rxq_ctrl *rxq_ctrl = 1212 container_of((*priv->rxqs)[idx], 1213 struct mlx5_rxq_ctrl, rxq); 1214 1215 if (priv->config.dv_flow_en && 1216 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 1217 mlx5_flow_ext_mreg_supported(dev)) { 1218 rxq_ctrl->rxq.mark = 1; 1219 rxq_ctrl->flow_mark_n = 1; 1220 } else if (mark) { 1221 rxq_ctrl->flow_mark_n--; 1222 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 1223 } 1224 if (tunnel) { 1225 unsigned int j; 1226 1227 /* Decrease the counter matching the flow. */ 1228 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 1229 if ((tunnels_info[j].tunnel & 1230 dev_handle->layers) == 1231 tunnels_info[j].tunnel) { 1232 rxq_ctrl->flow_tunnels_n[j]--; 1233 break; 1234 } 1235 } 1236 flow_rxq_tunnel_ptype_update(rxq_ctrl); 1237 } 1238 } 1239 } 1240 1241 /** 1242 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 1243 * @p flow if no other flow uses it with the same kind of request. 1244 * 1245 * @param dev 1246 * Pointer to Ethernet device. 1247 * @param[in] flow 1248 * Pointer to the flow. 1249 */ 1250 static void 1251 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 1252 { 1253 struct mlx5_priv *priv = dev->data->dev_private; 1254 uint32_t handle_idx; 1255 struct mlx5_flow_handle *dev_handle; 1256 1257 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 1258 handle_idx, dev_handle, next) 1259 flow_drv_rxq_flags_trim(dev, dev_handle); 1260 } 1261 1262 /** 1263 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 1264 * 1265 * @param dev 1266 * Pointer to Ethernet device. 1267 */ 1268 static void 1269 flow_rxq_flags_clear(struct rte_eth_dev *dev) 1270 { 1271 struct mlx5_priv *priv = dev->data->dev_private; 1272 unsigned int i; 1273 1274 for (i = 0; i != priv->rxqs_n; ++i) { 1275 struct mlx5_rxq_ctrl *rxq_ctrl; 1276 unsigned int j; 1277 1278 if (!(*priv->rxqs)[i]) 1279 continue; 1280 rxq_ctrl = container_of((*priv->rxqs)[i], 1281 struct mlx5_rxq_ctrl, rxq); 1282 rxq_ctrl->flow_mark_n = 0; 1283 rxq_ctrl->rxq.mark = 0; 1284 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 1285 rxq_ctrl->flow_tunnels_n[j] = 0; 1286 rxq_ctrl->rxq.tunnel = 0; 1287 } 1288 } 1289 1290 /** 1291 * Set the Rx queue dynamic metadata (mask and offset) for a flow 1292 * 1293 * @param[in] dev 1294 * Pointer to the Ethernet device structure. 
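 *
 * A minimal application-side sketch of how the field configured here is
 * consumed (generic rte_flow dynamic metadata API; error checks and flow
 * creation omitted):
 *
 * @code
 * rte_flow_dynf_metadata_register();
 * ... start the port and create flows that deliver metadata ...
 * if (pkt->ol_flags & PKT_RX_DYNF_METADATA) {
 *         uint32_t meta = *RTE_FLOW_DYNF_METADATA(pkt);
 *         ...
 * }
 * @endcode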
1295 */ 1296 void 1297 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) 1298 { 1299 struct mlx5_priv *priv = dev->data->dev_private; 1300 struct mlx5_rxq_data *data; 1301 unsigned int i; 1302 1303 for (i = 0; i != priv->rxqs_n; ++i) { 1304 if (!(*priv->rxqs)[i]) 1305 continue; 1306 data = (*priv->rxqs)[i]; 1307 if (!rte_flow_dynf_metadata_avail()) { 1308 data->dynf_meta = 0; 1309 data->flow_meta_mask = 0; 1310 data->flow_meta_offset = -1; 1311 data->flow_meta_port_mask = 0; 1312 } else { 1313 data->dynf_meta = 1; 1314 data->flow_meta_mask = rte_flow_dynf_metadata_mask; 1315 data->flow_meta_offset = rte_flow_dynf_metadata_offs; 1316 data->flow_meta_port_mask = (uint32_t)~0; 1317 if (priv->config.dv_xmeta_en == MLX5_XMETA_MODE_META16) 1318 data->flow_meta_port_mask >>= 16; 1319 } 1320 } 1321 } 1322 1323 /* 1324 * return a pointer to the desired action in the list of actions. 1325 * 1326 * @param[in] actions 1327 * The list of actions to search the action in. 1328 * @param[in] action 1329 * The action to find. 1330 * 1331 * @return 1332 * Pointer to the action in the list, if found. NULL otherwise. 1333 */ 1334 const struct rte_flow_action * 1335 mlx5_flow_find_action(const struct rte_flow_action *actions, 1336 enum rte_flow_action_type action) 1337 { 1338 if (actions == NULL) 1339 return NULL; 1340 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) 1341 if (actions->type == action) 1342 return actions; 1343 return NULL; 1344 } 1345 1346 /* 1347 * Validate the flag action. 1348 * 1349 * @param[in] action_flags 1350 * Bit-fields that holds the actions detected until now. 1351 * @param[in] attr 1352 * Attributes of flow that includes this action. 1353 * @param[out] error 1354 * Pointer to error structure. 1355 * 1356 * @return 1357 * 0 on success, a negative errno value otherwise and rte_errno is set. 1358 */ 1359 int 1360 mlx5_flow_validate_action_flag(uint64_t action_flags, 1361 const struct rte_flow_attr *attr, 1362 struct rte_flow_error *error) 1363 { 1364 if (action_flags & MLX5_FLOW_ACTION_MARK) 1365 return rte_flow_error_set(error, EINVAL, 1366 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1367 "can't mark and flag in same flow"); 1368 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1369 return rte_flow_error_set(error, EINVAL, 1370 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1371 "can't have 2 flag" 1372 " actions in same flow"); 1373 if (attr->egress) 1374 return rte_flow_error_set(error, ENOTSUP, 1375 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1376 "flag action not supported for " 1377 "egress"); 1378 return 0; 1379 } 1380 1381 /* 1382 * Validate the mark action. 1383 * 1384 * @param[in] action 1385 * Pointer to the queue action. 1386 * @param[in] action_flags 1387 * Bit-fields that holds the actions detected until now. 1388 * @param[in] attr 1389 * Attributes of flow that includes this action. 1390 * @param[out] error 1391 * Pointer to error structure. 1392 * 1393 * @return 1394 * 0 on success, a negative errno value otherwise and rte_errno is set. 
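 *
 * A minimal sketch of an action configuration this validator accepts (the
 * mark id is an arbitrary example value below MLX5_FLOW_MARK_MAX):
 *
 * @code
 * struct rte_flow_action_mark mark = { .id = 42 };
 * struct rte_flow_action action = {
 *         .type = RTE_FLOW_ACTION_TYPE_MARK,
 *         .conf = &mark,
 * };
 * @endcode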
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
			       uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't flag and mark in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 mark actions in same"
					  " flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "mark action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "drop action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
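 *
 * A minimal sketch of an action configuration this validator accepts (the
 * queue index is an arbitrary example and must refer to a configured Rx
 * queue):
 *
 * @code
 * struct rte_flow_action_queue queue = { .index = 0 };
 * struct rte_flow_action action = {
 *         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *         .conf = &queue,
 * };
 * @endcode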
1474 */ 1475 int 1476 mlx5_flow_validate_action_queue(const struct rte_flow_action *action, 1477 uint64_t action_flags, 1478 struct rte_eth_dev *dev, 1479 const struct rte_flow_attr *attr, 1480 struct rte_flow_error *error) 1481 { 1482 struct mlx5_priv *priv = dev->data->dev_private; 1483 const struct rte_flow_action_queue *queue = action->conf; 1484 1485 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1486 return rte_flow_error_set(error, EINVAL, 1487 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1488 "can't have 2 fate actions in" 1489 " same flow"); 1490 if (!priv->rxqs_n) 1491 return rte_flow_error_set(error, EINVAL, 1492 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1493 NULL, "No Rx queues configured"); 1494 if (queue->index >= priv->rxqs_n) 1495 return rte_flow_error_set(error, EINVAL, 1496 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1497 &queue->index, 1498 "queue index out of range"); 1499 if (!(*priv->rxqs)[queue->index]) 1500 return rte_flow_error_set(error, EINVAL, 1501 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1502 &queue->index, 1503 "queue is not configured"); 1504 if (attr->egress) 1505 return rte_flow_error_set(error, ENOTSUP, 1506 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1507 "queue action not supported for " 1508 "egress"); 1509 return 0; 1510 } 1511 1512 /* 1513 * Validate the rss action. 1514 * 1515 * @param[in] dev 1516 * Pointer to the Ethernet device structure. 1517 * @param[in] action 1518 * Pointer to the queue action. 1519 * @param[out] error 1520 * Pointer to error structure. 1521 * 1522 * @return 1523 * 0 on success, a negative errno value otherwise and rte_errno is set. 1524 */ 1525 int 1526 mlx5_validate_action_rss(struct rte_eth_dev *dev, 1527 const struct rte_flow_action *action, 1528 struct rte_flow_error *error) 1529 { 1530 struct mlx5_priv *priv = dev->data->dev_private; 1531 const struct rte_flow_action_rss *rss = action->conf; 1532 enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED; 1533 unsigned int i; 1534 1535 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 1536 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 1537 return rte_flow_error_set(error, ENOTSUP, 1538 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1539 &rss->func, 1540 "RSS hash function not supported"); 1541 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1542 if (rss->level > 2) 1543 #else 1544 if (rss->level > 1) 1545 #endif 1546 return rte_flow_error_set(error, ENOTSUP, 1547 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1548 &rss->level, 1549 "tunnel RSS is not supported"); 1550 /* allow RSS key_len 0 in case of NULL (default) RSS key. 
*/ 1551 if (rss->key_len == 0 && rss->key != NULL) 1552 return rte_flow_error_set(error, ENOTSUP, 1553 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1554 &rss->key_len, 1555 "RSS hash key length 0"); 1556 if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN) 1557 return rte_flow_error_set(error, ENOTSUP, 1558 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1559 &rss->key_len, 1560 "RSS hash key too small"); 1561 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 1562 return rte_flow_error_set(error, ENOTSUP, 1563 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1564 &rss->key_len, 1565 "RSS hash key too large"); 1566 if (rss->queue_num > priv->config.ind_table_max_size) 1567 return rte_flow_error_set(error, ENOTSUP, 1568 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1569 &rss->queue_num, 1570 "number of queues too large"); 1571 if (rss->types & MLX5_RSS_HF_MASK) 1572 return rte_flow_error_set(error, ENOTSUP, 1573 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1574 &rss->types, 1575 "some RSS protocols are not" 1576 " supported"); 1577 if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) && 1578 !(rss->types & ETH_RSS_IP)) 1579 return rte_flow_error_set(error, EINVAL, 1580 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1581 "L3 partial RSS requested but L3 RSS" 1582 " type not specified"); 1583 if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) && 1584 !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP))) 1585 return rte_flow_error_set(error, EINVAL, 1586 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1587 "L4 partial RSS requested but L4 RSS" 1588 " type not specified"); 1589 if (!priv->rxqs_n) 1590 return rte_flow_error_set(error, EINVAL, 1591 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1592 NULL, "No Rx queues configured"); 1593 if (!rss->queue_num) 1594 return rte_flow_error_set(error, EINVAL, 1595 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1596 NULL, "No queues configured"); 1597 for (i = 0; i != rss->queue_num; ++i) { 1598 struct mlx5_rxq_ctrl *rxq_ctrl; 1599 1600 if (rss->queue[i] >= priv->rxqs_n) 1601 return rte_flow_error_set 1602 (error, EINVAL, 1603 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1604 &rss->queue[i], "queue index out of range"); 1605 if (!(*priv->rxqs)[rss->queue[i]]) 1606 return rte_flow_error_set 1607 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1608 &rss->queue[i], "queue is not configured"); 1609 rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]], 1610 struct mlx5_rxq_ctrl, rxq); 1611 if (i == 0) 1612 rxq_type = rxq_ctrl->type; 1613 if (rxq_type != rxq_ctrl->type) 1614 return rte_flow_error_set 1615 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1616 &rss->queue[i], 1617 "combining hairpin and regular RSS queues is not supported"); 1618 } 1619 return 0; 1620 } 1621 1622 /* 1623 * Validate the rss action. 1624 * 1625 * @param[in] action 1626 * Pointer to the queue action. 1627 * @param[in] action_flags 1628 * Bit-fields that holds the actions detected until now. 1629 * @param[in] dev 1630 * Pointer to the Ethernet device structure. 1631 * @param[in] attr 1632 * Attributes of flow that includes this action. 1633 * @param[in] item_flags 1634 * Items that were detected. 1635 * @param[out] error 1636 * Pointer to error structure. 1637 * 1638 * @return 1639 * 0 on success, a negative errno value otherwise and rte_errno is set. 
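 *
 * A minimal sketch of an RSS action this validator accepts (queue list,
 * level and types are example values; the queues must all be configured
 * and of the same type, i.e. all regular or all hairpin):
 *
 * @code
 * uint16_t queues[2] = { 0, 1 };
 * struct rte_flow_action_rss rss = {
 *         .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *         .level = 1,
 *         .types = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP,
 *         .key_len = 0,
 *         .key = NULL,
 *         .queue_num = 2,
 *         .queue = queues,
 * };
 * @endcode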
1640 */ 1641 int 1642 mlx5_flow_validate_action_rss(const struct rte_flow_action *action, 1643 uint64_t action_flags, 1644 struct rte_eth_dev *dev, 1645 const struct rte_flow_attr *attr, 1646 uint64_t item_flags, 1647 struct rte_flow_error *error) 1648 { 1649 const struct rte_flow_action_rss *rss = action->conf; 1650 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1651 int ret; 1652 1653 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1654 return rte_flow_error_set(error, EINVAL, 1655 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1656 "can't have 2 fate actions" 1657 " in same flow"); 1658 ret = mlx5_validate_action_rss(dev, action, error); 1659 if (ret) 1660 return ret; 1661 if (attr->egress) 1662 return rte_flow_error_set(error, ENOTSUP, 1663 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1664 "rss action not supported for " 1665 "egress"); 1666 if (rss->level > 1 && !tunnel) 1667 return rte_flow_error_set(error, EINVAL, 1668 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1669 "inner RSS is not supported for " 1670 "non-tunnel flows"); 1671 if ((item_flags & MLX5_FLOW_LAYER_ECPRI) && 1672 !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) { 1673 return rte_flow_error_set(error, EINVAL, 1674 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1675 "RSS on eCPRI is not supported now"); 1676 } 1677 return 0; 1678 } 1679 1680 /* 1681 * Validate the default miss action. 1682 * 1683 * @param[in] action_flags 1684 * Bit-fields that holds the actions detected until now. 1685 * @param[out] error 1686 * Pointer to error structure. 1687 * 1688 * @return 1689 * 0 on success, a negative errno value otherwise and rte_errno is set. 1690 */ 1691 int 1692 mlx5_flow_validate_action_default_miss(uint64_t action_flags, 1693 const struct rte_flow_attr *attr, 1694 struct rte_flow_error *error) 1695 { 1696 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1697 return rte_flow_error_set(error, EINVAL, 1698 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1699 "can't have 2 fate actions in" 1700 " same flow"); 1701 if (attr->egress) 1702 return rte_flow_error_set(error, ENOTSUP, 1703 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1704 "default miss action not supported " 1705 "for egress"); 1706 if (attr->group) 1707 return rte_flow_error_set(error, ENOTSUP, 1708 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL, 1709 "only group 0 is supported"); 1710 if (attr->transfer) 1711 return rte_flow_error_set(error, ENOTSUP, 1712 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1713 NULL, "transfer is not supported"); 1714 return 0; 1715 } 1716 1717 /* 1718 * Validate the count action. 1719 * 1720 * @param[in] dev 1721 * Pointer to the Ethernet device structure. 1722 * @param[in] attr 1723 * Attributes of flow that includes this action. 1724 * @param[out] error 1725 * Pointer to error structure. 1726 * 1727 * @return 1728 * 0 on success, a negative errno value otherwise and rte_errno is set. 1729 */ 1730 int 1731 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, 1732 const struct rte_flow_attr *attr, 1733 struct rte_flow_error *error) 1734 { 1735 if (attr->egress) 1736 return rte_flow_error_set(error, ENOTSUP, 1737 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1738 "count action not supported for " 1739 "egress"); 1740 return 0; 1741 } 1742 1743 /* 1744 * Validate the ASO CT action. 1745 * 1746 * @param[in] dev 1747 * Pointer to the Ethernet device structure. 1748 * @param[in] conntrack 1749 * Pointer to the CT action profile. 1750 * @param[out] error 1751 * Pointer to error structure. 1752 * 1753 * @return 1754 * 0 on success, a negative errno value otherwise and rte_errno is set. 
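 *
 * A minimal sketch of a profile this validator accepts (only the fields
 * checked here are shown; a real profile carries the full TCP conntrack
 * state):
 *
 * @code
 * struct rte_flow_action_conntrack profile = {
 *         .state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
 *         .last_index = RTE_FLOW_CONNTRACK_FLAG_ACK,
 * };
 * @endcode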
1755 */ 1756 int 1757 mlx5_validate_action_ct(struct rte_eth_dev *dev, 1758 const struct rte_flow_action_conntrack *conntrack, 1759 struct rte_flow_error *error) 1760 { 1761 RTE_SET_USED(dev); 1762 1763 if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT) 1764 return rte_flow_error_set(error, EINVAL, 1765 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1766 "Invalid CT state"); 1767 if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST) 1768 return rte_flow_error_set(error, EINVAL, 1769 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1770 "Invalid last TCP packet flag"); 1771 return 0; 1772 } 1773 1774 /** 1775 * Verify the @p attributes will be correctly understood by the NIC and store 1776 * them in the @p flow if everything is correct. 1777 * 1778 * @param[in] dev 1779 * Pointer to the Ethernet device structure. 1780 * @param[in] attributes 1781 * Pointer to flow attributes 1782 * @param[out] error 1783 * Pointer to error structure. 1784 * 1785 * @return 1786 * 0 on success, a negative errno value otherwise and rte_errno is set. 1787 */ 1788 int 1789 mlx5_flow_validate_attributes(struct rte_eth_dev *dev, 1790 const struct rte_flow_attr *attributes, 1791 struct rte_flow_error *error) 1792 { 1793 struct mlx5_priv *priv = dev->data->dev_private; 1794 uint32_t priority_max = priv->config.flow_prio - 1; 1795 1796 if (attributes->group) 1797 return rte_flow_error_set(error, ENOTSUP, 1798 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 1799 NULL, "groups is not supported"); 1800 if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR && 1801 attributes->priority >= priority_max) 1802 return rte_flow_error_set(error, ENOTSUP, 1803 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 1804 NULL, "priority out of range"); 1805 if (attributes->egress) 1806 return rte_flow_error_set(error, ENOTSUP, 1807 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1808 "egress is not supported"); 1809 if (attributes->transfer && !priv->config.dv_esw_en) 1810 return rte_flow_error_set(error, ENOTSUP, 1811 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1812 NULL, "transfer is not supported"); 1813 if (!attributes->ingress) 1814 return rte_flow_error_set(error, EINVAL, 1815 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 1816 NULL, 1817 "ingress attribute is mandatory"); 1818 return 0; 1819 } 1820 1821 /** 1822 * Validate ICMP6 item. 1823 * 1824 * @param[in] item 1825 * Item specification. 1826 * @param[in] item_flags 1827 * Bit-fields that holds the items detected until now. 1828 * @param[in] ext_vlan_sup 1829 * Whether extended VLAN features are supported or not. 1830 * @param[out] error 1831 * Pointer to error structure. 1832 * 1833 * @return 1834 * 0 on success, a negative errno value otherwise and rte_errno is set. 1835 */ 1836 int 1837 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, 1838 uint64_t item_flags, 1839 uint8_t target_protocol, 1840 struct rte_flow_error *error) 1841 { 1842 const struct rte_flow_item_icmp6 *mask = item->mask; 1843 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1844 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1845 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1846 const uint64_t l4m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4 : 1847 MLX5_FLOW_LAYER_OUTER_L4; 1848 int ret; 1849 1850 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) 1851 return rte_flow_error_set(error, EINVAL, 1852 RTE_FLOW_ERROR_TYPE_ITEM, item, 1853 "protocol filtering not compatible" 1854 " with ICMP6 layer"); 1855 if (!(item_flags & l3m)) 1856 return rte_flow_error_set(error, EINVAL, 1857 RTE_FLOW_ERROR_TYPE_ITEM, item, 1858 "IPv6 is mandatory to filter on" 1859 " ICMP6"); 1860 if (item_flags & l4m) 1861 return rte_flow_error_set(error, EINVAL, 1862 RTE_FLOW_ERROR_TYPE_ITEM, item, 1863 "multiple L4 layers not supported"); 1864 if (!mask) 1865 mask = &rte_flow_item_icmp6_mask; 1866 ret = mlx5_flow_item_acceptable 1867 (item, (const uint8_t *)mask, 1868 (const uint8_t *)&rte_flow_item_icmp6_mask, 1869 sizeof(struct rte_flow_item_icmp6), 1870 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1871 if (ret < 0) 1872 return ret; 1873 return 0; 1874 } 1875 1876 /** 1877 * Validate ICMP item. 1878 * 1879 * @param[in] item 1880 * Item specification. 1881 * @param[in] item_flags 1882 * Bit-fields that holds the items detected until now. 1883 * @param[out] error 1884 * Pointer to error structure. 1885 * 1886 * @return 1887 * 0 on success, a negative errno value otherwise and rte_errno is set. 1888 */ 1889 int 1890 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, 1891 uint64_t item_flags, 1892 uint8_t target_protocol, 1893 struct rte_flow_error *error) 1894 { 1895 const struct rte_flow_item_icmp *mask = item->mask; 1896 const struct rte_flow_item_icmp nic_mask = { 1897 .hdr.icmp_type = 0xff, 1898 .hdr.icmp_code = 0xff, 1899 .hdr.icmp_ident = RTE_BE16(0xffff), 1900 .hdr.icmp_seq_nb = RTE_BE16(0xffff), 1901 }; 1902 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1903 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 1904 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 1905 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1906 MLX5_FLOW_LAYER_OUTER_L4; 1907 int ret; 1908 1909 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP) 1910 return rte_flow_error_set(error, EINVAL, 1911 RTE_FLOW_ERROR_TYPE_ITEM, item, 1912 "protocol filtering not compatible" 1913 " with ICMP layer"); 1914 if (!(item_flags & l3m)) 1915 return rte_flow_error_set(error, EINVAL, 1916 RTE_FLOW_ERROR_TYPE_ITEM, item, 1917 "IPv4 is mandatory to filter" 1918 " on ICMP"); 1919 if (item_flags & l4m) 1920 return rte_flow_error_set(error, EINVAL, 1921 RTE_FLOW_ERROR_TYPE_ITEM, item, 1922 "multiple L4 layers not supported"); 1923 if (!mask) 1924 mask = &nic_mask; 1925 ret = mlx5_flow_item_acceptable 1926 (item, (const uint8_t *)mask, 1927 (const uint8_t *)&nic_mask, 1928 sizeof(struct rte_flow_item_icmp), 1929 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1930 if (ret < 0) 1931 return ret; 1932 return 0; 1933 } 1934 1935 /** 1936 * Validate Ethernet item. 1937 * 1938 * @param[in] item 1939 * Item specification. 1940 * @param[in] item_flags 1941 * Bit-fields that holds the items detected until now. 1942 * @param[out] error 1943 * Pointer to error structure. 1944 * 1945 * @return 1946 * 0 on success, a negative errno value otherwise and rte_errno is set. 
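 *
 * Hedged example of an Ethernet item accepted as the outer L2 layer
 * (the EtherType value is purely illustrative):
 *
 *   struct rte_flow_item_eth eth_spec = {
 *           .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *   };
 *   struct rte_flow_item_eth eth_mask = {
 *           .type = RTE_BE16(0xffff),
 *   };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &eth_spec,
 *           .mask = &eth_mask,
 *   };
 *
 * A second ETH item, or an ETH item following an L3 or VLAN layer, is
 * rejected by the checks below.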
1947 */ 1948 int 1949 mlx5_flow_validate_item_eth(const struct rte_flow_item *item, 1950 uint64_t item_flags, bool ext_vlan_sup, 1951 struct rte_flow_error *error) 1952 { 1953 const struct rte_flow_item_eth *mask = item->mask; 1954 const struct rte_flow_item_eth nic_mask = { 1955 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1956 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1957 .type = RTE_BE16(0xffff), 1958 .has_vlan = ext_vlan_sup ? 1 : 0, 1959 }; 1960 int ret; 1961 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1962 const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 1963 MLX5_FLOW_LAYER_OUTER_L2; 1964 1965 if (item_flags & ethm) 1966 return rte_flow_error_set(error, ENOTSUP, 1967 RTE_FLOW_ERROR_TYPE_ITEM, item, 1968 "multiple L2 layers not supported"); 1969 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) || 1970 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3))) 1971 return rte_flow_error_set(error, EINVAL, 1972 RTE_FLOW_ERROR_TYPE_ITEM, item, 1973 "L2 layer should not follow " 1974 "L3 layers"); 1975 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) || 1976 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN))) 1977 return rte_flow_error_set(error, EINVAL, 1978 RTE_FLOW_ERROR_TYPE_ITEM, item, 1979 "L2 layer should not follow VLAN"); 1980 if (!mask) 1981 mask = &rte_flow_item_eth_mask; 1982 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1983 (const uint8_t *)&nic_mask, 1984 sizeof(struct rte_flow_item_eth), 1985 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1986 return ret; 1987 } 1988 1989 /** 1990 * Validate VLAN item. 1991 * 1992 * @param[in] item 1993 * Item specification. 1994 * @param[in] item_flags 1995 * Bit-fields that holds the items detected until now. 1996 * @param[in] dev 1997 * Ethernet device flow is being created on. 1998 * @param[out] error 1999 * Pointer to error structure. 2000 * 2001 * @return 2002 * 0 on success, a negative errno value otherwise and rte_errno is set. 2003 */ 2004 int 2005 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, 2006 uint64_t item_flags, 2007 struct rte_eth_dev *dev, 2008 struct rte_flow_error *error) 2009 { 2010 const struct rte_flow_item_vlan *spec = item->spec; 2011 const struct rte_flow_item_vlan *mask = item->mask; 2012 const struct rte_flow_item_vlan nic_mask = { 2013 .tci = RTE_BE16(UINT16_MAX), 2014 .inner_type = RTE_BE16(UINT16_MAX), 2015 }; 2016 uint16_t vlan_tag = 0; 2017 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2018 int ret; 2019 const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 2020 MLX5_FLOW_LAYER_INNER_L4) : 2021 (MLX5_FLOW_LAYER_OUTER_L3 | 2022 MLX5_FLOW_LAYER_OUTER_L4); 2023 const uint64_t vlanm = tunnel ? 
MLX5_FLOW_LAYER_INNER_VLAN : 2024 MLX5_FLOW_LAYER_OUTER_VLAN; 2025 2026 if (item_flags & vlanm) 2027 return rte_flow_error_set(error, EINVAL, 2028 RTE_FLOW_ERROR_TYPE_ITEM, item, 2029 "multiple VLAN layers not supported"); 2030 else if ((item_flags & l34m) != 0) 2031 return rte_flow_error_set(error, EINVAL, 2032 RTE_FLOW_ERROR_TYPE_ITEM, item, 2033 "VLAN cannot follow L3/L4 layer"); 2034 if (!mask) 2035 mask = &rte_flow_item_vlan_mask; 2036 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2037 (const uint8_t *)&nic_mask, 2038 sizeof(struct rte_flow_item_vlan), 2039 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2040 if (ret) 2041 return ret; 2042 if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { 2043 struct mlx5_priv *priv = dev->data->dev_private; 2044 2045 if (priv->vmwa_context) { 2046 /* 2047 * Non-NULL context means we have a virtual machine 2048 * and SR-IOV enabled, we have to create VLAN interface 2049 * to make hypervisor to setup E-Switch vport 2050 * context correctly. We avoid creating the multiple 2051 * VLAN interfaces, so we cannot support VLAN tag mask. 2052 */ 2053 return rte_flow_error_set(error, EINVAL, 2054 RTE_FLOW_ERROR_TYPE_ITEM, 2055 item, 2056 "VLAN tag mask is not" 2057 " supported in virtual" 2058 " environment"); 2059 } 2060 } 2061 if (spec) { 2062 vlan_tag = spec->tci; 2063 vlan_tag &= mask->tci; 2064 } 2065 /* 2066 * From verbs perspective an empty VLAN is equivalent 2067 * to a packet without VLAN layer. 2068 */ 2069 if (!vlan_tag) 2070 return rte_flow_error_set(error, EINVAL, 2071 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 2072 item->spec, 2073 "VLAN cannot be empty"); 2074 return 0; 2075 } 2076 2077 /** 2078 * Validate IPV4 item. 2079 * 2080 * @param[in] item 2081 * Item specification. 2082 * @param[in] item_flags 2083 * Bit-fields that holds the items detected until now. 2084 * @param[in] last_item 2085 * Previous validated item in the pattern items. 2086 * @param[in] ether_type 2087 * Type in the ethernet layer header (including dot1q). 2088 * @param[in] acc_mask 2089 * Acceptable mask, if NULL default internal default mask 2090 * will be used to check whether item fields are supported. 2091 * @param[in] range_accepted 2092 * True if range of values is accepted for specific fields, false otherwise. 2093 * @param[out] error 2094 * Pointer to error structure. 2095 * 2096 * @return 2097 * 0 on success, a negative errno value otherwise and rte_errno is set. 2098 */ 2099 int 2100 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, 2101 uint64_t item_flags, 2102 uint64_t last_item, 2103 uint16_t ether_type, 2104 const struct rte_flow_item_ipv4 *acc_mask, 2105 bool range_accepted, 2106 struct rte_flow_error *error) 2107 { 2108 const struct rte_flow_item_ipv4 *mask = item->mask; 2109 const struct rte_flow_item_ipv4 *spec = item->spec; 2110 const struct rte_flow_item_ipv4 nic_mask = { 2111 .hdr = { 2112 .src_addr = RTE_BE32(0xffffffff), 2113 .dst_addr = RTE_BE32(0xffffffff), 2114 .type_of_service = 0xff, 2115 .next_proto_id = 0xff, 2116 }, 2117 }; 2118 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2119 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2120 MLX5_FLOW_LAYER_OUTER_L3; 2121 const uint64_t l4m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4 : 2122 MLX5_FLOW_LAYER_OUTER_L4; 2123 int ret; 2124 uint8_t next_proto = 0xFF; 2125 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 2126 MLX5_FLOW_LAYER_OUTER_VLAN | 2127 MLX5_FLOW_LAYER_INNER_VLAN); 2128 2129 if ((last_item & l2_vlan) && ether_type && 2130 ether_type != RTE_ETHER_TYPE_IPV4) 2131 return rte_flow_error_set(error, EINVAL, 2132 RTE_FLOW_ERROR_TYPE_ITEM, item, 2133 "IPv4 cannot follow L2/VLAN layer " 2134 "which ether type is not IPv4"); 2135 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { 2136 if (mask && spec) 2137 next_proto = mask->hdr.next_proto_id & 2138 spec->hdr.next_proto_id; 2139 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 2140 return rte_flow_error_set(error, EINVAL, 2141 RTE_FLOW_ERROR_TYPE_ITEM, 2142 item, 2143 "multiple tunnel " 2144 "not supported"); 2145 } 2146 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 2147 return rte_flow_error_set(error, EINVAL, 2148 RTE_FLOW_ERROR_TYPE_ITEM, item, 2149 "wrong tunnel type - IPv6 specified " 2150 "but IPv4 item provided"); 2151 if (item_flags & l3m) 2152 return rte_flow_error_set(error, ENOTSUP, 2153 RTE_FLOW_ERROR_TYPE_ITEM, item, 2154 "multiple L3 layers not supported"); 2155 else if (item_flags & l4m) 2156 return rte_flow_error_set(error, EINVAL, 2157 RTE_FLOW_ERROR_TYPE_ITEM, item, 2158 "L3 cannot follow an L4 layer."); 2159 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 2160 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 2161 return rte_flow_error_set(error, EINVAL, 2162 RTE_FLOW_ERROR_TYPE_ITEM, item, 2163 "L3 cannot follow an NVGRE layer."); 2164 if (!mask) 2165 mask = &rte_flow_item_ipv4_mask; 2166 else if (mask->hdr.next_proto_id != 0 && 2167 mask->hdr.next_proto_id != 0xff) 2168 return rte_flow_error_set(error, EINVAL, 2169 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2170 "partial mask is not supported" 2171 " for protocol"); 2172 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2173 acc_mask ? (const uint8_t *)acc_mask 2174 : (const uint8_t *)&nic_mask, 2175 sizeof(struct rte_flow_item_ipv4), 2176 range_accepted, error); 2177 if (ret < 0) 2178 return ret; 2179 return 0; 2180 } 2181 2182 /** 2183 * Validate IPV6 item. 2184 * 2185 * @param[in] item 2186 * Item specification. 2187 * @param[in] item_flags 2188 * Bit-fields that holds the items detected until now. 2189 * @param[in] last_item 2190 * Previous validated item in the pattern items. 2191 * @param[in] ether_type 2192 * Type in the ethernet layer header (including dot1q). 2193 * @param[in] acc_mask 2194 * Acceptable mask, if NULL default internal default mask 2195 * will be used to check whether item fields are supported. 2196 * @param[out] error 2197 * Pointer to error structure. 2198 * 2199 * @return 2200 * 0 on success, a negative errno value otherwise and rte_errno is set. 
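 *
 * Hedged example: an IPv6 item whose fully masked proto field selects a
 * plain L4 protocol is accepted, whereas a proto equal to an extension
 * header (HOPOPTS, ROUTING, FRAGMENT, ESP, AH, DSTOPTS) is rejected:
 *
 *   struct rte_flow_item_ipv6 ipv6_spec = { .hdr.proto = IPPROTO_UDP };
 *   struct rte_flow_item_ipv6 ipv6_mask = { .hdr.proto = 0xff };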
2201 */ 2202 int 2203 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 2204 uint64_t item_flags, 2205 uint64_t last_item, 2206 uint16_t ether_type, 2207 const struct rte_flow_item_ipv6 *acc_mask, 2208 struct rte_flow_error *error) 2209 { 2210 const struct rte_flow_item_ipv6 *mask = item->mask; 2211 const struct rte_flow_item_ipv6 *spec = item->spec; 2212 const struct rte_flow_item_ipv6 nic_mask = { 2213 .hdr = { 2214 .src_addr = 2215 "\xff\xff\xff\xff\xff\xff\xff\xff" 2216 "\xff\xff\xff\xff\xff\xff\xff\xff", 2217 .dst_addr = 2218 "\xff\xff\xff\xff\xff\xff\xff\xff" 2219 "\xff\xff\xff\xff\xff\xff\xff\xff", 2220 .vtc_flow = RTE_BE32(0xffffffff), 2221 .proto = 0xff, 2222 }, 2223 }; 2224 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2225 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2226 MLX5_FLOW_LAYER_OUTER_L3; 2227 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2228 MLX5_FLOW_LAYER_OUTER_L4; 2229 int ret; 2230 uint8_t next_proto = 0xFF; 2231 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 2232 MLX5_FLOW_LAYER_OUTER_VLAN | 2233 MLX5_FLOW_LAYER_INNER_VLAN); 2234 2235 if ((last_item & l2_vlan) && ether_type && 2236 ether_type != RTE_ETHER_TYPE_IPV6) 2237 return rte_flow_error_set(error, EINVAL, 2238 RTE_FLOW_ERROR_TYPE_ITEM, item, 2239 "IPv6 cannot follow L2/VLAN layer " 2240 "which ether type is not IPv6"); 2241 if (mask && mask->hdr.proto == UINT8_MAX && spec) 2242 next_proto = spec->hdr.proto; 2243 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { 2244 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 2245 return rte_flow_error_set(error, EINVAL, 2246 RTE_FLOW_ERROR_TYPE_ITEM, 2247 item, 2248 "multiple tunnel " 2249 "not supported"); 2250 } 2251 if (next_proto == IPPROTO_HOPOPTS || 2252 next_proto == IPPROTO_ROUTING || 2253 next_proto == IPPROTO_FRAGMENT || 2254 next_proto == IPPROTO_ESP || 2255 next_proto == IPPROTO_AH || 2256 next_proto == IPPROTO_DSTOPTS) 2257 return rte_flow_error_set(error, EINVAL, 2258 RTE_FLOW_ERROR_TYPE_ITEM, item, 2259 "IPv6 proto (next header) should " 2260 "not be set as extension header"); 2261 if (item_flags & MLX5_FLOW_LAYER_IPIP) 2262 return rte_flow_error_set(error, EINVAL, 2263 RTE_FLOW_ERROR_TYPE_ITEM, item, 2264 "wrong tunnel type - IPv4 specified " 2265 "but IPv6 item provided"); 2266 if (item_flags & l3m) 2267 return rte_flow_error_set(error, ENOTSUP, 2268 RTE_FLOW_ERROR_TYPE_ITEM, item, 2269 "multiple L3 layers not supported"); 2270 else if (item_flags & l4m) 2271 return rte_flow_error_set(error, EINVAL, 2272 RTE_FLOW_ERROR_TYPE_ITEM, item, 2273 "L3 cannot follow an L4 layer."); 2274 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 2275 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 2276 return rte_flow_error_set(error, EINVAL, 2277 RTE_FLOW_ERROR_TYPE_ITEM, item, 2278 "L3 cannot follow an NVGRE layer."); 2279 if (!mask) 2280 mask = &rte_flow_item_ipv6_mask; 2281 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2282 acc_mask ? (const uint8_t *)acc_mask 2283 : (const uint8_t *)&nic_mask, 2284 sizeof(struct rte_flow_item_ipv6), 2285 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2286 if (ret < 0) 2287 return ret; 2288 return 0; 2289 } 2290 2291 /** 2292 * Validate UDP item. 2293 * 2294 * @param[in] item 2295 * Item specification. 2296 * @param[in] item_flags 2297 * Bit-fields that holds the items detected until now. 2298 * @param[in] target_protocol 2299 * The next protocol in the previous item. 2300 * @param[in] flow_mask 2301 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 
2302 * @param[out] error 2303 * Pointer to error structure. 2304 * 2305 * @return 2306 * 0 on success, a negative errno value otherwise and rte_errno is set. 2307 */ 2308 int 2309 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 2310 uint64_t item_flags, 2311 uint8_t target_protocol, 2312 struct rte_flow_error *error) 2313 { 2314 const struct rte_flow_item_udp *mask = item->mask; 2315 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2316 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2317 MLX5_FLOW_LAYER_OUTER_L3; 2318 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2319 MLX5_FLOW_LAYER_OUTER_L4; 2320 int ret; 2321 2322 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 2323 return rte_flow_error_set(error, EINVAL, 2324 RTE_FLOW_ERROR_TYPE_ITEM, item, 2325 "protocol filtering not compatible" 2326 " with UDP layer"); 2327 if (!(item_flags & l3m)) 2328 return rte_flow_error_set(error, EINVAL, 2329 RTE_FLOW_ERROR_TYPE_ITEM, item, 2330 "L3 is mandatory to filter on L4"); 2331 if (item_flags & l4m) 2332 return rte_flow_error_set(error, EINVAL, 2333 RTE_FLOW_ERROR_TYPE_ITEM, item, 2334 "multiple L4 layers not supported"); 2335 if (!mask) 2336 mask = &rte_flow_item_udp_mask; 2337 ret = mlx5_flow_item_acceptable 2338 (item, (const uint8_t *)mask, 2339 (const uint8_t *)&rte_flow_item_udp_mask, 2340 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2341 error); 2342 if (ret < 0) 2343 return ret; 2344 return 0; 2345 } 2346 2347 /** 2348 * Validate TCP item. 2349 * 2350 * @param[in] item 2351 * Item specification. 2352 * @param[in] item_flags 2353 * Bit-fields that holds the items detected until now. 2354 * @param[in] target_protocol 2355 * The next protocol in the previous item. 2356 * @param[out] error 2357 * Pointer to error structure. 2358 * 2359 * @return 2360 * 0 on success, a negative errno value otherwise and rte_errno is set. 2361 */ 2362 int 2363 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 2364 uint64_t item_flags, 2365 uint8_t target_protocol, 2366 const struct rte_flow_item_tcp *flow_mask, 2367 struct rte_flow_error *error) 2368 { 2369 const struct rte_flow_item_tcp *mask = item->mask; 2370 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2371 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2372 MLX5_FLOW_LAYER_OUTER_L3; 2373 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2374 MLX5_FLOW_LAYER_OUTER_L4; 2375 int ret; 2376 2377 MLX5_ASSERT(flow_mask); 2378 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 2379 return rte_flow_error_set(error, EINVAL, 2380 RTE_FLOW_ERROR_TYPE_ITEM, item, 2381 "protocol filtering not compatible" 2382 " with TCP layer"); 2383 if (!(item_flags & l3m)) 2384 return rte_flow_error_set(error, EINVAL, 2385 RTE_FLOW_ERROR_TYPE_ITEM, item, 2386 "L3 is mandatory to filter on L4"); 2387 if (item_flags & l4m) 2388 return rte_flow_error_set(error, EINVAL, 2389 RTE_FLOW_ERROR_TYPE_ITEM, item, 2390 "multiple L4 layers not supported"); 2391 if (!mask) 2392 mask = &rte_flow_item_tcp_mask; 2393 ret = mlx5_flow_item_acceptable 2394 (item, (const uint8_t *)mask, 2395 (const uint8_t *)flow_mask, 2396 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2397 error); 2398 if (ret < 0) 2399 return ret; 2400 return 0; 2401 } 2402 2403 /** 2404 * Validate VXLAN item. 2405 * 2406 * @param[in] item 2407 * Item specification. 2408 * @param[in] item_flags 2409 * Bit-fields that holds the items detected until now. 
2410 * @param[in] target_protocol 2411 * The next protocol in the previous item. 2412 * @param[out] error 2413 * Pointer to error structure. 2414 * 2415 * @return 2416 * 0 on success, a negative errno value otherwise and rte_errno is set. 2417 */ 2418 int 2419 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, 2420 uint64_t item_flags, 2421 struct rte_flow_error *error) 2422 { 2423 const struct rte_flow_item_vxlan *spec = item->spec; 2424 const struct rte_flow_item_vxlan *mask = item->mask; 2425 int ret; 2426 union vni { 2427 uint32_t vlan_id; 2428 uint8_t vni[4]; 2429 } id = { .vlan_id = 0, }; 2430 2431 2432 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2433 return rte_flow_error_set(error, ENOTSUP, 2434 RTE_FLOW_ERROR_TYPE_ITEM, item, 2435 "multiple tunnel layers not" 2436 " supported"); 2437 /* 2438 * Verify only UDPv4 is present as defined in 2439 * https://tools.ietf.org/html/rfc7348 2440 */ 2441 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2442 return rte_flow_error_set(error, EINVAL, 2443 RTE_FLOW_ERROR_TYPE_ITEM, item, 2444 "no outer UDP layer found"); 2445 if (!mask) 2446 mask = &rte_flow_item_vxlan_mask; 2447 ret = mlx5_flow_item_acceptable 2448 (item, (const uint8_t *)mask, 2449 (const uint8_t *)&rte_flow_item_vxlan_mask, 2450 sizeof(struct rte_flow_item_vxlan), 2451 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2452 if (ret < 0) 2453 return ret; 2454 if (spec) { 2455 memcpy(&id.vni[1], spec->vni, 3); 2456 memcpy(&id.vni[1], mask->vni, 3); 2457 } 2458 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2459 return rte_flow_error_set(error, ENOTSUP, 2460 RTE_FLOW_ERROR_TYPE_ITEM, item, 2461 "VXLAN tunnel must be fully defined"); 2462 return 0; 2463 } 2464 2465 /** 2466 * Validate VXLAN_GPE item. 2467 * 2468 * @param[in] item 2469 * Item specification. 2470 * @param[in] item_flags 2471 * Bit-fields that holds the items detected until now. 2472 * @param[in] priv 2473 * Pointer to the private data structure. 2474 * @param[in] target_protocol 2475 * The next protocol in the previous item. 2476 * @param[out] error 2477 * Pointer to error structure. 2478 * 2479 * @return 2480 * 0 on success, a negative errno value otherwise and rte_errno is set. 
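 *
 * Hedged usage note: this item is accepted only when L3 VXLAN support is
 * enabled via the "l3_vxlan_en" device argument and an outer UDP item is
 * already present, e.g. (all specs omitted for brevity):
 *
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *
 * Matching on the VXLAN-GPE protocol field is rejected below.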
2481 */ 2482 int 2483 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 2484 uint64_t item_flags, 2485 struct rte_eth_dev *dev, 2486 struct rte_flow_error *error) 2487 { 2488 struct mlx5_priv *priv = dev->data->dev_private; 2489 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 2490 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 2491 int ret; 2492 union vni { 2493 uint32_t vlan_id; 2494 uint8_t vni[4]; 2495 } id = { .vlan_id = 0, }; 2496 2497 if (!priv->config.l3_vxlan_en) 2498 return rte_flow_error_set(error, ENOTSUP, 2499 RTE_FLOW_ERROR_TYPE_ITEM, item, 2500 "L3 VXLAN is not enabled by device" 2501 " parameter and/or not configured in" 2502 " firmware"); 2503 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2504 return rte_flow_error_set(error, ENOTSUP, 2505 RTE_FLOW_ERROR_TYPE_ITEM, item, 2506 "multiple tunnel layers not" 2507 " supported"); 2508 /* 2509 * Verify only UDPv4 is present as defined in 2510 * https://tools.ietf.org/html/rfc7348 2511 */ 2512 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2513 return rte_flow_error_set(error, EINVAL, 2514 RTE_FLOW_ERROR_TYPE_ITEM, item, 2515 "no outer UDP layer found"); 2516 if (!mask) 2517 mask = &rte_flow_item_vxlan_gpe_mask; 2518 ret = mlx5_flow_item_acceptable 2519 (item, (const uint8_t *)mask, 2520 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2521 sizeof(struct rte_flow_item_vxlan_gpe), 2522 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2523 if (ret < 0) 2524 return ret; 2525 if (spec) { 2526 if (spec->protocol) 2527 return rte_flow_error_set(error, ENOTSUP, 2528 RTE_FLOW_ERROR_TYPE_ITEM, 2529 item, 2530 "VxLAN-GPE protocol" 2531 " not supported"); 2532 memcpy(&id.vni[1], spec->vni, 3); 2533 memcpy(&id.vni[1], mask->vni, 3); 2534 } 2535 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2536 return rte_flow_error_set(error, ENOTSUP, 2537 RTE_FLOW_ERROR_TYPE_ITEM, item, 2538 "VXLAN-GPE tunnel must be fully" 2539 " defined"); 2540 return 0; 2541 } 2542 /** 2543 * Validate GRE Key item. 2544 * 2545 * @param[in] item 2546 * Item specification. 2547 * @param[in] item_flags 2548 * Bit flags to mark detected items. 2549 * @param[in] gre_item 2550 * Pointer to gre_item 2551 * @param[out] error 2552 * Pointer to error structure. 2553 * 2554 * @return 2555 * 0 on success, a negative errno value otherwise and rte_errno is set. 
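 *
 * Hedged sketch of the expected ordering: a GRE item whose spec and mask
 * set the K bit (0x2000 in c_rsvd0_ver) followed by the 32-bit key item
 * (the 0x1234 key value is hypothetical):
 *
 *   struct rte_flow_item_gre gre_spec = {
 *           .c_rsvd0_ver = RTE_BE16(0x2000),
 *   };
 *   rte_be32_t gre_key = RTE_BE32(0x1234);
 *
 *   pattern: ... / GRE (spec = &gre_spec) / GRE_KEY (spec = &gre_key) / ...
 *
 * If the GRE mask covers the K bit but the spec leaves it cleared, the
 * key item is rejected with "Key bit must be on".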
2556 */ 2557 int 2558 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2559 uint64_t item_flags, 2560 const struct rte_flow_item *gre_item, 2561 struct rte_flow_error *error) 2562 { 2563 const rte_be32_t *mask = item->mask; 2564 int ret = 0; 2565 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2566 const struct rte_flow_item_gre *gre_spec; 2567 const struct rte_flow_item_gre *gre_mask; 2568 2569 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2570 return rte_flow_error_set(error, ENOTSUP, 2571 RTE_FLOW_ERROR_TYPE_ITEM, item, 2572 "Multiple GRE key not support"); 2573 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2574 return rte_flow_error_set(error, ENOTSUP, 2575 RTE_FLOW_ERROR_TYPE_ITEM, item, 2576 "No preceding GRE header"); 2577 if (item_flags & MLX5_FLOW_LAYER_INNER) 2578 return rte_flow_error_set(error, ENOTSUP, 2579 RTE_FLOW_ERROR_TYPE_ITEM, item, 2580 "GRE key following a wrong item"); 2581 gre_mask = gre_item->mask; 2582 if (!gre_mask) 2583 gre_mask = &rte_flow_item_gre_mask; 2584 gre_spec = gre_item->spec; 2585 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2586 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2587 return rte_flow_error_set(error, EINVAL, 2588 RTE_FLOW_ERROR_TYPE_ITEM, item, 2589 "Key bit must be on"); 2590 2591 if (!mask) 2592 mask = &gre_key_default_mask; 2593 ret = mlx5_flow_item_acceptable 2594 (item, (const uint8_t *)mask, 2595 (const uint8_t *)&gre_key_default_mask, 2596 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2597 return ret; 2598 } 2599 2600 /** 2601 * Validate GRE item. 2602 * 2603 * @param[in] item 2604 * Item specification. 2605 * @param[in] item_flags 2606 * Bit flags to mark detected items. 2607 * @param[in] target_protocol 2608 * The next protocol in the previous item. 2609 * @param[out] error 2610 * Pointer to error structure. 2611 * 2612 * @return 2613 * 0 on success, a negative errno value otherwise and rte_errno is set. 
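 *
 * Hedged example of a pattern this check accepts: an outer L3 layer must
 * already be present and no other tunnel matched yet, e.g.
 *
 *   pattern: ETH / IPV4 / GRE / END
 *
 * The nic_mask used below limits matching to the C/K/S bits (0xB000) of
 * c_rsvd0_ver plus the GRE protocol field.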
2614 */ 2615 int 2616 mlx5_flow_validate_item_gre(const struct rte_flow_item *item, 2617 uint64_t item_flags, 2618 uint8_t target_protocol, 2619 struct rte_flow_error *error) 2620 { 2621 const struct rte_flow_item_gre *spec __rte_unused = item->spec; 2622 const struct rte_flow_item_gre *mask = item->mask; 2623 int ret; 2624 const struct rte_flow_item_gre nic_mask = { 2625 .c_rsvd0_ver = RTE_BE16(0xB000), 2626 .protocol = RTE_BE16(UINT16_MAX), 2627 }; 2628 2629 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2630 return rte_flow_error_set(error, EINVAL, 2631 RTE_FLOW_ERROR_TYPE_ITEM, item, 2632 "protocol filtering not compatible" 2633 " with this GRE layer"); 2634 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2635 return rte_flow_error_set(error, ENOTSUP, 2636 RTE_FLOW_ERROR_TYPE_ITEM, item, 2637 "multiple tunnel layers not" 2638 " supported"); 2639 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2640 return rte_flow_error_set(error, ENOTSUP, 2641 RTE_FLOW_ERROR_TYPE_ITEM, item, 2642 "L3 Layer is missing"); 2643 if (!mask) 2644 mask = &rte_flow_item_gre_mask; 2645 ret = mlx5_flow_item_acceptable 2646 (item, (const uint8_t *)mask, 2647 (const uint8_t *)&nic_mask, 2648 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2649 error); 2650 if (ret < 0) 2651 return ret; 2652 #ifndef HAVE_MLX5DV_DR 2653 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT 2654 if (spec && (spec->protocol & mask->protocol)) 2655 return rte_flow_error_set(error, ENOTSUP, 2656 RTE_FLOW_ERROR_TYPE_ITEM, item, 2657 "without MPLS support the" 2658 " specification cannot be used for" 2659 " filtering"); 2660 #endif 2661 #endif 2662 return 0; 2663 } 2664 2665 /** 2666 * Validate Geneve item. 2667 * 2668 * @param[in] item 2669 * Item specification. 2670 * @param[in] itemFlags 2671 * Bit-fields that holds the items detected until now. 2672 * @param[in] enPriv 2673 * Pointer to the private data structure. 2674 * @param[out] error 2675 * Pointer to error structure. 2676 * 2677 * @return 2678 * 0 on success, a negative errno value otherwise and rte_errno is set. 2679 */ 2680 2681 int 2682 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, 2683 uint64_t item_flags, 2684 struct rte_eth_dev *dev, 2685 struct rte_flow_error *error) 2686 { 2687 struct mlx5_priv *priv = dev->data->dev_private; 2688 const struct rte_flow_item_geneve *spec = item->spec; 2689 const struct rte_flow_item_geneve *mask = item->mask; 2690 int ret; 2691 uint16_t gbhdr; 2692 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ? 
2693 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2694 const struct rte_flow_item_geneve nic_mask = { 2695 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2696 .vni = "\xff\xff\xff", 2697 .protocol = RTE_BE16(UINT16_MAX), 2698 }; 2699 2700 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2701 return rte_flow_error_set(error, ENOTSUP, 2702 RTE_FLOW_ERROR_TYPE_ITEM, item, 2703 "L3 Geneve is not enabled by device" 2704 " parameter and/or not configured in" 2705 " firmware"); 2706 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2707 return rte_flow_error_set(error, ENOTSUP, 2708 RTE_FLOW_ERROR_TYPE_ITEM, item, 2709 "multiple tunnel layers not" 2710 " supported"); 2711 /* 2712 * Verify only UDPv4 is present as defined in 2713 * https://tools.ietf.org/html/rfc7348 2714 */ 2715 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2716 return rte_flow_error_set(error, EINVAL, 2717 RTE_FLOW_ERROR_TYPE_ITEM, item, 2718 "no outer UDP layer found"); 2719 if (!mask) 2720 mask = &rte_flow_item_geneve_mask; 2721 ret = mlx5_flow_item_acceptable 2722 (item, (const uint8_t *)mask, 2723 (const uint8_t *)&nic_mask, 2724 sizeof(struct rte_flow_item_geneve), 2725 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2726 if (ret) 2727 return ret; 2728 if (spec) { 2729 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2730 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2731 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2732 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2733 return rte_flow_error_set(error, ENOTSUP, 2734 RTE_FLOW_ERROR_TYPE_ITEM, 2735 item, 2736 "Geneve protocol unsupported" 2737 " fields are being used"); 2738 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2739 return rte_flow_error_set 2740 (error, ENOTSUP, 2741 RTE_FLOW_ERROR_TYPE_ITEM, 2742 item, 2743 "Unsupported Geneve options length"); 2744 } 2745 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2746 return rte_flow_error_set 2747 (error, ENOTSUP, 2748 RTE_FLOW_ERROR_TYPE_ITEM, item, 2749 "Geneve tunnel must be fully defined"); 2750 return 0; 2751 } 2752 2753 /** 2754 * Validate Geneve TLV option item. 2755 * 2756 * @param[in] item 2757 * Item specification. 2758 * @param[in] last_item 2759 * Previous validated item in the pattern items. 2760 * @param[in] geneve_item 2761 * Previous GENEVE item specification. 2762 * @param[in] dev 2763 * Pointer to the rte_eth_dev structure. 2764 * @param[out] error 2765 * Pointer to error structure. 2766 * 2767 * @return 2768 * 0 on success, a negative errno value otherwise and rte_errno is set. 
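 *
 * Hedged sketch (class, type and data values are hypothetical): the
 * option item must be preceded by a GENEVE item and be matched together
 * with a mask that fully covers option_class, option_type and
 * option_len, mirroring the full_mask definition below:
 *
 *   uint32_t opt_data = RTE_BE32(0x12345678);
 *   struct rte_flow_item_geneve_opt opt_spec = {
 *           .option_class = RTE_BE16(0x0102),
 *           .option_type = 0x07,
 *           .option_len = 1,
 *           .data = &opt_data,
 *   };
 *
 * Only one Geneve TLV option configuration (class/type/length) is
 * supported at a time, as enforced at the end of this function.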
2769 */ 2770 int 2771 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item, 2772 uint64_t last_item, 2773 const struct rte_flow_item *geneve_item, 2774 struct rte_eth_dev *dev, 2775 struct rte_flow_error *error) 2776 { 2777 struct mlx5_priv *priv = dev->data->dev_private; 2778 struct mlx5_dev_ctx_shared *sh = priv->sh; 2779 struct mlx5_geneve_tlv_option_resource *geneve_opt_resource; 2780 struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr; 2781 uint8_t data_max_supported = 2782 hca_attr->max_geneve_tlv_option_data_len * 4; 2783 struct mlx5_dev_config *config = &priv->config; 2784 const struct rte_flow_item_geneve *geneve_spec; 2785 const struct rte_flow_item_geneve *geneve_mask; 2786 const struct rte_flow_item_geneve_opt *spec = item->spec; 2787 const struct rte_flow_item_geneve_opt *mask = item->mask; 2788 unsigned int i; 2789 unsigned int data_len; 2790 uint8_t tlv_option_len; 2791 uint16_t optlen_m, optlen_v; 2792 const struct rte_flow_item_geneve_opt full_mask = { 2793 .option_class = RTE_BE16(0xffff), 2794 .option_type = 0xff, 2795 .option_len = 0x1f, 2796 }; 2797 2798 if (!mask) 2799 mask = &rte_flow_item_geneve_opt_mask; 2800 if (!spec) 2801 return rte_flow_error_set 2802 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2803 "Geneve TLV opt class/type/length must be specified"); 2804 if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK) 2805 return rte_flow_error_set 2806 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2807 "Geneve TLV opt length exceeeds the limit (31)"); 2808 /* Check if class type and length masks are full. */ 2809 if (full_mask.option_class != mask->option_class || 2810 full_mask.option_type != mask->option_type || 2811 full_mask.option_len != (mask->option_len & full_mask.option_len)) 2812 return rte_flow_error_set 2813 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2814 "Geneve TLV opt class/type/length masks must be full"); 2815 /* Check if length is supported */ 2816 if ((uint32_t)spec->option_len > 2817 config->hca_attr.max_geneve_tlv_option_data_len) 2818 return rte_flow_error_set 2819 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2820 "Geneve TLV opt length not supported"); 2821 if (config->hca_attr.max_geneve_tlv_options > 1) 2822 DRV_LOG(DEBUG, 2823 "max_geneve_tlv_options supports more than 1 option"); 2824 /* Check GENEVE item preceding. */ 2825 if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE)) 2826 return rte_flow_error_set 2827 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2828 "Geneve opt item must be preceded with Geneve item"); 2829 geneve_spec = geneve_item->spec; 2830 geneve_mask = geneve_item->mask ? geneve_item->mask : 2831 &rte_flow_item_geneve_mask; 2832 /* Check if GENEVE TLV option size doesn't exceed option length */ 2833 if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 || 2834 geneve_spec->ver_opt_len_o_c_rsvd0)) { 2835 tlv_option_len = spec->option_len & mask->option_len; 2836 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0); 2837 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v); 2838 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0); 2839 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m); 2840 if ((optlen_v & optlen_m) <= tlv_option_len) 2841 return rte_flow_error_set 2842 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2843 "GENEVE TLV option length exceeds optlen"); 2844 } 2845 /* Check if length is 0 or data is 0. 
*/ 2846 if (spec->data == NULL || spec->option_len == 0) 2847 return rte_flow_error_set 2848 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2849 "Geneve TLV opt with zero data/length not supported"); 2850 /* Check not all data & mask are 0. */ 2851 data_len = spec->option_len * 4; 2852 if (mask->data == NULL) { 2853 for (i = 0; i < data_len; i++) 2854 if (spec->data[i]) 2855 break; 2856 if (i == data_len) 2857 return rte_flow_error_set(error, ENOTSUP, 2858 RTE_FLOW_ERROR_TYPE_ITEM, item, 2859 "Can't match on Geneve option data 0"); 2860 } else { 2861 for (i = 0; i < data_len; i++) 2862 if (spec->data[i] & mask->data[i]) 2863 break; 2864 if (i == data_len) 2865 return rte_flow_error_set(error, ENOTSUP, 2866 RTE_FLOW_ERROR_TYPE_ITEM, item, 2867 "Can't match on Geneve option data and mask 0"); 2868 /* Check data mask supported. */ 2869 for (i = data_max_supported; i < data_len ; i++) 2870 if (mask->data[i]) 2871 return rte_flow_error_set(error, ENOTSUP, 2872 RTE_FLOW_ERROR_TYPE_ITEM, item, 2873 "Data mask is of unsupported size"); 2874 } 2875 /* Check GENEVE option is supported in NIC. */ 2876 if (!config->hca_attr.geneve_tlv_opt) 2877 return rte_flow_error_set 2878 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, 2879 "Geneve TLV opt not supported"); 2880 /* Check if we already have geneve option with different type/class. */ 2881 rte_spinlock_lock(&sh->geneve_tlv_opt_sl); 2882 geneve_opt_resource = sh->geneve_tlv_option_resource; 2883 if (geneve_opt_resource != NULL) 2884 if (geneve_opt_resource->option_class != spec->option_class || 2885 geneve_opt_resource->option_type != spec->option_type || 2886 geneve_opt_resource->length != spec->option_len) { 2887 rte_spinlock_unlock(&sh->geneve_tlv_opt_sl); 2888 return rte_flow_error_set(error, ENOTSUP, 2889 RTE_FLOW_ERROR_TYPE_ITEM, item, 2890 "Only one Geneve TLV option supported"); 2891 } 2892 rte_spinlock_unlock(&sh->geneve_tlv_opt_sl); 2893 return 0; 2894 } 2895 2896 /** 2897 * Validate MPLS item. 2898 * 2899 * @param[in] dev 2900 * Pointer to the rte_eth_dev structure. 2901 * @param[in] item 2902 * Item specification. 2903 * @param[in] item_flags 2904 * Bit-fields that holds the items detected until now. 2905 * @param[in] prev_layer 2906 * The protocol layer indicated in previous item. 2907 * @param[out] error 2908 * Pointer to error structure. 2909 * 2910 * @return 2911 * 0 on success, a negative errno value otherwise and rte_errno is set. 2912 */ 2913 int 2914 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2915 const struct rte_flow_item *item __rte_unused, 2916 uint64_t item_flags __rte_unused, 2917 uint64_t prev_layer __rte_unused, 2918 struct rte_flow_error *error) 2919 { 2920 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2921 const struct rte_flow_item_mpls *mask = item->mask; 2922 struct mlx5_priv *priv = dev->data->dev_private; 2923 int ret; 2924 2925 if (!priv->config.mpls_en) 2926 return rte_flow_error_set(error, ENOTSUP, 2927 RTE_FLOW_ERROR_TYPE_ITEM, item, 2928 "MPLS not supported or" 2929 " disabled in firmware" 2930 " configuration."); 2931 /* MPLS over UDP, GRE is allowed */ 2932 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP | 2933 MLX5_FLOW_LAYER_GRE | 2934 MLX5_FLOW_LAYER_GRE_KEY))) 2935 return rte_flow_error_set(error, EINVAL, 2936 RTE_FLOW_ERROR_TYPE_ITEM, item, 2937 "protocol filtering not compatible" 2938 " with MPLS layer"); 2939 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
*/ 2940 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2941 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2942 return rte_flow_error_set(error, ENOTSUP, 2943 RTE_FLOW_ERROR_TYPE_ITEM, item, 2944 "multiple tunnel layers not" 2945 " supported"); 2946 if (!mask) 2947 mask = &rte_flow_item_mpls_mask; 2948 ret = mlx5_flow_item_acceptable 2949 (item, (const uint8_t *)mask, 2950 (const uint8_t *)&rte_flow_item_mpls_mask, 2951 sizeof(struct rte_flow_item_mpls), 2952 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2953 if (ret < 0) 2954 return ret; 2955 return 0; 2956 #else 2957 return rte_flow_error_set(error, ENOTSUP, 2958 RTE_FLOW_ERROR_TYPE_ITEM, item, 2959 "MPLS is not supported by Verbs, please" 2960 " update."); 2961 #endif 2962 } 2963 2964 /** 2965 * Validate NVGRE item. 2966 * 2967 * @param[in] item 2968 * Item specification. 2969 * @param[in] item_flags 2970 * Bit flags to mark detected items. 2971 * @param[in] target_protocol 2972 * The next protocol in the previous item. 2973 * @param[out] error 2974 * Pointer to error structure. 2975 * 2976 * @return 2977 * 0 on success, a negative errno value otherwise and rte_errno is set. 2978 */ 2979 int 2980 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2981 uint64_t item_flags, 2982 uint8_t target_protocol, 2983 struct rte_flow_error *error) 2984 { 2985 const struct rte_flow_item_nvgre *mask = item->mask; 2986 int ret; 2987 2988 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2989 return rte_flow_error_set(error, EINVAL, 2990 RTE_FLOW_ERROR_TYPE_ITEM, item, 2991 "protocol filtering not compatible" 2992 " with this GRE layer"); 2993 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2994 return rte_flow_error_set(error, ENOTSUP, 2995 RTE_FLOW_ERROR_TYPE_ITEM, item, 2996 "multiple tunnel layers not" 2997 " supported"); 2998 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2999 return rte_flow_error_set(error, ENOTSUP, 3000 RTE_FLOW_ERROR_TYPE_ITEM, item, 3001 "L3 Layer is missing"); 3002 if (!mask) 3003 mask = &rte_flow_item_nvgre_mask; 3004 ret = mlx5_flow_item_acceptable 3005 (item, (const uint8_t *)mask, 3006 (const uint8_t *)&rte_flow_item_nvgre_mask, 3007 sizeof(struct rte_flow_item_nvgre), 3008 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 3009 if (ret < 0) 3010 return ret; 3011 return 0; 3012 } 3013 3014 /** 3015 * Validate eCPRI item. 3016 * 3017 * @param[in] item 3018 * Item specification. 3019 * @param[in] item_flags 3020 * Bit-fields that holds the items detected until now. 3021 * @param[in] last_item 3022 * Previous validated item in the pattern items. 3023 * @param[in] ether_type 3024 * Type in the ethernet layer header (including dot1q). 3025 * @param[in] acc_mask 3026 * Acceptable mask, if NULL default internal default mask 3027 * will be used to check whether item fields are supported. 3028 * @param[out] error 3029 * Pointer to error structure. 3030 * 3031 * @return 3032 * 0 on success, a negative errno value otherwise and rte_errno is set. 
3033 */ 3034 int 3035 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 3036 uint64_t item_flags, 3037 uint64_t last_item, 3038 uint16_t ether_type, 3039 const struct rte_flow_item_ecpri *acc_mask, 3040 struct rte_flow_error *error) 3041 { 3042 const struct rte_flow_item_ecpri *mask = item->mask; 3043 const struct rte_flow_item_ecpri nic_mask = { 3044 .hdr = { 3045 .common = { 3046 .u32 = 3047 RTE_BE32(((const struct rte_ecpri_common_hdr) { 3048 .type = 0xFF, 3049 }).u32), 3050 }, 3051 .dummy[0] = 0xFFFFFFFF, 3052 }, 3053 }; 3054 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 3055 MLX5_FLOW_LAYER_OUTER_VLAN); 3056 struct rte_flow_item_ecpri mask_lo; 3057 3058 if (!(last_item & outer_l2_vlan) && 3059 last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP) 3060 return rte_flow_error_set(error, EINVAL, 3061 RTE_FLOW_ERROR_TYPE_ITEM, item, 3062 "eCPRI can only follow L2/VLAN layer or UDP layer"); 3063 if ((last_item & outer_l2_vlan) && ether_type && 3064 ether_type != RTE_ETHER_TYPE_ECPRI) 3065 return rte_flow_error_set(error, EINVAL, 3066 RTE_FLOW_ERROR_TYPE_ITEM, item, 3067 "eCPRI cannot follow L2/VLAN layer which ether type is not 0xAEFE"); 3068 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 3069 return rte_flow_error_set(error, EINVAL, 3070 RTE_FLOW_ERROR_TYPE_ITEM, item, 3071 "eCPRI with tunnel is not supported right now"); 3072 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 3073 return rte_flow_error_set(error, ENOTSUP, 3074 RTE_FLOW_ERROR_TYPE_ITEM, item, 3075 "multiple L3 layers not supported"); 3076 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 3077 return rte_flow_error_set(error, EINVAL, 3078 RTE_FLOW_ERROR_TYPE_ITEM, item, 3079 "eCPRI cannot coexist with a TCP layer"); 3080 /* In specification, eCPRI could be over UDP layer. */ 3081 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 3082 return rte_flow_error_set(error, EINVAL, 3083 RTE_FLOW_ERROR_TYPE_ITEM, item, 3084 "eCPRI over UDP layer is not yet supported right now"); 3085 /* Mask for type field in common header could be zero. */ 3086 if (!mask) 3087 mask = &rte_flow_item_ecpri_mask; 3088 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 3089 /* Input mask is in big-endian format. */ 3090 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 3091 return rte_flow_error_set(error, EINVAL, 3092 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 3093 "partial mask is not supported for protocol"); 3094 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 3095 return rte_flow_error_set(error, EINVAL, 3096 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 3097 "message header mask must be after a type mask"); 3098 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 3099 acc_mask ? (const uint8_t *)acc_mask 3100 : (const uint8_t *)&nic_mask, 3101 sizeof(struct rte_flow_item_ecpri), 3102 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 3103 } 3104 3105 /** 3106 * Release resource related QUEUE/RSS action split. 3107 * 3108 * @param dev 3109 * Pointer to Ethernet device. 3110 * @param flow 3111 * Flow to release id's from. 
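 *
 * Only split flow IDs created for the QUEUE/RSS action split are
 * returned to the MLX5_IPOOL_RSS_EXPANTION_FLOW_ID pool here; handles
 * flagged as meter flow IDs are skipped, as those IDs are expected to be
 * released on the meter teardown path instead.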
3112 */ 3113 static void 3114 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 3115 struct rte_flow *flow) 3116 { 3117 struct mlx5_priv *priv = dev->data->dev_private; 3118 uint32_t handle_idx; 3119 struct mlx5_flow_handle *dev_handle; 3120 3121 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 3122 handle_idx, dev_handle, next) 3123 if (dev_handle->split_flow_id && 3124 !dev_handle->is_meter_flow_id) 3125 mlx5_ipool_free(priv->sh->ipool 3126 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], 3127 dev_handle->split_flow_id); 3128 } 3129 3130 static int 3131 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 3132 const struct rte_flow_attr *attr __rte_unused, 3133 const struct rte_flow_item items[] __rte_unused, 3134 const struct rte_flow_action actions[] __rte_unused, 3135 bool external __rte_unused, 3136 int hairpin __rte_unused, 3137 struct rte_flow_error *error) 3138 { 3139 return rte_flow_error_set(error, ENOTSUP, 3140 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 3141 } 3142 3143 static struct mlx5_flow * 3144 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 3145 const struct rte_flow_attr *attr __rte_unused, 3146 const struct rte_flow_item items[] __rte_unused, 3147 const struct rte_flow_action actions[] __rte_unused, 3148 struct rte_flow_error *error) 3149 { 3150 rte_flow_error_set(error, ENOTSUP, 3151 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 3152 return NULL; 3153 } 3154 3155 static int 3156 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 3157 struct mlx5_flow *dev_flow __rte_unused, 3158 const struct rte_flow_attr *attr __rte_unused, 3159 const struct rte_flow_item items[] __rte_unused, 3160 const struct rte_flow_action actions[] __rte_unused, 3161 struct rte_flow_error *error) 3162 { 3163 return rte_flow_error_set(error, ENOTSUP, 3164 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 3165 } 3166 3167 static int 3168 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 3169 struct rte_flow *flow __rte_unused, 3170 struct rte_flow_error *error) 3171 { 3172 return rte_flow_error_set(error, ENOTSUP, 3173 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 3174 } 3175 3176 static void 3177 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 3178 struct rte_flow *flow __rte_unused) 3179 { 3180 } 3181 3182 static void 3183 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 3184 struct rte_flow *flow __rte_unused) 3185 { 3186 } 3187 3188 static int 3189 flow_null_query(struct rte_eth_dev *dev __rte_unused, 3190 struct rte_flow *flow __rte_unused, 3191 const struct rte_flow_action *actions __rte_unused, 3192 void *data __rte_unused, 3193 struct rte_flow_error *error) 3194 { 3195 return rte_flow_error_set(error, ENOTSUP, 3196 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 3197 } 3198 3199 static int 3200 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused, 3201 uint32_t domains __rte_unused, 3202 uint32_t flags __rte_unused) 3203 { 3204 return 0; 3205 } 3206 3207 /* Void driver to protect from null pointer reference. */ 3208 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 3209 .validate = flow_null_validate, 3210 .prepare = flow_null_prepare, 3211 .translate = flow_null_translate, 3212 .apply = flow_null_apply, 3213 .remove = flow_null_remove, 3214 .destroy = flow_null_destroy, 3215 .query = flow_null_query, 3216 .sync_domain = flow_null_sync_domain, 3217 }; 3218 3219 /** 3220 * Select flow driver type according to flow attributes and device 3221 * configuration. 3222 * 3223 * @param[in] dev 3224 * Pointer to the dev structure. 
3225 * @param[in] attr 3226 * Pointer to the flow attributes. 3227 * 3228 * @return 3229 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 3230 */ 3231 static enum mlx5_flow_drv_type 3232 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 3233 { 3234 struct mlx5_priv *priv = dev->data->dev_private; 3235 /* The OS can determine first a specific flow type (DV, VERBS) */ 3236 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 3237 3238 if (type != MLX5_FLOW_TYPE_MAX) 3239 return type; 3240 /* If no OS specific type - continue with DV/VERBS selection */ 3241 if (attr->transfer && priv->config.dv_esw_en) 3242 type = MLX5_FLOW_TYPE_DV; 3243 if (!attr->transfer) 3244 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 3245 MLX5_FLOW_TYPE_VERBS; 3246 return type; 3247 } 3248 3249 #define flow_get_drv_ops(type) flow_drv_ops[type] 3250 3251 /** 3252 * Flow driver validation API. This abstracts calling driver specific functions. 3253 * The type of flow driver is determined according to flow attributes. 3254 * 3255 * @param[in] dev 3256 * Pointer to the dev structure. 3257 * @param[in] attr 3258 * Pointer to the flow attributes. 3259 * @param[in] items 3260 * Pointer to the list of items. 3261 * @param[in] actions 3262 * Pointer to the list of actions. 3263 * @param[in] external 3264 * This flow rule is created by request external to PMD. 3265 * @param[in] hairpin 3266 * Number of hairpin TX actions, 0 means classic flow. 3267 * @param[out] error 3268 * Pointer to the error structure. 3269 * 3270 * @return 3271 * 0 on success, a negative errno value otherwise and rte_errno is set. 3272 */ 3273 static inline int 3274 flow_drv_validate(struct rte_eth_dev *dev, 3275 const struct rte_flow_attr *attr, 3276 const struct rte_flow_item items[], 3277 const struct rte_flow_action actions[], 3278 bool external, int hairpin, struct rte_flow_error *error) 3279 { 3280 const struct mlx5_flow_driver_ops *fops; 3281 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 3282 3283 fops = flow_get_drv_ops(type); 3284 return fops->validate(dev, attr, items, actions, external, 3285 hairpin, error); 3286 } 3287 3288 /** 3289 * Flow driver preparation API. This abstracts calling driver specific 3290 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 3291 * calculates the size of memory required for device flow, allocates the memory, 3292 * initializes the device flow and returns the pointer. 3293 * 3294 * @note 3295 * This function initializes device flow structure such as dv or verbs in 3296 * struct mlx5_flow. However, it is caller's responsibility to initialize the 3297 * rest. For example, adding returning device flow to flow->dev_flow list and 3298 * setting backward reference to the flow should be done out of this function. 3299 * layers field is not filled either. 3300 * 3301 * @param[in] dev 3302 * Pointer to the dev structure. 3303 * @param[in] attr 3304 * Pointer to the flow attributes. 3305 * @param[in] items 3306 * Pointer to the list of items. 3307 * @param[in] actions 3308 * Pointer to the list of actions. 3309 * @param[in] flow_idx 3310 * This memory pool index to the flow. 3311 * @param[out] error 3312 * Pointer to the error structure. 3313 * 3314 * @return 3315 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
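 *
 * Hedged sketch of the usual call order during flow creation (error
 * handling and device flow list insertion omitted):
 *
 *   dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
 *                               flow_idx, error);
 *   dev_flow->flow = flow;
 *   ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 *   ret = flow_drv_apply(dev, flow, error);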
3316 */ 3317 static inline struct mlx5_flow * 3318 flow_drv_prepare(struct rte_eth_dev *dev, 3319 const struct rte_flow *flow, 3320 const struct rte_flow_attr *attr, 3321 const struct rte_flow_item items[], 3322 const struct rte_flow_action actions[], 3323 uint32_t flow_idx, 3324 struct rte_flow_error *error) 3325 { 3326 const struct mlx5_flow_driver_ops *fops; 3327 enum mlx5_flow_drv_type type = flow->drv_type; 3328 struct mlx5_flow *mlx5_flow = NULL; 3329 3330 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3331 fops = flow_get_drv_ops(type); 3332 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 3333 if (mlx5_flow) 3334 mlx5_flow->flow_idx = flow_idx; 3335 return mlx5_flow; 3336 } 3337 3338 /** 3339 * Flow driver translation API. This abstracts calling driver specific 3340 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 3341 * translates a generic flow into a driver flow. flow_drv_prepare() must 3342 * precede. 3343 * 3344 * @note 3345 * dev_flow->layers could be filled as a result of parsing during translation 3346 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 3347 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 3348 * flow->actions could be overwritten even though all the expanded dev_flows 3349 * have the same actions. 3350 * 3351 * @param[in] dev 3352 * Pointer to the rte dev structure. 3353 * @param[in, out] dev_flow 3354 * Pointer to the mlx5 flow. 3355 * @param[in] attr 3356 * Pointer to the flow attributes. 3357 * @param[in] items 3358 * Pointer to the list of items. 3359 * @param[in] actions 3360 * Pointer to the list of actions. 3361 * @param[out] error 3362 * Pointer to the error structure. 3363 * 3364 * @return 3365 * 0 on success, a negative errno value otherwise and rte_errno is set. 3366 */ 3367 static inline int 3368 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 3369 const struct rte_flow_attr *attr, 3370 const struct rte_flow_item items[], 3371 const struct rte_flow_action actions[], 3372 struct rte_flow_error *error) 3373 { 3374 const struct mlx5_flow_driver_ops *fops; 3375 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 3376 3377 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3378 fops = flow_get_drv_ops(type); 3379 return fops->translate(dev, dev_flow, attr, items, actions, error); 3380 } 3381 3382 /** 3383 * Flow driver apply API. This abstracts calling driver specific functions. 3384 * Parent flow (rte_flow) should have driver type (drv_type). It applies 3385 * translated driver flows on to device. flow_drv_translate() must precede. 3386 * 3387 * @param[in] dev 3388 * Pointer to Ethernet device structure. 3389 * @param[in, out] flow 3390 * Pointer to flow structure. 3391 * @param[out] error 3392 * Pointer to error structure. 3393 * 3394 * @return 3395 * 0 on success, a negative errno value otherwise and rte_errno is set. 3396 */ 3397 static inline int 3398 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 3399 struct rte_flow_error *error) 3400 { 3401 const struct mlx5_flow_driver_ops *fops; 3402 enum mlx5_flow_drv_type type = flow->drv_type; 3403 3404 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3405 fops = flow_get_drv_ops(type); 3406 return fops->apply(dev, flow, error); 3407 } 3408 3409 /** 3410 * Flow driver destroy API. This abstracts calling driver specific functions. 3411 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 3412 * on device and releases resources of the flow. 3413 * 3414 * @param[in] dev 3415 * Pointer to Ethernet device. 3416 * @param[in, out] flow 3417 * Pointer to flow structure. 3418 */ 3419 static inline void 3420 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 3421 { 3422 const struct mlx5_flow_driver_ops *fops; 3423 enum mlx5_flow_drv_type type = flow->drv_type; 3424 3425 flow_mreg_split_qrss_release(dev, flow); 3426 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3427 fops = flow_get_drv_ops(type); 3428 fops->destroy(dev, flow); 3429 } 3430 3431 /** 3432 * Flow driver find RSS policy tbl API. This abstracts calling driver 3433 * specific functions. Parent flow (rte_flow) should have driver 3434 * type (drv_type). It will find the RSS policy table that has the rss_desc. 3435 * 3436 * @param[in] dev 3437 * Pointer to Ethernet device. 3438 * @param[in, out] flow 3439 * Pointer to flow structure. 3440 * @param[in] policy 3441 * Pointer to meter policy table. 3442 * @param[in] rss_desc 3443 * Pointer to rss_desc 3444 */ 3445 static struct mlx5_flow_meter_sub_policy * 3446 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev, 3447 struct rte_flow *flow, 3448 struct mlx5_flow_meter_policy *policy, 3449 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS]) 3450 { 3451 const struct mlx5_flow_driver_ops *fops; 3452 enum mlx5_flow_drv_type type = flow->drv_type; 3453 3454 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3455 fops = flow_get_drv_ops(type); 3456 return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc); 3457 } 3458 3459 /** 3460 * Get RSS action from the action list. 3461 * 3462 * @param[in] dev 3463 * Pointer to Ethernet device. 3464 * @param[in] actions 3465 * Pointer to the list of actions. 3466 * @param[in] flow 3467 * Parent flow structure pointer. 3468 * 3469 * @return 3470 * Pointer to the RSS action if exist, else return NULL. 3471 */ 3472 static const struct rte_flow_action_rss* 3473 flow_get_rss_action(struct rte_eth_dev *dev, 3474 const struct rte_flow_action actions[]) 3475 { 3476 struct mlx5_priv *priv = dev->data->dev_private; 3477 const struct rte_flow_action_rss *rss = NULL; 3478 3479 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3480 switch (actions->type) { 3481 case RTE_FLOW_ACTION_TYPE_RSS: 3482 rss = actions->conf; 3483 break; 3484 case RTE_FLOW_ACTION_TYPE_SAMPLE: 3485 { 3486 const struct rte_flow_action_sample *sample = 3487 actions->conf; 3488 const struct rte_flow_action *act = sample->actions; 3489 for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++) 3490 if (act->type == RTE_FLOW_ACTION_TYPE_RSS) 3491 rss = act->conf; 3492 break; 3493 } 3494 case RTE_FLOW_ACTION_TYPE_METER: 3495 { 3496 uint32_t mtr_idx; 3497 struct mlx5_flow_meter_info *fm; 3498 struct mlx5_flow_meter_policy *policy; 3499 const struct rte_flow_action_meter *mtr = actions->conf; 3500 3501 fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx); 3502 if (fm) { 3503 policy = mlx5_flow_meter_policy_find(dev, 3504 fm->policy_id, NULL); 3505 if (policy && policy->is_rss) 3506 rss = 3507 policy->act_cnt[RTE_COLOR_GREEN].rss->conf; 3508 } 3509 break; 3510 } 3511 default: 3512 break; 3513 } 3514 } 3515 return rss; 3516 } 3517 3518 /** 3519 * Get ASO age action by index. 3520 * 3521 * @param[in] dev 3522 * Pointer to the Ethernet device structure. 3523 * @param[in] age_idx 3524 * Index to the ASO age action. 3525 * 3526 * @return 3527 * The specified ASO age action. 
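 *
 * The index encodes the pool number in its lower 16 bits and the 1-based
 * action offset within that pool in its upper 16 bits, i.e. (sketch):
 *
 *   age_idx = (offset << 16) | pool_idx;
 *
 * where offset starts from 1, matching the actions[offset - 1] lookup
 * below.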
3528 */ 3529 struct mlx5_aso_age_action* 3530 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx) 3531 { 3532 uint16_t pool_idx = age_idx & UINT16_MAX; 3533 uint16_t offset = (age_idx >> 16) & UINT16_MAX; 3534 struct mlx5_priv *priv = dev->data->dev_private; 3535 struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng; 3536 struct mlx5_aso_age_pool *pool = mng->pools[pool_idx]; 3537 3538 return &pool->actions[offset - 1]; 3539 } 3540 3541 /* Maps an indirect action to the translated direct action in some actions array. */ 3542 struct mlx5_translated_action_handle { 3543 struct rte_flow_action_handle *action; /**< Indirect action handle. */ 3544 int index; /**< Index in related array of rte_flow_action. */ 3545 }; 3546 3547 /** 3548 * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to the related 3549 * direct action if translation is possible. 3550 * This functionality is used to run the same execution path for both direct and 3551 * indirect actions on flow create. All necessary preparations for indirect 3552 * action handling should be performed on the *handle* actions list returned 3553 * from this call. 3554 * 3555 * @param[in] dev 3556 * Pointer to Ethernet device. 3557 * @param[in] actions 3558 * List of actions to translate. 3559 * @param[out] handle 3560 * List to store translated indirect action object handles. 3561 * @param[in, out] indir_n 3562 * Size of *handle* array. On return it is updated with the number of 3563 * indirect actions retrieved from the *actions* list. 3564 * @param[out] translated_actions 3565 * List of actions where all indirect actions were translated to direct 3566 * actions if possible. NULL if no translation took place. 3567 * @param[out] error 3568 * Pointer to the error structure. 3569 * 3570 * @return 3571 * 0 on success, a negative errno value otherwise and rte_errno is set.
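 *
 * @note
 *   Each indirect action handle is treated here as an encoded 32-bit value:
 *   the action type in the bits at and above MLX5_INDIRECT_ACTION_TYPE_OFFSET
 *   and the ipool index in the bits below it, i.e. the loop below decodes it
 *   as (illustrative restatement of the code):
 *
 *     uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
 *     uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 *     uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);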
3572 */ 3573 static int 3574 flow_action_handles_translate(struct rte_eth_dev *dev, 3575 const struct rte_flow_action actions[], 3576 struct mlx5_translated_action_handle *handle, 3577 int *indir_n, 3578 struct rte_flow_action **translated_actions, 3579 struct rte_flow_error *error) 3580 { 3581 struct mlx5_priv *priv = dev->data->dev_private; 3582 struct rte_flow_action *translated = NULL; 3583 size_t actions_size; 3584 int n; 3585 int copied_n = 0; 3586 struct mlx5_translated_action_handle *handle_end = NULL; 3587 3588 for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) { 3589 if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT) 3590 continue; 3591 if (copied_n == *indir_n) { 3592 return rte_flow_error_set 3593 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM, 3594 NULL, "too many shared actions"); 3595 } 3596 rte_memcpy(&handle[copied_n].action, &actions[n].conf, 3597 sizeof(actions[n].conf)); 3598 handle[copied_n].index = n; 3599 copied_n++; 3600 } 3601 n++; 3602 *indir_n = copied_n; 3603 if (!copied_n) 3604 return 0; 3605 actions_size = sizeof(struct rte_flow_action) * n; 3606 translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY); 3607 if (!translated) { 3608 rte_errno = ENOMEM; 3609 return -ENOMEM; 3610 } 3611 memcpy(translated, actions, actions_size); 3612 for (handle_end = handle + copied_n; handle < handle_end; handle++) { 3613 struct mlx5_shared_action_rss *shared_rss; 3614 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action; 3615 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET; 3616 uint32_t idx = act_idx & 3617 ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1); 3618 3619 switch (type) { 3620 case MLX5_INDIRECT_ACTION_TYPE_RSS: 3621 shared_rss = mlx5_ipool_get 3622 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx); 3623 translated[handle->index].type = 3624 RTE_FLOW_ACTION_TYPE_RSS; 3625 translated[handle->index].conf = 3626 &shared_rss->origin; 3627 break; 3628 case MLX5_INDIRECT_ACTION_TYPE_COUNT: 3629 translated[handle->index].type = 3630 (enum rte_flow_action_type) 3631 MLX5_RTE_FLOW_ACTION_TYPE_COUNT; 3632 translated[handle->index].conf = (void *)(uintptr_t)idx; 3633 break; 3634 case MLX5_INDIRECT_ACTION_TYPE_AGE: 3635 if (priv->sh->flow_hit_aso_en) { 3636 translated[handle->index].type = 3637 (enum rte_flow_action_type) 3638 MLX5_RTE_FLOW_ACTION_TYPE_AGE; 3639 translated[handle->index].conf = 3640 (void *)(uintptr_t)idx; 3641 break; 3642 } 3643 /* Fall-through */ 3644 case MLX5_INDIRECT_ACTION_TYPE_CT: 3645 if (priv->sh->ct_aso_en) { 3646 translated[handle->index].type = 3647 RTE_FLOW_ACTION_TYPE_CONNTRACK; 3648 translated[handle->index].conf = 3649 (void *)(uintptr_t)idx; 3650 break; 3651 } 3652 /* Fall-through */ 3653 default: 3654 mlx5_free(translated); 3655 return rte_flow_error_set 3656 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, 3657 NULL, "invalid indirect action type"); 3658 } 3659 } 3660 *translated_actions = translated; 3661 return 0; 3662 } 3663 3664 /** 3665 * Get Shared RSS action from the action list. 3666 * 3667 * @param[in] dev 3668 * Pointer to Ethernet device. 3669 * @param[in] shared 3670 * Pointer to the list of actions. 3671 * @param[in] shared_n 3672 * Actions list length. 3673 * 3674 * @return 3675 * The MLX5 RSS action ID if exists, otherwise return 0. 
3676 */ 3677 static uint32_t 3678 flow_get_shared_rss_action(struct rte_eth_dev *dev, 3679 struct mlx5_translated_action_handle *handle, 3680 int shared_n) 3681 { 3682 struct mlx5_translated_action_handle *handle_end; 3683 struct mlx5_priv *priv = dev->data->dev_private; 3684 struct mlx5_shared_action_rss *shared_rss; 3685 3686 3687 for (handle_end = handle + shared_n; handle < handle_end; handle++) { 3688 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action; 3689 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET; 3690 uint32_t idx = act_idx & 3691 ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1); 3692 switch (type) { 3693 case MLX5_INDIRECT_ACTION_TYPE_RSS: 3694 shared_rss = mlx5_ipool_get 3695 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], 3696 idx); 3697 __atomic_add_fetch(&shared_rss->refcnt, 1, 3698 __ATOMIC_RELAXED); 3699 return idx; 3700 default: 3701 break; 3702 } 3703 } 3704 return 0; 3705 } 3706 3707 static unsigned int 3708 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 3709 { 3710 const struct rte_flow_item *item; 3711 unsigned int has_vlan = 0; 3712 3713 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 3714 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 3715 has_vlan = 1; 3716 break; 3717 } 3718 } 3719 if (has_vlan) 3720 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 3721 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 3722 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 3723 MLX5_EXPANSION_ROOT_OUTER; 3724 } 3725 3726 /** 3727 * Get layer flags from the prefix flow. 3728 * 3729 * Some flows may be split to several subflows, the prefix subflow gets the 3730 * match items and the suffix sub flow gets the actions. 3731 * Some actions need the user defined match item flags to get the detail for 3732 * the action. 3733 * This function helps the suffix flow to get the item layer flags from prefix 3734 * subflow. 3735 * 3736 * @param[in] dev_flow 3737 * Pointer the created preifx subflow. 3738 * 3739 * @return 3740 * The layers get from prefix subflow. 3741 */ 3742 static inline uint64_t 3743 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 3744 { 3745 uint64_t layers = 0; 3746 3747 /* 3748 * Layers bits could be localization, but usually the compiler will 3749 * help to do the optimization work for source code. 3750 * If no decap actions, use the layers directly. 3751 */ 3752 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 3753 return dev_flow->handle->layers; 3754 /* Convert L3 layers with decap action. */ 3755 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 3756 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 3757 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 3758 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 3759 /* Convert L4 layers with decap action. */ 3760 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 3761 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 3762 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 3763 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 3764 return layers; 3765 } 3766 3767 /** 3768 * Get metadata split action information. 3769 * 3770 * @param[in] actions 3771 * Pointer to the list of actions. 3772 * @param[out] qrss 3773 * Pointer to the return pointer. 3774 * @param[out] qrss_type 3775 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 3776 * if no QUEUE/RSS is found. 3777 * @param[out] encap_idx 3778 * Pointer to the index of the encap action if exists, otherwise the last 3779 * action index. 
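 *
 *   For example (hypothetical list): with actions
 *   [RAW_DECAP, RAW_ENCAP(size > MLX5_ENCAPSULATION_DECISION_SIZE), QUEUE, END],
 *   *qrss points at the QUEUE action, *encap_idx is 0 (the RAW_DECAP position
 *   preceding the L3 encap) and the function returns 4, counting the END action.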
3780 * 3781 * @return 3782 * Total number of actions. 3783 */ 3784 static int 3785 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 3786 const struct rte_flow_action **qrss, 3787 int *encap_idx) 3788 { 3789 const struct rte_flow_action_raw_encap *raw_encap; 3790 int actions_n = 0; 3791 int raw_decap_idx = -1; 3792 3793 *encap_idx = -1; 3794 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3795 switch (actions->type) { 3796 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3797 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3798 *encap_idx = actions_n; 3799 break; 3800 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3801 raw_decap_idx = actions_n; 3802 break; 3803 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3804 raw_encap = actions->conf; 3805 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3806 *encap_idx = raw_decap_idx != -1 ? 3807 raw_decap_idx : actions_n; 3808 break; 3809 case RTE_FLOW_ACTION_TYPE_QUEUE: 3810 case RTE_FLOW_ACTION_TYPE_RSS: 3811 *qrss = actions; 3812 break; 3813 default: 3814 break; 3815 } 3816 actions_n++; 3817 } 3818 if (*encap_idx == -1) 3819 *encap_idx = actions_n; 3820 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3821 return actions_n + 1; 3822 } 3823 3824 /** 3825 * Check if the action will change packet. 3826 * 3827 * @param dev 3828 * Pointer to Ethernet device. 3829 * @param[in] type 3830 * action type. 3831 * 3832 * @return 3833 * true if action will change packet, false otherwise. 3834 */ 3835 static bool flow_check_modify_action_type(struct rte_eth_dev *dev, 3836 enum rte_flow_action_type type) 3837 { 3838 struct mlx5_priv *priv = dev->data->dev_private; 3839 3840 switch (type) { 3841 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC: 3842 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST: 3843 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: 3844 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: 3845 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: 3846 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST: 3847 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC: 3848 case RTE_FLOW_ACTION_TYPE_SET_TP_DST: 3849 case RTE_FLOW_ACTION_TYPE_DEC_TTL: 3850 case RTE_FLOW_ACTION_TYPE_SET_TTL: 3851 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ: 3852 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ: 3853 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK: 3854 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK: 3855 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP: 3856 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP: 3857 case RTE_FLOW_ACTION_TYPE_SET_META: 3858 case RTE_FLOW_ACTION_TYPE_SET_TAG: 3859 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN: 3860 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3861 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3862 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3863 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3864 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 3865 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3866 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 3867 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3868 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3869 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD: 3870 return true; 3871 case RTE_FLOW_ACTION_TYPE_FLAG: 3872 case RTE_FLOW_ACTION_TYPE_MARK: 3873 if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) 3874 return true; 3875 else 3876 return false; 3877 default: 3878 return false; 3879 } 3880 } 3881 3882 /** 3883 * Check meter action from the action list. 3884 * 3885 * @param dev 3886 * Pointer to Ethernet device. 3887 * @param[in] actions 3888 * Pointer to the list of actions. 3889 * @param[out] has_mtr 3890 * Pointer to the meter exist flag. 3891 * @param[out] has_modify 3892 * Pointer to the flag showing there's packet change action. 
3893 * @param[out] meter_id 3894 * Pointer to the meter id. 3895 * 3896 * @return 3897 * Total number of actions. 3898 */ 3899 static int 3900 flow_check_meter_action(struct rte_eth_dev *dev, 3901 const struct rte_flow_action actions[], 3902 bool *has_mtr, bool *has_modify, uint32_t *meter_id) 3903 { 3904 const struct rte_flow_action_meter *mtr = NULL; 3905 int actions_n = 0; 3906 3907 MLX5_ASSERT(has_mtr); 3908 *has_mtr = false; 3909 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3910 switch (actions->type) { 3911 case RTE_FLOW_ACTION_TYPE_METER: 3912 mtr = actions->conf; 3913 *meter_id = mtr->mtr_id; 3914 *has_mtr = true; 3915 break; 3916 default: 3917 break; 3918 } 3919 if (!*has_mtr) 3920 *has_modify |= flow_check_modify_action_type(dev, 3921 actions->type); 3922 actions_n++; 3923 } 3924 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3925 return actions_n + 1; 3926 } 3927 3928 /** 3929 * Check if the flow should be split due to hairpin. 3930 * The reason for the split is that in current HW we can't 3931 * support encap and push-vlan on Rx, so if a flow contains 3932 * these actions we move it to Tx. 3933 * 3934 * @param dev 3935 * Pointer to Ethernet device. 3936 * @param[in] attr 3937 * Flow rule attributes. 3938 * @param[in] actions 3939 * Associated actions (list terminated by the END action). 3940 * 3941 * @return 3942 * > 0 the number of actions and the flow should be split, 3943 * 0 when no split required. 3944 */ 3945 static int 3946 flow_check_hairpin_split(struct rte_eth_dev *dev, 3947 const struct rte_flow_attr *attr, 3948 const struct rte_flow_action actions[]) 3949 { 3950 int queue_action = 0; 3951 int action_n = 0; 3952 int split = 0; 3953 const struct rte_flow_action_queue *queue; 3954 const struct rte_flow_action_rss *rss; 3955 const struct rte_flow_action_raw_encap *raw_encap; 3956 const struct rte_eth_hairpin_conf *conf; 3957 3958 if (!attr->ingress) 3959 return 0; 3960 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3961 switch (actions->type) { 3962 case RTE_FLOW_ACTION_TYPE_QUEUE: 3963 queue = actions->conf; 3964 if (queue == NULL) 3965 return 0; 3966 conf = mlx5_rxq_get_hairpin_conf(dev, queue->index); 3967 if (conf == NULL || conf->tx_explicit != 0) 3968 return 0; 3969 queue_action = 1; 3970 action_n++; 3971 break; 3972 case RTE_FLOW_ACTION_TYPE_RSS: 3973 rss = actions->conf; 3974 if (rss == NULL || rss->queue_num == 0) 3975 return 0; 3976 conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]); 3977 if (conf == NULL || conf->tx_explicit != 0) 3978 return 0; 3979 queue_action = 1; 3980 action_n++; 3981 break; 3982 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3983 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3984 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3985 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3986 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3987 split++; 3988 action_n++; 3989 break; 3990 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3991 raw_encap = actions->conf; 3992 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3993 split++; 3994 action_n++; 3995 break; 3996 default: 3997 action_n++; 3998 break; 3999 } 4000 } 4001 if (split && queue_action) 4002 return action_n; 4003 return 0; 4004 } 4005 4006 /* Declare flow create/destroy prototype in advance. 
*/ 4007 static uint32_t 4008 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 4009 const struct rte_flow_attr *attr, 4010 const struct rte_flow_item items[], 4011 const struct rte_flow_action actions[], 4012 bool external, struct rte_flow_error *error); 4013 4014 static void 4015 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 4016 uint32_t flow_idx); 4017 4018 int 4019 flow_dv_mreg_match_cb(struct mlx5_hlist *list __rte_unused, 4020 struct mlx5_hlist_entry *entry, 4021 uint64_t key, void *cb_ctx __rte_unused) 4022 { 4023 struct mlx5_flow_mreg_copy_resource *mcp_res = 4024 container_of(entry, typeof(*mcp_res), hlist_ent); 4025 4026 return mcp_res->mark_id != key; 4027 } 4028 4029 struct mlx5_hlist_entry * 4030 flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key, 4031 void *cb_ctx) 4032 { 4033 struct rte_eth_dev *dev = list->ctx; 4034 struct mlx5_priv *priv = dev->data->dev_private; 4035 struct mlx5_flow_cb_ctx *ctx = cb_ctx; 4036 struct mlx5_flow_mreg_copy_resource *mcp_res; 4037 struct rte_flow_error *error = ctx->error; 4038 uint32_t idx = 0; 4039 int ret; 4040 uint32_t mark_id = key; 4041 struct rte_flow_attr attr = { 4042 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 4043 .ingress = 1, 4044 }; 4045 struct mlx5_rte_flow_item_tag tag_spec = { 4046 .data = mark_id, 4047 }; 4048 struct rte_flow_item items[] = { 4049 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 4050 }; 4051 struct rte_flow_action_mark ftag = { 4052 .id = mark_id, 4053 }; 4054 struct mlx5_flow_action_copy_mreg cp_mreg = { 4055 .dst = REG_B, 4056 .src = REG_NON, 4057 }; 4058 struct rte_flow_action_jump jump = { 4059 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4060 }; 4061 struct rte_flow_action actions[] = { 4062 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 4063 }; 4064 4065 /* Fill the register fileds in the flow. */ 4066 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 4067 if (ret < 0) 4068 return NULL; 4069 tag_spec.id = ret; 4070 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 4071 if (ret < 0) 4072 return NULL; 4073 cp_mreg.src = ret; 4074 /* Provide the full width of FLAG specific value. */ 4075 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 4076 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 4077 /* Build a new flow. */ 4078 if (mark_id != MLX5_DEFAULT_COPY_ID) { 4079 items[0] = (struct rte_flow_item){ 4080 .type = (enum rte_flow_item_type) 4081 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4082 .spec = &tag_spec, 4083 }; 4084 items[1] = (struct rte_flow_item){ 4085 .type = RTE_FLOW_ITEM_TYPE_END, 4086 }; 4087 actions[0] = (struct rte_flow_action){ 4088 .type = (enum rte_flow_action_type) 4089 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 4090 .conf = &ftag, 4091 }; 4092 actions[1] = (struct rte_flow_action){ 4093 .type = (enum rte_flow_action_type) 4094 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4095 .conf = &cp_mreg, 4096 }; 4097 actions[2] = (struct rte_flow_action){ 4098 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4099 .conf = &jump, 4100 }; 4101 actions[3] = (struct rte_flow_action){ 4102 .type = RTE_FLOW_ACTION_TYPE_END, 4103 }; 4104 } else { 4105 /* Default rule, wildcard match. 
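 * For MLX5_DEFAULT_COPY_ID the rule matches every packet at the
 * lowest priority and only performs the reg_c[0] -> reg_b copy
 * followed by the jump to RX_ACT_TBL; no MARK value is restored.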
*/ 4106 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR; 4107 items[0] = (struct rte_flow_item){ 4108 .type = RTE_FLOW_ITEM_TYPE_END, 4109 }; 4110 actions[0] = (struct rte_flow_action){ 4111 .type = (enum rte_flow_action_type) 4112 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4113 .conf = &cp_mreg, 4114 }; 4115 actions[1] = (struct rte_flow_action){ 4116 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4117 .conf = &jump, 4118 }; 4119 actions[2] = (struct rte_flow_action){ 4120 .type = RTE_FLOW_ACTION_TYPE_END, 4121 }; 4122 } 4123 /* Build a new entry. */ 4124 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 4125 if (!mcp_res) { 4126 rte_errno = ENOMEM; 4127 return NULL; 4128 } 4129 mcp_res->idx = idx; 4130 mcp_res->mark_id = mark_id; 4131 /* 4132 * The copy flows are not included in any list. They are 4133 * referenced from other flows and cannot be applied, 4134 * removed or deleted in arbitrary order by list 4135 * traversing. 4136 */ 4137 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 4138 actions, false, error); 4139 if (!mcp_res->rix_flow) { 4140 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx); 4141 return NULL; 4142 } 4143 return &mcp_res->hlist_ent; 4144 } 4145 4146 /** 4147 * Add a flow of copying flow metadata registers in RX_CP_TBL. 4148 * 4149 * As mark_id is unique, if there's already a registered flow for the mark_id, 4150 * return by increasing the reference counter of the resource. Otherwise, create 4151 * the resource (mcp_res) and flow. 4152 * 4153 * The flow looks like, 4154 * - If ingress port is ANY and reg_c[1] is mark_id, 4155 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 4156 * 4157 * For the default flow (zero mark_id), the flow is like, 4158 * - If ingress port is ANY, 4159 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 4160 * 4161 * @param dev 4162 * Pointer to Ethernet device. 4163 * @param mark_id 4164 * ID of MARK action, zero means default flow for META. 4165 * @param[out] error 4166 * Perform verbose error reporting if not NULL. 4167 * 4168 * @return 4169 * Associated resource on success, NULL otherwise and rte_errno is set. 4170 */ 4171 static struct mlx5_flow_mreg_copy_resource * 4172 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 4173 struct rte_flow_error *error) 4174 { 4175 struct mlx5_priv *priv = dev->data->dev_private; 4176 struct mlx5_hlist_entry *entry; 4177 struct mlx5_flow_cb_ctx ctx = { 4178 .dev = dev, 4179 .error = error, 4180 }; 4181 4182 /* Check if already registered. */ 4183 MLX5_ASSERT(priv->mreg_cp_tbl); 4184 entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx); 4185 if (!entry) 4186 return NULL; 4187 return container_of(entry, struct mlx5_flow_mreg_copy_resource, 4188 hlist_ent); 4189 } 4190 4191 void 4192 flow_dv_mreg_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry) 4193 { 4194 struct mlx5_flow_mreg_copy_resource *mcp_res = 4195 container_of(entry, typeof(*mcp_res), hlist_ent); 4196 struct rte_eth_dev *dev = list->ctx; 4197 struct mlx5_priv *priv = dev->data->dev_private; 4198 4199 MLX5_ASSERT(mcp_res->rix_flow); 4200 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 4201 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 4202 } 4203 4204 /** 4205 * Release flow in RX_CP_TBL. 4206 * 4207 * @param dev 4208 * Pointer to Ethernet device. 4209 * @param flow 4210 * Parent flow for which copying is provided.
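 *
 * @note
 *   Unregistering from mreg_cp_tbl below is assumed to only drop a
 *   reference; flow_dv_mreg_remove_cb() then runs for the last holder,
 *   destroying the copy flow and freeing the MLX5_IPOOL_MCP entry.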
4211 */ 4212 static void 4213 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 4214 struct rte_flow *flow) 4215 { 4216 struct mlx5_flow_mreg_copy_resource *mcp_res; 4217 struct mlx5_priv *priv = dev->data->dev_private; 4218 4219 if (!flow->rix_mreg_copy) 4220 return; 4221 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 4222 flow->rix_mreg_copy); 4223 if (!mcp_res || !priv->mreg_cp_tbl) 4224 return; 4225 MLX5_ASSERT(mcp_res->rix_flow); 4226 mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 4227 flow->rix_mreg_copy = 0; 4228 } 4229 4230 /** 4231 * Remove the default copy action from RX_CP_TBL. 4232 * 4233 * This functions is called in the mlx5_dev_start(). No thread safe 4234 * is guaranteed. 4235 * 4236 * @param dev 4237 * Pointer to Ethernet device. 4238 */ 4239 static void 4240 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 4241 { 4242 struct mlx5_hlist_entry *entry; 4243 struct mlx5_priv *priv = dev->data->dev_private; 4244 4245 /* Check if default flow is registered. */ 4246 if (!priv->mreg_cp_tbl) 4247 return; 4248 entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, 4249 MLX5_DEFAULT_COPY_ID, NULL); 4250 if (!entry) 4251 return; 4252 mlx5_hlist_unregister(priv->mreg_cp_tbl, entry); 4253 } 4254 4255 /** 4256 * Add the default copy action in in RX_CP_TBL. 4257 * 4258 * This functions is called in the mlx5_dev_start(). No thread safe 4259 * is guaranteed. 4260 * 4261 * @param dev 4262 * Pointer to Ethernet device. 4263 * @param[out] error 4264 * Perform verbose error reporting if not NULL. 4265 * 4266 * @return 4267 * 0 for success, negative value otherwise and rte_errno is set. 4268 */ 4269 static int 4270 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 4271 struct rte_flow_error *error) 4272 { 4273 struct mlx5_priv *priv = dev->data->dev_private; 4274 struct mlx5_flow_mreg_copy_resource *mcp_res; 4275 4276 /* Check whether extensive metadata feature is engaged. */ 4277 if (!priv->config.dv_flow_en || 4278 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4279 !mlx5_flow_ext_mreg_supported(dev) || 4280 !priv->sh->dv_regc0_mask) 4281 return 0; 4282 /* 4283 * Add default mreg copy flow may be called multiple time, but 4284 * only be called once in stop. Avoid register it twice. 4285 */ 4286 if (mlx5_hlist_lookup(priv->mreg_cp_tbl, MLX5_DEFAULT_COPY_ID, NULL)) 4287 return 0; 4288 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 4289 if (!mcp_res) 4290 return -rte_errno; 4291 return 0; 4292 } 4293 4294 /** 4295 * Add a flow of copying flow metadata registers in RX_CP_TBL. 4296 * 4297 * All the flow having Q/RSS action should be split by 4298 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 4299 * performs the following, 4300 * - CQE->flow_tag := reg_c[1] (MARK) 4301 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 4302 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 4303 * but there should be a flow per each MARK ID set by MARK action. 4304 * 4305 * For the aforementioned reason, if there's a MARK action in flow's action 4306 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 4307 * the MARK ID to CQE's flow_tag like, 4308 * - If reg_c[1] is mark_id, 4309 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 4310 * 4311 * For SET_META action which stores value in reg_c[0], as the destination is 4312 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 4313 * MARK ID means the default flow. 
The default flow looks like, 4314 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 4315 * 4316 * @param dev 4317 * Pointer to Ethernet device. 4318 * @param flow 4319 * Pointer to flow structure. 4320 * @param[in] actions 4321 * Pointer to the list of actions. 4322 * @param[out] error 4323 * Perform verbose error reporting if not NULL. 4324 * 4325 * @return 4326 * 0 on success, negative value otherwise and rte_errno is set. 4327 */ 4328 static int 4329 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 4330 struct rte_flow *flow, 4331 const struct rte_flow_action *actions, 4332 struct rte_flow_error *error) 4333 { 4334 struct mlx5_priv *priv = dev->data->dev_private; 4335 struct mlx5_dev_config *config = &priv->config; 4336 struct mlx5_flow_mreg_copy_resource *mcp_res; 4337 const struct rte_flow_action_mark *mark; 4338 4339 /* Check whether extensive metadata feature is engaged. */ 4340 if (!config->dv_flow_en || 4341 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4342 !mlx5_flow_ext_mreg_supported(dev) || 4343 !priv->sh->dv_regc0_mask) 4344 return 0; 4345 /* Find MARK action. */ 4346 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4347 switch (actions->type) { 4348 case RTE_FLOW_ACTION_TYPE_FLAG: 4349 mcp_res = flow_mreg_add_copy_action 4350 (dev, MLX5_FLOW_MARK_DEFAULT, error); 4351 if (!mcp_res) 4352 return -rte_errno; 4353 flow->rix_mreg_copy = mcp_res->idx; 4354 return 0; 4355 case RTE_FLOW_ACTION_TYPE_MARK: 4356 mark = (const struct rte_flow_action_mark *) 4357 actions->conf; 4358 mcp_res = 4359 flow_mreg_add_copy_action(dev, mark->id, error); 4360 if (!mcp_res) 4361 return -rte_errno; 4362 flow->rix_mreg_copy = mcp_res->idx; 4363 return 0; 4364 default: 4365 break; 4366 } 4367 } 4368 return 0; 4369 } 4370 4371 #define MLX5_MAX_SPLIT_ACTIONS 24 4372 #define MLX5_MAX_SPLIT_ITEMS 24 4373 4374 /** 4375 * Split the hairpin flow. 4376 * Since HW can't support encap and push-vlan on Rx, we move these 4377 * actions to Tx. 4378 * If the count action is after the encap then we also 4379 * move the count action. in this case the count will also measure 4380 * the outer bytes. 4381 * 4382 * @param dev 4383 * Pointer to Ethernet device. 4384 * @param[in] actions 4385 * Associated actions (list terminated by the END action). 4386 * @param[out] actions_rx 4387 * Rx flow actions. 4388 * @param[out] actions_tx 4389 * Tx flow actions.. 4390 * @param[out] pattern_tx 4391 * The pattern items for the Tx flow. 4392 * @param[out] flow_id 4393 * The flow ID connected to this flow. 4394 * 4395 * @return 4396 * 0 on success. 
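 *
 * For example (illustrative only), a hairpin flow with actions
 *   [RAW_ENCAP(size > MLX5_ENCAPSULATION_DECISION_SIZE), QUEUE, END]
 * is split into
 *   Rx: [QUEUE, internal TAG(reg := flow_id), END]
 *   Tx: [RAW_ENCAP, END] with a pattern matching the same internal TAG item,
 * so the two halves stay correlated through the tag register.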
4397 */ 4398 static int 4399 flow_hairpin_split(struct rte_eth_dev *dev, 4400 const struct rte_flow_action actions[], 4401 struct rte_flow_action actions_rx[], 4402 struct rte_flow_action actions_tx[], 4403 struct rte_flow_item pattern_tx[], 4404 uint32_t flow_id) 4405 { 4406 const struct rte_flow_action_raw_encap *raw_encap; 4407 const struct rte_flow_action_raw_decap *raw_decap; 4408 struct mlx5_rte_flow_action_set_tag *set_tag; 4409 struct rte_flow_action *tag_action; 4410 struct mlx5_rte_flow_item_tag *tag_item; 4411 struct rte_flow_item *item; 4412 char *addr; 4413 int encap = 0; 4414 4415 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4416 switch (actions->type) { 4417 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 4418 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 4419 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 4420 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 4421 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 4422 rte_memcpy(actions_tx, actions, 4423 sizeof(struct rte_flow_action)); 4424 actions_tx++; 4425 break; 4426 case RTE_FLOW_ACTION_TYPE_COUNT: 4427 if (encap) { 4428 rte_memcpy(actions_tx, actions, 4429 sizeof(struct rte_flow_action)); 4430 actions_tx++; 4431 } else { 4432 rte_memcpy(actions_rx, actions, 4433 sizeof(struct rte_flow_action)); 4434 actions_rx++; 4435 } 4436 break; 4437 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 4438 raw_encap = actions->conf; 4439 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) { 4440 memcpy(actions_tx, actions, 4441 sizeof(struct rte_flow_action)); 4442 actions_tx++; 4443 encap = 1; 4444 } else { 4445 rte_memcpy(actions_rx, actions, 4446 sizeof(struct rte_flow_action)); 4447 actions_rx++; 4448 } 4449 break; 4450 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 4451 raw_decap = actions->conf; 4452 if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) { 4453 memcpy(actions_tx, actions, 4454 sizeof(struct rte_flow_action)); 4455 actions_tx++; 4456 } else { 4457 rte_memcpy(actions_rx, actions, 4458 sizeof(struct rte_flow_action)); 4459 actions_rx++; 4460 } 4461 break; 4462 default: 4463 rte_memcpy(actions_rx, actions, 4464 sizeof(struct rte_flow_action)); 4465 actions_rx++; 4466 break; 4467 } 4468 } 4469 /* Add set meta action and end action for the Rx flow. */ 4470 tag_action = actions_rx; 4471 tag_action->type = (enum rte_flow_action_type) 4472 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4473 actions_rx++; 4474 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 4475 actions_rx++; 4476 set_tag = (void *)actions_rx; 4477 *set_tag = (struct mlx5_rte_flow_action_set_tag) { 4478 .id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL), 4479 .data = flow_id, 4480 }; 4481 MLX5_ASSERT(set_tag->id > REG_NON); 4482 tag_action->conf = set_tag; 4483 /* Create Tx item list. */ 4484 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 4485 addr = (void *)&pattern_tx[2]; 4486 item = pattern_tx; 4487 item->type = (enum rte_flow_item_type) 4488 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 4489 tag_item = (void *)addr; 4490 tag_item->data = flow_id; 4491 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 4492 MLX5_ASSERT(set_tag->id > REG_NON); 4493 item->spec = tag_item; 4494 addr += sizeof(struct mlx5_rte_flow_item_tag); 4495 tag_item = (void *)addr; 4496 tag_item->data = UINT32_MAX; 4497 tag_item->id = UINT16_MAX; 4498 item->mask = tag_item; 4499 item->last = NULL; 4500 item++; 4501 item->type = RTE_FLOW_ITEM_TYPE_END; 4502 return 0; 4503 } 4504 4505 /** 4506 * The last stage of splitting chain, just creates the subflow 4507 * without any modification. 
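 * In other words it only wraps flow_drv_prepare() and flow_drv_translate()
 * for a single subflow, links the new device handle into flow->dev_handles
 * and propagates the prefix layers/mark flags from the split info.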
4508 * 4509 * @param[in] dev 4510 * Pointer to Ethernet device. 4511 * @param[in] flow 4512 * Parent flow structure pointer. 4513 * @param[in, out] sub_flow 4514 * Pointer to return the created subflow, may be NULL. 4515 * @param[in] attr 4516 * Flow rule attributes. 4517 * @param[in] items 4518 * Pattern specification (list terminated by the END pattern item). 4519 * @param[in] actions 4520 * Associated actions (list terminated by the END action). 4521 * @param[in] flow_split_info 4522 * Pointer to flow split info structure. 4523 * @param[out] error 4524 * Perform verbose error reporting if not NULL. 4525 * @return 4526 * 0 on success, negative value otherwise 4527 */ 4528 static int 4529 flow_create_split_inner(struct rte_eth_dev *dev, 4530 struct rte_flow *flow, 4531 struct mlx5_flow **sub_flow, 4532 const struct rte_flow_attr *attr, 4533 const struct rte_flow_item items[], 4534 const struct rte_flow_action actions[], 4535 struct mlx5_flow_split_info *flow_split_info, 4536 struct rte_flow_error *error) 4537 { 4538 struct mlx5_flow *dev_flow; 4539 4540 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 4541 flow_split_info->flow_idx, error); 4542 if (!dev_flow) 4543 return -rte_errno; 4544 dev_flow->flow = flow; 4545 dev_flow->external = flow_split_info->external; 4546 dev_flow->skip_scale = flow_split_info->skip_scale; 4547 /* Subflow object was created, we must include one in the list. */ 4548 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 4549 dev_flow->handle, next); 4550 /* 4551 * If dev_flow is as one of the suffix flow, some actions in suffix 4552 * flow may need some user defined item layer flags, and pass the 4553 * Metadate rxq mark flag to suffix flow as well. 4554 */ 4555 if (flow_split_info->prefix_layers) 4556 dev_flow->handle->layers = flow_split_info->prefix_layers; 4557 if (flow_split_info->prefix_mark) 4558 dev_flow->handle->mark = 1; 4559 if (sub_flow) 4560 *sub_flow = dev_flow; 4561 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4562 dev_flow->dv.table_id = flow_split_info->table_id; 4563 #endif 4564 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 4565 } 4566 4567 /** 4568 * Get the sub policy of a meter. 4569 * 4570 * @param[in] dev 4571 * Pointer to Ethernet device. 4572 * @param[in] flow 4573 * Parent flow structure pointer. 4574 * @param[in] policy_id; 4575 * Meter Policy id. 4576 * @param[in] attr 4577 * Flow rule attributes. 4578 * @param[in] items 4579 * Pattern specification (list terminated by the END pattern item). 4580 * @param[out] error 4581 * Perform verbose error reporting if not NULL. 4582 * 4583 * @return 4584 * Pointer to the meter sub policy, NULL otherwise and rte_errno is set. 
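 *
 * For an RSS meter policy this helper builds one temporary dev_flow per
 * color only to let the driver derive the RSS hash fields, roughly
 * (simplified sketch of the code that follows, not a separate API):
 *
 *   struct rte_flow_action rss_actions[2] = {
 *       { .type = RTE_FLOW_ACTION_TYPE_RSS,
 *         .conf = policy->act_cnt[i].rss->conf },
 *       { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   flow_drv_translate(dev, &dev_flow, attr, items, rss_actions, error);
 *   rss_desc_v[i].hash_fields = dev_flow.hash_fields;
 *
 * and then asks the driver for a matching sub-policy through
 * flow_drv_meter_sub_policy_rss_prepare().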
4585 */ 4586 static struct mlx5_flow_meter_sub_policy * 4587 get_meter_sub_policy(struct rte_eth_dev *dev, 4588 struct rte_flow *flow, 4589 uint32_t policy_id, 4590 const struct rte_flow_attr *attr, 4591 const struct rte_flow_item items[], 4592 struct rte_flow_error *error) 4593 { 4594 struct mlx5_flow_meter_policy *policy; 4595 struct mlx5_flow_meter_sub_policy *sub_policy = NULL; 4596 4597 policy = mlx5_flow_meter_policy_find(dev, policy_id, NULL); 4598 if (!policy) { 4599 rte_flow_error_set(error, EINVAL, 4600 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 4601 "Failed to find Meter Policy."); 4602 goto exit; 4603 } 4604 if (policy->is_rss || 4605 (policy->is_queue && 4606 !policy->sub_policys[MLX5_MTR_DOMAIN_INGRESS][0]->rix_hrxq[0])) { 4607 struct mlx5_flow_workspace *wks = 4608 mlx5_flow_get_thread_workspace(); 4609 struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS]; 4610 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0}; 4611 uint32_t i; 4612 4613 MLX5_ASSERT(wks); 4614 /** 4615 * This is a tmp dev_flow, 4616 * no need to register any matcher for it in translate. 4617 */ 4618 wks->skip_matcher_reg = 1; 4619 for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) { 4620 struct mlx5_flow dev_flow = {0}; 4621 struct mlx5_flow_handle dev_handle = { {0} }; 4622 4623 if (policy->is_rss) { 4624 const void *rss_act = 4625 policy->act_cnt[i].rss->conf; 4626 struct rte_flow_action rss_actions[2] = { 4627 [0] = { 4628 .type = RTE_FLOW_ACTION_TYPE_RSS, 4629 .conf = rss_act 4630 }, 4631 [1] = { 4632 .type = RTE_FLOW_ACTION_TYPE_END, 4633 .conf = NULL 4634 } 4635 }; 4636 4637 dev_flow.handle = &dev_handle; 4638 dev_flow.ingress = attr->ingress; 4639 dev_flow.flow = flow; 4640 dev_flow.external = 0; 4641 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4642 dev_flow.dv.transfer = attr->transfer; 4643 #endif 4644 /** 4645 * Translate RSS action to get rss hash fields. 4646 */ 4647 if (flow_drv_translate(dev, &dev_flow, attr, 4648 items, rss_actions, error)) 4649 goto exit; 4650 rss_desc_v[i] = wks->rss_desc; 4651 rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN; 4652 rss_desc_v[i].hash_fields = 4653 dev_flow.hash_fields; 4654 rss_desc_v[i].queue_num = 4655 rss_desc_v[i].hash_fields ? 4656 rss_desc_v[i].queue_num : 1; 4657 rss_desc_v[i].tunnel = 4658 !!(dev_flow.handle->layers & 4659 MLX5_FLOW_LAYER_TUNNEL); 4660 } else { 4661 /* This is queue action. */ 4662 rss_desc_v[i] = wks->rss_desc; 4663 rss_desc_v[i].key_len = 0; 4664 rss_desc_v[i].hash_fields = 0; 4665 rss_desc_v[i].queue = 4666 &policy->act_cnt[i].queue; 4667 rss_desc_v[i].queue_num = 1; 4668 } 4669 rss_desc[i] = &rss_desc_v[i]; 4670 } 4671 sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev, 4672 flow, policy, rss_desc); 4673 } else { 4674 enum mlx5_meter_domain mtr_domain = 4675 attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER : 4676 attr->egress ? MLX5_MTR_DOMAIN_EGRESS : 4677 MLX5_MTR_DOMAIN_INGRESS; 4678 sub_policy = policy->sub_policys[mtr_domain][0]; 4679 } 4680 if (!sub_policy) { 4681 rte_flow_error_set(error, EINVAL, 4682 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 4683 "Failed to get meter sub-policy."); 4684 goto exit; 4685 } 4686 exit: 4687 return sub_policy; 4688 } 4689 4690 /** 4691 * Split the meter flow. 4692 * 4693 * As meter flow will split to three sub flow, other than meter 4694 * action, the other actions make sense to only meter accepts 4695 * the packet. If it need to be dropped, no other additional 4696 * actions should be take. 
4697 * 4698 * One kind of special action which decapsulates the L3 tunnel 4699 * header will be in the prefix sub flow, as not to take the 4700 * L3 tunnel header into account. 4701 * 4702 * @param[in] dev 4703 * Pointer to Ethernet device. 4704 * @param[in] flow 4705 * Parent flow structure pointer. 4706 * @param[in] fm 4707 * Pointer to flow meter structure. 4708 * @param[in] attr 4709 * Flow rule attributes. 4710 * @param[in] items 4711 * Pattern specification (list terminated by the END pattern item). 4712 * @param[out] sfx_items 4713 * Suffix flow match items (list terminated by the END pattern item). 4714 * @param[in] actions 4715 * Associated actions (list terminated by the END action). 4716 * @param[out] actions_sfx 4717 * Suffix flow actions. 4718 * @param[out] actions_pre 4719 * Prefix flow actions. 4720 * @param[out] mtr_flow_id 4721 * Pointer to meter flow id. 4722 * @param[out] error 4723 * Perform verbose error reporting if not NULL. 4724 * 4725 * @return 4726 * 0 on success, a negative errno value otherwise and rte_errno is set. 4727 */ 4728 static int 4729 flow_meter_split_prep(struct rte_eth_dev *dev, 4730 struct rte_flow *flow, 4731 struct mlx5_flow_meter_info *fm, 4732 const struct rte_flow_attr *attr, 4733 const struct rte_flow_item items[], 4734 struct rte_flow_item sfx_items[], 4735 const struct rte_flow_action actions[], 4736 struct rte_flow_action actions_sfx[], 4737 struct rte_flow_action actions_pre[], 4738 uint32_t *mtr_flow_id, 4739 struct rte_flow_error *error) 4740 { 4741 struct mlx5_priv *priv = dev->data->dev_private; 4742 struct rte_flow_action *tag_action = NULL; 4743 struct rte_flow_item *tag_item; 4744 struct mlx5_rte_flow_action_set_tag *set_tag; 4745 const struct rte_flow_action_raw_encap *raw_encap; 4746 const struct rte_flow_action_raw_decap *raw_decap; 4747 struct mlx5_rte_flow_item_tag *tag_item_spec; 4748 struct mlx5_rte_flow_item_tag *tag_item_mask; 4749 uint32_t tag_id = 0; 4750 struct rte_flow_item *vlan_item_dst = NULL; 4751 const struct rte_flow_item *vlan_item_src = NULL; 4752 struct rte_flow_action *hw_mtr_action; 4753 struct rte_flow_action *action_pre_head = NULL; 4754 int32_t flow_src_port = priv->representor_id; 4755 bool mtr_first; 4756 uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0; 4757 uint8_t mtr_reg_bits = priv->mtr_reg_share ? 4758 MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS; 4759 uint32_t flow_id = 0; 4760 uint32_t flow_id_reversed = 0; 4761 uint8_t flow_id_bits = 0; 4762 int shift; 4763 4764 /* Prepare the suffix subflow items. */ 4765 tag_item = sfx_items++; 4766 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 4767 struct mlx5_priv *port_priv; 4768 const struct rte_flow_item_port_id *pid_v; 4769 int item_type = items->type; 4770 4771 switch (item_type) { 4772 case RTE_FLOW_ITEM_TYPE_PORT_ID: 4773 pid_v = items->spec; 4774 MLX5_ASSERT(pid_v); 4775 port_priv = mlx5_port_to_eswitch_info(pid_v->id, false); 4776 if (!port_priv) 4777 return rte_flow_error_set(error, 4778 rte_errno, 4779 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 4780 pid_v, 4781 "Failed to get port info."); 4782 flow_src_port = port_priv->representor_id; 4783 memcpy(sfx_items, items, sizeof(*sfx_items)); 4784 sfx_items++; 4785 break; 4786 case RTE_FLOW_ITEM_TYPE_VLAN: 4787 /* Determine if copy vlan item below. 
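 * A VOID placeholder is reserved in the suffix items here; it is turned
 * into an internal MLX5_RTE_FLOW_ITEM_TYPE_VLAN match only if a
 * push-VLAN/set-VLAN-VID action is found later in the action list.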
*/ 4788 vlan_item_src = items; 4789 vlan_item_dst = sfx_items++; 4790 vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID; 4791 break; 4792 default: 4793 break; 4794 } 4795 } 4796 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 4797 sfx_items++; 4798 mtr_first = priv->sh->meter_aso_en && 4799 (attr->egress || (attr->transfer && flow_src_port != UINT16_MAX)); 4800 /* For ASO meter, meter must be before tag in TX direction. */ 4801 if (mtr_first) { 4802 action_pre_head = actions_pre++; 4803 /* Leave space for tag action. */ 4804 tag_action = actions_pre++; 4805 } 4806 /* Prepare the actions for prefix and suffix flow. */ 4807 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4808 struct rte_flow_action *action_cur = NULL; 4809 4810 switch (actions->type) { 4811 case RTE_FLOW_ACTION_TYPE_METER: 4812 if (mtr_first) { 4813 action_cur = action_pre_head; 4814 } else { 4815 /* Leave space for tag action. */ 4816 tag_action = actions_pre++; 4817 action_cur = actions_pre++; 4818 } 4819 break; 4820 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 4821 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 4822 action_cur = actions_pre++; 4823 break; 4824 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 4825 raw_encap = actions->conf; 4826 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 4827 action_cur = actions_pre++; 4828 break; 4829 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 4830 raw_decap = actions->conf; 4831 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 4832 action_cur = actions_pre++; 4833 break; 4834 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 4835 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 4836 if (vlan_item_dst && vlan_item_src) { 4837 memcpy(vlan_item_dst, vlan_item_src, 4838 sizeof(*vlan_item_dst)); 4839 /* 4840 * Convert to internal match item, it is used 4841 * for vlan push and set vid. 4842 */ 4843 vlan_item_dst->type = (enum rte_flow_item_type) 4844 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 4845 } 4846 break; 4847 default: 4848 break; 4849 } 4850 if (!action_cur) 4851 action_cur = (fm->def_policy) ? 4852 actions_sfx++ : actions_pre++; 4853 memcpy(action_cur, actions, sizeof(struct rte_flow_action)); 4854 } 4855 /* Add end action to the actions. */ 4856 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 4857 if (priv->sh->meter_aso_en) { 4858 /** 4859 * For ASO meter, need to add an extra jump action explicitly, 4860 * to jump from meter to policer table. 4861 */ 4862 struct mlx5_flow_meter_sub_policy *sub_policy; 4863 struct mlx5_flow_tbl_data_entry *tbl_data; 4864 4865 if (!fm->def_policy) { 4866 sub_policy = get_meter_sub_policy(dev, flow, 4867 fm->policy_id, attr, 4868 items, error); 4869 if (!sub_policy) 4870 return -rte_errno; 4871 } else { 4872 enum mlx5_meter_domain mtr_domain = 4873 attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER : 4874 attr->egress ? MLX5_MTR_DOMAIN_EGRESS : 4875 MLX5_MTR_DOMAIN_INGRESS; 4876 4877 sub_policy = 4878 &priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy; 4879 } 4880 tbl_data = container_of(sub_policy->tbl_rsc, 4881 struct mlx5_flow_tbl_data_entry, tbl); 4882 hw_mtr_action = actions_pre++; 4883 hw_mtr_action->type = (enum rte_flow_action_type) 4884 MLX5_RTE_FLOW_ACTION_TYPE_JUMP; 4885 hw_mtr_action->conf = tbl_data->jump.action; 4886 } 4887 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 4888 actions_pre++; 4889 if (!tag_action) 4890 return rte_flow_error_set(error, ENOMEM, 4891 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 4892 "No tag action space."); 4893 if (!mtr_flow_id) { 4894 tag_action->type = RTE_FLOW_ACTION_TYPE_VOID; 4895 goto exit; 4896 } 4897 /* Only default-policy Meter creates mtr flow id. 
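 * The id allocated from fm->flow_ipool gives flow_id = tag_id - 1, whose
 * bit width is checked against the idle bits of the color register
 * (flow_id_bits + max_mtr_bits must fit into mtr_reg_bits). The flow_id
 * is then stored bit-reversed next to the meter id; e.g. a hypothetical
 * flow_id of 6 (110b, 3 bits) becomes 011b before being shifted to the
 * top of the idle bits by the loop below.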
*/ 4898 if (fm->def_policy) { 4899 mlx5_ipool_malloc(fm->flow_ipool, &tag_id); 4900 if (!tag_id) 4901 return rte_flow_error_set(error, ENOMEM, 4902 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 4903 "Failed to allocate meter flow id."); 4904 flow_id = tag_id - 1; 4905 flow_id_bits = (!flow_id) ? 1 : 4906 (MLX5_REG_BITS - __builtin_clz(flow_id)); 4907 if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) > 4908 mtr_reg_bits) { 4909 mlx5_ipool_free(fm->flow_ipool, tag_id); 4910 return rte_flow_error_set(error, EINVAL, 4911 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 4912 "Meter flow id exceeds max limit."); 4913 } 4914 if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits) 4915 priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits; 4916 } 4917 /* Build tag actions and items for meter_id/meter flow_id. */ 4918 set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre; 4919 tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 4920 tag_item_mask = tag_item_spec + 1; 4921 /* Both flow_id and meter_id share the same register. */ 4922 *set_tag = (struct mlx5_rte_flow_action_set_tag) { 4923 .id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID, 4924 0, error), 4925 .offset = mtr_id_offset, 4926 .length = mtr_reg_bits, 4927 .data = flow->meter, 4928 }; 4929 /* 4930 * The color Reg bits used by flow_id are growing from 4931 * msb to lsb, so must do bit reverse for flow_id val in RegC. 4932 */ 4933 for (shift = 0; shift < flow_id_bits; shift++) 4934 flow_id_reversed = (flow_id_reversed << 1) | 4935 ((flow_id >> shift) & 0x1); 4936 set_tag->data |= 4937 flow_id_reversed << (mtr_reg_bits - flow_id_bits); 4938 tag_item_spec->id = set_tag->id; 4939 tag_item_spec->data = set_tag->data << mtr_id_offset; 4940 tag_item_mask->data = UINT32_MAX << mtr_id_offset; 4941 tag_action->type = (enum rte_flow_action_type) 4942 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4943 tag_action->conf = set_tag; 4944 tag_item->type = (enum rte_flow_item_type) 4945 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 4946 tag_item->spec = tag_item_spec; 4947 tag_item->last = NULL; 4948 tag_item->mask = tag_item_mask; 4949 exit: 4950 if (mtr_flow_id) 4951 *mtr_flow_id = tag_id; 4952 return 0; 4953 } 4954 4955 /** 4956 * Split action list having QUEUE/RSS for metadata register copy. 4957 * 4958 * Once Q/RSS action is detected in user's action list, the flow action 4959 * should be split in order to copy metadata registers, which will happen in 4960 * RX_CP_TBL like, 4961 * - CQE->flow_tag := reg_c[1] (MARK) 4962 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 4963 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 4964 * This is because the last action of each flow must be a terminal action 4965 * (QUEUE, RSS or DROP). 4966 * 4967 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 4968 * stored and kept in the mlx5_flow structure per each sub_flow. 4969 * 4970 * The Q/RSS action is replaced with, 4971 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 4972 * And the following JUMP action is added at the end, 4973 * - JUMP, to RX_CP_TBL. 4974 * 4975 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 4976 * flow_create_split_metadata() routine. The flow will look like, 4977 * - If flow ID matches (reg_c[2]), perform Q/RSS. 4978 * 4979 * @param dev 4980 * Pointer to Ethernet device. 4981 * @param[out] split_actions 4982 * Pointer to store split actions to jump to CP_TBL. 4983 * @param[in] actions 4984 * Pointer to the list of original flow actions. 
4985 * @param[in] qrss 4986 * Pointer to the Q/RSS action. 4987 * @param[in] actions_n 4988 * Number of original actions. 4989 * @param[out] error 4990 * Perform verbose error reporting if not NULL. 4991 * 4992 * @return 4993 * non-zero unique flow_id on success, otherwise 0 and 4994 * error/rte_error are set. 4995 */ 4996 static uint32_t 4997 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 4998 struct rte_flow_action *split_actions, 4999 const struct rte_flow_action *actions, 5000 const struct rte_flow_action *qrss, 5001 int actions_n, struct rte_flow_error *error) 5002 { 5003 struct mlx5_priv *priv = dev->data->dev_private; 5004 struct mlx5_rte_flow_action_set_tag *set_tag; 5005 struct rte_flow_action_jump *jump; 5006 const int qrss_idx = qrss - actions; 5007 uint32_t flow_id = 0; 5008 int ret = 0; 5009 5010 /* 5011 * Given actions will be split 5012 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 5013 * - Add jump to mreg CP_TBL. 5014 * As a result, there will be one more action. 5015 */ 5016 ++actions_n; 5017 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 5018 set_tag = (void *)(split_actions + actions_n); 5019 /* 5020 * If tag action is not set to void(it means we are not the meter 5021 * suffix flow), add the tag action. Since meter suffix flow already 5022 * has the tag added. 5023 */ 5024 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 5025 /* 5026 * Allocate the new subflow ID. This one is unique within 5027 * device and not shared with representors. Otherwise, 5028 * we would have to resolve multi-thread access synch 5029 * issue. Each flow on the shared device is appended 5030 * with source vport identifier, so the resulting 5031 * flows will be unique in the shared (by master and 5032 * representors) domain even if they have coinciding 5033 * IDs. 5034 */ 5035 mlx5_ipool_malloc(priv->sh->ipool 5036 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id); 5037 if (!flow_id) 5038 return rte_flow_error_set(error, ENOMEM, 5039 RTE_FLOW_ERROR_TYPE_ACTION, 5040 NULL, "can't allocate id " 5041 "for split Q/RSS subflow"); 5042 /* Internal SET_TAG action to set flow ID. */ 5043 *set_tag = (struct mlx5_rte_flow_action_set_tag){ 5044 .data = flow_id, 5045 }; 5046 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error); 5047 if (ret < 0) 5048 return ret; 5049 set_tag->id = ret; 5050 /* Construct new actions array. */ 5051 /* Replace QUEUE/RSS action. */ 5052 split_actions[qrss_idx] = (struct rte_flow_action){ 5053 .type = (enum rte_flow_action_type) 5054 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 5055 .conf = set_tag, 5056 }; 5057 } 5058 /* JUMP action to jump to mreg copy table (CP_TBL). */ 5059 jump = (void *)(set_tag + 1); 5060 *jump = (struct rte_flow_action_jump){ 5061 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 5062 }; 5063 split_actions[actions_n - 2] = (struct rte_flow_action){ 5064 .type = RTE_FLOW_ACTION_TYPE_JUMP, 5065 .conf = jump, 5066 }; 5067 split_actions[actions_n - 1] = (struct rte_flow_action){ 5068 .type = RTE_FLOW_ACTION_TYPE_END, 5069 }; 5070 return flow_id; 5071 } 5072 5073 /** 5074 * Extend the given action list for Tx metadata copy. 5075 * 5076 * Copy the given action list to the ext_actions and add flow metadata register 5077 * copy action in order to copy reg_a set by WQE to reg_c[0]. 5078 * 5079 * @param[out] ext_actions 5080 * Pointer to the extended action list. 5081 * @param[in] actions 5082 * Pointer to the list of actions. 5083 * @param[in] actions_n 5084 * Number of actions in the list. 
5085 * @param[out] error 5086 * Perform verbose error reporting if not NULL. 5087 * @param[in] encap_idx 5088 * The encap action index. 5089 * 5090 * @return 5091 * 0 on success, negative value otherwise 5092 */ 5093 static int 5094 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 5095 struct rte_flow_action *ext_actions, 5096 const struct rte_flow_action *actions, 5097 int actions_n, struct rte_flow_error *error, 5098 int encap_idx) 5099 { 5100 struct mlx5_flow_action_copy_mreg *cp_mreg = 5101 (struct mlx5_flow_action_copy_mreg *) 5102 (ext_actions + actions_n + 1); 5103 int ret; 5104 5105 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 5106 if (ret < 0) 5107 return ret; 5108 cp_mreg->dst = ret; 5109 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 5110 if (ret < 0) 5111 return ret; 5112 cp_mreg->src = ret; 5113 if (encap_idx != 0) 5114 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 5115 if (encap_idx == actions_n - 1) { 5116 ext_actions[actions_n - 1] = (struct rte_flow_action){ 5117 .type = (enum rte_flow_action_type) 5118 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 5119 .conf = cp_mreg, 5120 }; 5121 ext_actions[actions_n] = (struct rte_flow_action){ 5122 .type = RTE_FLOW_ACTION_TYPE_END, 5123 }; 5124 } else { 5125 ext_actions[encap_idx] = (struct rte_flow_action){ 5126 .type = (enum rte_flow_action_type) 5127 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 5128 .conf = cp_mreg, 5129 }; 5130 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 5131 sizeof(*ext_actions) * (actions_n - encap_idx)); 5132 } 5133 return 0; 5134 } 5135 5136 /** 5137 * Check the match action from the action list. 5138 * 5139 * @param[in] actions 5140 * Pointer to the list of actions. 5141 * @param[in] attr 5142 * Flow rule attributes. 5143 * @param[in] action 5144 * The action type to check for in the list. 5145 * @param[out] match_action_pos 5146 * Pointer to the position of the matched action if it exists, otherwise -1. 5147 * @param[out] qrss_action_pos 5148 * Pointer to the position of the Queue/RSS action if it exists, otherwise -1. 5149 * @param[out] modify_after_mirror 5150 * Pointer to the flag showing there is a modify action after FDB mirroring. 5151 * 5152 * @return 5153 * > 0 the total number of actions. 5154 * 0 if the match action is not found in the action list.
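 *
 * For example (hypothetical transfer rule), checking for
 * RTE_FLOW_ACTION_TYPE_SAMPLE in
 *   [SAMPLE(ratio = 1), SET_MAC_SRC, QUEUE, END]
 * yields *match_action_pos = 0, *qrss_action_pos = 2,
 * *modify_after_mirror = 1 (FDB mirroring followed by a modify action)
 * and a return value of 4, counting the END action.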
5155 */ 5156 static int 5157 flow_check_match_action(const struct rte_flow_action actions[], 5158 const struct rte_flow_attr *attr, 5159 enum rte_flow_action_type action, 5160 int *match_action_pos, int *qrss_action_pos, 5161 int *modify_after_mirror) 5162 { 5163 const struct rte_flow_action_sample *sample; 5164 int actions_n = 0; 5165 uint32_t ratio = 0; 5166 int sub_type = 0; 5167 int flag = 0; 5168 int fdb_mirror = 0; 5169 5170 *match_action_pos = -1; 5171 *qrss_action_pos = -1; 5172 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 5173 if (actions->type == action) { 5174 flag = 1; 5175 *match_action_pos = actions_n; 5176 } 5177 switch (actions->type) { 5178 case RTE_FLOW_ACTION_TYPE_QUEUE: 5179 case RTE_FLOW_ACTION_TYPE_RSS: 5180 *qrss_action_pos = actions_n; 5181 break; 5182 case RTE_FLOW_ACTION_TYPE_SAMPLE: 5183 sample = actions->conf; 5184 ratio = sample->ratio; 5185 sub_type = ((const struct rte_flow_action *) 5186 (sample->actions))->type; 5187 if (ratio == 1 && attr->transfer) 5188 fdb_mirror = 1; 5189 break; 5190 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC: 5191 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST: 5192 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: 5193 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: 5194 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: 5195 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST: 5196 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC: 5197 case RTE_FLOW_ACTION_TYPE_SET_TP_DST: 5198 case RTE_FLOW_ACTION_TYPE_DEC_TTL: 5199 case RTE_FLOW_ACTION_TYPE_SET_TTL: 5200 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ: 5201 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ: 5202 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK: 5203 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK: 5204 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP: 5205 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP: 5206 case RTE_FLOW_ACTION_TYPE_FLAG: 5207 case RTE_FLOW_ACTION_TYPE_MARK: 5208 case RTE_FLOW_ACTION_TYPE_SET_META: 5209 case RTE_FLOW_ACTION_TYPE_SET_TAG: 5210 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN: 5211 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 5212 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 5213 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 5214 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 5215 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 5216 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 5217 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD: 5218 case RTE_FLOW_ACTION_TYPE_METER: 5219 if (fdb_mirror) 5220 *modify_after_mirror = 1; 5221 break; 5222 default: 5223 break; 5224 } 5225 actions_n++; 5226 } 5227 if (flag && fdb_mirror && !*modify_after_mirror) { 5228 /* FDB mirroring uses the destination array to implement 5229 * instead of FLOW_SAMPLER object. 5230 */ 5231 if (sub_type != RTE_FLOW_ACTION_TYPE_END) 5232 flag = 0; 5233 } 5234 /* Count RTE_FLOW_ACTION_TYPE_END. */ 5235 return flag ? actions_n + 1 : 0; 5236 } 5237 5238 #define SAMPLE_SUFFIX_ITEM 2 5239 5240 /** 5241 * Split the sample flow. 5242 * 5243 * As sample flow will split to two sub flow, sample flow with 5244 * sample action, the other actions will move to new suffix flow. 5245 * 5246 * Also add unique tag id with tag action in the sample flow, 5247 * the same tag id will be as match in the suffix flow. 5248 * 5249 * @param dev 5250 * Pointer to Ethernet device. 5251 * @param[in] add_tag 5252 * Add extra tag action flag. 5253 * @param[out] sfx_items 5254 * Suffix flow match items (list terminated by the END pattern item). 5255 * @param[in] actions 5256 * Associated actions (list terminated by the END action). 5257 * @param[out] actions_sfx 5258 * Suffix flow actions. 5259 * @param[out] actions_pre 5260 * Prefix flow actions. 
5261 * @param[in] actions_n 5262 * The total number of actions. 5263 * @param[in] sample_action_pos 5264 * The sample action position. 5265 * @param[in] qrss_action_pos 5266 * The Queue/RSS action position. 5267 * @param[in] jump_table 5268 * Add extra jump action flag. 5269 * @param[out] error 5270 * Perform verbose error reporting if not NULL. 5271 * 5272 * @return 5273 * 0 on success, or unique flow_id, a negative errno value 5274 * otherwise and rte_errno is set. 5275 */ 5276 static int 5277 flow_sample_split_prep(struct rte_eth_dev *dev, 5278 int add_tag, 5279 struct rte_flow_item sfx_items[], 5280 const struct rte_flow_action actions[], 5281 struct rte_flow_action actions_sfx[], 5282 struct rte_flow_action actions_pre[], 5283 int actions_n, 5284 int sample_action_pos, 5285 int qrss_action_pos, 5286 int jump_table, 5287 struct rte_flow_error *error) 5288 { 5289 struct mlx5_priv *priv = dev->data->dev_private; 5290 struct mlx5_rte_flow_action_set_tag *set_tag; 5291 struct mlx5_rte_flow_item_tag *tag_spec; 5292 struct mlx5_rte_flow_item_tag *tag_mask; 5293 struct rte_flow_action_jump *jump_action; 5294 uint32_t tag_id = 0; 5295 int index; 5296 int append_index = 0; 5297 int ret; 5298 5299 if (sample_action_pos < 0) 5300 return rte_flow_error_set(error, EINVAL, 5301 RTE_FLOW_ERROR_TYPE_ACTION, 5302 NULL, "invalid position of sample " 5303 "action in list"); 5304 /* Prepare the actions for prefix and suffix flow. */ 5305 if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) { 5306 index = qrss_action_pos; 5307 /* Put the preceding the Queue/RSS action into prefix flow. */ 5308 if (index != 0) 5309 memcpy(actions_pre, actions, 5310 sizeof(struct rte_flow_action) * index); 5311 /* Put others preceding the sample action into prefix flow. */ 5312 if (sample_action_pos > index + 1) 5313 memcpy(actions_pre + index, actions + index + 1, 5314 sizeof(struct rte_flow_action) * 5315 (sample_action_pos - index - 1)); 5316 index = sample_action_pos - 1; 5317 /* Put Queue/RSS action into Suffix flow. */ 5318 memcpy(actions_sfx, actions + qrss_action_pos, 5319 sizeof(struct rte_flow_action)); 5320 actions_sfx++; 5321 } else { 5322 index = sample_action_pos; 5323 if (index != 0) 5324 memcpy(actions_pre, actions, 5325 sizeof(struct rte_flow_action) * index); 5326 } 5327 /* For CX5, add an extra tag action for NIC-RX and E-Switch ingress. 5328 * For CX6DX and above, metadata registers Cx preserve their value, 5329 * add an extra tag action for NIC-RX and E-Switch Domain. 5330 */ 5331 if (add_tag) { 5332 /* Prepare the prefix tag action. */ 5333 append_index++; 5334 set_tag = (void *)(actions_pre + actions_n + append_index); 5335 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); 5336 if (ret < 0) 5337 return ret; 5338 mlx5_ipool_malloc(priv->sh->ipool 5339 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id); 5340 *set_tag = (struct mlx5_rte_flow_action_set_tag) { 5341 .id = ret, 5342 .data = tag_id, 5343 }; 5344 /* Prepare the suffix subflow items. */ 5345 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); 5346 tag_spec->data = tag_id; 5347 tag_spec->id = set_tag->id; 5348 tag_mask = tag_spec + 1; 5349 tag_mask->data = UINT32_MAX; 5350 sfx_items[0] = (struct rte_flow_item){ 5351 .type = (enum rte_flow_item_type) 5352 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 5353 .spec = tag_spec, 5354 .last = NULL, 5355 .mask = tag_mask, 5356 }; 5357 sfx_items[1] = (struct rte_flow_item){ 5358 .type = (enum rte_flow_item_type) 5359 RTE_FLOW_ITEM_TYPE_END, 5360 }; 5361 /* Prepare the tag action in prefix subflow. 
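 * The tag action writes the unique flow id allocated above into the
 * application tag register, so the suffix subflow built from
 * sfx_items[] matches exactly the packets that passed through this
 * prefix subflow.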
*/ 5362 actions_pre[index++] = 5363 (struct rte_flow_action){ 5364 .type = (enum rte_flow_action_type) 5365 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 5366 .conf = set_tag, 5367 }; 5368 } 5369 memcpy(actions_pre + index, actions + sample_action_pos, 5370 sizeof(struct rte_flow_action)); 5371 index += 1; 5372 /* For the modify action after the sample action in E-Switch mirroring, 5373 * Add the extra jump action in prefix subflow and jump into the next 5374 * table, then do the modify action in the new table. 5375 */ 5376 if (jump_table) { 5377 /* Prepare the prefix jump action. */ 5378 append_index++; 5379 jump_action = (void *)(actions_pre + actions_n + append_index); 5380 jump_action->group = jump_table; 5381 actions_pre[index++] = 5382 (struct rte_flow_action){ 5383 .type = (enum rte_flow_action_type) 5384 RTE_FLOW_ACTION_TYPE_JUMP, 5385 .conf = jump_action, 5386 }; 5387 } 5388 actions_pre[index] = (struct rte_flow_action){ 5389 .type = (enum rte_flow_action_type) 5390 RTE_FLOW_ACTION_TYPE_END, 5391 }; 5392 /* Put the actions after sample into Suffix flow. */ 5393 memcpy(actions_sfx, actions + sample_action_pos + 1, 5394 sizeof(struct rte_flow_action) * 5395 (actions_n - sample_action_pos - 1)); 5396 return tag_id; 5397 } 5398 5399 /** 5400 * The splitting for metadata feature. 5401 * 5402 * - Q/RSS action on NIC Rx should be split in order to pass by 5403 * the mreg copy table (RX_CP_TBL) and then it jumps to the 5404 * action table (RX_ACT_TBL) which has the split Q/RSS action. 5405 * 5406 * - All the actions on NIC Tx should have a mreg copy action to 5407 * copy reg_a from WQE to reg_c[0]. 5408 * 5409 * @param dev 5410 * Pointer to Ethernet device. 5411 * @param[in] flow 5412 * Parent flow structure pointer. 5413 * @param[in] attr 5414 * Flow rule attributes. 5415 * @param[in] items 5416 * Pattern specification (list terminated by the END pattern item). 5417 * @param[in] actions 5418 * Associated actions (list terminated by the END action). 5419 * @param[in] flow_split_info 5420 * Pointer to flow split info structure. 5421 * @param[out] error 5422 * Perform verbose error reporting if not NULL. 5423 * @return 5424 * 0 on success, negative value otherwise 5425 */ 5426 static int 5427 flow_create_split_metadata(struct rte_eth_dev *dev, 5428 struct rte_flow *flow, 5429 const struct rte_flow_attr *attr, 5430 const struct rte_flow_item items[], 5431 const struct rte_flow_action actions[], 5432 struct mlx5_flow_split_info *flow_split_info, 5433 struct rte_flow_error *error) 5434 { 5435 struct mlx5_priv *priv = dev->data->dev_private; 5436 struct mlx5_dev_config *config = &priv->config; 5437 const struct rte_flow_action *qrss = NULL; 5438 struct rte_flow_action *ext_actions = NULL; 5439 struct mlx5_flow *dev_flow = NULL; 5440 uint32_t qrss_id = 0; 5441 int mtr_sfx = 0; 5442 size_t act_size; 5443 int actions_n; 5444 int encap_idx; 5445 int ret; 5446 5447 /* Check whether extensive metadata feature is engaged. */ 5448 if (!config->dv_flow_en || 5449 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 5450 !mlx5_flow_ext_mreg_supported(dev)) 5451 return flow_create_split_inner(dev, flow, NULL, attr, items, 5452 actions, flow_split_info, error); 5453 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 5454 &encap_idx); 5455 if (qrss) { 5456 /* Exclude hairpin flows from splitting. 
*/ 5457 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 5458 const struct rte_flow_action_queue *queue; 5459 5460 queue = qrss->conf; 5461 if (mlx5_rxq_get_type(dev, queue->index) == 5462 MLX5_RXQ_TYPE_HAIRPIN) 5463 qrss = NULL; 5464 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 5465 const struct rte_flow_action_rss *rss; 5466 5467 rss = qrss->conf; 5468 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 5469 MLX5_RXQ_TYPE_HAIRPIN) 5470 qrss = NULL; 5471 } 5472 } 5473 if (qrss) { 5474 /* Check if it is in meter suffix table. */ 5475 mtr_sfx = attr->group == (attr->transfer ? 5476 (MLX5_FLOW_TABLE_LEVEL_METER - 1) : 5477 MLX5_FLOW_TABLE_LEVEL_METER); 5478 /* 5479 * Q/RSS action on NIC Rx should be split in order to pass by 5480 * the mreg copy table (RX_CP_TBL) and then it jumps to the 5481 * action table (RX_ACT_TBL) which has the split Q/RSS action. 5482 */ 5483 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 5484 sizeof(struct rte_flow_action_set_tag) + 5485 sizeof(struct rte_flow_action_jump); 5486 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 5487 SOCKET_ID_ANY); 5488 if (!ext_actions) 5489 return rte_flow_error_set(error, ENOMEM, 5490 RTE_FLOW_ERROR_TYPE_ACTION, 5491 NULL, "no memory to split " 5492 "metadata flow"); 5493 /* 5494 * If we are the suffix flow of meter, tag already exist. 5495 * Set the tag action to void. 5496 */ 5497 if (mtr_sfx) 5498 ext_actions[qrss - actions].type = 5499 RTE_FLOW_ACTION_TYPE_VOID; 5500 else 5501 ext_actions[qrss - actions].type = 5502 (enum rte_flow_action_type) 5503 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 5504 /* 5505 * Create the new actions list with removed Q/RSS action 5506 * and appended set tag and jump to register copy table 5507 * (RX_CP_TBL). We should preallocate unique tag ID here 5508 * in advance, because it is needed for set tag action. 5509 */ 5510 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 5511 qrss, actions_n, error); 5512 if (!mtr_sfx && !qrss_id) { 5513 ret = -rte_errno; 5514 goto exit; 5515 } 5516 } else if (attr->egress && !attr->transfer) { 5517 /* 5518 * All the actions on NIC Tx should have a metadata register 5519 * copy action to copy reg_a from WQE to reg_c[meta] 5520 */ 5521 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 5522 sizeof(struct mlx5_flow_action_copy_mreg); 5523 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 5524 SOCKET_ID_ANY); 5525 if (!ext_actions) 5526 return rte_flow_error_set(error, ENOMEM, 5527 RTE_FLOW_ERROR_TYPE_ACTION, 5528 NULL, "no memory to split " 5529 "metadata flow"); 5530 /* Create the action list appended with copy register. */ 5531 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 5532 actions_n, error, encap_idx); 5533 if (ret < 0) 5534 goto exit; 5535 } 5536 /* Add the unmodified original or prefix subflow. */ 5537 ret = flow_create_split_inner(dev, flow, &dev_flow, attr, 5538 items, ext_actions ? ext_actions : 5539 actions, flow_split_info, error); 5540 if (ret < 0) 5541 goto exit; 5542 MLX5_ASSERT(dev_flow); 5543 if (qrss) { 5544 const struct rte_flow_attr q_attr = { 5545 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 5546 .ingress = 1, 5547 }; 5548 /* Internal PMD action to set register. 
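 * The register id is left as REG_NON here and is overwritten with the
 * result of mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error) once a
 * non-meter suffix subflow is being created below.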
*/ 5549 struct mlx5_rte_flow_item_tag q_tag_spec = { 5550 .data = qrss_id, 5551 .id = REG_NON, 5552 }; 5553 struct rte_flow_item q_items[] = { 5554 { 5555 .type = (enum rte_flow_item_type) 5556 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 5557 .spec = &q_tag_spec, 5558 .last = NULL, 5559 .mask = NULL, 5560 }, 5561 { 5562 .type = RTE_FLOW_ITEM_TYPE_END, 5563 }, 5564 }; 5565 struct rte_flow_action q_actions[] = { 5566 { 5567 .type = qrss->type, 5568 .conf = qrss->conf, 5569 }, 5570 { 5571 .type = RTE_FLOW_ACTION_TYPE_END, 5572 }, 5573 }; 5574 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 5575 5576 /* 5577 * Configure the tag item only if there is no meter subflow. 5578 * Since tag is already marked in the meter suffix subflow 5579 * we can just use the meter suffix items as is. 5580 */ 5581 if (qrss_id) { 5582 /* Not meter subflow. */ 5583 MLX5_ASSERT(!mtr_sfx); 5584 /* 5585 * Put unique id in prefix flow due to it is destroyed 5586 * after suffix flow and id will be freed after there 5587 * is no actual flows with this id and identifier 5588 * reallocation becomes possible (for example, for 5589 * other flows in other threads). 5590 */ 5591 dev_flow->handle->split_flow_id = qrss_id; 5592 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 5593 error); 5594 if (ret < 0) 5595 goto exit; 5596 q_tag_spec.id = ret; 5597 } 5598 dev_flow = NULL; 5599 /* Add suffix subflow to execute Q/RSS. */ 5600 flow_split_info->prefix_layers = layers; 5601 flow_split_info->prefix_mark = 0; 5602 ret = flow_create_split_inner(dev, flow, &dev_flow, 5603 &q_attr, mtr_sfx ? items : 5604 q_items, q_actions, 5605 flow_split_info, error); 5606 if (ret < 0) 5607 goto exit; 5608 /* qrss ID should be freed if failed. */ 5609 qrss_id = 0; 5610 MLX5_ASSERT(dev_flow); 5611 } 5612 5613 exit: 5614 /* 5615 * We do not destroy the partially created sub_flows in case of error. 5616 * These ones are included into parent flow list and will be destroyed 5617 * by flow_drv_destroy. 5618 */ 5619 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], 5620 qrss_id); 5621 mlx5_free(ext_actions); 5622 return ret; 5623 } 5624 5625 /** 5626 * Create meter internal drop flow with the original pattern. 5627 * 5628 * @param dev 5629 * Pointer to Ethernet device. 5630 * @param[in] flow 5631 * Parent flow structure pointer. 5632 * @param[in] attr 5633 * Flow rule attributes. 5634 * @param[in] items 5635 * Pattern specification (list terminated by the END pattern item). 5636 * @param[in] flow_split_info 5637 * Pointer to flow split info structure. 5638 * @param[in] fm 5639 * Pointer to flow meter structure. 5640 * @param[out] error 5641 * Perform verbose error reporting if not NULL. 
5642 * @return
5643 * 0 on success, negative value otherwise
5644 */
5645 static uint32_t
5646 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
5647 struct rte_flow *flow,
5648 const struct rte_flow_attr *attr,
5649 const struct rte_flow_item items[],
5650 struct mlx5_flow_split_info *flow_split_info,
5651 struct mlx5_flow_meter_info *fm,
5652 struct rte_flow_error *error)
5653 {
5654 struct mlx5_flow *dev_flow = NULL;
5655 struct rte_flow_attr drop_attr = *attr;
5656 struct rte_flow_action drop_actions[3];
5657 struct mlx5_flow_split_info drop_split_info = *flow_split_info;
5658
5659 MLX5_ASSERT(fm->drop_cnt);
5660 drop_actions[0].type =
5661 (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
5662 drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
5663 drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
5664 drop_actions[1].conf = NULL;
5665 drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
5666 drop_actions[2].conf = NULL;
5667 drop_split_info.external = false;
5668 drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5669 drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
5670 drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
5671 return flow_create_split_inner(dev, flow, &dev_flow,
5672 &drop_attr, items, drop_actions,
5673 &drop_split_info, error);
5674 }
5675
5676 /**
5677 * The splitting for meter feature.
5678 *
5679 * - The meter flow is split into two flows, a prefix flow and a
5680 * suffix flow. Packets proceed to the suffix flow only after
5681 * passing the prefix meter action.
5682 *
5683 * - Reg_C_5 is used to match packets between the prefix and the
5684 * suffix flow.
5685 *
5686 * @param dev
5687 * Pointer to Ethernet device.
5688 * @param[in] flow
5689 * Parent flow structure pointer.
5690 * @param[in] attr
5691 * Flow rule attributes.
5692 * @param[in] items
5693 * Pattern specification (list terminated by the END pattern item).
5694 * @param[in] actions
5695 * Associated actions (list terminated by the END action).
5696 * @param[in] flow_split_info
5697 * Pointer to flow split info structure.
5698 * @param[out] error
5699 * Perform verbose error reporting if not NULL.
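 *
 * @note As a rough illustration (the exact layout is built by
 * flow_meter_split_prep()), a rule with actions
 *   METER / QUEUE / END
 * becomes a prefix flow with the original pattern carrying
 *   METER / TAG(flow id) / JUMP(meter suffix table) / END
 * and a suffix flow in the meter suffix table matching the tag item
 * and carrying the remaining
 *   QUEUE / END
 * actions.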
5700 * @return 5701 * 0 on success, negative value otherwise 5702 */ 5703 static int 5704 flow_create_split_meter(struct rte_eth_dev *dev, 5705 struct rte_flow *flow, 5706 const struct rte_flow_attr *attr, 5707 const struct rte_flow_item items[], 5708 const struct rte_flow_action actions[], 5709 struct mlx5_flow_split_info *flow_split_info, 5710 struct rte_flow_error *error) 5711 { 5712 struct mlx5_priv *priv = dev->data->dev_private; 5713 struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); 5714 struct rte_flow_action *sfx_actions = NULL; 5715 struct rte_flow_action *pre_actions = NULL; 5716 struct rte_flow_item *sfx_items = NULL; 5717 struct mlx5_flow *dev_flow = NULL; 5718 struct rte_flow_attr sfx_attr = *attr; 5719 struct mlx5_flow_meter_info *fm = NULL; 5720 uint8_t skip_scale_restore; 5721 bool has_mtr = false; 5722 bool has_modify = false; 5723 bool set_mtr_reg = true; 5724 uint32_t meter_id = 0; 5725 uint32_t mtr_idx = 0; 5726 uint32_t mtr_flow_id = 0; 5727 size_t act_size; 5728 size_t item_size; 5729 int actions_n = 0; 5730 int ret = 0; 5731 5732 if (priv->mtr_en) 5733 actions_n = flow_check_meter_action(dev, actions, &has_mtr, 5734 &has_modify, &meter_id); 5735 if (has_mtr) { 5736 if (flow->meter) { 5737 fm = flow_dv_meter_find_by_idx(priv, flow->meter); 5738 if (!fm) 5739 return rte_flow_error_set(error, EINVAL, 5740 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5741 NULL, "Meter not found."); 5742 } else { 5743 fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx); 5744 if (!fm) 5745 return rte_flow_error_set(error, EINVAL, 5746 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5747 NULL, "Meter not found."); 5748 ret = mlx5_flow_meter_attach(priv, fm, 5749 &sfx_attr, error); 5750 if (ret) 5751 return -rte_errno; 5752 flow->meter = mtr_idx; 5753 } 5754 MLX5_ASSERT(wks); 5755 wks->fm = fm; 5756 /* 5757 * If it isn't default-policy Meter, and 5758 * 1. There's no action in flow to change 5759 * packet (modify/encap/decap etc.), OR 5760 * 2. No drop count needed for this meter. 5761 * no need to use regC to save meter id anymore. 5762 */ 5763 if (!fm->def_policy && (!has_modify || !fm->drop_cnt)) 5764 set_mtr_reg = false; 5765 /* Prefix actions: meter, decap, encap, tag, jump, end. */ 5766 act_size = sizeof(struct rte_flow_action) * (actions_n + 6) + 5767 sizeof(struct mlx5_rte_flow_action_set_tag); 5768 /* Suffix items: tag, vlan, port id, end. */ 5769 #define METER_SUFFIX_ITEM 4 5770 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 5771 sizeof(struct mlx5_rte_flow_item_tag) * 2; 5772 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 5773 0, SOCKET_ID_ANY); 5774 if (!sfx_actions) 5775 return rte_flow_error_set(error, ENOMEM, 5776 RTE_FLOW_ERROR_TYPE_ACTION, 5777 NULL, "no memory to split " 5778 "meter flow"); 5779 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 5780 act_size); 5781 /* There's no suffix flow for meter of non-default policy. */ 5782 if (!fm->def_policy) 5783 pre_actions = sfx_actions + 1; 5784 else 5785 pre_actions = sfx_actions + actions_n; 5786 ret = flow_meter_split_prep(dev, flow, fm, &sfx_attr, 5787 items, sfx_items, actions, 5788 sfx_actions, pre_actions, 5789 (set_mtr_reg ? &mtr_flow_id : NULL), 5790 error); 5791 if (ret) { 5792 ret = -rte_errno; 5793 goto exit; 5794 } 5795 /* Add the prefix subflow. 
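 * The prefix subflow carries the meter and related actions; group
 * scaling of the jump target is suppressed around this call and the
 * previous skip_scale value is restored right after it.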
*/
5796 flow_split_info->prefix_mark = 0;
5797 skip_scale_restore = flow_split_info->skip_scale;
5798 flow_split_info->skip_scale |=
5799 1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
5800 ret = flow_create_split_inner(dev, flow, &dev_flow,
5801 attr, items, pre_actions,
5802 flow_split_info, error);
5803 flow_split_info->skip_scale = skip_scale_restore;
5804 if (ret) {
5805 if (mtr_flow_id)
5806 mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
5807 ret = -rte_errno;
5808 goto exit;
5809 }
5810 if (mtr_flow_id) {
5811 dev_flow->handle->split_flow_id = mtr_flow_id;
5812 dev_flow->handle->is_meter_flow_id = 1;
5813 }
5814 if (!fm->def_policy) {
5815 if (!set_mtr_reg && fm->drop_cnt)
5816 ret =
5817 flow_meter_create_drop_flow_with_org_pattern(dev, flow,
5818 &sfx_attr, items,
5819 flow_split_info,
5820 fm, error);
5821 goto exit;
5822 }
5823 /* Setting the sfx group attr. */
5824 sfx_attr.group = sfx_attr.transfer ?
5825 (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5826 MLX5_FLOW_TABLE_LEVEL_METER;
5827 flow_split_info->prefix_layers =
5828 flow_get_prefix_layer_flags(dev_flow);
5829 flow_split_info->prefix_mark = dev_flow->handle->mark;
5830 flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
5831 }
5832 /* Add the suffix subflow (the original flow if no meter split was done). */
5833 ret = flow_create_split_metadata(dev, flow,
5834 &sfx_attr, sfx_items ?
5835 sfx_items : items,
5836 sfx_actions ? sfx_actions : actions,
5837 flow_split_info, error);
5838 exit:
5839 if (sfx_actions)
5840 mlx5_free(sfx_actions);
5841 return ret;
5842 }
5843
5844 /**
5845 * The splitting for sample feature.
5846 *
5847 * Once a Sample action is detected in the action list, the flow actions are
5848 * split into a prefix sub flow and a suffix sub flow.
5849 *
5850 * The original items remain in the prefix sub flow. All actions preceding the
5851 * sample action and the sample action itself are copied to the prefix
5852 * sub flow, while the actions following the sample action are copied to the
5853 * suffix sub flow; the Queue action is always located in the suffix sub flow.
5854 *
5855 * To make packets from the prefix sub flow match the suffix sub
5856 * flow, an extra tag action is added to the prefix sub flow, and the suffix
5857 * sub flow uses a tag item with the unique flow id.
5858 *
5859 * @param dev
5860 * Pointer to Ethernet device.
5861 * @param[in] flow
5862 * Parent flow structure pointer.
5863 * @param[in] attr
5864 * Flow rule attributes.
5865 * @param[in] items
5866 * Pattern specification (list terminated by the END pattern item).
5867 * @param[in] actions
5868 * Associated actions (list terminated by the END action).
5869 * @param[in] flow_split_info
5870 * Pointer to flow split info structure.
5871 * @param[out] error
5872 * Perform verbose error reporting if not NULL.
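 *
 * @note As a rough illustration, when the extra tag is needed a rule
 * with actions
 *   SAMPLE / MARK / QUEUE / END
 * is split into a prefix sub flow with the original pattern and
 *   TAG(unique id) / SAMPLE / END
 * actions, and a suffix sub flow matching the tag item and carrying
 *   MARK / QUEUE / END
 * actions.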
5873 * @return 5874 * 0 on success, negative value otherwise 5875 */ 5876 static int 5877 flow_create_split_sample(struct rte_eth_dev *dev, 5878 struct rte_flow *flow, 5879 const struct rte_flow_attr *attr, 5880 const struct rte_flow_item items[], 5881 const struct rte_flow_action actions[], 5882 struct mlx5_flow_split_info *flow_split_info, 5883 struct rte_flow_error *error) 5884 { 5885 struct mlx5_priv *priv = dev->data->dev_private; 5886 struct rte_flow_action *sfx_actions = NULL; 5887 struct rte_flow_action *pre_actions = NULL; 5888 struct rte_flow_item *sfx_items = NULL; 5889 struct mlx5_flow *dev_flow = NULL; 5890 struct rte_flow_attr sfx_attr = *attr; 5891 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 5892 struct mlx5_flow_dv_sample_resource *sample_res; 5893 struct mlx5_flow_tbl_data_entry *sfx_tbl_data; 5894 struct mlx5_flow_tbl_resource *sfx_tbl; 5895 #endif 5896 size_t act_size; 5897 size_t item_size; 5898 uint32_t fdb_tx = 0; 5899 int32_t tag_id = 0; 5900 int actions_n = 0; 5901 int sample_action_pos; 5902 int qrss_action_pos; 5903 int add_tag = 0; 5904 int modify_after_mirror = 0; 5905 uint16_t jump_table = 0; 5906 const uint32_t next_ft_step = 1; 5907 int ret = 0; 5908 5909 if (priv->sampler_en) 5910 actions_n = flow_check_match_action(actions, attr, 5911 RTE_FLOW_ACTION_TYPE_SAMPLE, 5912 &sample_action_pos, &qrss_action_pos, 5913 &modify_after_mirror); 5914 if (actions_n) { 5915 /* The prefix actions must includes sample, tag, end. */ 5916 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1) 5917 + sizeof(struct mlx5_rte_flow_action_set_tag); 5918 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM + 5919 sizeof(struct mlx5_rte_flow_item_tag) * 2; 5920 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + 5921 item_size), 0, SOCKET_ID_ANY); 5922 if (!sfx_actions) 5923 return rte_flow_error_set(error, ENOMEM, 5924 RTE_FLOW_ERROR_TYPE_ACTION, 5925 NULL, "no memory to split " 5926 "sample flow"); 5927 /* The representor_id is UINT16_MAX for uplink. */ 5928 fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX); 5929 /* 5930 * When reg_c_preserve is set, metadata registers Cx preserve 5931 * their value even through packet duplication. 5932 */ 5933 add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve); 5934 if (add_tag) 5935 sfx_items = (struct rte_flow_item *)((char *)sfx_actions 5936 + act_size); 5937 if (modify_after_mirror) 5938 jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR + 5939 next_ft_step; 5940 pre_actions = sfx_actions + actions_n; 5941 tag_id = flow_sample_split_prep(dev, add_tag, sfx_items, 5942 actions, sfx_actions, 5943 pre_actions, actions_n, 5944 sample_action_pos, 5945 qrss_action_pos, jump_table, 5946 error); 5947 if (tag_id < 0 || (add_tag && !tag_id)) { 5948 ret = -rte_errno; 5949 goto exit; 5950 } 5951 if (modify_after_mirror) 5952 flow_split_info->skip_scale = 5953 1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT; 5954 /* Add the prefix subflow. */ 5955 ret = flow_create_split_inner(dev, flow, &dev_flow, attr, 5956 items, pre_actions, 5957 flow_split_info, error); 5958 if (ret) { 5959 ret = -rte_errno; 5960 goto exit; 5961 } 5962 dev_flow->handle->split_flow_id = tag_id; 5963 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 5964 if (!modify_after_mirror) { 5965 /* Set the sfx group attr. 
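 * The group is taken from the level of the sample normal path table,
 * so the suffix subflow is created in the table to which the sample
 * action forwards packets on its normal (non-mirrored) path.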
*/
5966 sample_res = (struct mlx5_flow_dv_sample_resource *)
5967 dev_flow->dv.sample_res;
5968 sfx_tbl = (struct mlx5_flow_tbl_resource *)
5969 sample_res->normal_path_tbl;
5970 sfx_tbl_data = container_of(sfx_tbl,
5971 struct mlx5_flow_tbl_data_entry,
5972 tbl);
5973 sfx_attr.group = sfx_attr.transfer ?
5974 (sfx_tbl_data->level - 1) : sfx_tbl_data->level;
5975 } else {
5976 MLX5_ASSERT(attr->transfer);
5977 sfx_attr.group = jump_table;
5978 }
5979 flow_split_info->prefix_layers =
5980 flow_get_prefix_layer_flags(dev_flow);
5981 flow_split_info->prefix_mark = dev_flow->handle->mark;
5982 /* The suffix group level has already been scaled with the factor,
5983 * so set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to avoid
5984 * scaling again in translation.
5985 */
5986 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5987 #endif
5988 }
5989 /* Add the suffix subflow. */
5990 ret = flow_create_split_meter(dev, flow, &sfx_attr,
5991 sfx_items ? sfx_items : items,
5992 sfx_actions ? sfx_actions : actions,
5993 flow_split_info, error);
5994 exit:
5995 if (sfx_actions)
5996 mlx5_free(sfx_actions);
5997 return ret;
5998 }
5999
6000 /**
6001 * Split the flow into a set of subflows. The splitters might be linked
6002 * in a chain, like this:
6003 * flow_create_split_outer() calls:
6004 * flow_create_split_meter() calls:
6005 * flow_create_split_metadata(meter_subflow_0) calls:
6006 * flow_create_split_inner(metadata_subflow_0)
6007 * flow_create_split_inner(metadata_subflow_1)
6008 * flow_create_split_inner(metadata_subflow_2)
6009 * flow_create_split_metadata(meter_subflow_1) calls:
6010 * flow_create_split_inner(metadata_subflow_0)
6011 * flow_create_split_inner(metadata_subflow_1)
6012 * flow_create_split_inner(metadata_subflow_2)
6013 *
6014 * This provides a flexible way to add new levels of flow splitting.
6015 * All successfully created subflows are included in the
6016 * parent flow dev_flow list.
6017 *
6018 * @param dev
6019 * Pointer to Ethernet device.
6020 * @param[in] flow
6021 * Parent flow structure pointer.
6022 * @param[in] attr
6023 * Flow rule attributes.
6024 * @param[in] items
6025 * Pattern specification (list terminated by the END pattern item).
6026 * @param[in] actions
6027 * Associated actions (list terminated by the END action).
6028 * @param[in] flow_split_info
6029 * Pointer to flow split info structure.
6030 * @param[out] error
6031 * Perform verbose error reporting if not NULL.
6032 * @return
6033 * 0 on success, negative value otherwise
6034 */
6035 static int
6036 flow_create_split_outer(struct rte_eth_dev *dev,
6037 struct rte_flow *flow,
6038 const struct rte_flow_attr *attr,
6039 const struct rte_flow_item items[],
6040 const struct rte_flow_action actions[],
6041 struct mlx5_flow_split_info *flow_split_info,
6042 struct rte_flow_error *error)
6043 {
6044 int ret;
6045
6046 ret = flow_create_split_sample(dev, flow, attr, items,
6047 actions, flow_split_info, error);
6048 MLX5_ASSERT(ret <= 0);
6049 return ret;
6050 }
6051
6052 static inline struct mlx5_flow_tunnel *
6053 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6054 {
6055 struct mlx5_flow_tunnel *tunnel;
6056
6057 #pragma GCC diagnostic push
6058 #pragma GCC diagnostic ignored "-Wcast-qual"
6059 tunnel = (typeof(tunnel))flow->tunnel;
6060 #pragma GCC diagnostic pop
6061
6062 return tunnel;
6063 }
6064
6065 /**
6066 * Adjust flow RSS workspace if needed.
6067 *
6068 * @param wks
6069 * Pointer to thread flow work space.
6070 * @param rss_desc
6071 * Pointer to RSS descriptor.
6072 * @param[in] nrssq_num 6073 * New RSS queue number. 6074 * 6075 * @return 6076 * 0 on success, -1 otherwise and rte_errno is set. 6077 */ 6078 static int 6079 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks, 6080 struct mlx5_flow_rss_desc *rss_desc, 6081 uint32_t nrssq_num) 6082 { 6083 if (likely(nrssq_num <= wks->rssq_num)) 6084 return 0; 6085 rss_desc->queue = realloc(rss_desc->queue, 6086 sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2)); 6087 if (!rss_desc->queue) { 6088 rte_errno = ENOMEM; 6089 return -1; 6090 } 6091 wks->rssq_num = RTE_ALIGN(nrssq_num, 2); 6092 return 0; 6093 } 6094 6095 /** 6096 * Create a flow and add it to @p list. 6097 * 6098 * @param dev 6099 * Pointer to Ethernet device. 6100 * @param list 6101 * Pointer to a TAILQ flow list. If this parameter NULL, 6102 * no list insertion occurred, flow is just created, 6103 * this is caller's responsibility to track the 6104 * created flow. 6105 * @param[in] attr 6106 * Flow rule attributes. 6107 * @param[in] items 6108 * Pattern specification (list terminated by the END pattern item). 6109 * @param[in] actions 6110 * Associated actions (list terminated by the END action). 6111 * @param[in] external 6112 * This flow rule is created by request external to PMD. 6113 * @param[out] error 6114 * Perform verbose error reporting if not NULL. 6115 * 6116 * @return 6117 * A flow index on success, 0 otherwise and rte_errno is set. 6118 */ 6119 static uint32_t 6120 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 6121 const struct rte_flow_attr *attr, 6122 const struct rte_flow_item items[], 6123 const struct rte_flow_action original_actions[], 6124 bool external, struct rte_flow_error *error) 6125 { 6126 struct mlx5_priv *priv = dev->data->dev_private; 6127 struct rte_flow *flow = NULL; 6128 struct mlx5_flow *dev_flow; 6129 const struct rte_flow_action_rss *rss = NULL; 6130 struct mlx5_translated_action_handle 6131 indir_actions[MLX5_MAX_INDIRECT_ACTIONS]; 6132 int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS; 6133 union { 6134 struct mlx5_flow_expand_rss buf; 6135 uint8_t buffer[2048]; 6136 } expand_buffer; 6137 union { 6138 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 6139 uint8_t buffer[2048]; 6140 } actions_rx; 6141 union { 6142 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 6143 uint8_t buffer[2048]; 6144 } actions_hairpin_tx; 6145 union { 6146 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 6147 uint8_t buffer[2048]; 6148 } items_tx; 6149 struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; 6150 struct mlx5_flow_rss_desc *rss_desc; 6151 const struct rte_flow_action *p_actions_rx; 6152 uint32_t i; 6153 uint32_t idx = 0; 6154 int hairpin_flow; 6155 struct rte_flow_attr attr_tx = { .priority = 0 }; 6156 const struct rte_flow_action *actions; 6157 struct rte_flow_action *translated_actions = NULL; 6158 struct mlx5_flow_tunnel *tunnel; 6159 struct tunnel_default_miss_ctx default_miss_ctx = { 0, }; 6160 struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace(); 6161 struct mlx5_flow_split_info flow_split_info = { 6162 .external = !!external, 6163 .skip_scale = 0, 6164 .flow_idx = 0, 6165 .prefix_mark = 0, 6166 .prefix_layers = 0, 6167 .table_id = 0 6168 }; 6169 int ret; 6170 6171 MLX5_ASSERT(wks); 6172 rss_desc = &wks->rss_desc; 6173 ret = flow_action_handles_translate(dev, original_actions, 6174 indir_actions, 6175 &indir_actions_n, 6176 &translated_actions, error); 6177 if (ret < 0) { 6178 MLX5_ASSERT(translated_actions == NULL); 6179 return 0; 6180 } 6181 actions = 
translated_actions ? translated_actions : original_actions; 6182 p_actions_rx = actions; 6183 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 6184 ret = flow_drv_validate(dev, attr, items, p_actions_rx, 6185 external, hairpin_flow, error); 6186 if (ret < 0) 6187 goto error_before_hairpin_split; 6188 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 6189 if (!flow) { 6190 rte_errno = ENOMEM; 6191 goto error_before_hairpin_split; 6192 } 6193 if (hairpin_flow > 0) { 6194 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 6195 rte_errno = EINVAL; 6196 goto error_before_hairpin_split; 6197 } 6198 flow_hairpin_split(dev, actions, actions_rx.actions, 6199 actions_hairpin_tx.actions, items_tx.items, 6200 idx); 6201 p_actions_rx = actions_rx.actions; 6202 } 6203 flow_split_info.flow_idx = idx; 6204 flow->drv_type = flow_get_drv_type(dev, attr); 6205 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 6206 flow->drv_type < MLX5_FLOW_TYPE_MAX); 6207 memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue)); 6208 /* RSS Action only works on NIC RX domain */ 6209 if (attr->ingress && !attr->transfer) 6210 rss = flow_get_rss_action(dev, p_actions_rx); 6211 if (rss) { 6212 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num)) 6213 return 0; 6214 /* 6215 * The following information is required by 6216 * mlx5_flow_hashfields_adjust() in advance. 6217 */ 6218 rss_desc->level = rss->level; 6219 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 6220 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 6221 } 6222 flow->dev_handles = 0; 6223 if (rss && rss->types) { 6224 unsigned int graph_root; 6225 6226 graph_root = find_graph_root(items, rss->level); 6227 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 6228 items, rss->types, 6229 mlx5_support_expansion, graph_root); 6230 MLX5_ASSERT(ret > 0 && 6231 (unsigned int)ret < sizeof(expand_buffer.buffer)); 6232 if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) { 6233 for (i = 0; i < buf->entries; ++i) 6234 mlx5_dbg__print_pattern(buf->entry[i].pattern); 6235 } 6236 } else { 6237 buf->entries = 1; 6238 buf->entry[0].pattern = (void *)(uintptr_t)items; 6239 } 6240 rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions, 6241 indir_actions_n); 6242 for (i = 0; i < buf->entries; ++i) { 6243 /* Initialize flow split data. */ 6244 flow_split_info.prefix_layers = 0; 6245 flow_split_info.prefix_mark = 0; 6246 flow_split_info.skip_scale = 0; 6247 /* 6248 * The splitter may create multiple dev_flows, 6249 * depending on configuration. In the simplest 6250 * case it just creates unmodified original flow. 6251 */ 6252 ret = flow_create_split_outer(dev, flow, attr, 6253 buf->entry[i].pattern, 6254 p_actions_rx, &flow_split_info, 6255 error); 6256 if (ret < 0) 6257 goto error; 6258 if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) { 6259 ret = flow_tunnel_add_default_miss(dev, flow, attr, 6260 p_actions_rx, 6261 idx, 6262 wks->flows[0].tunnel, 6263 &default_miss_ctx, 6264 error); 6265 if (ret < 0) { 6266 mlx5_free(default_miss_ctx.queue); 6267 goto error; 6268 } 6269 } 6270 } 6271 /* Create the tx flow. 
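 * For hairpin flows the Tx side rule is built from the items and
 * actions prepared by flow_hairpin_split() above and is placed in the
 * dedicated MLX5_HAIRPIN_TX_TABLE group of the egress domain.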
*/ 6272 if (hairpin_flow) { 6273 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 6274 attr_tx.ingress = 0; 6275 attr_tx.egress = 1; 6276 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 6277 actions_hairpin_tx.actions, 6278 idx, error); 6279 if (!dev_flow) 6280 goto error; 6281 dev_flow->flow = flow; 6282 dev_flow->external = 0; 6283 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 6284 dev_flow->handle, next); 6285 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 6286 items_tx.items, 6287 actions_hairpin_tx.actions, error); 6288 if (ret < 0) 6289 goto error; 6290 } 6291 /* 6292 * Update the metadata register copy table. If extensive 6293 * metadata feature is enabled and registers are supported 6294 * we might create the extra rte_flow for each unique 6295 * MARK/FLAG action ID. 6296 * 6297 * The table is updated for ingress Flows only, because 6298 * the egress Flows belong to the different device and 6299 * copy table should be updated in peer NIC Rx domain. 6300 */ 6301 if (attr->ingress && 6302 (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 6303 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 6304 if (ret) 6305 goto error; 6306 } 6307 /* 6308 * If the flow is external (from application) OR device is started, 6309 * OR mreg discover, then apply immediately. 6310 */ 6311 if (external || dev->data->dev_started || 6312 (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP && 6313 attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) { 6314 ret = flow_drv_apply(dev, flow, error); 6315 if (ret < 0) 6316 goto error; 6317 } 6318 if (list) { 6319 rte_spinlock_lock(&priv->flow_list_lock); 6320 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 6321 flow, next); 6322 rte_spinlock_unlock(&priv->flow_list_lock); 6323 } 6324 flow_rxq_flags_set(dev, flow); 6325 rte_free(translated_actions); 6326 tunnel = flow_tunnel_from_rule(wks->flows); 6327 if (tunnel) { 6328 flow->tunnel = 1; 6329 flow->tunnel_id = tunnel->tunnel_id; 6330 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED); 6331 mlx5_free(default_miss_ctx.queue); 6332 } 6333 mlx5_flow_pop_thread_workspace(); 6334 return idx; 6335 error: 6336 MLX5_ASSERT(flow); 6337 ret = rte_errno; /* Save rte_errno before cleanup. */ 6338 flow_mreg_del_copy_action(dev, flow); 6339 flow_drv_destroy(dev, flow); 6340 if (rss_desc->shared_rss) 6341 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *) 6342 mlx5_ipool_get 6343 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], 6344 rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED); 6345 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 6346 rte_errno = ret; /* Restore rte_errno. */ 6347 ret = rte_errno; 6348 rte_errno = ret; 6349 mlx5_flow_pop_thread_workspace(); 6350 error_before_hairpin_split: 6351 rte_free(translated_actions); 6352 return 0; 6353 } 6354 6355 /** 6356 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 6357 * incoming packets to table 1. 6358 * 6359 * Other flow rules, requested for group n, will be created in 6360 * e-switch table n+1. 6361 * Jump action to e-switch group n will be created to group n+1. 6362 * 6363 * Used when working in switchdev mode, to utilise advantages of table 1 6364 * and above. 6365 * 6366 * @param dev 6367 * Pointer to Ethernet device. 6368 * 6369 * @return 6370 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
6371 */
6372 struct rte_flow *
6373 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
6374 {
6375 const struct rte_flow_attr attr = {
6376 .group = 0,
6377 .priority = 0,
6378 .ingress = 1,
6379 .egress = 0,
6380 .transfer = 1,
6381 };
6382 const struct rte_flow_item pattern = {
6383 .type = RTE_FLOW_ITEM_TYPE_END,
6384 };
6385 struct rte_flow_action_jump jump = {
6386 .group = 1,
6387 };
6388 const struct rte_flow_action actions[] = {
6389 {
6390 .type = RTE_FLOW_ACTION_TYPE_JUMP,
6391 .conf = &jump,
6392 },
6393 {
6394 .type = RTE_FLOW_ACTION_TYPE_END,
6395 },
6396 };
6397 struct mlx5_priv *priv = dev->data->dev_private;
6398 struct rte_flow_error error;
6399
6400 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
6401 &attr, &pattern,
6402 actions, false, &error);
6403 }
6404
6405 /**
6406 * Validate a flow supported by the NIC.
6407 *
6408 * @see rte_flow_validate()
6409 * @see rte_flow_ops
6410 */
6411 int
6412 mlx5_flow_validate(struct rte_eth_dev *dev,
6413 const struct rte_flow_attr *attr,
6414 const struct rte_flow_item items[],
6415 const struct rte_flow_action original_actions[],
6416 struct rte_flow_error *error)
6417 {
6418 int hairpin_flow;
6419 struct mlx5_translated_action_handle
6420 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6421 int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6422 const struct rte_flow_action *actions;
6423 struct rte_flow_action *translated_actions = NULL;
6424 int ret = flow_action_handles_translate(dev, original_actions,
6425 indir_actions,
6426 &indir_actions_n,
6427 &translated_actions, error);
6428
6429 if (ret)
6430 return ret;
6431 actions = translated_actions ? translated_actions : original_actions;
6432 hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6433 ret = flow_drv_validate(dev, attr, items, actions,
6434 true, hairpin_flow, error);
6435 rte_free(translated_actions);
6436 return ret;
6437 }
6438
6439 /**
6440 * Create a flow.
6441 *
6442 * @see rte_flow_create()
6443 * @see rte_flow_ops
6444 */
6445 struct rte_flow *
6446 mlx5_flow_create(struct rte_eth_dev *dev,
6447 const struct rte_flow_attr *attr,
6448 const struct rte_flow_item items[],
6449 const struct rte_flow_action actions[],
6450 struct rte_flow_error *error)
6451 {
6452 struct mlx5_priv *priv = dev->data->dev_private;
6453
6454 /*
6455 * If the device is not started yet, the application is not allowed
6456 * to create a flow. PMD default flows and traffic control flows
6457 * are not affected.
6458 */
6459 if (unlikely(!dev->data->dev_started)) {
6460 DRV_LOG(DEBUG, "port %u is not started when "
6461 "inserting a flow", dev->data->port_id);
6462 rte_flow_error_set(error, ENODEV,
6463 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6464 NULL,
6465 "port not started");
6466 return NULL;
6467 }
6468
6469 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
6470 attr, items, actions, true, error);
6471 }
6472
6473 /**
6474 * Destroy a flow in a list.
6475 *
6476 * @param dev
6477 * Pointer to Ethernet device.
6478 * @param list
6479 * Pointer to the Indexed flow list. If this parameter is NULL,
6480 * the flow is not removed from any list. Note that since flows are
6481 * stored in the indexed list, the memory the list points to may
6482 * change as flows are destroyed.
6483 * @param[in] flow_idx
6484 * Index of flow to destroy.
6485 */ 6486 static void 6487 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 6488 uint32_t flow_idx) 6489 { 6490 struct mlx5_priv *priv = dev->data->dev_private; 6491 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 6492 [MLX5_IPOOL_RTE_FLOW], flow_idx); 6493 6494 if (!flow) 6495 return; 6496 /* 6497 * Update RX queue flags only if port is started, otherwise it is 6498 * already clean. 6499 */ 6500 if (dev->data->dev_started) 6501 flow_rxq_flags_trim(dev, flow); 6502 flow_drv_destroy(dev, flow); 6503 if (list) { 6504 rte_spinlock_lock(&priv->flow_list_lock); 6505 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 6506 flow_idx, flow, next); 6507 rte_spinlock_unlock(&priv->flow_list_lock); 6508 } 6509 if (flow->tunnel) { 6510 struct mlx5_flow_tunnel *tunnel; 6511 6512 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id); 6513 RTE_VERIFY(tunnel); 6514 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED)) 6515 mlx5_flow_tunnel_free(dev, tunnel); 6516 } 6517 flow_mreg_del_copy_action(dev, flow); 6518 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 6519 } 6520 6521 /** 6522 * Destroy all flows. 6523 * 6524 * @param dev 6525 * Pointer to Ethernet device. 6526 * @param list 6527 * Pointer to the Indexed flow list. 6528 * @param active 6529 * If flushing is called avtively. 6530 */ 6531 void 6532 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 6533 { 6534 uint32_t num_flushed = 0; 6535 6536 while (*list) { 6537 flow_list_destroy(dev, list, *list); 6538 num_flushed++; 6539 } 6540 if (active) { 6541 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 6542 dev->data->port_id, num_flushed); 6543 } 6544 } 6545 6546 /** 6547 * Stop all default actions for flows. 6548 * 6549 * @param dev 6550 * Pointer to Ethernet device. 6551 */ 6552 void 6553 mlx5_flow_stop_default(struct rte_eth_dev *dev) 6554 { 6555 flow_mreg_del_default_copy_action(dev); 6556 flow_rxq_flags_clear(dev); 6557 } 6558 6559 /** 6560 * Start all default actions for flows. 6561 * 6562 * @param dev 6563 * Pointer to Ethernet device. 6564 * @return 6565 * 0 on success, a negative errno value otherwise and rte_errno is set. 6566 */ 6567 int 6568 mlx5_flow_start_default(struct rte_eth_dev *dev) 6569 { 6570 struct rte_flow_error error; 6571 6572 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 6573 return flow_mreg_add_default_copy_action(dev, &error); 6574 } 6575 6576 /** 6577 * Release key of thread specific flow workspace data. 6578 */ 6579 void 6580 flow_release_workspace(void *data) 6581 { 6582 struct mlx5_flow_workspace *wks = data; 6583 struct mlx5_flow_workspace *next; 6584 6585 while (wks) { 6586 next = wks->next; 6587 free(wks->rss_desc.queue); 6588 free(wks); 6589 wks = next; 6590 } 6591 } 6592 6593 /** 6594 * Get thread specific current flow workspace. 6595 * 6596 * @return pointer to thread specific flow workspace data, NULL on error. 6597 */ 6598 struct mlx5_flow_workspace* 6599 mlx5_flow_get_thread_workspace(void) 6600 { 6601 struct mlx5_flow_workspace *data; 6602 6603 data = mlx5_flow_os_get_specific_workspace(); 6604 MLX5_ASSERT(data && data->inuse); 6605 if (!data || !data->inuse) 6606 DRV_LOG(ERR, "flow workspace not initialized."); 6607 return data; 6608 } 6609 6610 /** 6611 * Allocate and init new flow workspace. 6612 * 6613 * @return pointer to flow workspace data, NULL on error. 
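 *
 * @note Callers normally do not use this function directly; a workspace
 * is obtained with mlx5_flow_push_thread_workspace() and released with
 * mlx5_flow_pop_thread_workspace(), as flow_list_create() does:
 * @code
 * struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 *
 * MLX5_ASSERT(wks);
 * mlx5_flow_pop_thread_workspace();
 * @endcode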
6614 */ 6615 static struct mlx5_flow_workspace* 6616 flow_alloc_thread_workspace(void) 6617 { 6618 struct mlx5_flow_workspace *data = calloc(1, sizeof(*data)); 6619 6620 if (!data) { 6621 DRV_LOG(ERR, "Failed to allocate flow workspace " 6622 "memory."); 6623 return NULL; 6624 } 6625 data->rss_desc.queue = calloc(1, 6626 sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM); 6627 if (!data->rss_desc.queue) 6628 goto err; 6629 data->rssq_num = MLX5_RSSQ_DEFAULT_NUM; 6630 return data; 6631 err: 6632 if (data->rss_desc.queue) 6633 free(data->rss_desc.queue); 6634 free(data); 6635 return NULL; 6636 } 6637 6638 /** 6639 * Get new thread specific flow workspace. 6640 * 6641 * If current workspace inuse, create new one and set as current. 6642 * 6643 * @return pointer to thread specific flow workspace data, NULL on error. 6644 */ 6645 static struct mlx5_flow_workspace* 6646 mlx5_flow_push_thread_workspace(void) 6647 { 6648 struct mlx5_flow_workspace *curr; 6649 struct mlx5_flow_workspace *data; 6650 6651 curr = mlx5_flow_os_get_specific_workspace(); 6652 if (!curr) { 6653 data = flow_alloc_thread_workspace(); 6654 if (!data) 6655 return NULL; 6656 } else if (!curr->inuse) { 6657 data = curr; 6658 } else if (curr->next) { 6659 data = curr->next; 6660 } else { 6661 data = flow_alloc_thread_workspace(); 6662 if (!data) 6663 return NULL; 6664 curr->next = data; 6665 data->prev = curr; 6666 } 6667 data->inuse = 1; 6668 data->flow_idx = 0; 6669 /* Set as current workspace */ 6670 if (mlx5_flow_os_set_specific_workspace(data)) 6671 DRV_LOG(ERR, "Failed to set flow workspace to thread."); 6672 return data; 6673 } 6674 6675 /** 6676 * Close current thread specific flow workspace. 6677 * 6678 * If previous workspace available, set it as current. 6679 * 6680 * @return pointer to thread specific flow workspace data, NULL on error. 6681 */ 6682 static void 6683 mlx5_flow_pop_thread_workspace(void) 6684 { 6685 struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace(); 6686 6687 if (!data) 6688 return; 6689 if (!data->inuse) { 6690 DRV_LOG(ERR, "Failed to close unused flow workspace."); 6691 return; 6692 } 6693 data->inuse = 0; 6694 if (!data->prev) 6695 return; 6696 if (mlx5_flow_os_set_specific_workspace(data->prev)) 6697 DRV_LOG(ERR, "Failed to set flow workspace to thread."); 6698 } 6699 6700 /** 6701 * Verify the flow list is empty 6702 * 6703 * @param dev 6704 * Pointer to Ethernet device. 6705 * 6706 * @return the number of flows not released. 6707 */ 6708 int 6709 mlx5_flow_verify(struct rte_eth_dev *dev) 6710 { 6711 struct mlx5_priv *priv = dev->data->dev_private; 6712 struct rte_flow *flow; 6713 uint32_t idx; 6714 int ret = 0; 6715 6716 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 6717 flow, next) { 6718 DRV_LOG(DEBUG, "port %u flow %p still referenced", 6719 dev->data->port_id, (void *)flow); 6720 ++ret; 6721 } 6722 return ret; 6723 } 6724 6725 /** 6726 * Enable default hairpin egress flow. 6727 * 6728 * @param dev 6729 * Pointer to Ethernet device. 6730 * @param queue 6731 * The queue index. 6732 * 6733 * @return 6734 * 0 on success, a negative errno value otherwise and rte_errno is set. 
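 *
 * @note Example (the queue index is illustrative only):
 * @code
 * if (mlx5_ctrl_flow_source_queue(dev, 0))
 *     DRV_LOG(ERR, "port %u cannot create hairpin source queue flow",
 *             dev->data->port_id);
 * @endcode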
6735 */ 6736 int 6737 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 6738 uint32_t queue) 6739 { 6740 struct mlx5_priv *priv = dev->data->dev_private; 6741 const struct rte_flow_attr attr = { 6742 .egress = 1, 6743 .priority = 0, 6744 }; 6745 struct mlx5_rte_flow_item_tx_queue queue_spec = { 6746 .queue = queue, 6747 }; 6748 struct mlx5_rte_flow_item_tx_queue queue_mask = { 6749 .queue = UINT32_MAX, 6750 }; 6751 struct rte_flow_item items[] = { 6752 { 6753 .type = (enum rte_flow_item_type) 6754 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 6755 .spec = &queue_spec, 6756 .last = NULL, 6757 .mask = &queue_mask, 6758 }, 6759 { 6760 .type = RTE_FLOW_ITEM_TYPE_END, 6761 }, 6762 }; 6763 struct rte_flow_action_jump jump = { 6764 .group = MLX5_HAIRPIN_TX_TABLE, 6765 }; 6766 struct rte_flow_action actions[2]; 6767 uint32_t flow_idx; 6768 struct rte_flow_error error; 6769 6770 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 6771 actions[0].conf = &jump; 6772 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 6773 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 6774 &attr, items, actions, false, &error); 6775 if (!flow_idx) { 6776 DRV_LOG(DEBUG, 6777 "Failed to create ctrl flow: rte_errno(%d)," 6778 " type(%d), message(%s)", 6779 rte_errno, error.type, 6780 error.message ? error.message : " (no stated reason)"); 6781 return -rte_errno; 6782 } 6783 return 0; 6784 } 6785 6786 /** 6787 * Enable a control flow configured from the control plane. 6788 * 6789 * @param dev 6790 * Pointer to Ethernet device. 6791 * @param eth_spec 6792 * An Ethernet flow spec to apply. 6793 * @param eth_mask 6794 * An Ethernet flow mask to apply. 6795 * @param vlan_spec 6796 * A VLAN flow spec to apply. 6797 * @param vlan_mask 6798 * A VLAN flow mask to apply. 6799 * 6800 * @return 6801 * 0 on success, a negative errno value otherwise and rte_errno is set. 6802 */ 6803 int 6804 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 6805 struct rte_flow_item_eth *eth_spec, 6806 struct rte_flow_item_eth *eth_mask, 6807 struct rte_flow_item_vlan *vlan_spec, 6808 struct rte_flow_item_vlan *vlan_mask) 6809 { 6810 struct mlx5_priv *priv = dev->data->dev_private; 6811 const struct rte_flow_attr attr = { 6812 .ingress = 1, 6813 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR, 6814 }; 6815 struct rte_flow_item items[] = { 6816 { 6817 .type = RTE_FLOW_ITEM_TYPE_ETH, 6818 .spec = eth_spec, 6819 .last = NULL, 6820 .mask = eth_mask, 6821 }, 6822 { 6823 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 6824 RTE_FLOW_ITEM_TYPE_END, 6825 .spec = vlan_spec, 6826 .last = NULL, 6827 .mask = vlan_mask, 6828 }, 6829 { 6830 .type = RTE_FLOW_ITEM_TYPE_END, 6831 }, 6832 }; 6833 uint16_t queue[priv->reta_idx_n]; 6834 struct rte_flow_action_rss action_rss = { 6835 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 6836 .level = 0, 6837 .types = priv->rss_conf.rss_hf, 6838 .key_len = priv->rss_conf.rss_key_len, 6839 .queue_num = priv->reta_idx_n, 6840 .key = priv->rss_conf.rss_key, 6841 .queue = queue, 6842 }; 6843 struct rte_flow_action actions[] = { 6844 { 6845 .type = RTE_FLOW_ACTION_TYPE_RSS, 6846 .conf = &action_rss, 6847 }, 6848 { 6849 .type = RTE_FLOW_ACTION_TYPE_END, 6850 }, 6851 }; 6852 uint32_t flow_idx; 6853 struct rte_flow_error error; 6854 unsigned int i; 6855 6856 if (!priv->reta_idx_n || !priv->rxqs_n) { 6857 return 0; 6858 } 6859 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 6860 action_rss.types = 0; 6861 for (i = 0; i != priv->reta_idx_n; ++i) 6862 queue[i] = (*priv->reta_idx)[i]; 6863 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 6864 &attr, items, actions, false, &error); 6865 if (!flow_idx) 6866 return -rte_errno; 6867 return 0; 6868 } 6869 6870 /** 6871 * Enable a flow control configured from the control plane. 6872 * 6873 * @param dev 6874 * Pointer to Ethernet device. 6875 * @param eth_spec 6876 * An Ethernet flow spec to apply. 6877 * @param eth_mask 6878 * An Ethernet flow mask to apply. 6879 * 6880 * @return 6881 * 0 on success, a negative errno value otherwise and rte_errno is set. 6882 */ 6883 int 6884 mlx5_ctrl_flow(struct rte_eth_dev *dev, 6885 struct rte_flow_item_eth *eth_spec, 6886 struct rte_flow_item_eth *eth_mask) 6887 { 6888 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 6889 } 6890 6891 /** 6892 * Create default miss flow rule matching lacp traffic 6893 * 6894 * @param dev 6895 * Pointer to Ethernet device. 6896 * @param eth_spec 6897 * An Ethernet flow spec to apply. 6898 * 6899 * @return 6900 * 0 on success, a negative errno value otherwise and rte_errno is set. 6901 */ 6902 int 6903 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 6904 { 6905 struct mlx5_priv *priv = dev->data->dev_private; 6906 /* 6907 * The LACP matching is done by only using ether type since using 6908 * a multicast dst mac causes kernel to give low priority to this flow. 6909 */ 6910 static const struct rte_flow_item_eth lacp_spec = { 6911 .type = RTE_BE16(0x8809), 6912 }; 6913 static const struct rte_flow_item_eth lacp_mask = { 6914 .type = 0xffff, 6915 }; 6916 const struct rte_flow_attr attr = { 6917 .ingress = 1, 6918 }; 6919 struct rte_flow_item items[] = { 6920 { 6921 .type = RTE_FLOW_ITEM_TYPE_ETH, 6922 .spec = &lacp_spec, 6923 .mask = &lacp_mask, 6924 }, 6925 { 6926 .type = RTE_FLOW_ITEM_TYPE_END, 6927 }, 6928 }; 6929 struct rte_flow_action actions[] = { 6930 { 6931 .type = (enum rte_flow_action_type) 6932 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 6933 }, 6934 { 6935 .type = RTE_FLOW_ACTION_TYPE_END, 6936 }, 6937 }; 6938 struct rte_flow_error error; 6939 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 6940 &attr, items, actions, false, &error); 6941 6942 if (!flow_idx) 6943 return -rte_errno; 6944 return 0; 6945 } 6946 6947 /** 6948 * Destroy a flow. 
6949 * 6950 * @see rte_flow_destroy() 6951 * @see rte_flow_ops 6952 */ 6953 int 6954 mlx5_flow_destroy(struct rte_eth_dev *dev, 6955 struct rte_flow *flow, 6956 struct rte_flow_error *error __rte_unused) 6957 { 6958 struct mlx5_priv *priv = dev->data->dev_private; 6959 6960 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 6961 return 0; 6962 } 6963 6964 /** 6965 * Destroy all flows. 6966 * 6967 * @see rte_flow_flush() 6968 * @see rte_flow_ops 6969 */ 6970 int 6971 mlx5_flow_flush(struct rte_eth_dev *dev, 6972 struct rte_flow_error *error __rte_unused) 6973 { 6974 struct mlx5_priv *priv = dev->data->dev_private; 6975 6976 mlx5_flow_list_flush(dev, &priv->flows, false); 6977 return 0; 6978 } 6979 6980 /** 6981 * Isolated mode. 6982 * 6983 * @see rte_flow_isolate() 6984 * @see rte_flow_ops 6985 */ 6986 int 6987 mlx5_flow_isolate(struct rte_eth_dev *dev, 6988 int enable, 6989 struct rte_flow_error *error) 6990 { 6991 struct mlx5_priv *priv = dev->data->dev_private; 6992 6993 if (dev->data->dev_started) { 6994 rte_flow_error_set(error, EBUSY, 6995 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 6996 NULL, 6997 "port must be stopped first"); 6998 return -rte_errno; 6999 } 7000 priv->isolated = !!enable; 7001 if (enable) 7002 dev->dev_ops = &mlx5_dev_ops_isolate; 7003 else 7004 dev->dev_ops = &mlx5_dev_ops; 7005 7006 dev->rx_descriptor_status = mlx5_rx_descriptor_status; 7007 dev->tx_descriptor_status = mlx5_tx_descriptor_status; 7008 7009 return 0; 7010 } 7011 7012 /** 7013 * Query a flow. 7014 * 7015 * @see rte_flow_query() 7016 * @see rte_flow_ops 7017 */ 7018 static int 7019 flow_drv_query(struct rte_eth_dev *dev, 7020 uint32_t flow_idx, 7021 const struct rte_flow_action *actions, 7022 void *data, 7023 struct rte_flow_error *error) 7024 { 7025 struct mlx5_priv *priv = dev->data->dev_private; 7026 const struct mlx5_flow_driver_ops *fops; 7027 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 7028 [MLX5_IPOOL_RTE_FLOW], 7029 flow_idx); 7030 enum mlx5_flow_drv_type ftype; 7031 7032 if (!flow) { 7033 return rte_flow_error_set(error, ENOENT, 7034 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 7035 NULL, 7036 "invalid flow handle"); 7037 } 7038 ftype = flow->drv_type; 7039 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 7040 fops = flow_get_drv_ops(ftype); 7041 7042 return fops->query(dev, flow, actions, data, error); 7043 } 7044 7045 /** 7046 * Query a flow. 7047 * 7048 * @see rte_flow_query() 7049 * @see rte_flow_ops 7050 */ 7051 int 7052 mlx5_flow_query(struct rte_eth_dev *dev, 7053 struct rte_flow *flow, 7054 const struct rte_flow_action *actions, 7055 void *data, 7056 struct rte_flow_error *error) 7057 { 7058 int ret; 7059 7060 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 7061 error); 7062 if (ret < 0) 7063 return ret; 7064 return 0; 7065 } 7066 7067 /** 7068 * Get rte_flow callbacks. 7069 * 7070 * @param dev 7071 * Pointer to Ethernet device structure. 7072 * @param ops 7073 * Pointer to operation-specific structure. 7074 * 7075 * @return 0 7076 */ 7077 int 7078 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused, 7079 const struct rte_flow_ops **ops) 7080 { 7081 *ops = &mlx5_flow_ops; 7082 return 0; 7083 } 7084 7085 /** 7086 * Validate meter policy actions. 7087 * Dispatcher for action type specific validation. 7088 * 7089 * @param[in] dev 7090 * Pointer to the Ethernet device structure. 7091 * @param[in] action 7092 * The meter policy action object to validate. 7093 * @param[in] attr 7094 * Attributes of flow to determine steering domain. 
7095 * @param[out] is_rss 7096 * Is RSS or not. 7097 * @param[out] domain_bitmap 7098 * Domain bitmap. 7099 * @param[out] is_def_policy 7100 * Is default policy or not. 7101 * @param[out] error 7102 * Perform verbose error reporting if not NULL. Initialized in case of 7103 * error only. 7104 * 7105 * @return 7106 * 0 on success, otherwise negative errno value. 7107 */ 7108 int 7109 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev, 7110 const struct rte_flow_action *actions[RTE_COLORS], 7111 struct rte_flow_attr *attr, 7112 bool *is_rss, 7113 uint8_t *domain_bitmap, 7114 bool *is_def_policy, 7115 struct rte_mtr_error *error) 7116 { 7117 const struct mlx5_flow_driver_ops *fops; 7118 7119 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7120 return fops->validate_mtr_acts(dev, actions, attr, 7121 is_rss, domain_bitmap, is_def_policy, error); 7122 } 7123 7124 /** 7125 * Destroy the meter table set. 7126 * 7127 * @param[in] dev 7128 * Pointer to Ethernet device. 7129 * @param[in] mtr_policy 7130 * Meter policy struct. 7131 */ 7132 void 7133 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev, 7134 struct mlx5_flow_meter_policy *mtr_policy) 7135 { 7136 const struct mlx5_flow_driver_ops *fops; 7137 7138 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7139 fops->destroy_mtr_acts(dev, mtr_policy); 7140 } 7141 7142 /** 7143 * Create policy action, lock free, 7144 * (mutex should be acquired by caller). 7145 * Dispatcher for action type specific call. 7146 * 7147 * @param[in] dev 7148 * Pointer to the Ethernet device structure. 7149 * @param[in] mtr_policy 7150 * Meter policy struct. 7151 * @param[in] action 7152 * Action specification used to create meter actions. 7153 * @param[out] error 7154 * Perform verbose error reporting if not NULL. Initialized in case of 7155 * error only. 7156 * 7157 * @return 7158 * 0 on success, otherwise negative errno value. 7159 */ 7160 int 7161 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev, 7162 struct mlx5_flow_meter_policy *mtr_policy, 7163 const struct rte_flow_action *actions[RTE_COLORS], 7164 struct rte_mtr_error *error) 7165 { 7166 const struct mlx5_flow_driver_ops *fops; 7167 7168 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7169 return fops->create_mtr_acts(dev, mtr_policy, actions, error); 7170 } 7171 7172 /** 7173 * Create policy rules, lock free, 7174 * (mutex should be acquired by caller). 7175 * Dispatcher for action type specific call. 7176 * 7177 * @param[in] dev 7178 * Pointer to the Ethernet device structure. 7179 * @param[in] mtr_policy 7180 * Meter policy struct. 7181 * 7182 * @return 7183 * 0 on success, -1 otherwise. 7184 */ 7185 int 7186 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev, 7187 struct mlx5_flow_meter_policy *mtr_policy) 7188 { 7189 const struct mlx5_flow_driver_ops *fops; 7190 7191 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7192 return fops->create_policy_rules(dev, mtr_policy); 7193 } 7194 7195 /** 7196 * Destroy policy rules, lock free, 7197 * (mutex should be acquired by caller). 7198 * Dispatcher for action type specific call. 7199 * 7200 * @param[in] dev 7201 * Pointer to the Ethernet device structure. 7202 * @param[in] mtr_policy 7203 * Meter policy struct. 7204 */ 7205 void 7206 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev, 7207 struct mlx5_flow_meter_policy *mtr_policy) 7208 { 7209 const struct mlx5_flow_driver_ops *fops; 7210 7211 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7212 fops->destroy_policy_rules(dev, mtr_policy); 7213 } 7214 7215 /** 7216 * Destroy the default policy table set. 
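 *
 * Like the other meter/policy helpers around it, this is a thin dispatcher:
 * it assumes the DV driver ops are in use and simply forwards the call to
 * fops->destroy_def_policy().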
7217 * 7218 * @param[in] dev 7219 * Pointer to Ethernet device. 7220 */ 7221 void 7222 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev) 7223 { 7224 const struct mlx5_flow_driver_ops *fops; 7225 7226 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7227 fops->destroy_def_policy(dev); 7228 } 7229 7230 /** 7231 * Create the default policy table set. 7232 * 7233 * @param[in] dev 7234 * Pointer to Ethernet device. 7235 * 7236 * @return 7237 * 0 on success, -1 otherwise. 7238 */ 7239 int 7240 mlx5_flow_create_def_policy(struct rte_eth_dev *dev) 7241 { 7242 const struct mlx5_flow_driver_ops *fops; 7243 7244 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7245 return fops->create_def_policy(dev); 7246 } 7247 7248 /** 7249 * Create the needed meter and suffix tables. 7250 * 7251 * @param[in] dev 7252 * Pointer to Ethernet device. * @param[in] fm * Pointer to flow meter information. * @param[in] mtr_idx * Index to the flow meter. * @param[in] domain_bitmap * Bitmap of the meter steering domains. 7253 * 7254 * @return 7255 * 0 on success, -1 otherwise. 7256 */ 7257 int 7258 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev, 7259 struct mlx5_flow_meter_info *fm, 7260 uint32_t mtr_idx, 7261 uint8_t domain_bitmap) 7262 { 7263 const struct mlx5_flow_driver_ops *fops; 7264 7265 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7266 return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap); 7267 } 7268 7269 /** 7270 * Destroy the meter table set. 7271 * 7272 * @param[in] dev 7273 * Pointer to Ethernet device. 7274 * @param[in] fm 7275 * Pointer to flow meter information. 7276 */ 7277 void 7278 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev, 7279 struct mlx5_flow_meter_info *fm) 7280 { 7281 const struct mlx5_flow_driver_ops *fops; 7282 7283 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7284 fops->destroy_mtr_tbls(dev, fm); 7285 } 7286 7287 /** 7288 * Destroy the global meter drop table. 7289 * 7290 * @param[in] dev 7291 * Pointer to Ethernet device. 7292 */ 7293 void 7294 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev) 7295 { 7296 const struct mlx5_flow_driver_ops *fops; 7297 7298 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7299 fops->destroy_mtr_drop_tbls(dev); 7300 } 7301 7302 /** 7303 * Destroy the sub policy table with RX queue. 7304 * 7305 * @param[in] dev 7306 * Pointer to Ethernet device. 7307 * @param[in] mtr_policy 7308 * Pointer to meter policy table. 7309 */ 7310 void 7311 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev, 7312 struct mlx5_flow_meter_policy *mtr_policy) 7313 { 7314 const struct mlx5_flow_driver_ops *fops; 7315 7316 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7317 fops->destroy_sub_policy_with_rxq(dev, mtr_policy); 7318 } 7319 7320 /** 7321 * Allocate the needed ASO flow meter id. 7322 * 7323 * @param[in] dev 7324 * Pointer to Ethernet device. 7325 * 7326 * @return 7327 * Index to ASO flow meter on success, 0 otherwise. 7328 */ 7329 uint32_t 7330 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev) 7331 { 7332 const struct mlx5_flow_driver_ops *fops; 7333 7334 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7335 return fops->create_meter(dev); 7336 } 7337 7338 /** 7339 * Free the ASO flow meter id. 7340 * 7341 * @param[in] dev 7342 * Pointer to Ethernet device. 7343 * @param[in] mtr_idx 7344 * Index to ASO flow meter to be freed. 7348 */ 7349 void 7350 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx) 7351 { 7352 const struct mlx5_flow_driver_ops *fops; 7353 7354 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7355 fops->free_meter(dev, mtr_idx); 7356 } 7357 7358 /** 7359 * Allocate a counter. 7360 * 7361 * @param[in] dev 7362 * Pointer to Ethernet device structure.
7363 * 7364 * @return 7365 * Index to allocated counter on success, 0 otherwise. 7366 */ 7367 uint32_t 7368 mlx5_counter_alloc(struct rte_eth_dev *dev) 7369 { 7370 const struct mlx5_flow_driver_ops *fops; 7371 struct rte_flow_attr attr = { .transfer = 0 }; 7372 7373 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 7374 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7375 return fops->counter_alloc(dev); 7376 } 7377 DRV_LOG(ERR, 7378 "port %u counter allocate is not supported.", 7379 dev->data->port_id); 7380 return 0; 7381 } 7382 7383 /** 7384 * Free a counter. 7385 * 7386 * @param[in] dev 7387 * Pointer to Ethernet device structure. 7388 * @param[in] cnt 7389 * Index to counter to be free. 7390 */ 7391 void 7392 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt) 7393 { 7394 const struct mlx5_flow_driver_ops *fops; 7395 struct rte_flow_attr attr = { .transfer = 0 }; 7396 7397 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 7398 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7399 fops->counter_free(dev, cnt); 7400 return; 7401 } 7402 DRV_LOG(ERR, 7403 "port %u counter free is not supported.", 7404 dev->data->port_id); 7405 } 7406 7407 /** 7408 * Query counter statistics. 7409 * 7410 * @param[in] dev 7411 * Pointer to Ethernet device structure. 7412 * @param[in] cnt 7413 * Index to counter to query. 7414 * @param[in] clear 7415 * Set to clear counter statistics. 7416 * @param[out] pkts 7417 * The counter hits packets number to save. 7418 * @param[out] bytes 7419 * The counter hits bytes number to save. 7420 * 7421 * @return 7422 * 0 on success, a negative errno value otherwise. 7423 */ 7424 int 7425 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, 7426 bool clear, uint64_t *pkts, uint64_t *bytes) 7427 { 7428 const struct mlx5_flow_driver_ops *fops; 7429 struct rte_flow_attr attr = { .transfer = 0 }; 7430 7431 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 7432 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7433 return fops->counter_query(dev, cnt, clear, pkts, bytes); 7434 } 7435 DRV_LOG(ERR, 7436 "port %u counter query is not supported.", 7437 dev->data->port_id); 7438 return -ENOTSUP; 7439 } 7440 7441 /** 7442 * Allocate a new memory for the counter values wrapped by all the needed 7443 * management. 7444 * 7445 * @param[in] sh 7446 * Pointer to mlx5_dev_ctx_shared object. 7447 * 7448 * @return 7449 * 0 on success, a negative errno value otherwise. 
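 *
 * Layout note (derived from the size computation below): a single allocation
 * holds the raw counter statistics for
 * (MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES) raws, followed by
 * the array of struct mlx5_counter_stats_raw descriptors, with the
 * struct mlx5_counter_stats_mem_mng header placed at the very end of the
 * buffer.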
7450 */ 7451 static int 7452 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh) 7453 { 7454 struct mlx5_devx_mkey_attr mkey_attr; 7455 struct mlx5_counter_stats_mem_mng *mem_mng; 7456 volatile struct flow_counter_stats *raw_data; 7457 int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES; 7458 int size = (sizeof(struct flow_counter_stats) * 7459 MLX5_COUNTERS_PER_POOL + 7460 sizeof(struct mlx5_counter_stats_raw)) * raws_n + 7461 sizeof(struct mlx5_counter_stats_mem_mng); 7462 size_t pgsize = rte_mem_page_size(); 7463 uint8_t *mem; 7464 int i; 7465 7466 if (pgsize == (size_t)-1) { 7467 DRV_LOG(ERR, "Failed to get mem page size"); 7468 rte_errno = ENOMEM; 7469 return -ENOMEM; 7470 } 7471 mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY); 7472 if (!mem) { 7473 rte_errno = ENOMEM; 7474 return -ENOMEM; 7475 } 7476 mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1; 7477 size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n; 7478 mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size, 7479 IBV_ACCESS_LOCAL_WRITE); 7480 if (!mem_mng->umem) { 7481 rte_errno = errno; 7482 mlx5_free(mem); 7483 return -rte_errno; 7484 } 7485 memset(&mkey_attr, 0, sizeof(mkey_attr)); 7486 mkey_attr.addr = (uintptr_t)mem; 7487 mkey_attr.size = size; 7488 mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem); 7489 mkey_attr.pd = sh->pdn; 7490 mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write; 7491 mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read; 7492 mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr); 7493 if (!mem_mng->dm) { 7494 mlx5_os_umem_dereg(mem_mng->umem); 7495 rte_errno = errno; 7496 mlx5_free(mem); 7497 return -rte_errno; 7498 } 7499 mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size); 7500 raw_data = (volatile struct flow_counter_stats *)mem; 7501 for (i = 0; i < raws_n; ++i) { 7502 mem_mng->raws[i].mem_mng = mem_mng; 7503 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL; 7504 } 7505 for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i) 7506 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, 7507 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i, 7508 next); 7509 LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next); 7510 sh->cmng.mem_mng = mem_mng; 7511 return 0; 7512 } 7513 7514 /** 7515 * Set the statistic memory to the new counter pool. 7516 * 7517 * @param[in] sh 7518 * Pointer to mlx5_dev_ctx_shared object. 7519 * @param[in] pool 7520 * Pointer to the pool to set the statistic memory. 7521 * 7522 * @return 7523 * 0 on success, a negative errno value otherwise. 7524 */ 7525 static int 7526 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh, 7527 struct mlx5_flow_counter_pool *pool) 7528 { 7529 struct mlx5_flow_counter_mng *cmng = &sh->cmng; 7530 /* Resize statistic memory once used out. */ 7531 if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) && 7532 mlx5_flow_create_counter_stat_mem_mng(sh)) { 7533 DRV_LOG(ERR, "Cannot resize counter stat mem."); 7534 return -1; 7535 } 7536 rte_spinlock_lock(&pool->sl); 7537 pool->raw = cmng->mem_mng->raws + pool->index % 7538 MLX5_CNT_CONTAINER_RESIZE; 7539 rte_spinlock_unlock(&pool->sl); 7540 pool->raw_hw = NULL; 7541 return 0; 7542 } 7543 7544 #define MLX5_POOL_QUERY_FREQ_US 1000000 7545 7546 /** 7547 * Set the periodic procedure for triggering asynchronous batch queries for all 7548 * the counter pools. 7549 * 7550 * @param[in] sh 7551 * Pointer to mlx5_dev_ctx_shared object. 
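 *
 * The alarm period is MLX5_POOL_QUERY_FREQ_US divided by the number of valid
 * pools; since each alarm tick queries a single pool in round-robin order,
 * every pool is queried roughly once per MLX5_POOL_QUERY_FREQ_US interval.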
7552 */ 7553 void 7554 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) 7555 { 7556 uint32_t pools_n, us; 7557 7558 pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED); 7559 us = MLX5_POOL_QUERY_FREQ_US / pools_n; 7560 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); 7561 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { 7562 sh->cmng.query_thread_on = 0; 7563 DRV_LOG(ERR, "Cannot reinitialize query alarm"); 7564 } else { 7565 sh->cmng.query_thread_on = 1; 7566 } 7567 } 7568 7569 /** 7570 * The periodic procedure for triggering asynchronous batch queries for all the 7571 * counter pools. This function is probably called by the host thread. 7572 * 7573 * @param[in] arg 7574 * The parameter for the alarm process. 7575 */ 7576 void 7577 mlx5_flow_query_alarm(void *arg) 7578 { 7579 struct mlx5_dev_ctx_shared *sh = arg; 7580 int ret; 7581 uint16_t pool_index = sh->cmng.pool_index; 7582 struct mlx5_flow_counter_mng *cmng = &sh->cmng; 7583 struct mlx5_flow_counter_pool *pool; 7584 uint16_t n_valid; 7585 7586 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) 7587 goto set_alarm; 7588 rte_spinlock_lock(&cmng->pool_update_sl); 7589 pool = cmng->pools[pool_index]; 7590 n_valid = cmng->n_valid; 7591 rte_spinlock_unlock(&cmng->pool_update_sl); 7592 /* Set the statistic memory to the new created pool. */ 7593 if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))) 7594 goto set_alarm; 7595 if (pool->raw_hw) 7596 /* There is a pool query in progress. */ 7597 goto set_alarm; 7598 pool->raw_hw = 7599 LIST_FIRST(&sh->cmng.free_stat_raws); 7600 if (!pool->raw_hw) 7601 /* No free counter statistics raw memory. */ 7602 goto set_alarm; 7603 /* 7604 * Identify the counters released between query trigger and query 7605 * handle more efficiently. The counter released in this gap period 7606 * should wait for a new round of query as the new arrived packets 7607 * will not be taken into account. 7608 */ 7609 pool->query_gen++; 7610 ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0, 7611 MLX5_COUNTERS_PER_POOL, 7612 NULL, NULL, 7613 pool->raw_hw->mem_mng->dm->id, 7614 (void *)(uintptr_t) 7615 pool->raw_hw->data, 7616 sh->devx_comp, 7617 (uint64_t)(uintptr_t)pool); 7618 if (ret) { 7619 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" 7620 " %d", pool->min_dcs->id); 7621 pool->raw_hw = NULL; 7622 goto set_alarm; 7623 } 7624 LIST_REMOVE(pool->raw_hw, next); 7625 sh->cmng.pending_queries++; 7626 pool_index++; 7627 if (pool_index >= n_valid) 7628 pool_index = 0; 7629 set_alarm: 7630 sh->cmng.pool_index = pool_index; 7631 mlx5_set_query_alarm(sh); 7632 } 7633 7634 /** 7635 * Check and callback event for new aged flow in the counter pool 7636 * 7637 * @param[in] sh 7638 * Pointer to mlx5_dev_ctx_shared object. 7639 * @param[in] pool 7640 * Pointer to Current counter pool. 
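 *
 * For every AGE_CANDIDATE counter the just-arrived raw statistics are
 * compared with the previous snapshot: a change in hits resets
 * sec_since_last_hit, otherwise the elapsed time is accumulated and, once
 * the configured timeout is exceeded, the counter is moved to the per-port
 * aged list and an ageing event is signalled.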
7641 */ 7642 static void 7643 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, 7644 struct mlx5_flow_counter_pool *pool) 7645 { 7646 struct mlx5_priv *priv; 7647 struct mlx5_flow_counter *cnt; 7648 struct mlx5_age_info *age_info; 7649 struct mlx5_age_param *age_param; 7650 struct mlx5_counter_stats_raw *cur = pool->raw_hw; 7651 struct mlx5_counter_stats_raw *prev = pool->raw; 7652 const uint64_t curr_time = MLX5_CURR_TIME_SEC; 7653 const uint32_t time_delta = curr_time - pool->time_of_last_age_check; 7654 uint16_t expected = AGE_CANDIDATE; 7655 uint32_t i; 7656 7657 pool->time_of_last_age_check = curr_time; 7658 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { 7659 cnt = MLX5_POOL_GET_CNT(pool, i); 7660 age_param = MLX5_CNT_TO_AGE(cnt); 7661 if (__atomic_load_n(&age_param->state, 7662 __ATOMIC_RELAXED) != AGE_CANDIDATE) 7663 continue; 7664 if (cur->data[i].hits != prev->data[i].hits) { 7665 __atomic_store_n(&age_param->sec_since_last_hit, 0, 7666 __ATOMIC_RELAXED); 7667 continue; 7668 } 7669 if (__atomic_add_fetch(&age_param->sec_since_last_hit, 7670 time_delta, 7671 __ATOMIC_RELAXED) <= age_param->timeout) 7672 continue; 7673 /** 7674 * Hold the lock first, or if between the 7675 * state AGE_TMOUT and tailq operation the 7676 * release happened, the release procedure 7677 * may delete a non-existent tailq node. 7678 */ 7679 priv = rte_eth_devices[age_param->port_id].data->dev_private; 7680 age_info = GET_PORT_AGE_INFO(priv); 7681 rte_spinlock_lock(&age_info->aged_sl); 7682 if (__atomic_compare_exchange_n(&age_param->state, &expected, 7683 AGE_TMOUT, false, 7684 __ATOMIC_RELAXED, 7685 __ATOMIC_RELAXED)) { 7686 TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); 7687 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 7688 } 7689 rte_spinlock_unlock(&age_info->aged_sl); 7690 } 7691 mlx5_age_event_prepare(sh); 7692 } 7693 7694 /** 7695 * Handler for the HW respond about ready values from an asynchronous batch 7696 * query. This function is probably called by the host thread. 7697 * 7698 * @param[in] sh 7699 * The pointer to the shared device context. 7700 * @param[in] async_id 7701 * The Devx async ID. 7702 * @param[in] status 7703 * The status of the completion. 7704 */ 7705 void 7706 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, 7707 uint64_t async_id, int status) 7708 { 7709 struct mlx5_flow_counter_pool *pool = 7710 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; 7711 struct mlx5_counter_stats_raw *raw_to_free; 7712 uint8_t query_gen = pool->query_gen ^ 1; 7713 struct mlx5_flow_counter_mng *cmng = &sh->cmng; 7714 enum mlx5_counter_type cnt_type = 7715 pool->is_aged ? MLX5_COUNTER_TYPE_AGE : 7716 MLX5_COUNTER_TYPE_ORIGIN; 7717 7718 if (unlikely(status)) { 7719 raw_to_free = pool->raw_hw; 7720 } else { 7721 raw_to_free = pool->raw; 7722 if (pool->is_aged) 7723 mlx5_flow_aging_check(sh, pool); 7724 rte_spinlock_lock(&pool->sl); 7725 pool->raw = pool->raw_hw; 7726 rte_spinlock_unlock(&pool->sl); 7727 /* Be sure the new raw counters data is updated in memory. 
*/ 7728 rte_io_wmb(); 7729 if (!TAILQ_EMPTY(&pool->counters[query_gen])) { 7730 rte_spinlock_lock(&cmng->csl[cnt_type]); 7731 TAILQ_CONCAT(&cmng->counters[cnt_type], 7732 &pool->counters[query_gen], next); 7733 rte_spinlock_unlock(&cmng->csl[cnt_type]); 7734 } 7735 } 7736 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); 7737 pool->raw_hw = NULL; 7738 sh->cmng.pending_queries--; 7739 } 7740 7741 static int 7742 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table, 7743 const struct flow_grp_info *grp_info, 7744 struct rte_flow_error *error) 7745 { 7746 if (grp_info->transfer && grp_info->external && 7747 grp_info->fdb_def_rule) { 7748 if (group == UINT32_MAX) 7749 return rte_flow_error_set 7750 (error, EINVAL, 7751 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 7752 NULL, 7753 "group index not supported"); 7754 *table = group + 1; 7755 } else { 7756 *table = group; 7757 } 7758 DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table); 7759 return 0; 7760 } 7761 7762 /** 7763 * Translate the rte_flow group index to HW table value. 7764 * 7765 * If tunnel offload is disabled, all group ids converted to flow table 7766 * id using the standard method. 7767 * If tunnel offload is enabled, group id can be converted using the 7768 * standard or tunnel conversion method. Group conversion method 7769 * selection depends on flags in `grp_info` parameter: 7770 * - Internal (grp_info.external == 0) groups conversion uses the 7771 * standard method. 7772 * - Group ids in JUMP action converted with the tunnel conversion. 7773 * - Group id in rule attribute conversion depends on a rule type and 7774 * group id value: 7775 * ** non zero group attributes converted with the tunnel method 7776 * ** zero group attribute in non-tunnel rule is converted using the 7777 * standard method - there's only one root table 7778 * ** zero group attribute in steer tunnel rule is converted with the 7779 * standard method - single root table 7780 * ** zero group attribute in match tunnel rule is a special OvS 7781 * case: that value is used for portability reasons. That group 7782 * id is converted with the tunnel conversion method. 7783 * 7784 * @param[in] dev 7785 * Port device 7786 * @param[in] tunnel 7787 * PMD tunnel offload object 7788 * @param[in] group 7789 * rte_flow group index value. 7790 * @param[out] table 7791 * HW table value. 7792 * @param[in] grp_info 7793 * flags used for conversion 7794 * @param[out] error 7795 * Pointer to error structure. 7796 * 7797 * @return 7798 * 0 on success, a negative errno value otherwise and rte_errno is set. 7799 */ 7800 int 7801 mlx5_flow_group_to_table(struct rte_eth_dev *dev, 7802 const struct mlx5_flow_tunnel *tunnel, 7803 uint32_t group, uint32_t *table, 7804 const struct flow_grp_info *grp_info, 7805 struct rte_flow_error *error) 7806 { 7807 int ret; 7808 bool standard_translation; 7809 7810 if (!grp_info->skip_scale && grp_info->external && 7811 group < MLX5_MAX_TABLES_EXTERNAL) 7812 group *= MLX5_FLOW_TABLE_FACTOR; 7813 if (is_tunnel_offload_active(dev)) { 7814 standard_translation = !grp_info->external || 7815 grp_info->std_tbl_fix; 7816 } else { 7817 standard_translation = true; 7818 } 7819 DRV_LOG(DEBUG, 7820 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s", 7821 dev->data->port_id, group, grp_info->transfer, 7822 grp_info->external, grp_info->fdb_def_rule, 7823 standard_translation ? 
"STANDARD" : "TUNNEL"); 7824 if (standard_translation) 7825 ret = flow_group_to_table(dev->data->port_id, group, table, 7826 grp_info, error); 7827 else 7828 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group, 7829 table, error); 7830 7831 return ret; 7832 } 7833 7834 /** 7835 * Discover availability of metadata reg_c's. 7836 * 7837 * Iteratively use test flows to check availability. 7838 * 7839 * @param[in] dev 7840 * Pointer to the Ethernet device structure. 7841 * 7842 * @return 7843 * 0 on success, a negative errno value otherwise and rte_errno is set. 7844 */ 7845 int 7846 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) 7847 { 7848 struct mlx5_priv *priv = dev->data->dev_private; 7849 struct mlx5_dev_config *config = &priv->config; 7850 enum modify_reg idx; 7851 int n = 0; 7852 7853 /* reg_c[0] and reg_c[1] are reserved. */ 7854 config->flow_mreg_c[n++] = REG_C_0; 7855 config->flow_mreg_c[n++] = REG_C_1; 7856 /* Discover availability of other reg_c's. */ 7857 for (idx = REG_C_2; idx <= REG_C_7; ++idx) { 7858 struct rte_flow_attr attr = { 7859 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 7860 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR, 7861 .ingress = 1, 7862 }; 7863 struct rte_flow_item items[] = { 7864 [0] = { 7865 .type = RTE_FLOW_ITEM_TYPE_END, 7866 }, 7867 }; 7868 struct rte_flow_action actions[] = { 7869 [0] = { 7870 .type = (enum rte_flow_action_type) 7871 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 7872 .conf = &(struct mlx5_flow_action_copy_mreg){ 7873 .src = REG_C_1, 7874 .dst = idx, 7875 }, 7876 }, 7877 [1] = { 7878 .type = RTE_FLOW_ACTION_TYPE_JUMP, 7879 .conf = &(struct rte_flow_action_jump){ 7880 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 7881 }, 7882 }, 7883 [2] = { 7884 .type = RTE_FLOW_ACTION_TYPE_END, 7885 }, 7886 }; 7887 uint32_t flow_idx; 7888 struct rte_flow *flow; 7889 struct rte_flow_error error; 7890 7891 if (!config->dv_flow_en) 7892 break; 7893 /* Create internal flow, validation skips copy action. */ 7894 flow_idx = flow_list_create(dev, NULL, &attr, items, 7895 actions, false, &error); 7896 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 7897 flow_idx); 7898 if (!flow) 7899 continue; 7900 config->flow_mreg_c[n++] = idx; 7901 flow_list_destroy(dev, NULL, flow_idx); 7902 } 7903 for (; n < MLX5_MREG_C_NUM; ++n) 7904 config->flow_mreg_c[n] = REG_NON; 7905 return 0; 7906 } 7907 7908 int 7909 save_dump_file(const uint8_t *data, uint32_t size, 7910 uint32_t type, uint32_t id, void *arg, FILE *file) 7911 { 7912 char line[BUF_SIZE]; 7913 uint32_t out = 0; 7914 uint32_t k; 7915 uint32_t actions_num; 7916 struct rte_flow_query_count *count; 7917 7918 memset(line, 0, BUF_SIZE); 7919 switch (type) { 7920 case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR: 7921 actions_num = *(uint32_t *)(arg); 7922 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%x,%d,", 7923 type, id, actions_num); 7924 break; 7925 case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT: 7926 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%x,", 7927 type, id); 7928 break; 7929 case DR_DUMP_REC_TYPE_PMD_COUNTER: 7930 count = (struct rte_flow_query_count *)arg; 7931 fprintf(file, "%d,0x%x,%" PRIu64 ",%" PRIu64 "\n", type, 7932 id, count->hits, count->bytes); 7933 return 0; 7934 default: 7935 return -1; 7936 } 7937 7938 for (k = 0; k < size; k++) { 7939 /* Make sure we do not overrun the line buffer length. 
*/ 7940 if (out >= BUF_SIZE - 4) { 7941 line[out] = '\0'; 7942 break; 7943 } 7944 out += snprintf(line + out, BUF_SIZE - out, "%02x", 7945 (data[k]) & 0xff); 7946 } 7947 fprintf(file, "%s\n", line); 7948 return 0; 7949 } 7950 7951 int 7952 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow, 7953 struct rte_flow_query_count *count, struct rte_flow_error *error) 7954 { 7955 struct rte_flow_action action[2]; 7956 enum mlx5_flow_drv_type ftype; 7957 const struct mlx5_flow_driver_ops *fops; 7958 7959 if (!flow) { 7960 return rte_flow_error_set(error, ENOENT, 7961 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 7962 NULL, 7963 "invalid flow handle"); 7964 } 7965 action[0].type = RTE_FLOW_ACTION_TYPE_COUNT; 7966 action[1].type = RTE_FLOW_ACTION_TYPE_END; 7967 if (flow->counter) { 7968 memset(count, 0, sizeof(struct rte_flow_query_count)); 7969 ftype = (enum mlx5_flow_drv_type)(flow->drv_type); 7970 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && 7971 ftype < MLX5_FLOW_TYPE_MAX); 7972 fops = flow_get_drv_ops(ftype); 7973 return fops->query(dev, flow, action, count, error); 7974 } 7975 return -1; 7976 } 7977 7978 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 7979 /** 7980 * Dump flow ipool data to file 7981 * 7982 * @param[in] dev 7983 * The pointer to Ethernet device. 7984 * @param[in] file 7985 * A pointer to a file for output. 7986 * @param[out] error 7987 * Perform verbose error reporting if not NULL. PMDs initialize this 7988 * structure in case of error only. 7989 * @return 7990 * 0 on success, a negative value otherwise. 7991 */ 7992 int 7993 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev, 7994 struct rte_flow *flow, FILE *file, 7995 struct rte_flow_error *error) 7996 { 7997 struct mlx5_priv *priv = dev->data->dev_private; 7998 struct mlx5_flow_dv_modify_hdr_resource *modify_hdr; 7999 struct mlx5_flow_dv_encap_decap_resource *encap_decap; 8000 uint32_t handle_idx; 8001 struct mlx5_flow_handle *dh; 8002 struct rte_flow_query_count count; 8003 uint32_t actions_num; 8004 const uint8_t *data; 8005 size_t size; 8006 uint32_t id; 8007 uint32_t type; 8008 8009 if (!flow) { 8010 return rte_flow_error_set(error, ENOENT, 8011 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 8012 NULL, 8013 "invalid flow handle"); 8014 } 8015 handle_idx = flow->dev_handles; 8016 while (handle_idx) { 8017 dh = mlx5_ipool_get(priv->sh->ipool 8018 [MLX5_IPOOL_MLX5_FLOW], handle_idx); 8019 if (!dh) 8020 continue; 8021 handle_idx = dh->next.next; 8022 id = (uint32_t)(uintptr_t)dh->drv_flow; 8023 8024 /* query counter */ 8025 type = DR_DUMP_REC_TYPE_PMD_COUNTER; 8026 if (!mlx5_flow_query_counter(dev, flow, &count, error)) 8027 save_dump_file(NULL, 0, type, 8028 id, (void *)&count, file); 8029 8030 /* Get modify_hdr and encap_decap buf from ipools. 
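		 * modify_hdr is referenced directly from the device flow
		 * handle, while the encap/decap resource is stored as an
		 * ipool index and must be resolved via mlx5_ipool_get()
		 * before its buffer can be dumped.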
*/ 8031 encap_decap = NULL; 8032 modify_hdr = dh->dvh.modify_hdr; 8033 8034 if (dh->dvh.rix_encap_decap) { 8035 encap_decap = mlx5_ipool_get(priv->sh->ipool 8036 [MLX5_IPOOL_DECAP_ENCAP], 8037 dh->dvh.rix_encap_decap); 8038 } 8039 if (modify_hdr) { 8040 data = (const uint8_t *)modify_hdr->actions; 8041 size = (size_t)(modify_hdr->actions_num) * 8; 8042 actions_num = modify_hdr->actions_num; 8043 type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR; 8044 save_dump_file(data, size, type, id, 8045 (void *)(&actions_num), file); 8046 } 8047 if (encap_decap) { 8048 data = encap_decap->buf; 8049 size = encap_decap->size; 8050 type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT; 8051 save_dump_file(data, size, type, 8052 id, NULL, file); 8053 } 8054 } 8055 return 0; 8056 } 8057 #endif 8058 8059 /** 8060 * Dump flow raw HW data to file. 8061 * 8062 * @param[in] dev 8063 * The pointer to Ethernet device. * @param[in] flow_idx * The flow handle to dump; if NULL, all flows are dumped. 8064 * @param[in] file 8065 * A pointer to a file for output. 8066 * @param[out] error 8067 * Perform verbose error reporting if not NULL. PMDs initialize this 8068 * structure in case of error only. 8069 * @return 8070 * 0 on success, a negative value otherwise. 8071 */ 8072 int 8073 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx, 8074 FILE *file, 8075 struct rte_flow_error *error __rte_unused) 8076 { 8077 struct mlx5_priv *priv = dev->data->dev_private; 8078 struct mlx5_dev_ctx_shared *sh = priv->sh; 8079 uint32_t handle_idx; 8080 int ret; 8081 struct mlx5_flow_handle *dh; 8082 struct rte_flow *flow; 8083 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 8084 uint32_t idx; 8085 #endif 8086 8087 if (!priv->config.dv_flow_en) { 8088 if (fputs("device dv flow disabled\n", file) <= 0) 8089 return -errno; 8090 return -ENOTSUP; 8091 } 8092 8093 /* dump all */ 8094 if (!flow_idx) { 8095 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 8096 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 8097 priv->flows, idx, flow, next) 8098 mlx5_flow_dev_dump_ipool(dev, flow, file, error); 8099 #endif 8100 return mlx5_devx_cmd_flow_dump(sh->fdb_domain, 8101 sh->rx_domain, 8102 sh->tx_domain, file); 8103 } 8104 /* dump one */ 8105 flow = mlx5_ipool_get(priv->sh->ipool 8106 [MLX5_IPOOL_RTE_FLOW], (uintptr_t)(void *)flow_idx); 8107 if (!flow) 8108 return -ENOENT; 8109 8110 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 8111 mlx5_flow_dev_dump_ipool(dev, flow, file, error); 8112 #endif 8113 handle_idx = flow->dev_handles; 8114 while (handle_idx) { 8115 dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], 8116 handle_idx); 8117 if (!dh) 8118 return -ENOENT; 8119 if (dh->drv_flow) { 8120 ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow, 8121 file); 8122 if (ret) 8123 return -ENOENT; 8124 } 8125 handle_idx = dh->next.next; 8126 } 8127 return 0; 8128 } 8129 8130 /** 8131 * Get aged-out flows. 8132 * 8133 * @param[in] dev 8134 * Pointer to the Ethernet device structure. 8135 * @param[in] contexts 8136 * The address of an array of pointers to the aged-out flow contexts. 8137 * @param[in] nb_contexts 8138 * The length of the context array pointers. 8139 * @param[out] error 8140 * Perform verbose error reporting if not NULL. Initialized in case of 8141 * error only. 8142 * 8143 * @return 8144 * The number of aged-out flow contexts reported on success, otherwise a 8145 * negative errno value. If nb_contexts is 0, the total number of aged-out 8146 * contexts is returned; otherwise the number of aged-out flows reported 8147 * in the context array.
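 *
 * Illustrative usage sketch only (generic rte_flow API; port_id and the
 * array size are placeholders): first query the number of aged contexts,
 * then fetch them.
 * @code
 * void *contexts[64];
 * struct rte_flow_error err;
 * int n = rte_flow_get_aged_flows(port_id, NULL, 0, &err);
 *
 * if (n > 0)
 *         n = rte_flow_get_aged_flows(port_id, contexts,
 *                                     RTE_MIN(n, 64), &err);
 * @endcode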
8148 */ 8149 int 8150 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, 8151 uint32_t nb_contexts, struct rte_flow_error *error) 8152 { 8153 const struct mlx5_flow_driver_ops *fops; 8154 struct rte_flow_attr attr = { .transfer = 0 }; 8155 8156 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 8157 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 8158 return fops->get_aged_flows(dev, contexts, nb_contexts, 8159 error); 8160 } 8161 DRV_LOG(ERR, 8162 "port %u get aged flows is not supported.", 8163 dev->data->port_id); 8164 return -ENOTSUP; 8165 } 8166 8167 /* Wrapper for driver action_validate op callback */ 8168 static int 8169 flow_drv_action_validate(struct rte_eth_dev *dev, 8170 const struct rte_flow_indir_action_conf *conf, 8171 const struct rte_flow_action *action, 8172 const struct mlx5_flow_driver_ops *fops, 8173 struct rte_flow_error *error) 8174 { 8175 static const char err_msg[] = "indirect action validation unsupported"; 8176 8177 if (!fops->action_validate) { 8178 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); 8179 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, 8180 NULL, err_msg); 8181 return -rte_errno; 8182 } 8183 return fops->action_validate(dev, conf, action, error); 8184 } 8185 8186 /** 8187 * Destroys the shared action by handle. 8188 * 8189 * @param dev 8190 * Pointer to Ethernet device structure. 8191 * @param[in] handle 8192 * Handle for the indirect action object to be destroyed. 8193 * @param[out] error 8194 * Perform verbose error reporting if not NULL. PMDs initialize this 8195 * structure in case of error only. 8196 * 8197 * @return 8198 * 0 on success, a negative errno value otherwise and rte_errno is set. 8199 * 8200 * @note: wrapper for driver action_create op callback. 8201 */ 8202 static int 8203 mlx5_action_handle_destroy(struct rte_eth_dev *dev, 8204 struct rte_flow_action_handle *handle, 8205 struct rte_flow_error *error) 8206 { 8207 static const char err_msg[] = "indirect action destruction unsupported"; 8208 struct rte_flow_attr attr = { .transfer = 0 }; 8209 const struct mlx5_flow_driver_ops *fops = 8210 flow_get_drv_ops(flow_get_drv_type(dev, &attr)); 8211 8212 if (!fops->action_destroy) { 8213 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); 8214 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, 8215 NULL, err_msg); 8216 return -rte_errno; 8217 } 8218 return fops->action_destroy(dev, handle, error); 8219 } 8220 8221 /* Wrapper for driver action_destroy op callback */ 8222 static int 8223 flow_drv_action_update(struct rte_eth_dev *dev, 8224 struct rte_flow_action_handle *handle, 8225 const void *update, 8226 const struct mlx5_flow_driver_ops *fops, 8227 struct rte_flow_error *error) 8228 { 8229 static const char err_msg[] = "indirect action update unsupported"; 8230 8231 if (!fops->action_update) { 8232 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); 8233 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, 8234 NULL, err_msg); 8235 return -rte_errno; 8236 } 8237 return fops->action_update(dev, handle, update, error); 8238 } 8239 8240 /* Wrapper for driver action_destroy op callback */ 8241 static int 8242 flow_drv_action_query(struct rte_eth_dev *dev, 8243 const struct rte_flow_action_handle *handle, 8244 void *data, 8245 const struct mlx5_flow_driver_ops *fops, 8246 struct rte_flow_error *error) 8247 { 8248 static const char err_msg[] = "indirect action query unsupported"; 8249 8250 if (!fops->action_query) { 8251 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, 
err_msg); 8252 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, 8253 NULL, err_msg); 8254 return -rte_errno; 8255 } 8256 return fops->action_query(dev, handle, data, error); 8257 } 8258 8259 /** 8260 * Create indirect action for reuse in multiple flow rules. 8261 * 8262 * @param dev 8263 * Pointer to Ethernet device structure. 8264 * @param conf 8265 * Pointer to indirect action object configuration. 8266 * @param[in] action 8267 * Action configuration for indirect action object creation. 8268 * @param[out] error 8269 * Perform verbose error reporting if not NULL. PMDs initialize this 8270 * structure in case of error only. 8271 * @return 8272 * A valid handle in case of success, NULL otherwise and rte_errno is set. 8273 */ 8274 static struct rte_flow_action_handle * 8275 mlx5_action_handle_create(struct rte_eth_dev *dev, 8276 const struct rte_flow_indir_action_conf *conf, 8277 const struct rte_flow_action *action, 8278 struct rte_flow_error *error) 8279 { 8280 static const char err_msg[] = "indirect action creation unsupported"; 8281 struct rte_flow_attr attr = { .transfer = 0 }; 8282 const struct mlx5_flow_driver_ops *fops = 8283 flow_get_drv_ops(flow_get_drv_type(dev, &attr)); 8284 8285 if (flow_drv_action_validate(dev, conf, action, fops, error)) 8286 return NULL; 8287 if (!fops->action_create) { 8288 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); 8289 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, 8290 NULL, err_msg); 8291 return NULL; 8292 } 8293 return fops->action_create(dev, conf, action, error); 8294 } 8295 8296 /** 8297 * Updates inplace the indirect action configuration pointed by *handle* 8298 * with the configuration provided as *update* argument. 8299 * The update of the indirect action configuration effects all flow rules 8300 * reusing the action via handle. 8301 * 8302 * @param dev 8303 * Pointer to Ethernet device structure. 8304 * @param[in] handle 8305 * Handle for the indirect action to be updated. 8306 * @param[in] update 8307 * Action specification used to modify the action pointed by handle. 8308 * *update* could be of same type with the action pointed by the *handle* 8309 * handle argument, or some other structures like a wrapper, depending on 8310 * the indirect action type. 8311 * @param[out] error 8312 * Perform verbose error reporting if not NULL. PMDs initialize this 8313 * structure in case of error only. 8314 * 8315 * @return 8316 * 0 on success, a negative errno value otherwise and rte_errno is set. 8317 */ 8318 static int 8319 mlx5_action_handle_update(struct rte_eth_dev *dev, 8320 struct rte_flow_action_handle *handle, 8321 const void *update, 8322 struct rte_flow_error *error) 8323 { 8324 struct rte_flow_attr attr = { .transfer = 0 }; 8325 const struct mlx5_flow_driver_ops *fops = 8326 flow_get_drv_ops(flow_get_drv_type(dev, &attr)); 8327 int ret; 8328 8329 ret = flow_drv_action_validate(dev, NULL, 8330 (const struct rte_flow_action *)update, fops, error); 8331 if (ret) 8332 return ret; 8333 return flow_drv_action_update(dev, handle, update, fops, 8334 error); 8335 } 8336 8337 /** 8338 * Query the indirect action by handle. 8339 * 8340 * This function allows retrieving action-specific data such as counters. 8341 * Data is gathered by special action which may be present/referenced in 8342 * more than one flow rule definition. 8343 * 8344 * see @RTE_FLOW_ACTION_TYPE_COUNT 8345 * 8346 * @param dev 8347 * Pointer to Ethernet device structure. 8348 * @param[in] handle 8349 * Handle for the indirect action to query. 
8350 * @param[in, out] data 8351 * Pointer to storage for the associated query data type. 8352 * @param[out] error 8353 * Perform verbose error reporting if not NULL. PMDs initialize this 8354 * structure in case of error only. 8355 * 8356 * @return 8357 * 0 on success, a negative errno value otherwise and rte_errno is set. 8358 */ 8359 static int 8360 mlx5_action_handle_query(struct rte_eth_dev *dev, 8361 const struct rte_flow_action_handle *handle, 8362 void *data, 8363 struct rte_flow_error *error) 8364 { 8365 struct rte_flow_attr attr = { .transfer = 0 }; 8366 const struct mlx5_flow_driver_ops *fops = 8367 flow_get_drv_ops(flow_get_drv_type(dev, &attr)); 8368 8369 return flow_drv_action_query(dev, handle, data, fops, error); 8370 } 8371 8372 /** 8373 * Destroy all indirect actions (shared RSS). 8374 * 8375 * @param dev 8376 * Pointer to Ethernet device. 8377 * 8378 * @return 8379 * 0 on success, a negative errno value otherwise and rte_errno is set. 8380 */ 8381 int 8382 mlx5_action_handle_flush(struct rte_eth_dev *dev) 8383 { 8384 struct rte_flow_error error; 8385 struct mlx5_priv *priv = dev->data->dev_private; 8386 struct mlx5_shared_action_rss *shared_rss; 8387 int ret = 0; 8388 uint32_t idx; 8389 8390 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], 8391 priv->rss_shared_actions, idx, shared_rss, next) { 8392 ret |= mlx5_action_handle_destroy(dev, 8393 (struct rte_flow_action_handle *)(uintptr_t)idx, &error); 8394 } 8395 return ret; 8396 } 8397 8398 #ifndef HAVE_MLX5DV_DR 8399 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1)) 8400 #else 8401 #define MLX5_DOMAIN_SYNC_FLOW \ 8402 (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW) 8403 #endif 8404 8405 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains) 8406 { 8407 struct rte_eth_dev *dev = &rte_eth_devices[port_id]; 8408 const struct mlx5_flow_driver_ops *fops; 8409 int ret; 8410 struct rte_flow_attr attr = { .transfer = 0 }; 8411 8412 fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr)); 8413 ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW); 8414 if (ret > 0) 8415 ret = -ret; 8416 return ret; 8417 } 8418 8419 const struct mlx5_flow_tunnel * 8420 mlx5_get_tof(const struct rte_flow_item *item, 8421 const struct rte_flow_action *action, 8422 enum mlx5_tof_rule_type *rule_type) 8423 { 8424 for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 8425 if (item->type == (typeof(item->type)) 8426 MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) { 8427 *rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE; 8428 return flow_items_to_tunnel(item); 8429 } 8430 } 8431 for (; action->conf != RTE_FLOW_ACTION_TYPE_END; action++) { 8432 if (action->type == (typeof(action->type)) 8433 MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) { 8434 *rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE; 8435 return flow_actions_to_tunnel(action); 8436 } 8437 } 8438 return NULL; 8439 } 8440 8441 /** 8442 * tunnel offload functionalilty is defined for DV environment only 8443 */ 8444 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 8445 __extension__ 8446 union tunnel_offload_mark { 8447 uint32_t val; 8448 struct { 8449 uint32_t app_reserve:8; 8450 uint32_t table_id:15; 8451 uint32_t transfer:1; 8452 uint32_t _unused_:8; 8453 }; 8454 }; 8455 8456 static bool 8457 mlx5_access_tunnel_offload_db 8458 (struct rte_eth_dev *dev, 8459 bool (*match)(struct rte_eth_dev *, 8460 struct mlx5_flow_tunnel *, const void *), 8461 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *), 8462 void (*miss)(struct rte_eth_dev *, void *), 8463 void *ctx, bool lock_op); 8464 8465 
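/*
 * The default-miss rule built below installs a catch-all ETH match in the
 * target group whose MARK action packs union tunnel_offload_mark (table id
 * plus the transfer bit); tunnel_mark_decode() later unpacks that value from
 * the mbuf FDIR mark to restore the tunnel context of missed packets.
 */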
static int 8466 flow_tunnel_add_default_miss(struct rte_eth_dev *dev, 8467 struct rte_flow *flow, 8468 const struct rte_flow_attr *attr, 8469 const struct rte_flow_action *app_actions, 8470 uint32_t flow_idx, 8471 const struct mlx5_flow_tunnel *tunnel, 8472 struct tunnel_default_miss_ctx *ctx, 8473 struct rte_flow_error *error) 8474 { 8475 struct mlx5_priv *priv = dev->data->dev_private; 8476 struct mlx5_flow *dev_flow; 8477 struct rte_flow_attr miss_attr = *attr; 8478 const struct rte_flow_item miss_items[2] = { 8479 { 8480 .type = RTE_FLOW_ITEM_TYPE_ETH, 8481 .spec = NULL, 8482 .last = NULL, 8483 .mask = NULL 8484 }, 8485 { 8486 .type = RTE_FLOW_ITEM_TYPE_END, 8487 .spec = NULL, 8488 .last = NULL, 8489 .mask = NULL 8490 } 8491 }; 8492 union tunnel_offload_mark mark_id; 8493 struct rte_flow_action_mark miss_mark; 8494 struct rte_flow_action miss_actions[3] = { 8495 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark }, 8496 [2] = { .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL } 8497 }; 8498 const struct rte_flow_action_jump *jump_data; 8499 uint32_t i, flow_table = 0; /* prevent compilation warning */ 8500 struct flow_grp_info grp_info = { 8501 .external = 1, 8502 .transfer = attr->transfer, 8503 .fdb_def_rule = !!priv->fdb_def_rule, 8504 .std_tbl_fix = 0, 8505 }; 8506 int ret; 8507 8508 if (!attr->transfer) { 8509 uint32_t q_size; 8510 8511 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS; 8512 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]); 8513 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size, 8514 0, SOCKET_ID_ANY); 8515 if (!ctx->queue) 8516 return rte_flow_error_set 8517 (error, ENOMEM, 8518 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 8519 NULL, "invalid default miss RSS"); 8520 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT, 8521 ctx->action_rss.level = 0, 8522 ctx->action_rss.types = priv->rss_conf.rss_hf, 8523 ctx->action_rss.key_len = priv->rss_conf.rss_key_len, 8524 ctx->action_rss.queue_num = priv->reta_idx_n, 8525 ctx->action_rss.key = priv->rss_conf.rss_key, 8526 ctx->action_rss.queue = ctx->queue; 8527 if (!priv->reta_idx_n || !priv->rxqs_n) 8528 return rte_flow_error_set 8529 (error, EINVAL, 8530 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 8531 NULL, "invalid port configuration"); 8532 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 8533 ctx->action_rss.types = 0; 8534 for (i = 0; i != priv->reta_idx_n; ++i) 8535 ctx->queue[i] = (*priv->reta_idx)[i]; 8536 } else { 8537 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP; 8538 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP; 8539 } 8540 miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw; 8541 for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++); 8542 jump_data = app_actions->conf; 8543 miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY; 8544 miss_attr.group = jump_data->group; 8545 ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group, 8546 &flow_table, &grp_info, error); 8547 if (ret) 8548 return rte_flow_error_set(error, EINVAL, 8549 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 8550 NULL, "invalid tunnel id"); 8551 mark_id.app_reserve = 0; 8552 mark_id.table_id = tunnel_flow_tbl_to_id(flow_table); 8553 mark_id.transfer = !!attr->transfer; 8554 mark_id._unused_ = 0; 8555 miss_mark.id = mark_id.val; 8556 dev_flow = flow_drv_prepare(dev, flow, &miss_attr, 8557 miss_items, miss_actions, flow_idx, error); 8558 if (!dev_flow) 8559 return -rte_errno; 8560 dev_flow->flow = flow; 8561 dev_flow->external = true; 8562 dev_flow->tunnel = tunnel; 8563 dev_flow->tof_type = 
MLX5_TUNNEL_OFFLOAD_MISS_RULE; 8564 /* Subflow object was created, we must include one in the list. */ 8565 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 8566 dev_flow->handle, next); 8567 DRV_LOG(DEBUG, 8568 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u", 8569 dev->data->port_id, tunnel->app_tunnel.type, 8570 tunnel->tunnel_id, miss_attr.priority, miss_attr.group); 8571 ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items, 8572 miss_actions, error); 8573 if (!ret) 8574 ret = flow_mreg_update_copy_table(dev, flow, miss_actions, 8575 error); 8576 8577 return ret; 8578 } 8579 8580 static const struct mlx5_flow_tbl_data_entry * 8581 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark) 8582 { 8583 struct mlx5_priv *priv = dev->data->dev_private; 8584 struct mlx5_dev_ctx_shared *sh = priv->sh; 8585 struct mlx5_hlist_entry *he; 8586 union tunnel_offload_mark mbits = { .val = mark }; 8587 union mlx5_flow_tbl_key table_key = { 8588 { 8589 .level = tunnel_id_to_flow_tbl(mbits.table_id), 8590 .id = 0, 8591 .reserved = 0, 8592 .dummy = 0, 8593 .is_fdb = !!mbits.transfer, 8594 .is_egress = 0, 8595 } 8596 }; 8597 he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL); 8598 return he ? 8599 container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL; 8600 } 8601 8602 static void 8603 mlx5_flow_tunnel_grp2tbl_remove_cb(struct mlx5_hlist *list, 8604 struct mlx5_hlist_entry *entry) 8605 { 8606 struct mlx5_dev_ctx_shared *sh = list->ctx; 8607 struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash); 8608 8609 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID], 8610 tunnel_flow_tbl_to_id(tte->flow_table)); 8611 mlx5_free(tte); 8612 } 8613 8614 static int 8615 mlx5_flow_tunnel_grp2tbl_match_cb(struct mlx5_hlist *list __rte_unused, 8616 struct mlx5_hlist_entry *entry, 8617 uint64_t key, void *cb_ctx __rte_unused) 8618 { 8619 union tunnel_tbl_key tbl = { 8620 .val = key, 8621 }; 8622 struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash); 8623 8624 return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group; 8625 } 8626 8627 static struct mlx5_hlist_entry * 8628 mlx5_flow_tunnel_grp2tbl_create_cb(struct mlx5_hlist *list, uint64_t key, 8629 void *ctx __rte_unused) 8630 { 8631 struct mlx5_dev_ctx_shared *sh = list->ctx; 8632 struct tunnel_tbl_entry *tte; 8633 union tunnel_tbl_key tbl = { 8634 .val = key, 8635 }; 8636 8637 tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, 8638 sizeof(*tte), 0, 8639 SOCKET_ID_ANY); 8640 if (!tte) 8641 goto err; 8642 mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID], 8643 &tte->flow_table); 8644 if (tte->flow_table >= MLX5_MAX_TABLES) { 8645 DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.", 8646 tte->flow_table); 8647 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID], 8648 tte->flow_table); 8649 goto err; 8650 } else if (!tte->flow_table) { 8651 goto err; 8652 } 8653 tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table); 8654 tte->tunnel_id = tbl.tunnel_id; 8655 tte->group = tbl.group; 8656 return &tte->hash; 8657 err: 8658 if (tte) 8659 mlx5_free(tte); 8660 return NULL; 8661 } 8662 8663 static uint32_t 8664 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev, 8665 const struct mlx5_flow_tunnel *tunnel, 8666 uint32_t group, uint32_t *table, 8667 struct rte_flow_error *error) 8668 { 8669 struct mlx5_hlist_entry *he; 8670 struct tunnel_tbl_entry *tte; 8671 union tunnel_tbl_key key = { 8672 .tunnel_id = tunnel ? 
tunnel->tunnel_id : 0, 8673 .group = group 8674 }; 8675 struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); 8676 struct mlx5_hlist *group_hash; 8677 8678 group_hash = tunnel ? tunnel->groups : thub->groups; 8679 he = mlx5_hlist_register(group_hash, key.val, NULL); 8680 if (!he) 8681 return rte_flow_error_set(error, EINVAL, 8682 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 8683 NULL, 8684 "tunnel group index not supported"); 8685 tte = container_of(he, typeof(*tte), hash); 8686 *table = tte->flow_table; 8687 DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x", 8688 dev->data->port_id, key.tunnel_id, group, *table); 8689 return 0; 8690 } 8691 8692 static void 8693 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, 8694 struct mlx5_flow_tunnel *tunnel) 8695 { 8696 struct mlx5_priv *priv = dev->data->dev_private; 8697 struct mlx5_indexed_pool *ipool; 8698 8699 DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x", 8700 dev->data->port_id, tunnel->tunnel_id); 8701 LIST_REMOVE(tunnel, chain); 8702 mlx5_hlist_destroy(tunnel->groups); 8703 ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID]; 8704 mlx5_ipool_free(ipool, tunnel->tunnel_id); 8705 } 8706 8707 static bool 8708 mlx5_access_tunnel_offload_db 8709 (struct rte_eth_dev *dev, 8710 bool (*match)(struct rte_eth_dev *, 8711 struct mlx5_flow_tunnel *, const void *), 8712 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *), 8713 void (*miss)(struct rte_eth_dev *, void *), 8714 void *ctx, bool lock_op) 8715 { 8716 bool verdict = false; 8717 struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); 8718 struct mlx5_flow_tunnel *tunnel; 8719 8720 rte_spinlock_lock(&thub->sl); 8721 LIST_FOREACH(tunnel, &thub->tunnels, chain) { 8722 verdict = match(dev, tunnel, (const void *)ctx); 8723 if (verdict) 8724 break; 8725 } 8726 if (!lock_op) 8727 rte_spinlock_unlock(&thub->sl); 8728 if (verdict && hit) 8729 hit(dev, tunnel, ctx); 8730 if (!verdict && miss) 8731 miss(dev, ctx); 8732 if (lock_op) 8733 rte_spinlock_unlock(&thub->sl); 8734 8735 return verdict; 8736 } 8737 8738 struct tunnel_db_find_tunnel_id_ctx { 8739 uint32_t tunnel_id; 8740 struct mlx5_flow_tunnel *tunnel; 8741 }; 8742 8743 static bool 8744 find_tunnel_id_match(struct rte_eth_dev *dev, 8745 struct mlx5_flow_tunnel *tunnel, const void *x) 8746 { 8747 const struct tunnel_db_find_tunnel_id_ctx *ctx = x; 8748 8749 RTE_SET_USED(dev); 8750 return tunnel->tunnel_id == ctx->tunnel_id; 8751 } 8752 8753 static void 8754 find_tunnel_id_hit(struct rte_eth_dev *dev, 8755 struct mlx5_flow_tunnel *tunnel, void *x) 8756 { 8757 struct tunnel_db_find_tunnel_id_ctx *ctx = x; 8758 RTE_SET_USED(dev); 8759 ctx->tunnel = tunnel; 8760 } 8761 8762 static struct mlx5_flow_tunnel * 8763 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id) 8764 { 8765 struct tunnel_db_find_tunnel_id_ctx ctx = { 8766 .tunnel_id = id, 8767 }; 8768 8769 mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match, 8770 find_tunnel_id_hit, NULL, &ctx, true); 8771 8772 return ctx.tunnel; 8773 } 8774 8775 static struct mlx5_flow_tunnel * 8776 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev, 8777 const struct rte_flow_tunnel *app_tunnel) 8778 { 8779 struct mlx5_priv *priv = dev->data->dev_private; 8780 struct mlx5_indexed_pool *ipool; 8781 struct mlx5_flow_tunnel *tunnel; 8782 uint32_t id; 8783 8784 ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID]; 8785 tunnel = mlx5_ipool_zmalloc(ipool, &id); 8786 if (!tunnel) 8787 return NULL; 8788 if (id >= MLX5_MAX_TUNNELS) { 8789 mlx5_ipool_free(ipool, id); 8790 DRV_LOG(ERR, "Tunnel ID %d exceed max 
limit.", id); 8791 return NULL; 8792 } 8793 tunnel->groups = mlx5_hlist_create("tunnel groups", 1024, 0, 0, 8794 mlx5_flow_tunnel_grp2tbl_create_cb, 8795 mlx5_flow_tunnel_grp2tbl_match_cb, 8796 mlx5_flow_tunnel_grp2tbl_remove_cb); 8797 if (!tunnel->groups) { 8798 mlx5_ipool_free(ipool, id); 8799 return NULL; 8800 } 8801 tunnel->groups->ctx = priv->sh; 8802 /* initiate new PMD tunnel */ 8803 memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel)); 8804 tunnel->tunnel_id = id; 8805 tunnel->action.type = (typeof(tunnel->action.type)) 8806 MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET; 8807 tunnel->action.conf = tunnel; 8808 tunnel->item.type = (typeof(tunnel->item.type)) 8809 MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL; 8810 tunnel->item.spec = tunnel; 8811 tunnel->item.last = NULL; 8812 tunnel->item.mask = NULL; 8813 8814 DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x", 8815 dev->data->port_id, tunnel->tunnel_id); 8816 8817 return tunnel; 8818 } 8819 8820 struct tunnel_db_get_tunnel_ctx { 8821 const struct rte_flow_tunnel *app_tunnel; 8822 struct mlx5_flow_tunnel *tunnel; 8823 }; 8824 8825 static bool get_tunnel_match(struct rte_eth_dev *dev, 8826 struct mlx5_flow_tunnel *tunnel, const void *x) 8827 { 8828 const struct tunnel_db_get_tunnel_ctx *ctx = x; 8829 8830 RTE_SET_USED(dev); 8831 return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel, 8832 sizeof(*ctx->app_tunnel)); 8833 } 8834 8835 static void get_tunnel_hit(struct rte_eth_dev *dev, 8836 struct mlx5_flow_tunnel *tunnel, void *x) 8837 { 8838 /* called under tunnel spinlock protection */ 8839 struct tunnel_db_get_tunnel_ctx *ctx = x; 8840 8841 RTE_SET_USED(dev); 8842 tunnel->refctn++; 8843 ctx->tunnel = tunnel; 8844 } 8845 8846 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x) 8847 { 8848 /* called under tunnel spinlock protection */ 8849 struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); 8850 struct tunnel_db_get_tunnel_ctx *ctx = x; 8851 8852 rte_spinlock_unlock(&thub->sl); 8853 ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel); 8854 rte_spinlock_lock(&thub->sl); 8855 if (ctx->tunnel) { 8856 ctx->tunnel->refctn = 1; 8857 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain); 8858 } 8859 } 8860 8861 8862 static int 8863 mlx5_get_flow_tunnel(struct rte_eth_dev *dev, 8864 const struct rte_flow_tunnel *app_tunnel, 8865 struct mlx5_flow_tunnel **tunnel) 8866 { 8867 struct tunnel_db_get_tunnel_ctx ctx = { 8868 .app_tunnel = app_tunnel, 8869 }; 8870 8871 mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit, 8872 get_tunnel_miss, &ctx, true); 8873 *tunnel = ctx.tunnel; 8874 return ctx.tunnel ? 
0 : -ENOMEM; 8875 } 8876 8877 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id) 8878 { 8879 struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub; 8880 8881 if (!thub) 8882 return; 8883 if (!LIST_EMPTY(&thub->tunnels)) 8884 DRV_LOG(WARNING, "port %u tunnels present", port_id); 8885 mlx5_hlist_destroy(thub->groups); 8886 mlx5_free(thub); 8887 } 8888 8889 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh) 8890 { 8891 int err; 8892 struct mlx5_flow_tunnel_hub *thub; 8893 8894 thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub), 8895 0, SOCKET_ID_ANY); 8896 if (!thub) 8897 return -ENOMEM; 8898 LIST_INIT(&thub->tunnels); 8899 rte_spinlock_init(&thub->sl); 8900 thub->groups = mlx5_hlist_create("flow groups", 8901 rte_align32pow2(MLX5_MAX_TABLES), 0, 8902 0, mlx5_flow_tunnel_grp2tbl_create_cb, 8903 mlx5_flow_tunnel_grp2tbl_match_cb, 8904 mlx5_flow_tunnel_grp2tbl_remove_cb); 8905 if (!thub->groups) { 8906 err = -rte_errno; 8907 goto err; 8908 } 8909 thub->groups->ctx = sh; 8910 sh->tunnel_hub = thub; 8911 8912 return 0; 8913 8914 err: 8915 if (thub->groups) 8916 mlx5_hlist_destroy(thub->groups); 8917 if (thub) 8918 mlx5_free(thub); 8919 return err; 8920 } 8921 8922 static inline bool 8923 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev, 8924 struct rte_flow_tunnel *tunnel, 8925 const char *err_msg) 8926 { 8927 err_msg = NULL; 8928 if (!is_tunnel_offload_active(dev)) { 8929 err_msg = "tunnel offload was not activated"; 8930 goto out; 8931 } else if (!tunnel) { 8932 err_msg = "no application tunnel"; 8933 goto out; 8934 } 8935 8936 switch (tunnel->type) { 8937 default: 8938 err_msg = "unsupported tunnel type"; 8939 goto out; 8940 case RTE_FLOW_ITEM_TYPE_VXLAN: 8941 break; 8942 } 8943 8944 out: 8945 return !err_msg; 8946 } 8947 8948 static int 8949 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev, 8950 struct rte_flow_tunnel *app_tunnel, 8951 struct rte_flow_action **actions, 8952 uint32_t *num_of_actions, 8953 struct rte_flow_error *error) 8954 { 8955 int ret; 8956 struct mlx5_flow_tunnel *tunnel; 8957 const char *err_msg = NULL; 8958 bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); 8959 8960 if (!verdict) 8961 return rte_flow_error_set(error, EINVAL, 8962 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 8963 err_msg); 8964 ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); 8965 if (ret < 0) { 8966 return rte_flow_error_set(error, ret, 8967 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 8968 "failed to initialize pmd tunnel"); 8969 } 8970 *actions = &tunnel->action; 8971 *num_of_actions = 1; 8972 return 0; 8973 } 8974 8975 static int 8976 mlx5_flow_tunnel_match(struct rte_eth_dev *dev, 8977 struct rte_flow_tunnel *app_tunnel, 8978 struct rte_flow_item **items, 8979 uint32_t *num_of_items, 8980 struct rte_flow_error *error) 8981 { 8982 int ret; 8983 struct mlx5_flow_tunnel *tunnel; 8984 const char *err_msg = NULL; 8985 bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); 8986 8987 if (!verdict) 8988 return rte_flow_error_set(error, EINVAL, 8989 RTE_FLOW_ERROR_TYPE_HANDLE, NULL, 8990 err_msg); 8991 ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); 8992 if (ret < 0) { 8993 return rte_flow_error_set(error, ret, 8994 RTE_FLOW_ERROR_TYPE_HANDLE, NULL, 8995 "failed to initialize pmd tunnel"); 8996 } 8997 *items = &tunnel->item; 8998 *num_of_items = 1; 8999 return 0; 9000 } 9001 9002 struct tunnel_db_element_release_ctx { 9003 struct rte_flow_item *items; 9004 struct rte_flow_action *actions; 9005 uint32_t num_elements; 9006 struct 
static bool
tunnel_element_release_match(struct rte_eth_dev *dev,
			     struct mlx5_flow_tunnel *tunnel, const void *x)
{
	const struct tunnel_db_element_release_ctx *ctx = x;

	RTE_SET_USED(dev);
	if (ctx->num_elements != 1)
		return false;
	else if (ctx->items)
		return ctx->items == &tunnel->item;
	else if (ctx->actions)
		return ctx->actions == &tunnel->action;

	return false;
}

static void
tunnel_element_release_hit(struct rte_eth_dev *dev,
			   struct mlx5_flow_tunnel *tunnel, void *x)
{
	struct tunnel_db_element_release_ctx *ctx = x;
	ctx->ret = 0;
	if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
		mlx5_flow_tunnel_free(dev, tunnel);
}

static void
tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
{
	struct tunnel_db_element_release_ctx *ctx = x;
	RTE_SET_USED(dev);
	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				      "invalid argument");
}

static int
mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
			      struct rte_flow_item *pmd_items,
			      uint32_t num_items, struct rte_flow_error *err)
{
	struct tunnel_db_element_release_ctx ctx = {
		.items = pmd_items,
		.actions = NULL,
		.num_elements = num_items,
		.error = err,
	};

	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
				      tunnel_element_release_hit,
				      tunnel_element_release_miss, &ctx, false);

	return ctx.ret;
}

static int
mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
				struct rte_flow_action *pmd_actions,
				uint32_t num_actions,
				struct rte_flow_error *err)
{
	struct tunnel_db_element_release_ctx ctx = {
		.items = NULL,
		.actions = pmd_actions,
		.num_elements = num_actions,
		.error = err,
	};

	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
				      tunnel_element_release_hit,
				      tunnel_element_release_miss, &ctx, false);

	return ctx.ret;
}

static int
mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
				  struct rte_mbuf *m,
				  struct rte_flow_restore_info *info,
				  struct rte_flow_error *err)
{
	uint64_t ol_flags = m->ol_flags;
	const struct mlx5_flow_tbl_data_entry *tble;
	const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID;

	if (!is_tunnel_offload_active(dev)) {
		info->flags = 0;
		return 0;
	}

	if ((ol_flags & mask) != mask)
		goto err;
	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
	if (!tble) {
		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
			dev->data->port_id, m->hash.fdir.hi);
		goto err;
	}
	MLX5_ASSERT(tble->tunnel);
	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
	info->group_id = tble->group_id;
	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;

	return 0;

err:
	return rte_flow_error_set(err, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				  "failed to get restore info");
}

#else /* HAVE_IBV_FLOW_DV_SUPPORT */
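/*
 * Tunnel offload requires DV flow support. When the PMD is built
 * without it, the callbacks below are stub implementations that
 * reject the tunnel offload API calls.
 */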
static int
mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
			   __rte_unused struct rte_flow_tunnel *app_tunnel,
			   __rte_unused struct rte_flow_action **actions,
			   __rte_unused uint32_t *num_of_actions,
			   __rte_unused struct rte_flow_error *error)
{
	return -ENOTSUP;
}

static int
mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
		       __rte_unused struct rte_flow_tunnel *app_tunnel,
		       __rte_unused struct rte_flow_item **items,
		       __rte_unused uint32_t *num_of_items,
		       __rte_unused struct rte_flow_error *error)
{
	return -ENOTSUP;
}

static int
mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
			      __rte_unused struct rte_flow_item *pmd_items,
			      __rte_unused uint32_t num_items,
			      __rte_unused struct rte_flow_error *err)
{
	return -ENOTSUP;
}

static int
mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
				__rte_unused struct rte_flow_action *pmd_action,
				__rte_unused uint32_t num_actions,
				__rte_unused struct rte_flow_error *err)
{
	return -ENOTSUP;
}

static int
mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
				  __rte_unused struct rte_mbuf *m,
				  __rte_unused struct rte_flow_restore_info *i,
				  __rte_unused struct rte_flow_error *err)
{
	return -ENOTSUP;
}

static int
flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
			     __rte_unused struct rte_flow *flow,
			     __rte_unused const struct rte_flow_attr *attr,
			     __rte_unused const struct rte_flow_action *actions,
			     __rte_unused uint32_t flow_idx,
			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
			     __rte_unused struct tunnel_default_miss_ctx *ctx,
			     __rte_unused struct rte_flow_error *error)
{
	return -ENOTSUP;
}

static struct mlx5_flow_tunnel *
mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
		    __rte_unused uint32_t id)
{
	return NULL;
}

static void
mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
		      __rte_unused struct mlx5_flow_tunnel *tunnel)
{
}

static uint32_t
tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
				__rte_unused const struct mlx5_flow_tunnel *t,
				__rte_unused uint32_t group,
				__rte_unused uint32_t *table,
				struct rte_flow_error *error)
{
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				  "tunnel offload requires DV support");
}

void
mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
			__rte_unused uint16_t port_id)
{
}
#endif /* HAVE_IBV_FLOW_DV_SUPPORT */

static void
mlx5_dbg__print_pattern(const struct rte_flow_item *item)
{
	int ret;
	struct rte_flow_error error;

	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
		char *item_name;
		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
				    sizeof(item_name),
				    (void *)(uintptr_t)item->type, &error);
		if (ret > 0)
			printf("%s ", item_name);
		else
			printf("%d\n", (int)item->type);
	}
	printf("END\n");
}
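/*
 * Illustrative sketch (not part of the driver and not compiled): how an
 * application is expected to exercise the tunnel offload callbacks
 * implemented above through the generic rte_flow API. The variables
 * port_id, mbuf, tun_id value and the application's own attr/pattern/
 * actions arrays are placeholders, not driver symbols.
 *
 *	struct rte_flow_tunnel app_tunnel = {
 *		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *	};
 *	struct rte_flow_action *pmd_actions;
 *	struct rte_flow_item *pmd_items;
 *	uint32_t num_pmd_actions, num_pmd_items;
 *	struct rte_flow_restore_info info;
 *	struct rte_flow_error error;
 *
 *	// Ask the PMD for its private tunnel-set actions; the application
 *	// prepends them to the actions of its tunnel-set rule.
 *	rte_flow_tunnel_decap_set(port_id, &app_tunnel, &pmd_actions,
 *				  &num_pmd_actions, &error);
 *	// Ask for the private tunnel items; the application prepends them
 *	// to the pattern of its tunnel-match rules.
 *	rte_flow_tunnel_match(port_id, &app_tunnel, &pmd_items,
 *			      &num_pmd_items, &error);
 *	// On a tunnel miss, recover the outer header description from
 *	// the received mbuf.
 *	rte_flow_get_restore_info(port_id, mbuf, &info, &error);
 *	// Release the PMD objects once the rules are destroyed.
 *	rte_flow_tunnel_action_decap_release(port_id, pmd_actions,
 *					     num_pmd_actions, &error);
 *	rte_flow_tunnel_item_release(port_id, pmd_items,
 *				     num_pmd_items, &error);
 */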