1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <netinet/in.h> 7 #include <sys/queue.h> 8 #include <stdalign.h> 9 #include <stdint.h> 10 #include <string.h> 11 #include <stdbool.h> 12 13 #include <rte_common.h> 14 #include <rte_ether.h> 15 #include <rte_ethdev_driver.h> 16 #include <rte_eal_paging.h> 17 #include <rte_flow.h> 18 #include <rte_cycles.h> 19 #include <rte_flow_driver.h> 20 #include <rte_malloc.h> 21 #include <rte_ip.h> 22 23 #include <mlx5_glue.h> 24 #include <mlx5_devx_cmds.h> 25 #include <mlx5_prm.h> 26 #include <mlx5_malloc.h> 27 28 #include "mlx5_defs.h" 29 #include "mlx5.h" 30 #include "mlx5_flow.h" 31 #include "mlx5_flow_os.h" 32 #include "mlx5_rxtx.h" 33 #include "mlx5_common_os.h" 34 35 /** Device flow drivers. */ 36 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; 37 38 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops; 39 40 const struct mlx5_flow_driver_ops *flow_drv_ops[] = { 41 [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops, 42 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 43 [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops, 44 #endif 45 [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops, 46 [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops 47 }; 48 49 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */ 50 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \ 51 (const int []){ \ 52 __VA_ARGS__, 0, \ 53 } 54 55 /** Node object of input graph for mlx5_flow_expand_rss(). */ 56 struct mlx5_flow_expand_node { 57 const int *const next; 58 /**< 59 * List of next node indexes. Index 0 is interpreted as a terminator. 60 */ 61 const enum rte_flow_item_type type; 62 /**< Pattern item type of current node. */ 63 uint64_t rss_types; 64 /**< 65 * RSS types bit-field associated with this node 66 * (see ETH_RSS_* definitions). 67 */ 68 }; 69 70 /** Object returned by mlx5_flow_expand_rss(). */ 71 struct mlx5_flow_expand_rss { 72 uint32_t entries; 73 /**< Number of entries @p patterns and @p priorities. */ 74 struct { 75 struct rte_flow_item *pattern; /**< Expanded pattern array. */ 76 uint32_t priority; /**< Priority offset for each expansion. 
*/ 77 } entry[]; 78 }; 79 80 static enum rte_flow_item_type 81 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) 82 { 83 enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID; 84 uint16_t ether_type = 0; 85 uint16_t ether_type_m; 86 uint8_t ip_next_proto = 0; 87 uint8_t ip_next_proto_m; 88 89 if (item == NULL || item->spec == NULL) 90 return ret; 91 switch (item->type) { 92 case RTE_FLOW_ITEM_TYPE_ETH: 93 if (item->mask) 94 ether_type_m = ((const struct rte_flow_item_eth *) 95 (item->mask))->type; 96 else 97 ether_type_m = rte_flow_item_eth_mask.type; 98 if (ether_type_m != RTE_BE16(0xFFFF)) 99 break; 100 ether_type = ((const struct rte_flow_item_eth *) 101 (item->spec))->type; 102 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 103 ret = RTE_FLOW_ITEM_TYPE_IPV4; 104 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 105 ret = RTE_FLOW_ITEM_TYPE_IPV6; 106 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 107 ret = RTE_FLOW_ITEM_TYPE_VLAN; 108 else 109 ret = RTE_FLOW_ITEM_TYPE_END; 110 break; 111 case RTE_FLOW_ITEM_TYPE_VLAN: 112 if (item->mask) 113 ether_type_m = ((const struct rte_flow_item_vlan *) 114 (item->mask))->inner_type; 115 else 116 ether_type_m = rte_flow_item_vlan_mask.inner_type; 117 if (ether_type_m != RTE_BE16(0xFFFF)) 118 break; 119 ether_type = ((const struct rte_flow_item_vlan *) 120 (item->spec))->inner_type; 121 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 122 ret = RTE_FLOW_ITEM_TYPE_IPV4; 123 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 124 ret = RTE_FLOW_ITEM_TYPE_IPV6; 125 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 126 ret = RTE_FLOW_ITEM_TYPE_VLAN; 127 else 128 ret = RTE_FLOW_ITEM_TYPE_END; 129 break; 130 case RTE_FLOW_ITEM_TYPE_IPV4: 131 if (item->mask) 132 ip_next_proto_m = ((const struct rte_flow_item_ipv4 *) 133 (item->mask))->hdr.next_proto_id; 134 else 135 ip_next_proto_m = 136 rte_flow_item_ipv4_mask.hdr.next_proto_id; 137 if (ip_next_proto_m != 0xFF) 138 break; 139 ip_next_proto = ((const struct rte_flow_item_ipv4 *) 140 (item->spec))->hdr.next_proto_id; 141 if (ip_next_proto == IPPROTO_UDP) 142 ret = RTE_FLOW_ITEM_TYPE_UDP; 143 else if (ip_next_proto == IPPROTO_TCP) 144 ret = RTE_FLOW_ITEM_TYPE_TCP; 145 else if (ip_next_proto == IPPROTO_IP) 146 ret = RTE_FLOW_ITEM_TYPE_IPV4; 147 else if (ip_next_proto == IPPROTO_IPV6) 148 ret = RTE_FLOW_ITEM_TYPE_IPV6; 149 else 150 ret = RTE_FLOW_ITEM_TYPE_END; 151 break; 152 case RTE_FLOW_ITEM_TYPE_IPV6: 153 if (item->mask) 154 ip_next_proto_m = ((const struct rte_flow_item_ipv6 *) 155 (item->mask))->hdr.proto; 156 else 157 ip_next_proto_m = 158 rte_flow_item_ipv6_mask.hdr.proto; 159 if (ip_next_proto_m != 0xFF) 160 break; 161 ip_next_proto = ((const struct rte_flow_item_ipv6 *) 162 (item->spec))->hdr.proto; 163 if (ip_next_proto == IPPROTO_UDP) 164 ret = RTE_FLOW_ITEM_TYPE_UDP; 165 else if (ip_next_proto == IPPROTO_TCP) 166 ret = RTE_FLOW_ITEM_TYPE_TCP; 167 else if (ip_next_proto == IPPROTO_IP) 168 ret = RTE_FLOW_ITEM_TYPE_IPV4; 169 else if (ip_next_proto == IPPROTO_IPV6) 170 ret = RTE_FLOW_ITEM_TYPE_IPV6; 171 else 172 ret = RTE_FLOW_ITEM_TYPE_END; 173 break; 174 default: 175 ret = RTE_FLOW_ITEM_TYPE_VOID; 176 break; 177 } 178 return ret; 179 } 180 181 /** 182 * Expand RSS flows into several possible flows according to the RSS hash 183 * fields requested and the driver capabilities. 184 * 185 * @param[out] buf 186 * Buffer to store the result expansion. 187 * @param[in] size 188 * Buffer size in bytes. 
If 0, @p buf can be NULL. 189 * @param[in] pattern 190 * User flow pattern. 191 * @param[in] types 192 * RSS types to expand (see ETH_RSS_* definitions). 193 * @param[in] graph 194 * Input graph to expand @p pattern according to @p types. 195 * @param[in] graph_root_index 196 * Index of root node in @p graph, typically 0. 197 * 198 * @return 199 * A positive value representing the size of @p buf in bytes regardless of 200 * @p size on success, a negative errno value otherwise and rte_errno is 201 * set, the following errors are defined: 202 * 203 * -E2BIG: graph-depth @p graph is too deep. 204 */ 205 static int 206 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, 207 const struct rte_flow_item *pattern, uint64_t types, 208 const struct mlx5_flow_expand_node graph[], 209 int graph_root_index) 210 { 211 const int elt_n = 8; 212 const struct rte_flow_item *item; 213 const struct mlx5_flow_expand_node *node = &graph[graph_root_index]; 214 const int *next_node; 215 const int *stack[elt_n]; 216 int stack_pos = 0; 217 struct rte_flow_item flow_items[elt_n]; 218 unsigned int i; 219 size_t lsize; 220 size_t user_pattern_size = 0; 221 void *addr = NULL; 222 const struct mlx5_flow_expand_node *next = NULL; 223 struct rte_flow_item missed_item; 224 int missed = 0; 225 int elt = 0; 226 const struct rte_flow_item *last_item = NULL; 227 228 memset(&missed_item, 0, sizeof(missed_item)); 229 lsize = offsetof(struct mlx5_flow_expand_rss, entry) + 230 elt_n * sizeof(buf->entry[0]); 231 if (lsize <= size) { 232 buf->entry[0].priority = 0; 233 buf->entry[0].pattern = (void *)&buf->entry[elt_n]; 234 buf->entries = 0; 235 addr = buf->entry[0].pattern; 236 } 237 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 238 if (item->type != RTE_FLOW_ITEM_TYPE_VOID) 239 last_item = item; 240 for (i = 0; node->next && node->next[i]; ++i) { 241 next = &graph[node->next[i]]; 242 if (next->type == item->type) 243 break; 244 } 245 if (next) 246 node = next; 247 user_pattern_size += sizeof(*item); 248 } 249 user_pattern_size += sizeof(*item); /* Handle END item. */ 250 lsize += user_pattern_size; 251 /* Copy the user pattern in the first entry of the buffer. */ 252 if (lsize <= size) { 253 rte_memcpy(addr, pattern, user_pattern_size); 254 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 255 buf->entries = 1; 256 } 257 /* Start expanding. */ 258 memset(flow_items, 0, sizeof(flow_items)); 259 user_pattern_size -= sizeof(*item); 260 /* 261 * Check if the last valid item has spec set, need complete pattern, 262 * and the pattern can be used for expansion. 263 */ 264 missed_item.type = mlx5_flow_expand_rss_item_complete(last_item); 265 if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) { 266 /* Item type END indicates expansion is not required. */ 267 return lsize; 268 } 269 if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) { 270 next = NULL; 271 missed = 1; 272 for (i = 0; node->next && node->next[i]; ++i) { 273 next = &graph[node->next[i]]; 274 if (next->type == missed_item.type) { 275 flow_items[0].type = missed_item.type; 276 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; 277 break; 278 } 279 next = NULL; 280 } 281 } 282 if (next && missed) { 283 elt = 2; /* missed item + item end. 
*/ 284 node = next; 285 lsize += elt * sizeof(*item) + user_pattern_size; 286 if ((node->rss_types & types) && lsize <= size) { 287 buf->entry[buf->entries].priority = 1; 288 buf->entry[buf->entries].pattern = addr; 289 buf->entries++; 290 rte_memcpy(addr, buf->entry[0].pattern, 291 user_pattern_size); 292 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 293 rte_memcpy(addr, flow_items, elt * sizeof(*item)); 294 addr = (void *)(((uintptr_t)addr) + 295 elt * sizeof(*item)); 296 } 297 } 298 memset(flow_items, 0, sizeof(flow_items)); 299 next_node = node->next; 300 stack[stack_pos] = next_node; 301 node = next_node ? &graph[*next_node] : NULL; 302 while (node) { 303 flow_items[stack_pos].type = node->type; 304 if (node->rss_types & types) { 305 /* 306 * compute the number of items to copy from the 307 * expansion and copy it. 308 * When the stack_pos is 0, there are 1 element in it, 309 * plus the addition END item. 310 */ 311 elt = stack_pos + 2; 312 flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END; 313 lsize += elt * sizeof(*item) + user_pattern_size; 314 if (lsize <= size) { 315 size_t n = elt * sizeof(*item); 316 317 buf->entry[buf->entries].priority = 318 stack_pos + 1 + missed; 319 buf->entry[buf->entries].pattern = addr; 320 buf->entries++; 321 rte_memcpy(addr, buf->entry[0].pattern, 322 user_pattern_size); 323 addr = (void *)(((uintptr_t)addr) + 324 user_pattern_size); 325 rte_memcpy(addr, &missed_item, 326 missed * sizeof(*item)); 327 addr = (void *)(((uintptr_t)addr) + 328 missed * sizeof(*item)); 329 rte_memcpy(addr, flow_items, n); 330 addr = (void *)(((uintptr_t)addr) + n); 331 } 332 } 333 /* Go deeper. */ 334 if (node->next) { 335 next_node = node->next; 336 if (stack_pos++ == elt_n) { 337 rte_errno = E2BIG; 338 return -rte_errno; 339 } 340 stack[stack_pos] = next_node; 341 } else if (*(next_node + 1)) { 342 /* Follow up with the next possibility. */ 343 ++next_node; 344 } else { 345 /* Move to the next path. */ 346 if (stack_pos) 347 next_node = stack[--stack_pos]; 348 next_node++; 349 stack[stack_pos] = next_node; 350 } 351 node = *next_node ? 
&graph[*next_node] : NULL; 352 }; 353 /* no expanded flows but we have missed item, create one rule for it */ 354 if (buf->entries == 1 && missed != 0) { 355 elt = 2; 356 lsize += elt * sizeof(*item) + user_pattern_size; 357 if (lsize <= size) { 358 buf->entry[buf->entries].priority = 1; 359 buf->entry[buf->entries].pattern = addr; 360 buf->entries++; 361 flow_items[0].type = missed_item.type; 362 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; 363 rte_memcpy(addr, buf->entry[0].pattern, 364 user_pattern_size); 365 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 366 rte_memcpy(addr, flow_items, elt * sizeof(*item)); 367 addr = (void *)(((uintptr_t)addr) + 368 elt * sizeof(*item)); 369 } 370 } 371 return lsize; 372 } 373 374 enum mlx5_expansion { 375 MLX5_EXPANSION_ROOT, 376 MLX5_EXPANSION_ROOT_OUTER, 377 MLX5_EXPANSION_ROOT_ETH_VLAN, 378 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, 379 MLX5_EXPANSION_OUTER_ETH, 380 MLX5_EXPANSION_OUTER_ETH_VLAN, 381 MLX5_EXPANSION_OUTER_VLAN, 382 MLX5_EXPANSION_OUTER_IPV4, 383 MLX5_EXPANSION_OUTER_IPV4_UDP, 384 MLX5_EXPANSION_OUTER_IPV4_TCP, 385 MLX5_EXPANSION_OUTER_IPV6, 386 MLX5_EXPANSION_OUTER_IPV6_UDP, 387 MLX5_EXPANSION_OUTER_IPV6_TCP, 388 MLX5_EXPANSION_VXLAN, 389 MLX5_EXPANSION_VXLAN_GPE, 390 MLX5_EXPANSION_GRE, 391 MLX5_EXPANSION_MPLS, 392 MLX5_EXPANSION_ETH, 393 MLX5_EXPANSION_ETH_VLAN, 394 MLX5_EXPANSION_VLAN, 395 MLX5_EXPANSION_IPV4, 396 MLX5_EXPANSION_IPV4_UDP, 397 MLX5_EXPANSION_IPV4_TCP, 398 MLX5_EXPANSION_IPV6, 399 MLX5_EXPANSION_IPV6_UDP, 400 MLX5_EXPANSION_IPV6_TCP, 401 }; 402 403 /** Supported expansion of items. */ 404 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { 405 [MLX5_EXPANSION_ROOT] = { 406 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 407 MLX5_EXPANSION_IPV4, 408 MLX5_EXPANSION_IPV6), 409 .type = RTE_FLOW_ITEM_TYPE_END, 410 }, 411 [MLX5_EXPANSION_ROOT_OUTER] = { 412 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 413 MLX5_EXPANSION_OUTER_IPV4, 414 MLX5_EXPANSION_OUTER_IPV6), 415 .type = RTE_FLOW_ITEM_TYPE_END, 416 }, 417 [MLX5_EXPANSION_ROOT_ETH_VLAN] = { 418 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), 419 .type = RTE_FLOW_ITEM_TYPE_END, 420 }, 421 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { 422 .next = MLX5_FLOW_EXPAND_RSS_NEXT 423 (MLX5_EXPANSION_OUTER_ETH_VLAN), 424 .type = RTE_FLOW_ITEM_TYPE_END, 425 }, 426 [MLX5_EXPANSION_OUTER_ETH] = { 427 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 428 MLX5_EXPANSION_OUTER_IPV6, 429 MLX5_EXPANSION_MPLS), 430 .type = RTE_FLOW_ITEM_TYPE_ETH, 431 .rss_types = 0, 432 }, 433 [MLX5_EXPANSION_OUTER_ETH_VLAN] = { 434 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), 435 .type = RTE_FLOW_ITEM_TYPE_ETH, 436 .rss_types = 0, 437 }, 438 [MLX5_EXPANSION_OUTER_VLAN] = { 439 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 440 MLX5_EXPANSION_OUTER_IPV6), 441 .type = RTE_FLOW_ITEM_TYPE_VLAN, 442 }, 443 [MLX5_EXPANSION_OUTER_IPV4] = { 444 .next = MLX5_FLOW_EXPAND_RSS_NEXT 445 (MLX5_EXPANSION_OUTER_IPV4_UDP, 446 MLX5_EXPANSION_OUTER_IPV4_TCP, 447 MLX5_EXPANSION_GRE, 448 MLX5_EXPANSION_IPV4, 449 MLX5_EXPANSION_IPV6), 450 .type = RTE_FLOW_ITEM_TYPE_IPV4, 451 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 452 ETH_RSS_NONFRAG_IPV4_OTHER, 453 }, 454 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 455 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 456 MLX5_EXPANSION_VXLAN_GPE), 457 .type = RTE_FLOW_ITEM_TYPE_UDP, 458 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 459 }, 460 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 461 .type = 
RTE_FLOW_ITEM_TYPE_TCP, 462 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 463 }, 464 [MLX5_EXPANSION_OUTER_IPV6] = { 465 .next = MLX5_FLOW_EXPAND_RSS_NEXT 466 (MLX5_EXPANSION_OUTER_IPV6_UDP, 467 MLX5_EXPANSION_OUTER_IPV6_TCP, 468 MLX5_EXPANSION_IPV4, 469 MLX5_EXPANSION_IPV6), 470 .type = RTE_FLOW_ITEM_TYPE_IPV6, 471 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 472 ETH_RSS_NONFRAG_IPV6_OTHER, 473 }, 474 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 475 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 476 MLX5_EXPANSION_VXLAN_GPE), 477 .type = RTE_FLOW_ITEM_TYPE_UDP, 478 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 479 }, 480 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 481 .type = RTE_FLOW_ITEM_TYPE_TCP, 482 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 483 }, 484 [MLX5_EXPANSION_VXLAN] = { 485 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 486 MLX5_EXPANSION_IPV4, 487 MLX5_EXPANSION_IPV6), 488 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 489 }, 490 [MLX5_EXPANSION_VXLAN_GPE] = { 491 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 492 MLX5_EXPANSION_IPV4, 493 MLX5_EXPANSION_IPV6), 494 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 495 }, 496 [MLX5_EXPANSION_GRE] = { 497 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), 498 .type = RTE_FLOW_ITEM_TYPE_GRE, 499 }, 500 [MLX5_EXPANSION_MPLS] = { 501 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 502 MLX5_EXPANSION_IPV6), 503 .type = RTE_FLOW_ITEM_TYPE_MPLS, 504 }, 505 [MLX5_EXPANSION_ETH] = { 506 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 507 MLX5_EXPANSION_IPV6), 508 .type = RTE_FLOW_ITEM_TYPE_ETH, 509 }, 510 [MLX5_EXPANSION_ETH_VLAN] = { 511 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), 512 .type = RTE_FLOW_ITEM_TYPE_ETH, 513 }, 514 [MLX5_EXPANSION_VLAN] = { 515 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 516 MLX5_EXPANSION_IPV6), 517 .type = RTE_FLOW_ITEM_TYPE_VLAN, 518 }, 519 [MLX5_EXPANSION_IPV4] = { 520 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 521 MLX5_EXPANSION_IPV4_TCP), 522 .type = RTE_FLOW_ITEM_TYPE_IPV4, 523 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 524 ETH_RSS_NONFRAG_IPV4_OTHER, 525 }, 526 [MLX5_EXPANSION_IPV4_UDP] = { 527 .type = RTE_FLOW_ITEM_TYPE_UDP, 528 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 529 }, 530 [MLX5_EXPANSION_IPV4_TCP] = { 531 .type = RTE_FLOW_ITEM_TYPE_TCP, 532 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 533 }, 534 [MLX5_EXPANSION_IPV6] = { 535 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 536 MLX5_EXPANSION_IPV6_TCP), 537 .type = RTE_FLOW_ITEM_TYPE_IPV6, 538 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 539 ETH_RSS_NONFRAG_IPV6_OTHER, 540 }, 541 [MLX5_EXPANSION_IPV6_UDP] = { 542 .type = RTE_FLOW_ITEM_TYPE_UDP, 543 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 544 }, 545 [MLX5_EXPANSION_IPV6_TCP] = { 546 .type = RTE_FLOW_ITEM_TYPE_TCP, 547 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 548 }, 549 }; 550 551 static const struct rte_flow_ops mlx5_flow_ops = { 552 .validate = mlx5_flow_validate, 553 .create = mlx5_flow_create, 554 .destroy = mlx5_flow_destroy, 555 .flush = mlx5_flow_flush, 556 .isolate = mlx5_flow_isolate, 557 .query = mlx5_flow_query, 558 .dev_dump = mlx5_flow_dev_dump, 559 .get_aged_flows = mlx5_flow_get_aged_flows, 560 }; 561 562 /* Convert FDIR request to Generic flow. 
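 *
 * The struct below is a scratch area for that conversion. A rough,
 * illustrative mapping (an assumption based on the field layout, not an
 * exact contract) is:
 *
 *   items[0]   -> l2 / l2_mask   (RTE_FLOW_ITEM_TYPE_ETH)
 *   items[1]   -> l3 / l3_mask   (IPv4 or IPv6, per the union)
 *   items[2]   -> l4 / l4_mask   (UDP or TCP, per the union)
 *   items[3]   -> end of pattern
 *   actions[0] -> queue          (RTE_FLOW_ACTION_TYPE_QUEUE)
 *   actions[1] -> end of action list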
*/ 563 struct mlx5_fdir { 564 struct rte_flow_attr attr; 565 struct rte_flow_item items[4]; 566 struct rte_flow_item_eth l2; 567 struct rte_flow_item_eth l2_mask; 568 union { 569 struct rte_flow_item_ipv4 ipv4; 570 struct rte_flow_item_ipv6 ipv6; 571 } l3; 572 union { 573 struct rte_flow_item_ipv4 ipv4; 574 struct rte_flow_item_ipv6 ipv6; 575 } l3_mask; 576 union { 577 struct rte_flow_item_udp udp; 578 struct rte_flow_item_tcp tcp; 579 } l4; 580 union { 581 struct rte_flow_item_udp udp; 582 struct rte_flow_item_tcp tcp; 583 } l4_mask; 584 struct rte_flow_action actions[2]; 585 struct rte_flow_action_queue queue; 586 }; 587 588 /* Tunnel information. */ 589 struct mlx5_flow_tunnel_info { 590 uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ 591 uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */ 592 }; 593 594 static struct mlx5_flow_tunnel_info tunnels_info[] = { 595 { 596 .tunnel = MLX5_FLOW_LAYER_VXLAN, 597 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP, 598 }, 599 { 600 .tunnel = MLX5_FLOW_LAYER_GENEVE, 601 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP, 602 }, 603 { 604 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE, 605 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP, 606 }, 607 { 608 .tunnel = MLX5_FLOW_LAYER_GRE, 609 .ptype = RTE_PTYPE_TUNNEL_GRE, 610 }, 611 { 612 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP, 613 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP, 614 }, 615 { 616 .tunnel = MLX5_FLOW_LAYER_MPLS, 617 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE, 618 }, 619 { 620 .tunnel = MLX5_FLOW_LAYER_NVGRE, 621 .ptype = RTE_PTYPE_TUNNEL_NVGRE, 622 }, 623 { 624 .tunnel = MLX5_FLOW_LAYER_IPIP, 625 .ptype = RTE_PTYPE_TUNNEL_IP, 626 }, 627 { 628 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP, 629 .ptype = RTE_PTYPE_TUNNEL_IP, 630 }, 631 { 632 .tunnel = MLX5_FLOW_LAYER_GTP, 633 .ptype = RTE_PTYPE_TUNNEL_GTPU, 634 }, 635 }; 636 637 /** 638 * Translate tag ID to register. 639 * 640 * @param[in] dev 641 * Pointer to the Ethernet device structure. 642 * @param[in] feature 643 * The feature that request the register. 644 * @param[in] id 645 * The request register ID. 646 * @param[out] error 647 * Error description in case of any. 648 * 649 * @return 650 * The request register on success, a negative errno 651 * value otherwise and rte_errno is set. 
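 *
 * A minimal call sketch (illustrative only; @p id is not used for this
 * feature):
 *
 *   struct rte_flow_error err;
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, &err);
 *
 *   if (reg < 0)
 *           return reg;
 *
 * With dv_xmeta_en == MLX5_XMETA_MODE_META16 the call above resolves to
 * REG_C_0, in legacy mode to REG_B.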
652 */ 653 int 654 mlx5_flow_get_reg_id(struct rte_eth_dev *dev, 655 enum mlx5_feature_name feature, 656 uint32_t id, 657 struct rte_flow_error *error) 658 { 659 struct mlx5_priv *priv = dev->data->dev_private; 660 struct mlx5_dev_config *config = &priv->config; 661 enum modify_reg start_reg; 662 bool skip_mtr_reg = false; 663 664 switch (feature) { 665 case MLX5_HAIRPIN_RX: 666 return REG_B; 667 case MLX5_HAIRPIN_TX: 668 return REG_A; 669 case MLX5_METADATA_RX: 670 switch (config->dv_xmeta_en) { 671 case MLX5_XMETA_MODE_LEGACY: 672 return REG_B; 673 case MLX5_XMETA_MODE_META16: 674 return REG_C_0; 675 case MLX5_XMETA_MODE_META32: 676 return REG_C_1; 677 } 678 break; 679 case MLX5_METADATA_TX: 680 return REG_A; 681 case MLX5_METADATA_FDB: 682 switch (config->dv_xmeta_en) { 683 case MLX5_XMETA_MODE_LEGACY: 684 return REG_NON; 685 case MLX5_XMETA_MODE_META16: 686 return REG_C_0; 687 case MLX5_XMETA_MODE_META32: 688 return REG_C_1; 689 } 690 break; 691 case MLX5_FLOW_MARK: 692 switch (config->dv_xmeta_en) { 693 case MLX5_XMETA_MODE_LEGACY: 694 return REG_NON; 695 case MLX5_XMETA_MODE_META16: 696 return REG_C_1; 697 case MLX5_XMETA_MODE_META32: 698 return REG_C_0; 699 } 700 break; 701 case MLX5_MTR_SFX: 702 /* 703 * If meter color and flow match share one register, flow match 704 * should use the meter color register for match. 705 */ 706 if (priv->mtr_reg_share) 707 return priv->mtr_color_reg; 708 else 709 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : 710 REG_C_3; 711 case MLX5_MTR_COLOR: 712 MLX5_ASSERT(priv->mtr_color_reg != REG_NON); 713 return priv->mtr_color_reg; 714 case MLX5_COPY_MARK: 715 /* 716 * Metadata COPY_MARK register using is in meter suffix sub 717 * flow while with meter. It's safe to share the same register. 718 */ 719 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3; 720 case MLX5_APP_TAG: 721 /* 722 * If meter is enable, it will engage the register for color 723 * match and flow match. If meter color match is not using the 724 * REG_C_2, need to skip the REG_C_x be used by meter color 725 * match. 726 * If meter is disable, free to use all available registers. 727 */ 728 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 : 729 (priv->mtr_reg_share ? REG_C_3 : REG_C_4); 730 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2); 731 if (id > (REG_C_7 - start_reg)) 732 return rte_flow_error_set(error, EINVAL, 733 RTE_FLOW_ERROR_TYPE_ITEM, 734 NULL, "invalid tag id"); 735 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON) 736 return rte_flow_error_set(error, ENOTSUP, 737 RTE_FLOW_ERROR_TYPE_ITEM, 738 NULL, "unsupported tag id"); 739 /* 740 * This case means meter is using the REG_C_x great than 2. 741 * Take care not to conflict with meter color REG_C_x. 742 * If the available index REG_C_y >= REG_C_x, skip the 743 * color register. 
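 * For example (an illustrative register layout): with start_reg equal to
 * REG_C_2 and the meter color using REG_C_3, a tag id that would resolve
 * to REG_C_3 is shifted to the register of id + 1 instead, provided that
 * register is not REG_NON.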
744 */ 745 if (skip_mtr_reg && config->flow_mreg_c 746 [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { 747 if (id >= (REG_C_7 - start_reg)) 748 return rte_flow_error_set(error, EINVAL, 749 RTE_FLOW_ERROR_TYPE_ITEM, 750 NULL, "invalid tag id"); 751 if (config->flow_mreg_c 752 [id + 1 + start_reg - REG_C_0] != REG_NON) 753 return config->flow_mreg_c 754 [id + 1 + start_reg - REG_C_0]; 755 return rte_flow_error_set(error, ENOTSUP, 756 RTE_FLOW_ERROR_TYPE_ITEM, 757 NULL, "unsupported tag id"); 758 } 759 return config->flow_mreg_c[id + start_reg - REG_C_0]; 760 } 761 MLX5_ASSERT(false); 762 return rte_flow_error_set(error, EINVAL, 763 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 764 NULL, "invalid feature name"); 765 } 766 767 /** 768 * Check extensive flow metadata register support. 769 * 770 * @param dev 771 * Pointer to rte_eth_dev structure. 772 * 773 * @return 774 * True if device supports extensive flow metadata register, otherwise false. 775 */ 776 bool 777 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev) 778 { 779 struct mlx5_priv *priv = dev->data->dev_private; 780 struct mlx5_dev_config *config = &priv->config; 781 782 /* 783 * Having available reg_c can be regarded inclusively as supporting 784 * extensive flow metadata register, which could mean, 785 * - metadata register copy action by modify header. 786 * - 16 modify header actions is supported. 787 * - reg_c's are preserved across different domain (FDB and NIC) on 788 * packet loopback by flow lookup miss. 789 */ 790 return config->flow_mreg_c[2] != REG_NON; 791 } 792 793 /** 794 * Verify the @p item specifications (spec, last, mask) are compatible with the 795 * NIC capabilities. 796 * 797 * @param[in] item 798 * Item specification. 799 * @param[in] mask 800 * @p item->mask or flow default bit-masks. 801 * @param[in] nic_mask 802 * Bit-masks covering supported fields by the NIC to compare with user mask. 803 * @param[in] size 804 * Bit-masks size in bytes. 805 * @param[in] range_accepted 806 * True if range of values is accepted for specific fields, false otherwise. 807 * @param[out] error 808 * Pointer to error structure. 809 * 810 * @return 811 * 0 on success, a negative errno value otherwise and rte_errno is set. 812 */ 813 int 814 mlx5_flow_item_acceptable(const struct rte_flow_item *item, 815 const uint8_t *mask, 816 const uint8_t *nic_mask, 817 unsigned int size, 818 bool range_accepted, 819 struct rte_flow_error *error) 820 { 821 unsigned int i; 822 823 MLX5_ASSERT(nic_mask); 824 for (i = 0; i < size; ++i) 825 if ((nic_mask[i] | mask[i]) != nic_mask[i]) 826 return rte_flow_error_set(error, ENOTSUP, 827 RTE_FLOW_ERROR_TYPE_ITEM, 828 item, 829 "mask enables non supported" 830 " bits"); 831 if (!item->spec && (item->mask || item->last)) 832 return rte_flow_error_set(error, EINVAL, 833 RTE_FLOW_ERROR_TYPE_ITEM, item, 834 "mask/last without a spec is not" 835 " supported"); 836 if (item->spec && item->last && !range_accepted) { 837 uint8_t spec[size]; 838 uint8_t last[size]; 839 unsigned int i; 840 int ret; 841 842 for (i = 0; i < size; ++i) { 843 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i]; 844 last[i] = ((const uint8_t *)item->last)[i] & mask[i]; 845 } 846 ret = memcmp(spec, last, size); 847 if (ret != 0) 848 return rte_flow_error_set(error, EINVAL, 849 RTE_FLOW_ERROR_TYPE_ITEM, 850 item, 851 "range is not valid"); 852 } 853 return 0; 854 } 855 856 /** 857 * Adjust the hash fields according to the @p flow information. 858 * 859 * @param[in] dev_flow. 860 * Pointer to the mlx5_flow. 
861 * @param[in] tunnel 862 * 1 when the hash field is for a tunnel item. 863 * @param[in] layer_types 864 * ETH_RSS_* types. 865 * @param[in] hash_fields 866 * Item hash fields. 867 * 868 * @return 869 * The hash fields that should be used. 870 */ 871 uint64_t 872 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc, 873 int tunnel __rte_unused, uint64_t layer_types, 874 uint64_t hash_fields) 875 { 876 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 877 int rss_request_inner = rss_desc->level >= 2; 878 879 /* Check RSS hash level for tunnel. */ 880 if (tunnel && rss_request_inner) 881 hash_fields |= IBV_RX_HASH_INNER; 882 else if (tunnel || rss_request_inner) 883 return 0; 884 #endif 885 /* Check if requested layer matches RSS hash fields. */ 886 if (!(rss_desc->types & layer_types)) 887 return 0; 888 return hash_fields; 889 } 890 891 /** 892 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 893 * if several tunnel rules are used on this queue, the tunnel ptype will be 894 * cleared. 895 * 896 * @param rxq_ctrl 897 * Rx queue to update. 898 */ 899 static void 900 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 901 { 902 unsigned int i; 903 uint32_t tunnel_ptype = 0; 904 905 /* Look up for the ptype to use. */ 906 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 907 if (!rxq_ctrl->flow_tunnels_n[i]) 908 continue; 909 if (!tunnel_ptype) { 910 tunnel_ptype = tunnels_info[i].ptype; 911 } else { 912 tunnel_ptype = 0; 913 break; 914 } 915 } 916 rxq_ctrl->rxq.tunnel = tunnel_ptype; 917 } 918 919 /** 920 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the devive 921 * flow. 922 * 923 * @param[in] dev 924 * Pointer to the Ethernet device structure. 925 * @param[in] dev_handle 926 * Pointer to device flow handle structure. 927 */ 928 static void 929 flow_drv_rxq_flags_set(struct rte_eth_dev *dev, 930 struct mlx5_flow_handle *dev_handle) 931 { 932 struct mlx5_priv *priv = dev->data->dev_private; 933 const int mark = dev_handle->mark; 934 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 935 struct mlx5_hrxq *hrxq; 936 unsigned int i; 937 938 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 939 return; 940 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 941 dev_handle->rix_hrxq); 942 if (!hrxq) 943 return; 944 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 945 int idx = hrxq->ind_table->queues[i]; 946 struct mlx5_rxq_ctrl *rxq_ctrl = 947 container_of((*priv->rxqs)[idx], 948 struct mlx5_rxq_ctrl, rxq); 949 950 /* 951 * To support metadata register copy on Tx loopback, 952 * this must be always enabled (metadata may arive 953 * from other port - not from local flows only. 954 */ 955 if (priv->config.dv_flow_en && 956 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 957 mlx5_flow_ext_mreg_supported(dev)) { 958 rxq_ctrl->rxq.mark = 1; 959 rxq_ctrl->flow_mark_n = 1; 960 } else if (mark) { 961 rxq_ctrl->rxq.mark = 1; 962 rxq_ctrl->flow_mark_n++; 963 } 964 if (tunnel) { 965 unsigned int j; 966 967 /* Increase the counter matching the flow. */ 968 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 969 if ((tunnels_info[j].tunnel & 970 dev_handle->layers) == 971 tunnels_info[j].tunnel) { 972 rxq_ctrl->flow_tunnels_n[j]++; 973 break; 974 } 975 } 976 flow_rxq_tunnel_ptype_update(rxq_ctrl); 977 } 978 } 979 } 980 981 /** 982 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow 983 * 984 * @param[in] dev 985 * Pointer to the Ethernet device structure. 986 * @param[in] flow 987 * Pointer to flow structure. 
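 *
 * This wrapper walks every device flow handle attached to @p flow and lets
 * flow_drv_rxq_flags_set() update the mark and tunnel ptype state of the
 * Rx queues referenced by each handle.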
988 */ 989 static void 990 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 991 { 992 struct mlx5_priv *priv = dev->data->dev_private; 993 uint32_t handle_idx; 994 struct mlx5_flow_handle *dev_handle; 995 996 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 997 handle_idx, dev_handle, next) 998 flow_drv_rxq_flags_set(dev, dev_handle); 999 } 1000 1001 /** 1002 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 1003 * device flow if no other flow uses it with the same kind of request. 1004 * 1005 * @param dev 1006 * Pointer to Ethernet device. 1007 * @param[in] dev_handle 1008 * Pointer to the device flow handle structure. 1009 */ 1010 static void 1011 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, 1012 struct mlx5_flow_handle *dev_handle) 1013 { 1014 struct mlx5_priv *priv = dev->data->dev_private; 1015 const int mark = dev_handle->mark; 1016 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 1017 struct mlx5_hrxq *hrxq; 1018 unsigned int i; 1019 1020 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 1021 return; 1022 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 1023 dev_handle->rix_hrxq); 1024 if (!hrxq) 1025 return; 1026 MLX5_ASSERT(dev->data->dev_started); 1027 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 1028 int idx = hrxq->ind_table->queues[i]; 1029 struct mlx5_rxq_ctrl *rxq_ctrl = 1030 container_of((*priv->rxqs)[idx], 1031 struct mlx5_rxq_ctrl, rxq); 1032 1033 if (priv->config.dv_flow_en && 1034 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 1035 mlx5_flow_ext_mreg_supported(dev)) { 1036 rxq_ctrl->rxq.mark = 1; 1037 rxq_ctrl->flow_mark_n = 1; 1038 } else if (mark) { 1039 rxq_ctrl->flow_mark_n--; 1040 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 1041 } 1042 if (tunnel) { 1043 unsigned int j; 1044 1045 /* Decrease the counter matching the flow. */ 1046 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 1047 if ((tunnels_info[j].tunnel & 1048 dev_handle->layers) == 1049 tunnels_info[j].tunnel) { 1050 rxq_ctrl->flow_tunnels_n[j]--; 1051 break; 1052 } 1053 } 1054 flow_rxq_tunnel_ptype_update(rxq_ctrl); 1055 } 1056 } 1057 } 1058 1059 /** 1060 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 1061 * @p flow if no other flow uses it with the same kind of request. 1062 * 1063 * @param dev 1064 * Pointer to Ethernet device. 1065 * @param[in] flow 1066 * Pointer to the flow. 1067 */ 1068 static void 1069 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 1070 { 1071 struct mlx5_priv *priv = dev->data->dev_private; 1072 uint32_t handle_idx; 1073 struct mlx5_flow_handle *dev_handle; 1074 1075 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 1076 handle_idx, dev_handle, next) 1077 flow_drv_rxq_flags_trim(dev, dev_handle); 1078 } 1079 1080 /** 1081 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 1082 * 1083 * @param dev 1084 * Pointer to Ethernet device. 
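 *
 * Unlike flow_rxq_flags_trim(), which decrements the counters for a single
 * flow, this resets the mark and tunnel ptype bookkeeping of every Rx queue
 * in one pass.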
1085 */ 1086 static void 1087 flow_rxq_flags_clear(struct rte_eth_dev *dev) 1088 { 1089 struct mlx5_priv *priv = dev->data->dev_private; 1090 unsigned int i; 1091 1092 for (i = 0; i != priv->rxqs_n; ++i) { 1093 struct mlx5_rxq_ctrl *rxq_ctrl; 1094 unsigned int j; 1095 1096 if (!(*priv->rxqs)[i]) 1097 continue; 1098 rxq_ctrl = container_of((*priv->rxqs)[i], 1099 struct mlx5_rxq_ctrl, rxq); 1100 rxq_ctrl->flow_mark_n = 0; 1101 rxq_ctrl->rxq.mark = 0; 1102 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 1103 rxq_ctrl->flow_tunnels_n[j] = 0; 1104 rxq_ctrl->rxq.tunnel = 0; 1105 } 1106 } 1107 1108 /** 1109 * Set the Rx queue dynamic metadata (mask and offset) for a flow 1110 * 1111 * @param[in] dev 1112 * Pointer to the Ethernet device structure. 1113 */ 1114 void 1115 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) 1116 { 1117 struct mlx5_priv *priv = dev->data->dev_private; 1118 struct mlx5_rxq_data *data; 1119 unsigned int i; 1120 1121 for (i = 0; i != priv->rxqs_n; ++i) { 1122 if (!(*priv->rxqs)[i]) 1123 continue; 1124 data = (*priv->rxqs)[i]; 1125 if (!rte_flow_dynf_metadata_avail()) { 1126 data->dynf_meta = 0; 1127 data->flow_meta_mask = 0; 1128 data->flow_meta_offset = -1; 1129 } else { 1130 data->dynf_meta = 1; 1131 data->flow_meta_mask = rte_flow_dynf_metadata_mask; 1132 data->flow_meta_offset = rte_flow_dynf_metadata_offs; 1133 } 1134 } 1135 } 1136 1137 /* 1138 * return a pointer to the desired action in the list of actions. 1139 * 1140 * @param[in] actions 1141 * The list of actions to search the action in. 1142 * @param[in] action 1143 * The action to find. 1144 * 1145 * @return 1146 * Pointer to the action in the list, if found. NULL otherwise. 1147 */ 1148 const struct rte_flow_action * 1149 mlx5_flow_find_action(const struct rte_flow_action *actions, 1150 enum rte_flow_action_type action) 1151 { 1152 if (actions == NULL) 1153 return NULL; 1154 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) 1155 if (actions->type == action) 1156 return actions; 1157 return NULL; 1158 } 1159 1160 /* 1161 * Validate the flag action. 1162 * 1163 * @param[in] action_flags 1164 * Bit-fields that holds the actions detected until now. 1165 * @param[in] attr 1166 * Attributes of flow that includes this action. 1167 * @param[out] error 1168 * Pointer to error structure. 1169 * 1170 * @return 1171 * 0 on success, a negative errno value otherwise and rte_errno is set. 1172 */ 1173 int 1174 mlx5_flow_validate_action_flag(uint64_t action_flags, 1175 const struct rte_flow_attr *attr, 1176 struct rte_flow_error *error) 1177 { 1178 if (action_flags & MLX5_FLOW_ACTION_MARK) 1179 return rte_flow_error_set(error, EINVAL, 1180 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1181 "can't mark and flag in same flow"); 1182 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1183 return rte_flow_error_set(error, EINVAL, 1184 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1185 "can't have 2 flag" 1186 " actions in same flow"); 1187 if (attr->egress) 1188 return rte_flow_error_set(error, ENOTSUP, 1189 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1190 "flag action not supported for " 1191 "egress"); 1192 return 0; 1193 } 1194 1195 /* 1196 * Validate the mark action. 1197 * 1198 * @param[in] action 1199 * Pointer to the queue action. 1200 * @param[in] action_flags 1201 * Bit-fields that holds the actions detected until now. 1202 * @param[in] attr 1203 * Attributes of flow that includes this action. 1204 * @param[out] error 1205 * Pointer to error structure. 1206 * 1207 * @return 1208 * 0 on success, a negative errno value otherwise and rte_errno is set. 
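 *
 * A minimal validation sketch (values are illustrative; ret, err, attr and
 * action_flags come from the caller's validation context):
 *
 *   struct rte_flow_action_mark conf = { .id = 42 };
 *   struct rte_flow_action act = {
 *           .type = RTE_FLOW_ACTION_TYPE_MARK,
 *           .conf = &conf,
 *   };
 *
 *   ret = mlx5_flow_validate_action_mark(&act, action_flags, attr, &err);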
1209 */ 1210 int 1211 mlx5_flow_validate_action_mark(const struct rte_flow_action *action, 1212 uint64_t action_flags, 1213 const struct rte_flow_attr *attr, 1214 struct rte_flow_error *error) 1215 { 1216 const struct rte_flow_action_mark *mark = action->conf; 1217 1218 if (!mark) 1219 return rte_flow_error_set(error, EINVAL, 1220 RTE_FLOW_ERROR_TYPE_ACTION, 1221 action, 1222 "configuration cannot be null"); 1223 if (mark->id >= MLX5_FLOW_MARK_MAX) 1224 return rte_flow_error_set(error, EINVAL, 1225 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1226 &mark->id, 1227 "mark id must in 0 <= id < " 1228 RTE_STR(MLX5_FLOW_MARK_MAX)); 1229 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1230 return rte_flow_error_set(error, EINVAL, 1231 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1232 "can't flag and mark in same flow"); 1233 if (action_flags & MLX5_FLOW_ACTION_MARK) 1234 return rte_flow_error_set(error, EINVAL, 1235 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1236 "can't have 2 mark actions in same" 1237 " flow"); 1238 if (attr->egress) 1239 return rte_flow_error_set(error, ENOTSUP, 1240 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1241 "mark action not supported for " 1242 "egress"); 1243 return 0; 1244 } 1245 1246 /* 1247 * Validate the drop action. 1248 * 1249 * @param[in] action_flags 1250 * Bit-fields that holds the actions detected until now. 1251 * @param[in] attr 1252 * Attributes of flow that includes this action. 1253 * @param[out] error 1254 * Pointer to error structure. 1255 * 1256 * @return 1257 * 0 on success, a negative errno value otherwise and rte_errno is set. 1258 */ 1259 int 1260 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused, 1261 const struct rte_flow_attr *attr, 1262 struct rte_flow_error *error) 1263 { 1264 if (attr->egress) 1265 return rte_flow_error_set(error, ENOTSUP, 1266 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1267 "drop action not supported for " 1268 "egress"); 1269 return 0; 1270 } 1271 1272 /* 1273 * Validate the queue action. 1274 * 1275 * @param[in] action 1276 * Pointer to the queue action. 1277 * @param[in] action_flags 1278 * Bit-fields that holds the actions detected until now. 1279 * @param[in] dev 1280 * Pointer to the Ethernet device structure. 1281 * @param[in] attr 1282 * Attributes of flow that includes this action. 1283 * @param[out] error 1284 * Pointer to error structure. 1285 * 1286 * @return 1287 * 0 on success, a negative errno value otherwise and rte_errno is set. 
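 *
 * A minimal validation sketch (queue index 0 is only an example and must
 * refer to a configured Rx queue of @p dev):
 *
 *   struct rte_flow_action_queue conf = { .index = 0 };
 *   struct rte_flow_action act = {
 *           .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *           .conf = &conf,
 *   };
 *
 *   ret = mlx5_flow_validate_action_queue(&act, action_flags, dev, attr,
 *                                         &err);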
1288 */ 1289 int 1290 mlx5_flow_validate_action_queue(const struct rte_flow_action *action, 1291 uint64_t action_flags, 1292 struct rte_eth_dev *dev, 1293 const struct rte_flow_attr *attr, 1294 struct rte_flow_error *error) 1295 { 1296 struct mlx5_priv *priv = dev->data->dev_private; 1297 const struct rte_flow_action_queue *queue = action->conf; 1298 1299 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1300 return rte_flow_error_set(error, EINVAL, 1301 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1302 "can't have 2 fate actions in" 1303 " same flow"); 1304 if (!priv->rxqs_n) 1305 return rte_flow_error_set(error, EINVAL, 1306 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1307 NULL, "No Rx queues configured"); 1308 if (queue->index >= priv->rxqs_n) 1309 return rte_flow_error_set(error, EINVAL, 1310 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1311 &queue->index, 1312 "queue index out of range"); 1313 if (!(*priv->rxqs)[queue->index]) 1314 return rte_flow_error_set(error, EINVAL, 1315 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1316 &queue->index, 1317 "queue is not configured"); 1318 if (attr->egress) 1319 return rte_flow_error_set(error, ENOTSUP, 1320 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1321 "queue action not supported for " 1322 "egress"); 1323 return 0; 1324 } 1325 1326 /* 1327 * Validate the rss action. 1328 * 1329 * @param[in] action 1330 * Pointer to the queue action. 1331 * @param[in] action_flags 1332 * Bit-fields that holds the actions detected until now. 1333 * @param[in] dev 1334 * Pointer to the Ethernet device structure. 1335 * @param[in] attr 1336 * Attributes of flow that includes this action. 1337 * @param[in] item_flags 1338 * Items that were detected. 1339 * @param[out] error 1340 * Pointer to error structure. 1341 * 1342 * @return 1343 * 0 on success, a negative errno value otherwise and rte_errno is set. 1344 */ 1345 int 1346 mlx5_flow_validate_action_rss(const struct rte_flow_action *action, 1347 uint64_t action_flags, 1348 struct rte_eth_dev *dev, 1349 const struct rte_flow_attr *attr, 1350 uint64_t item_flags, 1351 struct rte_flow_error *error) 1352 { 1353 struct mlx5_priv *priv = dev->data->dev_private; 1354 const struct rte_flow_action_rss *rss = action->conf; 1355 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1356 unsigned int i; 1357 1358 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1359 return rte_flow_error_set(error, EINVAL, 1360 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1361 "can't have 2 fate actions" 1362 " in same flow"); 1363 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 1364 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 1365 return rte_flow_error_set(error, ENOTSUP, 1366 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1367 &rss->func, 1368 "RSS hash function not supported"); 1369 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1370 if (rss->level > 2) 1371 #else 1372 if (rss->level > 1) 1373 #endif 1374 return rte_flow_error_set(error, ENOTSUP, 1375 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1376 &rss->level, 1377 "tunnel RSS is not supported"); 1378 /* allow RSS key_len 0 in case of NULL (default) RSS key. 
*/ 1379 if (rss->key_len == 0 && rss->key != NULL) 1380 return rte_flow_error_set(error, ENOTSUP, 1381 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1382 &rss->key_len, 1383 "RSS hash key length 0"); 1384 if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN) 1385 return rte_flow_error_set(error, ENOTSUP, 1386 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1387 &rss->key_len, 1388 "RSS hash key too small"); 1389 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 1390 return rte_flow_error_set(error, ENOTSUP, 1391 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1392 &rss->key_len, 1393 "RSS hash key too large"); 1394 if (rss->queue_num > priv->config.ind_table_max_size) 1395 return rte_flow_error_set(error, ENOTSUP, 1396 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1397 &rss->queue_num, 1398 "number of queues too large"); 1399 if (rss->types & MLX5_RSS_HF_MASK) 1400 return rte_flow_error_set(error, ENOTSUP, 1401 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1402 &rss->types, 1403 "some RSS protocols are not" 1404 " supported"); 1405 if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) && 1406 !(rss->types & ETH_RSS_IP)) 1407 return rte_flow_error_set(error, EINVAL, 1408 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1409 "L3 partial RSS requested but L3 RSS" 1410 " type not specified"); 1411 if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) && 1412 !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP))) 1413 return rte_flow_error_set(error, EINVAL, 1414 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1415 "L4 partial RSS requested but L4 RSS" 1416 " type not specified"); 1417 if (!priv->rxqs_n) 1418 return rte_flow_error_set(error, EINVAL, 1419 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1420 NULL, "No Rx queues configured"); 1421 if (!rss->queue_num) 1422 return rte_flow_error_set(error, EINVAL, 1423 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1424 NULL, "No queues configured"); 1425 for (i = 0; i != rss->queue_num; ++i) { 1426 if (rss->queue[i] >= priv->rxqs_n) 1427 return rte_flow_error_set 1428 (error, EINVAL, 1429 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1430 &rss->queue[i], "queue index out of range"); 1431 if (!(*priv->rxqs)[rss->queue[i]]) 1432 return rte_flow_error_set 1433 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1434 &rss->queue[i], "queue is not configured"); 1435 } 1436 if (attr->egress) 1437 return rte_flow_error_set(error, ENOTSUP, 1438 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1439 "rss action not supported for " 1440 "egress"); 1441 if (rss->level > 1 && !tunnel) 1442 return rte_flow_error_set(error, EINVAL, 1443 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1444 "inner RSS is not supported for " 1445 "non-tunnel flows"); 1446 if ((item_flags & MLX5_FLOW_LAYER_ECPRI) && 1447 !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) { 1448 return rte_flow_error_set(error, EINVAL, 1449 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1450 "RSS on eCPRI is not supported now"); 1451 } 1452 return 0; 1453 } 1454 1455 /* 1456 * Validate the default miss action. 1457 * 1458 * @param[in] action_flags 1459 * Bit-fields that holds the actions detected until now. 1460 * @param[out] error 1461 * Pointer to error structure. 1462 * 1463 * @return 1464 * 0 on success, a negative errno value otherwise and rte_errno is set. 
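 *
 * The action is rejected for egress flows, for any group other than 0 and
 * when the transfer attribute is set, as checked below.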
1465 */ 1466 int 1467 mlx5_flow_validate_action_default_miss(uint64_t action_flags, 1468 const struct rte_flow_attr *attr, 1469 struct rte_flow_error *error) 1470 { 1471 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1472 return rte_flow_error_set(error, EINVAL, 1473 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1474 "can't have 2 fate actions in" 1475 " same flow"); 1476 if (attr->egress) 1477 return rte_flow_error_set(error, ENOTSUP, 1478 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1479 "default miss action not supported " 1480 "for egress"); 1481 if (attr->group) 1482 return rte_flow_error_set(error, ENOTSUP, 1483 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL, 1484 "only group 0 is supported"); 1485 if (attr->transfer) 1486 return rte_flow_error_set(error, ENOTSUP, 1487 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1488 NULL, "transfer is not supported"); 1489 return 0; 1490 } 1491 1492 /* 1493 * Validate the count action. 1494 * 1495 * @param[in] dev 1496 * Pointer to the Ethernet device structure. 1497 * @param[in] attr 1498 * Attributes of flow that includes this action. 1499 * @param[out] error 1500 * Pointer to error structure. 1501 * 1502 * @return 1503 * 0 on success, a negative errno value otherwise and rte_errno is set. 1504 */ 1505 int 1506 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, 1507 const struct rte_flow_attr *attr, 1508 struct rte_flow_error *error) 1509 { 1510 if (attr->egress) 1511 return rte_flow_error_set(error, ENOTSUP, 1512 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1513 "count action not supported for " 1514 "egress"); 1515 return 0; 1516 } 1517 1518 /** 1519 * Verify the @p attributes will be correctly understood by the NIC and store 1520 * them in the @p flow if everything is correct. 1521 * 1522 * @param[in] dev 1523 * Pointer to the Ethernet device structure. 1524 * @param[in] attributes 1525 * Pointer to flow attributes 1526 * @param[out] error 1527 * Pointer to error structure. 1528 * 1529 * @return 1530 * 0 on success, a negative errno value otherwise and rte_errno is set. 1531 */ 1532 int 1533 mlx5_flow_validate_attributes(struct rte_eth_dev *dev, 1534 const struct rte_flow_attr *attributes, 1535 struct rte_flow_error *error) 1536 { 1537 struct mlx5_priv *priv = dev->data->dev_private; 1538 uint32_t priority_max = priv->config.flow_prio - 1; 1539 1540 if (attributes->group) 1541 return rte_flow_error_set(error, ENOTSUP, 1542 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 1543 NULL, "groups is not supported"); 1544 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 1545 attributes->priority >= priority_max) 1546 return rte_flow_error_set(error, ENOTSUP, 1547 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 1548 NULL, "priority out of range"); 1549 if (attributes->egress) 1550 return rte_flow_error_set(error, ENOTSUP, 1551 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1552 "egress is not supported"); 1553 if (attributes->transfer && !priv->config.dv_esw_en) 1554 return rte_flow_error_set(error, ENOTSUP, 1555 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1556 NULL, "transfer is not supported"); 1557 if (!attributes->ingress) 1558 return rte_flow_error_set(error, EINVAL, 1559 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 1560 NULL, 1561 "ingress attribute is mandatory"); 1562 return 0; 1563 } 1564 1565 /** 1566 * Validate ICMP6 item. 1567 * 1568 * @param[in] item 1569 * Item specification. 1570 * @param[in] item_flags 1571 * Bit-fields that holds the items detected until now. 1572 * @param[out] error 1573 * Pointer to error structure. 1574 * 1575 * @return 1576 * 0 on success, a negative errno value otherwise and rte_errno is set. 
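 *
 * Note: @p target_protocol is the next-protocol value taken from the
 * preceding L3 item; 0xFF means it was left unconstrained, any other value
 * must be IPPROTO_ICMPV6 for the item to be accepted.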
1577 */ 1578 int 1579 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, 1580 uint64_t item_flags, 1581 uint8_t target_protocol, 1582 struct rte_flow_error *error) 1583 { 1584 const struct rte_flow_item_icmp6 *mask = item->mask; 1585 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1586 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1587 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1588 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1589 MLX5_FLOW_LAYER_OUTER_L4; 1590 int ret; 1591 1592 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) 1593 return rte_flow_error_set(error, EINVAL, 1594 RTE_FLOW_ERROR_TYPE_ITEM, item, 1595 "protocol filtering not compatible" 1596 " with ICMP6 layer"); 1597 if (!(item_flags & l3m)) 1598 return rte_flow_error_set(error, EINVAL, 1599 RTE_FLOW_ERROR_TYPE_ITEM, item, 1600 "IPv6 is mandatory to filter on" 1601 " ICMP6"); 1602 if (item_flags & l4m) 1603 return rte_flow_error_set(error, EINVAL, 1604 RTE_FLOW_ERROR_TYPE_ITEM, item, 1605 "multiple L4 layers not supported"); 1606 if (!mask) 1607 mask = &rte_flow_item_icmp6_mask; 1608 ret = mlx5_flow_item_acceptable 1609 (item, (const uint8_t *)mask, 1610 (const uint8_t *)&rte_flow_item_icmp6_mask, 1611 sizeof(struct rte_flow_item_icmp6), 1612 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1613 if (ret < 0) 1614 return ret; 1615 return 0; 1616 } 1617 1618 /** 1619 * Validate ICMP item. 1620 * 1621 * @param[in] item 1622 * Item specification. 1623 * @param[in] item_flags 1624 * Bit-fields that holds the items detected until now. 1625 * @param[out] error 1626 * Pointer to error structure. 1627 * 1628 * @return 1629 * 0 on success, a negative errno value otherwise and rte_errno is set. 1630 */ 1631 int 1632 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, 1633 uint64_t item_flags, 1634 uint8_t target_protocol, 1635 struct rte_flow_error *error) 1636 { 1637 const struct rte_flow_item_icmp *mask = item->mask; 1638 const struct rte_flow_item_icmp nic_mask = { 1639 .hdr.icmp_type = 0xff, 1640 .hdr.icmp_code = 0xff, 1641 .hdr.icmp_ident = RTE_BE16(0xffff), 1642 .hdr.icmp_seq_nb = RTE_BE16(0xffff), 1643 }; 1644 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1645 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 1646 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 1647 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1648 MLX5_FLOW_LAYER_OUTER_L4; 1649 int ret; 1650 1651 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP) 1652 return rte_flow_error_set(error, EINVAL, 1653 RTE_FLOW_ERROR_TYPE_ITEM, item, 1654 "protocol filtering not compatible" 1655 " with ICMP layer"); 1656 if (!(item_flags & l3m)) 1657 return rte_flow_error_set(error, EINVAL, 1658 RTE_FLOW_ERROR_TYPE_ITEM, item, 1659 "IPv4 is mandatory to filter" 1660 " on ICMP"); 1661 if (item_flags & l4m) 1662 return rte_flow_error_set(error, EINVAL, 1663 RTE_FLOW_ERROR_TYPE_ITEM, item, 1664 "multiple L4 layers not supported"); 1665 if (!mask) 1666 mask = &nic_mask; 1667 ret = mlx5_flow_item_acceptable 1668 (item, (const uint8_t *)mask, 1669 (const uint8_t *)&nic_mask, 1670 sizeof(struct rte_flow_item_icmp), 1671 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1672 if (ret < 0) 1673 return ret; 1674 return 0; 1675 } 1676 1677 /** 1678 * Validate Ethernet item. 1679 * 1680 * @param[in] item 1681 * Item specification. 1682 * @param[in] item_flags 1683 * Bit-fields that holds the items detected until now. 1684 * @param[out] error 1685 * Pointer to error structure. 
1686 * 1687 * @return 1688 * 0 on success, a negative errno value otherwise and rte_errno is set. 1689 */ 1690 int 1691 mlx5_flow_validate_item_eth(const struct rte_flow_item *item, 1692 uint64_t item_flags, 1693 struct rte_flow_error *error) 1694 { 1695 const struct rte_flow_item_eth *mask = item->mask; 1696 const struct rte_flow_item_eth nic_mask = { 1697 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1698 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1699 .type = RTE_BE16(0xffff), 1700 }; 1701 int ret; 1702 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1703 const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 1704 MLX5_FLOW_LAYER_OUTER_L2; 1705 1706 if (item_flags & ethm) 1707 return rte_flow_error_set(error, ENOTSUP, 1708 RTE_FLOW_ERROR_TYPE_ITEM, item, 1709 "multiple L2 layers not supported"); 1710 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) || 1711 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3))) 1712 return rte_flow_error_set(error, EINVAL, 1713 RTE_FLOW_ERROR_TYPE_ITEM, item, 1714 "L2 layer should not follow " 1715 "L3 layers"); 1716 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) || 1717 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN))) 1718 return rte_flow_error_set(error, EINVAL, 1719 RTE_FLOW_ERROR_TYPE_ITEM, item, 1720 "L2 layer should not follow VLAN"); 1721 if (!mask) 1722 mask = &rte_flow_item_eth_mask; 1723 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1724 (const uint8_t *)&nic_mask, 1725 sizeof(struct rte_flow_item_eth), 1726 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1727 return ret; 1728 } 1729 1730 /** 1731 * Validate VLAN item. 1732 * 1733 * @param[in] item 1734 * Item specification. 1735 * @param[in] item_flags 1736 * Bit-fields that holds the items detected until now. 1737 * @param[in] dev 1738 * Ethernet device flow is being created on. 1739 * @param[out] error 1740 * Pointer to error structure. 1741 * 1742 * @return 1743 * 0 on success, a negative errno value otherwise and rte_errno is set. 1744 */ 1745 int 1746 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, 1747 uint64_t item_flags, 1748 struct rte_eth_dev *dev, 1749 struct rte_flow_error *error) 1750 { 1751 const struct rte_flow_item_vlan *spec = item->spec; 1752 const struct rte_flow_item_vlan *mask = item->mask; 1753 const struct rte_flow_item_vlan nic_mask = { 1754 .tci = RTE_BE16(UINT16_MAX), 1755 .inner_type = RTE_BE16(UINT16_MAX), 1756 }; 1757 uint16_t vlan_tag = 0; 1758 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1759 int ret; 1760 const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 1761 MLX5_FLOW_LAYER_INNER_L4) : 1762 (MLX5_FLOW_LAYER_OUTER_L3 | 1763 MLX5_FLOW_LAYER_OUTER_L4); 1764 const uint64_t vlanm = tunnel ? 
MLX5_FLOW_LAYER_INNER_VLAN : 1765 MLX5_FLOW_LAYER_OUTER_VLAN; 1766 1767 if (item_flags & vlanm) 1768 return rte_flow_error_set(error, EINVAL, 1769 RTE_FLOW_ERROR_TYPE_ITEM, item, 1770 "multiple VLAN layers not supported"); 1771 else if ((item_flags & l34m) != 0) 1772 return rte_flow_error_set(error, EINVAL, 1773 RTE_FLOW_ERROR_TYPE_ITEM, item, 1774 "VLAN cannot follow L3/L4 layer"); 1775 if (!mask) 1776 mask = &rte_flow_item_vlan_mask; 1777 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1778 (const uint8_t *)&nic_mask, 1779 sizeof(struct rte_flow_item_vlan), 1780 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1781 if (ret) 1782 return ret; 1783 if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { 1784 struct mlx5_priv *priv = dev->data->dev_private; 1785 1786 if (priv->vmwa_context) { 1787 /* 1788 * Non-NULL context means we have a virtual machine 1789 * and SR-IOV enabled, we have to create VLAN interface 1790 * to make hypervisor to setup E-Switch vport 1791 * context correctly. We avoid creating the multiple 1792 * VLAN interfaces, so we cannot support VLAN tag mask. 1793 */ 1794 return rte_flow_error_set(error, EINVAL, 1795 RTE_FLOW_ERROR_TYPE_ITEM, 1796 item, 1797 "VLAN tag mask is not" 1798 " supported in virtual" 1799 " environment"); 1800 } 1801 } 1802 if (spec) { 1803 vlan_tag = spec->tci; 1804 vlan_tag &= mask->tci; 1805 } 1806 /* 1807 * From verbs perspective an empty VLAN is equivalent 1808 * to a packet without VLAN layer. 1809 */ 1810 if (!vlan_tag) 1811 return rte_flow_error_set(error, EINVAL, 1812 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 1813 item->spec, 1814 "VLAN cannot be empty"); 1815 return 0; 1816 } 1817 1818 /** 1819 * Validate IPV4 item. 1820 * 1821 * @param[in] item 1822 * Item specification. 1823 * @param[in] item_flags 1824 * Bit-fields that holds the items detected until now. 1825 * @param[in] last_item 1826 * Previous validated item in the pattern items. 1827 * @param[in] ether_type 1828 * Type in the ethernet layer header (including dot1q). 1829 * @param[in] acc_mask 1830 * Acceptable mask, if NULL default internal default mask 1831 * will be used to check whether item fields are supported. 1832 * @param[in] range_accepted 1833 * True if range of values is accepted for specific fields, false otherwise. 1834 * @param[out] error 1835 * Pointer to error structure. 1836 * 1837 * @return 1838 * 0 on success, a negative errno value otherwise and rte_errno is set. 1839 */ 1840 int 1841 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, 1842 uint64_t item_flags, 1843 uint64_t last_item, 1844 uint16_t ether_type, 1845 const struct rte_flow_item_ipv4 *acc_mask, 1846 bool range_accepted, 1847 struct rte_flow_error *error) 1848 { 1849 const struct rte_flow_item_ipv4 *mask = item->mask; 1850 const struct rte_flow_item_ipv4 *spec = item->spec; 1851 const struct rte_flow_item_ipv4 nic_mask = { 1852 .hdr = { 1853 .src_addr = RTE_BE32(0xffffffff), 1854 .dst_addr = RTE_BE32(0xffffffff), 1855 .type_of_service = 0xff, 1856 .next_proto_id = 0xff, 1857 }, 1858 }; 1859 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1860 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1861 MLX5_FLOW_LAYER_OUTER_L3; 1862 const uint64_t l4m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4 : 1863 MLX5_FLOW_LAYER_OUTER_L4; 1864 int ret; 1865 uint8_t next_proto = 0xFF; 1866 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1867 MLX5_FLOW_LAYER_OUTER_VLAN | 1868 MLX5_FLOW_LAYER_INNER_VLAN); 1869 1870 if ((last_item & l2_vlan) && ether_type && 1871 ether_type != RTE_ETHER_TYPE_IPV4) 1872 return rte_flow_error_set(error, EINVAL, 1873 RTE_FLOW_ERROR_TYPE_ITEM, item, 1874 "IPv4 cannot follow L2/VLAN layer " 1875 "which ether type is not IPv4"); 1876 if (item_flags & MLX5_FLOW_LAYER_IPIP) { 1877 if (mask && spec) 1878 next_proto = mask->hdr.next_proto_id & 1879 spec->hdr.next_proto_id; 1880 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1881 return rte_flow_error_set(error, EINVAL, 1882 RTE_FLOW_ERROR_TYPE_ITEM, 1883 item, 1884 "multiple tunnel " 1885 "not supported"); 1886 } 1887 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 1888 return rte_flow_error_set(error, EINVAL, 1889 RTE_FLOW_ERROR_TYPE_ITEM, item, 1890 "wrong tunnel type - IPv6 specified " 1891 "but IPv4 item provided"); 1892 if (item_flags & l3m) 1893 return rte_flow_error_set(error, ENOTSUP, 1894 RTE_FLOW_ERROR_TYPE_ITEM, item, 1895 "multiple L3 layers not supported"); 1896 else if (item_flags & l4m) 1897 return rte_flow_error_set(error, EINVAL, 1898 RTE_FLOW_ERROR_TYPE_ITEM, item, 1899 "L3 cannot follow an L4 layer."); 1900 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1901 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1902 return rte_flow_error_set(error, EINVAL, 1903 RTE_FLOW_ERROR_TYPE_ITEM, item, 1904 "L3 cannot follow an NVGRE layer."); 1905 if (!mask) 1906 mask = &rte_flow_item_ipv4_mask; 1907 else if (mask->hdr.next_proto_id != 0 && 1908 mask->hdr.next_proto_id != 0xff) 1909 return rte_flow_error_set(error, EINVAL, 1910 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 1911 "partial mask is not supported" 1912 " for protocol"); 1913 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1914 acc_mask ? (const uint8_t *)acc_mask 1915 : (const uint8_t *)&nic_mask, 1916 sizeof(struct rte_flow_item_ipv4), 1917 range_accepted, error); 1918 if (ret < 0) 1919 return ret; 1920 return 0; 1921 } 1922 1923 /** 1924 * Validate IPV6 item. 1925 * 1926 * @param[in] item 1927 * Item specification. 1928 * @param[in] item_flags 1929 * Bit-fields that holds the items detected until now. 1930 * @param[in] last_item 1931 * Previous validated item in the pattern items. 1932 * @param[in] ether_type 1933 * Type in the ethernet layer header (including dot1q). 1934 * @param[in] acc_mask 1935 * Acceptable mask, if NULL default internal default mask 1936 * will be used to check whether item fields are supported. 1937 * @param[out] error 1938 * Pointer to error structure. 1939 * 1940 * @return 1941 * 0 on success, a negative errno value otherwise and rte_errno is set. 
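 *
 * A minimal caller sketch (illustrative only; the enclosing loop, the
 * @p item_flags / @p last_item bookkeeping and the @p ether_type variable
 * are assumptions, not code taken from this driver):
 *
 * @code
 * uint64_t item_flags = 0;
 * uint64_t last_item = 0;
 * uint16_t ether_type = 0;
 * int ret;
 *
 * for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
 *         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
 *
 *         switch (items->type) {
 *         case RTE_FLOW_ITEM_TYPE_ETH:
 *                 ret = mlx5_flow_validate_item_eth(items, item_flags,
 *                                                   error);
 *                 if (ret < 0)
 *                         return ret;
 *                 last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
 *                                      MLX5_FLOW_LAYER_OUTER_L2;
 *                 break;
 *         case RTE_FLOW_ITEM_TYPE_IPV6:
 *                 ret = mlx5_flow_validate_item_ipv6(items, item_flags,
 *                                                    last_item, ether_type,
 *                                                    NULL, error);
 *                 if (ret < 0)
 *                         return ret;
 *                 last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
 *                                      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 *                 break;
 *         default:
 *                 break;
 *         }
 *         item_flags |= last_item;
 * }
 * @endcode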
1942 */ 1943 int 1944 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 1945 uint64_t item_flags, 1946 uint64_t last_item, 1947 uint16_t ether_type, 1948 const struct rte_flow_item_ipv6 *acc_mask, 1949 struct rte_flow_error *error) 1950 { 1951 const struct rte_flow_item_ipv6 *mask = item->mask; 1952 const struct rte_flow_item_ipv6 *spec = item->spec; 1953 const struct rte_flow_item_ipv6 nic_mask = { 1954 .hdr = { 1955 .src_addr = 1956 "\xff\xff\xff\xff\xff\xff\xff\xff" 1957 "\xff\xff\xff\xff\xff\xff\xff\xff", 1958 .dst_addr = 1959 "\xff\xff\xff\xff\xff\xff\xff\xff" 1960 "\xff\xff\xff\xff\xff\xff\xff\xff", 1961 .vtc_flow = RTE_BE32(0xffffffff), 1962 .proto = 0xff, 1963 }, 1964 }; 1965 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1966 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1967 MLX5_FLOW_LAYER_OUTER_L3; 1968 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1969 MLX5_FLOW_LAYER_OUTER_L4; 1970 int ret; 1971 uint8_t next_proto = 0xFF; 1972 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1973 MLX5_FLOW_LAYER_OUTER_VLAN | 1974 MLX5_FLOW_LAYER_INNER_VLAN); 1975 1976 if ((last_item & l2_vlan) && ether_type && 1977 ether_type != RTE_ETHER_TYPE_IPV6) 1978 return rte_flow_error_set(error, EINVAL, 1979 RTE_FLOW_ERROR_TYPE_ITEM, item, 1980 "IPv6 cannot follow L2/VLAN layer " 1981 "which ether type is not IPv6"); 1982 if (mask && mask->hdr.proto == UINT8_MAX && spec) 1983 next_proto = spec->hdr.proto; 1984 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { 1985 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1986 return rte_flow_error_set(error, EINVAL, 1987 RTE_FLOW_ERROR_TYPE_ITEM, 1988 item, 1989 "multiple tunnel " 1990 "not supported"); 1991 } 1992 if (next_proto == IPPROTO_HOPOPTS || 1993 next_proto == IPPROTO_ROUTING || 1994 next_proto == IPPROTO_FRAGMENT || 1995 next_proto == IPPROTO_ESP || 1996 next_proto == IPPROTO_AH || 1997 next_proto == IPPROTO_DSTOPTS) 1998 return rte_flow_error_set(error, EINVAL, 1999 RTE_FLOW_ERROR_TYPE_ITEM, item, 2000 "IPv6 proto (next header) should " 2001 "not be set as extension header"); 2002 if (item_flags & MLX5_FLOW_LAYER_IPIP) 2003 return rte_flow_error_set(error, EINVAL, 2004 RTE_FLOW_ERROR_TYPE_ITEM, item, 2005 "wrong tunnel type - IPv4 specified " 2006 "but IPv6 item provided"); 2007 if (item_flags & l3m) 2008 return rte_flow_error_set(error, ENOTSUP, 2009 RTE_FLOW_ERROR_TYPE_ITEM, item, 2010 "multiple L3 layers not supported"); 2011 else if (item_flags & l4m) 2012 return rte_flow_error_set(error, EINVAL, 2013 RTE_FLOW_ERROR_TYPE_ITEM, item, 2014 "L3 cannot follow an L4 layer."); 2015 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 2016 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 2017 return rte_flow_error_set(error, EINVAL, 2018 RTE_FLOW_ERROR_TYPE_ITEM, item, 2019 "L3 cannot follow an NVGRE layer."); 2020 if (!mask) 2021 mask = &rte_flow_item_ipv6_mask; 2022 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2023 acc_mask ? (const uint8_t *)acc_mask 2024 : (const uint8_t *)&nic_mask, 2025 sizeof(struct rte_flow_item_ipv6), 2026 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2027 if (ret < 0) 2028 return ret; 2029 return 0; 2030 } 2031 2032 /** 2033 * Validate UDP item. 2034 * 2035 * @param[in] item 2036 * Item specification. 2037 * @param[in] item_flags 2038 * Bit-fields that holds the items detected until now. 2039 * @param[in] target_protocol 2040 * The next protocol in the previous item. 2041 * @param[in] flow_mask 2042 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 
2043 * @param[out] error 2044 * Pointer to error structure. 2045 * 2046 * @return 2047 * 0 on success, a negative errno value otherwise and rte_errno is set. 2048 */ 2049 int 2050 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 2051 uint64_t item_flags, 2052 uint8_t target_protocol, 2053 struct rte_flow_error *error) 2054 { 2055 const struct rte_flow_item_udp *mask = item->mask; 2056 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2057 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2058 MLX5_FLOW_LAYER_OUTER_L3; 2059 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2060 MLX5_FLOW_LAYER_OUTER_L4; 2061 int ret; 2062 2063 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 2064 return rte_flow_error_set(error, EINVAL, 2065 RTE_FLOW_ERROR_TYPE_ITEM, item, 2066 "protocol filtering not compatible" 2067 " with UDP layer"); 2068 if (!(item_flags & l3m)) 2069 return rte_flow_error_set(error, EINVAL, 2070 RTE_FLOW_ERROR_TYPE_ITEM, item, 2071 "L3 is mandatory to filter on L4"); 2072 if (item_flags & l4m) 2073 return rte_flow_error_set(error, EINVAL, 2074 RTE_FLOW_ERROR_TYPE_ITEM, item, 2075 "multiple L4 layers not supported"); 2076 if (!mask) 2077 mask = &rte_flow_item_udp_mask; 2078 ret = mlx5_flow_item_acceptable 2079 (item, (const uint8_t *)mask, 2080 (const uint8_t *)&rte_flow_item_udp_mask, 2081 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2082 error); 2083 if (ret < 0) 2084 return ret; 2085 return 0; 2086 } 2087 2088 /** 2089 * Validate TCP item. 2090 * 2091 * @param[in] item 2092 * Item specification. 2093 * @param[in] item_flags 2094 * Bit-fields that holds the items detected until now. 2095 * @param[in] target_protocol 2096 * The next protocol in the previous item. 2097 * @param[out] error 2098 * Pointer to error structure. 2099 * 2100 * @return 2101 * 0 on success, a negative errno value otherwise and rte_errno is set. 2102 */ 2103 int 2104 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 2105 uint64_t item_flags, 2106 uint8_t target_protocol, 2107 const struct rte_flow_item_tcp *flow_mask, 2108 struct rte_flow_error *error) 2109 { 2110 const struct rte_flow_item_tcp *mask = item->mask; 2111 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2112 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2113 MLX5_FLOW_LAYER_OUTER_L3; 2114 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2115 MLX5_FLOW_LAYER_OUTER_L4; 2116 int ret; 2117 2118 MLX5_ASSERT(flow_mask); 2119 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 2120 return rte_flow_error_set(error, EINVAL, 2121 RTE_FLOW_ERROR_TYPE_ITEM, item, 2122 "protocol filtering not compatible" 2123 " with TCP layer"); 2124 if (!(item_flags & l3m)) 2125 return rte_flow_error_set(error, EINVAL, 2126 RTE_FLOW_ERROR_TYPE_ITEM, item, 2127 "L3 is mandatory to filter on L4"); 2128 if (item_flags & l4m) 2129 return rte_flow_error_set(error, EINVAL, 2130 RTE_FLOW_ERROR_TYPE_ITEM, item, 2131 "multiple L4 layers not supported"); 2132 if (!mask) 2133 mask = &rte_flow_item_tcp_mask; 2134 ret = mlx5_flow_item_acceptable 2135 (item, (const uint8_t *)mask, 2136 (const uint8_t *)flow_mask, 2137 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2138 error); 2139 if (ret < 0) 2140 return ret; 2141 return 0; 2142 } 2143 2144 /** 2145 * Validate VXLAN item. 2146 * 2147 * @param[in] item 2148 * Item specification. 2149 * @param[in] item_flags 2150 * Bit-fields that holds the items detected until now. 
2151 * @param[in] target_protocol 2152 * The next protocol in the previous item. 2153 * @param[out] error 2154 * Pointer to error structure. 2155 * 2156 * @return 2157 * 0 on success, a negative errno value otherwise and rte_errno is set. 2158 */ 2159 int 2160 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, 2161 uint64_t item_flags, 2162 struct rte_flow_error *error) 2163 { 2164 const struct rte_flow_item_vxlan *spec = item->spec; 2165 const struct rte_flow_item_vxlan *mask = item->mask; 2166 int ret; 2167 union vni { 2168 uint32_t vlan_id; 2169 uint8_t vni[4]; 2170 } id = { .vlan_id = 0, }; 2171 2172 2173 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2174 return rte_flow_error_set(error, ENOTSUP, 2175 RTE_FLOW_ERROR_TYPE_ITEM, item, 2176 "multiple tunnel layers not" 2177 " supported"); 2178 /* 2179 * Verify only UDPv4 is present as defined in 2180 * https://tools.ietf.org/html/rfc7348 2181 */ 2182 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2183 return rte_flow_error_set(error, EINVAL, 2184 RTE_FLOW_ERROR_TYPE_ITEM, item, 2185 "no outer UDP layer found"); 2186 if (!mask) 2187 mask = &rte_flow_item_vxlan_mask; 2188 ret = mlx5_flow_item_acceptable 2189 (item, (const uint8_t *)mask, 2190 (const uint8_t *)&rte_flow_item_vxlan_mask, 2191 sizeof(struct rte_flow_item_vxlan), 2192 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2193 if (ret < 0) 2194 return ret; 2195 if (spec) { 2196 memcpy(&id.vni[1], spec->vni, 3); 2197 memcpy(&id.vni[1], mask->vni, 3); 2198 } 2199 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2200 return rte_flow_error_set(error, ENOTSUP, 2201 RTE_FLOW_ERROR_TYPE_ITEM, item, 2202 "VXLAN tunnel must be fully defined"); 2203 return 0; 2204 } 2205 2206 /** 2207 * Validate VXLAN_GPE item. 2208 * 2209 * @param[in] item 2210 * Item specification. 2211 * @param[in] item_flags 2212 * Bit-fields that holds the items detected until now. 2213 * @param[in] priv 2214 * Pointer to the private data structure. 2215 * @param[in] target_protocol 2216 * The next protocol in the previous item. 2217 * @param[out] error 2218 * Pointer to error structure. 2219 * 2220 * @return 2221 * 0 on success, a negative errno value otherwise and rte_errno is set. 
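 *
 * A hedged usage sketch (not code from this file; the pattern walk and the
 * MLX5_FLOW_LAYER_VXLAN_GPE flag update are assumptions): the item is only
 * accepted once the outer UDP layer has been validated, e.g. for an
 * ETH / IPV4 / UDP / VXLAN_GPE pattern:
 *
 * @code
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *                       MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *                       MLX5_FLOW_LAYER_OUTER_L4_UDP;
 * int ret;
 *
 * ret = mlx5_flow_validate_item_vxlan_gpe(items, item_flags, dev, error);
 * if (ret < 0)
 *         return ret;
 * item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
 * @endcode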
2222 */ 2223 int 2224 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 2225 uint64_t item_flags, 2226 struct rte_eth_dev *dev, 2227 struct rte_flow_error *error) 2228 { 2229 struct mlx5_priv *priv = dev->data->dev_private; 2230 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 2231 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 2232 int ret; 2233 union vni { 2234 uint32_t vlan_id; 2235 uint8_t vni[4]; 2236 } id = { .vlan_id = 0, }; 2237 2238 if (!priv->config.l3_vxlan_en) 2239 return rte_flow_error_set(error, ENOTSUP, 2240 RTE_FLOW_ERROR_TYPE_ITEM, item, 2241 "L3 VXLAN is not enabled by device" 2242 " parameter and/or not configured in" 2243 " firmware"); 2244 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2245 return rte_flow_error_set(error, ENOTSUP, 2246 RTE_FLOW_ERROR_TYPE_ITEM, item, 2247 "multiple tunnel layers not" 2248 " supported"); 2249 /* 2250 * Verify only UDPv4 is present as defined in 2251 * https://tools.ietf.org/html/rfc7348 2252 */ 2253 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2254 return rte_flow_error_set(error, EINVAL, 2255 RTE_FLOW_ERROR_TYPE_ITEM, item, 2256 "no outer UDP layer found"); 2257 if (!mask) 2258 mask = &rte_flow_item_vxlan_gpe_mask; 2259 ret = mlx5_flow_item_acceptable 2260 (item, (const uint8_t *)mask, 2261 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2262 sizeof(struct rte_flow_item_vxlan_gpe), 2263 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2264 if (ret < 0) 2265 return ret; 2266 if (spec) { 2267 if (spec->protocol) 2268 return rte_flow_error_set(error, ENOTSUP, 2269 RTE_FLOW_ERROR_TYPE_ITEM, 2270 item, 2271 "VxLAN-GPE protocol" 2272 " not supported"); 2273 memcpy(&id.vni[1], spec->vni, 3); 2274 memcpy(&id.vni[1], mask->vni, 3); 2275 } 2276 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2277 return rte_flow_error_set(error, ENOTSUP, 2278 RTE_FLOW_ERROR_TYPE_ITEM, item, 2279 "VXLAN-GPE tunnel must be fully" 2280 " defined"); 2281 return 0; 2282 } 2283 /** 2284 * Validate GRE Key item. 2285 * 2286 * @param[in] item 2287 * Item specification. 2288 * @param[in] item_flags 2289 * Bit flags to mark detected items. 2290 * @param[in] gre_item 2291 * Pointer to gre_item 2292 * @param[out] error 2293 * Pointer to error structure. 2294 * 2295 * @return 2296 * 0 on success, a negative errno value otherwise and rte_errno is set. 
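 *
 * A hedged ordering sketch (illustrative; variable names are assumptions):
 * the GRE item itself must be validated first and is then passed in as
 * @p gre_item when a GRE_KEY item follows it in the pattern:
 *
 * @code
 * const struct rte_flow_item *gre_item;
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 * int ret;
 *
 * ret = mlx5_flow_validate_item_gre(items, item_flags, 0xff, error);
 * if (ret < 0)
 *         return ret;
 * gre_item = items++;
 * item_flags |= MLX5_FLOW_LAYER_GRE;
 * ret = mlx5_flow_validate_item_gre_key(items, item_flags, gre_item, error);
 * @endcode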
2297 */ 2298 int 2299 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2300 uint64_t item_flags, 2301 const struct rte_flow_item *gre_item, 2302 struct rte_flow_error *error) 2303 { 2304 const rte_be32_t *mask = item->mask; 2305 int ret = 0; 2306 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2307 const struct rte_flow_item_gre *gre_spec; 2308 const struct rte_flow_item_gre *gre_mask; 2309 2310 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2311 return rte_flow_error_set(error, ENOTSUP, 2312 RTE_FLOW_ERROR_TYPE_ITEM, item, 2313 "Multiple GRE key not support"); 2314 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2315 return rte_flow_error_set(error, ENOTSUP, 2316 RTE_FLOW_ERROR_TYPE_ITEM, item, 2317 "No preceding GRE header"); 2318 if (item_flags & MLX5_FLOW_LAYER_INNER) 2319 return rte_flow_error_set(error, ENOTSUP, 2320 RTE_FLOW_ERROR_TYPE_ITEM, item, 2321 "GRE key following a wrong item"); 2322 gre_mask = gre_item->mask; 2323 if (!gre_mask) 2324 gre_mask = &rte_flow_item_gre_mask; 2325 gre_spec = gre_item->spec; 2326 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2327 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2328 return rte_flow_error_set(error, EINVAL, 2329 RTE_FLOW_ERROR_TYPE_ITEM, item, 2330 "Key bit must be on"); 2331 2332 if (!mask) 2333 mask = &gre_key_default_mask; 2334 ret = mlx5_flow_item_acceptable 2335 (item, (const uint8_t *)mask, 2336 (const uint8_t *)&gre_key_default_mask, 2337 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2338 return ret; 2339 } 2340 2341 /** 2342 * Validate GRE item. 2343 * 2344 * @param[in] item 2345 * Item specification. 2346 * @param[in] item_flags 2347 * Bit flags to mark detected items. 2348 * @param[in] target_protocol 2349 * The next protocol in the previous item. 2350 * @param[out] error 2351 * Pointer to error structure. 2352 * 2353 * @return 2354 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2355 */ 2356 int 2357 mlx5_flow_validate_item_gre(const struct rte_flow_item *item, 2358 uint64_t item_flags, 2359 uint8_t target_protocol, 2360 struct rte_flow_error *error) 2361 { 2362 const struct rte_flow_item_gre *spec __rte_unused = item->spec; 2363 const struct rte_flow_item_gre *mask = item->mask; 2364 int ret; 2365 const struct rte_flow_item_gre nic_mask = { 2366 .c_rsvd0_ver = RTE_BE16(0xB000), 2367 .protocol = RTE_BE16(UINT16_MAX), 2368 }; 2369 2370 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2371 return rte_flow_error_set(error, EINVAL, 2372 RTE_FLOW_ERROR_TYPE_ITEM, item, 2373 "protocol filtering not compatible" 2374 " with this GRE layer"); 2375 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2376 return rte_flow_error_set(error, ENOTSUP, 2377 RTE_FLOW_ERROR_TYPE_ITEM, item, 2378 "multiple tunnel layers not" 2379 " supported"); 2380 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2381 return rte_flow_error_set(error, ENOTSUP, 2382 RTE_FLOW_ERROR_TYPE_ITEM, item, 2383 "L3 Layer is missing"); 2384 if (!mask) 2385 mask = &rte_flow_item_gre_mask; 2386 ret = mlx5_flow_item_acceptable 2387 (item, (const uint8_t *)mask, 2388 (const uint8_t *)&nic_mask, 2389 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2390 error); 2391 if (ret < 0) 2392 return ret; 2393 #ifndef HAVE_MLX5DV_DR 2394 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT 2395 if (spec && (spec->protocol & mask->protocol)) 2396 return rte_flow_error_set(error, ENOTSUP, 2397 RTE_FLOW_ERROR_TYPE_ITEM, item, 2398 "without MPLS support the" 2399 " specification cannot be used for" 2400 " filtering"); 2401 #endif 2402 #endif 2403 return 0; 2404 } 2405 2406 /** 2407 * Validate Geneve item. 2408 * 2409 * @param[in] item 2410 * Item specification. 2411 * @param[in] item_flags 2412 * Bit-fields that holds the items detected until now. 2413 * @param[in] dev 2414 * Pointer to the Ethernet device structure. 2415 * @param[out] error 2416 * Pointer to error structure. 2417 * 2418 * @return 2419 * 0 on success, a negative errno value otherwise and rte_errno is set. 2420 */ 2421 2422 int 2423 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, 2424 uint64_t item_flags, 2425 struct rte_eth_dev *dev, 2426 struct rte_flow_error *error) 2427 { 2428 struct mlx5_priv *priv = dev->data->dev_private; 2429 const struct rte_flow_item_geneve *spec = item->spec; 2430 const struct rte_flow_item_geneve *mask = item->mask; 2431 int ret; 2432 uint16_t gbhdr; 2433 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ? 
2434 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2435 const struct rte_flow_item_geneve nic_mask = { 2436 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2437 .vni = "\xff\xff\xff", 2438 .protocol = RTE_BE16(UINT16_MAX), 2439 }; 2440 2441 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2442 return rte_flow_error_set(error, ENOTSUP, 2443 RTE_FLOW_ERROR_TYPE_ITEM, item, 2444 "L3 Geneve is not enabled by device" 2445 " parameter and/or not configured in" 2446 " firmware"); 2447 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2448 return rte_flow_error_set(error, ENOTSUP, 2449 RTE_FLOW_ERROR_TYPE_ITEM, item, 2450 "multiple tunnel layers not" 2451 " supported"); 2452 /* 2453 * Verify only UDPv4 is present as defined in 2454 * https://tools.ietf.org/html/rfc7348 2455 */ 2456 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2457 return rte_flow_error_set(error, EINVAL, 2458 RTE_FLOW_ERROR_TYPE_ITEM, item, 2459 "no outer UDP layer found"); 2460 if (!mask) 2461 mask = &rte_flow_item_geneve_mask; 2462 ret = mlx5_flow_item_acceptable 2463 (item, (const uint8_t *)mask, 2464 (const uint8_t *)&nic_mask, 2465 sizeof(struct rte_flow_item_geneve), 2466 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2467 if (ret) 2468 return ret; 2469 if (spec) { 2470 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2471 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2472 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2473 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2474 return rte_flow_error_set(error, ENOTSUP, 2475 RTE_FLOW_ERROR_TYPE_ITEM, 2476 item, 2477 "Geneve protocol unsupported" 2478 " fields are being used"); 2479 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2480 return rte_flow_error_set 2481 (error, ENOTSUP, 2482 RTE_FLOW_ERROR_TYPE_ITEM, 2483 item, 2484 "Unsupported Geneve options length"); 2485 } 2486 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2487 return rte_flow_error_set 2488 (error, ENOTSUP, 2489 RTE_FLOW_ERROR_TYPE_ITEM, item, 2490 "Geneve tunnel must be fully defined"); 2491 return 0; 2492 } 2493 2494 /** 2495 * Validate MPLS item. 2496 * 2497 * @param[in] dev 2498 * Pointer to the rte_eth_dev structure. 2499 * @param[in] item 2500 * Item specification. 2501 * @param[in] item_flags 2502 * Bit-fields that holds the items detected until now. 2503 * @param[in] prev_layer 2504 * The protocol layer indicated in previous item. 2505 * @param[out] error 2506 * Pointer to error structure. 2507 * 2508 * @return 2509 * 0 on success, a negative errno value otherwise and rte_errno is set. 2510 */ 2511 int 2512 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2513 const struct rte_flow_item *item __rte_unused, 2514 uint64_t item_flags __rte_unused, 2515 uint64_t prev_layer __rte_unused, 2516 struct rte_flow_error *error) 2517 { 2518 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2519 const struct rte_flow_item_mpls *mask = item->mask; 2520 struct mlx5_priv *priv = dev->data->dev_private; 2521 int ret; 2522 2523 if (!priv->config.mpls_en) 2524 return rte_flow_error_set(error, ENOTSUP, 2525 RTE_FLOW_ERROR_TYPE_ITEM, item, 2526 "MPLS not supported or" 2527 " disabled in firmware" 2528 " configuration."); 2529 /* MPLS over IP, UDP, GRE is allowed */ 2530 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2531 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2532 MLX5_FLOW_LAYER_GRE))) 2533 return rte_flow_error_set(error, EINVAL, 2534 RTE_FLOW_ERROR_TYPE_ITEM, item, 2535 "protocol filtering not compatible" 2536 " with MPLS layer"); 2537 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
*/ 2538 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2539 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2540 return rte_flow_error_set(error, ENOTSUP, 2541 RTE_FLOW_ERROR_TYPE_ITEM, item, 2542 "multiple tunnel layers not" 2543 " supported"); 2544 if (!mask) 2545 mask = &rte_flow_item_mpls_mask; 2546 ret = mlx5_flow_item_acceptable 2547 (item, (const uint8_t *)mask, 2548 (const uint8_t *)&rte_flow_item_mpls_mask, 2549 sizeof(struct rte_flow_item_mpls), 2550 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2551 if (ret < 0) 2552 return ret; 2553 return 0; 2554 #else 2555 return rte_flow_error_set(error, ENOTSUP, 2556 RTE_FLOW_ERROR_TYPE_ITEM, item, 2557 "MPLS is not supported by Verbs, please" 2558 " update."); 2559 #endif 2560 } 2561 2562 /** 2563 * Validate NVGRE item. 2564 * 2565 * @param[in] item 2566 * Item specification. 2567 * @param[in] item_flags 2568 * Bit flags to mark detected items. 2569 * @param[in] target_protocol 2570 * The next protocol in the previous item. 2571 * @param[out] error 2572 * Pointer to error structure. 2573 * 2574 * @return 2575 * 0 on success, a negative errno value otherwise and rte_errno is set. 2576 */ 2577 int 2578 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2579 uint64_t item_flags, 2580 uint8_t target_protocol, 2581 struct rte_flow_error *error) 2582 { 2583 const struct rte_flow_item_nvgre *mask = item->mask; 2584 int ret; 2585 2586 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2587 return rte_flow_error_set(error, EINVAL, 2588 RTE_FLOW_ERROR_TYPE_ITEM, item, 2589 "protocol filtering not compatible" 2590 " with this GRE layer"); 2591 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2592 return rte_flow_error_set(error, ENOTSUP, 2593 RTE_FLOW_ERROR_TYPE_ITEM, item, 2594 "multiple tunnel layers not" 2595 " supported"); 2596 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2597 return rte_flow_error_set(error, ENOTSUP, 2598 RTE_FLOW_ERROR_TYPE_ITEM, item, 2599 "L3 Layer is missing"); 2600 if (!mask) 2601 mask = &rte_flow_item_nvgre_mask; 2602 ret = mlx5_flow_item_acceptable 2603 (item, (const uint8_t *)mask, 2604 (const uint8_t *)&rte_flow_item_nvgre_mask, 2605 sizeof(struct rte_flow_item_nvgre), 2606 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2607 if (ret < 0) 2608 return ret; 2609 return 0; 2610 } 2611 2612 /** 2613 * Validate eCPRI item. 2614 * 2615 * @param[in] item 2616 * Item specification. 2617 * @param[in] item_flags 2618 * Bit-fields that holds the items detected until now. 2619 * @param[in] last_item 2620 * Previous validated item in the pattern items. 2621 * @param[in] ether_type 2622 * Type in the ethernet layer header (including dot1q). 2623 * @param[in] acc_mask 2624 * Acceptable mask, if NULL default internal default mask 2625 * will be used to check whether item fields are supported. 2626 * @param[out] error 2627 * Pointer to error structure. 2628 * 2629 * @return 2630 * 0 on success, a negative errno value otherwise and rte_errno is set. 
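 *
 * A minimal sketch of validating an ETH / ECPRI pattern (illustrative only;
 * passing NULL for @p acc_mask falls back to the internal nic_mask):
 *
 * @code
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2;
 * uint64_t last_item = MLX5_FLOW_LAYER_OUTER_L2;
 * int ret;
 *
 * ret = mlx5_flow_validate_item_ecpri(item, item_flags, last_item,
 *                                     RTE_ETHER_TYPE_ECPRI, NULL, error);
 * if (ret < 0)
 *         return ret;
 * @endcode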
2631 */ 2632 int 2633 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 2634 uint64_t item_flags, 2635 uint64_t last_item, 2636 uint16_t ether_type, 2637 const struct rte_flow_item_ecpri *acc_mask, 2638 struct rte_flow_error *error) 2639 { 2640 const struct rte_flow_item_ecpri *mask = item->mask; 2641 const struct rte_flow_item_ecpri nic_mask = { 2642 .hdr = { 2643 .common = { 2644 .u32 = 2645 RTE_BE32(((const struct rte_ecpri_common_hdr) { 2646 .type = 0xFF, 2647 }).u32), 2648 }, 2649 .dummy[0] = 0xFFFFFFFF, 2650 }, 2651 }; 2652 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 2653 MLX5_FLOW_LAYER_OUTER_VLAN); 2654 struct rte_flow_item_ecpri mask_lo; 2655 2656 if ((last_item & outer_l2_vlan) && ether_type && 2657 ether_type != RTE_ETHER_TYPE_ECPRI) 2658 return rte_flow_error_set(error, EINVAL, 2659 RTE_FLOW_ERROR_TYPE_ITEM, item, 2660 "eCPRI cannot follow L2/VLAN layer " 2661 "which ether type is not 0xAEFE."); 2662 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2663 return rte_flow_error_set(error, EINVAL, 2664 RTE_FLOW_ERROR_TYPE_ITEM, item, 2665 "eCPRI with tunnel is not supported " 2666 "right now."); 2667 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 2668 return rte_flow_error_set(error, ENOTSUP, 2669 RTE_FLOW_ERROR_TYPE_ITEM, item, 2670 "multiple L3 layers not supported"); 2671 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 2672 return rte_flow_error_set(error, EINVAL, 2673 RTE_FLOW_ERROR_TYPE_ITEM, item, 2674 "eCPRI cannot follow a TCP layer."); 2675 /* In specification, eCPRI could be over UDP layer. */ 2676 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 2677 return rte_flow_error_set(error, EINVAL, 2678 RTE_FLOW_ERROR_TYPE_ITEM, item, 2679 "eCPRI over UDP layer is not yet " 2680 "supported right now."); 2681 /* Mask for type field in common header could be zero. */ 2682 if (!mask) 2683 mask = &rte_flow_item_ecpri_mask; 2684 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 2685 /* Input mask is in big-endian format. */ 2686 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 2687 return rte_flow_error_set(error, EINVAL, 2688 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2689 "partial mask is not supported " 2690 "for protocol"); 2691 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 2692 return rte_flow_error_set(error, EINVAL, 2693 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2694 "message header mask must be after " 2695 "a type mask"); 2696 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2697 acc_mask ? (const uint8_t *)acc_mask 2698 : (const uint8_t *)&nic_mask, 2699 sizeof(struct rte_flow_item_ecpri), 2700 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2701 } 2702 2703 /* Allocate unique ID for the split Q/RSS subflows. */ 2704 static uint32_t 2705 flow_qrss_get_id(struct rte_eth_dev *dev) 2706 { 2707 struct mlx5_priv *priv = dev->data->dev_private; 2708 uint32_t qrss_id, ret; 2709 2710 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2711 if (ret) 2712 return 0; 2713 MLX5_ASSERT(qrss_id); 2714 return qrss_id; 2715 } 2716 2717 /* Free unique ID for the split Q/RSS subflows. */ 2718 static void 2719 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2720 { 2721 struct mlx5_priv *priv = dev->data->dev_private; 2722 2723 if (qrss_id) 2724 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2725 } 2726 2727 /** 2728 * Release resource related QUEUE/RSS action split. 2729 * 2730 * @param dev 2731 * Pointer to Ethernet device. 2732 * @param flow 2733 * Flow to release id's from. 
2734 */ 2735 static void 2736 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2737 struct rte_flow *flow) 2738 { 2739 struct mlx5_priv *priv = dev->data->dev_private; 2740 uint32_t handle_idx; 2741 struct mlx5_flow_handle *dev_handle; 2742 2743 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2744 handle_idx, dev_handle, next) 2745 if (dev_handle->split_flow_id) 2746 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2747 } 2748 2749 static int 2750 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2751 const struct rte_flow_attr *attr __rte_unused, 2752 const struct rte_flow_item items[] __rte_unused, 2753 const struct rte_flow_action actions[] __rte_unused, 2754 bool external __rte_unused, 2755 int hairpin __rte_unused, 2756 struct rte_flow_error *error) 2757 { 2758 return rte_flow_error_set(error, ENOTSUP, 2759 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2760 } 2761 2762 static struct mlx5_flow * 2763 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2764 const struct rte_flow_attr *attr __rte_unused, 2765 const struct rte_flow_item items[] __rte_unused, 2766 const struct rte_flow_action actions[] __rte_unused, 2767 struct rte_flow_error *error) 2768 { 2769 rte_flow_error_set(error, ENOTSUP, 2770 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2771 return NULL; 2772 } 2773 2774 static int 2775 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2776 struct mlx5_flow *dev_flow __rte_unused, 2777 const struct rte_flow_attr *attr __rte_unused, 2778 const struct rte_flow_item items[] __rte_unused, 2779 const struct rte_flow_action actions[] __rte_unused, 2780 struct rte_flow_error *error) 2781 { 2782 return rte_flow_error_set(error, ENOTSUP, 2783 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2784 } 2785 2786 static int 2787 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2788 struct rte_flow *flow __rte_unused, 2789 struct rte_flow_error *error) 2790 { 2791 return rte_flow_error_set(error, ENOTSUP, 2792 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2793 } 2794 2795 static void 2796 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2797 struct rte_flow *flow __rte_unused) 2798 { 2799 } 2800 2801 static void 2802 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2803 struct rte_flow *flow __rte_unused) 2804 { 2805 } 2806 2807 static int 2808 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2809 struct rte_flow *flow __rte_unused, 2810 const struct rte_flow_action *actions __rte_unused, 2811 void *data __rte_unused, 2812 struct rte_flow_error *error) 2813 { 2814 return rte_flow_error_set(error, ENOTSUP, 2815 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2816 } 2817 2818 /* Void driver to protect from null pointer reference. */ 2819 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2820 .validate = flow_null_validate, 2821 .prepare = flow_null_prepare, 2822 .translate = flow_null_translate, 2823 .apply = flow_null_apply, 2824 .remove = flow_null_remove, 2825 .destroy = flow_null_destroy, 2826 .query = flow_null_query, 2827 }; 2828 2829 /** 2830 * Select flow driver type according to flow attributes and device 2831 * configuration. 2832 * 2833 * @param[in] dev 2834 * Pointer to the dev structure. 2835 * @param[in] attr 2836 * Pointer to the flow attributes. 2837 * 2838 * @return 2839 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
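 *
 * Dispatch sketch (mirrors the flow_drv_*() wrappers below; the remaining
 * arguments are the usual validation parameters and are assumed here):
 *
 * @code
 * const struct mlx5_flow_driver_ops *fops;
 * enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 *
 * MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
 * fops = flow_get_drv_ops(type);
 * ret = fops->validate(dev, attr, items, actions, external, hairpin, error);
 * @endcode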
2840 */ 2841 static enum mlx5_flow_drv_type 2842 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2843 { 2844 struct mlx5_priv *priv = dev->data->dev_private; 2845 /* The OS can determine first a specific flow type (DV, VERBS) */ 2846 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2847 2848 if (type != MLX5_FLOW_TYPE_MAX) 2849 return type; 2850 /* If no OS specific type - continue with DV/VERBS selection */ 2851 if (attr->transfer && priv->config.dv_esw_en) 2852 type = MLX5_FLOW_TYPE_DV; 2853 if (!attr->transfer) 2854 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2855 MLX5_FLOW_TYPE_VERBS; 2856 return type; 2857 } 2858 2859 #define flow_get_drv_ops(type) flow_drv_ops[type] 2860 2861 /** 2862 * Flow driver validation API. This abstracts calling driver specific functions. 2863 * The type of flow driver is determined according to flow attributes. 2864 * 2865 * @param[in] dev 2866 * Pointer to the dev structure. 2867 * @param[in] attr 2868 * Pointer to the flow attributes. 2869 * @param[in] items 2870 * Pointer to the list of items. 2871 * @param[in] actions 2872 * Pointer to the list of actions. 2873 * @param[in] external 2874 * This flow rule is created by request external to PMD. 2875 * @param[in] hairpin 2876 * Number of hairpin TX actions, 0 means classic flow. 2877 * @param[out] error 2878 * Pointer to the error structure. 2879 * 2880 * @return 2881 * 0 on success, a negative errno value otherwise and rte_errno is set. 2882 */ 2883 static inline int 2884 flow_drv_validate(struct rte_eth_dev *dev, 2885 const struct rte_flow_attr *attr, 2886 const struct rte_flow_item items[], 2887 const struct rte_flow_action actions[], 2888 bool external, int hairpin, struct rte_flow_error *error) 2889 { 2890 const struct mlx5_flow_driver_ops *fops; 2891 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2892 2893 fops = flow_get_drv_ops(type); 2894 return fops->validate(dev, attr, items, actions, external, 2895 hairpin, error); 2896 } 2897 2898 /** 2899 * Flow driver preparation API. This abstracts calling driver specific 2900 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2901 * calculates the size of memory required for device flow, allocates the memory, 2902 * initializes the device flow and returns the pointer. 2903 * 2904 * @note 2905 * This function initializes device flow structure such as dv or verbs in 2906 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2907 * rest. For example, adding returning device flow to flow->dev_flow list and 2908 * setting backward reference to the flow should be done out of this function. 2909 * layers field is not filled either. 2910 * 2911 * @param[in] dev 2912 * Pointer to the dev structure. 2913 * @param[in] attr 2914 * Pointer to the flow attributes. 2915 * @param[in] items 2916 * Pointer to the list of items. 2917 * @param[in] actions 2918 * Pointer to the list of actions. 2919 * @param[in] flow_idx 2920 * This memory pool index to the flow. 2921 * @param[out] error 2922 * Pointer to the error structure. 2923 * 2924 * @return 2925 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
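 *
 * A hedged sketch of the usual prepare/translate/apply sequence (names are
 * assumptions; the real creation path also links the returned device flow
 * into the parent flow and fills the backward reference):
 *
 * @code
 * struct mlx5_flow *dev_flow;
 * int ret;
 *
 * dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
 *                             flow_idx, error);
 * if (!dev_flow)
 *         return -rte_errno;
 * ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 * if (ret)
 *         return ret;
 * ret = flow_drv_apply(dev, flow, error);
 * @endcode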
2926 */ 2927 static inline struct mlx5_flow * 2928 flow_drv_prepare(struct rte_eth_dev *dev, 2929 const struct rte_flow *flow, 2930 const struct rte_flow_attr *attr, 2931 const struct rte_flow_item items[], 2932 const struct rte_flow_action actions[], 2933 uint32_t flow_idx, 2934 struct rte_flow_error *error) 2935 { 2936 const struct mlx5_flow_driver_ops *fops; 2937 enum mlx5_flow_drv_type type = flow->drv_type; 2938 struct mlx5_flow *mlx5_flow = NULL; 2939 2940 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2941 fops = flow_get_drv_ops(type); 2942 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2943 if (mlx5_flow) 2944 mlx5_flow->flow_idx = flow_idx; 2945 return mlx5_flow; 2946 } 2947 2948 /** 2949 * Flow driver translation API. This abstracts calling driver specific 2950 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2951 * translates a generic flow into a driver flow. flow_drv_prepare() must 2952 * precede. 2953 * 2954 * @note 2955 * dev_flow->layers could be filled as a result of parsing during translation 2956 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2957 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2958 * flow->actions could be overwritten even though all the expanded dev_flows 2959 * have the same actions. 2960 * 2961 * @param[in] dev 2962 * Pointer to the rte dev structure. 2963 * @param[in, out] dev_flow 2964 * Pointer to the mlx5 flow. 2965 * @param[in] attr 2966 * Pointer to the flow attributes. 2967 * @param[in] items 2968 * Pointer to the list of items. 2969 * @param[in] actions 2970 * Pointer to the list of actions. 2971 * @param[out] error 2972 * Pointer to the error structure. 2973 * 2974 * @return 2975 * 0 on success, a negative errno value otherwise and rte_errno is set. 2976 */ 2977 static inline int 2978 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2979 const struct rte_flow_attr *attr, 2980 const struct rte_flow_item items[], 2981 const struct rte_flow_action actions[], 2982 struct rte_flow_error *error) 2983 { 2984 const struct mlx5_flow_driver_ops *fops; 2985 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2986 2987 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2988 fops = flow_get_drv_ops(type); 2989 return fops->translate(dev, dev_flow, attr, items, actions, error); 2990 } 2991 2992 /** 2993 * Flow driver apply API. This abstracts calling driver specific functions. 2994 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2995 * translated driver flows on to device. flow_drv_translate() must precede. 2996 * 2997 * @param[in] dev 2998 * Pointer to Ethernet device structure. 2999 * @param[in, out] flow 3000 * Pointer to flow structure. 3001 * @param[out] error 3002 * Pointer to error structure. 3003 * 3004 * @return 3005 * 0 on success, a negative errno value otherwise and rte_errno is set. 3006 */ 3007 static inline int 3008 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 3009 struct rte_flow_error *error) 3010 { 3011 const struct mlx5_flow_driver_ops *fops; 3012 enum mlx5_flow_drv_type type = flow->drv_type; 3013 3014 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3015 fops = flow_get_drv_ops(type); 3016 return fops->apply(dev, flow, error); 3017 } 3018 3019 /** 3020 * Flow driver remove API. This abstracts calling driver specific functions. 3021 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 3022 * on the device. All the resources of the flow should be freed by calling 3023 * flow_drv_destroy(). 3024 * 3025 * @param[in] dev 3026 * Pointer to Ethernet device. 3027 * @param[in, out] flow 3028 * Pointer to flow structure. 3029 */ 3030 static inline void 3031 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 3032 { 3033 const struct mlx5_flow_driver_ops *fops; 3034 enum mlx5_flow_drv_type type = flow->drv_type; 3035 3036 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3037 fops = flow_get_drv_ops(type); 3038 fops->remove(dev, flow); 3039 } 3040 3041 /** 3042 * Flow driver destroy API. This abstracts calling driver specific functions. 3043 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 3044 * on the device and releases its resources. 3045 * 3046 * @param[in] dev 3047 * Pointer to Ethernet device. 3048 * @param[in, out] flow 3049 * Pointer to flow structure. 3050 */ 3051 static inline void 3052 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 3053 { 3054 const struct mlx5_flow_driver_ops *fops; 3055 enum mlx5_flow_drv_type type = flow->drv_type; 3056 3057 flow_mreg_split_qrss_release(dev, flow); 3058 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3059 fops = flow_get_drv_ops(type); 3060 fops->destroy(dev, flow); 3061 } 3062 3063 /** 3064 * Get RSS action from the action list. 3065 * 3066 * @param[in] actions 3067 * Pointer to the list of actions. 3068 * 3069 * @return 3070 * Pointer to the RSS action if it exists, NULL otherwise. 3071 */ 3072 static const struct rte_flow_action_rss* 3073 flow_get_rss_action(const struct rte_flow_action actions[]) 3074 { 3075 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3076 switch (actions->type) { 3077 case RTE_FLOW_ACTION_TYPE_RSS: 3078 return (const struct rte_flow_action_rss *) 3079 actions->conf; 3080 default: 3081 break; 3082 } 3083 } 3084 return NULL; 3085 } 3086 3087 static unsigned int 3088 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 3089 { 3090 const struct rte_flow_item *item; 3091 unsigned int has_vlan = 0; 3092 3093 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 3094 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 3095 has_vlan = 1; 3096 break; 3097 } 3098 } 3099 if (has_vlan) 3100 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 3101 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 3102 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 3103 MLX5_EXPANSION_ROOT_OUTER; 3104 } 3105 3106 /** 3107 * Get layer flags from the prefix flow. 3108 * 3109 * Some flows may be split into several subflows; the prefix subflow gets the 3110 * match items and the suffix subflow gets the actions. 3111 * Some actions need the user-defined match item flags to get the details for 3112 * the action. 3113 * This function helps the suffix flow to get the item layer flags from the 3114 * prefix subflow. 3115 * 3116 * @param[in] dev_flow 3117 * Pointer to the created prefix subflow. 3118 * 3119 * @return 3120 * The layers obtained from the prefix subflow. 3121 */ 3122 static inline uint64_t 3123 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 3124 { 3125 uint64_t layers = 0; 3126 3127 /* 3128 * The layer bits could be cached in a local variable, but the compiler 3129 * usually performs this optimization for us. 3130 * If no decap actions, use the layers directly. 
3131 */ 3132 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 3133 return dev_flow->handle->layers; 3134 /* Convert L3 layers with decap action. */ 3135 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 3136 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 3137 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 3138 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 3139 /* Convert L4 layers with decap action. */ 3140 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 3141 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 3142 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 3143 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 3144 return layers; 3145 } 3146 3147 /** 3148 * Get metadata split action information. 3149 * 3150 * @param[in] actions 3151 * Pointer to the list of actions. 3152 * @param[out] qrss 3153 * Pointer to the return pointer. 3154 * @param[out] qrss_type 3155 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 3156 * if no QUEUE/RSS is found. 3157 * @param[out] encap_idx 3158 * Pointer to the index of the encap action if exists, otherwise the last 3159 * action index. 3160 * 3161 * @return 3162 * Total number of actions. 3163 */ 3164 static int 3165 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 3166 const struct rte_flow_action **qrss, 3167 int *encap_idx) 3168 { 3169 const struct rte_flow_action_raw_encap *raw_encap; 3170 int actions_n = 0; 3171 int raw_decap_idx = -1; 3172 3173 *encap_idx = -1; 3174 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3175 switch (actions->type) { 3176 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3177 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3178 *encap_idx = actions_n; 3179 break; 3180 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3181 raw_decap_idx = actions_n; 3182 break; 3183 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3184 raw_encap = actions->conf; 3185 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3186 *encap_idx = raw_decap_idx != -1 ? 3187 raw_decap_idx : actions_n; 3188 break; 3189 case RTE_FLOW_ACTION_TYPE_QUEUE: 3190 case RTE_FLOW_ACTION_TYPE_RSS: 3191 *qrss = actions; 3192 break; 3193 default: 3194 break; 3195 } 3196 actions_n++; 3197 } 3198 if (*encap_idx == -1) 3199 *encap_idx = actions_n; 3200 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3201 return actions_n + 1; 3202 } 3203 3204 /** 3205 * Check meter action from the action list. 3206 * 3207 * @param[in] actions 3208 * Pointer to the list of actions. 3209 * @param[out] mtr 3210 * Pointer to the meter exist flag. 3211 * 3212 * @return 3213 * Total number of actions. 3214 */ 3215 static int 3216 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 3217 { 3218 int actions_n = 0; 3219 3220 MLX5_ASSERT(mtr); 3221 *mtr = 0; 3222 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3223 switch (actions->type) { 3224 case RTE_FLOW_ACTION_TYPE_METER: 3225 *mtr = 1; 3226 break; 3227 default: 3228 break; 3229 } 3230 actions_n++; 3231 } 3232 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3233 return actions_n + 1; 3234 } 3235 3236 /** 3237 * Check if the flow should be split due to hairpin. 3238 * The reason for the split is that in current HW we can't 3239 * support encap and push-vlan on Rx, so if a flow contains 3240 * these actions we move it to Tx. 3241 * 3242 * @param dev 3243 * Pointer to Ethernet device. 3244 * @param[in] attr 3245 * Flow rule attributes. 3246 * @param[in] actions 3247 * Associated actions (list terminated by the END action). 
3248 * 3249 * @return 3250 * > 0 the number of actions and the flow should be split, 3251 * 0 when no split required. 3252 */ 3253 static int 3254 flow_check_hairpin_split(struct rte_eth_dev *dev, 3255 const struct rte_flow_attr *attr, 3256 const struct rte_flow_action actions[]) 3257 { 3258 int queue_action = 0; 3259 int action_n = 0; 3260 int split = 0; 3261 const struct rte_flow_action_queue *queue; 3262 const struct rte_flow_action_rss *rss; 3263 const struct rte_flow_action_raw_encap *raw_encap; 3264 3265 if (!attr->ingress) 3266 return 0; 3267 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3268 switch (actions->type) { 3269 case RTE_FLOW_ACTION_TYPE_QUEUE: 3270 queue = actions->conf; 3271 if (queue == NULL) 3272 return 0; 3273 if (mlx5_rxq_get_type(dev, queue->index) != 3274 MLX5_RXQ_TYPE_HAIRPIN) 3275 return 0; 3276 queue_action = 1; 3277 action_n++; 3278 break; 3279 case RTE_FLOW_ACTION_TYPE_RSS: 3280 rss = actions->conf; 3281 if (rss == NULL || rss->queue_num == 0) 3282 return 0; 3283 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 3284 MLX5_RXQ_TYPE_HAIRPIN) 3285 return 0; 3286 queue_action = 1; 3287 action_n++; 3288 break; 3289 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3290 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3291 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3292 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3293 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3294 split++; 3295 action_n++; 3296 break; 3297 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3298 raw_encap = actions->conf; 3299 if (raw_encap->size > 3300 (sizeof(struct rte_flow_item_eth) + 3301 sizeof(struct rte_flow_item_ipv4))) 3302 split++; 3303 action_n++; 3304 break; 3305 default: 3306 action_n++; 3307 break; 3308 } 3309 } 3310 if (split && queue_action) 3311 return action_n; 3312 return 0; 3313 } 3314 3315 /* Declare flow create/destroy prototype in advance. */ 3316 static uint32_t 3317 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 3318 const struct rte_flow_attr *attr, 3319 const struct rte_flow_item items[], 3320 const struct rte_flow_action actions[], 3321 bool external, struct rte_flow_error *error); 3322 3323 static void 3324 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 3325 uint32_t flow_idx); 3326 3327 /** 3328 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3329 * 3330 * As mark_id is unique, if there's already a registered flow for the mark_id, 3331 * return by increasing the reference counter of the resource. Otherwise, create 3332 * the resource (mcp_res) and flow. 3333 * 3334 * Flow looks like, 3335 * - If ingress port is ANY and reg_c[1] is mark_id, 3336 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3337 * 3338 * For default flow (zero mark_id), flow is like, 3339 * - If ingress port is ANY, 3340 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 3341 * 3342 * @param dev 3343 * Pointer to Ethernet device. 3344 * @param mark_id 3345 * ID of MARK action, zero means default flow for META. 3346 * @param[out] error 3347 * Perform verbose error reporting if not NULL. 3348 * 3349 * @return 3350 * Associated resource on success, NULL otherwise and rte_errno is set. 
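 *
 * Usage sketch, mirroring how the MARK handling in
 * flow_mreg_update_copy_table() below consumes the returned resource:
 *
 * @code
 * struct mlx5_flow_mreg_copy_resource *mcp_res;
 *
 * mcp_res = flow_mreg_add_copy_action(dev, mark->id, error);
 * if (!mcp_res)
 *         return -rte_errno;
 * flow->rix_mreg_copy = mcp_res->idx;
 * if (dev->data->dev_started) {
 *         mcp_res->appcnt++;
 *         flow->copy_applied = 1;
 * }
 * @endcode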
3351 */ 3352 static struct mlx5_flow_mreg_copy_resource * 3353 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3354 struct rte_flow_error *error) 3355 { 3356 struct mlx5_priv *priv = dev->data->dev_private; 3357 struct rte_flow_attr attr = { 3358 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3359 .ingress = 1, 3360 }; 3361 struct mlx5_rte_flow_item_tag tag_spec = { 3362 .data = mark_id, 3363 }; 3364 struct rte_flow_item items[] = { 3365 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3366 }; 3367 struct rte_flow_action_mark ftag = { 3368 .id = mark_id, 3369 }; 3370 struct mlx5_flow_action_copy_mreg cp_mreg = { 3371 .dst = REG_B, 3372 .src = REG_NON, 3373 }; 3374 struct rte_flow_action_jump jump = { 3375 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3376 }; 3377 struct rte_flow_action actions[] = { 3378 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3379 }; 3380 struct mlx5_flow_mreg_copy_resource *mcp_res; 3381 uint32_t idx = 0; 3382 int ret; 3383 3384 /* Fill the register fileds in the flow. */ 3385 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3386 if (ret < 0) 3387 return NULL; 3388 tag_spec.id = ret; 3389 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3390 if (ret < 0) 3391 return NULL; 3392 cp_mreg.src = ret; 3393 /* Check if already registered. */ 3394 MLX5_ASSERT(priv->mreg_cp_tbl); 3395 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3396 if (mcp_res) { 3397 /* For non-default rule. */ 3398 if (mark_id != MLX5_DEFAULT_COPY_ID) 3399 mcp_res->refcnt++; 3400 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3401 mcp_res->refcnt == 1); 3402 return mcp_res; 3403 } 3404 /* Provide the full width of FLAG specific value. */ 3405 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3406 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3407 /* Build a new flow. */ 3408 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3409 items[0] = (struct rte_flow_item){ 3410 .type = (enum rte_flow_item_type) 3411 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3412 .spec = &tag_spec, 3413 }; 3414 items[1] = (struct rte_flow_item){ 3415 .type = RTE_FLOW_ITEM_TYPE_END, 3416 }; 3417 actions[0] = (struct rte_flow_action){ 3418 .type = (enum rte_flow_action_type) 3419 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3420 .conf = &ftag, 3421 }; 3422 actions[1] = (struct rte_flow_action){ 3423 .type = (enum rte_flow_action_type) 3424 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3425 .conf = &cp_mreg, 3426 }; 3427 actions[2] = (struct rte_flow_action){ 3428 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3429 .conf = &jump, 3430 }; 3431 actions[3] = (struct rte_flow_action){ 3432 .type = RTE_FLOW_ACTION_TYPE_END, 3433 }; 3434 } else { 3435 /* Default rule, wildcard match. */ 3436 attr.priority = MLX5_FLOW_PRIO_RSVD; 3437 items[0] = (struct rte_flow_item){ 3438 .type = RTE_FLOW_ITEM_TYPE_END, 3439 }; 3440 actions[0] = (struct rte_flow_action){ 3441 .type = (enum rte_flow_action_type) 3442 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3443 .conf = &cp_mreg, 3444 }; 3445 actions[1] = (struct rte_flow_action){ 3446 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3447 .conf = &jump, 3448 }; 3449 actions[2] = (struct rte_flow_action){ 3450 .type = RTE_FLOW_ACTION_TYPE_END, 3451 }; 3452 } 3453 /* Build a new entry. */ 3454 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3455 if (!mcp_res) { 3456 rte_errno = ENOMEM; 3457 return NULL; 3458 } 3459 mcp_res->idx = idx; 3460 /* 3461 * The copy Flows are not included in any list. 
They 3462 * are referenced from other Flows and cannot 3463 * be applied, removed, or deleted in arbitrary order 3464 * by list traversal. 3465 */ 3466 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3467 actions, false, error); 3468 if (!mcp_res->rix_flow) 3469 goto error; 3470 mcp_res->refcnt++; 3471 mcp_res->hlist_ent.key = mark_id; 3472 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3473 &mcp_res->hlist_ent); 3474 MLX5_ASSERT(!ret); 3475 if (ret) 3476 goto error; 3477 return mcp_res; 3478 error: 3479 if (mcp_res->rix_flow) 3480 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3481 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3482 return NULL; 3483 } 3484 3485 /** 3486 * Release flow in RX_CP_TBL. 3487 * 3488 * @param dev 3489 * Pointer to Ethernet device. 3490 * @param flow 3491 * Parent flow for which copying is provided. 3492 */ 3493 static void 3494 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3495 struct rte_flow *flow) 3496 { 3497 struct mlx5_flow_mreg_copy_resource *mcp_res; 3498 struct mlx5_priv *priv = dev->data->dev_private; 3499 3500 if (!flow->rix_mreg_copy) 3501 return; 3502 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3503 flow->rix_mreg_copy); 3504 if (!mcp_res || !priv->mreg_cp_tbl) 3505 return; 3506 if (flow->copy_applied) { 3507 MLX5_ASSERT(mcp_res->appcnt); 3508 flow->copy_applied = 0; 3509 --mcp_res->appcnt; 3510 if (!mcp_res->appcnt) { 3511 struct rte_flow *mcp_flow = mlx5_ipool_get 3512 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3513 mcp_res->rix_flow); 3514 3515 if (mcp_flow) 3516 flow_drv_remove(dev, mcp_flow); 3517 } 3518 } 3519 /* 3520 * We do not check availability of metadata registers here, 3521 * because copy resources are not allocated in this case. 3522 */ 3523 if (--mcp_res->refcnt) 3524 return; 3525 MLX5_ASSERT(mcp_res->rix_flow); 3526 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3527 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3528 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3529 flow->rix_mreg_copy = 0; 3530 } 3531 3532 /** 3533 * Start flow in RX_CP_TBL. 3534 * 3535 * @param dev 3536 * Pointer to Ethernet device. 3537 * @param flow 3538 * Parent flow for which copying is provided. 3539 * 3540 * @return 3541 * 0 on success, a negative errno value otherwise and rte_errno is set. 3542 */ 3543 static int 3544 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3545 struct rte_flow *flow) 3546 { 3547 struct mlx5_flow_mreg_copy_resource *mcp_res; 3548 struct mlx5_priv *priv = dev->data->dev_private; 3549 int ret; 3550 3551 if (!flow->rix_mreg_copy || flow->copy_applied) 3552 return 0; 3553 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3554 flow->rix_mreg_copy); 3555 if (!mcp_res) 3556 return 0; 3557 if (!mcp_res->appcnt) { 3558 struct rte_flow *mcp_flow = mlx5_ipool_get 3559 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3560 mcp_res->rix_flow); 3561 3562 if (mcp_flow) { 3563 ret = flow_drv_apply(dev, mcp_flow, NULL); 3564 if (ret) 3565 return ret; 3566 } 3567 } 3568 ++mcp_res->appcnt; 3569 flow->copy_applied = 1; 3570 return 0; 3571 } 3572 3573 /** 3574 * Stop flow in RX_CP_TBL. 3575 * 3576 * @param dev 3577 * Pointer to Ethernet device. 3578 * @param flow 3579 * Parent flow for which copying is provided. 
3580 */ 3581 static void 3582 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3583 struct rte_flow *flow) 3584 { 3585 struct mlx5_flow_mreg_copy_resource *mcp_res; 3586 struct mlx5_priv *priv = dev->data->dev_private; 3587 3588 if (!flow->rix_mreg_copy || !flow->copy_applied) 3589 return; 3590 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3591 flow->rix_mreg_copy); 3592 if (!mcp_res) 3593 return; 3594 MLX5_ASSERT(mcp_res->appcnt); 3595 --mcp_res->appcnt; 3596 flow->copy_applied = 0; 3597 if (!mcp_res->appcnt) { 3598 struct rte_flow *mcp_flow = mlx5_ipool_get 3599 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3600 mcp_res->rix_flow); 3601 3602 if (mcp_flow) 3603 flow_drv_remove(dev, mcp_flow); 3604 } 3605 } 3606 3607 /** 3608 * Remove the default copy action from RX_CP_TBL. 3609 * 3610 * @param dev 3611 * Pointer to Ethernet device. 3612 */ 3613 static void 3614 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3615 { 3616 struct mlx5_flow_mreg_copy_resource *mcp_res; 3617 struct mlx5_priv *priv = dev->data->dev_private; 3618 3619 /* Check if default flow is registered. */ 3620 if (!priv->mreg_cp_tbl) 3621 return; 3622 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3623 MLX5_DEFAULT_COPY_ID); 3624 if (!mcp_res) 3625 return; 3626 MLX5_ASSERT(mcp_res->rix_flow); 3627 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3628 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3629 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3630 } 3631 3632 /** 3633 * Add the default copy action in in RX_CP_TBL. 3634 * 3635 * @param dev 3636 * Pointer to Ethernet device. 3637 * @param[out] error 3638 * Perform verbose error reporting if not NULL. 3639 * 3640 * @return 3641 * 0 for success, negative value otherwise and rte_errno is set. 3642 */ 3643 static int 3644 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3645 struct rte_flow_error *error) 3646 { 3647 struct mlx5_priv *priv = dev->data->dev_private; 3648 struct mlx5_flow_mreg_copy_resource *mcp_res; 3649 3650 /* Check whether extensive metadata feature is engaged. */ 3651 if (!priv->config.dv_flow_en || 3652 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3653 !mlx5_flow_ext_mreg_supported(dev) || 3654 !priv->sh->dv_regc0_mask) 3655 return 0; 3656 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3657 if (!mcp_res) 3658 return -rte_errno; 3659 return 0; 3660 } 3661 3662 /** 3663 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3664 * 3665 * All the flow having Q/RSS action should be split by 3666 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3667 * performs the following, 3668 * - CQE->flow_tag := reg_c[1] (MARK) 3669 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3670 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3671 * but there should be a flow per each MARK ID set by MARK action. 3672 * 3673 * For the aforementioned reason, if there's a MARK action in flow's action 3674 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3675 * the MARK ID to CQE's flow_tag like, 3676 * - If reg_c[1] is mark_id, 3677 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3678 * 3679 * For SET_META action which stores value in reg_c[0], as the destination is 3680 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3681 * MARK ID means the default flow. 
The default flow looks like, 3682 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3683 * 3684 * @param dev 3685 * Pointer to Ethernet device. 3686 * @param flow 3687 * Pointer to flow structure. 3688 * @param[in] actions 3689 * Pointer to the list of actions. 3690 * @param[out] error 3691 * Perform verbose error reporting if not NULL. 3692 * 3693 * @return 3694 * 0 on success, negative value otherwise and rte_errno is set. 3695 */ 3696 static int 3697 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3698 struct rte_flow *flow, 3699 const struct rte_flow_action *actions, 3700 struct rte_flow_error *error) 3701 { 3702 struct mlx5_priv *priv = dev->data->dev_private; 3703 struct mlx5_dev_config *config = &priv->config; 3704 struct mlx5_flow_mreg_copy_resource *mcp_res; 3705 const struct rte_flow_action_mark *mark; 3706 3707 /* Check whether extensive metadata feature is engaged. */ 3708 if (!config->dv_flow_en || 3709 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3710 !mlx5_flow_ext_mreg_supported(dev) || 3711 !priv->sh->dv_regc0_mask) 3712 return 0; 3713 /* Find MARK action. */ 3714 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3715 switch (actions->type) { 3716 case RTE_FLOW_ACTION_TYPE_FLAG: 3717 mcp_res = flow_mreg_add_copy_action 3718 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3719 if (!mcp_res) 3720 return -rte_errno; 3721 flow->rix_mreg_copy = mcp_res->idx; 3722 if (dev->data->dev_started) { 3723 mcp_res->appcnt++; 3724 flow->copy_applied = 1; 3725 } 3726 return 0; 3727 case RTE_FLOW_ACTION_TYPE_MARK: 3728 mark = (const struct rte_flow_action_mark *) 3729 actions->conf; 3730 mcp_res = 3731 flow_mreg_add_copy_action(dev, mark->id, error); 3732 if (!mcp_res) 3733 return -rte_errno; 3734 flow->rix_mreg_copy = mcp_res->idx; 3735 if (dev->data->dev_started) { 3736 mcp_res->appcnt++; 3737 flow->copy_applied = 1; 3738 } 3739 return 0; 3740 default: 3741 break; 3742 } 3743 } 3744 return 0; 3745 } 3746 3747 #define MLX5_MAX_SPLIT_ACTIONS 24 3748 #define MLX5_MAX_SPLIT_ITEMS 24 3749 3750 /** 3751 * Split the hairpin flow. 3752 * Since HW can't support encap and push-vlan on Rx, we move these 3753 * actions to Tx. 3754 * If the count action is after the encap then we also 3755 * move the count action. in this case the count will also measure 3756 * the outer bytes. 3757 * 3758 * @param dev 3759 * Pointer to Ethernet device. 3760 * @param[in] actions 3761 * Associated actions (list terminated by the END action). 3762 * @param[out] actions_rx 3763 * Rx flow actions. 3764 * @param[out] actions_tx 3765 * Tx flow actions.. 3766 * @param[out] pattern_tx 3767 * The pattern items for the Tx flow. 3768 * @param[out] flow_id 3769 * The flow ID connected to this flow. 3770 * 3771 * @return 3772 * 0 on success. 
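 *
 * Illustrative sketch only (hypothetical application rule), assuming the
 * RAW_ENCAP data is larger than an Ethernet plus IPv4 header so it is
 * treated as tunnel encapsulation:
 *   original actions : RAW_ENCAP / COUNT / QUEUE / END
 *   actions_rx       : QUEUE / TAG(set flow_id) / END
 *   actions_tx       : RAW_ENCAP / COUNT / END
 *   pattern_tx       : TAG(match flow_id) / END
 * The COUNT follows the encap here, so it is moved to Tx and measures
 * the outer bytes as noted above.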
3773 */ 3774 static int 3775 flow_hairpin_split(struct rte_eth_dev *dev, 3776 const struct rte_flow_action actions[], 3777 struct rte_flow_action actions_rx[], 3778 struct rte_flow_action actions_tx[], 3779 struct rte_flow_item pattern_tx[], 3780 uint32_t *flow_id) 3781 { 3782 struct mlx5_priv *priv = dev->data->dev_private; 3783 const struct rte_flow_action_raw_encap *raw_encap; 3784 const struct rte_flow_action_raw_decap *raw_decap; 3785 struct mlx5_rte_flow_action_set_tag *set_tag; 3786 struct rte_flow_action *tag_action; 3787 struct mlx5_rte_flow_item_tag *tag_item; 3788 struct rte_flow_item *item; 3789 char *addr; 3790 int encap = 0; 3791 3792 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3793 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3794 switch (actions->type) { 3795 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3796 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3797 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3798 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3799 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3800 rte_memcpy(actions_tx, actions, 3801 sizeof(struct rte_flow_action)); 3802 actions_tx++; 3803 break; 3804 case RTE_FLOW_ACTION_TYPE_COUNT: 3805 if (encap) { 3806 rte_memcpy(actions_tx, actions, 3807 sizeof(struct rte_flow_action)); 3808 actions_tx++; 3809 } else { 3810 rte_memcpy(actions_rx, actions, 3811 sizeof(struct rte_flow_action)); 3812 actions_rx++; 3813 } 3814 break; 3815 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3816 raw_encap = actions->conf; 3817 if (raw_encap->size > 3818 (sizeof(struct rte_flow_item_eth) + 3819 sizeof(struct rte_flow_item_ipv4))) { 3820 memcpy(actions_tx, actions, 3821 sizeof(struct rte_flow_action)); 3822 actions_tx++; 3823 encap = 1; 3824 } else { 3825 rte_memcpy(actions_rx, actions, 3826 sizeof(struct rte_flow_action)); 3827 actions_rx++; 3828 } 3829 break; 3830 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3831 raw_decap = actions->conf; 3832 if (raw_decap->size < 3833 (sizeof(struct rte_flow_item_eth) + 3834 sizeof(struct rte_flow_item_ipv4))) { 3835 memcpy(actions_tx, actions, 3836 sizeof(struct rte_flow_action)); 3837 actions_tx++; 3838 } else { 3839 rte_memcpy(actions_rx, actions, 3840 sizeof(struct rte_flow_action)); 3841 actions_rx++; 3842 } 3843 break; 3844 default: 3845 rte_memcpy(actions_rx, actions, 3846 sizeof(struct rte_flow_action)); 3847 actions_rx++; 3848 break; 3849 } 3850 } 3851 /* Add set meta action and end action for the Rx flow. */ 3852 tag_action = actions_rx; 3853 tag_action->type = (enum rte_flow_action_type) 3854 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3855 actions_rx++; 3856 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3857 actions_rx++; 3858 set_tag = (void *)actions_rx; 3859 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3860 MLX5_ASSERT(set_tag->id > REG_NON); 3861 set_tag->data = *flow_id; 3862 tag_action->conf = set_tag; 3863 /* Create Tx item list. 
*/ 3864 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3865 addr = (void *)&pattern_tx[2]; 3866 item = pattern_tx; 3867 item->type = (enum rte_flow_item_type) 3868 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3869 tag_item = (void *)addr; 3870 tag_item->data = *flow_id; 3871 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3872 MLX5_ASSERT(set_tag->id > REG_NON); 3873 item->spec = tag_item; 3874 addr += sizeof(struct mlx5_rte_flow_item_tag); 3875 tag_item = (void *)addr; 3876 tag_item->data = UINT32_MAX; 3877 tag_item->id = UINT16_MAX; 3878 item->mask = tag_item; 3879 item->last = NULL; 3880 item++; 3881 item->type = RTE_FLOW_ITEM_TYPE_END; 3882 return 0; 3883 } 3884 3885 /** 3886 * The last stage of splitting chain, just creates the subflow 3887 * without any modification. 3888 * 3889 * @param[in] dev 3890 * Pointer to Ethernet device. 3891 * @param[in] flow 3892 * Parent flow structure pointer. 3893 * @param[in, out] sub_flow 3894 * Pointer to return the created subflow, may be NULL. 3895 * @param[in] prefix_layers 3896 * Prefix subflow layers, may be 0. 3897 * @param[in] prefix_mark 3898 * Prefix subflow mark flag, may be 0. 3899 * @param[in] attr 3900 * Flow rule attributes. 3901 * @param[in] items 3902 * Pattern specification (list terminated by the END pattern item). 3903 * @param[in] actions 3904 * Associated actions (list terminated by the END action). 3905 * @param[in] external 3906 * This flow rule is created by request external to PMD. 3907 * @param[in] flow_idx 3908 * This memory pool index to the flow. 3909 * @param[out] error 3910 * Perform verbose error reporting if not NULL. 3911 * @return 3912 * 0 on success, negative value otherwise 3913 */ 3914 static int 3915 flow_create_split_inner(struct rte_eth_dev *dev, 3916 struct rte_flow *flow, 3917 struct mlx5_flow **sub_flow, 3918 uint64_t prefix_layers, 3919 uint32_t prefix_mark, 3920 const struct rte_flow_attr *attr, 3921 const struct rte_flow_item items[], 3922 const struct rte_flow_action actions[], 3923 bool external, uint32_t flow_idx, 3924 struct rte_flow_error *error) 3925 { 3926 struct mlx5_flow *dev_flow; 3927 3928 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3929 flow_idx, error); 3930 if (!dev_flow) 3931 return -rte_errno; 3932 dev_flow->flow = flow; 3933 dev_flow->external = external; 3934 /* Subflow object was created, we must include one in the list. */ 3935 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3936 dev_flow->handle, next); 3937 /* 3938 * If dev_flow is as one of the suffix flow, some actions in suffix 3939 * flow may need some user defined item layer flags, and pass the 3940 * Metadate rxq mark flag to suffix flow as well. 3941 */ 3942 if (prefix_layers) 3943 dev_flow->handle->layers = prefix_layers; 3944 if (prefix_mark) 3945 dev_flow->handle->mark = 1; 3946 if (sub_flow) 3947 *sub_flow = dev_flow; 3948 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3949 } 3950 3951 /** 3952 * Split the meter flow. 3953 * 3954 * As meter flow will split to three sub flow, other than meter 3955 * action, the other actions make sense to only meter accepts 3956 * the packet. If it need to be dropped, no other additional 3957 * actions should be take. 3958 * 3959 * One kind of special action which decapsulates the L3 tunnel 3960 * header will be in the prefix sub flow, as not to take the 3961 * L3 tunnel header into account. 3962 * 3963 * @param dev 3964 * Pointer to Ethernet device. 
3965 * @param[in] items 3966 * Pattern specification (list terminated by the END pattern item). 3967 * @param[out] sfx_items 3968 * Suffix flow match items (list terminated by the END pattern item). 3969 * @param[in] actions 3970 * Associated actions (list terminated by the END action). 3971 * @param[out] actions_sfx 3972 * Suffix flow actions. 3973 * @param[out] actions_pre 3974 * Prefix flow actions. 3975 * @param[out] pattern_sfx 3976 * The pattern items for the suffix flow. 3977 * @param[out] tag_sfx 3978 * Pointer to suffix flow tag. 3979 * 3980 * @return 3981 * 0 on success. 3982 */ 3983 static int 3984 flow_meter_split_prep(struct rte_eth_dev *dev, 3985 const struct rte_flow_item items[], 3986 struct rte_flow_item sfx_items[], 3987 const struct rte_flow_action actions[], 3988 struct rte_flow_action actions_sfx[], 3989 struct rte_flow_action actions_pre[]) 3990 { 3991 struct rte_flow_action *tag_action = NULL; 3992 struct rte_flow_item *tag_item; 3993 struct mlx5_rte_flow_action_set_tag *set_tag; 3994 struct rte_flow_error error; 3995 const struct rte_flow_action_raw_encap *raw_encap; 3996 const struct rte_flow_action_raw_decap *raw_decap; 3997 struct mlx5_rte_flow_item_tag *tag_spec; 3998 struct mlx5_rte_flow_item_tag *tag_mask; 3999 uint32_t tag_id; 4000 bool copy_vlan = false; 4001 4002 /* Prepare the actions for prefix and suffix flow. */ 4003 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4004 struct rte_flow_action **action_cur = NULL; 4005 4006 switch (actions->type) { 4007 case RTE_FLOW_ACTION_TYPE_METER: 4008 /* Add the extra tag action first. */ 4009 tag_action = actions_pre; 4010 tag_action->type = (enum rte_flow_action_type) 4011 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4012 actions_pre++; 4013 action_cur = &actions_pre; 4014 break; 4015 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 4016 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 4017 action_cur = &actions_pre; 4018 break; 4019 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 4020 raw_encap = actions->conf; 4021 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 4022 action_cur = &actions_pre; 4023 break; 4024 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 4025 raw_decap = actions->conf; 4026 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 4027 action_cur = &actions_pre; 4028 break; 4029 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 4030 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 4031 copy_vlan = true; 4032 break; 4033 default: 4034 break; 4035 } 4036 if (!action_cur) 4037 action_cur = &actions_sfx; 4038 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 4039 (*action_cur)++; 4040 } 4041 /* Add end action to the actions. */ 4042 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 4043 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 4044 actions_pre++; 4045 /* Set the tag. */ 4046 set_tag = (void *)actions_pre; 4047 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4048 /* 4049 * Get the id from the qrss_pool to make qrss share the id with meter. 4050 */ 4051 tag_id = flow_qrss_get_id(dev); 4052 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 4053 assert(tag_action); 4054 tag_action->conf = set_tag; 4055 /* Prepare the suffix subflow items. 
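 * Layout of the caller-provided sfx_items array: the first entry is
 * reserved for the internal TAG item filled in below, then PORT_ID and,
 * optionally, VLAN items copied from the original pattern (the VLAN
 * item is converted to the internal VLAN type), then the END item, and
 * finally the TAG spec/mask pair placed right after the END item.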
*/ 4056 tag_item = sfx_items++; 4057 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 4058 int item_type = items->type; 4059 4060 switch (item_type) { 4061 case RTE_FLOW_ITEM_TYPE_PORT_ID: 4062 memcpy(sfx_items, items, sizeof(*sfx_items)); 4063 sfx_items++; 4064 break; 4065 case RTE_FLOW_ITEM_TYPE_VLAN: 4066 if (copy_vlan) { 4067 memcpy(sfx_items, items, sizeof(*sfx_items)); 4068 /* 4069 * Convert to internal match item, it is used 4070 * for vlan push and set vid. 4071 */ 4072 sfx_items->type = (enum rte_flow_item_type) 4073 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 4074 sfx_items++; 4075 } 4076 break; 4077 default: 4078 break; 4079 } 4080 } 4081 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 4082 sfx_items++; 4083 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 4084 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 4085 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4086 tag_mask = tag_spec + 1; 4087 tag_mask->data = 0xffffff00; 4088 tag_item->type = (enum rte_flow_item_type) 4089 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 4090 tag_item->spec = tag_spec; 4091 tag_item->last = NULL; 4092 tag_item->mask = tag_mask; 4093 return tag_id; 4094 } 4095 4096 /** 4097 * Split action list having QUEUE/RSS for metadata register copy. 4098 * 4099 * Once Q/RSS action is detected in user's action list, the flow action 4100 * should be split in order to copy metadata registers, which will happen in 4101 * RX_CP_TBL like, 4102 * - CQE->flow_tag := reg_c[1] (MARK) 4103 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 4104 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 4105 * This is because the last action of each flow must be a terminal action 4106 * (QUEUE, RSS or DROP). 4107 * 4108 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 4109 * stored and kept in the mlx5_flow structure per each sub_flow. 4110 * 4111 * The Q/RSS action is replaced with, 4112 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 4113 * And the following JUMP action is added at the end, 4114 * - JUMP, to RX_CP_TBL. 4115 * 4116 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 4117 * flow_create_split_metadata() routine. The flow will look like, 4118 * - If flow ID matches (reg_c[2]), perform Q/RSS. 4119 * 4120 * @param dev 4121 * Pointer to Ethernet device. 4122 * @param[out] split_actions 4123 * Pointer to store split actions to jump to CP_TBL. 4124 * @param[in] actions 4125 * Pointer to the list of original flow actions. 4126 * @param[in] qrss 4127 * Pointer to the Q/RSS action. 4128 * @param[in] actions_n 4129 * Number of original actions. 4130 * @param[out] error 4131 * Perform verbose error reporting if not NULL. 4132 * 4133 * @return 4134 * non-zero unique flow_id on success, otherwise 0 and 4135 * error/rte_error are set. 4136 */ 4137 static uint32_t 4138 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 4139 struct rte_flow_action *split_actions, 4140 const struct rte_flow_action *actions, 4141 const struct rte_flow_action *qrss, 4142 int actions_n, struct rte_flow_error *error) 4143 { 4144 struct mlx5_rte_flow_action_set_tag *set_tag; 4145 struct rte_flow_action_jump *jump; 4146 const int qrss_idx = qrss - actions; 4147 uint32_t flow_id = 0; 4148 int ret = 0; 4149 4150 /* 4151 * Given actions will be split 4152 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 4153 * - Add jump to mreg CP_TBL. 4154 * As a result, there will be one more action. 
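 * For example (hypothetical list, non-meter-suffix case):
 *   original      : MARK / RSS / END
 *   split_actions : MARK / SET_TAG(flow ID -> reg_c[2]) /
 *                   JUMP(RX_CP_TBL) / END
 * In the meter suffix case the Q/RSS slot is already VOID and only the
 * JUMP and END actions are appended.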
4155 */ 4156 ++actions_n; 4157 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 4158 set_tag = (void *)(split_actions + actions_n); 4159 /* 4160 * If tag action is not set to void(it means we are not the meter 4161 * suffix flow), add the tag action. Since meter suffix flow already 4162 * has the tag added. 4163 */ 4164 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 4165 /* 4166 * Allocate the new subflow ID. This one is unique within 4167 * device and not shared with representors. Otherwise, 4168 * we would have to resolve multi-thread access synch 4169 * issue. Each flow on the shared device is appended 4170 * with source vport identifier, so the resulting 4171 * flows will be unique in the shared (by master and 4172 * representors) domain even if they have coinciding 4173 * IDs. 4174 */ 4175 flow_id = flow_qrss_get_id(dev); 4176 if (!flow_id) 4177 return rte_flow_error_set(error, ENOMEM, 4178 RTE_FLOW_ERROR_TYPE_ACTION, 4179 NULL, "can't allocate id " 4180 "for split Q/RSS subflow"); 4181 /* Internal SET_TAG action to set flow ID. */ 4182 *set_tag = (struct mlx5_rte_flow_action_set_tag){ 4183 .data = flow_id, 4184 }; 4185 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error); 4186 if (ret < 0) 4187 return ret; 4188 set_tag->id = ret; 4189 /* Construct new actions array. */ 4190 /* Replace QUEUE/RSS action. */ 4191 split_actions[qrss_idx] = (struct rte_flow_action){ 4192 .type = (enum rte_flow_action_type) 4193 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4194 .conf = set_tag, 4195 }; 4196 } 4197 /* JUMP action to jump to mreg copy table (CP_TBL). */ 4198 jump = (void *)(set_tag + 1); 4199 *jump = (struct rte_flow_action_jump){ 4200 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 4201 }; 4202 split_actions[actions_n - 2] = (struct rte_flow_action){ 4203 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4204 .conf = jump, 4205 }; 4206 split_actions[actions_n - 1] = (struct rte_flow_action){ 4207 .type = RTE_FLOW_ACTION_TYPE_END, 4208 }; 4209 return flow_id; 4210 } 4211 4212 /** 4213 * Extend the given action list for Tx metadata copy. 4214 * 4215 * Copy the given action list to the ext_actions and add flow metadata register 4216 * copy action in order to copy reg_a set by WQE to reg_c[0]. 4217 * 4218 * @param[out] ext_actions 4219 * Pointer to the extended action list. 4220 * @param[in] actions 4221 * Pointer to the list of actions. 4222 * @param[in] actions_n 4223 * Number of actions in the list. 4224 * @param[out] error 4225 * Perform verbose error reporting if not NULL. 4226 * @param[in] encap_idx 4227 * The encap action inndex. 
4228 * 4229 * @return 4230 * 0 on success, negative value otherwise 4231 */ 4232 static int 4233 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 4234 struct rte_flow_action *ext_actions, 4235 const struct rte_flow_action *actions, 4236 int actions_n, struct rte_flow_error *error, 4237 int encap_idx) 4238 { 4239 struct mlx5_flow_action_copy_mreg *cp_mreg = 4240 (struct mlx5_flow_action_copy_mreg *) 4241 (ext_actions + actions_n + 1); 4242 int ret; 4243 4244 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 4245 if (ret < 0) 4246 return ret; 4247 cp_mreg->dst = ret; 4248 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 4249 if (ret < 0) 4250 return ret; 4251 cp_mreg->src = ret; 4252 if (encap_idx != 0) 4253 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 4254 if (encap_idx == actions_n - 1) { 4255 ext_actions[actions_n - 1] = (struct rte_flow_action){ 4256 .type = (enum rte_flow_action_type) 4257 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4258 .conf = cp_mreg, 4259 }; 4260 ext_actions[actions_n] = (struct rte_flow_action){ 4261 .type = RTE_FLOW_ACTION_TYPE_END, 4262 }; 4263 } else { 4264 ext_actions[encap_idx] = (struct rte_flow_action){ 4265 .type = (enum rte_flow_action_type) 4266 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4267 .conf = cp_mreg, 4268 }; 4269 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 4270 sizeof(*ext_actions) * (actions_n - encap_idx)); 4271 } 4272 return 0; 4273 } 4274 4275 /** 4276 * Check the match action from the action list. 4277 * 4278 * @param[in] actions 4279 * Pointer to the list of actions. 4280 * @param[in] attr 4281 * Flow rule attributes. 4282 * @param[in] action 4283 * The action to be check if exist. 4284 * @param[out] match_action_pos 4285 * Pointer to the position of the matched action if exists, otherwise is -1. 4286 * @param[out] qrss_action_pos 4287 * Pointer to the position of the Queue/RSS action if exists, otherwise is -1. 4288 * 4289 * @return 4290 * > 0 the total number of actions. 4291 * 0 if not found match action in action list. 4292 */ 4293 static int 4294 flow_check_match_action(const struct rte_flow_action actions[], 4295 const struct rte_flow_attr *attr, 4296 enum rte_flow_action_type action, 4297 int *match_action_pos, int *qrss_action_pos) 4298 { 4299 const struct rte_flow_action_sample *sample; 4300 int actions_n = 0; 4301 int jump_flag = 0; 4302 uint32_t ratio = 0; 4303 int sub_type = 0; 4304 int flag = 0; 4305 4306 *match_action_pos = -1; 4307 *qrss_action_pos = -1; 4308 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4309 if (actions->type == action) { 4310 flag = 1; 4311 *match_action_pos = actions_n; 4312 } 4313 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE || 4314 actions->type == RTE_FLOW_ACTION_TYPE_RSS) 4315 *qrss_action_pos = actions_n; 4316 if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) 4317 jump_flag = 1; 4318 if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) { 4319 sample = actions->conf; 4320 ratio = sample->ratio; 4321 sub_type = ((const struct rte_flow_action *) 4322 (sample->actions))->type; 4323 } 4324 actions_n++; 4325 } 4326 if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) { 4327 if (ratio == 1) { 4328 /* JUMP Action not support for Mirroring; 4329 * Mirroring support multi-destination; 4330 */ 4331 if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END) 4332 flag = 0; 4333 } 4334 } 4335 /* Count RTE_FLOW_ACTION_TYPE_END. */ 4336 return flag ? 
actions_n + 1 : 0; 4337 } 4338 4339 #define SAMPLE_SUFFIX_ITEM 2 4340 4341 /** 4342 * Split the sample flow. 4343 * 4344 * As sample flow will split to two sub flow, sample flow with 4345 * sample action, the other actions will move to new suffix flow. 4346 * 4347 * Also add unique tag id with tag action in the sample flow, 4348 * the same tag id will be as match in the suffix flow. 4349 * 4350 * @param dev 4351 * Pointer to Ethernet device. 4352 * @param[in] fdb_tx 4353 * FDB egress flow flag. 4354 * @param[out] sfx_items 4355 * Suffix flow match items (list terminated by the END pattern item). 4356 * @param[in] actions 4357 * Associated actions (list terminated by the END action). 4358 * @param[out] actions_sfx 4359 * Suffix flow actions. 4360 * @param[out] actions_pre 4361 * Prefix flow actions. 4362 * @param[in] actions_n 4363 * The total number of actions. 4364 * @param[in] sample_action_pos 4365 * The sample action position. 4366 * @param[in] qrss_action_pos 4367 * The Queue/RSS action position. 4368 * @param[out] error 4369 * Perform verbose error reporting if not NULL. 4370 * 4371 * @return 4372 * 0 on success, or unique flow_id, a negative errno value 4373 * otherwise and rte_errno is set. 4374 */ 4375 static int 4376 flow_sample_split_prep(struct rte_eth_dev *dev, 4377 uint32_t fdb_tx, 4378 struct rte_flow_item sfx_items[], 4379 const struct rte_flow_action actions[], 4380 struct rte_flow_action actions_sfx[], 4381 struct rte_flow_action actions_pre[], 4382 int actions_n, 4383 int sample_action_pos, 4384 int qrss_action_pos, 4385 struct rte_flow_error *error) 4386 { 4387 struct mlx5_rte_flow_action_set_tag *set_tag; 4388 struct mlx5_rte_flow_item_tag *tag_spec; 4389 struct mlx5_rte_flow_item_tag *tag_mask; 4390 uint32_t tag_id = 0; 4391 int index; 4392 int ret; 4393 4394 if (sample_action_pos < 0) 4395 return rte_flow_error_set(error, EINVAL, 4396 RTE_FLOW_ERROR_TYPE_ACTION, 4397 NULL, "invalid position of sample " 4398 "action in list"); 4399 if (!fdb_tx) { 4400 /* Prepare the prefix tag action. */ 4401 set_tag = (void *)(actions_pre + actions_n + 1); 4402 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); 4403 if (ret < 0) 4404 return ret; 4405 set_tag->id = ret; 4406 tag_id = flow_qrss_get_id(dev); 4407 set_tag->data = tag_id; 4408 /* Prepare the suffix subflow items. */ 4409 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); 4410 tag_spec->data = tag_id; 4411 tag_spec->id = set_tag->id; 4412 tag_mask = tag_spec + 1; 4413 tag_mask->data = UINT32_MAX; 4414 sfx_items[0] = (struct rte_flow_item){ 4415 .type = (enum rte_flow_item_type) 4416 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4417 .spec = tag_spec, 4418 .last = NULL, 4419 .mask = tag_mask, 4420 }; 4421 sfx_items[1] = (struct rte_flow_item){ 4422 .type = (enum rte_flow_item_type) 4423 RTE_FLOW_ITEM_TYPE_END, 4424 }; 4425 } 4426 /* Prepare the actions for prefix and suffix flow. */ 4427 if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) { 4428 index = qrss_action_pos; 4429 /* Put the preceding the Queue/RSS action into prefix flow. */ 4430 if (index != 0) 4431 memcpy(actions_pre, actions, 4432 sizeof(struct rte_flow_action) * index); 4433 /* Put others preceding the sample action into prefix flow. */ 4434 if (sample_action_pos > index + 1) 4435 memcpy(actions_pre + index, actions + index + 1, 4436 sizeof(struct rte_flow_action) * 4437 (sample_action_pos - index - 1)); 4438 index = sample_action_pos - 1; 4439 /* Put Queue/RSS action into Suffix flow. 
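 * Only the single Q/RSS action is moved: it becomes the first action of
 * the suffix flow and the actions located after the sample action are
 * appended behind it at the end of this function.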
*/ 4440 memcpy(actions_sfx, actions + qrss_action_pos, 4441 sizeof(struct rte_flow_action)); 4442 actions_sfx++; 4443 } else { 4444 index = sample_action_pos; 4445 if (index != 0) 4446 memcpy(actions_pre, actions, 4447 sizeof(struct rte_flow_action) * index); 4448 } 4449 /* Add the extra tag action for NIC-RX and E-Switch ingress. */ 4450 if (!fdb_tx) { 4451 actions_pre[index++] = 4452 (struct rte_flow_action){ 4453 .type = (enum rte_flow_action_type) 4454 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4455 .conf = set_tag, 4456 }; 4457 } 4458 memcpy(actions_pre + index, actions + sample_action_pos, 4459 sizeof(struct rte_flow_action)); 4460 index += 1; 4461 actions_pre[index] = (struct rte_flow_action){ 4462 .type = (enum rte_flow_action_type) 4463 RTE_FLOW_ACTION_TYPE_END, 4464 }; 4465 /* Put the actions after sample into Suffix flow. */ 4466 memcpy(actions_sfx, actions + sample_action_pos + 1, 4467 sizeof(struct rte_flow_action) * 4468 (actions_n - sample_action_pos - 1)); 4469 return tag_id; 4470 } 4471 4472 /** 4473 * The splitting for metadata feature. 4474 * 4475 * - Q/RSS action on NIC Rx should be split in order to pass by 4476 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4477 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4478 * 4479 * - All the actions on NIC Tx should have a mreg copy action to 4480 * copy reg_a from WQE to reg_c[0]. 4481 * 4482 * @param dev 4483 * Pointer to Ethernet device. 4484 * @param[in] flow 4485 * Parent flow structure pointer. 4486 * @param[in] prefix_layers 4487 * Prefix flow layer flags. 4488 * @param[in] prefix_mark 4489 * Prefix subflow mark flag, may be 0. 4490 * @param[in] attr 4491 * Flow rule attributes. 4492 * @param[in] items 4493 * Pattern specification (list terminated by the END pattern item). 4494 * @param[in] actions 4495 * Associated actions (list terminated by the END action). 4496 * @param[in] external 4497 * This flow rule is created by request external to PMD. 4498 * @param[in] flow_idx 4499 * This memory pool index to the flow. 4500 * @param[out] error 4501 * Perform verbose error reporting if not NULL. 4502 * @return 4503 * 0 on success, negative value otherwise 4504 */ 4505 static int 4506 flow_create_split_metadata(struct rte_eth_dev *dev, 4507 struct rte_flow *flow, 4508 uint64_t prefix_layers, 4509 uint32_t prefix_mark, 4510 const struct rte_flow_attr *attr, 4511 const struct rte_flow_item items[], 4512 const struct rte_flow_action actions[], 4513 bool external, uint32_t flow_idx, 4514 struct rte_flow_error *error) 4515 { 4516 struct mlx5_priv *priv = dev->data->dev_private; 4517 struct mlx5_dev_config *config = &priv->config; 4518 const struct rte_flow_action *qrss = NULL; 4519 struct rte_flow_action *ext_actions = NULL; 4520 struct mlx5_flow *dev_flow = NULL; 4521 uint32_t qrss_id = 0; 4522 int mtr_sfx = 0; 4523 size_t act_size; 4524 int actions_n; 4525 int encap_idx; 4526 int ret; 4527 4528 /* Check whether extensive metadata feature is engaged. */ 4529 if (!config->dv_flow_en || 4530 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4531 !mlx5_flow_ext_mreg_supported(dev)) 4532 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 4533 prefix_mark, attr, items, 4534 actions, external, flow_idx, 4535 error); 4536 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 4537 &encap_idx); 4538 if (qrss) { 4539 /* Exclude hairpin flows from splitting. 
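 * If the target queue (or the first RSS queue) is a hairpin queue the
 * Q/RSS pointer is cleared, so the flow keeps its original terminal
 * action and no metadata copy split is performed for it.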
*/ 4540 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 4541 const struct rte_flow_action_queue *queue; 4542 4543 queue = qrss->conf; 4544 if (mlx5_rxq_get_type(dev, queue->index) == 4545 MLX5_RXQ_TYPE_HAIRPIN) 4546 qrss = NULL; 4547 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 4548 const struct rte_flow_action_rss *rss; 4549 4550 rss = qrss->conf; 4551 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 4552 MLX5_RXQ_TYPE_HAIRPIN) 4553 qrss = NULL; 4554 } 4555 } 4556 if (qrss) { 4557 /* Check if it is in meter suffix table. */ 4558 mtr_sfx = attr->group == (attr->transfer ? 4559 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4560 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4561 /* 4562 * Q/RSS action on NIC Rx should be split in order to pass by 4563 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4564 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4565 */ 4566 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4567 sizeof(struct rte_flow_action_set_tag) + 4568 sizeof(struct rte_flow_action_jump); 4569 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4570 SOCKET_ID_ANY); 4571 if (!ext_actions) 4572 return rte_flow_error_set(error, ENOMEM, 4573 RTE_FLOW_ERROR_TYPE_ACTION, 4574 NULL, "no memory to split " 4575 "metadata flow"); 4576 /* 4577 * If we are the suffix flow of meter, tag already exist. 4578 * Set the tag action to void. 4579 */ 4580 if (mtr_sfx) 4581 ext_actions[qrss - actions].type = 4582 RTE_FLOW_ACTION_TYPE_VOID; 4583 else 4584 ext_actions[qrss - actions].type = 4585 (enum rte_flow_action_type) 4586 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4587 /* 4588 * Create the new actions list with removed Q/RSS action 4589 * and appended set tag and jump to register copy table 4590 * (RX_CP_TBL). We should preallocate unique tag ID here 4591 * in advance, because it is needed for set tag action. 4592 */ 4593 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4594 qrss, actions_n, error); 4595 if (!mtr_sfx && !qrss_id) { 4596 ret = -rte_errno; 4597 goto exit; 4598 } 4599 } else if (attr->egress && !attr->transfer) { 4600 /* 4601 * All the actions on NIC Tx should have a metadata register 4602 * copy action to copy reg_a from WQE to reg_c[meta] 4603 */ 4604 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4605 sizeof(struct mlx5_flow_action_copy_mreg); 4606 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4607 SOCKET_ID_ANY); 4608 if (!ext_actions) 4609 return rte_flow_error_set(error, ENOMEM, 4610 RTE_FLOW_ERROR_TYPE_ACTION, 4611 NULL, "no memory to split " 4612 "metadata flow"); 4613 /* Create the action list appended with copy register. */ 4614 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4615 actions_n, error, encap_idx); 4616 if (ret < 0) 4617 goto exit; 4618 } 4619 /* Add the unmodified original or prefix subflow. */ 4620 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, 4621 prefix_mark, attr, 4622 items, ext_actions ? ext_actions : 4623 actions, external, flow_idx, error); 4624 if (ret < 0) 4625 goto exit; 4626 MLX5_ASSERT(dev_flow); 4627 if (qrss) { 4628 const struct rte_flow_attr q_attr = { 4629 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4630 .ingress = 1, 4631 }; 4632 /* Internal PMD action to set register. 
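 * The suffix subflow created below in RX_ACT_TBL matches this internal
 * TAG item against the flow ID written by the prefix SET_TAG action and
 * then performs the original Q/RSS action. The tag register is resolved
 * below through MLX5_COPY_MARK, unless the meter suffix items are
 * reused as is.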
*/ 4633 struct mlx5_rte_flow_item_tag q_tag_spec = { 4634 .data = qrss_id, 4635 .id = REG_NON, 4636 }; 4637 struct rte_flow_item q_items[] = { 4638 { 4639 .type = (enum rte_flow_item_type) 4640 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4641 .spec = &q_tag_spec, 4642 .last = NULL, 4643 .mask = NULL, 4644 }, 4645 { 4646 .type = RTE_FLOW_ITEM_TYPE_END, 4647 }, 4648 }; 4649 struct rte_flow_action q_actions[] = { 4650 { 4651 .type = qrss->type, 4652 .conf = qrss->conf, 4653 }, 4654 { 4655 .type = RTE_FLOW_ACTION_TYPE_END, 4656 }, 4657 }; 4658 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4659 4660 /* 4661 * Configure the tag item only if there is no meter subflow. 4662 * Since tag is already marked in the meter suffix subflow 4663 * we can just use the meter suffix items as is. 4664 */ 4665 if (qrss_id) { 4666 /* Not meter subflow. */ 4667 MLX5_ASSERT(!mtr_sfx); 4668 /* 4669 * Put unique id in prefix flow due to it is destroyed 4670 * after suffix flow and id will be freed after there 4671 * is no actual flows with this id and identifier 4672 * reallocation becomes possible (for example, for 4673 * other flows in other threads). 4674 */ 4675 dev_flow->handle->split_flow_id = qrss_id; 4676 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4677 error); 4678 if (ret < 0) 4679 goto exit; 4680 q_tag_spec.id = ret; 4681 } 4682 dev_flow = NULL; 4683 /* Add suffix subflow to execute Q/RSS. */ 4684 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0, 4685 &q_attr, mtr_sfx ? items : 4686 q_items, q_actions, 4687 external, flow_idx, error); 4688 if (ret < 0) 4689 goto exit; 4690 /* qrss ID should be freed if failed. */ 4691 qrss_id = 0; 4692 MLX5_ASSERT(dev_flow); 4693 } 4694 4695 exit: 4696 /* 4697 * We do not destroy the partially created sub_flows in case of error. 4698 * These ones are included into parent flow list and will be destroyed 4699 * by flow_drv_destroy. 4700 */ 4701 flow_qrss_free_id(dev, qrss_id); 4702 mlx5_free(ext_actions); 4703 return ret; 4704 } 4705 4706 /** 4707 * The splitting for meter feature. 4708 * 4709 * - The meter flow will be split to two flows as prefix and 4710 * suffix flow. The packets make sense only it pass the prefix 4711 * meter action. 4712 * 4713 * - Reg_C_5 is used for the packet to match betweend prefix and 4714 * suffix flow. 4715 * 4716 * @param dev 4717 * Pointer to Ethernet device. 4718 * @param[in] flow 4719 * Parent flow structure pointer. 4720 * @param[in] prefix_layers 4721 * Prefix subflow layers, may be 0. 4722 * @param[in] prefix_mark 4723 * Prefix subflow mark flag, may be 0. 4724 * @param[in] attr 4725 * Flow rule attributes. 4726 * @param[in] items 4727 * Pattern specification (list terminated by the END pattern item). 4728 * @param[in] actions 4729 * Associated actions (list terminated by the END action). 4730 * @param[in] external 4731 * This flow rule is created by request external to PMD. 4732 * @param[in] flow_idx 4733 * This memory pool index to the flow. 4734 * @param[out] error 4735 * Perform verbose error reporting if not NULL. 
4736 * @return 4737 * 0 on success, negative value otherwise 4738 */ 4739 static int 4740 flow_create_split_meter(struct rte_eth_dev *dev, 4741 struct rte_flow *flow, 4742 uint64_t prefix_layers, 4743 uint32_t prefix_mark, 4744 const struct rte_flow_attr *attr, 4745 const struct rte_flow_item items[], 4746 const struct rte_flow_action actions[], 4747 bool external, uint32_t flow_idx, 4748 struct rte_flow_error *error) 4749 { 4750 struct mlx5_priv *priv = dev->data->dev_private; 4751 struct rte_flow_action *sfx_actions = NULL; 4752 struct rte_flow_action *pre_actions = NULL; 4753 struct rte_flow_item *sfx_items = NULL; 4754 struct mlx5_flow *dev_flow = NULL; 4755 struct rte_flow_attr sfx_attr = *attr; 4756 uint32_t mtr = 0; 4757 uint32_t mtr_tag_id = 0; 4758 size_t act_size; 4759 size_t item_size; 4760 int actions_n = 0; 4761 int ret; 4762 4763 if (priv->mtr_en) 4764 actions_n = flow_check_meter_action(actions, &mtr); 4765 if (mtr) { 4766 /* The five prefix actions: meter, decap, encap, tag, end. */ 4767 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4768 sizeof(struct mlx5_rte_flow_action_set_tag); 4769 /* tag, vlan, port id, end. */ 4770 #define METER_SUFFIX_ITEM 4 4771 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4772 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4773 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 4774 0, SOCKET_ID_ANY); 4775 if (!sfx_actions) 4776 return rte_flow_error_set(error, ENOMEM, 4777 RTE_FLOW_ERROR_TYPE_ACTION, 4778 NULL, "no memory to split " 4779 "meter flow"); 4780 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4781 act_size); 4782 pre_actions = sfx_actions + actions_n; 4783 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4784 actions, sfx_actions, 4785 pre_actions); 4786 if (!mtr_tag_id) { 4787 ret = -rte_errno; 4788 goto exit; 4789 } 4790 /* Add the prefix subflow. */ 4791 ret = flow_create_split_inner(dev, flow, &dev_flow, 4792 prefix_layers, 0, 4793 attr, items, 4794 pre_actions, external, 4795 flow_idx, error); 4796 if (ret) { 4797 ret = -rte_errno; 4798 goto exit; 4799 } 4800 dev_flow->handle->split_flow_id = mtr_tag_id; 4801 /* Setting the sfx group atrr. */ 4802 sfx_attr.group = sfx_attr.transfer ? 4803 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4804 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4805 } 4806 /* Add the prefix subflow. */ 4807 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4808 flow_get_prefix_layer_flags(dev_flow) : 4809 prefix_layers, dev_flow ? 4810 dev_flow->handle->mark : prefix_mark, 4811 &sfx_attr, sfx_items ? 4812 sfx_items : items, 4813 sfx_actions ? sfx_actions : actions, 4814 external, flow_idx, error); 4815 exit: 4816 if (sfx_actions) 4817 mlx5_free(sfx_actions); 4818 return ret; 4819 } 4820 4821 /** 4822 * The splitting for sample feature. 4823 * 4824 * Once Sample action is detected in the action list, the flow actions should 4825 * be split into prefix sub flow and suffix sub flow. 4826 * 4827 * The original items remain in the prefix sub flow, all actions preceding the 4828 * sample action and the sample action itself will be copied to the prefix 4829 * sub flow, the actions following the sample action will be copied to the 4830 * suffix sub flow, Queue action always be located in the suffix sub flow. 4831 * 4832 * In order to make the packet from prefix sub flow matches with suffix sub 4833 * flow, an extra tag action be added into prefix sub flow, and the suffix sub 4834 * flow uses tag item with the unique flow id. 
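 *
 * Illustrative sketch (hypothetical NIC-Rx rule; FDB egress rules skip
 * the extra tag):
 *   original actions : SAMPLE / RAW_ENCAP / QUEUE / END
 *   prefix actions   : TAG(set unique id) / SAMPLE / END
 *   suffix pattern   : TAG(match unique id) / END
 *   suffix actions   : RAW_ENCAP / QUEUE / END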
4835 * 4836 * @param dev 4837 * Pointer to Ethernet device. 4838 * @param[in] flow 4839 * Parent flow structure pointer. 4840 * @param[in] attr 4841 * Flow rule attributes. 4842 * @param[in] items 4843 * Pattern specification (list terminated by the END pattern item). 4844 * @param[in] actions 4845 * Associated actions (list terminated by the END action). 4846 * @param[in] external 4847 * This flow rule is created by request external to PMD. 4848 * @param[in] flow_idx 4849 * This memory pool index to the flow. 4850 * @param[out] error 4851 * Perform verbose error reporting if not NULL. 4852 * @return 4853 * 0 on success, negative value otherwise 4854 */ 4855 static int 4856 flow_create_split_sample(struct rte_eth_dev *dev, 4857 struct rte_flow *flow, 4858 const struct rte_flow_attr *attr, 4859 const struct rte_flow_item items[], 4860 const struct rte_flow_action actions[], 4861 bool external, uint32_t flow_idx, 4862 struct rte_flow_error *error) 4863 { 4864 struct mlx5_priv *priv = dev->data->dev_private; 4865 struct rte_flow_action *sfx_actions = NULL; 4866 struct rte_flow_action *pre_actions = NULL; 4867 struct rte_flow_item *sfx_items = NULL; 4868 struct mlx5_flow *dev_flow = NULL; 4869 struct rte_flow_attr sfx_attr = *attr; 4870 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4871 struct mlx5_flow_dv_sample_resource *sample_res; 4872 struct mlx5_flow_tbl_data_entry *sfx_tbl_data; 4873 struct mlx5_flow_tbl_resource *sfx_tbl; 4874 union mlx5_flow_tbl_key sfx_table_key; 4875 #endif 4876 size_t act_size; 4877 size_t item_size; 4878 uint32_t fdb_tx = 0; 4879 int32_t tag_id = 0; 4880 int actions_n = 0; 4881 int sample_action_pos; 4882 int qrss_action_pos; 4883 int ret = 0; 4884 4885 if (priv->sampler_en) 4886 actions_n = flow_check_match_action(actions, attr, 4887 RTE_FLOW_ACTION_TYPE_SAMPLE, 4888 &sample_action_pos, &qrss_action_pos); 4889 if (actions_n) { 4890 /* The prefix actions must includes sample, tag, end. */ 4891 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1) 4892 + sizeof(struct mlx5_rte_flow_action_set_tag); 4893 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM + 4894 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4895 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + 4896 item_size), 0, SOCKET_ID_ANY); 4897 if (!sfx_actions) 4898 return rte_flow_error_set(error, ENOMEM, 4899 RTE_FLOW_ERROR_TYPE_ACTION, 4900 NULL, "no memory to split " 4901 "sample flow"); 4902 /* The representor_id is -1 for uplink. */ 4903 fdb_tx = (attr->transfer && priv->representor_id != -1); 4904 if (!fdb_tx) 4905 sfx_items = (struct rte_flow_item *)((char *)sfx_actions 4906 + act_size); 4907 pre_actions = sfx_actions + actions_n; 4908 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items, 4909 actions, sfx_actions, 4910 pre_actions, actions_n, 4911 sample_action_pos, 4912 qrss_action_pos, error); 4913 if (tag_id < 0 || (!fdb_tx && !tag_id)) { 4914 ret = -rte_errno; 4915 goto exit; 4916 } 4917 /* Add the prefix subflow. */ 4918 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr, 4919 items, pre_actions, external, 4920 flow_idx, error); 4921 if (ret) { 4922 ret = -rte_errno; 4923 goto exit; 4924 } 4925 dev_flow->handle->split_flow_id = tag_id; 4926 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4927 /* Set the sfx group attr. 
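 * The suffix subflow must be inserted into the sample normal-path table
 * created while translating the prefix subflow, so its group is
 * recovered from that table's key (minus one for transfer rules, where
 * group n is placed in table n + 1).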
*/ 4928 sample_res = (struct mlx5_flow_dv_sample_resource *) 4929 dev_flow->dv.sample_res; 4930 sfx_tbl = (struct mlx5_flow_tbl_resource *) 4931 sample_res->normal_path_tbl; 4932 sfx_tbl_data = container_of(sfx_tbl, 4933 struct mlx5_flow_tbl_data_entry, tbl); 4934 sfx_table_key.v64 = sfx_tbl_data->entry.key; 4935 sfx_attr.group = sfx_attr.transfer ? 4936 (sfx_table_key.table_id - 1) : 4937 sfx_table_key.table_id; 4938 #endif 4939 } 4940 /* Add the suffix subflow. */ 4941 ret = flow_create_split_meter(dev, flow, dev_flow ? 4942 flow_get_prefix_layer_flags(dev_flow) : 0, 4943 dev_flow ? dev_flow->handle->mark : 0, 4944 &sfx_attr, sfx_items ? sfx_items : items, 4945 sfx_actions ? sfx_actions : actions, 4946 external, flow_idx, error); 4947 exit: 4948 if (sfx_actions) 4949 mlx5_free(sfx_actions); 4950 return ret; 4951 } 4952 4953 /** 4954 * Split the flow to subflow set. The splitters might be linked 4955 * in the chain, like this: 4956 * flow_create_split_outer() calls: 4957 * flow_create_split_meter() calls: 4958 * flow_create_split_metadata(meter_subflow_0) calls: 4959 * flow_create_split_inner(metadata_subflow_0) 4960 * flow_create_split_inner(metadata_subflow_1) 4961 * flow_create_split_inner(metadata_subflow_2) 4962 * flow_create_split_metadata(meter_subflow_1) calls: 4963 * flow_create_split_inner(metadata_subflow_0) 4964 * flow_create_split_inner(metadata_subflow_1) 4965 * flow_create_split_inner(metadata_subflow_2) 4966 * 4967 * This provide flexible way to add new levels of flow splitting. 4968 * The all of successfully created subflows are included to the 4969 * parent flow dev_flow list. 4970 * 4971 * @param dev 4972 * Pointer to Ethernet device. 4973 * @param[in] flow 4974 * Parent flow structure pointer. 4975 * @param[in] attr 4976 * Flow rule attributes. 4977 * @param[in] items 4978 * Pattern specification (list terminated by the END pattern item). 4979 * @param[in] actions 4980 * Associated actions (list terminated by the END action). 4981 * @param[in] external 4982 * This flow rule is created by request external to PMD. 4983 * @param[in] flow_idx 4984 * This memory pool index to the flow. 4985 * @param[out] error 4986 * Perform verbose error reporting if not NULL. 4987 * @return 4988 * 0 on success, negative value otherwise 4989 */ 4990 static int 4991 flow_create_split_outer(struct rte_eth_dev *dev, 4992 struct rte_flow *flow, 4993 const struct rte_flow_attr *attr, 4994 const struct rte_flow_item items[], 4995 const struct rte_flow_action actions[], 4996 bool external, uint32_t flow_idx, 4997 struct rte_flow_error *error) 4998 { 4999 int ret; 5000 5001 ret = flow_create_split_sample(dev, flow, attr, items, 5002 actions, external, flow_idx, error); 5003 MLX5_ASSERT(ret <= 0); 5004 return ret; 5005 } 5006 5007 /** 5008 * Create a flow and add it to @p list. 5009 * 5010 * @param dev 5011 * Pointer to Ethernet device. 5012 * @param list 5013 * Pointer to a TAILQ flow list. If this parameter NULL, 5014 * no list insertion occurred, flow is just created, 5015 * this is caller's responsibility to track the 5016 * created flow. 5017 * @param[in] attr 5018 * Flow rule attributes. 5019 * @param[in] items 5020 * Pattern specification (list terminated by the END pattern item). 5021 * @param[in] actions 5022 * Associated actions (list terminated by the END action). 5023 * @param[in] external 5024 * This flow rule is created by request external to PMD. 5025 * @param[out] error 5026 * Perform verbose error reporting if not NULL. 
5027 * 5028 * @return 5029 * A flow index on success, 0 otherwise and rte_errno is set. 5030 */ 5031 static uint32_t 5032 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 5033 const struct rte_flow_attr *attr, 5034 const struct rte_flow_item items[], 5035 const struct rte_flow_action actions[], 5036 bool external, struct rte_flow_error *error) 5037 { 5038 struct mlx5_priv *priv = dev->data->dev_private; 5039 struct rte_flow *flow = NULL; 5040 struct mlx5_flow *dev_flow; 5041 const struct rte_flow_action_rss *rss; 5042 union { 5043 struct mlx5_flow_expand_rss buf; 5044 uint8_t buffer[2048]; 5045 } expand_buffer; 5046 union { 5047 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5048 uint8_t buffer[2048]; 5049 } actions_rx; 5050 union { 5051 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5052 uint8_t buffer[2048]; 5053 } actions_hairpin_tx; 5054 union { 5055 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 5056 uint8_t buffer[2048]; 5057 } items_tx; 5058 struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; 5059 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 5060 priv->rss_desc)[!!priv->flow_idx]; 5061 const struct rte_flow_action *p_actions_rx = actions; 5062 uint32_t i; 5063 uint32_t idx = 0; 5064 int hairpin_flow; 5065 uint32_t hairpin_id = 0; 5066 struct rte_flow_attr attr_tx = { .priority = 0 }; 5067 struct rte_flow_attr attr_factor = {0}; 5068 int ret; 5069 5070 memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr)); 5071 if (external) 5072 attr_factor.group *= MLX5_FLOW_TABLE_FACTOR; 5073 hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions); 5074 ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx, 5075 external, hairpin_flow, error); 5076 if (ret < 0) 5077 return 0; 5078 if (hairpin_flow > 0) { 5079 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 5080 rte_errno = EINVAL; 5081 return 0; 5082 } 5083 flow_hairpin_split(dev, actions, actions_rx.actions, 5084 actions_hairpin_tx.actions, items_tx.items, 5085 &hairpin_id); 5086 p_actions_rx = actions_rx.actions; 5087 } 5088 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 5089 if (!flow) { 5090 rte_errno = ENOMEM; 5091 goto error_before_flow; 5092 } 5093 flow->drv_type = flow_get_drv_type(dev, &attr_factor); 5094 if (hairpin_id != 0) 5095 flow->hairpin_flow_id = hairpin_id; 5096 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 5097 flow->drv_type < MLX5_FLOW_TYPE_MAX); 5098 memset(rss_desc, 0, sizeof(*rss_desc)); 5099 rss = flow_get_rss_action(p_actions_rx); 5100 if (rss) { 5101 /* 5102 * The following information is required by 5103 * mlx5_flow_hashfields_adjust() in advance. 5104 */ 5105 rss_desc->level = rss->level; 5106 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 5107 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 5108 } 5109 flow->dev_handles = 0; 5110 if (rss && rss->types) { 5111 unsigned int graph_root; 5112 5113 graph_root = find_graph_root(items, rss->level); 5114 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 5115 items, rss->types, 5116 mlx5_support_expansion, graph_root); 5117 MLX5_ASSERT(ret > 0 && 5118 (unsigned int)ret < sizeof(expand_buffer.buffer)); 5119 } else { 5120 buf->entries = 1; 5121 buf->entry[0].pattern = (void *)(uintptr_t)items; 5122 } 5123 /* 5124 * Record the start index when there is a nested call. All sub-flows 5125 * need to be translated before another calling. 5126 * No need to use ping-pong buffer to save memory here. 
5127 */ 5128 if (priv->flow_idx) { 5129 MLX5_ASSERT(!priv->flow_nested_idx); 5130 priv->flow_nested_idx = priv->flow_idx; 5131 } 5132 for (i = 0; i < buf->entries; ++i) { 5133 /* 5134 * The splitter may create multiple dev_flows, 5135 * depending on configuration. In the simplest 5136 * case it just creates unmodified original flow. 5137 */ 5138 ret = flow_create_split_outer(dev, flow, &attr_factor, 5139 buf->entry[i].pattern, 5140 p_actions_rx, external, idx, 5141 error); 5142 if (ret < 0) 5143 goto error; 5144 } 5145 /* Create the tx flow. */ 5146 if (hairpin_flow) { 5147 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 5148 attr_tx.ingress = 0; 5149 attr_tx.egress = 1; 5150 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 5151 actions_hairpin_tx.actions, 5152 idx, error); 5153 if (!dev_flow) 5154 goto error; 5155 dev_flow->flow = flow; 5156 dev_flow->external = 0; 5157 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 5158 dev_flow->handle, next); 5159 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 5160 items_tx.items, 5161 actions_hairpin_tx.actions, error); 5162 if (ret < 0) 5163 goto error; 5164 } 5165 /* 5166 * Update the metadata register copy table. If extensive 5167 * metadata feature is enabled and registers are supported 5168 * we might create the extra rte_flow for each unique 5169 * MARK/FLAG action ID. 5170 * 5171 * The table is updated for ingress Flows only, because 5172 * the egress Flows belong to the different device and 5173 * copy table should be updated in peer NIC Rx domain. 5174 */ 5175 if (attr_factor.ingress && 5176 (external || attr_factor.group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 5177 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 5178 if (ret) 5179 goto error; 5180 } 5181 /* 5182 * If the flow is external (from application) OR device is started, then 5183 * the flow will be applied immediately. 5184 */ 5185 if (external || dev->data->dev_started) { 5186 ret = flow_drv_apply(dev, flow, error); 5187 if (ret < 0) 5188 goto error; 5189 } 5190 if (list) 5191 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 5192 flow, next); 5193 flow_rxq_flags_set(dev, flow); 5194 /* Nested flow creation index recovery. */ 5195 priv->flow_idx = priv->flow_nested_idx; 5196 if (priv->flow_nested_idx) 5197 priv->flow_nested_idx = 0; 5198 return idx; 5199 error: 5200 MLX5_ASSERT(flow); 5201 ret = rte_errno; /* Save rte_errno before cleanup. */ 5202 flow_mreg_del_copy_action(dev, flow); 5203 flow_drv_destroy(dev, flow); 5204 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 5205 rte_errno = ret; /* Restore rte_errno. */ 5206 error_before_flow: 5207 ret = rte_errno; 5208 if (hairpin_id) 5209 mlx5_flow_id_release(priv->sh->flow_id_pool, 5210 hairpin_id); 5211 rte_errno = ret; 5212 priv->flow_idx = priv->flow_nested_idx; 5213 if (priv->flow_nested_idx) 5214 priv->flow_nested_idx = 0; 5215 return 0; 5216 } 5217 5218 /** 5219 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 5220 * incoming packets to table 1. 5221 * 5222 * Other flow rules, requested for group n, will be created in 5223 * e-switch table n+1. 5224 * Jump action to e-switch group n will be created to group n+1. 5225 * 5226 * Used when working in switchdev mode, to utilise advantages of table 1 5227 * and above. 5228 * 5229 * @param dev 5230 * Pointer to Ethernet device. 5231 * 5232 * @return 5233 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
5234 */ 5235 struct rte_flow * 5236 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 5237 { 5238 const struct rte_flow_attr attr = { 5239 .group = 0, 5240 .priority = 0, 5241 .ingress = 1, 5242 .egress = 0, 5243 .transfer = 1, 5244 }; 5245 const struct rte_flow_item pattern = { 5246 .type = RTE_FLOW_ITEM_TYPE_END, 5247 }; 5248 struct rte_flow_action_jump jump = { 5249 .group = 1, 5250 }; 5251 const struct rte_flow_action actions[] = { 5252 { 5253 .type = RTE_FLOW_ACTION_TYPE_JUMP, 5254 .conf = &jump, 5255 }, 5256 { 5257 .type = RTE_FLOW_ACTION_TYPE_END, 5258 }, 5259 }; 5260 struct mlx5_priv *priv = dev->data->dev_private; 5261 struct rte_flow_error error; 5262 5263 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 5264 &attr, &pattern, 5265 actions, false, &error); 5266 } 5267 5268 /** 5269 * Validate a flow supported by the NIC. 5270 * 5271 * @see rte_flow_validate() 5272 * @see rte_flow_ops 5273 */ 5274 int 5275 mlx5_flow_validate(struct rte_eth_dev *dev, 5276 const struct rte_flow_attr *attr, 5277 const struct rte_flow_item items[], 5278 const struct rte_flow_action actions[], 5279 struct rte_flow_error *error) 5280 { 5281 int hairpin_flow; 5282 5283 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 5284 return flow_drv_validate(dev, attr, items, actions, 5285 true, hairpin_flow, error); 5286 } 5287 5288 /** 5289 * Create a flow. 5290 * 5291 * @see rte_flow_create() 5292 * @see rte_flow_ops 5293 */ 5294 struct rte_flow * 5295 mlx5_flow_create(struct rte_eth_dev *dev, 5296 const struct rte_flow_attr *attr, 5297 const struct rte_flow_item items[], 5298 const struct rte_flow_action actions[], 5299 struct rte_flow_error *error) 5300 { 5301 struct mlx5_priv *priv = dev->data->dev_private; 5302 5303 /* 5304 * If the device is not started yet, it is not allowed to created a 5305 * flow from application. PMD default flows and traffic control flows 5306 * are not affected. 5307 */ 5308 if (unlikely(!dev->data->dev_started)) { 5309 DRV_LOG(DEBUG, "port %u is not started when " 5310 "inserting a flow", dev->data->port_id); 5311 rte_flow_error_set(error, ENODEV, 5312 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5313 NULL, 5314 "port not started"); 5315 return NULL; 5316 } 5317 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 5318 attr, items, actions, true, error); 5319 } 5320 5321 /** 5322 * Destroy a flow in a list. 5323 * 5324 * @param dev 5325 * Pointer to Ethernet device. 5326 * @param list 5327 * Pointer to the Indexed flow list. If this parameter NULL, 5328 * there is no flow removal from the list. Be noted that as 5329 * flow is add to the indexed list, memory of the indexed 5330 * list points to maybe changed as flow destroyed. 5331 * @param[in] flow_idx 5332 * Index of flow to destroy. 5333 */ 5334 static void 5335 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 5336 uint32_t flow_idx) 5337 { 5338 struct mlx5_priv *priv = dev->data->dev_private; 5339 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5340 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5341 [MLX5_IPOOL_RTE_FLOW], flow_idx); 5342 5343 if (!flow) 5344 return; 5345 /* 5346 * Update RX queue flags only if port is started, otherwise it is 5347 * already clean. 
5348 */ 5349 if (dev->data->dev_started) 5350 flow_rxq_flags_trim(dev, flow); 5351 if (flow->hairpin_flow_id) 5352 mlx5_flow_id_release(priv->sh->flow_id_pool, 5353 flow->hairpin_flow_id); 5354 flow_drv_destroy(dev, flow); 5355 if (list) 5356 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 5357 flow_idx, flow, next); 5358 flow_mreg_del_copy_action(dev, flow); 5359 if (flow->fdir) { 5360 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5361 if (priv_fdir_flow->rix_flow == flow_idx) 5362 break; 5363 } 5364 if (priv_fdir_flow) { 5365 LIST_REMOVE(priv_fdir_flow, next); 5366 mlx5_free(priv_fdir_flow->fdir); 5367 mlx5_free(priv_fdir_flow); 5368 } 5369 } 5370 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5371 } 5372 5373 /** 5374 * Destroy all flows. 5375 * 5376 * @param dev 5377 * Pointer to Ethernet device. 5378 * @param list 5379 * Pointer to the Indexed flow list. 5380 * @param active 5381 * If flushing is called avtively. 5382 */ 5383 void 5384 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 5385 { 5386 uint32_t num_flushed = 0; 5387 5388 while (*list) { 5389 flow_list_destroy(dev, list, *list); 5390 num_flushed++; 5391 } 5392 if (active) { 5393 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 5394 dev->data->port_id, num_flushed); 5395 } 5396 } 5397 5398 /** 5399 * Remove all flows. 5400 * 5401 * @param dev 5402 * Pointer to Ethernet device. 5403 * @param list 5404 * Pointer to the Indexed flow list. 5405 */ 5406 void 5407 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 5408 { 5409 struct mlx5_priv *priv = dev->data->dev_private; 5410 struct rte_flow *flow = NULL; 5411 uint32_t idx; 5412 5413 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5414 flow, next) { 5415 flow_drv_remove(dev, flow); 5416 flow_mreg_stop_copy_action(dev, flow); 5417 } 5418 flow_mreg_del_default_copy_action(dev); 5419 flow_rxq_flags_clear(dev); 5420 } 5421 5422 /** 5423 * Add all flows. 5424 * 5425 * @param dev 5426 * Pointer to Ethernet device. 5427 * @param list 5428 * Pointer to the Indexed flow list. 5429 * 5430 * @return 5431 * 0 on success, a negative errno value otherwise and rte_errno is set. 5432 */ 5433 int 5434 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 5435 { 5436 struct mlx5_priv *priv = dev->data->dev_private; 5437 struct rte_flow *flow = NULL; 5438 struct rte_flow_error error; 5439 uint32_t idx; 5440 int ret = 0; 5441 5442 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5443 ret = flow_mreg_add_default_copy_action(dev, &error); 5444 if (ret < 0) 5445 return -rte_errno; 5446 /* Apply Flows created by application. */ 5447 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5448 flow, next) { 5449 ret = flow_mreg_start_copy_action(dev, flow); 5450 if (ret < 0) 5451 goto error; 5452 ret = flow_drv_apply(dev, flow, &error); 5453 if (ret < 0) 5454 goto error; 5455 flow_rxq_flags_set(dev, flow); 5456 } 5457 return 0; 5458 error: 5459 ret = rte_errno; /* Save rte_errno before cleanup. */ 5460 mlx5_flow_stop(dev, list); 5461 rte_errno = ret; /* Restore rte_errno. */ 5462 return -rte_errno; 5463 } 5464 5465 /** 5466 * Stop all default actions for flows. 5467 * 5468 * @param dev 5469 * Pointer to Ethernet device. 5470 */ 5471 void 5472 mlx5_flow_stop_default(struct rte_eth_dev *dev) 5473 { 5474 flow_mreg_del_default_copy_action(dev); 5475 flow_rxq_flags_clear(dev); 5476 } 5477 5478 /** 5479 * Start all default actions for flows. 
5480 * 5481 * @param dev 5482 * Pointer to Ethernet device. 5483 * @return 5484 * 0 on success, a negative errno value otherwise and rte_errno is set. 5485 */ 5486 int 5487 mlx5_flow_start_default(struct rte_eth_dev *dev) 5488 { 5489 struct rte_flow_error error; 5490 5491 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5492 return flow_mreg_add_default_copy_action(dev, &error); 5493 } 5494 5495 /** 5496 * Allocate intermediate resources for flow creation. 5497 * 5498 * @param dev 5499 * Pointer to Ethernet device. 5500 */ 5501 void 5502 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 5503 { 5504 struct mlx5_priv *priv = dev->data->dev_private; 5505 5506 if (!priv->inter_flows) { 5507 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, 5508 MLX5_NUM_MAX_DEV_FLOWS * 5509 sizeof(struct mlx5_flow) + 5510 (sizeof(struct mlx5_flow_rss_desc) + 5511 sizeof(uint16_t) * UINT16_MAX) * 2, 0, 5512 SOCKET_ID_ANY); 5513 if (!priv->inter_flows) { 5514 DRV_LOG(ERR, "can't allocate intermediate memory."); 5515 return; 5516 } 5517 } 5518 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 5519 [MLX5_NUM_MAX_DEV_FLOWS]; 5520 /* Reset the index. */ 5521 priv->flow_idx = 0; 5522 priv->flow_nested_idx = 0; 5523 } 5524 5525 /** 5526 * Free intermediate resources for flows. 5527 * 5528 * @param dev 5529 * Pointer to Ethernet device. 5530 */ 5531 void 5532 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 5533 { 5534 struct mlx5_priv *priv = dev->data->dev_private; 5535 5536 mlx5_free(priv->inter_flows); 5537 priv->inter_flows = NULL; 5538 } 5539 5540 /** 5541 * Verify the flow list is empty 5542 * 5543 * @param dev 5544 * Pointer to Ethernet device. 5545 * 5546 * @return the number of flows not released. 5547 */ 5548 int 5549 mlx5_flow_verify(struct rte_eth_dev *dev) 5550 { 5551 struct mlx5_priv *priv = dev->data->dev_private; 5552 struct rte_flow *flow; 5553 uint32_t idx; 5554 int ret = 0; 5555 5556 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 5557 flow, next) { 5558 DRV_LOG(DEBUG, "port %u flow %p still referenced", 5559 dev->data->port_id, (void *)flow); 5560 ++ret; 5561 } 5562 return ret; 5563 } 5564 5565 /** 5566 * Enable default hairpin egress flow. 5567 * 5568 * @param dev 5569 * Pointer to Ethernet device. 5570 * @param queue 5571 * The queue index. 5572 * 5573 * @return 5574 * 0 on success, a negative errno value otherwise and rte_errno is set. 
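 *
 * The control rule installed here is, conceptually (a sketch of the code
 * below; the TX_QUEUE item is internal and not part of the public
 * rte_flow API):
 *
 *   pattern: TX_QUEUE spec { .queue = queue } mask { .queue = UINT32_MAX }
 *   actions: JUMP group MLX5_HAIRPIN_TX_TABLE / END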
5575 */ 5576 int 5577 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 5578 uint32_t queue) 5579 { 5580 struct mlx5_priv *priv = dev->data->dev_private; 5581 const struct rte_flow_attr attr = { 5582 .egress = 1, 5583 .priority = 0, 5584 }; 5585 struct mlx5_rte_flow_item_tx_queue queue_spec = { 5586 .queue = queue, 5587 }; 5588 struct mlx5_rte_flow_item_tx_queue queue_mask = { 5589 .queue = UINT32_MAX, 5590 }; 5591 struct rte_flow_item items[] = { 5592 { 5593 .type = (enum rte_flow_item_type) 5594 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 5595 .spec = &queue_spec, 5596 .last = NULL, 5597 .mask = &queue_mask, 5598 }, 5599 { 5600 .type = RTE_FLOW_ITEM_TYPE_END, 5601 }, 5602 }; 5603 struct rte_flow_action_jump jump = { 5604 .group = MLX5_HAIRPIN_TX_TABLE, 5605 }; 5606 struct rte_flow_action actions[2]; 5607 uint32_t flow_idx; 5608 struct rte_flow_error error; 5609 5610 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 5611 actions[0].conf = &jump; 5612 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 5613 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5614 &attr, items, actions, false, &error); 5615 if (!flow_idx) { 5616 DRV_LOG(DEBUG, 5617 "Failed to create ctrl flow: rte_errno(%d)," 5618 " type(%d), message(%s)", 5619 rte_errno, error.type, 5620 error.message ? error.message : " (no stated reason)"); 5621 return -rte_errno; 5622 } 5623 return 0; 5624 } 5625 5626 /** 5627 * Enable a control flow configured from the control plane. 5628 * 5629 * @param dev 5630 * Pointer to Ethernet device. 5631 * @param eth_spec 5632 * An Ethernet flow spec to apply. 5633 * @param eth_mask 5634 * An Ethernet flow mask to apply. 5635 * @param vlan_spec 5636 * A VLAN flow spec to apply. 5637 * @param vlan_mask 5638 * A VLAN flow mask to apply. 5639 * 5640 * @return 5641 * 0 on success, a negative errno value otherwise and rte_errno is set. 5642 */ 5643 int 5644 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 5645 struct rte_flow_item_eth *eth_spec, 5646 struct rte_flow_item_eth *eth_mask, 5647 struct rte_flow_item_vlan *vlan_spec, 5648 struct rte_flow_item_vlan *vlan_mask) 5649 { 5650 struct mlx5_priv *priv = dev->data->dev_private; 5651 const struct rte_flow_attr attr = { 5652 .ingress = 1, 5653 .priority = MLX5_FLOW_PRIO_RSVD, 5654 }; 5655 struct rte_flow_item items[] = { 5656 { 5657 .type = RTE_FLOW_ITEM_TYPE_ETH, 5658 .spec = eth_spec, 5659 .last = NULL, 5660 .mask = eth_mask, 5661 }, 5662 { 5663 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN :
					RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	uint32_t flow_idx;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n || !priv->rxqs_n)
		return 0;
	if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
		action_rss.types = 0;
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
				    &attr, items, actions, false, &error);
	if (!flow_idx)
		return -rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}

/**
 * Create the default miss flow rule matching LACP traffic.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	/*
	 * The LACP matching is done by using only the ether type, since
	 * using a multicast destination MAC causes the kernel to give low
	 * priority to this flow.
	 */
	static const struct rte_flow_item_eth lacp_spec = {
		.type = RTE_BE16(0x8809),
	};
	static const struct rte_flow_item_eth lacp_mask = {
		.type = 0xffff,
	};
	const struct rte_flow_attr attr = {
		.ingress = 1,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = &lacp_spec,
			.mask = &lacp_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = (enum rte_flow_action_type)
				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow_error error;
	uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
				&attr, items, actions, false, &error);

	if (!flow_idx)
		return -rte_errno;
	return 0;
}

/**
 * Destroy a flow.
5789 * 5790 * @see rte_flow_destroy() 5791 * @see rte_flow_ops 5792 */ 5793 int 5794 mlx5_flow_destroy(struct rte_eth_dev *dev, 5795 struct rte_flow *flow, 5796 struct rte_flow_error *error __rte_unused) 5797 { 5798 struct mlx5_priv *priv = dev->data->dev_private; 5799 5800 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5801 return 0; 5802 } 5803 5804 /** 5805 * Destroy all flows. 5806 * 5807 * @see rte_flow_flush() 5808 * @see rte_flow_ops 5809 */ 5810 int 5811 mlx5_flow_flush(struct rte_eth_dev *dev, 5812 struct rte_flow_error *error __rte_unused) 5813 { 5814 struct mlx5_priv *priv = dev->data->dev_private; 5815 5816 mlx5_flow_list_flush(dev, &priv->flows, false); 5817 return 0; 5818 } 5819 5820 /** 5821 * Isolated mode. 5822 * 5823 * @see rte_flow_isolate() 5824 * @see rte_flow_ops 5825 */ 5826 int 5827 mlx5_flow_isolate(struct rte_eth_dev *dev, 5828 int enable, 5829 struct rte_flow_error *error) 5830 { 5831 struct mlx5_priv *priv = dev->data->dev_private; 5832 5833 if (dev->data->dev_started) { 5834 rte_flow_error_set(error, EBUSY, 5835 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5836 NULL, 5837 "port must be stopped first"); 5838 return -rte_errno; 5839 } 5840 priv->isolated = !!enable; 5841 if (enable) 5842 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5843 else 5844 dev->dev_ops = &mlx5_os_dev_ops; 5845 5846 dev->rx_descriptor_status = mlx5_rx_descriptor_status; 5847 dev->tx_descriptor_status = mlx5_tx_descriptor_status; 5848 5849 return 0; 5850 } 5851 5852 /** 5853 * Query a flow. 5854 * 5855 * @see rte_flow_query() 5856 * @see rte_flow_ops 5857 */ 5858 static int 5859 flow_drv_query(struct rte_eth_dev *dev, 5860 uint32_t flow_idx, 5861 const struct rte_flow_action *actions, 5862 void *data, 5863 struct rte_flow_error *error) 5864 { 5865 struct mlx5_priv *priv = dev->data->dev_private; 5866 const struct mlx5_flow_driver_ops *fops; 5867 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5868 [MLX5_IPOOL_RTE_FLOW], 5869 flow_idx); 5870 enum mlx5_flow_drv_type ftype; 5871 5872 if (!flow) { 5873 return rte_flow_error_set(error, ENOENT, 5874 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5875 NULL, 5876 "invalid flow handle"); 5877 } 5878 ftype = flow->drv_type; 5879 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5880 fops = flow_get_drv_ops(ftype); 5881 5882 return fops->query(dev, flow, actions, data, error); 5883 } 5884 5885 /** 5886 * Query a flow. 5887 * 5888 * @see rte_flow_query() 5889 * @see rte_flow_ops 5890 */ 5891 int 5892 mlx5_flow_query(struct rte_eth_dev *dev, 5893 struct rte_flow *flow, 5894 const struct rte_flow_action *actions, 5895 void *data, 5896 struct rte_flow_error *error) 5897 { 5898 int ret; 5899 5900 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5901 error); 5902 if (ret < 0) 5903 return ret; 5904 return 0; 5905 } 5906 5907 /** 5908 * Convert a flow director filter to a generic flow. 5909 * 5910 * @param dev 5911 * Pointer to Ethernet device. 5912 * @param fdir_filter 5913 * Flow director filter to add. 5914 * @param attributes 5915 * Generic flow parameters structure. 5916 * 5917 * @return 5918 * 0 on success, a negative errno value otherwise and rte_errno is set. 
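 *
 * For example, a perfect-mode IPv4/UDP filter is translated into the item
 * chain ETH / IPV4 / UDP with either a QUEUE or a DROP action, depending
 * on fdir_filter->action.behavior (see the switches below).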
5919 */ 5920 static int 5921 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5922 const struct rte_eth_fdir_filter *fdir_filter, 5923 struct mlx5_fdir *attributes) 5924 { 5925 struct mlx5_priv *priv = dev->data->dev_private; 5926 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5927 const struct rte_eth_fdir_masks *mask = 5928 &dev->data->dev_conf.fdir_conf.mask; 5929 5930 /* Validate queue number. */ 5931 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5932 DRV_LOG(ERR, "port %u invalid queue number %d", 5933 dev->data->port_id, fdir_filter->action.rx_queue); 5934 rte_errno = EINVAL; 5935 return -rte_errno; 5936 } 5937 attributes->attr.ingress = 1; 5938 attributes->items[0] = (struct rte_flow_item) { 5939 .type = RTE_FLOW_ITEM_TYPE_ETH, 5940 .spec = &attributes->l2, 5941 .mask = &attributes->l2_mask, 5942 }; 5943 switch (fdir_filter->action.behavior) { 5944 case RTE_ETH_FDIR_ACCEPT: 5945 attributes->actions[0] = (struct rte_flow_action){ 5946 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5947 .conf = &attributes->queue, 5948 }; 5949 break; 5950 case RTE_ETH_FDIR_REJECT: 5951 attributes->actions[0] = (struct rte_flow_action){ 5952 .type = RTE_FLOW_ACTION_TYPE_DROP, 5953 }; 5954 break; 5955 default: 5956 DRV_LOG(ERR, "port %u invalid behavior %d", 5957 dev->data->port_id, 5958 fdir_filter->action.behavior); 5959 rte_errno = ENOTSUP; 5960 return -rte_errno; 5961 } 5962 attributes->queue.index = fdir_filter->action.rx_queue; 5963 /* Handle L3. */ 5964 switch (fdir_filter->input.flow_type) { 5965 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5966 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5967 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5968 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5969 .src_addr = input->flow.ip4_flow.src_ip, 5970 .dst_addr = input->flow.ip4_flow.dst_ip, 5971 .time_to_live = input->flow.ip4_flow.ttl, 5972 .type_of_service = input->flow.ip4_flow.tos, 5973 }; 5974 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5975 .src_addr = mask->ipv4_mask.src_ip, 5976 .dst_addr = mask->ipv4_mask.dst_ip, 5977 .time_to_live = mask->ipv4_mask.ttl, 5978 .type_of_service = mask->ipv4_mask.tos, 5979 .next_proto_id = mask->ipv4_mask.proto, 5980 }; 5981 attributes->items[1] = (struct rte_flow_item){ 5982 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5983 .spec = &attributes->l3, 5984 .mask = &attributes->l3_mask, 5985 }; 5986 break; 5987 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5988 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5989 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5990 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5991 .hop_limits = input->flow.ipv6_flow.hop_limits, 5992 .proto = input->flow.ipv6_flow.proto, 5993 }; 5994 5995 memcpy(attributes->l3.ipv6.hdr.src_addr, 5996 input->flow.ipv6_flow.src_ip, 5997 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5998 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5999 input->flow.ipv6_flow.dst_ip, 6000 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 6001 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 6002 mask->ipv6_mask.src_ip, 6003 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 6004 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 6005 mask->ipv6_mask.dst_ip, 6006 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 6007 attributes->items[1] = (struct rte_flow_item){ 6008 .type = RTE_FLOW_ITEM_TYPE_IPV6, 6009 .spec = &attributes->l3, 6010 .mask = &attributes->l3_mask, 6011 }; 6012 break; 6013 default: 6014 DRV_LOG(ERR, "port %u invalid flow type%d", 6015 dev->data->port_id, fdir_filter->input.flow_type); 6016 rte_errno = ENOTSUP; 6017 return -rte_errno; 6018 } 6019 /* Handle L4. 
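	 * Only the UDP and TCP flow types are translated into an L4 item;
	 * the *_OTHER flow types keep the L3 item only.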
*/ 6020 switch (fdir_filter->input.flow_type) { 6021 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 6022 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 6023 .src_port = input->flow.udp4_flow.src_port, 6024 .dst_port = input->flow.udp4_flow.dst_port, 6025 }; 6026 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6027 .src_port = mask->src_port_mask, 6028 .dst_port = mask->dst_port_mask, 6029 }; 6030 attributes->items[2] = (struct rte_flow_item){ 6031 .type = RTE_FLOW_ITEM_TYPE_UDP, 6032 .spec = &attributes->l4, 6033 .mask = &attributes->l4_mask, 6034 }; 6035 break; 6036 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 6037 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6038 .src_port = input->flow.tcp4_flow.src_port, 6039 .dst_port = input->flow.tcp4_flow.dst_port, 6040 }; 6041 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6042 .src_port = mask->src_port_mask, 6043 .dst_port = mask->dst_port_mask, 6044 }; 6045 attributes->items[2] = (struct rte_flow_item){ 6046 .type = RTE_FLOW_ITEM_TYPE_TCP, 6047 .spec = &attributes->l4, 6048 .mask = &attributes->l4_mask, 6049 }; 6050 break; 6051 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 6052 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 6053 .src_port = input->flow.udp6_flow.src_port, 6054 .dst_port = input->flow.udp6_flow.dst_port, 6055 }; 6056 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6057 .src_port = mask->src_port_mask, 6058 .dst_port = mask->dst_port_mask, 6059 }; 6060 attributes->items[2] = (struct rte_flow_item){ 6061 .type = RTE_FLOW_ITEM_TYPE_UDP, 6062 .spec = &attributes->l4, 6063 .mask = &attributes->l4_mask, 6064 }; 6065 break; 6066 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 6067 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6068 .src_port = input->flow.tcp6_flow.src_port, 6069 .dst_port = input->flow.tcp6_flow.dst_port, 6070 }; 6071 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6072 .src_port = mask->src_port_mask, 6073 .dst_port = mask->dst_port_mask, 6074 }; 6075 attributes->items[2] = (struct rte_flow_item){ 6076 .type = RTE_FLOW_ITEM_TYPE_TCP, 6077 .spec = &attributes->l4, 6078 .mask = &attributes->l4_mask, 6079 }; 6080 break; 6081 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 6082 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 6083 break; 6084 default: 6085 DRV_LOG(ERR, "port %u invalid flow type%d", 6086 dev->data->port_id, fdir_filter->input.flow_type); 6087 rte_errno = ENOTSUP; 6088 return -rte_errno; 6089 } 6090 return 0; 6091 } 6092 6093 #define FLOW_FDIR_CMP(f1, f2, fld) \ 6094 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 6095 6096 /** 6097 * Compare two FDIR flows. If items and actions are identical, the two flows are 6098 * regarded as same. 6099 * 6100 * @param dev 6101 * Pointer to Ethernet device. 6102 * @param f1 6103 * FDIR flow to compare. 6104 * @param f2 6105 * FDIR flow to compare. 6106 * 6107 * @return 6108 * Zero on match, 1 otherwise. 6109 */ 6110 static int 6111 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 6112 { 6113 if (FLOW_FDIR_CMP(f1, f2, attr) || 6114 FLOW_FDIR_CMP(f1, f2, l2) || 6115 FLOW_FDIR_CMP(f1, f2, l2_mask) || 6116 FLOW_FDIR_CMP(f1, f2, l3) || 6117 FLOW_FDIR_CMP(f1, f2, l3_mask) || 6118 FLOW_FDIR_CMP(f1, f2, l4) || 6119 FLOW_FDIR_CMP(f1, f2, l4_mask) || 6120 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 6121 return 1; 6122 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 6123 FLOW_FDIR_CMP(f1, f2, queue)) 6124 return 1; 6125 return 0; 6126 } 6127 6128 /** 6129 * Search device flow list to find out a matched FDIR flow. 6130 * 6131 * @param dev 6132 * Pointer to Ethernet device. 
6133 * @param fdir_flow 6134 * FDIR flow to lookup. 6135 * 6136 * @return 6137 * Index of flow if found, 0 otherwise. 6138 */ 6139 static uint32_t 6140 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 6141 { 6142 struct mlx5_priv *priv = dev->data->dev_private; 6143 uint32_t flow_idx = 0; 6144 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6145 6146 MLX5_ASSERT(fdir_flow); 6147 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6148 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 6149 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 6150 dev->data->port_id, flow_idx); 6151 flow_idx = priv_fdir_flow->rix_flow; 6152 break; 6153 } 6154 } 6155 return flow_idx; 6156 } 6157 6158 /** 6159 * Add new flow director filter and store it in list. 6160 * 6161 * @param dev 6162 * Pointer to Ethernet device. 6163 * @param fdir_filter 6164 * Flow director filter to add. 6165 * 6166 * @return 6167 * 0 on success, a negative errno value otherwise and rte_errno is set. 6168 */ 6169 static int 6170 flow_fdir_filter_add(struct rte_eth_dev *dev, 6171 const struct rte_eth_fdir_filter *fdir_filter) 6172 { 6173 struct mlx5_priv *priv = dev->data->dev_private; 6174 struct mlx5_fdir *fdir_flow; 6175 struct rte_flow *flow; 6176 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6177 uint32_t flow_idx; 6178 int ret; 6179 6180 fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, 6181 SOCKET_ID_ANY); 6182 if (!fdir_flow) { 6183 rte_errno = ENOMEM; 6184 return -rte_errno; 6185 } 6186 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 6187 if (ret) 6188 goto error; 6189 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 6190 if (flow_idx) { 6191 rte_errno = EEXIST; 6192 goto error; 6193 } 6194 priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, 6195 sizeof(struct mlx5_fdir_flow), 6196 0, SOCKET_ID_ANY); 6197 if (!priv_fdir_flow) { 6198 rte_errno = ENOMEM; 6199 goto error; 6200 } 6201 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 6202 fdir_flow->items, fdir_flow->actions, true, 6203 NULL); 6204 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 6205 if (!flow) 6206 goto error; 6207 flow->fdir = 1; 6208 priv_fdir_flow->fdir = fdir_flow; 6209 priv_fdir_flow->rix_flow = flow_idx; 6210 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 6211 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 6212 dev->data->port_id, (void *)flow); 6213 return 0; 6214 error: 6215 mlx5_free(priv_fdir_flow); 6216 mlx5_free(fdir_flow); 6217 return -rte_errno; 6218 } 6219 6220 /** 6221 * Delete specific filter. 6222 * 6223 * @param dev 6224 * Pointer to Ethernet device. 6225 * @param fdir_filter 6226 * Filter to be deleted. 6227 * 6228 * @return 6229 * 0 on success, a negative errno value otherwise and rte_errno is set. 
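 *
 * The filter to delete is identified by content: the request is converted
 * with flow_fdir_filter_convert() and matched against the stored filters
 * with flow_fdir_cmp(); deleting a filter that does not exist is not an
 * error.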
6230 */ 6231 static int 6232 flow_fdir_filter_delete(struct rte_eth_dev *dev, 6233 const struct rte_eth_fdir_filter *fdir_filter) 6234 { 6235 struct mlx5_priv *priv = dev->data->dev_private; 6236 uint32_t flow_idx; 6237 struct mlx5_fdir fdir_flow = { 6238 .attr.group = 0, 6239 }; 6240 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6241 int ret; 6242 6243 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 6244 if (ret) 6245 return -rte_errno; 6246 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6247 /* Find the fdir in priv list */ 6248 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 6249 break; 6250 } 6251 if (!priv_fdir_flow) 6252 return 0; 6253 LIST_REMOVE(priv_fdir_flow, next); 6254 flow_idx = priv_fdir_flow->rix_flow; 6255 flow_list_destroy(dev, &priv->flows, flow_idx); 6256 mlx5_free(priv_fdir_flow->fdir); 6257 mlx5_free(priv_fdir_flow); 6258 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 6259 dev->data->port_id, flow_idx); 6260 return 0; 6261 } 6262 6263 /** 6264 * Update queue for specific filter. 6265 * 6266 * @param dev 6267 * Pointer to Ethernet device. 6268 * @param fdir_filter 6269 * Filter to be updated. 6270 * 6271 * @return 6272 * 0 on success, a negative errno value otherwise and rte_errno is set. 6273 */ 6274 static int 6275 flow_fdir_filter_update(struct rte_eth_dev *dev, 6276 const struct rte_eth_fdir_filter *fdir_filter) 6277 { 6278 int ret; 6279 6280 ret = flow_fdir_filter_delete(dev, fdir_filter); 6281 if (ret) 6282 return ret; 6283 return flow_fdir_filter_add(dev, fdir_filter); 6284 } 6285 6286 /** 6287 * Flush all filters. 6288 * 6289 * @param dev 6290 * Pointer to Ethernet device. 6291 */ 6292 static void 6293 flow_fdir_filter_flush(struct rte_eth_dev *dev) 6294 { 6295 struct mlx5_priv *priv = dev->data->dev_private; 6296 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6297 6298 while (!LIST_EMPTY(&priv->fdir_flows)) { 6299 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 6300 LIST_REMOVE(priv_fdir_flow, next); 6301 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 6302 mlx5_free(priv_fdir_flow->fdir); 6303 mlx5_free(priv_fdir_flow); 6304 } 6305 } 6306 6307 /** 6308 * Get flow director information. 6309 * 6310 * @param dev 6311 * Pointer to Ethernet device. 6312 * @param[out] fdir_info 6313 * Resulting flow director information. 6314 */ 6315 static void 6316 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 6317 { 6318 struct rte_eth_fdir_masks *mask = 6319 &dev->data->dev_conf.fdir_conf.mask; 6320 6321 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 6322 fdir_info->guarant_spc = 0; 6323 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 6324 fdir_info->max_flexpayload = 0; 6325 fdir_info->flow_types_mask[0] = 0; 6326 fdir_info->flex_payload_unit = 0; 6327 fdir_info->max_flex_payload_segment_num = 0; 6328 fdir_info->flex_payload_limit = 0; 6329 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 6330 } 6331 6332 /** 6333 * Deal with flow director operations. 6334 * 6335 * @param dev 6336 * Pointer to Ethernet device. 6337 * @param filter_op 6338 * Operation to perform. 6339 * @param arg 6340 * Pointer to operation-specific structure. 6341 * 6342 * @return 6343 * 0 on success, a negative errno value otherwise and rte_errno is set. 
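 *
 * Applications typically reach this path through the legacy filter API,
 * e.g. (sketch, error handling omitted):
 *
 * @code
 * struct rte_eth_fdir_filter f = { 0 };
 *
 * // Fill f.input and f.action here.
 * rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                         RTE_ETH_FILTER_ADD, &f);
 * @endcode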
6344 */ 6345 static int 6346 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 6347 void *arg) 6348 { 6349 enum rte_fdir_mode fdir_mode = 6350 dev->data->dev_conf.fdir_conf.mode; 6351 6352 if (filter_op == RTE_ETH_FILTER_NOP) 6353 return 0; 6354 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 6355 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 6356 DRV_LOG(ERR, "port %u flow director mode %d not supported", 6357 dev->data->port_id, fdir_mode); 6358 rte_errno = EINVAL; 6359 return -rte_errno; 6360 } 6361 switch (filter_op) { 6362 case RTE_ETH_FILTER_ADD: 6363 return flow_fdir_filter_add(dev, arg); 6364 case RTE_ETH_FILTER_UPDATE: 6365 return flow_fdir_filter_update(dev, arg); 6366 case RTE_ETH_FILTER_DELETE: 6367 return flow_fdir_filter_delete(dev, arg); 6368 case RTE_ETH_FILTER_FLUSH: 6369 flow_fdir_filter_flush(dev); 6370 break; 6371 case RTE_ETH_FILTER_INFO: 6372 flow_fdir_info_get(dev, arg); 6373 break; 6374 default: 6375 DRV_LOG(DEBUG, "port %u unknown operation %u", 6376 dev->data->port_id, filter_op); 6377 rte_errno = EINVAL; 6378 return -rte_errno; 6379 } 6380 return 0; 6381 } 6382 6383 /** 6384 * Manage filter operations. 6385 * 6386 * @param dev 6387 * Pointer to Ethernet device structure. 6388 * @param filter_type 6389 * Filter type. 6390 * @param filter_op 6391 * Operation to perform. 6392 * @param arg 6393 * Pointer to operation-specific structure. 6394 * 6395 * @return 6396 * 0 on success, a negative errno value otherwise and rte_errno is set. 6397 */ 6398 int 6399 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 6400 enum rte_filter_type filter_type, 6401 enum rte_filter_op filter_op, 6402 void *arg) 6403 { 6404 switch (filter_type) { 6405 case RTE_ETH_FILTER_GENERIC: 6406 if (filter_op != RTE_ETH_FILTER_GET) { 6407 rte_errno = EINVAL; 6408 return -rte_errno; 6409 } 6410 *(const void **)arg = &mlx5_flow_ops; 6411 return 0; 6412 case RTE_ETH_FILTER_FDIR: 6413 return flow_fdir_ctrl_func(dev, filter_op, arg); 6414 default: 6415 DRV_LOG(ERR, "port %u filter type (%d) not supported", 6416 dev->data->port_id, filter_type); 6417 rte_errno = ENOTSUP; 6418 return -rte_errno; 6419 } 6420 return 0; 6421 } 6422 6423 /** 6424 * Create the needed meter and suffix tables. 6425 * 6426 * @param[in] dev 6427 * Pointer to Ethernet device. 6428 * @param[in] fm 6429 * Pointer to the flow meter. 6430 * 6431 * @return 6432 * Pointer to table set on success, NULL otherwise. 6433 */ 6434 struct mlx5_meter_domains_infos * 6435 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev, 6436 const struct mlx5_flow_meter *fm) 6437 { 6438 const struct mlx5_flow_driver_ops *fops; 6439 6440 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6441 return fops->create_mtr_tbls(dev, fm); 6442 } 6443 6444 /** 6445 * Destroy the meter table set. 6446 * 6447 * @param[in] dev 6448 * Pointer to Ethernet device. 6449 * @param[in] tbl 6450 * Pointer to the meter table set. 6451 * 6452 * @return 6453 * 0 on success. 6454 */ 6455 int 6456 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev, 6457 struct mlx5_meter_domains_infos *tbls) 6458 { 6459 const struct mlx5_flow_driver_ops *fops; 6460 6461 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6462 return fops->destroy_mtr_tbls(dev, tbls); 6463 } 6464 6465 /** 6466 * Create policer rules. 6467 * 6468 * @param[in] dev 6469 * Pointer to Ethernet device. 6470 * @param[in] fm 6471 * Pointer to flow meter structure. 6472 * @param[in] attr 6473 * Pointer to flow attributes. 6474 * 6475 * @return 6476 * 0 on success, -1 otherwise. 
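 *
 * Note: these meter/policer helpers always dispatch to the DV flow driver
 * ops (MLX5_FLOW_TYPE_DV).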
 */
int
mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
			       struct mlx5_flow_meter *fm,
			       const struct rte_flow_attr *attr)
{
	const struct mlx5_flow_driver_ops *fops;

	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
	return fops->create_policer_rules(dev, fm, attr);
}

/**
 * Destroy policer rules.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to flow meter structure.
 * @param[in] attr
 *   Pointer to flow attributes.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
				struct mlx5_flow_meter *fm,
				const struct rte_flow_attr *attr)
{
	const struct mlx5_flow_driver_ops *fops;

	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
	return fops->destroy_policer_rules(dev, fm, attr);
}

/**
 * Allocate a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Index to allocated counter on success, 0 otherwise.
 */
uint32_t
mlx5_counter_alloc(struct rte_eth_dev *dev)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		return fops->counter_alloc(dev);
	}
	DRV_LOG(ERR,
		"port %u counter allocate is not supported.",
		dev->data->port_id);
	return 0;
}

/**
 * Free a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to be freed.
 */
void
mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		fops->counter_free(dev, cnt);
		return;
	}
	DRV_LOG(ERR,
		"port %u counter free is not supported.",
		dev->data->port_id);
}

/**
 * Query counter statistics.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to query.
 * @param[in] clear
 *   Set to clear the counter statistics.
 * @param[out] pkts
 *   Where to store the number of packet hits of the counter.
 * @param[out] bytes
 *   Where to store the number of byte hits of the counter.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
		   bool clear, uint64_t *pkts, uint64_t *bytes)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		return fops->counter_query(dev, cnt, clear, pkts, bytes);
	}
	DRV_LOG(ERR,
		"port %u counter query is not supported.",
		dev->data->port_id);
	return -ENOTSUP;
}

/**
 * Allocate new memory for the counter values, wrapped with all the needed
 * management structures.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
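 *
 * Layout of the single allocation (sketch of the code below):
 *   [raw counter data for all raws][array of raw descriptors][mem_mng]
 * The raw data area is registered as a DevX umem and mkey so that the HW
 * can write the asynchronous query results directly into it.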
6603 */ 6604 static int 6605 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh) 6606 { 6607 struct mlx5_devx_mkey_attr mkey_attr; 6608 struct mlx5_counter_stats_mem_mng *mem_mng; 6609 volatile struct flow_counter_stats *raw_data; 6610 int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES; 6611 int size = (sizeof(struct flow_counter_stats) * 6612 MLX5_COUNTERS_PER_POOL + 6613 sizeof(struct mlx5_counter_stats_raw)) * raws_n + 6614 sizeof(struct mlx5_counter_stats_mem_mng); 6615 size_t pgsize = rte_mem_page_size(); 6616 uint8_t *mem; 6617 int i; 6618 6619 if (pgsize == (size_t)-1) { 6620 DRV_LOG(ERR, "Failed to get mem page size"); 6621 rte_errno = ENOMEM; 6622 return -ENOMEM; 6623 } 6624 mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY); 6625 if (!mem) { 6626 rte_errno = ENOMEM; 6627 return -ENOMEM; 6628 } 6629 mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1; 6630 size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n; 6631 mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size, 6632 IBV_ACCESS_LOCAL_WRITE); 6633 if (!mem_mng->umem) { 6634 rte_errno = errno; 6635 mlx5_free(mem); 6636 return -rte_errno; 6637 } 6638 mkey_attr.addr = (uintptr_t)mem; 6639 mkey_attr.size = size; 6640 mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem); 6641 mkey_attr.pd = sh->pdn; 6642 mkey_attr.log_entity_size = 0; 6643 mkey_attr.pg_access = 0; 6644 mkey_attr.klm_array = NULL; 6645 mkey_attr.klm_num = 0; 6646 mkey_attr.relaxed_ordering = sh->cmng.relaxed_ordering; 6647 mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr); 6648 if (!mem_mng->dm) { 6649 mlx5_glue->devx_umem_dereg(mem_mng->umem); 6650 rte_errno = errno; 6651 mlx5_free(mem); 6652 return -rte_errno; 6653 } 6654 mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size); 6655 raw_data = (volatile struct flow_counter_stats *)mem; 6656 for (i = 0; i < raws_n; ++i) { 6657 mem_mng->raws[i].mem_mng = mem_mng; 6658 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL; 6659 } 6660 for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i) 6661 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, 6662 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i, 6663 next); 6664 LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next); 6665 sh->cmng.mem_mng = mem_mng; 6666 return 0; 6667 } 6668 6669 /** 6670 * Set the statistic memory to the new counter pool. 6671 * 6672 * @param[in] sh 6673 * Pointer to mlx5_dev_ctx_shared object. 6674 * @param[in] pool 6675 * Pointer to the pool to set the statistic memory. 6676 * 6677 * @return 6678 * 0 on success, a negative errno value otherwise. 6679 */ 6680 static int 6681 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh, 6682 struct mlx5_flow_counter_pool *pool) 6683 { 6684 struct mlx5_flow_counter_mng *cmng = &sh->cmng; 6685 /* Resize statistic memory once used out. */ 6686 if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) && 6687 mlx5_flow_create_counter_stat_mem_mng(sh)) { 6688 DRV_LOG(ERR, "Cannot resize counter stat mem."); 6689 return -1; 6690 } 6691 rte_spinlock_lock(&pool->sl); 6692 pool->raw = cmng->mem_mng->raws + pool->index % 6693 MLX5_CNT_CONTAINER_RESIZE; 6694 rte_spinlock_unlock(&pool->sl); 6695 pool->raw_hw = NULL; 6696 return 0; 6697 } 6698 6699 #define MLX5_POOL_QUERY_FREQ_US 1000000 6700 6701 /** 6702 * Set the periodic procedure for triggering asynchronous batch queries for all 6703 * the counter pools. 6704 * 6705 * @param[in] sh 6706 * Pointer to mlx5_dev_ctx_shared object. 
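 *
 * The alarm period is MLX5_POOL_QUERY_FREQ_US / n_valid, so a complete
 * pass over all counter pools takes roughly MLX5_POOL_QUERY_FREQ_US
 * microseconds regardless of the number of pools.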
6707 */ 6708 void 6709 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) 6710 { 6711 uint32_t pools_n, us; 6712 6713 pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED); 6714 us = MLX5_POOL_QUERY_FREQ_US / pools_n; 6715 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); 6716 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { 6717 sh->cmng.query_thread_on = 0; 6718 DRV_LOG(ERR, "Cannot reinitialize query alarm"); 6719 } else { 6720 sh->cmng.query_thread_on = 1; 6721 } 6722 } 6723 6724 /** 6725 * The periodic procedure for triggering asynchronous batch queries for all the 6726 * counter pools. This function is probably called by the host thread. 6727 * 6728 * @param[in] arg 6729 * The parameter for the alarm process. 6730 */ 6731 void 6732 mlx5_flow_query_alarm(void *arg) 6733 { 6734 struct mlx5_dev_ctx_shared *sh = arg; 6735 int ret; 6736 uint16_t pool_index = sh->cmng.pool_index; 6737 struct mlx5_flow_counter_mng *cmng = &sh->cmng; 6738 struct mlx5_flow_counter_pool *pool; 6739 uint16_t n_valid; 6740 6741 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) 6742 goto set_alarm; 6743 rte_spinlock_lock(&cmng->pool_update_sl); 6744 pool = cmng->pools[pool_index]; 6745 n_valid = cmng->n_valid; 6746 rte_spinlock_unlock(&cmng->pool_update_sl); 6747 /* Set the statistic memory to the new created pool. */ 6748 if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))) 6749 goto set_alarm; 6750 if (pool->raw_hw) 6751 /* There is a pool query in progress. */ 6752 goto set_alarm; 6753 pool->raw_hw = 6754 LIST_FIRST(&sh->cmng.free_stat_raws); 6755 if (!pool->raw_hw) 6756 /* No free counter statistics raw memory. */ 6757 goto set_alarm; 6758 /* 6759 * Identify the counters released between query trigger and query 6760 * handle more efficiently. The counter released in this gap period 6761 * should wait for a new round of query as the new arrived packets 6762 * will not be taken into account. 6763 */ 6764 pool->query_gen++; 6765 ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0, 6766 MLX5_COUNTERS_PER_POOL, 6767 NULL, NULL, 6768 pool->raw_hw->mem_mng->dm->id, 6769 (void *)(uintptr_t) 6770 pool->raw_hw->data, 6771 sh->devx_comp, 6772 (uint64_t)(uintptr_t)pool); 6773 if (ret) { 6774 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" 6775 " %d", pool->min_dcs->id); 6776 pool->raw_hw = NULL; 6777 goto set_alarm; 6778 } 6779 LIST_REMOVE(pool->raw_hw, next); 6780 sh->cmng.pending_queries++; 6781 pool_index++; 6782 if (pool_index >= n_valid) 6783 pool_index = 0; 6784 set_alarm: 6785 sh->cmng.pool_index = pool_index; 6786 mlx5_set_query_alarm(sh); 6787 } 6788 6789 /** 6790 * Check and callback event for new aged flow in the counter pool 6791 * 6792 * @param[in] sh 6793 * Pointer to mlx5_dev_ctx_shared object. 6794 * @param[in] pool 6795 * Pointer to Current counter pool. 
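 *
 * A counter is reported as aged out when its hit count did not change
 * between the previous and the current raw snapshot and the accumulated
 * idle time exceeds the configured timeout; such counters are queued on
 * the per-port aged list and an RTE_ETH_EVENT_FLOW_AGED event may be
 * raised.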
6796 */ 6797 static void 6798 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, 6799 struct mlx5_flow_counter_pool *pool) 6800 { 6801 struct mlx5_priv *priv; 6802 struct mlx5_flow_counter *cnt; 6803 struct mlx5_age_info *age_info; 6804 struct mlx5_age_param *age_param; 6805 struct mlx5_counter_stats_raw *cur = pool->raw_hw; 6806 struct mlx5_counter_stats_raw *prev = pool->raw; 6807 const uint64_t curr_time = MLX5_CURR_TIME_SEC; 6808 const uint32_t time_delta = curr_time - pool->time_of_last_age_check; 6809 uint16_t expected = AGE_CANDIDATE; 6810 uint32_t i; 6811 6812 pool->time_of_last_age_check = curr_time; 6813 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { 6814 cnt = MLX5_POOL_GET_CNT(pool, i); 6815 age_param = MLX5_CNT_TO_AGE(cnt); 6816 if (__atomic_load_n(&age_param->state, 6817 __ATOMIC_RELAXED) != AGE_CANDIDATE) 6818 continue; 6819 if (cur->data[i].hits != prev->data[i].hits) { 6820 __atomic_store_n(&age_param->sec_since_last_hit, 0, 6821 __ATOMIC_RELAXED); 6822 continue; 6823 } 6824 if (__atomic_add_fetch(&age_param->sec_since_last_hit, 6825 time_delta, 6826 __ATOMIC_RELAXED) <= age_param->timeout) 6827 continue; 6828 /** 6829 * Hold the lock first, or if between the 6830 * state AGE_TMOUT and tailq operation the 6831 * release happened, the release procedure 6832 * may delete a non-existent tailq node. 6833 */ 6834 priv = rte_eth_devices[age_param->port_id].data->dev_private; 6835 age_info = GET_PORT_AGE_INFO(priv); 6836 rte_spinlock_lock(&age_info->aged_sl); 6837 if (__atomic_compare_exchange_n(&age_param->state, &expected, 6838 AGE_TMOUT, false, 6839 __ATOMIC_RELAXED, 6840 __ATOMIC_RELAXED)) { 6841 TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); 6842 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 6843 } 6844 rte_spinlock_unlock(&age_info->aged_sl); 6845 } 6846 for (i = 0; i < sh->max_port; i++) { 6847 age_info = &sh->port[i].age_info; 6848 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) 6849 continue; 6850 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) 6851 rte_eth_dev_callback_process 6852 (&rte_eth_devices[sh->port[i].devx_ih_port_id], 6853 RTE_ETH_EVENT_FLOW_AGED, NULL); 6854 age_info->flags = 0; 6855 } 6856 } 6857 6858 /** 6859 * Handler for the HW respond about ready values from an asynchronous batch 6860 * query. This function is probably called by the host thread. 6861 * 6862 * @param[in] sh 6863 * The pointer to the shared device context. 6864 * @param[in] async_id 6865 * The Devx async ID. 6866 * @param[in] status 6867 * The status of the completion. 6868 */ 6869 void 6870 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, 6871 uint64_t async_id, int status) 6872 { 6873 struct mlx5_flow_counter_pool *pool = 6874 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; 6875 struct mlx5_counter_stats_raw *raw_to_free; 6876 uint8_t query_gen = pool->query_gen ^ 1; 6877 struct mlx5_flow_counter_mng *cmng = &sh->cmng; 6878 enum mlx5_counter_type cnt_type = 6879 IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE : 6880 MLX5_COUNTER_TYPE_ORIGIN; 6881 6882 if (unlikely(status)) { 6883 raw_to_free = pool->raw_hw; 6884 } else { 6885 raw_to_free = pool->raw; 6886 if (IS_AGE_POOL(pool)) 6887 mlx5_flow_aging_check(sh, pool); 6888 rte_spinlock_lock(&pool->sl); 6889 pool->raw = pool->raw_hw; 6890 rte_spinlock_unlock(&pool->sl); 6891 /* Be sure the new raw counters data is updated in memory. 
*/ 6892 rte_io_wmb(); 6893 if (!TAILQ_EMPTY(&pool->counters[query_gen])) { 6894 rte_spinlock_lock(&cmng->csl[cnt_type]); 6895 TAILQ_CONCAT(&cmng->counters[cnt_type], 6896 &pool->counters[query_gen], next); 6897 rte_spinlock_unlock(&cmng->csl[cnt_type]); 6898 } 6899 } 6900 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); 6901 pool->raw_hw = NULL; 6902 sh->cmng.pending_queries--; 6903 } 6904 6905 /** 6906 * Translate the rte_flow group index to HW table value. 6907 * 6908 * @param[in] attributes 6909 * Pointer to flow attributes 6910 * @param[in] external 6911 * Value is part of flow rule created by request external to PMD. 6912 * @param[in] group 6913 * rte_flow group index value. 6914 * @param[out] fdb_def_rule 6915 * Whether fdb jump to table 1 is configured. 6916 * @param[out] table 6917 * HW table value. 6918 * @param[out] error 6919 * Pointer to error structure. 6920 * 6921 * @return 6922 * 0 on success, a negative errno value otherwise and rte_errno is set. 6923 */ 6924 int 6925 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, 6926 uint32_t group, bool fdb_def_rule, uint32_t *table, 6927 struct rte_flow_error *error) 6928 { 6929 if (attributes->transfer && external && fdb_def_rule) { 6930 if (group == UINT32_MAX) 6931 return rte_flow_error_set 6932 (error, EINVAL, 6933 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 6934 NULL, 6935 "group index not supported"); 6936 *table = group + 1; 6937 } else { 6938 *table = group; 6939 } 6940 return 0; 6941 } 6942 6943 /** 6944 * Discover availability of metadata reg_c's. 6945 * 6946 * Iteratively use test flows to check availability. 6947 * 6948 * @param[in] dev 6949 * Pointer to the Ethernet device structure. 6950 * 6951 * @return 6952 * 0 on success, a negative errno value otherwise and rte_errno is set. 6953 */ 6954 int 6955 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) 6956 { 6957 struct mlx5_priv *priv = dev->data->dev_private; 6958 struct mlx5_dev_config *config = &priv->config; 6959 enum modify_reg idx; 6960 int n = 0; 6961 6962 /* reg_c[0] and reg_c[1] are reserved. */ 6963 config->flow_mreg_c[n++] = REG_C_0; 6964 config->flow_mreg_c[n++] = REG_C_1; 6965 /* Discover availability of other reg_c's. */ 6966 for (idx = REG_C_2; idx <= REG_C_7; ++idx) { 6967 struct rte_flow_attr attr = { 6968 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 6969 .priority = MLX5_FLOW_PRIO_RSVD, 6970 .ingress = 1, 6971 }; 6972 struct rte_flow_item items[] = { 6973 [0] = { 6974 .type = RTE_FLOW_ITEM_TYPE_END, 6975 }, 6976 }; 6977 struct rte_flow_action actions[] = { 6978 [0] = { 6979 .type = (enum rte_flow_action_type) 6980 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 6981 .conf = &(struct mlx5_flow_action_copy_mreg){ 6982 .src = REG_C_1, 6983 .dst = idx, 6984 }, 6985 }, 6986 [1] = { 6987 .type = RTE_FLOW_ACTION_TYPE_JUMP, 6988 .conf = &(struct rte_flow_action_jump){ 6989 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 6990 }, 6991 }, 6992 [2] = { 6993 .type = RTE_FLOW_ACTION_TYPE_END, 6994 }, 6995 }; 6996 uint32_t flow_idx; 6997 struct rte_flow *flow; 6998 struct rte_flow_error error; 6999 7000 if (!config->dv_flow_en) 7001 break; 7002 /* Create internal flow, validation skips copy action. 
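		 * If the port is already started the flow has been applied
		 * by flow_list_create() itself, otherwise flow_drv_apply()
		 * is called below to probe whether reg_c[idx] is really
		 * usable.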
		 */
		flow_idx = flow_list_create(dev, NULL, &attr, items,
					    actions, false, &error);
		flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
				      flow_idx);
		if (!flow)
			continue;
		if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
			config->flow_mreg_c[n++] = idx;
		flow_list_destroy(dev, NULL, flow_idx);
	}
	for (; n < MLX5_MREG_C_NUM; ++n)
		config->flow_mreg_c[n] = REG_NON;
	return 0;
}

/**
 * Dump flow raw HW data to a file.
 *
 * @param[in] dev
 *   The pointer to Ethernet device.
 * @param[in] file
 *   A pointer to a file for output.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. PMDs initialize this
 *   structure in case of error only.
 * @return
 *   0 on success, a negative value otherwise.
 */
int
mlx5_flow_dev_dump(struct rte_eth_dev *dev,
		   FILE *file,
		   struct rte_flow_error *error __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	if (!priv->config.dv_flow_en) {
		if (fputs("device dv flow disabled\n", file) <= 0)
			return -errno;
		return -ENOTSUP;
	}
	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
				       sh->tx_domain, file);
}

/**
 * Get aged-out flows.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] contexts
 *   The address of an array of pointers to the aged-out flow contexts.
 * @param[in] nb_contexts
 *   The length of the context array.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. Initialized in case of
 *   error only.
 *
 * @return
 *   The number of aged-out flow contexts reported on success, a negative
 *   errno value otherwise. If nb_contexts is 0, the total number of
 *   aged-out contexts is returned; otherwise, the number of aged-out
 *   flows reported in the context array is returned.
 */
int
mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
			 uint32_t nb_contexts, struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		return fops->get_aged_flows(dev, contexts, nb_contexts,
					    error);
	}
	DRV_LOG(ERR,
		"port %u get aged flows is not supported.",
		dev->data->port_id);
	return -ENOTSUP;
}