/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"

/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

/** Helper macro to build input graph for mlx5_flow_expand_rss(). */
#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
	(const int []){ \
		__VA_ARGS__, 0, \
	}

/** Node object of input graph for mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_node {
	const int *const next;
	/**<
	 * List of next node indexes. Index 0 is interpreted as a terminator.
	 */
	const enum rte_flow_item_type type;
	/**< Pattern item type of current node. */
	uint64_t rss_types;
	/**<
	 * RSS types bit-field associated with this node
	 * (see ETH_RSS_* definitions).
	 */
};

/** Object returned by mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_rss {
	uint32_t entries;
	/**< Number of entries @p patterns and @p priorities. */
	struct {
		struct rte_flow_item *pattern; /**< Expanded pattern array. */
		uint32_t priority; /**< Priority offset for each expansion. */
	} entry[];
};

static enum rte_flow_item_type
mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
{
	enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
	uint16_t ether_type = 0;
	uint16_t ether_type_m;
	uint8_t ip_next_proto = 0;
	uint8_t ip_next_proto_m;

	if (item == NULL || item->spec == NULL)
		return ret;
	switch (item->type) {
	case RTE_FLOW_ITEM_TYPE_ETH:
		if (item->mask)
			ether_type_m = ((const struct rte_flow_item_eth *)
						(item->mask))->type;
		else
			ether_type_m = rte_flow_item_eth_mask.type;
		if (ether_type_m != RTE_BE16(0xFFFF))
			break;
		ether_type = ((const struct rte_flow_item_eth *)
				(item->spec))->type;
		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
			ret = RTE_FLOW_ITEM_TYPE_VLAN;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_VLAN:
		if (item->mask)
			ether_type_m = ((const struct rte_flow_item_vlan *)
						(item->mask))->inner_type;
		else
			ether_type_m = rte_flow_item_vlan_mask.inner_type;
		if (ether_type_m != RTE_BE16(0xFFFF))
			break;
		ether_type = ((const struct rte_flow_item_vlan *)
				(item->spec))->inner_type;
		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
			ret = RTE_FLOW_ITEM_TYPE_VLAN;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_IPV4:
		if (item->mask)
			ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
					(item->mask))->hdr.next_proto_id;
		else
			ip_next_proto_m =
				rte_flow_item_ipv4_mask.hdr.next_proto_id;
		if (ip_next_proto_m != 0xFF)
			break;
		ip_next_proto = ((const struct rte_flow_item_ipv4 *)
				(item->spec))->hdr.next_proto_id;
		if (ip_next_proto == IPPROTO_UDP)
			ret = RTE_FLOW_ITEM_TYPE_UDP;
		else if (ip_next_proto == IPPROTO_TCP)
			ret = RTE_FLOW_ITEM_TYPE_TCP;
		else if (ip_next_proto == IPPROTO_IP)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (ip_next_proto == IPPROTO_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_IPV6:
		if (item->mask)
			ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
					(item->mask))->hdr.proto;
		else
			ip_next_proto_m =
				rte_flow_item_ipv6_mask.hdr.proto;
		if (ip_next_proto_m != 0xFF)
			break;
		ip_next_proto = ((const struct rte_flow_item_ipv6 *)
				(item->spec))->hdr.proto;
		if (ip_next_proto == IPPROTO_UDP)
			ret = RTE_FLOW_ITEM_TYPE_UDP;
		else if (ip_next_proto == IPPROTO_TCP)
			ret = RTE_FLOW_ITEM_TYPE_TCP;
		else if (ip_next_proto == IPPROTO_IP)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (ip_next_proto == IPPROTO_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	default:
		ret = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	}
	return ret;
}
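
/*
 * Illustrative note (not part of the original code): given an item whose
 * relevant field is fully masked, mlx5_flow_expand_rss_item_complete()
 * reports the item type implied next. A minimal sketch with a hypothetical
 * caller:
 *
 *	struct rte_flow_item_eth spec = {
 *		.type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *	};
 *	struct rte_flow_item_eth mask = { .type = RTE_BE16(0xffff) };
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &spec,
 *		.mask = &mask,
 *	};
 *
 *	// Returns RTE_FLOW_ITEM_TYPE_IPV4: the EtherType is fully masked
 *	// and matches IPv4. With a partial mask it would return VOID.
 *	enum rte_flow_item_type next =
 *		mlx5_flow_expand_rss_item_complete(&item);
 */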

/**
 * Expand RSS flows into several possible flows according to the RSS hash
 * fields requested and the driver capabilities.
 *
 * @param[out] buf
 *   Buffer to store the resulting expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] types
 *   RSS types to expand (see ETH_RSS_* definitions).
 * @param[in] graph
 *   Input graph to expand @p pattern according to @p types.
 * @param[in] graph_root_index
 *   Index of root node in @p graph, typically 0.
 *
 * @return
 *   A positive value representing the size of @p buf in bytes regardless of
 *   @p size on success, a negative errno value otherwise and rte_errno is
 *   set, the following errors are defined:
 *
 *   -E2BIG: graph-depth @p graph is too deep.
 */
static int
mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
		     const struct rte_flow_item *pattern, uint64_t types,
		     const struct mlx5_flow_expand_node graph[],
		     int graph_root_index)
{
	const int elt_n = 8;
	const struct rte_flow_item *item;
	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
	const int *next_node;
	const int *stack[elt_n];
	int stack_pos = 0;
	struct rte_flow_item flow_items[elt_n];
	unsigned int i;
	size_t lsize;
	size_t user_pattern_size = 0;
	void *addr = NULL;
	const struct mlx5_flow_expand_node *next = NULL;
	struct rte_flow_item missed_item;
	int missed = 0;
	int elt = 0;
	const struct rte_flow_item *last_item = NULL;

	memset(&missed_item, 0, sizeof(missed_item));
	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
		elt_n * sizeof(buf->entry[0]);
	if (lsize <= size) {
		buf->entry[0].priority = 0;
		buf->entry[0].pattern = (void *)&buf->entry[elt_n];
		buf->entries = 0;
		addr = buf->entry[0].pattern;
	}
	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
			last_item = item;
		for (i = 0; node->next && node->next[i]; ++i) {
			next = &graph[node->next[i]];
			if (next->type == item->type)
				break;
		}
		if (next)
			node = next;
		user_pattern_size += sizeof(*item);
	}
	user_pattern_size += sizeof(*item); /* Handle END item. */
	lsize += user_pattern_size;
	/* Copy the user pattern in the first entry of the buffer. */
	if (lsize <= size) {
		rte_memcpy(addr, pattern, user_pattern_size);
		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
		buf->entries = 1;
	}
	/* Start expanding. */
	memset(flow_items, 0, sizeof(flow_items));
	user_pattern_size -= sizeof(*item);
	/*
	 * Check whether the last valid item has spec set and, if so, which
	 * item type completes the pattern so it can be used for expansion.
	 */
	missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
		/* Item type END indicates expansion is not required. */
		return lsize;
	}
	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
		next = NULL;
		missed = 1;
		for (i = 0; node->next && node->next[i]; ++i) {
			next = &graph[node->next[i]];
			if (next->type == missed_item.type) {
				flow_items[0].type = missed_item.type;
				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
				break;
			}
			next = NULL;
		}
	}
	if (next && missed) {
		elt = 2; /* missed item + item end. */
		node = next;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if ((node->rss_types & types) && lsize <= size) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	}
	memset(flow_items, 0, sizeof(flow_items));
	next_node = node->next;
	stack[stack_pos] = next_node;
	node = next_node ? &graph[*next_node] : NULL;
	while (node) {
		flow_items[stack_pos].type = node->type;
		if (node->rss_types & types) {
			/*
			 * Compute the number of items to copy from the
			 * expansion and copy it.
			 * When stack_pos is 0, there is one element in it,
			 * plus the additional END item.
			 */
			elt = stack_pos + 2;
			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
			lsize += elt * sizeof(*item) + user_pattern_size;
			if (lsize <= size) {
				size_t n = elt * sizeof(*item);

				buf->entry[buf->entries].priority =
					stack_pos + 1 + missed;
				buf->entry[buf->entries].pattern = addr;
				buf->entries++;
				rte_memcpy(addr, buf->entry[0].pattern,
					   user_pattern_size);
				addr = (void *)(((uintptr_t)addr) +
						user_pattern_size);
				rte_memcpy(addr, &missed_item,
					   missed * sizeof(*item));
				addr = (void *)(((uintptr_t)addr) +
						missed * sizeof(*item));
				rte_memcpy(addr, flow_items, n);
				addr = (void *)(((uintptr_t)addr) + n);
			}
		}
		/* Go deeper. */
		if (node->next) {
			next_node = node->next;
			if (stack_pos++ == elt_n) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			stack[stack_pos] = next_node;
		} else if (*(next_node + 1)) {
			/* Follow up with the next possibility. */
			++next_node;
		} else {
			/* Move to the next path. */
			if (stack_pos)
				next_node = stack[--stack_pos];
			next_node++;
			stack[stack_pos] = next_node;
		}
		node = *next_node ? &graph[*next_node] : NULL;
	};
	/* No expanded flows, but we have a missed item; create one rule for it. */
	if (buf->entries == 1 && missed != 0) {
		elt = 2;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if (lsize <= size) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			flow_items[0].type = missed_item.type;
			flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	}
	return lsize;
}
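
/*
 * Illustrative note (not part of the original code): a minimal sketch of how
 * the expansion above behaves. Assuming a user pattern eth / ipv4 / end
 * (no IPv4 spec) and RSS types covering IPv4 UDP and TCP, the buffer receives
 * the original pattern plus the expansions reachable in the graph below:
 *
 *	eth / ipv4 / end		(priority offset 0)
 *	eth / ipv4 / udp / end		(priority offset 1)
 *	eth / ipv4 / tcp / end		(priority offset 1)
 *
 * A hypothetical caller using a stack buffer:
 *
 *	union {
 *		struct mlx5_flow_expand_rss buf;
 *		uint8_t buffer[2048];
 *	} expand_buffer;
 *	int ret = mlx5_flow_expand_rss(&expand_buffer.buf,
 *				       sizeof(expand_buffer.buffer),
 *				       pattern,
 *				       ETH_RSS_NONFRAG_IPV4_UDP |
 *				       ETH_RSS_NONFRAG_IPV4_TCP,
 *				       mlx5_support_expansion,
 *				       MLX5_EXPANSION_ROOT);
 *	// On success, ret is the buffer size actually required; each
 *	// expand_buffer.buf.entry[i] holds one expanded pattern and its
 *	// priority offset relative to the original flow priority.
 */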

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						  MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
						(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6,
						  MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						  MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						  MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_aged_flows = mlx5_flow_get_aged_flows,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action actions[2];
	struct rte_flow_action_queue queue;
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_SFX:
		/*
		 * If meter color and flow match share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is used in the meter suffix
		 * sub-flow when a meter is present. It's safe to share the
		 * same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If the meter is enabled, it engages a register for color
		 * match and flow match. If the meter color match does not use
		 * REG_C_2, the REG_C_x used by the meter color match must be
		 * skipped.
		 * If the meter is disabled, all available registers can be
		 * used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than 2.
		 * Take care not to conflict with the meter color REG_C_x.
		 * If the available index REG_C_y >= REG_C_x, skip the
		 * color register.
		 */
		if (skip_mtr_reg && config->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						       RTE_FLOW_ERROR_TYPE_ITEM,
							NULL, "invalid tag id");
			if (config->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NON)
				return config->flow_mreg_c
					       [id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return config->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}
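
/*
 * Illustrative note (not part of the original code): a minimal usage sketch
 * of the translation above. Callers map an abstract feature to the concrete
 * register selected by the current dv_xmeta_en configuration, e.g.:
 *
 *	struct rte_flow_error error;
 *	int reg = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, &error);
 *
 *	if (reg < 0)
 *		return reg; // rte_errno was set by rte_flow_error_set().
 *	// With MLX5_XMETA_MODE_META16 this yields REG_C_0, with
 *	// MLX5_XMETA_MODE_META32 it yields REG_C_1, and with the legacy
 *	// mode it yields REG_B.
 */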
742 */ 743 if (skip_mtr_reg && config->flow_mreg_c 744 [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { 745 if (id >= (REG_C_7 - start_reg)) 746 return rte_flow_error_set(error, EINVAL, 747 RTE_FLOW_ERROR_TYPE_ITEM, 748 NULL, "invalid tag id"); 749 if (config->flow_mreg_c 750 [id + 1 + start_reg - REG_C_0] != REG_NON) 751 return config->flow_mreg_c 752 [id + 1 + start_reg - REG_C_0]; 753 return rte_flow_error_set(error, ENOTSUP, 754 RTE_FLOW_ERROR_TYPE_ITEM, 755 NULL, "unsupported tag id"); 756 } 757 return config->flow_mreg_c[id + start_reg - REG_C_0]; 758 } 759 MLX5_ASSERT(false); 760 return rte_flow_error_set(error, EINVAL, 761 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 762 NULL, "invalid feature name"); 763 } 764 765 /** 766 * Check extensive flow metadata register support. 767 * 768 * @param dev 769 * Pointer to rte_eth_dev structure. 770 * 771 * @return 772 * True if device supports extensive flow metadata register, otherwise false. 773 */ 774 bool 775 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev) 776 { 777 struct mlx5_priv *priv = dev->data->dev_private; 778 struct mlx5_dev_config *config = &priv->config; 779 780 /* 781 * Having available reg_c can be regarded inclusively as supporting 782 * extensive flow metadata register, which could mean, 783 * - metadata register copy action by modify header. 784 * - 16 modify header actions is supported. 785 * - reg_c's are preserved across different domain (FDB and NIC) on 786 * packet loopback by flow lookup miss. 787 */ 788 return config->flow_mreg_c[2] != REG_NON; 789 } 790 791 /** 792 * Verify the @p item specifications (spec, last, mask) are compatible with the 793 * NIC capabilities. 794 * 795 * @param[in] item 796 * Item specification. 797 * @param[in] mask 798 * @p item->mask or flow default bit-masks. 799 * @param[in] nic_mask 800 * Bit-masks covering supported fields by the NIC to compare with user mask. 801 * @param[in] size 802 * Bit-masks size in bytes. 803 * @param[in] range_accepted 804 * True if range of values is accepted for specific fields, false otherwise. 805 * @param[out] error 806 * Pointer to error structure. 807 * 808 * @return 809 * 0 on success, a negative errno value otherwise and rte_errno is set. 810 */ 811 int 812 mlx5_flow_item_acceptable(const struct rte_flow_item *item, 813 const uint8_t *mask, 814 const uint8_t *nic_mask, 815 unsigned int size, 816 bool range_accepted, 817 struct rte_flow_error *error) 818 { 819 unsigned int i; 820 821 MLX5_ASSERT(nic_mask); 822 for (i = 0; i < size; ++i) 823 if ((nic_mask[i] | mask[i]) != nic_mask[i]) 824 return rte_flow_error_set(error, ENOTSUP, 825 RTE_FLOW_ERROR_TYPE_ITEM, 826 item, 827 "mask enables non supported" 828 " bits"); 829 if (!item->spec && (item->mask || item->last)) 830 return rte_flow_error_set(error, EINVAL, 831 RTE_FLOW_ERROR_TYPE_ITEM, item, 832 "mask/last without a spec is not" 833 " supported"); 834 if (item->spec && item->last && !range_accepted) { 835 uint8_t spec[size]; 836 uint8_t last[size]; 837 unsigned int i; 838 int ret; 839 840 for (i = 0; i < size; ++i) { 841 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i]; 842 last[i] = ((const uint8_t *)item->last)[i] & mask[i]; 843 } 844 ret = memcmp(spec, last, size); 845 if (ret != 0) 846 return rte_flow_error_set(error, EINVAL, 847 RTE_FLOW_ERROR_TYPE_ITEM, 848 item, 849 "range is not valid"); 850 } 851 return 0; 852 } 853 854 /** 855 * Adjust the hash fields according to the @p flow information. 856 * 857 * @param[in] dev_flow. 858 * Pointer to the mlx5_flow. 

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the mlx5_flow_rss_desc.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}
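
/*
 * Illustrative note (not part of the original code): a minimal example of
 * the adjustment, assuming rss_desc->level <= 1 and an outer UDP layer:
 *
 *	fields = mlx5_flow_hashfields_adjust(rss_desc, 0,
 *					     ETH_RSS_UDP |
 *					     ETH_RSS_NONFRAG_IPV4_UDP |
 *					     ETH_RSS_NONFRAG_IPV6_UDP,
 *					     IBV_RX_HASH_SRC_PORT_UDP |
 *					     IBV_RX_HASH_DST_PORT_UDP);
 *	// Returns the UDP port hash fields unchanged when rss_desc->types
 *	// includes one of the UDP bits; otherwise returns 0 and the layer
 *	// does not take part in the RSS hash.
 */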

/**
 * Look up and set the ptype in the Rx data path. Only a single ptype can be
 * used; if several tunnel rules are used on this queue, the tunnel ptype
 * will be cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must be always enabled (metadata may arrive
		 * from another port - not only from local flows).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
			struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	MLX5_ASSERT(dev->data->dev_started);
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->flow_mark_n--;
			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
		}
		if (tunnel) {
			unsigned int j;

			/* Decrease the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]--;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_trim(dev, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		unsigned int j;

		if (!(*priv->rxqs)[i])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[i],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
			rxq_ctrl->flow_tunnels_n[j] = 0;
		rxq_ctrl->rxq.tunnel = 0;
	}
}

/**
 * Set the Rx queue dynamic metadata (mask and offset) for a flow
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *data;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		if (!(*priv->rxqs)[i])
			continue;
		data = (*priv->rxqs)[i];
		if (!rte_flow_dynf_metadata_avail()) {
			data->dynf_meta = 0;
			data->flow_meta_mask = 0;
			data->flow_meta_offset = -1;
		} else {
			data->dynf_meta = 1;
			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
		}
	}
}

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
		      enum rte_flow_action_type action)
{
	if (actions == NULL)
		return NULL;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
		if (actions->type == action)
			return actions;
	return NULL;
}
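
/*
 * Illustrative note (not part of the original code): a hypothetical lookup
 * of the RSS action in an action list using the helper above:
 *
 *	const struct rte_flow_action *rss_action =
 *		mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *
 *	if (rss_action != NULL) {
 *		const struct rte_flow_action_rss *rss = rss_action->conf;
 *		// Use rss->queue_num, rss->types, rss->level, ...
 *	}
 */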

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't mark and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 flag"
					  " actions in same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "flag action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
			       uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't flag and mark in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 mark actions in same"
					  " flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "mark action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "drop action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
				uint64_t action_flags,
				struct rte_eth_dev *dev,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "queue action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the RSS action.
 *
 * @param[in] action
 *   Pointer to the RSS action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[in] item_flags
 *   Items that were detected.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
			      uint64_t action_flags,
			      struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attr,
			      uint64_t item_flags,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_rss *rss = action->conf;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int i;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions"
					  " in same flow");
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->func,
					  "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	if (rss->level > 2)
#else
	if (rss->level > 1)
#endif
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->level,
					  "tunnel RSS is not supported");
	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
	if (rss->key_len == 0 && rss->key != NULL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key length 0");
	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too small");
	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too large");
	if (rss->queue_num > priv->config.ind_table_max_size)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->queue_num,
					  "number of queues too large");
	if (rss->types & MLX5_RSS_HF_MASK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->types,
					  "some RSS protocols are not"
					  " supported");
	if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
	    !(rss->types & ETH_RSS_IP))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L3 partial RSS requested but L3 RSS"
					  " type not specified");
	if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
	    !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L4 partial RSS requested but L4 RSS"
					  " type not specified");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (!rss->queue_num)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No queues configured");
	for (i = 0; i != rss->queue_num; ++i) {
		if (rss->queue[i] >= priv->rxqs_n)
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue index out of range");
		if (!(*priv->rxqs)[rss->queue[i]])
			return rte_flow_error_set
				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue is not configured");
	}
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "rss action not supported for "
					  "egress");
	if (rss->level > 1 && !tunnel)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "inner RSS is not supported for "
					  "non-tunnel flows");
	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "RSS on eCPRI is not supported now");
	}
	return 0;
}
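
/*
 * Illustrative note (not part of the original code): an RSS action that
 * passes the checks above, assuming two configured Rx queues:
 *
 *	static const uint16_t queues[] = { 0, 1 };
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *		.level = 0,
 *		.types = ETH_RSS_IP | ETH_RSS_UDP,
 *		.key_len = 0,	// Use the default RSS key.
 *		.key = NULL,
 *		.queue_num = RTE_DIM(queues),
 *		.queue = queues,
 *	};
 *
 * Rejected configurations include a second fate action in the same flow,
 * a non-zero key_len different from MLX5_RSS_HASH_KEY_LEN, RSS types outside
 * the supported set, unconfigured queue indexes, and level > 1 on a
 * non-tunnel pattern.
 */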

/*
 * Validate the default miss action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_default_miss(uint64_t action_flags,
				       const struct rte_flow_attr *attr,
				       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "default miss action not supported "
					  "for egress");
	if (attr->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
					  "only group 0 is supported");
	if (attr->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	return 0;
}

/*
 * Validate the count action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "count action not supported for "
					  "egress");
	return 0;
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attributes,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t priority_max = priv->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL, "groups is not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL, "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "egress is not supported");
	if (attributes->transfer && !priv->config.dv_esw_en)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	return 0;
}
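
/*
 * Illustrative note (not part of the original code): flow attributes that
 * satisfy this generic helper - group 0, ingress only, no transfer and a
 * priority below the configured maximum (or MLX5_FLOW_PRIO_RSVD):
 *
 *	struct rte_flow_attr attr = {
 *		.group = 0,
 *		.priority = 0,
 *		.ingress = 1,
 *		.egress = 0,
 *		.transfer = 0,
 *	};
 */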

/**
 * Validate ICMP6 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
			      uint64_t item_flags,
			      uint8_t target_protocol,
			      struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp6 *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP6 layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 is mandatory to filter on"
					  " ICMP6");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp6_mask,
		 sizeof(struct rte_flow_item_icmp6),
		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate ICMP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint8_t target_protocol,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp *mask = item->mask;
	const struct rte_flow_item_icmp nic_mask = {
		.hdr.icmp_type = 0xff,
		.hdr.icmp_code = 0xff,
		.hdr.icmp_ident = RTE_BE16(0xffff),
		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 is mandatory to filter"
					  " on ICMP");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &nic_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_icmp),
		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate Ethernet item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	int ret;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
				       MLX5_FLOW_LAYER_OUTER_L2;

	if (item_flags & ethm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L2 layers not supported");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow "
					  "L3 layers");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow VLAN");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
	return ret;
}

/**
 * Validate VLAN item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Ethernet device flow is being created on.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     struct rte_eth_dev *dev,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(UINT16_MAX),
		.inner_type = RTE_BE16(UINT16_MAX),
	};
	uint16_t vlan_tag = 0;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	int ret;
	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
				       (MLX5_FLOW_LAYER_OUTER_L3 |
					MLX5_FLOW_LAYER_OUTER_L4);
	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
					MLX5_FLOW_LAYER_OUTER_VLAN;

	if (item_flags & vlanm)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple VLAN layers not supported");
	else if ((item_flags & l34m) != 0)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "VLAN cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_vlan),
					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
	if (ret)
		return ret;
	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
		struct mlx5_priv *priv = dev->data->dev_private;

		if (priv->vmwa_context) {
			/*
			 * A non-NULL context means we have a virtual machine
			 * and SR-IOV enabled. We have to create a VLAN
			 * interface to make the hypervisor set up the
			 * E-Switch vport context correctly. We avoid creating
			 * multiple VLAN interfaces, so we cannot support a
			 * VLAN tag mask.
			 */
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "VLAN tag mask is not"
						  " supported in virtual"
						  " environment");
		}
	}
	if (spec) {
		vlan_tag = spec->tci;
		vlan_tag &= mask->tci;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	return 0;
}

/**
 * Validate IPV4 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the ethernet layer header (including dot1q).
 * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
 *   will be used to check whether item fields are supported.
 * @param[in] range_accepted
 *   True if range of values is accepted for specific fields, false otherwise.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint64_t last_item,
			     uint16_t ether_type,
			     const struct rte_flow_item_ipv4 *acc_mask,
			     bool range_accepted,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
MLX5_FLOW_LAYER_INNER_L4 : 1861 MLX5_FLOW_LAYER_OUTER_L4; 1862 int ret; 1863 uint8_t next_proto = 0xFF; 1864 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1865 MLX5_FLOW_LAYER_OUTER_VLAN | 1866 MLX5_FLOW_LAYER_INNER_VLAN); 1867 1868 if ((last_item & l2_vlan) && ether_type && 1869 ether_type != RTE_ETHER_TYPE_IPV4) 1870 return rte_flow_error_set(error, EINVAL, 1871 RTE_FLOW_ERROR_TYPE_ITEM, item, 1872 "IPv4 cannot follow L2/VLAN layer " 1873 "which ether type is not IPv4"); 1874 if (item_flags & MLX5_FLOW_LAYER_IPIP) { 1875 if (mask && spec) 1876 next_proto = mask->hdr.next_proto_id & 1877 spec->hdr.next_proto_id; 1878 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1879 return rte_flow_error_set(error, EINVAL, 1880 RTE_FLOW_ERROR_TYPE_ITEM, 1881 item, 1882 "multiple tunnel " 1883 "not supported"); 1884 } 1885 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 1886 return rte_flow_error_set(error, EINVAL, 1887 RTE_FLOW_ERROR_TYPE_ITEM, item, 1888 "wrong tunnel type - IPv6 specified " 1889 "but IPv4 item provided"); 1890 if (item_flags & l3m) 1891 return rte_flow_error_set(error, ENOTSUP, 1892 RTE_FLOW_ERROR_TYPE_ITEM, item, 1893 "multiple L3 layers not supported"); 1894 else if (item_flags & l4m) 1895 return rte_flow_error_set(error, EINVAL, 1896 RTE_FLOW_ERROR_TYPE_ITEM, item, 1897 "L3 cannot follow an L4 layer."); 1898 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1899 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1900 return rte_flow_error_set(error, EINVAL, 1901 RTE_FLOW_ERROR_TYPE_ITEM, item, 1902 "L3 cannot follow an NVGRE layer."); 1903 if (!mask) 1904 mask = &rte_flow_item_ipv4_mask; 1905 else if (mask->hdr.next_proto_id != 0 && 1906 mask->hdr.next_proto_id != 0xff) 1907 return rte_flow_error_set(error, EINVAL, 1908 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 1909 "partial mask is not supported" 1910 " for protocol"); 1911 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1912 acc_mask ? (const uint8_t *)acc_mask 1913 : (const uint8_t *)&nic_mask, 1914 sizeof(struct rte_flow_item_ipv4), 1915 range_accepted, error); 1916 if (ret < 0) 1917 return ret; 1918 return 0; 1919 } 1920 1921 /** 1922 * Validate IPV6 item. 1923 * 1924 * @param[in] item 1925 * Item specification. 1926 * @param[in] item_flags 1927 * Bit-fields that holds the items detected until now. 1928 * @param[in] last_item 1929 * Previous validated item in the pattern items. 1930 * @param[in] ether_type 1931 * Type in the ethernet layer header (including dot1q). 1932 * @param[in] acc_mask 1933 * Acceptable mask, if NULL default internal default mask 1934 * will be used to check whether item fields are supported. 1935 * @param[out] error 1936 * Pointer to error structure. 1937 * 1938 * @return 1939 * 0 on success, a negative errno value otherwise and rte_errno is set. 
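 *
 * Illustrative usage sketch (not part of the original documentation; the
 * local variable names are examples only): an outer IPv6 item restricted to
 * the next-header field, validated after an outer Ethernet item whose ether
 * type is IPv6 and before any other L3/L4 layer has been seen.
 *
 * @code
 * struct rte_flow_error err;
 * const struct rte_flow_item_ipv6 spec = { .hdr = { .proto = IPPROTO_UDP } };
 * const struct rte_flow_item_ipv6 mask = { .hdr = { .proto = 0xff } };
 * const struct rte_flow_item item = {
 *         .type = RTE_FLOW_ITEM_TYPE_IPV6,
 *         .spec = &spec,
 *         .mask = &mask,
 * };
 * int ret = mlx5_flow_validate_item_ipv6(&item, MLX5_FLOW_LAYER_OUTER_L2,
 *                                        MLX5_FLOW_LAYER_OUTER_L2,
 *                                        RTE_ETHER_TYPE_IPV6, NULL, &err);
 * @endcode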
1940 */ 1941 int 1942 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 1943 uint64_t item_flags, 1944 uint64_t last_item, 1945 uint16_t ether_type, 1946 const struct rte_flow_item_ipv6 *acc_mask, 1947 struct rte_flow_error *error) 1948 { 1949 const struct rte_flow_item_ipv6 *mask = item->mask; 1950 const struct rte_flow_item_ipv6 *spec = item->spec; 1951 const struct rte_flow_item_ipv6 nic_mask = { 1952 .hdr = { 1953 .src_addr = 1954 "\xff\xff\xff\xff\xff\xff\xff\xff" 1955 "\xff\xff\xff\xff\xff\xff\xff\xff", 1956 .dst_addr = 1957 "\xff\xff\xff\xff\xff\xff\xff\xff" 1958 "\xff\xff\xff\xff\xff\xff\xff\xff", 1959 .vtc_flow = RTE_BE32(0xffffffff), 1960 .proto = 0xff, 1961 }, 1962 }; 1963 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1964 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1965 MLX5_FLOW_LAYER_OUTER_L3; 1966 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1967 MLX5_FLOW_LAYER_OUTER_L4; 1968 int ret; 1969 uint8_t next_proto = 0xFF; 1970 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1971 MLX5_FLOW_LAYER_OUTER_VLAN | 1972 MLX5_FLOW_LAYER_INNER_VLAN); 1973 1974 if ((last_item & l2_vlan) && ether_type && 1975 ether_type != RTE_ETHER_TYPE_IPV6) 1976 return rte_flow_error_set(error, EINVAL, 1977 RTE_FLOW_ERROR_TYPE_ITEM, item, 1978 "IPv6 cannot follow L2/VLAN layer " 1979 "which ether type is not IPv6"); 1980 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { 1981 if (mask && spec) 1982 next_proto = mask->hdr.proto & spec->hdr.proto; 1983 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1984 return rte_flow_error_set(error, EINVAL, 1985 RTE_FLOW_ERROR_TYPE_ITEM, 1986 item, 1987 "multiple tunnel " 1988 "not supported"); 1989 } 1990 if (item_flags & MLX5_FLOW_LAYER_IPIP) 1991 return rte_flow_error_set(error, EINVAL, 1992 RTE_FLOW_ERROR_TYPE_ITEM, item, 1993 "wrong tunnel type - IPv4 specified " 1994 "but IPv6 item provided"); 1995 if (item_flags & l3m) 1996 return rte_flow_error_set(error, ENOTSUP, 1997 RTE_FLOW_ERROR_TYPE_ITEM, item, 1998 "multiple L3 layers not supported"); 1999 else if (item_flags & l4m) 2000 return rte_flow_error_set(error, EINVAL, 2001 RTE_FLOW_ERROR_TYPE_ITEM, item, 2002 "L3 cannot follow an L4 layer."); 2003 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 2004 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 2005 return rte_flow_error_set(error, EINVAL, 2006 RTE_FLOW_ERROR_TYPE_ITEM, item, 2007 "L3 cannot follow an NVGRE layer."); 2008 if (!mask) 2009 mask = &rte_flow_item_ipv6_mask; 2010 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2011 acc_mask ? (const uint8_t *)acc_mask 2012 : (const uint8_t *)&nic_mask, 2013 sizeof(struct rte_flow_item_ipv6), 2014 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2015 if (ret < 0) 2016 return ret; 2017 return 0; 2018 } 2019 2020 /** 2021 * Validate UDP item. 2022 * 2023 * @param[in] item 2024 * Item specification. 2025 * @param[in] item_flags 2026 * Bit-fields that holds the items detected until now. 2027 * @param[in] target_protocol 2028 * The next protocol in the previous item. 2029 * @param[in] flow_mask 2030 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 2031 * @param[out] error 2032 * Pointer to error structure. 2033 * 2034 * @return 2035 * 0 on success, a negative errno value otherwise and rte_errno is set. 
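 *
 * Illustrative usage sketch (not part of the original documentation): a UDP
 * item matching only the VXLAN destination port, validated once an outer
 * IPv4 layer with next protocol UDP has already been seen and no other L4
 * layer is present.
 *
 * @code
 * struct rte_flow_error err;
 * const struct rte_flow_item_udp spec = {
 *         .hdr = { .dst_port = RTE_BE16(4789) },
 * };
 * const struct rte_flow_item_udp mask = {
 *         .hdr = { .dst_port = RTE_BE16(0xffff) },
 * };
 * const struct rte_flow_item item = {
 *         .type = RTE_FLOW_ITEM_TYPE_UDP,
 *         .spec = &spec,
 *         .mask = &mask,
 * };
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 * int ret = mlx5_flow_validate_item_udp(&item, item_flags, IPPROTO_UDP,
 *                                       &err);
 * @endcode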
2036 */ 2037 int 2038 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 2039 uint64_t item_flags, 2040 uint8_t target_protocol, 2041 struct rte_flow_error *error) 2042 { 2043 const struct rte_flow_item_udp *mask = item->mask; 2044 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2045 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2046 MLX5_FLOW_LAYER_OUTER_L3; 2047 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2048 MLX5_FLOW_LAYER_OUTER_L4; 2049 int ret; 2050 2051 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 2052 return rte_flow_error_set(error, EINVAL, 2053 RTE_FLOW_ERROR_TYPE_ITEM, item, 2054 "protocol filtering not compatible" 2055 " with UDP layer"); 2056 if (!(item_flags & l3m)) 2057 return rte_flow_error_set(error, EINVAL, 2058 RTE_FLOW_ERROR_TYPE_ITEM, item, 2059 "L3 is mandatory to filter on L4"); 2060 if (item_flags & l4m) 2061 return rte_flow_error_set(error, EINVAL, 2062 RTE_FLOW_ERROR_TYPE_ITEM, item, 2063 "multiple L4 layers not supported"); 2064 if (!mask) 2065 mask = &rte_flow_item_udp_mask; 2066 ret = mlx5_flow_item_acceptable 2067 (item, (const uint8_t *)mask, 2068 (const uint8_t *)&rte_flow_item_udp_mask, 2069 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2070 error); 2071 if (ret < 0) 2072 return ret; 2073 return 0; 2074 } 2075 2076 /** 2077 * Validate TCP item. 2078 * 2079 * @param[in] item 2080 * Item specification. 2081 * @param[in] item_flags 2082 * Bit-fields that holds the items detected until now. 2083 * @param[in] target_protocol 2084 * The next protocol in the previous item. 2085 * @param[out] error 2086 * Pointer to error structure. 2087 * 2088 * @return 2089 * 0 on success, a negative errno value otherwise and rte_errno is set. 2090 */ 2091 int 2092 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 2093 uint64_t item_flags, 2094 uint8_t target_protocol, 2095 const struct rte_flow_item_tcp *flow_mask, 2096 struct rte_flow_error *error) 2097 { 2098 const struct rte_flow_item_tcp *mask = item->mask; 2099 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2100 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2101 MLX5_FLOW_LAYER_OUTER_L3; 2102 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2103 MLX5_FLOW_LAYER_OUTER_L4; 2104 int ret; 2105 2106 MLX5_ASSERT(flow_mask); 2107 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 2108 return rte_flow_error_set(error, EINVAL, 2109 RTE_FLOW_ERROR_TYPE_ITEM, item, 2110 "protocol filtering not compatible" 2111 " with TCP layer"); 2112 if (!(item_flags & l3m)) 2113 return rte_flow_error_set(error, EINVAL, 2114 RTE_FLOW_ERROR_TYPE_ITEM, item, 2115 "L3 is mandatory to filter on L4"); 2116 if (item_flags & l4m) 2117 return rte_flow_error_set(error, EINVAL, 2118 RTE_FLOW_ERROR_TYPE_ITEM, item, 2119 "multiple L4 layers not supported"); 2120 if (!mask) 2121 mask = &rte_flow_item_tcp_mask; 2122 ret = mlx5_flow_item_acceptable 2123 (item, (const uint8_t *)mask, 2124 (const uint8_t *)flow_mask, 2125 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2126 error); 2127 if (ret < 0) 2128 return ret; 2129 return 0; 2130 } 2131 2132 /** 2133 * Validate VXLAN item. 2134 * 2135 * @param[in] item 2136 * Item specification. 2137 * @param[in] item_flags 2138 * Bit-fields that holds the items detected until now. 2139 * @param[in] target_protocol 2140 * The next protocol in the previous item. 2141 * @param[out] error 2142 * Pointer to error structure. 
2143 *
2144 * @return
2145 * 0 on success, a negative errno value otherwise and rte_errno is set.
2146 */
2147 int
2148 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2149 uint64_t item_flags,
2150 struct rte_flow_error *error)
2151 {
2152 const struct rte_flow_item_vxlan *spec = item->spec;
2153 const struct rte_flow_item_vxlan *mask = item->mask;
2154 int ret;
2155 union vni {
2156 uint32_t vlan_id;
2157 uint8_t vni[4];
2158 } id = { .vlan_id = 0, };
2159
2160
2161 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2162 return rte_flow_error_set(error, ENOTSUP,
2163 RTE_FLOW_ERROR_TYPE_ITEM, item,
2164 "multiple tunnel layers not"
2165 " supported");
2166 /*
2167 * Verify only UDPv4 is present as defined in
2168 * https://tools.ietf.org/html/rfc7348
2169 */
2170 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2171 return rte_flow_error_set(error, EINVAL,
2172 RTE_FLOW_ERROR_TYPE_ITEM, item,
2173 "no outer UDP layer found");
2174 if (!mask)
2175 mask = &rte_flow_item_vxlan_mask;
2176 ret = mlx5_flow_item_acceptable
2177 (item, (const uint8_t *)mask,
2178 (const uint8_t *)&rte_flow_item_vxlan_mask,
2179 sizeof(struct rte_flow_item_vxlan),
2180 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2181 if (ret < 0)
2182 return ret;
2183 if (spec) {
2184 memcpy(&id.vni[1], spec->vni, 3);
2185 memcpy(&id.vni[1], mask->vni, 3);
2186 }
2187 if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2188 return rte_flow_error_set(error, ENOTSUP,
2189 RTE_FLOW_ERROR_TYPE_ITEM, item,
2190 "VXLAN tunnel must be fully defined");
2191 return 0;
2192 }
2193
2194 /**
2195 * Validate VXLAN_GPE item.
2196 *
2197 * @param[in] item
2198 * Item specification.
2199 * @param[in] item_flags
2200 * Bit-fields that holds the items detected until now.
2201 * @param[in] dev
2202 * Ethernet device flow is being created on. Used to check that L3
2203 * VXLAN is enabled by the l3_vxlan_en device parameter and
2204 * configured in the firmware.
2205 * @param[out] error
2206 * Pointer to error structure.
2207 *
2208 * @return
2209 * 0 on success, a negative errno value otherwise and rte_errno is set.
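 *
 * Illustrative usage sketch (not part of the original documentation; "port"
 * stands for any started mlx5 device with the l3_vxlan_en devarg set): a
 * VXLAN-GPE item with the default mask, validated after a fully defined
 * outer eth / IPv4 / UDP chain.
 *
 * @code
 * struct rte_flow_error err;
 * const struct rte_flow_item_vxlan_gpe spec = {
 *         .vni = "\x00\x12\x34",
 * };
 * const struct rte_flow_item item = {
 *         .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
 *         .spec = &spec,
 *         .mask = &rte_flow_item_vxlan_gpe_mask,
 * };
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *                       MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *                       MLX5_FLOW_LAYER_OUTER_L4_UDP;
 * int ret = mlx5_flow_validate_item_vxlan_gpe(&item, item_flags, port, &err);
 * @endcode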
2210 */ 2211 int 2212 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 2213 uint64_t item_flags, 2214 struct rte_eth_dev *dev, 2215 struct rte_flow_error *error) 2216 { 2217 struct mlx5_priv *priv = dev->data->dev_private; 2218 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 2219 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 2220 int ret; 2221 union vni { 2222 uint32_t vlan_id; 2223 uint8_t vni[4]; 2224 } id = { .vlan_id = 0, }; 2225 2226 if (!priv->config.l3_vxlan_en) 2227 return rte_flow_error_set(error, ENOTSUP, 2228 RTE_FLOW_ERROR_TYPE_ITEM, item, 2229 "L3 VXLAN is not enabled by device" 2230 " parameter and/or not configured in" 2231 " firmware"); 2232 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2233 return rte_flow_error_set(error, ENOTSUP, 2234 RTE_FLOW_ERROR_TYPE_ITEM, item, 2235 "multiple tunnel layers not" 2236 " supported"); 2237 /* 2238 * Verify only UDPv4 is present as defined in 2239 * https://tools.ietf.org/html/rfc7348 2240 */ 2241 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2242 return rte_flow_error_set(error, EINVAL, 2243 RTE_FLOW_ERROR_TYPE_ITEM, item, 2244 "no outer UDP layer found"); 2245 if (!mask) 2246 mask = &rte_flow_item_vxlan_gpe_mask; 2247 ret = mlx5_flow_item_acceptable 2248 (item, (const uint8_t *)mask, 2249 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2250 sizeof(struct rte_flow_item_vxlan_gpe), 2251 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2252 if (ret < 0) 2253 return ret; 2254 if (spec) { 2255 if (spec->protocol) 2256 return rte_flow_error_set(error, ENOTSUP, 2257 RTE_FLOW_ERROR_TYPE_ITEM, 2258 item, 2259 "VxLAN-GPE protocol" 2260 " not supported"); 2261 memcpy(&id.vni[1], spec->vni, 3); 2262 memcpy(&id.vni[1], mask->vni, 3); 2263 } 2264 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2265 return rte_flow_error_set(error, ENOTSUP, 2266 RTE_FLOW_ERROR_TYPE_ITEM, item, 2267 "VXLAN-GPE tunnel must be fully" 2268 " defined"); 2269 return 0; 2270 } 2271 /** 2272 * Validate GRE Key item. 2273 * 2274 * @param[in] item 2275 * Item specification. 2276 * @param[in] item_flags 2277 * Bit flags to mark detected items. 2278 * @param[in] gre_item 2279 * Pointer to gre_item 2280 * @param[out] error 2281 * Pointer to error structure. 2282 * 2283 * @return 2284 * 0 on success, a negative errno value otherwise and rte_errno is set. 
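 *
 * Illustrative usage sketch (not part of the original documentation): a GRE
 * item with the key-present bit set, followed by the 32-bit key item that
 * this helper validates.
 *
 * @code
 * struct rte_flow_error err;
 * const struct rte_flow_item_gre gre_spec = {
 *         .c_rsvd0_ver = RTE_BE16(0x2000),
 * };
 * const struct rte_flow_item_gre gre_mask = {
 *         .c_rsvd0_ver = RTE_BE16(0x2000),
 * };
 * const struct rte_flow_item gre = {
 *         .type = RTE_FLOW_ITEM_TYPE_GRE,
 *         .spec = &gre_spec,
 *         .mask = &gre_mask,
 * };
 * const rte_be32_t key_spec = RTE_BE32(0x12345678);
 * const struct rte_flow_item key = {
 *         .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
 *         .spec = &key_spec,
 * };
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_GRE;
 * int ret = mlx5_flow_validate_item_gre_key(&key, item_flags, &gre, &err);
 * @endcode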
2285 */ 2286 int 2287 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2288 uint64_t item_flags, 2289 const struct rte_flow_item *gre_item, 2290 struct rte_flow_error *error) 2291 { 2292 const rte_be32_t *mask = item->mask; 2293 int ret = 0; 2294 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2295 const struct rte_flow_item_gre *gre_spec; 2296 const struct rte_flow_item_gre *gre_mask; 2297 2298 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2299 return rte_flow_error_set(error, ENOTSUP, 2300 RTE_FLOW_ERROR_TYPE_ITEM, item, 2301 "Multiple GRE key not support"); 2302 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2303 return rte_flow_error_set(error, ENOTSUP, 2304 RTE_FLOW_ERROR_TYPE_ITEM, item, 2305 "No preceding GRE header"); 2306 if (item_flags & MLX5_FLOW_LAYER_INNER) 2307 return rte_flow_error_set(error, ENOTSUP, 2308 RTE_FLOW_ERROR_TYPE_ITEM, item, 2309 "GRE key following a wrong item"); 2310 gre_mask = gre_item->mask; 2311 if (!gre_mask) 2312 gre_mask = &rte_flow_item_gre_mask; 2313 gre_spec = gre_item->spec; 2314 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2315 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2316 return rte_flow_error_set(error, EINVAL, 2317 RTE_FLOW_ERROR_TYPE_ITEM, item, 2318 "Key bit must be on"); 2319 2320 if (!mask) 2321 mask = &gre_key_default_mask; 2322 ret = mlx5_flow_item_acceptable 2323 (item, (const uint8_t *)mask, 2324 (const uint8_t *)&gre_key_default_mask, 2325 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2326 return ret; 2327 } 2328 2329 /** 2330 * Validate GRE item. 2331 * 2332 * @param[in] item 2333 * Item specification. 2334 * @param[in] item_flags 2335 * Bit flags to mark detected items. 2336 * @param[in] target_protocol 2337 * The next protocol in the previous item. 2338 * @param[out] error 2339 * Pointer to error structure. 2340 * 2341 * @return 2342 * 0 on success, a negative errno value otherwise and rte_errno is set. 
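 *
 * Illustrative usage sketch (not part of the original documentation): a GRE
 * item matching the carried protocol, placed after an outer IPv4 layer whose
 * next protocol is GRE. Note that on builds without rdma-core DR or MPLS
 * support, a non-zero protocol specification is rejected by the check below.
 *
 * @code
 * struct rte_flow_error err;
 * const struct rte_flow_item_gre spec = {
 *         .protocol = RTE_BE16(RTE_ETHER_TYPE_TEB),
 * };
 * const struct rte_flow_item_gre mask = {
 *         .protocol = RTE_BE16(UINT16_MAX),
 * };
 * const struct rte_flow_item item = {
 *         .type = RTE_FLOW_ITEM_TYPE_GRE,
 *         .spec = &spec,
 *         .mask = &mask,
 * };
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 * int ret = mlx5_flow_validate_item_gre(&item, item_flags, IPPROTO_GRE,
 *                                       &err);
 * @endcode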
2343 */
2344 int
2345 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2346 uint64_t item_flags,
2347 uint8_t target_protocol,
2348 struct rte_flow_error *error)
2349 {
2350 const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2351 const struct rte_flow_item_gre *mask = item->mask;
2352 int ret;
2353 const struct rte_flow_item_gre nic_mask = {
2354 .c_rsvd0_ver = RTE_BE16(0xB000),
2355 .protocol = RTE_BE16(UINT16_MAX),
2356 };
2357
2358 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2359 return rte_flow_error_set(error, EINVAL,
2360 RTE_FLOW_ERROR_TYPE_ITEM, item,
2361 "protocol filtering not compatible"
2362 " with this GRE layer");
2363 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2364 return rte_flow_error_set(error, ENOTSUP,
2365 RTE_FLOW_ERROR_TYPE_ITEM, item,
2366 "multiple tunnel layers not"
2367 " supported");
2368 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2369 return rte_flow_error_set(error, ENOTSUP,
2370 RTE_FLOW_ERROR_TYPE_ITEM, item,
2371 "L3 Layer is missing");
2372 if (!mask)
2373 mask = &rte_flow_item_gre_mask;
2374 ret = mlx5_flow_item_acceptable
2375 (item, (const uint8_t *)mask,
2376 (const uint8_t *)&nic_mask,
2377 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2378 error);
2379 if (ret < 0)
2380 return ret;
2381 #ifndef HAVE_MLX5DV_DR
2382 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2383 if (spec && (spec->protocol & mask->protocol))
2384 return rte_flow_error_set(error, ENOTSUP,
2385 RTE_FLOW_ERROR_TYPE_ITEM, item,
2386 "without MPLS support the"
2387 " specification cannot be used for"
2388 " filtering");
2389 #endif
2390 #endif
2391 return 0;
2392 }
2393
2394 /**
2395 * Validate Geneve item.
2396 *
2397 * @param[in] item
2398 * Item specification.
2399 * @param[in] item_flags
2400 * Bit-fields that holds the items detected until now.
2401 * @param[in] dev
2402 * Ethernet device flow is being created on.
2403 * @param[out] error
2404 * Pointer to error structure.
2405 *
2406 * @return
2407 * 0 on success, a negative errno value otherwise and rte_errno is set.
2408 */
2410 int
2411 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2412 uint64_t item_flags,
2413 struct rte_eth_dev *dev,
2414 struct rte_flow_error *error)
2415 {
2416 struct mlx5_priv *priv = dev->data->dev_private;
2417 const struct rte_flow_item_geneve *spec = item->spec;
2418 const struct rte_flow_item_geneve *mask = item->mask;
2419 int ret;
2420 uint16_t gbhdr;
2421 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2422 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2423 const struct rte_flow_item_geneve nic_mask = { 2424 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2425 .vni = "\xff\xff\xff", 2426 .protocol = RTE_BE16(UINT16_MAX), 2427 }; 2428 2429 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2430 return rte_flow_error_set(error, ENOTSUP, 2431 RTE_FLOW_ERROR_TYPE_ITEM, item, 2432 "L3 Geneve is not enabled by device" 2433 " parameter and/or not configured in" 2434 " firmware"); 2435 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2436 return rte_flow_error_set(error, ENOTSUP, 2437 RTE_FLOW_ERROR_TYPE_ITEM, item, 2438 "multiple tunnel layers not" 2439 " supported"); 2440 /* 2441 * Verify only UDPv4 is present as defined in 2442 * https://tools.ietf.org/html/rfc7348 2443 */ 2444 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2445 return rte_flow_error_set(error, EINVAL, 2446 RTE_FLOW_ERROR_TYPE_ITEM, item, 2447 "no outer UDP layer found"); 2448 if (!mask) 2449 mask = &rte_flow_item_geneve_mask; 2450 ret = mlx5_flow_item_acceptable 2451 (item, (const uint8_t *)mask, 2452 (const uint8_t *)&nic_mask, 2453 sizeof(struct rte_flow_item_geneve), 2454 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2455 if (ret) 2456 return ret; 2457 if (spec) { 2458 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2459 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2460 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2461 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2462 return rte_flow_error_set(error, ENOTSUP, 2463 RTE_FLOW_ERROR_TYPE_ITEM, 2464 item, 2465 "Geneve protocol unsupported" 2466 " fields are being used"); 2467 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2468 return rte_flow_error_set 2469 (error, ENOTSUP, 2470 RTE_FLOW_ERROR_TYPE_ITEM, 2471 item, 2472 "Unsupported Geneve options length"); 2473 } 2474 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2475 return rte_flow_error_set 2476 (error, ENOTSUP, 2477 RTE_FLOW_ERROR_TYPE_ITEM, item, 2478 "Geneve tunnel must be fully defined"); 2479 return 0; 2480 } 2481 2482 /** 2483 * Validate MPLS item. 2484 * 2485 * @param[in] dev 2486 * Pointer to the rte_eth_dev structure. 2487 * @param[in] item 2488 * Item specification. 2489 * @param[in] item_flags 2490 * Bit-fields that holds the items detected until now. 2491 * @param[in] prev_layer 2492 * The protocol layer indicated in previous item. 2493 * @param[out] error 2494 * Pointer to error structure. 2495 * 2496 * @return 2497 * 0 on success, a negative errno value otherwise and rte_errno is set. 2498 */ 2499 int 2500 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2501 const struct rte_flow_item *item __rte_unused, 2502 uint64_t item_flags __rte_unused, 2503 uint64_t prev_layer __rte_unused, 2504 struct rte_flow_error *error) 2505 { 2506 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2507 const struct rte_flow_item_mpls *mask = item->mask; 2508 struct mlx5_priv *priv = dev->data->dev_private; 2509 int ret; 2510 2511 if (!priv->config.mpls_en) 2512 return rte_flow_error_set(error, ENOTSUP, 2513 RTE_FLOW_ERROR_TYPE_ITEM, item, 2514 "MPLS not supported or" 2515 " disabled in firmware" 2516 " configuration."); 2517 /* MPLS over IP, UDP, GRE is allowed */ 2518 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2519 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2520 MLX5_FLOW_LAYER_GRE))) 2521 return rte_flow_error_set(error, EINVAL, 2522 RTE_FLOW_ERROR_TYPE_ITEM, item, 2523 "protocol filtering not compatible" 2524 " with MPLS layer"); 2525 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
*/ 2526 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2527 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2528 return rte_flow_error_set(error, ENOTSUP, 2529 RTE_FLOW_ERROR_TYPE_ITEM, item, 2530 "multiple tunnel layers not" 2531 " supported"); 2532 if (!mask) 2533 mask = &rte_flow_item_mpls_mask; 2534 ret = mlx5_flow_item_acceptable 2535 (item, (const uint8_t *)mask, 2536 (const uint8_t *)&rte_flow_item_mpls_mask, 2537 sizeof(struct rte_flow_item_mpls), 2538 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2539 if (ret < 0) 2540 return ret; 2541 return 0; 2542 #else 2543 return rte_flow_error_set(error, ENOTSUP, 2544 RTE_FLOW_ERROR_TYPE_ITEM, item, 2545 "MPLS is not supported by Verbs, please" 2546 " update."); 2547 #endif 2548 } 2549 2550 /** 2551 * Validate NVGRE item. 2552 * 2553 * @param[in] item 2554 * Item specification. 2555 * @param[in] item_flags 2556 * Bit flags to mark detected items. 2557 * @param[in] target_protocol 2558 * The next protocol in the previous item. 2559 * @param[out] error 2560 * Pointer to error structure. 2561 * 2562 * @return 2563 * 0 on success, a negative errno value otherwise and rte_errno is set. 2564 */ 2565 int 2566 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2567 uint64_t item_flags, 2568 uint8_t target_protocol, 2569 struct rte_flow_error *error) 2570 { 2571 const struct rte_flow_item_nvgre *mask = item->mask; 2572 int ret; 2573 2574 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2575 return rte_flow_error_set(error, EINVAL, 2576 RTE_FLOW_ERROR_TYPE_ITEM, item, 2577 "protocol filtering not compatible" 2578 " with this GRE layer"); 2579 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2580 return rte_flow_error_set(error, ENOTSUP, 2581 RTE_FLOW_ERROR_TYPE_ITEM, item, 2582 "multiple tunnel layers not" 2583 " supported"); 2584 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2585 return rte_flow_error_set(error, ENOTSUP, 2586 RTE_FLOW_ERROR_TYPE_ITEM, item, 2587 "L3 Layer is missing"); 2588 if (!mask) 2589 mask = &rte_flow_item_nvgre_mask; 2590 ret = mlx5_flow_item_acceptable 2591 (item, (const uint8_t *)mask, 2592 (const uint8_t *)&rte_flow_item_nvgre_mask, 2593 sizeof(struct rte_flow_item_nvgre), 2594 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2595 if (ret < 0) 2596 return ret; 2597 return 0; 2598 } 2599 2600 /** 2601 * Validate eCPRI item. 2602 * 2603 * @param[in] item 2604 * Item specification. 2605 * @param[in] item_flags 2606 * Bit-fields that holds the items detected until now. 2607 * @param[in] last_item 2608 * Previous validated item in the pattern items. 2609 * @param[in] ether_type 2610 * Type in the ethernet layer header (including dot1q). 2611 * @param[in] acc_mask 2612 * Acceptable mask, if NULL default internal default mask 2613 * will be used to check whether item fields are supported. 2614 * @param[out] error 2615 * Pointer to error structure. 2616 * 2617 * @return 2618 * 0 on success, a negative errno value otherwise and rte_errno is set. 
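 *
 * Illustrative usage sketch (not part of the original documentation): an
 * eCPRI item carried directly over Ethernet (ether type 0xAEFE), with the
 * message type fully masked as required by the partial-mask check below.
 * The mask is built with the same big-endian idiom as the nic_mask in the
 * function body.
 *
 * @code
 * struct rte_flow_error err;
 * const struct rte_flow_item_ecpri spec = {
 *         .hdr = {
 *                 .common = {
 *                         .u32 = RTE_BE32(((const struct rte_ecpri_common_hdr) {
 *                                         .type = RTE_ECPRI_MSG_TYPE_IQ_DATA,
 *                                 }).u32),
 *                 },
 *         },
 * };
 * const struct rte_flow_item_ecpri mask = {
 *         .hdr = {
 *                 .common = {
 *                         .u32 = RTE_BE32(((const struct rte_ecpri_common_hdr) {
 *                                         .type = 0xFF,
 *                                 }).u32),
 *                 },
 *         },
 * };
 * const struct rte_flow_item item = {
 *         .type = RTE_FLOW_ITEM_TYPE_ECPRI,
 *         .spec = &spec,
 *         .mask = &mask,
 * };
 * int ret = mlx5_flow_validate_item_ecpri(&item, MLX5_FLOW_LAYER_OUTER_L2,
 *                                         MLX5_FLOW_LAYER_OUTER_L2,
 *                                         RTE_ETHER_TYPE_ECPRI, NULL, &err);
 * @endcode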
2619 */ 2620 int 2621 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 2622 uint64_t item_flags, 2623 uint64_t last_item, 2624 uint16_t ether_type, 2625 const struct rte_flow_item_ecpri *acc_mask, 2626 struct rte_flow_error *error) 2627 { 2628 const struct rte_flow_item_ecpri *mask = item->mask; 2629 const struct rte_flow_item_ecpri nic_mask = { 2630 .hdr = { 2631 .common = { 2632 .u32 = 2633 RTE_BE32(((const struct rte_ecpri_common_hdr) { 2634 .type = 0xFF, 2635 }).u32), 2636 }, 2637 .dummy[0] = 0xFFFFFFFF, 2638 }, 2639 }; 2640 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 2641 MLX5_FLOW_LAYER_OUTER_VLAN); 2642 struct rte_flow_item_ecpri mask_lo; 2643 2644 if ((last_item & outer_l2_vlan) && ether_type && 2645 ether_type != RTE_ETHER_TYPE_ECPRI) 2646 return rte_flow_error_set(error, EINVAL, 2647 RTE_FLOW_ERROR_TYPE_ITEM, item, 2648 "eCPRI cannot follow L2/VLAN layer " 2649 "which ether type is not 0xAEFE."); 2650 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2651 return rte_flow_error_set(error, EINVAL, 2652 RTE_FLOW_ERROR_TYPE_ITEM, item, 2653 "eCPRI with tunnel is not supported " 2654 "right now."); 2655 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 2656 return rte_flow_error_set(error, ENOTSUP, 2657 RTE_FLOW_ERROR_TYPE_ITEM, item, 2658 "multiple L3 layers not supported"); 2659 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 2660 return rte_flow_error_set(error, EINVAL, 2661 RTE_FLOW_ERROR_TYPE_ITEM, item, 2662 "eCPRI cannot follow a TCP layer."); 2663 /* In specification, eCPRI could be over UDP layer. */ 2664 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 2665 return rte_flow_error_set(error, EINVAL, 2666 RTE_FLOW_ERROR_TYPE_ITEM, item, 2667 "eCPRI over UDP layer is not yet " 2668 "supported right now."); 2669 /* Mask for type field in common header could be zero. */ 2670 if (!mask) 2671 mask = &rte_flow_item_ecpri_mask; 2672 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 2673 /* Input mask is in big-endian format. */ 2674 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 2675 return rte_flow_error_set(error, EINVAL, 2676 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2677 "partial mask is not supported " 2678 "for protocol"); 2679 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 2680 return rte_flow_error_set(error, EINVAL, 2681 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2682 "message header mask must be after " 2683 "a type mask"); 2684 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2685 acc_mask ? (const uint8_t *)acc_mask 2686 : (const uint8_t *)&nic_mask, 2687 sizeof(struct rte_flow_item_ecpri), 2688 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2689 } 2690 2691 /* Allocate unique ID for the split Q/RSS subflows. */ 2692 static uint32_t 2693 flow_qrss_get_id(struct rte_eth_dev *dev) 2694 { 2695 struct mlx5_priv *priv = dev->data->dev_private; 2696 uint32_t qrss_id, ret; 2697 2698 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2699 if (ret) 2700 return 0; 2701 MLX5_ASSERT(qrss_id); 2702 return qrss_id; 2703 } 2704 2705 /* Free unique ID for the split Q/RSS subflows. */ 2706 static void 2707 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2708 { 2709 struct mlx5_priv *priv = dev->data->dev_private; 2710 2711 if (qrss_id) 2712 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2713 } 2714 2715 /** 2716 * Release resource related QUEUE/RSS action split. 2717 * 2718 * @param dev 2719 * Pointer to Ethernet device. 2720 * @param flow 2721 * Flow to release id's from. 
2722 */ 2723 static void 2724 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2725 struct rte_flow *flow) 2726 { 2727 struct mlx5_priv *priv = dev->data->dev_private; 2728 uint32_t handle_idx; 2729 struct mlx5_flow_handle *dev_handle; 2730 2731 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2732 handle_idx, dev_handle, next) 2733 if (dev_handle->split_flow_id) 2734 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2735 } 2736 2737 static int 2738 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2739 const struct rte_flow_attr *attr __rte_unused, 2740 const struct rte_flow_item items[] __rte_unused, 2741 const struct rte_flow_action actions[] __rte_unused, 2742 bool external __rte_unused, 2743 int hairpin __rte_unused, 2744 struct rte_flow_error *error) 2745 { 2746 return rte_flow_error_set(error, ENOTSUP, 2747 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2748 } 2749 2750 static struct mlx5_flow * 2751 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2752 const struct rte_flow_attr *attr __rte_unused, 2753 const struct rte_flow_item items[] __rte_unused, 2754 const struct rte_flow_action actions[] __rte_unused, 2755 struct rte_flow_error *error) 2756 { 2757 rte_flow_error_set(error, ENOTSUP, 2758 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2759 return NULL; 2760 } 2761 2762 static int 2763 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2764 struct mlx5_flow *dev_flow __rte_unused, 2765 const struct rte_flow_attr *attr __rte_unused, 2766 const struct rte_flow_item items[] __rte_unused, 2767 const struct rte_flow_action actions[] __rte_unused, 2768 struct rte_flow_error *error) 2769 { 2770 return rte_flow_error_set(error, ENOTSUP, 2771 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2772 } 2773 2774 static int 2775 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2776 struct rte_flow *flow __rte_unused, 2777 struct rte_flow_error *error) 2778 { 2779 return rte_flow_error_set(error, ENOTSUP, 2780 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2781 } 2782 2783 static void 2784 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2785 struct rte_flow *flow __rte_unused) 2786 { 2787 } 2788 2789 static void 2790 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2791 struct rte_flow *flow __rte_unused) 2792 { 2793 } 2794 2795 static int 2796 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2797 struct rte_flow *flow __rte_unused, 2798 const struct rte_flow_action *actions __rte_unused, 2799 void *data __rte_unused, 2800 struct rte_flow_error *error) 2801 { 2802 return rte_flow_error_set(error, ENOTSUP, 2803 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2804 } 2805 2806 /* Void driver to protect from null pointer reference. */ 2807 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2808 .validate = flow_null_validate, 2809 .prepare = flow_null_prepare, 2810 .translate = flow_null_translate, 2811 .apply = flow_null_apply, 2812 .remove = flow_null_remove, 2813 .destroy = flow_null_destroy, 2814 .query = flow_null_query, 2815 }; 2816 2817 /** 2818 * Select flow driver type according to flow attributes and device 2819 * configuration. 2820 * 2821 * @param[in] dev 2822 * Pointer to the dev structure. 2823 * @param[in] attr 2824 * Pointer to the flow attributes. 2825 * 2826 * @return 2827 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
2828 */ 2829 static enum mlx5_flow_drv_type 2830 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2831 { 2832 struct mlx5_priv *priv = dev->data->dev_private; 2833 /* The OS can determine first a specific flow type (DV, VERBS) */ 2834 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2835 2836 if (type != MLX5_FLOW_TYPE_MAX) 2837 return type; 2838 /* If no OS specific type - continue with DV/VERBS selection */ 2839 if (attr->transfer && priv->config.dv_esw_en) 2840 type = MLX5_FLOW_TYPE_DV; 2841 if (!attr->transfer) 2842 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2843 MLX5_FLOW_TYPE_VERBS; 2844 return type; 2845 } 2846 2847 #define flow_get_drv_ops(type) flow_drv_ops[type] 2848 2849 /** 2850 * Flow driver validation API. This abstracts calling driver specific functions. 2851 * The type of flow driver is determined according to flow attributes. 2852 * 2853 * @param[in] dev 2854 * Pointer to the dev structure. 2855 * @param[in] attr 2856 * Pointer to the flow attributes. 2857 * @param[in] items 2858 * Pointer to the list of items. 2859 * @param[in] actions 2860 * Pointer to the list of actions. 2861 * @param[in] external 2862 * This flow rule is created by request external to PMD. 2863 * @param[in] hairpin 2864 * Number of hairpin TX actions, 0 means classic flow. 2865 * @param[out] error 2866 * Pointer to the error structure. 2867 * 2868 * @return 2869 * 0 on success, a negative errno value otherwise and rte_errno is set. 2870 */ 2871 static inline int 2872 flow_drv_validate(struct rte_eth_dev *dev, 2873 const struct rte_flow_attr *attr, 2874 const struct rte_flow_item items[], 2875 const struct rte_flow_action actions[], 2876 bool external, int hairpin, struct rte_flow_error *error) 2877 { 2878 const struct mlx5_flow_driver_ops *fops; 2879 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2880 2881 fops = flow_get_drv_ops(type); 2882 return fops->validate(dev, attr, items, actions, external, 2883 hairpin, error); 2884 } 2885 2886 /** 2887 * Flow driver preparation API. This abstracts calling driver specific 2888 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2889 * calculates the size of memory required for device flow, allocates the memory, 2890 * initializes the device flow and returns the pointer. 2891 * 2892 * @note 2893 * This function initializes device flow structure such as dv or verbs in 2894 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2895 * rest. For example, adding returning device flow to flow->dev_flow list and 2896 * setting backward reference to the flow should be done out of this function. 2897 * layers field is not filled either. 2898 * 2899 * @param[in] dev 2900 * Pointer to the dev structure. 2901 * @param[in] attr 2902 * Pointer to the flow attributes. 2903 * @param[in] items 2904 * Pointer to the list of items. 2905 * @param[in] actions 2906 * Pointer to the list of actions. 2907 * @param[in] flow_idx 2908 * This memory pool index to the flow. 2909 * @param[out] error 2910 * Pointer to the error structure. 2911 * 2912 * @return 2913 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
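 *
 * Condensed calling sequence (an illustrative sketch only; the real flow
 * creation path below, see flow_list_create(), additionally links the new
 * device flow handle into flow->dev_handles and performs flow splitting
 * before translation):
 *
 * @code
 * struct mlx5_flow *dev_flow;
 * int ret;
 *
 * dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, flow_idx,
 *                             error);
 * if (!dev_flow)
 *         return -rte_errno;
 * dev_flow->flow = flow;
 * ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 * if (!ret)
 *         ret = flow_drv_apply(dev, flow, error);
 * @endcode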
2914 */ 2915 static inline struct mlx5_flow * 2916 flow_drv_prepare(struct rte_eth_dev *dev, 2917 const struct rte_flow *flow, 2918 const struct rte_flow_attr *attr, 2919 const struct rte_flow_item items[], 2920 const struct rte_flow_action actions[], 2921 uint32_t flow_idx, 2922 struct rte_flow_error *error) 2923 { 2924 const struct mlx5_flow_driver_ops *fops; 2925 enum mlx5_flow_drv_type type = flow->drv_type; 2926 struct mlx5_flow *mlx5_flow = NULL; 2927 2928 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2929 fops = flow_get_drv_ops(type); 2930 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2931 if (mlx5_flow) 2932 mlx5_flow->flow_idx = flow_idx; 2933 return mlx5_flow; 2934 } 2935 2936 /** 2937 * Flow driver translation API. This abstracts calling driver specific 2938 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2939 * translates a generic flow into a driver flow. flow_drv_prepare() must 2940 * precede. 2941 * 2942 * @note 2943 * dev_flow->layers could be filled as a result of parsing during translation 2944 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2945 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2946 * flow->actions could be overwritten even though all the expanded dev_flows 2947 * have the same actions. 2948 * 2949 * @param[in] dev 2950 * Pointer to the rte dev structure. 2951 * @param[in, out] dev_flow 2952 * Pointer to the mlx5 flow. 2953 * @param[in] attr 2954 * Pointer to the flow attributes. 2955 * @param[in] items 2956 * Pointer to the list of items. 2957 * @param[in] actions 2958 * Pointer to the list of actions. 2959 * @param[out] error 2960 * Pointer to the error structure. 2961 * 2962 * @return 2963 * 0 on success, a negative errno value otherwise and rte_errno is set. 2964 */ 2965 static inline int 2966 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2967 const struct rte_flow_attr *attr, 2968 const struct rte_flow_item items[], 2969 const struct rte_flow_action actions[], 2970 struct rte_flow_error *error) 2971 { 2972 const struct mlx5_flow_driver_ops *fops; 2973 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2974 2975 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2976 fops = flow_get_drv_ops(type); 2977 return fops->translate(dev, dev_flow, attr, items, actions, error); 2978 } 2979 2980 /** 2981 * Flow driver apply API. This abstracts calling driver specific functions. 2982 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2983 * translated driver flows on to device. flow_drv_translate() must precede. 2984 * 2985 * @param[in] dev 2986 * Pointer to Ethernet device structure. 2987 * @param[in, out] flow 2988 * Pointer to flow structure. 2989 * @param[out] error 2990 * Pointer to error structure. 2991 * 2992 * @return 2993 * 0 on success, a negative errno value otherwise and rte_errno is set. 2994 */ 2995 static inline int 2996 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2997 struct rte_flow_error *error) 2998 { 2999 const struct mlx5_flow_driver_ops *fops; 3000 enum mlx5_flow_drv_type type = flow->drv_type; 3001 3002 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3003 fops = flow_get_drv_ops(type); 3004 return fops->apply(dev, flow, error); 3005 } 3006 3007 /** 3008 * Flow driver remove API. This abstracts calling driver specific functions. 3009 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 3010 * on device. All the resources of the flow should be freed by calling 3011 * flow_drv_destroy(). 3012 * 3013 * @param[in] dev 3014 * Pointer to Ethernet device. 3015 * @param[in, out] flow 3016 * Pointer to flow structure. 3017 */ 3018 static inline void 3019 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 3020 { 3021 const struct mlx5_flow_driver_ops *fops; 3022 enum mlx5_flow_drv_type type = flow->drv_type; 3023 3024 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3025 fops = flow_get_drv_ops(type); 3026 fops->remove(dev, flow); 3027 } 3028 3029 /** 3030 * Flow driver destroy API. This abstracts calling driver specific functions. 3031 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 3032 * on device and releases resources of the flow. 3033 * 3034 * @param[in] dev 3035 * Pointer to Ethernet device. 3036 * @param[in, out] flow 3037 * Pointer to flow structure. 3038 */ 3039 static inline void 3040 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 3041 { 3042 const struct mlx5_flow_driver_ops *fops; 3043 enum mlx5_flow_drv_type type = flow->drv_type; 3044 3045 flow_mreg_split_qrss_release(dev, flow); 3046 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3047 fops = flow_get_drv_ops(type); 3048 fops->destroy(dev, flow); 3049 } 3050 3051 /** 3052 * Get RSS action from the action list. 3053 * 3054 * @param[in] actions 3055 * Pointer to the list of actions. 3056 * 3057 * @return 3058 * Pointer to the RSS action if exist, else return NULL. 3059 */ 3060 static const struct rte_flow_action_rss* 3061 flow_get_rss_action(const struct rte_flow_action actions[]) 3062 { 3063 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3064 switch (actions->type) { 3065 case RTE_FLOW_ACTION_TYPE_RSS: 3066 return (const struct rte_flow_action_rss *) 3067 actions->conf; 3068 default: 3069 break; 3070 } 3071 } 3072 return NULL; 3073 } 3074 3075 static unsigned int 3076 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 3077 { 3078 const struct rte_flow_item *item; 3079 unsigned int has_vlan = 0; 3080 3081 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 3082 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 3083 has_vlan = 1; 3084 break; 3085 } 3086 } 3087 if (has_vlan) 3088 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 3089 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 3090 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 3091 MLX5_EXPANSION_ROOT_OUTER; 3092 } 3093 3094 /** 3095 * Get layer flags from the prefix flow. 3096 * 3097 * Some flows may be split to several subflows, the prefix subflow gets the 3098 * match items and the suffix sub flow gets the actions. 3099 * Some actions need the user defined match item flags to get the detail for 3100 * the action. 3101 * This function helps the suffix flow to get the item layer flags from prefix 3102 * subflow. 3103 * 3104 * @param[in] dev_flow 3105 * Pointer the created preifx subflow. 3106 * 3107 * @return 3108 * The layers get from prefix subflow. 3109 */ 3110 static inline uint64_t 3111 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 3112 { 3113 uint64_t layers = 0; 3114 3115 /* 3116 * Layers bits could be localization, but usually the compiler will 3117 * help to do the optimization work for source code. 3118 * If no decap actions, use the layers directly. 
3119 */ 3120 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 3121 return dev_flow->handle->layers; 3122 /* Convert L3 layers with decap action. */ 3123 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 3124 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 3125 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 3126 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 3127 /* Convert L4 layers with decap action. */ 3128 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 3129 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 3130 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 3131 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 3132 return layers; 3133 } 3134 3135 /** 3136 * Get metadata split action information. 3137 * 3138 * @param[in] actions 3139 * Pointer to the list of actions. 3140 * @param[out] qrss 3141 * Pointer to the return pointer. 3142 * @param[out] qrss_type 3143 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 3144 * if no QUEUE/RSS is found. 3145 * @param[out] encap_idx 3146 * Pointer to the index of the encap action if exists, otherwise the last 3147 * action index. 3148 * 3149 * @return 3150 * Total number of actions. 3151 */ 3152 static int 3153 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 3154 const struct rte_flow_action **qrss, 3155 int *encap_idx) 3156 { 3157 const struct rte_flow_action_raw_encap *raw_encap; 3158 int actions_n = 0; 3159 int raw_decap_idx = -1; 3160 3161 *encap_idx = -1; 3162 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3163 switch (actions->type) { 3164 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3165 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3166 *encap_idx = actions_n; 3167 break; 3168 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3169 raw_decap_idx = actions_n; 3170 break; 3171 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3172 raw_encap = actions->conf; 3173 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3174 *encap_idx = raw_decap_idx != -1 ? 3175 raw_decap_idx : actions_n; 3176 break; 3177 case RTE_FLOW_ACTION_TYPE_QUEUE: 3178 case RTE_FLOW_ACTION_TYPE_RSS: 3179 *qrss = actions; 3180 break; 3181 default: 3182 break; 3183 } 3184 actions_n++; 3185 } 3186 if (*encap_idx == -1) 3187 *encap_idx = actions_n; 3188 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3189 return actions_n + 1; 3190 } 3191 3192 /** 3193 * Check meter action from the action list. 3194 * 3195 * @param[in] actions 3196 * Pointer to the list of actions. 3197 * @param[out] mtr 3198 * Pointer to the meter exist flag. 3199 * 3200 * @return 3201 * Total number of actions. 3202 */ 3203 static int 3204 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 3205 { 3206 int actions_n = 0; 3207 3208 MLX5_ASSERT(mtr); 3209 *mtr = 0; 3210 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3211 switch (actions->type) { 3212 case RTE_FLOW_ACTION_TYPE_METER: 3213 *mtr = 1; 3214 break; 3215 default: 3216 break; 3217 } 3218 actions_n++; 3219 } 3220 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3221 return actions_n + 1; 3222 } 3223 3224 /** 3225 * Check if the flow should be split due to hairpin. 3226 * The reason for the split is that in current HW we can't 3227 * support encap and push-vlan on Rx, so if a flow contains 3228 * these actions we move it to Tx. 3229 * 3230 * @param dev 3231 * Pointer to Ethernet device. 3232 * @param[in] attr 3233 * Flow rule attributes. 3234 * @param[in] actions 3235 * Associated actions (list terminated by the END action). 
3236 * 3237 * @return 3238 * > 0 the number of actions and the flow should be split, 3239 * 0 when no split required. 3240 */ 3241 static int 3242 flow_check_hairpin_split(struct rte_eth_dev *dev, 3243 const struct rte_flow_attr *attr, 3244 const struct rte_flow_action actions[]) 3245 { 3246 int queue_action = 0; 3247 int action_n = 0; 3248 int split = 0; 3249 const struct rte_flow_action_queue *queue; 3250 const struct rte_flow_action_rss *rss; 3251 const struct rte_flow_action_raw_encap *raw_encap; 3252 3253 if (!attr->ingress) 3254 return 0; 3255 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3256 switch (actions->type) { 3257 case RTE_FLOW_ACTION_TYPE_QUEUE: 3258 queue = actions->conf; 3259 if (queue == NULL) 3260 return 0; 3261 if (mlx5_rxq_get_type(dev, queue->index) != 3262 MLX5_RXQ_TYPE_HAIRPIN) 3263 return 0; 3264 queue_action = 1; 3265 action_n++; 3266 break; 3267 case RTE_FLOW_ACTION_TYPE_RSS: 3268 rss = actions->conf; 3269 if (rss == NULL || rss->queue_num == 0) 3270 return 0; 3271 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 3272 MLX5_RXQ_TYPE_HAIRPIN) 3273 return 0; 3274 queue_action = 1; 3275 action_n++; 3276 break; 3277 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3278 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3279 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3280 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3281 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3282 split++; 3283 action_n++; 3284 break; 3285 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3286 raw_encap = actions->conf; 3287 if (raw_encap->size > 3288 (sizeof(struct rte_flow_item_eth) + 3289 sizeof(struct rte_flow_item_ipv4))) 3290 split++; 3291 action_n++; 3292 break; 3293 default: 3294 action_n++; 3295 break; 3296 } 3297 } 3298 if (split && queue_action) 3299 return action_n; 3300 return 0; 3301 } 3302 3303 /* Declare flow create/destroy prototype in advance. */ 3304 static uint32_t 3305 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 3306 const struct rte_flow_attr *attr, 3307 const struct rte_flow_item items[], 3308 const struct rte_flow_action actions[], 3309 bool external, struct rte_flow_error *error); 3310 3311 static void 3312 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 3313 uint32_t flow_idx); 3314 3315 /** 3316 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3317 * 3318 * As mark_id is unique, if there's already a registered flow for the mark_id, 3319 * return by increasing the reference counter of the resource. Otherwise, create 3320 * the resource (mcp_res) and flow. 3321 * 3322 * Flow looks like, 3323 * - If ingress port is ANY and reg_c[1] is mark_id, 3324 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3325 * 3326 * For default flow (zero mark_id), flow is like, 3327 * - If ingress port is ANY, 3328 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 3329 * 3330 * @param dev 3331 * Pointer to Ethernet device. 3332 * @param mark_id 3333 * ID of MARK action, zero means default flow for META. 3334 * @param[out] error 3335 * Perform verbose error reporting if not NULL. 3336 * 3337 * @return 3338 * Associated resource on success, NULL otherwise and rte_errno is set. 
3339 */ 3340 static struct mlx5_flow_mreg_copy_resource * 3341 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3342 struct rte_flow_error *error) 3343 { 3344 struct mlx5_priv *priv = dev->data->dev_private; 3345 struct rte_flow_attr attr = { 3346 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3347 .ingress = 1, 3348 }; 3349 struct mlx5_rte_flow_item_tag tag_spec = { 3350 .data = mark_id, 3351 }; 3352 struct rte_flow_item items[] = { 3353 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3354 }; 3355 struct rte_flow_action_mark ftag = { 3356 .id = mark_id, 3357 }; 3358 struct mlx5_flow_action_copy_mreg cp_mreg = { 3359 .dst = REG_B, 3360 .src = REG_NON, 3361 }; 3362 struct rte_flow_action_jump jump = { 3363 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3364 }; 3365 struct rte_flow_action actions[] = { 3366 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3367 }; 3368 struct mlx5_flow_mreg_copy_resource *mcp_res; 3369 uint32_t idx = 0; 3370 int ret; 3371 3372 /* Fill the register fileds in the flow. */ 3373 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3374 if (ret < 0) 3375 return NULL; 3376 tag_spec.id = ret; 3377 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3378 if (ret < 0) 3379 return NULL; 3380 cp_mreg.src = ret; 3381 /* Check if already registered. */ 3382 MLX5_ASSERT(priv->mreg_cp_tbl); 3383 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3384 if (mcp_res) { 3385 /* For non-default rule. */ 3386 if (mark_id != MLX5_DEFAULT_COPY_ID) 3387 mcp_res->refcnt++; 3388 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3389 mcp_res->refcnt == 1); 3390 return mcp_res; 3391 } 3392 /* Provide the full width of FLAG specific value. */ 3393 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3394 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3395 /* Build a new flow. */ 3396 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3397 items[0] = (struct rte_flow_item){ 3398 .type = (enum rte_flow_item_type) 3399 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3400 .spec = &tag_spec, 3401 }; 3402 items[1] = (struct rte_flow_item){ 3403 .type = RTE_FLOW_ITEM_TYPE_END, 3404 }; 3405 actions[0] = (struct rte_flow_action){ 3406 .type = (enum rte_flow_action_type) 3407 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3408 .conf = &ftag, 3409 }; 3410 actions[1] = (struct rte_flow_action){ 3411 .type = (enum rte_flow_action_type) 3412 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3413 .conf = &cp_mreg, 3414 }; 3415 actions[2] = (struct rte_flow_action){ 3416 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3417 .conf = &jump, 3418 }; 3419 actions[3] = (struct rte_flow_action){ 3420 .type = RTE_FLOW_ACTION_TYPE_END, 3421 }; 3422 } else { 3423 /* Default rule, wildcard match. */ 3424 attr.priority = MLX5_FLOW_PRIO_RSVD; 3425 items[0] = (struct rte_flow_item){ 3426 .type = RTE_FLOW_ITEM_TYPE_END, 3427 }; 3428 actions[0] = (struct rte_flow_action){ 3429 .type = (enum rte_flow_action_type) 3430 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3431 .conf = &cp_mreg, 3432 }; 3433 actions[1] = (struct rte_flow_action){ 3434 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3435 .conf = &jump, 3436 }; 3437 actions[2] = (struct rte_flow_action){ 3438 .type = RTE_FLOW_ACTION_TYPE_END, 3439 }; 3440 } 3441 /* Build a new entry. */ 3442 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3443 if (!mcp_res) { 3444 rte_errno = ENOMEM; 3445 return NULL; 3446 } 3447 mcp_res->idx = idx; 3448 /* 3449 * The copy Flows are not included in any list. 
There 3450 * ones are referenced from other Flows and can not 3451 * be applied, removed, deleted in ardbitrary order 3452 * by list traversing. 3453 */ 3454 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3455 actions, false, error); 3456 if (!mcp_res->rix_flow) 3457 goto error; 3458 mcp_res->refcnt++; 3459 mcp_res->hlist_ent.key = mark_id; 3460 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3461 &mcp_res->hlist_ent); 3462 MLX5_ASSERT(!ret); 3463 if (ret) 3464 goto error; 3465 return mcp_res; 3466 error: 3467 if (mcp_res->rix_flow) 3468 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3469 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3470 return NULL; 3471 } 3472 3473 /** 3474 * Release flow in RX_CP_TBL. 3475 * 3476 * @param dev 3477 * Pointer to Ethernet device. 3478 * @flow 3479 * Parent flow for wich copying is provided. 3480 */ 3481 static void 3482 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3483 struct rte_flow *flow) 3484 { 3485 struct mlx5_flow_mreg_copy_resource *mcp_res; 3486 struct mlx5_priv *priv = dev->data->dev_private; 3487 3488 if (!flow->rix_mreg_copy) 3489 return; 3490 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3491 flow->rix_mreg_copy); 3492 if (!mcp_res || !priv->mreg_cp_tbl) 3493 return; 3494 if (flow->copy_applied) { 3495 MLX5_ASSERT(mcp_res->appcnt); 3496 flow->copy_applied = 0; 3497 --mcp_res->appcnt; 3498 if (!mcp_res->appcnt) { 3499 struct rte_flow *mcp_flow = mlx5_ipool_get 3500 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3501 mcp_res->rix_flow); 3502 3503 if (mcp_flow) 3504 flow_drv_remove(dev, mcp_flow); 3505 } 3506 } 3507 /* 3508 * We do not check availability of metadata registers here, 3509 * because copy resources are not allocated in this case. 3510 */ 3511 if (--mcp_res->refcnt) 3512 return; 3513 MLX5_ASSERT(mcp_res->rix_flow); 3514 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3515 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3516 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3517 flow->rix_mreg_copy = 0; 3518 } 3519 3520 /** 3521 * Start flow in RX_CP_TBL. 3522 * 3523 * @param dev 3524 * Pointer to Ethernet device. 3525 * @flow 3526 * Parent flow for wich copying is provided. 3527 * 3528 * @return 3529 * 0 on success, a negative errno value otherwise and rte_errno is set. 3530 */ 3531 static int 3532 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3533 struct rte_flow *flow) 3534 { 3535 struct mlx5_flow_mreg_copy_resource *mcp_res; 3536 struct mlx5_priv *priv = dev->data->dev_private; 3537 int ret; 3538 3539 if (!flow->rix_mreg_copy || flow->copy_applied) 3540 return 0; 3541 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3542 flow->rix_mreg_copy); 3543 if (!mcp_res) 3544 return 0; 3545 if (!mcp_res->appcnt) { 3546 struct rte_flow *mcp_flow = mlx5_ipool_get 3547 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3548 mcp_res->rix_flow); 3549 3550 if (mcp_flow) { 3551 ret = flow_drv_apply(dev, mcp_flow, NULL); 3552 if (ret) 3553 return ret; 3554 } 3555 } 3556 ++mcp_res->appcnt; 3557 flow->copy_applied = 1; 3558 return 0; 3559 } 3560 3561 /** 3562 * Stop flow in RX_CP_TBL. 3563 * 3564 * @param dev 3565 * Pointer to Ethernet device. 3566 * @flow 3567 * Parent flow for wich copying is provided. 
3568 */ 3569 static void 3570 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3571 struct rte_flow *flow) 3572 { 3573 struct mlx5_flow_mreg_copy_resource *mcp_res; 3574 struct mlx5_priv *priv = dev->data->dev_private; 3575 3576 if (!flow->rix_mreg_copy || !flow->copy_applied) 3577 return; 3578 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3579 flow->rix_mreg_copy); 3580 if (!mcp_res) 3581 return; 3582 MLX5_ASSERT(mcp_res->appcnt); 3583 --mcp_res->appcnt; 3584 flow->copy_applied = 0; 3585 if (!mcp_res->appcnt) { 3586 struct rte_flow *mcp_flow = mlx5_ipool_get 3587 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3588 mcp_res->rix_flow); 3589 3590 if (mcp_flow) 3591 flow_drv_remove(dev, mcp_flow); 3592 } 3593 } 3594 3595 /** 3596 * Remove the default copy action from RX_CP_TBL. 3597 * 3598 * @param dev 3599 * Pointer to Ethernet device. 3600 */ 3601 static void 3602 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3603 { 3604 struct mlx5_flow_mreg_copy_resource *mcp_res; 3605 struct mlx5_priv *priv = dev->data->dev_private; 3606 3607 /* Check if default flow is registered. */ 3608 if (!priv->mreg_cp_tbl) 3609 return; 3610 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3611 MLX5_DEFAULT_COPY_ID); 3612 if (!mcp_res) 3613 return; 3614 MLX5_ASSERT(mcp_res->rix_flow); 3615 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3616 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3617 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3618 } 3619 3620 /** 3621 * Add the default copy action in in RX_CP_TBL. 3622 * 3623 * @param dev 3624 * Pointer to Ethernet device. 3625 * @param[out] error 3626 * Perform verbose error reporting if not NULL. 3627 * 3628 * @return 3629 * 0 for success, negative value otherwise and rte_errno is set. 3630 */ 3631 static int 3632 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3633 struct rte_flow_error *error) 3634 { 3635 struct mlx5_priv *priv = dev->data->dev_private; 3636 struct mlx5_flow_mreg_copy_resource *mcp_res; 3637 3638 /* Check whether extensive metadata feature is engaged. */ 3639 if (!priv->config.dv_flow_en || 3640 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3641 !mlx5_flow_ext_mreg_supported(dev) || 3642 !priv->sh->dv_regc0_mask) 3643 return 0; 3644 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3645 if (!mcp_res) 3646 return -rte_errno; 3647 return 0; 3648 } 3649 3650 /** 3651 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3652 * 3653 * All the flow having Q/RSS action should be split by 3654 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3655 * performs the following, 3656 * - CQE->flow_tag := reg_c[1] (MARK) 3657 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3658 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3659 * but there should be a flow per each MARK ID set by MARK action. 3660 * 3661 * For the aforementioned reason, if there's a MARK action in flow's action 3662 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3663 * the MARK ID to CQE's flow_tag like, 3664 * - If reg_c[1] is mark_id, 3665 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3666 * 3667 * For SET_META action which stores value in reg_c[0], as the destination is 3668 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3669 * MARK ID means the default flow. 
The default flow looks like, 3670 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3671 * 3672 * @param dev 3673 * Pointer to Ethernet device. 3674 * @param flow 3675 * Pointer to flow structure. 3676 * @param[in] actions 3677 * Pointer to the list of actions. 3678 * @param[out] error 3679 * Perform verbose error reporting if not NULL. 3680 * 3681 * @return 3682 * 0 on success, negative value otherwise and rte_errno is set. 3683 */ 3684 static int 3685 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3686 struct rte_flow *flow, 3687 const struct rte_flow_action *actions, 3688 struct rte_flow_error *error) 3689 { 3690 struct mlx5_priv *priv = dev->data->dev_private; 3691 struct mlx5_dev_config *config = &priv->config; 3692 struct mlx5_flow_mreg_copy_resource *mcp_res; 3693 const struct rte_flow_action_mark *mark; 3694 3695 /* Check whether extensive metadata feature is engaged. */ 3696 if (!config->dv_flow_en || 3697 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3698 !mlx5_flow_ext_mreg_supported(dev) || 3699 !priv->sh->dv_regc0_mask) 3700 return 0; 3701 /* Find MARK action. */ 3702 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3703 switch (actions->type) { 3704 case RTE_FLOW_ACTION_TYPE_FLAG: 3705 mcp_res = flow_mreg_add_copy_action 3706 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3707 if (!mcp_res) 3708 return -rte_errno; 3709 flow->rix_mreg_copy = mcp_res->idx; 3710 if (dev->data->dev_started) { 3711 mcp_res->appcnt++; 3712 flow->copy_applied = 1; 3713 } 3714 return 0; 3715 case RTE_FLOW_ACTION_TYPE_MARK: 3716 mark = (const struct rte_flow_action_mark *) 3717 actions->conf; 3718 mcp_res = 3719 flow_mreg_add_copy_action(dev, mark->id, error); 3720 if (!mcp_res) 3721 return -rte_errno; 3722 flow->rix_mreg_copy = mcp_res->idx; 3723 if (dev->data->dev_started) { 3724 mcp_res->appcnt++; 3725 flow->copy_applied = 1; 3726 } 3727 return 0; 3728 default: 3729 break; 3730 } 3731 } 3732 return 0; 3733 } 3734 3735 #define MLX5_MAX_SPLIT_ACTIONS 24 3736 #define MLX5_MAX_SPLIT_ITEMS 24 3737 3738 /** 3739 * Split the hairpin flow. 3740 * Since HW can't support encap and push-vlan on Rx, we move these 3741 * actions to Tx. 3742 * If the count action is after the encap then we also 3743 * move the count action. in this case the count will also measure 3744 * the outer bytes. 3745 * 3746 * @param dev 3747 * Pointer to Ethernet device. 3748 * @param[in] actions 3749 * Associated actions (list terminated by the END action). 3750 * @param[out] actions_rx 3751 * Rx flow actions. 3752 * @param[out] actions_tx 3753 * Tx flow actions.. 3754 * @param[out] pattern_tx 3755 * The pattern items for the Tx flow. 3756 * @param[out] flow_id 3757 * The flow ID connected to this flow. 3758 * 3759 * @return 3760 * 0 on success. 
3761 */ 3762 static int 3763 flow_hairpin_split(struct rte_eth_dev *dev, 3764 const struct rte_flow_action actions[], 3765 struct rte_flow_action actions_rx[], 3766 struct rte_flow_action actions_tx[], 3767 struct rte_flow_item pattern_tx[], 3768 uint32_t *flow_id) 3769 { 3770 struct mlx5_priv *priv = dev->data->dev_private; 3771 const struct rte_flow_action_raw_encap *raw_encap; 3772 const struct rte_flow_action_raw_decap *raw_decap; 3773 struct mlx5_rte_flow_action_set_tag *set_tag; 3774 struct rte_flow_action *tag_action; 3775 struct mlx5_rte_flow_item_tag *tag_item; 3776 struct rte_flow_item *item; 3777 char *addr; 3778 int encap = 0; 3779 3780 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3781 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3782 switch (actions->type) { 3783 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3784 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3785 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3786 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3787 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3788 rte_memcpy(actions_tx, actions, 3789 sizeof(struct rte_flow_action)); 3790 actions_tx++; 3791 break; 3792 case RTE_FLOW_ACTION_TYPE_COUNT: 3793 if (encap) { 3794 rte_memcpy(actions_tx, actions, 3795 sizeof(struct rte_flow_action)); 3796 actions_tx++; 3797 } else { 3798 rte_memcpy(actions_rx, actions, 3799 sizeof(struct rte_flow_action)); 3800 actions_rx++; 3801 } 3802 break; 3803 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3804 raw_encap = actions->conf; 3805 if (raw_encap->size > 3806 (sizeof(struct rte_flow_item_eth) + 3807 sizeof(struct rte_flow_item_ipv4))) { 3808 memcpy(actions_tx, actions, 3809 sizeof(struct rte_flow_action)); 3810 actions_tx++; 3811 encap = 1; 3812 } else { 3813 rte_memcpy(actions_rx, actions, 3814 sizeof(struct rte_flow_action)); 3815 actions_rx++; 3816 } 3817 break; 3818 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3819 raw_decap = actions->conf; 3820 if (raw_decap->size < 3821 (sizeof(struct rte_flow_item_eth) + 3822 sizeof(struct rte_flow_item_ipv4))) { 3823 memcpy(actions_tx, actions, 3824 sizeof(struct rte_flow_action)); 3825 actions_tx++; 3826 } else { 3827 rte_memcpy(actions_rx, actions, 3828 sizeof(struct rte_flow_action)); 3829 actions_rx++; 3830 } 3831 break; 3832 default: 3833 rte_memcpy(actions_rx, actions, 3834 sizeof(struct rte_flow_action)); 3835 actions_rx++; 3836 break; 3837 } 3838 } 3839 /* Add set meta action and end action for the Rx flow. */ 3840 tag_action = actions_rx; 3841 tag_action->type = (enum rte_flow_action_type) 3842 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3843 actions_rx++; 3844 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3845 actions_rx++; 3846 set_tag = (void *)actions_rx; 3847 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3848 MLX5_ASSERT(set_tag->id > REG_NON); 3849 set_tag->data = *flow_id; 3850 tag_action->conf = set_tag; 3851 /* Create Tx item list. 
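 * The Tx action list already holds everything moved from the Rx side;
 * only the terminating END action is copied from the original list below.
 * The Tx pattern consists of a single internal TAG item matching the
 * generated flow_id (full 32-bit data mask) in the register resolved via
 * mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, ...), followed by an END item.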
*/ 3852 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3853 addr = (void *)&pattern_tx[2]; 3854 item = pattern_tx; 3855 item->type = (enum rte_flow_item_type) 3856 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3857 tag_item = (void *)addr; 3858 tag_item->data = *flow_id; 3859 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3860 MLX5_ASSERT(set_tag->id > REG_NON); 3861 item->spec = tag_item; 3862 addr += sizeof(struct mlx5_rte_flow_item_tag); 3863 tag_item = (void *)addr; 3864 tag_item->data = UINT32_MAX; 3865 tag_item->id = UINT16_MAX; 3866 item->mask = tag_item; 3867 item->last = NULL; 3868 item++; 3869 item->type = RTE_FLOW_ITEM_TYPE_END; 3870 return 0; 3871 } 3872 3873 /** 3874 * The last stage of the splitting chain; it just creates the subflow 3875 * without any modification. 3876 * 3877 * @param[in] dev 3878 * Pointer to Ethernet device. 3879 * @param[in] flow 3880 * Parent flow structure pointer. 3881 * @param[in, out] sub_flow 3882 * Pointer to return the created subflow, may be NULL. 3883 * @param[in] prefix_layers 3884 * Prefix subflow layers, may be 0. 3885 * @param[in] prefix_mark 3886 * Prefix subflow mark flag, may be 0. 3887 * @param[in] attr 3888 * Flow rule attributes. 3889 * @param[in] items 3890 * Pattern specification (list terminated by the END pattern item). 3891 * @param[in] actions 3892 * Associated actions (list terminated by the END action). 3893 * @param[in] external 3894 * This flow rule is created by request external to PMD. 3895 * @param[in] flow_idx 3896 * This memory pool index to the flow. 3897 * @param[out] error 3898 * Perform verbose error reporting if not NULL. 3899 * @return 3900 * 0 on success, negative value otherwise 3901 */ 3902 static int 3903 flow_create_split_inner(struct rte_eth_dev *dev, 3904 struct rte_flow *flow, 3905 struct mlx5_flow **sub_flow, 3906 uint64_t prefix_layers, 3907 uint32_t prefix_mark, 3908 const struct rte_flow_attr *attr, 3909 const struct rte_flow_item items[], 3910 const struct rte_flow_action actions[], 3911 bool external, uint32_t flow_idx, 3912 struct rte_flow_error *error) 3913 { 3914 struct mlx5_flow *dev_flow; 3915 3916 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3917 flow_idx, error); 3918 if (!dev_flow) 3919 return -rte_errno; 3920 dev_flow->flow = flow; 3921 dev_flow->external = external; 3922 /* Subflow object was created, we must include it in the list. */ 3923 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3924 dev_flow->handle, next); 3925 /* 3926 * If dev_flow is one of the suffix flows, some actions in the suffix 3927 * flow may need the user defined item layer flags; pass the 3928 * metadata Rx queue mark flag to the suffix flow as well. 3929 */ 3930 if (prefix_layers) 3931 dev_flow->handle->layers = prefix_layers; 3932 if (prefix_mark) 3933 dev_flow->handle->mark = 1; 3934 if (sub_flow) 3935 *sub_flow = dev_flow; 3936 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3937 } 3938 3939 /** 3940 * Split the meter flow. 3941 * 3942 * As the meter flow is split into three sub flows, the actions other 3943 * than the meter action only make sense if the meter accepts the 3944 * packet. If the packet is to be dropped, no additional 3945 * actions should be taken. 3946 * 3947 * One special kind of action, which decapsulates the L3 tunnel 3948 * header, is kept in the prefix sub flow so as not to take the 3949 * L3 tunnel header into account. 3950 * 3951 * @param dev 3952 * Pointer to Ethernet device. 
3953 * @param[in] items 3954 * Pattern specification (list terminated by the END pattern item). 3955 * @param[out] sfx_items 3956 * Suffix flow match items (list terminated by the END pattern item). 3957 * @param[in] actions 3958 * Associated actions (list terminated by the END action). 3959 * @param[out] actions_sfx 3960 * Suffix flow actions. 3961 * @param[out] actions_pre 3962 * Prefix flow actions. 3963 * @param[out] pattern_sfx 3964 * The pattern items for the suffix flow. 3965 * @param[out] tag_sfx 3966 * Pointer to suffix flow tag. 3967 * 3968 * @return 3969 * 0 on success. 3970 */ 3971 static int 3972 flow_meter_split_prep(struct rte_eth_dev *dev, 3973 const struct rte_flow_item items[], 3974 struct rte_flow_item sfx_items[], 3975 const struct rte_flow_action actions[], 3976 struct rte_flow_action actions_sfx[], 3977 struct rte_flow_action actions_pre[]) 3978 { 3979 struct rte_flow_action *tag_action = NULL; 3980 struct rte_flow_item *tag_item; 3981 struct mlx5_rte_flow_action_set_tag *set_tag; 3982 struct rte_flow_error error; 3983 const struct rte_flow_action_raw_encap *raw_encap; 3984 const struct rte_flow_action_raw_decap *raw_decap; 3985 struct mlx5_rte_flow_item_tag *tag_spec; 3986 struct mlx5_rte_flow_item_tag *tag_mask; 3987 uint32_t tag_id; 3988 bool copy_vlan = false; 3989 3990 /* Prepare the actions for prefix and suffix flow. */ 3991 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3992 struct rte_flow_action **action_cur = NULL; 3993 3994 switch (actions->type) { 3995 case RTE_FLOW_ACTION_TYPE_METER: 3996 /* Add the extra tag action first. */ 3997 tag_action = actions_pre; 3998 tag_action->type = (enum rte_flow_action_type) 3999 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4000 actions_pre++; 4001 action_cur = &actions_pre; 4002 break; 4003 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 4004 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 4005 action_cur = &actions_pre; 4006 break; 4007 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 4008 raw_encap = actions->conf; 4009 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 4010 action_cur = &actions_pre; 4011 break; 4012 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 4013 raw_decap = actions->conf; 4014 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 4015 action_cur = &actions_pre; 4016 break; 4017 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 4018 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 4019 copy_vlan = true; 4020 break; 4021 default: 4022 break; 4023 } 4024 if (!action_cur) 4025 action_cur = &actions_sfx; 4026 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 4027 (*action_cur)++; 4028 } 4029 /* Add end action to the actions. */ 4030 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 4031 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 4032 actions_pre++; 4033 /* Set the tag. */ 4034 set_tag = (void *)actions_pre; 4035 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4036 /* 4037 * Get the id from the qrss_pool to make qrss share the id with meter. 4038 */ 4039 tag_id = flow_qrss_get_id(dev); 4040 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 4041 assert(tag_action); 4042 tag_action->conf = set_tag; 4043 /* Prepare the suffix subflow items. 
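 * The suffix flow will match on the tag item built below: the spec
 * carries tag_id shifted above the meter color bits and the mask
 * (0xffffff00) excludes those low color bits, so the color assigned by
 * the meter does not affect the suffix match.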
*/ 4044 tag_item = sfx_items++; 4045 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 4046 int item_type = items->type; 4047 4048 switch (item_type) { 4049 case RTE_FLOW_ITEM_TYPE_PORT_ID: 4050 memcpy(sfx_items, items, sizeof(*sfx_items)); 4051 sfx_items++; 4052 break; 4053 case RTE_FLOW_ITEM_TYPE_VLAN: 4054 if (copy_vlan) { 4055 memcpy(sfx_items, items, sizeof(*sfx_items)); 4056 /* 4057 * Convert to internal match item, it is used 4058 * for vlan push and set vid. 4059 */ 4060 sfx_items->type = (enum rte_flow_item_type) 4061 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 4062 sfx_items++; 4063 } 4064 break; 4065 default: 4066 break; 4067 } 4068 } 4069 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 4070 sfx_items++; 4071 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 4072 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 4073 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4074 tag_mask = tag_spec + 1; 4075 tag_mask->data = 0xffffff00; 4076 tag_item->type = (enum rte_flow_item_type) 4077 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 4078 tag_item->spec = tag_spec; 4079 tag_item->last = NULL; 4080 tag_item->mask = tag_mask; 4081 return tag_id; 4082 } 4083 4084 /** 4085 * Split action list having QUEUE/RSS for metadata register copy. 4086 * 4087 * Once Q/RSS action is detected in user's action list, the flow action 4088 * should be split in order to copy metadata registers, which will happen in 4089 * RX_CP_TBL like, 4090 * - CQE->flow_tag := reg_c[1] (MARK) 4091 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 4092 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 4093 * This is because the last action of each flow must be a terminal action 4094 * (QUEUE, RSS or DROP). 4095 * 4096 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 4097 * stored and kept in the mlx5_flow structure per each sub_flow. 4098 * 4099 * The Q/RSS action is replaced with, 4100 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 4101 * And the following JUMP action is added at the end, 4102 * - JUMP, to RX_CP_TBL. 4103 * 4104 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 4105 * flow_create_split_metadata() routine. The flow will look like, 4106 * - If flow ID matches (reg_c[2]), perform Q/RSS. 4107 * 4108 * @param dev 4109 * Pointer to Ethernet device. 4110 * @param[out] split_actions 4111 * Pointer to store split actions to jump to CP_TBL. 4112 * @param[in] actions 4113 * Pointer to the list of original flow actions. 4114 * @param[in] qrss 4115 * Pointer to the Q/RSS action. 4116 * @param[in] actions_n 4117 * Number of original actions. 4118 * @param[out] error 4119 * Perform verbose error reporting if not NULL. 4120 * 4121 * @return 4122 * non-zero unique flow_id on success, otherwise 0 and 4123 * error/rte_error are set. 4124 */ 4125 static uint32_t 4126 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 4127 struct rte_flow_action *split_actions, 4128 const struct rte_flow_action *actions, 4129 const struct rte_flow_action *qrss, 4130 int actions_n, struct rte_flow_error *error) 4131 { 4132 struct mlx5_rte_flow_action_set_tag *set_tag; 4133 struct rte_flow_action_jump *jump; 4134 const int qrss_idx = qrss - actions; 4135 uint32_t flow_id = 0; 4136 int ret = 0; 4137 4138 /* 4139 * Given actions will be split 4140 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 4141 * - Add jump to mreg CP_TBL. 4142 * As a result, there will be one more action. 
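 * For illustration only (not a literal rule from this driver):
 *   original: MARK -> RSS -> END
 *   split:    MARK -> SET_TAG(reg_c[2] := flow_id) -> JUMP(RX_CP_TBL) -> END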
4143 */ 4144 ++actions_n; 4145 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 4146 set_tag = (void *)(split_actions + actions_n); 4147 /* 4148 * If the tag action is not set to void (it means we are not the meter 4149 * suffix flow), add the tag action; the meter suffix flow already 4150 * has the tag added. 4151 */ 4152 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 4153 /* 4154 * Allocate the new subflow ID. This one is unique within 4155 * device and not shared with representors. Otherwise, 4156 * we would have to resolve multi-thread access synch 4157 * issue. Each flow on the shared device is appended 4158 * with source vport identifier, so the resulting 4159 * flows will be unique in the shared (by master and 4160 * representors) domain even if they have coinciding 4161 * IDs. 4162 */ 4163 flow_id = flow_qrss_get_id(dev); 4164 if (!flow_id) 4165 return rte_flow_error_set(error, ENOMEM, 4166 RTE_FLOW_ERROR_TYPE_ACTION, 4167 NULL, "can't allocate id " 4168 "for split Q/RSS subflow"); 4169 /* Internal SET_TAG action to set flow ID. */ 4170 *set_tag = (struct mlx5_rte_flow_action_set_tag){ 4171 .data = flow_id, 4172 }; 4173 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error); 4174 if (ret < 0) 4175 return ret; 4176 set_tag->id = ret; 4177 /* Construct new actions array. */ 4178 /* Replace QUEUE/RSS action. */ 4179 split_actions[qrss_idx] = (struct rte_flow_action){ 4180 .type = (enum rte_flow_action_type) 4181 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4182 .conf = set_tag, 4183 }; 4184 } 4185 /* JUMP action to jump to mreg copy table (CP_TBL). */ 4186 jump = (void *)(set_tag + 1); 4187 *jump = (struct rte_flow_action_jump){ 4188 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 4189 }; 4190 split_actions[actions_n - 2] = (struct rte_flow_action){ 4191 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4192 .conf = jump, 4193 }; 4194 split_actions[actions_n - 1] = (struct rte_flow_action){ 4195 .type = RTE_FLOW_ACTION_TYPE_END, 4196 }; 4197 return flow_id; 4198 } 4199 4200 /** 4201 * Extend the given action list for Tx metadata copy. 4202 * 4203 * Copy the given action list to the ext_actions and add a flow metadata 4204 * register copy action in order to copy reg_a set by WQE to reg_c[0]. 4205 * 4206 * @param[out] ext_actions 4207 * Pointer to the extended action list. 4208 * @param[in] actions 4209 * Pointer to the list of actions. 4210 * @param[in] actions_n 4211 * Number of actions in the list. 4212 * @param[out] error 4213 * Perform verbose error reporting if not NULL. 4214 * @param[in] encap_idx 4215 * The encap action index. 
4216 * 4217 * @return 4218 * 0 on success, negative value otherwise 4219 */ 4220 static int 4221 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 4222 struct rte_flow_action *ext_actions, 4223 const struct rte_flow_action *actions, 4224 int actions_n, struct rte_flow_error *error, 4225 int encap_idx) 4226 { 4227 struct mlx5_flow_action_copy_mreg *cp_mreg = 4228 (struct mlx5_flow_action_copy_mreg *) 4229 (ext_actions + actions_n + 1); 4230 int ret; 4231 4232 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 4233 if (ret < 0) 4234 return ret; 4235 cp_mreg->dst = ret; 4236 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 4237 if (ret < 0) 4238 return ret; 4239 cp_mreg->src = ret; 4240 if (encap_idx != 0) 4241 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 4242 if (encap_idx == actions_n - 1) { 4243 ext_actions[actions_n - 1] = (struct rte_flow_action){ 4244 .type = (enum rte_flow_action_type) 4245 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4246 .conf = cp_mreg, 4247 }; 4248 ext_actions[actions_n] = (struct rte_flow_action){ 4249 .type = RTE_FLOW_ACTION_TYPE_END, 4250 }; 4251 } else { 4252 ext_actions[encap_idx] = (struct rte_flow_action){ 4253 .type = (enum rte_flow_action_type) 4254 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4255 .conf = cp_mreg, 4256 }; 4257 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 4258 sizeof(*ext_actions) * (actions_n - encap_idx)); 4259 } 4260 return 0; 4261 } 4262 4263 /** 4264 * Check the match action from the action list. 4265 * 4266 * @param[in] actions 4267 * Pointer to the list of actions. 4268 * @param[in] attr 4269 * Flow rule attributes. 4270 * @param[in] action 4271 * The action to be check if exist. 4272 * @param[out] match_action_pos 4273 * Pointer to the position of the matched action if exists, otherwise is -1. 4274 * @param[out] qrss_action_pos 4275 * Pointer to the position of the Queue/RSS action if exists, otherwise is -1. 4276 * 4277 * @return 4278 * > 0 the total number of actions. 4279 * 0 if not found match action in action list. 4280 */ 4281 static int 4282 flow_check_match_action(const struct rte_flow_action actions[], 4283 const struct rte_flow_attr *attr, 4284 enum rte_flow_action_type action, 4285 int *match_action_pos, int *qrss_action_pos) 4286 { 4287 const struct rte_flow_action_sample *sample; 4288 int actions_n = 0; 4289 int jump_flag = 0; 4290 uint32_t ratio = 0; 4291 int sub_type = 0; 4292 int flag = 0; 4293 4294 *match_action_pos = -1; 4295 *qrss_action_pos = -1; 4296 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4297 if (actions->type == action) { 4298 flag = 1; 4299 *match_action_pos = actions_n; 4300 } 4301 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE || 4302 actions->type == RTE_FLOW_ACTION_TYPE_RSS) 4303 *qrss_action_pos = actions_n; 4304 if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) 4305 jump_flag = 1; 4306 if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) { 4307 sample = actions->conf; 4308 ratio = sample->ratio; 4309 sub_type = ((const struct rte_flow_action *) 4310 (sample->actions))->type; 4311 } 4312 actions_n++; 4313 } 4314 if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) { 4315 if (ratio == 1) { 4316 /* JUMP Action not support for Mirroring; 4317 * Mirroring support multi-destination; 4318 */ 4319 if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END) 4320 flag = 0; 4321 } 4322 } 4323 /* Count RTE_FLOW_ACTION_TYPE_END. */ 4324 return flag ? 
actions_n + 1 : 0; 4325 } 4326 4327 #define SAMPLE_SUFFIX_ITEM 2 4328 4329 /** 4330 * Split the sample flow. 4331 * 4332 * As sample flow will split to two sub flow, sample flow with 4333 * sample action, the other actions will move to new suffix flow. 4334 * 4335 * Also add unique tag id with tag action in the sample flow, 4336 * the same tag id will be as match in the suffix flow. 4337 * 4338 * @param dev 4339 * Pointer to Ethernet device. 4340 * @param[in] fdb_tx 4341 * FDB egress flow flag. 4342 * @param[out] sfx_items 4343 * Suffix flow match items (list terminated by the END pattern item). 4344 * @param[in] actions 4345 * Associated actions (list terminated by the END action). 4346 * @param[out] actions_sfx 4347 * Suffix flow actions. 4348 * @param[out] actions_pre 4349 * Prefix flow actions. 4350 * @param[in] actions_n 4351 * The total number of actions. 4352 * @param[in] sample_action_pos 4353 * The sample action position. 4354 * @param[in] qrss_action_pos 4355 * The Queue/RSS action position. 4356 * @param[out] error 4357 * Perform verbose error reporting if not NULL. 4358 * 4359 * @return 4360 * 0 on success, or unique flow_id, a negative errno value 4361 * otherwise and rte_errno is set. 4362 */ 4363 static int 4364 flow_sample_split_prep(struct rte_eth_dev *dev, 4365 uint32_t fdb_tx, 4366 struct rte_flow_item sfx_items[], 4367 const struct rte_flow_action actions[], 4368 struct rte_flow_action actions_sfx[], 4369 struct rte_flow_action actions_pre[], 4370 int actions_n, 4371 int sample_action_pos, 4372 int qrss_action_pos, 4373 struct rte_flow_error *error) 4374 { 4375 struct mlx5_rte_flow_action_set_tag *set_tag; 4376 struct mlx5_rte_flow_item_tag *tag_spec; 4377 struct mlx5_rte_flow_item_tag *tag_mask; 4378 uint32_t tag_id = 0; 4379 int index; 4380 int ret; 4381 4382 if (sample_action_pos < 0) 4383 return rte_flow_error_set(error, EINVAL, 4384 RTE_FLOW_ERROR_TYPE_ACTION, 4385 NULL, "invalid position of sample " 4386 "action in list"); 4387 if (!fdb_tx) { 4388 /* Prepare the prefix tag action. */ 4389 set_tag = (void *)(actions_pre + actions_n + 1); 4390 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); 4391 if (ret < 0) 4392 return ret; 4393 set_tag->id = ret; 4394 tag_id = flow_qrss_get_id(dev); 4395 set_tag->data = tag_id; 4396 /* Prepare the suffix subflow items. */ 4397 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); 4398 tag_spec->data = tag_id; 4399 tag_spec->id = set_tag->id; 4400 tag_mask = tag_spec + 1; 4401 tag_mask->data = UINT32_MAX; 4402 sfx_items[0] = (struct rte_flow_item){ 4403 .type = (enum rte_flow_item_type) 4404 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4405 .spec = tag_spec, 4406 .last = NULL, 4407 .mask = tag_mask, 4408 }; 4409 sfx_items[1] = (struct rte_flow_item){ 4410 .type = (enum rte_flow_item_type) 4411 RTE_FLOW_ITEM_TYPE_END, 4412 }; 4413 } 4414 /* Prepare the actions for prefix and suffix flow. */ 4415 if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) { 4416 index = qrss_action_pos; 4417 /* Put the preceding the Queue/RSS action into prefix flow. */ 4418 if (index != 0) 4419 memcpy(actions_pre, actions, 4420 sizeof(struct rte_flow_action) * index); 4421 /* Put others preceding the sample action into prefix flow. */ 4422 if (sample_action_pos > index + 1) 4423 memcpy(actions_pre + index, actions + index + 1, 4424 sizeof(struct rte_flow_action) * 4425 (sample_action_pos - index - 1)); 4426 index = sample_action_pos - 1; 4427 /* Put Queue/RSS action into Suffix flow. 
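 * Only the single Q/RSS action is copied here; it terminates the suffix
 * flow, while the other actions preceding the sample action stay in the
 * prefix list built above.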
*/ 4428 memcpy(actions_sfx, actions + qrss_action_pos, 4429 sizeof(struct rte_flow_action)); 4430 actions_sfx++; 4431 } else { 4432 index = sample_action_pos; 4433 if (index != 0) 4434 memcpy(actions_pre, actions, 4435 sizeof(struct rte_flow_action) * index); 4436 } 4437 /* Add the extra tag action for NIC-RX and E-Switch ingress. */ 4438 if (!fdb_tx) { 4439 actions_pre[index++] = 4440 (struct rte_flow_action){ 4441 .type = (enum rte_flow_action_type) 4442 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4443 .conf = set_tag, 4444 }; 4445 } 4446 memcpy(actions_pre + index, actions + sample_action_pos, 4447 sizeof(struct rte_flow_action)); 4448 index += 1; 4449 actions_pre[index] = (struct rte_flow_action){ 4450 .type = (enum rte_flow_action_type) 4451 RTE_FLOW_ACTION_TYPE_END, 4452 }; 4453 /* Put the actions after sample into Suffix flow. */ 4454 memcpy(actions_sfx, actions + sample_action_pos + 1, 4455 sizeof(struct rte_flow_action) * 4456 (actions_n - sample_action_pos - 1)); 4457 return tag_id; 4458 } 4459 4460 /** 4461 * The splitting for metadata feature. 4462 * 4463 * - Q/RSS action on NIC Rx should be split in order to pass by 4464 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4465 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4466 * 4467 * - All the actions on NIC Tx should have a mreg copy action to 4468 * copy reg_a from WQE to reg_c[0]. 4469 * 4470 * @param dev 4471 * Pointer to Ethernet device. 4472 * @param[in] flow 4473 * Parent flow structure pointer. 4474 * @param[in] prefix_layers 4475 * Prefix flow layer flags. 4476 * @param[in] prefix_mark 4477 * Prefix subflow mark flag, may be 0. 4478 * @param[in] attr 4479 * Flow rule attributes. 4480 * @param[in] items 4481 * Pattern specification (list terminated by the END pattern item). 4482 * @param[in] actions 4483 * Associated actions (list terminated by the END action). 4484 * @param[in] external 4485 * This flow rule is created by request external to PMD. 4486 * @param[in] flow_idx 4487 * This memory pool index to the flow. 4488 * @param[out] error 4489 * Perform verbose error reporting if not NULL. 4490 * @return 4491 * 0 on success, negative value otherwise 4492 */ 4493 static int 4494 flow_create_split_metadata(struct rte_eth_dev *dev, 4495 struct rte_flow *flow, 4496 uint64_t prefix_layers, 4497 uint32_t prefix_mark, 4498 const struct rte_flow_attr *attr, 4499 const struct rte_flow_item items[], 4500 const struct rte_flow_action actions[], 4501 bool external, uint32_t flow_idx, 4502 struct rte_flow_error *error) 4503 { 4504 struct mlx5_priv *priv = dev->data->dev_private; 4505 struct mlx5_dev_config *config = &priv->config; 4506 const struct rte_flow_action *qrss = NULL; 4507 struct rte_flow_action *ext_actions = NULL; 4508 struct mlx5_flow *dev_flow = NULL; 4509 uint32_t qrss_id = 0; 4510 int mtr_sfx = 0; 4511 size_t act_size; 4512 int actions_n; 4513 int encap_idx; 4514 int ret; 4515 4516 /* Check whether extensive metadata feature is engaged. */ 4517 if (!config->dv_flow_en || 4518 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4519 !mlx5_flow_ext_mreg_supported(dev)) 4520 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 4521 prefix_mark, attr, items, 4522 actions, external, flow_idx, 4523 error); 4524 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 4525 &encap_idx); 4526 if (qrss) { 4527 /* Exclude hairpin flows from splitting. 
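 * If the QUEUE action's queue, or the first queue of the RSS action,
 * is a hairpin queue, qrss is reset to NULL below and the metadata
 * Q/RSS split is skipped; such flows are handled by the hairpin split
 * path instead.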
*/ 4528 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 4529 const struct rte_flow_action_queue *queue; 4530 4531 queue = qrss->conf; 4532 if (mlx5_rxq_get_type(dev, queue->index) == 4533 MLX5_RXQ_TYPE_HAIRPIN) 4534 qrss = NULL; 4535 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 4536 const struct rte_flow_action_rss *rss; 4537 4538 rss = qrss->conf; 4539 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 4540 MLX5_RXQ_TYPE_HAIRPIN) 4541 qrss = NULL; 4542 } 4543 } 4544 if (qrss) { 4545 /* Check if it is in meter suffix table. */ 4546 mtr_sfx = attr->group == (attr->transfer ? 4547 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4548 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4549 /* 4550 * Q/RSS action on NIC Rx should be split in order to pass by 4551 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4552 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4553 */ 4554 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4555 sizeof(struct rte_flow_action_set_tag) + 4556 sizeof(struct rte_flow_action_jump); 4557 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4558 SOCKET_ID_ANY); 4559 if (!ext_actions) 4560 return rte_flow_error_set(error, ENOMEM, 4561 RTE_FLOW_ERROR_TYPE_ACTION, 4562 NULL, "no memory to split " 4563 "metadata flow"); 4564 /* 4565 * If we are the suffix flow of meter, tag already exist. 4566 * Set the tag action to void. 4567 */ 4568 if (mtr_sfx) 4569 ext_actions[qrss - actions].type = 4570 RTE_FLOW_ACTION_TYPE_VOID; 4571 else 4572 ext_actions[qrss - actions].type = 4573 (enum rte_flow_action_type) 4574 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4575 /* 4576 * Create the new actions list with removed Q/RSS action 4577 * and appended set tag and jump to register copy table 4578 * (RX_CP_TBL). We should preallocate unique tag ID here 4579 * in advance, because it is needed for set tag action. 4580 */ 4581 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4582 qrss, actions_n, error); 4583 if (!mtr_sfx && !qrss_id) { 4584 ret = -rte_errno; 4585 goto exit; 4586 } 4587 } else if (attr->egress && !attr->transfer) { 4588 /* 4589 * All the actions on NIC Tx should have a metadata register 4590 * copy action to copy reg_a from WQE to reg_c[meta] 4591 */ 4592 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4593 sizeof(struct mlx5_flow_action_copy_mreg); 4594 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4595 SOCKET_ID_ANY); 4596 if (!ext_actions) 4597 return rte_flow_error_set(error, ENOMEM, 4598 RTE_FLOW_ERROR_TYPE_ACTION, 4599 NULL, "no memory to split " 4600 "metadata flow"); 4601 /* Create the action list appended with copy register. */ 4602 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4603 actions_n, error, encap_idx); 4604 if (ret < 0) 4605 goto exit; 4606 } 4607 /* Add the unmodified original or prefix subflow. */ 4608 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, 4609 prefix_mark, attr, 4610 items, ext_actions ? ext_actions : 4611 actions, external, flow_idx, error); 4612 if (ret < 0) 4613 goto exit; 4614 MLX5_ASSERT(dev_flow); 4615 if (qrss) { 4616 const struct rte_flow_attr q_attr = { 4617 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4618 .ingress = 1, 4619 }; 4620 /* Internal PMD action to set register. 
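 * The suffix subflow matches an internal TAG item whose spec is built
 * below: it carries the flow ID that the prefix subflow stored with its
 * SET_TAG action. The register is resolved from MLX5_COPY_MARK further
 * down and replaces the REG_NON placeholder when a qrss_id is used.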
*/ 4621 struct mlx5_rte_flow_item_tag q_tag_spec = { 4622 .data = qrss_id, 4623 .id = REG_NON, 4624 }; 4625 struct rte_flow_item q_items[] = { 4626 { 4627 .type = (enum rte_flow_item_type) 4628 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4629 .spec = &q_tag_spec, 4630 .last = NULL, 4631 .mask = NULL, 4632 }, 4633 { 4634 .type = RTE_FLOW_ITEM_TYPE_END, 4635 }, 4636 }; 4637 struct rte_flow_action q_actions[] = { 4638 { 4639 .type = qrss->type, 4640 .conf = qrss->conf, 4641 }, 4642 { 4643 .type = RTE_FLOW_ACTION_TYPE_END, 4644 }, 4645 }; 4646 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4647 4648 /* 4649 * Configure the tag item only if there is no meter subflow. 4650 * Since tag is already marked in the meter suffix subflow 4651 * we can just use the meter suffix items as is. 4652 */ 4653 if (qrss_id) { 4654 /* Not meter subflow. */ 4655 MLX5_ASSERT(!mtr_sfx); 4656 /* 4657 * Put unique id in prefix flow due to it is destroyed 4658 * after suffix flow and id will be freed after there 4659 * is no actual flows with this id and identifier 4660 * reallocation becomes possible (for example, for 4661 * other flows in other threads). 4662 */ 4663 dev_flow->handle->split_flow_id = qrss_id; 4664 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4665 error); 4666 if (ret < 0) 4667 goto exit; 4668 q_tag_spec.id = ret; 4669 } 4670 dev_flow = NULL; 4671 /* Add suffix subflow to execute Q/RSS. */ 4672 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0, 4673 &q_attr, mtr_sfx ? items : 4674 q_items, q_actions, 4675 external, flow_idx, error); 4676 if (ret < 0) 4677 goto exit; 4678 /* qrss ID should be freed if failed. */ 4679 qrss_id = 0; 4680 MLX5_ASSERT(dev_flow); 4681 } 4682 4683 exit: 4684 /* 4685 * We do not destroy the partially created sub_flows in case of error. 4686 * These ones are included into parent flow list and will be destroyed 4687 * by flow_drv_destroy. 4688 */ 4689 flow_qrss_free_id(dev, qrss_id); 4690 mlx5_free(ext_actions); 4691 return ret; 4692 } 4693 4694 /** 4695 * The splitting for meter feature. 4696 * 4697 * - The meter flow will be split to two flows as prefix and 4698 * suffix flow. The packets make sense only it pass the prefix 4699 * meter action. 4700 * 4701 * - Reg_C_5 is used for the packet to match betweend prefix and 4702 * suffix flow. 4703 * 4704 * @param dev 4705 * Pointer to Ethernet device. 4706 * @param[in] flow 4707 * Parent flow structure pointer. 4708 * @param[in] prefix_layers 4709 * Prefix subflow layers, may be 0. 4710 * @param[in] prefix_mark 4711 * Prefix subflow mark flag, may be 0. 4712 * @param[in] attr 4713 * Flow rule attributes. 4714 * @param[in] items 4715 * Pattern specification (list terminated by the END pattern item). 4716 * @param[in] actions 4717 * Associated actions (list terminated by the END action). 4718 * @param[in] external 4719 * This flow rule is created by request external to PMD. 4720 * @param[in] flow_idx 4721 * This memory pool index to the flow. 4722 * @param[out] error 4723 * Perform verbose error reporting if not NULL. 
4724 * @return 4725 * 0 on success, negative value otherwise 4726 */ 4727 static int 4728 flow_create_split_meter(struct rte_eth_dev *dev, 4729 struct rte_flow *flow, 4730 uint64_t prefix_layers, 4731 uint32_t prefix_mark, 4732 const struct rte_flow_attr *attr, 4733 const struct rte_flow_item items[], 4734 const struct rte_flow_action actions[], 4735 bool external, uint32_t flow_idx, 4736 struct rte_flow_error *error) 4737 { 4738 struct mlx5_priv *priv = dev->data->dev_private; 4739 struct rte_flow_action *sfx_actions = NULL; 4740 struct rte_flow_action *pre_actions = NULL; 4741 struct rte_flow_item *sfx_items = NULL; 4742 struct mlx5_flow *dev_flow = NULL; 4743 struct rte_flow_attr sfx_attr = *attr; 4744 uint32_t mtr = 0; 4745 uint32_t mtr_tag_id = 0; 4746 size_t act_size; 4747 size_t item_size; 4748 int actions_n = 0; 4749 int ret; 4750 4751 if (priv->mtr_en) 4752 actions_n = flow_check_meter_action(actions, &mtr); 4753 if (mtr) { 4754 /* The five prefix actions: meter, decap, encap, tag, end. */ 4755 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4756 sizeof(struct mlx5_rte_flow_action_set_tag); 4757 /* tag, vlan, port id, end. */ 4758 #define METER_SUFFIX_ITEM 4 4759 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4760 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4761 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 4762 0, SOCKET_ID_ANY); 4763 if (!sfx_actions) 4764 return rte_flow_error_set(error, ENOMEM, 4765 RTE_FLOW_ERROR_TYPE_ACTION, 4766 NULL, "no memory to split " 4767 "meter flow"); 4768 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4769 act_size); 4770 pre_actions = sfx_actions + actions_n; 4771 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4772 actions, sfx_actions, 4773 pre_actions); 4774 if (!mtr_tag_id) { 4775 ret = -rte_errno; 4776 goto exit; 4777 } 4778 /* Add the prefix subflow. */ 4779 ret = flow_create_split_inner(dev, flow, &dev_flow, 4780 prefix_layers, 0, 4781 attr, items, 4782 pre_actions, external, 4783 flow_idx, error); 4784 if (ret) { 4785 ret = -rte_errno; 4786 goto exit; 4787 } 4788 dev_flow->handle->split_flow_id = mtr_tag_id; 4789 /* Setting the sfx group atrr. */ 4790 sfx_attr.group = sfx_attr.transfer ? 4791 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4792 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4793 } 4794 /* Add the prefix subflow. */ 4795 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4796 flow_get_prefix_layer_flags(dev_flow) : 4797 prefix_layers, dev_flow ? 4798 dev_flow->handle->mark : prefix_mark, 4799 &sfx_attr, sfx_items ? 4800 sfx_items : items, 4801 sfx_actions ? sfx_actions : actions, 4802 external, flow_idx, error); 4803 exit: 4804 if (sfx_actions) 4805 mlx5_free(sfx_actions); 4806 return ret; 4807 } 4808 4809 /** 4810 * The splitting for sample feature. 4811 * 4812 * Once Sample action is detected in the action list, the flow actions should 4813 * be split into prefix sub flow and suffix sub flow. 4814 * 4815 * The original items remain in the prefix sub flow, all actions preceding the 4816 * sample action and the sample action itself will be copied to the prefix 4817 * sub flow, the actions following the sample action will be copied to the 4818 * suffix sub flow, Queue action always be located in the suffix sub flow. 4819 * 4820 * In order to make the packet from prefix sub flow matches with suffix sub 4821 * flow, an extra tag action be added into prefix sub flow, and the suffix sub 4822 * flow uses tag item with the unique flow id. 
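 * For illustration only (not a literal rule):
 *   original: A1 -> SAMPLE -> A2 -> QUEUE -> END
 *   prefix  : A1 -> SET_TAG(unique id) -> SAMPLE -> END
 *   suffix  : TAG(unique id) matched: A2 -> QUEUE -> END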
4823 * 4824 * @param dev 4825 * Pointer to Ethernet device. 4826 * @param[in] flow 4827 * Parent flow structure pointer. 4828 * @param[in] attr 4829 * Flow rule attributes. 4830 * @param[in] items 4831 * Pattern specification (list terminated by the END pattern item). 4832 * @param[in] actions 4833 * Associated actions (list terminated by the END action). 4834 * @param[in] external 4835 * This flow rule is created by request external to PMD. 4836 * @param[in] flow_idx 4837 * This memory pool index to the flow. 4838 * @param[out] error 4839 * Perform verbose error reporting if not NULL. 4840 * @return 4841 * 0 on success, negative value otherwise 4842 */ 4843 static int 4844 flow_create_split_sample(struct rte_eth_dev *dev, 4845 struct rte_flow *flow, 4846 const struct rte_flow_attr *attr, 4847 const struct rte_flow_item items[], 4848 const struct rte_flow_action actions[], 4849 bool external, uint32_t flow_idx, 4850 struct rte_flow_error *error) 4851 { 4852 struct mlx5_priv *priv = dev->data->dev_private; 4853 struct rte_flow_action *sfx_actions = NULL; 4854 struct rte_flow_action *pre_actions = NULL; 4855 struct rte_flow_item *sfx_items = NULL; 4856 struct mlx5_flow *dev_flow = NULL; 4857 struct rte_flow_attr sfx_attr = *attr; 4858 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4859 struct mlx5_flow_dv_sample_resource *sample_res; 4860 struct mlx5_flow_tbl_data_entry *sfx_tbl_data; 4861 struct mlx5_flow_tbl_resource *sfx_tbl; 4862 union mlx5_flow_tbl_key sfx_table_key; 4863 #endif 4864 size_t act_size; 4865 size_t item_size; 4866 uint32_t fdb_tx = 0; 4867 int32_t tag_id = 0; 4868 int actions_n = 0; 4869 int sample_action_pos; 4870 int qrss_action_pos; 4871 int ret = 0; 4872 4873 if (priv->sampler_en) 4874 actions_n = flow_check_match_action(actions, attr, 4875 RTE_FLOW_ACTION_TYPE_SAMPLE, 4876 &sample_action_pos, &qrss_action_pos); 4877 if (actions_n) { 4878 /* The prefix actions must includes sample, tag, end. */ 4879 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1) 4880 + sizeof(struct mlx5_rte_flow_action_set_tag); 4881 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM + 4882 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4883 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + 4884 item_size), 0, SOCKET_ID_ANY); 4885 if (!sfx_actions) 4886 return rte_flow_error_set(error, ENOMEM, 4887 RTE_FLOW_ERROR_TYPE_ACTION, 4888 NULL, "no memory to split " 4889 "sample flow"); 4890 /* The representor_id is -1 for uplink. */ 4891 fdb_tx = (attr->transfer && priv->representor_id != -1); 4892 if (!fdb_tx) 4893 sfx_items = (struct rte_flow_item *)((char *)sfx_actions 4894 + act_size); 4895 pre_actions = sfx_actions + actions_n; 4896 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items, 4897 actions, sfx_actions, 4898 pre_actions, actions_n, 4899 sample_action_pos, 4900 qrss_action_pos, error); 4901 if (tag_id < 0 || (!fdb_tx && !tag_id)) { 4902 ret = -rte_errno; 4903 goto exit; 4904 } 4905 /* Add the prefix subflow. */ 4906 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr, 4907 items, pre_actions, external, 4908 flow_idx, error); 4909 if (ret) { 4910 ret = -rte_errno; 4911 goto exit; 4912 } 4913 dev_flow->handle->split_flow_id = tag_id; 4914 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4915 /* Set the sfx group attr. 
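 * The suffix group is derived from the sampler's normal-path table key,
 * so the suffix subflow is created in the table the sample action
 * continues to; on transfer the table id is converted back to a group
 * number by subtracting one.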
*/ 4916 sample_res = (struct mlx5_flow_dv_sample_resource *) 4917 dev_flow->dv.sample_res; 4918 sfx_tbl = (struct mlx5_flow_tbl_resource *) 4919 sample_res->normal_path_tbl; 4920 sfx_tbl_data = container_of(sfx_tbl, 4921 struct mlx5_flow_tbl_data_entry, tbl); 4922 sfx_table_key.v64 = sfx_tbl_data->entry.key; 4923 sfx_attr.group = sfx_attr.transfer ? 4924 (sfx_table_key.table_id - 1) : 4925 sfx_table_key.table_id; 4926 #endif 4927 } 4928 /* Add the suffix subflow. */ 4929 ret = flow_create_split_meter(dev, flow, dev_flow ? 4930 flow_get_prefix_layer_flags(dev_flow) : 0, 4931 dev_flow ? dev_flow->handle->mark : 0, 4932 &sfx_attr, sfx_items ? sfx_items : items, 4933 sfx_actions ? sfx_actions : actions, 4934 external, flow_idx, error); 4935 exit: 4936 if (sfx_actions) 4937 mlx5_free(sfx_actions); 4938 return ret; 4939 } 4940 4941 /** 4942 * Split the flow to subflow set. The splitters might be linked 4943 * in the chain, like this: 4944 * flow_create_split_outer() calls: 4945 * flow_create_split_meter() calls: 4946 * flow_create_split_metadata(meter_subflow_0) calls: 4947 * flow_create_split_inner(metadata_subflow_0) 4948 * flow_create_split_inner(metadata_subflow_1) 4949 * flow_create_split_inner(metadata_subflow_2) 4950 * flow_create_split_metadata(meter_subflow_1) calls: 4951 * flow_create_split_inner(metadata_subflow_0) 4952 * flow_create_split_inner(metadata_subflow_1) 4953 * flow_create_split_inner(metadata_subflow_2) 4954 * 4955 * This provide flexible way to add new levels of flow splitting. 4956 * The all of successfully created subflows are included to the 4957 * parent flow dev_flow list. 4958 * 4959 * @param dev 4960 * Pointer to Ethernet device. 4961 * @param[in] flow 4962 * Parent flow structure pointer. 4963 * @param[in] attr 4964 * Flow rule attributes. 4965 * @param[in] items 4966 * Pattern specification (list terminated by the END pattern item). 4967 * @param[in] actions 4968 * Associated actions (list terminated by the END action). 4969 * @param[in] external 4970 * This flow rule is created by request external to PMD. 4971 * @param[in] flow_idx 4972 * This memory pool index to the flow. 4973 * @param[out] error 4974 * Perform verbose error reporting if not NULL. 4975 * @return 4976 * 0 on success, negative value otherwise 4977 */ 4978 static int 4979 flow_create_split_outer(struct rte_eth_dev *dev, 4980 struct rte_flow *flow, 4981 const struct rte_flow_attr *attr, 4982 const struct rte_flow_item items[], 4983 const struct rte_flow_action actions[], 4984 bool external, uint32_t flow_idx, 4985 struct rte_flow_error *error) 4986 { 4987 int ret; 4988 4989 ret = flow_create_split_sample(dev, flow, attr, items, 4990 actions, external, flow_idx, error); 4991 MLX5_ASSERT(ret <= 0); 4992 return ret; 4993 } 4994 4995 /** 4996 * Create a flow and add it to @p list. 4997 * 4998 * @param dev 4999 * Pointer to Ethernet device. 5000 * @param list 5001 * Pointer to a TAILQ flow list. If this parameter NULL, 5002 * no list insertion occurred, flow is just created, 5003 * this is caller's responsibility to track the 5004 * created flow. 5005 * @param[in] attr 5006 * Flow rule attributes. 5007 * @param[in] items 5008 * Pattern specification (list terminated by the END pattern item). 5009 * @param[in] actions 5010 * Associated actions (list terminated by the END action). 5011 * @param[in] external 5012 * This flow rule is created by request external to PMD. 5013 * @param[out] error 5014 * Perform verbose error reporting if not NULL. 
5015 * 5016 * @return 5017 * A flow index on success, 0 otherwise and rte_errno is set. 5018 */ 5019 static uint32_t 5020 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 5021 const struct rte_flow_attr *attr, 5022 const struct rte_flow_item items[], 5023 const struct rte_flow_action actions[], 5024 bool external, struct rte_flow_error *error) 5025 { 5026 struct mlx5_priv *priv = dev->data->dev_private; 5027 struct rte_flow *flow = NULL; 5028 struct mlx5_flow *dev_flow; 5029 const struct rte_flow_action_rss *rss; 5030 union { 5031 struct mlx5_flow_expand_rss buf; 5032 uint8_t buffer[2048]; 5033 } expand_buffer; 5034 union { 5035 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5036 uint8_t buffer[2048]; 5037 } actions_rx; 5038 union { 5039 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5040 uint8_t buffer[2048]; 5041 } actions_hairpin_tx; 5042 union { 5043 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 5044 uint8_t buffer[2048]; 5045 } items_tx; 5046 struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; 5047 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 5048 priv->rss_desc)[!!priv->flow_idx]; 5049 const struct rte_flow_action *p_actions_rx = actions; 5050 uint32_t i; 5051 uint32_t idx = 0; 5052 int hairpin_flow; 5053 uint32_t hairpin_id = 0; 5054 struct rte_flow_attr attr_tx = { .priority = 0 }; 5055 struct rte_flow_attr attr_factor = {0}; 5056 int ret; 5057 5058 memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr)); 5059 if (external) 5060 attr_factor.group *= MLX5_FLOW_TABLE_FACTOR; 5061 hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions); 5062 ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx, 5063 external, hairpin_flow, error); 5064 if (ret < 0) 5065 return 0; 5066 if (hairpin_flow > 0) { 5067 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 5068 rte_errno = EINVAL; 5069 return 0; 5070 } 5071 flow_hairpin_split(dev, actions, actions_rx.actions, 5072 actions_hairpin_tx.actions, items_tx.items, 5073 &hairpin_id); 5074 p_actions_rx = actions_rx.actions; 5075 } 5076 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 5077 if (!flow) { 5078 rte_errno = ENOMEM; 5079 goto error_before_flow; 5080 } 5081 flow->drv_type = flow_get_drv_type(dev, &attr_factor); 5082 if (hairpin_id != 0) 5083 flow->hairpin_flow_id = hairpin_id; 5084 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 5085 flow->drv_type < MLX5_FLOW_TYPE_MAX); 5086 memset(rss_desc, 0, sizeof(*rss_desc)); 5087 rss = flow_get_rss_action(p_actions_rx); 5088 if (rss) { 5089 /* 5090 * The following information is required by 5091 * mlx5_flow_hashfields_adjust() in advance. 5092 */ 5093 rss_desc->level = rss->level; 5094 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 5095 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 5096 } 5097 flow->dev_handles = 0; 5098 if (rss && rss->types) { 5099 unsigned int graph_root; 5100 5101 graph_root = find_graph_root(items, rss->level); 5102 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 5103 items, rss->types, 5104 mlx5_support_expansion, graph_root); 5105 MLX5_ASSERT(ret > 0 && 5106 (unsigned int)ret < sizeof(expand_buffer.buffer)); 5107 } else { 5108 buf->entries = 1; 5109 buf->entry[0].pattern = (void *)(uintptr_t)items; 5110 } 5111 /* 5112 * Record the start index when there is a nested call. All sub-flows 5113 * need to be translated before another calling. 5114 * No need to use ping-pong buffer to save memory here. 
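 * A nested call happens when flow_list_create() is invoked from within
 * flow creation, for example to create the mreg copy flow; the current
 * index is saved in flow_nested_idx and restored once this flow is
 * complete.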
5115 */ 5116 if (priv->flow_idx) { 5117 MLX5_ASSERT(!priv->flow_nested_idx); 5118 priv->flow_nested_idx = priv->flow_idx; 5119 } 5120 for (i = 0; i < buf->entries; ++i) { 5121 /* 5122 * The splitter may create multiple dev_flows, 5123 * depending on configuration. In the simplest 5124 * case it just creates unmodified original flow. 5125 */ 5126 ret = flow_create_split_outer(dev, flow, &attr_factor, 5127 buf->entry[i].pattern, 5128 p_actions_rx, external, idx, 5129 error); 5130 if (ret < 0) 5131 goto error; 5132 } 5133 /* Create the tx flow. */ 5134 if (hairpin_flow) { 5135 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 5136 attr_tx.ingress = 0; 5137 attr_tx.egress = 1; 5138 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 5139 actions_hairpin_tx.actions, 5140 idx, error); 5141 if (!dev_flow) 5142 goto error; 5143 dev_flow->flow = flow; 5144 dev_flow->external = 0; 5145 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 5146 dev_flow->handle, next); 5147 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 5148 items_tx.items, 5149 actions_hairpin_tx.actions, error); 5150 if (ret < 0) 5151 goto error; 5152 } 5153 /* 5154 * Update the metadata register copy table. If extensive 5155 * metadata feature is enabled and registers are supported 5156 * we might create the extra rte_flow for each unique 5157 * MARK/FLAG action ID. 5158 * 5159 * The table is updated for ingress Flows only, because 5160 * the egress Flows belong to the different device and 5161 * copy table should be updated in peer NIC Rx domain. 5162 */ 5163 if (attr_factor.ingress && 5164 (external || attr_factor.group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 5165 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 5166 if (ret) 5167 goto error; 5168 } 5169 /* 5170 * If the flow is external (from application) OR device is started, then 5171 * the flow will be applied immediately. 5172 */ 5173 if (external || dev->data->dev_started) { 5174 ret = flow_drv_apply(dev, flow, error); 5175 if (ret < 0) 5176 goto error; 5177 } 5178 if (list) 5179 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 5180 flow, next); 5181 flow_rxq_flags_set(dev, flow); 5182 /* Nested flow creation index recovery. */ 5183 priv->flow_idx = priv->flow_nested_idx; 5184 if (priv->flow_nested_idx) 5185 priv->flow_nested_idx = 0; 5186 return idx; 5187 error: 5188 MLX5_ASSERT(flow); 5189 ret = rte_errno; /* Save rte_errno before cleanup. */ 5190 flow_mreg_del_copy_action(dev, flow); 5191 flow_drv_destroy(dev, flow); 5192 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 5193 rte_errno = ret; /* Restore rte_errno. */ 5194 error_before_flow: 5195 ret = rte_errno; 5196 if (hairpin_id) 5197 mlx5_flow_id_release(priv->sh->flow_id_pool, 5198 hairpin_id); 5199 rte_errno = ret; 5200 priv->flow_idx = priv->flow_nested_idx; 5201 if (priv->flow_nested_idx) 5202 priv->flow_nested_idx = 0; 5203 return 0; 5204 } 5205 5206 /** 5207 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 5208 * incoming packets to table 1. 5209 * 5210 * Other flow rules, requested for group n, will be created in 5211 * e-switch table n+1. 5212 * Jump action to e-switch group n will be created to group n+1. 5213 * 5214 * Used when working in switchdev mode, to utilise advantages of table 1 5215 * and above. 5216 * 5217 * @param dev 5218 * Pointer to Ethernet device. 5219 * 5220 * @return 5221 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
5222 */ 5223 struct rte_flow * 5224 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 5225 { 5226 const struct rte_flow_attr attr = { 5227 .group = 0, 5228 .priority = 0, 5229 .ingress = 1, 5230 .egress = 0, 5231 .transfer = 1, 5232 }; 5233 const struct rte_flow_item pattern = { 5234 .type = RTE_FLOW_ITEM_TYPE_END, 5235 }; 5236 struct rte_flow_action_jump jump = { 5237 .group = 1, 5238 }; 5239 const struct rte_flow_action actions[] = { 5240 { 5241 .type = RTE_FLOW_ACTION_TYPE_JUMP, 5242 .conf = &jump, 5243 }, 5244 { 5245 .type = RTE_FLOW_ACTION_TYPE_END, 5246 }, 5247 }; 5248 struct mlx5_priv *priv = dev->data->dev_private; 5249 struct rte_flow_error error; 5250 5251 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 5252 &attr, &pattern, 5253 actions, false, &error); 5254 } 5255 5256 /** 5257 * Validate a flow supported by the NIC. 5258 * 5259 * @see rte_flow_validate() 5260 * @see rte_flow_ops 5261 */ 5262 int 5263 mlx5_flow_validate(struct rte_eth_dev *dev, 5264 const struct rte_flow_attr *attr, 5265 const struct rte_flow_item items[], 5266 const struct rte_flow_action actions[], 5267 struct rte_flow_error *error) 5268 { 5269 int hairpin_flow; 5270 5271 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 5272 return flow_drv_validate(dev, attr, items, actions, 5273 true, hairpin_flow, error); 5274 } 5275 5276 /** 5277 * Create a flow. 5278 * 5279 * @see rte_flow_create() 5280 * @see rte_flow_ops 5281 */ 5282 struct rte_flow * 5283 mlx5_flow_create(struct rte_eth_dev *dev, 5284 const struct rte_flow_attr *attr, 5285 const struct rte_flow_item items[], 5286 const struct rte_flow_action actions[], 5287 struct rte_flow_error *error) 5288 { 5289 struct mlx5_priv *priv = dev->data->dev_private; 5290 5291 /* 5292 * If the device is not started yet, it is not allowed to created a 5293 * flow from application. PMD default flows and traffic control flows 5294 * are not affected. 5295 */ 5296 if (unlikely(!dev->data->dev_started)) { 5297 DRV_LOG(DEBUG, "port %u is not started when " 5298 "inserting a flow", dev->data->port_id); 5299 rte_flow_error_set(error, ENODEV, 5300 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5301 NULL, 5302 "port not started"); 5303 return NULL; 5304 } 5305 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 5306 attr, items, actions, true, error); 5307 } 5308 5309 /** 5310 * Destroy a flow in a list. 5311 * 5312 * @param dev 5313 * Pointer to Ethernet device. 5314 * @param list 5315 * Pointer to the Indexed flow list. If this parameter NULL, 5316 * there is no flow removal from the list. Be noted that as 5317 * flow is add to the indexed list, memory of the indexed 5318 * list points to maybe changed as flow destroyed. 5319 * @param[in] flow_idx 5320 * Index of flow to destroy. 5321 */ 5322 static void 5323 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 5324 uint32_t flow_idx) 5325 { 5326 struct mlx5_priv *priv = dev->data->dev_private; 5327 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5328 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5329 [MLX5_IPOOL_RTE_FLOW], flow_idx); 5330 5331 if (!flow) 5332 return; 5333 /* 5334 * Update RX queue flags only if port is started, otherwise it is 5335 * already clean. 
5336 */ 5337 if (dev->data->dev_started) 5338 flow_rxq_flags_trim(dev, flow); 5339 if (flow->hairpin_flow_id) 5340 mlx5_flow_id_release(priv->sh->flow_id_pool, 5341 flow->hairpin_flow_id); 5342 flow_drv_destroy(dev, flow); 5343 if (list) 5344 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 5345 flow_idx, flow, next); 5346 flow_mreg_del_copy_action(dev, flow); 5347 if (flow->fdir) { 5348 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5349 if (priv_fdir_flow->rix_flow == flow_idx) 5350 break; 5351 } 5352 if (priv_fdir_flow) { 5353 LIST_REMOVE(priv_fdir_flow, next); 5354 mlx5_free(priv_fdir_flow->fdir); 5355 mlx5_free(priv_fdir_flow); 5356 } 5357 } 5358 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5359 } 5360 5361 /** 5362 * Destroy all flows. 5363 * 5364 * @param dev 5365 * Pointer to Ethernet device. 5366 * @param list 5367 * Pointer to the Indexed flow list. 5368 * @param active 5369 * If flushing is called avtively. 5370 */ 5371 void 5372 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 5373 { 5374 uint32_t num_flushed = 0; 5375 5376 while (*list) { 5377 flow_list_destroy(dev, list, *list); 5378 num_flushed++; 5379 } 5380 if (active) { 5381 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 5382 dev->data->port_id, num_flushed); 5383 } 5384 } 5385 5386 /** 5387 * Remove all flows. 5388 * 5389 * @param dev 5390 * Pointer to Ethernet device. 5391 * @param list 5392 * Pointer to the Indexed flow list. 5393 */ 5394 void 5395 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 5396 { 5397 struct mlx5_priv *priv = dev->data->dev_private; 5398 struct rte_flow *flow = NULL; 5399 uint32_t idx; 5400 5401 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5402 flow, next) { 5403 flow_drv_remove(dev, flow); 5404 flow_mreg_stop_copy_action(dev, flow); 5405 } 5406 flow_mreg_del_default_copy_action(dev); 5407 flow_rxq_flags_clear(dev); 5408 } 5409 5410 /** 5411 * Add all flows. 5412 * 5413 * @param dev 5414 * Pointer to Ethernet device. 5415 * @param list 5416 * Pointer to the Indexed flow list. 5417 * 5418 * @return 5419 * 0 on success, a negative errno value otherwise and rte_errno is set. 5420 */ 5421 int 5422 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 5423 { 5424 struct mlx5_priv *priv = dev->data->dev_private; 5425 struct rte_flow *flow = NULL; 5426 struct rte_flow_error error; 5427 uint32_t idx; 5428 int ret = 0; 5429 5430 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5431 ret = flow_mreg_add_default_copy_action(dev, &error); 5432 if (ret < 0) 5433 return -rte_errno; 5434 /* Apply Flows created by application. */ 5435 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5436 flow, next) { 5437 ret = flow_mreg_start_copy_action(dev, flow); 5438 if (ret < 0) 5439 goto error; 5440 ret = flow_drv_apply(dev, flow, &error); 5441 if (ret < 0) 5442 goto error; 5443 flow_rxq_flags_set(dev, flow); 5444 } 5445 return 0; 5446 error: 5447 ret = rte_errno; /* Save rte_errno before cleanup. */ 5448 mlx5_flow_stop(dev, list); 5449 rte_errno = ret; /* Restore rte_errno. */ 5450 return -rte_errno; 5451 } 5452 5453 /** 5454 * Stop all default actions for flows. 5455 * 5456 * @param dev 5457 * Pointer to Ethernet device. 5458 */ 5459 void 5460 mlx5_flow_stop_default(struct rte_eth_dev *dev) 5461 { 5462 flow_mreg_del_default_copy_action(dev); 5463 flow_rxq_flags_clear(dev); 5464 } 5465 5466 /** 5467 * Start all default actions for flows. 
5468 * 5469 * @param dev 5470 * Pointer to Ethernet device. 5471 * @return 5472 * 0 on success, a negative errno value otherwise and rte_errno is set. 5473 */ 5474 int 5475 mlx5_flow_start_default(struct rte_eth_dev *dev) 5476 { 5477 struct rte_flow_error error; 5478 5479 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5480 return flow_mreg_add_default_copy_action(dev, &error); 5481 } 5482 5483 /** 5484 * Allocate intermediate resources for flow creation. 5485 * 5486 * @param dev 5487 * Pointer to Ethernet device. 5488 */ 5489 void 5490 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 5491 { 5492 struct mlx5_priv *priv = dev->data->dev_private; 5493 5494 if (!priv->inter_flows) { 5495 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, 5496 MLX5_NUM_MAX_DEV_FLOWS * 5497 sizeof(struct mlx5_flow) + 5498 (sizeof(struct mlx5_flow_rss_desc) + 5499 sizeof(uint16_t) * UINT16_MAX) * 2, 0, 5500 SOCKET_ID_ANY); 5501 if (!priv->inter_flows) { 5502 DRV_LOG(ERR, "can't allocate intermediate memory."); 5503 return; 5504 } 5505 } 5506 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 5507 [MLX5_NUM_MAX_DEV_FLOWS]; 5508 /* Reset the index. */ 5509 priv->flow_idx = 0; 5510 priv->flow_nested_idx = 0; 5511 } 5512 5513 /** 5514 * Free intermediate resources for flows. 5515 * 5516 * @param dev 5517 * Pointer to Ethernet device. 5518 */ 5519 void 5520 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 5521 { 5522 struct mlx5_priv *priv = dev->data->dev_private; 5523 5524 mlx5_free(priv->inter_flows); 5525 priv->inter_flows = NULL; 5526 } 5527 5528 /** 5529 * Verify the flow list is empty 5530 * 5531 * @param dev 5532 * Pointer to Ethernet device. 5533 * 5534 * @return the number of flows not released. 5535 */ 5536 int 5537 mlx5_flow_verify(struct rte_eth_dev *dev) 5538 { 5539 struct mlx5_priv *priv = dev->data->dev_private; 5540 struct rte_flow *flow; 5541 uint32_t idx; 5542 int ret = 0; 5543 5544 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 5545 flow, next) { 5546 DRV_LOG(DEBUG, "port %u flow %p still referenced", 5547 dev->data->port_id, (void *)flow); 5548 ++ret; 5549 } 5550 return ret; 5551 } 5552 5553 /** 5554 * Enable default hairpin egress flow. 5555 * 5556 * @param dev 5557 * Pointer to Ethernet device. 5558 * @param queue 5559 * The queue index. 5560 * 5561 * @return 5562 * 0 on success, a negative errno value otherwise and rte_errno is set. 
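 *
 * A hedged sketch of an assumed caller: one such flow is created for each
 * hairpin Tx queue when traffic is enabled. The txq_is_hairpin() predicate
 * below is hypothetical and only illustrates the intent:
 *
 * @code
 *     uint32_t i;
 *
 *     for (i = 0; i != priv->txqs_n; ++i) {
 *         if (txq_is_hairpin(dev, i) &&
 *             mlx5_ctrl_flow_source_queue(dev, i))
 *             return -rte_errno;
 *     }
 * @endcode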
5563 */ 5564 int 5565 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 5566 uint32_t queue) 5567 { 5568 struct mlx5_priv *priv = dev->data->dev_private; 5569 const struct rte_flow_attr attr = { 5570 .egress = 1, 5571 .priority = 0, 5572 }; 5573 struct mlx5_rte_flow_item_tx_queue queue_spec = { 5574 .queue = queue, 5575 }; 5576 struct mlx5_rte_flow_item_tx_queue queue_mask = { 5577 .queue = UINT32_MAX, 5578 }; 5579 struct rte_flow_item items[] = { 5580 { 5581 .type = (enum rte_flow_item_type) 5582 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 5583 .spec = &queue_spec, 5584 .last = NULL, 5585 .mask = &queue_mask, 5586 }, 5587 { 5588 .type = RTE_FLOW_ITEM_TYPE_END, 5589 }, 5590 }; 5591 struct rte_flow_action_jump jump = { 5592 .group = MLX5_HAIRPIN_TX_TABLE, 5593 }; 5594 struct rte_flow_action actions[2]; 5595 uint32_t flow_idx; 5596 struct rte_flow_error error; 5597 5598 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 5599 actions[0].conf = &jump; 5600 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 5601 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5602 &attr, items, actions, false, &error); 5603 if (!flow_idx) { 5604 DRV_LOG(DEBUG, 5605 "Failed to create ctrl flow: rte_errno(%d)," 5606 " type(%d), message(%s)", 5607 rte_errno, error.type, 5608 error.message ? error.message : " (no stated reason)"); 5609 return -rte_errno; 5610 } 5611 return 0; 5612 } 5613 5614 /** 5615 * Enable a control flow configured from the control plane. 5616 * 5617 * @param dev 5618 * Pointer to Ethernet device. 5619 * @param eth_spec 5620 * An Ethernet flow spec to apply. 5621 * @param eth_mask 5622 * An Ethernet flow mask to apply. 5623 * @param vlan_spec 5624 * A VLAN flow spec to apply. 5625 * @param vlan_mask 5626 * A VLAN flow mask to apply. 5627 * 5628 * @return 5629 * 0 on success, a negative errno value otherwise and rte_errno is set. 5630 */ 5631 int 5632 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 5633 struct rte_flow_item_eth *eth_spec, 5634 struct rte_flow_item_eth *eth_mask, 5635 struct rte_flow_item_vlan *vlan_spec, 5636 struct rte_flow_item_vlan *vlan_mask) 5637 { 5638 struct mlx5_priv *priv = dev->data->dev_private; 5639 const struct rte_flow_attr attr = { 5640 .ingress = 1, 5641 .priority = MLX5_FLOW_PRIO_RSVD, 5642 }; 5643 struct rte_flow_item items[] = { 5644 { 5645 .type = RTE_FLOW_ITEM_TYPE_ETH, 5646 .spec = eth_spec, 5647 .last = NULL, 5648 .mask = eth_mask, 5649 }, 5650 { 5651 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 5652 RTE_FLOW_ITEM_TYPE_END, 5653 .spec = vlan_spec, 5654 .last = NULL, 5655 .mask = vlan_mask, 5656 }, 5657 { 5658 .type = RTE_FLOW_ITEM_TYPE_END, 5659 }, 5660 }; 5661 uint16_t queue[priv->reta_idx_n]; 5662 struct rte_flow_action_rss action_rss = { 5663 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 5664 .level = 0, 5665 .types = priv->rss_conf.rss_hf, 5666 .key_len = priv->rss_conf.rss_key_len, 5667 .queue_num = priv->reta_idx_n, 5668 .key = priv->rss_conf.rss_key, 5669 .queue = queue, 5670 }; 5671 struct rte_flow_action actions[] = { 5672 { 5673 .type = RTE_FLOW_ACTION_TYPE_RSS, 5674 .conf = &action_rss, 5675 }, 5676 { 5677 .type = RTE_FLOW_ACTION_TYPE_END, 5678 }, 5679 }; 5680 uint32_t flow_idx; 5681 struct rte_flow_error error; 5682 unsigned int i; 5683 5684 if (!priv->reta_idx_n || !priv->rxqs_n) { 5685 return 0; 5686 } 5687 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 5688 action_rss.types = 0; 5689 for (i = 0; i != priv->reta_idx_n; ++i) 5690 queue[i] = (*priv->reta_idx)[i]; 5691 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5692 &attr, items, actions, false, &error); 5693 if (!flow_idx) 5694 return -rte_errno; 5695 return 0; 5696 } 5697 5698 /** 5699 * Enable a control flow configured from the control plane. 5700 * 5701 * @param dev 5702 * Pointer to Ethernet device. 5703 * @param eth_spec 5704 * An Ethernet flow spec to apply. 5705 * @param eth_mask 5706 * An Ethernet flow mask to apply. 5707 * 5708 * @return 5709 * 0 on success, a negative errno value otherwise and rte_errno is set. 5710 */ 5711 int 5712 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5713 struct rte_flow_item_eth *eth_spec, 5714 struct rte_flow_item_eth *eth_mask) 5715 { 5716 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5717 } 5718 5719 /** 5720 * Create the default miss flow rule matching LACP traffic. 5721 * 5722 * @param dev 5723 * Pointer to Ethernet device. 5726 * 5727 * @return 5728 * 0 on success, a negative errno value otherwise and rte_errno is set. 5729 */ 5730 int 5731 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5732 { 5733 struct mlx5_priv *priv = dev->data->dev_private; 5734 /* 5735 * The LACP matching is done by using only the ether type, since using 5736 * the multicast destination MAC causes the kernel to give this flow a low priority. 5737 */ 5738 static const struct rte_flow_item_eth lacp_spec = { 5739 .type = RTE_BE16(0x8809), 5740 }; 5741 static const struct rte_flow_item_eth lacp_mask = { 5742 .type = 0xffff, 5743 }; 5744 const struct rte_flow_attr attr = { 5745 .ingress = 1, 5746 }; 5747 struct rte_flow_item items[] = { 5748 { 5749 .type = RTE_FLOW_ITEM_TYPE_ETH, 5750 .spec = &lacp_spec, 5751 .mask = &lacp_mask, 5752 }, 5753 { 5754 .type = RTE_FLOW_ITEM_TYPE_END, 5755 }, 5756 }; 5757 struct rte_flow_action actions[] = { 5758 { 5759 .type = (enum rte_flow_action_type) 5760 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5761 }, 5762 { 5763 .type = RTE_FLOW_ACTION_TYPE_END, 5764 }, 5765 }; 5766 struct rte_flow_error error; 5767 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5768 &attr, items, actions, false, &error); 5769 5770 if (!flow_idx) 5771 return -rte_errno; 5772 return 0; 5773 } 5774 5775 /** 5776 * Destroy a flow.
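 *
 * A minimal application-side sketch using the generic rte_flow API that
 * ends up in this callback (attr, pattern and actions are placeholders,
 * not a tested rule):
 *
 * @code
 *     struct rte_flow_error err;
 *     struct rte_flow *f;
 *
 *     f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *     if (f != NULL)
 *         rte_flow_destroy(port_id, f, &err);
 * @endcode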
5777 * 5778 * @see rte_flow_destroy() 5779 * @see rte_flow_ops 5780 */ 5781 int 5782 mlx5_flow_destroy(struct rte_eth_dev *dev, 5783 struct rte_flow *flow, 5784 struct rte_flow_error *error __rte_unused) 5785 { 5786 struct mlx5_priv *priv = dev->data->dev_private; 5787 5788 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5789 return 0; 5790 } 5791 5792 /** 5793 * Destroy all flows. 5794 * 5795 * @see rte_flow_flush() 5796 * @see rte_flow_ops 5797 */ 5798 int 5799 mlx5_flow_flush(struct rte_eth_dev *dev, 5800 struct rte_flow_error *error __rte_unused) 5801 { 5802 struct mlx5_priv *priv = dev->data->dev_private; 5803 5804 mlx5_flow_list_flush(dev, &priv->flows, false); 5805 return 0; 5806 } 5807 5808 /** 5809 * Isolated mode. 5810 * 5811 * @see rte_flow_isolate() 5812 * @see rte_flow_ops 5813 */ 5814 int 5815 mlx5_flow_isolate(struct rte_eth_dev *dev, 5816 int enable, 5817 struct rte_flow_error *error) 5818 { 5819 struct mlx5_priv *priv = dev->data->dev_private; 5820 5821 if (dev->data->dev_started) { 5822 rte_flow_error_set(error, EBUSY, 5823 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5824 NULL, 5825 "port must be stopped first"); 5826 return -rte_errno; 5827 } 5828 priv->isolated = !!enable; 5829 if (enable) 5830 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5831 else 5832 dev->dev_ops = &mlx5_os_dev_ops; 5833 5834 dev->rx_descriptor_status = mlx5_rx_descriptor_status; 5835 dev->tx_descriptor_status = mlx5_tx_descriptor_status; 5836 5837 return 0; 5838 } 5839 5840 /** 5841 * Query a flow. 5842 * 5843 * @see rte_flow_query() 5844 * @see rte_flow_ops 5845 */ 5846 static int 5847 flow_drv_query(struct rte_eth_dev *dev, 5848 uint32_t flow_idx, 5849 const struct rte_flow_action *actions, 5850 void *data, 5851 struct rte_flow_error *error) 5852 { 5853 struct mlx5_priv *priv = dev->data->dev_private; 5854 const struct mlx5_flow_driver_ops *fops; 5855 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5856 [MLX5_IPOOL_RTE_FLOW], 5857 flow_idx); 5858 enum mlx5_flow_drv_type ftype; 5859 5860 if (!flow) { 5861 return rte_flow_error_set(error, ENOENT, 5862 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5863 NULL, 5864 "invalid flow handle"); 5865 } 5866 ftype = flow->drv_type; 5867 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5868 fops = flow_get_drv_ops(ftype); 5869 5870 return fops->query(dev, flow, actions, data, error); 5871 } 5872 5873 /** 5874 * Query a flow. 5875 * 5876 * @see rte_flow_query() 5877 * @see rte_flow_ops 5878 */ 5879 int 5880 mlx5_flow_query(struct rte_eth_dev *dev, 5881 struct rte_flow *flow, 5882 const struct rte_flow_action *actions, 5883 void *data, 5884 struct rte_flow_error *error) 5885 { 5886 int ret; 5887 5888 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5889 error); 5890 if (ret < 0) 5891 return ret; 5892 return 0; 5893 } 5894 5895 /** 5896 * Convert a flow director filter to a generic flow. 5897 * 5898 * @param dev 5899 * Pointer to Ethernet device. 5900 * @param fdir_filter 5901 * Flow director filter to add. 5902 * @param attributes 5903 * Generic flow parameters structure. 5904 * 5905 * @return 5906 * 0 on success, a negative errno value otherwise and rte_errno is set. 
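 *
 * For example, an IPv4/UDP perfect-mode filter directing traffic to an Rx
 * queue is converted to roughly the items/actions below, where l3, l3_mask,
 * l4, l4_mask and queue stand for the corresponding fields of the mlx5_fdir
 * structure filled in by this function (illustrative sketch only):
 *
 * @code
 *     struct rte_flow_item items[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *         { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &l3, .mask = &l3_mask },
 *         { .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &l4, .mask = &l4_mask },
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 *     struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 * @endcode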
5907 */ 5908 static int 5909 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5910 const struct rte_eth_fdir_filter *fdir_filter, 5911 struct mlx5_fdir *attributes) 5912 { 5913 struct mlx5_priv *priv = dev->data->dev_private; 5914 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5915 const struct rte_eth_fdir_masks *mask = 5916 &dev->data->dev_conf.fdir_conf.mask; 5917 5918 /* Validate queue number. */ 5919 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5920 DRV_LOG(ERR, "port %u invalid queue number %d", 5921 dev->data->port_id, fdir_filter->action.rx_queue); 5922 rte_errno = EINVAL; 5923 return -rte_errno; 5924 } 5925 attributes->attr.ingress = 1; 5926 attributes->items[0] = (struct rte_flow_item) { 5927 .type = RTE_FLOW_ITEM_TYPE_ETH, 5928 .spec = &attributes->l2, 5929 .mask = &attributes->l2_mask, 5930 }; 5931 switch (fdir_filter->action.behavior) { 5932 case RTE_ETH_FDIR_ACCEPT: 5933 attributes->actions[0] = (struct rte_flow_action){ 5934 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5935 .conf = &attributes->queue, 5936 }; 5937 break; 5938 case RTE_ETH_FDIR_REJECT: 5939 attributes->actions[0] = (struct rte_flow_action){ 5940 .type = RTE_FLOW_ACTION_TYPE_DROP, 5941 }; 5942 break; 5943 default: 5944 DRV_LOG(ERR, "port %u invalid behavior %d", 5945 dev->data->port_id, 5946 fdir_filter->action.behavior); 5947 rte_errno = ENOTSUP; 5948 return -rte_errno; 5949 } 5950 attributes->queue.index = fdir_filter->action.rx_queue; 5951 /* Handle L3. */ 5952 switch (fdir_filter->input.flow_type) { 5953 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5954 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5955 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5956 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5957 .src_addr = input->flow.ip4_flow.src_ip, 5958 .dst_addr = input->flow.ip4_flow.dst_ip, 5959 .time_to_live = input->flow.ip4_flow.ttl, 5960 .type_of_service = input->flow.ip4_flow.tos, 5961 }; 5962 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5963 .src_addr = mask->ipv4_mask.src_ip, 5964 .dst_addr = mask->ipv4_mask.dst_ip, 5965 .time_to_live = mask->ipv4_mask.ttl, 5966 .type_of_service = mask->ipv4_mask.tos, 5967 .next_proto_id = mask->ipv4_mask.proto, 5968 }; 5969 attributes->items[1] = (struct rte_flow_item){ 5970 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5971 .spec = &attributes->l3, 5972 .mask = &attributes->l3_mask, 5973 }; 5974 break; 5975 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5976 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5977 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5978 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5979 .hop_limits = input->flow.ipv6_flow.hop_limits, 5980 .proto = input->flow.ipv6_flow.proto, 5981 }; 5982 5983 memcpy(attributes->l3.ipv6.hdr.src_addr, 5984 input->flow.ipv6_flow.src_ip, 5985 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5986 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5987 input->flow.ipv6_flow.dst_ip, 5988 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5989 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 5990 mask->ipv6_mask.src_ip, 5991 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5992 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 5993 mask->ipv6_mask.dst_ip, 5994 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5995 attributes->items[1] = (struct rte_flow_item){ 5996 .type = RTE_FLOW_ITEM_TYPE_IPV6, 5997 .spec = &attributes->l3, 5998 .mask = &attributes->l3_mask, 5999 }; 6000 break; 6001 default: 6002 DRV_LOG(ERR, "port %u invalid flow type%d", 6003 dev->data->port_id, fdir_filter->input.flow_type); 6004 rte_errno = ENOTSUP; 6005 return -rte_errno; 6006 } 6007 /* Handle L4. 
*/ 6008 switch (fdir_filter->input.flow_type) { 6009 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 6010 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 6011 .src_port = input->flow.udp4_flow.src_port, 6012 .dst_port = input->flow.udp4_flow.dst_port, 6013 }; 6014 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6015 .src_port = mask->src_port_mask, 6016 .dst_port = mask->dst_port_mask, 6017 }; 6018 attributes->items[2] = (struct rte_flow_item){ 6019 .type = RTE_FLOW_ITEM_TYPE_UDP, 6020 .spec = &attributes->l4, 6021 .mask = &attributes->l4_mask, 6022 }; 6023 break; 6024 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 6025 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6026 .src_port = input->flow.tcp4_flow.src_port, 6027 .dst_port = input->flow.tcp4_flow.dst_port, 6028 }; 6029 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6030 .src_port = mask->src_port_mask, 6031 .dst_port = mask->dst_port_mask, 6032 }; 6033 attributes->items[2] = (struct rte_flow_item){ 6034 .type = RTE_FLOW_ITEM_TYPE_TCP, 6035 .spec = &attributes->l4, 6036 .mask = &attributes->l4_mask, 6037 }; 6038 break; 6039 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 6040 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 6041 .src_port = input->flow.udp6_flow.src_port, 6042 .dst_port = input->flow.udp6_flow.dst_port, 6043 }; 6044 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6045 .src_port = mask->src_port_mask, 6046 .dst_port = mask->dst_port_mask, 6047 }; 6048 attributes->items[2] = (struct rte_flow_item){ 6049 .type = RTE_FLOW_ITEM_TYPE_UDP, 6050 .spec = &attributes->l4, 6051 .mask = &attributes->l4_mask, 6052 }; 6053 break; 6054 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 6055 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6056 .src_port = input->flow.tcp6_flow.src_port, 6057 .dst_port = input->flow.tcp6_flow.dst_port, 6058 }; 6059 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6060 .src_port = mask->src_port_mask, 6061 .dst_port = mask->dst_port_mask, 6062 }; 6063 attributes->items[2] = (struct rte_flow_item){ 6064 .type = RTE_FLOW_ITEM_TYPE_TCP, 6065 .spec = &attributes->l4, 6066 .mask = &attributes->l4_mask, 6067 }; 6068 break; 6069 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 6070 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 6071 break; 6072 default: 6073 DRV_LOG(ERR, "port %u invalid flow type%d", 6074 dev->data->port_id, fdir_filter->input.flow_type); 6075 rte_errno = ENOTSUP; 6076 return -rte_errno; 6077 } 6078 return 0; 6079 } 6080 6081 #define FLOW_FDIR_CMP(f1, f2, fld) \ 6082 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 6083 6084 /** 6085 * Compare two FDIR flows. If items and actions are identical, the two flows are 6086 * regarded as same. 6087 * 6088 * @param dev 6089 * Pointer to Ethernet device. 6090 * @param f1 6091 * FDIR flow to compare. 6092 * @param f2 6093 * FDIR flow to compare. 6094 * 6095 * @return 6096 * Zero on match, 1 otherwise. 6097 */ 6098 static int 6099 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 6100 { 6101 if (FLOW_FDIR_CMP(f1, f2, attr) || 6102 FLOW_FDIR_CMP(f1, f2, l2) || 6103 FLOW_FDIR_CMP(f1, f2, l2_mask) || 6104 FLOW_FDIR_CMP(f1, f2, l3) || 6105 FLOW_FDIR_CMP(f1, f2, l3_mask) || 6106 FLOW_FDIR_CMP(f1, f2, l4) || 6107 FLOW_FDIR_CMP(f1, f2, l4_mask) || 6108 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 6109 return 1; 6110 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 6111 FLOW_FDIR_CMP(f1, f2, queue)) 6112 return 1; 6113 return 0; 6114 } 6115 6116 /** 6117 * Search device flow list to find out a matched FDIR flow. 6118 * 6119 * @param dev 6120 * Pointer to Ethernet device. 
6121 * @param fdir_flow 6122 * FDIR flow to lookup. 6123 * 6124 * @return 6125 * Index of flow if found, 0 otherwise. 6126 */ 6127 static uint32_t 6128 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 6129 { 6130 struct mlx5_priv *priv = dev->data->dev_private; 6131 uint32_t flow_idx = 0; 6132 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6133 6134 MLX5_ASSERT(fdir_flow); 6135 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6136 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 6137 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 6138 dev->data->port_id, flow_idx); 6139 flow_idx = priv_fdir_flow->rix_flow; 6140 break; 6141 } 6142 } 6143 return flow_idx; 6144 } 6145 6146 /** 6147 * Add new flow director filter and store it in list. 6148 * 6149 * @param dev 6150 * Pointer to Ethernet device. 6151 * @param fdir_filter 6152 * Flow director filter to add. 6153 * 6154 * @return 6155 * 0 on success, a negative errno value otherwise and rte_errno is set. 6156 */ 6157 static int 6158 flow_fdir_filter_add(struct rte_eth_dev *dev, 6159 const struct rte_eth_fdir_filter *fdir_filter) 6160 { 6161 struct mlx5_priv *priv = dev->data->dev_private; 6162 struct mlx5_fdir *fdir_flow; 6163 struct rte_flow *flow; 6164 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6165 uint32_t flow_idx; 6166 int ret; 6167 6168 fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, 6169 SOCKET_ID_ANY); 6170 if (!fdir_flow) { 6171 rte_errno = ENOMEM; 6172 return -rte_errno; 6173 } 6174 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 6175 if (ret) 6176 goto error; 6177 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 6178 if (flow_idx) { 6179 rte_errno = EEXIST; 6180 goto error; 6181 } 6182 priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, 6183 sizeof(struct mlx5_fdir_flow), 6184 0, SOCKET_ID_ANY); 6185 if (!priv_fdir_flow) { 6186 rte_errno = ENOMEM; 6187 goto error; 6188 } 6189 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 6190 fdir_flow->items, fdir_flow->actions, true, 6191 NULL); 6192 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 6193 if (!flow) 6194 goto error; 6195 flow->fdir = 1; 6196 priv_fdir_flow->fdir = fdir_flow; 6197 priv_fdir_flow->rix_flow = flow_idx; 6198 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 6199 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 6200 dev->data->port_id, (void *)flow); 6201 return 0; 6202 error: 6203 mlx5_free(priv_fdir_flow); 6204 mlx5_free(fdir_flow); 6205 return -rte_errno; 6206 } 6207 6208 /** 6209 * Delete specific filter. 6210 * 6211 * @param dev 6212 * Pointer to Ethernet device. 6213 * @param fdir_filter 6214 * Filter to be deleted. 6215 * 6216 * @return 6217 * 0 on success, a negative errno value otherwise and rte_errno is set. 
6218 */ 6219 static int 6220 flow_fdir_filter_delete(struct rte_eth_dev *dev, 6221 const struct rte_eth_fdir_filter *fdir_filter) 6222 { 6223 struct mlx5_priv *priv = dev->data->dev_private; 6224 uint32_t flow_idx; 6225 struct mlx5_fdir fdir_flow = { 6226 .attr.group = 0, 6227 }; 6228 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6229 int ret; 6230 6231 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 6232 if (ret) 6233 return -rte_errno; 6234 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6235 /* Find the fdir in priv list */ 6236 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 6237 break; 6238 } 6239 if (!priv_fdir_flow) 6240 return 0; 6241 LIST_REMOVE(priv_fdir_flow, next); 6242 flow_idx = priv_fdir_flow->rix_flow; 6243 flow_list_destroy(dev, &priv->flows, flow_idx); 6244 mlx5_free(priv_fdir_flow->fdir); 6245 mlx5_free(priv_fdir_flow); 6246 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 6247 dev->data->port_id, flow_idx); 6248 return 0; 6249 } 6250 6251 /** 6252 * Update queue for specific filter. 6253 * 6254 * @param dev 6255 * Pointer to Ethernet device. 6256 * @param fdir_filter 6257 * Filter to be updated. 6258 * 6259 * @return 6260 * 0 on success, a negative errno value otherwise and rte_errno is set. 6261 */ 6262 static int 6263 flow_fdir_filter_update(struct rte_eth_dev *dev, 6264 const struct rte_eth_fdir_filter *fdir_filter) 6265 { 6266 int ret; 6267 6268 ret = flow_fdir_filter_delete(dev, fdir_filter); 6269 if (ret) 6270 return ret; 6271 return flow_fdir_filter_add(dev, fdir_filter); 6272 } 6273 6274 /** 6275 * Flush all filters. 6276 * 6277 * @param dev 6278 * Pointer to Ethernet device. 6279 */ 6280 static void 6281 flow_fdir_filter_flush(struct rte_eth_dev *dev) 6282 { 6283 struct mlx5_priv *priv = dev->data->dev_private; 6284 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6285 6286 while (!LIST_EMPTY(&priv->fdir_flows)) { 6287 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 6288 LIST_REMOVE(priv_fdir_flow, next); 6289 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 6290 mlx5_free(priv_fdir_flow->fdir); 6291 mlx5_free(priv_fdir_flow); 6292 } 6293 } 6294 6295 /** 6296 * Get flow director information. 6297 * 6298 * @param dev 6299 * Pointer to Ethernet device. 6300 * @param[out] fdir_info 6301 * Resulting flow director information. 6302 */ 6303 static void 6304 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 6305 { 6306 struct rte_eth_fdir_masks *mask = 6307 &dev->data->dev_conf.fdir_conf.mask; 6308 6309 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 6310 fdir_info->guarant_spc = 0; 6311 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 6312 fdir_info->max_flexpayload = 0; 6313 fdir_info->flow_types_mask[0] = 0; 6314 fdir_info->flex_payload_unit = 0; 6315 fdir_info->max_flex_payload_segment_num = 0; 6316 fdir_info->flex_payload_limit = 0; 6317 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 6318 } 6319 6320 /** 6321 * Deal with flow director operations. 6322 * 6323 * @param dev 6324 * Pointer to Ethernet device. 6325 * @param filter_op 6326 * Operation to perform. 6327 * @param arg 6328 * Pointer to operation-specific structure. 6329 * 6330 * @return 6331 * 0 on success, a negative errno value otherwise and rte_errno is set. 
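 *
 * A hedged sketch of the legacy application-side call that reaches this
 * dispatcher (the field values are placeholders; the full layout of
 * struct rte_eth_fdir_filter is defined by the ethdev API and not repeated
 * here):
 *
 * @code
 *     struct rte_eth_fdir_filter f = {
 *         .input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *         .input.flow.udp4_flow.src_port = RTE_BE16(1234),
 *         .action.behavior = RTE_ETH_FDIR_ACCEPT,
 *         .action.rx_queue = 3,
 *     };
 *
 *     rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                             RTE_ETH_FILTER_ADD, &f);
 * @endcode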
6332 */ 6333 static int 6334 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 6335 void *arg) 6336 { 6337 enum rte_fdir_mode fdir_mode = 6338 dev->data->dev_conf.fdir_conf.mode; 6339 6340 if (filter_op == RTE_ETH_FILTER_NOP) 6341 return 0; 6342 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 6343 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 6344 DRV_LOG(ERR, "port %u flow director mode %d not supported", 6345 dev->data->port_id, fdir_mode); 6346 rte_errno = EINVAL; 6347 return -rte_errno; 6348 } 6349 switch (filter_op) { 6350 case RTE_ETH_FILTER_ADD: 6351 return flow_fdir_filter_add(dev, arg); 6352 case RTE_ETH_FILTER_UPDATE: 6353 return flow_fdir_filter_update(dev, arg); 6354 case RTE_ETH_FILTER_DELETE: 6355 return flow_fdir_filter_delete(dev, arg); 6356 case RTE_ETH_FILTER_FLUSH: 6357 flow_fdir_filter_flush(dev); 6358 break; 6359 case RTE_ETH_FILTER_INFO: 6360 flow_fdir_info_get(dev, arg); 6361 break; 6362 default: 6363 DRV_LOG(DEBUG, "port %u unknown operation %u", 6364 dev->data->port_id, filter_op); 6365 rte_errno = EINVAL; 6366 return -rte_errno; 6367 } 6368 return 0; 6369 } 6370 6371 /** 6372 * Manage filter operations. 6373 * 6374 * @param dev 6375 * Pointer to Ethernet device structure. 6376 * @param filter_type 6377 * Filter type. 6378 * @param filter_op 6379 * Operation to perform. 6380 * @param arg 6381 * Pointer to operation-specific structure. 6382 * 6383 * @return 6384 * 0 on success, a negative errno value otherwise and rte_errno is set. 6385 */ 6386 int 6387 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 6388 enum rte_filter_type filter_type, 6389 enum rte_filter_op filter_op, 6390 void *arg) 6391 { 6392 switch (filter_type) { 6393 case RTE_ETH_FILTER_GENERIC: 6394 if (filter_op != RTE_ETH_FILTER_GET) { 6395 rte_errno = EINVAL; 6396 return -rte_errno; 6397 } 6398 *(const void **)arg = &mlx5_flow_ops; 6399 return 0; 6400 case RTE_ETH_FILTER_FDIR: 6401 return flow_fdir_ctrl_func(dev, filter_op, arg); 6402 default: 6403 DRV_LOG(ERR, "port %u filter type (%d) not supported", 6404 dev->data->port_id, filter_type); 6405 rte_errno = ENOTSUP; 6406 return -rte_errno; 6407 } 6408 return 0; 6409 } 6410 6411 /** 6412 * Create the needed meter and suffix tables. 6413 * 6414 * @param[in] dev 6415 * Pointer to Ethernet device. 6416 * @param[in] fm 6417 * Pointer to the flow meter. 6418 * 6419 * @return 6420 * Pointer to table set on success, NULL otherwise. 6421 */ 6422 struct mlx5_meter_domains_infos * 6423 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev, 6424 const struct mlx5_flow_meter *fm) 6425 { 6426 const struct mlx5_flow_driver_ops *fops; 6427 6428 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6429 return fops->create_mtr_tbls(dev, fm); 6430 } 6431 6432 /** 6433 * Destroy the meter table set. 6434 * 6435 * @param[in] dev 6436 * Pointer to Ethernet device. 6437 * @param[in] tbl 6438 * Pointer to the meter table set. 6439 * 6440 * @return 6441 * 0 on success. 6442 */ 6443 int 6444 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev, 6445 struct mlx5_meter_domains_infos *tbls) 6446 { 6447 const struct mlx5_flow_driver_ops *fops; 6448 6449 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6450 return fops->destroy_mtr_tbls(dev, tbls); 6451 } 6452 6453 /** 6454 * Create policer rules. 6455 * 6456 * @param[in] dev 6457 * Pointer to Ethernet device. 6458 * @param[in] fm 6459 * Pointer to flow meter structure. 6460 * @param[in] attr 6461 * Pointer to flow attributes. 6462 * 6463 * @return 6464 * 0 on success, -1 otherwise. 
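 *
 * These rules back the rte_mtr API. A heavily hedged sketch of the
 * application-side path that eventually requires them (mtr_params, attr,
 * pattern and the error variables are placeholders and the rte_mtr
 * parameter details are not re-checked here):
 *
 * @code
 *     struct rte_flow_action_meter meter_conf = { .mtr_id = 1 };
 *     struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter_conf },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 *
 *     rte_mtr_create(port_id, 1, &mtr_params, 0, &mtr_err);
 *     rte_flow_create(port_id, &attr, pattern, actions, &flow_err);
 * @endcode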
6465 */ 6466 int 6467 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev, 6468 struct mlx5_flow_meter *fm, 6469 const struct rte_flow_attr *attr) 6470 { 6471 const struct mlx5_flow_driver_ops *fops; 6472 6473 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6474 return fops->create_policer_rules(dev, fm, attr); 6475 } 6476 6477 /** 6478 * Destroy policer rules. 6479 * 6480 * @param[in] fm 6481 * Pointer to flow meter structure. 6482 * @param[in] attr 6483 * Pointer to flow attributes. 6484 * 6485 * @return 6486 * 0 on success, -1 otherwise. 6487 */ 6488 int 6489 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev, 6490 struct mlx5_flow_meter *fm, 6491 const struct rte_flow_attr *attr) 6492 { 6493 const struct mlx5_flow_driver_ops *fops; 6494 6495 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6496 return fops->destroy_policer_rules(dev, fm, attr); 6497 } 6498 6499 /** 6500 * Allocate a counter. 6501 * 6502 * @param[in] dev 6503 * Pointer to Ethernet device structure. 6504 * 6505 * @return 6506 * Index to allocated counter on success, 0 otherwise. 6507 */ 6508 uint32_t 6509 mlx5_counter_alloc(struct rte_eth_dev *dev) 6510 { 6511 const struct mlx5_flow_driver_ops *fops; 6512 struct rte_flow_attr attr = { .transfer = 0 }; 6513 6514 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6515 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6516 return fops->counter_alloc(dev); 6517 } 6518 DRV_LOG(ERR, 6519 "port %u counter allocate is not supported.", 6520 dev->data->port_id); 6521 return 0; 6522 } 6523 6524 /** 6525 * Free a counter. 6526 * 6527 * @param[in] dev 6528 * Pointer to Ethernet device structure. 6529 * @param[in] cnt 6530 * Index of the counter to be freed. 6531 */ 6532 void 6533 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt) 6534 { 6535 const struct mlx5_flow_driver_ops *fops; 6536 struct rte_flow_attr attr = { .transfer = 0 }; 6537 6538 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6539 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6540 fops->counter_free(dev, cnt); 6541 return; 6542 } 6543 DRV_LOG(ERR, 6544 "port %u counter free is not supported.", 6545 dev->data->port_id); 6546 } 6547 6548 /** 6549 * Query counter statistics. 6550 * 6551 * @param[in] dev 6552 * Pointer to Ethernet device structure. 6553 * @param[in] cnt 6554 * Index of the counter to query. 6555 * @param[in] clear 6556 * Set to clear counter statistics. 6557 * @param[out] pkts 6558 * Where to store the number of packets that hit the counter. 6559 * @param[out] bytes 6560 * Where to store the number of bytes that hit the counter. 6561 * 6562 * @return 6563 * 0 on success, a negative errno value otherwise. 6564 */ 6565 int 6566 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, 6567 bool clear, uint64_t *pkts, uint64_t *bytes) 6568 { 6569 const struct mlx5_flow_driver_ops *fops; 6570 struct rte_flow_attr attr = { .transfer = 0 }; 6571 6572 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6573 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6574 return fops->counter_query(dev, cnt, clear, pkts, bytes); 6575 } 6576 DRV_LOG(ERR, 6577 "port %u counter query is not supported.", 6578 dev->data->port_id); 6579 return -ENOTSUP; 6580 } 6581 6582 #define MLX5_POOL_QUERY_FREQ_US 1000000 6583 6584 /** 6585 * Get the number of all valid counter pools. 6586 * 6587 * @param[in] sh 6588 * Pointer to mlx5_dev_ctx_shared object. 6589 * 6590 * @return 6591 * The number of all valid counter pools.
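 *
 * The result drives the alarm period below: with MLX5_POOL_QUERY_FREQ_US
 * being 1000000, e.g. 4 valid pools give an alarm every
 * 1000000 / 4 = 250000 us, so each pool is queried roughly once per second.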
6592 */ 6593 static uint32_t 6594 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh) 6595 { 6596 int i; 6597 uint32_t pools_n = 0; 6598 6599 for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) 6600 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid); 6601 return pools_n; 6602 } 6603 6604 /** 6605 * Set the periodic procedure for triggering asynchronous batch queries for all 6606 * the counter pools. 6607 * 6608 * @param[in] sh 6609 * Pointer to mlx5_dev_ctx_shared object. 6610 */ 6611 void 6612 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) 6613 { 6614 uint32_t pools_n, us; 6615 6616 pools_n = mlx5_get_all_valid_pool_count(sh); 6617 us = MLX5_POOL_QUERY_FREQ_US / RTE_MAX(pools_n, 1u); 6618 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); 6619 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { 6620 sh->cmng.query_thread_on = 0; 6621 DRV_LOG(ERR, "Cannot reinitialize query alarm"); 6622 } else { 6623 sh->cmng.query_thread_on = 1; 6624 } 6625 } 6626 6627 /** 6628 * The periodic procedure for triggering asynchronous batch queries for all the 6629 * counter pools. This function is expected to be called from the host thread. 6630 * 6631 * @param[in] arg 6632 * The parameter for the alarm process. 6633 */ 6634 void 6635 mlx5_flow_query_alarm(void *arg) 6636 { 6637 struct mlx5_dev_ctx_shared *sh = arg; 6638 struct mlx5_devx_obj *dcs; 6639 uint16_t offset; 6640 int ret; 6641 uint8_t batch = sh->cmng.batch; 6642 uint8_t age = sh->cmng.age; 6643 uint16_t pool_index = sh->cmng.pool_index; 6644 struct mlx5_pools_container *cont; 6645 struct mlx5_flow_counter_pool *pool; 6646 int cont_loop = MLX5_CCONT_TYPE_MAX; 6647 6648 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) 6649 goto set_alarm; 6650 next_container: 6651 cont = MLX5_CNT_CONTAINER(sh, batch, age); 6652 rte_spinlock_lock(&cont->resize_sl); 6653 if (!cont->pools) { 6654 rte_spinlock_unlock(&cont->resize_sl); 6655 /* Check if all the containers are empty. */ 6656 if (unlikely(--cont_loop == 0)) 6657 goto set_alarm; 6658 batch ^= 0x1; 6659 pool_index = 0; 6660 if (batch == 0 && pool_index == 0) { 6661 age ^= 0x1; 6662 sh->cmng.batch = batch; 6663 sh->cmng.age = age; 6664 } 6665 goto next_container; 6666 } 6667 pool = cont->pools[pool_index]; 6668 rte_spinlock_unlock(&cont->resize_sl); 6669 if (pool->raw_hw) 6670 /* There is a pool query in progress. */ 6671 goto set_alarm; 6672 pool->raw_hw = 6673 LIST_FIRST(&sh->cmng.free_stat_raws); 6674 if (!pool->raw_hw) 6675 /* No free counter statistics raw memory. */ 6676 goto set_alarm; 6677 dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read 6678 (&pool->a64_dcs); 6679 if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) { 6680 /* Pool without valid counter. */ 6681 pool->raw_hw = NULL; 6682 goto next_pool; 6683 } 6684 offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; 6685 /* 6686 * Identify the counters released between the query trigger and the query 6687 * handler more efficiently. The counters released in this gap period 6688 * should wait for a new round of query as the newly arrived packets 6689 * will not be taken into account.
6690 */ 6691 pool->query_gen++; 6692 ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - 6693 offset, NULL, NULL, 6694 pool->raw_hw->mem_mng->dm->id, 6695 (void *)(uintptr_t) 6696 (pool->raw_hw->data + offset), 6697 sh->devx_comp, 6698 (uint64_t)(uintptr_t)pool); 6699 if (ret) { 6700 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" 6701 " %d", pool->min_dcs->id); 6702 pool->raw_hw = NULL; 6703 goto set_alarm; 6704 } 6705 pool->raw_hw->min_dcs_id = dcs->id; 6706 LIST_REMOVE(pool->raw_hw, next); 6707 sh->cmng.pending_queries++; 6708 next_pool: 6709 pool_index++; 6710 if (pool_index >= rte_atomic16_read(&cont->n_valid)) { 6711 batch ^= 0x1; 6712 pool_index = 0; 6713 if (batch == 0 && pool_index == 0) 6714 age ^= 0x1; 6715 } 6716 set_alarm: 6717 sh->cmng.batch = batch; 6718 sh->cmng.pool_index = pool_index; 6719 sh->cmng.age = age; 6720 mlx5_set_query_alarm(sh); 6721 } 6722 6723 /** 6724 * Check for newly aged flows in the counter pool and trigger the aging event. 6725 * 6726 * @param[in] sh 6727 * Pointer to mlx5_dev_ctx_shared object. 6728 * @param[in] pool 6729 * Pointer to the current counter pool. 6730 */ 6731 static void 6732 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, 6733 struct mlx5_flow_counter_pool *pool) 6734 { 6735 struct mlx5_priv *priv; 6736 struct mlx5_flow_counter *cnt; 6737 struct mlx5_age_info *age_info; 6738 struct mlx5_age_param *age_param; 6739 struct mlx5_counter_stats_raw *cur = pool->raw_hw; 6740 struct mlx5_counter_stats_raw *prev = pool->raw; 6741 uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10); 6742 uint32_t i; 6743 6744 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { 6745 cnt = MLX5_POOL_GET_CNT(pool, i); 6746 age_param = MLX5_CNT_TO_AGE(cnt); 6747 if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE) 6748 continue; 6749 if (cur->data[i].hits != prev->data[i].hits) { 6750 age_param->expire = curr + age_param->timeout; 6751 continue; 6752 } 6753 if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2)) 6754 continue; 6755 /* 6756 * Hold the lock first. Otherwise, if the release happens 6757 * between setting the AGE_TMOUT state and the tailq 6758 * operation, the release procedure may delete a 6759 * non-existent tailq node. 6760 */ 6761 priv = rte_eth_devices[age_param->port_id].data->dev_private; 6762 age_info = GET_PORT_AGE_INFO(priv); 6763 rte_spinlock_lock(&age_info->aged_sl); 6764 /* If the cmpset fails, the release has already happened. */ 6765 if (rte_atomic16_cmpset((volatile uint16_t *) 6766 &age_param->state, 6767 AGE_CANDIDATE, 6768 AGE_TMOUT) == 6769 AGE_CANDIDATE) { 6770 TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); 6771 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 6772 } 6773 rte_spinlock_unlock(&age_info->aged_sl); 6774 } 6775 for (i = 0; i < sh->max_port; i++) { 6776 age_info = &sh->port[i].age_info; 6777 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) 6778 continue; 6779 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) 6780 rte_eth_dev_callback_process 6781 (&rte_eth_devices[sh->port[i].devx_ih_port_id], 6782 RTE_ETH_EVENT_FLOW_AGED, NULL); 6783 age_info->flags = 0; 6784 } 6785 } 6786 6787 /** 6788 * Handler for the HW response with ready values from an asynchronous batch 6789 * query. This function is expected to be called from the host thread. 6790 * 6791 * @param[in] sh 6792 * The pointer to the shared device context. 6793 * @param[in] async_id 6794 * The Devx async ID. 6795 * @param[in] status 6796 * The status of the completion.
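 *
 * On success the freshly filled raw buffer becomes the readable snapshot
 * and the previous one is recycled. A hedged sketch of how a reader is
 * assumed to consume the snapshot (bulk offset handling omitted):
 *
 * @code
 *     rte_spinlock_lock(&pool->sl);
 *     hits = pool->raw->data[i].hits;
 *     rte_spinlock_unlock(&pool->sl);
 * @endcode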
6797 */ 6798 void 6799 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, 6800 uint64_t async_id, int status) 6801 { 6802 struct mlx5_flow_counter_pool *pool = 6803 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; 6804 struct mlx5_counter_stats_raw *raw_to_free; 6805 uint8_t age = !!IS_AGE_POOL(pool); 6806 uint8_t query_gen = pool->query_gen ^ 1; 6807 struct mlx5_pools_container *cont = 6808 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age); 6809 6810 if (unlikely(status)) { 6811 raw_to_free = pool->raw_hw; 6812 } else { 6813 raw_to_free = pool->raw; 6814 if (IS_AGE_POOL(pool)) 6815 mlx5_flow_aging_check(sh, pool); 6816 rte_spinlock_lock(&pool->sl); 6817 pool->raw = pool->raw_hw; 6818 rte_spinlock_unlock(&pool->sl); 6819 /* Be sure the new raw counters data is updated in memory. */ 6820 rte_io_wmb(); 6821 if (!TAILQ_EMPTY(&pool->counters[query_gen])) { 6822 rte_spinlock_lock(&cont->csl); 6823 TAILQ_CONCAT(&cont->counters, 6824 &pool->counters[query_gen], next); 6825 rte_spinlock_unlock(&cont->csl); 6826 } 6827 } 6828 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); 6829 pool->raw_hw = NULL; 6830 sh->cmng.pending_queries--; 6831 } 6832 6833 /** 6834 * Translate the rte_flow group index to HW table value. 6835 * 6836 * @param[in] attributes 6837 * Pointer to flow attributes 6838 * @param[in] external 6839 * Value is part of flow rule created by request external to PMD. 6840 * @param[in] group 6841 * rte_flow group index value. 6842 * @param[out] fdb_def_rule 6843 * Whether fdb jump to table 1 is configured. 6844 * @param[out] table 6845 * HW table value. 6846 * @param[out] error 6847 * Pointer to error structure. 6848 * 6849 * @return 6850 * 0 on success, a negative errno value otherwise and rte_errno is set. 6851 */ 6852 int 6853 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, 6854 uint32_t group, bool fdb_def_rule, uint32_t *table, 6855 struct rte_flow_error *error) 6856 { 6857 if (attributes->transfer && external && fdb_def_rule) { 6858 if (group == UINT32_MAX) 6859 return rte_flow_error_set 6860 (error, EINVAL, 6861 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 6862 NULL, 6863 "group index not supported"); 6864 *table = group + 1; 6865 } else { 6866 *table = group; 6867 } 6868 return 0; 6869 } 6870 6871 /** 6872 * Discover availability of metadata reg_c's. 6873 * 6874 * Iteratively use test flows to check availability. 6875 * 6876 * @param[in] dev 6877 * Pointer to the Ethernet device structure. 6878 * 6879 * @return 6880 * 0 on success, a negative errno value otherwise and rte_errno is set. 6881 */ 6882 int 6883 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) 6884 { 6885 struct mlx5_priv *priv = dev->data->dev_private; 6886 struct mlx5_dev_config *config = &priv->config; 6887 enum modify_reg idx; 6888 int n = 0; 6889 6890 /* reg_c[0] and reg_c[1] are reserved. */ 6891 config->flow_mreg_c[n++] = REG_C_0; 6892 config->flow_mreg_c[n++] = REG_C_1; 6893 /* Discover availability of other reg_c's. 
*/ 6894 for (idx = REG_C_2; idx <= REG_C_7; ++idx) { 6895 struct rte_flow_attr attr = { 6896 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 6897 .priority = MLX5_FLOW_PRIO_RSVD, 6898 .ingress = 1, 6899 }; 6900 struct rte_flow_item items[] = { 6901 [0] = { 6902 .type = RTE_FLOW_ITEM_TYPE_END, 6903 }, 6904 }; 6905 struct rte_flow_action actions[] = { 6906 [0] = { 6907 .type = (enum rte_flow_action_type) 6908 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 6909 .conf = &(struct mlx5_flow_action_copy_mreg){ 6910 .src = REG_C_1, 6911 .dst = idx, 6912 }, 6913 }, 6914 [1] = { 6915 .type = RTE_FLOW_ACTION_TYPE_JUMP, 6916 .conf = &(struct rte_flow_action_jump){ 6917 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 6918 }, 6919 }, 6920 [2] = { 6921 .type = RTE_FLOW_ACTION_TYPE_END, 6922 }, 6923 }; 6924 uint32_t flow_idx; 6925 struct rte_flow *flow; 6926 struct rte_flow_error error; 6927 6928 if (!config->dv_flow_en) 6929 break; 6930 /* Create internal flow, validation skips copy action. */ 6931 flow_idx = flow_list_create(dev, NULL, &attr, items, 6932 actions, false, &error); 6933 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 6934 flow_idx); 6935 if (!flow) 6936 continue; 6937 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL)) 6938 config->flow_mreg_c[n++] = idx; 6939 flow_list_destroy(dev, NULL, flow_idx); 6940 } 6941 for (; n < MLX5_MREG_C_NUM; ++n) 6942 config->flow_mreg_c[n] = REG_NON; 6943 return 0; 6944 } 6945 6946 /** 6947 * Dump the flow raw HW data to a file. 6948 * 6949 * @param[in] dev 6950 * The pointer to Ethernet device. 6951 * @param[in] file 6952 * A pointer to a file for output. 6953 * @param[out] error 6954 * Perform verbose error reporting if not NULL. PMDs initialize this 6955 * structure in case of error only. 6956 * @return 6957 * 0 on success, a negative value otherwise. 6958 */ 6959 int 6960 mlx5_flow_dev_dump(struct rte_eth_dev *dev, 6961 FILE *file, 6962 struct rte_flow_error *error __rte_unused) 6963 { 6964 struct mlx5_priv *priv = dev->data->dev_private; 6965 struct mlx5_dev_ctx_shared *sh = priv->sh; 6966 6967 if (!priv->config.dv_flow_en) { 6968 if (fputs("device dv flow disabled\n", file) <= 0) 6969 return -errno; 6970 return -ENOTSUP; 6971 } 6972 return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain, 6973 sh->tx_domain, file); 6974 } 6975 6976 /** 6977 * Get aged-out flows. 6978 * 6979 * @param[in] dev 6980 * Pointer to the Ethernet device structure. 6981 * @param[in] contexts 6982 * The address of an array of pointers to the aged-out flow contexts. 6983 * @param[in] nb_contexts 6984 * The length of the context array. 6985 * @param[out] error 6986 * Perform verbose error reporting if not NULL. Initialized in case of 6987 * error only. 6988 * 6989 * @return 6990 * The number of aged-out flow contexts returned on success, a negative 6991 * errno value otherwise. If nb_contexts is 0, the number of all aged 6992 * contexts is returned. If nb_contexts is not 0, the number of aged 6993 * flows reported in the context array is returned. 6994 */ 6995 int 6996 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, 6997 uint32_t nb_contexts, struct rte_flow_error *error) 6998 { 6999 const struct mlx5_flow_driver_ops *fops; 7000 struct rte_flow_attr attr = { .transfer = 0 }; 7001 7002 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 7003 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7004 return fops->get_aged_flows(dev, contexts, nb_contexts, 7005 error); 7006 } 7007 DRV_LOG(ERR, 7008 "port %u get aged flows is not supported.", 7009 dev->data->port_id); 7010 return -ENOTSUP; 7011 } 7012
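/*
 * A hedged application-side sketch for consuming flow aging: register for
 * RTE_ETH_EVENT_FLOW_AGED and drain the aged contexts from the callback.
 * The helper below is illustrative only and not part of the PMD.
 *
 *     static int
 *     aged_event_cb(uint16_t port_id, enum rte_eth_event_type event,
 *                   void *cb_arg, void *ret_param)
 *     {
 *         void *contexts[32];
 *         int n;
 *
 *         RTE_SET_USED(event);
 *         RTE_SET_USED(cb_arg);
 *         RTE_SET_USED(ret_param);
 *         n = rte_flow_get_aged_flows(port_id, contexts,
 *                                     RTE_DIM(contexts), NULL);
 *         return n < 0 ? n : 0;
 *     }
 *
 *     rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED,
 *                                   aged_event_cb, NULL);
 */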