/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"

/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

/** Helper macro to build input graph for mlx5_flow_expand_rss(). */
#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
	(const int []){ \
		__VA_ARGS__, 0, \
	}

/** Node object of input graph for mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_node {
	const int *const next;
	/**<
	 * List of next node indexes. Index 0 is interpreted as a terminator.
	 */
	const enum rte_flow_item_type type;
	/**< Pattern item type of current node. */
	uint64_t rss_types;
	/**<
	 * RSS types bit-field associated with this node
	 * (see ETH_RSS_* definitions).
	 */
};

/** Object returned by mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_rss {
	uint32_t entries;
	/**< Number of entries @p patterns and @p priorities. */
	struct {
		struct rte_flow_item *pattern; /**< Expanded pattern array. */
		uint32_t priority; /**< Priority offset for each expansion. */
	} entry[];
};
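/*
 * Layout sketch (editor's illustration, not used by the code): the expanded
 * item arrays live in the same buffer right after the entry[] array, i.e.
 * entry[0].pattern starts at &buf->entry[elt_n] (elt_n == 8 below) and each
 * further pattern is appended after the previous one:
 *
 *   | entries | entry[0..7] | items of entry[0] | items of entry[1] | ...
 *
 * This is why mlx5_flow_expand_rss() below only keeps a single running write
 * pointer (addr) and a running required size (lsize) that is compared against
 * the caller-provided buffer size before every copy.
 */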
static enum rte_flow_item_type
mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
{
	enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
	uint16_t ether_type = 0;
	uint16_t ether_type_m;
	uint8_t ip_next_proto = 0;
	uint8_t ip_next_proto_m;

	if (item == NULL || item->spec == NULL)
		return ret;
	switch (item->type) {
	case RTE_FLOW_ITEM_TYPE_ETH:
		if (item->mask)
			ether_type_m = ((const struct rte_flow_item_eth *)
					(item->mask))->type;
		else
			ether_type_m = rte_flow_item_eth_mask.type;
		if (ether_type_m != RTE_BE16(0xFFFF))
			break;
		ether_type = ((const struct rte_flow_item_eth *)
				(item->spec))->type;
		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
			ret = RTE_FLOW_ITEM_TYPE_VLAN;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_VLAN:
		if (item->mask)
			ether_type_m = ((const struct rte_flow_item_vlan *)
					(item->mask))->inner_type;
		else
			ether_type_m = rte_flow_item_vlan_mask.inner_type;
		if (ether_type_m != RTE_BE16(0xFFFF))
			break;
		ether_type = ((const struct rte_flow_item_vlan *)
				(item->spec))->inner_type;
		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
			ret = RTE_FLOW_ITEM_TYPE_VLAN;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_IPV4:
		if (item->mask)
			ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
					(item->mask))->hdr.next_proto_id;
		else
			ip_next_proto_m =
				rte_flow_item_ipv4_mask.hdr.next_proto_id;
		if (ip_next_proto_m != 0xFF)
			break;
		ip_next_proto = ((const struct rte_flow_item_ipv4 *)
				(item->spec))->hdr.next_proto_id;
		if (ip_next_proto == IPPROTO_UDP)
			ret = RTE_FLOW_ITEM_TYPE_UDP;
		else if (ip_next_proto == IPPROTO_TCP)
			ret = RTE_FLOW_ITEM_TYPE_TCP;
		else if (ip_next_proto == IPPROTO_IP)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (ip_next_proto == IPPROTO_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_IPV6:
		if (item->mask)
			ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
					(item->mask))->hdr.proto;
		else
			ip_next_proto_m =
				rte_flow_item_ipv6_mask.hdr.proto;
		if (ip_next_proto_m != 0xFF)
			break;
		ip_next_proto = ((const struct rte_flow_item_ipv6 *)
				(item->spec))->hdr.proto;
		if (ip_next_proto == IPPROTO_UDP)
			ret = RTE_FLOW_ITEM_TYPE_UDP;
		else if (ip_next_proto == IPPROTO_TCP)
			ret = RTE_FLOW_ITEM_TYPE_TCP;
		else if (ip_next_proto == IPPROTO_IP)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (ip_next_proto == IPPROTO_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	default:
		ret = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	}
	return ret;
}
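/*
 * Illustrative example (editor's note, not part of the driver): for an ETH
 * item whose spec->type is RTE_ETHER_TYPE_IPV4 and whose mask covers the
 * full type field (0xffff), the function above returns
 * RTE_FLOW_ITEM_TYPE_IPV4, telling the expansion code which item is
 * "missing" after the user pattern. A partial or zero type mask leaves the
 * default RTE_FLOW_ITEM_TYPE_VOID, and an unknown EtherType/protocol maps
 * to RTE_FLOW_ITEM_TYPE_END, i.e. no expansion is required.
 */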
/**
 * Expand RSS flows into several possible flows according to the RSS hash
 * fields requested and the driver capabilities.
 *
 * @param[out] buf
 *   Buffer to store the result expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] types
 *   RSS types to expand (see ETH_RSS_* definitions).
 * @param[in] graph
 *   Input graph to expand @p pattern according to @p types.
 * @param[in] graph_root_index
 *   Index of root node in @p graph, typically 0.
 *
 * @return
 *   A positive value representing the size of @p buf in bytes regardless of
 *   @p size on success, a negative errno value otherwise and rte_errno is
 *   set, the following errors are defined:
 *
 *   -E2BIG: graph-depth @p graph is too deep.
 */
static int
mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
		     const struct rte_flow_item *pattern, uint64_t types,
		     const struct mlx5_flow_expand_node graph[],
		     int graph_root_index)
{
	const int elt_n = 8;
	const struct rte_flow_item *item;
	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
	const int *next_node;
	const int *stack[elt_n];
	int stack_pos = 0;
	struct rte_flow_item flow_items[elt_n];
	unsigned int i;
	size_t lsize;
	size_t user_pattern_size = 0;
	void *addr = NULL;
	const struct mlx5_flow_expand_node *next = NULL;
	struct rte_flow_item missed_item;
	int missed = 0;
	int elt = 0;
	const struct rte_flow_item *last_item = NULL;

	memset(&missed_item, 0, sizeof(missed_item));
	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
		elt_n * sizeof(buf->entry[0]);
	if (lsize <= size) {
		buf->entry[0].priority = 0;
		buf->entry[0].pattern = (void *)&buf->entry[elt_n];
		buf->entries = 0;
		addr = buf->entry[0].pattern;
	}
	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
			last_item = item;
		for (i = 0; node->next && node->next[i]; ++i) {
			next = &graph[node->next[i]];
			if (next->type == item->type)
				break;
		}
		if (next)
			node = next;
		user_pattern_size += sizeof(*item);
	}
	user_pattern_size += sizeof(*item); /* Handle END item. */
	lsize += user_pattern_size;
	/* Copy the user pattern in the first entry of the buffer. */
	if (lsize <= size) {
		rte_memcpy(addr, pattern, user_pattern_size);
		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
		buf->entries = 1;
	}
	/* Start expanding. */
	memset(flow_items, 0, sizeof(flow_items));
	user_pattern_size -= sizeof(*item);
	/*
	 * Check if the last valid item has spec set; if so, the pattern may
	 * need to be completed so it can be used for expansion.
	 */
	missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
		/* Item type END indicates expansion is not required. */
		return lsize;
	}
	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
		next = NULL;
		missed = 1;
		for (i = 0; node->next && node->next[i]; ++i) {
			next = &graph[node->next[i]];
			if (next->type == missed_item.type) {
				flow_items[0].type = missed_item.type;
				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
				break;
			}
			next = NULL;
		}
	}
	if (next && missed) {
		elt = 2; /* missed item + item end. */
		node = next;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if ((node->rss_types & types) && lsize <= size) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	}
	memset(flow_items, 0, sizeof(flow_items));
	next_node = node->next;
	stack[stack_pos] = next_node;
	node = next_node ? &graph[*next_node] : NULL;
	while (node) {
		flow_items[stack_pos].type = node->type;
		if (node->rss_types & types) {
			/*
			 * Compute the number of items to copy from the
			 * expansion and copy it.
			 * When stack_pos is 0, there is one element in it,
			 * plus the additional END item.
			 */
			elt = stack_pos + 2;
			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
			lsize += elt * sizeof(*item) + user_pattern_size;
			if (lsize <= size) {
				size_t n = elt * sizeof(*item);

				buf->entry[buf->entries].priority =
					stack_pos + 1 + missed;
				buf->entry[buf->entries].pattern = addr;
				buf->entries++;
				rte_memcpy(addr, buf->entry[0].pattern,
					   user_pattern_size);
				addr = (void *)(((uintptr_t)addr) +
						user_pattern_size);
				rte_memcpy(addr, &missed_item,
					   missed * sizeof(*item));
				addr = (void *)(((uintptr_t)addr) +
						missed * sizeof(*item));
				rte_memcpy(addr, flow_items, n);
				addr = (void *)(((uintptr_t)addr) + n);
			}
		}
		/* Go deeper. */
		if (node->next) {
			next_node = node->next;
			if (stack_pos++ == elt_n) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			stack[stack_pos] = next_node;
		} else if (*(next_node + 1)) {
			/* Follow up with the next possibility. */
			++next_node;
		} else {
			/* Move to the next path. */
			if (stack_pos)
				next_node = stack[--stack_pos];
			next_node++;
			stack[stack_pos] = next_node;
		}
		node = *next_node ? &graph[*next_node] : NULL;
	};
	/* No expanded flows, but we have a missed item; create one rule for it. */
	if (buf->entries == 1 && missed != 0) {
		elt = 2;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if (lsize <= size) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			flow_items[0].type = missed_item.type;
			flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	}
	return lsize;
}
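/*
 * Usage sketch (editor's illustration based on the documented contract
 * above, not the driver's actual call site): the required buffer size can
 * be queried first by passing a zero size, e.g.
 *
 *	int ret = mlx5_flow_expand_rss(NULL, 0, pattern, types,
 *				       mlx5_support_expansion,
 *				       MLX5_EXPANSION_ROOT);
 *	struct mlx5_flow_expand_rss *buf = mlx5_malloc(0, ret, 0,
 *						       SOCKET_ID_ANY);
 *	ret = mlx5_flow_expand_rss(buf, ret, pattern, types,
 *				   mlx5_support_expansion,
 *				   MLX5_EXPANSION_ROOT);
 *
 * after which buf->entries expanded patterns are available in buf->entry[].
 */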
enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						  MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
					(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6,
						  MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						  MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						  MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};
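/*
 * Example walk of the graph above (editor's illustration): starting from
 * MLX5_EXPANSION_ROOT, a user pattern ETH / IPV4 / END expanded with
 * types == ETH_RSS_UDP | ETH_RSS_TCP ends on node MLX5_EXPANSION_IPV4,
 * whose successors IPV4_UDP and IPV4_TCP both match the requested RSS
 * types, so mlx5_flow_expand_rss() emits:
 *
 *	ETH / IPV4 / END	(the user pattern, priority 0)
 *	ETH / IPV4 / UDP / END	(priority 1)
 *	ETH / IPV4 / TCP / END	(priority 1)
 *
 * Nodes with rss_types == 0 (e.g. plain ETH) never generate entries on
 * their own and only serve as intermediate hops.
 */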
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_aged_flows = mlx5_flow_get_aged_flows,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action actions[2];
	struct rte_flow_action_queue queue;
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_SFX:
		/*
		 * If meter color and flow match share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is used in the meter suffix
		 * sub-flow when a meter is present. It's safe to share the
		 * same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If a meter is enabled, it engages registers for color
		 * match and flow match. If the meter color match does not use
		 * REG_C_2, the REG_C_x used by the meter color match must be
		 * skipped.
		 * If no meter is enabled, all available registers can be used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than
		 * REG_C_2. Take care not to conflict with the meter color
		 * REG_C_x. If the available index REG_C_y >= REG_C_x, skip the
		 * color register.
		 */
		if (skip_mtr_reg && config->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						       RTE_FLOW_ERROR_TYPE_ITEM,
							NULL, "invalid tag id");
			if (config->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NON)
				return config->flow_mreg_c
					       [id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return config->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}
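/*
 * Example mapping (editor's illustration of the switch above): with
 * dv_xmeta_en == MLX5_XMETA_MODE_META16 the function resolves
 *
 *	MLX5_METADATA_RX  -> REG_C_0
 *	MLX5_METADATA_FDB -> REG_C_0
 *	MLX5_FLOW_MARK    -> REG_C_1
 *	MLX5_METADATA_TX  -> REG_A
 *	MLX5_HAIRPIN_RX   -> REG_B
 *
 * while MLX5_APP_TAG ids are spread over the remaining REG_C_x registers,
 * skipping the one reserved for the meter color when a meter is enabled.
 */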
/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;

	/*
	 * Having available reg_c can be regarded inclusively as supporting
	 * extensive flow metadata register, which could mean,
	 * - metadata register copy action by modify header.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return config->flow_mreg_c[2] != REG_NON;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the mlx5 RSS descriptor.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}
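/*
 * Example (editor's illustration): with inner RSS support compiled in
 * (HAVE_IBV_DEVICE_TUNNEL_SUPPORT), an inner hash on a tunnel flow, i.e.
 * tunnel == 1 and rss_desc->level >= 2, returns the verbs hash fields with
 * IBV_RX_HASH_INNER set. If the requested rss_desc->types do not intersect
 * @p layer_types (say ETH_RSS_UDP requested for an IPv4-only layer), the
 * function returns 0 and this layer contributes nothing to the hash.
 */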
/**
 * Look up and set the ptype in the Rx data part. A single ptype can be used;
 * if several tunnel rules are used on this queue, the tunnel ptype will be
 * cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must be always enabled (metadata may arrive
		 * from another port - not only from local flows).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
			struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	MLX5_ASSERT(dev->data->dev_started);
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->flow_mark_n--;
			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
		}
		if (tunnel) {
			unsigned int j;

			/* Decrease the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]--;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_trim(dev, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		unsigned int j;

		if (!(*priv->rxqs)[i])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[i],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
			rxq_ctrl->flow_tunnels_n[j] = 0;
		rxq_ctrl->rxq.tunnel = 0;
	}
}

/**
 * Set the Rx queue dynamic metadata (mask and offset) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *data;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		if (!(*priv->rxqs)[i])
			continue;
		data = (*priv->rxqs)[i];
		if (!rte_flow_dynf_metadata_avail()) {
			data->dynf_meta = 0;
			data->flow_meta_mask = 0;
			data->flow_meta_offset = -1;
		} else {
			data->dynf_meta = 1;
			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
		}
	}
}

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
		      enum rte_flow_action_type action)
{
	if (actions == NULL)
		return NULL;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
		if (actions->type == action)
			return actions;
	return NULL;
}

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't mark and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 flag"
					  " actions in same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "flag action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
			       uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't flag and mark in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 mark actions in same"
					  " flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "mark action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "drop action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
				uint64_t action_flags,
				struct rte_eth_dev *dev,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "queue action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the rss action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[in] item_flags
 *   Items that were detected.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
			      uint64_t action_flags,
			      struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attr,
			      uint64_t item_flags,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_rss *rss = action->conf;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int i;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions"
					  " in same flow");
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->func,
					  "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	if (rss->level > 2)
#else
	if (rss->level > 1)
#endif
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->level,
					  "tunnel RSS is not supported");
	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
	if (rss->key_len == 0 && rss->key != NULL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key length 0");
	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too small");
	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too large");
	if (rss->queue_num > priv->config.ind_table_max_size)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->queue_num,
					  "number of queues too large");
	if (rss->types & MLX5_RSS_HF_MASK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->types,
					  "some RSS protocols are not"
					  " supported");
	if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
	    !(rss->types & ETH_RSS_IP))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L3 partial RSS requested but L3 RSS"
					  " type not specified");
	if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
	    !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L4 partial RSS requested but L4 RSS"
					  " type not specified");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (!rss->queue_num)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No queues configured");
	for (i = 0; i != rss->queue_num; ++i) {
		if (rss->queue[i] >= priv->rxqs_n)
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue index out of range");
		if (!(*priv->rxqs)[rss->queue[i]])
			return rte_flow_error_set
				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue is not configured");
	}
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "rss action not supported for "
					  "egress");
	if (rss->level > 1 && !tunnel)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "inner RSS is not supported for "
					  "non-tunnel flows");
	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "RSS on eCPRI is not supported now");
	}
	return 0;
}
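/*
 * Minimal configuration that passes the checks above (editor's sketch,
 * assuming queues 0 and 1 exist and the default RSS key is used):
 *
 *	uint16_t queues[] = { 0, 1 };
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *		.level = 1,
 *		.types = ETH_RSS_IP,
 *		.key_len = 0,
 *		.key = NULL,
 *		.queue_num = 2,
 *		.queue = queues,
 *	};
 *
 * A non-NULL key must be exactly MLX5_RSS_HASH_KEY_LEN bytes long and
 * level > 1 is only accepted together with a tunnel item in the pattern.
 */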
/*
 * Validate the default miss action.
 *
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_default_miss(uint64_t action_flags,
				       const struct rte_flow_attr *attr,
				       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "default miss action not supported "
					  "for egress");
	if (attr->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
					  "only group 0 is supported");
	if (attr->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	return 0;
}

/*
 * Validate the count action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "count action not supported for "
					  "egress");
	return 0;
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attributes
 *   Pointer to flow attributes
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attributes,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t priority_max = priv->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL, "groups is not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL, "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "egress is not supported");
	if (attributes->transfer && !priv->config.dv_esw_en)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	return 0;
}

/**
 * Validate ICMP6 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
			      uint64_t item_flags,
			      uint8_t target_protocol,
			      struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp6 *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP6 layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 is mandatory to filter on"
					  " ICMP6");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp6_mask,
		 sizeof(struct rte_flow_item_icmp6), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate ICMP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint8_t target_protocol,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp *mask = item->mask;
	const struct rte_flow_item_icmp nic_mask = {
		.hdr.icmp_type = 0xff,
		.hdr.icmp_code = 0xff,
		.hdr.icmp_ident = RTE_BE16(0xffff),
		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 is mandatory to filter"
					  " on ICMP");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &nic_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_icmp), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate Ethernet item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	int ret;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
				       MLX5_FLOW_LAYER_OUTER_L2;

	if (item_flags & ethm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L2 layers not supported");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow "
					  "L3 layers");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow VLAN");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	return ret;
}

/**
 * Validate VLAN item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] dev
 *   Ethernet device flow is being created on.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     struct rte_eth_dev *dev,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(UINT16_MAX),
		.inner_type = RTE_BE16(UINT16_MAX),
	};
	uint16_t vlan_tag = 0;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	int ret;
	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
				       (MLX5_FLOW_LAYER_OUTER_L3 |
					MLX5_FLOW_LAYER_OUTER_L4);
	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
					MLX5_FLOW_LAYER_OUTER_VLAN;

	if (item_flags & vlanm)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple VLAN layers not supported");
	else if ((item_flags & l34m) != 0)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "VLAN cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_vlan),
					error);
	if (ret)
		return ret;
	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
		struct mlx5_priv *priv = dev->data->dev_private;

		if (priv->vmwa_context) {
			/*
			 * A non-NULL context means we have a virtual machine
			 * and SR-IOV enabled; we have to create a VLAN
			 * interface to make the hypervisor set up the E-Switch
			 * vport context correctly. We avoid creating multiple
			 * VLAN interfaces, so we cannot support a VLAN tag
			 * mask.
			 */
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "VLAN tag mask is not"
						  " supported in virtual"
						  " environment");
		}
	}
	if (spec) {
		vlan_tag = spec->tci;
		vlan_tag &= mask->tci;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	return 0;
}

/**
 * Validate IPV4 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the ethernet layer header (including dot1q).
 * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
 *   will be used to check whether item fields are supported.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint64_t last_item,
			     uint16_t ether_type,
			     const struct rte_flow_item_ipv4 *acc_mask,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
MLX5_FLOW_LAYER_INNER_L4 : 1853 MLX5_FLOW_LAYER_OUTER_L4; 1854 int ret; 1855 uint8_t next_proto = 0xFF; 1856 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1857 MLX5_FLOW_LAYER_OUTER_VLAN | 1858 MLX5_FLOW_LAYER_INNER_VLAN); 1859 1860 if ((last_item & l2_vlan) && ether_type && 1861 ether_type != RTE_ETHER_TYPE_IPV4) 1862 return rte_flow_error_set(error, EINVAL, 1863 RTE_FLOW_ERROR_TYPE_ITEM, item, 1864 "IPv4 cannot follow L2/VLAN layer " 1865 "which ether type is not IPv4"); 1866 if (item_flags & MLX5_FLOW_LAYER_IPIP) { 1867 if (mask && spec) 1868 next_proto = mask->hdr.next_proto_id & 1869 spec->hdr.next_proto_id; 1870 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1871 return rte_flow_error_set(error, EINVAL, 1872 RTE_FLOW_ERROR_TYPE_ITEM, 1873 item, 1874 "multiple tunnel " 1875 "not supported"); 1876 } 1877 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 1878 return rte_flow_error_set(error, EINVAL, 1879 RTE_FLOW_ERROR_TYPE_ITEM, item, 1880 "wrong tunnel type - IPv6 specified " 1881 "but IPv4 item provided"); 1882 if (item_flags & l3m) 1883 return rte_flow_error_set(error, ENOTSUP, 1884 RTE_FLOW_ERROR_TYPE_ITEM, item, 1885 "multiple L3 layers not supported"); 1886 else if (item_flags & l4m) 1887 return rte_flow_error_set(error, EINVAL, 1888 RTE_FLOW_ERROR_TYPE_ITEM, item, 1889 "L3 cannot follow an L4 layer."); 1890 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1891 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1892 return rte_flow_error_set(error, EINVAL, 1893 RTE_FLOW_ERROR_TYPE_ITEM, item, 1894 "L3 cannot follow an NVGRE layer."); 1895 if (!mask) 1896 mask = &rte_flow_item_ipv4_mask; 1897 else if (mask->hdr.next_proto_id != 0 && 1898 mask->hdr.next_proto_id != 0xff) 1899 return rte_flow_error_set(error, EINVAL, 1900 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 1901 "partial mask is not supported" 1902 " for protocol"); 1903 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1904 acc_mask ? (const uint8_t *)acc_mask 1905 : (const uint8_t *)&nic_mask, 1906 sizeof(struct rte_flow_item_ipv4), 1907 error); 1908 if (ret < 0) 1909 return ret; 1910 return 0; 1911 } 1912 1913 /** 1914 * Validate IPV6 item. 1915 * 1916 * @param[in] item 1917 * Item specification. 1918 * @param[in] item_flags 1919 * Bit-fields that holds the items detected until now. 1920 * @param[in] last_item 1921 * Previous validated item in the pattern items. 1922 * @param[in] ether_type 1923 * Type in the ethernet layer header (including dot1q). 1924 * @param[in] acc_mask 1925 * Acceptable mask, if NULL default internal default mask 1926 * will be used to check whether item fields are supported. 1927 * @param[out] error 1928 * Pointer to error structure. 1929 * 1930 * @return 1931 * 0 on success, a negative errno value otherwise and rte_errno is set. 
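 *
 * A minimal, illustrative call from a pattern-walk loop; @p item_flags and
 * @p tunnel are assumed to be caller-side state, as maintained by the flow
 * engines using these validators:
 * @code
 *	ret = mlx5_flow_validate_item_ipv6(item, item_flags, last_item,
 *					   ether_type, NULL, error);
 *	if (ret < 0)
 *		return ret;
 *	item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
 *			       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 * @endcode
 * Passing NULL as @p acc_mask falls back to the validator's internal
 * default mask.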
1932 */ 1933 int 1934 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 1935 uint64_t item_flags, 1936 uint64_t last_item, 1937 uint16_t ether_type, 1938 const struct rte_flow_item_ipv6 *acc_mask, 1939 struct rte_flow_error *error) 1940 { 1941 const struct rte_flow_item_ipv6 *mask = item->mask; 1942 const struct rte_flow_item_ipv6 *spec = item->spec; 1943 const struct rte_flow_item_ipv6 nic_mask = { 1944 .hdr = { 1945 .src_addr = 1946 "\xff\xff\xff\xff\xff\xff\xff\xff" 1947 "\xff\xff\xff\xff\xff\xff\xff\xff", 1948 .dst_addr = 1949 "\xff\xff\xff\xff\xff\xff\xff\xff" 1950 "\xff\xff\xff\xff\xff\xff\xff\xff", 1951 .vtc_flow = RTE_BE32(0xffffffff), 1952 .proto = 0xff, 1953 }, 1954 }; 1955 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1956 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1957 MLX5_FLOW_LAYER_OUTER_L3; 1958 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1959 MLX5_FLOW_LAYER_OUTER_L4; 1960 int ret; 1961 uint8_t next_proto = 0xFF; 1962 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1963 MLX5_FLOW_LAYER_OUTER_VLAN | 1964 MLX5_FLOW_LAYER_INNER_VLAN); 1965 1966 if ((last_item & l2_vlan) && ether_type && 1967 ether_type != RTE_ETHER_TYPE_IPV6) 1968 return rte_flow_error_set(error, EINVAL, 1969 RTE_FLOW_ERROR_TYPE_ITEM, item, 1970 "IPv6 cannot follow L2/VLAN layer " 1971 "which ether type is not IPv6"); 1972 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { 1973 if (mask && spec) 1974 next_proto = mask->hdr.proto & spec->hdr.proto; 1975 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1976 return rte_flow_error_set(error, EINVAL, 1977 RTE_FLOW_ERROR_TYPE_ITEM, 1978 item, 1979 "multiple tunnel " 1980 "not supported"); 1981 } 1982 if (item_flags & MLX5_FLOW_LAYER_IPIP) 1983 return rte_flow_error_set(error, EINVAL, 1984 RTE_FLOW_ERROR_TYPE_ITEM, item, 1985 "wrong tunnel type - IPv4 specified " 1986 "but IPv6 item provided"); 1987 if (item_flags & l3m) 1988 return rte_flow_error_set(error, ENOTSUP, 1989 RTE_FLOW_ERROR_TYPE_ITEM, item, 1990 "multiple L3 layers not supported"); 1991 else if (item_flags & l4m) 1992 return rte_flow_error_set(error, EINVAL, 1993 RTE_FLOW_ERROR_TYPE_ITEM, item, 1994 "L3 cannot follow an L4 layer."); 1995 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1996 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1997 return rte_flow_error_set(error, EINVAL, 1998 RTE_FLOW_ERROR_TYPE_ITEM, item, 1999 "L3 cannot follow an NVGRE layer."); 2000 if (!mask) 2001 mask = &rte_flow_item_ipv6_mask; 2002 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2003 acc_mask ? (const uint8_t *)acc_mask 2004 : (const uint8_t *)&nic_mask, 2005 sizeof(struct rte_flow_item_ipv6), 2006 error); 2007 if (ret < 0) 2008 return ret; 2009 return 0; 2010 } 2011 2012 /** 2013 * Validate UDP item. 2014 * 2015 * @param[in] item 2016 * Item specification. 2017 * @param[in] item_flags 2018 * Bit-fields that holds the items detected until now. 2019 * @param[in] target_protocol 2020 * The next protocol in the previous item. 2021 * @param[in] flow_mask 2022 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 2023 * @param[out] error 2024 * Pointer to error structure. 2025 * 2026 * @return 2027 * 0 on success, a negative errno value otherwise and rte_errno is set. 
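 *
 * Illustrative use only; the caller is assumed to have validated an L3 item
 * first so that the matching L3 bit is already set in @p item_flags, and
 * @p next_protocol stands for whatever protocol value the caller extracted
 * from that preceding L3 item:
 * @code
 *	ret = mlx5_flow_validate_item_udp(item, item_flags,
 *					  next_protocol, error);
 *	if (ret < 0)
 *		return ret;
 *	item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
 *			       MLX5_FLOW_LAYER_OUTER_L4_UDP;
 * @endcode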
2028 */ 2029 int 2030 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 2031 uint64_t item_flags, 2032 uint8_t target_protocol, 2033 struct rte_flow_error *error) 2034 { 2035 const struct rte_flow_item_udp *mask = item->mask; 2036 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2037 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2038 MLX5_FLOW_LAYER_OUTER_L3; 2039 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2040 MLX5_FLOW_LAYER_OUTER_L4; 2041 int ret; 2042 2043 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 2044 return rte_flow_error_set(error, EINVAL, 2045 RTE_FLOW_ERROR_TYPE_ITEM, item, 2046 "protocol filtering not compatible" 2047 " with UDP layer"); 2048 if (!(item_flags & l3m)) 2049 return rte_flow_error_set(error, EINVAL, 2050 RTE_FLOW_ERROR_TYPE_ITEM, item, 2051 "L3 is mandatory to filter on L4"); 2052 if (item_flags & l4m) 2053 return rte_flow_error_set(error, EINVAL, 2054 RTE_FLOW_ERROR_TYPE_ITEM, item, 2055 "multiple L4 layers not supported"); 2056 if (!mask) 2057 mask = &rte_flow_item_udp_mask; 2058 ret = mlx5_flow_item_acceptable 2059 (item, (const uint8_t *)mask, 2060 (const uint8_t *)&rte_flow_item_udp_mask, 2061 sizeof(struct rte_flow_item_udp), error); 2062 if (ret < 0) 2063 return ret; 2064 return 0; 2065 } 2066 2067 /** 2068 * Validate TCP item. 2069 * 2070 * @param[in] item 2071 * Item specification. 2072 * @param[in] item_flags 2073 * Bit-fields that holds the items detected until now. 2074 * @param[in] target_protocol 2075 * The next protocol in the previous item. 2076 * @param[out] error 2077 * Pointer to error structure. 2078 * 2079 * @return 2080 * 0 on success, a negative errno value otherwise and rte_errno is set. 2081 */ 2082 int 2083 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 2084 uint64_t item_flags, 2085 uint8_t target_protocol, 2086 const struct rte_flow_item_tcp *flow_mask, 2087 struct rte_flow_error *error) 2088 { 2089 const struct rte_flow_item_tcp *mask = item->mask; 2090 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2091 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2092 MLX5_FLOW_LAYER_OUTER_L3; 2093 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2094 MLX5_FLOW_LAYER_OUTER_L4; 2095 int ret; 2096 2097 MLX5_ASSERT(flow_mask); 2098 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 2099 return rte_flow_error_set(error, EINVAL, 2100 RTE_FLOW_ERROR_TYPE_ITEM, item, 2101 "protocol filtering not compatible" 2102 " with TCP layer"); 2103 if (!(item_flags & l3m)) 2104 return rte_flow_error_set(error, EINVAL, 2105 RTE_FLOW_ERROR_TYPE_ITEM, item, 2106 "L3 is mandatory to filter on L4"); 2107 if (item_flags & l4m) 2108 return rte_flow_error_set(error, EINVAL, 2109 RTE_FLOW_ERROR_TYPE_ITEM, item, 2110 "multiple L4 layers not supported"); 2111 if (!mask) 2112 mask = &rte_flow_item_tcp_mask; 2113 ret = mlx5_flow_item_acceptable 2114 (item, (const uint8_t *)mask, 2115 (const uint8_t *)flow_mask, 2116 sizeof(struct rte_flow_item_tcp), error); 2117 if (ret < 0) 2118 return ret; 2119 return 0; 2120 } 2121 2122 /** 2123 * Validate VXLAN item. 2124 * 2125 * @param[in] item 2126 * Item specification. 2127 * @param[in] item_flags 2128 * Bit-fields that holds the items detected until now. 2129 * @param[in] target_protocol 2130 * The next protocol in the previous item. 2131 * @param[out] error 2132 * Pointer to error structure. 2133 * 2134 * @return 2135 * 0 on success, a negative errno value otherwise and rte_errno is set. 
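 *
 * Illustrative sketch for a pattern like ETH / IPV4 / UDP / VXLAN; the layer
 * bit name is assumed from mlx5_flow.h and the surrounding caller logic is
 * only an example:
 * @code
 *	ret = mlx5_flow_validate_item_vxlan(item, item_flags, error);
 *	if (ret < 0)
 *		return ret;
 *	item_flags |= MLX5_FLOW_LAYER_VXLAN;
 * @endcode
 * The outer UDP bit (and outer layer bits in general) must already be set
 * in @p item_flags, otherwise the checks below fail.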
2136 */ 2137 int 2138 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, 2139 uint64_t item_flags, 2140 struct rte_flow_error *error) 2141 { 2142 const struct rte_flow_item_vxlan *spec = item->spec; 2143 const struct rte_flow_item_vxlan *mask = item->mask; 2144 int ret; 2145 union vni { 2146 uint32_t vlan_id; 2147 uint8_t vni[4]; 2148 } id = { .vlan_id = 0, }; 2149 2150 2151 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2152 return rte_flow_error_set(error, ENOTSUP, 2153 RTE_FLOW_ERROR_TYPE_ITEM, item, 2154 "multiple tunnel layers not" 2155 " supported"); 2156 /* 2157 * Verify only UDPv4 is present as defined in 2158 * https://tools.ietf.org/html/rfc7348 2159 */ 2160 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2161 return rte_flow_error_set(error, EINVAL, 2162 RTE_FLOW_ERROR_TYPE_ITEM, item, 2163 "no outer UDP layer found"); 2164 if (!mask) 2165 mask = &rte_flow_item_vxlan_mask; 2166 ret = mlx5_flow_item_acceptable 2167 (item, (const uint8_t *)mask, 2168 (const uint8_t *)&rte_flow_item_vxlan_mask, 2169 sizeof(struct rte_flow_item_vxlan), 2170 error); 2171 if (ret < 0) 2172 return ret; 2173 if (spec) { 2174 memcpy(&id.vni[1], spec->vni, 3); 2175 memcpy(&id.vni[1], mask->vni, 3); 2176 } 2177 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2178 return rte_flow_error_set(error, ENOTSUP, 2179 RTE_FLOW_ERROR_TYPE_ITEM, item, 2180 "VXLAN tunnel must be fully defined"); 2181 return 0; 2182 } 2183 2184 /** 2185 * Validate VXLAN_GPE item. 2186 * 2187 * @param[in] item 2188 * Item specification. 2189 * @param[in] item_flags 2190 * Bit-fields that hold the items detected until now. 2191 * @param[in] dev 2192 * Pointer to the Ethernet device structure. 2195 * @param[out] error 2196 * Pointer to error structure. 2197 * 2198 * @return 2199 * 0 on success, a negative errno value otherwise and rte_errno is set.
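 *
 * Usage mirrors mlx5_flow_validate_item_vxlan(), with the additional
 * requirement that the l3_vxlan_en device parameter is enabled
 * (illustrative sketch, caller-side variables assumed):
 * @code
 *	ret = mlx5_flow_validate_item_vxlan_gpe(item, item_flags, dev, error);
 *	if (ret < 0)
 *		return ret;
 * @endcode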
2200 */ 2201 int 2202 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 2203 uint64_t item_flags, 2204 struct rte_eth_dev *dev, 2205 struct rte_flow_error *error) 2206 { 2207 struct mlx5_priv *priv = dev->data->dev_private; 2208 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 2209 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 2210 int ret; 2211 union vni { 2212 uint32_t vlan_id; 2213 uint8_t vni[4]; 2214 } id = { .vlan_id = 0, }; 2215 2216 if (!priv->config.l3_vxlan_en) 2217 return rte_flow_error_set(error, ENOTSUP, 2218 RTE_FLOW_ERROR_TYPE_ITEM, item, 2219 "L3 VXLAN is not enabled by device" 2220 " parameter and/or not configured in" 2221 " firmware"); 2222 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2223 return rte_flow_error_set(error, ENOTSUP, 2224 RTE_FLOW_ERROR_TYPE_ITEM, item, 2225 "multiple tunnel layers not" 2226 " supported"); 2227 /* 2228 * Verify only UDPv4 is present as defined in 2229 * https://tools.ietf.org/html/rfc7348 2230 */ 2231 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2232 return rte_flow_error_set(error, EINVAL, 2233 RTE_FLOW_ERROR_TYPE_ITEM, item, 2234 "no outer UDP layer found"); 2235 if (!mask) 2236 mask = &rte_flow_item_vxlan_gpe_mask; 2237 ret = mlx5_flow_item_acceptable 2238 (item, (const uint8_t *)mask, 2239 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2240 sizeof(struct rte_flow_item_vxlan_gpe), 2241 error); 2242 if (ret < 0) 2243 return ret; 2244 if (spec) { 2245 if (spec->protocol) 2246 return rte_flow_error_set(error, ENOTSUP, 2247 RTE_FLOW_ERROR_TYPE_ITEM, 2248 item, 2249 "VxLAN-GPE protocol" 2250 " not supported"); 2251 memcpy(&id.vni[1], spec->vni, 3); 2252 memcpy(&id.vni[1], mask->vni, 3); 2253 } 2254 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2255 return rte_flow_error_set(error, ENOTSUP, 2256 RTE_FLOW_ERROR_TYPE_ITEM, item, 2257 "VXLAN-GPE tunnel must be fully" 2258 " defined"); 2259 return 0; 2260 } 2261 /** 2262 * Validate GRE Key item. 2263 * 2264 * @param[in] item 2265 * Item specification. 2266 * @param[in] item_flags 2267 * Bit flags to mark detected items. 2268 * @param[in] gre_item 2269 * Pointer to gre_item 2270 * @param[out] error 2271 * Pointer to error structure. 2272 * 2273 * @return 2274 * 0 on success, a negative errno value otherwise and rte_errno is set. 
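 *
 * Illustrative only: the GRE key item is expected to directly follow a GRE
 * item whose c_rsvd0_ver word has the key-present bit (0x2000) set, e.g.
 * @code
 *	const struct rte_flow_item_gre gre_spec = {
 *		.c_rsvd0_ver = RTE_BE16(0x2000), // K bit on
 *	};
 *	rte_be32_t key_spec = RTE_BE32(0x12345678);
 * @endcode
 * and @p gre_item must point back to that preceding GRE item.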
2275 */ 2276 int 2277 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2278 uint64_t item_flags, 2279 const struct rte_flow_item *gre_item, 2280 struct rte_flow_error *error) 2281 { 2282 const rte_be32_t *mask = item->mask; 2283 int ret = 0; 2284 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2285 const struct rte_flow_item_gre *gre_spec; 2286 const struct rte_flow_item_gre *gre_mask; 2287 2288 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2289 return rte_flow_error_set(error, ENOTSUP, 2290 RTE_FLOW_ERROR_TYPE_ITEM, item, 2291 "Multiple GRE key not support"); 2292 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2293 return rte_flow_error_set(error, ENOTSUP, 2294 RTE_FLOW_ERROR_TYPE_ITEM, item, 2295 "No preceding GRE header"); 2296 if (item_flags & MLX5_FLOW_LAYER_INNER) 2297 return rte_flow_error_set(error, ENOTSUP, 2298 RTE_FLOW_ERROR_TYPE_ITEM, item, 2299 "GRE key following a wrong item"); 2300 gre_mask = gre_item->mask; 2301 if (!gre_mask) 2302 gre_mask = &rte_flow_item_gre_mask; 2303 gre_spec = gre_item->spec; 2304 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2305 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2306 return rte_flow_error_set(error, EINVAL, 2307 RTE_FLOW_ERROR_TYPE_ITEM, item, 2308 "Key bit must be on"); 2309 2310 if (!mask) 2311 mask = &gre_key_default_mask; 2312 ret = mlx5_flow_item_acceptable 2313 (item, (const uint8_t *)mask, 2314 (const uint8_t *)&gre_key_default_mask, 2315 sizeof(rte_be32_t), error); 2316 return ret; 2317 } 2318 2319 /** 2320 * Validate GRE item. 2321 * 2322 * @param[in] item 2323 * Item specification. 2324 * @param[in] item_flags 2325 * Bit flags to mark detected items. 2326 * @param[in] target_protocol 2327 * The next protocol in the previous item. 2328 * @param[out] error 2329 * Pointer to error structure. 2330 * 2331 * @return 2332 * 0 on success, a negative errno value otherwise and rte_errno is set. 
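 *
 * A minimal illustrative call; @p item_flags handling is assumed to be done
 * by the caller as for the other tunnel items:
 * @code
 *	ret = mlx5_flow_validate_item_gre(item, item_flags,
 *					  next_protocol, error);
 *	if (ret < 0)
 *		return ret;
 *	item_flags |= MLX5_FLOW_LAYER_GRE;
 * @endcode
 * The nic_mask below restricts matching to the checksum/key/sequence
 * present bits (0xB000) and the protocol field.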
2333 */ 2334 int 2335 mlx5_flow_validate_item_gre(const struct rte_flow_item *item, 2336 uint64_t item_flags, 2337 uint8_t target_protocol, 2338 struct rte_flow_error *error) 2339 { 2340 const struct rte_flow_item_gre *spec __rte_unused = item->spec; 2341 const struct rte_flow_item_gre *mask = item->mask; 2342 int ret; 2343 const struct rte_flow_item_gre nic_mask = { 2344 .c_rsvd0_ver = RTE_BE16(0xB000), 2345 .protocol = RTE_BE16(UINT16_MAX), 2346 }; 2347 2348 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2349 return rte_flow_error_set(error, EINVAL, 2350 RTE_FLOW_ERROR_TYPE_ITEM, item, 2351 "protocol filtering not compatible" 2352 " with this GRE layer"); 2353 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2354 return rte_flow_error_set(error, ENOTSUP, 2355 RTE_FLOW_ERROR_TYPE_ITEM, item, 2356 "multiple tunnel layers not" 2357 " supported"); 2358 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2359 return rte_flow_error_set(error, ENOTSUP, 2360 RTE_FLOW_ERROR_TYPE_ITEM, item, 2361 "L3 Layer is missing"); 2362 if (!mask) 2363 mask = &rte_flow_item_gre_mask; 2364 ret = mlx5_flow_item_acceptable 2365 (item, (const uint8_t *)mask, 2366 (const uint8_t *)&nic_mask, 2367 sizeof(struct rte_flow_item_gre), error); 2368 if (ret < 0) 2369 return ret; 2370 #ifndef HAVE_MLX5DV_DR 2371 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT 2372 if (spec && (spec->protocol & mask->protocol)) 2373 return rte_flow_error_set(error, ENOTSUP, 2374 RTE_FLOW_ERROR_TYPE_ITEM, item, 2375 "without MPLS support the" 2376 " specification cannot be used for" 2377 " filtering"); 2378 #endif 2379 #endif 2380 return 0; 2381 } 2382 2383 /** 2384 * Validate Geneve item. 2385 * 2386 * @param[in] item 2387 * Item specification. 2388 * @param[in] item_flags 2389 * Bit-fields that hold the items detected until now. 2390 * @param[in] dev 2391 * Pointer to the Ethernet device structure. 2392 * @param[out] error 2393 * Pointer to error structure. 2394 * 2395 * @return 2396 * 0 on success, a negative errno value otherwise and rte_errno is set. 2397 */ 2398 2399 int 2400 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, 2401 uint64_t item_flags, 2402 struct rte_eth_dev *dev, 2403 struct rte_flow_error *error) 2404 { 2405 struct mlx5_priv *priv = dev->data->dev_private; 2406 const struct rte_flow_item_geneve *spec = item->spec; 2407 const struct rte_flow_item_geneve *mask = item->mask; 2408 int ret; 2409 uint16_t gbhdr; 2410 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2411 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2412 const struct rte_flow_item_geneve nic_mask = { 2413 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2414 .vni = "\xff\xff\xff", 2415 .protocol = RTE_BE16(UINT16_MAX), 2416 }; 2417 2418 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2419 return rte_flow_error_set(error, ENOTSUP, 2420 RTE_FLOW_ERROR_TYPE_ITEM, item, 2421 "L3 Geneve is not enabled by device" 2422 " parameter and/or not configured in" 2423 " firmware"); 2424 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2425 return rte_flow_error_set(error, ENOTSUP, 2426 RTE_FLOW_ERROR_TYPE_ITEM, item, 2427 "multiple tunnel layers not" 2428 " supported"); 2429 /* 2430 * Verify only UDPv4 is present as defined in 2431 * https://tools.ietf.org/html/rfc7348 2432 */ 2433 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2434 return rte_flow_error_set(error, EINVAL, 2435 RTE_FLOW_ERROR_TYPE_ITEM, item, 2436 "no outer UDP layer found"); 2437 if (!mask) 2438 mask = &rte_flow_item_geneve_mask; 2439 ret = mlx5_flow_item_acceptable 2440 (item, (const uint8_t *)mask, 2441 (const uint8_t *)&nic_mask, 2442 sizeof(struct rte_flow_item_geneve), error); 2443 if (ret) 2444 return ret; 2445 if (spec) { 2446 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2447 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2448 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2449 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2450 return rte_flow_error_set(error, ENOTSUP, 2451 RTE_FLOW_ERROR_TYPE_ITEM, 2452 item, 2453 "Geneve protocol unsupported" 2454 " fields are being used"); 2455 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2456 return rte_flow_error_set 2457 (error, ENOTSUP, 2458 RTE_FLOW_ERROR_TYPE_ITEM, 2459 item, 2460 "Unsupported Geneve options length"); 2461 } 2462 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2463 return rte_flow_error_set 2464 (error, ENOTSUP, 2465 RTE_FLOW_ERROR_TYPE_ITEM, item, 2466 "Geneve tunnel must be fully defined"); 2467 return 0; 2468 } 2469 2470 /** 2471 * Validate MPLS item. 2472 * 2473 * @param[in] dev 2474 * Pointer to the rte_eth_dev structure. 2475 * @param[in] item 2476 * Item specification. 2477 * @param[in] item_flags 2478 * Bit-fields that holds the items detected until now. 2479 * @param[in] prev_layer 2480 * The protocol layer indicated in previous item. 2481 * @param[out] error 2482 * Pointer to error structure. 2483 * 2484 * @return 2485 * 0 on success, a negative errno value otherwise and rte_errno is set. 2486 */ 2487 int 2488 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2489 const struct rte_flow_item *item __rte_unused, 2490 uint64_t item_flags __rte_unused, 2491 uint64_t prev_layer __rte_unused, 2492 struct rte_flow_error *error) 2493 { 2494 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2495 const struct rte_flow_item_mpls *mask = item->mask; 2496 struct mlx5_priv *priv = dev->data->dev_private; 2497 int ret; 2498 2499 if (!priv->config.mpls_en) 2500 return rte_flow_error_set(error, ENOTSUP, 2501 RTE_FLOW_ERROR_TYPE_ITEM, item, 2502 "MPLS not supported or" 2503 " disabled in firmware" 2504 " configuration."); 2505 /* MPLS over IP, UDP, GRE is allowed */ 2506 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2507 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2508 MLX5_FLOW_LAYER_GRE))) 2509 return rte_flow_error_set(error, EINVAL, 2510 RTE_FLOW_ERROR_TYPE_ITEM, item, 2511 "protocol filtering not compatible" 2512 " with MPLS layer"); 2513 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
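	 * For instance, ETH / IPV4 / GRE / MPLS is accepted, while stacking
	 * MPLS on top of another tunnel such as VXLAN is rejected right
	 * below.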
*/ 2514 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2515 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2516 return rte_flow_error_set(error, ENOTSUP, 2517 RTE_FLOW_ERROR_TYPE_ITEM, item, 2518 "multiple tunnel layers not" 2519 " supported"); 2520 if (!mask) 2521 mask = &rte_flow_item_mpls_mask; 2522 ret = mlx5_flow_item_acceptable 2523 (item, (const uint8_t *)mask, 2524 (const uint8_t *)&rte_flow_item_mpls_mask, 2525 sizeof(struct rte_flow_item_mpls), error); 2526 if (ret < 0) 2527 return ret; 2528 return 0; 2529 #else 2530 return rte_flow_error_set(error, ENOTSUP, 2531 RTE_FLOW_ERROR_TYPE_ITEM, item, 2532 "MPLS is not supported by Verbs, please" 2533 " update."); 2534 #endif 2535 } 2536 2537 /** 2538 * Validate NVGRE item. 2539 * 2540 * @param[in] item 2541 * Item specification. 2542 * @param[in] item_flags 2543 * Bit flags to mark detected items. 2544 * @param[in] target_protocol 2545 * The next protocol in the previous item. 2546 * @param[out] error 2547 * Pointer to error structure. 2548 * 2549 * @return 2550 * 0 on success, a negative errno value otherwise and rte_errno is set. 2551 */ 2552 int 2553 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2554 uint64_t item_flags, 2555 uint8_t target_protocol, 2556 struct rte_flow_error *error) 2557 { 2558 const struct rte_flow_item_nvgre *mask = item->mask; 2559 int ret; 2560 2561 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2562 return rte_flow_error_set(error, EINVAL, 2563 RTE_FLOW_ERROR_TYPE_ITEM, item, 2564 "protocol filtering not compatible" 2565 " with this GRE layer"); 2566 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2567 return rte_flow_error_set(error, ENOTSUP, 2568 RTE_FLOW_ERROR_TYPE_ITEM, item, 2569 "multiple tunnel layers not" 2570 " supported"); 2571 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2572 return rte_flow_error_set(error, ENOTSUP, 2573 RTE_FLOW_ERROR_TYPE_ITEM, item, 2574 "L3 Layer is missing"); 2575 if (!mask) 2576 mask = &rte_flow_item_nvgre_mask; 2577 ret = mlx5_flow_item_acceptable 2578 (item, (const uint8_t *)mask, 2579 (const uint8_t *)&rte_flow_item_nvgre_mask, 2580 sizeof(struct rte_flow_item_nvgre), error); 2581 if (ret < 0) 2582 return ret; 2583 return 0; 2584 } 2585 2586 /** 2587 * Validate eCPRI item. 2588 * 2589 * @param[in] item 2590 * Item specification. 2591 * @param[in] item_flags 2592 * Bit-fields that holds the items detected until now. 2593 * @param[in] last_item 2594 * Previous validated item in the pattern items. 2595 * @param[in] ether_type 2596 * Type in the ethernet layer header (including dot1q). 2597 * @param[in] acc_mask 2598 * Acceptable mask, if NULL default internal default mask 2599 * will be used to check whether item fields are supported. 2600 * @param[out] error 2601 * Pointer to error structure. 2602 * 2603 * @return 2604 * 0 on success, a negative errno value otherwise and rte_errno is set. 
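 *
 * A minimal illustrative call; passing NULL as @p acc_mask selects the
 * internal default mask:
 * @code
 *	ret = mlx5_flow_validate_item_ecpri(item, item_flags, last_item,
 *					    ether_type, NULL, error);
 *	if (ret < 0)
 *		return ret;
 * @endcode
 * Note the checks below only accept a common-header type mask of 0 or 0xff,
 * and a non-zero message-body mask only together with a fully masked type.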
2605 */ 2606 int 2607 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 2608 uint64_t item_flags, 2609 uint64_t last_item, 2610 uint16_t ether_type, 2611 const struct rte_flow_item_ecpri *acc_mask, 2612 struct rte_flow_error *error) 2613 { 2614 const struct rte_flow_item_ecpri *mask = item->mask; 2615 const struct rte_flow_item_ecpri nic_mask = { 2616 .hdr = { 2617 .common = { 2618 .u32 = 2619 RTE_BE32(((const struct rte_ecpri_common_hdr) { 2620 .type = 0xFF, 2621 }).u32), 2622 }, 2623 .dummy[0] = 0xFFFFFFFF, 2624 }, 2625 }; 2626 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 2627 MLX5_FLOW_LAYER_OUTER_VLAN); 2628 struct rte_flow_item_ecpri mask_lo; 2629 2630 if ((last_item & outer_l2_vlan) && ether_type && 2631 ether_type != RTE_ETHER_TYPE_ECPRI) 2632 return rte_flow_error_set(error, EINVAL, 2633 RTE_FLOW_ERROR_TYPE_ITEM, item, 2634 "eCPRI cannot follow L2/VLAN layer " 2635 "which ether type is not 0xAEFE."); 2636 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2637 return rte_flow_error_set(error, EINVAL, 2638 RTE_FLOW_ERROR_TYPE_ITEM, item, 2639 "eCPRI with tunnel is not supported " 2640 "right now."); 2641 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 2642 return rte_flow_error_set(error, ENOTSUP, 2643 RTE_FLOW_ERROR_TYPE_ITEM, item, 2644 "multiple L3 layers not supported"); 2645 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 2646 return rte_flow_error_set(error, EINVAL, 2647 RTE_FLOW_ERROR_TYPE_ITEM, item, 2648 "eCPRI cannot follow a TCP layer."); 2649 /* In specification, eCPRI could be over UDP layer. */ 2650 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 2651 return rte_flow_error_set(error, EINVAL, 2652 RTE_FLOW_ERROR_TYPE_ITEM, item, 2653 "eCPRI over UDP layer is not yet " 2654 "supported right now."); 2655 /* Mask for type field in common header could be zero. */ 2656 if (!mask) 2657 mask = &rte_flow_item_ecpri_mask; 2658 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 2659 /* Input mask is in big-endian format. */ 2660 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 2661 return rte_flow_error_set(error, EINVAL, 2662 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2663 "partial mask is not supported " 2664 "for protocol"); 2665 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 2666 return rte_flow_error_set(error, EINVAL, 2667 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2668 "message header mask must be after " 2669 "a type mask"); 2670 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2671 acc_mask ? (const uint8_t *)acc_mask 2672 : (const uint8_t *)&nic_mask, 2673 sizeof(struct rte_flow_item_ecpri), 2674 error); 2675 } 2676 2677 /* Allocate unique ID for the split Q/RSS subflows. */ 2678 static uint32_t 2679 flow_qrss_get_id(struct rte_eth_dev *dev) 2680 { 2681 struct mlx5_priv *priv = dev->data->dev_private; 2682 uint32_t qrss_id, ret; 2683 2684 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2685 if (ret) 2686 return 0; 2687 MLX5_ASSERT(qrss_id); 2688 return qrss_id; 2689 } 2690 2691 /* Free unique ID for the split Q/RSS subflows. */ 2692 static void 2693 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2694 { 2695 struct mlx5_priv *priv = dev->data->dev_private; 2696 2697 if (qrss_id) 2698 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2699 } 2700 2701 /** 2702 * Release resource related QUEUE/RSS action split. 2703 * 2704 * @param dev 2705 * Pointer to Ethernet device. 2706 * @param flow 2707 * Flow to release id's from. 
2708 */ 2709 static void 2710 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2711 struct rte_flow *flow) 2712 { 2713 struct mlx5_priv *priv = dev->data->dev_private; 2714 uint32_t handle_idx; 2715 struct mlx5_flow_handle *dev_handle; 2716 2717 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2718 handle_idx, dev_handle, next) 2719 if (dev_handle->split_flow_id) 2720 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2721 } 2722 2723 static int 2724 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2725 const struct rte_flow_attr *attr __rte_unused, 2726 const struct rte_flow_item items[] __rte_unused, 2727 const struct rte_flow_action actions[] __rte_unused, 2728 bool external __rte_unused, 2729 int hairpin __rte_unused, 2730 struct rte_flow_error *error) 2731 { 2732 return rte_flow_error_set(error, ENOTSUP, 2733 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2734 } 2735 2736 static struct mlx5_flow * 2737 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2738 const struct rte_flow_attr *attr __rte_unused, 2739 const struct rte_flow_item items[] __rte_unused, 2740 const struct rte_flow_action actions[] __rte_unused, 2741 struct rte_flow_error *error) 2742 { 2743 rte_flow_error_set(error, ENOTSUP, 2744 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2745 return NULL; 2746 } 2747 2748 static int 2749 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2750 struct mlx5_flow *dev_flow __rte_unused, 2751 const struct rte_flow_attr *attr __rte_unused, 2752 const struct rte_flow_item items[] __rte_unused, 2753 const struct rte_flow_action actions[] __rte_unused, 2754 struct rte_flow_error *error) 2755 { 2756 return rte_flow_error_set(error, ENOTSUP, 2757 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2758 } 2759 2760 static int 2761 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2762 struct rte_flow *flow __rte_unused, 2763 struct rte_flow_error *error) 2764 { 2765 return rte_flow_error_set(error, ENOTSUP, 2766 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2767 } 2768 2769 static void 2770 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2771 struct rte_flow *flow __rte_unused) 2772 { 2773 } 2774 2775 static void 2776 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2777 struct rte_flow *flow __rte_unused) 2778 { 2779 } 2780 2781 static int 2782 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2783 struct rte_flow *flow __rte_unused, 2784 const struct rte_flow_action *actions __rte_unused, 2785 void *data __rte_unused, 2786 struct rte_flow_error *error) 2787 { 2788 return rte_flow_error_set(error, ENOTSUP, 2789 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2790 } 2791 2792 /* Void driver to protect from null pointer reference. */ 2793 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2794 .validate = flow_null_validate, 2795 .prepare = flow_null_prepare, 2796 .translate = flow_null_translate, 2797 .apply = flow_null_apply, 2798 .remove = flow_null_remove, 2799 .destroy = flow_null_destroy, 2800 .query = flow_null_query, 2801 }; 2802 2803 /** 2804 * Select flow driver type according to flow attributes and device 2805 * configuration. 2806 * 2807 * @param[in] dev 2808 * Pointer to the dev structure. 2809 * @param[in] attr 2810 * Pointer to the flow attributes. 2811 * 2812 * @return 2813 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
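 *
 * Typical use together with flow_get_drv_ops(); this is essentially what
 * flow_drv_validate() below does:
 * @code
 *	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 *	const struct mlx5_flow_driver_ops *fops = flow_get_drv_ops(type);
 *
 *	ret = fops->validate(dev, attr, items, actions, external,
 *			     hairpin, error);
 * @endcode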
2814 */ 2815 static enum mlx5_flow_drv_type 2816 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2817 { 2818 struct mlx5_priv *priv = dev->data->dev_private; 2819 /* The OS can determine first a specific flow type (DV, VERBS) */ 2820 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2821 2822 if (type != MLX5_FLOW_TYPE_MAX) 2823 return type; 2824 /* If no OS specific type - continue with DV/VERBS selection */ 2825 if (attr->transfer && priv->config.dv_esw_en) 2826 type = MLX5_FLOW_TYPE_DV; 2827 if (!attr->transfer) 2828 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2829 MLX5_FLOW_TYPE_VERBS; 2830 return type; 2831 } 2832 2833 #define flow_get_drv_ops(type) flow_drv_ops[type] 2834 2835 /** 2836 * Flow driver validation API. This abstracts calling driver specific functions. 2837 * The type of flow driver is determined according to flow attributes. 2838 * 2839 * @param[in] dev 2840 * Pointer to the dev structure. 2841 * @param[in] attr 2842 * Pointer to the flow attributes. 2843 * @param[in] items 2844 * Pointer to the list of items. 2845 * @param[in] actions 2846 * Pointer to the list of actions. 2847 * @param[in] external 2848 * This flow rule is created by request external to PMD. 2849 * @param[in] hairpin 2850 * Number of hairpin TX actions, 0 means classic flow. 2851 * @param[out] error 2852 * Pointer to the error structure. 2853 * 2854 * @return 2855 * 0 on success, a negative errno value otherwise and rte_errno is set. 2856 */ 2857 static inline int 2858 flow_drv_validate(struct rte_eth_dev *dev, 2859 const struct rte_flow_attr *attr, 2860 const struct rte_flow_item items[], 2861 const struct rte_flow_action actions[], 2862 bool external, int hairpin, struct rte_flow_error *error) 2863 { 2864 const struct mlx5_flow_driver_ops *fops; 2865 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2866 2867 fops = flow_get_drv_ops(type); 2868 return fops->validate(dev, attr, items, actions, external, 2869 hairpin, error); 2870 } 2871 2872 /** 2873 * Flow driver preparation API. This abstracts calling driver specific 2874 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2875 * calculates the size of memory required for device flow, allocates the memory, 2876 * initializes the device flow and returns the pointer. 2877 * 2878 * @note 2879 * This function initializes device flow structure such as dv or verbs in 2880 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2881 * rest. For example, adding returning device flow to flow->dev_flow list and 2882 * setting backward reference to the flow should be done out of this function. 2883 * layers field is not filled either. 2884 * 2885 * @param[in] dev 2886 * Pointer to the dev structure. 2887 * @param[in] attr 2888 * Pointer to the flow attributes. 2889 * @param[in] items 2890 * Pointer to the list of items. 2891 * @param[in] actions 2892 * Pointer to the list of actions. 2893 * @param[in] flow_idx 2894 * This memory pool index to the flow. 2895 * @param[out] error 2896 * Pointer to the error structure. 2897 * 2898 * @return 2899 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
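 *
 * Illustrative call sequence (a sketch of what the flow creation path is
 * expected to do, caller-side variables assumed):
 * @code
 *	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
 *				    flow_idx, error);
 *	if (!dev_flow)
 *		return -rte_errno;
 *	dev_flow->flow = flow;
 *	ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 * @endcode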
2900 */ 2901 static inline struct mlx5_flow * 2902 flow_drv_prepare(struct rte_eth_dev *dev, 2903 const struct rte_flow *flow, 2904 const struct rte_flow_attr *attr, 2905 const struct rte_flow_item items[], 2906 const struct rte_flow_action actions[], 2907 uint32_t flow_idx, 2908 struct rte_flow_error *error) 2909 { 2910 const struct mlx5_flow_driver_ops *fops; 2911 enum mlx5_flow_drv_type type = flow->drv_type; 2912 struct mlx5_flow *mlx5_flow = NULL; 2913 2914 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2915 fops = flow_get_drv_ops(type); 2916 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2917 if (mlx5_flow) 2918 mlx5_flow->flow_idx = flow_idx; 2919 return mlx5_flow; 2920 } 2921 2922 /** 2923 * Flow driver translation API. This abstracts calling driver specific 2924 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2925 * translates a generic flow into a driver flow. flow_drv_prepare() must 2926 * precede. 2927 * 2928 * @note 2929 * dev_flow->layers could be filled as a result of parsing during translation 2930 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2931 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2932 * flow->actions could be overwritten even though all the expanded dev_flows 2933 * have the same actions. 2934 * 2935 * @param[in] dev 2936 * Pointer to the rte dev structure. 2937 * @param[in, out] dev_flow 2938 * Pointer to the mlx5 flow. 2939 * @param[in] attr 2940 * Pointer to the flow attributes. 2941 * @param[in] items 2942 * Pointer to the list of items. 2943 * @param[in] actions 2944 * Pointer to the list of actions. 2945 * @param[out] error 2946 * Pointer to the error structure. 2947 * 2948 * @return 2949 * 0 on success, a negative errno value otherwise and rte_errno is set. 2950 */ 2951 static inline int 2952 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2953 const struct rte_flow_attr *attr, 2954 const struct rte_flow_item items[], 2955 const struct rte_flow_action actions[], 2956 struct rte_flow_error *error) 2957 { 2958 const struct mlx5_flow_driver_ops *fops; 2959 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2960 2961 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2962 fops = flow_get_drv_ops(type); 2963 return fops->translate(dev, dev_flow, attr, items, actions, error); 2964 } 2965 2966 /** 2967 * Flow driver apply API. This abstracts calling driver specific functions. 2968 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2969 * translated driver flows on to device. flow_drv_translate() must precede. 2970 * 2971 * @param[in] dev 2972 * Pointer to Ethernet device structure. 2973 * @param[in, out] flow 2974 * Pointer to flow structure. 2975 * @param[out] error 2976 * Pointer to error structure. 2977 * 2978 * @return 2979 * 0 on success, a negative errno value otherwise and rte_errno is set. 2980 */ 2981 static inline int 2982 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2983 struct rte_flow_error *error) 2984 { 2985 const struct mlx5_flow_driver_ops *fops; 2986 enum mlx5_flow_drv_type type = flow->drv_type; 2987 2988 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2989 fops = flow_get_drv_ops(type); 2990 return fops->apply(dev, flow, error); 2991 } 2992 2993 /** 2994 * Flow driver remove API. This abstracts calling driver specific functions. 2995 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 2996 * on device. All the resources of the flow should be freed by calling 2997 * flow_drv_destroy(). 2998 * 2999 * @param[in] dev 3000 * Pointer to Ethernet device. 3001 * @param[in, out] flow 3002 * Pointer to flow structure. 3003 */ 3004 static inline void 3005 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 3006 { 3007 const struct mlx5_flow_driver_ops *fops; 3008 enum mlx5_flow_drv_type type = flow->drv_type; 3009 3010 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3011 fops = flow_get_drv_ops(type); 3012 fops->remove(dev, flow); 3013 } 3014 3015 /** 3016 * Flow driver destroy API. This abstracts calling driver specific functions. 3017 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 3018 * on device and releases resources of the flow. 3019 * 3020 * @param[in] dev 3021 * Pointer to Ethernet device. 3022 * @param[in, out] flow 3023 * Pointer to flow structure. 3024 */ 3025 static inline void 3026 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 3027 { 3028 const struct mlx5_flow_driver_ops *fops; 3029 enum mlx5_flow_drv_type type = flow->drv_type; 3030 3031 flow_mreg_split_qrss_release(dev, flow); 3032 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3033 fops = flow_get_drv_ops(type); 3034 fops->destroy(dev, flow); 3035 } 3036 3037 /** 3038 * Get RSS action from the action list. 3039 * 3040 * @param[in] actions 3041 * Pointer to the list of actions. 3042 * 3043 * @return 3044 * Pointer to the RSS action if exist, else return NULL. 3045 */ 3046 static const struct rte_flow_action_rss* 3047 flow_get_rss_action(const struct rte_flow_action actions[]) 3048 { 3049 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3050 switch (actions->type) { 3051 case RTE_FLOW_ACTION_TYPE_RSS: 3052 return (const struct rte_flow_action_rss *) 3053 actions->conf; 3054 default: 3055 break; 3056 } 3057 } 3058 return NULL; 3059 } 3060 3061 static unsigned int 3062 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 3063 { 3064 const struct rte_flow_item *item; 3065 unsigned int has_vlan = 0; 3066 3067 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 3068 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 3069 has_vlan = 1; 3070 break; 3071 } 3072 } 3073 if (has_vlan) 3074 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 3075 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 3076 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 3077 MLX5_EXPANSION_ROOT_OUTER; 3078 } 3079 3080 /** 3081 * Get layer flags from the prefix flow. 3082 * 3083 * Some flows may be split to several subflows, the prefix subflow gets the 3084 * match items and the suffix sub flow gets the actions. 3085 * Some actions need the user defined match item flags to get the detail for 3086 * the action. 3087 * This function helps the suffix flow to get the item layer flags from prefix 3088 * subflow. 3089 * 3090 * @param[in] dev_flow 3091 * Pointer the created preifx subflow. 3092 * 3093 * @return 3094 * The layers get from prefix subflow. 3095 */ 3096 static inline uint64_t 3097 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 3098 { 3099 uint64_t layers = 0; 3100 3101 /* 3102 * Layers bits could be localization, but usually the compiler will 3103 * help to do the optimization work for source code. 3104 * If no decap actions, use the layers directly. 
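	 * With a decap action the inner layers of the prefix flow become the
	 * outer layers of the suffix flow, e.g. MLX5_FLOW_LAYER_INNER_L3_IPV4
	 * is reported as MLX5_FLOW_LAYER_OUTER_L3_IPV4 below.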
3105 */ 3106 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 3107 return dev_flow->handle->layers; 3108 /* Convert L3 layers with decap action. */ 3109 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 3110 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 3111 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 3112 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 3113 /* Convert L4 layers with decap action. */ 3114 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 3115 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 3116 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 3117 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 3118 return layers; 3119 } 3120 3121 /** 3122 * Get metadata split action information. 3123 * 3124 * @param[in] actions 3125 * Pointer to the list of actions. 3126 * @param[out] qrss 3127 * Pointer to the return pointer. 3128 * @param[out] qrss_type 3129 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 3130 * if no QUEUE/RSS is found. 3131 * @param[out] encap_idx 3132 * Pointer to the index of the encap action if exists, otherwise the last 3133 * action index. 3134 * 3135 * @return 3136 * Total number of actions. 3137 */ 3138 static int 3139 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 3140 const struct rte_flow_action **qrss, 3141 int *encap_idx) 3142 { 3143 const struct rte_flow_action_raw_encap *raw_encap; 3144 int actions_n = 0; 3145 int raw_decap_idx = -1; 3146 3147 *encap_idx = -1; 3148 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3149 switch (actions->type) { 3150 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3151 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3152 *encap_idx = actions_n; 3153 break; 3154 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3155 raw_decap_idx = actions_n; 3156 break; 3157 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3158 raw_encap = actions->conf; 3159 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3160 *encap_idx = raw_decap_idx != -1 ? 3161 raw_decap_idx : actions_n; 3162 break; 3163 case RTE_FLOW_ACTION_TYPE_QUEUE: 3164 case RTE_FLOW_ACTION_TYPE_RSS: 3165 *qrss = actions; 3166 break; 3167 default: 3168 break; 3169 } 3170 actions_n++; 3171 } 3172 if (*encap_idx == -1) 3173 *encap_idx = actions_n; 3174 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3175 return actions_n + 1; 3176 } 3177 3178 /** 3179 * Check meter action from the action list. 3180 * 3181 * @param[in] actions 3182 * Pointer to the list of actions. 3183 * @param[out] mtr 3184 * Pointer to the meter exist flag. 3185 * 3186 * @return 3187 * Total number of actions. 3188 */ 3189 static int 3190 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 3191 { 3192 int actions_n = 0; 3193 3194 MLX5_ASSERT(mtr); 3195 *mtr = 0; 3196 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3197 switch (actions->type) { 3198 case RTE_FLOW_ACTION_TYPE_METER: 3199 *mtr = 1; 3200 break; 3201 default: 3202 break; 3203 } 3204 actions_n++; 3205 } 3206 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3207 return actions_n + 1; 3208 } 3209 3210 /** 3211 * Check if the flow should be split due to hairpin. 3212 * The reason for the split is that in current HW we can't 3213 * support encap and push-vlan on Rx, so if a flow contains 3214 * these actions we move it to Tx. 3215 * 3216 * @param dev 3217 * Pointer to Ethernet device. 3218 * @param[in] attr 3219 * Flow rule attributes. 3220 * @param[in] actions 3221 * Associated actions (list terminated by the END action). 
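 *   For example, RAW_ENCAP (larger than plain L2 + IPv4 headers) combined
 *   with a QUEUE action targeting a hairpin Rx queue makes the flow a split
 *   candidate, while a plain QUEUE flow is left untouched.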
3222 * 3223 * @return 3224 * > 0 the number of actions and the flow should be split, 3225 * 0 when no split required. 3226 */ 3227 static int 3228 flow_check_hairpin_split(struct rte_eth_dev *dev, 3229 const struct rte_flow_attr *attr, 3230 const struct rte_flow_action actions[]) 3231 { 3232 int queue_action = 0; 3233 int action_n = 0; 3234 int split = 0; 3235 const struct rte_flow_action_queue *queue; 3236 const struct rte_flow_action_rss *rss; 3237 const struct rte_flow_action_raw_encap *raw_encap; 3238 3239 if (!attr->ingress) 3240 return 0; 3241 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3242 switch (actions->type) { 3243 case RTE_FLOW_ACTION_TYPE_QUEUE: 3244 queue = actions->conf; 3245 if (queue == NULL) 3246 return 0; 3247 if (mlx5_rxq_get_type(dev, queue->index) != 3248 MLX5_RXQ_TYPE_HAIRPIN) 3249 return 0; 3250 queue_action = 1; 3251 action_n++; 3252 break; 3253 case RTE_FLOW_ACTION_TYPE_RSS: 3254 rss = actions->conf; 3255 if (rss == NULL || rss->queue_num == 0) 3256 return 0; 3257 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 3258 MLX5_RXQ_TYPE_HAIRPIN) 3259 return 0; 3260 queue_action = 1; 3261 action_n++; 3262 break; 3263 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3264 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3265 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3266 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3267 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3268 split++; 3269 action_n++; 3270 break; 3271 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3272 raw_encap = actions->conf; 3273 if (raw_encap->size > 3274 (sizeof(struct rte_flow_item_eth) + 3275 sizeof(struct rte_flow_item_ipv4))) 3276 split++; 3277 action_n++; 3278 break; 3279 default: 3280 action_n++; 3281 break; 3282 } 3283 } 3284 if (split && queue_action) 3285 return action_n; 3286 return 0; 3287 } 3288 3289 /* Declare flow create/destroy prototype in advance. */ 3290 static uint32_t 3291 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 3292 const struct rte_flow_attr *attr, 3293 const struct rte_flow_item items[], 3294 const struct rte_flow_action actions[], 3295 bool external, struct rte_flow_error *error); 3296 3297 static void 3298 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 3299 uint32_t flow_idx); 3300 3301 /** 3302 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3303 * 3304 * As mark_id is unique, if there's already a registered flow for the mark_id, 3305 * return by increasing the reference counter of the resource. Otherwise, create 3306 * the resource (mcp_res) and flow. 3307 * 3308 * Flow looks like, 3309 * - If ingress port is ANY and reg_c[1] is mark_id, 3310 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3311 * 3312 * For default flow (zero mark_id), flow is like, 3313 * - If ingress port is ANY, 3314 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 3315 * 3316 * @param dev 3317 * Pointer to Ethernet device. 3318 * @param mark_id 3319 * ID of MARK action, zero means default flow for META. 3320 * @param[out] error 3321 * Perform verbose error reporting if not NULL. 3322 * 3323 * @return 3324 * Associated resource on success, NULL otherwise and rte_errno is set. 
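 *
 * Roughly equivalent, in rte_flow terms, to (the internal TAG/COPY_MREG
 * types stand in for the real register accesses):
 * @code
 *	pattern: TAG(reg_c[1] == mark_id) / END
 *	actions: MARK(mark_id) / COPY_MREG(reg_b := reg_c[0]) /
 *		 JUMP(RX_ACT_TBL) / END
 * @endcode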
3325 */ 3326 static struct mlx5_flow_mreg_copy_resource * 3327 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3328 struct rte_flow_error *error) 3329 { 3330 struct mlx5_priv *priv = dev->data->dev_private; 3331 struct rte_flow_attr attr = { 3332 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3333 .ingress = 1, 3334 }; 3335 struct mlx5_rte_flow_item_tag tag_spec = { 3336 .data = mark_id, 3337 }; 3338 struct rte_flow_item items[] = { 3339 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3340 }; 3341 struct rte_flow_action_mark ftag = { 3342 .id = mark_id, 3343 }; 3344 struct mlx5_flow_action_copy_mreg cp_mreg = { 3345 .dst = REG_B, 3346 .src = REG_NON, 3347 }; 3348 struct rte_flow_action_jump jump = { 3349 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3350 }; 3351 struct rte_flow_action actions[] = { 3352 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3353 }; 3354 struct mlx5_flow_mreg_copy_resource *mcp_res; 3355 uint32_t idx = 0; 3356 int ret; 3357 3358 /* Fill the register fileds in the flow. */ 3359 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3360 if (ret < 0) 3361 return NULL; 3362 tag_spec.id = ret; 3363 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3364 if (ret < 0) 3365 return NULL; 3366 cp_mreg.src = ret; 3367 /* Check if already registered. */ 3368 MLX5_ASSERT(priv->mreg_cp_tbl); 3369 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3370 if (mcp_res) { 3371 /* For non-default rule. */ 3372 if (mark_id != MLX5_DEFAULT_COPY_ID) 3373 mcp_res->refcnt++; 3374 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3375 mcp_res->refcnt == 1); 3376 return mcp_res; 3377 } 3378 /* Provide the full width of FLAG specific value. */ 3379 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3380 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3381 /* Build a new flow. */ 3382 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3383 items[0] = (struct rte_flow_item){ 3384 .type = (enum rte_flow_item_type) 3385 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3386 .spec = &tag_spec, 3387 }; 3388 items[1] = (struct rte_flow_item){ 3389 .type = RTE_FLOW_ITEM_TYPE_END, 3390 }; 3391 actions[0] = (struct rte_flow_action){ 3392 .type = (enum rte_flow_action_type) 3393 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3394 .conf = &ftag, 3395 }; 3396 actions[1] = (struct rte_flow_action){ 3397 .type = (enum rte_flow_action_type) 3398 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3399 .conf = &cp_mreg, 3400 }; 3401 actions[2] = (struct rte_flow_action){ 3402 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3403 .conf = &jump, 3404 }; 3405 actions[3] = (struct rte_flow_action){ 3406 .type = RTE_FLOW_ACTION_TYPE_END, 3407 }; 3408 } else { 3409 /* Default rule, wildcard match. */ 3410 attr.priority = MLX5_FLOW_PRIO_RSVD; 3411 items[0] = (struct rte_flow_item){ 3412 .type = RTE_FLOW_ITEM_TYPE_END, 3413 }; 3414 actions[0] = (struct rte_flow_action){ 3415 .type = (enum rte_flow_action_type) 3416 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3417 .conf = &cp_mreg, 3418 }; 3419 actions[1] = (struct rte_flow_action){ 3420 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3421 .conf = &jump, 3422 }; 3423 actions[2] = (struct rte_flow_action){ 3424 .type = RTE_FLOW_ACTION_TYPE_END, 3425 }; 3426 } 3427 /* Build a new entry. */ 3428 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3429 if (!mcp_res) { 3430 rte_errno = ENOMEM; 3431 return NULL; 3432 } 3433 mcp_res->idx = idx; 3434 /* 3435 * The copy Flows are not included in any list. 
There 3436 * ones are referenced from other Flows and can not 3437 * be applied, removed, deleted in ardbitrary order 3438 * by list traversing. 3439 */ 3440 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3441 actions, false, error); 3442 if (!mcp_res->rix_flow) 3443 goto error; 3444 mcp_res->refcnt++; 3445 mcp_res->hlist_ent.key = mark_id; 3446 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3447 &mcp_res->hlist_ent); 3448 MLX5_ASSERT(!ret); 3449 if (ret) 3450 goto error; 3451 return mcp_res; 3452 error: 3453 if (mcp_res->rix_flow) 3454 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3455 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3456 return NULL; 3457 } 3458 3459 /** 3460 * Release flow in RX_CP_TBL. 3461 * 3462 * @param dev 3463 * Pointer to Ethernet device. 3464 * @flow 3465 * Parent flow for wich copying is provided. 3466 */ 3467 static void 3468 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3469 struct rte_flow *flow) 3470 { 3471 struct mlx5_flow_mreg_copy_resource *mcp_res; 3472 struct mlx5_priv *priv = dev->data->dev_private; 3473 3474 if (!flow->rix_mreg_copy) 3475 return; 3476 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3477 flow->rix_mreg_copy); 3478 if (!mcp_res || !priv->mreg_cp_tbl) 3479 return; 3480 if (flow->copy_applied) { 3481 MLX5_ASSERT(mcp_res->appcnt); 3482 flow->copy_applied = 0; 3483 --mcp_res->appcnt; 3484 if (!mcp_res->appcnt) { 3485 struct rte_flow *mcp_flow = mlx5_ipool_get 3486 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3487 mcp_res->rix_flow); 3488 3489 if (mcp_flow) 3490 flow_drv_remove(dev, mcp_flow); 3491 } 3492 } 3493 /* 3494 * We do not check availability of metadata registers here, 3495 * because copy resources are not allocated in this case. 3496 */ 3497 if (--mcp_res->refcnt) 3498 return; 3499 MLX5_ASSERT(mcp_res->rix_flow); 3500 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3501 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3502 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3503 flow->rix_mreg_copy = 0; 3504 } 3505 3506 /** 3507 * Start flow in RX_CP_TBL. 3508 * 3509 * @param dev 3510 * Pointer to Ethernet device. 3511 * @flow 3512 * Parent flow for wich copying is provided. 3513 * 3514 * @return 3515 * 0 on success, a negative errno value otherwise and rte_errno is set. 3516 */ 3517 static int 3518 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3519 struct rte_flow *flow) 3520 { 3521 struct mlx5_flow_mreg_copy_resource *mcp_res; 3522 struct mlx5_priv *priv = dev->data->dev_private; 3523 int ret; 3524 3525 if (!flow->rix_mreg_copy || flow->copy_applied) 3526 return 0; 3527 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3528 flow->rix_mreg_copy); 3529 if (!mcp_res) 3530 return 0; 3531 if (!mcp_res->appcnt) { 3532 struct rte_flow *mcp_flow = mlx5_ipool_get 3533 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3534 mcp_res->rix_flow); 3535 3536 if (mcp_flow) { 3537 ret = flow_drv_apply(dev, mcp_flow, NULL); 3538 if (ret) 3539 return ret; 3540 } 3541 } 3542 ++mcp_res->appcnt; 3543 flow->copy_applied = 1; 3544 return 0; 3545 } 3546 3547 /** 3548 * Stop flow in RX_CP_TBL. 3549 * 3550 * @param dev 3551 * Pointer to Ethernet device. 3552 * @flow 3553 * Parent flow for wich copying is provided. 
3554 */ 3555 static void 3556 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3557 struct rte_flow *flow) 3558 { 3559 struct mlx5_flow_mreg_copy_resource *mcp_res; 3560 struct mlx5_priv *priv = dev->data->dev_private; 3561 3562 if (!flow->rix_mreg_copy || !flow->copy_applied) 3563 return; 3564 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3565 flow->rix_mreg_copy); 3566 if (!mcp_res) 3567 return; 3568 MLX5_ASSERT(mcp_res->appcnt); 3569 --mcp_res->appcnt; 3570 flow->copy_applied = 0; 3571 if (!mcp_res->appcnt) { 3572 struct rte_flow *mcp_flow = mlx5_ipool_get 3573 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3574 mcp_res->rix_flow); 3575 3576 if (mcp_flow) 3577 flow_drv_remove(dev, mcp_flow); 3578 } 3579 } 3580 3581 /** 3582 * Remove the default copy action from RX_CP_TBL. 3583 * 3584 * @param dev 3585 * Pointer to Ethernet device. 3586 */ 3587 static void 3588 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3589 { 3590 struct mlx5_flow_mreg_copy_resource *mcp_res; 3591 struct mlx5_priv *priv = dev->data->dev_private; 3592 3593 /* Check if default flow is registered. */ 3594 if (!priv->mreg_cp_tbl) 3595 return; 3596 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3597 MLX5_DEFAULT_COPY_ID); 3598 if (!mcp_res) 3599 return; 3600 MLX5_ASSERT(mcp_res->rix_flow); 3601 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3602 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3603 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3604 } 3605 3606 /** 3607 * Add the default copy action in in RX_CP_TBL. 3608 * 3609 * @param dev 3610 * Pointer to Ethernet device. 3611 * @param[out] error 3612 * Perform verbose error reporting if not NULL. 3613 * 3614 * @return 3615 * 0 for success, negative value otherwise and rte_errno is set. 3616 */ 3617 static int 3618 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3619 struct rte_flow_error *error) 3620 { 3621 struct mlx5_priv *priv = dev->data->dev_private; 3622 struct mlx5_flow_mreg_copy_resource *mcp_res; 3623 3624 /* Check whether extensive metadata feature is engaged. */ 3625 if (!priv->config.dv_flow_en || 3626 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3627 !mlx5_flow_ext_mreg_supported(dev) || 3628 !priv->sh->dv_regc0_mask) 3629 return 0; 3630 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3631 if (!mcp_res) 3632 return -rte_errno; 3633 return 0; 3634 } 3635 3636 /** 3637 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3638 * 3639 * All the flow having Q/RSS action should be split by 3640 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3641 * performs the following, 3642 * - CQE->flow_tag := reg_c[1] (MARK) 3643 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3644 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3645 * but there should be a flow per each MARK ID set by MARK action. 3646 * 3647 * For the aforementioned reason, if there's a MARK action in flow's action 3648 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3649 * the MARK ID to CQE's flow_tag like, 3650 * - If reg_c[1] is mark_id, 3651 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3652 * 3653 * For SET_META action which stores value in reg_c[0], as the destination is 3654 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3655 * MARK ID means the default flow. 
The default flow looks like, 3656 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3657 * 3658 * @param dev 3659 * Pointer to Ethernet device. 3660 * @param flow 3661 * Pointer to flow structure. 3662 * @param[in] actions 3663 * Pointer to the list of actions. 3664 * @param[out] error 3665 * Perform verbose error reporting if not NULL. 3666 * 3667 * @return 3668 * 0 on success, negative value otherwise and rte_errno is set. 3669 */ 3670 static int 3671 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3672 struct rte_flow *flow, 3673 const struct rte_flow_action *actions, 3674 struct rte_flow_error *error) 3675 { 3676 struct mlx5_priv *priv = dev->data->dev_private; 3677 struct mlx5_dev_config *config = &priv->config; 3678 struct mlx5_flow_mreg_copy_resource *mcp_res; 3679 const struct rte_flow_action_mark *mark; 3680 3681 /* Check whether extensive metadata feature is engaged. */ 3682 if (!config->dv_flow_en || 3683 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3684 !mlx5_flow_ext_mreg_supported(dev) || 3685 !priv->sh->dv_regc0_mask) 3686 return 0; 3687 /* Find MARK action. */ 3688 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3689 switch (actions->type) { 3690 case RTE_FLOW_ACTION_TYPE_FLAG: 3691 mcp_res = flow_mreg_add_copy_action 3692 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3693 if (!mcp_res) 3694 return -rte_errno; 3695 flow->rix_mreg_copy = mcp_res->idx; 3696 if (dev->data->dev_started) { 3697 mcp_res->appcnt++; 3698 flow->copy_applied = 1; 3699 } 3700 return 0; 3701 case RTE_FLOW_ACTION_TYPE_MARK: 3702 mark = (const struct rte_flow_action_mark *) 3703 actions->conf; 3704 mcp_res = 3705 flow_mreg_add_copy_action(dev, mark->id, error); 3706 if (!mcp_res) 3707 return -rte_errno; 3708 flow->rix_mreg_copy = mcp_res->idx; 3709 if (dev->data->dev_started) { 3710 mcp_res->appcnt++; 3711 flow->copy_applied = 1; 3712 } 3713 return 0; 3714 default: 3715 break; 3716 } 3717 } 3718 return 0; 3719 } 3720 3721 #define MLX5_MAX_SPLIT_ACTIONS 24 3722 #define MLX5_MAX_SPLIT_ITEMS 24 3723 3724 /** 3725 * Split the hairpin flow. 3726 * Since HW can't support encap and push-vlan on Rx, we move these 3727 * actions to Tx. 3728 * If the count action is after the encap then we also 3729 * move the count action. in this case the count will also measure 3730 * the outer bytes. 3731 * 3732 * @param dev 3733 * Pointer to Ethernet device. 3734 * @param[in] actions 3735 * Associated actions (list terminated by the END action). 3736 * @param[out] actions_rx 3737 * Rx flow actions. 3738 * @param[out] actions_tx 3739 * Tx flow actions.. 3740 * @param[out] pattern_tx 3741 * The pattern items for the Tx flow. 3742 * @param[out] flow_id 3743 * The flow ID connected to this flow. 3744 * 3745 * @return 3746 * 0 on success. 
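 *
 * Illustrative example: for an application action list such as
 * VXLAN_ENCAP / QUEUE / END, the encap action is moved to
 * @p actions_tx, QUEUE stays in @p actions_rx, and an internal SET_TAG
 * action is appended to the Rx side so that the Tx flow built from
 * @p pattern_tx can match the generated flow ID.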
3747 */ 3748 static int 3749 flow_hairpin_split(struct rte_eth_dev *dev, 3750 const struct rte_flow_action actions[], 3751 struct rte_flow_action actions_rx[], 3752 struct rte_flow_action actions_tx[], 3753 struct rte_flow_item pattern_tx[], 3754 uint32_t *flow_id) 3755 { 3756 struct mlx5_priv *priv = dev->data->dev_private; 3757 const struct rte_flow_action_raw_encap *raw_encap; 3758 const struct rte_flow_action_raw_decap *raw_decap; 3759 struct mlx5_rte_flow_action_set_tag *set_tag; 3760 struct rte_flow_action *tag_action; 3761 struct mlx5_rte_flow_item_tag *tag_item; 3762 struct rte_flow_item *item; 3763 char *addr; 3764 int encap = 0; 3765 3766 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3767 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3768 switch (actions->type) { 3769 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3770 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3771 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3772 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3773 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3774 rte_memcpy(actions_tx, actions, 3775 sizeof(struct rte_flow_action)); 3776 actions_tx++; 3777 break; 3778 case RTE_FLOW_ACTION_TYPE_COUNT: 3779 if (encap) { 3780 rte_memcpy(actions_tx, actions, 3781 sizeof(struct rte_flow_action)); 3782 actions_tx++; 3783 } else { 3784 rte_memcpy(actions_rx, actions, 3785 sizeof(struct rte_flow_action)); 3786 actions_rx++; 3787 } 3788 break; 3789 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3790 raw_encap = actions->conf; 3791 if (raw_encap->size > 3792 (sizeof(struct rte_flow_item_eth) + 3793 sizeof(struct rte_flow_item_ipv4))) { 3794 memcpy(actions_tx, actions, 3795 sizeof(struct rte_flow_action)); 3796 actions_tx++; 3797 encap = 1; 3798 } else { 3799 rte_memcpy(actions_rx, actions, 3800 sizeof(struct rte_flow_action)); 3801 actions_rx++; 3802 } 3803 break; 3804 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3805 raw_decap = actions->conf; 3806 if (raw_decap->size < 3807 (sizeof(struct rte_flow_item_eth) + 3808 sizeof(struct rte_flow_item_ipv4))) { 3809 memcpy(actions_tx, actions, 3810 sizeof(struct rte_flow_action)); 3811 actions_tx++; 3812 } else { 3813 rte_memcpy(actions_rx, actions, 3814 sizeof(struct rte_flow_action)); 3815 actions_rx++; 3816 } 3817 break; 3818 default: 3819 rte_memcpy(actions_rx, actions, 3820 sizeof(struct rte_flow_action)); 3821 actions_rx++; 3822 break; 3823 } 3824 } 3825 /* Add set meta action and end action for the Rx flow. */ 3826 tag_action = actions_rx; 3827 tag_action->type = (enum rte_flow_action_type) 3828 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3829 actions_rx++; 3830 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3831 actions_rx++; 3832 set_tag = (void *)actions_rx; 3833 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3834 MLX5_ASSERT(set_tag->id > REG_NON); 3835 set_tag->data = *flow_id; 3836 tag_action->conf = set_tag; 3837 /* Create Tx item list. 
*/ 3838 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3839 addr = (void *)&pattern_tx[2]; 3840 item = pattern_tx; 3841 item->type = (enum rte_flow_item_type) 3842 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3843 tag_item = (void *)addr; 3844 tag_item->data = *flow_id; 3845 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3846 MLX5_ASSERT(set_tag->id > REG_NON); 3847 item->spec = tag_item; 3848 addr += sizeof(struct mlx5_rte_flow_item_tag); 3849 tag_item = (void *)addr; 3850 tag_item->data = UINT32_MAX; 3851 tag_item->id = UINT16_MAX; 3852 item->mask = tag_item; 3853 item->last = NULL; 3854 item++; 3855 item->type = RTE_FLOW_ITEM_TYPE_END; 3856 return 0; 3857 } 3858 3859 /** 3860 * The last stage of splitting chain, just creates the subflow 3861 * without any modification. 3862 * 3863 * @param[in] dev 3864 * Pointer to Ethernet device. 3865 * @param[in] flow 3866 * Parent flow structure pointer. 3867 * @param[in, out] sub_flow 3868 * Pointer to return the created subflow, may be NULL. 3869 * @param[in] prefix_layers 3870 * Prefix subflow layers, may be 0. 3871 * @param[in] prefix_mark 3872 * Prefix subflow mark flag, may be 0. 3873 * @param[in] attr 3874 * Flow rule attributes. 3875 * @param[in] items 3876 * Pattern specification (list terminated by the END pattern item). 3877 * @param[in] actions 3878 * Associated actions (list terminated by the END action). 3879 * @param[in] external 3880 * This flow rule is created by request external to PMD. 3881 * @param[in] flow_idx 3882 * This memory pool index to the flow. 3883 * @param[out] error 3884 * Perform verbose error reporting if not NULL. 3885 * @return 3886 * 0 on success, negative value otherwise 3887 */ 3888 static int 3889 flow_create_split_inner(struct rte_eth_dev *dev, 3890 struct rte_flow *flow, 3891 struct mlx5_flow **sub_flow, 3892 uint64_t prefix_layers, 3893 uint32_t prefix_mark, 3894 const struct rte_flow_attr *attr, 3895 const struct rte_flow_item items[], 3896 const struct rte_flow_action actions[], 3897 bool external, uint32_t flow_idx, 3898 struct rte_flow_error *error) 3899 { 3900 struct mlx5_flow *dev_flow; 3901 3902 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3903 flow_idx, error); 3904 if (!dev_flow) 3905 return -rte_errno; 3906 dev_flow->flow = flow; 3907 dev_flow->external = external; 3908 /* Subflow object was created, we must include one in the list. */ 3909 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3910 dev_flow->handle, next); 3911 /* 3912 * If dev_flow is as one of the suffix flow, some actions in suffix 3913 * flow may need some user defined item layer flags, and pass the 3914 * Metadate rxq mark flag to suffix flow as well. 3915 */ 3916 if (prefix_layers) 3917 dev_flow->handle->layers = prefix_layers; 3918 if (prefix_mark) 3919 dev_flow->handle->mark = 1; 3920 if (sub_flow) 3921 *sub_flow = dev_flow; 3922 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3923 } 3924 3925 /** 3926 * Split the meter flow. 3927 * 3928 * As meter flow will split to three sub flow, other than meter 3929 * action, the other actions make sense to only meter accepts 3930 * the packet. If it need to be dropped, no other additional 3931 * actions should be take. 3932 * 3933 * One kind of special action which decapsulates the L3 tunnel 3934 * header will be in the prefix sub flow, as not to take the 3935 * L3 tunnel header into account. 3936 * 3937 * @param dev 3938 * Pointer to Ethernet device. 
3939 * @param[in] items 3940 * Pattern specification (list terminated by the END pattern item). 3941 * @param[out] sfx_items 3942 * Suffix flow match items (list terminated by the END pattern item). 3943 * @param[in] actions 3944 * Associated actions (list terminated by the END action). 3945 * @param[out] actions_sfx 3946 * Suffix flow actions. 3947 * @param[out] actions_pre 3948 * Prefix flow actions. 3949 * @param[out] pattern_sfx 3950 * The pattern items for the suffix flow. 3951 * @param[out] tag_sfx 3952 * Pointer to suffix flow tag. 3953 * 3954 * @return 3955 * 0 on success. 3956 */ 3957 static int 3958 flow_meter_split_prep(struct rte_eth_dev *dev, 3959 const struct rte_flow_item items[], 3960 struct rte_flow_item sfx_items[], 3961 const struct rte_flow_action actions[], 3962 struct rte_flow_action actions_sfx[], 3963 struct rte_flow_action actions_pre[]) 3964 { 3965 struct rte_flow_action *tag_action = NULL; 3966 struct rte_flow_item *tag_item; 3967 struct mlx5_rte_flow_action_set_tag *set_tag; 3968 struct rte_flow_error error; 3969 const struct rte_flow_action_raw_encap *raw_encap; 3970 const struct rte_flow_action_raw_decap *raw_decap; 3971 struct mlx5_rte_flow_item_tag *tag_spec; 3972 struct mlx5_rte_flow_item_tag *tag_mask; 3973 uint32_t tag_id; 3974 bool copy_vlan = false; 3975 3976 /* Prepare the actions for prefix and suffix flow. */ 3977 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3978 struct rte_flow_action **action_cur = NULL; 3979 3980 switch (actions->type) { 3981 case RTE_FLOW_ACTION_TYPE_METER: 3982 /* Add the extra tag action first. */ 3983 tag_action = actions_pre; 3984 tag_action->type = (enum rte_flow_action_type) 3985 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3986 actions_pre++; 3987 action_cur = &actions_pre; 3988 break; 3989 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 3990 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 3991 action_cur = &actions_pre; 3992 break; 3993 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3994 raw_encap = actions->conf; 3995 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 3996 action_cur = &actions_pre; 3997 break; 3998 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3999 raw_decap = actions->conf; 4000 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 4001 action_cur = &actions_pre; 4002 break; 4003 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 4004 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 4005 copy_vlan = true; 4006 break; 4007 default: 4008 break; 4009 } 4010 if (!action_cur) 4011 action_cur = &actions_sfx; 4012 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 4013 (*action_cur)++; 4014 } 4015 /* Add end action to the actions. */ 4016 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 4017 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 4018 actions_pre++; 4019 /* Set the tag. */ 4020 set_tag = (void *)actions_pre; 4021 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4022 /* 4023 * Get the id from the qrss_pool to make qrss share the id with meter. 4024 */ 4025 tag_id = flow_qrss_get_id(dev); 4026 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 4027 assert(tag_action); 4028 tag_action->conf = set_tag; 4029 /* Prepare the suffix subflow items. 
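 * The first suffix item is reserved for the tag match; PORT_ID and,
 * optionally, VLAN items are copied next, and the tag spec/mask pair
 * is stored after the END item as out-of-band data referenced by the
 * reserved tag item.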
*/ 4030 tag_item = sfx_items++; 4031 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 4032 int item_type = items->type; 4033 4034 switch (item_type) { 4035 case RTE_FLOW_ITEM_TYPE_PORT_ID: 4036 memcpy(sfx_items, items, sizeof(*sfx_items)); 4037 sfx_items++; 4038 break; 4039 case RTE_FLOW_ITEM_TYPE_VLAN: 4040 if (copy_vlan) { 4041 memcpy(sfx_items, items, sizeof(*sfx_items)); 4042 /* 4043 * Convert to internal match item, it is used 4044 * for vlan push and set vid. 4045 */ 4046 sfx_items->type = (enum rte_flow_item_type) 4047 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 4048 sfx_items++; 4049 } 4050 break; 4051 default: 4052 break; 4053 } 4054 } 4055 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 4056 sfx_items++; 4057 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 4058 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 4059 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4060 tag_mask = tag_spec + 1; 4061 tag_mask->data = 0xffffff00; 4062 tag_item->type = (enum rte_flow_item_type) 4063 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 4064 tag_item->spec = tag_spec; 4065 tag_item->last = NULL; 4066 tag_item->mask = tag_mask; 4067 return tag_id; 4068 } 4069 4070 /** 4071 * Split action list having QUEUE/RSS for metadata register copy. 4072 * 4073 * Once Q/RSS action is detected in user's action list, the flow action 4074 * should be split in order to copy metadata registers, which will happen in 4075 * RX_CP_TBL like, 4076 * - CQE->flow_tag := reg_c[1] (MARK) 4077 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 4078 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 4079 * This is because the last action of each flow must be a terminal action 4080 * (QUEUE, RSS or DROP). 4081 * 4082 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 4083 * stored and kept in the mlx5_flow structure per each sub_flow. 4084 * 4085 * The Q/RSS action is replaced with, 4086 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 4087 * And the following JUMP action is added at the end, 4088 * - JUMP, to RX_CP_TBL. 4089 * 4090 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 4091 * flow_create_split_metadata() routine. The flow will look like, 4092 * - If flow ID matches (reg_c[2]), perform Q/RSS. 4093 * 4094 * @param dev 4095 * Pointer to Ethernet device. 4096 * @param[out] split_actions 4097 * Pointer to store split actions to jump to CP_TBL. 4098 * @param[in] actions 4099 * Pointer to the list of original flow actions. 4100 * @param[in] qrss 4101 * Pointer to the Q/RSS action. 4102 * @param[in] actions_n 4103 * Number of original actions. 4104 * @param[out] error 4105 * Perform verbose error reporting if not NULL. 4106 * 4107 * @return 4108 * non-zero unique flow_id on success, otherwise 0 and 4109 * error/rte_error are set. 4110 */ 4111 static uint32_t 4112 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 4113 struct rte_flow_action *split_actions, 4114 const struct rte_flow_action *actions, 4115 const struct rte_flow_action *qrss, 4116 int actions_n, struct rte_flow_error *error) 4117 { 4118 struct mlx5_rte_flow_action_set_tag *set_tag; 4119 struct rte_flow_action_jump *jump; 4120 const int qrss_idx = qrss - actions; 4121 uint32_t flow_id = 0; 4122 int ret = 0; 4123 4124 /* 4125 * Given actions will be split 4126 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 4127 * - Add jump to mreg CP_TBL. 4128 * As a result, there will be one more action. 
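 * For example (illustrative): [RSS, END] becomes
 * [SET_TAG(flow ID), JUMP to RX_CP_TBL, END].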
4129 */
4130 ++actions_n;
4131 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
4132 set_tag = (void *)(split_actions + actions_n);
4133 /*
4134 * If the tag action is not set to void (i.e. this is not the meter
4135 * suffix flow), add the tag action, since the meter suffix flow
4136 * already has the tag added.
4137 */
4138 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
4139 /*
4140 * Allocate the new subflow ID. This one is unique within
4141 * device and not shared with representors. Otherwise,
4142 * we would have to resolve multi-thread access synch
4143 * issue. Each flow on the shared device is appended
4144 * with source vport identifier, so the resulting
4145 * flows will be unique in the shared (by master and
4146 * representors) domain even if they have coinciding
4147 * IDs.
4148 */
4149 flow_id = flow_qrss_get_id(dev);
4150 if (!flow_id)
4151 return rte_flow_error_set(error, ENOMEM,
4152 RTE_FLOW_ERROR_TYPE_ACTION,
4153 NULL, "can't allocate id "
4154 "for split Q/RSS subflow");
4155 /* Internal SET_TAG action to set flow ID. */
4156 *set_tag = (struct mlx5_rte_flow_action_set_tag){
4157 .data = flow_id,
4158 };
4159 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
4160 if (ret < 0)
4161 return ret;
4162 set_tag->id = ret;
4163 /* Construct new actions array. */
4164 /* Replace QUEUE/RSS action. */
4165 split_actions[qrss_idx] = (struct rte_flow_action){
4166 .type = (enum rte_flow_action_type)
4167 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4168 .conf = set_tag,
4169 };
4170 }
4171 /* JUMP action to jump to mreg copy table (CP_TBL). */
4172 jump = (void *)(set_tag + 1);
4173 *jump = (struct rte_flow_action_jump){
4174 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4175 };
4176 split_actions[actions_n - 2] = (struct rte_flow_action){
4177 .type = RTE_FLOW_ACTION_TYPE_JUMP,
4178 .conf = jump,
4179 };
4180 split_actions[actions_n - 1] = (struct rte_flow_action){
4181 .type = RTE_FLOW_ACTION_TYPE_END,
4182 };
4183 return flow_id;
4184 }
4185
4186 /**
4187 * Extend the given action list for Tx metadata copy.
4188 *
4189 * Copy the given action list to the ext_actions and add flow metadata register
4190 * copy action in order to copy reg_a set by WQE to reg_c[0].
4191 *
4192 * @param[out] ext_actions
4193 * Pointer to the extended action list.
4194 * @param[in] actions
4195 * Pointer to the list of actions.
4196 * @param[in] actions_n
4197 * Number of actions in the list.
4198 * @param[out] error
4199 * Perform verbose error reporting if not NULL.
4200 * @param[in] encap_idx
4201 * The encap action index.
4202 * 4203 * @return 4204 * 0 on success, negative value otherwise 4205 */ 4206 static int 4207 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 4208 struct rte_flow_action *ext_actions, 4209 const struct rte_flow_action *actions, 4210 int actions_n, struct rte_flow_error *error, 4211 int encap_idx) 4212 { 4213 struct mlx5_flow_action_copy_mreg *cp_mreg = 4214 (struct mlx5_flow_action_copy_mreg *) 4215 (ext_actions + actions_n + 1); 4216 int ret; 4217 4218 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 4219 if (ret < 0) 4220 return ret; 4221 cp_mreg->dst = ret; 4222 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 4223 if (ret < 0) 4224 return ret; 4225 cp_mreg->src = ret; 4226 if (encap_idx != 0) 4227 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 4228 if (encap_idx == actions_n - 1) { 4229 ext_actions[actions_n - 1] = (struct rte_flow_action){ 4230 .type = (enum rte_flow_action_type) 4231 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4232 .conf = cp_mreg, 4233 }; 4234 ext_actions[actions_n] = (struct rte_flow_action){ 4235 .type = RTE_FLOW_ACTION_TYPE_END, 4236 }; 4237 } else { 4238 ext_actions[encap_idx] = (struct rte_flow_action){ 4239 .type = (enum rte_flow_action_type) 4240 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4241 .conf = cp_mreg, 4242 }; 4243 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 4244 sizeof(*ext_actions) * (actions_n - encap_idx)); 4245 } 4246 return 0; 4247 } 4248 4249 /** 4250 * Check the match action from the action list. 4251 * 4252 * @param[in] actions 4253 * Pointer to the list of actions. 4254 * @param[in] attr 4255 * Flow rule attributes. 4256 * @param[in] action 4257 * The action to be check if exist. 4258 * @param[out] match_action_pos 4259 * Pointer to the position of the matched action if exists, otherwise is -1. 4260 * @param[out] qrss_action_pos 4261 * Pointer to the position of the Queue/RSS action if exists, otherwise is -1. 4262 * 4263 * @return 4264 * > 0 the total number of actions. 4265 * 0 if not found match action in action list. 4266 */ 4267 static int 4268 flow_check_match_action(const struct rte_flow_action actions[], 4269 const struct rte_flow_attr *attr, 4270 enum rte_flow_action_type action, 4271 int *match_action_pos, int *qrss_action_pos) 4272 { 4273 const struct rte_flow_action_sample *sample; 4274 int actions_n = 0; 4275 int jump_flag = 0; 4276 uint32_t ratio = 0; 4277 int sub_type = 0; 4278 int flag = 0; 4279 4280 *match_action_pos = -1; 4281 *qrss_action_pos = -1; 4282 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4283 if (actions->type == action) { 4284 flag = 1; 4285 *match_action_pos = actions_n; 4286 } 4287 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE || 4288 actions->type == RTE_FLOW_ACTION_TYPE_RSS) 4289 *qrss_action_pos = actions_n; 4290 if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) 4291 jump_flag = 1; 4292 if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) { 4293 sample = actions->conf; 4294 ratio = sample->ratio; 4295 sub_type = ((const struct rte_flow_action *) 4296 (sample->actions))->type; 4297 } 4298 actions_n++; 4299 } 4300 if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) { 4301 if (ratio == 1) { 4302 /* JUMP Action not support for Mirroring; 4303 * Mirroring support multi-destination; 4304 */ 4305 if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END) 4306 flag = 0; 4307 } 4308 } 4309 /* Count RTE_FLOW_ACTION_TYPE_END. */ 4310 return flag ? 
actions_n + 1 : 0; 4311 } 4312 4313 #define SAMPLE_SUFFIX_ITEM 2 4314 4315 /** 4316 * Split the sample flow. 4317 * 4318 * As sample flow will split to two sub flow, sample flow with 4319 * sample action, the other actions will move to new suffix flow. 4320 * 4321 * Also add unique tag id with tag action in the sample flow, 4322 * the same tag id will be as match in the suffix flow. 4323 * 4324 * @param dev 4325 * Pointer to Ethernet device. 4326 * @param[in] fdb_tx 4327 * FDB egress flow flag. 4328 * @param[out] sfx_items 4329 * Suffix flow match items (list terminated by the END pattern item). 4330 * @param[in] actions 4331 * Associated actions (list terminated by the END action). 4332 * @param[out] actions_sfx 4333 * Suffix flow actions. 4334 * @param[out] actions_pre 4335 * Prefix flow actions. 4336 * @param[in] actions_n 4337 * The total number of actions. 4338 * @param[in] sample_action_pos 4339 * The sample action position. 4340 * @param[in] qrss_action_pos 4341 * The Queue/RSS action position. 4342 * @param[out] error 4343 * Perform verbose error reporting if not NULL. 4344 * 4345 * @return 4346 * 0 on success, or unique flow_id, a negative errno value 4347 * otherwise and rte_errno is set. 4348 */ 4349 static int 4350 flow_sample_split_prep(struct rte_eth_dev *dev, 4351 uint32_t fdb_tx, 4352 struct rte_flow_item sfx_items[], 4353 const struct rte_flow_action actions[], 4354 struct rte_flow_action actions_sfx[], 4355 struct rte_flow_action actions_pre[], 4356 int actions_n, 4357 int sample_action_pos, 4358 int qrss_action_pos, 4359 struct rte_flow_error *error) 4360 { 4361 struct mlx5_rte_flow_action_set_tag *set_tag; 4362 struct mlx5_rte_flow_item_tag *tag_spec; 4363 struct mlx5_rte_flow_item_tag *tag_mask; 4364 uint32_t tag_id = 0; 4365 int index; 4366 int ret; 4367 4368 if (sample_action_pos < 0) 4369 return rte_flow_error_set(error, EINVAL, 4370 RTE_FLOW_ERROR_TYPE_ACTION, 4371 NULL, "invalid position of sample " 4372 "action in list"); 4373 if (!fdb_tx) { 4374 /* Prepare the prefix tag action. */ 4375 set_tag = (void *)(actions_pre + actions_n + 1); 4376 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); 4377 if (ret < 0) 4378 return ret; 4379 set_tag->id = ret; 4380 tag_id = flow_qrss_get_id(dev); 4381 set_tag->data = tag_id; 4382 /* Prepare the suffix subflow items. */ 4383 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); 4384 tag_spec->data = tag_id; 4385 tag_spec->id = set_tag->id; 4386 tag_mask = tag_spec + 1; 4387 tag_mask->data = UINT32_MAX; 4388 sfx_items[0] = (struct rte_flow_item){ 4389 .type = (enum rte_flow_item_type) 4390 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4391 .spec = tag_spec, 4392 .last = NULL, 4393 .mask = tag_mask, 4394 }; 4395 sfx_items[1] = (struct rte_flow_item){ 4396 .type = (enum rte_flow_item_type) 4397 RTE_FLOW_ITEM_TYPE_END, 4398 }; 4399 } 4400 /* Prepare the actions for prefix and suffix flow. */ 4401 if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) { 4402 index = qrss_action_pos; 4403 /* Put the preceding the Queue/RSS action into prefix flow. */ 4404 if (index != 0) 4405 memcpy(actions_pre, actions, 4406 sizeof(struct rte_flow_action) * index); 4407 /* Put others preceding the sample action into prefix flow. */ 4408 if (sample_action_pos > index + 1) 4409 memcpy(actions_pre + index, actions + index + 1, 4410 sizeof(struct rte_flow_action) * 4411 (sample_action_pos - index - 1)); 4412 index = sample_action_pos - 1; 4413 /* Put Queue/RSS action into Suffix flow. 
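 * As described in the function comment above, the Queue/RSS action
 * always ends up in the suffix sub flow, even when it precedes the
 * sample action in the original list.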
*/ 4414 memcpy(actions_sfx, actions + qrss_action_pos, 4415 sizeof(struct rte_flow_action)); 4416 actions_sfx++; 4417 } else { 4418 index = sample_action_pos; 4419 if (index != 0) 4420 memcpy(actions_pre, actions, 4421 sizeof(struct rte_flow_action) * index); 4422 } 4423 /* Add the extra tag action for NIC-RX and E-Switch ingress. */ 4424 if (!fdb_tx) { 4425 actions_pre[index++] = 4426 (struct rte_flow_action){ 4427 .type = (enum rte_flow_action_type) 4428 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4429 .conf = set_tag, 4430 }; 4431 } 4432 memcpy(actions_pre + index, actions + sample_action_pos, 4433 sizeof(struct rte_flow_action)); 4434 index += 1; 4435 actions_pre[index] = (struct rte_flow_action){ 4436 .type = (enum rte_flow_action_type) 4437 RTE_FLOW_ACTION_TYPE_END, 4438 }; 4439 /* Put the actions after sample into Suffix flow. */ 4440 memcpy(actions_sfx, actions + sample_action_pos + 1, 4441 sizeof(struct rte_flow_action) * 4442 (actions_n - sample_action_pos - 1)); 4443 return tag_id; 4444 } 4445 4446 /** 4447 * The splitting for metadata feature. 4448 * 4449 * - Q/RSS action on NIC Rx should be split in order to pass by 4450 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4451 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4452 * 4453 * - All the actions on NIC Tx should have a mreg copy action to 4454 * copy reg_a from WQE to reg_c[0]. 4455 * 4456 * @param dev 4457 * Pointer to Ethernet device. 4458 * @param[in] flow 4459 * Parent flow structure pointer. 4460 * @param[in] prefix_layers 4461 * Prefix flow layer flags. 4462 * @param[in] prefix_mark 4463 * Prefix subflow mark flag, may be 0. 4464 * @param[in] attr 4465 * Flow rule attributes. 4466 * @param[in] items 4467 * Pattern specification (list terminated by the END pattern item). 4468 * @param[in] actions 4469 * Associated actions (list terminated by the END action). 4470 * @param[in] external 4471 * This flow rule is created by request external to PMD. 4472 * @param[in] flow_idx 4473 * This memory pool index to the flow. 4474 * @param[out] error 4475 * Perform verbose error reporting if not NULL. 4476 * @return 4477 * 0 on success, negative value otherwise 4478 */ 4479 static int 4480 flow_create_split_metadata(struct rte_eth_dev *dev, 4481 struct rte_flow *flow, 4482 uint64_t prefix_layers, 4483 uint32_t prefix_mark, 4484 const struct rte_flow_attr *attr, 4485 const struct rte_flow_item items[], 4486 const struct rte_flow_action actions[], 4487 bool external, uint32_t flow_idx, 4488 struct rte_flow_error *error) 4489 { 4490 struct mlx5_priv *priv = dev->data->dev_private; 4491 struct mlx5_dev_config *config = &priv->config; 4492 const struct rte_flow_action *qrss = NULL; 4493 struct rte_flow_action *ext_actions = NULL; 4494 struct mlx5_flow *dev_flow = NULL; 4495 uint32_t qrss_id = 0; 4496 int mtr_sfx = 0; 4497 size_t act_size; 4498 int actions_n; 4499 int encap_idx; 4500 int ret; 4501 4502 /* Check whether extensive metadata feature is engaged. */ 4503 if (!config->dv_flow_en || 4504 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4505 !mlx5_flow_ext_mreg_supported(dev)) 4506 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 4507 prefix_mark, attr, items, 4508 actions, external, flow_idx, 4509 error); 4510 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 4511 &encap_idx); 4512 if (qrss) { 4513 /* Exclude hairpin flows from splitting. 
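 * A Q/RSS action targeting a hairpin queue is ignored here, so no
 * metadata split is performed for such flows.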
*/ 4514 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 4515 const struct rte_flow_action_queue *queue; 4516 4517 queue = qrss->conf; 4518 if (mlx5_rxq_get_type(dev, queue->index) == 4519 MLX5_RXQ_TYPE_HAIRPIN) 4520 qrss = NULL; 4521 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 4522 const struct rte_flow_action_rss *rss; 4523 4524 rss = qrss->conf; 4525 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 4526 MLX5_RXQ_TYPE_HAIRPIN) 4527 qrss = NULL; 4528 } 4529 } 4530 if (qrss) { 4531 /* Check if it is in meter suffix table. */ 4532 mtr_sfx = attr->group == (attr->transfer ? 4533 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4534 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4535 /* 4536 * Q/RSS action on NIC Rx should be split in order to pass by 4537 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4538 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4539 */ 4540 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4541 sizeof(struct rte_flow_action_set_tag) + 4542 sizeof(struct rte_flow_action_jump); 4543 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4544 SOCKET_ID_ANY); 4545 if (!ext_actions) 4546 return rte_flow_error_set(error, ENOMEM, 4547 RTE_FLOW_ERROR_TYPE_ACTION, 4548 NULL, "no memory to split " 4549 "metadata flow"); 4550 /* 4551 * If we are the suffix flow of meter, tag already exist. 4552 * Set the tag action to void. 4553 */ 4554 if (mtr_sfx) 4555 ext_actions[qrss - actions].type = 4556 RTE_FLOW_ACTION_TYPE_VOID; 4557 else 4558 ext_actions[qrss - actions].type = 4559 (enum rte_flow_action_type) 4560 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4561 /* 4562 * Create the new actions list with removed Q/RSS action 4563 * and appended set tag and jump to register copy table 4564 * (RX_CP_TBL). We should preallocate unique tag ID here 4565 * in advance, because it is needed for set tag action. 4566 */ 4567 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4568 qrss, actions_n, error); 4569 if (!mtr_sfx && !qrss_id) { 4570 ret = -rte_errno; 4571 goto exit; 4572 } 4573 } else if (attr->egress && !attr->transfer) { 4574 /* 4575 * All the actions on NIC Tx should have a metadata register 4576 * copy action to copy reg_a from WQE to reg_c[meta] 4577 */ 4578 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4579 sizeof(struct mlx5_flow_action_copy_mreg); 4580 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4581 SOCKET_ID_ANY); 4582 if (!ext_actions) 4583 return rte_flow_error_set(error, ENOMEM, 4584 RTE_FLOW_ERROR_TYPE_ACTION, 4585 NULL, "no memory to split " 4586 "metadata flow"); 4587 /* Create the action list appended with copy register. */ 4588 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4589 actions_n, error, encap_idx); 4590 if (ret < 0) 4591 goto exit; 4592 } 4593 /* Add the unmodified original or prefix subflow. */ 4594 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, 4595 prefix_mark, attr, 4596 items, ext_actions ? ext_actions : 4597 actions, external, flow_idx, error); 4598 if (ret < 0) 4599 goto exit; 4600 MLX5_ASSERT(dev_flow); 4601 if (qrss) { 4602 const struct rte_flow_attr q_attr = { 4603 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4604 .ingress = 1, 4605 }; 4606 /* Internal PMD action to set register. 
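 * The suffix subflow created below matches this tag (the preallocated
 * flow ID) in RX_ACT_TBL and then performs the original Q/RSS action.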
*/ 4607 struct mlx5_rte_flow_item_tag q_tag_spec = { 4608 .data = qrss_id, 4609 .id = REG_NON, 4610 }; 4611 struct rte_flow_item q_items[] = { 4612 { 4613 .type = (enum rte_flow_item_type) 4614 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4615 .spec = &q_tag_spec, 4616 .last = NULL, 4617 .mask = NULL, 4618 }, 4619 { 4620 .type = RTE_FLOW_ITEM_TYPE_END, 4621 }, 4622 }; 4623 struct rte_flow_action q_actions[] = { 4624 { 4625 .type = qrss->type, 4626 .conf = qrss->conf, 4627 }, 4628 { 4629 .type = RTE_FLOW_ACTION_TYPE_END, 4630 }, 4631 }; 4632 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4633 4634 /* 4635 * Configure the tag item only if there is no meter subflow. 4636 * Since tag is already marked in the meter suffix subflow 4637 * we can just use the meter suffix items as is. 4638 */ 4639 if (qrss_id) { 4640 /* Not meter subflow. */ 4641 MLX5_ASSERT(!mtr_sfx); 4642 /* 4643 * Put unique id in prefix flow due to it is destroyed 4644 * after suffix flow and id will be freed after there 4645 * is no actual flows with this id and identifier 4646 * reallocation becomes possible (for example, for 4647 * other flows in other threads). 4648 */ 4649 dev_flow->handle->split_flow_id = qrss_id; 4650 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4651 error); 4652 if (ret < 0) 4653 goto exit; 4654 q_tag_spec.id = ret; 4655 } 4656 dev_flow = NULL; 4657 /* Add suffix subflow to execute Q/RSS. */ 4658 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0, 4659 &q_attr, mtr_sfx ? items : 4660 q_items, q_actions, 4661 external, flow_idx, error); 4662 if (ret < 0) 4663 goto exit; 4664 /* qrss ID should be freed if failed. */ 4665 qrss_id = 0; 4666 MLX5_ASSERT(dev_flow); 4667 } 4668 4669 exit: 4670 /* 4671 * We do not destroy the partially created sub_flows in case of error. 4672 * These ones are included into parent flow list and will be destroyed 4673 * by flow_drv_destroy. 4674 */ 4675 flow_qrss_free_id(dev, qrss_id); 4676 mlx5_free(ext_actions); 4677 return ret; 4678 } 4679 4680 /** 4681 * The splitting for meter feature. 4682 * 4683 * - The meter flow will be split to two flows as prefix and 4684 * suffix flow. The packets make sense only it pass the prefix 4685 * meter action. 4686 * 4687 * - Reg_C_5 is used for the packet to match betweend prefix and 4688 * suffix flow. 4689 * 4690 * @param dev 4691 * Pointer to Ethernet device. 4692 * @param[in] flow 4693 * Parent flow structure pointer. 4694 * @param[in] prefix_layers 4695 * Prefix subflow layers, may be 0. 4696 * @param[in] prefix_mark 4697 * Prefix subflow mark flag, may be 0. 4698 * @param[in] attr 4699 * Flow rule attributes. 4700 * @param[in] items 4701 * Pattern specification (list terminated by the END pattern item). 4702 * @param[in] actions 4703 * Associated actions (list terminated by the END action). 4704 * @param[in] external 4705 * This flow rule is created by request external to PMD. 4706 * @param[in] flow_idx 4707 * This memory pool index to the flow. 4708 * @param[out] error 4709 * Perform verbose error reporting if not NULL. 
4710 * @return 4711 * 0 on success, negative value otherwise 4712 */ 4713 static int 4714 flow_create_split_meter(struct rte_eth_dev *dev, 4715 struct rte_flow *flow, 4716 uint64_t prefix_layers, 4717 uint32_t prefix_mark, 4718 const struct rte_flow_attr *attr, 4719 const struct rte_flow_item items[], 4720 const struct rte_flow_action actions[], 4721 bool external, uint32_t flow_idx, 4722 struct rte_flow_error *error) 4723 { 4724 struct mlx5_priv *priv = dev->data->dev_private; 4725 struct rte_flow_action *sfx_actions = NULL; 4726 struct rte_flow_action *pre_actions = NULL; 4727 struct rte_flow_item *sfx_items = NULL; 4728 struct mlx5_flow *dev_flow = NULL; 4729 struct rte_flow_attr sfx_attr = *attr; 4730 uint32_t mtr = 0; 4731 uint32_t mtr_tag_id = 0; 4732 size_t act_size; 4733 size_t item_size; 4734 int actions_n = 0; 4735 int ret; 4736 4737 if (priv->mtr_en) 4738 actions_n = flow_check_meter_action(actions, &mtr); 4739 if (mtr) { 4740 /* The five prefix actions: meter, decap, encap, tag, end. */ 4741 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4742 sizeof(struct mlx5_rte_flow_action_set_tag); 4743 /* tag, vlan, port id, end. */ 4744 #define METER_SUFFIX_ITEM 4 4745 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4746 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4747 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 4748 0, SOCKET_ID_ANY); 4749 if (!sfx_actions) 4750 return rte_flow_error_set(error, ENOMEM, 4751 RTE_FLOW_ERROR_TYPE_ACTION, 4752 NULL, "no memory to split " 4753 "meter flow"); 4754 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4755 act_size); 4756 pre_actions = sfx_actions + actions_n; 4757 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4758 actions, sfx_actions, 4759 pre_actions); 4760 if (!mtr_tag_id) { 4761 ret = -rte_errno; 4762 goto exit; 4763 } 4764 /* Add the prefix subflow. */ 4765 ret = flow_create_split_inner(dev, flow, &dev_flow, 4766 prefix_layers, 0, 4767 attr, items, 4768 pre_actions, external, 4769 flow_idx, error); 4770 if (ret) { 4771 ret = -rte_errno; 4772 goto exit; 4773 } 4774 dev_flow->handle->split_flow_id = mtr_tag_id; 4775 /* Setting the sfx group atrr. */ 4776 sfx_attr.group = sfx_attr.transfer ? 4777 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4778 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4779 } 4780 /* Add the prefix subflow. */ 4781 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4782 flow_get_prefix_layer_flags(dev_flow) : 4783 prefix_layers, dev_flow ? 4784 dev_flow->handle->mark : prefix_mark, 4785 &sfx_attr, sfx_items ? 4786 sfx_items : items, 4787 sfx_actions ? sfx_actions : actions, 4788 external, flow_idx, error); 4789 exit: 4790 if (sfx_actions) 4791 mlx5_free(sfx_actions); 4792 return ret; 4793 } 4794 4795 /** 4796 * The splitting for sample feature. 4797 * 4798 * Once Sample action is detected in the action list, the flow actions should 4799 * be split into prefix sub flow and suffix sub flow. 4800 * 4801 * The original items remain in the prefix sub flow, all actions preceding the 4802 * sample action and the sample action itself will be copied to the prefix 4803 * sub flow, the actions following the sample action will be copied to the 4804 * suffix sub flow, Queue action always be located in the suffix sub flow. 4805 * 4806 * In order to make the packet from prefix sub flow matches with suffix sub 4807 * flow, an extra tag action be added into prefix sub flow, and the suffix sub 4808 * flow uses tag item with the unique flow id. 
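 *
 * Illustrative example: for actions [SAMPLE, COUNT, QUEUE, END] on
 * NIC-Rx, the prefix sub flow carries [SET_TAG(flow ID), SAMPLE, END]
 * with the original pattern, while the suffix sub flow matches the tag
 * item and carries [COUNT, QUEUE, END].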
4809 * 4810 * @param dev 4811 * Pointer to Ethernet device. 4812 * @param[in] flow 4813 * Parent flow structure pointer. 4814 * @param[in] attr 4815 * Flow rule attributes. 4816 * @param[in] items 4817 * Pattern specification (list terminated by the END pattern item). 4818 * @param[in] actions 4819 * Associated actions (list terminated by the END action). 4820 * @param[in] external 4821 * This flow rule is created by request external to PMD. 4822 * @param[in] flow_idx 4823 * This memory pool index to the flow. 4824 * @param[out] error 4825 * Perform verbose error reporting if not NULL. 4826 * @return 4827 * 0 on success, negative value otherwise 4828 */ 4829 static int 4830 flow_create_split_sample(struct rte_eth_dev *dev, 4831 struct rte_flow *flow, 4832 const struct rte_flow_attr *attr, 4833 const struct rte_flow_item items[], 4834 const struct rte_flow_action actions[], 4835 bool external, uint32_t flow_idx, 4836 struct rte_flow_error *error) 4837 { 4838 struct mlx5_priv *priv = dev->data->dev_private; 4839 struct rte_flow_action *sfx_actions = NULL; 4840 struct rte_flow_action *pre_actions = NULL; 4841 struct rte_flow_item *sfx_items = NULL; 4842 struct mlx5_flow *dev_flow = NULL; 4843 struct rte_flow_attr sfx_attr = *attr; 4844 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4845 struct mlx5_flow_dv_sample_resource *sample_res; 4846 struct mlx5_flow_tbl_data_entry *sfx_tbl_data; 4847 struct mlx5_flow_tbl_resource *sfx_tbl; 4848 union mlx5_flow_tbl_key sfx_table_key; 4849 #endif 4850 size_t act_size; 4851 size_t item_size; 4852 uint32_t fdb_tx = 0; 4853 int32_t tag_id = 0; 4854 int actions_n = 0; 4855 int sample_action_pos; 4856 int qrss_action_pos; 4857 int ret = 0; 4858 4859 if (priv->sampler_en) 4860 actions_n = flow_check_match_action(actions, attr, 4861 RTE_FLOW_ACTION_TYPE_SAMPLE, 4862 &sample_action_pos, &qrss_action_pos); 4863 if (actions_n) { 4864 /* The prefix actions must includes sample, tag, end. */ 4865 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1) 4866 + sizeof(struct mlx5_rte_flow_action_set_tag); 4867 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM + 4868 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4869 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + 4870 item_size), 0, SOCKET_ID_ANY); 4871 if (!sfx_actions) 4872 return rte_flow_error_set(error, ENOMEM, 4873 RTE_FLOW_ERROR_TYPE_ACTION, 4874 NULL, "no memory to split " 4875 "sample flow"); 4876 /* The representor_id is -1 for uplink. */ 4877 fdb_tx = (attr->transfer && priv->representor_id != -1); 4878 if (!fdb_tx) 4879 sfx_items = (struct rte_flow_item *)((char *)sfx_actions 4880 + act_size); 4881 pre_actions = sfx_actions + actions_n; 4882 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items, 4883 actions, sfx_actions, 4884 pre_actions, actions_n, 4885 sample_action_pos, 4886 qrss_action_pos, error); 4887 if (tag_id < 0 || (!fdb_tx && !tag_id)) { 4888 ret = -rte_errno; 4889 goto exit; 4890 } 4891 /* Add the prefix subflow. */ 4892 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr, 4893 items, pre_actions, external, 4894 flow_idx, error); 4895 if (ret) { 4896 ret = -rte_errno; 4897 goto exit; 4898 } 4899 dev_flow->handle->split_flow_id = tag_id; 4900 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4901 /* Set the sfx group attr. 
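 * The suffix group is derived from the sample action's normal-path
 * table, so the suffix sub flow continues where the sampler forwards
 * the non-mirrored traffic.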
*/
4902 sample_res = (struct mlx5_flow_dv_sample_resource *)
4903 dev_flow->dv.sample_res;
4904 sfx_tbl = (struct mlx5_flow_tbl_resource *)
4905 sample_res->normal_path_tbl;
4906 sfx_tbl_data = container_of(sfx_tbl,
4907 struct mlx5_flow_tbl_data_entry, tbl);
4908 sfx_table_key.v64 = sfx_tbl_data->entry.key;
4909 sfx_attr.group = sfx_attr.transfer ?
4910 (sfx_table_key.table_id - 1) :
4911 sfx_table_key.table_id;
4912 #endif
4913 }
4914 /* Add the suffix subflow. */
4915 ret = flow_create_split_meter(dev, flow, dev_flow ?
4916 flow_get_prefix_layer_flags(dev_flow) : 0,
4917 dev_flow ? dev_flow->handle->mark : 0,
4918 &sfx_attr, sfx_items ? sfx_items : items,
4919 sfx_actions ? sfx_actions : actions,
4920 external, flow_idx, error);
4921 exit:
4922 if (sfx_actions)
4923 mlx5_free(sfx_actions);
4924 return ret;
4925 }
4926
4927 /**
4928 * Split the flow into a set of subflows. The splitters might be linked
4929 * in a chain, like this:
4930 * flow_create_split_outer() calls:
4931 * flow_create_split_meter() calls:
4932 * flow_create_split_metadata(meter_subflow_0) calls:
4933 * flow_create_split_inner(metadata_subflow_0)
4934 * flow_create_split_inner(metadata_subflow_1)
4935 * flow_create_split_inner(metadata_subflow_2)
4936 * flow_create_split_metadata(meter_subflow_1) calls:
4937 * flow_create_split_inner(metadata_subflow_0)
4938 * flow_create_split_inner(metadata_subflow_1)
4939 * flow_create_split_inner(metadata_subflow_2)
4940 *
4941 * This provides a flexible way to add new levels of flow splitting.
4942 * All successfully created subflows are included in the
4943 * parent flow dev_flow list.
4944 *
4945 * @param dev
4946 * Pointer to Ethernet device.
4947 * @param[in] flow
4948 * Parent flow structure pointer.
4949 * @param[in] attr
4950 * Flow rule attributes.
4951 * @param[in] items
4952 * Pattern specification (list terminated by the END pattern item).
4953 * @param[in] actions
4954 * Associated actions (list terminated by the END action).
4955 * @param[in] external
4956 * This flow rule is created by request external to PMD.
4957 * @param[in] flow_idx
4958 * This memory pool index to the flow.
4959 * @param[out] error
4960 * Perform verbose error reporting if not NULL.
4961 * @return
4962 * 0 on success, negative value otherwise
4963 */
4964 static int
4965 flow_create_split_outer(struct rte_eth_dev *dev,
4966 struct rte_flow *flow,
4967 const struct rte_flow_attr *attr,
4968 const struct rte_flow_item items[],
4969 const struct rte_flow_action actions[],
4970 bool external, uint32_t flow_idx,
4971 struct rte_flow_error *error)
4972 {
4973 int ret;
4974
4975 ret = flow_create_split_sample(dev, flow, attr, items,
4976 actions, external, flow_idx, error);
4977 MLX5_ASSERT(ret <= 0);
4978 return ret;
4979 }
4980
4981 /**
4982 * Create a flow and add it to @p list.
4983 *
4984 * @param dev
4985 * Pointer to Ethernet device.
4986 * @param list
4987 * Pointer to the indexed flow list. If this parameter is NULL,
4988 * no list insertion occurs, the flow is just created and
4989 * it is the caller's responsibility to track the
4990 * created flow.
4991 * @param[in] attr
4992 * Flow rule attributes.
4993 * @param[in] items
4994 * Pattern specification (list terminated by the END pattern item).
4995 * @param[in] actions
4996 * Associated actions (list terminated by the END action).
4997 * @param[in] external
4998 * This flow rule is created by request external to PMD.
4999 * @param[out] error
5000 * Perform verbose error reporting if not NULL.
5001 * 5002 * @return 5003 * A flow index on success, 0 otherwise and rte_errno is set. 5004 */ 5005 static uint32_t 5006 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 5007 const struct rte_flow_attr *attr, 5008 const struct rte_flow_item items[], 5009 const struct rte_flow_action actions[], 5010 bool external, struct rte_flow_error *error) 5011 { 5012 struct mlx5_priv *priv = dev->data->dev_private; 5013 struct rte_flow *flow = NULL; 5014 struct mlx5_flow *dev_flow; 5015 const struct rte_flow_action_rss *rss; 5016 union { 5017 struct mlx5_flow_expand_rss buf; 5018 uint8_t buffer[2048]; 5019 } expand_buffer; 5020 union { 5021 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5022 uint8_t buffer[2048]; 5023 } actions_rx; 5024 union { 5025 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5026 uint8_t buffer[2048]; 5027 } actions_hairpin_tx; 5028 union { 5029 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 5030 uint8_t buffer[2048]; 5031 } items_tx; 5032 struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; 5033 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 5034 priv->rss_desc)[!!priv->flow_idx]; 5035 const struct rte_flow_action *p_actions_rx = actions; 5036 uint32_t i; 5037 uint32_t idx = 0; 5038 int hairpin_flow; 5039 uint32_t hairpin_id = 0; 5040 struct rte_flow_attr attr_tx = { .priority = 0 }; 5041 struct rte_flow_attr attr_factor = {0}; 5042 int ret; 5043 5044 memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr)); 5045 if (external) 5046 attr_factor.group *= MLX5_FLOW_TABLE_FACTOR; 5047 hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions); 5048 ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx, 5049 external, hairpin_flow, error); 5050 if (ret < 0) 5051 return 0; 5052 if (hairpin_flow > 0) { 5053 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 5054 rte_errno = EINVAL; 5055 return 0; 5056 } 5057 flow_hairpin_split(dev, actions, actions_rx.actions, 5058 actions_hairpin_tx.actions, items_tx.items, 5059 &hairpin_id); 5060 p_actions_rx = actions_rx.actions; 5061 } 5062 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 5063 if (!flow) { 5064 rte_errno = ENOMEM; 5065 goto error_before_flow; 5066 } 5067 flow->drv_type = flow_get_drv_type(dev, &attr_factor); 5068 if (hairpin_id != 0) 5069 flow->hairpin_flow_id = hairpin_id; 5070 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 5071 flow->drv_type < MLX5_FLOW_TYPE_MAX); 5072 memset(rss_desc, 0, sizeof(*rss_desc)); 5073 rss = flow_get_rss_action(p_actions_rx); 5074 if (rss) { 5075 /* 5076 * The following information is required by 5077 * mlx5_flow_hashfields_adjust() in advance. 5078 */ 5079 rss_desc->level = rss->level; 5080 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 5081 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 5082 } 5083 flow->dev_handles = 0; 5084 if (rss && rss->types) { 5085 unsigned int graph_root; 5086 5087 graph_root = find_graph_root(items, rss->level); 5088 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 5089 items, rss->types, 5090 mlx5_support_expansion, graph_root); 5091 MLX5_ASSERT(ret > 0 && 5092 (unsigned int)ret < sizeof(expand_buffer.buffer)); 5093 } else { 5094 buf->entries = 1; 5095 buf->entry[0].pattern = (void *)(uintptr_t)items; 5096 } 5097 /* 5098 * Record the start index when there is a nested call. All sub-flows 5099 * need to be translated before another calling. 5100 * No need to use ping-pong buffer to save memory here. 
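 * A nested call happens, for instance, when flow_mreg_update_copy_table()
 * creates the metadata copy flow through flow_list_create() while the
 * original flow is still being built.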
5101 */ 5102 if (priv->flow_idx) { 5103 MLX5_ASSERT(!priv->flow_nested_idx); 5104 priv->flow_nested_idx = priv->flow_idx; 5105 } 5106 for (i = 0; i < buf->entries; ++i) { 5107 /* 5108 * The splitter may create multiple dev_flows, 5109 * depending on configuration. In the simplest 5110 * case it just creates unmodified original flow. 5111 */ 5112 ret = flow_create_split_outer(dev, flow, &attr_factor, 5113 buf->entry[i].pattern, 5114 p_actions_rx, external, idx, 5115 error); 5116 if (ret < 0) 5117 goto error; 5118 } 5119 /* Create the tx flow. */ 5120 if (hairpin_flow) { 5121 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 5122 attr_tx.ingress = 0; 5123 attr_tx.egress = 1; 5124 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 5125 actions_hairpin_tx.actions, 5126 idx, error); 5127 if (!dev_flow) 5128 goto error; 5129 dev_flow->flow = flow; 5130 dev_flow->external = 0; 5131 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 5132 dev_flow->handle, next); 5133 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 5134 items_tx.items, 5135 actions_hairpin_tx.actions, error); 5136 if (ret < 0) 5137 goto error; 5138 } 5139 /* 5140 * Update the metadata register copy table. If extensive 5141 * metadata feature is enabled and registers are supported 5142 * we might create the extra rte_flow for each unique 5143 * MARK/FLAG action ID. 5144 * 5145 * The table is updated for ingress Flows only, because 5146 * the egress Flows belong to the different device and 5147 * copy table should be updated in peer NIC Rx domain. 5148 */ 5149 if (attr_factor.ingress && 5150 (external || attr_factor.group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 5151 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 5152 if (ret) 5153 goto error; 5154 } 5155 /* 5156 * If the flow is external (from application) OR device is started, then 5157 * the flow will be applied immediately. 5158 */ 5159 if (external || dev->data->dev_started) { 5160 ret = flow_drv_apply(dev, flow, error); 5161 if (ret < 0) 5162 goto error; 5163 } 5164 if (list) 5165 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 5166 flow, next); 5167 flow_rxq_flags_set(dev, flow); 5168 /* Nested flow creation index recovery. */ 5169 priv->flow_idx = priv->flow_nested_idx; 5170 if (priv->flow_nested_idx) 5171 priv->flow_nested_idx = 0; 5172 return idx; 5173 error: 5174 MLX5_ASSERT(flow); 5175 ret = rte_errno; /* Save rte_errno before cleanup. */ 5176 flow_mreg_del_copy_action(dev, flow); 5177 flow_drv_destroy(dev, flow); 5178 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 5179 rte_errno = ret; /* Restore rte_errno. */ 5180 error_before_flow: 5181 ret = rte_errno; 5182 if (hairpin_id) 5183 mlx5_flow_id_release(priv->sh->flow_id_pool, 5184 hairpin_id); 5185 rte_errno = ret; 5186 priv->flow_idx = priv->flow_nested_idx; 5187 if (priv->flow_nested_idx) 5188 priv->flow_nested_idx = 0; 5189 return 0; 5190 } 5191 5192 /** 5193 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 5194 * incoming packets to table 1. 5195 * 5196 * Other flow rules, requested for group n, will be created in 5197 * e-switch table n+1. 5198 * Jump action to e-switch group n will be created to group n+1. 5199 * 5200 * Used when working in switchdev mode, to utilise advantages of table 1 5201 * and above. 5202 * 5203 * @param dev 5204 * Pointer to Ethernet device. 5205 * 5206 * @return 5207 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
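 *
 * The created rule matches any packet on the E-Switch root table
 * (transfer, ingress, group 0) and only jumps to group 1.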
5208 */ 5209 struct rte_flow * 5210 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 5211 { 5212 const struct rte_flow_attr attr = { 5213 .group = 0, 5214 .priority = 0, 5215 .ingress = 1, 5216 .egress = 0, 5217 .transfer = 1, 5218 }; 5219 const struct rte_flow_item pattern = { 5220 .type = RTE_FLOW_ITEM_TYPE_END, 5221 }; 5222 struct rte_flow_action_jump jump = { 5223 .group = 1, 5224 }; 5225 const struct rte_flow_action actions[] = { 5226 { 5227 .type = RTE_FLOW_ACTION_TYPE_JUMP, 5228 .conf = &jump, 5229 }, 5230 { 5231 .type = RTE_FLOW_ACTION_TYPE_END, 5232 }, 5233 }; 5234 struct mlx5_priv *priv = dev->data->dev_private; 5235 struct rte_flow_error error; 5236 5237 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 5238 &attr, &pattern, 5239 actions, false, &error); 5240 } 5241 5242 /** 5243 * Validate a flow supported by the NIC. 5244 * 5245 * @see rte_flow_validate() 5246 * @see rte_flow_ops 5247 */ 5248 int 5249 mlx5_flow_validate(struct rte_eth_dev *dev, 5250 const struct rte_flow_attr *attr, 5251 const struct rte_flow_item items[], 5252 const struct rte_flow_action actions[], 5253 struct rte_flow_error *error) 5254 { 5255 int hairpin_flow; 5256 5257 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 5258 return flow_drv_validate(dev, attr, items, actions, 5259 true, hairpin_flow, error); 5260 } 5261 5262 /** 5263 * Create a flow. 5264 * 5265 * @see rte_flow_create() 5266 * @see rte_flow_ops 5267 */ 5268 struct rte_flow * 5269 mlx5_flow_create(struct rte_eth_dev *dev, 5270 const struct rte_flow_attr *attr, 5271 const struct rte_flow_item items[], 5272 const struct rte_flow_action actions[], 5273 struct rte_flow_error *error) 5274 { 5275 struct mlx5_priv *priv = dev->data->dev_private; 5276 5277 /* 5278 * If the device is not started yet, it is not allowed to created a 5279 * flow from application. PMD default flows and traffic control flows 5280 * are not affected. 5281 */ 5282 if (unlikely(!dev->data->dev_started)) { 5283 DRV_LOG(DEBUG, "port %u is not started when " 5284 "inserting a flow", dev->data->port_id); 5285 rte_flow_error_set(error, ENODEV, 5286 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5287 NULL, 5288 "port not started"); 5289 return NULL; 5290 } 5291 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 5292 attr, items, actions, true, error); 5293 } 5294 5295 /** 5296 * Destroy a flow in a list. 5297 * 5298 * @param dev 5299 * Pointer to Ethernet device. 5300 * @param list 5301 * Pointer to the Indexed flow list. If this parameter NULL, 5302 * there is no flow removal from the list. Be noted that as 5303 * flow is add to the indexed list, memory of the indexed 5304 * list points to maybe changed as flow destroyed. 5305 * @param[in] flow_idx 5306 * Index of flow to destroy. 5307 */ 5308 static void 5309 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 5310 uint32_t flow_idx) 5311 { 5312 struct mlx5_priv *priv = dev->data->dev_private; 5313 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5314 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5315 [MLX5_IPOOL_RTE_FLOW], flow_idx); 5316 5317 if (!flow) 5318 return; 5319 /* 5320 * Update RX queue flags only if port is started, otherwise it is 5321 * already clean. 
5322 */ 5323 if (dev->data->dev_started) 5324 flow_rxq_flags_trim(dev, flow); 5325 if (flow->hairpin_flow_id) 5326 mlx5_flow_id_release(priv->sh->flow_id_pool, 5327 flow->hairpin_flow_id); 5328 flow_drv_destroy(dev, flow); 5329 if (list) 5330 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 5331 flow_idx, flow, next); 5332 flow_mreg_del_copy_action(dev, flow); 5333 if (flow->fdir) { 5334 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5335 if (priv_fdir_flow->rix_flow == flow_idx) 5336 break; 5337 } 5338 if (priv_fdir_flow) { 5339 LIST_REMOVE(priv_fdir_flow, next); 5340 mlx5_free(priv_fdir_flow->fdir); 5341 mlx5_free(priv_fdir_flow); 5342 } 5343 } 5344 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5345 } 5346 5347 /** 5348 * Destroy all flows. 5349 * 5350 * @param dev 5351 * Pointer to Ethernet device. 5352 * @param list 5353 * Pointer to the Indexed flow list. 5354 * @param active 5355 * If flushing is called avtively. 5356 */ 5357 void 5358 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 5359 { 5360 uint32_t num_flushed = 0; 5361 5362 while (*list) { 5363 flow_list_destroy(dev, list, *list); 5364 num_flushed++; 5365 } 5366 if (active) { 5367 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 5368 dev->data->port_id, num_flushed); 5369 } 5370 } 5371 5372 /** 5373 * Remove all flows. 5374 * 5375 * @param dev 5376 * Pointer to Ethernet device. 5377 * @param list 5378 * Pointer to the Indexed flow list. 5379 */ 5380 void 5381 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 5382 { 5383 struct mlx5_priv *priv = dev->data->dev_private; 5384 struct rte_flow *flow = NULL; 5385 uint32_t idx; 5386 5387 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5388 flow, next) { 5389 flow_drv_remove(dev, flow); 5390 flow_mreg_stop_copy_action(dev, flow); 5391 } 5392 flow_mreg_del_default_copy_action(dev); 5393 flow_rxq_flags_clear(dev); 5394 } 5395 5396 /** 5397 * Add all flows. 5398 * 5399 * @param dev 5400 * Pointer to Ethernet device. 5401 * @param list 5402 * Pointer to the Indexed flow list. 5403 * 5404 * @return 5405 * 0 on success, a negative errno value otherwise and rte_errno is set. 5406 */ 5407 int 5408 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 5409 { 5410 struct mlx5_priv *priv = dev->data->dev_private; 5411 struct rte_flow *flow = NULL; 5412 struct rte_flow_error error; 5413 uint32_t idx; 5414 int ret = 0; 5415 5416 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5417 ret = flow_mreg_add_default_copy_action(dev, &error); 5418 if (ret < 0) 5419 return -rte_errno; 5420 /* Apply Flows created by application. */ 5421 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5422 flow, next) { 5423 ret = flow_mreg_start_copy_action(dev, flow); 5424 if (ret < 0) 5425 goto error; 5426 ret = flow_drv_apply(dev, flow, &error); 5427 if (ret < 0) 5428 goto error; 5429 flow_rxq_flags_set(dev, flow); 5430 } 5431 return 0; 5432 error: 5433 ret = rte_errno; /* Save rte_errno before cleanup. */ 5434 mlx5_flow_stop(dev, list); 5435 rte_errno = ret; /* Restore rte_errno. */ 5436 return -rte_errno; 5437 } 5438 5439 /** 5440 * Stop all default actions for flows. 5441 * 5442 * @param dev 5443 * Pointer to Ethernet device. 5444 */ 5445 void 5446 mlx5_flow_stop_default(struct rte_eth_dev *dev) 5447 { 5448 flow_mreg_del_default_copy_action(dev); 5449 flow_rxq_flags_clear(dev); 5450 } 5451 5452 /** 5453 * Start all default actions for flows. 
5454 * 5455 * @param dev 5456 * Pointer to Ethernet device. 5457 * @return 5458 * 0 on success, a negative errno value otherwise and rte_errno is set. 5459 */ 5460 int 5461 mlx5_flow_start_default(struct rte_eth_dev *dev) 5462 { 5463 struct rte_flow_error error; 5464 5465 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5466 return flow_mreg_add_default_copy_action(dev, &error); 5467 } 5468 5469 /** 5470 * Allocate intermediate resources for flow creation. 5471 * 5472 * @param dev 5473 * Pointer to Ethernet device. 5474 */ 5475 void 5476 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 5477 { 5478 struct mlx5_priv *priv = dev->data->dev_private; 5479 5480 if (!priv->inter_flows) { 5481 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, 5482 MLX5_NUM_MAX_DEV_FLOWS * 5483 sizeof(struct mlx5_flow) + 5484 (sizeof(struct mlx5_flow_rss_desc) + 5485 sizeof(uint16_t) * UINT16_MAX) * 2, 0, 5486 SOCKET_ID_ANY); 5487 if (!priv->inter_flows) { 5488 DRV_LOG(ERR, "can't allocate intermediate memory."); 5489 return; 5490 } 5491 } 5492 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 5493 [MLX5_NUM_MAX_DEV_FLOWS]; 5494 /* Reset the index. */ 5495 priv->flow_idx = 0; 5496 priv->flow_nested_idx = 0; 5497 } 5498 5499 /** 5500 * Free intermediate resources for flows. 5501 * 5502 * @param dev 5503 * Pointer to Ethernet device. 5504 */ 5505 void 5506 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 5507 { 5508 struct mlx5_priv *priv = dev->data->dev_private; 5509 5510 mlx5_free(priv->inter_flows); 5511 priv->inter_flows = NULL; 5512 } 5513 5514 /** 5515 * Verify the flow list is empty 5516 * 5517 * @param dev 5518 * Pointer to Ethernet device. 5519 * 5520 * @return the number of flows not released. 5521 */ 5522 int 5523 mlx5_flow_verify(struct rte_eth_dev *dev) 5524 { 5525 struct mlx5_priv *priv = dev->data->dev_private; 5526 struct rte_flow *flow; 5527 uint32_t idx; 5528 int ret = 0; 5529 5530 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 5531 flow, next) { 5532 DRV_LOG(DEBUG, "port %u flow %p still referenced", 5533 dev->data->port_id, (void *)flow); 5534 ++ret; 5535 } 5536 return ret; 5537 } 5538 5539 /** 5540 * Enable default hairpin egress flow. 5541 * 5542 * @param dev 5543 * Pointer to Ethernet device. 5544 * @param queue 5545 * The queue index. 5546 * 5547 * @return 5548 * 0 on success, a negative errno value otherwise and rte_errno is set. 
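 *
 * In rte_flow terms, the rule installed below is roughly equivalent to
 * the following (a summary of the code that follows, using the
 * PMD-internal TX queue item):
 *
 *     attr:    egress, priority 0
 *     pattern: MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE spec=queue mask=UINT32_MAX
 *              / END
 *     actions: JUMP group=MLX5_HAIRPIN_TX_TABLE / END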
5549 */ 5550 int 5551 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 5552 uint32_t queue) 5553 { 5554 struct mlx5_priv *priv = dev->data->dev_private; 5555 const struct rte_flow_attr attr = { 5556 .egress = 1, 5557 .priority = 0, 5558 }; 5559 struct mlx5_rte_flow_item_tx_queue queue_spec = { 5560 .queue = queue, 5561 }; 5562 struct mlx5_rte_flow_item_tx_queue queue_mask = { 5563 .queue = UINT32_MAX, 5564 }; 5565 struct rte_flow_item items[] = { 5566 { 5567 .type = (enum rte_flow_item_type) 5568 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 5569 .spec = &queue_spec, 5570 .last = NULL, 5571 .mask = &queue_mask, 5572 }, 5573 { 5574 .type = RTE_FLOW_ITEM_TYPE_END, 5575 }, 5576 }; 5577 struct rte_flow_action_jump jump = { 5578 .group = MLX5_HAIRPIN_TX_TABLE, 5579 }; 5580 struct rte_flow_action actions[2]; 5581 uint32_t flow_idx; 5582 struct rte_flow_error error; 5583 5584 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 5585 actions[0].conf = &jump; 5586 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 5587 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5588 &attr, items, actions, false, &error); 5589 if (!flow_idx) { 5590 DRV_LOG(DEBUG, 5591 "Failed to create ctrl flow: rte_errno(%d)," 5592 " type(%d), message(%s)", 5593 rte_errno, error.type, 5594 error.message ? error.message : " (no stated reason)"); 5595 return -rte_errno; 5596 } 5597 return 0; 5598 } 5599 5600 /** 5601 * Enable a control flow configured from the control plane. 5602 * 5603 * @param dev 5604 * Pointer to Ethernet device. 5605 * @param eth_spec 5606 * An Ethernet flow spec to apply. 5607 * @param eth_mask 5608 * An Ethernet flow mask to apply. 5609 * @param vlan_spec 5610 * A VLAN flow spec to apply. 5611 * @param vlan_mask 5612 * A VLAN flow mask to apply. 5613 * 5614 * @return 5615 * 0 on success, a negative errno value otherwise and rte_errno is set. 5616 */ 5617 int 5618 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 5619 struct rte_flow_item_eth *eth_spec, 5620 struct rte_flow_item_eth *eth_mask, 5621 struct rte_flow_item_vlan *vlan_spec, 5622 struct rte_flow_item_vlan *vlan_mask) 5623 { 5624 struct mlx5_priv *priv = dev->data->dev_private; 5625 const struct rte_flow_attr attr = { 5626 .ingress = 1, 5627 .priority = MLX5_FLOW_PRIO_RSVD, 5628 }; 5629 struct rte_flow_item items[] = { 5630 { 5631 .type = RTE_FLOW_ITEM_TYPE_ETH, 5632 .spec = eth_spec, 5633 .last = NULL, 5634 .mask = eth_mask, 5635 }, 5636 { 5637 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 5638 RTE_FLOW_ITEM_TYPE_END, 5639 .spec = vlan_spec, 5640 .last = NULL, 5641 .mask = vlan_mask, 5642 }, 5643 { 5644 .type = RTE_FLOW_ITEM_TYPE_END, 5645 }, 5646 }; 5647 uint16_t queue[priv->reta_idx_n]; 5648 struct rte_flow_action_rss action_rss = { 5649 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 5650 .level = 0, 5651 .types = priv->rss_conf.rss_hf, 5652 .key_len = priv->rss_conf.rss_key_len, 5653 .queue_num = priv->reta_idx_n, 5654 .key = priv->rss_conf.rss_key, 5655 .queue = queue, 5656 }; 5657 struct rte_flow_action actions[] = { 5658 { 5659 .type = RTE_FLOW_ACTION_TYPE_RSS, 5660 .conf = &action_rss, 5661 }, 5662 { 5663 .type = RTE_FLOW_ACTION_TYPE_END, 5664 }, 5665 }; 5666 uint32_t flow_idx; 5667 struct rte_flow_error error; 5668 unsigned int i; 5669 5670 if (!priv->reta_idx_n || !priv->rxqs_n) { 5671 return 0; 5672 } 5673 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 5674 action_rss.types = 0; 5675 for (i = 0; i != priv->reta_idx_n; ++i) 5676 queue[i] = (*priv->reta_idx)[i]; 5677 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5678 &attr, items, actions, false, &error); 5679 if (!flow_idx) 5680 return -rte_errno; 5681 return 0; 5682 } 5683 5684 /** 5685 * Enable a flow control configured from the control plane. 5686 * 5687 * @param dev 5688 * Pointer to Ethernet device. 5689 * @param eth_spec 5690 * An Ethernet flow spec to apply. 5691 * @param eth_mask 5692 * An Ethernet flow mask to apply. 5693 * 5694 * @return 5695 * 0 on success, a negative errno value otherwise and rte_errno is set. 5696 */ 5697 int 5698 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5699 struct rte_flow_item_eth *eth_spec, 5700 struct rte_flow_item_eth *eth_mask) 5701 { 5702 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5703 } 5704 5705 /** 5706 * Create default miss flow rule matching lacp traffic 5707 * 5708 * @param dev 5709 * Pointer to Ethernet device. 5710 * @param eth_spec 5711 * An Ethernet flow spec to apply. 5712 * 5713 * @return 5714 * 0 on success, a negative errno value otherwise and rte_errno is set. 5715 */ 5716 int 5717 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5718 { 5719 struct mlx5_priv *priv = dev->data->dev_private; 5720 /* 5721 * The LACP matching is done by only using ether type since using 5722 * a multicast dst mac causes kernel to give low priority to this flow. 5723 */ 5724 static const struct rte_flow_item_eth lacp_spec = { 5725 .type = RTE_BE16(0x8809), 5726 }; 5727 static const struct rte_flow_item_eth lacp_mask = { 5728 .type = 0xffff, 5729 }; 5730 const struct rte_flow_attr attr = { 5731 .ingress = 1, 5732 }; 5733 struct rte_flow_item items[] = { 5734 { 5735 .type = RTE_FLOW_ITEM_TYPE_ETH, 5736 .spec = &lacp_spec, 5737 .mask = &lacp_mask, 5738 }, 5739 { 5740 .type = RTE_FLOW_ITEM_TYPE_END, 5741 }, 5742 }; 5743 struct rte_flow_action actions[] = { 5744 { 5745 .type = (enum rte_flow_action_type) 5746 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5747 }, 5748 { 5749 .type = RTE_FLOW_ACTION_TYPE_END, 5750 }, 5751 }; 5752 struct rte_flow_error error; 5753 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5754 &attr, items, actions, false, &error); 5755 5756 if (!flow_idx) 5757 return -rte_errno; 5758 return 0; 5759 } 5760 5761 /** 5762 * Destroy a flow. 
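 *
 * Note that the rte_flow handle handed back to the application is the flow
 * index from the MLX5_IPOOL_RTE_FLOW pool cast to a pointer (see the
 * (uintptr_t) conversions here and in mlx5_flow_create()). An illustrative
 * application-side pairing through the generic API only (hypothetical
 * usage, not taken from this file):
 *
 *     struct rte_flow_error err;
 *     struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                          actions, &err);
 *     if (f != NULL)
 *             rte_flow_destroy(port_id, f, &err);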
5763 * 5764 * @see rte_flow_destroy() 5765 * @see rte_flow_ops 5766 */ 5767 int 5768 mlx5_flow_destroy(struct rte_eth_dev *dev, 5769 struct rte_flow *flow, 5770 struct rte_flow_error *error __rte_unused) 5771 { 5772 struct mlx5_priv *priv = dev->data->dev_private; 5773 5774 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5775 return 0; 5776 } 5777 5778 /** 5779 * Destroy all flows. 5780 * 5781 * @see rte_flow_flush() 5782 * @see rte_flow_ops 5783 */ 5784 int 5785 mlx5_flow_flush(struct rte_eth_dev *dev, 5786 struct rte_flow_error *error __rte_unused) 5787 { 5788 struct mlx5_priv *priv = dev->data->dev_private; 5789 5790 mlx5_flow_list_flush(dev, &priv->flows, false); 5791 return 0; 5792 } 5793 5794 /** 5795 * Isolated mode. 5796 * 5797 * @see rte_flow_isolate() 5798 * @see rte_flow_ops 5799 */ 5800 int 5801 mlx5_flow_isolate(struct rte_eth_dev *dev, 5802 int enable, 5803 struct rte_flow_error *error) 5804 { 5805 struct mlx5_priv *priv = dev->data->dev_private; 5806 5807 if (dev->data->dev_started) { 5808 rte_flow_error_set(error, EBUSY, 5809 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5810 NULL, 5811 "port must be stopped first"); 5812 return -rte_errno; 5813 } 5814 priv->isolated = !!enable; 5815 if (enable) 5816 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5817 else 5818 dev->dev_ops = &mlx5_os_dev_ops; 5819 5820 dev->rx_descriptor_status = mlx5_rx_descriptor_status; 5821 dev->tx_descriptor_status = mlx5_tx_descriptor_status; 5822 5823 return 0; 5824 } 5825 5826 /** 5827 * Query a flow. 5828 * 5829 * @see rte_flow_query() 5830 * @see rte_flow_ops 5831 */ 5832 static int 5833 flow_drv_query(struct rte_eth_dev *dev, 5834 uint32_t flow_idx, 5835 const struct rte_flow_action *actions, 5836 void *data, 5837 struct rte_flow_error *error) 5838 { 5839 struct mlx5_priv *priv = dev->data->dev_private; 5840 const struct mlx5_flow_driver_ops *fops; 5841 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5842 [MLX5_IPOOL_RTE_FLOW], 5843 flow_idx); 5844 enum mlx5_flow_drv_type ftype; 5845 5846 if (!flow) { 5847 return rte_flow_error_set(error, ENOENT, 5848 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5849 NULL, 5850 "invalid flow handle"); 5851 } 5852 ftype = flow->drv_type; 5853 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5854 fops = flow_get_drv_ops(ftype); 5855 5856 return fops->query(dev, flow, actions, data, error); 5857 } 5858 5859 /** 5860 * Query a flow. 5861 * 5862 * @see rte_flow_query() 5863 * @see rte_flow_ops 5864 */ 5865 int 5866 mlx5_flow_query(struct rte_eth_dev *dev, 5867 struct rte_flow *flow, 5868 const struct rte_flow_action *actions, 5869 void *data, 5870 struct rte_flow_error *error) 5871 { 5872 int ret; 5873 5874 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5875 error); 5876 if (ret < 0) 5877 return ret; 5878 return 0; 5879 } 5880 5881 /** 5882 * Convert a flow director filter to a generic flow. 5883 * 5884 * @param dev 5885 * Pointer to Ethernet device. 5886 * @param fdir_filter 5887 * Flow director filter to add. 5888 * @param attributes 5889 * Generic flow parameters structure. 5890 * 5891 * @return 5892 * 0 on success, a negative errno value otherwise and rte_errno is set. 
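 *
 * The converted rule always has the same shape (a summary of the code
 * below, not an extra contract):
 *
 *     attributes->items[0]    ETH (always present)
 *     attributes->items[1]    IPV4 or IPV6, selected from the FDIR flow type
 *     attributes->items[2]    UDP or TCP (absent for the *_OTHER flow types)
 *     attributes->actions[0]  QUEUE (RTE_ETH_FDIR_ACCEPT) or DROP (REJECT)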
5893 */ 5894 static int 5895 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5896 const struct rte_eth_fdir_filter *fdir_filter, 5897 struct mlx5_fdir *attributes) 5898 { 5899 struct mlx5_priv *priv = dev->data->dev_private; 5900 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5901 const struct rte_eth_fdir_masks *mask = 5902 &dev->data->dev_conf.fdir_conf.mask; 5903 5904 /* Validate queue number. */ 5905 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5906 DRV_LOG(ERR, "port %u invalid queue number %d", 5907 dev->data->port_id, fdir_filter->action.rx_queue); 5908 rte_errno = EINVAL; 5909 return -rte_errno; 5910 } 5911 attributes->attr.ingress = 1; 5912 attributes->items[0] = (struct rte_flow_item) { 5913 .type = RTE_FLOW_ITEM_TYPE_ETH, 5914 .spec = &attributes->l2, 5915 .mask = &attributes->l2_mask, 5916 }; 5917 switch (fdir_filter->action.behavior) { 5918 case RTE_ETH_FDIR_ACCEPT: 5919 attributes->actions[0] = (struct rte_flow_action){ 5920 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5921 .conf = &attributes->queue, 5922 }; 5923 break; 5924 case RTE_ETH_FDIR_REJECT: 5925 attributes->actions[0] = (struct rte_flow_action){ 5926 .type = RTE_FLOW_ACTION_TYPE_DROP, 5927 }; 5928 break; 5929 default: 5930 DRV_LOG(ERR, "port %u invalid behavior %d", 5931 dev->data->port_id, 5932 fdir_filter->action.behavior); 5933 rte_errno = ENOTSUP; 5934 return -rte_errno; 5935 } 5936 attributes->queue.index = fdir_filter->action.rx_queue; 5937 /* Handle L3. */ 5938 switch (fdir_filter->input.flow_type) { 5939 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5940 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5941 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5942 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5943 .src_addr = input->flow.ip4_flow.src_ip, 5944 .dst_addr = input->flow.ip4_flow.dst_ip, 5945 .time_to_live = input->flow.ip4_flow.ttl, 5946 .type_of_service = input->flow.ip4_flow.tos, 5947 }; 5948 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5949 .src_addr = mask->ipv4_mask.src_ip, 5950 .dst_addr = mask->ipv4_mask.dst_ip, 5951 .time_to_live = mask->ipv4_mask.ttl, 5952 .type_of_service = mask->ipv4_mask.tos, 5953 .next_proto_id = mask->ipv4_mask.proto, 5954 }; 5955 attributes->items[1] = (struct rte_flow_item){ 5956 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5957 .spec = &attributes->l3, 5958 .mask = &attributes->l3_mask, 5959 }; 5960 break; 5961 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5962 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5963 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5964 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5965 .hop_limits = input->flow.ipv6_flow.hop_limits, 5966 .proto = input->flow.ipv6_flow.proto, 5967 }; 5968 5969 memcpy(attributes->l3.ipv6.hdr.src_addr, 5970 input->flow.ipv6_flow.src_ip, 5971 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5972 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5973 input->flow.ipv6_flow.dst_ip, 5974 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5975 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 5976 mask->ipv6_mask.src_ip, 5977 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5978 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 5979 mask->ipv6_mask.dst_ip, 5980 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5981 attributes->items[1] = (struct rte_flow_item){ 5982 .type = RTE_FLOW_ITEM_TYPE_IPV6, 5983 .spec = &attributes->l3, 5984 .mask = &attributes->l3_mask, 5985 }; 5986 break; 5987 default: 5988 DRV_LOG(ERR, "port %u invalid flow type%d", 5989 dev->data->port_id, fdir_filter->input.flow_type); 5990 rte_errno = ENOTSUP; 5991 return -rte_errno; 5992 } 5993 /* Handle L4. 
*/ 5994 switch (fdir_filter->input.flow_type) { 5995 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5996 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5997 .src_port = input->flow.udp4_flow.src_port, 5998 .dst_port = input->flow.udp4_flow.dst_port, 5999 }; 6000 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6001 .src_port = mask->src_port_mask, 6002 .dst_port = mask->dst_port_mask, 6003 }; 6004 attributes->items[2] = (struct rte_flow_item){ 6005 .type = RTE_FLOW_ITEM_TYPE_UDP, 6006 .spec = &attributes->l4, 6007 .mask = &attributes->l4_mask, 6008 }; 6009 break; 6010 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 6011 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6012 .src_port = input->flow.tcp4_flow.src_port, 6013 .dst_port = input->flow.tcp4_flow.dst_port, 6014 }; 6015 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6016 .src_port = mask->src_port_mask, 6017 .dst_port = mask->dst_port_mask, 6018 }; 6019 attributes->items[2] = (struct rte_flow_item){ 6020 .type = RTE_FLOW_ITEM_TYPE_TCP, 6021 .spec = &attributes->l4, 6022 .mask = &attributes->l4_mask, 6023 }; 6024 break; 6025 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 6026 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 6027 .src_port = input->flow.udp6_flow.src_port, 6028 .dst_port = input->flow.udp6_flow.dst_port, 6029 }; 6030 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6031 .src_port = mask->src_port_mask, 6032 .dst_port = mask->dst_port_mask, 6033 }; 6034 attributes->items[2] = (struct rte_flow_item){ 6035 .type = RTE_FLOW_ITEM_TYPE_UDP, 6036 .spec = &attributes->l4, 6037 .mask = &attributes->l4_mask, 6038 }; 6039 break; 6040 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 6041 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6042 .src_port = input->flow.tcp6_flow.src_port, 6043 .dst_port = input->flow.tcp6_flow.dst_port, 6044 }; 6045 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6046 .src_port = mask->src_port_mask, 6047 .dst_port = mask->dst_port_mask, 6048 }; 6049 attributes->items[2] = (struct rte_flow_item){ 6050 .type = RTE_FLOW_ITEM_TYPE_TCP, 6051 .spec = &attributes->l4, 6052 .mask = &attributes->l4_mask, 6053 }; 6054 break; 6055 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 6056 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 6057 break; 6058 default: 6059 DRV_LOG(ERR, "port %u invalid flow type%d", 6060 dev->data->port_id, fdir_filter->input.flow_type); 6061 rte_errno = ENOTSUP; 6062 return -rte_errno; 6063 } 6064 return 0; 6065 } 6066 6067 #define FLOW_FDIR_CMP(f1, f2, fld) \ 6068 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 6069 6070 /** 6071 * Compare two FDIR flows. If items and actions are identical, the two flows are 6072 * regarded as same. 6073 * 6074 * @param dev 6075 * Pointer to Ethernet device. 6076 * @param f1 6077 * FDIR flow to compare. 6078 * @param f2 6079 * FDIR flow to compare. 6080 * 6081 * @return 6082 * Zero on match, 1 otherwise. 6083 */ 6084 static int 6085 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 6086 { 6087 if (FLOW_FDIR_CMP(f1, f2, attr) || 6088 FLOW_FDIR_CMP(f1, f2, l2) || 6089 FLOW_FDIR_CMP(f1, f2, l2_mask) || 6090 FLOW_FDIR_CMP(f1, f2, l3) || 6091 FLOW_FDIR_CMP(f1, f2, l3_mask) || 6092 FLOW_FDIR_CMP(f1, f2, l4) || 6093 FLOW_FDIR_CMP(f1, f2, l4_mask) || 6094 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 6095 return 1; 6096 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 6097 FLOW_FDIR_CMP(f1, f2, queue)) 6098 return 1; 6099 return 0; 6100 } 6101 6102 /** 6103 * Search device flow list to find out a matched FDIR flow. 6104 * 6105 * @param dev 6106 * Pointer to Ethernet device. 
6107 * @param fdir_flow 6108 * FDIR flow to lookup. 6109 * 6110 * @return 6111 * Index of flow if found, 0 otherwise. 6112 */ 6113 static uint32_t 6114 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 6115 { 6116 struct mlx5_priv *priv = dev->data->dev_private; 6117 uint32_t flow_idx = 0; 6118 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6119 6120 MLX5_ASSERT(fdir_flow); 6121 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6122 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 6123 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 6124 dev->data->port_id, flow_idx); 6125 flow_idx = priv_fdir_flow->rix_flow; 6126 break; 6127 } 6128 } 6129 return flow_idx; 6130 } 6131 6132 /** 6133 * Add new flow director filter and store it in list. 6134 * 6135 * @param dev 6136 * Pointer to Ethernet device. 6137 * @param fdir_filter 6138 * Flow director filter to add. 6139 * 6140 * @return 6141 * 0 on success, a negative errno value otherwise and rte_errno is set. 6142 */ 6143 static int 6144 flow_fdir_filter_add(struct rte_eth_dev *dev, 6145 const struct rte_eth_fdir_filter *fdir_filter) 6146 { 6147 struct mlx5_priv *priv = dev->data->dev_private; 6148 struct mlx5_fdir *fdir_flow; 6149 struct rte_flow *flow; 6150 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6151 uint32_t flow_idx; 6152 int ret; 6153 6154 fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, 6155 SOCKET_ID_ANY); 6156 if (!fdir_flow) { 6157 rte_errno = ENOMEM; 6158 return -rte_errno; 6159 } 6160 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 6161 if (ret) 6162 goto error; 6163 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 6164 if (flow_idx) { 6165 rte_errno = EEXIST; 6166 goto error; 6167 } 6168 priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, 6169 sizeof(struct mlx5_fdir_flow), 6170 0, SOCKET_ID_ANY); 6171 if (!priv_fdir_flow) { 6172 rte_errno = ENOMEM; 6173 goto error; 6174 } 6175 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 6176 fdir_flow->items, fdir_flow->actions, true, 6177 NULL); 6178 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 6179 if (!flow) 6180 goto error; 6181 flow->fdir = 1; 6182 priv_fdir_flow->fdir = fdir_flow; 6183 priv_fdir_flow->rix_flow = flow_idx; 6184 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 6185 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 6186 dev->data->port_id, (void *)flow); 6187 return 0; 6188 error: 6189 mlx5_free(priv_fdir_flow); 6190 mlx5_free(fdir_flow); 6191 return -rte_errno; 6192 } 6193 6194 /** 6195 * Delete specific filter. 6196 * 6197 * @param dev 6198 * Pointer to Ethernet device. 6199 * @param fdir_filter 6200 * Filter to be deleted. 6201 * 6202 * @return 6203 * 0 on success, a negative errno value otherwise and rte_errno is set. 
6204 */ 6205 static int 6206 flow_fdir_filter_delete(struct rte_eth_dev *dev, 6207 const struct rte_eth_fdir_filter *fdir_filter) 6208 { 6209 struct mlx5_priv *priv = dev->data->dev_private; 6210 uint32_t flow_idx; 6211 struct mlx5_fdir fdir_flow = { 6212 .attr.group = 0, 6213 }; 6214 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6215 int ret; 6216 6217 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 6218 if (ret) 6219 return -rte_errno; 6220 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6221 /* Find the fdir in priv list */ 6222 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 6223 break; 6224 } 6225 if (!priv_fdir_flow) 6226 return 0; 6227 LIST_REMOVE(priv_fdir_flow, next); 6228 flow_idx = priv_fdir_flow->rix_flow; 6229 flow_list_destroy(dev, &priv->flows, flow_idx); 6230 mlx5_free(priv_fdir_flow->fdir); 6231 mlx5_free(priv_fdir_flow); 6232 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 6233 dev->data->port_id, flow_idx); 6234 return 0; 6235 } 6236 6237 /** 6238 * Update queue for specific filter. 6239 * 6240 * @param dev 6241 * Pointer to Ethernet device. 6242 * @param fdir_filter 6243 * Filter to be updated. 6244 * 6245 * @return 6246 * 0 on success, a negative errno value otherwise and rte_errno is set. 6247 */ 6248 static int 6249 flow_fdir_filter_update(struct rte_eth_dev *dev, 6250 const struct rte_eth_fdir_filter *fdir_filter) 6251 { 6252 int ret; 6253 6254 ret = flow_fdir_filter_delete(dev, fdir_filter); 6255 if (ret) 6256 return ret; 6257 return flow_fdir_filter_add(dev, fdir_filter); 6258 } 6259 6260 /** 6261 * Flush all filters. 6262 * 6263 * @param dev 6264 * Pointer to Ethernet device. 6265 */ 6266 static void 6267 flow_fdir_filter_flush(struct rte_eth_dev *dev) 6268 { 6269 struct mlx5_priv *priv = dev->data->dev_private; 6270 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6271 6272 while (!LIST_EMPTY(&priv->fdir_flows)) { 6273 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 6274 LIST_REMOVE(priv_fdir_flow, next); 6275 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 6276 mlx5_free(priv_fdir_flow->fdir); 6277 mlx5_free(priv_fdir_flow); 6278 } 6279 } 6280 6281 /** 6282 * Get flow director information. 6283 * 6284 * @param dev 6285 * Pointer to Ethernet device. 6286 * @param[out] fdir_info 6287 * Resulting flow director information. 6288 */ 6289 static void 6290 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 6291 { 6292 struct rte_eth_fdir_masks *mask = 6293 &dev->data->dev_conf.fdir_conf.mask; 6294 6295 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 6296 fdir_info->guarant_spc = 0; 6297 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 6298 fdir_info->max_flexpayload = 0; 6299 fdir_info->flow_types_mask[0] = 0; 6300 fdir_info->flex_payload_unit = 0; 6301 fdir_info->max_flex_payload_segment_num = 0; 6302 fdir_info->flex_payload_limit = 0; 6303 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 6304 } 6305 6306 /** 6307 * Deal with flow director operations. 6308 * 6309 * @param dev 6310 * Pointer to Ethernet device. 6311 * @param filter_op 6312 * Operation to perform. 6313 * @param arg 6314 * Pointer to operation-specific structure. 6315 * 6316 * @return 6317 * 0 on success, a negative errno value otherwise and rte_errno is set. 
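 *
 * Only RTE_FDIR_MODE_PERFECT and RTE_FDIR_MODE_PERFECT_MAC_VLAN modes are
 * accepted; the supported operations dispatch to the static helpers above:
 *
 *     RTE_ETH_FILTER_ADD     -> flow_fdir_filter_add()
 *     RTE_ETH_FILTER_UPDATE  -> flow_fdir_filter_update()
 *     RTE_ETH_FILTER_DELETE  -> flow_fdir_filter_delete()
 *     RTE_ETH_FILTER_FLUSH   -> flow_fdir_filter_flush()
 *     RTE_ETH_FILTER_INFO    -> flow_fdir_info_get()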
6318 */ 6319 static int 6320 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 6321 void *arg) 6322 { 6323 enum rte_fdir_mode fdir_mode = 6324 dev->data->dev_conf.fdir_conf.mode; 6325 6326 if (filter_op == RTE_ETH_FILTER_NOP) 6327 return 0; 6328 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 6329 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 6330 DRV_LOG(ERR, "port %u flow director mode %d not supported", 6331 dev->data->port_id, fdir_mode); 6332 rte_errno = EINVAL; 6333 return -rte_errno; 6334 } 6335 switch (filter_op) { 6336 case RTE_ETH_FILTER_ADD: 6337 return flow_fdir_filter_add(dev, arg); 6338 case RTE_ETH_FILTER_UPDATE: 6339 return flow_fdir_filter_update(dev, arg); 6340 case RTE_ETH_FILTER_DELETE: 6341 return flow_fdir_filter_delete(dev, arg); 6342 case RTE_ETH_FILTER_FLUSH: 6343 flow_fdir_filter_flush(dev); 6344 break; 6345 case RTE_ETH_FILTER_INFO: 6346 flow_fdir_info_get(dev, arg); 6347 break; 6348 default: 6349 DRV_LOG(DEBUG, "port %u unknown operation %u", 6350 dev->data->port_id, filter_op); 6351 rte_errno = EINVAL; 6352 return -rte_errno; 6353 } 6354 return 0; 6355 } 6356 6357 /** 6358 * Manage filter operations. 6359 * 6360 * @param dev 6361 * Pointer to Ethernet device structure. 6362 * @param filter_type 6363 * Filter type. 6364 * @param filter_op 6365 * Operation to perform. 6366 * @param arg 6367 * Pointer to operation-specific structure. 6368 * 6369 * @return 6370 * 0 on success, a negative errno value otherwise and rte_errno is set. 6371 */ 6372 int 6373 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 6374 enum rte_filter_type filter_type, 6375 enum rte_filter_op filter_op, 6376 void *arg) 6377 { 6378 switch (filter_type) { 6379 case RTE_ETH_FILTER_GENERIC: 6380 if (filter_op != RTE_ETH_FILTER_GET) { 6381 rte_errno = EINVAL; 6382 return -rte_errno; 6383 } 6384 *(const void **)arg = &mlx5_flow_ops; 6385 return 0; 6386 case RTE_ETH_FILTER_FDIR: 6387 return flow_fdir_ctrl_func(dev, filter_op, arg); 6388 default: 6389 DRV_LOG(ERR, "port %u filter type (%d) not supported", 6390 dev->data->port_id, filter_type); 6391 rte_errno = ENOTSUP; 6392 return -rte_errno; 6393 } 6394 return 0; 6395 } 6396 6397 /** 6398 * Create the needed meter and suffix tables. 6399 * 6400 * @param[in] dev 6401 * Pointer to Ethernet device. 6402 * @param[in] fm 6403 * Pointer to the flow meter. 6404 * 6405 * @return 6406 * Pointer to table set on success, NULL otherwise. 6407 */ 6408 struct mlx5_meter_domains_infos * 6409 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev, 6410 const struct mlx5_flow_meter *fm) 6411 { 6412 const struct mlx5_flow_driver_ops *fops; 6413 6414 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6415 return fops->create_mtr_tbls(dev, fm); 6416 } 6417 6418 /** 6419 * Destroy the meter table set. 6420 * 6421 * @param[in] dev 6422 * Pointer to Ethernet device. 6423 * @param[in] tbl 6424 * Pointer to the meter table set. 6425 * 6426 * @return 6427 * 0 on success. 6428 */ 6429 int 6430 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev, 6431 struct mlx5_meter_domains_infos *tbls) 6432 { 6433 const struct mlx5_flow_driver_ops *fops; 6434 6435 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6436 return fops->destroy_mtr_tbls(dev, tbls); 6437 } 6438 6439 /** 6440 * Create policer rules. 6441 * 6442 * @param[in] dev 6443 * Pointer to Ethernet device. 6444 * @param[in] fm 6445 * Pointer to flow meter structure. 6446 * @param[in] attr 6447 * Pointer to flow attributes. 6448 * 6449 * @return 6450 * 0 on success, -1 otherwise. 
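 *
 * Meter and policer management is only provided by the DV (Direct Verbs)
 * flow engine, hence the unconditional dispatch through
 * flow_get_drv_ops(MLX5_FLOW_TYPE_DV) below, as in the other meter table
 * wrappers above.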
6451 */ 6452 int 6453 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev, 6454 struct mlx5_flow_meter *fm, 6455 const struct rte_flow_attr *attr) 6456 { 6457 const struct mlx5_flow_driver_ops *fops; 6458 6459 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6460 return fops->create_policer_rules(dev, fm, attr); 6461 } 6462 6463 /** 6464 * Destroy policer rules. 6465 * 6466 * @param[in] fm 6467 * Pointer to flow meter structure. 6468 * @param[in] attr 6469 * Pointer to flow attributes. 6470 * 6471 * @return 6472 * 0 on success, -1 otherwise. 6473 */ 6474 int 6475 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev, 6476 struct mlx5_flow_meter *fm, 6477 const struct rte_flow_attr *attr) 6478 { 6479 const struct mlx5_flow_driver_ops *fops; 6480 6481 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6482 return fops->destroy_policer_rules(dev, fm, attr); 6483 } 6484 6485 /** 6486 * Allocate a counter. 6487 * 6488 * @param[in] dev 6489 * Pointer to Ethernet device structure. 6490 * 6491 * @return 6492 * Index to allocated counter on success, 0 otherwise. 6493 */ 6494 uint32_t 6495 mlx5_counter_alloc(struct rte_eth_dev *dev) 6496 { 6497 const struct mlx5_flow_driver_ops *fops; 6498 struct rte_flow_attr attr = { .transfer = 0 }; 6499 6500 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6501 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6502 return fops->counter_alloc(dev); 6503 } 6504 DRV_LOG(ERR, 6505 "port %u counter allocate is not supported.", 6506 dev->data->port_id); 6507 return 0; 6508 } 6509 6510 /** 6511 * Free a counter. 6512 * 6513 * @param[in] dev 6514 * Pointer to Ethernet device structure. 6515 * @param[in] cnt 6516 * Index to counter to be free. 6517 */ 6518 void 6519 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt) 6520 { 6521 const struct mlx5_flow_driver_ops *fops; 6522 struct rte_flow_attr attr = { .transfer = 0 }; 6523 6524 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6525 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6526 fops->counter_free(dev, cnt); 6527 return; 6528 } 6529 DRV_LOG(ERR, 6530 "port %u counter free is not supported.", 6531 dev->data->port_id); 6532 } 6533 6534 /** 6535 * Query counter statistics. 6536 * 6537 * @param[in] dev 6538 * Pointer to Ethernet device structure. 6539 * @param[in] cnt 6540 * Index to counter to query. 6541 * @param[in] clear 6542 * Set to clear counter statistics. 6543 * @param[out] pkts 6544 * The counter hits packets number to save. 6545 * @param[out] bytes 6546 * The counter hits bytes number to save. 6547 * 6548 * @return 6549 * 0 on success, a negative errno value otherwise. 6550 */ 6551 int 6552 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, 6553 bool clear, uint64_t *pkts, uint64_t *bytes) 6554 { 6555 const struct mlx5_flow_driver_ops *fops; 6556 struct rte_flow_attr attr = { .transfer = 0 }; 6557 6558 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6559 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6560 return fops->counter_query(dev, cnt, clear, pkts, bytes); 6561 } 6562 DRV_LOG(ERR, 6563 "port %u counter query is not supported.", 6564 dev->data->port_id); 6565 return -ENOTSUP; 6566 } 6567 6568 #define MLX5_POOL_QUERY_FREQ_US 1000000 6569 6570 /** 6571 * Get number of all validate pools. 6572 * 6573 * @param[in] sh 6574 * Pointer to mlx5_dev_ctx_shared object. 6575 * 6576 * @return 6577 * The number of all validate pools. 
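 *
 * The result is used by mlx5_set_query_alarm() below to split the
 * MLX5_POOL_QUERY_FREQ_US budget between pools: for example, with 10
 * valid pools the alarm is re-armed every 1000000 / 10 = 100000 us, so
 * each pool is queried roughly once per second.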
6578 */ 6579 static uint32_t 6580 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh) 6581 { 6582 int i; 6583 uint32_t pools_n = 0; 6584 6585 for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) 6586 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid); 6587 return pools_n; 6588 } 6589 6590 /** 6591 * Set the periodic procedure for triggering asynchronous batch queries for all 6592 * the counter pools. 6593 * 6594 * @param[in] sh 6595 * Pointer to mlx5_dev_ctx_shared object. 6596 */ 6597 void 6598 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) 6599 { 6600 uint32_t pools_n, us; 6601 6602 pools_n = mlx5_get_all_valid_pool_count(sh); 6603 us = MLX5_POOL_QUERY_FREQ_US / pools_n; 6604 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); 6605 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { 6606 sh->cmng.query_thread_on = 0; 6607 DRV_LOG(ERR, "Cannot reinitialize query alarm"); 6608 } else { 6609 sh->cmng.query_thread_on = 1; 6610 } 6611 } 6612 6613 /** 6614 * The periodic procedure for triggering asynchronous batch queries for all the 6615 * counter pools. This function is probably called by the host thread. 6616 * 6617 * @param[in] arg 6618 * The parameter for the alarm process. 6619 */ 6620 void 6621 mlx5_flow_query_alarm(void *arg) 6622 { 6623 struct mlx5_dev_ctx_shared *sh = arg; 6624 struct mlx5_devx_obj *dcs; 6625 uint16_t offset; 6626 int ret; 6627 uint8_t batch = sh->cmng.batch; 6628 uint8_t age = sh->cmng.age; 6629 uint16_t pool_index = sh->cmng.pool_index; 6630 struct mlx5_pools_container *cont; 6631 struct mlx5_flow_counter_pool *pool; 6632 int cont_loop = MLX5_CCONT_TYPE_MAX; 6633 6634 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) 6635 goto set_alarm; 6636 next_container: 6637 cont = MLX5_CNT_CONTAINER(sh, batch, age); 6638 rte_spinlock_lock(&cont->resize_sl); 6639 if (!cont->pools) { 6640 rte_spinlock_unlock(&cont->resize_sl); 6641 /* Check if all the containers are empty. */ 6642 if (unlikely(--cont_loop == 0)) 6643 goto set_alarm; 6644 batch ^= 0x1; 6645 pool_index = 0; 6646 if (batch == 0 && pool_index == 0) { 6647 age ^= 0x1; 6648 sh->cmng.batch = batch; 6649 sh->cmng.age = age; 6650 } 6651 goto next_container; 6652 } 6653 pool = cont->pools[pool_index]; 6654 rte_spinlock_unlock(&cont->resize_sl); 6655 if (pool->raw_hw) 6656 /* There is a pool query in progress. */ 6657 goto set_alarm; 6658 pool->raw_hw = 6659 LIST_FIRST(&sh->cmng.free_stat_raws); 6660 if (!pool->raw_hw) 6661 /* No free counter statistics raw memory. */ 6662 goto set_alarm; 6663 dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read 6664 (&pool->a64_dcs); 6665 if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) { 6666 /* Pool without valid counter. */ 6667 pool->raw_hw = NULL; 6668 goto next_pool; 6669 } 6670 offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; 6671 /* 6672 * Identify the counters released between query trigger and query 6673 * handle more effiecntly. The counter released in this gap period 6674 * should wait for a new round of query as the new arrived packets 6675 * will not be taken into account. 
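 * Concretely, pool->query_gen is bumped just before the query is posted
 * (below), and the completion handler mlx5_flow_async_pool_query_handle()
 * only returns the previous generation of released counters to the
 * container free list, so a counter freed while a query is in flight is
 * not reused until the next round completes.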
6676 */ 6677 pool->query_gen++; 6678 ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - 6679 offset, NULL, NULL, 6680 pool->raw_hw->mem_mng->dm->id, 6681 (void *)(uintptr_t) 6682 (pool->raw_hw->data + offset), 6683 sh->devx_comp, 6684 (uint64_t)(uintptr_t)pool); 6685 if (ret) { 6686 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" 6687 " %d", pool->min_dcs->id); 6688 pool->raw_hw = NULL; 6689 goto set_alarm; 6690 } 6691 pool->raw_hw->min_dcs_id = dcs->id; 6692 LIST_REMOVE(pool->raw_hw, next); 6693 sh->cmng.pending_queries++; 6694 next_pool: 6695 pool_index++; 6696 if (pool_index >= rte_atomic16_read(&cont->n_valid)) { 6697 batch ^= 0x1; 6698 pool_index = 0; 6699 if (batch == 0 && pool_index == 0) 6700 age ^= 0x1; 6701 } 6702 set_alarm: 6703 sh->cmng.batch = batch; 6704 sh->cmng.pool_index = pool_index; 6705 sh->cmng.age = age; 6706 mlx5_set_query_alarm(sh); 6707 } 6708 6709 /** 6710 * Check and callback event for new aged flow in the counter pool 6711 * 6712 * @param[in] sh 6713 * Pointer to mlx5_dev_ctx_shared object. 6714 * @param[in] pool 6715 * Pointer to Current counter pool. 6716 */ 6717 static void 6718 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, 6719 struct mlx5_flow_counter_pool *pool) 6720 { 6721 struct mlx5_priv *priv; 6722 struct mlx5_flow_counter *cnt; 6723 struct mlx5_age_info *age_info; 6724 struct mlx5_age_param *age_param; 6725 struct mlx5_counter_stats_raw *cur = pool->raw_hw; 6726 struct mlx5_counter_stats_raw *prev = pool->raw; 6727 uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10); 6728 uint32_t i; 6729 6730 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { 6731 cnt = MLX5_POOL_GET_CNT(pool, i); 6732 age_param = MLX5_CNT_TO_AGE(cnt); 6733 if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE) 6734 continue; 6735 if (cur->data[i].hits != prev->data[i].hits) { 6736 age_param->expire = curr + age_param->timeout; 6737 continue; 6738 } 6739 if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2)) 6740 continue; 6741 /** 6742 * Hold the lock first, or if between the 6743 * state AGE_TMOUT and tailq operation the 6744 * release happened, the release procedure 6745 * may delete a non-existent tailq node. 6746 */ 6747 priv = rte_eth_devices[age_param->port_id].data->dev_private; 6748 age_info = GET_PORT_AGE_INFO(priv); 6749 rte_spinlock_lock(&age_info->aged_sl); 6750 /* If the cpmset fails, release happens. */ 6751 if (rte_atomic16_cmpset((volatile uint16_t *) 6752 &age_param->state, 6753 AGE_CANDIDATE, 6754 AGE_TMOUT) == 6755 AGE_CANDIDATE) { 6756 TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); 6757 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 6758 } 6759 rte_spinlock_unlock(&age_info->aged_sl); 6760 } 6761 for (i = 0; i < sh->max_port; i++) { 6762 age_info = &sh->port[i].age_info; 6763 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) 6764 continue; 6765 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) 6766 rte_eth_dev_callback_process 6767 (&rte_eth_devices[sh->port[i].devx_ih_port_id], 6768 RTE_ETH_EVENT_FLOW_AGED, NULL); 6769 age_info->flags = 0; 6770 } 6771 } 6772 6773 /** 6774 * Handler for the HW respond about ready values from an asynchronous batch 6775 * query. This function is probably called by the host thread. 6776 * 6777 * @param[in] sh 6778 * The pointer to the shared device context. 6779 * @param[in] async_id 6780 * The Devx async ID. 6781 * @param[in] status 6782 * The status of the completion. 
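 *
 * On success the freshly queried raw buffer (pool->raw_hw) becomes the
 * pool's current snapshot (pool->raw), aged counters are checked for AGE
 * pools, and the released counters of the previous generation are given
 * back to the container free list; on failure the queried buffer is simply
 * recycled to the free raw list.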
6783 */ 6784 void 6785 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, 6786 uint64_t async_id, int status) 6787 { 6788 struct mlx5_flow_counter_pool *pool = 6789 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; 6790 struct mlx5_counter_stats_raw *raw_to_free; 6791 uint8_t age = !!IS_AGE_POOL(pool); 6792 uint8_t query_gen = pool->query_gen ^ 1; 6793 struct mlx5_pools_container *cont = 6794 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age); 6795 6796 if (unlikely(status)) { 6797 raw_to_free = pool->raw_hw; 6798 } else { 6799 raw_to_free = pool->raw; 6800 if (IS_AGE_POOL(pool)) 6801 mlx5_flow_aging_check(sh, pool); 6802 rte_spinlock_lock(&pool->sl); 6803 pool->raw = pool->raw_hw; 6804 rte_spinlock_unlock(&pool->sl); 6805 /* Be sure the new raw counters data is updated in memory. */ 6806 rte_io_wmb(); 6807 if (!TAILQ_EMPTY(&pool->counters[query_gen])) { 6808 rte_spinlock_lock(&cont->csl); 6809 TAILQ_CONCAT(&cont->counters, 6810 &pool->counters[query_gen], next); 6811 rte_spinlock_unlock(&cont->csl); 6812 } 6813 } 6814 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); 6815 pool->raw_hw = NULL; 6816 sh->cmng.pending_queries--; 6817 } 6818 6819 /** 6820 * Translate the rte_flow group index to HW table value. 6821 * 6822 * @param[in] attributes 6823 * Pointer to flow attributes 6824 * @param[in] external 6825 * Value is part of flow rule created by request external to PMD. 6826 * @param[in] group 6827 * rte_flow group index value. 6828 * @param[out] fdb_def_rule 6829 * Whether fdb jump to table 1 is configured. 6830 * @param[out] table 6831 * HW table value. 6832 * @param[out] error 6833 * Pointer to error structure. 6834 * 6835 * @return 6836 * 0 on success, a negative errno value otherwise and rte_errno is set. 6837 */ 6838 int 6839 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, 6840 uint32_t group, bool fdb_def_rule, uint32_t *table, 6841 struct rte_flow_error *error) 6842 { 6843 if (attributes->transfer && external && fdb_def_rule) { 6844 if (group == UINT32_MAX) 6845 return rte_flow_error_set 6846 (error, EINVAL, 6847 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 6848 NULL, 6849 "group index not supported"); 6850 *table = group + 1; 6851 } else { 6852 *table = group; 6853 } 6854 return 0; 6855 } 6856 6857 /** 6858 * Discover availability of metadata reg_c's. 6859 * 6860 * Iteratively use test flows to check availability. 6861 * 6862 * @param[in] dev 6863 * Pointer to the Ethernet device structure. 6864 * 6865 * @return 6866 * 0 on success, a negative errno value otherwise and rte_errno is set. 6867 */ 6868 int 6869 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) 6870 { 6871 struct mlx5_priv *priv = dev->data->dev_private; 6872 struct mlx5_dev_config *config = &priv->config; 6873 enum modify_reg idx; 6874 int n = 0; 6875 6876 /* reg_c[0] and reg_c[1] are reserved. */ 6877 config->flow_mreg_c[n++] = REG_C_0; 6878 config->flow_mreg_c[n++] = REG_C_1; 6879 /* Discover availability of other reg_c's. 
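 * Each candidate register reg_c[2]..reg_c[7] is probed with a throwaway
 * internal flow that copies REG_C_1 into it and jumps to
 * MLX5_FLOW_MREG_ACT_TABLE_GROUP; if the flow can be created (and, on a
 * stopped port, also applied), the register is recorded in
 * config->flow_mreg_c[]; any remaining slots are filled with REG_NON.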
*/
6880 for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
6881 struct rte_flow_attr attr = {
6882 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
6883 .priority = MLX5_FLOW_PRIO_RSVD,
6884 .ingress = 1,
6885 };
6886 struct rte_flow_item items[] = {
6887 [0] = {
6888 .type = RTE_FLOW_ITEM_TYPE_END,
6889 },
6890 };
6891 struct rte_flow_action actions[] = {
6892 [0] = {
6893 .type = (enum rte_flow_action_type)
6894 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6895 .conf = &(struct mlx5_flow_action_copy_mreg){
6896 .src = REG_C_1,
6897 .dst = idx,
6898 },
6899 },
6900 [1] = {
6901 .type = RTE_FLOW_ACTION_TYPE_JUMP,
6902 .conf = &(struct rte_flow_action_jump){
6903 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6904 },
6905 },
6906 [2] = {
6907 .type = RTE_FLOW_ACTION_TYPE_END,
6908 },
6909 };
6910 uint32_t flow_idx;
6911 struct rte_flow *flow;
6912 struct rte_flow_error error;
6913
6914 if (!config->dv_flow_en)
6915 break;
6916 /* Create internal flow, validation skips copy action. */
6917 flow_idx = flow_list_create(dev, NULL, &attr, items,
6918 actions, false, &error);
6919 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
6920 flow_idx);
6921 if (!flow)
6922 continue;
6923 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
6924 config->flow_mreg_c[n++] = idx;
6925 flow_list_destroy(dev, NULL, flow_idx);
6926 }
6927 for (; n < MLX5_MREG_C_NUM; ++n)
6928 config->flow_mreg_c[n] = REG_NON;
6929 return 0;
6930 }
6931
6932 /**
6933 * Dump flow raw HW data to a file.
6934 *
6935 * @param[in] dev
6936 * The pointer to Ethernet device.
6937 * @param[in] file
6938 * A pointer to a file for output.
6939 * @param[out] error
6940 * Perform verbose error reporting if not NULL. PMDs initialize this
6941 * structure in case of error only.
6942 * @return
6943 * 0 on success, a negative value otherwise.
6944 */
6945 int
6946 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
6947 FILE *file,
6948 struct rte_flow_error *error __rte_unused)
6949 {
6950 struct mlx5_priv *priv = dev->data->dev_private;
6951 struct mlx5_dev_ctx_shared *sh = priv->sh;
6952
6953 if (!priv->config.dv_flow_en) {
6954 if (fputs("device dv flow disabled\n", file) <= 0)
6955 return -errno;
6956 return -ENOTSUP;
6957 }
6958 return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
6959 sh->tx_domain, file);
6960 }
6961
6962 /**
6963 * Get aged-out flows.
6964 *
6965 * @param[in] dev
6966 * Pointer to the Ethernet device structure.
6967 * @param[in] contexts
6968 * The address of an array of pointers to the aged-out flow contexts.
6969 * @param[in] nb_contexts
6970 * The length of the context array.
6971 * @param[out] error
6972 * Perform verbose error reporting if not NULL. Initialized in case of
6973 * error only.
6974 *
6975 * @return
6976 * The number of aged-out contexts on success, a negative errno value
6977 * otherwise. If @p nb_contexts is 0, the total number of aged-out
6978 * contexts is returned; otherwise, the number of aged-out flows
6979 * reported in the context array is returned.
6980 */
6981 int
6982 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
6983 uint32_t nb_contexts, struct rte_flow_error *error)
6984 {
6985 const struct mlx5_flow_driver_ops *fops;
6986 struct rte_flow_attr attr = { .transfer = 0 };
6987
6988 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6989 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6990 return fops->get_aged_flows(dev, contexts, nb_contexts,
6991 error);
6992 }
6993 DRV_LOG(ERR,
6994 "port %u get aged flows is not supported.",
6995 dev->data->port_id);
6996 return -ENOTSUP;
6997 }
6998
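/*
 * Illustrative application-side usage of the flow aging support above
 * (a hedged sketch, not part of this PMD): the application registers for
 * RTE_ETH_EVENT_FLOW_AGED and then drains the aged contexts with
 * rte_flow_get_aged_flows(), which lands in mlx5_flow_get_aged_flows().
 *
 *	static int
 *	aged_event_cb(uint16_t port_id, enum rte_eth_event_type event,
 *		      void *cb_arg, void *ret_param)
 *	{
 *		void *contexts[64];
 *		struct rte_flow_error err;
 *		int n;
 *
 *		(void)event; (void)cb_arg; (void)ret_param;
 *		n = rte_flow_get_aged_flows(port_id, contexts,
 *					    RTE_DIM(contexts), &err);
 *		// each context is the rte_flow_action_age.context the
 *		// application supplied when creating the flow
 *		return n < 0 ? n : 0;
 *	}
 *
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED,
 *				      aged_event_cb, NULL);
 */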