1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <netinet/in.h> 7 #include <sys/queue.h> 8 #include <stdalign.h> 9 #include <stdint.h> 10 #include <string.h> 11 #include <stdbool.h> 12 13 #include <rte_common.h> 14 #include <rte_ether.h> 15 #include <rte_ethdev_driver.h> 16 #include <rte_flow.h> 17 #include <rte_cycles.h> 18 #include <rte_flow_driver.h> 19 #include <rte_malloc.h> 20 #include <rte_ip.h> 21 22 #include <mlx5_glue.h> 23 #include <mlx5_devx_cmds.h> 24 #include <mlx5_prm.h> 25 #include <mlx5_malloc.h> 26 27 #include "mlx5_defs.h" 28 #include "mlx5.h" 29 #include "mlx5_flow.h" 30 #include "mlx5_flow_os.h" 31 #include "mlx5_rxtx.h" 32 33 /** Device flow drivers. */ 34 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; 35 36 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops; 37 38 const struct mlx5_flow_driver_ops *flow_drv_ops[] = { 39 [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops, 40 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 41 [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops, 42 #endif 43 [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops, 44 [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops 45 }; 46 47 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */ 48 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \ 49 (const int []){ \ 50 __VA_ARGS__, 0, \ 51 } 52 53 /** Node object of input graph for mlx5_flow_expand_rss(). */ 54 struct mlx5_flow_expand_node { 55 const int *const next; 56 /**< 57 * List of next node indexes. Index 0 is interpreted as a terminator. 58 */ 59 const enum rte_flow_item_type type; 60 /**< Pattern item type of current node. */ 61 uint64_t rss_types; 62 /**< 63 * RSS types bit-field associated with this node 64 * (see ETH_RSS_* definitions). 65 */ 66 }; 67 68 /** Object returned by mlx5_flow_expand_rss(). */ 69 struct mlx5_flow_expand_rss { 70 uint32_t entries; 71 /**< Number of entries @p patterns and @p priorities. */ 72 struct { 73 struct rte_flow_item *pattern; /**< Expanded pattern array. */ 74 uint32_t priority; /**< Priority offset for each expansion. */ 75 } entry[]; 76 }; 77 78 static enum rte_flow_item_type 79 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) 80 { 81 enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID; 82 uint16_t ether_type = 0; 83 uint16_t ether_type_m; 84 uint8_t ip_next_proto = 0; 85 uint8_t ip_next_proto_m; 86 87 if (item == NULL || item->spec == NULL) 88 return ret; 89 switch (item->type) { 90 case RTE_FLOW_ITEM_TYPE_ETH: 91 if (item->mask) 92 ether_type_m = ((const struct rte_flow_item_eth *) 93 (item->mask))->type; 94 else 95 ether_type_m = rte_flow_item_eth_mask.type; 96 if (ether_type_m != RTE_BE16(0xFFFF)) 97 break; 98 ether_type = ((const struct rte_flow_item_eth *) 99 (item->spec))->type; 100 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 101 ret = RTE_FLOW_ITEM_TYPE_IPV4; 102 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 103 ret = RTE_FLOW_ITEM_TYPE_IPV6; 104 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 105 ret = RTE_FLOW_ITEM_TYPE_VLAN; 106 else 107 ret = RTE_FLOW_ITEM_TYPE_END; 108 break; 109 case RTE_FLOW_ITEM_TYPE_VLAN: 110 if (item->mask) 111 ether_type_m = ((const struct rte_flow_item_vlan *) 112 (item->mask))->inner_type; 113 else 114 ether_type_m = rte_flow_item_vlan_mask.inner_type; 115 if (ether_type_m != RTE_BE16(0xFFFF)) 116 break; 117 ether_type = ((const struct rte_flow_item_vlan *) 118 (item->spec))->inner_type; 119 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 120 ret = RTE_FLOW_ITEM_TYPE_IPV4; 121 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 122 ret = RTE_FLOW_ITEM_TYPE_IPV6; 123 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 124 ret = RTE_FLOW_ITEM_TYPE_VLAN; 125 else 126 ret = RTE_FLOW_ITEM_TYPE_END; 127 break; 128 case RTE_FLOW_ITEM_TYPE_IPV4: 129 if (item->mask) 130 ip_next_proto_m = ((const struct rte_flow_item_ipv4 *) 131 (item->mask))->hdr.next_proto_id; 132 else 133 ip_next_proto_m = 134 rte_flow_item_ipv4_mask.hdr.next_proto_id; 135 if (ip_next_proto_m != 0xFF) 136 break; 137 ip_next_proto = ((const struct rte_flow_item_ipv4 *) 138 (item->spec))->hdr.next_proto_id; 139 if (ip_next_proto == IPPROTO_UDP) 140 ret = RTE_FLOW_ITEM_TYPE_UDP; 141 else if (ip_next_proto == IPPROTO_TCP) 142 ret = RTE_FLOW_ITEM_TYPE_TCP; 143 else if (ip_next_proto == IPPROTO_IP) 144 ret = RTE_FLOW_ITEM_TYPE_IPV4; 145 else if (ip_next_proto == IPPROTO_IPV6) 146 ret = RTE_FLOW_ITEM_TYPE_IPV6; 147 else 148 ret = RTE_FLOW_ITEM_TYPE_END; 149 break; 150 case RTE_FLOW_ITEM_TYPE_IPV6: 151 if (item->mask) 152 ip_next_proto_m = ((const struct rte_flow_item_ipv6 *) 153 (item->mask))->hdr.proto; 154 else 155 ip_next_proto_m = 156 rte_flow_item_ipv6_mask.hdr.proto; 157 if (ip_next_proto_m != 0xFF) 158 break; 159 ip_next_proto = ((const struct rte_flow_item_ipv6 *) 160 (item->spec))->hdr.proto; 161 if (ip_next_proto == IPPROTO_UDP) 162 ret = RTE_FLOW_ITEM_TYPE_UDP; 163 else if (ip_next_proto == IPPROTO_TCP) 164 ret = RTE_FLOW_ITEM_TYPE_TCP; 165 else if (ip_next_proto == IPPROTO_IP) 166 ret = RTE_FLOW_ITEM_TYPE_IPV4; 167 else if (ip_next_proto == IPPROTO_IPV6) 168 ret = RTE_FLOW_ITEM_TYPE_IPV6; 169 else 170 ret = RTE_FLOW_ITEM_TYPE_END; 171 break; 172 default: 173 ret = RTE_FLOW_ITEM_TYPE_VOID; 174 break; 175 } 176 return ret; 177 } 178 179 /** 180 * Expand RSS flows into several possible flows according to the RSS hash 181 * fields requested and the driver capabilities. 182 * 183 * @param[out] buf 184 * Buffer to store the result expansion. 185 * @param[in] size 186 * Buffer size in bytes. If 0, @p buf can be NULL. 187 * @param[in] pattern 188 * User flow pattern. 189 * @param[in] types 190 * RSS types to expand (see ETH_RSS_* definitions). 191 * @param[in] graph 192 * Input graph to expand @p pattern according to @p types. 193 * @param[in] graph_root_index 194 * Index of root node in @p graph, typically 0. 195 * 196 * @return 197 * A positive value representing the size of @p buf in bytes regardless of 198 * @p size on success, a negative errno value otherwise and rte_errno is 199 * set, the following errors are defined: 200 * 201 * -E2BIG: graph-depth @p graph is too deep. 202 */ 203 static int 204 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, 205 const struct rte_flow_item *pattern, uint64_t types, 206 const struct mlx5_flow_expand_node graph[], 207 int graph_root_index) 208 { 209 const int elt_n = 8; 210 const struct rte_flow_item *item; 211 const struct mlx5_flow_expand_node *node = &graph[graph_root_index]; 212 const int *next_node; 213 const int *stack[elt_n]; 214 int stack_pos = 0; 215 struct rte_flow_item flow_items[elt_n]; 216 unsigned int i; 217 size_t lsize; 218 size_t user_pattern_size = 0; 219 void *addr = NULL; 220 const struct mlx5_flow_expand_node *next = NULL; 221 struct rte_flow_item missed_item; 222 int missed = 0; 223 int elt = 0; 224 const struct rte_flow_item *last_item = NULL; 225 226 memset(&missed_item, 0, sizeof(missed_item)); 227 lsize = offsetof(struct mlx5_flow_expand_rss, entry) + 228 elt_n * sizeof(buf->entry[0]); 229 if (lsize <= size) { 230 buf->entry[0].priority = 0; 231 buf->entry[0].pattern = (void *)&buf->entry[elt_n]; 232 buf->entries = 0; 233 addr = buf->entry[0].pattern; 234 } 235 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 236 if (item->type != RTE_FLOW_ITEM_TYPE_VOID) 237 last_item = item; 238 for (i = 0; node->next && node->next[i]; ++i) { 239 next = &graph[node->next[i]]; 240 if (next->type == item->type) 241 break; 242 } 243 if (next) 244 node = next; 245 user_pattern_size += sizeof(*item); 246 } 247 user_pattern_size += sizeof(*item); /* Handle END item. */ 248 lsize += user_pattern_size; 249 /* Copy the user pattern in the first entry of the buffer. */ 250 if (lsize <= size) { 251 rte_memcpy(addr, pattern, user_pattern_size); 252 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 253 buf->entries = 1; 254 } 255 /* Start expanding. */ 256 memset(flow_items, 0, sizeof(flow_items)); 257 user_pattern_size -= sizeof(*item); 258 /* 259 * Check if the last valid item has spec set, need complete pattern, 260 * and the pattern can be used for expansion. 261 */ 262 missed_item.type = mlx5_flow_expand_rss_item_complete(last_item); 263 if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) { 264 /* Item type END indicates expansion is not required. */ 265 return lsize; 266 } 267 if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) { 268 next = NULL; 269 missed = 1; 270 for (i = 0; node->next && node->next[i]; ++i) { 271 next = &graph[node->next[i]]; 272 if (next->type == missed_item.type) { 273 flow_items[0].type = missed_item.type; 274 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; 275 break; 276 } 277 next = NULL; 278 } 279 } 280 if (next && missed) { 281 elt = 2; /* missed item + item end. */ 282 node = next; 283 lsize += elt * sizeof(*item) + user_pattern_size; 284 if ((node->rss_types & types) && lsize <= size) { 285 buf->entry[buf->entries].priority = 1; 286 buf->entry[buf->entries].pattern = addr; 287 buf->entries++; 288 rte_memcpy(addr, buf->entry[0].pattern, 289 user_pattern_size); 290 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 291 rte_memcpy(addr, flow_items, elt * sizeof(*item)); 292 addr = (void *)(((uintptr_t)addr) + 293 elt * sizeof(*item)); 294 } 295 } 296 memset(flow_items, 0, sizeof(flow_items)); 297 next_node = node->next; 298 stack[stack_pos] = next_node; 299 node = next_node ? &graph[*next_node] : NULL; 300 while (node) { 301 flow_items[stack_pos].type = node->type; 302 if (node->rss_types & types) { 303 /* 304 * compute the number of items to copy from the 305 * expansion and copy it. 306 * When the stack_pos is 0, there are 1 element in it, 307 * plus the addition END item. 308 */ 309 elt = stack_pos + 2; 310 flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END; 311 lsize += elt * sizeof(*item) + user_pattern_size; 312 if (lsize <= size) { 313 size_t n = elt * sizeof(*item); 314 315 buf->entry[buf->entries].priority = 316 stack_pos + 1 + missed; 317 buf->entry[buf->entries].pattern = addr; 318 buf->entries++; 319 rte_memcpy(addr, buf->entry[0].pattern, 320 user_pattern_size); 321 addr = (void *)(((uintptr_t)addr) + 322 user_pattern_size); 323 rte_memcpy(addr, &missed_item, 324 missed * sizeof(*item)); 325 addr = (void *)(((uintptr_t)addr) + 326 missed * sizeof(*item)); 327 rte_memcpy(addr, flow_items, n); 328 addr = (void *)(((uintptr_t)addr) + n); 329 } 330 } 331 /* Go deeper. */ 332 if (node->next) { 333 next_node = node->next; 334 if (stack_pos++ == elt_n) { 335 rte_errno = E2BIG; 336 return -rte_errno; 337 } 338 stack[stack_pos] = next_node; 339 } else if (*(next_node + 1)) { 340 /* Follow up with the next possibility. */ 341 ++next_node; 342 } else { 343 /* Move to the next path. */ 344 if (stack_pos) 345 next_node = stack[--stack_pos]; 346 next_node++; 347 stack[stack_pos] = next_node; 348 } 349 node = *next_node ? &graph[*next_node] : NULL; 350 }; 351 /* no expanded flows but we have missed item, create one rule for it */ 352 if (buf->entries == 1 && missed != 0) { 353 elt = 2; 354 lsize += elt * sizeof(*item) + user_pattern_size; 355 if (lsize <= size) { 356 buf->entry[buf->entries].priority = 1; 357 buf->entry[buf->entries].pattern = addr; 358 buf->entries++; 359 flow_items[0].type = missed_item.type; 360 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; 361 rte_memcpy(addr, buf->entry[0].pattern, 362 user_pattern_size); 363 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 364 rte_memcpy(addr, flow_items, elt * sizeof(*item)); 365 addr = (void *)(((uintptr_t)addr) + 366 elt * sizeof(*item)); 367 } 368 } 369 return lsize; 370 } 371 372 enum mlx5_expansion { 373 MLX5_EXPANSION_ROOT, 374 MLX5_EXPANSION_ROOT_OUTER, 375 MLX5_EXPANSION_ROOT_ETH_VLAN, 376 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, 377 MLX5_EXPANSION_OUTER_ETH, 378 MLX5_EXPANSION_OUTER_ETH_VLAN, 379 MLX5_EXPANSION_OUTER_VLAN, 380 MLX5_EXPANSION_OUTER_IPV4, 381 MLX5_EXPANSION_OUTER_IPV4_UDP, 382 MLX5_EXPANSION_OUTER_IPV4_TCP, 383 MLX5_EXPANSION_OUTER_IPV6, 384 MLX5_EXPANSION_OUTER_IPV6_UDP, 385 MLX5_EXPANSION_OUTER_IPV6_TCP, 386 MLX5_EXPANSION_VXLAN, 387 MLX5_EXPANSION_VXLAN_GPE, 388 MLX5_EXPANSION_GRE, 389 MLX5_EXPANSION_MPLS, 390 MLX5_EXPANSION_ETH, 391 MLX5_EXPANSION_ETH_VLAN, 392 MLX5_EXPANSION_VLAN, 393 MLX5_EXPANSION_IPV4, 394 MLX5_EXPANSION_IPV4_UDP, 395 MLX5_EXPANSION_IPV4_TCP, 396 MLX5_EXPANSION_IPV6, 397 MLX5_EXPANSION_IPV6_UDP, 398 MLX5_EXPANSION_IPV6_TCP, 399 }; 400 401 /** Supported expansion of items. */ 402 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { 403 [MLX5_EXPANSION_ROOT] = { 404 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 405 MLX5_EXPANSION_IPV4, 406 MLX5_EXPANSION_IPV6), 407 .type = RTE_FLOW_ITEM_TYPE_END, 408 }, 409 [MLX5_EXPANSION_ROOT_OUTER] = { 410 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 411 MLX5_EXPANSION_OUTER_IPV4, 412 MLX5_EXPANSION_OUTER_IPV6), 413 .type = RTE_FLOW_ITEM_TYPE_END, 414 }, 415 [MLX5_EXPANSION_ROOT_ETH_VLAN] = { 416 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), 417 .type = RTE_FLOW_ITEM_TYPE_END, 418 }, 419 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { 420 .next = MLX5_FLOW_EXPAND_RSS_NEXT 421 (MLX5_EXPANSION_OUTER_ETH_VLAN), 422 .type = RTE_FLOW_ITEM_TYPE_END, 423 }, 424 [MLX5_EXPANSION_OUTER_ETH] = { 425 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 426 MLX5_EXPANSION_OUTER_IPV6, 427 MLX5_EXPANSION_MPLS), 428 .type = RTE_FLOW_ITEM_TYPE_ETH, 429 .rss_types = 0, 430 }, 431 [MLX5_EXPANSION_OUTER_ETH_VLAN] = { 432 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), 433 .type = RTE_FLOW_ITEM_TYPE_ETH, 434 .rss_types = 0, 435 }, 436 [MLX5_EXPANSION_OUTER_VLAN] = { 437 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 438 MLX5_EXPANSION_OUTER_IPV6), 439 .type = RTE_FLOW_ITEM_TYPE_VLAN, 440 }, 441 [MLX5_EXPANSION_OUTER_IPV4] = { 442 .next = MLX5_FLOW_EXPAND_RSS_NEXT 443 (MLX5_EXPANSION_OUTER_IPV4_UDP, 444 MLX5_EXPANSION_OUTER_IPV4_TCP, 445 MLX5_EXPANSION_GRE, 446 MLX5_EXPANSION_IPV4, 447 MLX5_EXPANSION_IPV6), 448 .type = RTE_FLOW_ITEM_TYPE_IPV4, 449 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 450 ETH_RSS_NONFRAG_IPV4_OTHER, 451 }, 452 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 453 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 454 MLX5_EXPANSION_VXLAN_GPE), 455 .type = RTE_FLOW_ITEM_TYPE_UDP, 456 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 457 }, 458 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 459 .type = RTE_FLOW_ITEM_TYPE_TCP, 460 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 461 }, 462 [MLX5_EXPANSION_OUTER_IPV6] = { 463 .next = MLX5_FLOW_EXPAND_RSS_NEXT 464 (MLX5_EXPANSION_OUTER_IPV6_UDP, 465 MLX5_EXPANSION_OUTER_IPV6_TCP, 466 MLX5_EXPANSION_IPV4, 467 MLX5_EXPANSION_IPV6), 468 .type = RTE_FLOW_ITEM_TYPE_IPV6, 469 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 470 ETH_RSS_NONFRAG_IPV6_OTHER, 471 }, 472 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 473 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 474 MLX5_EXPANSION_VXLAN_GPE), 475 .type = RTE_FLOW_ITEM_TYPE_UDP, 476 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 477 }, 478 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 479 .type = RTE_FLOW_ITEM_TYPE_TCP, 480 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 481 }, 482 [MLX5_EXPANSION_VXLAN] = { 483 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 484 MLX5_EXPANSION_IPV4, 485 MLX5_EXPANSION_IPV6), 486 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 487 }, 488 [MLX5_EXPANSION_VXLAN_GPE] = { 489 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 490 MLX5_EXPANSION_IPV4, 491 MLX5_EXPANSION_IPV6), 492 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 493 }, 494 [MLX5_EXPANSION_GRE] = { 495 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), 496 .type = RTE_FLOW_ITEM_TYPE_GRE, 497 }, 498 [MLX5_EXPANSION_MPLS] = { 499 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 500 MLX5_EXPANSION_IPV6), 501 .type = RTE_FLOW_ITEM_TYPE_MPLS, 502 }, 503 [MLX5_EXPANSION_ETH] = { 504 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 505 MLX5_EXPANSION_IPV6), 506 .type = RTE_FLOW_ITEM_TYPE_ETH, 507 }, 508 [MLX5_EXPANSION_ETH_VLAN] = { 509 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), 510 .type = RTE_FLOW_ITEM_TYPE_ETH, 511 }, 512 [MLX5_EXPANSION_VLAN] = { 513 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 514 MLX5_EXPANSION_IPV6), 515 .type = RTE_FLOW_ITEM_TYPE_VLAN, 516 }, 517 [MLX5_EXPANSION_IPV4] = { 518 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 519 MLX5_EXPANSION_IPV4_TCP), 520 .type = RTE_FLOW_ITEM_TYPE_IPV4, 521 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 522 ETH_RSS_NONFRAG_IPV4_OTHER, 523 }, 524 [MLX5_EXPANSION_IPV4_UDP] = { 525 .type = RTE_FLOW_ITEM_TYPE_UDP, 526 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 527 }, 528 [MLX5_EXPANSION_IPV4_TCP] = { 529 .type = RTE_FLOW_ITEM_TYPE_TCP, 530 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 531 }, 532 [MLX5_EXPANSION_IPV6] = { 533 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 534 MLX5_EXPANSION_IPV6_TCP), 535 .type = RTE_FLOW_ITEM_TYPE_IPV6, 536 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 537 ETH_RSS_NONFRAG_IPV6_OTHER, 538 }, 539 [MLX5_EXPANSION_IPV6_UDP] = { 540 .type = RTE_FLOW_ITEM_TYPE_UDP, 541 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 542 }, 543 [MLX5_EXPANSION_IPV6_TCP] = { 544 .type = RTE_FLOW_ITEM_TYPE_TCP, 545 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 546 }, 547 }; 548 549 static const struct rte_flow_ops mlx5_flow_ops = { 550 .validate = mlx5_flow_validate, 551 .create = mlx5_flow_create, 552 .destroy = mlx5_flow_destroy, 553 .flush = mlx5_flow_flush, 554 .isolate = mlx5_flow_isolate, 555 .query = mlx5_flow_query, 556 .dev_dump = mlx5_flow_dev_dump, 557 .get_aged_flows = mlx5_flow_get_aged_flows, 558 }; 559 560 /* Convert FDIR request to Generic flow. */ 561 struct mlx5_fdir { 562 struct rte_flow_attr attr; 563 struct rte_flow_item items[4]; 564 struct rte_flow_item_eth l2; 565 struct rte_flow_item_eth l2_mask; 566 union { 567 struct rte_flow_item_ipv4 ipv4; 568 struct rte_flow_item_ipv6 ipv6; 569 } l3; 570 union { 571 struct rte_flow_item_ipv4 ipv4; 572 struct rte_flow_item_ipv6 ipv6; 573 } l3_mask; 574 union { 575 struct rte_flow_item_udp udp; 576 struct rte_flow_item_tcp tcp; 577 } l4; 578 union { 579 struct rte_flow_item_udp udp; 580 struct rte_flow_item_tcp tcp; 581 } l4_mask; 582 struct rte_flow_action actions[2]; 583 struct rte_flow_action_queue queue; 584 }; 585 586 /* Tunnel information. */ 587 struct mlx5_flow_tunnel_info { 588 uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ 589 uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */ 590 }; 591 592 static struct mlx5_flow_tunnel_info tunnels_info[] = { 593 { 594 .tunnel = MLX5_FLOW_LAYER_VXLAN, 595 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP, 596 }, 597 { 598 .tunnel = MLX5_FLOW_LAYER_GENEVE, 599 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP, 600 }, 601 { 602 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE, 603 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP, 604 }, 605 { 606 .tunnel = MLX5_FLOW_LAYER_GRE, 607 .ptype = RTE_PTYPE_TUNNEL_GRE, 608 }, 609 { 610 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP, 611 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP, 612 }, 613 { 614 .tunnel = MLX5_FLOW_LAYER_MPLS, 615 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE, 616 }, 617 { 618 .tunnel = MLX5_FLOW_LAYER_NVGRE, 619 .ptype = RTE_PTYPE_TUNNEL_NVGRE, 620 }, 621 { 622 .tunnel = MLX5_FLOW_LAYER_IPIP, 623 .ptype = RTE_PTYPE_TUNNEL_IP, 624 }, 625 { 626 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP, 627 .ptype = RTE_PTYPE_TUNNEL_IP, 628 }, 629 { 630 .tunnel = MLX5_FLOW_LAYER_GTP, 631 .ptype = RTE_PTYPE_TUNNEL_GTPU, 632 }, 633 }; 634 635 /** 636 * Translate tag ID to register. 637 * 638 * @param[in] dev 639 * Pointer to the Ethernet device structure. 640 * @param[in] feature 641 * The feature that request the register. 642 * @param[in] id 643 * The request register ID. 644 * @param[out] error 645 * Error description in case of any. 646 * 647 * @return 648 * The request register on success, a negative errno 649 * value otherwise and rte_errno is set. 650 */ 651 int 652 mlx5_flow_get_reg_id(struct rte_eth_dev *dev, 653 enum mlx5_feature_name feature, 654 uint32_t id, 655 struct rte_flow_error *error) 656 { 657 struct mlx5_priv *priv = dev->data->dev_private; 658 struct mlx5_dev_config *config = &priv->config; 659 enum modify_reg start_reg; 660 bool skip_mtr_reg = false; 661 662 switch (feature) { 663 case MLX5_HAIRPIN_RX: 664 return REG_B; 665 case MLX5_HAIRPIN_TX: 666 return REG_A; 667 case MLX5_METADATA_RX: 668 switch (config->dv_xmeta_en) { 669 case MLX5_XMETA_MODE_LEGACY: 670 return REG_B; 671 case MLX5_XMETA_MODE_META16: 672 return REG_C_0; 673 case MLX5_XMETA_MODE_META32: 674 return REG_C_1; 675 } 676 break; 677 case MLX5_METADATA_TX: 678 return REG_A; 679 case MLX5_METADATA_FDB: 680 switch (config->dv_xmeta_en) { 681 case MLX5_XMETA_MODE_LEGACY: 682 return REG_NON; 683 case MLX5_XMETA_MODE_META16: 684 return REG_C_0; 685 case MLX5_XMETA_MODE_META32: 686 return REG_C_1; 687 } 688 break; 689 case MLX5_FLOW_MARK: 690 switch (config->dv_xmeta_en) { 691 case MLX5_XMETA_MODE_LEGACY: 692 return REG_NON; 693 case MLX5_XMETA_MODE_META16: 694 return REG_C_1; 695 case MLX5_XMETA_MODE_META32: 696 return REG_C_0; 697 } 698 break; 699 case MLX5_MTR_SFX: 700 /* 701 * If meter color and flow match share one register, flow match 702 * should use the meter color register for match. 703 */ 704 if (priv->mtr_reg_share) 705 return priv->mtr_color_reg; 706 else 707 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : 708 REG_C_3; 709 case MLX5_MTR_COLOR: 710 MLX5_ASSERT(priv->mtr_color_reg != REG_NON); 711 return priv->mtr_color_reg; 712 case MLX5_COPY_MARK: 713 /* 714 * Metadata COPY_MARK register using is in meter suffix sub 715 * flow while with meter. It's safe to share the same register. 716 */ 717 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3; 718 case MLX5_APP_TAG: 719 /* 720 * If meter is enable, it will engage the register for color 721 * match and flow match. If meter color match is not using the 722 * REG_C_2, need to skip the REG_C_x be used by meter color 723 * match. 724 * If meter is disable, free to use all available registers. 725 */ 726 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 : 727 (priv->mtr_reg_share ? REG_C_3 : REG_C_4); 728 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2); 729 if (id > (REG_C_7 - start_reg)) 730 return rte_flow_error_set(error, EINVAL, 731 RTE_FLOW_ERROR_TYPE_ITEM, 732 NULL, "invalid tag id"); 733 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON) 734 return rte_flow_error_set(error, ENOTSUP, 735 RTE_FLOW_ERROR_TYPE_ITEM, 736 NULL, "unsupported tag id"); 737 /* 738 * This case means meter is using the REG_C_x great than 2. 739 * Take care not to conflict with meter color REG_C_x. 740 * If the available index REG_C_y >= REG_C_x, skip the 741 * color register. 742 */ 743 if (skip_mtr_reg && config->flow_mreg_c 744 [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { 745 if (id >= (REG_C_7 - start_reg)) 746 return rte_flow_error_set(error, EINVAL, 747 RTE_FLOW_ERROR_TYPE_ITEM, 748 NULL, "invalid tag id"); 749 if (config->flow_mreg_c 750 [id + 1 + start_reg - REG_C_0] != REG_NON) 751 return config->flow_mreg_c 752 [id + 1 + start_reg - REG_C_0]; 753 return rte_flow_error_set(error, ENOTSUP, 754 RTE_FLOW_ERROR_TYPE_ITEM, 755 NULL, "unsupported tag id"); 756 } 757 return config->flow_mreg_c[id + start_reg - REG_C_0]; 758 } 759 MLX5_ASSERT(false); 760 return rte_flow_error_set(error, EINVAL, 761 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 762 NULL, "invalid feature name"); 763 } 764 765 /** 766 * Check extensive flow metadata register support. 767 * 768 * @param dev 769 * Pointer to rte_eth_dev structure. 770 * 771 * @return 772 * True if device supports extensive flow metadata register, otherwise false. 773 */ 774 bool 775 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev) 776 { 777 struct mlx5_priv *priv = dev->data->dev_private; 778 struct mlx5_dev_config *config = &priv->config; 779 780 /* 781 * Having available reg_c can be regarded inclusively as supporting 782 * extensive flow metadata register, which could mean, 783 * - metadata register copy action by modify header. 784 * - 16 modify header actions is supported. 785 * - reg_c's are preserved across different domain (FDB and NIC) on 786 * packet loopback by flow lookup miss. 787 */ 788 return config->flow_mreg_c[2] != REG_NON; 789 } 790 791 /** 792 * Verify the @p item specifications (spec, last, mask) are compatible with the 793 * NIC capabilities. 794 * 795 * @param[in] item 796 * Item specification. 797 * @param[in] mask 798 * @p item->mask or flow default bit-masks. 799 * @param[in] nic_mask 800 * Bit-masks covering supported fields by the NIC to compare with user mask. 801 * @param[in] size 802 * Bit-masks size in bytes. 803 * @param[in] range_accepted 804 * True if range of values is accepted for specific fields, false otherwise. 805 * @param[out] error 806 * Pointer to error structure. 807 * 808 * @return 809 * 0 on success, a negative errno value otherwise and rte_errno is set. 810 */ 811 int 812 mlx5_flow_item_acceptable(const struct rte_flow_item *item, 813 const uint8_t *mask, 814 const uint8_t *nic_mask, 815 unsigned int size, 816 bool range_accepted, 817 struct rte_flow_error *error) 818 { 819 unsigned int i; 820 821 MLX5_ASSERT(nic_mask); 822 for (i = 0; i < size; ++i) 823 if ((nic_mask[i] | mask[i]) != nic_mask[i]) 824 return rte_flow_error_set(error, ENOTSUP, 825 RTE_FLOW_ERROR_TYPE_ITEM, 826 item, 827 "mask enables non supported" 828 " bits"); 829 if (!item->spec && (item->mask || item->last)) 830 return rte_flow_error_set(error, EINVAL, 831 RTE_FLOW_ERROR_TYPE_ITEM, item, 832 "mask/last without a spec is not" 833 " supported"); 834 if (item->spec && item->last && !range_accepted) { 835 uint8_t spec[size]; 836 uint8_t last[size]; 837 unsigned int i; 838 int ret; 839 840 for (i = 0; i < size; ++i) { 841 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i]; 842 last[i] = ((const uint8_t *)item->last)[i] & mask[i]; 843 } 844 ret = memcmp(spec, last, size); 845 if (ret != 0) 846 return rte_flow_error_set(error, EINVAL, 847 RTE_FLOW_ERROR_TYPE_ITEM, 848 item, 849 "range is not valid"); 850 } 851 return 0; 852 } 853 854 /** 855 * Adjust the hash fields according to the @p flow information. 856 * 857 * @param[in] dev_flow. 858 * Pointer to the mlx5_flow. 859 * @param[in] tunnel 860 * 1 when the hash field is for a tunnel item. 861 * @param[in] layer_types 862 * ETH_RSS_* types. 863 * @param[in] hash_fields 864 * Item hash fields. 865 * 866 * @return 867 * The hash fields that should be used. 868 */ 869 uint64_t 870 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc, 871 int tunnel __rte_unused, uint64_t layer_types, 872 uint64_t hash_fields) 873 { 874 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 875 int rss_request_inner = rss_desc->level >= 2; 876 877 /* Check RSS hash level for tunnel. */ 878 if (tunnel && rss_request_inner) 879 hash_fields |= IBV_RX_HASH_INNER; 880 else if (tunnel || rss_request_inner) 881 return 0; 882 #endif 883 /* Check if requested layer matches RSS hash fields. */ 884 if (!(rss_desc->types & layer_types)) 885 return 0; 886 return hash_fields; 887 } 888 889 /** 890 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 891 * if several tunnel rules are used on this queue, the tunnel ptype will be 892 * cleared. 893 * 894 * @param rxq_ctrl 895 * Rx queue to update. 896 */ 897 static void 898 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 899 { 900 unsigned int i; 901 uint32_t tunnel_ptype = 0; 902 903 /* Look up for the ptype to use. */ 904 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 905 if (!rxq_ctrl->flow_tunnels_n[i]) 906 continue; 907 if (!tunnel_ptype) { 908 tunnel_ptype = tunnels_info[i].ptype; 909 } else { 910 tunnel_ptype = 0; 911 break; 912 } 913 } 914 rxq_ctrl->rxq.tunnel = tunnel_ptype; 915 } 916 917 /** 918 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the devive 919 * flow. 920 * 921 * @param[in] dev 922 * Pointer to the Ethernet device structure. 923 * @param[in] dev_handle 924 * Pointer to device flow handle structure. 925 */ 926 static void 927 flow_drv_rxq_flags_set(struct rte_eth_dev *dev, 928 struct mlx5_flow_handle *dev_handle) 929 { 930 struct mlx5_priv *priv = dev->data->dev_private; 931 const int mark = dev_handle->mark; 932 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 933 struct mlx5_hrxq *hrxq; 934 unsigned int i; 935 936 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 937 return; 938 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 939 dev_handle->rix_hrxq); 940 if (!hrxq) 941 return; 942 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 943 int idx = hrxq->ind_table->queues[i]; 944 struct mlx5_rxq_ctrl *rxq_ctrl = 945 container_of((*priv->rxqs)[idx], 946 struct mlx5_rxq_ctrl, rxq); 947 948 /* 949 * To support metadata register copy on Tx loopback, 950 * this must be always enabled (metadata may arive 951 * from other port - not from local flows only. 952 */ 953 if (priv->config.dv_flow_en && 954 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 955 mlx5_flow_ext_mreg_supported(dev)) { 956 rxq_ctrl->rxq.mark = 1; 957 rxq_ctrl->flow_mark_n = 1; 958 } else if (mark) { 959 rxq_ctrl->rxq.mark = 1; 960 rxq_ctrl->flow_mark_n++; 961 } 962 if (tunnel) { 963 unsigned int j; 964 965 /* Increase the counter matching the flow. */ 966 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 967 if ((tunnels_info[j].tunnel & 968 dev_handle->layers) == 969 tunnels_info[j].tunnel) { 970 rxq_ctrl->flow_tunnels_n[j]++; 971 break; 972 } 973 } 974 flow_rxq_tunnel_ptype_update(rxq_ctrl); 975 } 976 } 977 } 978 979 /** 980 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow 981 * 982 * @param[in] dev 983 * Pointer to the Ethernet device structure. 984 * @param[in] flow 985 * Pointer to flow structure. 986 */ 987 static void 988 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 989 { 990 struct mlx5_priv *priv = dev->data->dev_private; 991 uint32_t handle_idx; 992 struct mlx5_flow_handle *dev_handle; 993 994 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 995 handle_idx, dev_handle, next) 996 flow_drv_rxq_flags_set(dev, dev_handle); 997 } 998 999 /** 1000 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 1001 * device flow if no other flow uses it with the same kind of request. 1002 * 1003 * @param dev 1004 * Pointer to Ethernet device. 1005 * @param[in] dev_handle 1006 * Pointer to the device flow handle structure. 1007 */ 1008 static void 1009 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, 1010 struct mlx5_flow_handle *dev_handle) 1011 { 1012 struct mlx5_priv *priv = dev->data->dev_private; 1013 const int mark = dev_handle->mark; 1014 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 1015 struct mlx5_hrxq *hrxq; 1016 unsigned int i; 1017 1018 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 1019 return; 1020 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 1021 dev_handle->rix_hrxq); 1022 if (!hrxq) 1023 return; 1024 MLX5_ASSERT(dev->data->dev_started); 1025 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 1026 int idx = hrxq->ind_table->queues[i]; 1027 struct mlx5_rxq_ctrl *rxq_ctrl = 1028 container_of((*priv->rxqs)[idx], 1029 struct mlx5_rxq_ctrl, rxq); 1030 1031 if (priv->config.dv_flow_en && 1032 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 1033 mlx5_flow_ext_mreg_supported(dev)) { 1034 rxq_ctrl->rxq.mark = 1; 1035 rxq_ctrl->flow_mark_n = 1; 1036 } else if (mark) { 1037 rxq_ctrl->flow_mark_n--; 1038 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 1039 } 1040 if (tunnel) { 1041 unsigned int j; 1042 1043 /* Decrease the counter matching the flow. */ 1044 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 1045 if ((tunnels_info[j].tunnel & 1046 dev_handle->layers) == 1047 tunnels_info[j].tunnel) { 1048 rxq_ctrl->flow_tunnels_n[j]--; 1049 break; 1050 } 1051 } 1052 flow_rxq_tunnel_ptype_update(rxq_ctrl); 1053 } 1054 } 1055 } 1056 1057 /** 1058 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 1059 * @p flow if no other flow uses it with the same kind of request. 1060 * 1061 * @param dev 1062 * Pointer to Ethernet device. 1063 * @param[in] flow 1064 * Pointer to the flow. 1065 */ 1066 static void 1067 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 1068 { 1069 struct mlx5_priv *priv = dev->data->dev_private; 1070 uint32_t handle_idx; 1071 struct mlx5_flow_handle *dev_handle; 1072 1073 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 1074 handle_idx, dev_handle, next) 1075 flow_drv_rxq_flags_trim(dev, dev_handle); 1076 } 1077 1078 /** 1079 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 1080 * 1081 * @param dev 1082 * Pointer to Ethernet device. 1083 */ 1084 static void 1085 flow_rxq_flags_clear(struct rte_eth_dev *dev) 1086 { 1087 struct mlx5_priv *priv = dev->data->dev_private; 1088 unsigned int i; 1089 1090 for (i = 0; i != priv->rxqs_n; ++i) { 1091 struct mlx5_rxq_ctrl *rxq_ctrl; 1092 unsigned int j; 1093 1094 if (!(*priv->rxqs)[i]) 1095 continue; 1096 rxq_ctrl = container_of((*priv->rxqs)[i], 1097 struct mlx5_rxq_ctrl, rxq); 1098 rxq_ctrl->flow_mark_n = 0; 1099 rxq_ctrl->rxq.mark = 0; 1100 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 1101 rxq_ctrl->flow_tunnels_n[j] = 0; 1102 rxq_ctrl->rxq.tunnel = 0; 1103 } 1104 } 1105 1106 /** 1107 * Set the Rx queue dynamic metadata (mask and offset) for a flow 1108 * 1109 * @param[in] dev 1110 * Pointer to the Ethernet device structure. 1111 */ 1112 void 1113 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) 1114 { 1115 struct mlx5_priv *priv = dev->data->dev_private; 1116 struct mlx5_rxq_data *data; 1117 unsigned int i; 1118 1119 for (i = 0; i != priv->rxqs_n; ++i) { 1120 if (!(*priv->rxqs)[i]) 1121 continue; 1122 data = (*priv->rxqs)[i]; 1123 if (!rte_flow_dynf_metadata_avail()) { 1124 data->dynf_meta = 0; 1125 data->flow_meta_mask = 0; 1126 data->flow_meta_offset = -1; 1127 } else { 1128 data->dynf_meta = 1; 1129 data->flow_meta_mask = rte_flow_dynf_metadata_mask; 1130 data->flow_meta_offset = rte_flow_dynf_metadata_offs; 1131 } 1132 } 1133 } 1134 1135 /* 1136 * return a pointer to the desired action in the list of actions. 1137 * 1138 * @param[in] actions 1139 * The list of actions to search the action in. 1140 * @param[in] action 1141 * The action to find. 1142 * 1143 * @return 1144 * Pointer to the action in the list, if found. NULL otherwise. 1145 */ 1146 const struct rte_flow_action * 1147 mlx5_flow_find_action(const struct rte_flow_action *actions, 1148 enum rte_flow_action_type action) 1149 { 1150 if (actions == NULL) 1151 return NULL; 1152 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) 1153 if (actions->type == action) 1154 return actions; 1155 return NULL; 1156 } 1157 1158 /* 1159 * Validate the flag action. 1160 * 1161 * @param[in] action_flags 1162 * Bit-fields that holds the actions detected until now. 1163 * @param[in] attr 1164 * Attributes of flow that includes this action. 1165 * @param[out] error 1166 * Pointer to error structure. 1167 * 1168 * @return 1169 * 0 on success, a negative errno value otherwise and rte_errno is set. 1170 */ 1171 int 1172 mlx5_flow_validate_action_flag(uint64_t action_flags, 1173 const struct rte_flow_attr *attr, 1174 struct rte_flow_error *error) 1175 { 1176 if (action_flags & MLX5_FLOW_ACTION_MARK) 1177 return rte_flow_error_set(error, EINVAL, 1178 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1179 "can't mark and flag in same flow"); 1180 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1181 return rte_flow_error_set(error, EINVAL, 1182 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1183 "can't have 2 flag" 1184 " actions in same flow"); 1185 if (attr->egress) 1186 return rte_flow_error_set(error, ENOTSUP, 1187 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1188 "flag action not supported for " 1189 "egress"); 1190 return 0; 1191 } 1192 1193 /* 1194 * Validate the mark action. 1195 * 1196 * @param[in] action 1197 * Pointer to the queue action. 1198 * @param[in] action_flags 1199 * Bit-fields that holds the actions detected until now. 1200 * @param[in] attr 1201 * Attributes of flow that includes this action. 1202 * @param[out] error 1203 * Pointer to error structure. 1204 * 1205 * @return 1206 * 0 on success, a negative errno value otherwise and rte_errno is set. 1207 */ 1208 int 1209 mlx5_flow_validate_action_mark(const struct rte_flow_action *action, 1210 uint64_t action_flags, 1211 const struct rte_flow_attr *attr, 1212 struct rte_flow_error *error) 1213 { 1214 const struct rte_flow_action_mark *mark = action->conf; 1215 1216 if (!mark) 1217 return rte_flow_error_set(error, EINVAL, 1218 RTE_FLOW_ERROR_TYPE_ACTION, 1219 action, 1220 "configuration cannot be null"); 1221 if (mark->id >= MLX5_FLOW_MARK_MAX) 1222 return rte_flow_error_set(error, EINVAL, 1223 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1224 &mark->id, 1225 "mark id must in 0 <= id < " 1226 RTE_STR(MLX5_FLOW_MARK_MAX)); 1227 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1228 return rte_flow_error_set(error, EINVAL, 1229 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1230 "can't flag and mark in same flow"); 1231 if (action_flags & MLX5_FLOW_ACTION_MARK) 1232 return rte_flow_error_set(error, EINVAL, 1233 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1234 "can't have 2 mark actions in same" 1235 " flow"); 1236 if (attr->egress) 1237 return rte_flow_error_set(error, ENOTSUP, 1238 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1239 "mark action not supported for " 1240 "egress"); 1241 return 0; 1242 } 1243 1244 /* 1245 * Validate the drop action. 1246 * 1247 * @param[in] action_flags 1248 * Bit-fields that holds the actions detected until now. 1249 * @param[in] attr 1250 * Attributes of flow that includes this action. 1251 * @param[out] error 1252 * Pointer to error structure. 1253 * 1254 * @return 1255 * 0 on success, a negative errno value otherwise and rte_errno is set. 1256 */ 1257 int 1258 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused, 1259 const struct rte_flow_attr *attr, 1260 struct rte_flow_error *error) 1261 { 1262 if (attr->egress) 1263 return rte_flow_error_set(error, ENOTSUP, 1264 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1265 "drop action not supported for " 1266 "egress"); 1267 return 0; 1268 } 1269 1270 /* 1271 * Validate the queue action. 1272 * 1273 * @param[in] action 1274 * Pointer to the queue action. 1275 * @param[in] action_flags 1276 * Bit-fields that holds the actions detected until now. 1277 * @param[in] dev 1278 * Pointer to the Ethernet device structure. 1279 * @param[in] attr 1280 * Attributes of flow that includes this action. 1281 * @param[out] error 1282 * Pointer to error structure. 1283 * 1284 * @return 1285 * 0 on success, a negative errno value otherwise and rte_errno is set. 1286 */ 1287 int 1288 mlx5_flow_validate_action_queue(const struct rte_flow_action *action, 1289 uint64_t action_flags, 1290 struct rte_eth_dev *dev, 1291 const struct rte_flow_attr *attr, 1292 struct rte_flow_error *error) 1293 { 1294 struct mlx5_priv *priv = dev->data->dev_private; 1295 const struct rte_flow_action_queue *queue = action->conf; 1296 1297 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1298 return rte_flow_error_set(error, EINVAL, 1299 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1300 "can't have 2 fate actions in" 1301 " same flow"); 1302 if (!priv->rxqs_n) 1303 return rte_flow_error_set(error, EINVAL, 1304 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1305 NULL, "No Rx queues configured"); 1306 if (queue->index >= priv->rxqs_n) 1307 return rte_flow_error_set(error, EINVAL, 1308 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1309 &queue->index, 1310 "queue index out of range"); 1311 if (!(*priv->rxqs)[queue->index]) 1312 return rte_flow_error_set(error, EINVAL, 1313 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1314 &queue->index, 1315 "queue is not configured"); 1316 if (attr->egress) 1317 return rte_flow_error_set(error, ENOTSUP, 1318 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1319 "queue action not supported for " 1320 "egress"); 1321 return 0; 1322 } 1323 1324 /* 1325 * Validate the rss action. 1326 * 1327 * @param[in] action 1328 * Pointer to the queue action. 1329 * @param[in] action_flags 1330 * Bit-fields that holds the actions detected until now. 1331 * @param[in] dev 1332 * Pointer to the Ethernet device structure. 1333 * @param[in] attr 1334 * Attributes of flow that includes this action. 1335 * @param[in] item_flags 1336 * Items that were detected. 1337 * @param[out] error 1338 * Pointer to error structure. 1339 * 1340 * @return 1341 * 0 on success, a negative errno value otherwise and rte_errno is set. 1342 */ 1343 int 1344 mlx5_flow_validate_action_rss(const struct rte_flow_action *action, 1345 uint64_t action_flags, 1346 struct rte_eth_dev *dev, 1347 const struct rte_flow_attr *attr, 1348 uint64_t item_flags, 1349 struct rte_flow_error *error) 1350 { 1351 struct mlx5_priv *priv = dev->data->dev_private; 1352 const struct rte_flow_action_rss *rss = action->conf; 1353 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1354 unsigned int i; 1355 1356 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1357 return rte_flow_error_set(error, EINVAL, 1358 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1359 "can't have 2 fate actions" 1360 " in same flow"); 1361 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 1362 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 1363 return rte_flow_error_set(error, ENOTSUP, 1364 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1365 &rss->func, 1366 "RSS hash function not supported"); 1367 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1368 if (rss->level > 2) 1369 #else 1370 if (rss->level > 1) 1371 #endif 1372 return rte_flow_error_set(error, ENOTSUP, 1373 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1374 &rss->level, 1375 "tunnel RSS is not supported"); 1376 /* allow RSS key_len 0 in case of NULL (default) RSS key. */ 1377 if (rss->key_len == 0 && rss->key != NULL) 1378 return rte_flow_error_set(error, ENOTSUP, 1379 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1380 &rss->key_len, 1381 "RSS hash key length 0"); 1382 if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN) 1383 return rte_flow_error_set(error, ENOTSUP, 1384 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1385 &rss->key_len, 1386 "RSS hash key too small"); 1387 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 1388 return rte_flow_error_set(error, ENOTSUP, 1389 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1390 &rss->key_len, 1391 "RSS hash key too large"); 1392 if (rss->queue_num > priv->config.ind_table_max_size) 1393 return rte_flow_error_set(error, ENOTSUP, 1394 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1395 &rss->queue_num, 1396 "number of queues too large"); 1397 if (rss->types & MLX5_RSS_HF_MASK) 1398 return rte_flow_error_set(error, ENOTSUP, 1399 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1400 &rss->types, 1401 "some RSS protocols are not" 1402 " supported"); 1403 if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) && 1404 !(rss->types & ETH_RSS_IP)) 1405 return rte_flow_error_set(error, EINVAL, 1406 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1407 "L3 partial RSS requested but L3 RSS" 1408 " type not specified"); 1409 if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) && 1410 !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP))) 1411 return rte_flow_error_set(error, EINVAL, 1412 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1413 "L4 partial RSS requested but L4 RSS" 1414 " type not specified"); 1415 if (!priv->rxqs_n) 1416 return rte_flow_error_set(error, EINVAL, 1417 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1418 NULL, "No Rx queues configured"); 1419 if (!rss->queue_num) 1420 return rte_flow_error_set(error, EINVAL, 1421 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1422 NULL, "No queues configured"); 1423 for (i = 0; i != rss->queue_num; ++i) { 1424 if (rss->queue[i] >= priv->rxqs_n) 1425 return rte_flow_error_set 1426 (error, EINVAL, 1427 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1428 &rss->queue[i], "queue index out of range"); 1429 if (!(*priv->rxqs)[rss->queue[i]]) 1430 return rte_flow_error_set 1431 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1432 &rss->queue[i], "queue is not configured"); 1433 } 1434 if (attr->egress) 1435 return rte_flow_error_set(error, ENOTSUP, 1436 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1437 "rss action not supported for " 1438 "egress"); 1439 if (rss->level > 1 && !tunnel) 1440 return rte_flow_error_set(error, EINVAL, 1441 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1442 "inner RSS is not supported for " 1443 "non-tunnel flows"); 1444 if ((item_flags & MLX5_FLOW_LAYER_ECPRI) && 1445 !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) { 1446 return rte_flow_error_set(error, EINVAL, 1447 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1448 "RSS on eCPRI is not supported now"); 1449 } 1450 return 0; 1451 } 1452 1453 /* 1454 * Validate the default miss action. 1455 * 1456 * @param[in] action_flags 1457 * Bit-fields that holds the actions detected until now. 1458 * @param[out] error 1459 * Pointer to error structure. 1460 * 1461 * @return 1462 * 0 on success, a negative errno value otherwise and rte_errno is set. 1463 */ 1464 int 1465 mlx5_flow_validate_action_default_miss(uint64_t action_flags, 1466 const struct rte_flow_attr *attr, 1467 struct rte_flow_error *error) 1468 { 1469 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1470 return rte_flow_error_set(error, EINVAL, 1471 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1472 "can't have 2 fate actions in" 1473 " same flow"); 1474 if (attr->egress) 1475 return rte_flow_error_set(error, ENOTSUP, 1476 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1477 "default miss action not supported " 1478 "for egress"); 1479 if (attr->group) 1480 return rte_flow_error_set(error, ENOTSUP, 1481 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL, 1482 "only group 0 is supported"); 1483 if (attr->transfer) 1484 return rte_flow_error_set(error, ENOTSUP, 1485 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1486 NULL, "transfer is not supported"); 1487 return 0; 1488 } 1489 1490 /* 1491 * Validate the count action. 1492 * 1493 * @param[in] dev 1494 * Pointer to the Ethernet device structure. 1495 * @param[in] attr 1496 * Attributes of flow that includes this action. 1497 * @param[out] error 1498 * Pointer to error structure. 1499 * 1500 * @return 1501 * 0 on success, a negative errno value otherwise and rte_errno is set. 1502 */ 1503 int 1504 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, 1505 const struct rte_flow_attr *attr, 1506 struct rte_flow_error *error) 1507 { 1508 if (attr->egress) 1509 return rte_flow_error_set(error, ENOTSUP, 1510 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1511 "count action not supported for " 1512 "egress"); 1513 return 0; 1514 } 1515 1516 /** 1517 * Verify the @p attributes will be correctly understood by the NIC and store 1518 * them in the @p flow if everything is correct. 1519 * 1520 * @param[in] dev 1521 * Pointer to the Ethernet device structure. 1522 * @param[in] attributes 1523 * Pointer to flow attributes 1524 * @param[out] error 1525 * Pointer to error structure. 1526 * 1527 * @return 1528 * 0 on success, a negative errno value otherwise and rte_errno is set. 1529 */ 1530 int 1531 mlx5_flow_validate_attributes(struct rte_eth_dev *dev, 1532 const struct rte_flow_attr *attributes, 1533 struct rte_flow_error *error) 1534 { 1535 struct mlx5_priv *priv = dev->data->dev_private; 1536 uint32_t priority_max = priv->config.flow_prio - 1; 1537 1538 if (attributes->group) 1539 return rte_flow_error_set(error, ENOTSUP, 1540 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 1541 NULL, "groups is not supported"); 1542 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 1543 attributes->priority >= priority_max) 1544 return rte_flow_error_set(error, ENOTSUP, 1545 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 1546 NULL, "priority out of range"); 1547 if (attributes->egress) 1548 return rte_flow_error_set(error, ENOTSUP, 1549 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1550 "egress is not supported"); 1551 if (attributes->transfer && !priv->config.dv_esw_en) 1552 return rte_flow_error_set(error, ENOTSUP, 1553 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1554 NULL, "transfer is not supported"); 1555 if (!attributes->ingress) 1556 return rte_flow_error_set(error, EINVAL, 1557 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 1558 NULL, 1559 "ingress attribute is mandatory"); 1560 return 0; 1561 } 1562 1563 /** 1564 * Validate ICMP6 item. 1565 * 1566 * @param[in] item 1567 * Item specification. 1568 * @param[in] item_flags 1569 * Bit-fields that holds the items detected until now. 1570 * @param[out] error 1571 * Pointer to error structure. 1572 * 1573 * @return 1574 * 0 on success, a negative errno value otherwise and rte_errno is set. 1575 */ 1576 int 1577 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, 1578 uint64_t item_flags, 1579 uint8_t target_protocol, 1580 struct rte_flow_error *error) 1581 { 1582 const struct rte_flow_item_icmp6 *mask = item->mask; 1583 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1584 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1585 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1586 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1587 MLX5_FLOW_LAYER_OUTER_L4; 1588 int ret; 1589 1590 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) 1591 return rte_flow_error_set(error, EINVAL, 1592 RTE_FLOW_ERROR_TYPE_ITEM, item, 1593 "protocol filtering not compatible" 1594 " with ICMP6 layer"); 1595 if (!(item_flags & l3m)) 1596 return rte_flow_error_set(error, EINVAL, 1597 RTE_FLOW_ERROR_TYPE_ITEM, item, 1598 "IPv6 is mandatory to filter on" 1599 " ICMP6"); 1600 if (item_flags & l4m) 1601 return rte_flow_error_set(error, EINVAL, 1602 RTE_FLOW_ERROR_TYPE_ITEM, item, 1603 "multiple L4 layers not supported"); 1604 if (!mask) 1605 mask = &rte_flow_item_icmp6_mask; 1606 ret = mlx5_flow_item_acceptable 1607 (item, (const uint8_t *)mask, 1608 (const uint8_t *)&rte_flow_item_icmp6_mask, 1609 sizeof(struct rte_flow_item_icmp6), 1610 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1611 if (ret < 0) 1612 return ret; 1613 return 0; 1614 } 1615 1616 /** 1617 * Validate ICMP item. 1618 * 1619 * @param[in] item 1620 * Item specification. 1621 * @param[in] item_flags 1622 * Bit-fields that holds the items detected until now. 1623 * @param[out] error 1624 * Pointer to error structure. 1625 * 1626 * @return 1627 * 0 on success, a negative errno value otherwise and rte_errno is set. 1628 */ 1629 int 1630 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, 1631 uint64_t item_flags, 1632 uint8_t target_protocol, 1633 struct rte_flow_error *error) 1634 { 1635 const struct rte_flow_item_icmp *mask = item->mask; 1636 const struct rte_flow_item_icmp nic_mask = { 1637 .hdr.icmp_type = 0xff, 1638 .hdr.icmp_code = 0xff, 1639 .hdr.icmp_ident = RTE_BE16(0xffff), 1640 .hdr.icmp_seq_nb = RTE_BE16(0xffff), 1641 }; 1642 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1643 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 1644 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 1645 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1646 MLX5_FLOW_LAYER_OUTER_L4; 1647 int ret; 1648 1649 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP) 1650 return rte_flow_error_set(error, EINVAL, 1651 RTE_FLOW_ERROR_TYPE_ITEM, item, 1652 "protocol filtering not compatible" 1653 " with ICMP layer"); 1654 if (!(item_flags & l3m)) 1655 return rte_flow_error_set(error, EINVAL, 1656 RTE_FLOW_ERROR_TYPE_ITEM, item, 1657 "IPv4 is mandatory to filter" 1658 " on ICMP"); 1659 if (item_flags & l4m) 1660 return rte_flow_error_set(error, EINVAL, 1661 RTE_FLOW_ERROR_TYPE_ITEM, item, 1662 "multiple L4 layers not supported"); 1663 if (!mask) 1664 mask = &nic_mask; 1665 ret = mlx5_flow_item_acceptable 1666 (item, (const uint8_t *)mask, 1667 (const uint8_t *)&nic_mask, 1668 sizeof(struct rte_flow_item_icmp), 1669 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1670 if (ret < 0) 1671 return ret; 1672 return 0; 1673 } 1674 1675 /** 1676 * Validate Ethernet item. 1677 * 1678 * @param[in] item 1679 * Item specification. 1680 * @param[in] item_flags 1681 * Bit-fields that holds the items detected until now. 1682 * @param[out] error 1683 * Pointer to error structure. 1684 * 1685 * @return 1686 * 0 on success, a negative errno value otherwise and rte_errno is set. 1687 */ 1688 int 1689 mlx5_flow_validate_item_eth(const struct rte_flow_item *item, 1690 uint64_t item_flags, 1691 struct rte_flow_error *error) 1692 { 1693 const struct rte_flow_item_eth *mask = item->mask; 1694 const struct rte_flow_item_eth nic_mask = { 1695 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1696 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1697 .type = RTE_BE16(0xffff), 1698 }; 1699 int ret; 1700 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1701 const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 1702 MLX5_FLOW_LAYER_OUTER_L2; 1703 1704 if (item_flags & ethm) 1705 return rte_flow_error_set(error, ENOTSUP, 1706 RTE_FLOW_ERROR_TYPE_ITEM, item, 1707 "multiple L2 layers not supported"); 1708 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) || 1709 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3))) 1710 return rte_flow_error_set(error, EINVAL, 1711 RTE_FLOW_ERROR_TYPE_ITEM, item, 1712 "L2 layer should not follow " 1713 "L3 layers"); 1714 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) || 1715 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN))) 1716 return rte_flow_error_set(error, EINVAL, 1717 RTE_FLOW_ERROR_TYPE_ITEM, item, 1718 "L2 layer should not follow VLAN"); 1719 if (!mask) 1720 mask = &rte_flow_item_eth_mask; 1721 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1722 (const uint8_t *)&nic_mask, 1723 sizeof(struct rte_flow_item_eth), 1724 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1725 return ret; 1726 } 1727 1728 /** 1729 * Validate VLAN item. 1730 * 1731 * @param[in] item 1732 * Item specification. 1733 * @param[in] item_flags 1734 * Bit-fields that holds the items detected until now. 1735 * @param[in] dev 1736 * Ethernet device flow is being created on. 1737 * @param[out] error 1738 * Pointer to error structure. 1739 * 1740 * @return 1741 * 0 on success, a negative errno value otherwise and rte_errno is set. 1742 */ 1743 int 1744 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, 1745 uint64_t item_flags, 1746 struct rte_eth_dev *dev, 1747 struct rte_flow_error *error) 1748 { 1749 const struct rte_flow_item_vlan *spec = item->spec; 1750 const struct rte_flow_item_vlan *mask = item->mask; 1751 const struct rte_flow_item_vlan nic_mask = { 1752 .tci = RTE_BE16(UINT16_MAX), 1753 .inner_type = RTE_BE16(UINT16_MAX), 1754 }; 1755 uint16_t vlan_tag = 0; 1756 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1757 int ret; 1758 const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 1759 MLX5_FLOW_LAYER_INNER_L4) : 1760 (MLX5_FLOW_LAYER_OUTER_L3 | 1761 MLX5_FLOW_LAYER_OUTER_L4); 1762 const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : 1763 MLX5_FLOW_LAYER_OUTER_VLAN; 1764 1765 if (item_flags & vlanm) 1766 return rte_flow_error_set(error, EINVAL, 1767 RTE_FLOW_ERROR_TYPE_ITEM, item, 1768 "multiple VLAN layers not supported"); 1769 else if ((item_flags & l34m) != 0) 1770 return rte_flow_error_set(error, EINVAL, 1771 RTE_FLOW_ERROR_TYPE_ITEM, item, 1772 "VLAN cannot follow L3/L4 layer"); 1773 if (!mask) 1774 mask = &rte_flow_item_vlan_mask; 1775 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1776 (const uint8_t *)&nic_mask, 1777 sizeof(struct rte_flow_item_vlan), 1778 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 1779 if (ret) 1780 return ret; 1781 if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { 1782 struct mlx5_priv *priv = dev->data->dev_private; 1783 1784 if (priv->vmwa_context) { 1785 /* 1786 * Non-NULL context means we have a virtual machine 1787 * and SR-IOV enabled, we have to create VLAN interface 1788 * to make hypervisor to setup E-Switch vport 1789 * context correctly. We avoid creating the multiple 1790 * VLAN interfaces, so we cannot support VLAN tag mask. 1791 */ 1792 return rte_flow_error_set(error, EINVAL, 1793 RTE_FLOW_ERROR_TYPE_ITEM, 1794 item, 1795 "VLAN tag mask is not" 1796 " supported in virtual" 1797 " environment"); 1798 } 1799 } 1800 if (spec) { 1801 vlan_tag = spec->tci; 1802 vlan_tag &= mask->tci; 1803 } 1804 /* 1805 * From verbs perspective an empty VLAN is equivalent 1806 * to a packet without VLAN layer. 1807 */ 1808 if (!vlan_tag) 1809 return rte_flow_error_set(error, EINVAL, 1810 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 1811 item->spec, 1812 "VLAN cannot be empty"); 1813 return 0; 1814 } 1815 1816 /** 1817 * Validate IPV4 item. 1818 * 1819 * @param[in] item 1820 * Item specification. 1821 * @param[in] item_flags 1822 * Bit-fields that holds the items detected until now. 1823 * @param[in] last_item 1824 * Previous validated item in the pattern items. 1825 * @param[in] ether_type 1826 * Type in the ethernet layer header (including dot1q). 1827 * @param[in] acc_mask 1828 * Acceptable mask, if NULL default internal default mask 1829 * will be used to check whether item fields are supported. 1830 * @param[in] range_accepted 1831 * True if range of values is accepted for specific fields, false otherwise. 1832 * @param[out] error 1833 * Pointer to error structure. 1834 * 1835 * @return 1836 * 0 on success, a negative errno value otherwise and rte_errno is set. 1837 */ 1838 int 1839 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, 1840 uint64_t item_flags, 1841 uint64_t last_item, 1842 uint16_t ether_type, 1843 const struct rte_flow_item_ipv4 *acc_mask, 1844 bool range_accepted, 1845 struct rte_flow_error *error) 1846 { 1847 const struct rte_flow_item_ipv4 *mask = item->mask; 1848 const struct rte_flow_item_ipv4 *spec = item->spec; 1849 const struct rte_flow_item_ipv4 nic_mask = { 1850 .hdr = { 1851 .src_addr = RTE_BE32(0xffffffff), 1852 .dst_addr = RTE_BE32(0xffffffff), 1853 .type_of_service = 0xff, 1854 .next_proto_id = 0xff, 1855 }, 1856 }; 1857 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1858 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1859 MLX5_FLOW_LAYER_OUTER_L3; 1860 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1861 MLX5_FLOW_LAYER_OUTER_L4; 1862 int ret; 1863 uint8_t next_proto = 0xFF; 1864 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1865 MLX5_FLOW_LAYER_OUTER_VLAN | 1866 MLX5_FLOW_LAYER_INNER_VLAN); 1867 1868 if ((last_item & l2_vlan) && ether_type && 1869 ether_type != RTE_ETHER_TYPE_IPV4) 1870 return rte_flow_error_set(error, EINVAL, 1871 RTE_FLOW_ERROR_TYPE_ITEM, item, 1872 "IPv4 cannot follow L2/VLAN layer " 1873 "which ether type is not IPv4"); 1874 if (item_flags & MLX5_FLOW_LAYER_IPIP) { 1875 if (mask && spec) 1876 next_proto = mask->hdr.next_proto_id & 1877 spec->hdr.next_proto_id; 1878 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1879 return rte_flow_error_set(error, EINVAL, 1880 RTE_FLOW_ERROR_TYPE_ITEM, 1881 item, 1882 "multiple tunnel " 1883 "not supported"); 1884 } 1885 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 1886 return rte_flow_error_set(error, EINVAL, 1887 RTE_FLOW_ERROR_TYPE_ITEM, item, 1888 "wrong tunnel type - IPv6 specified " 1889 "but IPv4 item provided"); 1890 if (item_flags & l3m) 1891 return rte_flow_error_set(error, ENOTSUP, 1892 RTE_FLOW_ERROR_TYPE_ITEM, item, 1893 "multiple L3 layers not supported"); 1894 else if (item_flags & l4m) 1895 return rte_flow_error_set(error, EINVAL, 1896 RTE_FLOW_ERROR_TYPE_ITEM, item, 1897 "L3 cannot follow an L4 layer."); 1898 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1899 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1900 return rte_flow_error_set(error, EINVAL, 1901 RTE_FLOW_ERROR_TYPE_ITEM, item, 1902 "L3 cannot follow an NVGRE layer."); 1903 if (!mask) 1904 mask = &rte_flow_item_ipv4_mask; 1905 else if (mask->hdr.next_proto_id != 0 && 1906 mask->hdr.next_proto_id != 0xff) 1907 return rte_flow_error_set(error, EINVAL, 1908 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 1909 "partial mask is not supported" 1910 " for protocol"); 1911 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1912 acc_mask ? (const uint8_t *)acc_mask 1913 : (const uint8_t *)&nic_mask, 1914 sizeof(struct rte_flow_item_ipv4), 1915 range_accepted, error); 1916 if (ret < 0) 1917 return ret; 1918 return 0; 1919 } 1920 1921 /** 1922 * Validate IPV6 item. 1923 * 1924 * @param[in] item 1925 * Item specification. 1926 * @param[in] item_flags 1927 * Bit-fields that holds the items detected until now. 1928 * @param[in] last_item 1929 * Previous validated item in the pattern items. 1930 * @param[in] ether_type 1931 * Type in the ethernet layer header (including dot1q). 1932 * @param[in] acc_mask 1933 * Acceptable mask, if NULL default internal default mask 1934 * will be used to check whether item fields are supported. 1935 * @param[out] error 1936 * Pointer to error structure. 1937 * 1938 * @return 1939 * 0 on success, a negative errno value otherwise and rte_errno is set. 1940 */ 1941 int 1942 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 1943 uint64_t item_flags, 1944 uint64_t last_item, 1945 uint16_t ether_type, 1946 const struct rte_flow_item_ipv6 *acc_mask, 1947 struct rte_flow_error *error) 1948 { 1949 const struct rte_flow_item_ipv6 *mask = item->mask; 1950 const struct rte_flow_item_ipv6 *spec = item->spec; 1951 const struct rte_flow_item_ipv6 nic_mask = { 1952 .hdr = { 1953 .src_addr = 1954 "\xff\xff\xff\xff\xff\xff\xff\xff" 1955 "\xff\xff\xff\xff\xff\xff\xff\xff", 1956 .dst_addr = 1957 "\xff\xff\xff\xff\xff\xff\xff\xff" 1958 "\xff\xff\xff\xff\xff\xff\xff\xff", 1959 .vtc_flow = RTE_BE32(0xffffffff), 1960 .proto = 0xff, 1961 }, 1962 }; 1963 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1964 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1965 MLX5_FLOW_LAYER_OUTER_L3; 1966 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1967 MLX5_FLOW_LAYER_OUTER_L4; 1968 int ret; 1969 uint8_t next_proto = 0xFF; 1970 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1971 MLX5_FLOW_LAYER_OUTER_VLAN | 1972 MLX5_FLOW_LAYER_INNER_VLAN); 1973 1974 if ((last_item & l2_vlan) && ether_type && 1975 ether_type != RTE_ETHER_TYPE_IPV6) 1976 return rte_flow_error_set(error, EINVAL, 1977 RTE_FLOW_ERROR_TYPE_ITEM, item, 1978 "IPv6 cannot follow L2/VLAN layer " 1979 "which ether type is not IPv6"); 1980 if (mask && mask->hdr.proto == UINT8_MAX && spec) 1981 next_proto = spec->hdr.proto; 1982 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { 1983 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1984 return rte_flow_error_set(error, EINVAL, 1985 RTE_FLOW_ERROR_TYPE_ITEM, 1986 item, 1987 "multiple tunnel " 1988 "not supported"); 1989 } 1990 if (next_proto == IPPROTO_HOPOPTS || 1991 next_proto == IPPROTO_ROUTING || 1992 next_proto == IPPROTO_FRAGMENT || 1993 next_proto == IPPROTO_ESP || 1994 next_proto == IPPROTO_AH || 1995 next_proto == IPPROTO_DSTOPTS) 1996 return rte_flow_error_set(error, EINVAL, 1997 RTE_FLOW_ERROR_TYPE_ITEM, item, 1998 "IPv6 proto (next header) should " 1999 "not be set as extension header"); 2000 if (item_flags & MLX5_FLOW_LAYER_IPIP) 2001 return rte_flow_error_set(error, EINVAL, 2002 RTE_FLOW_ERROR_TYPE_ITEM, item, 2003 "wrong tunnel type - IPv4 specified " 2004 "but IPv6 item provided"); 2005 if (item_flags & l3m) 2006 return rte_flow_error_set(error, ENOTSUP, 2007 RTE_FLOW_ERROR_TYPE_ITEM, item, 2008 "multiple L3 layers not supported"); 2009 else if (item_flags & l4m) 2010 return rte_flow_error_set(error, EINVAL, 2011 RTE_FLOW_ERROR_TYPE_ITEM, item, 2012 "L3 cannot follow an L4 layer."); 2013 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 2014 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 2015 return rte_flow_error_set(error, EINVAL, 2016 RTE_FLOW_ERROR_TYPE_ITEM, item, 2017 "L3 cannot follow an NVGRE layer."); 2018 if (!mask) 2019 mask = &rte_flow_item_ipv6_mask; 2020 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2021 acc_mask ? (const uint8_t *)acc_mask 2022 : (const uint8_t *)&nic_mask, 2023 sizeof(struct rte_flow_item_ipv6), 2024 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2025 if (ret < 0) 2026 return ret; 2027 return 0; 2028 } 2029 2030 /** 2031 * Validate UDP item. 2032 * 2033 * @param[in] item 2034 * Item specification. 2035 * @param[in] item_flags 2036 * Bit-fields that holds the items detected until now. 2037 * @param[in] target_protocol 2038 * The next protocol in the previous item. 2039 * @param[in] flow_mask 2040 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 2041 * @param[out] error 2042 * Pointer to error structure. 2043 * 2044 * @return 2045 * 0 on success, a negative errno value otherwise and rte_errno is set. 2046 */ 2047 int 2048 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 2049 uint64_t item_flags, 2050 uint8_t target_protocol, 2051 struct rte_flow_error *error) 2052 { 2053 const struct rte_flow_item_udp *mask = item->mask; 2054 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2055 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2056 MLX5_FLOW_LAYER_OUTER_L3; 2057 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2058 MLX5_FLOW_LAYER_OUTER_L4; 2059 int ret; 2060 2061 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 2062 return rte_flow_error_set(error, EINVAL, 2063 RTE_FLOW_ERROR_TYPE_ITEM, item, 2064 "protocol filtering not compatible" 2065 " with UDP layer"); 2066 if (!(item_flags & l3m)) 2067 return rte_flow_error_set(error, EINVAL, 2068 RTE_FLOW_ERROR_TYPE_ITEM, item, 2069 "L3 is mandatory to filter on L4"); 2070 if (item_flags & l4m) 2071 return rte_flow_error_set(error, EINVAL, 2072 RTE_FLOW_ERROR_TYPE_ITEM, item, 2073 "multiple L4 layers not supported"); 2074 if (!mask) 2075 mask = &rte_flow_item_udp_mask; 2076 ret = mlx5_flow_item_acceptable 2077 (item, (const uint8_t *)mask, 2078 (const uint8_t *)&rte_flow_item_udp_mask, 2079 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2080 error); 2081 if (ret < 0) 2082 return ret; 2083 return 0; 2084 } 2085 2086 /** 2087 * Validate TCP item. 2088 * 2089 * @param[in] item 2090 * Item specification. 2091 * @param[in] item_flags 2092 * Bit-fields that holds the items detected until now. 2093 * @param[in] target_protocol 2094 * The next protocol in the previous item. 2095 * @param[out] error 2096 * Pointer to error structure. 2097 * 2098 * @return 2099 * 0 on success, a negative errno value otherwise and rte_errno is set. 2100 */ 2101 int 2102 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 2103 uint64_t item_flags, 2104 uint8_t target_protocol, 2105 const struct rte_flow_item_tcp *flow_mask, 2106 struct rte_flow_error *error) 2107 { 2108 const struct rte_flow_item_tcp *mask = item->mask; 2109 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2110 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2111 MLX5_FLOW_LAYER_OUTER_L3; 2112 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2113 MLX5_FLOW_LAYER_OUTER_L4; 2114 int ret; 2115 2116 MLX5_ASSERT(flow_mask); 2117 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 2118 return rte_flow_error_set(error, EINVAL, 2119 RTE_FLOW_ERROR_TYPE_ITEM, item, 2120 "protocol filtering not compatible" 2121 " with TCP layer"); 2122 if (!(item_flags & l3m)) 2123 return rte_flow_error_set(error, EINVAL, 2124 RTE_FLOW_ERROR_TYPE_ITEM, item, 2125 "L3 is mandatory to filter on L4"); 2126 if (item_flags & l4m) 2127 return rte_flow_error_set(error, EINVAL, 2128 RTE_FLOW_ERROR_TYPE_ITEM, item, 2129 "multiple L4 layers not supported"); 2130 if (!mask) 2131 mask = &rte_flow_item_tcp_mask; 2132 ret = mlx5_flow_item_acceptable 2133 (item, (const uint8_t *)mask, 2134 (const uint8_t *)flow_mask, 2135 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2136 error); 2137 if (ret < 0) 2138 return ret; 2139 return 0; 2140 } 2141 2142 /** 2143 * Validate VXLAN item. 2144 * 2145 * @param[in] item 2146 * Item specification. 2147 * @param[in] item_flags 2148 * Bit-fields that holds the items detected until now. 2149 * @param[in] target_protocol 2150 * The next protocol in the previous item. 2151 * @param[out] error 2152 * Pointer to error structure. 2153 * 2154 * @return 2155 * 0 on success, a negative errno value otherwise and rte_errno is set. 2156 */ 2157 int 2158 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, 2159 uint64_t item_flags, 2160 struct rte_flow_error *error) 2161 { 2162 const struct rte_flow_item_vxlan *spec = item->spec; 2163 const struct rte_flow_item_vxlan *mask = item->mask; 2164 int ret; 2165 union vni { 2166 uint32_t vlan_id; 2167 uint8_t vni[4]; 2168 } id = { .vlan_id = 0, }; 2169 2170 2171 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2172 return rte_flow_error_set(error, ENOTSUP, 2173 RTE_FLOW_ERROR_TYPE_ITEM, item, 2174 "multiple tunnel layers not" 2175 " supported"); 2176 /* 2177 * Verify only UDPv4 is present as defined in 2178 * https://tools.ietf.org/html/rfc7348 2179 */ 2180 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2181 return rte_flow_error_set(error, EINVAL, 2182 RTE_FLOW_ERROR_TYPE_ITEM, item, 2183 "no outer UDP layer found"); 2184 if (!mask) 2185 mask = &rte_flow_item_vxlan_mask; 2186 ret = mlx5_flow_item_acceptable 2187 (item, (const uint8_t *)mask, 2188 (const uint8_t *)&rte_flow_item_vxlan_mask, 2189 sizeof(struct rte_flow_item_vxlan), 2190 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2191 if (ret < 0) 2192 return ret; 2193 if (spec) { 2194 memcpy(&id.vni[1], spec->vni, 3); 2195 memcpy(&id.vni[1], mask->vni, 3); 2196 } 2197 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2198 return rte_flow_error_set(error, ENOTSUP, 2199 RTE_FLOW_ERROR_TYPE_ITEM, item, 2200 "VXLAN tunnel must be fully defined"); 2201 return 0; 2202 } 2203 2204 /** 2205 * Validate VXLAN_GPE item. 2206 * 2207 * @param[in] item 2208 * Item specification. 2209 * @param[in] item_flags 2210 * Bit-fields that holds the items detected until now. 2211 * @param[in] priv 2212 * Pointer to the private data structure. 2213 * @param[in] target_protocol 2214 * The next protocol in the previous item. 2215 * @param[out] error 2216 * Pointer to error structure. 2217 * 2218 * @return 2219 * 0 on success, a negative errno value otherwise and rte_errno is set. 2220 */ 2221 int 2222 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 2223 uint64_t item_flags, 2224 struct rte_eth_dev *dev, 2225 struct rte_flow_error *error) 2226 { 2227 struct mlx5_priv *priv = dev->data->dev_private; 2228 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 2229 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 2230 int ret; 2231 union vni { 2232 uint32_t vlan_id; 2233 uint8_t vni[4]; 2234 } id = { .vlan_id = 0, }; 2235 2236 if (!priv->config.l3_vxlan_en) 2237 return rte_flow_error_set(error, ENOTSUP, 2238 RTE_FLOW_ERROR_TYPE_ITEM, item, 2239 "L3 VXLAN is not enabled by device" 2240 " parameter and/or not configured in" 2241 " firmware"); 2242 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2243 return rte_flow_error_set(error, ENOTSUP, 2244 RTE_FLOW_ERROR_TYPE_ITEM, item, 2245 "multiple tunnel layers not" 2246 " supported"); 2247 /* 2248 * Verify only UDPv4 is present as defined in 2249 * https://tools.ietf.org/html/rfc7348 2250 */ 2251 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2252 return rte_flow_error_set(error, EINVAL, 2253 RTE_FLOW_ERROR_TYPE_ITEM, item, 2254 "no outer UDP layer found"); 2255 if (!mask) 2256 mask = &rte_flow_item_vxlan_gpe_mask; 2257 ret = mlx5_flow_item_acceptable 2258 (item, (const uint8_t *)mask, 2259 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2260 sizeof(struct rte_flow_item_vxlan_gpe), 2261 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2262 if (ret < 0) 2263 return ret; 2264 if (spec) { 2265 if (spec->protocol) 2266 return rte_flow_error_set(error, ENOTSUP, 2267 RTE_FLOW_ERROR_TYPE_ITEM, 2268 item, 2269 "VxLAN-GPE protocol" 2270 " not supported"); 2271 memcpy(&id.vni[1], spec->vni, 3); 2272 memcpy(&id.vni[1], mask->vni, 3); 2273 } 2274 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2275 return rte_flow_error_set(error, ENOTSUP, 2276 RTE_FLOW_ERROR_TYPE_ITEM, item, 2277 "VXLAN-GPE tunnel must be fully" 2278 " defined"); 2279 return 0; 2280 } 2281 /** 2282 * Validate GRE Key item. 2283 * 2284 * @param[in] item 2285 * Item specification. 2286 * @param[in] item_flags 2287 * Bit flags to mark detected items. 2288 * @param[in] gre_item 2289 * Pointer to gre_item 2290 * @param[out] error 2291 * Pointer to error structure. 2292 * 2293 * @return 2294 * 0 on success, a negative errno value otherwise and rte_errno is set. 2295 */ 2296 int 2297 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2298 uint64_t item_flags, 2299 const struct rte_flow_item *gre_item, 2300 struct rte_flow_error *error) 2301 { 2302 const rte_be32_t *mask = item->mask; 2303 int ret = 0; 2304 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2305 const struct rte_flow_item_gre *gre_spec; 2306 const struct rte_flow_item_gre *gre_mask; 2307 2308 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2309 return rte_flow_error_set(error, ENOTSUP, 2310 RTE_FLOW_ERROR_TYPE_ITEM, item, 2311 "Multiple GRE key not support"); 2312 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2313 return rte_flow_error_set(error, ENOTSUP, 2314 RTE_FLOW_ERROR_TYPE_ITEM, item, 2315 "No preceding GRE header"); 2316 if (item_flags & MLX5_FLOW_LAYER_INNER) 2317 return rte_flow_error_set(error, ENOTSUP, 2318 RTE_FLOW_ERROR_TYPE_ITEM, item, 2319 "GRE key following a wrong item"); 2320 gre_mask = gre_item->mask; 2321 if (!gre_mask) 2322 gre_mask = &rte_flow_item_gre_mask; 2323 gre_spec = gre_item->spec; 2324 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2325 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2326 return rte_flow_error_set(error, EINVAL, 2327 RTE_FLOW_ERROR_TYPE_ITEM, item, 2328 "Key bit must be on"); 2329 2330 if (!mask) 2331 mask = &gre_key_default_mask; 2332 ret = mlx5_flow_item_acceptable 2333 (item, (const uint8_t *)mask, 2334 (const uint8_t *)&gre_key_default_mask, 2335 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2336 return ret; 2337 } 2338 2339 /** 2340 * Validate GRE item. 2341 * 2342 * @param[in] item 2343 * Item specification. 2344 * @param[in] item_flags 2345 * Bit flags to mark detected items. 2346 * @param[in] target_protocol 2347 * The next protocol in the previous item. 2348 * @param[out] error 2349 * Pointer to error structure. 2350 * 2351 * @return 2352 * 0 on success, a negative errno value otherwise and rte_errno is set. 2353 */ 2354 int 2355 mlx5_flow_validate_item_gre(const struct rte_flow_item *item, 2356 uint64_t item_flags, 2357 uint8_t target_protocol, 2358 struct rte_flow_error *error) 2359 { 2360 const struct rte_flow_item_gre *spec __rte_unused = item->spec; 2361 const struct rte_flow_item_gre *mask = item->mask; 2362 int ret; 2363 const struct rte_flow_item_gre nic_mask = { 2364 .c_rsvd0_ver = RTE_BE16(0xB000), 2365 .protocol = RTE_BE16(UINT16_MAX), 2366 }; 2367 2368 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2369 return rte_flow_error_set(error, EINVAL, 2370 RTE_FLOW_ERROR_TYPE_ITEM, item, 2371 "protocol filtering not compatible" 2372 " with this GRE layer"); 2373 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2374 return rte_flow_error_set(error, ENOTSUP, 2375 RTE_FLOW_ERROR_TYPE_ITEM, item, 2376 "multiple tunnel layers not" 2377 " supported"); 2378 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2379 return rte_flow_error_set(error, ENOTSUP, 2380 RTE_FLOW_ERROR_TYPE_ITEM, item, 2381 "L3 Layer is missing"); 2382 if (!mask) 2383 mask = &rte_flow_item_gre_mask; 2384 ret = mlx5_flow_item_acceptable 2385 (item, (const uint8_t *)mask, 2386 (const uint8_t *)&nic_mask, 2387 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED, 2388 error); 2389 if (ret < 0) 2390 return ret; 2391 #ifndef HAVE_MLX5DV_DR 2392 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT 2393 if (spec && (spec->protocol & mask->protocol)) 2394 return rte_flow_error_set(error, ENOTSUP, 2395 RTE_FLOW_ERROR_TYPE_ITEM, item, 2396 "without MPLS support the" 2397 " specification cannot be used for" 2398 " filtering"); 2399 #endif 2400 #endif 2401 return 0; 2402 } 2403 2404 /** 2405 * Validate Geneve item. 2406 * 2407 * @param[in] item 2408 * Item specification. 2409 * @param[in] itemFlags 2410 * Bit-fields that holds the items detected until now. 2411 * @param[in] enPriv 2412 * Pointer to the private data structure. 2413 * @param[out] error 2414 * Pointer to error structure. 2415 * 2416 * @return 2417 * 0 on success, a negative errno value otherwise and rte_errno is set. 2418 */ 2419 2420 int 2421 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, 2422 uint64_t item_flags, 2423 struct rte_eth_dev *dev, 2424 struct rte_flow_error *error) 2425 { 2426 struct mlx5_priv *priv = dev->data->dev_private; 2427 const struct rte_flow_item_geneve *spec = item->spec; 2428 const struct rte_flow_item_geneve *mask = item->mask; 2429 int ret; 2430 uint16_t gbhdr; 2431 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ? 2432 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2433 const struct rte_flow_item_geneve nic_mask = { 2434 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2435 .vni = "\xff\xff\xff", 2436 .protocol = RTE_BE16(UINT16_MAX), 2437 }; 2438 2439 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2440 return rte_flow_error_set(error, ENOTSUP, 2441 RTE_FLOW_ERROR_TYPE_ITEM, item, 2442 "L3 Geneve is not enabled by device" 2443 " parameter and/or not configured in" 2444 " firmware"); 2445 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2446 return rte_flow_error_set(error, ENOTSUP, 2447 RTE_FLOW_ERROR_TYPE_ITEM, item, 2448 "multiple tunnel layers not" 2449 " supported"); 2450 /* 2451 * Verify only UDPv4 is present as defined in 2452 * https://tools.ietf.org/html/rfc7348 2453 */ 2454 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2455 return rte_flow_error_set(error, EINVAL, 2456 RTE_FLOW_ERROR_TYPE_ITEM, item, 2457 "no outer UDP layer found"); 2458 if (!mask) 2459 mask = &rte_flow_item_geneve_mask; 2460 ret = mlx5_flow_item_acceptable 2461 (item, (const uint8_t *)mask, 2462 (const uint8_t *)&nic_mask, 2463 sizeof(struct rte_flow_item_geneve), 2464 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2465 if (ret) 2466 return ret; 2467 if (spec) { 2468 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2469 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2470 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2471 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2472 return rte_flow_error_set(error, ENOTSUP, 2473 RTE_FLOW_ERROR_TYPE_ITEM, 2474 item, 2475 "Geneve protocol unsupported" 2476 " fields are being used"); 2477 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2478 return rte_flow_error_set 2479 (error, ENOTSUP, 2480 RTE_FLOW_ERROR_TYPE_ITEM, 2481 item, 2482 "Unsupported Geneve options length"); 2483 } 2484 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2485 return rte_flow_error_set 2486 (error, ENOTSUP, 2487 RTE_FLOW_ERROR_TYPE_ITEM, item, 2488 "Geneve tunnel must be fully defined"); 2489 return 0; 2490 } 2491 2492 /** 2493 * Validate MPLS item. 2494 * 2495 * @param[in] dev 2496 * Pointer to the rte_eth_dev structure. 2497 * @param[in] item 2498 * Item specification. 2499 * @param[in] item_flags 2500 * Bit-fields that holds the items detected until now. 2501 * @param[in] prev_layer 2502 * The protocol layer indicated in previous item. 2503 * @param[out] error 2504 * Pointer to error structure. 2505 * 2506 * @return 2507 * 0 on success, a negative errno value otherwise and rte_errno is set. 2508 */ 2509 int 2510 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2511 const struct rte_flow_item *item __rte_unused, 2512 uint64_t item_flags __rte_unused, 2513 uint64_t prev_layer __rte_unused, 2514 struct rte_flow_error *error) 2515 { 2516 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2517 const struct rte_flow_item_mpls *mask = item->mask; 2518 struct mlx5_priv *priv = dev->data->dev_private; 2519 int ret; 2520 2521 if (!priv->config.mpls_en) 2522 return rte_flow_error_set(error, ENOTSUP, 2523 RTE_FLOW_ERROR_TYPE_ITEM, item, 2524 "MPLS not supported or" 2525 " disabled in firmware" 2526 " configuration."); 2527 /* MPLS over IP, UDP, GRE is allowed */ 2528 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2529 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2530 MLX5_FLOW_LAYER_GRE))) 2531 return rte_flow_error_set(error, EINVAL, 2532 RTE_FLOW_ERROR_TYPE_ITEM, item, 2533 "protocol filtering not compatible" 2534 " with MPLS layer"); 2535 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */ 2536 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2537 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2538 return rte_flow_error_set(error, ENOTSUP, 2539 RTE_FLOW_ERROR_TYPE_ITEM, item, 2540 "multiple tunnel layers not" 2541 " supported"); 2542 if (!mask) 2543 mask = &rte_flow_item_mpls_mask; 2544 ret = mlx5_flow_item_acceptable 2545 (item, (const uint8_t *)mask, 2546 (const uint8_t *)&rte_flow_item_mpls_mask, 2547 sizeof(struct rte_flow_item_mpls), 2548 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2549 if (ret < 0) 2550 return ret; 2551 return 0; 2552 #else 2553 return rte_flow_error_set(error, ENOTSUP, 2554 RTE_FLOW_ERROR_TYPE_ITEM, item, 2555 "MPLS is not supported by Verbs, please" 2556 " update."); 2557 #endif 2558 } 2559 2560 /** 2561 * Validate NVGRE item. 2562 * 2563 * @param[in] item 2564 * Item specification. 2565 * @param[in] item_flags 2566 * Bit flags to mark detected items. 2567 * @param[in] target_protocol 2568 * The next protocol in the previous item. 2569 * @param[out] error 2570 * Pointer to error structure. 2571 * 2572 * @return 2573 * 0 on success, a negative errno value otherwise and rte_errno is set. 2574 */ 2575 int 2576 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2577 uint64_t item_flags, 2578 uint8_t target_protocol, 2579 struct rte_flow_error *error) 2580 { 2581 const struct rte_flow_item_nvgre *mask = item->mask; 2582 int ret; 2583 2584 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2585 return rte_flow_error_set(error, EINVAL, 2586 RTE_FLOW_ERROR_TYPE_ITEM, item, 2587 "protocol filtering not compatible" 2588 " with this GRE layer"); 2589 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2590 return rte_flow_error_set(error, ENOTSUP, 2591 RTE_FLOW_ERROR_TYPE_ITEM, item, 2592 "multiple tunnel layers not" 2593 " supported"); 2594 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2595 return rte_flow_error_set(error, ENOTSUP, 2596 RTE_FLOW_ERROR_TYPE_ITEM, item, 2597 "L3 Layer is missing"); 2598 if (!mask) 2599 mask = &rte_flow_item_nvgre_mask; 2600 ret = mlx5_flow_item_acceptable 2601 (item, (const uint8_t *)mask, 2602 (const uint8_t *)&rte_flow_item_nvgre_mask, 2603 sizeof(struct rte_flow_item_nvgre), 2604 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2605 if (ret < 0) 2606 return ret; 2607 return 0; 2608 } 2609 2610 /** 2611 * Validate eCPRI item. 2612 * 2613 * @param[in] item 2614 * Item specification. 2615 * @param[in] item_flags 2616 * Bit-fields that holds the items detected until now. 2617 * @param[in] last_item 2618 * Previous validated item in the pattern items. 2619 * @param[in] ether_type 2620 * Type in the ethernet layer header (including dot1q). 2621 * @param[in] acc_mask 2622 * Acceptable mask, if NULL default internal default mask 2623 * will be used to check whether item fields are supported. 2624 * @param[out] error 2625 * Pointer to error structure. 2626 * 2627 * @return 2628 * 0 on success, a negative errno value otherwise and rte_errno is set. 2629 */ 2630 int 2631 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 2632 uint64_t item_flags, 2633 uint64_t last_item, 2634 uint16_t ether_type, 2635 const struct rte_flow_item_ecpri *acc_mask, 2636 struct rte_flow_error *error) 2637 { 2638 const struct rte_flow_item_ecpri *mask = item->mask; 2639 const struct rte_flow_item_ecpri nic_mask = { 2640 .hdr = { 2641 .common = { 2642 .u32 = 2643 RTE_BE32(((const struct rte_ecpri_common_hdr) { 2644 .type = 0xFF, 2645 }).u32), 2646 }, 2647 .dummy[0] = 0xFFFFFFFF, 2648 }, 2649 }; 2650 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 2651 MLX5_FLOW_LAYER_OUTER_VLAN); 2652 struct rte_flow_item_ecpri mask_lo; 2653 2654 if ((last_item & outer_l2_vlan) && ether_type && 2655 ether_type != RTE_ETHER_TYPE_ECPRI) 2656 return rte_flow_error_set(error, EINVAL, 2657 RTE_FLOW_ERROR_TYPE_ITEM, item, 2658 "eCPRI cannot follow L2/VLAN layer " 2659 "which ether type is not 0xAEFE."); 2660 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2661 return rte_flow_error_set(error, EINVAL, 2662 RTE_FLOW_ERROR_TYPE_ITEM, item, 2663 "eCPRI with tunnel is not supported " 2664 "right now."); 2665 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 2666 return rte_flow_error_set(error, ENOTSUP, 2667 RTE_FLOW_ERROR_TYPE_ITEM, item, 2668 "multiple L3 layers not supported"); 2669 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 2670 return rte_flow_error_set(error, EINVAL, 2671 RTE_FLOW_ERROR_TYPE_ITEM, item, 2672 "eCPRI cannot follow a TCP layer."); 2673 /* In specification, eCPRI could be over UDP layer. */ 2674 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 2675 return rte_flow_error_set(error, EINVAL, 2676 RTE_FLOW_ERROR_TYPE_ITEM, item, 2677 "eCPRI over UDP layer is not yet " 2678 "supported right now."); 2679 /* Mask for type field in common header could be zero. */ 2680 if (!mask) 2681 mask = &rte_flow_item_ecpri_mask; 2682 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 2683 /* Input mask is in big-endian format. */ 2684 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 2685 return rte_flow_error_set(error, EINVAL, 2686 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2687 "partial mask is not supported " 2688 "for protocol"); 2689 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 2690 return rte_flow_error_set(error, EINVAL, 2691 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2692 "message header mask must be after " 2693 "a type mask"); 2694 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2695 acc_mask ? (const uint8_t *)acc_mask 2696 : (const uint8_t *)&nic_mask, 2697 sizeof(struct rte_flow_item_ecpri), 2698 MLX5_ITEM_RANGE_NOT_ACCEPTED, error); 2699 } 2700 2701 /* Allocate unique ID for the split Q/RSS subflows. */ 2702 static uint32_t 2703 flow_qrss_get_id(struct rte_eth_dev *dev) 2704 { 2705 struct mlx5_priv *priv = dev->data->dev_private; 2706 uint32_t qrss_id, ret; 2707 2708 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2709 if (ret) 2710 return 0; 2711 MLX5_ASSERT(qrss_id); 2712 return qrss_id; 2713 } 2714 2715 /* Free unique ID for the split Q/RSS subflows. */ 2716 static void 2717 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2718 { 2719 struct mlx5_priv *priv = dev->data->dev_private; 2720 2721 if (qrss_id) 2722 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2723 } 2724 2725 /** 2726 * Release resource related QUEUE/RSS action split. 2727 * 2728 * @param dev 2729 * Pointer to Ethernet device. 2730 * @param flow 2731 * Flow to release id's from. 2732 */ 2733 static void 2734 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2735 struct rte_flow *flow) 2736 { 2737 struct mlx5_priv *priv = dev->data->dev_private; 2738 uint32_t handle_idx; 2739 struct mlx5_flow_handle *dev_handle; 2740 2741 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2742 handle_idx, dev_handle, next) 2743 if (dev_handle->split_flow_id) 2744 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2745 } 2746 2747 static int 2748 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2749 const struct rte_flow_attr *attr __rte_unused, 2750 const struct rte_flow_item items[] __rte_unused, 2751 const struct rte_flow_action actions[] __rte_unused, 2752 bool external __rte_unused, 2753 int hairpin __rte_unused, 2754 struct rte_flow_error *error) 2755 { 2756 return rte_flow_error_set(error, ENOTSUP, 2757 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2758 } 2759 2760 static struct mlx5_flow * 2761 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2762 const struct rte_flow_attr *attr __rte_unused, 2763 const struct rte_flow_item items[] __rte_unused, 2764 const struct rte_flow_action actions[] __rte_unused, 2765 struct rte_flow_error *error) 2766 { 2767 rte_flow_error_set(error, ENOTSUP, 2768 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2769 return NULL; 2770 } 2771 2772 static int 2773 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2774 struct mlx5_flow *dev_flow __rte_unused, 2775 const struct rte_flow_attr *attr __rte_unused, 2776 const struct rte_flow_item items[] __rte_unused, 2777 const struct rte_flow_action actions[] __rte_unused, 2778 struct rte_flow_error *error) 2779 { 2780 return rte_flow_error_set(error, ENOTSUP, 2781 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2782 } 2783 2784 static int 2785 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2786 struct rte_flow *flow __rte_unused, 2787 struct rte_flow_error *error) 2788 { 2789 return rte_flow_error_set(error, ENOTSUP, 2790 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2791 } 2792 2793 static void 2794 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2795 struct rte_flow *flow __rte_unused) 2796 { 2797 } 2798 2799 static void 2800 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2801 struct rte_flow *flow __rte_unused) 2802 { 2803 } 2804 2805 static int 2806 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2807 struct rte_flow *flow __rte_unused, 2808 const struct rte_flow_action *actions __rte_unused, 2809 void *data __rte_unused, 2810 struct rte_flow_error *error) 2811 { 2812 return rte_flow_error_set(error, ENOTSUP, 2813 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2814 } 2815 2816 /* Void driver to protect from null pointer reference. */ 2817 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2818 .validate = flow_null_validate, 2819 .prepare = flow_null_prepare, 2820 .translate = flow_null_translate, 2821 .apply = flow_null_apply, 2822 .remove = flow_null_remove, 2823 .destroy = flow_null_destroy, 2824 .query = flow_null_query, 2825 }; 2826 2827 /** 2828 * Select flow driver type according to flow attributes and device 2829 * configuration. 2830 * 2831 * @param[in] dev 2832 * Pointer to the dev structure. 2833 * @param[in] attr 2834 * Pointer to the flow attributes. 2835 * 2836 * @return 2837 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 2838 */ 2839 static enum mlx5_flow_drv_type 2840 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2841 { 2842 struct mlx5_priv *priv = dev->data->dev_private; 2843 /* The OS can determine first a specific flow type (DV, VERBS) */ 2844 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2845 2846 if (type != MLX5_FLOW_TYPE_MAX) 2847 return type; 2848 /* If no OS specific type - continue with DV/VERBS selection */ 2849 if (attr->transfer && priv->config.dv_esw_en) 2850 type = MLX5_FLOW_TYPE_DV; 2851 if (!attr->transfer) 2852 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2853 MLX5_FLOW_TYPE_VERBS; 2854 return type; 2855 } 2856 2857 #define flow_get_drv_ops(type) flow_drv_ops[type] 2858 2859 /** 2860 * Flow driver validation API. This abstracts calling driver specific functions. 2861 * The type of flow driver is determined according to flow attributes. 2862 * 2863 * @param[in] dev 2864 * Pointer to the dev structure. 2865 * @param[in] attr 2866 * Pointer to the flow attributes. 2867 * @param[in] items 2868 * Pointer to the list of items. 2869 * @param[in] actions 2870 * Pointer to the list of actions. 2871 * @param[in] external 2872 * This flow rule is created by request external to PMD. 2873 * @param[in] hairpin 2874 * Number of hairpin TX actions, 0 means classic flow. 2875 * @param[out] error 2876 * Pointer to the error structure. 2877 * 2878 * @return 2879 * 0 on success, a negative errno value otherwise and rte_errno is set. 2880 */ 2881 static inline int 2882 flow_drv_validate(struct rte_eth_dev *dev, 2883 const struct rte_flow_attr *attr, 2884 const struct rte_flow_item items[], 2885 const struct rte_flow_action actions[], 2886 bool external, int hairpin, struct rte_flow_error *error) 2887 { 2888 const struct mlx5_flow_driver_ops *fops; 2889 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2890 2891 fops = flow_get_drv_ops(type); 2892 return fops->validate(dev, attr, items, actions, external, 2893 hairpin, error); 2894 } 2895 2896 /** 2897 * Flow driver preparation API. This abstracts calling driver specific 2898 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2899 * calculates the size of memory required for device flow, allocates the memory, 2900 * initializes the device flow and returns the pointer. 2901 * 2902 * @note 2903 * This function initializes device flow structure such as dv or verbs in 2904 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2905 * rest. For example, adding returning device flow to flow->dev_flow list and 2906 * setting backward reference to the flow should be done out of this function. 2907 * layers field is not filled either. 2908 * 2909 * @param[in] dev 2910 * Pointer to the dev structure. 2911 * @param[in] attr 2912 * Pointer to the flow attributes. 2913 * @param[in] items 2914 * Pointer to the list of items. 2915 * @param[in] actions 2916 * Pointer to the list of actions. 2917 * @param[in] flow_idx 2918 * This memory pool index to the flow. 2919 * @param[out] error 2920 * Pointer to the error structure. 2921 * 2922 * @return 2923 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 2924 */ 2925 static inline struct mlx5_flow * 2926 flow_drv_prepare(struct rte_eth_dev *dev, 2927 const struct rte_flow *flow, 2928 const struct rte_flow_attr *attr, 2929 const struct rte_flow_item items[], 2930 const struct rte_flow_action actions[], 2931 uint32_t flow_idx, 2932 struct rte_flow_error *error) 2933 { 2934 const struct mlx5_flow_driver_ops *fops; 2935 enum mlx5_flow_drv_type type = flow->drv_type; 2936 struct mlx5_flow *mlx5_flow = NULL; 2937 2938 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2939 fops = flow_get_drv_ops(type); 2940 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2941 if (mlx5_flow) 2942 mlx5_flow->flow_idx = flow_idx; 2943 return mlx5_flow; 2944 } 2945 2946 /** 2947 * Flow driver translation API. This abstracts calling driver specific 2948 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2949 * translates a generic flow into a driver flow. flow_drv_prepare() must 2950 * precede. 2951 * 2952 * @note 2953 * dev_flow->layers could be filled as a result of parsing during translation 2954 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2955 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2956 * flow->actions could be overwritten even though all the expanded dev_flows 2957 * have the same actions. 2958 * 2959 * @param[in] dev 2960 * Pointer to the rte dev structure. 2961 * @param[in, out] dev_flow 2962 * Pointer to the mlx5 flow. 2963 * @param[in] attr 2964 * Pointer to the flow attributes. 2965 * @param[in] items 2966 * Pointer to the list of items. 2967 * @param[in] actions 2968 * Pointer to the list of actions. 2969 * @param[out] error 2970 * Pointer to the error structure. 2971 * 2972 * @return 2973 * 0 on success, a negative errno value otherwise and rte_errno is set. 2974 */ 2975 static inline int 2976 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2977 const struct rte_flow_attr *attr, 2978 const struct rte_flow_item items[], 2979 const struct rte_flow_action actions[], 2980 struct rte_flow_error *error) 2981 { 2982 const struct mlx5_flow_driver_ops *fops; 2983 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2984 2985 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2986 fops = flow_get_drv_ops(type); 2987 return fops->translate(dev, dev_flow, attr, items, actions, error); 2988 } 2989 2990 /** 2991 * Flow driver apply API. This abstracts calling driver specific functions. 2992 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2993 * translated driver flows on to device. flow_drv_translate() must precede. 2994 * 2995 * @param[in] dev 2996 * Pointer to Ethernet device structure. 2997 * @param[in, out] flow 2998 * Pointer to flow structure. 2999 * @param[out] error 3000 * Pointer to error structure. 3001 * 3002 * @return 3003 * 0 on success, a negative errno value otherwise and rte_errno is set. 3004 */ 3005 static inline int 3006 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 3007 struct rte_flow_error *error) 3008 { 3009 const struct mlx5_flow_driver_ops *fops; 3010 enum mlx5_flow_drv_type type = flow->drv_type; 3011 3012 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3013 fops = flow_get_drv_ops(type); 3014 return fops->apply(dev, flow, error); 3015 } 3016 3017 /** 3018 * Flow driver remove API. This abstracts calling driver specific functions. 3019 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 3020 * on device. All the resources of the flow should be freed by calling 3021 * flow_drv_destroy(). 3022 * 3023 * @param[in] dev 3024 * Pointer to Ethernet device. 3025 * @param[in, out] flow 3026 * Pointer to flow structure. 3027 */ 3028 static inline void 3029 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 3030 { 3031 const struct mlx5_flow_driver_ops *fops; 3032 enum mlx5_flow_drv_type type = flow->drv_type; 3033 3034 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3035 fops = flow_get_drv_ops(type); 3036 fops->remove(dev, flow); 3037 } 3038 3039 /** 3040 * Flow driver destroy API. This abstracts calling driver specific functions. 3041 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 3042 * on device and releases resources of the flow. 3043 * 3044 * @param[in] dev 3045 * Pointer to Ethernet device. 3046 * @param[in, out] flow 3047 * Pointer to flow structure. 3048 */ 3049 static inline void 3050 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 3051 { 3052 const struct mlx5_flow_driver_ops *fops; 3053 enum mlx5_flow_drv_type type = flow->drv_type; 3054 3055 flow_mreg_split_qrss_release(dev, flow); 3056 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3057 fops = flow_get_drv_ops(type); 3058 fops->destroy(dev, flow); 3059 } 3060 3061 /** 3062 * Get RSS action from the action list. 3063 * 3064 * @param[in] actions 3065 * Pointer to the list of actions. 3066 * 3067 * @return 3068 * Pointer to the RSS action if exist, else return NULL. 3069 */ 3070 static const struct rte_flow_action_rss* 3071 flow_get_rss_action(const struct rte_flow_action actions[]) 3072 { 3073 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3074 switch (actions->type) { 3075 case RTE_FLOW_ACTION_TYPE_RSS: 3076 return (const struct rte_flow_action_rss *) 3077 actions->conf; 3078 default: 3079 break; 3080 } 3081 } 3082 return NULL; 3083 } 3084 3085 static unsigned int 3086 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 3087 { 3088 const struct rte_flow_item *item; 3089 unsigned int has_vlan = 0; 3090 3091 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 3092 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 3093 has_vlan = 1; 3094 break; 3095 } 3096 } 3097 if (has_vlan) 3098 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 3099 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 3100 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 3101 MLX5_EXPANSION_ROOT_OUTER; 3102 } 3103 3104 /** 3105 * Get layer flags from the prefix flow. 3106 * 3107 * Some flows may be split to several subflows, the prefix subflow gets the 3108 * match items and the suffix sub flow gets the actions. 3109 * Some actions need the user defined match item flags to get the detail for 3110 * the action. 3111 * This function helps the suffix flow to get the item layer flags from prefix 3112 * subflow. 3113 * 3114 * @param[in] dev_flow 3115 * Pointer the created preifx subflow. 3116 * 3117 * @return 3118 * The layers get from prefix subflow. 3119 */ 3120 static inline uint64_t 3121 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 3122 { 3123 uint64_t layers = 0; 3124 3125 /* 3126 * Layers bits could be localization, but usually the compiler will 3127 * help to do the optimization work for source code. 3128 * If no decap actions, use the layers directly. 3129 */ 3130 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 3131 return dev_flow->handle->layers; 3132 /* Convert L3 layers with decap action. */ 3133 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 3134 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 3135 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 3136 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 3137 /* Convert L4 layers with decap action. */ 3138 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 3139 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 3140 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 3141 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 3142 return layers; 3143 } 3144 3145 /** 3146 * Get metadata split action information. 3147 * 3148 * @param[in] actions 3149 * Pointer to the list of actions. 3150 * @param[out] qrss 3151 * Pointer to the return pointer. 3152 * @param[out] qrss_type 3153 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 3154 * if no QUEUE/RSS is found. 3155 * @param[out] encap_idx 3156 * Pointer to the index of the encap action if exists, otherwise the last 3157 * action index. 3158 * 3159 * @return 3160 * Total number of actions. 3161 */ 3162 static int 3163 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 3164 const struct rte_flow_action **qrss, 3165 int *encap_idx) 3166 { 3167 const struct rte_flow_action_raw_encap *raw_encap; 3168 int actions_n = 0; 3169 int raw_decap_idx = -1; 3170 3171 *encap_idx = -1; 3172 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3173 switch (actions->type) { 3174 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3175 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3176 *encap_idx = actions_n; 3177 break; 3178 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3179 raw_decap_idx = actions_n; 3180 break; 3181 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3182 raw_encap = actions->conf; 3183 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3184 *encap_idx = raw_decap_idx != -1 ? 3185 raw_decap_idx : actions_n; 3186 break; 3187 case RTE_FLOW_ACTION_TYPE_QUEUE: 3188 case RTE_FLOW_ACTION_TYPE_RSS: 3189 *qrss = actions; 3190 break; 3191 default: 3192 break; 3193 } 3194 actions_n++; 3195 } 3196 if (*encap_idx == -1) 3197 *encap_idx = actions_n; 3198 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3199 return actions_n + 1; 3200 } 3201 3202 /** 3203 * Check meter action from the action list. 3204 * 3205 * @param[in] actions 3206 * Pointer to the list of actions. 3207 * @param[out] mtr 3208 * Pointer to the meter exist flag. 3209 * 3210 * @return 3211 * Total number of actions. 3212 */ 3213 static int 3214 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 3215 { 3216 int actions_n = 0; 3217 3218 MLX5_ASSERT(mtr); 3219 *mtr = 0; 3220 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3221 switch (actions->type) { 3222 case RTE_FLOW_ACTION_TYPE_METER: 3223 *mtr = 1; 3224 break; 3225 default: 3226 break; 3227 } 3228 actions_n++; 3229 } 3230 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3231 return actions_n + 1; 3232 } 3233 3234 /** 3235 * Check if the flow should be split due to hairpin. 3236 * The reason for the split is that in current HW we can't 3237 * support encap and push-vlan on Rx, so if a flow contains 3238 * these actions we move it to Tx. 3239 * 3240 * @param dev 3241 * Pointer to Ethernet device. 3242 * @param[in] attr 3243 * Flow rule attributes. 3244 * @param[in] actions 3245 * Associated actions (list terminated by the END action). 3246 * 3247 * @return 3248 * > 0 the number of actions and the flow should be split, 3249 * 0 when no split required. 3250 */ 3251 static int 3252 flow_check_hairpin_split(struct rte_eth_dev *dev, 3253 const struct rte_flow_attr *attr, 3254 const struct rte_flow_action actions[]) 3255 { 3256 int queue_action = 0; 3257 int action_n = 0; 3258 int split = 0; 3259 const struct rte_flow_action_queue *queue; 3260 const struct rte_flow_action_rss *rss; 3261 const struct rte_flow_action_raw_encap *raw_encap; 3262 3263 if (!attr->ingress) 3264 return 0; 3265 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3266 switch (actions->type) { 3267 case RTE_FLOW_ACTION_TYPE_QUEUE: 3268 queue = actions->conf; 3269 if (queue == NULL) 3270 return 0; 3271 if (mlx5_rxq_get_type(dev, queue->index) != 3272 MLX5_RXQ_TYPE_HAIRPIN) 3273 return 0; 3274 queue_action = 1; 3275 action_n++; 3276 break; 3277 case RTE_FLOW_ACTION_TYPE_RSS: 3278 rss = actions->conf; 3279 if (rss == NULL || rss->queue_num == 0) 3280 return 0; 3281 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 3282 MLX5_RXQ_TYPE_HAIRPIN) 3283 return 0; 3284 queue_action = 1; 3285 action_n++; 3286 break; 3287 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3288 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3289 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3290 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3291 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3292 split++; 3293 action_n++; 3294 break; 3295 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3296 raw_encap = actions->conf; 3297 if (raw_encap->size > 3298 (sizeof(struct rte_flow_item_eth) + 3299 sizeof(struct rte_flow_item_ipv4))) 3300 split++; 3301 action_n++; 3302 break; 3303 default: 3304 action_n++; 3305 break; 3306 } 3307 } 3308 if (split && queue_action) 3309 return action_n; 3310 return 0; 3311 } 3312 3313 /* Declare flow create/destroy prototype in advance. */ 3314 static uint32_t 3315 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 3316 const struct rte_flow_attr *attr, 3317 const struct rte_flow_item items[], 3318 const struct rte_flow_action actions[], 3319 bool external, struct rte_flow_error *error); 3320 3321 static void 3322 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 3323 uint32_t flow_idx); 3324 3325 /** 3326 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3327 * 3328 * As mark_id is unique, if there's already a registered flow for the mark_id, 3329 * return by increasing the reference counter of the resource. Otherwise, create 3330 * the resource (mcp_res) and flow. 3331 * 3332 * Flow looks like, 3333 * - If ingress port is ANY and reg_c[1] is mark_id, 3334 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3335 * 3336 * For default flow (zero mark_id), flow is like, 3337 * - If ingress port is ANY, 3338 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 3339 * 3340 * @param dev 3341 * Pointer to Ethernet device. 3342 * @param mark_id 3343 * ID of MARK action, zero means default flow for META. 3344 * @param[out] error 3345 * Perform verbose error reporting if not NULL. 3346 * 3347 * @return 3348 * Associated resource on success, NULL otherwise and rte_errno is set. 3349 */ 3350 static struct mlx5_flow_mreg_copy_resource * 3351 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3352 struct rte_flow_error *error) 3353 { 3354 struct mlx5_priv *priv = dev->data->dev_private; 3355 struct rte_flow_attr attr = { 3356 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3357 .ingress = 1, 3358 }; 3359 struct mlx5_rte_flow_item_tag tag_spec = { 3360 .data = mark_id, 3361 }; 3362 struct rte_flow_item items[] = { 3363 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3364 }; 3365 struct rte_flow_action_mark ftag = { 3366 .id = mark_id, 3367 }; 3368 struct mlx5_flow_action_copy_mreg cp_mreg = { 3369 .dst = REG_B, 3370 .src = REG_NON, 3371 }; 3372 struct rte_flow_action_jump jump = { 3373 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3374 }; 3375 struct rte_flow_action actions[] = { 3376 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3377 }; 3378 struct mlx5_flow_mreg_copy_resource *mcp_res; 3379 uint32_t idx = 0; 3380 int ret; 3381 3382 /* Fill the register fileds in the flow. */ 3383 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3384 if (ret < 0) 3385 return NULL; 3386 tag_spec.id = ret; 3387 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3388 if (ret < 0) 3389 return NULL; 3390 cp_mreg.src = ret; 3391 /* Check if already registered. */ 3392 MLX5_ASSERT(priv->mreg_cp_tbl); 3393 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3394 if (mcp_res) { 3395 /* For non-default rule. */ 3396 if (mark_id != MLX5_DEFAULT_COPY_ID) 3397 mcp_res->refcnt++; 3398 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3399 mcp_res->refcnt == 1); 3400 return mcp_res; 3401 } 3402 /* Provide the full width of FLAG specific value. */ 3403 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3404 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3405 /* Build a new flow. */ 3406 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3407 items[0] = (struct rte_flow_item){ 3408 .type = (enum rte_flow_item_type) 3409 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3410 .spec = &tag_spec, 3411 }; 3412 items[1] = (struct rte_flow_item){ 3413 .type = RTE_FLOW_ITEM_TYPE_END, 3414 }; 3415 actions[0] = (struct rte_flow_action){ 3416 .type = (enum rte_flow_action_type) 3417 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3418 .conf = &ftag, 3419 }; 3420 actions[1] = (struct rte_flow_action){ 3421 .type = (enum rte_flow_action_type) 3422 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3423 .conf = &cp_mreg, 3424 }; 3425 actions[2] = (struct rte_flow_action){ 3426 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3427 .conf = &jump, 3428 }; 3429 actions[3] = (struct rte_flow_action){ 3430 .type = RTE_FLOW_ACTION_TYPE_END, 3431 }; 3432 } else { 3433 /* Default rule, wildcard match. */ 3434 attr.priority = MLX5_FLOW_PRIO_RSVD; 3435 items[0] = (struct rte_flow_item){ 3436 .type = RTE_FLOW_ITEM_TYPE_END, 3437 }; 3438 actions[0] = (struct rte_flow_action){ 3439 .type = (enum rte_flow_action_type) 3440 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3441 .conf = &cp_mreg, 3442 }; 3443 actions[1] = (struct rte_flow_action){ 3444 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3445 .conf = &jump, 3446 }; 3447 actions[2] = (struct rte_flow_action){ 3448 .type = RTE_FLOW_ACTION_TYPE_END, 3449 }; 3450 } 3451 /* Build a new entry. */ 3452 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3453 if (!mcp_res) { 3454 rte_errno = ENOMEM; 3455 return NULL; 3456 } 3457 mcp_res->idx = idx; 3458 /* 3459 * The copy Flows are not included in any list. There 3460 * ones are referenced from other Flows and can not 3461 * be applied, removed, deleted in ardbitrary order 3462 * by list traversing. 3463 */ 3464 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3465 actions, false, error); 3466 if (!mcp_res->rix_flow) 3467 goto error; 3468 mcp_res->refcnt++; 3469 mcp_res->hlist_ent.key = mark_id; 3470 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3471 &mcp_res->hlist_ent); 3472 MLX5_ASSERT(!ret); 3473 if (ret) 3474 goto error; 3475 return mcp_res; 3476 error: 3477 if (mcp_res->rix_flow) 3478 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3479 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3480 return NULL; 3481 } 3482 3483 /** 3484 * Release flow in RX_CP_TBL. 3485 * 3486 * @param dev 3487 * Pointer to Ethernet device. 3488 * @flow 3489 * Parent flow for wich copying is provided. 3490 */ 3491 static void 3492 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3493 struct rte_flow *flow) 3494 { 3495 struct mlx5_flow_mreg_copy_resource *mcp_res; 3496 struct mlx5_priv *priv = dev->data->dev_private; 3497 3498 if (!flow->rix_mreg_copy) 3499 return; 3500 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3501 flow->rix_mreg_copy); 3502 if (!mcp_res || !priv->mreg_cp_tbl) 3503 return; 3504 if (flow->copy_applied) { 3505 MLX5_ASSERT(mcp_res->appcnt); 3506 flow->copy_applied = 0; 3507 --mcp_res->appcnt; 3508 if (!mcp_res->appcnt) { 3509 struct rte_flow *mcp_flow = mlx5_ipool_get 3510 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3511 mcp_res->rix_flow); 3512 3513 if (mcp_flow) 3514 flow_drv_remove(dev, mcp_flow); 3515 } 3516 } 3517 /* 3518 * We do not check availability of metadata registers here, 3519 * because copy resources are not allocated in this case. 3520 */ 3521 if (--mcp_res->refcnt) 3522 return; 3523 MLX5_ASSERT(mcp_res->rix_flow); 3524 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3525 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3526 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3527 flow->rix_mreg_copy = 0; 3528 } 3529 3530 /** 3531 * Start flow in RX_CP_TBL. 3532 * 3533 * @param dev 3534 * Pointer to Ethernet device. 3535 * @flow 3536 * Parent flow for wich copying is provided. 3537 * 3538 * @return 3539 * 0 on success, a negative errno value otherwise and rte_errno is set. 3540 */ 3541 static int 3542 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3543 struct rte_flow *flow) 3544 { 3545 struct mlx5_flow_mreg_copy_resource *mcp_res; 3546 struct mlx5_priv *priv = dev->data->dev_private; 3547 int ret; 3548 3549 if (!flow->rix_mreg_copy || flow->copy_applied) 3550 return 0; 3551 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3552 flow->rix_mreg_copy); 3553 if (!mcp_res) 3554 return 0; 3555 if (!mcp_res->appcnt) { 3556 struct rte_flow *mcp_flow = mlx5_ipool_get 3557 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3558 mcp_res->rix_flow); 3559 3560 if (mcp_flow) { 3561 ret = flow_drv_apply(dev, mcp_flow, NULL); 3562 if (ret) 3563 return ret; 3564 } 3565 } 3566 ++mcp_res->appcnt; 3567 flow->copy_applied = 1; 3568 return 0; 3569 } 3570 3571 /** 3572 * Stop flow in RX_CP_TBL. 3573 * 3574 * @param dev 3575 * Pointer to Ethernet device. 3576 * @flow 3577 * Parent flow for wich copying is provided. 3578 */ 3579 static void 3580 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3581 struct rte_flow *flow) 3582 { 3583 struct mlx5_flow_mreg_copy_resource *mcp_res; 3584 struct mlx5_priv *priv = dev->data->dev_private; 3585 3586 if (!flow->rix_mreg_copy || !flow->copy_applied) 3587 return; 3588 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3589 flow->rix_mreg_copy); 3590 if (!mcp_res) 3591 return; 3592 MLX5_ASSERT(mcp_res->appcnt); 3593 --mcp_res->appcnt; 3594 flow->copy_applied = 0; 3595 if (!mcp_res->appcnt) { 3596 struct rte_flow *mcp_flow = mlx5_ipool_get 3597 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3598 mcp_res->rix_flow); 3599 3600 if (mcp_flow) 3601 flow_drv_remove(dev, mcp_flow); 3602 } 3603 } 3604 3605 /** 3606 * Remove the default copy action from RX_CP_TBL. 3607 * 3608 * @param dev 3609 * Pointer to Ethernet device. 3610 */ 3611 static void 3612 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3613 { 3614 struct mlx5_flow_mreg_copy_resource *mcp_res; 3615 struct mlx5_priv *priv = dev->data->dev_private; 3616 3617 /* Check if default flow is registered. */ 3618 if (!priv->mreg_cp_tbl) 3619 return; 3620 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3621 MLX5_DEFAULT_COPY_ID); 3622 if (!mcp_res) 3623 return; 3624 MLX5_ASSERT(mcp_res->rix_flow); 3625 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3626 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3627 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3628 } 3629 3630 /** 3631 * Add the default copy action in in RX_CP_TBL. 3632 * 3633 * @param dev 3634 * Pointer to Ethernet device. 3635 * @param[out] error 3636 * Perform verbose error reporting if not NULL. 3637 * 3638 * @return 3639 * 0 for success, negative value otherwise and rte_errno is set. 3640 */ 3641 static int 3642 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3643 struct rte_flow_error *error) 3644 { 3645 struct mlx5_priv *priv = dev->data->dev_private; 3646 struct mlx5_flow_mreg_copy_resource *mcp_res; 3647 3648 /* Check whether extensive metadata feature is engaged. */ 3649 if (!priv->config.dv_flow_en || 3650 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3651 !mlx5_flow_ext_mreg_supported(dev) || 3652 !priv->sh->dv_regc0_mask) 3653 return 0; 3654 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3655 if (!mcp_res) 3656 return -rte_errno; 3657 return 0; 3658 } 3659 3660 /** 3661 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3662 * 3663 * All the flow having Q/RSS action should be split by 3664 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3665 * performs the following, 3666 * - CQE->flow_tag := reg_c[1] (MARK) 3667 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3668 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3669 * but there should be a flow per each MARK ID set by MARK action. 3670 * 3671 * For the aforementioned reason, if there's a MARK action in flow's action 3672 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3673 * the MARK ID to CQE's flow_tag like, 3674 * - If reg_c[1] is mark_id, 3675 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3676 * 3677 * For SET_META action which stores value in reg_c[0], as the destination is 3678 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3679 * MARK ID means the default flow. The default flow looks like, 3680 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3681 * 3682 * @param dev 3683 * Pointer to Ethernet device. 3684 * @param flow 3685 * Pointer to flow structure. 3686 * @param[in] actions 3687 * Pointer to the list of actions. 3688 * @param[out] error 3689 * Perform verbose error reporting if not NULL. 3690 * 3691 * @return 3692 * 0 on success, negative value otherwise and rte_errno is set. 3693 */ 3694 static int 3695 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3696 struct rte_flow *flow, 3697 const struct rte_flow_action *actions, 3698 struct rte_flow_error *error) 3699 { 3700 struct mlx5_priv *priv = dev->data->dev_private; 3701 struct mlx5_dev_config *config = &priv->config; 3702 struct mlx5_flow_mreg_copy_resource *mcp_res; 3703 const struct rte_flow_action_mark *mark; 3704 3705 /* Check whether extensive metadata feature is engaged. */ 3706 if (!config->dv_flow_en || 3707 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3708 !mlx5_flow_ext_mreg_supported(dev) || 3709 !priv->sh->dv_regc0_mask) 3710 return 0; 3711 /* Find MARK action. */ 3712 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3713 switch (actions->type) { 3714 case RTE_FLOW_ACTION_TYPE_FLAG: 3715 mcp_res = flow_mreg_add_copy_action 3716 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3717 if (!mcp_res) 3718 return -rte_errno; 3719 flow->rix_mreg_copy = mcp_res->idx; 3720 if (dev->data->dev_started) { 3721 mcp_res->appcnt++; 3722 flow->copy_applied = 1; 3723 } 3724 return 0; 3725 case RTE_FLOW_ACTION_TYPE_MARK: 3726 mark = (const struct rte_flow_action_mark *) 3727 actions->conf; 3728 mcp_res = 3729 flow_mreg_add_copy_action(dev, mark->id, error); 3730 if (!mcp_res) 3731 return -rte_errno; 3732 flow->rix_mreg_copy = mcp_res->idx; 3733 if (dev->data->dev_started) { 3734 mcp_res->appcnt++; 3735 flow->copy_applied = 1; 3736 } 3737 return 0; 3738 default: 3739 break; 3740 } 3741 } 3742 return 0; 3743 } 3744 3745 #define MLX5_MAX_SPLIT_ACTIONS 24 3746 #define MLX5_MAX_SPLIT_ITEMS 24 3747 3748 /** 3749 * Split the hairpin flow. 3750 * Since HW can't support encap and push-vlan on Rx, we move these 3751 * actions to Tx. 3752 * If the count action is after the encap then we also 3753 * move the count action. in this case the count will also measure 3754 * the outer bytes. 3755 * 3756 * @param dev 3757 * Pointer to Ethernet device. 3758 * @param[in] actions 3759 * Associated actions (list terminated by the END action). 3760 * @param[out] actions_rx 3761 * Rx flow actions. 3762 * @param[out] actions_tx 3763 * Tx flow actions.. 3764 * @param[out] pattern_tx 3765 * The pattern items for the Tx flow. 3766 * @param[out] flow_id 3767 * The flow ID connected to this flow. 3768 * 3769 * @return 3770 * 0 on success. 3771 */ 3772 static int 3773 flow_hairpin_split(struct rte_eth_dev *dev, 3774 const struct rte_flow_action actions[], 3775 struct rte_flow_action actions_rx[], 3776 struct rte_flow_action actions_tx[], 3777 struct rte_flow_item pattern_tx[], 3778 uint32_t *flow_id) 3779 { 3780 struct mlx5_priv *priv = dev->data->dev_private; 3781 const struct rte_flow_action_raw_encap *raw_encap; 3782 const struct rte_flow_action_raw_decap *raw_decap; 3783 struct mlx5_rte_flow_action_set_tag *set_tag; 3784 struct rte_flow_action *tag_action; 3785 struct mlx5_rte_flow_item_tag *tag_item; 3786 struct rte_flow_item *item; 3787 char *addr; 3788 int encap = 0; 3789 3790 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3791 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3792 switch (actions->type) { 3793 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3794 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3795 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3796 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3797 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3798 rte_memcpy(actions_tx, actions, 3799 sizeof(struct rte_flow_action)); 3800 actions_tx++; 3801 break; 3802 case RTE_FLOW_ACTION_TYPE_COUNT: 3803 if (encap) { 3804 rte_memcpy(actions_tx, actions, 3805 sizeof(struct rte_flow_action)); 3806 actions_tx++; 3807 } else { 3808 rte_memcpy(actions_rx, actions, 3809 sizeof(struct rte_flow_action)); 3810 actions_rx++; 3811 } 3812 break; 3813 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3814 raw_encap = actions->conf; 3815 if (raw_encap->size > 3816 (sizeof(struct rte_flow_item_eth) + 3817 sizeof(struct rte_flow_item_ipv4))) { 3818 memcpy(actions_tx, actions, 3819 sizeof(struct rte_flow_action)); 3820 actions_tx++; 3821 encap = 1; 3822 } else { 3823 rte_memcpy(actions_rx, actions, 3824 sizeof(struct rte_flow_action)); 3825 actions_rx++; 3826 } 3827 break; 3828 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3829 raw_decap = actions->conf; 3830 if (raw_decap->size < 3831 (sizeof(struct rte_flow_item_eth) + 3832 sizeof(struct rte_flow_item_ipv4))) { 3833 memcpy(actions_tx, actions, 3834 sizeof(struct rte_flow_action)); 3835 actions_tx++; 3836 } else { 3837 rte_memcpy(actions_rx, actions, 3838 sizeof(struct rte_flow_action)); 3839 actions_rx++; 3840 } 3841 break; 3842 default: 3843 rte_memcpy(actions_rx, actions, 3844 sizeof(struct rte_flow_action)); 3845 actions_rx++; 3846 break; 3847 } 3848 } 3849 /* Add set meta action and end action for the Rx flow. */ 3850 tag_action = actions_rx; 3851 tag_action->type = (enum rte_flow_action_type) 3852 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3853 actions_rx++; 3854 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3855 actions_rx++; 3856 set_tag = (void *)actions_rx; 3857 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3858 MLX5_ASSERT(set_tag->id > REG_NON); 3859 set_tag->data = *flow_id; 3860 tag_action->conf = set_tag; 3861 /* Create Tx item list. */ 3862 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3863 addr = (void *)&pattern_tx[2]; 3864 item = pattern_tx; 3865 item->type = (enum rte_flow_item_type) 3866 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3867 tag_item = (void *)addr; 3868 tag_item->data = *flow_id; 3869 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3870 MLX5_ASSERT(set_tag->id > REG_NON); 3871 item->spec = tag_item; 3872 addr += sizeof(struct mlx5_rte_flow_item_tag); 3873 tag_item = (void *)addr; 3874 tag_item->data = UINT32_MAX; 3875 tag_item->id = UINT16_MAX; 3876 item->mask = tag_item; 3877 item->last = NULL; 3878 item++; 3879 item->type = RTE_FLOW_ITEM_TYPE_END; 3880 return 0; 3881 } 3882 3883 /** 3884 * The last stage of splitting chain, just creates the subflow 3885 * without any modification. 3886 * 3887 * @param[in] dev 3888 * Pointer to Ethernet device. 3889 * @param[in] flow 3890 * Parent flow structure pointer. 3891 * @param[in, out] sub_flow 3892 * Pointer to return the created subflow, may be NULL. 3893 * @param[in] prefix_layers 3894 * Prefix subflow layers, may be 0. 3895 * @param[in] prefix_mark 3896 * Prefix subflow mark flag, may be 0. 3897 * @param[in] attr 3898 * Flow rule attributes. 3899 * @param[in] items 3900 * Pattern specification (list terminated by the END pattern item). 3901 * @param[in] actions 3902 * Associated actions (list terminated by the END action). 3903 * @param[in] external 3904 * This flow rule is created by request external to PMD. 3905 * @param[in] flow_idx 3906 * This memory pool index to the flow. 3907 * @param[out] error 3908 * Perform verbose error reporting if not NULL. 3909 * @return 3910 * 0 on success, negative value otherwise 3911 */ 3912 static int 3913 flow_create_split_inner(struct rte_eth_dev *dev, 3914 struct rte_flow *flow, 3915 struct mlx5_flow **sub_flow, 3916 uint64_t prefix_layers, 3917 uint32_t prefix_mark, 3918 const struct rte_flow_attr *attr, 3919 const struct rte_flow_item items[], 3920 const struct rte_flow_action actions[], 3921 bool external, uint32_t flow_idx, 3922 struct rte_flow_error *error) 3923 { 3924 struct mlx5_flow *dev_flow; 3925 3926 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3927 flow_idx, error); 3928 if (!dev_flow) 3929 return -rte_errno; 3930 dev_flow->flow = flow; 3931 dev_flow->external = external; 3932 /* Subflow object was created, we must include one in the list. */ 3933 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3934 dev_flow->handle, next); 3935 /* 3936 * If dev_flow is as one of the suffix flow, some actions in suffix 3937 * flow may need some user defined item layer flags, and pass the 3938 * Metadate rxq mark flag to suffix flow as well. 3939 */ 3940 if (prefix_layers) 3941 dev_flow->handle->layers = prefix_layers; 3942 if (prefix_mark) 3943 dev_flow->handle->mark = 1; 3944 if (sub_flow) 3945 *sub_flow = dev_flow; 3946 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3947 } 3948 3949 /** 3950 * Split the meter flow. 3951 * 3952 * As meter flow will split to three sub flow, other than meter 3953 * action, the other actions make sense to only meter accepts 3954 * the packet. If it need to be dropped, no other additional 3955 * actions should be take. 3956 * 3957 * One kind of special action which decapsulates the L3 tunnel 3958 * header will be in the prefix sub flow, as not to take the 3959 * L3 tunnel header into account. 3960 * 3961 * @param dev 3962 * Pointer to Ethernet device. 3963 * @param[in] items 3964 * Pattern specification (list terminated by the END pattern item). 3965 * @param[out] sfx_items 3966 * Suffix flow match items (list terminated by the END pattern item). 3967 * @param[in] actions 3968 * Associated actions (list terminated by the END action). 3969 * @param[out] actions_sfx 3970 * Suffix flow actions. 3971 * @param[out] actions_pre 3972 * Prefix flow actions. 3973 * @param[out] pattern_sfx 3974 * The pattern items for the suffix flow. 3975 * @param[out] tag_sfx 3976 * Pointer to suffix flow tag. 3977 * 3978 * @return 3979 * 0 on success. 3980 */ 3981 static int 3982 flow_meter_split_prep(struct rte_eth_dev *dev, 3983 const struct rte_flow_item items[], 3984 struct rte_flow_item sfx_items[], 3985 const struct rte_flow_action actions[], 3986 struct rte_flow_action actions_sfx[], 3987 struct rte_flow_action actions_pre[]) 3988 { 3989 struct rte_flow_action *tag_action = NULL; 3990 struct rte_flow_item *tag_item; 3991 struct mlx5_rte_flow_action_set_tag *set_tag; 3992 struct rte_flow_error error; 3993 const struct rte_flow_action_raw_encap *raw_encap; 3994 const struct rte_flow_action_raw_decap *raw_decap; 3995 struct mlx5_rte_flow_item_tag *tag_spec; 3996 struct mlx5_rte_flow_item_tag *tag_mask; 3997 uint32_t tag_id; 3998 bool copy_vlan = false; 3999 4000 /* Prepare the actions for prefix and suffix flow. */ 4001 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4002 struct rte_flow_action **action_cur = NULL; 4003 4004 switch (actions->type) { 4005 case RTE_FLOW_ACTION_TYPE_METER: 4006 /* Add the extra tag action first. */ 4007 tag_action = actions_pre; 4008 tag_action->type = (enum rte_flow_action_type) 4009 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4010 actions_pre++; 4011 action_cur = &actions_pre; 4012 break; 4013 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 4014 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 4015 action_cur = &actions_pre; 4016 break; 4017 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 4018 raw_encap = actions->conf; 4019 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 4020 action_cur = &actions_pre; 4021 break; 4022 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 4023 raw_decap = actions->conf; 4024 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 4025 action_cur = &actions_pre; 4026 break; 4027 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 4028 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 4029 copy_vlan = true; 4030 break; 4031 default: 4032 break; 4033 } 4034 if (!action_cur) 4035 action_cur = &actions_sfx; 4036 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 4037 (*action_cur)++; 4038 } 4039 /* Add end action to the actions. */ 4040 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 4041 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 4042 actions_pre++; 4043 /* Set the tag. */ 4044 set_tag = (void *)actions_pre; 4045 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4046 /* 4047 * Get the id from the qrss_pool to make qrss share the id with meter. 4048 */ 4049 tag_id = flow_qrss_get_id(dev); 4050 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 4051 assert(tag_action); 4052 tag_action->conf = set_tag; 4053 /* Prepare the suffix subflow items. */ 4054 tag_item = sfx_items++; 4055 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 4056 int item_type = items->type; 4057 4058 switch (item_type) { 4059 case RTE_FLOW_ITEM_TYPE_PORT_ID: 4060 memcpy(sfx_items, items, sizeof(*sfx_items)); 4061 sfx_items++; 4062 break; 4063 case RTE_FLOW_ITEM_TYPE_VLAN: 4064 if (copy_vlan) { 4065 memcpy(sfx_items, items, sizeof(*sfx_items)); 4066 /* 4067 * Convert to internal match item, it is used 4068 * for vlan push and set vid. 4069 */ 4070 sfx_items->type = (enum rte_flow_item_type) 4071 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 4072 sfx_items++; 4073 } 4074 break; 4075 default: 4076 break; 4077 } 4078 } 4079 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 4080 sfx_items++; 4081 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 4082 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 4083 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4084 tag_mask = tag_spec + 1; 4085 tag_mask->data = 0xffffff00; 4086 tag_item->type = (enum rte_flow_item_type) 4087 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 4088 tag_item->spec = tag_spec; 4089 tag_item->last = NULL; 4090 tag_item->mask = tag_mask; 4091 return tag_id; 4092 } 4093 4094 /** 4095 * Split action list having QUEUE/RSS for metadata register copy. 4096 * 4097 * Once Q/RSS action is detected in user's action list, the flow action 4098 * should be split in order to copy metadata registers, which will happen in 4099 * RX_CP_TBL like, 4100 * - CQE->flow_tag := reg_c[1] (MARK) 4101 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 4102 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 4103 * This is because the last action of each flow must be a terminal action 4104 * (QUEUE, RSS or DROP). 4105 * 4106 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 4107 * stored and kept in the mlx5_flow structure per each sub_flow. 4108 * 4109 * The Q/RSS action is replaced with, 4110 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 4111 * And the following JUMP action is added at the end, 4112 * - JUMP, to RX_CP_TBL. 4113 * 4114 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 4115 * flow_create_split_metadata() routine. The flow will look like, 4116 * - If flow ID matches (reg_c[2]), perform Q/RSS. 4117 * 4118 * @param dev 4119 * Pointer to Ethernet device. 4120 * @param[out] split_actions 4121 * Pointer to store split actions to jump to CP_TBL. 4122 * @param[in] actions 4123 * Pointer to the list of original flow actions. 4124 * @param[in] qrss 4125 * Pointer to the Q/RSS action. 4126 * @param[in] actions_n 4127 * Number of original actions. 4128 * @param[out] error 4129 * Perform verbose error reporting if not NULL. 4130 * 4131 * @return 4132 * non-zero unique flow_id on success, otherwise 0 and 4133 * error/rte_error are set. 4134 */ 4135 static uint32_t 4136 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 4137 struct rte_flow_action *split_actions, 4138 const struct rte_flow_action *actions, 4139 const struct rte_flow_action *qrss, 4140 int actions_n, struct rte_flow_error *error) 4141 { 4142 struct mlx5_rte_flow_action_set_tag *set_tag; 4143 struct rte_flow_action_jump *jump; 4144 const int qrss_idx = qrss - actions; 4145 uint32_t flow_id = 0; 4146 int ret = 0; 4147 4148 /* 4149 * Given actions will be split 4150 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 4151 * - Add jump to mreg CP_TBL. 4152 * As a result, there will be one more action. 4153 */ 4154 ++actions_n; 4155 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 4156 set_tag = (void *)(split_actions + actions_n); 4157 /* 4158 * If tag action is not set to void(it means we are not the meter 4159 * suffix flow), add the tag action. Since meter suffix flow already 4160 * has the tag added. 4161 */ 4162 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 4163 /* 4164 * Allocate the new subflow ID. This one is unique within 4165 * device and not shared with representors. Otherwise, 4166 * we would have to resolve multi-thread access synch 4167 * issue. Each flow on the shared device is appended 4168 * with source vport identifier, so the resulting 4169 * flows will be unique in the shared (by master and 4170 * representors) domain even if they have coinciding 4171 * IDs. 4172 */ 4173 flow_id = flow_qrss_get_id(dev); 4174 if (!flow_id) 4175 return rte_flow_error_set(error, ENOMEM, 4176 RTE_FLOW_ERROR_TYPE_ACTION, 4177 NULL, "can't allocate id " 4178 "for split Q/RSS subflow"); 4179 /* Internal SET_TAG action to set flow ID. */ 4180 *set_tag = (struct mlx5_rte_flow_action_set_tag){ 4181 .data = flow_id, 4182 }; 4183 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error); 4184 if (ret < 0) 4185 return ret; 4186 set_tag->id = ret; 4187 /* Construct new actions array. */ 4188 /* Replace QUEUE/RSS action. */ 4189 split_actions[qrss_idx] = (struct rte_flow_action){ 4190 .type = (enum rte_flow_action_type) 4191 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4192 .conf = set_tag, 4193 }; 4194 } 4195 /* JUMP action to jump to mreg copy table (CP_TBL). */ 4196 jump = (void *)(set_tag + 1); 4197 *jump = (struct rte_flow_action_jump){ 4198 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 4199 }; 4200 split_actions[actions_n - 2] = (struct rte_flow_action){ 4201 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4202 .conf = jump, 4203 }; 4204 split_actions[actions_n - 1] = (struct rte_flow_action){ 4205 .type = RTE_FLOW_ACTION_TYPE_END, 4206 }; 4207 return flow_id; 4208 } 4209 4210 /** 4211 * Extend the given action list for Tx metadata copy. 4212 * 4213 * Copy the given action list to the ext_actions and add flow metadata register 4214 * copy action in order to copy reg_a set by WQE to reg_c[0]. 4215 * 4216 * @param[out] ext_actions 4217 * Pointer to the extended action list. 4218 * @param[in] actions 4219 * Pointer to the list of actions. 4220 * @param[in] actions_n 4221 * Number of actions in the list. 4222 * @param[out] error 4223 * Perform verbose error reporting if not NULL. 4224 * @param[in] encap_idx 4225 * The encap action inndex. 4226 * 4227 * @return 4228 * 0 on success, negative value otherwise 4229 */ 4230 static int 4231 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 4232 struct rte_flow_action *ext_actions, 4233 const struct rte_flow_action *actions, 4234 int actions_n, struct rte_flow_error *error, 4235 int encap_idx) 4236 { 4237 struct mlx5_flow_action_copy_mreg *cp_mreg = 4238 (struct mlx5_flow_action_copy_mreg *) 4239 (ext_actions + actions_n + 1); 4240 int ret; 4241 4242 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 4243 if (ret < 0) 4244 return ret; 4245 cp_mreg->dst = ret; 4246 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 4247 if (ret < 0) 4248 return ret; 4249 cp_mreg->src = ret; 4250 if (encap_idx != 0) 4251 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 4252 if (encap_idx == actions_n - 1) { 4253 ext_actions[actions_n - 1] = (struct rte_flow_action){ 4254 .type = (enum rte_flow_action_type) 4255 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4256 .conf = cp_mreg, 4257 }; 4258 ext_actions[actions_n] = (struct rte_flow_action){ 4259 .type = RTE_FLOW_ACTION_TYPE_END, 4260 }; 4261 } else { 4262 ext_actions[encap_idx] = (struct rte_flow_action){ 4263 .type = (enum rte_flow_action_type) 4264 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4265 .conf = cp_mreg, 4266 }; 4267 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 4268 sizeof(*ext_actions) * (actions_n - encap_idx)); 4269 } 4270 return 0; 4271 } 4272 4273 /** 4274 * Check the match action from the action list. 4275 * 4276 * @param[in] actions 4277 * Pointer to the list of actions. 4278 * @param[in] attr 4279 * Flow rule attributes. 4280 * @param[in] action 4281 * The action to be check if exist. 4282 * @param[out] match_action_pos 4283 * Pointer to the position of the matched action if exists, otherwise is -1. 4284 * @param[out] qrss_action_pos 4285 * Pointer to the position of the Queue/RSS action if exists, otherwise is -1. 4286 * 4287 * @return 4288 * > 0 the total number of actions. 4289 * 0 if not found match action in action list. 4290 */ 4291 static int 4292 flow_check_match_action(const struct rte_flow_action actions[], 4293 const struct rte_flow_attr *attr, 4294 enum rte_flow_action_type action, 4295 int *match_action_pos, int *qrss_action_pos) 4296 { 4297 const struct rte_flow_action_sample *sample; 4298 int actions_n = 0; 4299 int jump_flag = 0; 4300 uint32_t ratio = 0; 4301 int sub_type = 0; 4302 int flag = 0; 4303 4304 *match_action_pos = -1; 4305 *qrss_action_pos = -1; 4306 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4307 if (actions->type == action) { 4308 flag = 1; 4309 *match_action_pos = actions_n; 4310 } 4311 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE || 4312 actions->type == RTE_FLOW_ACTION_TYPE_RSS) 4313 *qrss_action_pos = actions_n; 4314 if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) 4315 jump_flag = 1; 4316 if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) { 4317 sample = actions->conf; 4318 ratio = sample->ratio; 4319 sub_type = ((const struct rte_flow_action *) 4320 (sample->actions))->type; 4321 } 4322 actions_n++; 4323 } 4324 if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) { 4325 if (ratio == 1) { 4326 /* JUMP Action not support for Mirroring; 4327 * Mirroring support multi-destination; 4328 */ 4329 if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END) 4330 flag = 0; 4331 } 4332 } 4333 /* Count RTE_FLOW_ACTION_TYPE_END. */ 4334 return flag ? actions_n + 1 : 0; 4335 } 4336 4337 #define SAMPLE_SUFFIX_ITEM 2 4338 4339 /** 4340 * Split the sample flow. 4341 * 4342 * As sample flow will split to two sub flow, sample flow with 4343 * sample action, the other actions will move to new suffix flow. 4344 * 4345 * Also add unique tag id with tag action in the sample flow, 4346 * the same tag id will be as match in the suffix flow. 4347 * 4348 * @param dev 4349 * Pointer to Ethernet device. 4350 * @param[in] fdb_tx 4351 * FDB egress flow flag. 4352 * @param[out] sfx_items 4353 * Suffix flow match items (list terminated by the END pattern item). 4354 * @param[in] actions 4355 * Associated actions (list terminated by the END action). 4356 * @param[out] actions_sfx 4357 * Suffix flow actions. 4358 * @param[out] actions_pre 4359 * Prefix flow actions. 4360 * @param[in] actions_n 4361 * The total number of actions. 4362 * @param[in] sample_action_pos 4363 * The sample action position. 4364 * @param[in] qrss_action_pos 4365 * The Queue/RSS action position. 4366 * @param[out] error 4367 * Perform verbose error reporting if not NULL. 4368 * 4369 * @return 4370 * 0 on success, or unique flow_id, a negative errno value 4371 * otherwise and rte_errno is set. 4372 */ 4373 static int 4374 flow_sample_split_prep(struct rte_eth_dev *dev, 4375 uint32_t fdb_tx, 4376 struct rte_flow_item sfx_items[], 4377 const struct rte_flow_action actions[], 4378 struct rte_flow_action actions_sfx[], 4379 struct rte_flow_action actions_pre[], 4380 int actions_n, 4381 int sample_action_pos, 4382 int qrss_action_pos, 4383 struct rte_flow_error *error) 4384 { 4385 struct mlx5_rte_flow_action_set_tag *set_tag; 4386 struct mlx5_rte_flow_item_tag *tag_spec; 4387 struct mlx5_rte_flow_item_tag *tag_mask; 4388 uint32_t tag_id = 0; 4389 int index; 4390 int ret; 4391 4392 if (sample_action_pos < 0) 4393 return rte_flow_error_set(error, EINVAL, 4394 RTE_FLOW_ERROR_TYPE_ACTION, 4395 NULL, "invalid position of sample " 4396 "action in list"); 4397 if (!fdb_tx) { 4398 /* Prepare the prefix tag action. */ 4399 set_tag = (void *)(actions_pre + actions_n + 1); 4400 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); 4401 if (ret < 0) 4402 return ret; 4403 set_tag->id = ret; 4404 tag_id = flow_qrss_get_id(dev); 4405 set_tag->data = tag_id; 4406 /* Prepare the suffix subflow items. */ 4407 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); 4408 tag_spec->data = tag_id; 4409 tag_spec->id = set_tag->id; 4410 tag_mask = tag_spec + 1; 4411 tag_mask->data = UINT32_MAX; 4412 sfx_items[0] = (struct rte_flow_item){ 4413 .type = (enum rte_flow_item_type) 4414 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4415 .spec = tag_spec, 4416 .last = NULL, 4417 .mask = tag_mask, 4418 }; 4419 sfx_items[1] = (struct rte_flow_item){ 4420 .type = (enum rte_flow_item_type) 4421 RTE_FLOW_ITEM_TYPE_END, 4422 }; 4423 } 4424 /* Prepare the actions for prefix and suffix flow. */ 4425 if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) { 4426 index = qrss_action_pos; 4427 /* Put the preceding the Queue/RSS action into prefix flow. */ 4428 if (index != 0) 4429 memcpy(actions_pre, actions, 4430 sizeof(struct rte_flow_action) * index); 4431 /* Put others preceding the sample action into prefix flow. */ 4432 if (sample_action_pos > index + 1) 4433 memcpy(actions_pre + index, actions + index + 1, 4434 sizeof(struct rte_flow_action) * 4435 (sample_action_pos - index - 1)); 4436 index = sample_action_pos - 1; 4437 /* Put Queue/RSS action into Suffix flow. */ 4438 memcpy(actions_sfx, actions + qrss_action_pos, 4439 sizeof(struct rte_flow_action)); 4440 actions_sfx++; 4441 } else { 4442 index = sample_action_pos; 4443 if (index != 0) 4444 memcpy(actions_pre, actions, 4445 sizeof(struct rte_flow_action) * index); 4446 } 4447 /* Add the extra tag action for NIC-RX and E-Switch ingress. */ 4448 if (!fdb_tx) { 4449 actions_pre[index++] = 4450 (struct rte_flow_action){ 4451 .type = (enum rte_flow_action_type) 4452 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4453 .conf = set_tag, 4454 }; 4455 } 4456 memcpy(actions_pre + index, actions + sample_action_pos, 4457 sizeof(struct rte_flow_action)); 4458 index += 1; 4459 actions_pre[index] = (struct rte_flow_action){ 4460 .type = (enum rte_flow_action_type) 4461 RTE_FLOW_ACTION_TYPE_END, 4462 }; 4463 /* Put the actions after sample into Suffix flow. */ 4464 memcpy(actions_sfx, actions + sample_action_pos + 1, 4465 sizeof(struct rte_flow_action) * 4466 (actions_n - sample_action_pos - 1)); 4467 return tag_id; 4468 } 4469 4470 /** 4471 * The splitting for metadata feature. 4472 * 4473 * - Q/RSS action on NIC Rx should be split in order to pass by 4474 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4475 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4476 * 4477 * - All the actions on NIC Tx should have a mreg copy action to 4478 * copy reg_a from WQE to reg_c[0]. 4479 * 4480 * @param dev 4481 * Pointer to Ethernet device. 4482 * @param[in] flow 4483 * Parent flow structure pointer. 4484 * @param[in] prefix_layers 4485 * Prefix flow layer flags. 4486 * @param[in] prefix_mark 4487 * Prefix subflow mark flag, may be 0. 4488 * @param[in] attr 4489 * Flow rule attributes. 4490 * @param[in] items 4491 * Pattern specification (list terminated by the END pattern item). 4492 * @param[in] actions 4493 * Associated actions (list terminated by the END action). 4494 * @param[in] external 4495 * This flow rule is created by request external to PMD. 4496 * @param[in] flow_idx 4497 * This memory pool index to the flow. 4498 * @param[out] error 4499 * Perform verbose error reporting if not NULL. 4500 * @return 4501 * 0 on success, negative value otherwise 4502 */ 4503 static int 4504 flow_create_split_metadata(struct rte_eth_dev *dev, 4505 struct rte_flow *flow, 4506 uint64_t prefix_layers, 4507 uint32_t prefix_mark, 4508 const struct rte_flow_attr *attr, 4509 const struct rte_flow_item items[], 4510 const struct rte_flow_action actions[], 4511 bool external, uint32_t flow_idx, 4512 struct rte_flow_error *error) 4513 { 4514 struct mlx5_priv *priv = dev->data->dev_private; 4515 struct mlx5_dev_config *config = &priv->config; 4516 const struct rte_flow_action *qrss = NULL; 4517 struct rte_flow_action *ext_actions = NULL; 4518 struct mlx5_flow *dev_flow = NULL; 4519 uint32_t qrss_id = 0; 4520 int mtr_sfx = 0; 4521 size_t act_size; 4522 int actions_n; 4523 int encap_idx; 4524 int ret; 4525 4526 /* Check whether extensive metadata feature is engaged. */ 4527 if (!config->dv_flow_en || 4528 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4529 !mlx5_flow_ext_mreg_supported(dev)) 4530 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 4531 prefix_mark, attr, items, 4532 actions, external, flow_idx, 4533 error); 4534 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 4535 &encap_idx); 4536 if (qrss) { 4537 /* Exclude hairpin flows from splitting. */ 4538 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 4539 const struct rte_flow_action_queue *queue; 4540 4541 queue = qrss->conf; 4542 if (mlx5_rxq_get_type(dev, queue->index) == 4543 MLX5_RXQ_TYPE_HAIRPIN) 4544 qrss = NULL; 4545 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 4546 const struct rte_flow_action_rss *rss; 4547 4548 rss = qrss->conf; 4549 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 4550 MLX5_RXQ_TYPE_HAIRPIN) 4551 qrss = NULL; 4552 } 4553 } 4554 if (qrss) { 4555 /* Check if it is in meter suffix table. */ 4556 mtr_sfx = attr->group == (attr->transfer ? 4557 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4558 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4559 /* 4560 * Q/RSS action on NIC Rx should be split in order to pass by 4561 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4562 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4563 */ 4564 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4565 sizeof(struct rte_flow_action_set_tag) + 4566 sizeof(struct rte_flow_action_jump); 4567 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4568 SOCKET_ID_ANY); 4569 if (!ext_actions) 4570 return rte_flow_error_set(error, ENOMEM, 4571 RTE_FLOW_ERROR_TYPE_ACTION, 4572 NULL, "no memory to split " 4573 "metadata flow"); 4574 /* 4575 * If we are the suffix flow of meter, tag already exist. 4576 * Set the tag action to void. 4577 */ 4578 if (mtr_sfx) 4579 ext_actions[qrss - actions].type = 4580 RTE_FLOW_ACTION_TYPE_VOID; 4581 else 4582 ext_actions[qrss - actions].type = 4583 (enum rte_flow_action_type) 4584 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4585 /* 4586 * Create the new actions list with removed Q/RSS action 4587 * and appended set tag and jump to register copy table 4588 * (RX_CP_TBL). We should preallocate unique tag ID here 4589 * in advance, because it is needed for set tag action. 4590 */ 4591 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4592 qrss, actions_n, error); 4593 if (!mtr_sfx && !qrss_id) { 4594 ret = -rte_errno; 4595 goto exit; 4596 } 4597 } else if (attr->egress && !attr->transfer) { 4598 /* 4599 * All the actions on NIC Tx should have a metadata register 4600 * copy action to copy reg_a from WQE to reg_c[meta] 4601 */ 4602 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4603 sizeof(struct mlx5_flow_action_copy_mreg); 4604 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4605 SOCKET_ID_ANY); 4606 if (!ext_actions) 4607 return rte_flow_error_set(error, ENOMEM, 4608 RTE_FLOW_ERROR_TYPE_ACTION, 4609 NULL, "no memory to split " 4610 "metadata flow"); 4611 /* Create the action list appended with copy register. */ 4612 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4613 actions_n, error, encap_idx); 4614 if (ret < 0) 4615 goto exit; 4616 } 4617 /* Add the unmodified original or prefix subflow. */ 4618 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, 4619 prefix_mark, attr, 4620 items, ext_actions ? ext_actions : 4621 actions, external, flow_idx, error); 4622 if (ret < 0) 4623 goto exit; 4624 MLX5_ASSERT(dev_flow); 4625 if (qrss) { 4626 const struct rte_flow_attr q_attr = { 4627 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4628 .ingress = 1, 4629 }; 4630 /* Internal PMD action to set register. */ 4631 struct mlx5_rte_flow_item_tag q_tag_spec = { 4632 .data = qrss_id, 4633 .id = REG_NON, 4634 }; 4635 struct rte_flow_item q_items[] = { 4636 { 4637 .type = (enum rte_flow_item_type) 4638 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4639 .spec = &q_tag_spec, 4640 .last = NULL, 4641 .mask = NULL, 4642 }, 4643 { 4644 .type = RTE_FLOW_ITEM_TYPE_END, 4645 }, 4646 }; 4647 struct rte_flow_action q_actions[] = { 4648 { 4649 .type = qrss->type, 4650 .conf = qrss->conf, 4651 }, 4652 { 4653 .type = RTE_FLOW_ACTION_TYPE_END, 4654 }, 4655 }; 4656 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4657 4658 /* 4659 * Configure the tag item only if there is no meter subflow. 4660 * Since tag is already marked in the meter suffix subflow 4661 * we can just use the meter suffix items as is. 4662 */ 4663 if (qrss_id) { 4664 /* Not meter subflow. */ 4665 MLX5_ASSERT(!mtr_sfx); 4666 /* 4667 * Put unique id in prefix flow due to it is destroyed 4668 * after suffix flow and id will be freed after there 4669 * is no actual flows with this id and identifier 4670 * reallocation becomes possible (for example, for 4671 * other flows in other threads). 4672 */ 4673 dev_flow->handle->split_flow_id = qrss_id; 4674 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4675 error); 4676 if (ret < 0) 4677 goto exit; 4678 q_tag_spec.id = ret; 4679 } 4680 dev_flow = NULL; 4681 /* Add suffix subflow to execute Q/RSS. */ 4682 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0, 4683 &q_attr, mtr_sfx ? items : 4684 q_items, q_actions, 4685 external, flow_idx, error); 4686 if (ret < 0) 4687 goto exit; 4688 /* qrss ID should be freed if failed. */ 4689 qrss_id = 0; 4690 MLX5_ASSERT(dev_flow); 4691 } 4692 4693 exit: 4694 /* 4695 * We do not destroy the partially created sub_flows in case of error. 4696 * These ones are included into parent flow list and will be destroyed 4697 * by flow_drv_destroy. 4698 */ 4699 flow_qrss_free_id(dev, qrss_id); 4700 mlx5_free(ext_actions); 4701 return ret; 4702 } 4703 4704 /** 4705 * The splitting for meter feature. 4706 * 4707 * - The meter flow will be split to two flows as prefix and 4708 * suffix flow. The packets make sense only it pass the prefix 4709 * meter action. 4710 * 4711 * - Reg_C_5 is used for the packet to match betweend prefix and 4712 * suffix flow. 4713 * 4714 * @param dev 4715 * Pointer to Ethernet device. 4716 * @param[in] flow 4717 * Parent flow structure pointer. 4718 * @param[in] prefix_layers 4719 * Prefix subflow layers, may be 0. 4720 * @param[in] prefix_mark 4721 * Prefix subflow mark flag, may be 0. 4722 * @param[in] attr 4723 * Flow rule attributes. 4724 * @param[in] items 4725 * Pattern specification (list terminated by the END pattern item). 4726 * @param[in] actions 4727 * Associated actions (list terminated by the END action). 4728 * @param[in] external 4729 * This flow rule is created by request external to PMD. 4730 * @param[in] flow_idx 4731 * This memory pool index to the flow. 4732 * @param[out] error 4733 * Perform verbose error reporting if not NULL. 4734 * @return 4735 * 0 on success, negative value otherwise 4736 */ 4737 static int 4738 flow_create_split_meter(struct rte_eth_dev *dev, 4739 struct rte_flow *flow, 4740 uint64_t prefix_layers, 4741 uint32_t prefix_mark, 4742 const struct rte_flow_attr *attr, 4743 const struct rte_flow_item items[], 4744 const struct rte_flow_action actions[], 4745 bool external, uint32_t flow_idx, 4746 struct rte_flow_error *error) 4747 { 4748 struct mlx5_priv *priv = dev->data->dev_private; 4749 struct rte_flow_action *sfx_actions = NULL; 4750 struct rte_flow_action *pre_actions = NULL; 4751 struct rte_flow_item *sfx_items = NULL; 4752 struct mlx5_flow *dev_flow = NULL; 4753 struct rte_flow_attr sfx_attr = *attr; 4754 uint32_t mtr = 0; 4755 uint32_t mtr_tag_id = 0; 4756 size_t act_size; 4757 size_t item_size; 4758 int actions_n = 0; 4759 int ret; 4760 4761 if (priv->mtr_en) 4762 actions_n = flow_check_meter_action(actions, &mtr); 4763 if (mtr) { 4764 /* The five prefix actions: meter, decap, encap, tag, end. */ 4765 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4766 sizeof(struct mlx5_rte_flow_action_set_tag); 4767 /* tag, vlan, port id, end. */ 4768 #define METER_SUFFIX_ITEM 4 4769 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4770 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4771 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 4772 0, SOCKET_ID_ANY); 4773 if (!sfx_actions) 4774 return rte_flow_error_set(error, ENOMEM, 4775 RTE_FLOW_ERROR_TYPE_ACTION, 4776 NULL, "no memory to split " 4777 "meter flow"); 4778 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4779 act_size); 4780 pre_actions = sfx_actions + actions_n; 4781 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4782 actions, sfx_actions, 4783 pre_actions); 4784 if (!mtr_tag_id) { 4785 ret = -rte_errno; 4786 goto exit; 4787 } 4788 /* Add the prefix subflow. */ 4789 ret = flow_create_split_inner(dev, flow, &dev_flow, 4790 prefix_layers, 0, 4791 attr, items, 4792 pre_actions, external, 4793 flow_idx, error); 4794 if (ret) { 4795 ret = -rte_errno; 4796 goto exit; 4797 } 4798 dev_flow->handle->split_flow_id = mtr_tag_id; 4799 /* Setting the sfx group atrr. */ 4800 sfx_attr.group = sfx_attr.transfer ? 4801 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4802 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4803 } 4804 /* Add the prefix subflow. */ 4805 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4806 flow_get_prefix_layer_flags(dev_flow) : 4807 prefix_layers, dev_flow ? 4808 dev_flow->handle->mark : prefix_mark, 4809 &sfx_attr, sfx_items ? 4810 sfx_items : items, 4811 sfx_actions ? sfx_actions : actions, 4812 external, flow_idx, error); 4813 exit: 4814 if (sfx_actions) 4815 mlx5_free(sfx_actions); 4816 return ret; 4817 } 4818 4819 /** 4820 * The splitting for sample feature. 4821 * 4822 * Once Sample action is detected in the action list, the flow actions should 4823 * be split into prefix sub flow and suffix sub flow. 4824 * 4825 * The original items remain in the prefix sub flow, all actions preceding the 4826 * sample action and the sample action itself will be copied to the prefix 4827 * sub flow, the actions following the sample action will be copied to the 4828 * suffix sub flow, Queue action always be located in the suffix sub flow. 4829 * 4830 * In order to make the packet from prefix sub flow matches with suffix sub 4831 * flow, an extra tag action be added into prefix sub flow, and the suffix sub 4832 * flow uses tag item with the unique flow id. 4833 * 4834 * @param dev 4835 * Pointer to Ethernet device. 4836 * @param[in] flow 4837 * Parent flow structure pointer. 4838 * @param[in] attr 4839 * Flow rule attributes. 4840 * @param[in] items 4841 * Pattern specification (list terminated by the END pattern item). 4842 * @param[in] actions 4843 * Associated actions (list terminated by the END action). 4844 * @param[in] external 4845 * This flow rule is created by request external to PMD. 4846 * @param[in] flow_idx 4847 * This memory pool index to the flow. 4848 * @param[out] error 4849 * Perform verbose error reporting if not NULL. 4850 * @return 4851 * 0 on success, negative value otherwise 4852 */ 4853 static int 4854 flow_create_split_sample(struct rte_eth_dev *dev, 4855 struct rte_flow *flow, 4856 const struct rte_flow_attr *attr, 4857 const struct rte_flow_item items[], 4858 const struct rte_flow_action actions[], 4859 bool external, uint32_t flow_idx, 4860 struct rte_flow_error *error) 4861 { 4862 struct mlx5_priv *priv = dev->data->dev_private; 4863 struct rte_flow_action *sfx_actions = NULL; 4864 struct rte_flow_action *pre_actions = NULL; 4865 struct rte_flow_item *sfx_items = NULL; 4866 struct mlx5_flow *dev_flow = NULL; 4867 struct rte_flow_attr sfx_attr = *attr; 4868 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4869 struct mlx5_flow_dv_sample_resource *sample_res; 4870 struct mlx5_flow_tbl_data_entry *sfx_tbl_data; 4871 struct mlx5_flow_tbl_resource *sfx_tbl; 4872 union mlx5_flow_tbl_key sfx_table_key; 4873 #endif 4874 size_t act_size; 4875 size_t item_size; 4876 uint32_t fdb_tx = 0; 4877 int32_t tag_id = 0; 4878 int actions_n = 0; 4879 int sample_action_pos; 4880 int qrss_action_pos; 4881 int ret = 0; 4882 4883 if (priv->sampler_en) 4884 actions_n = flow_check_match_action(actions, attr, 4885 RTE_FLOW_ACTION_TYPE_SAMPLE, 4886 &sample_action_pos, &qrss_action_pos); 4887 if (actions_n) { 4888 /* The prefix actions must includes sample, tag, end. */ 4889 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1) 4890 + sizeof(struct mlx5_rte_flow_action_set_tag); 4891 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM + 4892 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4893 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + 4894 item_size), 0, SOCKET_ID_ANY); 4895 if (!sfx_actions) 4896 return rte_flow_error_set(error, ENOMEM, 4897 RTE_FLOW_ERROR_TYPE_ACTION, 4898 NULL, "no memory to split " 4899 "sample flow"); 4900 /* The representor_id is -1 for uplink. */ 4901 fdb_tx = (attr->transfer && priv->representor_id != -1); 4902 if (!fdb_tx) 4903 sfx_items = (struct rte_flow_item *)((char *)sfx_actions 4904 + act_size); 4905 pre_actions = sfx_actions + actions_n; 4906 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items, 4907 actions, sfx_actions, 4908 pre_actions, actions_n, 4909 sample_action_pos, 4910 qrss_action_pos, error); 4911 if (tag_id < 0 || (!fdb_tx && !tag_id)) { 4912 ret = -rte_errno; 4913 goto exit; 4914 } 4915 /* Add the prefix subflow. */ 4916 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr, 4917 items, pre_actions, external, 4918 flow_idx, error); 4919 if (ret) { 4920 ret = -rte_errno; 4921 goto exit; 4922 } 4923 dev_flow->handle->split_flow_id = tag_id; 4924 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4925 /* Set the sfx group attr. */ 4926 sample_res = (struct mlx5_flow_dv_sample_resource *) 4927 dev_flow->dv.sample_res; 4928 sfx_tbl = (struct mlx5_flow_tbl_resource *) 4929 sample_res->normal_path_tbl; 4930 sfx_tbl_data = container_of(sfx_tbl, 4931 struct mlx5_flow_tbl_data_entry, tbl); 4932 sfx_table_key.v64 = sfx_tbl_data->entry.key; 4933 sfx_attr.group = sfx_attr.transfer ? 4934 (sfx_table_key.table_id - 1) : 4935 sfx_table_key.table_id; 4936 #endif 4937 } 4938 /* Add the suffix subflow. */ 4939 ret = flow_create_split_meter(dev, flow, dev_flow ? 4940 flow_get_prefix_layer_flags(dev_flow) : 0, 4941 dev_flow ? dev_flow->handle->mark : 0, 4942 &sfx_attr, sfx_items ? sfx_items : items, 4943 sfx_actions ? sfx_actions : actions, 4944 external, flow_idx, error); 4945 exit: 4946 if (sfx_actions) 4947 mlx5_free(sfx_actions); 4948 return ret; 4949 } 4950 4951 /** 4952 * Split the flow to subflow set. The splitters might be linked 4953 * in the chain, like this: 4954 * flow_create_split_outer() calls: 4955 * flow_create_split_meter() calls: 4956 * flow_create_split_metadata(meter_subflow_0) calls: 4957 * flow_create_split_inner(metadata_subflow_0) 4958 * flow_create_split_inner(metadata_subflow_1) 4959 * flow_create_split_inner(metadata_subflow_2) 4960 * flow_create_split_metadata(meter_subflow_1) calls: 4961 * flow_create_split_inner(metadata_subflow_0) 4962 * flow_create_split_inner(metadata_subflow_1) 4963 * flow_create_split_inner(metadata_subflow_2) 4964 * 4965 * This provide flexible way to add new levels of flow splitting. 4966 * The all of successfully created subflows are included to the 4967 * parent flow dev_flow list. 4968 * 4969 * @param dev 4970 * Pointer to Ethernet device. 4971 * @param[in] flow 4972 * Parent flow structure pointer. 4973 * @param[in] attr 4974 * Flow rule attributes. 4975 * @param[in] items 4976 * Pattern specification (list terminated by the END pattern item). 4977 * @param[in] actions 4978 * Associated actions (list terminated by the END action). 4979 * @param[in] external 4980 * This flow rule is created by request external to PMD. 4981 * @param[in] flow_idx 4982 * This memory pool index to the flow. 4983 * @param[out] error 4984 * Perform verbose error reporting if not NULL. 4985 * @return 4986 * 0 on success, negative value otherwise 4987 */ 4988 static int 4989 flow_create_split_outer(struct rte_eth_dev *dev, 4990 struct rte_flow *flow, 4991 const struct rte_flow_attr *attr, 4992 const struct rte_flow_item items[], 4993 const struct rte_flow_action actions[], 4994 bool external, uint32_t flow_idx, 4995 struct rte_flow_error *error) 4996 { 4997 int ret; 4998 4999 ret = flow_create_split_sample(dev, flow, attr, items, 5000 actions, external, flow_idx, error); 5001 MLX5_ASSERT(ret <= 0); 5002 return ret; 5003 } 5004 5005 /** 5006 * Create a flow and add it to @p list. 5007 * 5008 * @param dev 5009 * Pointer to Ethernet device. 5010 * @param list 5011 * Pointer to a TAILQ flow list. If this parameter NULL, 5012 * no list insertion occurred, flow is just created, 5013 * this is caller's responsibility to track the 5014 * created flow. 5015 * @param[in] attr 5016 * Flow rule attributes. 5017 * @param[in] items 5018 * Pattern specification (list terminated by the END pattern item). 5019 * @param[in] actions 5020 * Associated actions (list terminated by the END action). 5021 * @param[in] external 5022 * This flow rule is created by request external to PMD. 5023 * @param[out] error 5024 * Perform verbose error reporting if not NULL. 5025 * 5026 * @return 5027 * A flow index on success, 0 otherwise and rte_errno is set. 5028 */ 5029 static uint32_t 5030 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 5031 const struct rte_flow_attr *attr, 5032 const struct rte_flow_item items[], 5033 const struct rte_flow_action actions[], 5034 bool external, struct rte_flow_error *error) 5035 { 5036 struct mlx5_priv *priv = dev->data->dev_private; 5037 struct rte_flow *flow = NULL; 5038 struct mlx5_flow *dev_flow; 5039 const struct rte_flow_action_rss *rss; 5040 union { 5041 struct mlx5_flow_expand_rss buf; 5042 uint8_t buffer[2048]; 5043 } expand_buffer; 5044 union { 5045 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5046 uint8_t buffer[2048]; 5047 } actions_rx; 5048 union { 5049 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5050 uint8_t buffer[2048]; 5051 } actions_hairpin_tx; 5052 union { 5053 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 5054 uint8_t buffer[2048]; 5055 } items_tx; 5056 struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; 5057 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 5058 priv->rss_desc)[!!priv->flow_idx]; 5059 const struct rte_flow_action *p_actions_rx = actions; 5060 uint32_t i; 5061 uint32_t idx = 0; 5062 int hairpin_flow; 5063 uint32_t hairpin_id = 0; 5064 struct rte_flow_attr attr_tx = { .priority = 0 }; 5065 struct rte_flow_attr attr_factor = {0}; 5066 int ret; 5067 5068 memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr)); 5069 if (external) 5070 attr_factor.group *= MLX5_FLOW_TABLE_FACTOR; 5071 hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions); 5072 ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx, 5073 external, hairpin_flow, error); 5074 if (ret < 0) 5075 return 0; 5076 if (hairpin_flow > 0) { 5077 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 5078 rte_errno = EINVAL; 5079 return 0; 5080 } 5081 flow_hairpin_split(dev, actions, actions_rx.actions, 5082 actions_hairpin_tx.actions, items_tx.items, 5083 &hairpin_id); 5084 p_actions_rx = actions_rx.actions; 5085 } 5086 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 5087 if (!flow) { 5088 rte_errno = ENOMEM; 5089 goto error_before_flow; 5090 } 5091 flow->drv_type = flow_get_drv_type(dev, &attr_factor); 5092 if (hairpin_id != 0) 5093 flow->hairpin_flow_id = hairpin_id; 5094 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 5095 flow->drv_type < MLX5_FLOW_TYPE_MAX); 5096 memset(rss_desc, 0, sizeof(*rss_desc)); 5097 rss = flow_get_rss_action(p_actions_rx); 5098 if (rss) { 5099 /* 5100 * The following information is required by 5101 * mlx5_flow_hashfields_adjust() in advance. 5102 */ 5103 rss_desc->level = rss->level; 5104 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 5105 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 5106 } 5107 flow->dev_handles = 0; 5108 if (rss && rss->types) { 5109 unsigned int graph_root; 5110 5111 graph_root = find_graph_root(items, rss->level); 5112 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 5113 items, rss->types, 5114 mlx5_support_expansion, graph_root); 5115 MLX5_ASSERT(ret > 0 && 5116 (unsigned int)ret < sizeof(expand_buffer.buffer)); 5117 } else { 5118 buf->entries = 1; 5119 buf->entry[0].pattern = (void *)(uintptr_t)items; 5120 } 5121 /* 5122 * Record the start index when there is a nested call. All sub-flows 5123 * need to be translated before another calling. 5124 * No need to use ping-pong buffer to save memory here. 5125 */ 5126 if (priv->flow_idx) { 5127 MLX5_ASSERT(!priv->flow_nested_idx); 5128 priv->flow_nested_idx = priv->flow_idx; 5129 } 5130 for (i = 0; i < buf->entries; ++i) { 5131 /* 5132 * The splitter may create multiple dev_flows, 5133 * depending on configuration. In the simplest 5134 * case it just creates unmodified original flow. 5135 */ 5136 ret = flow_create_split_outer(dev, flow, &attr_factor, 5137 buf->entry[i].pattern, 5138 p_actions_rx, external, idx, 5139 error); 5140 if (ret < 0) 5141 goto error; 5142 } 5143 /* Create the tx flow. */ 5144 if (hairpin_flow) { 5145 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 5146 attr_tx.ingress = 0; 5147 attr_tx.egress = 1; 5148 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 5149 actions_hairpin_tx.actions, 5150 idx, error); 5151 if (!dev_flow) 5152 goto error; 5153 dev_flow->flow = flow; 5154 dev_flow->external = 0; 5155 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 5156 dev_flow->handle, next); 5157 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 5158 items_tx.items, 5159 actions_hairpin_tx.actions, error); 5160 if (ret < 0) 5161 goto error; 5162 } 5163 /* 5164 * Update the metadata register copy table. If extensive 5165 * metadata feature is enabled and registers are supported 5166 * we might create the extra rte_flow for each unique 5167 * MARK/FLAG action ID. 5168 * 5169 * The table is updated for ingress Flows only, because 5170 * the egress Flows belong to the different device and 5171 * copy table should be updated in peer NIC Rx domain. 5172 */ 5173 if (attr_factor.ingress && 5174 (external || attr_factor.group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 5175 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 5176 if (ret) 5177 goto error; 5178 } 5179 /* 5180 * If the flow is external (from application) OR device is started, then 5181 * the flow will be applied immediately. 5182 */ 5183 if (external || dev->data->dev_started) { 5184 ret = flow_drv_apply(dev, flow, error); 5185 if (ret < 0) 5186 goto error; 5187 } 5188 if (list) 5189 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 5190 flow, next); 5191 flow_rxq_flags_set(dev, flow); 5192 /* Nested flow creation index recovery. */ 5193 priv->flow_idx = priv->flow_nested_idx; 5194 if (priv->flow_nested_idx) 5195 priv->flow_nested_idx = 0; 5196 return idx; 5197 error: 5198 MLX5_ASSERT(flow); 5199 ret = rte_errno; /* Save rte_errno before cleanup. */ 5200 flow_mreg_del_copy_action(dev, flow); 5201 flow_drv_destroy(dev, flow); 5202 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 5203 rte_errno = ret; /* Restore rte_errno. */ 5204 error_before_flow: 5205 ret = rte_errno; 5206 if (hairpin_id) 5207 mlx5_flow_id_release(priv->sh->flow_id_pool, 5208 hairpin_id); 5209 rte_errno = ret; 5210 priv->flow_idx = priv->flow_nested_idx; 5211 if (priv->flow_nested_idx) 5212 priv->flow_nested_idx = 0; 5213 return 0; 5214 } 5215 5216 /** 5217 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 5218 * incoming packets to table 1. 5219 * 5220 * Other flow rules, requested for group n, will be created in 5221 * e-switch table n+1. 5222 * Jump action to e-switch group n will be created to group n+1. 5223 * 5224 * Used when working in switchdev mode, to utilise advantages of table 1 5225 * and above. 5226 * 5227 * @param dev 5228 * Pointer to Ethernet device. 5229 * 5230 * @return 5231 * Pointer to flow on success, NULL otherwise and rte_errno is set. 5232 */ 5233 struct rte_flow * 5234 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 5235 { 5236 const struct rte_flow_attr attr = { 5237 .group = 0, 5238 .priority = 0, 5239 .ingress = 1, 5240 .egress = 0, 5241 .transfer = 1, 5242 }; 5243 const struct rte_flow_item pattern = { 5244 .type = RTE_FLOW_ITEM_TYPE_END, 5245 }; 5246 struct rte_flow_action_jump jump = { 5247 .group = 1, 5248 }; 5249 const struct rte_flow_action actions[] = { 5250 { 5251 .type = RTE_FLOW_ACTION_TYPE_JUMP, 5252 .conf = &jump, 5253 }, 5254 { 5255 .type = RTE_FLOW_ACTION_TYPE_END, 5256 }, 5257 }; 5258 struct mlx5_priv *priv = dev->data->dev_private; 5259 struct rte_flow_error error; 5260 5261 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 5262 &attr, &pattern, 5263 actions, false, &error); 5264 } 5265 5266 /** 5267 * Validate a flow supported by the NIC. 5268 * 5269 * @see rte_flow_validate() 5270 * @see rte_flow_ops 5271 */ 5272 int 5273 mlx5_flow_validate(struct rte_eth_dev *dev, 5274 const struct rte_flow_attr *attr, 5275 const struct rte_flow_item items[], 5276 const struct rte_flow_action actions[], 5277 struct rte_flow_error *error) 5278 { 5279 int hairpin_flow; 5280 5281 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 5282 return flow_drv_validate(dev, attr, items, actions, 5283 true, hairpin_flow, error); 5284 } 5285 5286 /** 5287 * Create a flow. 5288 * 5289 * @see rte_flow_create() 5290 * @see rte_flow_ops 5291 */ 5292 struct rte_flow * 5293 mlx5_flow_create(struct rte_eth_dev *dev, 5294 const struct rte_flow_attr *attr, 5295 const struct rte_flow_item items[], 5296 const struct rte_flow_action actions[], 5297 struct rte_flow_error *error) 5298 { 5299 struct mlx5_priv *priv = dev->data->dev_private; 5300 5301 /* 5302 * If the device is not started yet, it is not allowed to created a 5303 * flow from application. PMD default flows and traffic control flows 5304 * are not affected. 5305 */ 5306 if (unlikely(!dev->data->dev_started)) { 5307 DRV_LOG(DEBUG, "port %u is not started when " 5308 "inserting a flow", dev->data->port_id); 5309 rte_flow_error_set(error, ENODEV, 5310 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5311 NULL, 5312 "port not started"); 5313 return NULL; 5314 } 5315 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 5316 attr, items, actions, true, error); 5317 } 5318 5319 /** 5320 * Destroy a flow in a list. 5321 * 5322 * @param dev 5323 * Pointer to Ethernet device. 5324 * @param list 5325 * Pointer to the Indexed flow list. If this parameter NULL, 5326 * there is no flow removal from the list. Be noted that as 5327 * flow is add to the indexed list, memory of the indexed 5328 * list points to maybe changed as flow destroyed. 5329 * @param[in] flow_idx 5330 * Index of flow to destroy. 5331 */ 5332 static void 5333 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 5334 uint32_t flow_idx) 5335 { 5336 struct mlx5_priv *priv = dev->data->dev_private; 5337 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5338 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5339 [MLX5_IPOOL_RTE_FLOW], flow_idx); 5340 5341 if (!flow) 5342 return; 5343 /* 5344 * Update RX queue flags only if port is started, otherwise it is 5345 * already clean. 5346 */ 5347 if (dev->data->dev_started) 5348 flow_rxq_flags_trim(dev, flow); 5349 if (flow->hairpin_flow_id) 5350 mlx5_flow_id_release(priv->sh->flow_id_pool, 5351 flow->hairpin_flow_id); 5352 flow_drv_destroy(dev, flow); 5353 if (list) 5354 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 5355 flow_idx, flow, next); 5356 flow_mreg_del_copy_action(dev, flow); 5357 if (flow->fdir) { 5358 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5359 if (priv_fdir_flow->rix_flow == flow_idx) 5360 break; 5361 } 5362 if (priv_fdir_flow) { 5363 LIST_REMOVE(priv_fdir_flow, next); 5364 mlx5_free(priv_fdir_flow->fdir); 5365 mlx5_free(priv_fdir_flow); 5366 } 5367 } 5368 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5369 } 5370 5371 /** 5372 * Destroy all flows. 5373 * 5374 * @param dev 5375 * Pointer to Ethernet device. 5376 * @param list 5377 * Pointer to the Indexed flow list. 5378 * @param active 5379 * If flushing is called avtively. 5380 */ 5381 void 5382 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 5383 { 5384 uint32_t num_flushed = 0; 5385 5386 while (*list) { 5387 flow_list_destroy(dev, list, *list); 5388 num_flushed++; 5389 } 5390 if (active) { 5391 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 5392 dev->data->port_id, num_flushed); 5393 } 5394 } 5395 5396 /** 5397 * Remove all flows. 5398 * 5399 * @param dev 5400 * Pointer to Ethernet device. 5401 * @param list 5402 * Pointer to the Indexed flow list. 5403 */ 5404 void 5405 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 5406 { 5407 struct mlx5_priv *priv = dev->data->dev_private; 5408 struct rte_flow *flow = NULL; 5409 uint32_t idx; 5410 5411 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5412 flow, next) { 5413 flow_drv_remove(dev, flow); 5414 flow_mreg_stop_copy_action(dev, flow); 5415 } 5416 flow_mreg_del_default_copy_action(dev); 5417 flow_rxq_flags_clear(dev); 5418 } 5419 5420 /** 5421 * Add all flows. 5422 * 5423 * @param dev 5424 * Pointer to Ethernet device. 5425 * @param list 5426 * Pointer to the Indexed flow list. 5427 * 5428 * @return 5429 * 0 on success, a negative errno value otherwise and rte_errno is set. 5430 */ 5431 int 5432 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 5433 { 5434 struct mlx5_priv *priv = dev->data->dev_private; 5435 struct rte_flow *flow = NULL; 5436 struct rte_flow_error error; 5437 uint32_t idx; 5438 int ret = 0; 5439 5440 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5441 ret = flow_mreg_add_default_copy_action(dev, &error); 5442 if (ret < 0) 5443 return -rte_errno; 5444 /* Apply Flows created by application. */ 5445 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5446 flow, next) { 5447 ret = flow_mreg_start_copy_action(dev, flow); 5448 if (ret < 0) 5449 goto error; 5450 ret = flow_drv_apply(dev, flow, &error); 5451 if (ret < 0) 5452 goto error; 5453 flow_rxq_flags_set(dev, flow); 5454 } 5455 return 0; 5456 error: 5457 ret = rte_errno; /* Save rte_errno before cleanup. */ 5458 mlx5_flow_stop(dev, list); 5459 rte_errno = ret; /* Restore rte_errno. */ 5460 return -rte_errno; 5461 } 5462 5463 /** 5464 * Stop all default actions for flows. 5465 * 5466 * @param dev 5467 * Pointer to Ethernet device. 5468 */ 5469 void 5470 mlx5_flow_stop_default(struct rte_eth_dev *dev) 5471 { 5472 flow_mreg_del_default_copy_action(dev); 5473 flow_rxq_flags_clear(dev); 5474 } 5475 5476 /** 5477 * Start all default actions for flows. 5478 * 5479 * @param dev 5480 * Pointer to Ethernet device. 5481 * @return 5482 * 0 on success, a negative errno value otherwise and rte_errno is set. 5483 */ 5484 int 5485 mlx5_flow_start_default(struct rte_eth_dev *dev) 5486 { 5487 struct rte_flow_error error; 5488 5489 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5490 return flow_mreg_add_default_copy_action(dev, &error); 5491 } 5492 5493 /** 5494 * Allocate intermediate resources for flow creation. 5495 * 5496 * @param dev 5497 * Pointer to Ethernet device. 5498 */ 5499 void 5500 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 5501 { 5502 struct mlx5_priv *priv = dev->data->dev_private; 5503 5504 if (!priv->inter_flows) { 5505 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, 5506 MLX5_NUM_MAX_DEV_FLOWS * 5507 sizeof(struct mlx5_flow) + 5508 (sizeof(struct mlx5_flow_rss_desc) + 5509 sizeof(uint16_t) * UINT16_MAX) * 2, 0, 5510 SOCKET_ID_ANY); 5511 if (!priv->inter_flows) { 5512 DRV_LOG(ERR, "can't allocate intermediate memory."); 5513 return; 5514 } 5515 } 5516 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 5517 [MLX5_NUM_MAX_DEV_FLOWS]; 5518 /* Reset the index. */ 5519 priv->flow_idx = 0; 5520 priv->flow_nested_idx = 0; 5521 } 5522 5523 /** 5524 * Free intermediate resources for flows. 5525 * 5526 * @param dev 5527 * Pointer to Ethernet device. 5528 */ 5529 void 5530 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 5531 { 5532 struct mlx5_priv *priv = dev->data->dev_private; 5533 5534 mlx5_free(priv->inter_flows); 5535 priv->inter_flows = NULL; 5536 } 5537 5538 /** 5539 * Verify the flow list is empty 5540 * 5541 * @param dev 5542 * Pointer to Ethernet device. 5543 * 5544 * @return the number of flows not released. 5545 */ 5546 int 5547 mlx5_flow_verify(struct rte_eth_dev *dev) 5548 { 5549 struct mlx5_priv *priv = dev->data->dev_private; 5550 struct rte_flow *flow; 5551 uint32_t idx; 5552 int ret = 0; 5553 5554 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 5555 flow, next) { 5556 DRV_LOG(DEBUG, "port %u flow %p still referenced", 5557 dev->data->port_id, (void *)flow); 5558 ++ret; 5559 } 5560 return ret; 5561 } 5562 5563 /** 5564 * Enable default hairpin egress flow. 5565 * 5566 * @param dev 5567 * Pointer to Ethernet device. 5568 * @param queue 5569 * The queue index. 5570 * 5571 * @return 5572 * 0 on success, a negative errno value otherwise and rte_errno is set. 5573 */ 5574 int 5575 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 5576 uint32_t queue) 5577 { 5578 struct mlx5_priv *priv = dev->data->dev_private; 5579 const struct rte_flow_attr attr = { 5580 .egress = 1, 5581 .priority = 0, 5582 }; 5583 struct mlx5_rte_flow_item_tx_queue queue_spec = { 5584 .queue = queue, 5585 }; 5586 struct mlx5_rte_flow_item_tx_queue queue_mask = { 5587 .queue = UINT32_MAX, 5588 }; 5589 struct rte_flow_item items[] = { 5590 { 5591 .type = (enum rte_flow_item_type) 5592 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 5593 .spec = &queue_spec, 5594 .last = NULL, 5595 .mask = &queue_mask, 5596 }, 5597 { 5598 .type = RTE_FLOW_ITEM_TYPE_END, 5599 }, 5600 }; 5601 struct rte_flow_action_jump jump = { 5602 .group = MLX5_HAIRPIN_TX_TABLE, 5603 }; 5604 struct rte_flow_action actions[2]; 5605 uint32_t flow_idx; 5606 struct rte_flow_error error; 5607 5608 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 5609 actions[0].conf = &jump; 5610 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 5611 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5612 &attr, items, actions, false, &error); 5613 if (!flow_idx) { 5614 DRV_LOG(DEBUG, 5615 "Failed to create ctrl flow: rte_errno(%d)," 5616 " type(%d), message(%s)", 5617 rte_errno, error.type, 5618 error.message ? error.message : " (no stated reason)"); 5619 return -rte_errno; 5620 } 5621 return 0; 5622 } 5623 5624 /** 5625 * Enable a control flow configured from the control plane. 5626 * 5627 * @param dev 5628 * Pointer to Ethernet device. 5629 * @param eth_spec 5630 * An Ethernet flow spec to apply. 5631 * @param eth_mask 5632 * An Ethernet flow mask to apply. 5633 * @param vlan_spec 5634 * A VLAN flow spec to apply. 5635 * @param vlan_mask 5636 * A VLAN flow mask to apply. 5637 * 5638 * @return 5639 * 0 on success, a negative errno value otherwise and rte_errno is set. 5640 */ 5641 int 5642 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 5643 struct rte_flow_item_eth *eth_spec, 5644 struct rte_flow_item_eth *eth_mask, 5645 struct rte_flow_item_vlan *vlan_spec, 5646 struct rte_flow_item_vlan *vlan_mask) 5647 { 5648 struct mlx5_priv *priv = dev->data->dev_private; 5649 const struct rte_flow_attr attr = { 5650 .ingress = 1, 5651 .priority = MLX5_FLOW_PRIO_RSVD, 5652 }; 5653 struct rte_flow_item items[] = { 5654 { 5655 .type = RTE_FLOW_ITEM_TYPE_ETH, 5656 .spec = eth_spec, 5657 .last = NULL, 5658 .mask = eth_mask, 5659 }, 5660 { 5661 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN : 5662 RTE_FLOW_ITEM_TYPE_END, 5663 .spec = vlan_spec, 5664 .last = NULL, 5665 .mask = vlan_mask, 5666 }, 5667 { 5668 .type = RTE_FLOW_ITEM_TYPE_END, 5669 }, 5670 }; 5671 uint16_t queue[priv->reta_idx_n]; 5672 struct rte_flow_action_rss action_rss = { 5673 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 5674 .level = 0, 5675 .types = priv->rss_conf.rss_hf, 5676 .key_len = priv->rss_conf.rss_key_len, 5677 .queue_num = priv->reta_idx_n, 5678 .key = priv->rss_conf.rss_key, 5679 .queue = queue, 5680 }; 5681 struct rte_flow_action actions[] = { 5682 { 5683 .type = RTE_FLOW_ACTION_TYPE_RSS, 5684 .conf = &action_rss, 5685 }, 5686 { 5687 .type = RTE_FLOW_ACTION_TYPE_END, 5688 }, 5689 }; 5690 uint32_t flow_idx; 5691 struct rte_flow_error error; 5692 unsigned int i; 5693 5694 if (!priv->reta_idx_n || !priv->rxqs_n) { 5695 return 0; 5696 } 5697 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 5698 action_rss.types = 0; 5699 for (i = 0; i != priv->reta_idx_n; ++i) 5700 queue[i] = (*priv->reta_idx)[i]; 5701 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5702 &attr, items, actions, false, &error); 5703 if (!flow_idx) 5704 return -rte_errno; 5705 return 0; 5706 } 5707 5708 /** 5709 * Enable a flow control configured from the control plane. 5710 * 5711 * @param dev 5712 * Pointer to Ethernet device. 5713 * @param eth_spec 5714 * An Ethernet flow spec to apply. 5715 * @param eth_mask 5716 * An Ethernet flow mask to apply. 5717 * 5718 * @return 5719 * 0 on success, a negative errno value otherwise and rte_errno is set. 5720 */ 5721 int 5722 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5723 struct rte_flow_item_eth *eth_spec, 5724 struct rte_flow_item_eth *eth_mask) 5725 { 5726 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5727 } 5728 5729 /** 5730 * Create default miss flow rule matching lacp traffic 5731 * 5732 * @param dev 5733 * Pointer to Ethernet device. 5734 * @param eth_spec 5735 * An Ethernet flow spec to apply. 5736 * 5737 * @return 5738 * 0 on success, a negative errno value otherwise and rte_errno is set. 5739 */ 5740 int 5741 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5742 { 5743 struct mlx5_priv *priv = dev->data->dev_private; 5744 /* 5745 * The LACP matching is done by only using ether type since using 5746 * a multicast dst mac causes kernel to give low priority to this flow. 5747 */ 5748 static const struct rte_flow_item_eth lacp_spec = { 5749 .type = RTE_BE16(0x8809), 5750 }; 5751 static const struct rte_flow_item_eth lacp_mask = { 5752 .type = 0xffff, 5753 }; 5754 const struct rte_flow_attr attr = { 5755 .ingress = 1, 5756 }; 5757 struct rte_flow_item items[] = { 5758 { 5759 .type = RTE_FLOW_ITEM_TYPE_ETH, 5760 .spec = &lacp_spec, 5761 .mask = &lacp_mask, 5762 }, 5763 { 5764 .type = RTE_FLOW_ITEM_TYPE_END, 5765 }, 5766 }; 5767 struct rte_flow_action actions[] = { 5768 { 5769 .type = (enum rte_flow_action_type) 5770 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5771 }, 5772 { 5773 .type = RTE_FLOW_ACTION_TYPE_END, 5774 }, 5775 }; 5776 struct rte_flow_error error; 5777 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5778 &attr, items, actions, false, &error); 5779 5780 if (!flow_idx) 5781 return -rte_errno; 5782 return 0; 5783 } 5784 5785 /** 5786 * Destroy a flow. 5787 * 5788 * @see rte_flow_destroy() 5789 * @see rte_flow_ops 5790 */ 5791 int 5792 mlx5_flow_destroy(struct rte_eth_dev *dev, 5793 struct rte_flow *flow, 5794 struct rte_flow_error *error __rte_unused) 5795 { 5796 struct mlx5_priv *priv = dev->data->dev_private; 5797 5798 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5799 return 0; 5800 } 5801 5802 /** 5803 * Destroy all flows. 5804 * 5805 * @see rte_flow_flush() 5806 * @see rte_flow_ops 5807 */ 5808 int 5809 mlx5_flow_flush(struct rte_eth_dev *dev, 5810 struct rte_flow_error *error __rte_unused) 5811 { 5812 struct mlx5_priv *priv = dev->data->dev_private; 5813 5814 mlx5_flow_list_flush(dev, &priv->flows, false); 5815 return 0; 5816 } 5817 5818 /** 5819 * Isolated mode. 5820 * 5821 * @see rte_flow_isolate() 5822 * @see rte_flow_ops 5823 */ 5824 int 5825 mlx5_flow_isolate(struct rte_eth_dev *dev, 5826 int enable, 5827 struct rte_flow_error *error) 5828 { 5829 struct mlx5_priv *priv = dev->data->dev_private; 5830 5831 if (dev->data->dev_started) { 5832 rte_flow_error_set(error, EBUSY, 5833 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5834 NULL, 5835 "port must be stopped first"); 5836 return -rte_errno; 5837 } 5838 priv->isolated = !!enable; 5839 if (enable) 5840 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5841 else 5842 dev->dev_ops = &mlx5_os_dev_ops; 5843 5844 dev->rx_descriptor_status = mlx5_rx_descriptor_status; 5845 dev->tx_descriptor_status = mlx5_tx_descriptor_status; 5846 5847 return 0; 5848 } 5849 5850 /** 5851 * Query a flow. 5852 * 5853 * @see rte_flow_query() 5854 * @see rte_flow_ops 5855 */ 5856 static int 5857 flow_drv_query(struct rte_eth_dev *dev, 5858 uint32_t flow_idx, 5859 const struct rte_flow_action *actions, 5860 void *data, 5861 struct rte_flow_error *error) 5862 { 5863 struct mlx5_priv *priv = dev->data->dev_private; 5864 const struct mlx5_flow_driver_ops *fops; 5865 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5866 [MLX5_IPOOL_RTE_FLOW], 5867 flow_idx); 5868 enum mlx5_flow_drv_type ftype; 5869 5870 if (!flow) { 5871 return rte_flow_error_set(error, ENOENT, 5872 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5873 NULL, 5874 "invalid flow handle"); 5875 } 5876 ftype = flow->drv_type; 5877 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5878 fops = flow_get_drv_ops(ftype); 5879 5880 return fops->query(dev, flow, actions, data, error); 5881 } 5882 5883 /** 5884 * Query a flow. 5885 * 5886 * @see rte_flow_query() 5887 * @see rte_flow_ops 5888 */ 5889 int 5890 mlx5_flow_query(struct rte_eth_dev *dev, 5891 struct rte_flow *flow, 5892 const struct rte_flow_action *actions, 5893 void *data, 5894 struct rte_flow_error *error) 5895 { 5896 int ret; 5897 5898 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5899 error); 5900 if (ret < 0) 5901 return ret; 5902 return 0; 5903 } 5904 5905 /** 5906 * Convert a flow director filter to a generic flow. 5907 * 5908 * @param dev 5909 * Pointer to Ethernet device. 5910 * @param fdir_filter 5911 * Flow director filter to add. 5912 * @param attributes 5913 * Generic flow parameters structure. 5914 * 5915 * @return 5916 * 0 on success, a negative errno value otherwise and rte_errno is set. 5917 */ 5918 static int 5919 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5920 const struct rte_eth_fdir_filter *fdir_filter, 5921 struct mlx5_fdir *attributes) 5922 { 5923 struct mlx5_priv *priv = dev->data->dev_private; 5924 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5925 const struct rte_eth_fdir_masks *mask = 5926 &dev->data->dev_conf.fdir_conf.mask; 5927 5928 /* Validate queue number. */ 5929 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5930 DRV_LOG(ERR, "port %u invalid queue number %d", 5931 dev->data->port_id, fdir_filter->action.rx_queue); 5932 rte_errno = EINVAL; 5933 return -rte_errno; 5934 } 5935 attributes->attr.ingress = 1; 5936 attributes->items[0] = (struct rte_flow_item) { 5937 .type = RTE_FLOW_ITEM_TYPE_ETH, 5938 .spec = &attributes->l2, 5939 .mask = &attributes->l2_mask, 5940 }; 5941 switch (fdir_filter->action.behavior) { 5942 case RTE_ETH_FDIR_ACCEPT: 5943 attributes->actions[0] = (struct rte_flow_action){ 5944 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5945 .conf = &attributes->queue, 5946 }; 5947 break; 5948 case RTE_ETH_FDIR_REJECT: 5949 attributes->actions[0] = (struct rte_flow_action){ 5950 .type = RTE_FLOW_ACTION_TYPE_DROP, 5951 }; 5952 break; 5953 default: 5954 DRV_LOG(ERR, "port %u invalid behavior %d", 5955 dev->data->port_id, 5956 fdir_filter->action.behavior); 5957 rte_errno = ENOTSUP; 5958 return -rte_errno; 5959 } 5960 attributes->queue.index = fdir_filter->action.rx_queue; 5961 /* Handle L3. */ 5962 switch (fdir_filter->input.flow_type) { 5963 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5964 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5965 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5966 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5967 .src_addr = input->flow.ip4_flow.src_ip, 5968 .dst_addr = input->flow.ip4_flow.dst_ip, 5969 .time_to_live = input->flow.ip4_flow.ttl, 5970 .type_of_service = input->flow.ip4_flow.tos, 5971 }; 5972 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5973 .src_addr = mask->ipv4_mask.src_ip, 5974 .dst_addr = mask->ipv4_mask.dst_ip, 5975 .time_to_live = mask->ipv4_mask.ttl, 5976 .type_of_service = mask->ipv4_mask.tos, 5977 .next_proto_id = mask->ipv4_mask.proto, 5978 }; 5979 attributes->items[1] = (struct rte_flow_item){ 5980 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5981 .spec = &attributes->l3, 5982 .mask = &attributes->l3_mask, 5983 }; 5984 break; 5985 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5986 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5987 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5988 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5989 .hop_limits = input->flow.ipv6_flow.hop_limits, 5990 .proto = input->flow.ipv6_flow.proto, 5991 }; 5992 5993 memcpy(attributes->l3.ipv6.hdr.src_addr, 5994 input->flow.ipv6_flow.src_ip, 5995 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5996 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5997 input->flow.ipv6_flow.dst_ip, 5998 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5999 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 6000 mask->ipv6_mask.src_ip, 6001 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 6002 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 6003 mask->ipv6_mask.dst_ip, 6004 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 6005 attributes->items[1] = (struct rte_flow_item){ 6006 .type = RTE_FLOW_ITEM_TYPE_IPV6, 6007 .spec = &attributes->l3, 6008 .mask = &attributes->l3_mask, 6009 }; 6010 break; 6011 default: 6012 DRV_LOG(ERR, "port %u invalid flow type%d", 6013 dev->data->port_id, fdir_filter->input.flow_type); 6014 rte_errno = ENOTSUP; 6015 return -rte_errno; 6016 } 6017 /* Handle L4. */ 6018 switch (fdir_filter->input.flow_type) { 6019 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 6020 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 6021 .src_port = input->flow.udp4_flow.src_port, 6022 .dst_port = input->flow.udp4_flow.dst_port, 6023 }; 6024 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6025 .src_port = mask->src_port_mask, 6026 .dst_port = mask->dst_port_mask, 6027 }; 6028 attributes->items[2] = (struct rte_flow_item){ 6029 .type = RTE_FLOW_ITEM_TYPE_UDP, 6030 .spec = &attributes->l4, 6031 .mask = &attributes->l4_mask, 6032 }; 6033 break; 6034 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 6035 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6036 .src_port = input->flow.tcp4_flow.src_port, 6037 .dst_port = input->flow.tcp4_flow.dst_port, 6038 }; 6039 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6040 .src_port = mask->src_port_mask, 6041 .dst_port = mask->dst_port_mask, 6042 }; 6043 attributes->items[2] = (struct rte_flow_item){ 6044 .type = RTE_FLOW_ITEM_TYPE_TCP, 6045 .spec = &attributes->l4, 6046 .mask = &attributes->l4_mask, 6047 }; 6048 break; 6049 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 6050 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 6051 .src_port = input->flow.udp6_flow.src_port, 6052 .dst_port = input->flow.udp6_flow.dst_port, 6053 }; 6054 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6055 .src_port = mask->src_port_mask, 6056 .dst_port = mask->dst_port_mask, 6057 }; 6058 attributes->items[2] = (struct rte_flow_item){ 6059 .type = RTE_FLOW_ITEM_TYPE_UDP, 6060 .spec = &attributes->l4, 6061 .mask = &attributes->l4_mask, 6062 }; 6063 break; 6064 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 6065 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6066 .src_port = input->flow.tcp6_flow.src_port, 6067 .dst_port = input->flow.tcp6_flow.dst_port, 6068 }; 6069 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6070 .src_port = mask->src_port_mask, 6071 .dst_port = mask->dst_port_mask, 6072 }; 6073 attributes->items[2] = (struct rte_flow_item){ 6074 .type = RTE_FLOW_ITEM_TYPE_TCP, 6075 .spec = &attributes->l4, 6076 .mask = &attributes->l4_mask, 6077 }; 6078 break; 6079 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 6080 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 6081 break; 6082 default: 6083 DRV_LOG(ERR, "port %u invalid flow type%d", 6084 dev->data->port_id, fdir_filter->input.flow_type); 6085 rte_errno = ENOTSUP; 6086 return -rte_errno; 6087 } 6088 return 0; 6089 } 6090 6091 #define FLOW_FDIR_CMP(f1, f2, fld) \ 6092 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 6093 6094 /** 6095 * Compare two FDIR flows. If items and actions are identical, the two flows are 6096 * regarded as same. 6097 * 6098 * @param dev 6099 * Pointer to Ethernet device. 6100 * @param f1 6101 * FDIR flow to compare. 6102 * @param f2 6103 * FDIR flow to compare. 6104 * 6105 * @return 6106 * Zero on match, 1 otherwise. 6107 */ 6108 static int 6109 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 6110 { 6111 if (FLOW_FDIR_CMP(f1, f2, attr) || 6112 FLOW_FDIR_CMP(f1, f2, l2) || 6113 FLOW_FDIR_CMP(f1, f2, l2_mask) || 6114 FLOW_FDIR_CMP(f1, f2, l3) || 6115 FLOW_FDIR_CMP(f1, f2, l3_mask) || 6116 FLOW_FDIR_CMP(f1, f2, l4) || 6117 FLOW_FDIR_CMP(f1, f2, l4_mask) || 6118 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 6119 return 1; 6120 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 6121 FLOW_FDIR_CMP(f1, f2, queue)) 6122 return 1; 6123 return 0; 6124 } 6125 6126 /** 6127 * Search device flow list to find out a matched FDIR flow. 6128 * 6129 * @param dev 6130 * Pointer to Ethernet device. 6131 * @param fdir_flow 6132 * FDIR flow to lookup. 6133 * 6134 * @return 6135 * Index of flow if found, 0 otherwise. 6136 */ 6137 static uint32_t 6138 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 6139 { 6140 struct mlx5_priv *priv = dev->data->dev_private; 6141 uint32_t flow_idx = 0; 6142 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6143 6144 MLX5_ASSERT(fdir_flow); 6145 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6146 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 6147 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 6148 dev->data->port_id, flow_idx); 6149 flow_idx = priv_fdir_flow->rix_flow; 6150 break; 6151 } 6152 } 6153 return flow_idx; 6154 } 6155 6156 /** 6157 * Add new flow director filter and store it in list. 6158 * 6159 * @param dev 6160 * Pointer to Ethernet device. 6161 * @param fdir_filter 6162 * Flow director filter to add. 6163 * 6164 * @return 6165 * 0 on success, a negative errno value otherwise and rte_errno is set. 6166 */ 6167 static int 6168 flow_fdir_filter_add(struct rte_eth_dev *dev, 6169 const struct rte_eth_fdir_filter *fdir_filter) 6170 { 6171 struct mlx5_priv *priv = dev->data->dev_private; 6172 struct mlx5_fdir *fdir_flow; 6173 struct rte_flow *flow; 6174 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6175 uint32_t flow_idx; 6176 int ret; 6177 6178 fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, 6179 SOCKET_ID_ANY); 6180 if (!fdir_flow) { 6181 rte_errno = ENOMEM; 6182 return -rte_errno; 6183 } 6184 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 6185 if (ret) 6186 goto error; 6187 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 6188 if (flow_idx) { 6189 rte_errno = EEXIST; 6190 goto error; 6191 } 6192 priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, 6193 sizeof(struct mlx5_fdir_flow), 6194 0, SOCKET_ID_ANY); 6195 if (!priv_fdir_flow) { 6196 rte_errno = ENOMEM; 6197 goto error; 6198 } 6199 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 6200 fdir_flow->items, fdir_flow->actions, true, 6201 NULL); 6202 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 6203 if (!flow) 6204 goto error; 6205 flow->fdir = 1; 6206 priv_fdir_flow->fdir = fdir_flow; 6207 priv_fdir_flow->rix_flow = flow_idx; 6208 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 6209 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 6210 dev->data->port_id, (void *)flow); 6211 return 0; 6212 error: 6213 mlx5_free(priv_fdir_flow); 6214 mlx5_free(fdir_flow); 6215 return -rte_errno; 6216 } 6217 6218 /** 6219 * Delete specific filter. 6220 * 6221 * @param dev 6222 * Pointer to Ethernet device. 6223 * @param fdir_filter 6224 * Filter to be deleted. 6225 * 6226 * @return 6227 * 0 on success, a negative errno value otherwise and rte_errno is set. 6228 */ 6229 static int 6230 flow_fdir_filter_delete(struct rte_eth_dev *dev, 6231 const struct rte_eth_fdir_filter *fdir_filter) 6232 { 6233 struct mlx5_priv *priv = dev->data->dev_private; 6234 uint32_t flow_idx; 6235 struct mlx5_fdir fdir_flow = { 6236 .attr.group = 0, 6237 }; 6238 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6239 int ret; 6240 6241 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 6242 if (ret) 6243 return -rte_errno; 6244 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6245 /* Find the fdir in priv list */ 6246 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 6247 break; 6248 } 6249 if (!priv_fdir_flow) 6250 return 0; 6251 LIST_REMOVE(priv_fdir_flow, next); 6252 flow_idx = priv_fdir_flow->rix_flow; 6253 flow_list_destroy(dev, &priv->flows, flow_idx); 6254 mlx5_free(priv_fdir_flow->fdir); 6255 mlx5_free(priv_fdir_flow); 6256 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 6257 dev->data->port_id, flow_idx); 6258 return 0; 6259 } 6260 6261 /** 6262 * Update queue for specific filter. 6263 * 6264 * @param dev 6265 * Pointer to Ethernet device. 6266 * @param fdir_filter 6267 * Filter to be updated. 6268 * 6269 * @return 6270 * 0 on success, a negative errno value otherwise and rte_errno is set. 6271 */ 6272 static int 6273 flow_fdir_filter_update(struct rte_eth_dev *dev, 6274 const struct rte_eth_fdir_filter *fdir_filter) 6275 { 6276 int ret; 6277 6278 ret = flow_fdir_filter_delete(dev, fdir_filter); 6279 if (ret) 6280 return ret; 6281 return flow_fdir_filter_add(dev, fdir_filter); 6282 } 6283 6284 /** 6285 * Flush all filters. 6286 * 6287 * @param dev 6288 * Pointer to Ethernet device. 6289 */ 6290 static void 6291 flow_fdir_filter_flush(struct rte_eth_dev *dev) 6292 { 6293 struct mlx5_priv *priv = dev->data->dev_private; 6294 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6295 6296 while (!LIST_EMPTY(&priv->fdir_flows)) { 6297 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 6298 LIST_REMOVE(priv_fdir_flow, next); 6299 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 6300 mlx5_free(priv_fdir_flow->fdir); 6301 mlx5_free(priv_fdir_flow); 6302 } 6303 } 6304 6305 /** 6306 * Get flow director information. 6307 * 6308 * @param dev 6309 * Pointer to Ethernet device. 6310 * @param[out] fdir_info 6311 * Resulting flow director information. 6312 */ 6313 static void 6314 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 6315 { 6316 struct rte_eth_fdir_masks *mask = 6317 &dev->data->dev_conf.fdir_conf.mask; 6318 6319 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 6320 fdir_info->guarant_spc = 0; 6321 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 6322 fdir_info->max_flexpayload = 0; 6323 fdir_info->flow_types_mask[0] = 0; 6324 fdir_info->flex_payload_unit = 0; 6325 fdir_info->max_flex_payload_segment_num = 0; 6326 fdir_info->flex_payload_limit = 0; 6327 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 6328 } 6329 6330 /** 6331 * Deal with flow director operations. 6332 * 6333 * @param dev 6334 * Pointer to Ethernet device. 6335 * @param filter_op 6336 * Operation to perform. 6337 * @param arg 6338 * Pointer to operation-specific structure. 6339 * 6340 * @return 6341 * 0 on success, a negative errno value otherwise and rte_errno is set. 6342 */ 6343 static int 6344 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 6345 void *arg) 6346 { 6347 enum rte_fdir_mode fdir_mode = 6348 dev->data->dev_conf.fdir_conf.mode; 6349 6350 if (filter_op == RTE_ETH_FILTER_NOP) 6351 return 0; 6352 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 6353 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 6354 DRV_LOG(ERR, "port %u flow director mode %d not supported", 6355 dev->data->port_id, fdir_mode); 6356 rte_errno = EINVAL; 6357 return -rte_errno; 6358 } 6359 switch (filter_op) { 6360 case RTE_ETH_FILTER_ADD: 6361 return flow_fdir_filter_add(dev, arg); 6362 case RTE_ETH_FILTER_UPDATE: 6363 return flow_fdir_filter_update(dev, arg); 6364 case RTE_ETH_FILTER_DELETE: 6365 return flow_fdir_filter_delete(dev, arg); 6366 case RTE_ETH_FILTER_FLUSH: 6367 flow_fdir_filter_flush(dev); 6368 break; 6369 case RTE_ETH_FILTER_INFO: 6370 flow_fdir_info_get(dev, arg); 6371 break; 6372 default: 6373 DRV_LOG(DEBUG, "port %u unknown operation %u", 6374 dev->data->port_id, filter_op); 6375 rte_errno = EINVAL; 6376 return -rte_errno; 6377 } 6378 return 0; 6379 } 6380 6381 /** 6382 * Manage filter operations. 6383 * 6384 * @param dev 6385 * Pointer to Ethernet device structure. 6386 * @param filter_type 6387 * Filter type. 6388 * @param filter_op 6389 * Operation to perform. 6390 * @param arg 6391 * Pointer to operation-specific structure. 6392 * 6393 * @return 6394 * 0 on success, a negative errno value otherwise and rte_errno is set. 6395 */ 6396 int 6397 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 6398 enum rte_filter_type filter_type, 6399 enum rte_filter_op filter_op, 6400 void *arg) 6401 { 6402 switch (filter_type) { 6403 case RTE_ETH_FILTER_GENERIC: 6404 if (filter_op != RTE_ETH_FILTER_GET) { 6405 rte_errno = EINVAL; 6406 return -rte_errno; 6407 } 6408 *(const void **)arg = &mlx5_flow_ops; 6409 return 0; 6410 case RTE_ETH_FILTER_FDIR: 6411 return flow_fdir_ctrl_func(dev, filter_op, arg); 6412 default: 6413 DRV_LOG(ERR, "port %u filter type (%d) not supported", 6414 dev->data->port_id, filter_type); 6415 rte_errno = ENOTSUP; 6416 return -rte_errno; 6417 } 6418 return 0; 6419 } 6420 6421 /** 6422 * Create the needed meter and suffix tables. 6423 * 6424 * @param[in] dev 6425 * Pointer to Ethernet device. 6426 * @param[in] fm 6427 * Pointer to the flow meter. 6428 * 6429 * @return 6430 * Pointer to table set on success, NULL otherwise. 6431 */ 6432 struct mlx5_meter_domains_infos * 6433 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev, 6434 const struct mlx5_flow_meter *fm) 6435 { 6436 const struct mlx5_flow_driver_ops *fops; 6437 6438 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6439 return fops->create_mtr_tbls(dev, fm); 6440 } 6441 6442 /** 6443 * Destroy the meter table set. 6444 * 6445 * @param[in] dev 6446 * Pointer to Ethernet device. 6447 * @param[in] tbl 6448 * Pointer to the meter table set. 6449 * 6450 * @return 6451 * 0 on success. 6452 */ 6453 int 6454 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev, 6455 struct mlx5_meter_domains_infos *tbls) 6456 { 6457 const struct mlx5_flow_driver_ops *fops; 6458 6459 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6460 return fops->destroy_mtr_tbls(dev, tbls); 6461 } 6462 6463 /** 6464 * Create policer rules. 6465 * 6466 * @param[in] dev 6467 * Pointer to Ethernet device. 6468 * @param[in] fm 6469 * Pointer to flow meter structure. 6470 * @param[in] attr 6471 * Pointer to flow attributes. 6472 * 6473 * @return 6474 * 0 on success, -1 otherwise. 6475 */ 6476 int 6477 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev, 6478 struct mlx5_flow_meter *fm, 6479 const struct rte_flow_attr *attr) 6480 { 6481 const struct mlx5_flow_driver_ops *fops; 6482 6483 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6484 return fops->create_policer_rules(dev, fm, attr); 6485 } 6486 6487 /** 6488 * Destroy policer rules. 6489 * 6490 * @param[in] fm 6491 * Pointer to flow meter structure. 6492 * @param[in] attr 6493 * Pointer to flow attributes. 6494 * 6495 * @return 6496 * 0 on success, -1 otherwise. 6497 */ 6498 int 6499 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev, 6500 struct mlx5_flow_meter *fm, 6501 const struct rte_flow_attr *attr) 6502 { 6503 const struct mlx5_flow_driver_ops *fops; 6504 6505 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6506 return fops->destroy_policer_rules(dev, fm, attr); 6507 } 6508 6509 /** 6510 * Allocate a counter. 6511 * 6512 * @param[in] dev 6513 * Pointer to Ethernet device structure. 6514 * 6515 * @return 6516 * Index to allocated counter on success, 0 otherwise. 6517 */ 6518 uint32_t 6519 mlx5_counter_alloc(struct rte_eth_dev *dev) 6520 { 6521 const struct mlx5_flow_driver_ops *fops; 6522 struct rte_flow_attr attr = { .transfer = 0 }; 6523 6524 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6525 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6526 return fops->counter_alloc(dev); 6527 } 6528 DRV_LOG(ERR, 6529 "port %u counter allocate is not supported.", 6530 dev->data->port_id); 6531 return 0; 6532 } 6533 6534 /** 6535 * Free a counter. 6536 * 6537 * @param[in] dev 6538 * Pointer to Ethernet device structure. 6539 * @param[in] cnt 6540 * Index to counter to be free. 6541 */ 6542 void 6543 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt) 6544 { 6545 const struct mlx5_flow_driver_ops *fops; 6546 struct rte_flow_attr attr = { .transfer = 0 }; 6547 6548 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6549 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6550 fops->counter_free(dev, cnt); 6551 return; 6552 } 6553 DRV_LOG(ERR, 6554 "port %u counter free is not supported.", 6555 dev->data->port_id); 6556 } 6557 6558 /** 6559 * Query counter statistics. 6560 * 6561 * @param[in] dev 6562 * Pointer to Ethernet device structure. 6563 * @param[in] cnt 6564 * Index to counter to query. 6565 * @param[in] clear 6566 * Set to clear counter statistics. 6567 * @param[out] pkts 6568 * The counter hits packets number to save. 6569 * @param[out] bytes 6570 * The counter hits bytes number to save. 6571 * 6572 * @return 6573 * 0 on success, a negative errno value otherwise. 6574 */ 6575 int 6576 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, 6577 bool clear, uint64_t *pkts, uint64_t *bytes) 6578 { 6579 const struct mlx5_flow_driver_ops *fops; 6580 struct rte_flow_attr attr = { .transfer = 0 }; 6581 6582 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6583 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6584 return fops->counter_query(dev, cnt, clear, pkts, bytes); 6585 } 6586 DRV_LOG(ERR, 6587 "port %u counter query is not supported.", 6588 dev->data->port_id); 6589 return -ENOTSUP; 6590 } 6591 6592 #define MLX5_POOL_QUERY_FREQ_US 1000000 6593 6594 /** 6595 * Get number of all validate pools. 6596 * 6597 * @param[in] sh 6598 * Pointer to mlx5_dev_ctx_shared object. 6599 * 6600 * @return 6601 * The number of all validate pools. 6602 */ 6603 static uint32_t 6604 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh) 6605 { 6606 int i; 6607 uint32_t pools_n = 0; 6608 6609 for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) 6610 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid); 6611 return pools_n; 6612 } 6613 6614 /** 6615 * Set the periodic procedure for triggering asynchronous batch queries for all 6616 * the counter pools. 6617 * 6618 * @param[in] sh 6619 * Pointer to mlx5_dev_ctx_shared object. 6620 */ 6621 void 6622 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) 6623 { 6624 uint32_t pools_n, us; 6625 6626 pools_n = mlx5_get_all_valid_pool_count(sh); 6627 us = MLX5_POOL_QUERY_FREQ_US / pools_n; 6628 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); 6629 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { 6630 sh->cmng.query_thread_on = 0; 6631 DRV_LOG(ERR, "Cannot reinitialize query alarm"); 6632 } else { 6633 sh->cmng.query_thread_on = 1; 6634 } 6635 } 6636 6637 /** 6638 * The periodic procedure for triggering asynchronous batch queries for all the 6639 * counter pools. This function is probably called by the host thread. 6640 * 6641 * @param[in] arg 6642 * The parameter for the alarm process. 6643 */ 6644 void 6645 mlx5_flow_query_alarm(void *arg) 6646 { 6647 struct mlx5_dev_ctx_shared *sh = arg; 6648 struct mlx5_devx_obj *dcs; 6649 uint16_t offset; 6650 int ret; 6651 uint8_t batch = sh->cmng.batch; 6652 uint8_t age = sh->cmng.age; 6653 uint16_t pool_index = sh->cmng.pool_index; 6654 struct mlx5_pools_container *cont; 6655 struct mlx5_flow_counter_pool *pool; 6656 int cont_loop = MLX5_CCONT_TYPE_MAX; 6657 6658 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) 6659 goto set_alarm; 6660 next_container: 6661 cont = MLX5_CNT_CONTAINER(sh, batch, age); 6662 rte_spinlock_lock(&cont->resize_sl); 6663 if (!cont->pools) { 6664 rte_spinlock_unlock(&cont->resize_sl); 6665 /* Check if all the containers are empty. */ 6666 if (unlikely(--cont_loop == 0)) 6667 goto set_alarm; 6668 batch ^= 0x1; 6669 pool_index = 0; 6670 if (batch == 0 && pool_index == 0) { 6671 age ^= 0x1; 6672 sh->cmng.batch = batch; 6673 sh->cmng.age = age; 6674 } 6675 goto next_container; 6676 } 6677 pool = cont->pools[pool_index]; 6678 rte_spinlock_unlock(&cont->resize_sl); 6679 if (pool->raw_hw) 6680 /* There is a pool query in progress. */ 6681 goto set_alarm; 6682 pool->raw_hw = 6683 LIST_FIRST(&sh->cmng.free_stat_raws); 6684 if (!pool->raw_hw) 6685 /* No free counter statistics raw memory. */ 6686 goto set_alarm; 6687 dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read 6688 (&pool->a64_dcs); 6689 if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) { 6690 /* Pool without valid counter. */ 6691 pool->raw_hw = NULL; 6692 goto next_pool; 6693 } 6694 offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; 6695 /* 6696 * Identify the counters released between query trigger and query 6697 * handle more efficiently. The counter released in this gap period 6698 * should wait for a new round of query as the new arrived packets 6699 * will not be taken into account. 6700 */ 6701 pool->query_gen++; 6702 ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - 6703 offset, NULL, NULL, 6704 pool->raw_hw->mem_mng->dm->id, 6705 (void *)(uintptr_t) 6706 (pool->raw_hw->data + offset), 6707 sh->devx_comp, 6708 (uint64_t)(uintptr_t)pool); 6709 if (ret) { 6710 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" 6711 " %d", pool->min_dcs->id); 6712 pool->raw_hw = NULL; 6713 goto set_alarm; 6714 } 6715 pool->raw_hw->min_dcs_id = dcs->id; 6716 LIST_REMOVE(pool->raw_hw, next); 6717 sh->cmng.pending_queries++; 6718 next_pool: 6719 pool_index++; 6720 if (pool_index >= rte_atomic16_read(&cont->n_valid)) { 6721 batch ^= 0x1; 6722 pool_index = 0; 6723 if (batch == 0 && pool_index == 0) 6724 age ^= 0x1; 6725 } 6726 set_alarm: 6727 sh->cmng.batch = batch; 6728 sh->cmng.pool_index = pool_index; 6729 sh->cmng.age = age; 6730 mlx5_set_query_alarm(sh); 6731 } 6732 6733 /** 6734 * Check and callback event for new aged flow in the counter pool 6735 * 6736 * @param[in] sh 6737 * Pointer to mlx5_dev_ctx_shared object. 6738 * @param[in] pool 6739 * Pointer to Current counter pool. 6740 */ 6741 static void 6742 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, 6743 struct mlx5_flow_counter_pool *pool) 6744 { 6745 struct mlx5_priv *priv; 6746 struct mlx5_flow_counter *cnt; 6747 struct mlx5_age_info *age_info; 6748 struct mlx5_age_param *age_param; 6749 struct mlx5_counter_stats_raw *cur = pool->raw_hw; 6750 struct mlx5_counter_stats_raw *prev = pool->raw; 6751 const uint64_t curr_time = MLX5_CURR_TIME_SEC; 6752 const uint32_t time_delta = curr_time - pool->time_of_last_age_check; 6753 uint16_t expected = AGE_CANDIDATE; 6754 uint32_t i; 6755 6756 pool->time_of_last_age_check = curr_time; 6757 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { 6758 cnt = MLX5_POOL_GET_CNT(pool, i); 6759 age_param = MLX5_CNT_TO_AGE(cnt); 6760 if (__atomic_load_n(&age_param->state, 6761 __ATOMIC_RELAXED) != AGE_CANDIDATE) 6762 continue; 6763 if (cur->data[i].hits != prev->data[i].hits) { 6764 __atomic_store_n(&age_param->sec_since_last_hit, 0, 6765 __ATOMIC_RELAXED); 6766 continue; 6767 } 6768 if (__atomic_add_fetch(&age_param->sec_since_last_hit, 6769 time_delta, 6770 __ATOMIC_RELAXED) <= age_param->timeout) 6771 continue; 6772 /** 6773 * Hold the lock first, or if between the 6774 * state AGE_TMOUT and tailq operation the 6775 * release happened, the release procedure 6776 * may delete a non-existent tailq node. 6777 */ 6778 priv = rte_eth_devices[age_param->port_id].data->dev_private; 6779 age_info = GET_PORT_AGE_INFO(priv); 6780 rte_spinlock_lock(&age_info->aged_sl); 6781 if (__atomic_compare_exchange_n(&age_param->state, &expected, 6782 AGE_TMOUT, false, 6783 __ATOMIC_RELAXED, 6784 __ATOMIC_RELAXED)) { 6785 TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); 6786 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 6787 } 6788 rte_spinlock_unlock(&age_info->aged_sl); 6789 } 6790 for (i = 0; i < sh->max_port; i++) { 6791 age_info = &sh->port[i].age_info; 6792 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) 6793 continue; 6794 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) 6795 rte_eth_dev_callback_process 6796 (&rte_eth_devices[sh->port[i].devx_ih_port_id], 6797 RTE_ETH_EVENT_FLOW_AGED, NULL); 6798 age_info->flags = 0; 6799 } 6800 } 6801 6802 /** 6803 * Handler for the HW respond about ready values from an asynchronous batch 6804 * query. This function is probably called by the host thread. 6805 * 6806 * @param[in] sh 6807 * The pointer to the shared device context. 6808 * @param[in] async_id 6809 * The Devx async ID. 6810 * @param[in] status 6811 * The status of the completion. 6812 */ 6813 void 6814 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, 6815 uint64_t async_id, int status) 6816 { 6817 struct mlx5_flow_counter_pool *pool = 6818 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; 6819 struct mlx5_counter_stats_raw *raw_to_free; 6820 uint8_t age = !!IS_AGE_POOL(pool); 6821 uint8_t query_gen = pool->query_gen ^ 1; 6822 struct mlx5_pools_container *cont = 6823 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age); 6824 6825 if (unlikely(status)) { 6826 raw_to_free = pool->raw_hw; 6827 } else { 6828 raw_to_free = pool->raw; 6829 if (IS_AGE_POOL(pool)) 6830 mlx5_flow_aging_check(sh, pool); 6831 rte_spinlock_lock(&pool->sl); 6832 pool->raw = pool->raw_hw; 6833 rte_spinlock_unlock(&pool->sl); 6834 /* Be sure the new raw counters data is updated in memory. */ 6835 rte_io_wmb(); 6836 if (!TAILQ_EMPTY(&pool->counters[query_gen])) { 6837 rte_spinlock_lock(&cont->csl); 6838 TAILQ_CONCAT(&cont->counters, 6839 &pool->counters[query_gen], next); 6840 rte_spinlock_unlock(&cont->csl); 6841 } 6842 } 6843 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); 6844 pool->raw_hw = NULL; 6845 sh->cmng.pending_queries--; 6846 } 6847 6848 /** 6849 * Translate the rte_flow group index to HW table value. 6850 * 6851 * @param[in] attributes 6852 * Pointer to flow attributes 6853 * @param[in] external 6854 * Value is part of flow rule created by request external to PMD. 6855 * @param[in] group 6856 * rte_flow group index value. 6857 * @param[out] fdb_def_rule 6858 * Whether fdb jump to table 1 is configured. 6859 * @param[out] table 6860 * HW table value. 6861 * @param[out] error 6862 * Pointer to error structure. 6863 * 6864 * @return 6865 * 0 on success, a negative errno value otherwise and rte_errno is set. 6866 */ 6867 int 6868 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, 6869 uint32_t group, bool fdb_def_rule, uint32_t *table, 6870 struct rte_flow_error *error) 6871 { 6872 if (attributes->transfer && external && fdb_def_rule) { 6873 if (group == UINT32_MAX) 6874 return rte_flow_error_set 6875 (error, EINVAL, 6876 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 6877 NULL, 6878 "group index not supported"); 6879 *table = group + 1; 6880 } else { 6881 *table = group; 6882 } 6883 return 0; 6884 } 6885 6886 /** 6887 * Discover availability of metadata reg_c's. 6888 * 6889 * Iteratively use test flows to check availability. 6890 * 6891 * @param[in] dev 6892 * Pointer to the Ethernet device structure. 6893 * 6894 * @return 6895 * 0 on success, a negative errno value otherwise and rte_errno is set. 6896 */ 6897 int 6898 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) 6899 { 6900 struct mlx5_priv *priv = dev->data->dev_private; 6901 struct mlx5_dev_config *config = &priv->config; 6902 enum modify_reg idx; 6903 int n = 0; 6904 6905 /* reg_c[0] and reg_c[1] are reserved. */ 6906 config->flow_mreg_c[n++] = REG_C_0; 6907 config->flow_mreg_c[n++] = REG_C_1; 6908 /* Discover availability of other reg_c's. */ 6909 for (idx = REG_C_2; idx <= REG_C_7; ++idx) { 6910 struct rte_flow_attr attr = { 6911 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 6912 .priority = MLX5_FLOW_PRIO_RSVD, 6913 .ingress = 1, 6914 }; 6915 struct rte_flow_item items[] = { 6916 [0] = { 6917 .type = RTE_FLOW_ITEM_TYPE_END, 6918 }, 6919 }; 6920 struct rte_flow_action actions[] = { 6921 [0] = { 6922 .type = (enum rte_flow_action_type) 6923 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 6924 .conf = &(struct mlx5_flow_action_copy_mreg){ 6925 .src = REG_C_1, 6926 .dst = idx, 6927 }, 6928 }, 6929 [1] = { 6930 .type = RTE_FLOW_ACTION_TYPE_JUMP, 6931 .conf = &(struct rte_flow_action_jump){ 6932 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 6933 }, 6934 }, 6935 [2] = { 6936 .type = RTE_FLOW_ACTION_TYPE_END, 6937 }, 6938 }; 6939 uint32_t flow_idx; 6940 struct rte_flow *flow; 6941 struct rte_flow_error error; 6942 6943 if (!config->dv_flow_en) 6944 break; 6945 /* Create internal flow, validation skips copy action. */ 6946 flow_idx = flow_list_create(dev, NULL, &attr, items, 6947 actions, false, &error); 6948 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 6949 flow_idx); 6950 if (!flow) 6951 continue; 6952 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL)) 6953 config->flow_mreg_c[n++] = idx; 6954 flow_list_destroy(dev, NULL, flow_idx); 6955 } 6956 for (; n < MLX5_MREG_C_NUM; ++n) 6957 config->flow_mreg_c[n] = REG_NON; 6958 return 0; 6959 } 6960 6961 /** 6962 * Dump flow raw hw data to file 6963 * 6964 * @param[in] dev 6965 * The pointer to Ethernet device. 6966 * @param[in] file 6967 * A pointer to a file for output. 6968 * @param[out] error 6969 * Perform verbose error reporting if not NULL. PMDs initialize this 6970 * structure in case of error only. 6971 * @return 6972 * 0 on success, a nagative value otherwise. 6973 */ 6974 int 6975 mlx5_flow_dev_dump(struct rte_eth_dev *dev, 6976 FILE *file, 6977 struct rte_flow_error *error __rte_unused) 6978 { 6979 struct mlx5_priv *priv = dev->data->dev_private; 6980 struct mlx5_dev_ctx_shared *sh = priv->sh; 6981 6982 if (!priv->config.dv_flow_en) { 6983 if (fputs("device dv flow disabled\n", file) <= 0) 6984 return -errno; 6985 return -ENOTSUP; 6986 } 6987 return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain, 6988 sh->tx_domain, file); 6989 } 6990 6991 /** 6992 * Get aged-out flows. 6993 * 6994 * @param[in] dev 6995 * Pointer to the Ethernet device structure. 6996 * @param[in] context 6997 * The address of an array of pointers to the aged-out flows contexts. 6998 * @param[in] nb_countexts 6999 * The length of context array pointers. 7000 * @param[out] error 7001 * Perform verbose error reporting if not NULL. Initialized in case of 7002 * error only. 7003 * 7004 * @return 7005 * how many contexts get in success, otherwise negative errno value. 7006 * if nb_contexts is 0, return the amount of all aged contexts. 7007 * if nb_contexts is not 0 , return the amount of aged flows reported 7008 * in the context array. 7009 */ 7010 int 7011 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, 7012 uint32_t nb_contexts, struct rte_flow_error *error) 7013 { 7014 const struct mlx5_flow_driver_ops *fops; 7015 struct rte_flow_attr attr = { .transfer = 0 }; 7016 7017 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 7018 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 7019 return fops->get_aged_flows(dev, contexts, nb_contexts, 7020 error); 7021 } 7022 DRV_LOG(ERR, 7023 "port %u get aged flows is not supported.", 7024 dev->data->port_id); 7025 return -ENOTSUP; 7026 } 7027