1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <netinet/in.h> 7 #include <sys/queue.h> 8 #include <stdalign.h> 9 #include <stdint.h> 10 #include <string.h> 11 #include <stdbool.h> 12 13 #include <rte_common.h> 14 #include <rte_ether.h> 15 #include <rte_ethdev_driver.h> 16 #include <rte_flow.h> 17 #include <rte_cycles.h> 18 #include <rte_flow_driver.h> 19 #include <rte_malloc.h> 20 #include <rte_ip.h> 21 22 #include <mlx5_glue.h> 23 #include <mlx5_devx_cmds.h> 24 #include <mlx5_prm.h> 25 #include <mlx5_malloc.h> 26 27 #include "mlx5_defs.h" 28 #include "mlx5.h" 29 #include "mlx5_flow.h" 30 #include "mlx5_flow_os.h" 31 #include "mlx5_rxtx.h" 32 33 /** Device flow drivers. */ 34 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; 35 36 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops; 37 38 const struct mlx5_flow_driver_ops *flow_drv_ops[] = { 39 [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops, 40 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 41 [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops, 42 #endif 43 [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops, 44 [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops 45 }; 46 47 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */ 48 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \ 49 (const int []){ \ 50 __VA_ARGS__, 0, \ 51 } 52 53 /** Node object of input graph for mlx5_flow_expand_rss(). */ 54 struct mlx5_flow_expand_node { 55 const int *const next; 56 /**< 57 * List of next node indexes. Index 0 is interpreted as a terminator. 58 */ 59 const enum rte_flow_item_type type; 60 /**< Pattern item type of current node. */ 61 uint64_t rss_types; 62 /**< 63 * RSS types bit-field associated with this node 64 * (see ETH_RSS_* definitions). 65 */ 66 }; 67 68 /** Object returned by mlx5_flow_expand_rss(). */ 69 struct mlx5_flow_expand_rss { 70 uint32_t entries; 71 /**< Number of entries @p patterns and @p priorities. */ 72 struct { 73 struct rte_flow_item *pattern; /**< Expanded pattern array. */ 74 uint32_t priority; /**< Priority offset for each expansion. 
*/ 75 } entry[]; 76 }; 77 78 static enum rte_flow_item_type 79 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) 80 { 81 enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID; 82 uint16_t ether_type = 0; 83 uint16_t ether_type_m; 84 uint8_t ip_next_proto = 0; 85 uint8_t ip_next_proto_m; 86 87 if (item == NULL || item->spec == NULL) 88 return ret; 89 switch (item->type) { 90 case RTE_FLOW_ITEM_TYPE_ETH: 91 if (item->mask) 92 ether_type_m = ((const struct rte_flow_item_eth *) 93 (item->mask))->type; 94 else 95 ether_type_m = rte_flow_item_eth_mask.type; 96 if (ether_type_m != RTE_BE16(0xFFFF)) 97 break; 98 ether_type = ((const struct rte_flow_item_eth *) 99 (item->spec))->type; 100 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 101 ret = RTE_FLOW_ITEM_TYPE_IPV4; 102 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 103 ret = RTE_FLOW_ITEM_TYPE_IPV6; 104 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 105 ret = RTE_FLOW_ITEM_TYPE_VLAN; 106 else 107 ret = RTE_FLOW_ITEM_TYPE_END; 108 break; 109 case RTE_FLOW_ITEM_TYPE_VLAN: 110 if (item->mask) 111 ether_type_m = ((const struct rte_flow_item_vlan *) 112 (item->mask))->inner_type; 113 else 114 ether_type_m = rte_flow_item_vlan_mask.inner_type; 115 if (ether_type_m != RTE_BE16(0xFFFF)) 116 break; 117 ether_type = ((const struct rte_flow_item_vlan *) 118 (item->spec))->inner_type; 119 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) 120 ret = RTE_FLOW_ITEM_TYPE_IPV4; 121 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) 122 ret = RTE_FLOW_ITEM_TYPE_IPV6; 123 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) 124 ret = RTE_FLOW_ITEM_TYPE_VLAN; 125 else 126 ret = RTE_FLOW_ITEM_TYPE_END; 127 break; 128 case RTE_FLOW_ITEM_TYPE_IPV4: 129 if (item->mask) 130 ip_next_proto_m = ((const struct rte_flow_item_ipv4 *) 131 (item->mask))->hdr.next_proto_id; 132 else 133 ip_next_proto_m = 134 rte_flow_item_ipv4_mask.hdr.next_proto_id; 135 if (ip_next_proto_m != 0xFF) 136 break; 137 ip_next_proto = ((const struct rte_flow_item_ipv4 *) 138 (item->spec))->hdr.next_proto_id; 139 if (ip_next_proto == IPPROTO_UDP) 140 ret = RTE_FLOW_ITEM_TYPE_UDP; 141 else if (ip_next_proto == IPPROTO_TCP) 142 ret = RTE_FLOW_ITEM_TYPE_TCP; 143 else if (ip_next_proto == IPPROTO_IP) 144 ret = RTE_FLOW_ITEM_TYPE_IPV4; 145 else if (ip_next_proto == IPPROTO_IPV6) 146 ret = RTE_FLOW_ITEM_TYPE_IPV6; 147 else 148 ret = RTE_FLOW_ITEM_TYPE_END; 149 break; 150 case RTE_FLOW_ITEM_TYPE_IPV6: 151 if (item->mask) 152 ip_next_proto_m = ((const struct rte_flow_item_ipv6 *) 153 (item->mask))->hdr.proto; 154 else 155 ip_next_proto_m = 156 rte_flow_item_ipv6_mask.hdr.proto; 157 if (ip_next_proto_m != 0xFF) 158 break; 159 ip_next_proto = ((const struct rte_flow_item_ipv6 *) 160 (item->spec))->hdr.proto; 161 if (ip_next_proto == IPPROTO_UDP) 162 ret = RTE_FLOW_ITEM_TYPE_UDP; 163 else if (ip_next_proto == IPPROTO_TCP) 164 ret = RTE_FLOW_ITEM_TYPE_TCP; 165 else if (ip_next_proto == IPPROTO_IP) 166 ret = RTE_FLOW_ITEM_TYPE_IPV4; 167 else if (ip_next_proto == IPPROTO_IPV6) 168 ret = RTE_FLOW_ITEM_TYPE_IPV6; 169 else 170 ret = RTE_FLOW_ITEM_TYPE_END; 171 break; 172 default: 173 ret = RTE_FLOW_ITEM_TYPE_VOID; 174 break; 175 } 176 return ret; 177 } 178 179 /** 180 * Expand RSS flows into several possible flows according to the RSS hash 181 * fields requested and the driver capabilities. 182 * 183 * @param[out] buf 184 * Buffer to store the result expansion. 185 * @param[in] size 186 * Buffer size in bytes. 
If 0, @p buf can be NULL. 187 * @param[in] pattern 188 * User flow pattern. 189 * @param[in] types 190 * RSS types to expand (see ETH_RSS_* definitions). 191 * @param[in] graph 192 * Input graph to expand @p pattern according to @p types. 193 * @param[in] graph_root_index 194 * Index of root node in @p graph, typically 0. 195 * 196 * @return 197 * A positive value representing the size of @p buf in bytes regardless of 198 * @p size on success, a negative errno value otherwise and rte_errno is 199 * set, the following errors are defined: 200 * 201 * -E2BIG: graph-depth @p graph is too deep. 202 */ 203 static int 204 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, 205 const struct rte_flow_item *pattern, uint64_t types, 206 const struct mlx5_flow_expand_node graph[], 207 int graph_root_index) 208 { 209 const int elt_n = 8; 210 const struct rte_flow_item *item; 211 const struct mlx5_flow_expand_node *node = &graph[graph_root_index]; 212 const int *next_node; 213 const int *stack[elt_n]; 214 int stack_pos = 0; 215 struct rte_flow_item flow_items[elt_n]; 216 unsigned int i; 217 size_t lsize; 218 size_t user_pattern_size = 0; 219 void *addr = NULL; 220 const struct mlx5_flow_expand_node *next = NULL; 221 struct rte_flow_item missed_item; 222 int missed = 0; 223 int elt = 0; 224 const struct rte_flow_item *last_item = NULL; 225 226 memset(&missed_item, 0, sizeof(missed_item)); 227 lsize = offsetof(struct mlx5_flow_expand_rss, entry) + 228 elt_n * sizeof(buf->entry[0]); 229 if (lsize <= size) { 230 buf->entry[0].priority = 0; 231 buf->entry[0].pattern = (void *)&buf->entry[elt_n]; 232 buf->entries = 0; 233 addr = buf->entry[0].pattern; 234 } 235 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 236 if (item->type != RTE_FLOW_ITEM_TYPE_VOID) 237 last_item = item; 238 for (i = 0; node->next && node->next[i]; ++i) { 239 next = &graph[node->next[i]]; 240 if (next->type == item->type) 241 break; 242 } 243 if (next) 244 node = next; 245 user_pattern_size += sizeof(*item); 246 } 247 user_pattern_size += sizeof(*item); /* Handle END item. */ 248 lsize += user_pattern_size; 249 /* Copy the user pattern in the first entry of the buffer. */ 250 if (lsize <= size) { 251 rte_memcpy(addr, pattern, user_pattern_size); 252 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 253 buf->entries = 1; 254 } 255 /* Start expanding. */ 256 memset(flow_items, 0, sizeof(flow_items)); 257 user_pattern_size -= sizeof(*item); 258 /* 259 * Check if the last valid item has spec set, need complete pattern, 260 * and the pattern can be used for expansion. 261 */ 262 missed_item.type = mlx5_flow_expand_rss_item_complete(last_item); 263 if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) { 264 /* Item type END indicates expansion is not required. */ 265 return lsize; 266 } 267 if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) { 268 next = NULL; 269 missed = 1; 270 for (i = 0; node->next && node->next[i]; ++i) { 271 next = &graph[node->next[i]]; 272 if (next->type == missed_item.type) { 273 flow_items[0].type = missed_item.type; 274 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; 275 break; 276 } 277 next = NULL; 278 } 279 } 280 if (next && missed) { 281 elt = 2; /* missed item + item end. 
 */
		node = next;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if ((node->rss_types & types) && lsize <= size) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	}
	memset(flow_items, 0, sizeof(flow_items));
	next_node = node->next;
	stack[stack_pos] = next_node;
	node = next_node ? &graph[*next_node] : NULL;
	while (node) {
		flow_items[stack_pos].type = node->type;
		if (node->rss_types & types) {
			/*
			 * Compute the number of items to copy from the
			 * expansion and copy it.
			 * When stack_pos is 0, there is one element in it,
			 * plus the additional END item.
			 */
			elt = stack_pos + 2;
			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
			lsize += elt * sizeof(*item) + user_pattern_size;
			if (lsize <= size) {
				size_t n = elt * sizeof(*item);

				buf->entry[buf->entries].priority =
					stack_pos + 1 + missed;
				buf->entry[buf->entries].pattern = addr;
				buf->entries++;
				rte_memcpy(addr, buf->entry[0].pattern,
					   user_pattern_size);
				addr = (void *)(((uintptr_t)addr) +
						user_pattern_size);
				rte_memcpy(addr, &missed_item,
					   missed * sizeof(*item));
				addr = (void *)(((uintptr_t)addr) +
						missed * sizeof(*item));
				rte_memcpy(addr, flow_items, n);
				addr = (void *)(((uintptr_t)addr) + n);
			}
		}
		/* Go deeper. */
		if (node->next) {
			next_node = node->next;
			if (stack_pos++ == elt_n) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			stack[stack_pos] = next_node;
		} else if (*(next_node + 1)) {
			/* Follow up with the next possibility. */
			++next_node;
		} else {
			/* Move to the next path. */
			if (stack_pos)
				next_node = stack[--stack_pos];
			next_node++;
			stack[stack_pos] = next_node;
		}
		node = *next_node ?
&graph[*next_node] : NULL; 350 }; 351 /* no expanded flows but we have missed item, create one rule for it */ 352 if (buf->entries == 1 && missed != 0) { 353 elt = 2; 354 lsize += elt * sizeof(*item) + user_pattern_size; 355 if (lsize <= size) { 356 buf->entry[buf->entries].priority = 1; 357 buf->entry[buf->entries].pattern = addr; 358 buf->entries++; 359 flow_items[0].type = missed_item.type; 360 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; 361 rte_memcpy(addr, buf->entry[0].pattern, 362 user_pattern_size); 363 addr = (void *)(((uintptr_t)addr) + user_pattern_size); 364 rte_memcpy(addr, flow_items, elt * sizeof(*item)); 365 addr = (void *)(((uintptr_t)addr) + 366 elt * sizeof(*item)); 367 } 368 } 369 return lsize; 370 } 371 372 enum mlx5_expansion { 373 MLX5_EXPANSION_ROOT, 374 MLX5_EXPANSION_ROOT_OUTER, 375 MLX5_EXPANSION_ROOT_ETH_VLAN, 376 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, 377 MLX5_EXPANSION_OUTER_ETH, 378 MLX5_EXPANSION_OUTER_ETH_VLAN, 379 MLX5_EXPANSION_OUTER_VLAN, 380 MLX5_EXPANSION_OUTER_IPV4, 381 MLX5_EXPANSION_OUTER_IPV4_UDP, 382 MLX5_EXPANSION_OUTER_IPV4_TCP, 383 MLX5_EXPANSION_OUTER_IPV6, 384 MLX5_EXPANSION_OUTER_IPV6_UDP, 385 MLX5_EXPANSION_OUTER_IPV6_TCP, 386 MLX5_EXPANSION_VXLAN, 387 MLX5_EXPANSION_VXLAN_GPE, 388 MLX5_EXPANSION_GRE, 389 MLX5_EXPANSION_MPLS, 390 MLX5_EXPANSION_ETH, 391 MLX5_EXPANSION_ETH_VLAN, 392 MLX5_EXPANSION_VLAN, 393 MLX5_EXPANSION_IPV4, 394 MLX5_EXPANSION_IPV4_UDP, 395 MLX5_EXPANSION_IPV4_TCP, 396 MLX5_EXPANSION_IPV6, 397 MLX5_EXPANSION_IPV6_UDP, 398 MLX5_EXPANSION_IPV6_TCP, 399 }; 400 401 /** Supported expansion of items. */ 402 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { 403 [MLX5_EXPANSION_ROOT] = { 404 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 405 MLX5_EXPANSION_IPV4, 406 MLX5_EXPANSION_IPV6), 407 .type = RTE_FLOW_ITEM_TYPE_END, 408 }, 409 [MLX5_EXPANSION_ROOT_OUTER] = { 410 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 411 MLX5_EXPANSION_OUTER_IPV4, 412 MLX5_EXPANSION_OUTER_IPV6), 413 .type = RTE_FLOW_ITEM_TYPE_END, 414 }, 415 [MLX5_EXPANSION_ROOT_ETH_VLAN] = { 416 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), 417 .type = RTE_FLOW_ITEM_TYPE_END, 418 }, 419 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { 420 .next = MLX5_FLOW_EXPAND_RSS_NEXT 421 (MLX5_EXPANSION_OUTER_ETH_VLAN), 422 .type = RTE_FLOW_ITEM_TYPE_END, 423 }, 424 [MLX5_EXPANSION_OUTER_ETH] = { 425 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 426 MLX5_EXPANSION_OUTER_IPV6, 427 MLX5_EXPANSION_MPLS), 428 .type = RTE_FLOW_ITEM_TYPE_ETH, 429 .rss_types = 0, 430 }, 431 [MLX5_EXPANSION_OUTER_ETH_VLAN] = { 432 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), 433 .type = RTE_FLOW_ITEM_TYPE_ETH, 434 .rss_types = 0, 435 }, 436 [MLX5_EXPANSION_OUTER_VLAN] = { 437 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 438 MLX5_EXPANSION_OUTER_IPV6), 439 .type = RTE_FLOW_ITEM_TYPE_VLAN, 440 }, 441 [MLX5_EXPANSION_OUTER_IPV4] = { 442 .next = MLX5_FLOW_EXPAND_RSS_NEXT 443 (MLX5_EXPANSION_OUTER_IPV4_UDP, 444 MLX5_EXPANSION_OUTER_IPV4_TCP, 445 MLX5_EXPANSION_GRE, 446 MLX5_EXPANSION_IPV4, 447 MLX5_EXPANSION_IPV6), 448 .type = RTE_FLOW_ITEM_TYPE_IPV4, 449 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 450 ETH_RSS_NONFRAG_IPV4_OTHER, 451 }, 452 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 453 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 454 MLX5_EXPANSION_VXLAN_GPE), 455 .type = RTE_FLOW_ITEM_TYPE_UDP, 456 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 457 }, 458 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 459 .type = 
RTE_FLOW_ITEM_TYPE_TCP, 460 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 461 }, 462 [MLX5_EXPANSION_OUTER_IPV6] = { 463 .next = MLX5_FLOW_EXPAND_RSS_NEXT 464 (MLX5_EXPANSION_OUTER_IPV6_UDP, 465 MLX5_EXPANSION_OUTER_IPV6_TCP, 466 MLX5_EXPANSION_IPV4, 467 MLX5_EXPANSION_IPV6), 468 .type = RTE_FLOW_ITEM_TYPE_IPV6, 469 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 470 ETH_RSS_NONFRAG_IPV6_OTHER, 471 }, 472 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 473 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 474 MLX5_EXPANSION_VXLAN_GPE), 475 .type = RTE_FLOW_ITEM_TYPE_UDP, 476 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 477 }, 478 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 479 .type = RTE_FLOW_ITEM_TYPE_TCP, 480 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 481 }, 482 [MLX5_EXPANSION_VXLAN] = { 483 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 484 MLX5_EXPANSION_IPV4, 485 MLX5_EXPANSION_IPV6), 486 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 487 }, 488 [MLX5_EXPANSION_VXLAN_GPE] = { 489 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 490 MLX5_EXPANSION_IPV4, 491 MLX5_EXPANSION_IPV6), 492 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 493 }, 494 [MLX5_EXPANSION_GRE] = { 495 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), 496 .type = RTE_FLOW_ITEM_TYPE_GRE, 497 }, 498 [MLX5_EXPANSION_MPLS] = { 499 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 500 MLX5_EXPANSION_IPV6), 501 .type = RTE_FLOW_ITEM_TYPE_MPLS, 502 }, 503 [MLX5_EXPANSION_ETH] = { 504 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 505 MLX5_EXPANSION_IPV6), 506 .type = RTE_FLOW_ITEM_TYPE_ETH, 507 }, 508 [MLX5_EXPANSION_ETH_VLAN] = { 509 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), 510 .type = RTE_FLOW_ITEM_TYPE_ETH, 511 }, 512 [MLX5_EXPANSION_VLAN] = { 513 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 514 MLX5_EXPANSION_IPV6), 515 .type = RTE_FLOW_ITEM_TYPE_VLAN, 516 }, 517 [MLX5_EXPANSION_IPV4] = { 518 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 519 MLX5_EXPANSION_IPV4_TCP), 520 .type = RTE_FLOW_ITEM_TYPE_IPV4, 521 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 522 ETH_RSS_NONFRAG_IPV4_OTHER, 523 }, 524 [MLX5_EXPANSION_IPV4_UDP] = { 525 .type = RTE_FLOW_ITEM_TYPE_UDP, 526 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 527 }, 528 [MLX5_EXPANSION_IPV4_TCP] = { 529 .type = RTE_FLOW_ITEM_TYPE_TCP, 530 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 531 }, 532 [MLX5_EXPANSION_IPV6] = { 533 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 534 MLX5_EXPANSION_IPV6_TCP), 535 .type = RTE_FLOW_ITEM_TYPE_IPV6, 536 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 537 ETH_RSS_NONFRAG_IPV6_OTHER, 538 }, 539 [MLX5_EXPANSION_IPV6_UDP] = { 540 .type = RTE_FLOW_ITEM_TYPE_UDP, 541 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 542 }, 543 [MLX5_EXPANSION_IPV6_TCP] = { 544 .type = RTE_FLOW_ITEM_TYPE_TCP, 545 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 546 }, 547 }; 548 549 static const struct rte_flow_ops mlx5_flow_ops = { 550 .validate = mlx5_flow_validate, 551 .create = mlx5_flow_create, 552 .destroy = mlx5_flow_destroy, 553 .flush = mlx5_flow_flush, 554 .isolate = mlx5_flow_isolate, 555 .query = mlx5_flow_query, 556 .dev_dump = mlx5_flow_dev_dump, 557 .get_aged_flows = mlx5_flow_get_aged_flows, 558 }; 559 560 /* Convert FDIR request to Generic flow. 
*/ 561 struct mlx5_fdir { 562 struct rte_flow_attr attr; 563 struct rte_flow_item items[4]; 564 struct rte_flow_item_eth l2; 565 struct rte_flow_item_eth l2_mask; 566 union { 567 struct rte_flow_item_ipv4 ipv4; 568 struct rte_flow_item_ipv6 ipv6; 569 } l3; 570 union { 571 struct rte_flow_item_ipv4 ipv4; 572 struct rte_flow_item_ipv6 ipv6; 573 } l3_mask; 574 union { 575 struct rte_flow_item_udp udp; 576 struct rte_flow_item_tcp tcp; 577 } l4; 578 union { 579 struct rte_flow_item_udp udp; 580 struct rte_flow_item_tcp tcp; 581 } l4_mask; 582 struct rte_flow_action actions[2]; 583 struct rte_flow_action_queue queue; 584 }; 585 586 /* Tunnel information. */ 587 struct mlx5_flow_tunnel_info { 588 uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ 589 uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */ 590 }; 591 592 static struct mlx5_flow_tunnel_info tunnels_info[] = { 593 { 594 .tunnel = MLX5_FLOW_LAYER_VXLAN, 595 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP, 596 }, 597 { 598 .tunnel = MLX5_FLOW_LAYER_GENEVE, 599 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP, 600 }, 601 { 602 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE, 603 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP, 604 }, 605 { 606 .tunnel = MLX5_FLOW_LAYER_GRE, 607 .ptype = RTE_PTYPE_TUNNEL_GRE, 608 }, 609 { 610 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP, 611 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP, 612 }, 613 { 614 .tunnel = MLX5_FLOW_LAYER_MPLS, 615 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE, 616 }, 617 { 618 .tunnel = MLX5_FLOW_LAYER_NVGRE, 619 .ptype = RTE_PTYPE_TUNNEL_NVGRE, 620 }, 621 { 622 .tunnel = MLX5_FLOW_LAYER_IPIP, 623 .ptype = RTE_PTYPE_TUNNEL_IP, 624 }, 625 { 626 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP, 627 .ptype = RTE_PTYPE_TUNNEL_IP, 628 }, 629 { 630 .tunnel = MLX5_FLOW_LAYER_GTP, 631 .ptype = RTE_PTYPE_TUNNEL_GTPU, 632 }, 633 }; 634 635 /** 636 * Translate tag ID to register. 637 * 638 * @param[in] dev 639 * Pointer to the Ethernet device structure. 640 * @param[in] feature 641 * The feature that request the register. 642 * @param[in] id 643 * The request register ID. 644 * @param[out] error 645 * Error description in case of any. 646 * 647 * @return 648 * The request register on success, a negative errno 649 * value otherwise and rte_errno is set. 
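 *
 * A minimal usage sketch (illustrative only, not a statement about the
 * actual call sites in this driver; the local variable names are
 * assumptions): translating the MARK feature and bailing out when the
 * translation fails.
 *
 * @code
 * struct rte_flow_error err;
 * int reg = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, &err);
 *
 * if (reg < 0)
 *         return reg; // rte_errno is already set by the helper.
 * // Otherwise reg holds REG_C_0/REG_C_1 (or REG_NON in legacy mode),
 * // depending on the dv_xmeta_en configuration.
 * @endcode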
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_SFX:
		/*
		 * If the meter color and flow match share one register, the
		 * flow match should use the meter color register.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is only used in the meter
		 * suffix sub-flow when a meter is present, so it is safe to
		 * share the same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If the meter is enabled, it engages a register for both
		 * color match and flow match. If the meter color match is
		 * not using REG_C_2, the REG_C_x used by the meter color
		 * match must be skipped.
		 * If the meter is disabled, all available registers can be
		 * used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater
		 * than 2. Take care not to conflict with the meter color
		 * REG_C_x. If the available index REG_C_y >= REG_C_x, skip
		 * the color register.
742 */ 743 if (skip_mtr_reg && config->flow_mreg_c 744 [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { 745 if (id >= (REG_C_7 - start_reg)) 746 return rte_flow_error_set(error, EINVAL, 747 RTE_FLOW_ERROR_TYPE_ITEM, 748 NULL, "invalid tag id"); 749 if (config->flow_mreg_c 750 [id + 1 + start_reg - REG_C_0] != REG_NON) 751 return config->flow_mreg_c 752 [id + 1 + start_reg - REG_C_0]; 753 return rte_flow_error_set(error, ENOTSUP, 754 RTE_FLOW_ERROR_TYPE_ITEM, 755 NULL, "unsupported tag id"); 756 } 757 return config->flow_mreg_c[id + start_reg - REG_C_0]; 758 } 759 MLX5_ASSERT(false); 760 return rte_flow_error_set(error, EINVAL, 761 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 762 NULL, "invalid feature name"); 763 } 764 765 /** 766 * Check extensive flow metadata register support. 767 * 768 * @param dev 769 * Pointer to rte_eth_dev structure. 770 * 771 * @return 772 * True if device supports extensive flow metadata register, otherwise false. 773 */ 774 bool 775 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev) 776 { 777 struct mlx5_priv *priv = dev->data->dev_private; 778 struct mlx5_dev_config *config = &priv->config; 779 780 /* 781 * Having available reg_c can be regarded inclusively as supporting 782 * extensive flow metadata register, which could mean, 783 * - metadata register copy action by modify header. 784 * - 16 modify header actions is supported. 785 * - reg_c's are preserved across different domain (FDB and NIC) on 786 * packet loopback by flow lookup miss. 787 */ 788 return config->flow_mreg_c[2] != REG_NON; 789 } 790 791 /** 792 * Verify the @p item specifications (spec, last, mask) are compatible with the 793 * NIC capabilities. 794 * 795 * @param[in] item 796 * Item specification. 797 * @param[in] mask 798 * @p item->mask or flow default bit-masks. 799 * @param[in] nic_mask 800 * Bit-masks covering supported fields by the NIC to compare with user mask. 801 * @param[in] size 802 * Bit-masks size in bytes. 803 * @param[out] error 804 * Pointer to error structure. 805 * 806 * @return 807 * 0 on success, a negative errno value otherwise and rte_errno is set. 808 */ 809 int 810 mlx5_flow_item_acceptable(const struct rte_flow_item *item, 811 const uint8_t *mask, 812 const uint8_t *nic_mask, 813 unsigned int size, 814 struct rte_flow_error *error) 815 { 816 unsigned int i; 817 818 MLX5_ASSERT(nic_mask); 819 for (i = 0; i < size; ++i) 820 if ((nic_mask[i] | mask[i]) != nic_mask[i]) 821 return rte_flow_error_set(error, ENOTSUP, 822 RTE_FLOW_ERROR_TYPE_ITEM, 823 item, 824 "mask enables non supported" 825 " bits"); 826 if (!item->spec && (item->mask || item->last)) 827 return rte_flow_error_set(error, EINVAL, 828 RTE_FLOW_ERROR_TYPE_ITEM, item, 829 "mask/last without a spec is not" 830 " supported"); 831 if (item->spec && item->last) { 832 uint8_t spec[size]; 833 uint8_t last[size]; 834 unsigned int i; 835 int ret; 836 837 for (i = 0; i < size; ++i) { 838 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i]; 839 last[i] = ((const uint8_t *)item->last)[i] & mask[i]; 840 } 841 ret = memcmp(spec, last, size); 842 if (ret != 0) 843 return rte_flow_error_set(error, EINVAL, 844 RTE_FLOW_ERROR_TYPE_ITEM, 845 item, 846 "range is not valid"); 847 } 848 return 0; 849 } 850 851 /** 852 * Adjust the hash fields according to the @p flow information. 853 * 854 * @param[in] dev_flow. 855 * Pointer to the mlx5_flow. 856 * @param[in] tunnel 857 * 1 when the hash field is for a tunnel item. 858 * @param[in] layer_types 859 * ETH_RSS_* types. 860 * @param[in] hash_fields 861 * Item hash fields. 
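 *
 * A usage sketch (hedged example, not taken from the driver; rss_desc and
 * tunnel are assumed to come from the caller): requesting the UDP port
 * hash for an IPv4/UDP layer. The IBV_RX_HASH_* values come from the Verbs
 * headers pulled in through the mlx5 OS glue.
 *
 * @code
 * uint64_t fields = mlx5_flow_hashfields_adjust(rss_desc, tunnel,
 *                                               ETH_RSS_NONFRAG_IPV4_UDP,
 *                                               IBV_RX_HASH_SRC_PORT_UDP |
 *                                               IBV_RX_HASH_DST_PORT_UDP);
 * // fields is 0 when rss_desc->types does not include the layer type.
 * @endcode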
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}

/**
 * Look up and set the ptype in the Rx queue data. Only a single ptype can be
 * used; if several tunnel rules are used on this queue, the tunnel ptype is
 * cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must always be enabled (metadata may arrive
		 * from another port, not only from local flows).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
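 *
 * Lifecycle sketch (the exact call sites live elsewhere in this file and
 * are an assumption here): the set/trim pair keeps rxq.mark and the tunnel
 * ptype reference counted across flow creation and destruction.
 *
 * @code
 * flow_rxq_flags_set(dev, flow);   // when the flow is applied
 * flow_rxq_flags_trim(dev, flow);  // when the flow is released
 * @endcode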
983 */ 984 static void 985 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 986 { 987 struct mlx5_priv *priv = dev->data->dev_private; 988 uint32_t handle_idx; 989 struct mlx5_flow_handle *dev_handle; 990 991 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 992 handle_idx, dev_handle, next) 993 flow_drv_rxq_flags_set(dev, dev_handle); 994 } 995 996 /** 997 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 998 * device flow if no other flow uses it with the same kind of request. 999 * 1000 * @param dev 1001 * Pointer to Ethernet device. 1002 * @param[in] dev_handle 1003 * Pointer to the device flow handle structure. 1004 */ 1005 static void 1006 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, 1007 struct mlx5_flow_handle *dev_handle) 1008 { 1009 struct mlx5_priv *priv = dev->data->dev_private; 1010 const int mark = dev_handle->mark; 1011 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 1012 struct mlx5_hrxq *hrxq; 1013 unsigned int i; 1014 1015 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 1016 return; 1017 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 1018 dev_handle->rix_hrxq); 1019 if (!hrxq) 1020 return; 1021 MLX5_ASSERT(dev->data->dev_started); 1022 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 1023 int idx = hrxq->ind_table->queues[i]; 1024 struct mlx5_rxq_ctrl *rxq_ctrl = 1025 container_of((*priv->rxqs)[idx], 1026 struct mlx5_rxq_ctrl, rxq); 1027 1028 if (priv->config.dv_flow_en && 1029 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 1030 mlx5_flow_ext_mreg_supported(dev)) { 1031 rxq_ctrl->rxq.mark = 1; 1032 rxq_ctrl->flow_mark_n = 1; 1033 } else if (mark) { 1034 rxq_ctrl->flow_mark_n--; 1035 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 1036 } 1037 if (tunnel) { 1038 unsigned int j; 1039 1040 /* Decrease the counter matching the flow. */ 1041 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 1042 if ((tunnels_info[j].tunnel & 1043 dev_handle->layers) == 1044 tunnels_info[j].tunnel) { 1045 rxq_ctrl->flow_tunnels_n[j]--; 1046 break; 1047 } 1048 } 1049 flow_rxq_tunnel_ptype_update(rxq_ctrl); 1050 } 1051 } 1052 } 1053 1054 /** 1055 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 1056 * @p flow if no other flow uses it with the same kind of request. 1057 * 1058 * @param dev 1059 * Pointer to Ethernet device. 1060 * @param[in] flow 1061 * Pointer to the flow. 1062 */ 1063 static void 1064 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 1065 { 1066 struct mlx5_priv *priv = dev->data->dev_private; 1067 uint32_t handle_idx; 1068 struct mlx5_flow_handle *dev_handle; 1069 1070 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 1071 handle_idx, dev_handle, next) 1072 flow_drv_rxq_flags_trim(dev, dev_handle); 1073 } 1074 1075 /** 1076 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 1077 * 1078 * @param dev 1079 * Pointer to Ethernet device. 
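 *
 * Effect sketch: after this call every configured Rx queue is back to its
 * no-flow state, i.e. for each rxq_ctrl the following holds:
 *
 * @code
 * rxq_ctrl->flow_mark_n == 0 && rxq_ctrl->rxq.mark == 0 &&
 * rxq_ctrl->rxq.tunnel == 0
 * @endcode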
1080 */ 1081 static void 1082 flow_rxq_flags_clear(struct rte_eth_dev *dev) 1083 { 1084 struct mlx5_priv *priv = dev->data->dev_private; 1085 unsigned int i; 1086 1087 for (i = 0; i != priv->rxqs_n; ++i) { 1088 struct mlx5_rxq_ctrl *rxq_ctrl; 1089 unsigned int j; 1090 1091 if (!(*priv->rxqs)[i]) 1092 continue; 1093 rxq_ctrl = container_of((*priv->rxqs)[i], 1094 struct mlx5_rxq_ctrl, rxq); 1095 rxq_ctrl->flow_mark_n = 0; 1096 rxq_ctrl->rxq.mark = 0; 1097 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 1098 rxq_ctrl->flow_tunnels_n[j] = 0; 1099 rxq_ctrl->rxq.tunnel = 0; 1100 } 1101 } 1102 1103 /** 1104 * Set the Rx queue dynamic metadata (mask and offset) for a flow 1105 * 1106 * @param[in] dev 1107 * Pointer to the Ethernet device structure. 1108 */ 1109 void 1110 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) 1111 { 1112 struct mlx5_priv *priv = dev->data->dev_private; 1113 struct mlx5_rxq_data *data; 1114 unsigned int i; 1115 1116 for (i = 0; i != priv->rxqs_n; ++i) { 1117 if (!(*priv->rxqs)[i]) 1118 continue; 1119 data = (*priv->rxqs)[i]; 1120 if (!rte_flow_dynf_metadata_avail()) { 1121 data->dynf_meta = 0; 1122 data->flow_meta_mask = 0; 1123 data->flow_meta_offset = -1; 1124 } else { 1125 data->dynf_meta = 1; 1126 data->flow_meta_mask = rte_flow_dynf_metadata_mask; 1127 data->flow_meta_offset = rte_flow_dynf_metadata_offs; 1128 } 1129 } 1130 } 1131 1132 /* 1133 * return a pointer to the desired action in the list of actions. 1134 * 1135 * @param[in] actions 1136 * The list of actions to search the action in. 1137 * @param[in] action 1138 * The action to find. 1139 * 1140 * @return 1141 * Pointer to the action in the list, if found. NULL otherwise. 1142 */ 1143 const struct rte_flow_action * 1144 mlx5_flow_find_action(const struct rte_flow_action *actions, 1145 enum rte_flow_action_type action) 1146 { 1147 if (actions == NULL) 1148 return NULL; 1149 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) 1150 if (actions->type == action) 1151 return actions; 1152 return NULL; 1153 } 1154 1155 /* 1156 * Validate the flag action. 1157 * 1158 * @param[in] action_flags 1159 * Bit-fields that holds the actions detected until now. 1160 * @param[in] attr 1161 * Attributes of flow that includes this action. 1162 * @param[out] error 1163 * Pointer to error structure. 1164 * 1165 * @return 1166 * 0 on success, a negative errno value otherwise and rte_errno is set. 1167 */ 1168 int 1169 mlx5_flow_validate_action_flag(uint64_t action_flags, 1170 const struct rte_flow_attr *attr, 1171 struct rte_flow_error *error) 1172 { 1173 if (action_flags & MLX5_FLOW_ACTION_MARK) 1174 return rte_flow_error_set(error, EINVAL, 1175 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1176 "can't mark and flag in same flow"); 1177 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1178 return rte_flow_error_set(error, EINVAL, 1179 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1180 "can't have 2 flag" 1181 " actions in same flow"); 1182 if (attr->egress) 1183 return rte_flow_error_set(error, ENOTSUP, 1184 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1185 "flag action not supported for " 1186 "egress"); 1187 return 0; 1188 } 1189 1190 /* 1191 * Validate the mark action. 1192 * 1193 * @param[in] action 1194 * Pointer to the queue action. 1195 * @param[in] action_flags 1196 * Bit-fields that holds the actions detected until now. 1197 * @param[in] attr 1198 * Attributes of flow that includes this action. 1199 * @param[out] error 1200 * Pointer to error structure. 1201 * 1202 * @return 1203 * 0 on success, a negative errno value otherwise and rte_errno is set. 
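 *
 * A validation sketch, assuming a single MARK action on an ingress rule
 * (the local names below are illustrative, not from this file):
 *
 * @code
 * const struct rte_flow_action_mark mark_conf = { .id = 0xcafe };
 * const struct rte_flow_action action = {
 *         .type = RTE_FLOW_ACTION_TYPE_MARK,
 *         .conf = &mark_conf,
 * };
 * const struct rte_flow_attr attr = { .ingress = 1 };
 * struct rte_flow_error err;
 * int ret = mlx5_flow_validate_action_mark(&action, 0, &attr, &err);
 * // EINVAL when mark_conf.id >= MLX5_FLOW_MARK_MAX or a FLAG/MARK action
 * // is already present in action_flags; ENOTSUP on egress attributes.
 * @endcode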
1204 */ 1205 int 1206 mlx5_flow_validate_action_mark(const struct rte_flow_action *action, 1207 uint64_t action_flags, 1208 const struct rte_flow_attr *attr, 1209 struct rte_flow_error *error) 1210 { 1211 const struct rte_flow_action_mark *mark = action->conf; 1212 1213 if (!mark) 1214 return rte_flow_error_set(error, EINVAL, 1215 RTE_FLOW_ERROR_TYPE_ACTION, 1216 action, 1217 "configuration cannot be null"); 1218 if (mark->id >= MLX5_FLOW_MARK_MAX) 1219 return rte_flow_error_set(error, EINVAL, 1220 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1221 &mark->id, 1222 "mark id must in 0 <= id < " 1223 RTE_STR(MLX5_FLOW_MARK_MAX)); 1224 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1225 return rte_flow_error_set(error, EINVAL, 1226 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1227 "can't flag and mark in same flow"); 1228 if (action_flags & MLX5_FLOW_ACTION_MARK) 1229 return rte_flow_error_set(error, EINVAL, 1230 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1231 "can't have 2 mark actions in same" 1232 " flow"); 1233 if (attr->egress) 1234 return rte_flow_error_set(error, ENOTSUP, 1235 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1236 "mark action not supported for " 1237 "egress"); 1238 return 0; 1239 } 1240 1241 /* 1242 * Validate the drop action. 1243 * 1244 * @param[in] action_flags 1245 * Bit-fields that holds the actions detected until now. 1246 * @param[in] attr 1247 * Attributes of flow that includes this action. 1248 * @param[out] error 1249 * Pointer to error structure. 1250 * 1251 * @return 1252 * 0 on success, a negative errno value otherwise and rte_errno is set. 1253 */ 1254 int 1255 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused, 1256 const struct rte_flow_attr *attr, 1257 struct rte_flow_error *error) 1258 { 1259 if (attr->egress) 1260 return rte_flow_error_set(error, ENOTSUP, 1261 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1262 "drop action not supported for " 1263 "egress"); 1264 return 0; 1265 } 1266 1267 /* 1268 * Validate the queue action. 1269 * 1270 * @param[in] action 1271 * Pointer to the queue action. 1272 * @param[in] action_flags 1273 * Bit-fields that holds the actions detected until now. 1274 * @param[in] dev 1275 * Pointer to the Ethernet device structure. 1276 * @param[in] attr 1277 * Attributes of flow that includes this action. 1278 * @param[out] error 1279 * Pointer to error structure. 1280 * 1281 * @return 1282 * 0 on success, a negative errno value otherwise and rte_errno is set. 
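 *
 * A validation sketch, assuming Rx queue 0 exists and no other fate action
 * has been counted yet (local names are illustrative):
 *
 * @code
 * const struct rte_flow_action_queue queue_conf = { .index = 0 };
 * const struct rte_flow_action action = {
 *         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *         .conf = &queue_conf,
 * };
 * const struct rte_flow_attr attr = { .ingress = 1 };
 * struct rte_flow_error err;
 * int ret = mlx5_flow_validate_action_queue(&action, 0, dev, &attr, &err);
 * // EINVAL if another fate action was already counted, the index is out
 * // of range or the queue is not configured; ENOTSUP on egress.
 * @endcode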
1283 */ 1284 int 1285 mlx5_flow_validate_action_queue(const struct rte_flow_action *action, 1286 uint64_t action_flags, 1287 struct rte_eth_dev *dev, 1288 const struct rte_flow_attr *attr, 1289 struct rte_flow_error *error) 1290 { 1291 struct mlx5_priv *priv = dev->data->dev_private; 1292 const struct rte_flow_action_queue *queue = action->conf; 1293 1294 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1295 return rte_flow_error_set(error, EINVAL, 1296 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1297 "can't have 2 fate actions in" 1298 " same flow"); 1299 if (!priv->rxqs_n) 1300 return rte_flow_error_set(error, EINVAL, 1301 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1302 NULL, "No Rx queues configured"); 1303 if (queue->index >= priv->rxqs_n) 1304 return rte_flow_error_set(error, EINVAL, 1305 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1306 &queue->index, 1307 "queue index out of range"); 1308 if (!(*priv->rxqs)[queue->index]) 1309 return rte_flow_error_set(error, EINVAL, 1310 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1311 &queue->index, 1312 "queue is not configured"); 1313 if (attr->egress) 1314 return rte_flow_error_set(error, ENOTSUP, 1315 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1316 "queue action not supported for " 1317 "egress"); 1318 return 0; 1319 } 1320 1321 /* 1322 * Validate the rss action. 1323 * 1324 * @param[in] action 1325 * Pointer to the queue action. 1326 * @param[in] action_flags 1327 * Bit-fields that holds the actions detected until now. 1328 * @param[in] dev 1329 * Pointer to the Ethernet device structure. 1330 * @param[in] attr 1331 * Attributes of flow that includes this action. 1332 * @param[in] item_flags 1333 * Items that were detected. 1334 * @param[out] error 1335 * Pointer to error structure. 1336 * 1337 * @return 1338 * 0 on success, a negative errno value otherwise and rte_errno is set. 1339 */ 1340 int 1341 mlx5_flow_validate_action_rss(const struct rte_flow_action *action, 1342 uint64_t action_flags, 1343 struct rte_eth_dev *dev, 1344 const struct rte_flow_attr *attr, 1345 uint64_t item_flags, 1346 struct rte_flow_error *error) 1347 { 1348 struct mlx5_priv *priv = dev->data->dev_private; 1349 const struct rte_flow_action_rss *rss = action->conf; 1350 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1351 unsigned int i; 1352 1353 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1354 return rte_flow_error_set(error, EINVAL, 1355 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1356 "can't have 2 fate actions" 1357 " in same flow"); 1358 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 1359 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 1360 return rte_flow_error_set(error, ENOTSUP, 1361 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1362 &rss->func, 1363 "RSS hash function not supported"); 1364 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1365 if (rss->level > 2) 1366 #else 1367 if (rss->level > 1) 1368 #endif 1369 return rte_flow_error_set(error, ENOTSUP, 1370 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1371 &rss->level, 1372 "tunnel RSS is not supported"); 1373 /* allow RSS key_len 0 in case of NULL (default) RSS key. 
*/ 1374 if (rss->key_len == 0 && rss->key != NULL) 1375 return rte_flow_error_set(error, ENOTSUP, 1376 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1377 &rss->key_len, 1378 "RSS hash key length 0"); 1379 if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN) 1380 return rte_flow_error_set(error, ENOTSUP, 1381 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1382 &rss->key_len, 1383 "RSS hash key too small"); 1384 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 1385 return rte_flow_error_set(error, ENOTSUP, 1386 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1387 &rss->key_len, 1388 "RSS hash key too large"); 1389 if (rss->queue_num > priv->config.ind_table_max_size) 1390 return rte_flow_error_set(error, ENOTSUP, 1391 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1392 &rss->queue_num, 1393 "number of queues too large"); 1394 if (rss->types & MLX5_RSS_HF_MASK) 1395 return rte_flow_error_set(error, ENOTSUP, 1396 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1397 &rss->types, 1398 "some RSS protocols are not" 1399 " supported"); 1400 if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) && 1401 !(rss->types & ETH_RSS_IP)) 1402 return rte_flow_error_set(error, EINVAL, 1403 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1404 "L3 partial RSS requested but L3 RSS" 1405 " type not specified"); 1406 if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) && 1407 !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP))) 1408 return rte_flow_error_set(error, EINVAL, 1409 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1410 "L4 partial RSS requested but L4 RSS" 1411 " type not specified"); 1412 if (!priv->rxqs_n) 1413 return rte_flow_error_set(error, EINVAL, 1414 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1415 NULL, "No Rx queues configured"); 1416 if (!rss->queue_num) 1417 return rte_flow_error_set(error, EINVAL, 1418 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1419 NULL, "No queues configured"); 1420 for (i = 0; i != rss->queue_num; ++i) { 1421 if (rss->queue[i] >= priv->rxqs_n) 1422 return rte_flow_error_set 1423 (error, EINVAL, 1424 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1425 &rss->queue[i], "queue index out of range"); 1426 if (!(*priv->rxqs)[rss->queue[i]]) 1427 return rte_flow_error_set 1428 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1429 &rss->queue[i], "queue is not configured"); 1430 } 1431 if (attr->egress) 1432 return rte_flow_error_set(error, ENOTSUP, 1433 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1434 "rss action not supported for " 1435 "egress"); 1436 if (rss->level > 1 && !tunnel) 1437 return rte_flow_error_set(error, EINVAL, 1438 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1439 "inner RSS is not supported for " 1440 "non-tunnel flows"); 1441 if ((item_flags & MLX5_FLOW_LAYER_ECPRI) && 1442 !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) { 1443 return rte_flow_error_set(error, EINVAL, 1444 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1445 "RSS on eCPRI is not supported now"); 1446 } 1447 return 0; 1448 } 1449 1450 /* 1451 * Validate the default miss action. 1452 * 1453 * @param[in] action_flags 1454 * Bit-fields that holds the actions detected until now. 1455 * @param[out] error 1456 * Pointer to error structure. 1457 * 1458 * @return 1459 * 0 on success, a negative errno value otherwise and rte_errno is set. 
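 *
 * Constraint sketch: the action is only accepted on ingress rules in
 * group 0 without transfer, for example:
 *
 * @code
 * const struct rte_flow_attr attr = { .ingress = 1, .group = 0 };
 * struct rte_flow_error err;
 * int ret = mlx5_flow_validate_action_default_miss(0, &attr, &err);
 * @endcode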
1460 */ 1461 int 1462 mlx5_flow_validate_action_default_miss(uint64_t action_flags, 1463 const struct rte_flow_attr *attr, 1464 struct rte_flow_error *error) 1465 { 1466 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1467 return rte_flow_error_set(error, EINVAL, 1468 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1469 "can't have 2 fate actions in" 1470 " same flow"); 1471 if (attr->egress) 1472 return rte_flow_error_set(error, ENOTSUP, 1473 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1474 "default miss action not supported " 1475 "for egress"); 1476 if (attr->group) 1477 return rte_flow_error_set(error, ENOTSUP, 1478 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL, 1479 "only group 0 is supported"); 1480 if (attr->transfer) 1481 return rte_flow_error_set(error, ENOTSUP, 1482 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1483 NULL, "transfer is not supported"); 1484 return 0; 1485 } 1486 1487 /* 1488 * Validate the count action. 1489 * 1490 * @param[in] dev 1491 * Pointer to the Ethernet device structure. 1492 * @param[in] attr 1493 * Attributes of flow that includes this action. 1494 * @param[out] error 1495 * Pointer to error structure. 1496 * 1497 * @return 1498 * 0 on success, a negative errno value otherwise and rte_errno is set. 1499 */ 1500 int 1501 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, 1502 const struct rte_flow_attr *attr, 1503 struct rte_flow_error *error) 1504 { 1505 if (attr->egress) 1506 return rte_flow_error_set(error, ENOTSUP, 1507 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1508 "count action not supported for " 1509 "egress"); 1510 return 0; 1511 } 1512 1513 /** 1514 * Verify the @p attributes will be correctly understood by the NIC and store 1515 * them in the @p flow if everything is correct. 1516 * 1517 * @param[in] dev 1518 * Pointer to the Ethernet device structure. 1519 * @param[in] attributes 1520 * Pointer to flow attributes 1521 * @param[out] error 1522 * Pointer to error structure. 1523 * 1524 * @return 1525 * 0 on success, a negative errno value otherwise and rte_errno is set. 1526 */ 1527 int 1528 mlx5_flow_validate_attributes(struct rte_eth_dev *dev, 1529 const struct rte_flow_attr *attributes, 1530 struct rte_flow_error *error) 1531 { 1532 struct mlx5_priv *priv = dev->data->dev_private; 1533 uint32_t priority_max = priv->config.flow_prio - 1; 1534 1535 if (attributes->group) 1536 return rte_flow_error_set(error, ENOTSUP, 1537 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 1538 NULL, "groups is not supported"); 1539 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 1540 attributes->priority >= priority_max) 1541 return rte_flow_error_set(error, ENOTSUP, 1542 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 1543 NULL, "priority out of range"); 1544 if (attributes->egress) 1545 return rte_flow_error_set(error, ENOTSUP, 1546 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1547 "egress is not supported"); 1548 if (attributes->transfer && !priv->config.dv_esw_en) 1549 return rte_flow_error_set(error, ENOTSUP, 1550 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1551 NULL, "transfer is not supported"); 1552 if (!attributes->ingress) 1553 return rte_flow_error_set(error, EINVAL, 1554 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 1555 NULL, 1556 "ingress attribute is mandatory"); 1557 return 0; 1558 } 1559 1560 /** 1561 * Validate ICMP6 item. 1562 * 1563 * @param[in] item 1564 * Item specification. 1565 * @param[in] item_flags 1566 * Bit-fields that holds the items detected until now. 1567 * @param[out] error 1568 * Pointer to error structure. 1569 * 1570 * @return 1571 * 0 on success, a negative errno value otherwise and rte_errno is set. 
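 *
 * A validation sketch, assuming the pattern already contains an outer IPv6
 * layer (otherwise the check fails with EINVAL); item is assumed to point
 * at the ICMP6 pattern entry being checked:
 *
 * @code
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 * struct rte_flow_error err;
 * int ret = mlx5_flow_validate_item_icmp6(item, item_flags,
 *                                         IPPROTO_ICMPV6, &err);
 * @endcode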
1572 */ 1573 int 1574 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, 1575 uint64_t item_flags, 1576 uint8_t target_protocol, 1577 struct rte_flow_error *error) 1578 { 1579 const struct rte_flow_item_icmp6 *mask = item->mask; 1580 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1581 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1582 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1583 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1584 MLX5_FLOW_LAYER_OUTER_L4; 1585 int ret; 1586 1587 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) 1588 return rte_flow_error_set(error, EINVAL, 1589 RTE_FLOW_ERROR_TYPE_ITEM, item, 1590 "protocol filtering not compatible" 1591 " with ICMP6 layer"); 1592 if (!(item_flags & l3m)) 1593 return rte_flow_error_set(error, EINVAL, 1594 RTE_FLOW_ERROR_TYPE_ITEM, item, 1595 "IPv6 is mandatory to filter on" 1596 " ICMP6"); 1597 if (item_flags & l4m) 1598 return rte_flow_error_set(error, EINVAL, 1599 RTE_FLOW_ERROR_TYPE_ITEM, item, 1600 "multiple L4 layers not supported"); 1601 if (!mask) 1602 mask = &rte_flow_item_icmp6_mask; 1603 ret = mlx5_flow_item_acceptable 1604 (item, (const uint8_t *)mask, 1605 (const uint8_t *)&rte_flow_item_icmp6_mask, 1606 sizeof(struct rte_flow_item_icmp6), error); 1607 if (ret < 0) 1608 return ret; 1609 return 0; 1610 } 1611 1612 /** 1613 * Validate ICMP item. 1614 * 1615 * @param[in] item 1616 * Item specification. 1617 * @param[in] item_flags 1618 * Bit-fields that holds the items detected until now. 1619 * @param[out] error 1620 * Pointer to error structure. 1621 * 1622 * @return 1623 * 0 on success, a negative errno value otherwise and rte_errno is set. 1624 */ 1625 int 1626 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, 1627 uint64_t item_flags, 1628 uint8_t target_protocol, 1629 struct rte_flow_error *error) 1630 { 1631 const struct rte_flow_item_icmp *mask = item->mask; 1632 const struct rte_flow_item_icmp nic_mask = { 1633 .hdr.icmp_type = 0xff, 1634 .hdr.icmp_code = 0xff, 1635 .hdr.icmp_ident = RTE_BE16(0xffff), 1636 .hdr.icmp_seq_nb = RTE_BE16(0xffff), 1637 }; 1638 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1639 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 1640 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 1641 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1642 MLX5_FLOW_LAYER_OUTER_L4; 1643 int ret; 1644 1645 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP) 1646 return rte_flow_error_set(error, EINVAL, 1647 RTE_FLOW_ERROR_TYPE_ITEM, item, 1648 "protocol filtering not compatible" 1649 " with ICMP layer"); 1650 if (!(item_flags & l3m)) 1651 return rte_flow_error_set(error, EINVAL, 1652 RTE_FLOW_ERROR_TYPE_ITEM, item, 1653 "IPv4 is mandatory to filter" 1654 " on ICMP"); 1655 if (item_flags & l4m) 1656 return rte_flow_error_set(error, EINVAL, 1657 RTE_FLOW_ERROR_TYPE_ITEM, item, 1658 "multiple L4 layers not supported"); 1659 if (!mask) 1660 mask = &nic_mask; 1661 ret = mlx5_flow_item_acceptable 1662 (item, (const uint8_t *)mask, 1663 (const uint8_t *)&nic_mask, 1664 sizeof(struct rte_flow_item_icmp), error); 1665 if (ret < 0) 1666 return ret; 1667 return 0; 1668 } 1669 1670 /** 1671 * Validate Ethernet item. 1672 * 1673 * @param[in] item 1674 * Item specification. 1675 * @param[in] item_flags 1676 * Bit-fields that holds the items detected until now. 1677 * @param[out] error 1678 * Pointer to error structure. 1679 * 1680 * @return 1681 * 0 on success, a negative errno value otherwise and rte_errno is set. 
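 *
 * A usage sketch with the default Ethernet mask (the spec values are
 * illustrative):
 *
 * @code
 * const struct rte_flow_item_eth eth_spec = {
 *         .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 * };
 * const struct rte_flow_item item = {
 *         .type = RTE_FLOW_ITEM_TYPE_ETH,
 *         .spec = &eth_spec,
 *         .mask = NULL, // NULL falls back to rte_flow_item_eth_mask.
 * };
 * struct rte_flow_error err;
 * int ret = mlx5_flow_validate_item_eth(&item, 0, &err);
 * @endcode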
1682 */ 1683 int 1684 mlx5_flow_validate_item_eth(const struct rte_flow_item *item, 1685 uint64_t item_flags, 1686 struct rte_flow_error *error) 1687 { 1688 const struct rte_flow_item_eth *mask = item->mask; 1689 const struct rte_flow_item_eth nic_mask = { 1690 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1691 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1692 .type = RTE_BE16(0xffff), 1693 }; 1694 int ret; 1695 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1696 const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 1697 MLX5_FLOW_LAYER_OUTER_L2; 1698 1699 if (item_flags & ethm) 1700 return rte_flow_error_set(error, ENOTSUP, 1701 RTE_FLOW_ERROR_TYPE_ITEM, item, 1702 "multiple L2 layers not supported"); 1703 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) || 1704 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3))) 1705 return rte_flow_error_set(error, EINVAL, 1706 RTE_FLOW_ERROR_TYPE_ITEM, item, 1707 "L2 layer should not follow " 1708 "L3 layers"); 1709 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) || 1710 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN))) 1711 return rte_flow_error_set(error, EINVAL, 1712 RTE_FLOW_ERROR_TYPE_ITEM, item, 1713 "L2 layer should not follow VLAN"); 1714 if (!mask) 1715 mask = &rte_flow_item_eth_mask; 1716 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1717 (const uint8_t *)&nic_mask, 1718 sizeof(struct rte_flow_item_eth), 1719 error); 1720 return ret; 1721 } 1722 1723 /** 1724 * Validate VLAN item. 1725 * 1726 * @param[in] item 1727 * Item specification. 1728 * @param[in] item_flags 1729 * Bit-fields that holds the items detected until now. 1730 * @param[in] dev 1731 * Ethernet device flow is being created on. 1732 * @param[out] error 1733 * Pointer to error structure. 1734 * 1735 * @return 1736 * 0 on success, a negative errno value otherwise and rte_errno is set. 1737 */ 1738 int 1739 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, 1740 uint64_t item_flags, 1741 struct rte_eth_dev *dev, 1742 struct rte_flow_error *error) 1743 { 1744 const struct rte_flow_item_vlan *spec = item->spec; 1745 const struct rte_flow_item_vlan *mask = item->mask; 1746 const struct rte_flow_item_vlan nic_mask = { 1747 .tci = RTE_BE16(UINT16_MAX), 1748 .inner_type = RTE_BE16(UINT16_MAX), 1749 }; 1750 uint16_t vlan_tag = 0; 1751 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1752 int ret; 1753 const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 1754 MLX5_FLOW_LAYER_INNER_L4) : 1755 (MLX5_FLOW_LAYER_OUTER_L3 | 1756 MLX5_FLOW_LAYER_OUTER_L4); 1757 const uint64_t vlanm = tunnel ? 
MLX5_FLOW_LAYER_INNER_VLAN : 1758 MLX5_FLOW_LAYER_OUTER_VLAN; 1759 1760 if (item_flags & vlanm) 1761 return rte_flow_error_set(error, EINVAL, 1762 RTE_FLOW_ERROR_TYPE_ITEM, item, 1763 "multiple VLAN layers not supported"); 1764 else if ((item_flags & l34m) != 0) 1765 return rte_flow_error_set(error, EINVAL, 1766 RTE_FLOW_ERROR_TYPE_ITEM, item, 1767 "VLAN cannot follow L3/L4 layer"); 1768 if (!mask) 1769 mask = &rte_flow_item_vlan_mask; 1770 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1771 (const uint8_t *)&nic_mask, 1772 sizeof(struct rte_flow_item_vlan), 1773 error); 1774 if (ret) 1775 return ret; 1776 if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { 1777 struct mlx5_priv *priv = dev->data->dev_private; 1778 1779 if (priv->vmwa_context) { 1780 /* 1781 * Non-NULL context means we have a virtual machine 1782 * and SR-IOV enabled, we have to create VLAN interface 1783 * to make hypervisor to setup E-Switch vport 1784 * context correctly. We avoid creating the multiple 1785 * VLAN interfaces, so we cannot support VLAN tag mask. 1786 */ 1787 return rte_flow_error_set(error, EINVAL, 1788 RTE_FLOW_ERROR_TYPE_ITEM, 1789 item, 1790 "VLAN tag mask is not" 1791 " supported in virtual" 1792 " environment"); 1793 } 1794 } 1795 if (spec) { 1796 vlan_tag = spec->tci; 1797 vlan_tag &= mask->tci; 1798 } 1799 /* 1800 * From verbs perspective an empty VLAN is equivalent 1801 * to a packet without VLAN layer. 1802 */ 1803 if (!vlan_tag) 1804 return rte_flow_error_set(error, EINVAL, 1805 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 1806 item->spec, 1807 "VLAN cannot be empty"); 1808 return 0; 1809 } 1810 1811 /** 1812 * Validate IPV4 item. 1813 * 1814 * @param[in] item 1815 * Item specification. 1816 * @param[in] item_flags 1817 * Bit-fields that holds the items detected until now. 1818 * @param[in] last_item 1819 * Previous validated item in the pattern items. 1820 * @param[in] ether_type 1821 * Type in the ethernet layer header (including dot1q). 1822 * @param[in] acc_mask 1823 * Acceptable mask, if NULL default internal default mask 1824 * will be used to check whether item fields are supported. 1825 * @param[out] error 1826 * Pointer to error structure. 1827 * 1828 * @return 1829 * 0 on success, a negative errno value otherwise and rte_errno is set. 1830 */ 1831 int 1832 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, 1833 uint64_t item_flags, 1834 uint64_t last_item, 1835 uint16_t ether_type, 1836 const struct rte_flow_item_ipv4 *acc_mask, 1837 struct rte_flow_error *error) 1838 { 1839 const struct rte_flow_item_ipv4 *mask = item->mask; 1840 const struct rte_flow_item_ipv4 *spec = item->spec; 1841 const struct rte_flow_item_ipv4 nic_mask = { 1842 .hdr = { 1843 .src_addr = RTE_BE32(0xffffffff), 1844 .dst_addr = RTE_BE32(0xffffffff), 1845 .type_of_service = 0xff, 1846 .next_proto_id = 0xff, 1847 }, 1848 }; 1849 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1850 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1851 MLX5_FLOW_LAYER_OUTER_L3; 1852 const uint64_t l4m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4 : 1853 MLX5_FLOW_LAYER_OUTER_L4; 1854 int ret; 1855 uint8_t next_proto = 0xFF; 1856 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1857 MLX5_FLOW_LAYER_OUTER_VLAN | 1858 MLX5_FLOW_LAYER_INNER_VLAN); 1859 1860 if ((last_item & l2_vlan) && ether_type && 1861 ether_type != RTE_ETHER_TYPE_IPV4) 1862 return rte_flow_error_set(error, EINVAL, 1863 RTE_FLOW_ERROR_TYPE_ITEM, item, 1864 "IPv4 cannot follow L2/VLAN layer " 1865 "which ether type is not IPv4"); 1866 if (item_flags & MLX5_FLOW_LAYER_IPIP) { 1867 if (mask && spec) 1868 next_proto = mask->hdr.next_proto_id & 1869 spec->hdr.next_proto_id; 1870 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1871 return rte_flow_error_set(error, EINVAL, 1872 RTE_FLOW_ERROR_TYPE_ITEM, 1873 item, 1874 "multiple tunnel " 1875 "not supported"); 1876 } 1877 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 1878 return rte_flow_error_set(error, EINVAL, 1879 RTE_FLOW_ERROR_TYPE_ITEM, item, 1880 "wrong tunnel type - IPv6 specified " 1881 "but IPv4 item provided"); 1882 if (item_flags & l3m) 1883 return rte_flow_error_set(error, ENOTSUP, 1884 RTE_FLOW_ERROR_TYPE_ITEM, item, 1885 "multiple L3 layers not supported"); 1886 else if (item_flags & l4m) 1887 return rte_flow_error_set(error, EINVAL, 1888 RTE_FLOW_ERROR_TYPE_ITEM, item, 1889 "L3 cannot follow an L4 layer."); 1890 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1891 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1892 return rte_flow_error_set(error, EINVAL, 1893 RTE_FLOW_ERROR_TYPE_ITEM, item, 1894 "L3 cannot follow an NVGRE layer."); 1895 if (!mask) 1896 mask = &rte_flow_item_ipv4_mask; 1897 else if (mask->hdr.next_proto_id != 0 && 1898 mask->hdr.next_proto_id != 0xff) 1899 return rte_flow_error_set(error, EINVAL, 1900 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 1901 "partial mask is not supported" 1902 " for protocol"); 1903 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1904 acc_mask ? (const uint8_t *)acc_mask 1905 : (const uint8_t *)&nic_mask, 1906 sizeof(struct rte_flow_item_ipv4), 1907 error); 1908 if (ret < 0) 1909 return ret; 1910 return 0; 1911 } 1912 1913 /** 1914 * Validate IPV6 item. 1915 * 1916 * @param[in] item 1917 * Item specification. 1918 * @param[in] item_flags 1919 * Bit-fields that holds the items detected until now. 1920 * @param[in] last_item 1921 * Previous validated item in the pattern items. 1922 * @param[in] ether_type 1923 * Type in the ethernet layer header (including dot1q). 1924 * @param[in] acc_mask 1925 * Acceptable mask, if NULL default internal default mask 1926 * will be used to check whether item fields are supported. 1927 * @param[out] error 1928 * Pointer to error structure. 1929 * 1930 * @return 1931 * 0 on success, a negative errno value otherwise and rte_errno is set. 
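*
* A minimal usage sketch (illustrative only; the item pointer, the flag
* bookkeeping and the tunnel flag are assumptions of the example, not
* requirements of this API):
*
*     ret = mlx5_flow_validate_item_ipv6(item, item_flags, last_item,
*                                        ether_type, NULL, error);
*     if (!ret)
*             item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
*                                    MLX5_FLOW_LAYER_OUTER_L3_IPV6;
*
* Passing NULL as acc_mask falls back to the internal nic_mask defined
* below; a driver may pass a narrower acceptable mask instead.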
1932 */ 1933 int 1934 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 1935 uint64_t item_flags, 1936 uint64_t last_item, 1937 uint16_t ether_type, 1938 const struct rte_flow_item_ipv6 *acc_mask, 1939 struct rte_flow_error *error) 1940 { 1941 const struct rte_flow_item_ipv6 *mask = item->mask; 1942 const struct rte_flow_item_ipv6 *spec = item->spec; 1943 const struct rte_flow_item_ipv6 nic_mask = { 1944 .hdr = { 1945 .src_addr = 1946 "\xff\xff\xff\xff\xff\xff\xff\xff" 1947 "\xff\xff\xff\xff\xff\xff\xff\xff", 1948 .dst_addr = 1949 "\xff\xff\xff\xff\xff\xff\xff\xff" 1950 "\xff\xff\xff\xff\xff\xff\xff\xff", 1951 .vtc_flow = RTE_BE32(0xffffffff), 1952 .proto = 0xff, 1953 }, 1954 }; 1955 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1956 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1957 MLX5_FLOW_LAYER_OUTER_L3; 1958 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1959 MLX5_FLOW_LAYER_OUTER_L4; 1960 int ret; 1961 uint8_t next_proto = 0xFF; 1962 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1963 MLX5_FLOW_LAYER_OUTER_VLAN | 1964 MLX5_FLOW_LAYER_INNER_VLAN); 1965 1966 if ((last_item & l2_vlan) && ether_type && 1967 ether_type != RTE_ETHER_TYPE_IPV6) 1968 return rte_flow_error_set(error, EINVAL, 1969 RTE_FLOW_ERROR_TYPE_ITEM, item, 1970 "IPv6 cannot follow L2/VLAN layer " 1971 "which ether type is not IPv6"); 1972 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { 1973 if (mask && spec) 1974 next_proto = mask->hdr.proto & spec->hdr.proto; 1975 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1976 return rte_flow_error_set(error, EINVAL, 1977 RTE_FLOW_ERROR_TYPE_ITEM, 1978 item, 1979 "multiple tunnel " 1980 "not supported"); 1981 } 1982 if (item_flags & MLX5_FLOW_LAYER_IPIP) 1983 return rte_flow_error_set(error, EINVAL, 1984 RTE_FLOW_ERROR_TYPE_ITEM, item, 1985 "wrong tunnel type - IPv4 specified " 1986 "but IPv6 item provided"); 1987 if (item_flags & l3m) 1988 return rte_flow_error_set(error, ENOTSUP, 1989 RTE_FLOW_ERROR_TYPE_ITEM, item, 1990 "multiple L3 layers not supported"); 1991 else if (item_flags & l4m) 1992 return rte_flow_error_set(error, EINVAL, 1993 RTE_FLOW_ERROR_TYPE_ITEM, item, 1994 "L3 cannot follow an L4 layer."); 1995 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1996 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1997 return rte_flow_error_set(error, EINVAL, 1998 RTE_FLOW_ERROR_TYPE_ITEM, item, 1999 "L3 cannot follow an NVGRE layer."); 2000 if (!mask) 2001 mask = &rte_flow_item_ipv6_mask; 2002 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2003 acc_mask ? (const uint8_t *)acc_mask 2004 : (const uint8_t *)&nic_mask, 2005 sizeof(struct rte_flow_item_ipv6), 2006 error); 2007 if (ret < 0) 2008 return ret; 2009 return 0; 2010 } 2011 2012 /** 2013 * Validate UDP item. 2014 * 2015 * @param[in] item 2016 * Item specification. 2017 * @param[in] item_flags 2018 * Bit-fields that holds the items detected until now. 2019 * @param[in] target_protocol 2020 * The next protocol in the previous item. 2021 * @param[in] flow_mask 2022 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 2023 * @param[out] error 2024 * Pointer to error structure. 2025 * 2026 * @return 2027 * 0 on success, a negative errno value otherwise and rte_errno is set. 
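*
* A minimal usage sketch (illustrative only; the surrounding locals are
* assumptions of the example):
*
*     uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
*                           MLX5_FLOW_LAYER_OUTER_L3_IPV4;
*     ret = mlx5_flow_validate_item_udp(item, item_flags, 0xff, error);
*     if (!ret)
*             item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
*
* The call fails unless an L3 layer is already recorded in item_flags and
* no other L4 layer has been seen.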
2028 */ 2029 int 2030 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 2031 uint64_t item_flags, 2032 uint8_t target_protocol, 2033 struct rte_flow_error *error) 2034 { 2035 const struct rte_flow_item_udp *mask = item->mask; 2036 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2037 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2038 MLX5_FLOW_LAYER_OUTER_L3; 2039 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2040 MLX5_FLOW_LAYER_OUTER_L4; 2041 int ret; 2042 2043 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 2044 return rte_flow_error_set(error, EINVAL, 2045 RTE_FLOW_ERROR_TYPE_ITEM, item, 2046 "protocol filtering not compatible" 2047 " with UDP layer"); 2048 if (!(item_flags & l3m)) 2049 return rte_flow_error_set(error, EINVAL, 2050 RTE_FLOW_ERROR_TYPE_ITEM, item, 2051 "L3 is mandatory to filter on L4"); 2052 if (item_flags & l4m) 2053 return rte_flow_error_set(error, EINVAL, 2054 RTE_FLOW_ERROR_TYPE_ITEM, item, 2055 "multiple L4 layers not supported"); 2056 if (!mask) 2057 mask = &rte_flow_item_udp_mask; 2058 ret = mlx5_flow_item_acceptable 2059 (item, (const uint8_t *)mask, 2060 (const uint8_t *)&rte_flow_item_udp_mask, 2061 sizeof(struct rte_flow_item_udp), error); 2062 if (ret < 0) 2063 return ret; 2064 return 0; 2065 } 2066 2067 /** 2068 * Validate TCP item. 2069 * 2070 * @param[in] item 2071 * Item specification. 2072 * @param[in] item_flags 2073 * Bit-fields that holds the items detected until now. 2074 * @param[in] target_protocol 2075 * The next protocol in the previous item. 2076 * @param[out] error 2077 * Pointer to error structure. 2078 * 2079 * @return 2080 * 0 on success, a negative errno value otherwise and rte_errno is set. 2081 */ 2082 int 2083 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 2084 uint64_t item_flags, 2085 uint8_t target_protocol, 2086 const struct rte_flow_item_tcp *flow_mask, 2087 struct rte_flow_error *error) 2088 { 2089 const struct rte_flow_item_tcp *mask = item->mask; 2090 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 2091 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 2092 MLX5_FLOW_LAYER_OUTER_L3; 2093 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 2094 MLX5_FLOW_LAYER_OUTER_L4; 2095 int ret; 2096 2097 MLX5_ASSERT(flow_mask); 2098 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 2099 return rte_flow_error_set(error, EINVAL, 2100 RTE_FLOW_ERROR_TYPE_ITEM, item, 2101 "protocol filtering not compatible" 2102 " with TCP layer"); 2103 if (!(item_flags & l3m)) 2104 return rte_flow_error_set(error, EINVAL, 2105 RTE_FLOW_ERROR_TYPE_ITEM, item, 2106 "L3 is mandatory to filter on L4"); 2107 if (item_flags & l4m) 2108 return rte_flow_error_set(error, EINVAL, 2109 RTE_FLOW_ERROR_TYPE_ITEM, item, 2110 "multiple L4 layers not supported"); 2111 if (!mask) 2112 mask = &rte_flow_item_tcp_mask; 2113 ret = mlx5_flow_item_acceptable 2114 (item, (const uint8_t *)mask, 2115 (const uint8_t *)flow_mask, 2116 sizeof(struct rte_flow_item_tcp), error); 2117 if (ret < 0) 2118 return ret; 2119 return 0; 2120 } 2121 2122 /** 2123 * Validate VXLAN item. 2124 * 2125 * @param[in] item 2126 * Item specification. 2127 * @param[in] item_flags 2128 * Bit-fields that holds the items detected until now. 2129 * @param[in] target_protocol 2130 * The next protocol in the previous item. 2131 * @param[out] error 2132 * Pointer to error structure. 2133 * 2134 * @return 2135 * 0 on success, a negative errno value otherwise and rte_errno is set. 
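*
* The VXLAN item is only accepted on top of a fully specified outer
* stack, e.g. a pattern of the following shape (a sketch, not a complete
* rule; specs and masks are omitted):
*
*     const struct rte_flow_item pattern[] = {
*             { .type = RTE_FLOW_ITEM_TYPE_ETH },
*             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
*             { .type = RTE_FLOW_ITEM_TYPE_UDP },
*             { .type = RTE_FLOW_ITEM_TYPE_VXLAN },
*             { .type = RTE_FLOW_ITEM_TYPE_END },
*     };
*
* Without the outer UDP layer the validation fails with
* "no outer UDP layer found".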
2136 */
2137 int
2138 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2139 uint64_t item_flags,
2140 struct rte_flow_error *error)
2141 {
2142 const struct rte_flow_item_vxlan *spec = item->spec;
2143 const struct rte_flow_item_vxlan *mask = item->mask;
2144 int ret;
2145 union vni {
2146 uint32_t vlan_id;
2147 uint8_t vni[4];
2148 } id = { .vlan_id = 0, };
2149
2150
2151 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2152 return rte_flow_error_set(error, ENOTSUP,
2153 RTE_FLOW_ERROR_TYPE_ITEM, item,
2154 "multiple tunnel layers not"
2155 " supported");
2156 /*
2157 * Verify only UDPv4 is present as defined in
2158 * https://tools.ietf.org/html/rfc7348
2159 */
2160 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2161 return rte_flow_error_set(error, EINVAL,
2162 RTE_FLOW_ERROR_TYPE_ITEM, item,
2163 "no outer UDP layer found");
2164 if (!mask)
2165 mask = &rte_flow_item_vxlan_mask;
2166 ret = mlx5_flow_item_acceptable
2167 (item, (const uint8_t *)mask,
2168 (const uint8_t *)&rte_flow_item_vxlan_mask,
2169 sizeof(struct rte_flow_item_vxlan),
2170 error);
2171 if (ret < 0)
2172 return ret;
2173 if (spec) {
2174 memcpy(&id.vni[1], spec->vni, 3);
2175 memcpy(&id.vni[1], mask->vni, 3);
2176 }
2177 if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2178 return rte_flow_error_set(error, ENOTSUP,
2179 RTE_FLOW_ERROR_TYPE_ITEM, item,
2180 "VXLAN tunnel must be fully defined");
2181 return 0;
2182 }
2183
2184 /**
2185 * Validate VXLAN_GPE item.
2186 *
2187 * @param[in] item
2188 * Item specification.
2189 * @param[in] item_flags
2190 * Bit-fields that hold the items detected until now.
2191 * @param[in] dev
2192 * Pointer to the rte_eth_dev structure.
2193 * @param[in] target_protocol
2194 * The next protocol in the previous item.
2195 * @param[out] error
2196 * Pointer to error structure.
2197 *
2198 * @return
2199 * 0 on success, a negative errno value otherwise and rte_errno is set.
2200 */ 2201 int 2202 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 2203 uint64_t item_flags, 2204 struct rte_eth_dev *dev, 2205 struct rte_flow_error *error) 2206 { 2207 struct mlx5_priv *priv = dev->data->dev_private; 2208 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 2209 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 2210 int ret; 2211 union vni { 2212 uint32_t vlan_id; 2213 uint8_t vni[4]; 2214 } id = { .vlan_id = 0, }; 2215 2216 if (!priv->config.l3_vxlan_en) 2217 return rte_flow_error_set(error, ENOTSUP, 2218 RTE_FLOW_ERROR_TYPE_ITEM, item, 2219 "L3 VXLAN is not enabled by device" 2220 " parameter and/or not configured in" 2221 " firmware"); 2222 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2223 return rte_flow_error_set(error, ENOTSUP, 2224 RTE_FLOW_ERROR_TYPE_ITEM, item, 2225 "multiple tunnel layers not" 2226 " supported"); 2227 /* 2228 * Verify only UDPv4 is present as defined in 2229 * https://tools.ietf.org/html/rfc7348 2230 */ 2231 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2232 return rte_flow_error_set(error, EINVAL, 2233 RTE_FLOW_ERROR_TYPE_ITEM, item, 2234 "no outer UDP layer found"); 2235 if (!mask) 2236 mask = &rte_flow_item_vxlan_gpe_mask; 2237 ret = mlx5_flow_item_acceptable 2238 (item, (const uint8_t *)mask, 2239 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2240 sizeof(struct rte_flow_item_vxlan_gpe), 2241 error); 2242 if (ret < 0) 2243 return ret; 2244 if (spec) { 2245 if (spec->protocol) 2246 return rte_flow_error_set(error, ENOTSUP, 2247 RTE_FLOW_ERROR_TYPE_ITEM, 2248 item, 2249 "VxLAN-GPE protocol" 2250 " not supported"); 2251 memcpy(&id.vni[1], spec->vni, 3); 2252 memcpy(&id.vni[1], mask->vni, 3); 2253 } 2254 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2255 return rte_flow_error_set(error, ENOTSUP, 2256 RTE_FLOW_ERROR_TYPE_ITEM, item, 2257 "VXLAN-GPE tunnel must be fully" 2258 " defined"); 2259 return 0; 2260 } 2261 /** 2262 * Validate GRE Key item. 2263 * 2264 * @param[in] item 2265 * Item specification. 2266 * @param[in] item_flags 2267 * Bit flags to mark detected items. 2268 * @param[in] gre_item 2269 * Pointer to gre_item 2270 * @param[out] error 2271 * Pointer to error structure. 2272 * 2273 * @return 2274 * 0 on success, a negative errno value otherwise and rte_errno is set. 
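*
* A GRE_KEY item must follow a GRE item (before any inner layers), and
* when the GRE mask covers the K bit (RTE_BE16(0x2000) in c_rsvd0_ver)
* the GRE spec must have it set. Illustrative pattern fragment (a sketch
* only; the key value is made up):
*
*     struct rte_flow_item_gre gre_spec = {
*             .c_rsvd0_ver = RTE_BE16(0x2000),
*     };
*     rte_be32_t gre_key = RTE_BE32(0x1234);
*     ...
*     { .type = RTE_FLOW_ITEM_TYPE_GRE, .spec = &gre_spec },
*     { .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &gre_key },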
2275 */ 2276 int 2277 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2278 uint64_t item_flags, 2279 const struct rte_flow_item *gre_item, 2280 struct rte_flow_error *error) 2281 { 2282 const rte_be32_t *mask = item->mask; 2283 int ret = 0; 2284 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2285 const struct rte_flow_item_gre *gre_spec; 2286 const struct rte_flow_item_gre *gre_mask; 2287 2288 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2289 return rte_flow_error_set(error, ENOTSUP, 2290 RTE_FLOW_ERROR_TYPE_ITEM, item, 2291 "Multiple GRE key not support"); 2292 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2293 return rte_flow_error_set(error, ENOTSUP, 2294 RTE_FLOW_ERROR_TYPE_ITEM, item, 2295 "No preceding GRE header"); 2296 if (item_flags & MLX5_FLOW_LAYER_INNER) 2297 return rte_flow_error_set(error, ENOTSUP, 2298 RTE_FLOW_ERROR_TYPE_ITEM, item, 2299 "GRE key following a wrong item"); 2300 gre_mask = gre_item->mask; 2301 if (!gre_mask) 2302 gre_mask = &rte_flow_item_gre_mask; 2303 gre_spec = gre_item->spec; 2304 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2305 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2306 return rte_flow_error_set(error, EINVAL, 2307 RTE_FLOW_ERROR_TYPE_ITEM, item, 2308 "Key bit must be on"); 2309 2310 if (!mask) 2311 mask = &gre_key_default_mask; 2312 ret = mlx5_flow_item_acceptable 2313 (item, (const uint8_t *)mask, 2314 (const uint8_t *)&gre_key_default_mask, 2315 sizeof(rte_be32_t), error); 2316 return ret; 2317 } 2318 2319 /** 2320 * Validate GRE item. 2321 * 2322 * @param[in] item 2323 * Item specification. 2324 * @param[in] item_flags 2325 * Bit flags to mark detected items. 2326 * @param[in] target_protocol 2327 * The next protocol in the previous item. 2328 * @param[out] error 2329 * Pointer to error structure. 2330 * 2331 * @return 2332 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2333 */
2334 int
2335 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2336 uint64_t item_flags,
2337 uint8_t target_protocol,
2338 struct rte_flow_error *error)
2339 {
2340 const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2341 const struct rte_flow_item_gre *mask = item->mask;
2342 int ret;
2343 const struct rte_flow_item_gre nic_mask = {
2344 .c_rsvd0_ver = RTE_BE16(0xB000),
2345 .protocol = RTE_BE16(UINT16_MAX),
2346 };
2347
2348 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2349 return rte_flow_error_set(error, EINVAL,
2350 RTE_FLOW_ERROR_TYPE_ITEM, item,
2351 "protocol filtering not compatible"
2352 " with this GRE layer");
2353 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2354 return rte_flow_error_set(error, ENOTSUP,
2355 RTE_FLOW_ERROR_TYPE_ITEM, item,
2356 "multiple tunnel layers not"
2357 " supported");
2358 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2359 return rte_flow_error_set(error, ENOTSUP,
2360 RTE_FLOW_ERROR_TYPE_ITEM, item,
2361 "L3 Layer is missing");
2362 if (!mask)
2363 mask = &rte_flow_item_gre_mask;
2364 ret = mlx5_flow_item_acceptable
2365 (item, (const uint8_t *)mask,
2366 (const uint8_t *)&nic_mask,
2367 sizeof(struct rte_flow_item_gre), error);
2368 if (ret < 0)
2369 return ret;
2370 #ifndef HAVE_MLX5DV_DR
2371 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2372 if (spec && (spec->protocol & mask->protocol))
2373 return rte_flow_error_set(error, ENOTSUP,
2374 RTE_FLOW_ERROR_TYPE_ITEM, item,
2375 "without MPLS support the"
2376 " specification cannot be used for"
2377 " filtering");
2378 #endif
2379 #endif
2380 return 0;
2381 }
2382
2383 /**
2384 * Validate Geneve item.
2385 *
2386 * @param[in] item
2387 * Item specification.
2388 * @param[in] item_flags
2389 * Bit-fields that hold the items detected until now.
2390 * @param[in] dev
2391 * Pointer to the rte_eth_dev structure.
2392 * @param[out] error
2393 * Pointer to error structure.
2394 *
2395 * @return
2396 * 0 on success, a negative errno value otherwise and rte_errno is set.
2397 */
2398
2399 int
2400 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2401 uint64_t item_flags,
2402 struct rte_eth_dev *dev,
2403 struct rte_flow_error *error)
2404 {
2405 struct mlx5_priv *priv = dev->data->dev_private;
2406 const struct rte_flow_item_geneve *spec = item->spec;
2407 const struct rte_flow_item_geneve *mask = item->mask;
2408 int ret;
2409 uint16_t gbhdr;
2410 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2411 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2412 const struct rte_flow_item_geneve nic_mask = { 2413 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2414 .vni = "\xff\xff\xff", 2415 .protocol = RTE_BE16(UINT16_MAX), 2416 }; 2417 2418 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2419 return rte_flow_error_set(error, ENOTSUP, 2420 RTE_FLOW_ERROR_TYPE_ITEM, item, 2421 "L3 Geneve is not enabled by device" 2422 " parameter and/or not configured in" 2423 " firmware"); 2424 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2425 return rte_flow_error_set(error, ENOTSUP, 2426 RTE_FLOW_ERROR_TYPE_ITEM, item, 2427 "multiple tunnel layers not" 2428 " supported"); 2429 /* 2430 * Verify only UDPv4 is present as defined in 2431 * https://tools.ietf.org/html/rfc7348 2432 */ 2433 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2434 return rte_flow_error_set(error, EINVAL, 2435 RTE_FLOW_ERROR_TYPE_ITEM, item, 2436 "no outer UDP layer found"); 2437 if (!mask) 2438 mask = &rte_flow_item_geneve_mask; 2439 ret = mlx5_flow_item_acceptable 2440 (item, (const uint8_t *)mask, 2441 (const uint8_t *)&nic_mask, 2442 sizeof(struct rte_flow_item_geneve), error); 2443 if (ret) 2444 return ret; 2445 if (spec) { 2446 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2447 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2448 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2449 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2450 return rte_flow_error_set(error, ENOTSUP, 2451 RTE_FLOW_ERROR_TYPE_ITEM, 2452 item, 2453 "Geneve protocol unsupported" 2454 " fields are being used"); 2455 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2456 return rte_flow_error_set 2457 (error, ENOTSUP, 2458 RTE_FLOW_ERROR_TYPE_ITEM, 2459 item, 2460 "Unsupported Geneve options length"); 2461 } 2462 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2463 return rte_flow_error_set 2464 (error, ENOTSUP, 2465 RTE_FLOW_ERROR_TYPE_ITEM, item, 2466 "Geneve tunnel must be fully defined"); 2467 return 0; 2468 } 2469 2470 /** 2471 * Validate MPLS item. 2472 * 2473 * @param[in] dev 2474 * Pointer to the rte_eth_dev structure. 2475 * @param[in] item 2476 * Item specification. 2477 * @param[in] item_flags 2478 * Bit-fields that holds the items detected until now. 2479 * @param[in] prev_layer 2480 * The protocol layer indicated in previous item. 2481 * @param[out] error 2482 * Pointer to error structure. 2483 * 2484 * @return 2485 * 0 on success, a negative errno value otherwise and rte_errno is set. 2486 */ 2487 int 2488 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2489 const struct rte_flow_item *item __rte_unused, 2490 uint64_t item_flags __rte_unused, 2491 uint64_t prev_layer __rte_unused, 2492 struct rte_flow_error *error) 2493 { 2494 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2495 const struct rte_flow_item_mpls *mask = item->mask; 2496 struct mlx5_priv *priv = dev->data->dev_private; 2497 int ret; 2498 2499 if (!priv->config.mpls_en) 2500 return rte_flow_error_set(error, ENOTSUP, 2501 RTE_FLOW_ERROR_TYPE_ITEM, item, 2502 "MPLS not supported or" 2503 " disabled in firmware" 2504 " configuration."); 2505 /* MPLS over IP, UDP, GRE is allowed */ 2506 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2507 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2508 MLX5_FLOW_LAYER_GRE))) 2509 return rte_flow_error_set(error, EINVAL, 2510 RTE_FLOW_ERROR_TYPE_ITEM, item, 2511 "protocol filtering not compatible" 2512 " with MPLS layer"); 2513 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
*/ 2514 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2515 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2516 return rte_flow_error_set(error, ENOTSUP, 2517 RTE_FLOW_ERROR_TYPE_ITEM, item, 2518 "multiple tunnel layers not" 2519 " supported"); 2520 if (!mask) 2521 mask = &rte_flow_item_mpls_mask; 2522 ret = mlx5_flow_item_acceptable 2523 (item, (const uint8_t *)mask, 2524 (const uint8_t *)&rte_flow_item_mpls_mask, 2525 sizeof(struct rte_flow_item_mpls), error); 2526 if (ret < 0) 2527 return ret; 2528 return 0; 2529 #else 2530 return rte_flow_error_set(error, ENOTSUP, 2531 RTE_FLOW_ERROR_TYPE_ITEM, item, 2532 "MPLS is not supported by Verbs, please" 2533 " update."); 2534 #endif 2535 } 2536 2537 /** 2538 * Validate NVGRE item. 2539 * 2540 * @param[in] item 2541 * Item specification. 2542 * @param[in] item_flags 2543 * Bit flags to mark detected items. 2544 * @param[in] target_protocol 2545 * The next protocol in the previous item. 2546 * @param[out] error 2547 * Pointer to error structure. 2548 * 2549 * @return 2550 * 0 on success, a negative errno value otherwise and rte_errno is set. 2551 */ 2552 int 2553 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2554 uint64_t item_flags, 2555 uint8_t target_protocol, 2556 struct rte_flow_error *error) 2557 { 2558 const struct rte_flow_item_nvgre *mask = item->mask; 2559 int ret; 2560 2561 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2562 return rte_flow_error_set(error, EINVAL, 2563 RTE_FLOW_ERROR_TYPE_ITEM, item, 2564 "protocol filtering not compatible" 2565 " with this GRE layer"); 2566 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2567 return rte_flow_error_set(error, ENOTSUP, 2568 RTE_FLOW_ERROR_TYPE_ITEM, item, 2569 "multiple tunnel layers not" 2570 " supported"); 2571 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2572 return rte_flow_error_set(error, ENOTSUP, 2573 RTE_FLOW_ERROR_TYPE_ITEM, item, 2574 "L3 Layer is missing"); 2575 if (!mask) 2576 mask = &rte_flow_item_nvgre_mask; 2577 ret = mlx5_flow_item_acceptable 2578 (item, (const uint8_t *)mask, 2579 (const uint8_t *)&rte_flow_item_nvgre_mask, 2580 sizeof(struct rte_flow_item_nvgre), error); 2581 if (ret < 0) 2582 return ret; 2583 return 0; 2584 } 2585 2586 /** 2587 * Validate eCPRI item. 2588 * 2589 * @param[in] item 2590 * Item specification. 2591 * @param[in] item_flags 2592 * Bit-fields that holds the items detected until now. 2593 * @param[in] last_item 2594 * Previous validated item in the pattern items. 2595 * @param[in] ether_type 2596 * Type in the ethernet layer header (including dot1q). 2597 * @param[in] acc_mask 2598 * Acceptable mask, if NULL default internal default mask 2599 * will be used to check whether item fields are supported. 2600 * @param[out] error 2601 * Pointer to error structure. 2602 * 2603 * @return 2604 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2605 */ 2606 int 2607 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 2608 uint64_t item_flags, 2609 uint64_t last_item, 2610 uint16_t ether_type, 2611 const struct rte_flow_item_ecpri *acc_mask, 2612 struct rte_flow_error *error) 2613 { 2614 const struct rte_flow_item_ecpri *mask = item->mask; 2615 const struct rte_flow_item_ecpri nic_mask = { 2616 .hdr = { 2617 .common = { 2618 .u32 = 2619 RTE_BE32(((const struct rte_ecpri_common_hdr) { 2620 .type = 0xFF, 2621 }).u32), 2622 }, 2623 .dummy[0] = 0xFFFFFFFF, 2624 }, 2625 }; 2626 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 2627 MLX5_FLOW_LAYER_OUTER_VLAN); 2628 struct rte_flow_item_ecpri mask_lo; 2629 2630 if ((last_item & outer_l2_vlan) && ether_type && 2631 ether_type != RTE_ETHER_TYPE_ECPRI) 2632 return rte_flow_error_set(error, EINVAL, 2633 RTE_FLOW_ERROR_TYPE_ITEM, item, 2634 "eCPRI cannot follow L2/VLAN layer " 2635 "which ether type is not 0xAEFE."); 2636 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2637 return rte_flow_error_set(error, EINVAL, 2638 RTE_FLOW_ERROR_TYPE_ITEM, item, 2639 "eCPRI with tunnel is not supported " 2640 "right now."); 2641 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 2642 return rte_flow_error_set(error, ENOTSUP, 2643 RTE_FLOW_ERROR_TYPE_ITEM, item, 2644 "multiple L3 layers not supported"); 2645 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 2646 return rte_flow_error_set(error, EINVAL, 2647 RTE_FLOW_ERROR_TYPE_ITEM, item, 2648 "eCPRI cannot follow a TCP layer."); 2649 /* In specification, eCPRI could be over UDP layer. */ 2650 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 2651 return rte_flow_error_set(error, EINVAL, 2652 RTE_FLOW_ERROR_TYPE_ITEM, item, 2653 "eCPRI over UDP layer is not yet " 2654 "supported right now."); 2655 /* Mask for type field in common header could be zero. */ 2656 if (!mask) 2657 mask = &rte_flow_item_ecpri_mask; 2658 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 2659 /* Input mask is in big-endian format. */ 2660 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 2661 return rte_flow_error_set(error, EINVAL, 2662 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2663 "partial mask is not supported " 2664 "for protocol"); 2665 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 2666 return rte_flow_error_set(error, EINVAL, 2667 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2668 "message header mask must be after " 2669 "a type mask"); 2670 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2671 acc_mask ? (const uint8_t *)acc_mask 2672 : (const uint8_t *)&nic_mask, 2673 sizeof(struct rte_flow_item_ecpri), 2674 error); 2675 } 2676 2677 /* Allocate unique ID for the split Q/RSS subflows. */ 2678 static uint32_t 2679 flow_qrss_get_id(struct rte_eth_dev *dev) 2680 { 2681 struct mlx5_priv *priv = dev->data->dev_private; 2682 uint32_t qrss_id, ret; 2683 2684 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2685 if (ret) 2686 return 0; 2687 MLX5_ASSERT(qrss_id); 2688 return qrss_id; 2689 } 2690 2691 /* Free unique ID for the split Q/RSS subflows. */ 2692 static void 2693 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2694 { 2695 struct mlx5_priv *priv = dev->data->dev_private; 2696 2697 if (qrss_id) 2698 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2699 } 2700 2701 /** 2702 * Release resource related QUEUE/RSS action split. 2703 * 2704 * @param dev 2705 * Pointer to Ethernet device. 2706 * @param flow 2707 * Flow to release id's from. 
2708 */ 2709 static void 2710 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2711 struct rte_flow *flow) 2712 { 2713 struct mlx5_priv *priv = dev->data->dev_private; 2714 uint32_t handle_idx; 2715 struct mlx5_flow_handle *dev_handle; 2716 2717 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2718 handle_idx, dev_handle, next) 2719 if (dev_handle->split_flow_id) 2720 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2721 } 2722 2723 static int 2724 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2725 const struct rte_flow_attr *attr __rte_unused, 2726 const struct rte_flow_item items[] __rte_unused, 2727 const struct rte_flow_action actions[] __rte_unused, 2728 bool external __rte_unused, 2729 int hairpin __rte_unused, 2730 struct rte_flow_error *error) 2731 { 2732 return rte_flow_error_set(error, ENOTSUP, 2733 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2734 } 2735 2736 static struct mlx5_flow * 2737 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2738 const struct rte_flow_attr *attr __rte_unused, 2739 const struct rte_flow_item items[] __rte_unused, 2740 const struct rte_flow_action actions[] __rte_unused, 2741 struct rte_flow_error *error) 2742 { 2743 rte_flow_error_set(error, ENOTSUP, 2744 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2745 return NULL; 2746 } 2747 2748 static int 2749 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2750 struct mlx5_flow *dev_flow __rte_unused, 2751 const struct rte_flow_attr *attr __rte_unused, 2752 const struct rte_flow_item items[] __rte_unused, 2753 const struct rte_flow_action actions[] __rte_unused, 2754 struct rte_flow_error *error) 2755 { 2756 return rte_flow_error_set(error, ENOTSUP, 2757 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2758 } 2759 2760 static int 2761 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2762 struct rte_flow *flow __rte_unused, 2763 struct rte_flow_error *error) 2764 { 2765 return rte_flow_error_set(error, ENOTSUP, 2766 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2767 } 2768 2769 static void 2770 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2771 struct rte_flow *flow __rte_unused) 2772 { 2773 } 2774 2775 static void 2776 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2777 struct rte_flow *flow __rte_unused) 2778 { 2779 } 2780 2781 static int 2782 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2783 struct rte_flow *flow __rte_unused, 2784 const struct rte_flow_action *actions __rte_unused, 2785 void *data __rte_unused, 2786 struct rte_flow_error *error) 2787 { 2788 return rte_flow_error_set(error, ENOTSUP, 2789 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2790 } 2791 2792 /* Void driver to protect from null pointer reference. */ 2793 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2794 .validate = flow_null_validate, 2795 .prepare = flow_null_prepare, 2796 .translate = flow_null_translate, 2797 .apply = flow_null_apply, 2798 .remove = flow_null_remove, 2799 .destroy = flow_null_destroy, 2800 .query = flow_null_query, 2801 }; 2802 2803 /** 2804 * Select flow driver type according to flow attributes and device 2805 * configuration. 2806 * 2807 * @param[in] dev 2808 * Pointer to the dev structure. 2809 * @param[in] attr 2810 * Pointer to the flow attributes. 2811 * 2812 * @return 2813 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
2814 */ 2815 static enum mlx5_flow_drv_type 2816 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2817 { 2818 struct mlx5_priv *priv = dev->data->dev_private; 2819 /* The OS can determine first a specific flow type (DV, VERBS) */ 2820 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2821 2822 if (type != MLX5_FLOW_TYPE_MAX) 2823 return type; 2824 /* If no OS specific type - continue with DV/VERBS selection */ 2825 if (attr->transfer && priv->config.dv_esw_en) 2826 type = MLX5_FLOW_TYPE_DV; 2827 if (!attr->transfer) 2828 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2829 MLX5_FLOW_TYPE_VERBS; 2830 return type; 2831 } 2832 2833 #define flow_get_drv_ops(type) flow_drv_ops[type] 2834 2835 /** 2836 * Flow driver validation API. This abstracts calling driver specific functions. 2837 * The type of flow driver is determined according to flow attributes. 2838 * 2839 * @param[in] dev 2840 * Pointer to the dev structure. 2841 * @param[in] attr 2842 * Pointer to the flow attributes. 2843 * @param[in] items 2844 * Pointer to the list of items. 2845 * @param[in] actions 2846 * Pointer to the list of actions. 2847 * @param[in] external 2848 * This flow rule is created by request external to PMD. 2849 * @param[in] hairpin 2850 * Number of hairpin TX actions, 0 means classic flow. 2851 * @param[out] error 2852 * Pointer to the error structure. 2853 * 2854 * @return 2855 * 0 on success, a negative errno value otherwise and rte_errno is set. 2856 */ 2857 static inline int 2858 flow_drv_validate(struct rte_eth_dev *dev, 2859 const struct rte_flow_attr *attr, 2860 const struct rte_flow_item items[], 2861 const struct rte_flow_action actions[], 2862 bool external, int hairpin, struct rte_flow_error *error) 2863 { 2864 const struct mlx5_flow_driver_ops *fops; 2865 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2866 2867 fops = flow_get_drv_ops(type); 2868 return fops->validate(dev, attr, items, actions, external, 2869 hairpin, error); 2870 } 2871 2872 /** 2873 * Flow driver preparation API. This abstracts calling driver specific 2874 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2875 * calculates the size of memory required for device flow, allocates the memory, 2876 * initializes the device flow and returns the pointer. 2877 * 2878 * @note 2879 * This function initializes device flow structure such as dv or verbs in 2880 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2881 * rest. For example, adding returning device flow to flow->dev_flow list and 2882 * setting backward reference to the flow should be done out of this function. 2883 * layers field is not filled either. 2884 * 2885 * @param[in] dev 2886 * Pointer to the dev structure. 2887 * @param[in] attr 2888 * Pointer to the flow attributes. 2889 * @param[in] items 2890 * Pointer to the list of items. 2891 * @param[in] actions 2892 * Pointer to the list of actions. 2893 * @param[in] flow_idx 2894 * This memory pool index to the flow. 2895 * @param[out] error 2896 * Pointer to the error structure. 2897 * 2898 * @return 2899 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
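*
* Expected calling sequence (a simplified sketch; error handling and the
* bookkeeping described in the note above are omitted):
*
*     dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
*                                 flow_idx, error);
*     dev_flow->flow = flow;
*     ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
*     ret = flow_drv_apply(dev, flow, error);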
2900 */ 2901 static inline struct mlx5_flow * 2902 flow_drv_prepare(struct rte_eth_dev *dev, 2903 const struct rte_flow *flow, 2904 const struct rte_flow_attr *attr, 2905 const struct rte_flow_item items[], 2906 const struct rte_flow_action actions[], 2907 uint32_t flow_idx, 2908 struct rte_flow_error *error) 2909 { 2910 const struct mlx5_flow_driver_ops *fops; 2911 enum mlx5_flow_drv_type type = flow->drv_type; 2912 struct mlx5_flow *mlx5_flow = NULL; 2913 2914 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2915 fops = flow_get_drv_ops(type); 2916 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2917 if (mlx5_flow) 2918 mlx5_flow->flow_idx = flow_idx; 2919 return mlx5_flow; 2920 } 2921 2922 /** 2923 * Flow driver translation API. This abstracts calling driver specific 2924 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2925 * translates a generic flow into a driver flow. flow_drv_prepare() must 2926 * precede. 2927 * 2928 * @note 2929 * dev_flow->layers could be filled as a result of parsing during translation 2930 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2931 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2932 * flow->actions could be overwritten even though all the expanded dev_flows 2933 * have the same actions. 2934 * 2935 * @param[in] dev 2936 * Pointer to the rte dev structure. 2937 * @param[in, out] dev_flow 2938 * Pointer to the mlx5 flow. 2939 * @param[in] attr 2940 * Pointer to the flow attributes. 2941 * @param[in] items 2942 * Pointer to the list of items. 2943 * @param[in] actions 2944 * Pointer to the list of actions. 2945 * @param[out] error 2946 * Pointer to the error structure. 2947 * 2948 * @return 2949 * 0 on success, a negative errno value otherwise and rte_errno is set. 2950 */ 2951 static inline int 2952 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2953 const struct rte_flow_attr *attr, 2954 const struct rte_flow_item items[], 2955 const struct rte_flow_action actions[], 2956 struct rte_flow_error *error) 2957 { 2958 const struct mlx5_flow_driver_ops *fops; 2959 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2960 2961 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2962 fops = flow_get_drv_ops(type); 2963 return fops->translate(dev, dev_flow, attr, items, actions, error); 2964 } 2965 2966 /** 2967 * Flow driver apply API. This abstracts calling driver specific functions. 2968 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2969 * translated driver flows on to device. flow_drv_translate() must precede. 2970 * 2971 * @param[in] dev 2972 * Pointer to Ethernet device structure. 2973 * @param[in, out] flow 2974 * Pointer to flow structure. 2975 * @param[out] error 2976 * Pointer to error structure. 2977 * 2978 * @return 2979 * 0 on success, a negative errno value otherwise and rte_errno is set. 2980 */ 2981 static inline int 2982 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2983 struct rte_flow_error *error) 2984 { 2985 const struct mlx5_flow_driver_ops *fops; 2986 enum mlx5_flow_drv_type type = flow->drv_type; 2987 2988 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2989 fops = flow_get_drv_ops(type); 2990 return fops->apply(dev, flow, error); 2991 } 2992 2993 /** 2994 * Flow driver remove API. This abstracts calling driver specific functions. 2995 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 2996 * on device. All the resources of the flow should be freed by calling 2997 * flow_drv_destroy(). 2998 * 2999 * @param[in] dev 3000 * Pointer to Ethernet device. 3001 * @param[in, out] flow 3002 * Pointer to flow structure. 3003 */ 3004 static inline void 3005 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 3006 { 3007 const struct mlx5_flow_driver_ops *fops; 3008 enum mlx5_flow_drv_type type = flow->drv_type; 3009 3010 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3011 fops = flow_get_drv_ops(type); 3012 fops->remove(dev, flow); 3013 } 3014 3015 /** 3016 * Flow driver destroy API. This abstracts calling driver specific functions. 3017 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 3018 * on device and releases resources of the flow. 3019 * 3020 * @param[in] dev 3021 * Pointer to Ethernet device. 3022 * @param[in, out] flow 3023 * Pointer to flow structure. 3024 */ 3025 static inline void 3026 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 3027 { 3028 const struct mlx5_flow_driver_ops *fops; 3029 enum mlx5_flow_drv_type type = flow->drv_type; 3030 3031 flow_mreg_split_qrss_release(dev, flow); 3032 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 3033 fops = flow_get_drv_ops(type); 3034 fops->destroy(dev, flow); 3035 } 3036 3037 /** 3038 * Get RSS action from the action list. 3039 * 3040 * @param[in] actions 3041 * Pointer to the list of actions. 3042 * 3043 * @return 3044 * Pointer to the RSS action if exist, else return NULL. 3045 */ 3046 static const struct rte_flow_action_rss* 3047 flow_get_rss_action(const struct rte_flow_action actions[]) 3048 { 3049 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3050 switch (actions->type) { 3051 case RTE_FLOW_ACTION_TYPE_RSS: 3052 return (const struct rte_flow_action_rss *) 3053 actions->conf; 3054 default: 3055 break; 3056 } 3057 } 3058 return NULL; 3059 } 3060 3061 static unsigned int 3062 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 3063 { 3064 const struct rte_flow_item *item; 3065 unsigned int has_vlan = 0; 3066 3067 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 3068 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 3069 has_vlan = 1; 3070 break; 3071 } 3072 } 3073 if (has_vlan) 3074 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 3075 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 3076 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 3077 MLX5_EXPANSION_ROOT_OUTER; 3078 } 3079 3080 /** 3081 * Get layer flags from the prefix flow. 3082 * 3083 * Some flows may be split to several subflows, the prefix subflow gets the 3084 * match items and the suffix sub flow gets the actions. 3085 * Some actions need the user defined match item flags to get the detail for 3086 * the action. 3087 * This function helps the suffix flow to get the item layer flags from prefix 3088 * subflow. 3089 * 3090 * @param[in] dev_flow 3091 * Pointer the created preifx subflow. 3092 * 3093 * @return 3094 * The layers get from prefix subflow. 3095 */ 3096 static inline uint64_t 3097 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 3098 { 3099 uint64_t layers = 0; 3100 3101 /* 3102 * Layers bits could be localization, but usually the compiler will 3103 * help to do the optimization work for source code. 3104 * If no decap actions, use the layers directly. 
3105 */ 3106 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 3107 return dev_flow->handle->layers; 3108 /* Convert L3 layers with decap action. */ 3109 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 3110 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 3111 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 3112 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 3113 /* Convert L4 layers with decap action. */ 3114 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 3115 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 3116 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 3117 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 3118 return layers; 3119 } 3120 3121 /** 3122 * Get metadata split action information. 3123 * 3124 * @param[in] actions 3125 * Pointer to the list of actions. 3126 * @param[out] qrss 3127 * Pointer to the return pointer. 3128 * @param[out] qrss_type 3129 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 3130 * if no QUEUE/RSS is found. 3131 * @param[out] encap_idx 3132 * Pointer to the index of the encap action if exists, otherwise the last 3133 * action index. 3134 * 3135 * @return 3136 * Total number of actions. 3137 */ 3138 static int 3139 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 3140 const struct rte_flow_action **qrss, 3141 int *encap_idx) 3142 { 3143 const struct rte_flow_action_raw_encap *raw_encap; 3144 int actions_n = 0; 3145 int raw_decap_idx = -1; 3146 3147 *encap_idx = -1; 3148 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3149 switch (actions->type) { 3150 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3151 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3152 *encap_idx = actions_n; 3153 break; 3154 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3155 raw_decap_idx = actions_n; 3156 break; 3157 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3158 raw_encap = actions->conf; 3159 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3160 *encap_idx = raw_decap_idx != -1 ? 3161 raw_decap_idx : actions_n; 3162 break; 3163 case RTE_FLOW_ACTION_TYPE_QUEUE: 3164 case RTE_FLOW_ACTION_TYPE_RSS: 3165 *qrss = actions; 3166 break; 3167 default: 3168 break; 3169 } 3170 actions_n++; 3171 } 3172 if (*encap_idx == -1) 3173 *encap_idx = actions_n; 3174 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3175 return actions_n + 1; 3176 } 3177 3178 /** 3179 * Check meter action from the action list. 3180 * 3181 * @param[in] actions 3182 * Pointer to the list of actions. 3183 * @param[out] mtr 3184 * Pointer to the meter exist flag. 3185 * 3186 * @return 3187 * Total number of actions. 3188 */ 3189 static int 3190 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 3191 { 3192 int actions_n = 0; 3193 3194 MLX5_ASSERT(mtr); 3195 *mtr = 0; 3196 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3197 switch (actions->type) { 3198 case RTE_FLOW_ACTION_TYPE_METER: 3199 *mtr = 1; 3200 break; 3201 default: 3202 break; 3203 } 3204 actions_n++; 3205 } 3206 /* Count RTE_FLOW_ACTION_TYPE_END. */ 3207 return actions_n + 1; 3208 } 3209 3210 /** 3211 * Check if the flow should be split due to hairpin. 3212 * The reason for the split is that in current HW we can't 3213 * support encap and push-vlan on Rx, so if a flow contains 3214 * these actions we move it to Tx. 3215 * 3216 * @param dev 3217 * Pointer to Ethernet device. 3218 * @param[in] attr 3219 * Flow rule attributes. 3220 * @param[in] actions 3221 * Associated actions (list terminated by the END action). 
3222 * 3223 * @return 3224 * > 0 the number of actions and the flow should be split, 3225 * 0 when no split required. 3226 */ 3227 static int 3228 flow_check_hairpin_split(struct rte_eth_dev *dev, 3229 const struct rte_flow_attr *attr, 3230 const struct rte_flow_action actions[]) 3231 { 3232 int queue_action = 0; 3233 int action_n = 0; 3234 int split = 0; 3235 const struct rte_flow_action_queue *queue; 3236 const struct rte_flow_action_rss *rss; 3237 const struct rte_flow_action_raw_encap *raw_encap; 3238 3239 if (!attr->ingress) 3240 return 0; 3241 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3242 switch (actions->type) { 3243 case RTE_FLOW_ACTION_TYPE_QUEUE: 3244 queue = actions->conf; 3245 if (queue == NULL) 3246 return 0; 3247 if (mlx5_rxq_get_type(dev, queue->index) != 3248 MLX5_RXQ_TYPE_HAIRPIN) 3249 return 0; 3250 queue_action = 1; 3251 action_n++; 3252 break; 3253 case RTE_FLOW_ACTION_TYPE_RSS: 3254 rss = actions->conf; 3255 if (rss == NULL || rss->queue_num == 0) 3256 return 0; 3257 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 3258 MLX5_RXQ_TYPE_HAIRPIN) 3259 return 0; 3260 queue_action = 1; 3261 action_n++; 3262 break; 3263 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3264 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3265 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3266 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3267 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3268 split++; 3269 action_n++; 3270 break; 3271 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3272 raw_encap = actions->conf; 3273 if (raw_encap->size > 3274 (sizeof(struct rte_flow_item_eth) + 3275 sizeof(struct rte_flow_item_ipv4))) 3276 split++; 3277 action_n++; 3278 break; 3279 default: 3280 action_n++; 3281 break; 3282 } 3283 } 3284 if (split && queue_action) 3285 return action_n; 3286 return 0; 3287 } 3288 3289 /* Declare flow create/destroy prototype in advance. */ 3290 static uint32_t 3291 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 3292 const struct rte_flow_attr *attr, 3293 const struct rte_flow_item items[], 3294 const struct rte_flow_action actions[], 3295 bool external, struct rte_flow_error *error); 3296 3297 static void 3298 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 3299 uint32_t flow_idx); 3300 3301 /** 3302 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3303 * 3304 * As mark_id is unique, if there's already a registered flow for the mark_id, 3305 * return by increasing the reference counter of the resource. Otherwise, create 3306 * the resource (mcp_res) and flow. 3307 * 3308 * Flow looks like, 3309 * - If ingress port is ANY and reg_c[1] is mark_id, 3310 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3311 * 3312 * For default flow (zero mark_id), flow is like, 3313 * - If ingress port is ANY, 3314 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 3315 * 3316 * @param dev 3317 * Pointer to Ethernet device. 3318 * @param mark_id 3319 * ID of MARK action, zero means default flow for META. 3320 * @param[out] error 3321 * Perform verbose error reporting if not NULL. 3322 * 3323 * @return 3324 * Associated resource on success, NULL otherwise and rte_errno is set. 
3325 */ 3326 static struct mlx5_flow_mreg_copy_resource * 3327 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3328 struct rte_flow_error *error) 3329 { 3330 struct mlx5_priv *priv = dev->data->dev_private; 3331 struct rte_flow_attr attr = { 3332 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3333 .ingress = 1, 3334 }; 3335 struct mlx5_rte_flow_item_tag tag_spec = { 3336 .data = mark_id, 3337 }; 3338 struct rte_flow_item items[] = { 3339 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3340 }; 3341 struct rte_flow_action_mark ftag = { 3342 .id = mark_id, 3343 }; 3344 struct mlx5_flow_action_copy_mreg cp_mreg = { 3345 .dst = REG_B, 3346 .src = REG_NON, 3347 }; 3348 struct rte_flow_action_jump jump = { 3349 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3350 }; 3351 struct rte_flow_action actions[] = { 3352 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3353 }; 3354 struct mlx5_flow_mreg_copy_resource *mcp_res; 3355 uint32_t idx = 0; 3356 int ret; 3357 3358 /* Fill the register fileds in the flow. */ 3359 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3360 if (ret < 0) 3361 return NULL; 3362 tag_spec.id = ret; 3363 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3364 if (ret < 0) 3365 return NULL; 3366 cp_mreg.src = ret; 3367 /* Check if already registered. */ 3368 MLX5_ASSERT(priv->mreg_cp_tbl); 3369 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3370 if (mcp_res) { 3371 /* For non-default rule. */ 3372 if (mark_id != MLX5_DEFAULT_COPY_ID) 3373 mcp_res->refcnt++; 3374 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3375 mcp_res->refcnt == 1); 3376 return mcp_res; 3377 } 3378 /* Provide the full width of FLAG specific value. */ 3379 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3380 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3381 /* Build a new flow. */ 3382 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3383 items[0] = (struct rte_flow_item){ 3384 .type = (enum rte_flow_item_type) 3385 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3386 .spec = &tag_spec, 3387 }; 3388 items[1] = (struct rte_flow_item){ 3389 .type = RTE_FLOW_ITEM_TYPE_END, 3390 }; 3391 actions[0] = (struct rte_flow_action){ 3392 .type = (enum rte_flow_action_type) 3393 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3394 .conf = &ftag, 3395 }; 3396 actions[1] = (struct rte_flow_action){ 3397 .type = (enum rte_flow_action_type) 3398 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3399 .conf = &cp_mreg, 3400 }; 3401 actions[2] = (struct rte_flow_action){ 3402 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3403 .conf = &jump, 3404 }; 3405 actions[3] = (struct rte_flow_action){ 3406 .type = RTE_FLOW_ACTION_TYPE_END, 3407 }; 3408 } else { 3409 /* Default rule, wildcard match. */ 3410 attr.priority = MLX5_FLOW_PRIO_RSVD; 3411 items[0] = (struct rte_flow_item){ 3412 .type = RTE_FLOW_ITEM_TYPE_END, 3413 }; 3414 actions[0] = (struct rte_flow_action){ 3415 .type = (enum rte_flow_action_type) 3416 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3417 .conf = &cp_mreg, 3418 }; 3419 actions[1] = (struct rte_flow_action){ 3420 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3421 .conf = &jump, 3422 }; 3423 actions[2] = (struct rte_flow_action){ 3424 .type = RTE_FLOW_ACTION_TYPE_END, 3425 }; 3426 } 3427 /* Build a new entry. */ 3428 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3429 if (!mcp_res) { 3430 rte_errno = ENOMEM; 3431 return NULL; 3432 } 3433 mcp_res->idx = idx; 3434 /* 3435 * The copy Flows are not included in any list. 
There 3436 * ones are referenced from other Flows and can not 3437 * be applied, removed, deleted in ardbitrary order 3438 * by list traversing. 3439 */ 3440 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3441 actions, false, error); 3442 if (!mcp_res->rix_flow) 3443 goto error; 3444 mcp_res->refcnt++; 3445 mcp_res->hlist_ent.key = mark_id; 3446 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3447 &mcp_res->hlist_ent); 3448 MLX5_ASSERT(!ret); 3449 if (ret) 3450 goto error; 3451 return mcp_res; 3452 error: 3453 if (mcp_res->rix_flow) 3454 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3455 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3456 return NULL; 3457 } 3458 3459 /** 3460 * Release flow in RX_CP_TBL. 3461 * 3462 * @param dev 3463 * Pointer to Ethernet device. 3464 * @flow 3465 * Parent flow for wich copying is provided. 3466 */ 3467 static void 3468 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3469 struct rte_flow *flow) 3470 { 3471 struct mlx5_flow_mreg_copy_resource *mcp_res; 3472 struct mlx5_priv *priv = dev->data->dev_private; 3473 3474 if (!flow->rix_mreg_copy) 3475 return; 3476 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3477 flow->rix_mreg_copy); 3478 if (!mcp_res || !priv->mreg_cp_tbl) 3479 return; 3480 if (flow->copy_applied) { 3481 MLX5_ASSERT(mcp_res->appcnt); 3482 flow->copy_applied = 0; 3483 --mcp_res->appcnt; 3484 if (!mcp_res->appcnt) { 3485 struct rte_flow *mcp_flow = mlx5_ipool_get 3486 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3487 mcp_res->rix_flow); 3488 3489 if (mcp_flow) 3490 flow_drv_remove(dev, mcp_flow); 3491 } 3492 } 3493 /* 3494 * We do not check availability of metadata registers here, 3495 * because copy resources are not allocated in this case. 3496 */ 3497 if (--mcp_res->refcnt) 3498 return; 3499 MLX5_ASSERT(mcp_res->rix_flow); 3500 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3501 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3502 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3503 flow->rix_mreg_copy = 0; 3504 } 3505 3506 /** 3507 * Start flow in RX_CP_TBL. 3508 * 3509 * @param dev 3510 * Pointer to Ethernet device. 3511 * @flow 3512 * Parent flow for wich copying is provided. 3513 * 3514 * @return 3515 * 0 on success, a negative errno value otherwise and rte_errno is set. 3516 */ 3517 static int 3518 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3519 struct rte_flow *flow) 3520 { 3521 struct mlx5_flow_mreg_copy_resource *mcp_res; 3522 struct mlx5_priv *priv = dev->data->dev_private; 3523 int ret; 3524 3525 if (!flow->rix_mreg_copy || flow->copy_applied) 3526 return 0; 3527 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3528 flow->rix_mreg_copy); 3529 if (!mcp_res) 3530 return 0; 3531 if (!mcp_res->appcnt) { 3532 struct rte_flow *mcp_flow = mlx5_ipool_get 3533 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3534 mcp_res->rix_flow); 3535 3536 if (mcp_flow) { 3537 ret = flow_drv_apply(dev, mcp_flow, NULL); 3538 if (ret) 3539 return ret; 3540 } 3541 } 3542 ++mcp_res->appcnt; 3543 flow->copy_applied = 1; 3544 return 0; 3545 } 3546 3547 /** 3548 * Stop flow in RX_CP_TBL. 3549 * 3550 * @param dev 3551 * Pointer to Ethernet device. 3552 * @flow 3553 * Parent flow for wich copying is provided. 
3554 */ 3555 static void 3556 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3557 struct rte_flow *flow) 3558 { 3559 struct mlx5_flow_mreg_copy_resource *mcp_res; 3560 struct mlx5_priv *priv = dev->data->dev_private; 3561 3562 if (!flow->rix_mreg_copy || !flow->copy_applied) 3563 return; 3564 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3565 flow->rix_mreg_copy); 3566 if (!mcp_res) 3567 return; 3568 MLX5_ASSERT(mcp_res->appcnt); 3569 --mcp_res->appcnt; 3570 flow->copy_applied = 0; 3571 if (!mcp_res->appcnt) { 3572 struct rte_flow *mcp_flow = mlx5_ipool_get 3573 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3574 mcp_res->rix_flow); 3575 3576 if (mcp_flow) 3577 flow_drv_remove(dev, mcp_flow); 3578 } 3579 } 3580 3581 /** 3582 * Remove the default copy action from RX_CP_TBL. 3583 * 3584 * @param dev 3585 * Pointer to Ethernet device. 3586 */ 3587 static void 3588 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3589 { 3590 struct mlx5_flow_mreg_copy_resource *mcp_res; 3591 struct mlx5_priv *priv = dev->data->dev_private; 3592 3593 /* Check if default flow is registered. */ 3594 if (!priv->mreg_cp_tbl) 3595 return; 3596 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3597 MLX5_DEFAULT_COPY_ID); 3598 if (!mcp_res) 3599 return; 3600 MLX5_ASSERT(mcp_res->rix_flow); 3601 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3602 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3603 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3604 } 3605 3606 /** 3607 * Add the default copy action in in RX_CP_TBL. 3608 * 3609 * @param dev 3610 * Pointer to Ethernet device. 3611 * @param[out] error 3612 * Perform verbose error reporting if not NULL. 3613 * 3614 * @return 3615 * 0 for success, negative value otherwise and rte_errno is set. 3616 */ 3617 static int 3618 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3619 struct rte_flow_error *error) 3620 { 3621 struct mlx5_priv *priv = dev->data->dev_private; 3622 struct mlx5_flow_mreg_copy_resource *mcp_res; 3623 3624 /* Check whether extensive metadata feature is engaged. */ 3625 if (!priv->config.dv_flow_en || 3626 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3627 !mlx5_flow_ext_mreg_supported(dev) || 3628 !priv->sh->dv_regc0_mask) 3629 return 0; 3630 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3631 if (!mcp_res) 3632 return -rte_errno; 3633 return 0; 3634 } 3635 3636 /** 3637 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3638 * 3639 * All the flow having Q/RSS action should be split by 3640 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3641 * performs the following, 3642 * - CQE->flow_tag := reg_c[1] (MARK) 3643 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3644 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3645 * but there should be a flow per each MARK ID set by MARK action. 3646 * 3647 * For the aforementioned reason, if there's a MARK action in flow's action 3648 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3649 * the MARK ID to CQE's flow_tag like, 3650 * - If reg_c[1] is mark_id, 3651 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3652 * 3653 * For SET_META action which stores value in reg_c[0], as the destination is 3654 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3655 * MARK ID means the default flow. 
The default flow looks like, 3656 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3657 * 3658 * @param dev 3659 * Pointer to Ethernet device. 3660 * @param flow 3661 * Pointer to flow structure. 3662 * @param[in] actions 3663 * Pointer to the list of actions. 3664 * @param[out] error 3665 * Perform verbose error reporting if not NULL. 3666 * 3667 * @return 3668 * 0 on success, negative value otherwise and rte_errno is set. 3669 */ 3670 static int 3671 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3672 struct rte_flow *flow, 3673 const struct rte_flow_action *actions, 3674 struct rte_flow_error *error) 3675 { 3676 struct mlx5_priv *priv = dev->data->dev_private; 3677 struct mlx5_dev_config *config = &priv->config; 3678 struct mlx5_flow_mreg_copy_resource *mcp_res; 3679 const struct rte_flow_action_mark *mark; 3680 3681 /* Check whether extensive metadata feature is engaged. */ 3682 if (!config->dv_flow_en || 3683 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3684 !mlx5_flow_ext_mreg_supported(dev) || 3685 !priv->sh->dv_regc0_mask) 3686 return 0; 3687 /* Find MARK action. */ 3688 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3689 switch (actions->type) { 3690 case RTE_FLOW_ACTION_TYPE_FLAG: 3691 mcp_res = flow_mreg_add_copy_action 3692 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3693 if (!mcp_res) 3694 return -rte_errno; 3695 flow->rix_mreg_copy = mcp_res->idx; 3696 if (dev->data->dev_started) { 3697 mcp_res->appcnt++; 3698 flow->copy_applied = 1; 3699 } 3700 return 0; 3701 case RTE_FLOW_ACTION_TYPE_MARK: 3702 mark = (const struct rte_flow_action_mark *) 3703 actions->conf; 3704 mcp_res = 3705 flow_mreg_add_copy_action(dev, mark->id, error); 3706 if (!mcp_res) 3707 return -rte_errno; 3708 flow->rix_mreg_copy = mcp_res->idx; 3709 if (dev->data->dev_started) { 3710 mcp_res->appcnt++; 3711 flow->copy_applied = 1; 3712 } 3713 return 0; 3714 default: 3715 break; 3716 } 3717 } 3718 return 0; 3719 } 3720 3721 #define MLX5_MAX_SPLIT_ACTIONS 24 3722 #define MLX5_MAX_SPLIT_ITEMS 24 3723 3724 /** 3725 * Split the hairpin flow. 3726 * Since HW can't support encap and push-vlan on Rx, we move these 3727 * actions to Tx. 3728 * If the count action is after the encap then we also 3729 * move the count action. in this case the count will also measure 3730 * the outer bytes. 3731 * 3732 * @param dev 3733 * Pointer to Ethernet device. 3734 * @param[in] actions 3735 * Associated actions (list terminated by the END action). 3736 * @param[out] actions_rx 3737 * Rx flow actions. 3738 * @param[out] actions_tx 3739 * Tx flow actions.. 3740 * @param[out] pattern_tx 3741 * The pattern items for the Tx flow. 3742 * @param[out] flow_id 3743 * The flow ID connected to this flow. 3744 * 3745 * @return 3746 * 0 on success. 
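 *
 * A hedged illustration (derived from the switch below, not an
 * exhaustive contract): for application actions
 *   [OF_PUSH_VLAN, OF_SET_VLAN_VID, QUEUE, END]
 * the split is expected to produce
 *   actions_rx: [QUEUE, TAG(reg := *flow_id), END]
 *   actions_tx: [OF_PUSH_VLAN, OF_SET_VLAN_VID, END]
 * while pattern_tx matches the internal TAG item carrying *flow_id so
 * that the Tx side can recognize packets of this hairpin flow.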
3747 */ 3748 static int 3749 flow_hairpin_split(struct rte_eth_dev *dev, 3750 const struct rte_flow_action actions[], 3751 struct rte_flow_action actions_rx[], 3752 struct rte_flow_action actions_tx[], 3753 struct rte_flow_item pattern_tx[], 3754 uint32_t *flow_id) 3755 { 3756 struct mlx5_priv *priv = dev->data->dev_private; 3757 const struct rte_flow_action_raw_encap *raw_encap; 3758 const struct rte_flow_action_raw_decap *raw_decap; 3759 struct mlx5_rte_flow_action_set_tag *set_tag; 3760 struct rte_flow_action *tag_action; 3761 struct mlx5_rte_flow_item_tag *tag_item; 3762 struct rte_flow_item *item; 3763 char *addr; 3764 int encap = 0; 3765 3766 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3767 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3768 switch (actions->type) { 3769 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3770 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3771 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3772 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3773 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3774 rte_memcpy(actions_tx, actions, 3775 sizeof(struct rte_flow_action)); 3776 actions_tx++; 3777 break; 3778 case RTE_FLOW_ACTION_TYPE_COUNT: 3779 if (encap) { 3780 rte_memcpy(actions_tx, actions, 3781 sizeof(struct rte_flow_action)); 3782 actions_tx++; 3783 } else { 3784 rte_memcpy(actions_rx, actions, 3785 sizeof(struct rte_flow_action)); 3786 actions_rx++; 3787 } 3788 break; 3789 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3790 raw_encap = actions->conf; 3791 if (raw_encap->size > 3792 (sizeof(struct rte_flow_item_eth) + 3793 sizeof(struct rte_flow_item_ipv4))) { 3794 memcpy(actions_tx, actions, 3795 sizeof(struct rte_flow_action)); 3796 actions_tx++; 3797 encap = 1; 3798 } else { 3799 rte_memcpy(actions_rx, actions, 3800 sizeof(struct rte_flow_action)); 3801 actions_rx++; 3802 } 3803 break; 3804 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3805 raw_decap = actions->conf; 3806 if (raw_decap->size < 3807 (sizeof(struct rte_flow_item_eth) + 3808 sizeof(struct rte_flow_item_ipv4))) { 3809 memcpy(actions_tx, actions, 3810 sizeof(struct rte_flow_action)); 3811 actions_tx++; 3812 } else { 3813 rte_memcpy(actions_rx, actions, 3814 sizeof(struct rte_flow_action)); 3815 actions_rx++; 3816 } 3817 break; 3818 default: 3819 rte_memcpy(actions_rx, actions, 3820 sizeof(struct rte_flow_action)); 3821 actions_rx++; 3822 break; 3823 } 3824 } 3825 /* Add set meta action and end action for the Rx flow. */ 3826 tag_action = actions_rx; 3827 tag_action->type = (enum rte_flow_action_type) 3828 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3829 actions_rx++; 3830 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3831 actions_rx++; 3832 set_tag = (void *)actions_rx; 3833 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3834 MLX5_ASSERT(set_tag->id > REG_NON); 3835 set_tag->data = *flow_id; 3836 tag_action->conf = set_tag; 3837 /* Create Tx item list. 
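 * A hedged note on the layout used below: pattern_tx[0] becomes the
 * internal TAG item, pattern_tx[1] is END, and the TAG spec/mask blobs
 * are stored in the caller's buffer starting at &pattern_tx[2].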
*/ 3838 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3839 addr = (void *)&pattern_tx[2]; 3840 item = pattern_tx; 3841 item->type = (enum rte_flow_item_type) 3842 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3843 tag_item = (void *)addr; 3844 tag_item->data = *flow_id; 3845 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3846 MLX5_ASSERT(set_tag->id > REG_NON); 3847 item->spec = tag_item; 3848 addr += sizeof(struct mlx5_rte_flow_item_tag); 3849 tag_item = (void *)addr; 3850 tag_item->data = UINT32_MAX; 3851 tag_item->id = UINT16_MAX; 3852 item->mask = tag_item; 3853 item->last = NULL; 3854 item++; 3855 item->type = RTE_FLOW_ITEM_TYPE_END; 3856 return 0; 3857 } 3858 3859 /** 3860 * The last stage of splitting chain, just creates the subflow 3861 * without any modification. 3862 * 3863 * @param[in] dev 3864 * Pointer to Ethernet device. 3865 * @param[in] flow 3866 * Parent flow structure pointer. 3867 * @param[in, out] sub_flow 3868 * Pointer to return the created subflow, may be NULL. 3869 * @param[in] prefix_layers 3870 * Prefix subflow layers, may be 0. 3871 * @param[in] prefix_mark 3872 * Prefix subflow mark flag, may be 0. 3873 * @param[in] attr 3874 * Flow rule attributes. 3875 * @param[in] items 3876 * Pattern specification (list terminated by the END pattern item). 3877 * @param[in] actions 3878 * Associated actions (list terminated by the END action). 3879 * @param[in] external 3880 * This flow rule is created by request external to PMD. 3881 * @param[in] flow_idx 3882 * This memory pool index to the flow. 3883 * @param[out] error 3884 * Perform verbose error reporting if not NULL. 3885 * @return 3886 * 0 on success, negative value otherwise 3887 */ 3888 static int 3889 flow_create_split_inner(struct rte_eth_dev *dev, 3890 struct rte_flow *flow, 3891 struct mlx5_flow **sub_flow, 3892 uint64_t prefix_layers, 3893 uint32_t prefix_mark, 3894 const struct rte_flow_attr *attr, 3895 const struct rte_flow_item items[], 3896 const struct rte_flow_action actions[], 3897 bool external, uint32_t flow_idx, 3898 struct rte_flow_error *error) 3899 { 3900 struct mlx5_flow *dev_flow; 3901 3902 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3903 flow_idx, error); 3904 if (!dev_flow) 3905 return -rte_errno; 3906 dev_flow->flow = flow; 3907 dev_flow->external = external; 3908 /* Subflow object was created, we must include one in the list. */ 3909 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3910 dev_flow->handle, next); 3911 /* 3912 * If dev_flow is as one of the suffix flow, some actions in suffix 3913 * flow may need some user defined item layer flags, and pass the 3914 * Metadate rxq mark flag to suffix flow as well. 3915 */ 3916 if (prefix_layers) 3917 dev_flow->handle->layers = prefix_layers; 3918 if (prefix_mark) 3919 dev_flow->handle->mark = 1; 3920 if (sub_flow) 3921 *sub_flow = dev_flow; 3922 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3923 } 3924 3925 /** 3926 * Split the meter flow. 3927 * 3928 * As meter flow will split to three sub flow, other than meter 3929 * action, the other actions make sense to only meter accepts 3930 * the packet. If it need to be dropped, no other additional 3931 * actions should be take. 3932 * 3933 * One kind of special action which decapsulates the L3 tunnel 3934 * header will be in the prefix sub flow, as not to take the 3935 * L3 tunnel header into account. 3936 * 3937 * @param dev 3938 * Pointer to Ethernet device. 
3939 * @param[in] items 3940 * Pattern specification (list terminated by the END pattern item). 3941 * @param[out] sfx_items 3942 * Suffix flow match items (list terminated by the END pattern item). 3943 * @param[in] actions 3944 * Associated actions (list terminated by the END action). 3945 * @param[out] actions_sfx 3946 * Suffix flow actions. 3947 * @param[out] actions_pre 3948 * Prefix flow actions. 3949 * @param[out] pattern_sfx 3950 * The pattern items for the suffix flow. 3951 * @param[out] tag_sfx 3952 * Pointer to suffix flow tag. 3953 * 3954 * @return 3955 * 0 on success. 3956 */ 3957 static int 3958 flow_meter_split_prep(struct rte_eth_dev *dev, 3959 const struct rte_flow_item items[], 3960 struct rte_flow_item sfx_items[], 3961 const struct rte_flow_action actions[], 3962 struct rte_flow_action actions_sfx[], 3963 struct rte_flow_action actions_pre[]) 3964 { 3965 struct rte_flow_action *tag_action = NULL; 3966 struct rte_flow_item *tag_item; 3967 struct mlx5_rte_flow_action_set_tag *set_tag; 3968 struct rte_flow_error error; 3969 const struct rte_flow_action_raw_encap *raw_encap; 3970 const struct rte_flow_action_raw_decap *raw_decap; 3971 struct mlx5_rte_flow_item_tag *tag_spec; 3972 struct mlx5_rte_flow_item_tag *tag_mask; 3973 uint32_t tag_id; 3974 bool copy_vlan = false; 3975 3976 /* Prepare the actions for prefix and suffix flow. */ 3977 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3978 struct rte_flow_action **action_cur = NULL; 3979 3980 switch (actions->type) { 3981 case RTE_FLOW_ACTION_TYPE_METER: 3982 /* Add the extra tag action first. */ 3983 tag_action = actions_pre; 3984 tag_action->type = (enum rte_flow_action_type) 3985 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3986 actions_pre++; 3987 action_cur = &actions_pre; 3988 break; 3989 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 3990 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 3991 action_cur = &actions_pre; 3992 break; 3993 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3994 raw_encap = actions->conf; 3995 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 3996 action_cur = &actions_pre; 3997 break; 3998 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3999 raw_decap = actions->conf; 4000 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 4001 action_cur = &actions_pre; 4002 break; 4003 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 4004 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 4005 copy_vlan = true; 4006 break; 4007 default: 4008 break; 4009 } 4010 if (!action_cur) 4011 action_cur = &actions_sfx; 4012 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 4013 (*action_cur)++; 4014 } 4015 /* Add end action to the actions. */ 4016 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 4017 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 4018 actions_pre++; 4019 /* Set the tag. */ 4020 set_tag = (void *)actions_pre; 4021 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4022 /* 4023 * Get the id from the qrss_pool to make qrss share the id with meter. 4024 */ 4025 tag_id = flow_qrss_get_id(dev); 4026 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 4027 assert(tag_action); 4028 tag_action->conf = set_tag; 4029 /* Prepare the suffix subflow items. 
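 * A hedged sketch of the result (slot 0 is reserved here and filled at
 * the end of this function):
 *   sfx_items: [TAG(reg == tag_id << MLX5_MTR_COLOR_BITS),
 *               <copied PORT_ID/VLAN items, if any>, END]
 * with the TAG spec and mask blobs placed right after the END item in
 * the same caller-provided buffer.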
*/ 4030 tag_item = sfx_items++; 4031 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 4032 int item_type = items->type; 4033 4034 switch (item_type) { 4035 case RTE_FLOW_ITEM_TYPE_PORT_ID: 4036 memcpy(sfx_items, items, sizeof(*sfx_items)); 4037 sfx_items++; 4038 break; 4039 case RTE_FLOW_ITEM_TYPE_VLAN: 4040 if (copy_vlan) { 4041 memcpy(sfx_items, items, sizeof(*sfx_items)); 4042 /* 4043 * Convert to internal match item, it is used 4044 * for vlan push and set vid. 4045 */ 4046 sfx_items->type = (enum rte_flow_item_type) 4047 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 4048 sfx_items++; 4049 } 4050 break; 4051 default: 4052 break; 4053 } 4054 } 4055 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 4056 sfx_items++; 4057 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 4058 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 4059 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 4060 tag_mask = tag_spec + 1; 4061 tag_mask->data = 0xffffff00; 4062 tag_item->type = (enum rte_flow_item_type) 4063 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 4064 tag_item->spec = tag_spec; 4065 tag_item->last = NULL; 4066 tag_item->mask = tag_mask; 4067 return tag_id; 4068 } 4069 4070 /** 4071 * Split action list having QUEUE/RSS for metadata register copy. 4072 * 4073 * Once Q/RSS action is detected in user's action list, the flow action 4074 * should be split in order to copy metadata registers, which will happen in 4075 * RX_CP_TBL like, 4076 * - CQE->flow_tag := reg_c[1] (MARK) 4077 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 4078 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 4079 * This is because the last action of each flow must be a terminal action 4080 * (QUEUE, RSS or DROP). 4081 * 4082 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 4083 * stored and kept in the mlx5_flow structure per each sub_flow. 4084 * 4085 * The Q/RSS action is replaced with, 4086 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 4087 * And the following JUMP action is added at the end, 4088 * - JUMP, to RX_CP_TBL. 4089 * 4090 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 4091 * flow_create_split_metadata() routine. The flow will look like, 4092 * - If flow ID matches (reg_c[2]), perform Q/RSS. 4093 * 4094 * @param dev 4095 * Pointer to Ethernet device. 4096 * @param[out] split_actions 4097 * Pointer to store split actions to jump to CP_TBL. 4098 * @param[in] actions 4099 * Pointer to the list of original flow actions. 4100 * @param[in] qrss 4101 * Pointer to the Q/RSS action. 4102 * @param[in] actions_n 4103 * Number of original actions. 4104 * @param[out] error 4105 * Perform verbose error reporting if not NULL. 4106 * 4107 * @return 4108 * non-zero unique flow_id on success, otherwise 0 and 4109 * error/rte_error are set. 4110 */ 4111 static uint32_t 4112 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 4113 struct rte_flow_action *split_actions, 4114 const struct rte_flow_action *actions, 4115 const struct rte_flow_action *qrss, 4116 int actions_n, struct rte_flow_error *error) 4117 { 4118 struct mlx5_rte_flow_action_set_tag *set_tag; 4119 struct rte_flow_action_jump *jump; 4120 const int qrss_idx = qrss - actions; 4121 uint32_t flow_id = 0; 4122 int ret = 0; 4123 4124 /* 4125 * Given actions will be split 4126 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 4127 * - Add jump to mreg CP_TBL. 4128 * As a result, there will be one more action. 
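 * Illustrative layout (a sketch assuming the original list is
 * [MARK, RSS, END], i.e. actions_n == 3 before the increment):
 *   split_actions: [MARK, TAG(flow_id -> reg_c[2]), JUMP(CP_TBL), END]
 * The SET_TAG and JUMP configurations are stored right after the
 * action array in the same caller-preallocated buffer.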
4129 */ 4130 ++actions_n; 4131 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 4132 set_tag = (void *)(split_actions + actions_n); 4133 /* 4134 * If the tag action is not set to void (it means we are not the meter 4135 * suffix flow), add the tag action; the meter suffix flow already 4136 * has the tag added. 4137 */ 4138 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 4139 /* 4140 * Allocate the new subflow ID. This one is unique within 4141 * the device and not shared with representors. Otherwise, 4142 * we would have to resolve multi-thread access synch 4143 * issue. Each flow on the shared device is appended 4144 * with source vport identifier, so the resulting 4145 * flows will be unique in the shared (by master and 4146 * representors) domain even if they have coinciding 4147 * IDs. 4148 */ 4149 flow_id = flow_qrss_get_id(dev); 4150 if (!flow_id) 4151 return rte_flow_error_set(error, ENOMEM, 4152 RTE_FLOW_ERROR_TYPE_ACTION, 4153 NULL, "can't allocate id " 4154 "for split Q/RSS subflow"); 4155 /* Internal SET_TAG action to set flow ID. */ 4156 *set_tag = (struct mlx5_rte_flow_action_set_tag){ 4157 .data = flow_id, 4158 }; 4159 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error); 4160 if (ret < 0) 4161 return ret; 4162 set_tag->id = ret; 4163 /* Construct new actions array. */ 4164 /* Replace QUEUE/RSS action. */ 4165 split_actions[qrss_idx] = (struct rte_flow_action){ 4166 .type = (enum rte_flow_action_type) 4167 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4168 .conf = set_tag, 4169 }; 4170 } 4171 /* JUMP action to jump to mreg copy table (CP_TBL). */ 4172 jump = (void *)(set_tag + 1); 4173 *jump = (struct rte_flow_action_jump){ 4174 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 4175 }; 4176 split_actions[actions_n - 2] = (struct rte_flow_action){ 4177 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4178 .conf = jump, 4179 }; 4180 split_actions[actions_n - 1] = (struct rte_flow_action){ 4181 .type = RTE_FLOW_ACTION_TYPE_END, 4182 }; 4183 return flow_id; 4184 } 4185 4186 /** 4187 * Extend the given action list for Tx metadata copy. 4188 * 4189 * Copy the given action list to ext_actions and add a flow metadata register 4190 * copy action in order to copy reg_a set by WQE to reg_c[0]. 4191 * 4192 * @param[out] ext_actions 4193 * Pointer to the extended action list. 4194 * @param[in] actions 4195 * Pointer to the list of actions. 4196 * @param[in] actions_n 4197 * Number of actions in the list. 4198 * @param[out] error 4199 * Perform verbose error reporting if not NULL. 4200 * @param[in] encap_idx 4201 * The encap action index.
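 *   (illustrative, a hedged reading of the code below: with actions
 *   [VXLAN_ENCAP, END] and encap_idx == 0, the expected result is
 *   [COPY_MREG, VXLAN_ENCAP, END]; when encap_idx points at the
 *   terminating END, i.e. there is no encap action, the copy action is
 *   appended just before END)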
4202 * 4203 * @return 4204 * 0 on success, negative value otherwise 4205 */ 4206 static int 4207 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 4208 struct rte_flow_action *ext_actions, 4209 const struct rte_flow_action *actions, 4210 int actions_n, struct rte_flow_error *error, 4211 int encap_idx) 4212 { 4213 struct mlx5_flow_action_copy_mreg *cp_mreg = 4214 (struct mlx5_flow_action_copy_mreg *) 4215 (ext_actions + actions_n + 1); 4216 int ret; 4217 4218 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 4219 if (ret < 0) 4220 return ret; 4221 cp_mreg->dst = ret; 4222 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 4223 if (ret < 0) 4224 return ret; 4225 cp_mreg->src = ret; 4226 if (encap_idx != 0) 4227 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 4228 if (encap_idx == actions_n - 1) { 4229 ext_actions[actions_n - 1] = (struct rte_flow_action){ 4230 .type = (enum rte_flow_action_type) 4231 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4232 .conf = cp_mreg, 4233 }; 4234 ext_actions[actions_n] = (struct rte_flow_action){ 4235 .type = RTE_FLOW_ACTION_TYPE_END, 4236 }; 4237 } else { 4238 ext_actions[encap_idx] = (struct rte_flow_action){ 4239 .type = (enum rte_flow_action_type) 4240 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4241 .conf = cp_mreg, 4242 }; 4243 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 4244 sizeof(*ext_actions) * (actions_n - encap_idx)); 4245 } 4246 return 0; 4247 } 4248 4249 /** 4250 * Check the match action from the action list. 4251 * 4252 * @param[in] actions 4253 * Pointer to the list of actions. 4254 * @param[in] action 4255 * The action to be check if exist. 4256 * @param[out] match_action_pos 4257 * Pointer to the position of the matched action if exists, otherwise is -1. 4258 * @param[out] qrss_action_pos 4259 * Pointer to the position of the Queue/RSS action if exists, otherwise is -1. 4260 * 4261 * @return 4262 * > 0 the total number of actions. 4263 * 0 if not found match action in action list. 4264 */ 4265 static int 4266 flow_check_match_action(const struct rte_flow_action actions[], 4267 enum rte_flow_action_type action, 4268 int *match_action_pos, int *qrss_action_pos) 4269 { 4270 int actions_n = 0; 4271 int flag = 0; 4272 4273 *match_action_pos = -1; 4274 *qrss_action_pos = -1; 4275 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 4276 if (actions->type == action) { 4277 flag = 1; 4278 *match_action_pos = actions_n; 4279 } 4280 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE || 4281 actions->type == RTE_FLOW_ACTION_TYPE_RSS) 4282 *qrss_action_pos = actions_n; 4283 actions_n++; 4284 } 4285 /* Count RTE_FLOW_ACTION_TYPE_END. */ 4286 return flag ? actions_n + 1 : 0; 4287 } 4288 4289 #define SAMPLE_SUFFIX_ITEM 2 4290 4291 /** 4292 * Split the sample flow. 4293 * 4294 * As sample flow will split to two sub flow, sample flow with 4295 * sample action, the other actions will move to new suffix flow. 4296 * 4297 * Also add unique tag id with tag action in the sample flow, 4298 * the same tag id will be as match in the suffix flow. 4299 * 4300 * @param dev 4301 * Pointer to Ethernet device. 4302 * @param[in] fdb_tx 4303 * FDB egress flow flag. 4304 * @param[out] sfx_items 4305 * Suffix flow match items (list terminated by the END pattern item). 4306 * @param[in] actions 4307 * Associated actions (list terminated by the END action). 4308 * @param[out] actions_sfx 4309 * Suffix flow actions. 4310 * @param[out] actions_pre 4311 * Prefix flow actions. 4312 * @param[in] actions_n 4313 * The total number of actions. 
4314 * @param[in] sample_action_pos 4315 * The sample action position. 4316 * @param[in] qrss_action_pos 4317 * The Queue/RSS action position. 4318 * @param[out] error 4319 * Perform verbose error reporting if not NULL. 4320 * 4321 * @return 4322 * 0 on success, or unique flow_id, a negative errno value 4323 * otherwise and rte_errno is set. 4324 */ 4325 static int 4326 flow_sample_split_prep(struct rte_eth_dev *dev, 4327 uint32_t fdb_tx, 4328 struct rte_flow_item sfx_items[], 4329 const struct rte_flow_action actions[], 4330 struct rte_flow_action actions_sfx[], 4331 struct rte_flow_action actions_pre[], 4332 int actions_n, 4333 int sample_action_pos, 4334 int qrss_action_pos, 4335 struct rte_flow_error *error) 4336 { 4337 struct mlx5_rte_flow_action_set_tag *set_tag; 4338 struct mlx5_rte_flow_item_tag *tag_spec; 4339 struct mlx5_rte_flow_item_tag *tag_mask; 4340 uint32_t tag_id = 0; 4341 int index; 4342 int ret; 4343 4344 if (sample_action_pos < 0) 4345 return rte_flow_error_set(error, EINVAL, 4346 RTE_FLOW_ERROR_TYPE_ACTION, 4347 NULL, "invalid position of sample " 4348 "action in list"); 4349 if (!fdb_tx) { 4350 /* Prepare the prefix tag action. */ 4351 set_tag = (void *)(actions_pre + actions_n + 1); 4352 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); 4353 if (ret < 0) 4354 return ret; 4355 set_tag->id = ret; 4356 tag_id = flow_qrss_get_id(dev); 4357 set_tag->data = tag_id; 4358 /* Prepare the suffix subflow items. */ 4359 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); 4360 tag_spec->data = tag_id; 4361 tag_spec->id = set_tag->id; 4362 tag_mask = tag_spec + 1; 4363 tag_mask->data = UINT32_MAX; 4364 sfx_items[0] = (struct rte_flow_item){ 4365 .type = (enum rte_flow_item_type) 4366 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4367 .spec = tag_spec, 4368 .last = NULL, 4369 .mask = tag_mask, 4370 }; 4371 sfx_items[1] = (struct rte_flow_item){ 4372 .type = (enum rte_flow_item_type) 4373 RTE_FLOW_ITEM_TYPE_END, 4374 }; 4375 } 4376 /* Prepare the actions for prefix and suffix flow. */ 4377 if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) { 4378 index = qrss_action_pos; 4379 /* Put the preceding the Queue/RSS action into prefix flow. */ 4380 if (index != 0) 4381 memcpy(actions_pre, actions, 4382 sizeof(struct rte_flow_action) * index); 4383 /* Put others preceding the sample action into prefix flow. */ 4384 if (sample_action_pos > index + 1) 4385 memcpy(actions_pre + index, actions + index + 1, 4386 sizeof(struct rte_flow_action) * 4387 (sample_action_pos - index - 1)); 4388 index = sample_action_pos - 1; 4389 /* Put Queue/RSS action into Suffix flow. */ 4390 memcpy(actions_sfx, actions + qrss_action_pos, 4391 sizeof(struct rte_flow_action)); 4392 actions_sfx++; 4393 } else { 4394 index = sample_action_pos; 4395 if (index != 0) 4396 memcpy(actions_pre, actions, 4397 sizeof(struct rte_flow_action) * index); 4398 } 4399 /* Add the extra tag action for NIC-RX and E-Switch ingress. */ 4400 if (!fdb_tx) { 4401 actions_pre[index++] = 4402 (struct rte_flow_action){ 4403 .type = (enum rte_flow_action_type) 4404 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 4405 .conf = set_tag, 4406 }; 4407 } 4408 memcpy(actions_pre + index, actions + sample_action_pos, 4409 sizeof(struct rte_flow_action)); 4410 index += 1; 4411 actions_pre[index] = (struct rte_flow_action){ 4412 .type = (enum rte_flow_action_type) 4413 RTE_FLOW_ACTION_TYPE_END, 4414 }; 4415 /* Put the actions after sample into Suffix flow. 
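 * Illustrative example (a sketch for the NIC-Rx case, fdb_tx == 0):
 * for application actions [COUNT, SAMPLE, QUEUE, END]
 *   - prefix: [COUNT, TAG(tag_id), SAMPLE, END]
 *   - suffix: match the internal TAG(tag_id) item, actions
 *     [QUEUE, END]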
*/ 4416 memcpy(actions_sfx, actions + sample_action_pos + 1, 4417 sizeof(struct rte_flow_action) * 4418 (actions_n - sample_action_pos - 1)); 4419 return tag_id; 4420 } 4421 4422 /** 4423 * The splitting for metadata feature. 4424 * 4425 * - Q/RSS action on NIC Rx should be split in order to pass by 4426 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4427 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4428 * 4429 * - All the actions on NIC Tx should have a mreg copy action to 4430 * copy reg_a from WQE to reg_c[0]. 4431 * 4432 * @param dev 4433 * Pointer to Ethernet device. 4434 * @param[in] flow 4435 * Parent flow structure pointer. 4436 * @param[in] prefix_layers 4437 * Prefix flow layer flags. 4438 * @param[in] prefix_mark 4439 * Prefix subflow mark flag, may be 0. 4440 * @param[in] attr 4441 * Flow rule attributes. 4442 * @param[in] items 4443 * Pattern specification (list terminated by the END pattern item). 4444 * @param[in] actions 4445 * Associated actions (list terminated by the END action). 4446 * @param[in] external 4447 * This flow rule is created by request external to PMD. 4448 * @param[in] flow_idx 4449 * This memory pool index to the flow. 4450 * @param[out] error 4451 * Perform verbose error reporting if not NULL. 4452 * @return 4453 * 0 on success, negative value otherwise 4454 */ 4455 static int 4456 flow_create_split_metadata(struct rte_eth_dev *dev, 4457 struct rte_flow *flow, 4458 uint64_t prefix_layers, 4459 uint32_t prefix_mark, 4460 const struct rte_flow_attr *attr, 4461 const struct rte_flow_item items[], 4462 const struct rte_flow_action actions[], 4463 bool external, uint32_t flow_idx, 4464 struct rte_flow_error *error) 4465 { 4466 struct mlx5_priv *priv = dev->data->dev_private; 4467 struct mlx5_dev_config *config = &priv->config; 4468 const struct rte_flow_action *qrss = NULL; 4469 struct rte_flow_action *ext_actions = NULL; 4470 struct mlx5_flow *dev_flow = NULL; 4471 uint32_t qrss_id = 0; 4472 int mtr_sfx = 0; 4473 size_t act_size; 4474 int actions_n; 4475 int encap_idx; 4476 int ret; 4477 4478 /* Check whether extensive metadata feature is engaged. */ 4479 if (!config->dv_flow_en || 4480 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4481 !mlx5_flow_ext_mreg_supported(dev)) 4482 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 4483 prefix_mark, attr, items, 4484 actions, external, flow_idx, 4485 error); 4486 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 4487 &encap_idx); 4488 if (qrss) { 4489 /* Exclude hairpin flows from splitting. */ 4490 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 4491 const struct rte_flow_action_queue *queue; 4492 4493 queue = qrss->conf; 4494 if (mlx5_rxq_get_type(dev, queue->index) == 4495 MLX5_RXQ_TYPE_HAIRPIN) 4496 qrss = NULL; 4497 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 4498 const struct rte_flow_action_rss *rss; 4499 4500 rss = qrss->conf; 4501 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 4502 MLX5_RXQ_TYPE_HAIRPIN) 4503 qrss = NULL; 4504 } 4505 } 4506 if (qrss) { 4507 /* Check if it is in meter suffix table. */ 4508 mtr_sfx = attr->group == (attr->transfer ? 4509 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4510 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4511 /* 4512 * Q/RSS action on NIC Rx should be split in order to pass by 4513 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4514 * action table (RX_ACT_TBL) which has the split Q/RSS action. 
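 * Illustrative split (a sketch based on the helpers used below): for a
 * NIC Rx rule with actions [MARK, RSS, END]
 *   - prefix subflow (original table): [MARK, TAG(flow_id),
 *     JUMP -> RX_CP_TBL, END]
 *   - suffix subflow (RX_ACT_TBL): match TAG(flow_id), actions
 *     [RSS, END]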
4515 */ 4516 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4517 sizeof(struct rte_flow_action_set_tag) + 4518 sizeof(struct rte_flow_action_jump); 4519 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4520 SOCKET_ID_ANY); 4521 if (!ext_actions) 4522 return rte_flow_error_set(error, ENOMEM, 4523 RTE_FLOW_ERROR_TYPE_ACTION, 4524 NULL, "no memory to split " 4525 "metadata flow"); 4526 /* 4527 * If we are the suffix flow of meter, tag already exist. 4528 * Set the tag action to void. 4529 */ 4530 if (mtr_sfx) 4531 ext_actions[qrss - actions].type = 4532 RTE_FLOW_ACTION_TYPE_VOID; 4533 else 4534 ext_actions[qrss - actions].type = 4535 (enum rte_flow_action_type) 4536 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4537 /* 4538 * Create the new actions list with removed Q/RSS action 4539 * and appended set tag and jump to register copy table 4540 * (RX_CP_TBL). We should preallocate unique tag ID here 4541 * in advance, because it is needed for set tag action. 4542 */ 4543 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4544 qrss, actions_n, error); 4545 if (!mtr_sfx && !qrss_id) { 4546 ret = -rte_errno; 4547 goto exit; 4548 } 4549 } else if (attr->egress && !attr->transfer) { 4550 /* 4551 * All the actions on NIC Tx should have a metadata register 4552 * copy action to copy reg_a from WQE to reg_c[meta] 4553 */ 4554 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4555 sizeof(struct mlx5_flow_action_copy_mreg); 4556 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4557 SOCKET_ID_ANY); 4558 if (!ext_actions) 4559 return rte_flow_error_set(error, ENOMEM, 4560 RTE_FLOW_ERROR_TYPE_ACTION, 4561 NULL, "no memory to split " 4562 "metadata flow"); 4563 /* Create the action list appended with copy register. */ 4564 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4565 actions_n, error, encap_idx); 4566 if (ret < 0) 4567 goto exit; 4568 } 4569 /* Add the unmodified original or prefix subflow. */ 4570 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, 4571 prefix_mark, attr, 4572 items, ext_actions ? ext_actions : 4573 actions, external, flow_idx, error); 4574 if (ret < 0) 4575 goto exit; 4576 MLX5_ASSERT(dev_flow); 4577 if (qrss) { 4578 const struct rte_flow_attr q_attr = { 4579 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4580 .ingress = 1, 4581 }; 4582 /* Internal PMD action to set register. */ 4583 struct mlx5_rte_flow_item_tag q_tag_spec = { 4584 .data = qrss_id, 4585 .id = REG_NON, 4586 }; 4587 struct rte_flow_item q_items[] = { 4588 { 4589 .type = (enum rte_flow_item_type) 4590 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4591 .spec = &q_tag_spec, 4592 .last = NULL, 4593 .mask = NULL, 4594 }, 4595 { 4596 .type = RTE_FLOW_ITEM_TYPE_END, 4597 }, 4598 }; 4599 struct rte_flow_action q_actions[] = { 4600 { 4601 .type = qrss->type, 4602 .conf = qrss->conf, 4603 }, 4604 { 4605 .type = RTE_FLOW_ACTION_TYPE_END, 4606 }, 4607 }; 4608 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4609 4610 /* 4611 * Configure the tag item only if there is no meter subflow. 4612 * Since tag is already marked in the meter suffix subflow 4613 * we can just use the meter suffix items as is. 4614 */ 4615 if (qrss_id) { 4616 /* Not meter subflow. */ 4617 MLX5_ASSERT(!mtr_sfx); 4618 /* 4619 * Put unique id in prefix flow due to it is destroyed 4620 * after suffix flow and id will be freed after there 4621 * is no actual flows with this id and identifier 4622 * reallocation becomes possible (for example, for 4623 * other flows in other threads). 
4624 */ 4625 dev_flow->handle->split_flow_id = qrss_id; 4626 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4627 error); 4628 if (ret < 0) 4629 goto exit; 4630 q_tag_spec.id = ret; 4631 } 4632 dev_flow = NULL; 4633 /* Add suffix subflow to execute Q/RSS. */ 4634 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0, 4635 &q_attr, mtr_sfx ? items : 4636 q_items, q_actions, 4637 external, flow_idx, error); 4638 if (ret < 0) 4639 goto exit; 4640 /* qrss ID should be freed if failed. */ 4641 qrss_id = 0; 4642 MLX5_ASSERT(dev_flow); 4643 } 4644 4645 exit: 4646 /* 4647 * We do not destroy the partially created sub_flows in case of error. 4648 * These ones are included into parent flow list and will be destroyed 4649 * by flow_drv_destroy. 4650 */ 4651 flow_qrss_free_id(dev, qrss_id); 4652 mlx5_free(ext_actions); 4653 return ret; 4654 } 4655 4656 /** 4657 * The splitting for meter feature. 4658 * 4659 * - The meter flow will be split to two flows as prefix and 4660 * suffix flow. The packets make sense only it pass the prefix 4661 * meter action. 4662 * 4663 * - Reg_C_5 is used for the packet to match betweend prefix and 4664 * suffix flow. 4665 * 4666 * @param dev 4667 * Pointer to Ethernet device. 4668 * @param[in] flow 4669 * Parent flow structure pointer. 4670 * @param[in] prefix_layers 4671 * Prefix subflow layers, may be 0. 4672 * @param[in] prefix_mark 4673 * Prefix subflow mark flag, may be 0. 4674 * @param[in] attr 4675 * Flow rule attributes. 4676 * @param[in] items 4677 * Pattern specification (list terminated by the END pattern item). 4678 * @param[in] actions 4679 * Associated actions (list terminated by the END action). 4680 * @param[in] external 4681 * This flow rule is created by request external to PMD. 4682 * @param[in] flow_idx 4683 * This memory pool index to the flow. 4684 * @param[out] error 4685 * Perform verbose error reporting if not NULL. 4686 * @return 4687 * 0 on success, negative value otherwise 4688 */ 4689 static int 4690 flow_create_split_meter(struct rte_eth_dev *dev, 4691 struct rte_flow *flow, 4692 uint64_t prefix_layers, 4693 uint32_t prefix_mark, 4694 const struct rte_flow_attr *attr, 4695 const struct rte_flow_item items[], 4696 const struct rte_flow_action actions[], 4697 bool external, uint32_t flow_idx, 4698 struct rte_flow_error *error) 4699 { 4700 struct mlx5_priv *priv = dev->data->dev_private; 4701 struct rte_flow_action *sfx_actions = NULL; 4702 struct rte_flow_action *pre_actions = NULL; 4703 struct rte_flow_item *sfx_items = NULL; 4704 struct mlx5_flow *dev_flow = NULL; 4705 struct rte_flow_attr sfx_attr = *attr; 4706 uint32_t mtr = 0; 4707 uint32_t mtr_tag_id = 0; 4708 size_t act_size; 4709 size_t item_size; 4710 int actions_n = 0; 4711 int ret; 4712 4713 if (priv->mtr_en) 4714 actions_n = flow_check_meter_action(actions, &mtr); 4715 if (mtr) { 4716 /* The five prefix actions: meter, decap, encap, tag, end. */ 4717 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4718 sizeof(struct mlx5_rte_flow_action_set_tag); 4719 /* tag, vlan, port id, end. 
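 * A hedged sketch of the single buffer allocated below: the suffix
 * actions come first, the prefix actions start at sfx_actions +
 * actions_n, the SET_TAG configuration is written right after the
 * prefix END action, and the suffix items plus the internal TAG
 * spec/mask occupy the trailing item_size bytes.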
*/ 4720 #define METER_SUFFIX_ITEM 4 4721 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4722 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4723 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 4724 0, SOCKET_ID_ANY); 4725 if (!sfx_actions) 4726 return rte_flow_error_set(error, ENOMEM, 4727 RTE_FLOW_ERROR_TYPE_ACTION, 4728 NULL, "no memory to split " 4729 "meter flow"); 4730 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4731 act_size); 4732 pre_actions = sfx_actions + actions_n; 4733 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4734 actions, sfx_actions, 4735 pre_actions); 4736 if (!mtr_tag_id) { 4737 ret = -rte_errno; 4738 goto exit; 4739 } 4740 /* Add the prefix subflow. */ 4741 ret = flow_create_split_inner(dev, flow, &dev_flow, 4742 prefix_layers, 0, 4743 attr, items, 4744 pre_actions, external, 4745 flow_idx, error); 4746 if (ret) { 4747 ret = -rte_errno; 4748 goto exit; 4749 } 4750 dev_flow->handle->split_flow_id = mtr_tag_id; 4751 /* Setting the sfx group atrr. */ 4752 sfx_attr.group = sfx_attr.transfer ? 4753 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4754 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4755 } 4756 /* Add the prefix subflow. */ 4757 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4758 flow_get_prefix_layer_flags(dev_flow) : 4759 prefix_layers, dev_flow ? 4760 dev_flow->handle->mark : prefix_mark, 4761 &sfx_attr, sfx_items ? 4762 sfx_items : items, 4763 sfx_actions ? sfx_actions : actions, 4764 external, flow_idx, error); 4765 exit: 4766 if (sfx_actions) 4767 mlx5_free(sfx_actions); 4768 return ret; 4769 } 4770 4771 /** 4772 * The splitting for sample feature. 4773 * 4774 * Once Sample action is detected in the action list, the flow actions should 4775 * be split into prefix sub flow and suffix sub flow. 4776 * 4777 * The original items remain in the prefix sub flow, all actions preceding the 4778 * sample action and the sample action itself will be copied to the prefix 4779 * sub flow, the actions following the sample action will be copied to the 4780 * suffix sub flow, Queue action always be located in the suffix sub flow. 4781 * 4782 * In order to make the packet from prefix sub flow matches with suffix sub 4783 * flow, an extra tag action be added into prefix sub flow, and the suffix sub 4784 * flow uses tag item with the unique flow id. 4785 * 4786 * @param dev 4787 * Pointer to Ethernet device. 4788 * @param[in] flow 4789 * Parent flow structure pointer. 4790 * @param[in] attr 4791 * Flow rule attributes. 4792 * @param[in] items 4793 * Pattern specification (list terminated by the END pattern item). 4794 * @param[in] actions 4795 * Associated actions (list terminated by the END action). 4796 * @param[in] external 4797 * This flow rule is created by request external to PMD. 4798 * @param[in] flow_idx 4799 * This memory pool index to the flow. 4800 * @param[out] error 4801 * Perform verbose error reporting if not NULL. 
4802 * @return 4803 * 0 on success, negative value otherwise 4804 */ 4805 static int 4806 flow_create_split_sample(struct rte_eth_dev *dev, 4807 struct rte_flow *flow, 4808 const struct rte_flow_attr *attr, 4809 const struct rte_flow_item items[], 4810 const struct rte_flow_action actions[], 4811 bool external, uint32_t flow_idx, 4812 struct rte_flow_error *error) 4813 { 4814 struct mlx5_priv *priv = dev->data->dev_private; 4815 struct rte_flow_action *sfx_actions = NULL; 4816 struct rte_flow_action *pre_actions = NULL; 4817 struct rte_flow_item *sfx_items = NULL; 4818 struct mlx5_flow *dev_flow = NULL; 4819 struct rte_flow_attr sfx_attr = *attr; 4820 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4821 struct mlx5_flow_dv_sample_resource *sample_res; 4822 struct mlx5_flow_tbl_data_entry *sfx_tbl_data; 4823 struct mlx5_flow_tbl_resource *sfx_tbl; 4824 union mlx5_flow_tbl_key sfx_table_key; 4825 #endif 4826 size_t act_size; 4827 size_t item_size; 4828 uint32_t fdb_tx = 0; 4829 int32_t tag_id = 0; 4830 int actions_n = 0; 4831 int sample_action_pos; 4832 int qrss_action_pos; 4833 int ret = 0; 4834 4835 if (priv->sampler_en) 4836 actions_n = flow_check_match_action(actions, 4837 RTE_FLOW_ACTION_TYPE_SAMPLE, 4838 &sample_action_pos, &qrss_action_pos); 4839 if (actions_n) { 4840 /* The prefix actions must includes sample, tag, end. */ 4841 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1) 4842 + sizeof(struct mlx5_rte_flow_action_set_tag); 4843 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM + 4844 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4845 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + 4846 item_size), 0, SOCKET_ID_ANY); 4847 if (!sfx_actions) 4848 return rte_flow_error_set(error, ENOMEM, 4849 RTE_FLOW_ERROR_TYPE_ACTION, 4850 NULL, "no memory to split " 4851 "sample flow"); 4852 /* The representor_id is -1 for uplink. */ 4853 fdb_tx = (attr->transfer && priv->representor_id != -1); 4854 if (!fdb_tx) 4855 sfx_items = (struct rte_flow_item *)((char *)sfx_actions 4856 + act_size); 4857 pre_actions = sfx_actions + actions_n; 4858 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items, 4859 actions, sfx_actions, 4860 pre_actions, actions_n, 4861 sample_action_pos, 4862 qrss_action_pos, error); 4863 if (tag_id < 0 || (!fdb_tx && !tag_id)) { 4864 ret = -rte_errno; 4865 goto exit; 4866 } 4867 /* Add the prefix subflow. */ 4868 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr, 4869 items, pre_actions, external, 4870 flow_idx, error); 4871 if (ret) { 4872 ret = -rte_errno; 4873 goto exit; 4874 } 4875 dev_flow->handle->split_flow_id = tag_id; 4876 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 4877 /* Set the sfx group attr. */ 4878 sample_res = (struct mlx5_flow_dv_sample_resource *) 4879 dev_flow->dv.sample_res; 4880 sfx_tbl = (struct mlx5_flow_tbl_resource *) 4881 sample_res->normal_path_tbl; 4882 sfx_tbl_data = container_of(sfx_tbl, 4883 struct mlx5_flow_tbl_data_entry, tbl); 4884 sfx_table_key.v64 = sfx_tbl_data->entry.key; 4885 sfx_attr.group = sfx_attr.transfer ? 4886 (sfx_table_key.table_id - 1) : 4887 sfx_table_key.table_id; 4888 #endif 4889 } 4890 /* Add the suffix subflow. */ 4891 ret = flow_create_split_meter(dev, flow, dev_flow ? 4892 flow_get_prefix_layer_flags(dev_flow) : 0, 4893 dev_flow ? dev_flow->handle->mark : 0, 4894 &sfx_attr, sfx_items ? sfx_items : items, 4895 sfx_actions ? 
sfx_actions : actions, 4896 external, flow_idx, error); 4897 exit: 4898 if (sfx_actions) 4899 mlx5_free(sfx_actions); 4900 return ret; 4901 } 4902 4903 /** 4904 * Split the flow to subflow set. The splitters might be linked 4905 * in the chain, like this: 4906 * flow_create_split_outer() calls: 4907 * flow_create_split_meter() calls: 4908 * flow_create_split_metadata(meter_subflow_0) calls: 4909 * flow_create_split_inner(metadata_subflow_0) 4910 * flow_create_split_inner(metadata_subflow_1) 4911 * flow_create_split_inner(metadata_subflow_2) 4912 * flow_create_split_metadata(meter_subflow_1) calls: 4913 * flow_create_split_inner(metadata_subflow_0) 4914 * flow_create_split_inner(metadata_subflow_1) 4915 * flow_create_split_inner(metadata_subflow_2) 4916 * 4917 * This provide flexible way to add new levels of flow splitting. 4918 * The all of successfully created subflows are included to the 4919 * parent flow dev_flow list. 4920 * 4921 * @param dev 4922 * Pointer to Ethernet device. 4923 * @param[in] flow 4924 * Parent flow structure pointer. 4925 * @param[in] attr 4926 * Flow rule attributes. 4927 * @param[in] items 4928 * Pattern specification (list terminated by the END pattern item). 4929 * @param[in] actions 4930 * Associated actions (list terminated by the END action). 4931 * @param[in] external 4932 * This flow rule is created by request external to PMD. 4933 * @param[in] flow_idx 4934 * This memory pool index to the flow. 4935 * @param[out] error 4936 * Perform verbose error reporting if not NULL. 4937 * @return 4938 * 0 on success, negative value otherwise 4939 */ 4940 static int 4941 flow_create_split_outer(struct rte_eth_dev *dev, 4942 struct rte_flow *flow, 4943 const struct rte_flow_attr *attr, 4944 const struct rte_flow_item items[], 4945 const struct rte_flow_action actions[], 4946 bool external, uint32_t flow_idx, 4947 struct rte_flow_error *error) 4948 { 4949 int ret; 4950 4951 ret = flow_create_split_sample(dev, flow, attr, items, 4952 actions, external, flow_idx, error); 4953 MLX5_ASSERT(ret <= 0); 4954 return ret; 4955 } 4956 4957 /** 4958 * Create a flow and add it to @p list. 4959 * 4960 * @param dev 4961 * Pointer to Ethernet device. 4962 * @param list 4963 * Pointer to a TAILQ flow list. If this parameter NULL, 4964 * no list insertion occurred, flow is just created, 4965 * this is caller's responsibility to track the 4966 * created flow. 4967 * @param[in] attr 4968 * Flow rule attributes. 4969 * @param[in] items 4970 * Pattern specification (list terminated by the END pattern item). 4971 * @param[in] actions 4972 * Associated actions (list terminated by the END action). 4973 * @param[in] external 4974 * This flow rule is created by request external to PMD. 4975 * @param[out] error 4976 * Perform verbose error reporting if not NULL. 4977 * 4978 * @return 4979 * A flow index on success, 0 otherwise and rte_errno is set. 
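 *
 * A minimal internal usage sketch (mirroring the callers below, e.g.
 * mlx5_flow_create(); this is not a public API):
 *
 *   uint32_t idx = flow_list_create(dev, &priv->flows, attr, items,
 *                                   actions, true, &error);
 *
 * where a zero return means failure and rte_errno/error describe it.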
4980 */ 4981 static uint32_t 4982 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 4983 const struct rte_flow_attr *attr, 4984 const struct rte_flow_item items[], 4985 const struct rte_flow_action actions[], 4986 bool external, struct rte_flow_error *error) 4987 { 4988 struct mlx5_priv *priv = dev->data->dev_private; 4989 struct rte_flow *flow = NULL; 4990 struct mlx5_flow *dev_flow; 4991 const struct rte_flow_action_rss *rss; 4992 union { 4993 struct mlx5_flow_expand_rss buf; 4994 uint8_t buffer[2048]; 4995 } expand_buffer; 4996 union { 4997 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4998 uint8_t buffer[2048]; 4999 } actions_rx; 5000 union { 5001 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 5002 uint8_t buffer[2048]; 5003 } actions_hairpin_tx; 5004 union { 5005 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 5006 uint8_t buffer[2048]; 5007 } items_tx; 5008 struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; 5009 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 5010 priv->rss_desc)[!!priv->flow_idx]; 5011 const struct rte_flow_action *p_actions_rx = actions; 5012 uint32_t i; 5013 uint32_t idx = 0; 5014 int hairpin_flow; 5015 uint32_t hairpin_id = 0; 5016 struct rte_flow_attr attr_tx = { .priority = 0 }; 5017 int ret; 5018 5019 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 5020 ret = flow_drv_validate(dev, attr, items, p_actions_rx, 5021 external, hairpin_flow, error); 5022 if (ret < 0) 5023 return 0; 5024 if (hairpin_flow > 0) { 5025 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 5026 rte_errno = EINVAL; 5027 return 0; 5028 } 5029 flow_hairpin_split(dev, actions, actions_rx.actions, 5030 actions_hairpin_tx.actions, items_tx.items, 5031 &hairpin_id); 5032 p_actions_rx = actions_rx.actions; 5033 } 5034 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 5035 if (!flow) { 5036 rte_errno = ENOMEM; 5037 goto error_before_flow; 5038 } 5039 flow->drv_type = flow_get_drv_type(dev, attr); 5040 if (hairpin_id != 0) 5041 flow->hairpin_flow_id = hairpin_id; 5042 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 5043 flow->drv_type < MLX5_FLOW_TYPE_MAX); 5044 memset(rss_desc, 0, sizeof(*rss_desc)); 5045 rss = flow_get_rss_action(p_actions_rx); 5046 if (rss) { 5047 /* 5048 * The following information is required by 5049 * mlx5_flow_hashfields_adjust() in advance. 5050 */ 5051 rss_desc->level = rss->level; 5052 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 5053 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 5054 } 5055 flow->dev_handles = 0; 5056 if (rss && rss->types) { 5057 unsigned int graph_root; 5058 5059 graph_root = find_graph_root(items, rss->level); 5060 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 5061 items, rss->types, 5062 mlx5_support_expansion, graph_root); 5063 MLX5_ASSERT(ret > 0 && 5064 (unsigned int)ret < sizeof(expand_buffer.buffer)); 5065 } else { 5066 buf->entries = 1; 5067 buf->entry[0].pattern = (void *)(uintptr_t)items; 5068 } 5069 /* 5070 * Record the start index when there is a nested call. All sub-flows 5071 * need to be translated before another calling. 5072 * No need to use ping-pong buffer to save memory here. 5073 */ 5074 if (priv->flow_idx) { 5075 MLX5_ASSERT(!priv->flow_nested_idx); 5076 priv->flow_nested_idx = priv->flow_idx; 5077 } 5078 for (i = 0; i < buf->entries; ++i) { 5079 /* 5080 * The splitter may create multiple dev_flows, 5081 * depending on configuration. In the simplest 5082 * case it just creates unmodified original flow. 
5083 */ 5084 ret = flow_create_split_outer(dev, flow, attr, 5085 buf->entry[i].pattern, 5086 p_actions_rx, external, idx, 5087 error); 5088 if (ret < 0) 5089 goto error; 5090 } 5091 /* Create the tx flow. */ 5092 if (hairpin_flow) { 5093 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 5094 attr_tx.ingress = 0; 5095 attr_tx.egress = 1; 5096 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 5097 actions_hairpin_tx.actions, 5098 idx, error); 5099 if (!dev_flow) 5100 goto error; 5101 dev_flow->flow = flow; 5102 dev_flow->external = 0; 5103 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 5104 dev_flow->handle, next); 5105 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 5106 items_tx.items, 5107 actions_hairpin_tx.actions, error); 5108 if (ret < 0) 5109 goto error; 5110 } 5111 /* 5112 * Update the metadata register copy table. If extensive 5113 * metadata feature is enabled and registers are supported 5114 * we might create the extra rte_flow for each unique 5115 * MARK/FLAG action ID. 5116 * 5117 * The table is updated for ingress Flows only, because 5118 * the egress Flows belong to the different device and 5119 * copy table should be updated in peer NIC Rx domain. 5120 */ 5121 if (attr->ingress && 5122 (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 5123 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 5124 if (ret) 5125 goto error; 5126 } 5127 /* 5128 * If the flow is external (from application) OR device is started, then 5129 * the flow will be applied immediately. 5130 */ 5131 if (external || dev->data->dev_started) { 5132 ret = flow_drv_apply(dev, flow, error); 5133 if (ret < 0) 5134 goto error; 5135 } 5136 if (list) 5137 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 5138 flow, next); 5139 flow_rxq_flags_set(dev, flow); 5140 /* Nested flow creation index recovery. */ 5141 priv->flow_idx = priv->flow_nested_idx; 5142 if (priv->flow_nested_idx) 5143 priv->flow_nested_idx = 0; 5144 return idx; 5145 error: 5146 MLX5_ASSERT(flow); 5147 ret = rte_errno; /* Save rte_errno before cleanup. */ 5148 flow_mreg_del_copy_action(dev, flow); 5149 flow_drv_destroy(dev, flow); 5150 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 5151 rte_errno = ret; /* Restore rte_errno. */ 5152 error_before_flow: 5153 ret = rte_errno; 5154 if (hairpin_id) 5155 mlx5_flow_id_release(priv->sh->flow_id_pool, 5156 hairpin_id); 5157 rte_errno = ret; 5158 priv->flow_idx = priv->flow_nested_idx; 5159 if (priv->flow_nested_idx) 5160 priv->flow_nested_idx = 0; 5161 return 0; 5162 } 5163 5164 /** 5165 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 5166 * incoming packets to table 1. 5167 * 5168 * Other flow rules, requested for group n, will be created in 5169 * e-switch table n+1. 5170 * Jump action to e-switch group n will be created to group n+1. 5171 * 5172 * Used when working in switchdev mode, to utilise advantages of table 1 5173 * and above. 5174 * 5175 * @param dev 5176 * Pointer to Ethernet device. 5177 * 5178 * @return 5179 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
5180 */ 5181 struct rte_flow * 5182 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 5183 { 5184 const struct rte_flow_attr attr = { 5185 .group = 0, 5186 .priority = 0, 5187 .ingress = 1, 5188 .egress = 0, 5189 .transfer = 1, 5190 }; 5191 const struct rte_flow_item pattern = { 5192 .type = RTE_FLOW_ITEM_TYPE_END, 5193 }; 5194 struct rte_flow_action_jump jump = { 5195 .group = 1, 5196 }; 5197 const struct rte_flow_action actions[] = { 5198 { 5199 .type = RTE_FLOW_ACTION_TYPE_JUMP, 5200 .conf = &jump, 5201 }, 5202 { 5203 .type = RTE_FLOW_ACTION_TYPE_END, 5204 }, 5205 }; 5206 struct mlx5_priv *priv = dev->data->dev_private; 5207 struct rte_flow_error error; 5208 5209 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 5210 &attr, &pattern, 5211 actions, false, &error); 5212 } 5213 5214 /** 5215 * Validate a flow supported by the NIC. 5216 * 5217 * @see rte_flow_validate() 5218 * @see rte_flow_ops 5219 */ 5220 int 5221 mlx5_flow_validate(struct rte_eth_dev *dev, 5222 const struct rte_flow_attr *attr, 5223 const struct rte_flow_item items[], 5224 const struct rte_flow_action actions[], 5225 struct rte_flow_error *error) 5226 { 5227 int hairpin_flow; 5228 5229 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 5230 return flow_drv_validate(dev, attr, items, actions, 5231 true, hairpin_flow, error); 5232 } 5233 5234 /** 5235 * Create a flow. 5236 * 5237 * @see rte_flow_create() 5238 * @see rte_flow_ops 5239 */ 5240 struct rte_flow * 5241 mlx5_flow_create(struct rte_eth_dev *dev, 5242 const struct rte_flow_attr *attr, 5243 const struct rte_flow_item items[], 5244 const struct rte_flow_action actions[], 5245 struct rte_flow_error *error) 5246 { 5247 struct mlx5_priv *priv = dev->data->dev_private; 5248 5249 /* 5250 * If the device is not started yet, it is not allowed to created a 5251 * flow from application. PMD default flows and traffic control flows 5252 * are not affected. 5253 */ 5254 if (unlikely(!dev->data->dev_started)) { 5255 DRV_LOG(DEBUG, "port %u is not started when " 5256 "inserting a flow", dev->data->port_id); 5257 rte_flow_error_set(error, ENODEV, 5258 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5259 NULL, 5260 "port not started"); 5261 return NULL; 5262 } 5263 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 5264 attr, items, actions, true, error); 5265 } 5266 5267 /** 5268 * Destroy a flow in a list. 5269 * 5270 * @param dev 5271 * Pointer to Ethernet device. 5272 * @param list 5273 * Pointer to the Indexed flow list. If this parameter NULL, 5274 * there is no flow removal from the list. Be noted that as 5275 * flow is add to the indexed list, memory of the indexed 5276 * list points to maybe changed as flow destroyed. 5277 * @param[in] flow_idx 5278 * Index of flow to destroy. 5279 */ 5280 static void 5281 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 5282 uint32_t flow_idx) 5283 { 5284 struct mlx5_priv *priv = dev->data->dev_private; 5285 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5286 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5287 [MLX5_IPOOL_RTE_FLOW], flow_idx); 5288 5289 if (!flow) 5290 return; 5291 /* 5292 * Update RX queue flags only if port is started, otherwise it is 5293 * already clean. 
5294 */ 5295 if (dev->data->dev_started) 5296 flow_rxq_flags_trim(dev, flow); 5297 if (flow->hairpin_flow_id) 5298 mlx5_flow_id_release(priv->sh->flow_id_pool, 5299 flow->hairpin_flow_id); 5300 flow_drv_destroy(dev, flow); 5301 if (list) 5302 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 5303 flow_idx, flow, next); 5304 flow_mreg_del_copy_action(dev, flow); 5305 if (flow->fdir) { 5306 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5307 if (priv_fdir_flow->rix_flow == flow_idx) 5308 break; 5309 } 5310 if (priv_fdir_flow) { 5311 LIST_REMOVE(priv_fdir_flow, next); 5312 mlx5_free(priv_fdir_flow->fdir); 5313 mlx5_free(priv_fdir_flow); 5314 } 5315 } 5316 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5317 } 5318 5319 /** 5320 * Destroy all flows. 5321 * 5322 * @param dev 5323 * Pointer to Ethernet device. 5324 * @param list 5325 * Pointer to the Indexed flow list. 5326 * @param active 5327 * If flushing is called avtively. 5328 */ 5329 void 5330 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 5331 { 5332 uint32_t num_flushed = 0; 5333 5334 while (*list) { 5335 flow_list_destroy(dev, list, *list); 5336 num_flushed++; 5337 } 5338 if (active) { 5339 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 5340 dev->data->port_id, num_flushed); 5341 } 5342 } 5343 5344 /** 5345 * Remove all flows. 5346 * 5347 * @param dev 5348 * Pointer to Ethernet device. 5349 * @param list 5350 * Pointer to the Indexed flow list. 5351 */ 5352 void 5353 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 5354 { 5355 struct mlx5_priv *priv = dev->data->dev_private; 5356 struct rte_flow *flow = NULL; 5357 uint32_t idx; 5358 5359 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5360 flow, next) { 5361 flow_drv_remove(dev, flow); 5362 flow_mreg_stop_copy_action(dev, flow); 5363 } 5364 flow_mreg_del_default_copy_action(dev); 5365 flow_rxq_flags_clear(dev); 5366 } 5367 5368 /** 5369 * Add all flows. 5370 * 5371 * @param dev 5372 * Pointer to Ethernet device. 5373 * @param list 5374 * Pointer to the Indexed flow list. 5375 * 5376 * @return 5377 * 0 on success, a negative errno value otherwise and rte_errno is set. 5378 */ 5379 int 5380 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 5381 { 5382 struct mlx5_priv *priv = dev->data->dev_private; 5383 struct rte_flow *flow = NULL; 5384 struct rte_flow_error error; 5385 uint32_t idx; 5386 int ret = 0; 5387 5388 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5389 ret = flow_mreg_add_default_copy_action(dev, &error); 5390 if (ret < 0) 5391 return -rte_errno; 5392 /* Apply Flows created by application. */ 5393 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 5394 flow, next) { 5395 ret = flow_mreg_start_copy_action(dev, flow); 5396 if (ret < 0) 5397 goto error; 5398 ret = flow_drv_apply(dev, flow, &error); 5399 if (ret < 0) 5400 goto error; 5401 flow_rxq_flags_set(dev, flow); 5402 } 5403 return 0; 5404 error: 5405 ret = rte_errno; /* Save rte_errno before cleanup. */ 5406 mlx5_flow_stop(dev, list); 5407 rte_errno = ret; /* Restore rte_errno. */ 5408 return -rte_errno; 5409 } 5410 5411 /** 5412 * Stop all default actions for flows. 5413 * 5414 * @param dev 5415 * Pointer to Ethernet device. 5416 */ 5417 void 5418 mlx5_flow_stop_default(struct rte_eth_dev *dev) 5419 { 5420 flow_mreg_del_default_copy_action(dev); 5421 flow_rxq_flags_clear(dev); 5422 } 5423 5424 /** 5425 * Start all default actions for flows. 
5426 * 5427 * @param dev 5428 * Pointer to Ethernet device. 5429 * @return 5430 * 0 on success, a negative errno value otherwise and rte_errno is set. 5431 */ 5432 int 5433 mlx5_flow_start_default(struct rte_eth_dev *dev) 5434 { 5435 struct rte_flow_error error; 5436 5437 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 5438 return flow_mreg_add_default_copy_action(dev, &error); 5439 } 5440 5441 /** 5442 * Allocate intermediate resources for flow creation. 5443 * 5444 * @param dev 5445 * Pointer to Ethernet device. 5446 */ 5447 void 5448 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 5449 { 5450 struct mlx5_priv *priv = dev->data->dev_private; 5451 5452 if (!priv->inter_flows) { 5453 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, 5454 MLX5_NUM_MAX_DEV_FLOWS * 5455 sizeof(struct mlx5_flow) + 5456 (sizeof(struct mlx5_flow_rss_desc) + 5457 sizeof(uint16_t) * UINT16_MAX) * 2, 0, 5458 SOCKET_ID_ANY); 5459 if (!priv->inter_flows) { 5460 DRV_LOG(ERR, "can't allocate intermediate memory."); 5461 return; 5462 } 5463 } 5464 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 5465 [MLX5_NUM_MAX_DEV_FLOWS]; 5466 /* Reset the index. */ 5467 priv->flow_idx = 0; 5468 priv->flow_nested_idx = 0; 5469 } 5470 5471 /** 5472 * Free intermediate resources for flows. 5473 * 5474 * @param dev 5475 * Pointer to Ethernet device. 5476 */ 5477 void 5478 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 5479 { 5480 struct mlx5_priv *priv = dev->data->dev_private; 5481 5482 mlx5_free(priv->inter_flows); 5483 priv->inter_flows = NULL; 5484 } 5485 5486 /** 5487 * Verify the flow list is empty 5488 * 5489 * @param dev 5490 * Pointer to Ethernet device. 5491 * 5492 * @return the number of flows not released. 5493 */ 5494 int 5495 mlx5_flow_verify(struct rte_eth_dev *dev) 5496 { 5497 struct mlx5_priv *priv = dev->data->dev_private; 5498 struct rte_flow *flow; 5499 uint32_t idx; 5500 int ret = 0; 5501 5502 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 5503 flow, next) { 5504 DRV_LOG(DEBUG, "port %u flow %p still referenced", 5505 dev->data->port_id, (void *)flow); 5506 ++ret; 5507 } 5508 return ret; 5509 } 5510 5511 /** 5512 * Enable default hairpin egress flow. 5513 * 5514 * @param dev 5515 * Pointer to Ethernet device. 5516 * @param queue 5517 * The queue index. 5518 * 5519 * @return 5520 * 0 on success, a negative errno value otherwise and rte_errno is set. 
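 *
 * A minimal usage sketch (illustrative only): install the default hairpin
 * egress rule for every hairpin Tx queue when the port starts. The
 * hairpin_queues_n bound and the error handling are assumptions, not taken
 * from this file.
 *
 * @code
 * uint32_t q;
 *
 * for (q = 0; q != hairpin_queues_n; ++q)
 *         if (mlx5_ctrl_flow_source_queue(dev, q))
 *                 return -rte_errno;
 * @endcode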
5521 */ 5522 int 5523 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 5524 uint32_t queue) 5525 { 5526 struct mlx5_priv *priv = dev->data->dev_private; 5527 const struct rte_flow_attr attr = { 5528 .egress = 1, 5529 .priority = 0, 5530 }; 5531 struct mlx5_rte_flow_item_tx_queue queue_spec = { 5532 .queue = queue, 5533 }; 5534 struct mlx5_rte_flow_item_tx_queue queue_mask = { 5535 .queue = UINT32_MAX, 5536 }; 5537 struct rte_flow_item items[] = { 5538 { 5539 .type = (enum rte_flow_item_type) 5540 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 5541 .spec = &queue_spec, 5542 .last = NULL, 5543 .mask = &queue_mask, 5544 }, 5545 { 5546 .type = RTE_FLOW_ITEM_TYPE_END, 5547 }, 5548 }; 5549 struct rte_flow_action_jump jump = { 5550 .group = MLX5_HAIRPIN_TX_TABLE, 5551 }; 5552 struct rte_flow_action actions[2]; 5553 uint32_t flow_idx; 5554 struct rte_flow_error error; 5555 5556 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 5557 actions[0].conf = &jump; 5558 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 5559 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5560 &attr, items, actions, false, &error); 5561 if (!flow_idx) { 5562 DRV_LOG(DEBUG, 5563 "Failed to create ctrl flow: rte_errno(%d)," 5564 " type(%d), message(%s)", 5565 rte_errno, error.type, 5566 error.message ? error.message : " (no stated reason)"); 5567 return -rte_errno; 5568 } 5569 return 0; 5570 } 5571 5572 /** 5573 * Enable a control flow configured from the control plane. 5574 * 5575 * @param dev 5576 * Pointer to Ethernet device. 5577 * @param eth_spec 5578 * An Ethernet flow spec to apply. 5579 * @param eth_mask 5580 * An Ethernet flow mask to apply. 5581 * @param vlan_spec 5582 * A VLAN flow spec to apply. 5583 * @param vlan_mask 5584 * A VLAN flow mask to apply. 5585 * 5586 * @return 5587 * 0 on success, a negative errno value otherwise and rte_errno is set. 5588 */ 5589 int 5590 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 5591 struct rte_flow_item_eth *eth_spec, 5592 struct rte_flow_item_eth *eth_mask, 5593 struct rte_flow_item_vlan *vlan_spec, 5594 struct rte_flow_item_vlan *vlan_mask) 5595 { 5596 struct mlx5_priv *priv = dev->data->dev_private; 5597 const struct rte_flow_attr attr = { 5598 .ingress = 1, 5599 .priority = MLX5_FLOW_PRIO_RSVD, 5600 }; 5601 struct rte_flow_item items[] = { 5602 { 5603 .type = RTE_FLOW_ITEM_TYPE_ETH, 5604 .spec = eth_spec, 5605 .last = NULL, 5606 .mask = eth_mask, 5607 }, 5608 { 5609 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 5610 RTE_FLOW_ITEM_TYPE_END, 5611 .spec = vlan_spec, 5612 .last = NULL, 5613 .mask = vlan_mask, 5614 }, 5615 { 5616 .type = RTE_FLOW_ITEM_TYPE_END, 5617 }, 5618 }; 5619 uint16_t queue[priv->reta_idx_n]; 5620 struct rte_flow_action_rss action_rss = { 5621 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 5622 .level = 0, 5623 .types = priv->rss_conf.rss_hf, 5624 .key_len = priv->rss_conf.rss_key_len, 5625 .queue_num = priv->reta_idx_n, 5626 .key = priv->rss_conf.rss_key, 5627 .queue = queue, 5628 }; 5629 struct rte_flow_action actions[] = { 5630 { 5631 .type = RTE_FLOW_ACTION_TYPE_RSS, 5632 .conf = &action_rss, 5633 }, 5634 { 5635 .type = RTE_FLOW_ACTION_TYPE_END, 5636 }, 5637 }; 5638 uint32_t flow_idx; 5639 struct rte_flow_error error; 5640 unsigned int i; 5641 5642 if (!priv->reta_idx_n || !priv->rxqs_n) { 5643 return 0; 5644 } 5645 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 5646 action_rss.types = 0; 5647 for (i = 0; i != priv->reta_idx_n; ++i) 5648 queue[i] = (*priv->reta_idx)[i]; 5649 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5650 &attr, items, actions, false, &error); 5651 if (!flow_idx) 5652 return -rte_errno; 5653 return 0; 5654 } 5655 5656 /** 5657 * Enable a control flow configured from the control plane. 5658 * 5659 * @param dev 5660 * Pointer to Ethernet device. 5661 * @param eth_spec 5662 * An Ethernet flow spec to apply. 5663 * @param eth_mask 5664 * An Ethernet flow mask to apply. 5665 * 5666 * @return 5667 * 0 on success, a negative errno value otherwise and rte_errno is set. 5668 */ 5669 int 5670 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5671 struct rte_flow_item_eth *eth_spec, 5672 struct rte_flow_item_eth *eth_mask) 5673 { 5674 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5675 } 5676 5677 /** 5678 * Create the default miss flow rule matching LACP traffic. 5679 * 5680 * @param dev 5681 * Pointer to Ethernet device. 5684 * 5685 * @return 5686 * 0 on success, a negative errno value otherwise and rte_errno is set. 5687 */ 5688 int 5689 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5690 { 5691 struct mlx5_priv *priv = dev->data->dev_private; 5692 /* 5693 * The LACP matching is done only by the Ethernet type, since using 5694 * a multicast destination MAC causes the kernel to give low priority to this flow. 5695 */ 5696 static const struct rte_flow_item_eth lacp_spec = { 5697 .type = RTE_BE16(0x8809), 5698 }; 5699 static const struct rte_flow_item_eth lacp_mask = { 5700 .type = 0xffff, 5701 }; 5702 const struct rte_flow_attr attr = { 5703 .ingress = 1, 5704 }; 5705 struct rte_flow_item items[] = { 5706 { 5707 .type = RTE_FLOW_ITEM_TYPE_ETH, 5708 .spec = &lacp_spec, 5709 .mask = &lacp_mask, 5710 }, 5711 { 5712 .type = RTE_FLOW_ITEM_TYPE_END, 5713 }, 5714 }; 5715 struct rte_flow_action actions[] = { 5716 { 5717 .type = (enum rte_flow_action_type) 5718 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5719 }, 5720 { 5721 .type = RTE_FLOW_ACTION_TYPE_END, 5722 }, 5723 }; 5724 struct rte_flow_error error; 5725 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5726 &attr, items, actions, false, &error); 5727 5728 if (!flow_idx) 5729 return -rte_errno; 5730 return 0; 5731 } 5732 5733 /** 5734 * Destroy a flow.
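 *
 * Illustrative usage through the generic rte_flow API, which reaches this
 * callback via rte_flow_ops; port_id, attr, pattern and actions are assumed
 * to be prepared by the application:
 *
 * @code
 * struct rte_flow_error err;
 * struct rte_flow *f;
 *
 * f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 * if (f != NULL)
 *         rte_flow_destroy(port_id, f, &err);
 * @endcode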
5735 * 5736 * @see rte_flow_destroy() 5737 * @see rte_flow_ops 5738 */ 5739 int 5740 mlx5_flow_destroy(struct rte_eth_dev *dev, 5741 struct rte_flow *flow, 5742 struct rte_flow_error *error __rte_unused) 5743 { 5744 struct mlx5_priv *priv = dev->data->dev_private; 5745 5746 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5747 return 0; 5748 } 5749 5750 /** 5751 * Destroy all flows. 5752 * 5753 * @see rte_flow_flush() 5754 * @see rte_flow_ops 5755 */ 5756 int 5757 mlx5_flow_flush(struct rte_eth_dev *dev, 5758 struct rte_flow_error *error __rte_unused) 5759 { 5760 struct mlx5_priv *priv = dev->data->dev_private; 5761 5762 mlx5_flow_list_flush(dev, &priv->flows, false); 5763 return 0; 5764 } 5765 5766 /** 5767 * Isolated mode. 5768 * 5769 * @see rte_flow_isolate() 5770 * @see rte_flow_ops 5771 */ 5772 int 5773 mlx5_flow_isolate(struct rte_eth_dev *dev, 5774 int enable, 5775 struct rte_flow_error *error) 5776 { 5777 struct mlx5_priv *priv = dev->data->dev_private; 5778 5779 if (dev->data->dev_started) { 5780 rte_flow_error_set(error, EBUSY, 5781 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5782 NULL, 5783 "port must be stopped first"); 5784 return -rte_errno; 5785 } 5786 priv->isolated = !!enable; 5787 if (enable) 5788 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5789 else 5790 dev->dev_ops = &mlx5_os_dev_ops; 5791 5792 dev->rx_descriptor_status = mlx5_rx_descriptor_status; 5793 dev->tx_descriptor_status = mlx5_tx_descriptor_status; 5794 5795 return 0; 5796 } 5797 5798 /** 5799 * Query a flow. 5800 * 5801 * @see rte_flow_query() 5802 * @see rte_flow_ops 5803 */ 5804 static int 5805 flow_drv_query(struct rte_eth_dev *dev, 5806 uint32_t flow_idx, 5807 const struct rte_flow_action *actions, 5808 void *data, 5809 struct rte_flow_error *error) 5810 { 5811 struct mlx5_priv *priv = dev->data->dev_private; 5812 const struct mlx5_flow_driver_ops *fops; 5813 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5814 [MLX5_IPOOL_RTE_FLOW], 5815 flow_idx); 5816 enum mlx5_flow_drv_type ftype; 5817 5818 if (!flow) { 5819 return rte_flow_error_set(error, ENOENT, 5820 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5821 NULL, 5822 "invalid flow handle"); 5823 } 5824 ftype = flow->drv_type; 5825 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5826 fops = flow_get_drv_ops(ftype); 5827 5828 return fops->query(dev, flow, actions, data, error); 5829 } 5830 5831 /** 5832 * Query a flow. 5833 * 5834 * @see rte_flow_query() 5835 * @see rte_flow_ops 5836 */ 5837 int 5838 mlx5_flow_query(struct rte_eth_dev *dev, 5839 struct rte_flow *flow, 5840 const struct rte_flow_action *actions, 5841 void *data, 5842 struct rte_flow_error *error) 5843 { 5844 int ret; 5845 5846 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5847 error); 5848 if (ret < 0) 5849 return ret; 5850 return 0; 5851 } 5852 5853 /** 5854 * Convert a flow director filter to a generic flow. 5855 * 5856 * @param dev 5857 * Pointer to Ethernet device. 5858 * @param fdir_filter 5859 * Flow director filter to add. 5860 * @param attributes 5861 * Generic flow parameters structure. 5862 * 5863 * @return 5864 * 0 on success, a negative errno value otherwise and rte_errno is set. 
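 *
 * Sketch of a filter this function converts (IPv4/UDP accepted to an Rx
 * queue); illustrative only, the values are arbitrary:
 *
 * @code
 * struct rte_eth_fdir_filter f;
 *
 * memset(&f, 0, sizeof(f));
 * f.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
 * f.input.flow.ip4_flow.src_ip = RTE_BE32(0x0a000001);
 * f.input.flow.ip4_flow.dst_ip = RTE_BE32(0x0a000002);
 * f.input.flow.udp4_flow.src_port = RTE_BE16(1234);
 * f.input.flow.udp4_flow.dst_port = RTE_BE16(5678);
 * f.action.behavior = RTE_ETH_FDIR_ACCEPT;
 * f.action.rx_queue = 3;
 * @endcode
 *
 * The result is an ETH/IPV4/UDP pattern with a QUEUE action, masked by
 * dev->data->dev_conf.fdir_conf.mask.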
5865 */ 5866 static int 5867 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5868 const struct rte_eth_fdir_filter *fdir_filter, 5869 struct mlx5_fdir *attributes) 5870 { 5871 struct mlx5_priv *priv = dev->data->dev_private; 5872 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5873 const struct rte_eth_fdir_masks *mask = 5874 &dev->data->dev_conf.fdir_conf.mask; 5875 5876 /* Validate queue number. */ 5877 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5878 DRV_LOG(ERR, "port %u invalid queue number %d", 5879 dev->data->port_id, fdir_filter->action.rx_queue); 5880 rte_errno = EINVAL; 5881 return -rte_errno; 5882 } 5883 attributes->attr.ingress = 1; 5884 attributes->items[0] = (struct rte_flow_item) { 5885 .type = RTE_FLOW_ITEM_TYPE_ETH, 5886 .spec = &attributes->l2, 5887 .mask = &attributes->l2_mask, 5888 }; 5889 switch (fdir_filter->action.behavior) { 5890 case RTE_ETH_FDIR_ACCEPT: 5891 attributes->actions[0] = (struct rte_flow_action){ 5892 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5893 .conf = &attributes->queue, 5894 }; 5895 break; 5896 case RTE_ETH_FDIR_REJECT: 5897 attributes->actions[0] = (struct rte_flow_action){ 5898 .type = RTE_FLOW_ACTION_TYPE_DROP, 5899 }; 5900 break; 5901 default: 5902 DRV_LOG(ERR, "port %u invalid behavior %d", 5903 dev->data->port_id, 5904 fdir_filter->action.behavior); 5905 rte_errno = ENOTSUP; 5906 return -rte_errno; 5907 } 5908 attributes->queue.index = fdir_filter->action.rx_queue; 5909 /* Handle L3. */ 5910 switch (fdir_filter->input.flow_type) { 5911 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5912 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5913 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5914 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5915 .src_addr = input->flow.ip4_flow.src_ip, 5916 .dst_addr = input->flow.ip4_flow.dst_ip, 5917 .time_to_live = input->flow.ip4_flow.ttl, 5918 .type_of_service = input->flow.ip4_flow.tos, 5919 }; 5920 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5921 .src_addr = mask->ipv4_mask.src_ip, 5922 .dst_addr = mask->ipv4_mask.dst_ip, 5923 .time_to_live = mask->ipv4_mask.ttl, 5924 .type_of_service = mask->ipv4_mask.tos, 5925 .next_proto_id = mask->ipv4_mask.proto, 5926 }; 5927 attributes->items[1] = (struct rte_flow_item){ 5928 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5929 .spec = &attributes->l3, 5930 .mask = &attributes->l3_mask, 5931 }; 5932 break; 5933 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5934 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5935 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5936 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5937 .hop_limits = input->flow.ipv6_flow.hop_limits, 5938 .proto = input->flow.ipv6_flow.proto, 5939 }; 5940 5941 memcpy(attributes->l3.ipv6.hdr.src_addr, 5942 input->flow.ipv6_flow.src_ip, 5943 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5944 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5945 input->flow.ipv6_flow.dst_ip, 5946 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5947 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 5948 mask->ipv6_mask.src_ip, 5949 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5950 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 5951 mask->ipv6_mask.dst_ip, 5952 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5953 attributes->items[1] = (struct rte_flow_item){ 5954 .type = RTE_FLOW_ITEM_TYPE_IPV6, 5955 .spec = &attributes->l3, 5956 .mask = &attributes->l3_mask, 5957 }; 5958 break; 5959 default: 5960 DRV_LOG(ERR, "port %u invalid flow type%d", 5961 dev->data->port_id, fdir_filter->input.flow_type); 5962 rte_errno = ENOTSUP; 5963 return -rte_errno; 5964 } 5965 /* Handle L4. 
*/ 5966 switch (fdir_filter->input.flow_type) { 5967 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5968 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5969 .src_port = input->flow.udp4_flow.src_port, 5970 .dst_port = input->flow.udp4_flow.dst_port, 5971 }; 5972 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5973 .src_port = mask->src_port_mask, 5974 .dst_port = mask->dst_port_mask, 5975 }; 5976 attributes->items[2] = (struct rte_flow_item){ 5977 .type = RTE_FLOW_ITEM_TYPE_UDP, 5978 .spec = &attributes->l4, 5979 .mask = &attributes->l4_mask, 5980 }; 5981 break; 5982 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5983 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5984 .src_port = input->flow.tcp4_flow.src_port, 5985 .dst_port = input->flow.tcp4_flow.dst_port, 5986 }; 5987 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5988 .src_port = mask->src_port_mask, 5989 .dst_port = mask->dst_port_mask, 5990 }; 5991 attributes->items[2] = (struct rte_flow_item){ 5992 .type = RTE_FLOW_ITEM_TYPE_TCP, 5993 .spec = &attributes->l4, 5994 .mask = &attributes->l4_mask, 5995 }; 5996 break; 5997 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5998 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5999 .src_port = input->flow.udp6_flow.src_port, 6000 .dst_port = input->flow.udp6_flow.dst_port, 6001 }; 6002 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 6003 .src_port = mask->src_port_mask, 6004 .dst_port = mask->dst_port_mask, 6005 }; 6006 attributes->items[2] = (struct rte_flow_item){ 6007 .type = RTE_FLOW_ITEM_TYPE_UDP, 6008 .spec = &attributes->l4, 6009 .mask = &attributes->l4_mask, 6010 }; 6011 break; 6012 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 6013 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 6014 .src_port = input->flow.tcp6_flow.src_port, 6015 .dst_port = input->flow.tcp6_flow.dst_port, 6016 }; 6017 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 6018 .src_port = mask->src_port_mask, 6019 .dst_port = mask->dst_port_mask, 6020 }; 6021 attributes->items[2] = (struct rte_flow_item){ 6022 .type = RTE_FLOW_ITEM_TYPE_TCP, 6023 .spec = &attributes->l4, 6024 .mask = &attributes->l4_mask, 6025 }; 6026 break; 6027 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 6028 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 6029 break; 6030 default: 6031 DRV_LOG(ERR, "port %u invalid flow type%d", 6032 dev->data->port_id, fdir_filter->input.flow_type); 6033 rte_errno = ENOTSUP; 6034 return -rte_errno; 6035 } 6036 return 0; 6037 } 6038 6039 #define FLOW_FDIR_CMP(f1, f2, fld) \ 6040 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 6041 6042 /** 6043 * Compare two FDIR flows. If items and actions are identical, the two flows are 6044 * regarded as same. 6045 * 6046 * @param dev 6047 * Pointer to Ethernet device. 6048 * @param f1 6049 * FDIR flow to compare. 6050 * @param f2 6051 * FDIR flow to compare. 6052 * 6053 * @return 6054 * Zero on match, 1 otherwise. 6055 */ 6056 static int 6057 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 6058 { 6059 if (FLOW_FDIR_CMP(f1, f2, attr) || 6060 FLOW_FDIR_CMP(f1, f2, l2) || 6061 FLOW_FDIR_CMP(f1, f2, l2_mask) || 6062 FLOW_FDIR_CMP(f1, f2, l3) || 6063 FLOW_FDIR_CMP(f1, f2, l3_mask) || 6064 FLOW_FDIR_CMP(f1, f2, l4) || 6065 FLOW_FDIR_CMP(f1, f2, l4_mask) || 6066 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 6067 return 1; 6068 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 6069 FLOW_FDIR_CMP(f1, f2, queue)) 6070 return 1; 6071 return 0; 6072 } 6073 6074 /** 6075 * Search device flow list to find out a matched FDIR flow. 6076 * 6077 * @param dev 6078 * Pointer to Ethernet device. 
6079 * @param fdir_flow 6080 * FDIR flow to lookup. 6081 * 6082 * @return 6083 * Index of flow if found, 0 otherwise. 6084 */ 6085 static uint32_t 6086 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 6087 { 6088 struct mlx5_priv *priv = dev->data->dev_private; 6089 uint32_t flow_idx = 0; 6090 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6091 6092 MLX5_ASSERT(fdir_flow); 6093 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6094 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 6095 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 6096 dev->data->port_id, flow_idx); 6097 flow_idx = priv_fdir_flow->rix_flow; 6098 break; 6099 } 6100 } 6101 return flow_idx; 6102 } 6103 6104 /** 6105 * Add new flow director filter and store it in list. 6106 * 6107 * @param dev 6108 * Pointer to Ethernet device. 6109 * @param fdir_filter 6110 * Flow director filter to add. 6111 * 6112 * @return 6113 * 0 on success, a negative errno value otherwise and rte_errno is set. 6114 */ 6115 static int 6116 flow_fdir_filter_add(struct rte_eth_dev *dev, 6117 const struct rte_eth_fdir_filter *fdir_filter) 6118 { 6119 struct mlx5_priv *priv = dev->data->dev_private; 6120 struct mlx5_fdir *fdir_flow; 6121 struct rte_flow *flow; 6122 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6123 uint32_t flow_idx; 6124 int ret; 6125 6126 fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, 6127 SOCKET_ID_ANY); 6128 if (!fdir_flow) { 6129 rte_errno = ENOMEM; 6130 return -rte_errno; 6131 } 6132 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 6133 if (ret) 6134 goto error; 6135 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 6136 if (flow_idx) { 6137 rte_errno = EEXIST; 6138 goto error; 6139 } 6140 priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, 6141 sizeof(struct mlx5_fdir_flow), 6142 0, SOCKET_ID_ANY); 6143 if (!priv_fdir_flow) { 6144 rte_errno = ENOMEM; 6145 goto error; 6146 } 6147 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 6148 fdir_flow->items, fdir_flow->actions, true, 6149 NULL); 6150 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 6151 if (!flow) 6152 goto error; 6153 flow->fdir = 1; 6154 priv_fdir_flow->fdir = fdir_flow; 6155 priv_fdir_flow->rix_flow = flow_idx; 6156 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 6157 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 6158 dev->data->port_id, (void *)flow); 6159 return 0; 6160 error: 6161 mlx5_free(priv_fdir_flow); 6162 mlx5_free(fdir_flow); 6163 return -rte_errno; 6164 } 6165 6166 /** 6167 * Delete specific filter. 6168 * 6169 * @param dev 6170 * Pointer to Ethernet device. 6171 * @param fdir_filter 6172 * Filter to be deleted. 6173 * 6174 * @return 6175 * 0 on success, a negative errno value otherwise and rte_errno is set. 
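 *
 * Deletion matches on the converted specification, so passing the same
 * rte_eth_fdir_filter that was previously added removes that flow. An
 * illustrative call through the filter_ctrl entry point of this file,
 * assuming f is the filter used for RTE_ETH_FILTER_ADD:
 *
 * @code
 * int rc = mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_FDIR,
 *                               RTE_ETH_FILTER_DELETE, &f);
 * @endcode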
6176 */ 6177 static int 6178 flow_fdir_filter_delete(struct rte_eth_dev *dev, 6179 const struct rte_eth_fdir_filter *fdir_filter) 6180 { 6181 struct mlx5_priv *priv = dev->data->dev_private; 6182 uint32_t flow_idx; 6183 struct mlx5_fdir fdir_flow = { 6184 .attr.group = 0, 6185 }; 6186 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6187 int ret; 6188 6189 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 6190 if (ret) 6191 return -rte_errno; 6192 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 6193 /* Find the fdir in priv list */ 6194 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 6195 break; 6196 } 6197 if (!priv_fdir_flow) 6198 return 0; 6199 LIST_REMOVE(priv_fdir_flow, next); 6200 flow_idx = priv_fdir_flow->rix_flow; 6201 flow_list_destroy(dev, &priv->flows, flow_idx); 6202 mlx5_free(priv_fdir_flow->fdir); 6203 mlx5_free(priv_fdir_flow); 6204 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 6205 dev->data->port_id, flow_idx); 6206 return 0; 6207 } 6208 6209 /** 6210 * Update queue for specific filter. 6211 * 6212 * @param dev 6213 * Pointer to Ethernet device. 6214 * @param fdir_filter 6215 * Filter to be updated. 6216 * 6217 * @return 6218 * 0 on success, a negative errno value otherwise and rte_errno is set. 6219 */ 6220 static int 6221 flow_fdir_filter_update(struct rte_eth_dev *dev, 6222 const struct rte_eth_fdir_filter *fdir_filter) 6223 { 6224 int ret; 6225 6226 ret = flow_fdir_filter_delete(dev, fdir_filter); 6227 if (ret) 6228 return ret; 6229 return flow_fdir_filter_add(dev, fdir_filter); 6230 } 6231 6232 /** 6233 * Flush all filters. 6234 * 6235 * @param dev 6236 * Pointer to Ethernet device. 6237 */ 6238 static void 6239 flow_fdir_filter_flush(struct rte_eth_dev *dev) 6240 { 6241 struct mlx5_priv *priv = dev->data->dev_private; 6242 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 6243 6244 while (!LIST_EMPTY(&priv->fdir_flows)) { 6245 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 6246 LIST_REMOVE(priv_fdir_flow, next); 6247 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 6248 mlx5_free(priv_fdir_flow->fdir); 6249 mlx5_free(priv_fdir_flow); 6250 } 6251 } 6252 6253 /** 6254 * Get flow director information. 6255 * 6256 * @param dev 6257 * Pointer to Ethernet device. 6258 * @param[out] fdir_info 6259 * Resulting flow director information. 6260 */ 6261 static void 6262 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 6263 { 6264 struct rte_eth_fdir_masks *mask = 6265 &dev->data->dev_conf.fdir_conf.mask; 6266 6267 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 6268 fdir_info->guarant_spc = 0; 6269 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 6270 fdir_info->max_flexpayload = 0; 6271 fdir_info->flow_types_mask[0] = 0; 6272 fdir_info->flex_payload_unit = 0; 6273 fdir_info->max_flex_payload_segment_num = 0; 6274 fdir_info->flex_payload_limit = 0; 6275 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 6276 } 6277 6278 /** 6279 * Deal with flow director operations. 6280 * 6281 * @param dev 6282 * Pointer to Ethernet device. 6283 * @param filter_op 6284 * Operation to perform. 6285 * @param arg 6286 * Pointer to operation-specific structure. 6287 * 6288 * @return 6289 * 0 on success, a negative errno value otherwise and rte_errno is set. 
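 *
 * A short sketch of the dispatch semantics (illustrative only):
 *
 * @code
 * struct rte_eth_fdir_info fdir_info;
 *
 * flow_fdir_ctrl_func(dev, RTE_ETH_FILTER_INFO, &fdir_info);
 * flow_fdir_ctrl_func(dev, RTE_ETH_FILTER_FLUSH, NULL);
 * @endcode
 *
 * Every operation except RTE_ETH_FILTER_NOP requires fdir_conf.mode to be
 * RTE_FDIR_MODE_PERFECT or RTE_FDIR_MODE_PERFECT_MAC_VLAN; other modes fail
 * with rte_errno set to EINVAL.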
6290 */ 6291 static int 6292 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 6293 void *arg) 6294 { 6295 enum rte_fdir_mode fdir_mode = 6296 dev->data->dev_conf.fdir_conf.mode; 6297 6298 if (filter_op == RTE_ETH_FILTER_NOP) 6299 return 0; 6300 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 6301 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 6302 DRV_LOG(ERR, "port %u flow director mode %d not supported", 6303 dev->data->port_id, fdir_mode); 6304 rte_errno = EINVAL; 6305 return -rte_errno; 6306 } 6307 switch (filter_op) { 6308 case RTE_ETH_FILTER_ADD: 6309 return flow_fdir_filter_add(dev, arg); 6310 case RTE_ETH_FILTER_UPDATE: 6311 return flow_fdir_filter_update(dev, arg); 6312 case RTE_ETH_FILTER_DELETE: 6313 return flow_fdir_filter_delete(dev, arg); 6314 case RTE_ETH_FILTER_FLUSH: 6315 flow_fdir_filter_flush(dev); 6316 break; 6317 case RTE_ETH_FILTER_INFO: 6318 flow_fdir_info_get(dev, arg); 6319 break; 6320 default: 6321 DRV_LOG(DEBUG, "port %u unknown operation %u", 6322 dev->data->port_id, filter_op); 6323 rte_errno = EINVAL; 6324 return -rte_errno; 6325 } 6326 return 0; 6327 } 6328 6329 /** 6330 * Manage filter operations. 6331 * 6332 * @param dev 6333 * Pointer to Ethernet device structure. 6334 * @param filter_type 6335 * Filter type. 6336 * @param filter_op 6337 * Operation to perform. 6338 * @param arg 6339 * Pointer to operation-specific structure. 6340 * 6341 * @return 6342 * 0 on success, a negative errno value otherwise and rte_errno is set. 6343 */ 6344 int 6345 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 6346 enum rte_filter_type filter_type, 6347 enum rte_filter_op filter_op, 6348 void *arg) 6349 { 6350 switch (filter_type) { 6351 case RTE_ETH_FILTER_GENERIC: 6352 if (filter_op != RTE_ETH_FILTER_GET) { 6353 rte_errno = EINVAL; 6354 return -rte_errno; 6355 } 6356 *(const void **)arg = &mlx5_flow_ops; 6357 return 0; 6358 case RTE_ETH_FILTER_FDIR: 6359 return flow_fdir_ctrl_func(dev, filter_op, arg); 6360 default: 6361 DRV_LOG(ERR, "port %u filter type (%d) not supported", 6362 dev->data->port_id, filter_type); 6363 rte_errno = ENOTSUP; 6364 return -rte_errno; 6365 } 6366 return 0; 6367 } 6368 6369 /** 6370 * Create the needed meter and suffix tables. 6371 * 6372 * @param[in] dev 6373 * Pointer to Ethernet device. 6374 * @param[in] fm 6375 * Pointer to the flow meter. 6376 * 6377 * @return 6378 * Pointer to table set on success, NULL otherwise. 6379 */ 6380 struct mlx5_meter_domains_infos * 6381 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev, 6382 const struct mlx5_flow_meter *fm) 6383 { 6384 const struct mlx5_flow_driver_ops *fops; 6385 6386 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6387 return fops->create_mtr_tbls(dev, fm); 6388 } 6389 6390 /** 6391 * Destroy the meter table set. 6392 * 6393 * @param[in] dev 6394 * Pointer to Ethernet device. 6395 * @param[in] tbl 6396 * Pointer to the meter table set. 6397 * 6398 * @return 6399 * 0 on success. 6400 */ 6401 int 6402 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev, 6403 struct mlx5_meter_domains_infos *tbls) 6404 { 6405 const struct mlx5_flow_driver_ops *fops; 6406 6407 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6408 return fops->destroy_mtr_tbls(dev, tbls); 6409 } 6410 6411 /** 6412 * Create policer rules. 6413 * 6414 * @param[in] dev 6415 * Pointer to Ethernet device. 6416 * @param[in] fm 6417 * Pointer to flow meter structure. 6418 * @param[in] attr 6419 * Pointer to flow attributes. 6420 * 6421 * @return 6422 * 0 on success, -1 otherwise. 
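 *
 * Illustrative pairing with the meter table helpers above (simplified; the
 * rollback policy and the fm/attr variables are assumptions, not taken from
 * this file):
 *
 * @code
 * struct mlx5_meter_domains_infos *mtd;
 *
 * mtd = mlx5_flow_create_mtr_tbls(dev, fm);
 * if (mtd == NULL || mlx5_flow_create_policer_rules(dev, fm, &attr)) {
 *         if (mtd != NULL)
 *                 mlx5_flow_destroy_mtr_tbls(dev, mtd);
 * }
 * @endcode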
6423 */ 6424 int 6425 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev, 6426 struct mlx5_flow_meter *fm, 6427 const struct rte_flow_attr *attr) 6428 { 6429 const struct mlx5_flow_driver_ops *fops; 6430 6431 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6432 return fops->create_policer_rules(dev, fm, attr); 6433 } 6434 6435 /** 6436 * Destroy policer rules. 6437 * 6438 * @param[in] fm 6439 * Pointer to flow meter structure. 6440 * @param[in] attr 6441 * Pointer to flow attributes. 6442 * 6443 * @return 6444 * 0 on success, -1 otherwise. 6445 */ 6446 int 6447 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev, 6448 struct mlx5_flow_meter *fm, 6449 const struct rte_flow_attr *attr) 6450 { 6451 const struct mlx5_flow_driver_ops *fops; 6452 6453 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6454 return fops->destroy_policer_rules(dev, fm, attr); 6455 } 6456 6457 /** 6458 * Allocate a counter. 6459 * 6460 * @param[in] dev 6461 * Pointer to Ethernet device structure. 6462 * 6463 * @return 6464 * Index to allocated counter on success, 0 otherwise. 6465 */ 6466 uint32_t 6467 mlx5_counter_alloc(struct rte_eth_dev *dev) 6468 { 6469 const struct mlx5_flow_driver_ops *fops; 6470 struct rte_flow_attr attr = { .transfer = 0 }; 6471 6472 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6473 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6474 return fops->counter_alloc(dev); 6475 } 6476 DRV_LOG(ERR, 6477 "port %u counter allocate is not supported.", 6478 dev->data->port_id); 6479 return 0; 6480 } 6481 6482 /** 6483 * Free a counter. 6484 * 6485 * @param[in] dev 6486 * Pointer to Ethernet device structure. 6487 * @param[in] cnt 6488 * Index of the counter to be freed. 6489 */ 6490 void 6491 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt) 6492 { 6493 const struct mlx5_flow_driver_ops *fops; 6494 struct rte_flow_attr attr = { .transfer = 0 }; 6495 6496 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6497 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6498 fops->counter_free(dev, cnt); 6499 return; 6500 } 6501 DRV_LOG(ERR, 6502 "port %u counter free is not supported.", 6503 dev->data->port_id); 6504 } 6505 6506 /** 6507 * Query counter statistics. 6508 * 6509 * @param[in] dev 6510 * Pointer to Ethernet device structure. 6511 * @param[in] cnt 6512 * Index of the counter to query. 6513 * @param[in] clear 6514 * Set to clear counter statistics. 6515 * @param[out] pkts 6516 * Pointer where the number of packet hits for the counter is saved. 6517 * @param[out] bytes 6518 * Pointer where the number of byte hits for the counter is saved. 6519 * 6520 * @return 6521 * 0 on success, a negative errno value otherwise. 6522 */ 6523 int 6524 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, 6525 bool clear, uint64_t *pkts, uint64_t *bytes) 6526 { 6527 const struct mlx5_flow_driver_ops *fops; 6528 struct rte_flow_attr attr = { .transfer = 0 }; 6529 6530 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6531 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6532 return fops->counter_query(dev, cnt, clear, pkts, bytes); 6533 } 6534 DRV_LOG(ERR, 6535 "port %u counter query is not supported.", 6536 dev->data->port_id); 6537 return -ENOTSUP; 6538 } 6539 6540 #define MLX5_POOL_QUERY_FREQ_US 1000000 6541 6542 /** 6543 * Get the number of all valid pools. 6544 * 6545 * @param[in] sh 6546 * Pointer to mlx5_dev_ctx_shared object. 6547 * 6548 * @return 6549 * The number of all valid pools.
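 *
 * The result drives the query alarm interval computed in
 * mlx5_set_query_alarm(); a small worked example:
 *
 * @code
 * uint32_t pools_n = 4;
 * uint32_t us = MLX5_POOL_QUERY_FREQ_US / pools_n;
 * @endcode
 *
 * With MLX5_POOL_QUERY_FREQ_US at 1000000, us is 250000: the alarm fires
 * every 250 ms and queries one pool per expiration, so each pool is queried
 * about once per second.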
6550 */ 6551 static uint32_t 6552 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh) 6553 { 6554 int i; 6555 uint32_t pools_n = 0; 6556 6557 for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) 6558 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid); 6559 return pools_n; 6560 } 6561 6562 /** 6563 * Set the periodic procedure for triggering asynchronous batch queries for all 6564 * the counter pools. 6565 * 6566 * @param[in] sh 6567 * Pointer to mlx5_dev_ctx_shared object. 6568 */ 6569 void 6570 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) 6571 { 6572 uint32_t pools_n, us; 6573 6574 pools_n = mlx5_get_all_valid_pool_count(sh); 6575 us = MLX5_POOL_QUERY_FREQ_US / pools_n; 6576 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); 6577 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { 6578 sh->cmng.query_thread_on = 0; 6579 DRV_LOG(ERR, "Cannot reinitialize query alarm"); 6580 } else { 6581 sh->cmng.query_thread_on = 1; 6582 } 6583 } 6584 6585 /** 6586 * The periodic procedure for triggering asynchronous batch queries for all the 6587 * counter pools. This function is probably called by the host thread. 6588 * 6589 * @param[in] arg 6590 * The parameter for the alarm process. 6591 */ 6592 void 6593 mlx5_flow_query_alarm(void *arg) 6594 { 6595 struct mlx5_dev_ctx_shared *sh = arg; 6596 struct mlx5_devx_obj *dcs; 6597 uint16_t offset; 6598 int ret; 6599 uint8_t batch = sh->cmng.batch; 6600 uint8_t age = sh->cmng.age; 6601 uint16_t pool_index = sh->cmng.pool_index; 6602 struct mlx5_pools_container *cont; 6603 struct mlx5_flow_counter_pool *pool; 6604 int cont_loop = MLX5_CCONT_TYPE_MAX; 6605 6606 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) 6607 goto set_alarm; 6608 next_container: 6609 cont = MLX5_CNT_CONTAINER(sh, batch, age); 6610 rte_spinlock_lock(&cont->resize_sl); 6611 if (!cont->pools) { 6612 rte_spinlock_unlock(&cont->resize_sl); 6613 /* Check if all the containers are empty. */ 6614 if (unlikely(--cont_loop == 0)) 6615 goto set_alarm; 6616 batch ^= 0x1; 6617 pool_index = 0; 6618 if (batch == 0 && pool_index == 0) { 6619 age ^= 0x1; 6620 sh->cmng.batch = batch; 6621 sh->cmng.age = age; 6622 } 6623 goto next_container; 6624 } 6625 pool = cont->pools[pool_index]; 6626 rte_spinlock_unlock(&cont->resize_sl); 6627 if (pool->raw_hw) 6628 /* There is a pool query in progress. */ 6629 goto set_alarm; 6630 pool->raw_hw = 6631 LIST_FIRST(&sh->cmng.free_stat_raws); 6632 if (!pool->raw_hw) 6633 /* No free counter statistics raw memory. */ 6634 goto set_alarm; 6635 dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read 6636 (&pool->a64_dcs); 6637 if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) { 6638 /* Pool without valid counter. */ 6639 pool->raw_hw = NULL; 6640 goto next_pool; 6641 } 6642 offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; 6643 /* 6644 * Identify the counters released between query trigger and query 6645 * handle more effiecntly. The counter released in this gap period 6646 * should wait for a new round of query as the new arrived packets 6647 * will not be taken into account. 
6648 */ 6649 pool->query_gen++; 6650 ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - 6651 offset, NULL, NULL, 6652 pool->raw_hw->mem_mng->dm->id, 6653 (void *)(uintptr_t) 6654 (pool->raw_hw->data + offset), 6655 sh->devx_comp, 6656 (uint64_t)(uintptr_t)pool); 6657 if (ret) { 6658 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" 6659 " %d", pool->min_dcs->id); 6660 pool->raw_hw = NULL; 6661 goto set_alarm; 6662 } 6663 pool->raw_hw->min_dcs_id = dcs->id; 6664 LIST_REMOVE(pool->raw_hw, next); 6665 sh->cmng.pending_queries++; 6666 next_pool: 6667 pool_index++; 6668 if (pool_index >= rte_atomic16_read(&cont->n_valid)) { 6669 batch ^= 0x1; 6670 pool_index = 0; 6671 if (batch == 0 && pool_index == 0) 6672 age ^= 0x1; 6673 } 6674 set_alarm: 6675 sh->cmng.batch = batch; 6676 sh->cmng.pool_index = pool_index; 6677 sh->cmng.age = age; 6678 mlx5_set_query_alarm(sh); 6679 } 6680 6681 /** 6682 * Check and callback event for new aged flow in the counter pool 6683 * 6684 * @param[in] sh 6685 * Pointer to mlx5_dev_ctx_shared object. 6686 * @param[in] pool 6687 * Pointer to Current counter pool. 6688 */ 6689 static void 6690 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, 6691 struct mlx5_flow_counter_pool *pool) 6692 { 6693 struct mlx5_priv *priv; 6694 struct mlx5_flow_counter *cnt; 6695 struct mlx5_age_info *age_info; 6696 struct mlx5_age_param *age_param; 6697 struct mlx5_counter_stats_raw *cur = pool->raw_hw; 6698 struct mlx5_counter_stats_raw *prev = pool->raw; 6699 uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10); 6700 uint32_t i; 6701 6702 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { 6703 cnt = MLX5_POOL_GET_CNT(pool, i); 6704 age_param = MLX5_CNT_TO_AGE(cnt); 6705 if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE) 6706 continue; 6707 if (cur->data[i].hits != prev->data[i].hits) { 6708 age_param->expire = curr + age_param->timeout; 6709 continue; 6710 } 6711 if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2)) 6712 continue; 6713 /** 6714 * Hold the lock first, or if between the 6715 * state AGE_TMOUT and tailq operation the 6716 * release happened, the release procedure 6717 * may delete a non-existent tailq node. 6718 */ 6719 priv = rte_eth_devices[age_param->port_id].data->dev_private; 6720 age_info = GET_PORT_AGE_INFO(priv); 6721 rte_spinlock_lock(&age_info->aged_sl); 6722 /* If the cpmset fails, release happens. */ 6723 if (rte_atomic16_cmpset((volatile uint16_t *) 6724 &age_param->state, 6725 AGE_CANDIDATE, 6726 AGE_TMOUT) == 6727 AGE_CANDIDATE) { 6728 TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); 6729 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 6730 } 6731 rte_spinlock_unlock(&age_info->aged_sl); 6732 } 6733 for (i = 0; i < sh->max_port; i++) { 6734 age_info = &sh->port[i].age_info; 6735 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) 6736 continue; 6737 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) 6738 rte_eth_dev_callback_process 6739 (&rte_eth_devices[sh->port[i].devx_ih_port_id], 6740 RTE_ETH_EVENT_FLOW_AGED, NULL); 6741 age_info->flags = 0; 6742 } 6743 } 6744 6745 /** 6746 * Handler for the HW respond about ready values from an asynchronous batch 6747 * query. This function is probably called by the host thread. 6748 * 6749 * @param[in] sh 6750 * The pointer to the shared device context. 6751 * @param[in] async_id 6752 * The Devx async ID. 6753 * @param[in] status 6754 * The status of the completion. 
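 *
 * The async_id is the opaque value passed when the query was triggered in
 * mlx5_flow_query_alarm(), i.e. the pool pointer itself, so the handler
 * recovers the pool with a cast (sketch mirroring the code below):
 *
 * @code
 * struct mlx5_flow_counter_pool *pool =
 *         (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
 * @endcode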
6755 */ 6756 void 6757 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, 6758 uint64_t async_id, int status) 6759 { 6760 struct mlx5_flow_counter_pool *pool = 6761 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; 6762 struct mlx5_counter_stats_raw *raw_to_free; 6763 uint8_t age = !!IS_AGE_POOL(pool); 6764 uint8_t query_gen = pool->query_gen ^ 1; 6765 struct mlx5_pools_container *cont = 6766 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age); 6767 6768 if (unlikely(status)) { 6769 raw_to_free = pool->raw_hw; 6770 } else { 6771 raw_to_free = pool->raw; 6772 if (IS_AGE_POOL(pool)) 6773 mlx5_flow_aging_check(sh, pool); 6774 rte_spinlock_lock(&pool->sl); 6775 pool->raw = pool->raw_hw; 6776 rte_spinlock_unlock(&pool->sl); 6777 /* Be sure the new raw counters data is updated in memory. */ 6778 rte_io_wmb(); 6779 if (!TAILQ_EMPTY(&pool->counters[query_gen])) { 6780 rte_spinlock_lock(&cont->csl); 6781 TAILQ_CONCAT(&cont->counters, 6782 &pool->counters[query_gen], next); 6783 rte_spinlock_unlock(&cont->csl); 6784 } 6785 } 6786 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); 6787 pool->raw_hw = NULL; 6788 sh->cmng.pending_queries--; 6789 } 6790 6791 /** 6792 * Translate the rte_flow group index to HW table value. 6793 * 6794 * @param[in] attributes 6795 * Pointer to flow attributes 6796 * @param[in] external 6797 * Value is part of flow rule created by request external to PMD. 6798 * @param[in] group 6799 * rte_flow group index value. 6800 * @param[out] fdb_def_rule 6801 * Whether fdb jump to table 1 is configured. 6802 * @param[out] table 6803 * HW table value. 6804 * @param[out] error 6805 * Pointer to error structure. 6806 * 6807 * @return 6808 * 0 on success, a negative errno value otherwise and rte_errno is set. 6809 */ 6810 int 6811 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, 6812 uint32_t group, bool fdb_def_rule, uint32_t *table, 6813 struct rte_flow_error *error) 6814 { 6815 if (attributes->transfer && external && fdb_def_rule) { 6816 if (group == UINT32_MAX) 6817 return rte_flow_error_set 6818 (error, EINVAL, 6819 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 6820 NULL, 6821 "group index not supported"); 6822 *table = group + 1; 6823 } else { 6824 *table = group; 6825 } 6826 return 0; 6827 } 6828 6829 /** 6830 * Discover availability of metadata reg_c's. 6831 * 6832 * Iteratively use test flows to check availability. 6833 * 6834 * @param[in] dev 6835 * Pointer to the Ethernet device structure. 6836 * 6837 * @return 6838 * 0 on success, a negative errno value otherwise and rte_errno is set. 6839 */ 6840 int 6841 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) 6842 { 6843 struct mlx5_priv *priv = dev->data->dev_private; 6844 struct mlx5_dev_config *config = &priv->config; 6845 enum modify_reg idx; 6846 int n = 0; 6847 6848 /* reg_c[0] and reg_c[1] are reserved. */ 6849 config->flow_mreg_c[n++] = REG_C_0; 6850 config->flow_mreg_c[n++] = REG_C_1; 6851 /* Discover availability of other reg_c's. 
*/ 6852 for (idx = REG_C_2; idx <= REG_C_7; ++idx) { 6853 struct rte_flow_attr attr = { 6854 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 6855 .priority = MLX5_FLOW_PRIO_RSVD, 6856 .ingress = 1, 6857 }; 6858 struct rte_flow_item items[] = { 6859 [0] = { 6860 .type = RTE_FLOW_ITEM_TYPE_END, 6861 }, 6862 }; 6863 struct rte_flow_action actions[] = { 6864 [0] = { 6865 .type = (enum rte_flow_action_type) 6866 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 6867 .conf = &(struct mlx5_flow_action_copy_mreg){ 6868 .src = REG_C_1, 6869 .dst = idx, 6870 }, 6871 }, 6872 [1] = { 6873 .type = RTE_FLOW_ACTION_TYPE_JUMP, 6874 .conf = &(struct rte_flow_action_jump){ 6875 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 6876 }, 6877 }, 6878 [2] = { 6879 .type = RTE_FLOW_ACTION_TYPE_END, 6880 }, 6881 }; 6882 uint32_t flow_idx; 6883 struct rte_flow *flow; 6884 struct rte_flow_error error; 6885 6886 if (!config->dv_flow_en) 6887 break; 6888 /* Create internal flow, validation skips copy action. */ 6889 flow_idx = flow_list_create(dev, NULL, &attr, items, 6890 actions, false, &error); 6891 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 6892 flow_idx); 6893 if (!flow) 6894 continue; 6895 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL)) 6896 config->flow_mreg_c[n++] = idx; 6897 flow_list_destroy(dev, NULL, flow_idx); 6898 } 6899 for (; n < MLX5_MREG_C_NUM; ++n) 6900 config->flow_mreg_c[n] = REG_NON; 6901 return 0; 6902 } 6903 6904 /** 6905 * Dump flow raw HW data to a file. 6906 * 6907 * @param[in] dev 6908 * The pointer to Ethernet device. 6909 * @param[in] file 6910 * A pointer to a file for output. 6911 * @param[out] error 6912 * Perform verbose error reporting if not NULL. PMDs initialize this 6913 * structure in case of error only. 6914 * @return 6915 * 0 on success, a negative value otherwise. 6916 */ 6917 int 6918 mlx5_flow_dev_dump(struct rte_eth_dev *dev, 6919 FILE *file, 6920 struct rte_flow_error *error __rte_unused) 6921 { 6922 struct mlx5_priv *priv = dev->data->dev_private; 6923 struct mlx5_dev_ctx_shared *sh = priv->sh; 6924 6925 if (!priv->config.dv_flow_en) { 6926 if (fputs("device dv flow disabled\n", file) <= 0) 6927 return -errno; 6928 return -ENOTSUP; 6929 } 6930 return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain, 6931 sh->tx_domain, file); 6932 } 6933 6934 /** 6935 * Get aged-out flows. 6936 * 6937 * @param[in] dev 6938 * Pointer to the Ethernet device structure. 6939 * @param[in] contexts 6940 * The address of an array of pointers to the aged-out flow contexts. 6941 * @param[in] nb_contexts 6942 * The length of the context array. 6943 * @param[out] error 6944 * Perform verbose error reporting if not NULL. Initialized in case of 6945 * error only. 6946 * 6947 * @return 6948 * The number of aged-out contexts on success, a negative errno value 6949 * otherwise. If nb_contexts is 0, the total number of aged-out contexts 6950 * is returned; otherwise, the number of aged-out flows reported 6951 * in the context array is returned. 6952 */ 6953 int 6954 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, 6955 uint32_t nb_contexts, struct rte_flow_error *error) 6956 { 6957 const struct mlx5_flow_driver_ops *fops; 6958 struct rte_flow_attr attr = { .transfer = 0 }; 6959 6960 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6961 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6962 return fops->get_aged_flows(dev, contexts, nb_contexts, 6963 error); 6964 } 6965 DRV_LOG(ERR, 6966 "port %u get aged flows is not supported.", 6967 dev->data->port_id); 6968 return -ENOTSUP; 6969 } 6970