/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern outer Layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

/* Pattern inner Layer bits. */
#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)

/* Pattern tunnel Layer bits. */
#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
#define MLX5_FLOW_LAYER_GRE (1u << 14)
#define MLX5_FLOW_LAYER_MPLS (1u << 15)

/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
#define MLX5_FLOW_LAYER_OUTER \
	(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
	 MLX5_FLOW_LAYER_OUTER_L4)

/* Tunnel Masks. */
#define MLX5_FLOW_LAYER_TUNNEL \
	(MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
	 MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)

/* Inner Masks. */
#define MLX5_FLOW_LAYER_INNER_L3 \
	(MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
#define MLX5_FLOW_LAYER_INNER_L4 \
	(MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
#define MLX5_FLOW_LAYER_INNER \
	(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
	 MLX5_FLOW_LAYER_INNER_L4)

/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)
#define MLX5_FLOW_FATE_RSS (1u << 2)

/* Modify a packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)
#define MLX5_FLOW_MOD_COUNT (1u << 2)

/* Possible L3 layer protocols to filter on. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17
#define MLX5_IP_PROTOCOL_GRE 47
#define MLX5_IP_PROTOCOL_MPLS 147

/* Priority reserved for default flows. */
#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
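
/*
 * Editorial sketch (not part of the driver): the layer bits above are OR'ed
 * into rte_flow->layers as pattern items are translated, and the *_OUTER,
 * *_INNER and *_TUNNEL masks let later items test what is already present,
 * e.g. assuming a partially parsed eth / ipv4 / udp pattern:
 *
 *	uint32_t layers = MLX5_FLOW_LAYER_OUTER_L2 |
 *			  MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *			  MLX5_FLOW_LAYER_OUTER_L4_UDP;
 *	int tunnel = !!(layers & MLX5_FLOW_LAYER_TUNNEL);	- yields 0
 *	int has_l4 = !!(layers & MLX5_FLOW_LAYER_OUTER_L4);	- yields 1
 */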

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						 MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6,
						 MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};

/** Handles information leading to a drop fate. */
struct mlx5_flow_verbs {
	LIST_ENTRY(mlx5_flow_verbs) next;
	unsigned int size; /**< Size of the attribute. */
	struct {
		struct ibv_flow_attr *attr;
		/**< Pointer to the Specification buffer. */
		uint8_t *specs; /**< Pointer to the specifications. */
	};
	struct ibv_flow *flow; /**< Verbs flow pointer. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
};

/* Counters information. */
struct mlx5_flow_counter {
	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
	uint32_t shared:1; /**< Share counter ID with other flow rules. */
	uint32_t ref_cnt:31; /**< Reference counter. */
	uint32_t id; /**< Counter ID. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/* Flow structure. */
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct rte_flow_attr attributes; /**< User flow attribute. */
	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
	uint32_t layers;
	/**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
	uint32_t modifier;
	/**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
	uint32_t fate;
	/**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
	uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
	struct mlx5_flow_verbs *cur_verbs;
	/**< Current Verbs flow structure being filled. */
	struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
	struct rte_flow_action_rss rss;/**< RSS context. */
	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/*
 * Number of sub priorities.
 * For each kind of pattern matching, i.e. L2, L3, L4, to get correct matching
 * on the NIC (firmware dependent), L4 must have the highest priority,
 * followed by L3 and finally L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
};
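
/*
 * Editorial sketch (assumption, not driver code): priority_map_3/_5 above are
 * indexed as [flow priority][subpriority], where the subpriority is one of
 * MLX5_PRIORITY_MAP_L2/L3/L4 chosen by the deepest layer translated so far.
 * For instance, with 16 Verbs priorities:
 *
 *	uint32_t prio = priority_map_5[1][MLX5_PRIORITY_MAP_L3];	- yields 4
 *
 * which is the computation performed by mlx5_flow_adjust_priority() below.
 */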

/**
 * Discover the maximum number of priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, vprio[i]);
		return -rte_errno;
	}
	mlx5_hrxq_drop_release(dev);
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	uint32_t priority = flow->attributes.priority;
	uint32_t subpriority = flow->cur_verbs->attr->priority;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		priority = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		priority = priority_map_5[priority][subpriority];
		break;
	}
	flow->cur_verbs->attr->priority = priority;
}

/**
 * Get a flow counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] shared
 *   Indicate if this counter is shared with other flows.
 * @param[in] id
 *   Counter identifier.
 *
 * @return
 *   A pointer to the counter, NULL otherwise and rte_errno is set.
 */
static struct mlx5_flow_counter *
mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_flow_counter *cnt;

	LIST_FOREACH(cnt, &priv->flow_counters, next) {
		if (cnt->shared != shared)
			continue;
		if (cnt->id != id)
			continue;
		cnt->ref_cnt++;
		return cnt;
	}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT

	struct mlx5_flow_counter tmpl = {
		.shared = shared,
		.id = id,
		.cs = mlx5_glue->create_counter_set
			(priv->ctx,
			 &(struct ibv_counter_set_init_attr){
				 .counter_set_id = id,
			 }),
		.hits = 0,
		.bytes = 0,
	};

	if (!tmpl.cs) {
		rte_errno = errno;
		return NULL;
	}
	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
	if (!cnt) {
		rte_errno = ENOMEM;
		return NULL;
	}
	*cnt = tmpl;
	LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
	return cnt;
#endif
	rte_errno = ENOTSUP;
	return NULL;
}
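
/*
 * Usage sketch (hypothetical caller, for illustration only): counters are
 * reference counted, so every successful mlx5_flow_counter_new() must be
 * balanced by a mlx5_flow_counter_release(), e.g. with an arbitrary shared
 * counter id 42:
 *
 *	struct mlx5_flow_counter *cnt = mlx5_flow_counter_new(dev, 1, 42);
 *
 *	if (cnt) {
 *		- attach cnt->cs to the Verbs flow, read cnt->hits/bytes -
 *		mlx5_flow_counter_release(cnt);
 *	}
 */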

/**
 * Release a flow counter.
 *
 * @param[in] counter
 *   Pointer to the counter handler.
 */
static void
mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
{
	if (--counter->ref_cnt == 0) {
		claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
		LIST_REMOVE(counter, next);
		rte_free(counter);
	}
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_attributes(struct rte_eth_dev *dev,
		     const struct rte_flow_attr *attributes,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	uint32_t priority_max =
		((struct priv *)dev->data->dev_private)->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL,
					  "groups is not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL,
					  "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
					  NULL,
					  "egress is not supported");
	if (attributes->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL,
					  "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	flow->attributes = *attributes;
	if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
		flow->attributes.priority = priority_max;
	return 0;
}
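
/*
 * Illustration only (not driver code): attributes accepted by
 * mlx5_flow_attributes() are ingress rules in group 0; MLX5_FLOW_PRIO_RSVD
 * is replaced by the largest valid priority (priority_max) discovered for
 * the device, e.g.:
 *
 *	const struct rte_flow_attr attr = {
 *		.group = 0,
 *		.priority = MLX5_FLOW_PRIO_RSVD,
 *		.ingress = 1,
 *	};
 */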

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	assert(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not supported");
	}
	return 0;
}

/**
 * Add a verbs item specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Create specification.
 * @param[in] size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (verbs->specs) {
		void *dst;

		dst = (void *)(verbs->specs + verbs->size);
		memcpy(dst, src, size);
		++verbs->attr->num_of_specs;
	}
	verbs->size += size;
}

/**
 * Adjust verbs hash fields according to the @p flow information.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 */
static void
mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
				  int tunnel __rte_unused,
				  uint32_t layer_types, uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
	if (flow->rss.level == 2 && !tunnel)
		hash_fields = 0;
	else if (flow->rss.level < 2 && tunnel)
		hash_fields = 0;
#endif
	if (!(flow->rss.types & layer_types))
		hash_fields = 0;
	flow->cur_verbs->hash_fields |= hash_fields;
}
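
/*
 * Editorial note on the size protocol used by all mlx5_flow_item_*()
 * translators below (illustration, hypothetical variables): each translator
 * returns the number of bytes it needs and only writes the Verbs
 * specification when that size fits in the remaining flow_size, so a caller
 * can run a first pass with flow_size == 0 to measure, reserve room, then
 * convert for real:
 *
 *	int size = mlx5_flow_item_eth(item, flow, 0, error);	- measure
 *	if (size > 0)
 *		- reserve size bytes, then call again with that size -
 */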

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
			    MLX5_FLOW_LAYER_OUTER_L2))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layers already configured");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	if (ret)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
		MLX5_FLOW_LAYER_OUTER_L2;
	if (size > flow_size)
		return size;
	if (spec) {
		unsigned int i;

		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	mlx5_flow_spec_verbs_add(flow, &eth, size);
	return size;
}

/**
 * Update the VLAN tag in the Verbs Ethernet specification.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] eth
 *   Verbs structure containing the VLAN information to copy.
 */
static void
mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
			   struct ibv_flow_spec_eth *eth)
{
	unsigned int i;
	const enum ibv_flow_spec_type search = eth->type;
	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
		((uint8_t *)attr + sizeof(struct ibv_flow_attr));

	for (i = 0; i != attr->num_of_specs; ++i) {
		if (hdr->type == search) {
			struct ibv_flow_spec_eth *e =
				(struct ibv_flow_spec_eth *)hdr;

			e->val.vlan_tag = eth->val.vlan_tag;
			e->mask.vlan_tag = eth->mask.vlan_tag;
			e->val.ether_type = eth->val.ether_type;
			e->mask.ether_type = eth->mask.ether_type;
			break;
		}
		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
	}
}

/**
 * Convert the @p item into @p flow (or by updating the already present
 * Ethernet Verbs) specification after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(0x0fff),
		.inner_type = RTE_BE16(0xffff),
	};
	unsigned int size = sizeof(struct ibv_flow_spec_eth);
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;
	const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
		(MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
	const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
		MLX5_FLOW_LAYER_OUTER_VLAN;
	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
		MLX5_FLOW_LAYER_OUTER_L2;

	if (flow->layers & vlanm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VLAN layer already configured");
	else if ((flow->layers & l34m) != 0)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layer cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_vlan), error);
	if (ret)
		return ret;
	if (spec) {
		eth.val.vlan_tag = spec->tci;
		eth.mask.vlan_tag = mask->tci;
		eth.val.vlan_tag &= eth.mask.vlan_tag;
		eth.val.ether_type = spec->inner_type;
		eth.mask.ether_type = mask->inner_type;
		eth.val.ether_type &= eth.mask.ether_type;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!eth.mask.vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	if (!(flow->layers & l2m)) {
		if (size <= flow_size) {
			flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
			mlx5_flow_spec_verbs_add(flow, &eth, size);
		}
	} else {
		if (flow->cur_verbs)
			mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
						   &eth);
		size = 0; /* Only an update is done in eth specification. */
	}
	flow->layers |= tunnel ?
		(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
		(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = IBV_FLOW_SPEC_IPV4_EXT |
			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv4), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
		MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	if (spec) {
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	flow->l3_protocol_en = !!ipv4.mask.proto;
	flow->l3_protocol = ipv4.val.proto;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			  ETH_RSS_NONFRAG_IPV4_OTHER),
			 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
			.hop_limits = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	/*
	 * IPv6 is not recognised by the NIC inside a GRE tunnel.
	 * Such support has to be disabled as the rule will be
	 * accepted. Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
	 * Mellanox OFED 4.4-1.0.0.0.
	 */
	if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "IPv6 inside a GRE tunnel is"
					  " not recognised.");
	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv6), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ?
		MLX5_FLOW_LAYER_INNER_L3_IPV6 :
		MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	flow->l3_protocol_en = !!ipv6.mask.next_hdr;
	flow->l3_protocol = ipv6.val.next_hdr;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
			 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv6, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = IBV_FLOW_SPEC_UDP | (tunnel ?
					     IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with UDP layer");
	if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			      MLX5_FLOW_LAYER_OUTER_L3)))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter"
					  " on L4");
	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
			    MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already"
					  " present");
	if (!mask)
		mask = &rte_flow_item_udp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_udp_mask,
		 sizeof(struct rte_flow_item_udp), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
		MLX5_FLOW_LAYER_OUTER_L4_UDP;
	if (spec) {
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
						  (IBV_RX_HASH_SRC_PORT_UDP |
						   IBV_RX_HASH_DST_PORT_UDP));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
		mlx5_flow_spec_verbs_add(flow, &udp, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with TCP layer");
	if (!(flow->layers & (tunnel ?
			      MLX5_FLOW_LAYER_INNER_L3 :
			      MLX5_FLOW_LAYER_OUTER_L3)))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter on L4");
	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
			    MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already present");
	if (!mask)
		mask = &rte_flow_item_tcp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_tcp_mask,
		 sizeof(struct rte_flow_item_tcp), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
		MLX5_FLOW_LAYER_OUTER_L4_TCP;
	if (spec) {
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
						  (IBV_RX_HASH_SRC_PORT_TCP |
						   IBV_RX_HASH_DST_PORT_TCP));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
		mlx5_flow_spec_verbs_add(flow, &tcp, size);
	}
	return size;
}
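
/*
 * Illustration only (not driver code): a pattern such as eth / ipv4 / udp
 * ends up setting MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 * MLX5_FLOW_LAYER_OUTER_L4_UDP in flow->layers, and the last translated item
 * (UDP) leaves the sub-priority at MLX5_PRIORITY_MAP_L4, the highest one as
 * required by the NIC:
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */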

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
		     const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	int ret;
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vlan_id = 0, };

	if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "a tunnel is already present");
	/*
	 * Verify only UDPv4 is present as defined in
	 * https://tools.ietf.org/html/rfc7348
	 */
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "no outer UDP layer found");
	if (!mask)
		mask = &rte_flow_item_vxlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_vxlan_mask,
		 sizeof(struct rte_flow_item_vxlan), error);
	if (ret < 0)
		return ret;
	if (spec) {
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
	 * only this layer is defined in the Verbs specification it is
	 * interpreted as wildcard and all packets will match this
	 * rule, if it follows a full stack layer (ex: eth / ipv4 /
	 * udp), all packets matching the layers before will also
	 * match this rule. To avoid such a situation, VNI 0 is
	 * currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VXLAN vni cannot be 0");
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VXLAN tunnel must be fully defined");
	if (size <= flow_size) {
		mlx5_flow_spec_verbs_add(flow, &vxlan, size);
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	}
	flow->layers |= MLX5_FLOW_LAYER_VXLAN;
	return size;
}
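
/*
 * Editorial note with a small sketch (hypothetical VNI value): the 24-bit
 * VNI from the item is copied into bytes 1..3 of the 4-byte union used
 * above, so that, read as a 32-bit big-endian tunnel_id, the VNI occupies
 * the 24 least significant bits and the top byte stays zero:
 *
 *	union vni id = { .vlan_id = 0, };
 *	const uint8_t vni[3] = { 0x12, 0x34, 0x56 };
 *
 *	memcpy(&id.vni[1], vni, 3);	- id.vlan_id holds the raw VNI bytes
 */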

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
			 const struct rte_flow_item *item,
			 struct rte_flow *flow, const size_t flow_size,
			 struct rte_flow_error *error)
{
	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan_gpe = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	int ret;
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vlan_id = 0, };

	if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 VXLAN is not enabled by device"
					  " parameter and/or not configured in"
					  " firmware");
	if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "a tunnel is already present");
	/*
	 * Verify only UDPv4 is present as defined in
	 * https://tools.ietf.org/html/rfc7348
	 */
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "no outer UDP layer found");
	if (!mask)
		mask = &rte_flow_item_vxlan_gpe_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
		 sizeof(struct rte_flow_item_vxlan_gpe), error);
	if (ret < 0)
		return ret;
	if (spec) {
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan_gpe.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan_gpe.mask.tunnel_id = id.vlan_id;
		if (spec->protocol)
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ITEM,
				 item,
				 "VxLAN-GPE protocol not supported");
		/* Remove unwanted bits from values. */
		vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification it is interpreted as
	 * wildcard and all packets will match this rule, if it follows a full
	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule. To avoid such a situation, VNI 0
	 * is currently refused.
	 */
	if (!vxlan_gpe.val.tunnel_id)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VXLAN-GPE vni cannot be 0");
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VXLAN-GPE tunnel must be fully"
					  " defined");
	if (size <= flow_size) {
		mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	}
	flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
	return size;
}

/**
 * Update the protocol in Verbs IPv4/IPv6 spec.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] search
 *   Specification type to search in order to update the IP protocol.
 * @param[in] protocol
 *   Protocol value to set if none is present in the specification.
 */
static void
mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
				      enum ibv_flow_spec_type search,
				      uint8_t protocol)
{
	unsigned int i;
	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
		((uint8_t *)attr + sizeof(struct ibv_flow_attr));

	if (!attr)
		return;
	for (i = 0; i != attr->num_of_specs; ++i) {
		if (hdr->type == search) {
			union {
				struct ibv_flow_spec_ipv4_ext *ipv4;
				struct ibv_flow_spec_ipv6 *ipv6;
			} ip;

			switch (search) {
			case IBV_FLOW_SPEC_IPV4_EXT:
				ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
				if (!ip.ipv4->val.proto) {
					ip.ipv4->val.proto = protocol;
					ip.ipv4->mask.proto = 0xff;
				}
				break;
			case IBV_FLOW_SPEC_IPV6:
				ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
				if (!ip.ipv6->val.next_hdr) {
					ip.ipv6->val.next_hdr = protocol;
					ip.ipv6->mask.next_hdr = 0xff;
				}
				break;
			default:
				break;
			}
			break;
		}
		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
	}
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * It will also update the previous L3 layer with the protocol value matching
 * the GRE.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_gre(const struct rte_flow_item *item,
		   struct rte_flow *flow, const size_t flow_size,
		   struct rte_flow_error *error)
{
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = IBV_FLOW_SPEC_GRE,
		.size = size,
	};
#else
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
#endif
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with this GRE layer");
	if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "a tunnel is already present");
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 Layer is missing");
	if (!mask)
		mask = &rte_flow_item_gre_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_gre_mask,
		 sizeof(struct rte_flow_item_gre), error);
	if (ret < 0)
		return ret;
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	if (spec) {
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		tunnel.val.key &= tunnel.mask.key;
	}
#else
	if (spec && (spec->protocol & mask->protocol))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "without MPLS support the"
					  " specification cannot be used for"
					  " filtering");
#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
	if (size <= flow_size) {
		if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
			mlx5_flow_item_gre_ip_protocol_update
				(verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
				 MLX5_IP_PROTOCOL_GRE);
		else
			mlx5_flow_item_gre_ip_protocol_update
				(verbs->attr, IBV_FLOW_SPEC_IPV6,
				 MLX5_IP_PROTOCOL_GRE);
		mlx5_flow_spec_verbs_add(flow, &tunnel, size);
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	}
	flow->layers |= MLX5_FLOW_LAYER_GRE;
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
1717 * 1718 * @return 1719 * On success the number of bytes consumed/necessary, if the returned value 1720 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1721 * otherwise another call with this returned memory size should be done. 1722 * On error, a negative errno value is returned and rte_errno is set. 1723 */ 1724 static int 1725 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused, 1726 struct rte_flow *flow __rte_unused, 1727 const size_t flow_size __rte_unused, 1728 struct rte_flow_error *error) 1729 { 1730 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1731 const struct rte_flow_item_mpls *spec = item->spec; 1732 const struct rte_flow_item_mpls *mask = item->mask; 1733 unsigned int size = sizeof(struct ibv_flow_spec_mpls); 1734 struct ibv_flow_spec_mpls mpls = { 1735 .type = IBV_FLOW_SPEC_MPLS, 1736 .size = size, 1737 }; 1738 int ret; 1739 1740 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS) 1741 return rte_flow_error_set(error, ENOTSUP, 1742 RTE_FLOW_ERROR_TYPE_ITEM, 1743 item, 1744 "protocol filtering not compatible" 1745 " with MPLS layer"); 1746 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1747 return rte_flow_error_set(error, ENOTSUP, 1748 RTE_FLOW_ERROR_TYPE_ITEM, 1749 item, 1750 "a tunnel is already" 1751 " present"); 1752 if (!mask) 1753 mask = &rte_flow_item_mpls_mask; 1754 ret = mlx5_flow_item_acceptable 1755 (item, (const uint8_t *)mask, 1756 (const uint8_t *)&rte_flow_item_mpls_mask, 1757 sizeof(struct rte_flow_item_mpls), error); 1758 if (ret < 0) 1759 return ret; 1760 if (spec) { 1761 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label)); 1762 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label)); 1763 /* Remove unwanted bits from values. */ 1764 mpls.val.label &= mpls.mask.label; 1765 } 1766 if (size <= flow_size) { 1767 mlx5_flow_spec_verbs_add(flow, &mpls, size); 1768 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1769 } 1770 flow->layers |= MLX5_FLOW_LAYER_MPLS; 1771 return size; 1772 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1773 return rte_flow_error_set(error, ENOTSUP, 1774 RTE_FLOW_ERROR_TYPE_ITEM, 1775 item, 1776 "MPLS is not supported by Verbs, please" 1777 " update."); 1778 } 1779 1780 /** 1781 * Convert the @p pattern into a Verbs specifications after ensuring the NIC 1782 * will understand and process it correctly. 1783 * The conversion is performed item per item, each of them is written into 1784 * the @p flow if its size is lesser or equal to @p flow_size. 1785 * Validation and memory consumption computation are still performed until the 1786 * end of @p pattern, unless an error is encountered. 1787 * 1788 * @param[in] pattern 1789 * Flow pattern. 1790 * @param[in, out] flow 1791 * Pointer to the rte_flow structure. 1792 * @param[in] flow_size 1793 * Size in bytes of the available space in @p flow, if too small some 1794 * garbage may be present. 1795 * @param[out] error 1796 * Pointer to error structure. 1797 * 1798 * @return 1799 * On success the number of bytes consumed/necessary, if the returned value 1800 * is lesser or equal to @p flow_size, the @pattern has fully been 1801 * converted, otherwise another call with this returned memory size should 1802 * be done. 1803 * On error, a negative errno value is returned and rte_errno is set. 
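 *
 * For reference, a hypothetical pattern this function consumes (the array
 * below is illustrative and not taken from this file):
 *
 * @code
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 * @endcode
 *
 * Items are converted in order and their sizes accumulated; when the
 * pattern matches no layer at all, a default Ethernet item is converted
 * instead.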
1804 */ 1805 static int 1806 mlx5_flow_items(struct rte_eth_dev *dev, 1807 const struct rte_flow_item pattern[], 1808 struct rte_flow *flow, const size_t flow_size, 1809 struct rte_flow_error *error) 1810 { 1811 int remain = flow_size; 1812 size_t size = 0; 1813 1814 for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) { 1815 int ret = 0; 1816 1817 switch (pattern->type) { 1818 case RTE_FLOW_ITEM_TYPE_VOID: 1819 break; 1820 case RTE_FLOW_ITEM_TYPE_ETH: 1821 ret = mlx5_flow_item_eth(pattern, flow, remain, error); 1822 break; 1823 case RTE_FLOW_ITEM_TYPE_VLAN: 1824 ret = mlx5_flow_item_vlan(pattern, flow, remain, error); 1825 break; 1826 case RTE_FLOW_ITEM_TYPE_IPV4: 1827 ret = mlx5_flow_item_ipv4(pattern, flow, remain, error); 1828 break; 1829 case RTE_FLOW_ITEM_TYPE_IPV6: 1830 ret = mlx5_flow_item_ipv6(pattern, flow, remain, error); 1831 break; 1832 case RTE_FLOW_ITEM_TYPE_UDP: 1833 ret = mlx5_flow_item_udp(pattern, flow, remain, error); 1834 break; 1835 case RTE_FLOW_ITEM_TYPE_TCP: 1836 ret = mlx5_flow_item_tcp(pattern, flow, remain, error); 1837 break; 1838 case RTE_FLOW_ITEM_TYPE_VXLAN: 1839 ret = mlx5_flow_item_vxlan(pattern, flow, remain, 1840 error); 1841 break; 1842 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: 1843 ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow, 1844 remain, error); 1845 break; 1846 case RTE_FLOW_ITEM_TYPE_GRE: 1847 ret = mlx5_flow_item_gre(pattern, flow, remain, error); 1848 break; 1849 case RTE_FLOW_ITEM_TYPE_MPLS: 1850 ret = mlx5_flow_item_mpls(pattern, flow, remain, error); 1851 break; 1852 default: 1853 return rte_flow_error_set(error, ENOTSUP, 1854 RTE_FLOW_ERROR_TYPE_ITEM, 1855 pattern, 1856 "item not supported"); 1857 } 1858 if (ret < 0) 1859 return ret; 1860 if (remain > ret) 1861 remain -= ret; 1862 else 1863 remain = 0; 1864 size += ret; 1865 } 1866 if (!flow->layers) { 1867 const struct rte_flow_item item = { 1868 .type = RTE_FLOW_ITEM_TYPE_ETH, 1869 }; 1870 1871 return mlx5_flow_item_eth(&item, flow, flow_size, error); 1872 } 1873 return size; 1874 } 1875 1876 /** 1877 * Convert the @p action into a Verbs specification after ensuring the NIC 1878 * will understand and process it correctly. 1879 * If the necessary size for the conversion is greater than the @p flow_size, 1880 * nothing is written in @p flow, the validation is still performed. 1881 * 1882 * @param[in] action 1883 * Action configuration. 1884 * @param[in, out] flow 1885 * Pointer to flow structure. 1886 * @param[in] flow_size 1887 * Size in bytes of the available space in @p flow, if too small, nothing is 1888 * written. 1889 * @param[out] error 1890 * Pointer to error structure. 1891 * 1892 * @return 1893 * On success the number of bytes consumed/necessary, if the returned value 1894 * is lesser or equal to @p flow_size, the @p action has fully been 1895 * converted, otherwise another call with this returned memory size should 1896 * be done. 1897 * On error, a negative errno value is returned and rte_errno is set. 
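 *
 * For reference, the kind of action list this converter is reached from
 * (illustrative only):
 *
 * @code
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 * @endcode
 *
 * Drop is a fate action, it cannot be combined with queue/RSS nor with the
 * flag/mark modifiers.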
 */
static int
mlx5_flow_action_drop(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	struct ibv_flow_spec_action_drop drop = {
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "drop is not compatible with"
					  " flag/mark action");
	if (size <= flow_size)
		mlx5_flow_spec_verbs_add(flow, &drop, size);
	flow->fate |= MLX5_FLOW_FATE_DROP;
	return size;
}

/**
 * Convert the @p action into @p flow after ensuring the NIC will understand
 * and process it correctly.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_action_queue(struct rte_eth_dev *dev,
		       const struct rte_flow_action *action,
		       struct rte_flow *flow,
		       struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (flow->queue)
		(*flow->queue)[0] = queue->index;
	flow->rss.queue_num = 1;
	flow->fate |= MLX5_FLOW_FATE_QUEUE;
	return 0;
}

/**
 * Ensure the @p action will be understood and used correctly by the NIC.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Pointer to flow actions array.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in, out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success @p flow->queue array and @p flow->rss are filled and valid.
 *   On error, a negative errno value is returned and rte_errno is set.
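 *
 * Illustrative configuration accepted by this function (hypothetical
 * values, "rss_key" is assumed to be a MLX5_RSS_HASH_KEY_LEN byte key
 * provided by the caller):
 *
 * @code
 *	uint16_t queues[2] = { 0, 1 };
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = ETH_RSS_IP,
 *		.key_len = MLX5_RSS_HASH_KEY_LEN,
 *		.key = rss_key,
 *		.queue_num = 2,
 *		.queue = queues,
 *	};
 * @endcode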
1991 */ 1992 static int 1993 mlx5_flow_action_rss(struct rte_eth_dev *dev, 1994 const struct rte_flow_action *action, 1995 struct rte_flow *flow, 1996 struct rte_flow_error *error) 1997 { 1998 struct priv *priv = dev->data->dev_private; 1999 const struct rte_flow_action_rss *rss = action->conf; 2000 unsigned int i; 2001 2002 if (flow->fate) 2003 return rte_flow_error_set(error, ENOTSUP, 2004 RTE_FLOW_ERROR_TYPE_ACTION, 2005 action, 2006 "multiple fate actions are not" 2007 " supported"); 2008 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 2009 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 2010 return rte_flow_error_set(error, ENOTSUP, 2011 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2012 &rss->func, 2013 "RSS hash function not supported"); 2014 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 2015 if (rss->level > 2) 2016 #else 2017 if (rss->level > 1) 2018 #endif 2019 return rte_flow_error_set(error, ENOTSUP, 2020 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2021 &rss->level, 2022 "tunnel RSS is not supported"); 2023 if (rss->key_len < MLX5_RSS_HASH_KEY_LEN) 2024 return rte_flow_error_set(error, ENOTSUP, 2025 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2026 &rss->key_len, 2027 "RSS hash key too small"); 2028 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 2029 return rte_flow_error_set(error, ENOTSUP, 2030 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2031 &rss->key_len, 2032 "RSS hash key too large"); 2033 if (rss->queue_num > priv->config.ind_table_max_size) 2034 return rte_flow_error_set(error, ENOTSUP, 2035 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2036 &rss->queue_num, 2037 "number of queues too large"); 2038 if (rss->types & MLX5_RSS_HF_MASK) 2039 return rte_flow_error_set(error, ENOTSUP, 2040 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2041 &rss->types, 2042 "some RSS protocols are not" 2043 " supported"); 2044 for (i = 0; i != rss->queue_num; ++i) { 2045 if (!(*priv->rxqs)[rss->queue[i]]) 2046 return rte_flow_error_set 2047 (error, EINVAL, 2048 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2049 &rss->queue[i], 2050 "queue is not configured"); 2051 } 2052 if (flow->queue) 2053 memcpy((*flow->queue), rss->queue, 2054 rss->queue_num * sizeof(uint16_t)); 2055 flow->rss.queue_num = rss->queue_num; 2056 memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN); 2057 flow->rss.types = rss->types; 2058 flow->rss.level = rss->level; 2059 flow->fate |= MLX5_FLOW_FATE_RSS; 2060 return 0; 2061 } 2062 2063 /** 2064 * Convert the @p action into a Verbs specification after ensuring the NIC 2065 * will understand and process it correctly. 2066 * If the necessary size for the conversion is greater than the @p flow_size, 2067 * nothing is written in @p flow, the validation is still performed. 2068 * 2069 * @param[in] action 2070 * Action configuration. 2071 * @param[in, out] flow 2072 * Pointer to flow structure. 2073 * @param[in] flow_size 2074 * Size in bytes of the available space in @p flow, if too small, nothing is 2075 * written. 2076 * @param[out] error 2077 * Pointer to error structure. 2078 * 2079 * @return 2080 * On success the number of bytes consumed/necessary, if the returned value 2081 * is lesser or equal to @p flow_size, the @p action has fully been 2082 * converted, otherwise another call with this returned memory size should 2083 * be done. 2084 * On error, a negative errno value is returned and rte_errno is set. 
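 *
 * The flag action carries no configuration; it is converted into a tag
 * specification holding the reserved MLX5_FLOW_MARK_DEFAULT identifier.
 * Illustrative action list (the queue configuration is assumed to exist):
 *
 * @code
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_FLAG },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 * @endcode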
2085 */ 2086 static int 2087 mlx5_flow_action_flag(const struct rte_flow_action *action, 2088 struct rte_flow *flow, const size_t flow_size, 2089 struct rte_flow_error *error) 2090 { 2091 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 2092 struct ibv_flow_spec_action_tag tag = { 2093 .type = IBV_FLOW_SPEC_ACTION_TAG, 2094 .size = size, 2095 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT), 2096 }; 2097 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 2098 2099 if (flow->modifier & MLX5_FLOW_MOD_FLAG) 2100 return rte_flow_error_set(error, ENOTSUP, 2101 RTE_FLOW_ERROR_TYPE_ACTION, 2102 action, 2103 "flag action already present"); 2104 if (flow->fate & MLX5_FLOW_FATE_DROP) 2105 return rte_flow_error_set(error, ENOTSUP, 2106 RTE_FLOW_ERROR_TYPE_ACTION, 2107 action, 2108 "flag is not compatible with drop" 2109 " action"); 2110 if (flow->modifier & MLX5_FLOW_MOD_MARK) 2111 size = 0; 2112 else if (size <= flow_size && verbs) 2113 mlx5_flow_spec_verbs_add(flow, &tag, size); 2114 flow->modifier |= MLX5_FLOW_MOD_FLAG; 2115 return size; 2116 } 2117 2118 /** 2119 * Update verbs specification to modify the flag to mark. 2120 * 2121 * @param[in, out] verbs 2122 * Pointer to the mlx5_flow_verbs structure. 2123 * @param[in] mark_id 2124 * Mark identifier to replace the flag. 2125 */ 2126 static void 2127 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id) 2128 { 2129 struct ibv_spec_header *hdr; 2130 int i; 2131 2132 if (!verbs) 2133 return; 2134 /* Update Verbs specification. */ 2135 hdr = (struct ibv_spec_header *)verbs->specs; 2136 if (!hdr) 2137 return; 2138 for (i = 0; i != verbs->attr->num_of_specs; ++i) { 2139 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) { 2140 struct ibv_flow_spec_action_tag *t = 2141 (struct ibv_flow_spec_action_tag *)hdr; 2142 2143 t->tag_id = mlx5_flow_mark_set(mark_id); 2144 } 2145 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size); 2146 } 2147 } 2148 2149 /** 2150 * Convert the @p action into @p flow (or by updating the already present 2151 * Flag Verbs specification) after ensuring the NIC will understand and 2152 * process it correctly. 2153 * If the necessary size for the conversion is greater than the @p flow_size, 2154 * nothing is written in @p flow, the validation is still performed. 2155 * 2156 * @param[in] action 2157 * Action configuration. 2158 * @param[in, out] flow 2159 * Pointer to flow structure. 2160 * @param[in] flow_size 2161 * Size in bytes of the available space in @p flow, if too small, nothing is 2162 * written. 2163 * @param[out] error 2164 * Pointer to error structure. 2165 * 2166 * @return 2167 * On success the number of bytes consumed/necessary, if the returned value 2168 * is lesser or equal to @p flow_size, the @p action has fully been 2169 * converted, otherwise another call with this returned memory size should 2170 * be done. 2171 * On error, a negative errno value is returned and rte_errno is set. 
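 *
 * Illustrative configuration (the id is arbitrary, the queue configuration
 * is assumed to exist):
 *
 * @code
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 * @endcode
 *
 * When a flag action was already converted, the existing tag specification
 * is rewritten with this mark identifier instead of adding a new one.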
 */
static int
mlx5_flow_action_mark(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
	};
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
		mlx5_flow_verbs_mark_update(verbs, mark->id);
		size = 0;
	} else if (size <= flow_size) {
		tag.tag_id = mlx5_flow_mark_set(mark->id);
		mlx5_flow_spec_verbs_add(flow, &tag, size);
	}
	flow->modifier |= MLX5_FLOW_MOD_MARK;
	return size;
}

/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
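 *
 * Illustrative configuration (a shared counter with an arbitrary id, the
 * queue configuration is assumed to exist):
 *
 * @code
 *	struct rte_flow_action_count count = { .shared = 1, .id = 0 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 * @endcode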
2241 */ 2242 static int 2243 mlx5_flow_action_count(struct rte_eth_dev *dev, 2244 const struct rte_flow_action *action, 2245 struct rte_flow *flow, 2246 const size_t flow_size __rte_unused, 2247 struct rte_flow_error *error) 2248 { 2249 const struct rte_flow_action_count *count = action->conf; 2250 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2251 unsigned int size = sizeof(struct ibv_flow_spec_counter_action); 2252 struct ibv_flow_spec_counter_action counter = { 2253 .type = IBV_FLOW_SPEC_ACTION_COUNT, 2254 .size = size, 2255 }; 2256 #endif 2257 2258 if (!flow->counter) { 2259 flow->counter = mlx5_flow_counter_new(dev, count->shared, 2260 count->id); 2261 if (!flow->counter) 2262 return rte_flow_error_set(error, ENOTSUP, 2263 RTE_FLOW_ERROR_TYPE_ACTION, 2264 action, 2265 "cannot get counter" 2266 " context."); 2267 } 2268 if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en) 2269 return rte_flow_error_set(error, ENOTSUP, 2270 RTE_FLOW_ERROR_TYPE_ACTION, 2271 action, 2272 "flow counters are not supported."); 2273 flow->modifier |= MLX5_FLOW_MOD_COUNT; 2274 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2275 counter.counter_set_handle = flow->counter->cs->handle; 2276 if (size <= flow_size) 2277 mlx5_flow_spec_verbs_add(flow, &counter, size); 2278 return size; 2279 #endif 2280 return 0; 2281 } 2282 2283 /** 2284 * Convert the @p action into @p flow after ensuring the NIC will understand 2285 * and process it correctly. 2286 * The conversion is performed action per action, each of them is written into 2287 * the @p flow if its size is lesser or equal to @p flow_size. 2288 * Validation and memory consumption computation are still performed until the 2289 * end of @p action, unless an error is encountered. 2290 * 2291 * @param[in] dev 2292 * Pointer to Ethernet device structure. 2293 * @param[in] actions 2294 * Pointer to flow actions array. 2295 * @param[in, out] flow 2296 * Pointer to the rte_flow structure. 2297 * @param[in] flow_size 2298 * Size in bytes of the available space in @p flow, if too small some 2299 * garbage may be present. 2300 * @param[out] error 2301 * Pointer to error structure. 2302 * 2303 * @return 2304 * On success the number of bytes consumed/necessary, if the returned value 2305 * is lesser or equal to @p flow_size, the @p actions has fully been 2306 * converted, otherwise another call with this returned memory size should 2307 * be done. 2308 * On error, a negative errno value is returned and rte_errno is set. 
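 *
 * Like the item conversion, this follows the two-pass convention used by
 * mlx5_flow_merge(): a first call with a zero @p flow_size (on a scratch
 * flow) computes the required memory, a second call performs the real
 * conversion (sketch, "scratch" and "err" are illustrative names only):
 *
 * @code
 *	int len = mlx5_flow_actions(dev, actions, &scratch, 0, &err);
 *
 *	if (len >= 0)
 *		len = mlx5_flow_actions(dev, actions, flow, len, &err);
 * @endcode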
2309 */ 2310 static int 2311 mlx5_flow_actions(struct rte_eth_dev *dev, 2312 const struct rte_flow_action actions[], 2313 struct rte_flow *flow, const size_t flow_size, 2314 struct rte_flow_error *error) 2315 { 2316 size_t size = 0; 2317 int remain = flow_size; 2318 int ret = 0; 2319 2320 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2321 switch (actions->type) { 2322 case RTE_FLOW_ACTION_TYPE_VOID: 2323 break; 2324 case RTE_FLOW_ACTION_TYPE_FLAG: 2325 ret = mlx5_flow_action_flag(actions, flow, remain, 2326 error); 2327 break; 2328 case RTE_FLOW_ACTION_TYPE_MARK: 2329 ret = mlx5_flow_action_mark(actions, flow, remain, 2330 error); 2331 break; 2332 case RTE_FLOW_ACTION_TYPE_DROP: 2333 ret = mlx5_flow_action_drop(actions, flow, remain, 2334 error); 2335 break; 2336 case RTE_FLOW_ACTION_TYPE_QUEUE: 2337 ret = mlx5_flow_action_queue(dev, actions, flow, error); 2338 break; 2339 case RTE_FLOW_ACTION_TYPE_RSS: 2340 ret = mlx5_flow_action_rss(dev, actions, flow, error); 2341 break; 2342 case RTE_FLOW_ACTION_TYPE_COUNT: 2343 ret = mlx5_flow_action_count(dev, actions, flow, remain, 2344 error); 2345 break; 2346 default: 2347 return rte_flow_error_set(error, ENOTSUP, 2348 RTE_FLOW_ERROR_TYPE_ACTION, 2349 actions, 2350 "action not supported"); 2351 } 2352 if (ret < 0) 2353 return ret; 2354 if (remain > ret) 2355 remain -= ret; 2356 else 2357 remain = 0; 2358 size += ret; 2359 } 2360 if (!flow->fate) 2361 return rte_flow_error_set(error, ENOTSUP, 2362 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2363 NULL, 2364 "no fate action found"); 2365 return size; 2366 } 2367 2368 /** 2369 * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC 2370 * after ensuring the NIC will understand and process it correctly. 2371 * The conversion is only performed item/action per item/action, each of 2372 * them is written into the @p flow if its size is lesser or equal to @p 2373 * flow_size. 2374 * Validation and memory consumption computation are still performed until the 2375 * end, unless an error is encountered. 2376 * 2377 * @param[in] dev 2378 * Pointer to Ethernet device. 2379 * @param[in, out] flow 2380 * Pointer to flow structure. 2381 * @param[in] flow_size 2382 * Size in bytes of the available space in @p flow, if too small some 2383 * garbage may be present. 2384 * @param[in] attributes 2385 * Flow rule attributes. 2386 * @param[in] pattern 2387 * Pattern specification (list terminated by the END pattern item). 2388 * @param[in] actions 2389 * Associated actions (list terminated by the END action). 2390 * @param[out] error 2391 * Perform verbose error reporting if not NULL. 2392 * 2393 * @return 2394 * On success the number of bytes consumed/necessary, if the returned value 2395 * is lesser or equal to @p flow_size, the flow has fully been converted and 2396 * can be applied, otherwise another call with this returned memory size 2397 * should be done. 2398 * On error, a negative errno value is returned and rte_errno is set. 
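 *
 * This is the function behind the two-call pattern used by
 * mlx5_flow_list_create() (sketch, error handling omitted):
 *
 * @code
 *	int ret = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, &err);
 *	struct rte_flow *flow = rte_calloc(__func__, 1, ret, 0);
 *
 *	ret = mlx5_flow_merge(dev, flow, ret, attr, pattern, actions, &err);
 * @endcode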
2399 */ 2400 static int 2401 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow, 2402 const size_t flow_size, 2403 const struct rte_flow_attr *attributes, 2404 const struct rte_flow_item pattern[], 2405 const struct rte_flow_action actions[], 2406 struct rte_flow_error *error) 2407 { 2408 struct rte_flow local_flow = { .layers = 0, }; 2409 size_t size = sizeof(*flow); 2410 union { 2411 struct rte_flow_expand_rss buf; 2412 uint8_t buffer[2048]; 2413 } expand_buffer; 2414 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 2415 struct mlx5_flow_verbs *original_verbs = NULL; 2416 size_t original_verbs_size = 0; 2417 uint32_t original_layers = 0; 2418 int expanded_pattern_idx = 0; 2419 int ret; 2420 uint32_t i; 2421 2422 if (size > flow_size) 2423 flow = &local_flow; 2424 ret = mlx5_flow_attributes(dev, attributes, flow, error); 2425 if (ret < 0) 2426 return ret; 2427 ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error); 2428 if (ret < 0) 2429 return ret; 2430 if (local_flow.rss.types) { 2431 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 2432 pattern, local_flow.rss.types, 2433 mlx5_support_expansion, 2434 local_flow.rss.level < 2 ? 2435 MLX5_EXPANSION_ROOT : 2436 MLX5_EXPANSION_ROOT_OUTER); 2437 assert(ret > 0 && 2438 (unsigned int)ret < sizeof(expand_buffer.buffer)); 2439 } else { 2440 buf->entries = 1; 2441 buf->entry[0].pattern = (void *)(uintptr_t)pattern; 2442 } 2443 size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t), 2444 sizeof(void *)); 2445 if (size <= flow_size) 2446 flow->queue = (void *)(flow + 1); 2447 LIST_INIT(&flow->verbs); 2448 flow->layers = 0; 2449 flow->modifier = 0; 2450 flow->fate = 0; 2451 for (i = 0; i != buf->entries; ++i) { 2452 size_t off = size; 2453 size_t off2; 2454 2455 flow->layers = original_layers; 2456 size += sizeof(struct ibv_flow_attr) + 2457 sizeof(struct mlx5_flow_verbs); 2458 off2 = size; 2459 if (size < flow_size) { 2460 flow->cur_verbs = (void *)((uintptr_t)flow + off); 2461 flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1); 2462 flow->cur_verbs->specs = 2463 (void *)(flow->cur_verbs->attr + 1); 2464 } 2465 /* First iteration convert the pattern into Verbs. */ 2466 if (i == 0) { 2467 /* Actions don't need to be converted several time. */ 2468 ret = mlx5_flow_actions(dev, actions, flow, 2469 (size < flow_size) ? 2470 flow_size - size : 0, 2471 error); 2472 if (ret < 0) 2473 return ret; 2474 size += ret; 2475 } else { 2476 /* 2477 * Next iteration means the pattern has already been 2478 * converted and an expansion is necessary to match 2479 * the user RSS request. For that only the expanded 2480 * items will be converted, the common part with the 2481 * user pattern are just copied into the next buffer 2482 * zone. 2483 */ 2484 size += original_verbs_size; 2485 if (size < flow_size) { 2486 rte_memcpy(flow->cur_verbs->attr, 2487 original_verbs->attr, 2488 original_verbs_size + 2489 sizeof(struct ibv_flow_attr)); 2490 flow->cur_verbs->size = original_verbs_size; 2491 } 2492 } 2493 ret = mlx5_flow_items 2494 (dev, 2495 (const struct rte_flow_item *) 2496 &buf->entry[i].pattern[expanded_pattern_idx], 2497 flow, 2498 (size < flow_size) ? flow_size - size : 0, error); 2499 if (ret < 0) 2500 return ret; 2501 size += ret; 2502 if (size <= flow_size) { 2503 mlx5_flow_adjust_priority(dev, flow); 2504 LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next); 2505 } 2506 /* 2507 * Keep a pointer of the first verbs conversion and the layers 2508 * it has encountered. 
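		 * They are reused for the following expanded patterns: the
		 * Verbs attribute produced by the first conversion is copied
		 * as-is and only the items appended by the RSS expansion are
		 * converted again.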
2509 */ 2510 if (i == 0) { 2511 original_verbs = flow->cur_verbs; 2512 original_verbs_size = size - off2; 2513 original_layers = flow->layers; 2514 /* 2515 * move the index of the expanded pattern to the 2516 * first item not addressed yet. 2517 */ 2518 if (pattern->type == RTE_FLOW_ITEM_TYPE_END) { 2519 expanded_pattern_idx++; 2520 } else { 2521 const struct rte_flow_item *item = pattern; 2522 2523 for (item = pattern; 2524 item->type != RTE_FLOW_ITEM_TYPE_END; 2525 ++item) 2526 expanded_pattern_idx++; 2527 } 2528 } 2529 } 2530 /* Restore the origin layers in the flow. */ 2531 flow->layers = original_layers; 2532 return size; 2533 } 2534 2535 /** 2536 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 2537 * if several tunnel rules are used on this queue, the tunnel ptype will be 2538 * cleared. 2539 * 2540 * @param rxq_ctrl 2541 * Rx queue to update. 2542 */ 2543 static void 2544 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 2545 { 2546 unsigned int i; 2547 uint32_t tunnel_ptype = 0; 2548 2549 /* Look up for the ptype to use. */ 2550 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 2551 if (!rxq_ctrl->flow_tunnels_n[i]) 2552 continue; 2553 if (!tunnel_ptype) { 2554 tunnel_ptype = tunnels_info[i].ptype; 2555 } else { 2556 tunnel_ptype = 0; 2557 break; 2558 } 2559 } 2560 rxq_ctrl->rxq.tunnel = tunnel_ptype; 2561 } 2562 2563 /** 2564 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow. 2565 * 2566 * @param[in] dev 2567 * Pointer to Ethernet device. 2568 * @param[in] flow 2569 * Pointer to flow structure. 2570 */ 2571 static void 2572 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 2573 { 2574 struct priv *priv = dev->data->dev_private; 2575 const int mark = !!(flow->modifier & 2576 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2577 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2578 unsigned int i; 2579 2580 for (i = 0; i != flow->rss.queue_num; ++i) { 2581 int idx = (*flow->queue)[i]; 2582 struct mlx5_rxq_ctrl *rxq_ctrl = 2583 container_of((*priv->rxqs)[idx], 2584 struct mlx5_rxq_ctrl, rxq); 2585 2586 if (mark) { 2587 rxq_ctrl->rxq.mark = 1; 2588 rxq_ctrl->flow_mark_n++; 2589 } 2590 if (tunnel) { 2591 unsigned int j; 2592 2593 /* Increase the counter matching the flow. */ 2594 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2595 if ((tunnels_info[j].tunnel & flow->layers) == 2596 tunnels_info[j].tunnel) { 2597 rxq_ctrl->flow_tunnels_n[j]++; 2598 break; 2599 } 2600 } 2601 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2602 } 2603 } 2604 } 2605 2606 /** 2607 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 2608 * @p flow if no other flow uses it with the same kind of request. 2609 * 2610 * @param dev 2611 * Pointer to Ethernet device. 2612 * @param[in] flow 2613 * Pointer to the flow. 
2614 */ 2615 static void 2616 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 2617 { 2618 struct priv *priv = dev->data->dev_private; 2619 const int mark = !!(flow->modifier & 2620 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2621 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2622 unsigned int i; 2623 2624 assert(dev->data->dev_started); 2625 for (i = 0; i != flow->rss.queue_num; ++i) { 2626 int idx = (*flow->queue)[i]; 2627 struct mlx5_rxq_ctrl *rxq_ctrl = 2628 container_of((*priv->rxqs)[idx], 2629 struct mlx5_rxq_ctrl, rxq); 2630 2631 if (mark) { 2632 rxq_ctrl->flow_mark_n--; 2633 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 2634 } 2635 if (tunnel) { 2636 unsigned int j; 2637 2638 /* Decrease the counter matching the flow. */ 2639 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2640 if ((tunnels_info[j].tunnel & flow->layers) == 2641 tunnels_info[j].tunnel) { 2642 rxq_ctrl->flow_tunnels_n[j]--; 2643 break; 2644 } 2645 } 2646 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2647 } 2648 } 2649 } 2650 2651 /** 2652 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 2653 * 2654 * @param dev 2655 * Pointer to Ethernet device. 2656 */ 2657 static void 2658 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev) 2659 { 2660 struct priv *priv = dev->data->dev_private; 2661 unsigned int i; 2662 unsigned int idx; 2663 2664 for (idx = 0, i = 0; idx != priv->rxqs_n; ++i) { 2665 struct mlx5_rxq_ctrl *rxq_ctrl; 2666 unsigned int j; 2667 2668 if (!(*priv->rxqs)[idx]) 2669 continue; 2670 rxq_ctrl = container_of((*priv->rxqs)[idx], 2671 struct mlx5_rxq_ctrl, rxq); 2672 rxq_ctrl->flow_mark_n = 0; 2673 rxq_ctrl->rxq.mark = 0; 2674 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 2675 rxq_ctrl->flow_tunnels_n[j] = 0; 2676 rxq_ctrl->rxq.tunnel = 0; 2677 ++idx; 2678 } 2679 } 2680 2681 /** 2682 * Validate a flow supported by the NIC. 2683 * 2684 * @see rte_flow_validate() 2685 * @see rte_flow_ops 2686 */ 2687 int 2688 mlx5_flow_validate(struct rte_eth_dev *dev, 2689 const struct rte_flow_attr *attr, 2690 const struct rte_flow_item items[], 2691 const struct rte_flow_action actions[], 2692 struct rte_flow_error *error) 2693 { 2694 int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error); 2695 2696 if (ret < 0) 2697 return ret; 2698 return 0; 2699 } 2700 2701 /** 2702 * Remove the flow. 2703 * 2704 * @param[in] dev 2705 * Pointer to Ethernet device. 2706 * @param[in, out] flow 2707 * Pointer to flow structure. 2708 */ 2709 static void 2710 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2711 { 2712 struct mlx5_flow_verbs *verbs; 2713 2714 LIST_FOREACH(verbs, &flow->verbs, next) { 2715 if (verbs->flow) { 2716 claim_zero(mlx5_glue->destroy_flow(verbs->flow)); 2717 verbs->flow = NULL; 2718 } 2719 if (verbs->hrxq) { 2720 if (flow->fate & MLX5_FLOW_FATE_DROP) 2721 mlx5_hrxq_drop_release(dev); 2722 else 2723 mlx5_hrxq_release(dev, verbs->hrxq); 2724 verbs->hrxq = NULL; 2725 } 2726 } 2727 if (flow->counter) { 2728 mlx5_flow_counter_release(flow->counter); 2729 flow->counter = NULL; 2730 } 2731 } 2732 2733 /** 2734 * Apply the flow. 2735 * 2736 * @param[in] dev 2737 * Pointer to Ethernet device structure. 2738 * @param[in, out] flow 2739 * Pointer to flow structure. 2740 * @param[out] error 2741 * Pointer to error structure. 2742 * 2743 * @return 2744 * 0 on success, a negative errno value otherwise and rte_errno is set. 
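 *
 * For queue/RSS fates the hash Rx queue is first looked up with
 * mlx5_hrxq_get() and only created through mlx5_hrxq_new() when no existing
 * queue matches, so identical flows share the same hardware objects.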
2745 */ 2746 static int 2747 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2748 struct rte_flow_error *error) 2749 { 2750 struct mlx5_flow_verbs *verbs; 2751 int err; 2752 2753 LIST_FOREACH(verbs, &flow->verbs, next) { 2754 if (flow->fate & MLX5_FLOW_FATE_DROP) { 2755 verbs->hrxq = mlx5_hrxq_drop_new(dev); 2756 if (!verbs->hrxq) { 2757 rte_flow_error_set 2758 (error, errno, 2759 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2760 NULL, 2761 "cannot get drop hash queue"); 2762 goto error; 2763 } 2764 } else { 2765 struct mlx5_hrxq *hrxq; 2766 2767 hrxq = mlx5_hrxq_get(dev, flow->key, 2768 MLX5_RSS_HASH_KEY_LEN, 2769 verbs->hash_fields, 2770 (*flow->queue), 2771 flow->rss.queue_num); 2772 if (!hrxq) 2773 hrxq = mlx5_hrxq_new(dev, flow->key, 2774 MLX5_RSS_HASH_KEY_LEN, 2775 verbs->hash_fields, 2776 (*flow->queue), 2777 flow->rss.queue_num); 2778 if (!hrxq) { 2779 rte_flow_error_set 2780 (error, rte_errno, 2781 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2782 NULL, 2783 "cannot get hash queue"); 2784 goto error; 2785 } 2786 verbs->hrxq = hrxq; 2787 } 2788 verbs->flow = 2789 mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr); 2790 if (!verbs->flow) { 2791 rte_flow_error_set(error, errno, 2792 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2793 NULL, 2794 "hardware refuses to create flow"); 2795 goto error; 2796 } 2797 } 2798 return 0; 2799 error: 2800 err = rte_errno; /* Save rte_errno before cleanup. */ 2801 LIST_FOREACH(verbs, &flow->verbs, next) { 2802 if (verbs->hrxq) { 2803 if (flow->fate & MLX5_FLOW_FATE_DROP) 2804 mlx5_hrxq_drop_release(dev); 2805 else 2806 mlx5_hrxq_release(dev, verbs->hrxq); 2807 verbs->hrxq = NULL; 2808 } 2809 } 2810 rte_errno = err; /* Restore rte_errno. */ 2811 return -rte_errno; 2812 } 2813 2814 /** 2815 * Create a flow and add it to @p list. 2816 * 2817 * @param dev 2818 * Pointer to Ethernet device. 2819 * @param list 2820 * Pointer to a TAILQ flow list. 2821 * @param[in] attr 2822 * Flow rule attributes. 2823 * @param[in] items 2824 * Pattern specification (list terminated by the END pattern item). 2825 * @param[in] actions 2826 * Associated actions (list terminated by the END action). 2827 * @param[out] error 2828 * Perform verbose error reporting if not NULL. 2829 * 2830 * @return 2831 * A flow on success, NULL otherwise and rte_errno is set. 2832 */ 2833 static struct rte_flow * 2834 mlx5_flow_list_create(struct rte_eth_dev *dev, 2835 struct mlx5_flows *list, 2836 const struct rte_flow_attr *attr, 2837 const struct rte_flow_item items[], 2838 const struct rte_flow_action actions[], 2839 struct rte_flow_error *error) 2840 { 2841 struct rte_flow *flow = NULL; 2842 size_t size = 0; 2843 int ret; 2844 2845 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 2846 if (ret < 0) 2847 return NULL; 2848 size = ret; 2849 flow = rte_calloc(__func__, 1, size, 0); 2850 if (!flow) { 2851 rte_flow_error_set(error, ENOMEM, 2852 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2853 NULL, 2854 "not enough memory to create flow"); 2855 return NULL; 2856 } 2857 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 2858 if (ret < 0) { 2859 rte_free(flow); 2860 return NULL; 2861 } 2862 assert((size_t)ret == size); 2863 if (dev->data->dev_started) { 2864 ret = mlx5_flow_apply(dev, flow, error); 2865 if (ret < 0) { 2866 ret = rte_errno; /* Save rte_errno before cleanup. */ 2867 if (flow) { 2868 mlx5_flow_remove(dev, flow); 2869 rte_free(flow); 2870 } 2871 rte_errno = ret; /* Restore rte_errno. 
*/ 2872 return NULL; 2873 } 2874 } 2875 TAILQ_INSERT_TAIL(list, flow, next); 2876 mlx5_flow_rxq_flags_set(dev, flow); 2877 return flow; 2878 } 2879 2880 /** 2881 * Create a flow. 2882 * 2883 * @see rte_flow_create() 2884 * @see rte_flow_ops 2885 */ 2886 struct rte_flow * 2887 mlx5_flow_create(struct rte_eth_dev *dev, 2888 const struct rte_flow_attr *attr, 2889 const struct rte_flow_item items[], 2890 const struct rte_flow_action actions[], 2891 struct rte_flow_error *error) 2892 { 2893 return mlx5_flow_list_create 2894 (dev, &((struct priv *)dev->data->dev_private)->flows, 2895 attr, items, actions, error); 2896 } 2897 2898 /** 2899 * Destroy a flow in a list. 2900 * 2901 * @param dev 2902 * Pointer to Ethernet device. 2903 * @param list 2904 * Pointer to a TAILQ flow list. 2905 * @param[in] flow 2906 * Flow to destroy. 2907 */ 2908 static void 2909 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, 2910 struct rte_flow *flow) 2911 { 2912 mlx5_flow_remove(dev, flow); 2913 TAILQ_REMOVE(list, flow, next); 2914 /* 2915 * Update RX queue flags only if port is started, otherwise it is 2916 * already clean. 2917 */ 2918 if (dev->data->dev_started) 2919 mlx5_flow_rxq_flags_trim(dev, flow); 2920 rte_free(flow); 2921 } 2922 2923 /** 2924 * Destroy all flows. 2925 * 2926 * @param dev 2927 * Pointer to Ethernet device. 2928 * @param list 2929 * Pointer to a TAILQ flow list. 2930 */ 2931 void 2932 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list) 2933 { 2934 while (!TAILQ_EMPTY(list)) { 2935 struct rte_flow *flow; 2936 2937 flow = TAILQ_FIRST(list); 2938 mlx5_flow_list_destroy(dev, list, flow); 2939 } 2940 } 2941 2942 /** 2943 * Remove all flows. 2944 * 2945 * @param dev 2946 * Pointer to Ethernet device. 2947 * @param list 2948 * Pointer to a TAILQ flow list. 2949 */ 2950 void 2951 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list) 2952 { 2953 struct rte_flow *flow; 2954 2955 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) 2956 mlx5_flow_remove(dev, flow); 2957 mlx5_flow_rxq_flags_clear(dev); 2958 } 2959 2960 /** 2961 * Add all flows. 2962 * 2963 * @param dev 2964 * Pointer to Ethernet device. 2965 * @param list 2966 * Pointer to a TAILQ flow list. 2967 * 2968 * @return 2969 * 0 on success, a negative errno value otherwise and rte_errno is set. 2970 */ 2971 int 2972 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list) 2973 { 2974 struct rte_flow *flow; 2975 struct rte_flow_error error; 2976 int ret = 0; 2977 2978 TAILQ_FOREACH(flow, list, next) { 2979 ret = mlx5_flow_apply(dev, flow, &error); 2980 if (ret < 0) 2981 goto error; 2982 mlx5_flow_rxq_flags_set(dev, flow); 2983 } 2984 return 0; 2985 error: 2986 ret = rte_errno; /* Save rte_errno before cleanup. */ 2987 mlx5_flow_stop(dev, list); 2988 rte_errno = ret; /* Restore rte_errno. */ 2989 return -rte_errno; 2990 } 2991 2992 /** 2993 * Verify the flow list is empty 2994 * 2995 * @param dev 2996 * Pointer to Ethernet device. 2997 * 2998 * @return the number of flows not released. 2999 */ 3000 int 3001 mlx5_flow_verify(struct rte_eth_dev *dev) 3002 { 3003 struct priv *priv = dev->data->dev_private; 3004 struct rte_flow *flow; 3005 int ret = 0; 3006 3007 TAILQ_FOREACH(flow, &priv->flows, next) { 3008 DRV_LOG(DEBUG, "port %u flow %p still referenced", 3009 dev->data->port_id, (void *)flow); 3010 ++ret; 3011 } 3012 return ret; 3013 } 3014 3015 /** 3016 * Enable a control flow configured from the control plane. 3017 * 3018 * @param dev 3019 * Pointer to Ethernet device. 
3020 * @param eth_spec 3021 * An Ethernet flow spec to apply. 3022 * @param eth_mask 3023 * An Ethernet flow mask to apply. 3024 * @param vlan_spec 3025 * A VLAN flow spec to apply. 3026 * @param vlan_mask 3027 * A VLAN flow mask to apply. 3028 * 3029 * @return 3030 * 0 on success, a negative errno value otherwise and rte_errno is set. 3031 */ 3032 int 3033 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 3034 struct rte_flow_item_eth *eth_spec, 3035 struct rte_flow_item_eth *eth_mask, 3036 struct rte_flow_item_vlan *vlan_spec, 3037 struct rte_flow_item_vlan *vlan_mask) 3038 { 3039 struct priv *priv = dev->data->dev_private; 3040 const struct rte_flow_attr attr = { 3041 .ingress = 1, 3042 .priority = MLX5_FLOW_PRIO_RSVD, 3043 }; 3044 struct rte_flow_item items[] = { 3045 { 3046 .type = RTE_FLOW_ITEM_TYPE_ETH, 3047 .spec = eth_spec, 3048 .last = NULL, 3049 .mask = eth_mask, 3050 }, 3051 { 3052 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN : 3053 RTE_FLOW_ITEM_TYPE_END, 3054 .spec = vlan_spec, 3055 .last = NULL, 3056 .mask = vlan_mask, 3057 }, 3058 { 3059 .type = RTE_FLOW_ITEM_TYPE_END, 3060 }, 3061 }; 3062 uint16_t queue[priv->reta_idx_n]; 3063 struct rte_flow_action_rss action_rss = { 3064 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 3065 .level = 0, 3066 .types = priv->rss_conf.rss_hf, 3067 .key_len = priv->rss_conf.rss_key_len, 3068 .queue_num = priv->reta_idx_n, 3069 .key = priv->rss_conf.rss_key, 3070 .queue = queue, 3071 }; 3072 struct rte_flow_action actions[] = { 3073 { 3074 .type = RTE_FLOW_ACTION_TYPE_RSS, 3075 .conf = &action_rss, 3076 }, 3077 { 3078 .type = RTE_FLOW_ACTION_TYPE_END, 3079 }, 3080 }; 3081 struct rte_flow *flow; 3082 struct rte_flow_error error; 3083 unsigned int i; 3084 3085 if (!priv->reta_idx_n) { 3086 rte_errno = EINVAL; 3087 return -rte_errno; 3088 } 3089 for (i = 0; i != priv->reta_idx_n; ++i) 3090 queue[i] = (*priv->reta_idx)[i]; 3091 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items, 3092 actions, &error); 3093 if (!flow) 3094 return -rte_errno; 3095 return 0; 3096 } 3097 3098 /** 3099 * Enable a flow control configured from the control plane. 3100 * 3101 * @param dev 3102 * Pointer to Ethernet device. 3103 * @param eth_spec 3104 * An Ethernet flow spec to apply. 3105 * @param eth_mask 3106 * An Ethernet flow mask to apply. 3107 * 3108 * @return 3109 * 0 on success, a negative errno value otherwise and rte_errno is set. 3110 */ 3111 int 3112 mlx5_ctrl_flow(struct rte_eth_dev *dev, 3113 struct rte_flow_item_eth *eth_spec, 3114 struct rte_flow_item_eth *eth_mask) 3115 { 3116 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 3117 } 3118 3119 /** 3120 * Destroy a flow. 3121 * 3122 * @see rte_flow_destroy() 3123 * @see rte_flow_ops 3124 */ 3125 int 3126 mlx5_flow_destroy(struct rte_eth_dev *dev, 3127 struct rte_flow *flow, 3128 struct rte_flow_error *error __rte_unused) 3129 { 3130 struct priv *priv = dev->data->dev_private; 3131 3132 mlx5_flow_list_destroy(dev, &priv->flows, flow); 3133 return 0; 3134 } 3135 3136 /** 3137 * Destroy all flows. 3138 * 3139 * @see rte_flow_flush() 3140 * @see rte_flow_ops 3141 */ 3142 int 3143 mlx5_flow_flush(struct rte_eth_dev *dev, 3144 struct rte_flow_error *error __rte_unused) 3145 { 3146 struct priv *priv = dev->data->dev_private; 3147 3148 mlx5_flow_list_flush(dev, &priv->flows); 3149 return 0; 3150 } 3151 3152 /** 3153 * Isolated mode. 
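 * When enabled, the port only receives the traffic explicitly requested by
 * flow rules and the device switches to the restricted
 * mlx5_dev_ops_isolate operations; disabling it restores mlx5_dev_ops.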
3154 * 3155 * @see rte_flow_isolate() 3156 * @see rte_flow_ops 3157 */ 3158 int 3159 mlx5_flow_isolate(struct rte_eth_dev *dev, 3160 int enable, 3161 struct rte_flow_error *error) 3162 { 3163 struct priv *priv = dev->data->dev_private; 3164 3165 if (dev->data->dev_started) { 3166 rte_flow_error_set(error, EBUSY, 3167 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3168 NULL, 3169 "port must be stopped first"); 3170 return -rte_errno; 3171 } 3172 priv->isolated = !!enable; 3173 if (enable) 3174 dev->dev_ops = &mlx5_dev_ops_isolate; 3175 else 3176 dev->dev_ops = &mlx5_dev_ops; 3177 return 0; 3178 } 3179 3180 /** 3181 * Query flow counter. 3182 * 3183 * @param flow 3184 * Pointer to the flow. 3185 * 3186 * @return 3187 * 0 on success, a negative errno value otherwise and rte_errno is set. 3188 */ 3189 static int 3190 mlx5_flow_query_count(struct rte_flow *flow __rte_unused, 3191 void *data __rte_unused, 3192 struct rte_flow_error *error) 3193 { 3194 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 3195 struct rte_flow_query_count *qc = data; 3196 uint64_t counters[2] = {0, 0}; 3197 struct ibv_query_counter_set_attr query_cs_attr = { 3198 .cs = flow->counter->cs, 3199 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE, 3200 }; 3201 struct ibv_counter_set_data query_out = { 3202 .out = counters, 3203 .outlen = 2 * sizeof(uint64_t), 3204 }; 3205 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out); 3206 3207 if (err) 3208 return rte_flow_error_set(error, err, 3209 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3210 NULL, 3211 "cannot read counter"); 3212 qc->hits_set = 1; 3213 qc->bytes_set = 1; 3214 qc->hits = counters[0] - flow->counter->hits; 3215 qc->bytes = counters[1] - flow->counter->bytes; 3216 if (qc->reset) { 3217 flow->counter->hits = counters[0]; 3218 flow->counter->bytes = counters[1]; 3219 } 3220 return 0; 3221 #endif 3222 return rte_flow_error_set(error, ENOTSUP, 3223 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3224 NULL, 3225 "counters are not available"); 3226 } 3227 3228 /** 3229 * Query a flows. 3230 * 3231 * @see rte_flow_query() 3232 * @see rte_flow_ops 3233 */ 3234 int 3235 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused, 3236 struct rte_flow *flow, 3237 const struct rte_flow_action *actions, 3238 void *data, 3239 struct rte_flow_error *error) 3240 { 3241 int ret = 0; 3242 3243 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3244 switch (actions->type) { 3245 case RTE_FLOW_ACTION_TYPE_VOID: 3246 break; 3247 case RTE_FLOW_ACTION_TYPE_COUNT: 3248 ret = mlx5_flow_query_count(flow, data, error); 3249 break; 3250 default: 3251 return rte_flow_error_set(error, ENOTSUP, 3252 RTE_FLOW_ERROR_TYPE_ACTION, 3253 actions, 3254 "action not supported"); 3255 } 3256 if (ret < 0) 3257 return ret; 3258 } 3259 return 0; 3260 } 3261 3262 /** 3263 * Convert a flow director filter to a generic flow. 3264 * 3265 * @param dev 3266 * Pointer to Ethernet device. 3267 * @param fdir_filter 3268 * Flow director filter to add. 3269 * @param attributes 3270 * Generic flow parameters structure. 3271 * 3272 * @return 3273 * 0 on success, a negative errno value otherwise and rte_errno is set. 3274 */ 3275 static int 3276 mlx5_fdir_filter_convert(struct rte_eth_dev *dev, 3277 const struct rte_eth_fdir_filter *fdir_filter, 3278 struct mlx5_fdir *attributes) 3279 { 3280 struct priv *priv = dev->data->dev_private; 3281 const struct rte_eth_fdir_input *input = &fdir_filter->input; 3282 const struct rte_eth_fdir_masks *mask = 3283 &dev->data->dev_conf.fdir_conf.mask; 3284 3285 /* Validate queue number. 
*/ 3286 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 3287 DRV_LOG(ERR, "port %u invalid queue number %d", 3288 dev->data->port_id, fdir_filter->action.rx_queue); 3289 rte_errno = EINVAL; 3290 return -rte_errno; 3291 } 3292 attributes->attr.ingress = 1; 3293 attributes->items[0] = (struct rte_flow_item) { 3294 .type = RTE_FLOW_ITEM_TYPE_ETH, 3295 .spec = &attributes->l2, 3296 .mask = &attributes->l2_mask, 3297 }; 3298 switch (fdir_filter->action.behavior) { 3299 case RTE_ETH_FDIR_ACCEPT: 3300 attributes->actions[0] = (struct rte_flow_action){ 3301 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 3302 .conf = &attributes->queue, 3303 }; 3304 break; 3305 case RTE_ETH_FDIR_REJECT: 3306 attributes->actions[0] = (struct rte_flow_action){ 3307 .type = RTE_FLOW_ACTION_TYPE_DROP, 3308 }; 3309 break; 3310 default: 3311 DRV_LOG(ERR, "port %u invalid behavior %d", 3312 dev->data->port_id, 3313 fdir_filter->action.behavior); 3314 rte_errno = ENOTSUP; 3315 return -rte_errno; 3316 } 3317 attributes->queue.index = fdir_filter->action.rx_queue; 3318 /* Handle L3. */ 3319 switch (fdir_filter->input.flow_type) { 3320 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3321 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3322 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3323 attributes->l3.ipv4.hdr = (struct ipv4_hdr){ 3324 .src_addr = input->flow.ip4_flow.src_ip, 3325 .dst_addr = input->flow.ip4_flow.dst_ip, 3326 .time_to_live = input->flow.ip4_flow.ttl, 3327 .type_of_service = input->flow.ip4_flow.tos, 3328 .next_proto_id = input->flow.ip4_flow.proto, 3329 }; 3330 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ 3331 .src_addr = mask->ipv4_mask.src_ip, 3332 .dst_addr = mask->ipv4_mask.dst_ip, 3333 .time_to_live = mask->ipv4_mask.ttl, 3334 .type_of_service = mask->ipv4_mask.tos, 3335 .next_proto_id = mask->ipv4_mask.proto, 3336 }; 3337 attributes->items[1] = (struct rte_flow_item){ 3338 .type = RTE_FLOW_ITEM_TYPE_IPV4, 3339 .spec = &attributes->l3, 3340 .mask = &attributes->l3_mask, 3341 }; 3342 break; 3343 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3344 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3345 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3346 attributes->l3.ipv6.hdr = (struct ipv6_hdr){ 3347 .hop_limits = input->flow.ipv6_flow.hop_limits, 3348 .proto = input->flow.ipv6_flow.proto, 3349 }; 3350 3351 memcpy(attributes->l3.ipv6.hdr.src_addr, 3352 input->flow.ipv6_flow.src_ip, 3353 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3354 memcpy(attributes->l3.ipv6.hdr.dst_addr, 3355 input->flow.ipv6_flow.dst_ip, 3356 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3357 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 3358 mask->ipv6_mask.src_ip, 3359 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3360 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 3361 mask->ipv6_mask.dst_ip, 3362 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3363 attributes->items[1] = (struct rte_flow_item){ 3364 .type = RTE_FLOW_ITEM_TYPE_IPV6, 3365 .spec = &attributes->l3, 3366 .mask = &attributes->l3_mask, 3367 }; 3368 break; 3369 default: 3370 DRV_LOG(ERR, "port %u invalid flow type%d", 3371 dev->data->port_id, fdir_filter->input.flow_type); 3372 rte_errno = ENOTSUP; 3373 return -rte_errno; 3374 } 3375 /* Handle L4. 
*/ 3376 switch (fdir_filter->input.flow_type) { 3377 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3378 attributes->l4.udp.hdr = (struct udp_hdr){ 3379 .src_port = input->flow.udp4_flow.src_port, 3380 .dst_port = input->flow.udp4_flow.dst_port, 3381 }; 3382 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3383 .src_port = mask->src_port_mask, 3384 .dst_port = mask->dst_port_mask, 3385 }; 3386 attributes->items[2] = (struct rte_flow_item){ 3387 .type = RTE_FLOW_ITEM_TYPE_UDP, 3388 .spec = &attributes->l4, 3389 .mask = &attributes->l4_mask, 3390 }; 3391 break; 3392 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3393 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3394 .src_port = input->flow.tcp4_flow.src_port, 3395 .dst_port = input->flow.tcp4_flow.dst_port, 3396 }; 3397 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3398 .src_port = mask->src_port_mask, 3399 .dst_port = mask->dst_port_mask, 3400 }; 3401 attributes->items[2] = (struct rte_flow_item){ 3402 .type = RTE_FLOW_ITEM_TYPE_TCP, 3403 .spec = &attributes->l4, 3404 .mask = &attributes->l4_mask, 3405 }; 3406 break; 3407 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3408 attributes->l4.udp.hdr = (struct udp_hdr){ 3409 .src_port = input->flow.udp6_flow.src_port, 3410 .dst_port = input->flow.udp6_flow.dst_port, 3411 }; 3412 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3413 .src_port = mask->src_port_mask, 3414 .dst_port = mask->dst_port_mask, 3415 }; 3416 attributes->items[2] = (struct rte_flow_item){ 3417 .type = RTE_FLOW_ITEM_TYPE_UDP, 3418 .spec = &attributes->l4, 3419 .mask = &attributes->l4_mask, 3420 }; 3421 break; 3422 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3423 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3424 .src_port = input->flow.tcp6_flow.src_port, 3425 .dst_port = input->flow.tcp6_flow.dst_port, 3426 }; 3427 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3428 .src_port = mask->src_port_mask, 3429 .dst_port = mask->dst_port_mask, 3430 }; 3431 attributes->items[2] = (struct rte_flow_item){ 3432 .type = RTE_FLOW_ITEM_TYPE_TCP, 3433 .spec = &attributes->l4, 3434 .mask = &attributes->l4_mask, 3435 }; 3436 break; 3437 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3438 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3439 break; 3440 default: 3441 DRV_LOG(ERR, "port %u invalid flow type%d", 3442 dev->data->port_id, fdir_filter->input.flow_type); 3443 rte_errno = ENOTSUP; 3444 return -rte_errno; 3445 } 3446 return 0; 3447 } 3448 3449 /** 3450 * Add new flow director filter and store it in list. 3451 * 3452 * @param dev 3453 * Pointer to Ethernet device. 3454 * @param fdir_filter 3455 * Flow director filter to add. 3456 * 3457 * @return 3458 * 0 on success, a negative errno value otherwise and rte_errno is set. 
3459 */ 3460 static int 3461 mlx5_fdir_filter_add(struct rte_eth_dev *dev, 3462 const struct rte_eth_fdir_filter *fdir_filter) 3463 { 3464 struct priv *priv = dev->data->dev_private; 3465 struct mlx5_fdir attributes = { 3466 .attr.group = 0, 3467 .l2_mask = { 3468 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00", 3469 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00", 3470 .type = 0, 3471 }, 3472 }; 3473 struct rte_flow_error error; 3474 struct rte_flow *flow; 3475 int ret; 3476 3477 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes); 3478 if (ret) 3479 return ret; 3480 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr, 3481 attributes.items, attributes.actions, 3482 &error); 3483 if (flow) { 3484 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id, 3485 (void *)flow); 3486 return 0; 3487 } 3488 return -rte_errno; 3489 } 3490 3491 /** 3492 * Delete specific filter. 3493 * 3494 * @param dev 3495 * Pointer to Ethernet device. 3496 * @param fdir_filter 3497 * Filter to be deleted. 3498 * 3499 * @return 3500 * 0 on success, a negative errno value otherwise and rte_errno is set. 3501 */ 3502 static int 3503 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused, 3504 const struct rte_eth_fdir_filter *fdir_filter 3505 __rte_unused) 3506 { 3507 rte_errno = ENOTSUP; 3508 return -rte_errno; 3509 } 3510 3511 /** 3512 * Update queue for specific filter. 3513 * 3514 * @param dev 3515 * Pointer to Ethernet device. 3516 * @param fdir_filter 3517 * Filter to be updated. 3518 * 3519 * @return 3520 * 0 on success, a negative errno value otherwise and rte_errno is set. 3521 */ 3522 static int 3523 mlx5_fdir_filter_update(struct rte_eth_dev *dev, 3524 const struct rte_eth_fdir_filter *fdir_filter) 3525 { 3526 int ret; 3527 3528 ret = mlx5_fdir_filter_delete(dev, fdir_filter); 3529 if (ret) 3530 return ret; 3531 return mlx5_fdir_filter_add(dev, fdir_filter); 3532 } 3533 3534 /** 3535 * Flush all filters. 3536 * 3537 * @param dev 3538 * Pointer to Ethernet device. 3539 */ 3540 static void 3541 mlx5_fdir_filter_flush(struct rte_eth_dev *dev) 3542 { 3543 struct priv *priv = dev->data->dev_private; 3544 3545 mlx5_flow_list_flush(dev, &priv->flows); 3546 } 3547 3548 /** 3549 * Get flow director information. 3550 * 3551 * @param dev 3552 * Pointer to Ethernet device. 3553 * @param[out] fdir_info 3554 * Resulting flow director information. 3555 */ 3556 static void 3557 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 3558 { 3559 struct rte_eth_fdir_masks *mask = 3560 &dev->data->dev_conf.fdir_conf.mask; 3561 3562 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 3563 fdir_info->guarant_spc = 0; 3564 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 3565 fdir_info->max_flexpayload = 0; 3566 fdir_info->flow_types_mask[0] = 0; 3567 fdir_info->flex_payload_unit = 0; 3568 fdir_info->max_flex_payload_segment_num = 0; 3569 fdir_info->flex_payload_limit = 0; 3570 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 3571 } 3572 3573 /** 3574 * Deal with flow director operations. 3575 * 3576 * @param dev 3577 * Pointer to Ethernet device. 3578 * @param filter_op 3579 * Operation to perform. 3580 * @param arg 3581 * Pointer to operation-specific structure. 3582 * 3583 * @return 3584 * 0 on success, a negative errno value otherwise and rte_errno is set. 
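 *
 * Only the perfect flow director modes are accepted. A hypothetical caller
 * reaches this function through the generic filter API, e.g.
 * ("fdir_filter" is assumed to be a filled rte_eth_fdir_filter):
 *
 * @code
 *	ret = mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_FDIR,
 *				   RTE_ETH_FILTER_ADD, &fdir_filter);
 * @endcode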
3585 */ 3586 static int 3587 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 3588 void *arg) 3589 { 3590 enum rte_fdir_mode fdir_mode = 3591 dev->data->dev_conf.fdir_conf.mode; 3592 3593 if (filter_op == RTE_ETH_FILTER_NOP) 3594 return 0; 3595 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 3596 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 3597 DRV_LOG(ERR, "port %u flow director mode %d not supported", 3598 dev->data->port_id, fdir_mode); 3599 rte_errno = EINVAL; 3600 return -rte_errno; 3601 } 3602 switch (filter_op) { 3603 case RTE_ETH_FILTER_ADD: 3604 return mlx5_fdir_filter_add(dev, arg); 3605 case RTE_ETH_FILTER_UPDATE: 3606 return mlx5_fdir_filter_update(dev, arg); 3607 case RTE_ETH_FILTER_DELETE: 3608 return mlx5_fdir_filter_delete(dev, arg); 3609 case RTE_ETH_FILTER_FLUSH: 3610 mlx5_fdir_filter_flush(dev); 3611 break; 3612 case RTE_ETH_FILTER_INFO: 3613 mlx5_fdir_info_get(dev, arg); 3614 break; 3615 default: 3616 DRV_LOG(DEBUG, "port %u unknown operation %u", 3617 dev->data->port_id, filter_op); 3618 rte_errno = EINVAL; 3619 return -rte_errno; 3620 } 3621 return 0; 3622 } 3623 3624 /** 3625 * Manage filter operations. 3626 * 3627 * @param dev 3628 * Pointer to Ethernet device structure. 3629 * @param filter_type 3630 * Filter type. 3631 * @param filter_op 3632 * Operation to perform. 3633 * @param arg 3634 * Pointer to operation-specific structure. 3635 * 3636 * @return 3637 * 0 on success, a negative errno value otherwise and rte_errno is set. 3638 */ 3639 int 3640 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 3641 enum rte_filter_type filter_type, 3642 enum rte_filter_op filter_op, 3643 void *arg) 3644 { 3645 switch (filter_type) { 3646 case RTE_ETH_FILTER_GENERIC: 3647 if (filter_op != RTE_ETH_FILTER_GET) { 3648 rte_errno = EINVAL; 3649 return -rte_errno; 3650 } 3651 *(const void **)arg = &mlx5_flow_ops; 3652 return 0; 3653 case RTE_ETH_FILTER_FDIR: 3654 return mlx5_fdir_ctrl_func(dev, filter_op, arg); 3655 default: 3656 DRV_LOG(ERR, "port %u filter type (%d) not supported", 3657 dev->data->port_id, filter_type); 3658 rte_errno = ENOTSUP; 3659 return -rte_errno; 3660 } 3661 return 0; 3662 } 3663