1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <sys/queue.h> 7 #include <stdalign.h> 8 #include <stdint.h> 9 #include <string.h> 10 11 /* Verbs header. */ 12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 13 #ifdef PEDANTIC 14 #pragma GCC diagnostic ignored "-Wpedantic" 15 #endif 16 #include <infiniband/verbs.h> 17 #ifdef PEDANTIC 18 #pragma GCC diagnostic error "-Wpedantic" 19 #endif 20 21 #include <rte_common.h> 22 #include <rte_ether.h> 23 #include <rte_eth_ctrl.h> 24 #include <rte_ethdev_driver.h> 25 #include <rte_flow.h> 26 #include <rte_flow_driver.h> 27 #include <rte_malloc.h> 28 #include <rte_ip.h> 29 30 #include "mlx5.h" 31 #include "mlx5_defs.h" 32 #include "mlx5_prm.h" 33 #include "mlx5_glue.h" 34 35 /* Dev ops structure defined in mlx5.c */ 36 extern const struct eth_dev_ops mlx5_dev_ops; 37 extern const struct eth_dev_ops mlx5_dev_ops_isolate; 38 39 /* Pattern outer Layer bits. */ 40 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0) 41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) 42 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2) 43 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3) 44 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4) 45 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5) 46 47 /* Pattern inner Layer bits. */ 48 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6) 49 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7) 50 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8) 51 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9) 52 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10) 53 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11) 54 55 /* Pattern tunnel Layer bits. */ 56 #define MLX5_FLOW_LAYER_VXLAN (1u << 12) 57 #define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13) 58 #define MLX5_FLOW_LAYER_GRE (1u << 14) 59 #define MLX5_FLOW_LAYER_MPLS (1u << 15) 60 61 /* Outer Masks. */ 62 #define MLX5_FLOW_LAYER_OUTER_L3 \ 63 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6) 64 #define MLX5_FLOW_LAYER_OUTER_L4 \ 65 (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP) 66 #define MLX5_FLOW_LAYER_OUTER \ 67 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \ 68 MLX5_FLOW_LAYER_OUTER_L4) 69 70 /* Tunnel Masks. */ 71 #define MLX5_FLOW_LAYER_TUNNEL \ 72 (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \ 73 MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS) 74 75 /* Inner Masks. */ 76 #define MLX5_FLOW_LAYER_INNER_L3 \ 77 (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6) 78 #define MLX5_FLOW_LAYER_INNER_L4 \ 79 (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP) 80 #define MLX5_FLOW_LAYER_INNER \ 81 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \ 82 MLX5_FLOW_LAYER_INNER_L4) 83 84 /* Actions that modify the fate of matching traffic. */ 85 #define MLX5_FLOW_FATE_DROP (1u << 0) 86 #define MLX5_FLOW_FATE_QUEUE (1u << 1) 87 #define MLX5_FLOW_FATE_RSS (1u << 2) 88 89 /* Modify a packet. */ 90 #define MLX5_FLOW_MOD_FLAG (1u << 0) 91 #define MLX5_FLOW_MOD_MARK (1u << 1) 92 #define MLX5_FLOW_MOD_COUNT (1u << 2) 93 94 /* possible L3 layers protocols filtering. */ 95 #define MLX5_IP_PROTOCOL_TCP 6 96 #define MLX5_IP_PROTOCOL_UDP 17 97 #define MLX5_IP_PROTOCOL_GRE 47 98 #define MLX5_IP_PROTOCOL_MPLS 147 99 100 /* Priority reserved for default flows. 
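 * A priority of MLX5_FLOW_PRIO_RSVD ((uint32_t)-1) is remapped by
 * mlx5_flow_attributes() below to the largest supported priority value
 * (priv->config.flow_prio - 1), i.e. the lowest matching precedence.
 * Illustrative sketch of a default rule's attributes (names illustrative):
 *
 *   const struct rte_flow_attr attr = {
 *           .ingress = 1,
 *           .priority = MLX5_FLOW_PRIO_RSVD,
 *   };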
*/ 101 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1) 102 103 enum mlx5_expansion { 104 MLX5_EXPANSION_ROOT, 105 MLX5_EXPANSION_ROOT_OUTER, 106 MLX5_EXPANSION_OUTER_ETH, 107 MLX5_EXPANSION_OUTER_IPV4, 108 MLX5_EXPANSION_OUTER_IPV4_UDP, 109 MLX5_EXPANSION_OUTER_IPV4_TCP, 110 MLX5_EXPANSION_OUTER_IPV6, 111 MLX5_EXPANSION_OUTER_IPV6_UDP, 112 MLX5_EXPANSION_OUTER_IPV6_TCP, 113 MLX5_EXPANSION_VXLAN, 114 MLX5_EXPANSION_VXLAN_GPE, 115 MLX5_EXPANSION_GRE, 116 MLX5_EXPANSION_MPLS, 117 MLX5_EXPANSION_ETH, 118 MLX5_EXPANSION_IPV4, 119 MLX5_EXPANSION_IPV4_UDP, 120 MLX5_EXPANSION_IPV4_TCP, 121 MLX5_EXPANSION_IPV6, 122 MLX5_EXPANSION_IPV6_UDP, 123 MLX5_EXPANSION_IPV6_TCP, 124 }; 125 126 /** Supported expansion of items. */ 127 static const struct rte_flow_expand_node mlx5_support_expansion[] = { 128 [MLX5_EXPANSION_ROOT] = { 129 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 130 MLX5_EXPANSION_IPV4, 131 MLX5_EXPANSION_IPV6), 132 .type = RTE_FLOW_ITEM_TYPE_END, 133 }, 134 [MLX5_EXPANSION_ROOT_OUTER] = { 135 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 136 MLX5_EXPANSION_OUTER_IPV4, 137 MLX5_EXPANSION_OUTER_IPV6), 138 .type = RTE_FLOW_ITEM_TYPE_END, 139 }, 140 [MLX5_EXPANSION_OUTER_ETH] = { 141 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 142 MLX5_EXPANSION_OUTER_IPV6, 143 MLX5_EXPANSION_MPLS), 144 .type = RTE_FLOW_ITEM_TYPE_ETH, 145 .rss_types = 0, 146 }, 147 [MLX5_EXPANSION_OUTER_IPV4] = { 148 .next = RTE_FLOW_EXPAND_RSS_NEXT 149 (MLX5_EXPANSION_OUTER_IPV4_UDP, 150 MLX5_EXPANSION_OUTER_IPV4_TCP, 151 MLX5_EXPANSION_GRE), 152 .type = RTE_FLOW_ITEM_TYPE_IPV4, 153 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 154 ETH_RSS_NONFRAG_IPV4_OTHER, 155 }, 156 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 157 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 158 MLX5_EXPANSION_VXLAN_GPE), 159 .type = RTE_FLOW_ITEM_TYPE_UDP, 160 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 161 }, 162 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 163 .type = RTE_FLOW_ITEM_TYPE_TCP, 164 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 165 }, 166 [MLX5_EXPANSION_OUTER_IPV6] = { 167 .next = RTE_FLOW_EXPAND_RSS_NEXT 168 (MLX5_EXPANSION_OUTER_IPV6_UDP, 169 MLX5_EXPANSION_OUTER_IPV6_TCP), 170 .type = RTE_FLOW_ITEM_TYPE_IPV6, 171 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 172 ETH_RSS_NONFRAG_IPV6_OTHER, 173 }, 174 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 175 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 176 MLX5_EXPANSION_VXLAN_GPE), 177 .type = RTE_FLOW_ITEM_TYPE_UDP, 178 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 179 }, 180 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 181 .type = RTE_FLOW_ITEM_TYPE_TCP, 182 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 183 }, 184 [MLX5_EXPANSION_VXLAN] = { 185 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH), 186 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 187 }, 188 [MLX5_EXPANSION_VXLAN_GPE] = { 189 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 190 MLX5_EXPANSION_IPV4, 191 MLX5_EXPANSION_IPV6), 192 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 193 }, 194 [MLX5_EXPANSION_GRE] = { 195 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), 196 .type = RTE_FLOW_ITEM_TYPE_GRE, 197 }, 198 [MLX5_EXPANSION_MPLS] = { 199 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 200 MLX5_EXPANSION_IPV6), 201 .type = RTE_FLOW_ITEM_TYPE_MPLS, 202 }, 203 [MLX5_EXPANSION_ETH] = { 204 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 205 MLX5_EXPANSION_IPV6), 206 .type = RTE_FLOW_ITEM_TYPE_ETH, 207 }, 208 [MLX5_EXPANSION_IPV4] = { 209 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 210 MLX5_EXPANSION_IPV4_TCP), 211 .type = 
RTE_FLOW_ITEM_TYPE_IPV4, 212 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 213 ETH_RSS_NONFRAG_IPV4_OTHER, 214 }, 215 [MLX5_EXPANSION_IPV4_UDP] = { 216 .type = RTE_FLOW_ITEM_TYPE_UDP, 217 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 218 }, 219 [MLX5_EXPANSION_IPV4_TCP] = { 220 .type = RTE_FLOW_ITEM_TYPE_TCP, 221 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 222 }, 223 [MLX5_EXPANSION_IPV6] = { 224 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 225 MLX5_EXPANSION_IPV6_TCP), 226 .type = RTE_FLOW_ITEM_TYPE_IPV6, 227 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 228 ETH_RSS_NONFRAG_IPV6_OTHER, 229 }, 230 [MLX5_EXPANSION_IPV6_UDP] = { 231 .type = RTE_FLOW_ITEM_TYPE_UDP, 232 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 233 }, 234 [MLX5_EXPANSION_IPV6_TCP] = { 235 .type = RTE_FLOW_ITEM_TYPE_TCP, 236 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 237 }, 238 }; 239 240 /** Handles information leading to a drop fate. */ 241 struct mlx5_flow_verbs { 242 LIST_ENTRY(mlx5_flow_verbs) next; 243 unsigned int size; /**< Size of the attribute. */ 244 struct { 245 struct ibv_flow_attr *attr; 246 /**< Pointer to the Specification buffer. */ 247 uint8_t *specs; /**< Pointer to the specifications. */ 248 }; 249 struct ibv_flow *flow; /**< Verbs flow pointer. */ 250 struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */ 251 uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */ 252 }; 253 254 /* Counters information. */ 255 struct mlx5_flow_counter { 256 LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */ 257 uint32_t shared:1; /**< Share counter ID with other flow rules. */ 258 uint32_t ref_cnt:31; /**< Reference counter. */ 259 uint32_t id; /**< Counter ID. */ 260 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */ 261 uint64_t hits; /**< Number of packets matched by the rule. */ 262 uint64_t bytes; /**< Number of bytes matched by the rule. */ 263 }; 264 265 /* Flow structure. */ 266 struct rte_flow { 267 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */ 268 struct rte_flow_attr attributes; /**< User flow attribute. */ 269 uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */ 270 uint32_t layers; 271 /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */ 272 uint32_t modifier; 273 /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */ 274 uint32_t fate; 275 /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */ 276 uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */ 277 LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */ 278 struct mlx5_flow_verbs *cur_verbs; 279 /**< Current Verbs flow structure being filled. */ 280 struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */ 281 struct rte_flow_action_rss rss;/**< RSS context. */ 282 uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */ 283 uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */ 284 void *nl_flow; /**< Netlink flow buffer if relevant. */ 285 }; 286 287 static const struct rte_flow_ops mlx5_flow_ops = { 288 .validate = mlx5_flow_validate, 289 .create = mlx5_flow_create, 290 .destroy = mlx5_flow_destroy, 291 .flush = mlx5_flow_flush, 292 .isolate = mlx5_flow_isolate, 293 .query = mlx5_flow_query, 294 }; 295 296 /* Convert FDIR request to Generic flow. 
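 * The fields below back the generic attr/items/actions arrays, so a flow
 * director request can be expressed, for example, as (hedged illustration,
 * the actual filling is done by the FDIR conversion path):
 *
 *   items[0] = (struct rte_flow_item){
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &l2, .mask = &l2_mask,
 *   };
 *   items[1].type = RTE_FLOW_ITEM_TYPE_IPV4;   // or IPV6, spec = &l3.ipv4
 *   items[2].type = RTE_FLOW_ITEM_TYPE_UDP;    // or TCP, spec = &l4.udp
 *   items[3].type = RTE_FLOW_ITEM_TYPE_END;
 *   actions[0] = (struct rte_flow_action){
 *           .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue,
 *   };
 *   actions[1].type = RTE_FLOW_ACTION_TYPE_END;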
 */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/*
 * Number of sub priorities.
 * For the different kinds of pattern matching (i.e. L2, L3, L4) to match
 * correctly on the NIC (firmware dependent), L4 must have the highest
 * priority, followed by L3 and finally L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
};

/**
 * Discover the maximum number of priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
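 *
 * The number is probed by creating flow rules on the drop queue at Verbs
 * priority 7 and then 15; the highest level the kernel accepts selects the
 * matching priority map above. A hedged sketch of how the result is
 * typically consumed when the port is spawned (variable names illustrative,
 * the actual call site lives outside this file):
 *
 *   int prio = mlx5_flow_discover_priorities(dev);
 *
 *   if (prio < 0)
 *           return prio;
 *   priv->config.flow_prio = prio;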
388 */ 389 int 390 mlx5_flow_discover_priorities(struct rte_eth_dev *dev) 391 { 392 struct { 393 struct ibv_flow_attr attr; 394 struct ibv_flow_spec_eth eth; 395 struct ibv_flow_spec_action_drop drop; 396 } flow_attr = { 397 .attr = { 398 .num_of_specs = 2, 399 }, 400 .eth = { 401 .type = IBV_FLOW_SPEC_ETH, 402 .size = sizeof(struct ibv_flow_spec_eth), 403 }, 404 .drop = { 405 .size = sizeof(struct ibv_flow_spec_action_drop), 406 .type = IBV_FLOW_SPEC_ACTION_DROP, 407 }, 408 }; 409 struct ibv_flow *flow; 410 struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev); 411 uint16_t vprio[] = { 8, 16 }; 412 int i; 413 int priority = 0; 414 415 if (!drop) { 416 rte_errno = ENOTSUP; 417 return -rte_errno; 418 } 419 for (i = 0; i != RTE_DIM(vprio); i++) { 420 flow_attr.attr.priority = vprio[i] - 1; 421 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr); 422 if (!flow) 423 break; 424 claim_zero(mlx5_glue->destroy_flow(flow)); 425 priority = vprio[i]; 426 } 427 switch (priority) { 428 case 8: 429 priority = RTE_DIM(priority_map_3); 430 break; 431 case 16: 432 priority = RTE_DIM(priority_map_5); 433 break; 434 default: 435 rte_errno = ENOTSUP; 436 DRV_LOG(ERR, 437 "port %u verbs maximum priority: %d expected 8/16", 438 dev->data->port_id, vprio[i]); 439 return -rte_errno; 440 } 441 mlx5_hrxq_drop_release(dev); 442 DRV_LOG(INFO, "port %u flow maximum priority: %d", 443 dev->data->port_id, priority); 444 return priority; 445 } 446 447 /** 448 * Adjust flow priority. 449 * 450 * @param dev 451 * Pointer to Ethernet device. 452 * @param flow 453 * Pointer to an rte flow. 454 */ 455 static void 456 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow) 457 { 458 struct priv *priv = dev->data->dev_private; 459 uint32_t priority = flow->attributes.priority; 460 uint32_t subpriority = flow->cur_verbs->attr->priority; 461 462 switch (priv->config.flow_prio) { 463 case RTE_DIM(priority_map_3): 464 priority = priority_map_3[priority][subpriority]; 465 break; 466 case RTE_DIM(priority_map_5): 467 priority = priority_map_5[priority][subpriority]; 468 break; 469 } 470 flow->cur_verbs->attr->priority = priority; 471 } 472 473 /** 474 * Get a flow counter. 475 * 476 * @param[in] dev 477 * Pointer to Ethernet device. 478 * @param[in] shared 479 * Indicate if this counter is shared with other flows. 480 * @param[in] id 481 * Counter identifier. 482 * 483 * @return 484 * A pointer to the counter, NULL otherwise and rte_errno is set. 485 */ 486 static struct mlx5_flow_counter * 487 mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) 488 { 489 struct priv *priv = dev->data->dev_private; 490 struct mlx5_flow_counter *cnt; 491 492 LIST_FOREACH(cnt, &priv->flow_counters, next) { 493 if (cnt->shared != shared) 494 continue; 495 if (cnt->id != id) 496 continue; 497 cnt->ref_cnt++; 498 return cnt; 499 } 500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 501 502 struct mlx5_flow_counter tmpl = { 503 .shared = shared, 504 .id = id, 505 .cs = mlx5_glue->create_counter_set 506 (priv->ctx, 507 &(struct ibv_counter_set_init_attr){ 508 .counter_set_id = id, 509 }), 510 .hits = 0, 511 .bytes = 0, 512 }; 513 514 if (!tmpl.cs) { 515 rte_errno = errno; 516 return NULL; 517 } 518 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0); 519 if (!cnt) { 520 rte_errno = ENOMEM; 521 return NULL; 522 } 523 *cnt = tmpl; 524 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next); 525 return cnt; 526 #endif 527 rte_errno = ENOTSUP; 528 return NULL; 529 } 530 531 /** 532 * Release a flow counter. 
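 * Counters are reference counted: the Verbs counter set is destroyed and
 * the descriptor freed only once the last user drops its reference, e.g.
 * (illustrative pairing with the allocator above, names and the counter ID
 * are made up, error handling omitted):
 *
 *   struct mlx5_flow_counter *cnt = mlx5_flow_counter_new(dev, 1, 42);
 *
 *   ... attach cnt to a flow, use it ...
 *   mlx5_flow_counter_release(cnt);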
533 * 534 * @param[in] counter 535 * Pointer to the counter handler. 536 */ 537 static void 538 mlx5_flow_counter_release(struct mlx5_flow_counter *counter) 539 { 540 if (--counter->ref_cnt == 0) { 541 claim_zero(mlx5_glue->destroy_counter_set(counter->cs)); 542 LIST_REMOVE(counter, next); 543 rte_free(counter); 544 } 545 } 546 547 /** 548 * Verify the @p attributes will be correctly understood by the NIC and store 549 * them in the @p flow if everything is correct. 550 * 551 * @param[in] dev 552 * Pointer to Ethernet device. 553 * @param[in] attributes 554 * Pointer to flow attributes 555 * @param[in, out] flow 556 * Pointer to the rte_flow structure. 557 * @param[out] error 558 * Pointer to error structure. 559 * 560 * @return 561 * 0 on success, a negative errno value otherwise and rte_errno is set. 562 */ 563 static int 564 mlx5_flow_attributes(struct rte_eth_dev *dev, 565 const struct rte_flow_attr *attributes, 566 struct rte_flow *flow, 567 struct rte_flow_error *error) 568 { 569 uint32_t priority_max = 570 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1; 571 572 if (attributes->group) 573 return rte_flow_error_set(error, ENOTSUP, 574 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 575 NULL, 576 "groups is not supported"); 577 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 578 attributes->priority >= priority_max) 579 return rte_flow_error_set(error, ENOTSUP, 580 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 581 NULL, 582 "priority out of range"); 583 if (attributes->egress) 584 return rte_flow_error_set(error, ENOTSUP, 585 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, 586 NULL, 587 "egress is not supported"); 588 if (attributes->transfer) 589 return rte_flow_error_set(error, ENOTSUP, 590 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 591 NULL, 592 "transfer is not supported"); 593 if (!attributes->ingress) 594 return rte_flow_error_set(error, ENOTSUP, 595 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 596 NULL, 597 "ingress attribute is mandatory"); 598 flow->attributes = *attributes; 599 if (attributes->priority == MLX5_FLOW_PRIO_RSVD) 600 flow->attributes.priority = priority_max; 601 return 0; 602 } 603 604 /** 605 * Verify the @p item specifications (spec, last, mask) are compatible with the 606 * NIC capabilities. 607 * 608 * @param[in] item 609 * Item specification. 610 * @param[in] mask 611 * @p item->mask or flow default bit-masks. 612 * @param[in] nic_mask 613 * Bit-masks covering supported fields by the NIC to compare with user mask. 614 * @param[in] size 615 * Bit-masks size in bytes. 616 * @param[out] error 617 * Pointer to error structure. 618 * 619 * @return 620 * 0 on success, a negative errno value otherwise and rte_errno is set. 
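 *
 * Item handlers call it with the (user or default) mask together with the
 * mask of fields the NIC supports, as in this sketch mirroring the Ethernet
 * item handler below:
 *
 *   ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 *                                   (const uint8_t *)&nic_mask,
 *                                   sizeof(struct rte_flow_item_eth),
 *                                   error);
 *   if (ret)
 *           return ret;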
621 */ 622 static int 623 mlx5_flow_item_acceptable(const struct rte_flow_item *item, 624 const uint8_t *mask, 625 const uint8_t *nic_mask, 626 unsigned int size, 627 struct rte_flow_error *error) 628 { 629 unsigned int i; 630 631 assert(nic_mask); 632 for (i = 0; i < size; ++i) 633 if ((nic_mask[i] | mask[i]) != nic_mask[i]) 634 return rte_flow_error_set(error, ENOTSUP, 635 RTE_FLOW_ERROR_TYPE_ITEM, 636 item, 637 "mask enables non supported" 638 " bits"); 639 if (!item->spec && (item->mask || item->last)) 640 return rte_flow_error_set(error, EINVAL, 641 RTE_FLOW_ERROR_TYPE_ITEM, 642 item, 643 "mask/last without a spec is not" 644 " supported"); 645 if (item->spec && item->last) { 646 uint8_t spec[size]; 647 uint8_t last[size]; 648 unsigned int i; 649 int ret; 650 651 for (i = 0; i < size; ++i) { 652 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i]; 653 last[i] = ((const uint8_t *)item->last)[i] & mask[i]; 654 } 655 ret = memcmp(spec, last, size); 656 if (ret != 0) 657 return rte_flow_error_set(error, ENOTSUP, 658 RTE_FLOW_ERROR_TYPE_ITEM, 659 item, 660 "range is not supported"); 661 } 662 return 0; 663 } 664 665 /** 666 * Add a verbs item specification into @p flow. 667 * 668 * @param[in, out] flow 669 * Pointer to flow structure. 670 * @param[in] src 671 * Create specification. 672 * @param[in] size 673 * Size in bytes of the specification to copy. 674 */ 675 static void 676 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size) 677 { 678 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 679 680 if (verbs->specs) { 681 void *dst; 682 683 dst = (void *)(verbs->specs + verbs->size); 684 memcpy(dst, src, size); 685 ++verbs->attr->num_of_specs; 686 } 687 verbs->size += size; 688 } 689 690 /** 691 * Adjust verbs hash fields according to the @p flow information. 692 * 693 * @param[in, out] flow. 694 * Pointer to flow structure. 695 * @param[in] tunnel 696 * 1 when the hash field is for a tunnel item. 697 * @param[in] layer_types 698 * ETH_RSS_* types. 699 * @param[in] hash_fields 700 * Item hash fields. 701 */ 702 static void 703 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow, 704 int tunnel __rte_unused, 705 uint32_t layer_types, uint64_t hash_fields) 706 { 707 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 708 hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0); 709 if (flow->rss.level == 2 && !tunnel) 710 hash_fields = 0; 711 else if (flow->rss.level < 2 && tunnel) 712 hash_fields = 0; 713 #endif 714 if (!(flow->rss.types & layer_types)) 715 hash_fields = 0; 716 flow->cur_verbs->hash_fields |= hash_fields; 717 } 718 719 /** 720 * Convert the @p item into a Verbs specification after ensuring the NIC 721 * will understand and process it correctly. 722 * If the necessary size for the conversion is greater than the @p flow_size, 723 * nothing is written in @p flow, the validation is still performed. 724 * 725 * @param[in] item 726 * Item specification. 727 * @param[in, out] flow 728 * Pointer to flow structure. 729 * @param[in] flow_size 730 * Size in bytes of the available space in @p flow, if too small, nothing is 731 * written. 732 * @param[out] error 733 * Pointer to error structure. 734 * 735 * @return 736 * On success the number of bytes consumed/necessary, if the returned value 737 * is lesser or equal to @p flow_size, the @p item has fully been converted, 738 * otherwise another call with this returned memory size should be done. 739 * On error, a negative errno value is returned and rte_errno is set. 
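 *
 * For example (hedged illustration, names and MAC address made up), an item
 * such as:
 *
 *   struct rte_flow_item_eth eth_spec = {
 *           .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *   };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &eth_spec,
 *           .mask = &rte_flow_item_eth_mask,
 *   };
 *
 * becomes an ibv_flow_spec_eth matching that destination MAC, with value
 * bits outside the mask cleared.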
740 */ 741 static int 742 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow, 743 const size_t flow_size, struct rte_flow_error *error) 744 { 745 const struct rte_flow_item_eth *spec = item->spec; 746 const struct rte_flow_item_eth *mask = item->mask; 747 const struct rte_flow_item_eth nic_mask = { 748 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 749 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 750 .type = RTE_BE16(0xffff), 751 }; 752 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 753 const unsigned int size = sizeof(struct ibv_flow_spec_eth); 754 struct ibv_flow_spec_eth eth = { 755 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 756 .size = size, 757 }; 758 int ret; 759 760 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 761 MLX5_FLOW_LAYER_OUTER_L2)) 762 return rte_flow_error_set(error, ENOTSUP, 763 RTE_FLOW_ERROR_TYPE_ITEM, 764 item, 765 "L2 layers already configured"); 766 if (!mask) 767 mask = &rte_flow_item_eth_mask; 768 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 769 (const uint8_t *)&nic_mask, 770 sizeof(struct rte_flow_item_eth), 771 error); 772 if (ret) 773 return ret; 774 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 775 MLX5_FLOW_LAYER_OUTER_L2; 776 if (size > flow_size) 777 return size; 778 if (spec) { 779 unsigned int i; 780 781 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN); 782 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN); 783 eth.val.ether_type = spec->type; 784 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN); 785 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN); 786 eth.mask.ether_type = mask->type; 787 /* Remove unwanted bits from values. */ 788 for (i = 0; i < ETHER_ADDR_LEN; ++i) { 789 eth.val.dst_mac[i] &= eth.mask.dst_mac[i]; 790 eth.val.src_mac[i] &= eth.mask.src_mac[i]; 791 } 792 eth.val.ether_type &= eth.mask.ether_type; 793 } 794 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 795 mlx5_flow_spec_verbs_add(flow, ð, size); 796 return size; 797 } 798 799 /** 800 * Update the VLAN tag in the Verbs Ethernet specification. 801 * 802 * @param[in, out] attr 803 * Pointer to Verbs attributes structure. 804 * @param[in] eth 805 * Verbs structure containing the VLAN information to copy. 806 */ 807 static void 808 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr, 809 struct ibv_flow_spec_eth *eth) 810 { 811 unsigned int i; 812 const enum ibv_flow_spec_type search = eth->type; 813 struct ibv_spec_header *hdr = (struct ibv_spec_header *) 814 ((uint8_t *)attr + sizeof(struct ibv_flow_attr)); 815 816 for (i = 0; i != attr->num_of_specs; ++i) { 817 if (hdr->type == search) { 818 struct ibv_flow_spec_eth *e = 819 (struct ibv_flow_spec_eth *)hdr; 820 821 e->val.vlan_tag = eth->val.vlan_tag; 822 e->mask.vlan_tag = eth->mask.vlan_tag; 823 e->val.ether_type = eth->val.ether_type; 824 e->mask.ether_type = eth->mask.ether_type; 825 break; 826 } 827 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size); 828 } 829 } 830 831 /** 832 * Convert the @p item into @p flow (or by updating the already present 833 * Ethernet Verbs) specification after ensuring the NIC will understand and 834 * process it correctly. 835 * If the necessary size for the conversion is greater than the @p flow_size, 836 * nothing is written in @p flow, the validation is still performed. 837 * 838 * @param[in] item 839 * Item specification. 840 * @param[in, out] flow 841 * Pointer to flow structure. 
842 * @param[in] flow_size 843 * Size in bytes of the available space in @p flow, if too small, nothing is 844 * written. 845 * @param[out] error 846 * Pointer to error structure. 847 * 848 * @return 849 * On success the number of bytes consumed/necessary, if the returned value 850 * is lesser or equal to @p flow_size, the @p item has fully been converted, 851 * otherwise another call with this returned memory size should be done. 852 * On error, a negative errno value is returned and rte_errno is set. 853 */ 854 static int 855 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow, 856 const size_t flow_size, struct rte_flow_error *error) 857 { 858 const struct rte_flow_item_vlan *spec = item->spec; 859 const struct rte_flow_item_vlan *mask = item->mask; 860 const struct rte_flow_item_vlan nic_mask = { 861 .tci = RTE_BE16(0x0fff), 862 .inner_type = RTE_BE16(0xffff), 863 }; 864 unsigned int size = sizeof(struct ibv_flow_spec_eth); 865 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 866 struct ibv_flow_spec_eth eth = { 867 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 868 .size = size, 869 }; 870 int ret; 871 const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 872 MLX5_FLOW_LAYER_INNER_L4) : 873 (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4); 874 const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : 875 MLX5_FLOW_LAYER_OUTER_VLAN; 876 const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 877 MLX5_FLOW_LAYER_OUTER_L2; 878 879 if (flow->layers & vlanm) 880 return rte_flow_error_set(error, ENOTSUP, 881 RTE_FLOW_ERROR_TYPE_ITEM, 882 item, 883 "VLAN layer already configured"); 884 else if ((flow->layers & l34m) != 0) 885 return rte_flow_error_set(error, ENOTSUP, 886 RTE_FLOW_ERROR_TYPE_ITEM, 887 item, 888 "L2 layer cannot follow L3/L4 layer"); 889 if (!mask) 890 mask = &rte_flow_item_vlan_mask; 891 ret = mlx5_flow_item_acceptable 892 (item, (const uint8_t *)mask, 893 (const uint8_t *)&nic_mask, 894 sizeof(struct rte_flow_item_vlan), error); 895 if (ret) 896 return ret; 897 if (spec) { 898 eth.val.vlan_tag = spec->tci; 899 eth.mask.vlan_tag = mask->tci; 900 eth.val.vlan_tag &= eth.mask.vlan_tag; 901 eth.val.ether_type = spec->inner_type; 902 eth.mask.ether_type = mask->inner_type; 903 eth.val.ether_type &= eth.mask.ether_type; 904 } 905 /* 906 * From verbs perspective an empty VLAN is equivalent 907 * to a packet without VLAN layer. 908 */ 909 if (!eth.mask.vlan_tag) 910 return rte_flow_error_set(error, EINVAL, 911 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 912 item->spec, 913 "VLAN cannot be empty"); 914 if (!(flow->layers & l2m)) { 915 if (size <= flow_size) { 916 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 917 mlx5_flow_spec_verbs_add(flow, ð, size); 918 } 919 } else { 920 if (flow->cur_verbs) 921 mlx5_flow_item_vlan_update(flow->cur_verbs->attr, 922 ð); 923 size = 0; /* Only an update is done in eth specification. */ 924 } 925 flow->layers |= tunnel ? 926 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) : 927 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN); 928 return size; 929 } 930 931 /** 932 * Convert the @p item into a Verbs specification after ensuring the NIC 933 * will understand and process it correctly. 934 * If the necessary size for the conversion is greater than the @p flow_size, 935 * nothing is written in @p flow, the validation is still performed. 936 * 937 * @param[in] item 938 * Item specification. 939 * @param[in, out] flow 940 * Pointer to flow structure. 
941 * @param[in] flow_size 942 * Size in bytes of the available space in @p flow, if too small, nothing is 943 * written. 944 * @param[out] error 945 * Pointer to error structure. 946 * 947 * @return 948 * On success the number of bytes consumed/necessary, if the returned value 949 * is lesser or equal to @p flow_size, the @p item has fully been converted, 950 * otherwise another call with this returned memory size should be done. 951 * On error, a negative errno value is returned and rte_errno is set. 952 */ 953 static int 954 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow, 955 const size_t flow_size, struct rte_flow_error *error) 956 { 957 const struct rte_flow_item_ipv4 *spec = item->spec; 958 const struct rte_flow_item_ipv4 *mask = item->mask; 959 const struct rte_flow_item_ipv4 nic_mask = { 960 .hdr = { 961 .src_addr = RTE_BE32(0xffffffff), 962 .dst_addr = RTE_BE32(0xffffffff), 963 .type_of_service = 0xff, 964 .next_proto_id = 0xff, 965 }, 966 }; 967 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 968 unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext); 969 struct ibv_flow_spec_ipv4_ext ipv4 = { 970 .type = IBV_FLOW_SPEC_IPV4_EXT | 971 (tunnel ? IBV_FLOW_SPEC_INNER : 0), 972 .size = size, 973 }; 974 int ret; 975 976 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 977 MLX5_FLOW_LAYER_OUTER_L3)) 978 return rte_flow_error_set(error, ENOTSUP, 979 RTE_FLOW_ERROR_TYPE_ITEM, 980 item, 981 "multiple L3 layers not supported"); 982 else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 983 MLX5_FLOW_LAYER_OUTER_L4)) 984 return rte_flow_error_set(error, ENOTSUP, 985 RTE_FLOW_ERROR_TYPE_ITEM, 986 item, 987 "L3 cannot follow an L4 layer."); 988 if (!mask) 989 mask = &rte_flow_item_ipv4_mask; 990 ret = mlx5_flow_item_acceptable 991 (item, (const uint8_t *)mask, 992 (const uint8_t *)&nic_mask, 993 sizeof(struct rte_flow_item_ipv4), error); 994 if (ret < 0) 995 return ret; 996 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 997 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 998 if (spec) { 999 ipv4.val = (struct ibv_flow_ipv4_ext_filter){ 1000 .src_ip = spec->hdr.src_addr, 1001 .dst_ip = spec->hdr.dst_addr, 1002 .proto = spec->hdr.next_proto_id, 1003 .tos = spec->hdr.type_of_service, 1004 }; 1005 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){ 1006 .src_ip = mask->hdr.src_addr, 1007 .dst_ip = mask->hdr.dst_addr, 1008 .proto = mask->hdr.next_proto_id, 1009 .tos = mask->hdr.type_of_service, 1010 }; 1011 /* Remove unwanted bits from values. */ 1012 ipv4.val.src_ip &= ipv4.mask.src_ip; 1013 ipv4.val.dst_ip &= ipv4.mask.dst_ip; 1014 ipv4.val.proto &= ipv4.mask.proto; 1015 ipv4.val.tos &= ipv4.mask.tos; 1016 } 1017 flow->l3_protocol_en = !!ipv4.mask.proto; 1018 flow->l3_protocol = ipv4.val.proto; 1019 if (size <= flow_size) { 1020 mlx5_flow_verbs_hashfields_adjust 1021 (flow, tunnel, 1022 (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 1023 ETH_RSS_NONFRAG_IPV4_OTHER), 1024 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4)); 1025 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3; 1026 mlx5_flow_spec_verbs_add(flow, &ipv4, size); 1027 } 1028 return size; 1029 } 1030 1031 /** 1032 * Convert the @p item into a Verbs specification after ensuring the NIC 1033 * will understand and process it correctly. 1034 * If the necessary size for the conversion is greater than the @p flow_size, 1035 * nothing is written in @p flow, the validation is still performed. 1036 * 1037 * @param[in] item 1038 * Item specification. 
1039 * @param[in, out] flow 1040 * Pointer to flow structure. 1041 * @param[in] flow_size 1042 * Size in bytes of the available space in @p flow, if too small, nothing is 1043 * written. 1044 * @param[out] error 1045 * Pointer to error structure. 1046 * 1047 * @return 1048 * On success the number of bytes consumed/necessary, if the returned value 1049 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1050 * otherwise another call with this returned memory size should be done. 1051 * On error, a negative errno value is returned and rte_errno is set. 1052 */ 1053 static int 1054 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow, 1055 const size_t flow_size, struct rte_flow_error *error) 1056 { 1057 const struct rte_flow_item_ipv6 *spec = item->spec; 1058 const struct rte_flow_item_ipv6 *mask = item->mask; 1059 const struct rte_flow_item_ipv6 nic_mask = { 1060 .hdr = { 1061 .src_addr = 1062 "\xff\xff\xff\xff\xff\xff\xff\xff" 1063 "\xff\xff\xff\xff\xff\xff\xff\xff", 1064 .dst_addr = 1065 "\xff\xff\xff\xff\xff\xff\xff\xff" 1066 "\xff\xff\xff\xff\xff\xff\xff\xff", 1067 .vtc_flow = RTE_BE32(0xffffffff), 1068 .proto = 0xff, 1069 .hop_limits = 0xff, 1070 }, 1071 }; 1072 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1073 unsigned int size = sizeof(struct ibv_flow_spec_ipv6); 1074 struct ibv_flow_spec_ipv6 ipv6 = { 1075 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1076 .size = size, 1077 }; 1078 int ret; 1079 1080 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1081 MLX5_FLOW_LAYER_OUTER_L3)) 1082 return rte_flow_error_set(error, ENOTSUP, 1083 RTE_FLOW_ERROR_TYPE_ITEM, 1084 item, 1085 "multiple L3 layers not supported"); 1086 else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1087 MLX5_FLOW_LAYER_OUTER_L4)) 1088 return rte_flow_error_set(error, ENOTSUP, 1089 RTE_FLOW_ERROR_TYPE_ITEM, 1090 item, 1091 "L3 cannot follow an L4 layer."); 1092 /* 1093 * IPv6 is not recognised by the NIC inside a GRE tunnel. 1094 * Such support has to be disabled as the rule will be 1095 * accepted. Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and 1096 * Mellanox OFED 4.4-1.0.0.0. 1097 */ 1098 if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE) 1099 return rte_flow_error_set(error, ENOTSUP, 1100 RTE_FLOW_ERROR_TYPE_ITEM, 1101 item, 1102 "IPv6 inside a GRE tunnel is" 1103 " not recognised."); 1104 if (!mask) 1105 mask = &rte_flow_item_ipv6_mask; 1106 ret = mlx5_flow_item_acceptable 1107 (item, (const uint8_t *)mask, 1108 (const uint8_t *)&nic_mask, 1109 sizeof(struct rte_flow_item_ipv6), error); 1110 if (ret < 0) 1111 return ret; 1112 flow->layers |= tunnel ? 
MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1113 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1114 if (spec) { 1115 unsigned int i; 1116 uint32_t vtc_flow_val; 1117 uint32_t vtc_flow_mask; 1118 1119 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr, 1120 RTE_DIM(ipv6.val.src_ip)); 1121 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr, 1122 RTE_DIM(ipv6.val.dst_ip)); 1123 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr, 1124 RTE_DIM(ipv6.mask.src_ip)); 1125 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr, 1126 RTE_DIM(ipv6.mask.dst_ip)); 1127 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow); 1128 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow); 1129 ipv6.val.flow_label = 1130 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >> 1131 IPV6_HDR_FL_SHIFT); 1132 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >> 1133 IPV6_HDR_TC_SHIFT; 1134 ipv6.val.next_hdr = spec->hdr.proto; 1135 ipv6.val.hop_limit = spec->hdr.hop_limits; 1136 ipv6.mask.flow_label = 1137 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >> 1138 IPV6_HDR_FL_SHIFT); 1139 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >> 1140 IPV6_HDR_TC_SHIFT; 1141 ipv6.mask.next_hdr = mask->hdr.proto; 1142 ipv6.mask.hop_limit = mask->hdr.hop_limits; 1143 /* Remove unwanted bits from values. */ 1144 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) { 1145 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i]; 1146 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i]; 1147 } 1148 ipv6.val.flow_label &= ipv6.mask.flow_label; 1149 ipv6.val.traffic_class &= ipv6.mask.traffic_class; 1150 ipv6.val.next_hdr &= ipv6.mask.next_hdr; 1151 ipv6.val.hop_limit &= ipv6.mask.hop_limit; 1152 } 1153 flow->l3_protocol_en = !!ipv6.mask.next_hdr; 1154 flow->l3_protocol = ipv6.val.next_hdr; 1155 if (size <= flow_size) { 1156 mlx5_flow_verbs_hashfields_adjust 1157 (flow, tunnel, 1158 (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER), 1159 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6)); 1160 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3; 1161 mlx5_flow_spec_verbs_add(flow, &ipv6, size); 1162 } 1163 return size; 1164 } 1165 1166 /** 1167 * Convert the @p item into a Verbs specification after ensuring the NIC 1168 * will understand and process it correctly. 1169 * If the necessary size for the conversion is greater than the @p flow_size, 1170 * nothing is written in @p flow, the validation is still performed. 1171 * 1172 * @param[in] item 1173 * Item specification. 1174 * @param[in, out] flow 1175 * Pointer to flow structure. 1176 * @param[in] flow_size 1177 * Size in bytes of the available space in @p flow, if too small, nothing is 1178 * written. 1179 * @param[out] error 1180 * Pointer to error structure. 1181 * 1182 * @return 1183 * On success the number of bytes consumed/necessary, if the returned value 1184 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1185 * otherwise another call with this returned memory size should be done. 1186 * On error, a negative errno value is returned and rte_errno is set. 1187 */ 1188 static int 1189 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow, 1190 const size_t flow_size, struct rte_flow_error *error) 1191 { 1192 const struct rte_flow_item_udp *spec = item->spec; 1193 const struct rte_flow_item_udp *mask = item->mask; 1194 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1195 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); 1196 struct ibv_flow_spec_tcp_udp udp = { 1197 .type = IBV_FLOW_SPEC_UDP | (tunnel ? 
IBV_FLOW_SPEC_INNER : 0), 1198 .size = size, 1199 }; 1200 int ret; 1201 1202 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP) 1203 return rte_flow_error_set(error, ENOTSUP, 1204 RTE_FLOW_ERROR_TYPE_ITEM, 1205 item, 1206 "protocol filtering not compatible" 1207 " with UDP layer"); 1208 if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1209 MLX5_FLOW_LAYER_OUTER_L3))) 1210 return rte_flow_error_set(error, ENOTSUP, 1211 RTE_FLOW_ERROR_TYPE_ITEM, 1212 item, 1213 "L3 is mandatory to filter" 1214 " on L4"); 1215 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1216 MLX5_FLOW_LAYER_OUTER_L4)) 1217 return rte_flow_error_set(error, ENOTSUP, 1218 RTE_FLOW_ERROR_TYPE_ITEM, 1219 item, 1220 "L4 layer is already" 1221 " present"); 1222 if (!mask) 1223 mask = &rte_flow_item_udp_mask; 1224 ret = mlx5_flow_item_acceptable 1225 (item, (const uint8_t *)mask, 1226 (const uint8_t *)&rte_flow_item_udp_mask, 1227 sizeof(struct rte_flow_item_udp), error); 1228 if (ret < 0) 1229 return ret; 1230 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : 1231 MLX5_FLOW_LAYER_OUTER_L4_UDP; 1232 if (spec) { 1233 udp.val.dst_port = spec->hdr.dst_port; 1234 udp.val.src_port = spec->hdr.src_port; 1235 udp.mask.dst_port = mask->hdr.dst_port; 1236 udp.mask.src_port = mask->hdr.src_port; 1237 /* Remove unwanted bits from values. */ 1238 udp.val.src_port &= udp.mask.src_port; 1239 udp.val.dst_port &= udp.mask.dst_port; 1240 } 1241 if (size <= flow_size) { 1242 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP, 1243 (IBV_RX_HASH_SRC_PORT_UDP | 1244 IBV_RX_HASH_DST_PORT_UDP)); 1245 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4; 1246 mlx5_flow_spec_verbs_add(flow, &udp, size); 1247 } 1248 return size; 1249 } 1250 1251 /** 1252 * Convert the @p item into a Verbs specification after ensuring the NIC 1253 * will understand and process it correctly. 1254 * If the necessary size for the conversion is greater than the @p flow_size, 1255 * nothing is written in @p flow, the validation is still performed. 1256 * 1257 * @param[in] item 1258 * Item specification. 1259 * @param[in, out] flow 1260 * Pointer to flow structure. 1261 * @param[in] flow_size 1262 * Size in bytes of the available space in @p flow, if too small, nothing is 1263 * written. 1264 * @param[out] error 1265 * Pointer to error structure. 1266 * 1267 * @return 1268 * On success the number of bytes consumed/necessary, if the returned value 1269 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1270 * otherwise another call with this returned memory size should be done. 1271 * On error, a negative errno value is returned and rte_errno is set. 1272 */ 1273 static int 1274 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow, 1275 const size_t flow_size, struct rte_flow_error *error) 1276 { 1277 const struct rte_flow_item_tcp *spec = item->spec; 1278 const struct rte_flow_item_tcp *mask = item->mask; 1279 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1280 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); 1281 struct ibv_flow_spec_tcp_udp tcp = { 1282 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1283 .size = size, 1284 }; 1285 int ret; 1286 1287 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP) 1288 return rte_flow_error_set(error, ENOTSUP, 1289 RTE_FLOW_ERROR_TYPE_ITEM, 1290 item, 1291 "protocol filtering not compatible" 1292 " with TCP layer"); 1293 if (!(flow->layers & (tunnel ? 
MLX5_FLOW_LAYER_INNER_L3 : 1294 MLX5_FLOW_LAYER_OUTER_L3))) 1295 return rte_flow_error_set(error, ENOTSUP, 1296 RTE_FLOW_ERROR_TYPE_ITEM, 1297 item, 1298 "L3 is mandatory to filter on L4"); 1299 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1300 MLX5_FLOW_LAYER_OUTER_L4)) 1301 return rte_flow_error_set(error, ENOTSUP, 1302 RTE_FLOW_ERROR_TYPE_ITEM, 1303 item, 1304 "L4 layer is already present"); 1305 if (!mask) 1306 mask = &rte_flow_item_tcp_mask; 1307 ret = mlx5_flow_item_acceptable 1308 (item, (const uint8_t *)mask, 1309 (const uint8_t *)&rte_flow_item_tcp_mask, 1310 sizeof(struct rte_flow_item_tcp), error); 1311 if (ret < 0) 1312 return ret; 1313 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : 1314 MLX5_FLOW_LAYER_OUTER_L4_TCP; 1315 if (spec) { 1316 tcp.val.dst_port = spec->hdr.dst_port; 1317 tcp.val.src_port = spec->hdr.src_port; 1318 tcp.mask.dst_port = mask->hdr.dst_port; 1319 tcp.mask.src_port = mask->hdr.src_port; 1320 /* Remove unwanted bits from values. */ 1321 tcp.val.src_port &= tcp.mask.src_port; 1322 tcp.val.dst_port &= tcp.mask.dst_port; 1323 } 1324 if (size <= flow_size) { 1325 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP, 1326 (IBV_RX_HASH_SRC_PORT_TCP | 1327 IBV_RX_HASH_DST_PORT_TCP)); 1328 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4; 1329 mlx5_flow_spec_verbs_add(flow, &tcp, size); 1330 } 1331 return size; 1332 } 1333 1334 /** 1335 * Convert the @p item into a Verbs specification after ensuring the NIC 1336 * will understand and process it correctly. 1337 * If the necessary size for the conversion is greater than the @p flow_size, 1338 * nothing is written in @p flow, the validation is still performed. 1339 * 1340 * @param[in] item 1341 * Item specification. 1342 * @param[in, out] flow 1343 * Pointer to flow structure. 1344 * @param[in] flow_size 1345 * Size in bytes of the available space in @p flow, if too small, nothing is 1346 * written. 1347 * @param[out] error 1348 * Pointer to error structure. 1349 * 1350 * @return 1351 * On success the number of bytes consumed/necessary, if the returned value 1352 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1353 * otherwise another call with this returned memory size should be done. 1354 * On error, a negative errno value is returned and rte_errno is set. 
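 *
 * The 24-bit VNI is copied into the last three bytes of the 32-bit Verbs
 * tunnel_id (network byte order), so a VNI of 0x123456 matched with the
 * default mask ends up as (hedged illustration):
 *
 *   vxlan.val.tunnel_id  == rte_cpu_to_be_32(0x00123456)
 *   vxlan.mask.tunnel_id == rte_cpu_to_be_32(0x00ffffff)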
1355 */ 1356 static int 1357 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow, 1358 const size_t flow_size, struct rte_flow_error *error) 1359 { 1360 const struct rte_flow_item_vxlan *spec = item->spec; 1361 const struct rte_flow_item_vxlan *mask = item->mask; 1362 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1363 struct ibv_flow_spec_tunnel vxlan = { 1364 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1365 .size = size, 1366 }; 1367 int ret; 1368 union vni { 1369 uint32_t vlan_id; 1370 uint8_t vni[4]; 1371 } id = { .vlan_id = 0, }; 1372 1373 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1374 return rte_flow_error_set(error, ENOTSUP, 1375 RTE_FLOW_ERROR_TYPE_ITEM, 1376 item, 1377 "a tunnel is already present"); 1378 /* 1379 * Verify only UDPv4 is present as defined in 1380 * https://tools.ietf.org/html/rfc7348 1381 */ 1382 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1383 return rte_flow_error_set(error, ENOTSUP, 1384 RTE_FLOW_ERROR_TYPE_ITEM, 1385 item, 1386 "no outer UDP layer found"); 1387 if (!mask) 1388 mask = &rte_flow_item_vxlan_mask; 1389 ret = mlx5_flow_item_acceptable 1390 (item, (const uint8_t *)mask, 1391 (const uint8_t *)&rte_flow_item_vxlan_mask, 1392 sizeof(struct rte_flow_item_vxlan), error); 1393 if (ret < 0) 1394 return ret; 1395 if (spec) { 1396 memcpy(&id.vni[1], spec->vni, 3); 1397 vxlan.val.tunnel_id = id.vlan_id; 1398 memcpy(&id.vni[1], mask->vni, 3); 1399 vxlan.mask.tunnel_id = id.vlan_id; 1400 /* Remove unwanted bits from values. */ 1401 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id; 1402 } 1403 /* 1404 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if 1405 * only this layer is defined in the Verbs specification it is 1406 * interpreted as wildcard and all packets will match this 1407 * rule, if it follows a full stack layer (ex: eth / ipv4 / 1408 * udp), all packets matching the layers before will also 1409 * match this rule. To avoid such situation, VNI 0 is 1410 * currently refused. 1411 */ 1412 if (!vxlan.val.tunnel_id) 1413 return rte_flow_error_set(error, EINVAL, 1414 RTE_FLOW_ERROR_TYPE_ITEM, 1415 item, 1416 "VXLAN vni cannot be 0"); 1417 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1418 return rte_flow_error_set(error, EINVAL, 1419 RTE_FLOW_ERROR_TYPE_ITEM, 1420 item, 1421 "VXLAN tunnel must be fully defined"); 1422 if (size <= flow_size) { 1423 mlx5_flow_spec_verbs_add(flow, &vxlan, size); 1424 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1425 } 1426 flow->layers |= MLX5_FLOW_LAYER_VXLAN; 1427 return size; 1428 } 1429 1430 /** 1431 * Convert the @p item into a Verbs specification after ensuring the NIC 1432 * will understand and process it correctly. 1433 * If the necessary size for the conversion is greater than the @p flow_size, 1434 * nothing is written in @p flow, the validation is still performed. 1435 * 1436 * @param dev 1437 * Pointer to Ethernet device. 1438 * @param[in] item 1439 * Item specification. 1440 * @param[in, out] flow 1441 * Pointer to flow structure. 1442 * @param[in] flow_size 1443 * Size in bytes of the available space in @p flow, if too small, nothing is 1444 * written. 1445 * @param[out] error 1446 * Pointer to error structure. 1447 * 1448 * @return 1449 * On success the number of bytes consumed/necessary, if the returned value 1450 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1451 * otherwise another call with this returned memory size should be done. 1452 * On error, a negative errno value is returned and rte_errno is set. 
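 *
 * Matching VXLAN-GPE additionally requires the "l3_vxlan_en=1" device
 * parameter and firmware support; a typical accepted pattern is a fully
 * specified outer stack such as ETH / IPV4 / UDP (dst 4790) /
 * VXLAN_GPE (non-zero VNI) / END (hedged illustration, the handler itself
 * only checks for the outer UDP layer and a non-zero VNI).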
1453 */ 1454 static int 1455 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev, 1456 const struct rte_flow_item *item, 1457 struct rte_flow *flow, const size_t flow_size, 1458 struct rte_flow_error *error) 1459 { 1460 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1461 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1462 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1463 struct ibv_flow_spec_tunnel vxlan_gpe = { 1464 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1465 .size = size, 1466 }; 1467 int ret; 1468 union vni { 1469 uint32_t vlan_id; 1470 uint8_t vni[4]; 1471 } id = { .vlan_id = 0, }; 1472 1473 if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en) 1474 return rte_flow_error_set(error, ENOTSUP, 1475 RTE_FLOW_ERROR_TYPE_ITEM, 1476 item, 1477 "L3 VXLAN is not enabled by device" 1478 " parameter and/or not configured in" 1479 " firmware"); 1480 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1481 return rte_flow_error_set(error, ENOTSUP, 1482 RTE_FLOW_ERROR_TYPE_ITEM, 1483 item, 1484 "a tunnel is already present"); 1485 /* 1486 * Verify only UDPv4 is present as defined in 1487 * https://tools.ietf.org/html/rfc7348 1488 */ 1489 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1490 return rte_flow_error_set(error, ENOTSUP, 1491 RTE_FLOW_ERROR_TYPE_ITEM, 1492 item, 1493 "no outer UDP layer found"); 1494 if (!mask) 1495 mask = &rte_flow_item_vxlan_gpe_mask; 1496 ret = mlx5_flow_item_acceptable 1497 (item, (const uint8_t *)mask, 1498 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 1499 sizeof(struct rte_flow_item_vxlan_gpe), error); 1500 if (ret < 0) 1501 return ret; 1502 if (spec) { 1503 memcpy(&id.vni[1], spec->vni, 3); 1504 vxlan_gpe.val.tunnel_id = id.vlan_id; 1505 memcpy(&id.vni[1], mask->vni, 3); 1506 vxlan_gpe.mask.tunnel_id = id.vlan_id; 1507 if (spec->protocol) 1508 return rte_flow_error_set 1509 (error, EINVAL, 1510 RTE_FLOW_ERROR_TYPE_ITEM, 1511 item, 1512 "VxLAN-GPE protocol not supported"); 1513 /* Remove unwanted bits from values. */ 1514 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id; 1515 } 1516 /* 1517 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this 1518 * layer is defined in the Verbs specification it is interpreted as 1519 * wildcard and all packets will match this rule, if it follows a full 1520 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers 1521 * before will also match this rule. To avoid such situation, VNI 0 1522 * is currently refused. 1523 */ 1524 if (!vxlan_gpe.val.tunnel_id) 1525 return rte_flow_error_set(error, EINVAL, 1526 RTE_FLOW_ERROR_TYPE_ITEM, 1527 item, 1528 "VXLAN-GPE vni cannot be 0"); 1529 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1530 return rte_flow_error_set(error, EINVAL, 1531 RTE_FLOW_ERROR_TYPE_ITEM, 1532 item, 1533 "VXLAN-GPE tunnel must be fully" 1534 " defined"); 1535 if (size <= flow_size) { 1536 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size); 1537 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1538 } 1539 flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE; 1540 return size; 1541 } 1542 1543 /** 1544 * Update the protocol in Verbs IPv4/IPv6 spec. 1545 * 1546 * @param[in, out] attr 1547 * Pointer to Verbs attributes structure. 1548 * @param[in] search 1549 * Specification type to search in order to update the IP protocol. 1550 * @param[in] protocol 1551 * Protocol value to set if none is present in the specification. 
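 *
 * For example (hedged illustration), when the already queued IPv4 spec left
 * the protocol unmasked:
 *
 *   before: ip.ipv4->val.proto == 0, ip.ipv4->mask.proto == 0
 *   after:  ip.ipv4->val.proto == MLX5_IP_PROTOCOL_GRE (47),
 *           ip.ipv4->mask.proto == 0xff
 *
 * while a protocol explicitly requested by the user is left untouched.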
1552 */ 1553 static void 1554 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr, 1555 enum ibv_flow_spec_type search, 1556 uint8_t protocol) 1557 { 1558 unsigned int i; 1559 struct ibv_spec_header *hdr = (struct ibv_spec_header *) 1560 ((uint8_t *)attr + sizeof(struct ibv_flow_attr)); 1561 1562 if (!attr) 1563 return; 1564 for (i = 0; i != attr->num_of_specs; ++i) { 1565 if (hdr->type == search) { 1566 union { 1567 struct ibv_flow_spec_ipv4_ext *ipv4; 1568 struct ibv_flow_spec_ipv6 *ipv6; 1569 } ip; 1570 1571 switch (search) { 1572 case IBV_FLOW_SPEC_IPV4_EXT: 1573 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr; 1574 if (!ip.ipv4->val.proto) { 1575 ip.ipv4->val.proto = protocol; 1576 ip.ipv4->mask.proto = 0xff; 1577 } 1578 break; 1579 case IBV_FLOW_SPEC_IPV6: 1580 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr; 1581 if (!ip.ipv6->val.next_hdr) { 1582 ip.ipv6->val.next_hdr = protocol; 1583 ip.ipv6->mask.next_hdr = 0xff; 1584 } 1585 break; 1586 default: 1587 break; 1588 } 1589 break; 1590 } 1591 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size); 1592 } 1593 } 1594 1595 /** 1596 * Convert the @p item into a Verbs specification after ensuring the NIC 1597 * will understand and process it correctly. 1598 * It will also update the previous L3 layer with the protocol value matching 1599 * the GRE. 1600 * If the necessary size for the conversion is greater than the @p flow_size, 1601 * nothing is written in @p flow, the validation is still performed. 1602 * 1603 * @param dev 1604 * Pointer to Ethernet device. 1605 * @param[in] item 1606 * Item specification. 1607 * @param[in, out] flow 1608 * Pointer to flow structure. 1609 * @param[in] flow_size 1610 * Size in bytes of the available space in @p flow, if too small, nothing is 1611 * written. 1612 * @param[out] error 1613 * Pointer to error structure. 1614 * 1615 * @return 1616 * On success the number of bytes consumed/necessary, if the returned value 1617 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1618 * otherwise another call with this returned memory size should be done. 1619 * On error, a negative errno value is returned and rte_errno is set. 
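 *
 * Without HAVE_IBV_DEVICE_MPLS_SUPPORT, rdma-core only provides the generic
 * tunnel specification, so a GRE item that asks to match the protocol field
 * is rejected there. A minimal accepted pattern is ETH / IPV4 / GRE / END
 * (hedged illustration), which also forces the preceding IPv4 spec to match
 * IP protocol 47 through mlx5_flow_item_gre_ip_protocol_update().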
1620 */ 1621 static int 1622 mlx5_flow_item_gre(const struct rte_flow_item *item, 1623 struct rte_flow *flow, const size_t flow_size, 1624 struct rte_flow_error *error) 1625 { 1626 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 1627 const struct rte_flow_item_gre *spec = item->spec; 1628 const struct rte_flow_item_gre *mask = item->mask; 1629 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1630 unsigned int size = sizeof(struct ibv_flow_spec_gre); 1631 struct ibv_flow_spec_gre tunnel = { 1632 .type = IBV_FLOW_SPEC_GRE, 1633 .size = size, 1634 }; 1635 #else 1636 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1637 struct ibv_flow_spec_tunnel tunnel = { 1638 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1639 .size = size, 1640 }; 1641 #endif 1642 int ret; 1643 1644 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE) 1645 return rte_flow_error_set(error, ENOTSUP, 1646 RTE_FLOW_ERROR_TYPE_ITEM, 1647 item, 1648 "protocol filtering not compatible" 1649 " with this GRE layer"); 1650 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1651 return rte_flow_error_set(error, ENOTSUP, 1652 RTE_FLOW_ERROR_TYPE_ITEM, 1653 item, 1654 "a tunnel is already present"); 1655 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3)) 1656 return rte_flow_error_set(error, ENOTSUP, 1657 RTE_FLOW_ERROR_TYPE_ITEM, 1658 item, 1659 "L3 Layer is missing"); 1660 if (!mask) 1661 mask = &rte_flow_item_gre_mask; 1662 ret = mlx5_flow_item_acceptable 1663 (item, (const uint8_t *)mask, 1664 (const uint8_t *)&rte_flow_item_gre_mask, 1665 sizeof(struct rte_flow_item_gre), error); 1666 if (ret < 0) 1667 return ret; 1668 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1669 if (spec) { 1670 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver; 1671 tunnel.val.protocol = spec->protocol; 1672 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver; 1673 tunnel.mask.protocol = mask->protocol; 1674 /* Remove unwanted bits from values. */ 1675 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver; 1676 tunnel.val.protocol &= tunnel.mask.protocol; 1677 tunnel.val.key &= tunnel.mask.key; 1678 } 1679 #else 1680 if (spec && (spec->protocol & mask->protocol)) 1681 return rte_flow_error_set(error, ENOTSUP, 1682 RTE_FLOW_ERROR_TYPE_ITEM, 1683 item, 1684 "without MPLS support the" 1685 " specification cannot be used for" 1686 " filtering"); 1687 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1688 if (size <= flow_size) { 1689 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4) 1690 mlx5_flow_item_gre_ip_protocol_update 1691 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT, 1692 MLX5_IP_PROTOCOL_GRE); 1693 else 1694 mlx5_flow_item_gre_ip_protocol_update 1695 (verbs->attr, IBV_FLOW_SPEC_IPV6, 1696 MLX5_IP_PROTOCOL_GRE); 1697 mlx5_flow_spec_verbs_add(flow, &tunnel, size); 1698 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1699 } 1700 flow->layers |= MLX5_FLOW_LAYER_GRE; 1701 return size; 1702 } 1703 1704 /** 1705 * Convert the @p item into a Verbs specification after ensuring the NIC 1706 * will understand and process it correctly. 1707 * If the necessary size for the conversion is greater than the @p flow_size, 1708 * nothing is written in @p flow, the validation is still performed. 1709 * 1710 * @param[in] item 1711 * Item specification. 1712 * @param[in, out] flow 1713 * Pointer to flow structure. 1714 * @param[in] flow_size 1715 * Size in bytes of the available space in @p flow, if too small, nothing is 1716 * written. 1717 * @param[out] error 1718 * Pointer to error structure. 
1719 * 1720 * @return 1721 * On success the number of bytes consumed/necessary, if the returned value 1722 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1723 * otherwise another call with this returned memory size should be done. 1724 * On error, a negative errno value is returned and rte_errno is set. 1725 */ 1726 static int 1727 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused, 1728 struct rte_flow *flow __rte_unused, 1729 const size_t flow_size __rte_unused, 1730 struct rte_flow_error *error) 1731 { 1732 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1733 const struct rte_flow_item_mpls *spec = item->spec; 1734 const struct rte_flow_item_mpls *mask = item->mask; 1735 unsigned int size = sizeof(struct ibv_flow_spec_mpls); 1736 struct ibv_flow_spec_mpls mpls = { 1737 .type = IBV_FLOW_SPEC_MPLS, 1738 .size = size, 1739 }; 1740 int ret; 1741 1742 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS) 1743 return rte_flow_error_set(error, ENOTSUP, 1744 RTE_FLOW_ERROR_TYPE_ITEM, 1745 item, 1746 "protocol filtering not compatible" 1747 " with MPLS layer"); 1748 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1749 return rte_flow_error_set(error, ENOTSUP, 1750 RTE_FLOW_ERROR_TYPE_ITEM, 1751 item, 1752 "a tunnel is already" 1753 " present"); 1754 if (!mask) 1755 mask = &rte_flow_item_mpls_mask; 1756 ret = mlx5_flow_item_acceptable 1757 (item, (const uint8_t *)mask, 1758 (const uint8_t *)&rte_flow_item_mpls_mask, 1759 sizeof(struct rte_flow_item_mpls), error); 1760 if (ret < 0) 1761 return ret; 1762 if (spec) { 1763 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label)); 1764 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label)); 1765 /* Remove unwanted bits from values. */ 1766 mpls.val.label &= mpls.mask.label; 1767 } 1768 if (size <= flow_size) { 1769 mlx5_flow_spec_verbs_add(flow, &mpls, size); 1770 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1771 } 1772 flow->layers |= MLX5_FLOW_LAYER_MPLS; 1773 return size; 1774 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1775 return rte_flow_error_set(error, ENOTSUP, 1776 RTE_FLOW_ERROR_TYPE_ITEM, 1777 item, 1778 "MPLS is not supported by Verbs, please" 1779 " update."); 1780 } 1781 1782 /** 1783 * Convert the @p pattern into a Verbs specifications after ensuring the NIC 1784 * will understand and process it correctly. 1785 * The conversion is performed item per item, each of them is written into 1786 * the @p flow if its size is lesser or equal to @p flow_size. 1787 * Validation and memory consumption computation are still performed until the 1788 * end of @p pattern, unless an error is encountered. 1789 * 1790 * @param[in] pattern 1791 * Flow pattern. 1792 * @param[in, out] flow 1793 * Pointer to the rte_flow structure. 1794 * @param[in] flow_size 1795 * Size in bytes of the available space in @p flow, if too small some 1796 * garbage may be present. 1797 * @param[out] error 1798 * Pointer to error structure. 1799 * 1800 * @return 1801 * On success the number of bytes consumed/necessary, if the returned value 1802 * is lesser or equal to @p flow_size, the @pattern has fully been 1803 * converted, otherwise another call with this returned memory size should 1804 * be done. 1805 * On error, a negative errno value is returned and rte_errno is set. 
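 *
 * Note (illustrative): when @p pattern contains no layer item at all, the
 * fallback at the end of this function converts a zeroed Ethernet item
 * instead, so the two patterns below end up as the same match-all L2 Verbs
 * specification:
 *
 * @code{.c}
 * const struct rte_flow_item empty[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * const struct rte_flow_item equivalent[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_ETH }, // NULL spec/mask: wildcard
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * @endcode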
1806 */ 1807 static int 1808 mlx5_flow_items(struct rte_eth_dev *dev, 1809 const struct rte_flow_item pattern[], 1810 struct rte_flow *flow, const size_t flow_size, 1811 struct rte_flow_error *error) 1812 { 1813 int remain = flow_size; 1814 size_t size = 0; 1815 1816 for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) { 1817 int ret = 0; 1818 1819 switch (pattern->type) { 1820 case RTE_FLOW_ITEM_TYPE_VOID: 1821 break; 1822 case RTE_FLOW_ITEM_TYPE_ETH: 1823 ret = mlx5_flow_item_eth(pattern, flow, remain, error); 1824 break; 1825 case RTE_FLOW_ITEM_TYPE_VLAN: 1826 ret = mlx5_flow_item_vlan(pattern, flow, remain, error); 1827 break; 1828 case RTE_FLOW_ITEM_TYPE_IPV4: 1829 ret = mlx5_flow_item_ipv4(pattern, flow, remain, error); 1830 break; 1831 case RTE_FLOW_ITEM_TYPE_IPV6: 1832 ret = mlx5_flow_item_ipv6(pattern, flow, remain, error); 1833 break; 1834 case RTE_FLOW_ITEM_TYPE_UDP: 1835 ret = mlx5_flow_item_udp(pattern, flow, remain, error); 1836 break; 1837 case RTE_FLOW_ITEM_TYPE_TCP: 1838 ret = mlx5_flow_item_tcp(pattern, flow, remain, error); 1839 break; 1840 case RTE_FLOW_ITEM_TYPE_VXLAN: 1841 ret = mlx5_flow_item_vxlan(pattern, flow, remain, 1842 error); 1843 break; 1844 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: 1845 ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow, 1846 remain, error); 1847 break; 1848 case RTE_FLOW_ITEM_TYPE_GRE: 1849 ret = mlx5_flow_item_gre(pattern, flow, remain, error); 1850 break; 1851 case RTE_FLOW_ITEM_TYPE_MPLS: 1852 ret = mlx5_flow_item_mpls(pattern, flow, remain, error); 1853 break; 1854 default: 1855 return rte_flow_error_set(error, ENOTSUP, 1856 RTE_FLOW_ERROR_TYPE_ITEM, 1857 pattern, 1858 "item not supported"); 1859 } 1860 if (ret < 0) 1861 return ret; 1862 if (remain > ret) 1863 remain -= ret; 1864 else 1865 remain = 0; 1866 size += ret; 1867 } 1868 if (!flow->layers) { 1869 const struct rte_flow_item item = { 1870 .type = RTE_FLOW_ITEM_TYPE_ETH, 1871 }; 1872 1873 return mlx5_flow_item_eth(&item, flow, flow_size, error); 1874 } 1875 return size; 1876 } 1877 1878 /** 1879 * Convert the @p action into a Verbs specification after ensuring the NIC 1880 * will understand and process it correctly. 1881 * If the necessary size for the conversion is greater than the @p flow_size, 1882 * nothing is written in @p flow, the validation is still performed. 1883 * 1884 * @param[in] action 1885 * Action configuration. 1886 * @param[in, out] flow 1887 * Pointer to flow structure. 1888 * @param[in] flow_size 1889 * Size in bytes of the available space in @p flow, if too small, nothing is 1890 * written. 1891 * @param[out] error 1892 * Pointer to error structure. 1893 * 1894 * @return 1895 * On success the number of bytes consumed/necessary, if the returned value 1896 * is lesser or equal to @p flow_size, the @p action has fully been 1897 * converted, otherwise another call with this returned memory size should 1898 * be done. 1899 * On error, a negative errno value is returned and rte_errno is set. 
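 *
 * For illustration (the mark id below is arbitrary), DROP is a fate action
 * and cannot be combined with FLAG/MARK or with another fate action:
 *
 * @code{.c}
 * // Accepted: drop everything matching the pattern.
 * const struct rte_flow_action ok[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * // Rejected here with ENOTSUP: a mark can never reach software once the
 * // packet is dropped.
 * static const struct rte_flow_action_mark mark = { .id = 42 };
 * const struct rte_flow_action rejected[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *         { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode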
1900 */ 1901 static int 1902 mlx5_flow_action_drop(const struct rte_flow_action *action, 1903 struct rte_flow *flow, const size_t flow_size, 1904 struct rte_flow_error *error) 1905 { 1906 unsigned int size = sizeof(struct ibv_flow_spec_action_drop); 1907 struct ibv_flow_spec_action_drop drop = { 1908 .type = IBV_FLOW_SPEC_ACTION_DROP, 1909 .size = size, 1910 }; 1911 1912 if (flow->fate) 1913 return rte_flow_error_set(error, ENOTSUP, 1914 RTE_FLOW_ERROR_TYPE_ACTION, 1915 action, 1916 "multiple fate actions are not" 1917 " supported"); 1918 if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) 1919 return rte_flow_error_set(error, ENOTSUP, 1920 RTE_FLOW_ERROR_TYPE_ACTION, 1921 action, 1922 "drop is not compatible with" 1923 " flag/mark action"); 1924 if (size < flow_size) 1925 mlx5_flow_spec_verbs_add(flow, &drop, size); 1926 flow->fate |= MLX5_FLOW_FATE_DROP; 1927 return size; 1928 } 1929 1930 /** 1931 * Convert the @p action into @p flow after ensuring the NIC will understand 1932 * and process it correctly. 1933 * 1934 * @param[in] dev 1935 * Pointer to Ethernet device structure. 1936 * @param[in] action 1937 * Action configuration. 1938 * @param[in, out] flow 1939 * Pointer to flow structure. 1940 * @param[out] error 1941 * Pointer to error structure. 1942 * 1943 * @return 1944 * 0 on success, a negative errno value otherwise and rte_errno is set. 1945 */ 1946 static int 1947 mlx5_flow_action_queue(struct rte_eth_dev *dev, 1948 const struct rte_flow_action *action, 1949 struct rte_flow *flow, 1950 struct rte_flow_error *error) 1951 { 1952 struct priv *priv = dev->data->dev_private; 1953 const struct rte_flow_action_queue *queue = action->conf; 1954 1955 if (flow->fate) 1956 return rte_flow_error_set(error, ENOTSUP, 1957 RTE_FLOW_ERROR_TYPE_ACTION, 1958 action, 1959 "multiple fate actions are not" 1960 " supported"); 1961 if (queue->index >= priv->rxqs_n) 1962 return rte_flow_error_set(error, EINVAL, 1963 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1964 &queue->index, 1965 "queue index out of range"); 1966 if (!(*priv->rxqs)[queue->index]) 1967 return rte_flow_error_set(error, EINVAL, 1968 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1969 &queue->index, 1970 "queue is not configured"); 1971 if (flow->queue) 1972 (*flow->queue)[0] = queue->index; 1973 flow->rss.queue_num = 1; 1974 flow->fate |= MLX5_FLOW_FATE_QUEUE; 1975 return 0; 1976 } 1977 1978 /** 1979 * Ensure the @p action will be understood and used correctly by the NIC. 1980 * 1981 * @param dev 1982 * Pointer to Ethernet device structure. 1983 * @param action[in] 1984 * Pointer to flow actions array. 1985 * @param flow[in, out] 1986 * Pointer to the rte_flow structure. 1987 * @param error[in, out] 1988 * Pointer to error structure. 1989 * 1990 * @return 1991 * On success @p flow->queue array and @p flow->rss are filled and valid. 1992 * On error, a negative errno value is returned and rte_errno is set. 
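 *
 * A configuration sketch satisfying the checks below (queue indices, key
 * bytes and the 40-byte MLX5_RSS_HASH_KEY_LEN value are assumptions):
 *
 * @code{.c}
 * static uint8_t rss_key[40]; // must hold exactly MLX5_RSS_HASH_KEY_LEN bytes
 * static const uint16_t rss_queues[] = { 0, 1, 2, 3 };
 * const struct rte_flow_action_rss rss = {
 *         .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *         .level = 1, // outermost packet; deeper levels need tunnel RSS support
 *         .types = ETH_RSS_IP | ETH_RSS_UDP,
 *         .key_len = sizeof(rss_key),
 *         .key = rss_key,
 *         .queue_num = RTE_DIM(rss_queues),
 *         .queue = rss_queues,
 * };
 * @endcode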
1993 */ 1994 static int 1995 mlx5_flow_action_rss(struct rte_eth_dev *dev, 1996 const struct rte_flow_action *action, 1997 struct rte_flow *flow, 1998 struct rte_flow_error *error) 1999 { 2000 struct priv *priv = dev->data->dev_private; 2001 const struct rte_flow_action_rss *rss = action->conf; 2002 unsigned int i; 2003 2004 if (flow->fate) 2005 return rte_flow_error_set(error, ENOTSUP, 2006 RTE_FLOW_ERROR_TYPE_ACTION, 2007 action, 2008 "multiple fate actions are not" 2009 " supported"); 2010 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 2011 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 2012 return rte_flow_error_set(error, ENOTSUP, 2013 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2014 &rss->func, 2015 "RSS hash function not supported"); 2016 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 2017 if (rss->level > 2) 2018 #else 2019 if (rss->level > 1) 2020 #endif 2021 return rte_flow_error_set(error, ENOTSUP, 2022 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2023 &rss->level, 2024 "tunnel RSS is not supported"); 2025 if (rss->key_len < MLX5_RSS_HASH_KEY_LEN) 2026 return rte_flow_error_set(error, ENOTSUP, 2027 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2028 &rss->key_len, 2029 "RSS hash key too small"); 2030 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 2031 return rte_flow_error_set(error, ENOTSUP, 2032 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2033 &rss->key_len, 2034 "RSS hash key too large"); 2035 if (rss->queue_num > priv->config.ind_table_max_size) 2036 return rte_flow_error_set(error, ENOTSUP, 2037 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2038 &rss->queue_num, 2039 "number of queues too large"); 2040 if (rss->types & MLX5_RSS_HF_MASK) 2041 return rte_flow_error_set(error, ENOTSUP, 2042 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2043 &rss->types, 2044 "some RSS protocols are not" 2045 " supported"); 2046 for (i = 0; i != rss->queue_num; ++i) { 2047 if (!(*priv->rxqs)[rss->queue[i]]) 2048 return rte_flow_error_set 2049 (error, EINVAL, 2050 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2051 &rss->queue[i], 2052 "queue is not configured"); 2053 } 2054 if (flow->queue) 2055 memcpy((*flow->queue), rss->queue, 2056 rss->queue_num * sizeof(uint16_t)); 2057 flow->rss.queue_num = rss->queue_num; 2058 memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN); 2059 flow->rss.types = rss->types; 2060 flow->rss.level = rss->level; 2061 flow->fate |= MLX5_FLOW_FATE_RSS; 2062 return 0; 2063 } 2064 2065 /** 2066 * Convert the @p action into a Verbs specification after ensuring the NIC 2067 * will understand and process it correctly. 2068 * If the necessary size for the conversion is greater than the @p flow_size, 2069 * nothing is written in @p flow, the validation is still performed. 2070 * 2071 * @param[in] action 2072 * Action configuration. 2073 * @param[in, out] flow 2074 * Pointer to flow structure. 2075 * @param[in] flow_size 2076 * Size in bytes of the available space in @p flow, if too small, nothing is 2077 * written. 2078 * @param[out] error 2079 * Pointer to error structure. 2080 * 2081 * @return 2082 * On success the number of bytes consumed/necessary, if the returned value 2083 * is lesser or equal to @p flow_size, the @p action has fully been 2084 * converted, otherwise another call with this returned memory size should 2085 * be done. 2086 * On error, a negative errno value is returned and rte_errno is set. 
2087 */ 2088 static int 2089 mlx5_flow_action_flag(const struct rte_flow_action *action, 2090 struct rte_flow *flow, const size_t flow_size, 2091 struct rte_flow_error *error) 2092 { 2093 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 2094 struct ibv_flow_spec_action_tag tag = { 2095 .type = IBV_FLOW_SPEC_ACTION_TAG, 2096 .size = size, 2097 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT), 2098 }; 2099 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 2100 2101 if (flow->modifier & MLX5_FLOW_MOD_FLAG) 2102 return rte_flow_error_set(error, ENOTSUP, 2103 RTE_FLOW_ERROR_TYPE_ACTION, 2104 action, 2105 "flag action already present"); 2106 if (flow->fate & MLX5_FLOW_FATE_DROP) 2107 return rte_flow_error_set(error, ENOTSUP, 2108 RTE_FLOW_ERROR_TYPE_ACTION, 2109 action, 2110 "flag is not compatible with drop" 2111 " action"); 2112 if (flow->modifier & MLX5_FLOW_MOD_MARK) 2113 size = 0; 2114 else if (size <= flow_size && verbs) 2115 mlx5_flow_spec_verbs_add(flow, &tag, size); 2116 flow->modifier |= MLX5_FLOW_MOD_FLAG; 2117 return size; 2118 } 2119 2120 /** 2121 * Update verbs specification to modify the flag to mark. 2122 * 2123 * @param[in, out] verbs 2124 * Pointer to the mlx5_flow_verbs structure. 2125 * @param[in] mark_id 2126 * Mark identifier to replace the flag. 2127 */ 2128 static void 2129 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id) 2130 { 2131 struct ibv_spec_header *hdr; 2132 int i; 2133 2134 if (!verbs) 2135 return; 2136 /* Update Verbs specification. */ 2137 hdr = (struct ibv_spec_header *)verbs->specs; 2138 if (!hdr) 2139 return; 2140 for (i = 0; i != verbs->attr->num_of_specs; ++i) { 2141 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) { 2142 struct ibv_flow_spec_action_tag *t = 2143 (struct ibv_flow_spec_action_tag *)hdr; 2144 2145 t->tag_id = mlx5_flow_mark_set(mark_id); 2146 } 2147 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size); 2148 } 2149 } 2150 2151 /** 2152 * Convert the @p action into @p flow (or by updating the already present 2153 * Flag Verbs specification) after ensuring the NIC will understand and 2154 * process it correctly. 2155 * If the necessary size for the conversion is greater than the @p flow_size, 2156 * nothing is written in @p flow, the validation is still performed. 2157 * 2158 * @param[in] action 2159 * Action configuration. 2160 * @param[in, out] flow 2161 * Pointer to flow structure. 2162 * @param[in] flow_size 2163 * Size in bytes of the available space in @p flow, if too small, nothing is 2164 * written. 2165 * @param[out] error 2166 * Pointer to error structure. 2167 * 2168 * @return 2169 * On success the number of bytes consumed/necessary, if the returned value 2170 * is lesser or equal to @p flow_size, the @p action has fully been 2171 * converted, otherwise another call with this returned memory size should 2172 * be done. 2173 * On error, a negative errno value is returned and rte_errno is set. 
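 *
 * Sketch of the coalescing behaviour (the mark id and queue index are
 * arbitrary): when a FLAG action has already been converted, the existing
 * tag specification is rewritten in place and this function reports zero
 * additional bytes:
 *
 * @code{.c}
 * static const struct rte_flow_action_mark mark = { .id = 42 };
 * static const struct rte_flow_action_queue queue = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_FLAG },                  // tag_id = DEFAULT
 *         { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },   // tag_id -> 42
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode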
2174 */ 2175 static int 2176 mlx5_flow_action_mark(const struct rte_flow_action *action, 2177 struct rte_flow *flow, const size_t flow_size, 2178 struct rte_flow_error *error) 2179 { 2180 const struct rte_flow_action_mark *mark = action->conf; 2181 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 2182 struct ibv_flow_spec_action_tag tag = { 2183 .type = IBV_FLOW_SPEC_ACTION_TAG, 2184 .size = size, 2185 }; 2186 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 2187 2188 if (!mark) 2189 return rte_flow_error_set(error, EINVAL, 2190 RTE_FLOW_ERROR_TYPE_ACTION, 2191 action, 2192 "configuration cannot be null"); 2193 if (mark->id >= MLX5_FLOW_MARK_MAX) 2194 return rte_flow_error_set(error, EINVAL, 2195 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2196 &mark->id, 2197 "mark id must be in 0 <= id < " 2198 RTE_STR(MLX5_FLOW_MARK_MAX)); 2199 if (flow->modifier & MLX5_FLOW_MOD_MARK) 2200 return rte_flow_error_set(error, ENOTSUP, 2201 RTE_FLOW_ERROR_TYPE_ACTION, 2202 action, 2203 "mark action already present"); 2204 if (flow->fate & MLX5_FLOW_FATE_DROP) 2205 return rte_flow_error_set(error, ENOTSUP, 2206 RTE_FLOW_ERROR_TYPE_ACTION, 2207 action, 2208 "mark is not compatible with drop" 2209 " action"); 2210 if (flow->modifier & MLX5_FLOW_MOD_FLAG) { 2211 mlx5_flow_verbs_mark_update(verbs, mark->id); 2212 size = 0; 2213 } else if (size <= flow_size) { 2214 tag.tag_id = mlx5_flow_mark_set(mark->id); 2215 mlx5_flow_spec_verbs_add(flow, &tag, size); 2216 } 2217 flow->modifier |= MLX5_FLOW_MOD_MARK; 2218 return size; 2219 } 2220 2221 /** 2222 * Convert the @p action into a Verbs specification after ensuring the NIC 2223 * will understand and process it correctly. 2224 * If the necessary size for the conversion is greater than the @p flow_size, 2225 * nothing is written in @p flow, the validation is still performed. 2226 * 2227 * @param action[in] 2228 * Action configuration. 2229 * @param flow[in, out] 2230 * Pointer to flow structure. 2231 * @param flow_size[in] 2232 * Size in bytes of the available space in @p flow, if too small, nothing is 2233 * written. 2234 * @param error[in, out] 2235 * Pointer to error structure. 2236 * 2237 * @return 2238 * On success the number of bytes consumed/necessary, if the returned value 2239 * is lesser or equal to @p flow_size, the @p action has fully been 2240 * converted, otherwise another call with this returned memory size should 2241 * be done. 2242 * On error, a negative errno value is returned and rte_errno is set. 
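 *
 * Illustrative end-to-end usage (port id, counter id and variable names are
 * assumptions): the counter attached here is read back later through
 * mlx5_flow_query_count():
 *
 * @code{.c}
 * static const struct rte_flow_action_count count_conf = { .shared = 0, .id = 0 };
 * const struct rte_flow_action count_action[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count_conf },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow_query_count stats = { .reset = 1 };
 * struct rte_flow_error err;
 *
 * rte_flow_query(port_id, flow, count_action, &stats, &err);
 * // stats.hits/stats.bytes hold the totals since the previous reset.
 * @endcode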
2243 */ 2244 static int 2245 mlx5_flow_action_count(struct rte_eth_dev *dev, 2246 const struct rte_flow_action *action, 2247 struct rte_flow *flow, 2248 const size_t flow_size __rte_unused, 2249 struct rte_flow_error *error) 2250 { 2251 const struct rte_flow_action_count *count = action->conf; 2252 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2253 unsigned int size = sizeof(struct ibv_flow_spec_counter_action); 2254 struct ibv_flow_spec_counter_action counter = { 2255 .type = IBV_FLOW_SPEC_ACTION_COUNT, 2256 .size = size, 2257 }; 2258 #endif 2259 2260 if (!flow->counter) { 2261 flow->counter = mlx5_flow_counter_new(dev, count->shared, 2262 count->id); 2263 if (!flow->counter) 2264 return rte_flow_error_set(error, ENOTSUP, 2265 RTE_FLOW_ERROR_TYPE_ACTION, 2266 action, 2267 "cannot get counter" 2268 " context."); 2269 } 2270 if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en) 2271 return rte_flow_error_set(error, ENOTSUP, 2272 RTE_FLOW_ERROR_TYPE_ACTION, 2273 action, 2274 "flow counters are not supported."); 2275 flow->modifier |= MLX5_FLOW_MOD_COUNT; 2276 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2277 counter.counter_set_handle = flow->counter->cs->handle; 2278 if (size <= flow_size) 2279 mlx5_flow_spec_verbs_add(flow, &counter, size); 2280 return size; 2281 #endif 2282 return 0; 2283 } 2284 2285 /** 2286 * Convert the @p action into @p flow after ensuring the NIC will understand 2287 * and process it correctly. 2288 * The conversion is performed action per action, each of them is written into 2289 * the @p flow if its size is lesser or equal to @p flow_size. 2290 * Validation and memory consumption computation are still performed until the 2291 * end of @p action, unless an error is encountered. 2292 * 2293 * @param[in] dev 2294 * Pointer to Ethernet device structure. 2295 * @param[in] actions 2296 * Pointer to flow actions array. 2297 * @param[in, out] flow 2298 * Pointer to the rte_flow structure. 2299 * @param[in] flow_size 2300 * Size in bytes of the available space in @p flow, if too small some 2301 * garbage may be present. 2302 * @param[out] error 2303 * Pointer to error structure. 2304 * 2305 * @return 2306 * On success the number of bytes consumed/necessary, if the returned value 2307 * is lesser or equal to @p flow_size, the @p actions has fully been 2308 * converted, otherwise another call with this returned memory size should 2309 * be done. 2310 * On error, a negative errno value is returned and rte_errno is set. 
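 *
 * For illustration, the smallest list this function accepts carries exactly
 * one fate action (the queue index is an assumption):
 *
 * @code{.c}
 * static const struct rte_flow_action_queue q = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &q },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * // A list containing only FLAG/MARK/COUNT modifiers is rejected with
 * // "no fate action found".
 * @endcode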
2311 */ 2312 static int 2313 mlx5_flow_actions(struct rte_eth_dev *dev, 2314 const struct rte_flow_action actions[], 2315 struct rte_flow *flow, const size_t flow_size, 2316 struct rte_flow_error *error) 2317 { 2318 size_t size = 0; 2319 int remain = flow_size; 2320 int ret = 0; 2321 2322 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2323 switch (actions->type) { 2324 case RTE_FLOW_ACTION_TYPE_VOID: 2325 break; 2326 case RTE_FLOW_ACTION_TYPE_FLAG: 2327 ret = mlx5_flow_action_flag(actions, flow, remain, 2328 error); 2329 break; 2330 case RTE_FLOW_ACTION_TYPE_MARK: 2331 ret = mlx5_flow_action_mark(actions, flow, remain, 2332 error); 2333 break; 2334 case RTE_FLOW_ACTION_TYPE_DROP: 2335 ret = mlx5_flow_action_drop(actions, flow, remain, 2336 error); 2337 break; 2338 case RTE_FLOW_ACTION_TYPE_QUEUE: 2339 ret = mlx5_flow_action_queue(dev, actions, flow, error); 2340 break; 2341 case RTE_FLOW_ACTION_TYPE_RSS: 2342 ret = mlx5_flow_action_rss(dev, actions, flow, error); 2343 break; 2344 case RTE_FLOW_ACTION_TYPE_COUNT: 2345 ret = mlx5_flow_action_count(dev, actions, flow, remain, 2346 error); 2347 break; 2348 default: 2349 return rte_flow_error_set(error, ENOTSUP, 2350 RTE_FLOW_ERROR_TYPE_ACTION, 2351 actions, 2352 "action not supported"); 2353 } 2354 if (ret < 0) 2355 return ret; 2356 if (remain > ret) 2357 remain -= ret; 2358 else 2359 remain = 0; 2360 size += ret; 2361 } 2362 if (!flow->fate) 2363 return rte_flow_error_set(error, ENOTSUP, 2364 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2365 NULL, 2366 "no fate action found"); 2367 return size; 2368 } 2369 2370 /** 2371 * Validate flow rule and fill flow structure accordingly. 2372 * 2373 * @param dev 2374 * Pointer to Ethernet device. 2375 * @param[out] flow 2376 * Pointer to flow structure. 2377 * @param flow_size 2378 * Size of allocated space for @p flow. 2379 * @param[in] attr 2380 * Flow rule attributes. 2381 * @param[in] pattern 2382 * Pattern specification (list terminated by the END pattern item). 2383 * @param[in] actions 2384 * Associated actions (list terminated by the END action). 2385 * @param[out] error 2386 * Perform verbose error reporting if not NULL. 2387 * 2388 * @return 2389 * A positive value representing the size of the flow object in bytes 2390 * regardless of @p flow_size on success, a negative errno value otherwise 2391 * and rte_errno is set. 2392 */ 2393 static int 2394 mlx5_flow_merge_switch(struct rte_eth_dev *dev, 2395 struct rte_flow *flow, 2396 size_t flow_size, 2397 const struct rte_flow_attr *attr, 2398 const struct rte_flow_item pattern[], 2399 const struct rte_flow_action actions[], 2400 struct rte_flow_error *error) 2401 { 2402 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0); 2403 uint16_t port_id[!n + n]; 2404 struct mlx5_nl_flow_ptoi ptoi[!n + n + 1]; 2405 size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t)); 2406 unsigned int i; 2407 unsigned int own = 0; 2408 int ret; 2409 2410 /* At least one port is needed when no switch domain is present. */ 2411 if (!n) { 2412 n = 1; 2413 port_id[0] = dev->data->port_id; 2414 } else { 2415 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n); 2416 } 2417 for (i = 0; i != n; ++i) { 2418 struct rte_eth_dev_info dev_info; 2419 2420 rte_eth_dev_info_get(port_id[i], &dev_info); 2421 if (port_id[i] == dev->data->port_id) 2422 own = i; 2423 ptoi[i].port_id = port_id[i]; 2424 ptoi[i].ifindex = dev_info.if_index; 2425 } 2426 /* Ensure first entry of ptoi[] is the current device. 
*/ 2427 if (own) { 2428 ptoi[n] = ptoi[0]; 2429 ptoi[0] = ptoi[own]; 2430 ptoi[own] = ptoi[n]; 2431 } 2432 /* An entry with zero ifindex terminates ptoi[]. */ 2433 ptoi[n].port_id = 0; 2434 ptoi[n].ifindex = 0; 2435 if (flow_size < off) 2436 flow_size = 0; 2437 ret = mlx5_nl_flow_transpose((uint8_t *)flow + off, 2438 flow_size ? flow_size - off : 0, 2439 ptoi, attr, pattern, actions, error); 2440 if (ret < 0) 2441 return ret; 2442 if (flow_size) { 2443 *flow = (struct rte_flow){ 2444 .attributes = *attr, 2445 .nl_flow = (uint8_t *)flow + off, 2446 }; 2447 /* 2448 * Generate a reasonably unique handle based on the address 2449 * of the target buffer. 2450 * 2451 * This is straightforward on 32-bit systems where the flow 2452 * pointer can be used directly. Otherwise, its least 2453 * significant part is taken after shifting it by the 2454 * previous power of two of the pointed buffer size. 2455 */ 2456 if (sizeof(flow) <= 4) 2457 mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow); 2458 else 2459 mlx5_nl_flow_brand 2460 (flow->nl_flow, 2461 (uintptr_t)flow >> 2462 rte_log2_u32(rte_align32prevpow2(flow_size))); 2463 } 2464 return off + ret; 2465 } 2466 2467 /** 2468 * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC 2469 * after ensuring the NIC will understand and process it correctly. 2470 * The conversion is only performed item/action per item/action, each of 2471 * them is written into the @p flow if its size is lesser or equal to @p 2472 * flow_size. 2473 * Validation and memory consumption computation are still performed until the 2474 * end, unless an error is encountered. 2475 * 2476 * @param[in] dev 2477 * Pointer to Ethernet device. 2478 * @param[in, out] flow 2479 * Pointer to flow structure. 2480 * @param[in] flow_size 2481 * Size in bytes of the available space in @p flow, if too small some 2482 * garbage may be present. 2483 * @param[in] attributes 2484 * Flow rule attributes. 2485 * @param[in] pattern 2486 * Pattern specification (list terminated by the END pattern item). 2487 * @param[in] actions 2488 * Associated actions (list terminated by the END action). 2489 * @param[out] error 2490 * Perform verbose error reporting if not NULL. 2491 * 2492 * @return 2493 * On success the number of bytes consumed/necessary, if the returned value 2494 * is lesser or equal to @p flow_size, the flow has fully been converted and 2495 * can be applied, otherwise another call with this returned memory size 2496 * should be done. 2497 * On error, a negative errno value is returned and rte_errno is set. 
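 *
 * The calling convention, as used by mlx5_flow_list_create() below (sketch
 * only, error handling trimmed):
 *
 * @code{.c}
 * // First pass: no buffer, only validate and compute the required size.
 * int size = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, error);
 *
 * if (size < 0)
 *         return NULL;
 * struct rte_flow *flow = rte_calloc(__func__, 1, size, 0);
 * // Second pass: same arguments, the flow is now actually filled in.
 * if (mlx5_flow_merge(dev, flow, size, attr, pattern, actions, error) < 0) {
 *         rte_free(flow);
 *         return NULL;
 * }
 * @endcode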
2498 */ 2499 static int 2500 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow, 2501 const size_t flow_size, 2502 const struct rte_flow_attr *attributes, 2503 const struct rte_flow_item pattern[], 2504 const struct rte_flow_action actions[], 2505 struct rte_flow_error *error) 2506 { 2507 struct rte_flow local_flow = { .layers = 0, }; 2508 size_t size = sizeof(*flow); 2509 union { 2510 struct rte_flow_expand_rss buf; 2511 uint8_t buffer[2048]; 2512 } expand_buffer; 2513 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 2514 struct mlx5_flow_verbs *original_verbs = NULL; 2515 size_t original_verbs_size = 0; 2516 uint32_t original_layers = 0; 2517 int expanded_pattern_idx = 0; 2518 int ret; 2519 uint32_t i; 2520 2521 if (attributes->transfer) 2522 return mlx5_flow_merge_switch(dev, flow, flow_size, 2523 attributes, pattern, 2524 actions, error); 2525 if (size > flow_size) 2526 flow = &local_flow; 2527 ret = mlx5_flow_attributes(dev, attributes, flow, error); 2528 if (ret < 0) 2529 return ret; 2530 ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error); 2531 if (ret < 0) 2532 return ret; 2533 if (local_flow.rss.types) { 2534 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 2535 pattern, local_flow.rss.types, 2536 mlx5_support_expansion, 2537 local_flow.rss.level < 2 ? 2538 MLX5_EXPANSION_ROOT : 2539 MLX5_EXPANSION_ROOT_OUTER); 2540 assert(ret > 0 && 2541 (unsigned int)ret < sizeof(expand_buffer.buffer)); 2542 } else { 2543 buf->entries = 1; 2544 buf->entry[0].pattern = (void *)(uintptr_t)pattern; 2545 } 2546 size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t), 2547 sizeof(void *)); 2548 if (size <= flow_size) 2549 flow->queue = (void *)(flow + 1); 2550 LIST_INIT(&flow->verbs); 2551 flow->layers = 0; 2552 flow->modifier = 0; 2553 flow->fate = 0; 2554 for (i = 0; i != buf->entries; ++i) { 2555 size_t off = size; 2556 size_t off2; 2557 2558 flow->layers = original_layers; 2559 size += sizeof(struct ibv_flow_attr) + 2560 sizeof(struct mlx5_flow_verbs); 2561 off2 = size; 2562 if (size < flow_size) { 2563 flow->cur_verbs = (void *)((uintptr_t)flow + off); 2564 flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1); 2565 flow->cur_verbs->specs = 2566 (void *)(flow->cur_verbs->attr + 1); 2567 } 2568 /* First iteration convert the pattern into Verbs. */ 2569 if (i == 0) { 2570 /* Actions don't need to be converted several time. */ 2571 ret = mlx5_flow_actions(dev, actions, flow, 2572 (size < flow_size) ? 2573 flow_size - size : 0, 2574 error); 2575 if (ret < 0) 2576 return ret; 2577 size += ret; 2578 } else { 2579 /* 2580 * Next iteration means the pattern has already been 2581 * converted and an expansion is necessary to match 2582 * the user RSS request. For that only the expanded 2583 * items will be converted, the common part with the 2584 * user pattern are just copied into the next buffer 2585 * zone. 2586 */ 2587 size += original_verbs_size; 2588 if (size < flow_size) { 2589 rte_memcpy(flow->cur_verbs->attr, 2590 original_verbs->attr, 2591 original_verbs_size + 2592 sizeof(struct ibv_flow_attr)); 2593 flow->cur_verbs->size = original_verbs_size; 2594 } 2595 } 2596 ret = mlx5_flow_items 2597 (dev, 2598 (const struct rte_flow_item *) 2599 &buf->entry[i].pattern[expanded_pattern_idx], 2600 flow, 2601 (size < flow_size) ? 
flow_size - size : 0, error); 2602 if (ret < 0) 2603 return ret; 2604 size += ret; 2605 if (size <= flow_size) { 2606 mlx5_flow_adjust_priority(dev, flow); 2607 LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next); 2608 } 2609 /* 2610 * Keep a pointer of the first verbs conversion and the layers 2611 * it has encountered. 2612 */ 2613 if (i == 0) { 2614 original_verbs = flow->cur_verbs; 2615 original_verbs_size = size - off2; 2616 original_layers = flow->layers; 2617 /* 2618 * move the index of the expanded pattern to the 2619 * first item not addressed yet. 2620 */ 2621 if (pattern->type == RTE_FLOW_ITEM_TYPE_END) { 2622 expanded_pattern_idx++; 2623 } else { 2624 const struct rte_flow_item *item = pattern; 2625 2626 for (item = pattern; 2627 item->type != RTE_FLOW_ITEM_TYPE_END; 2628 ++item) 2629 expanded_pattern_idx++; 2630 } 2631 } 2632 } 2633 /* Restore the origin layers in the flow. */ 2634 flow->layers = original_layers; 2635 return size; 2636 } 2637 2638 /** 2639 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 2640 * if several tunnel rules are used on this queue, the tunnel ptype will be 2641 * cleared. 2642 * 2643 * @param rxq_ctrl 2644 * Rx queue to update. 2645 */ 2646 static void 2647 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 2648 { 2649 unsigned int i; 2650 uint32_t tunnel_ptype = 0; 2651 2652 /* Look up for the ptype to use. */ 2653 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 2654 if (!rxq_ctrl->flow_tunnels_n[i]) 2655 continue; 2656 if (!tunnel_ptype) { 2657 tunnel_ptype = tunnels_info[i].ptype; 2658 } else { 2659 tunnel_ptype = 0; 2660 break; 2661 } 2662 } 2663 rxq_ctrl->rxq.tunnel = tunnel_ptype; 2664 } 2665 2666 /** 2667 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow. 2668 * 2669 * @param[in] dev 2670 * Pointer to Ethernet device. 2671 * @param[in] flow 2672 * Pointer to flow structure. 2673 */ 2674 static void 2675 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 2676 { 2677 struct priv *priv = dev->data->dev_private; 2678 const int mark = !!(flow->modifier & 2679 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2680 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2681 unsigned int i; 2682 2683 for (i = 0; i != flow->rss.queue_num; ++i) { 2684 int idx = (*flow->queue)[i]; 2685 struct mlx5_rxq_ctrl *rxq_ctrl = 2686 container_of((*priv->rxqs)[idx], 2687 struct mlx5_rxq_ctrl, rxq); 2688 2689 if (mark) { 2690 rxq_ctrl->rxq.mark = 1; 2691 rxq_ctrl->flow_mark_n++; 2692 } 2693 if (tunnel) { 2694 unsigned int j; 2695 2696 /* Increase the counter matching the flow. */ 2697 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2698 if ((tunnels_info[j].tunnel & flow->layers) == 2699 tunnels_info[j].tunnel) { 2700 rxq_ctrl->flow_tunnels_n[j]++; 2701 break; 2702 } 2703 } 2704 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2705 } 2706 } 2707 } 2708 2709 /** 2710 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 2711 * @p flow if no other flow uses it with the same kind of request. 2712 * 2713 * @param dev 2714 * Pointer to Ethernet device. 2715 * @param[in] flow 2716 * Pointer to the flow. 
2717 */ 2718 static void 2719 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 2720 { 2721 struct priv *priv = dev->data->dev_private; 2722 const int mark = !!(flow->modifier & 2723 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2724 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2725 unsigned int i; 2726 2727 assert(dev->data->dev_started); 2728 for (i = 0; i != flow->rss.queue_num; ++i) { 2729 int idx = (*flow->queue)[i]; 2730 struct mlx5_rxq_ctrl *rxq_ctrl = 2731 container_of((*priv->rxqs)[idx], 2732 struct mlx5_rxq_ctrl, rxq); 2733 2734 if (mark) { 2735 rxq_ctrl->flow_mark_n--; 2736 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 2737 } 2738 if (tunnel) { 2739 unsigned int j; 2740 2741 /* Decrease the counter matching the flow. */ 2742 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2743 if ((tunnels_info[j].tunnel & flow->layers) == 2744 tunnels_info[j].tunnel) { 2745 rxq_ctrl->flow_tunnels_n[j]--; 2746 break; 2747 } 2748 } 2749 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2750 } 2751 } 2752 } 2753 2754 /** 2755 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 2756 * 2757 * @param dev 2758 * Pointer to Ethernet device. 2759 */ 2760 static void 2761 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev) 2762 { 2763 struct priv *priv = dev->data->dev_private; 2764 unsigned int i; 2765 2766 for (i = 0; i != priv->rxqs_n; ++i) { 2767 struct mlx5_rxq_ctrl *rxq_ctrl; 2768 unsigned int j; 2769 2770 if (!(*priv->rxqs)[i]) 2771 continue; 2772 rxq_ctrl = container_of((*priv->rxqs)[i], 2773 struct mlx5_rxq_ctrl, rxq); 2774 rxq_ctrl->flow_mark_n = 0; 2775 rxq_ctrl->rxq.mark = 0; 2776 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 2777 rxq_ctrl->flow_tunnels_n[j] = 0; 2778 rxq_ctrl->rxq.tunnel = 0; 2779 } 2780 } 2781 2782 /** 2783 * Validate a flow supported by the NIC. 2784 * 2785 * @see rte_flow_validate() 2786 * @see rte_flow_ops 2787 */ 2788 int 2789 mlx5_flow_validate(struct rte_eth_dev *dev, 2790 const struct rte_flow_attr *attr, 2791 const struct rte_flow_item items[], 2792 const struct rte_flow_action actions[], 2793 struct rte_flow_error *error) 2794 { 2795 int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error); 2796 2797 if (ret < 0) 2798 return ret; 2799 return 0; 2800 } 2801 2802 /** 2803 * Remove the flow. 2804 * 2805 * @param[in] dev 2806 * Pointer to Ethernet device. 2807 * @param[in, out] flow 2808 * Pointer to flow structure. 2809 */ 2810 static void 2811 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2812 { 2813 struct priv *priv = dev->data->dev_private; 2814 struct mlx5_flow_verbs *verbs; 2815 2816 if (flow->nl_flow && priv->mnl_socket) 2817 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL); 2818 LIST_FOREACH(verbs, &flow->verbs, next) { 2819 if (verbs->flow) { 2820 claim_zero(mlx5_glue->destroy_flow(verbs->flow)); 2821 verbs->flow = NULL; 2822 } 2823 if (verbs->hrxq) { 2824 if (flow->fate & MLX5_FLOW_FATE_DROP) 2825 mlx5_hrxq_drop_release(dev); 2826 else 2827 mlx5_hrxq_release(dev, verbs->hrxq); 2828 verbs->hrxq = NULL; 2829 } 2830 } 2831 if (flow->counter) { 2832 mlx5_flow_counter_release(flow->counter); 2833 flow->counter = NULL; 2834 } 2835 } 2836 2837 /** 2838 * Apply the flow. 2839 * 2840 * @param[in] dev 2841 * Pointer to Ethernet device structure. 2842 * @param[in, out] flow 2843 * Pointer to flow structure. 2844 * @param[out] error 2845 * Pointer to error structure. 2846 * 2847 * @return 2848 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2849 */ 2850 static int 2851 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2852 struct rte_flow_error *error) 2853 { 2854 struct priv *priv = dev->data->dev_private; 2855 struct mlx5_flow_verbs *verbs; 2856 int err; 2857 2858 LIST_FOREACH(verbs, &flow->verbs, next) { 2859 if (flow->fate & MLX5_FLOW_FATE_DROP) { 2860 verbs->hrxq = mlx5_hrxq_drop_new(dev); 2861 if (!verbs->hrxq) { 2862 rte_flow_error_set 2863 (error, errno, 2864 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2865 NULL, 2866 "cannot get drop hash queue"); 2867 goto error; 2868 } 2869 } else { 2870 struct mlx5_hrxq *hrxq; 2871 2872 hrxq = mlx5_hrxq_get(dev, flow->key, 2873 MLX5_RSS_HASH_KEY_LEN, 2874 verbs->hash_fields, 2875 (*flow->queue), 2876 flow->rss.queue_num); 2877 if (!hrxq) 2878 hrxq = mlx5_hrxq_new(dev, flow->key, 2879 MLX5_RSS_HASH_KEY_LEN, 2880 verbs->hash_fields, 2881 (*flow->queue), 2882 flow->rss.queue_num); 2883 if (!hrxq) { 2884 rte_flow_error_set 2885 (error, rte_errno, 2886 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2887 NULL, 2888 "cannot get hash queue"); 2889 goto error; 2890 } 2891 verbs->hrxq = hrxq; 2892 } 2893 verbs->flow = 2894 mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr); 2895 if (!verbs->flow) { 2896 rte_flow_error_set(error, errno, 2897 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2898 NULL, 2899 "hardware refuses to create flow"); 2900 goto error; 2901 } 2902 } 2903 if (flow->nl_flow && 2904 priv->mnl_socket && 2905 mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error)) 2906 goto error; 2907 return 0; 2908 error: 2909 err = rte_errno; /* Save rte_errno before cleanup. */ 2910 LIST_FOREACH(verbs, &flow->verbs, next) { 2911 if (verbs->hrxq) { 2912 if (flow->fate & MLX5_FLOW_FATE_DROP) 2913 mlx5_hrxq_drop_release(dev); 2914 else 2915 mlx5_hrxq_release(dev, verbs->hrxq); 2916 verbs->hrxq = NULL; 2917 } 2918 } 2919 rte_errno = err; /* Restore rte_errno. */ 2920 return -rte_errno; 2921 } 2922 2923 /** 2924 * Create a flow and add it to @p list. 2925 * 2926 * @param dev 2927 * Pointer to Ethernet device. 2928 * @param list 2929 * Pointer to a TAILQ flow list. 2930 * @param[in] attr 2931 * Flow rule attributes. 2932 * @param[in] items 2933 * Pattern specification (list terminated by the END pattern item). 2934 * @param[in] actions 2935 * Associated actions (list terminated by the END action). 2936 * @param[out] error 2937 * Perform verbose error reporting if not NULL. 2938 * 2939 * @return 2940 * A flow on success, NULL otherwise and rte_errno is set. 2941 */ 2942 static struct rte_flow * 2943 mlx5_flow_list_create(struct rte_eth_dev *dev, 2944 struct mlx5_flows *list, 2945 const struct rte_flow_attr *attr, 2946 const struct rte_flow_item items[], 2947 const struct rte_flow_action actions[], 2948 struct rte_flow_error *error) 2949 { 2950 struct rte_flow *flow = NULL; 2951 size_t size = 0; 2952 int ret; 2953 2954 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 2955 if (ret < 0) 2956 return NULL; 2957 size = ret; 2958 flow = rte_calloc(__func__, 1, size, 0); 2959 if (!flow) { 2960 rte_flow_error_set(error, ENOMEM, 2961 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2962 NULL, 2963 "not enough memory to create flow"); 2964 return NULL; 2965 } 2966 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 2967 if (ret < 0) { 2968 rte_free(flow); 2969 return NULL; 2970 } 2971 assert((size_t)ret == size); 2972 if (dev->data->dev_started) { 2973 ret = mlx5_flow_apply(dev, flow, error); 2974 if (ret < 0) { 2975 ret = rte_errno; /* Save rte_errno before cleanup. 
*/ 2976 if (flow) { 2977 mlx5_flow_remove(dev, flow); 2978 rte_free(flow); 2979 } 2980 rte_errno = ret; /* Restore rte_errno. */ 2981 return NULL; 2982 } 2983 } 2984 TAILQ_INSERT_TAIL(list, flow, next); 2985 mlx5_flow_rxq_flags_set(dev, flow); 2986 return flow; 2987 } 2988 2989 /** 2990 * Create a flow. 2991 * 2992 * @see rte_flow_create() 2993 * @see rte_flow_ops 2994 */ 2995 struct rte_flow * 2996 mlx5_flow_create(struct rte_eth_dev *dev, 2997 const struct rte_flow_attr *attr, 2998 const struct rte_flow_item items[], 2999 const struct rte_flow_action actions[], 3000 struct rte_flow_error *error) 3001 { 3002 return mlx5_flow_list_create 3003 (dev, &((struct priv *)dev->data->dev_private)->flows, 3004 attr, items, actions, error); 3005 } 3006 3007 /** 3008 * Destroy a flow in a list. 3009 * 3010 * @param dev 3011 * Pointer to Ethernet device. 3012 * @param list 3013 * Pointer to a TAILQ flow list. 3014 * @param[in] flow 3015 * Flow to destroy. 3016 */ 3017 static void 3018 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, 3019 struct rte_flow *flow) 3020 { 3021 mlx5_flow_remove(dev, flow); 3022 TAILQ_REMOVE(list, flow, next); 3023 /* 3024 * Update RX queue flags only if port is started, otherwise it is 3025 * already clean. 3026 */ 3027 if (dev->data->dev_started) 3028 mlx5_flow_rxq_flags_trim(dev, flow); 3029 rte_free(flow); 3030 } 3031 3032 /** 3033 * Destroy all flows. 3034 * 3035 * @param dev 3036 * Pointer to Ethernet device. 3037 * @param list 3038 * Pointer to a TAILQ flow list. 3039 */ 3040 void 3041 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list) 3042 { 3043 while (!TAILQ_EMPTY(list)) { 3044 struct rte_flow *flow; 3045 3046 flow = TAILQ_FIRST(list); 3047 mlx5_flow_list_destroy(dev, list, flow); 3048 } 3049 } 3050 3051 /** 3052 * Remove all flows. 3053 * 3054 * @param dev 3055 * Pointer to Ethernet device. 3056 * @param list 3057 * Pointer to a TAILQ flow list. 3058 */ 3059 void 3060 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list) 3061 { 3062 struct rte_flow *flow; 3063 3064 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) 3065 mlx5_flow_remove(dev, flow); 3066 mlx5_flow_rxq_flags_clear(dev); 3067 } 3068 3069 /** 3070 * Add all flows. 3071 * 3072 * @param dev 3073 * Pointer to Ethernet device. 3074 * @param list 3075 * Pointer to a TAILQ flow list. 3076 * 3077 * @return 3078 * 0 on success, a negative errno value otherwise and rte_errno is set. 3079 */ 3080 int 3081 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list) 3082 { 3083 struct rte_flow *flow; 3084 struct rte_flow_error error; 3085 int ret = 0; 3086 3087 TAILQ_FOREACH(flow, list, next) { 3088 ret = mlx5_flow_apply(dev, flow, &error); 3089 if (ret < 0) 3090 goto error; 3091 mlx5_flow_rxq_flags_set(dev, flow); 3092 } 3093 return 0; 3094 error: 3095 ret = rte_errno; /* Save rte_errno before cleanup. */ 3096 mlx5_flow_stop(dev, list); 3097 rte_errno = ret; /* Restore rte_errno. */ 3098 return -rte_errno; 3099 } 3100 3101 /** 3102 * Verify the flow list is empty 3103 * 3104 * @param dev 3105 * Pointer to Ethernet device. 3106 * 3107 * @return the number of flows not released. 
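 *
 * Typical use at device close time (sketch only, the log message is
 * illustrative):
 *
 * @code{.c}
 * int remaining = mlx5_flow_verify(dev);
 *
 * if (remaining)
 *         DRV_LOG(WARNING, "port %u some flows still remain (%d)",
 *                 dev->data->port_id, remaining);
 * @endcode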
3108 */ 3109 int 3110 mlx5_flow_verify(struct rte_eth_dev *dev) 3111 { 3112 struct priv *priv = dev->data->dev_private; 3113 struct rte_flow *flow; 3114 int ret = 0; 3115 3116 TAILQ_FOREACH(flow, &priv->flows, next) { 3117 DRV_LOG(DEBUG, "port %u flow %p still referenced", 3118 dev->data->port_id, (void *)flow); 3119 ++ret; 3120 } 3121 return ret; 3122 } 3123 3124 /** 3125 * Enable a control flow configured from the control plane. 3126 * 3127 * @param dev 3128 * Pointer to Ethernet device. 3129 * @param eth_spec 3130 * An Ethernet flow spec to apply. 3131 * @param eth_mask 3132 * An Ethernet flow mask to apply. 3133 * @param vlan_spec 3134 * A VLAN flow spec to apply. 3135 * @param vlan_mask 3136 * A VLAN flow mask to apply. 3137 * 3138 * @return 3139 * 0 on success, a negative errno value otherwise and rte_errno is set. 3140 */ 3141 int 3142 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 3143 struct rte_flow_item_eth *eth_spec, 3144 struct rte_flow_item_eth *eth_mask, 3145 struct rte_flow_item_vlan *vlan_spec, 3146 struct rte_flow_item_vlan *vlan_mask) 3147 { 3148 struct priv *priv = dev->data->dev_private; 3149 const struct rte_flow_attr attr = { 3150 .ingress = 1, 3151 .priority = MLX5_FLOW_PRIO_RSVD, 3152 }; 3153 struct rte_flow_item items[] = { 3154 { 3155 .type = RTE_FLOW_ITEM_TYPE_ETH, 3156 .spec = eth_spec, 3157 .last = NULL, 3158 .mask = eth_mask, 3159 }, 3160 { 3161 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN : 3162 RTE_FLOW_ITEM_TYPE_END, 3163 .spec = vlan_spec, 3164 .last = NULL, 3165 .mask = vlan_mask, 3166 }, 3167 { 3168 .type = RTE_FLOW_ITEM_TYPE_END, 3169 }, 3170 }; 3171 uint16_t queue[priv->reta_idx_n]; 3172 struct rte_flow_action_rss action_rss = { 3173 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 3174 .level = 0, 3175 .types = priv->rss_conf.rss_hf, 3176 .key_len = priv->rss_conf.rss_key_len, 3177 .queue_num = priv->reta_idx_n, 3178 .key = priv->rss_conf.rss_key, 3179 .queue = queue, 3180 }; 3181 struct rte_flow_action actions[] = { 3182 { 3183 .type = RTE_FLOW_ACTION_TYPE_RSS, 3184 .conf = &action_rss, 3185 }, 3186 { 3187 .type = RTE_FLOW_ACTION_TYPE_END, 3188 }, 3189 }; 3190 struct rte_flow *flow; 3191 struct rte_flow_error error; 3192 unsigned int i; 3193 3194 if (!priv->reta_idx_n) { 3195 rte_errno = EINVAL; 3196 return -rte_errno; 3197 } 3198 for (i = 0; i != priv->reta_idx_n; ++i) 3199 queue[i] = (*priv->reta_idx)[i]; 3200 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items, 3201 actions, &error); 3202 if (!flow) 3203 return -rte_errno; 3204 return 0; 3205 } 3206 3207 /** 3208 * Enable a flow control configured from the control plane. 3209 * 3210 * @param dev 3211 * Pointer to Ethernet device. 3212 * @param eth_spec 3213 * An Ethernet flow spec to apply. 3214 * @param eth_mask 3215 * An Ethernet flow mask to apply. 3216 * 3217 * @return 3218 * 0 on success, a negative errno value otherwise and rte_errno is set. 3219 */ 3220 int 3221 mlx5_ctrl_flow(struct rte_eth_dev *dev, 3222 struct rte_flow_item_eth *eth_spec, 3223 struct rte_flow_item_eth *eth_mask) 3224 { 3225 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 3226 } 3227 3228 /** 3229 * Destroy a flow. 3230 * 3231 * @see rte_flow_destroy() 3232 * @see rte_flow_ops 3233 */ 3234 int 3235 mlx5_flow_destroy(struct rte_eth_dev *dev, 3236 struct rte_flow *flow, 3237 struct rte_flow_error *error __rte_unused) 3238 { 3239 struct priv *priv = dev->data->dev_private; 3240 3241 mlx5_flow_list_destroy(dev, &priv->flows, flow); 3242 return 0; 3243 } 3244 3245 /** 3246 * Destroy all flows. 
3247 * 3248 * @see rte_flow_flush() 3249 * @see rte_flow_ops 3250 */ 3251 int 3252 mlx5_flow_flush(struct rte_eth_dev *dev, 3253 struct rte_flow_error *error __rte_unused) 3254 { 3255 struct priv *priv = dev->data->dev_private; 3256 3257 mlx5_flow_list_flush(dev, &priv->flows); 3258 return 0; 3259 } 3260 3261 /** 3262 * Isolated mode. 3263 * 3264 * @see rte_flow_isolate() 3265 * @see rte_flow_ops 3266 */ 3267 int 3268 mlx5_flow_isolate(struct rte_eth_dev *dev, 3269 int enable, 3270 struct rte_flow_error *error) 3271 { 3272 struct priv *priv = dev->data->dev_private; 3273 3274 if (dev->data->dev_started) { 3275 rte_flow_error_set(error, EBUSY, 3276 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3277 NULL, 3278 "port must be stopped first"); 3279 return -rte_errno; 3280 } 3281 priv->isolated = !!enable; 3282 if (enable) 3283 dev->dev_ops = &mlx5_dev_ops_isolate; 3284 else 3285 dev->dev_ops = &mlx5_dev_ops; 3286 return 0; 3287 } 3288 3289 /** 3290 * Query flow counter. 3291 * 3292 * @param flow 3293 * Pointer to the flow. 3294 * 3295 * @return 3296 * 0 on success, a negative errno value otherwise and rte_errno is set. 3297 */ 3298 static int 3299 mlx5_flow_query_count(struct rte_flow *flow __rte_unused, 3300 void *data __rte_unused, 3301 struct rte_flow_error *error) 3302 { 3303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 3304 if (flow->modifier & MLX5_FLOW_MOD_COUNT) { 3305 struct rte_flow_query_count *qc = data; 3306 uint64_t counters[2] = {0, 0}; 3307 struct ibv_query_counter_set_attr query_cs_attr = { 3308 .cs = flow->counter->cs, 3309 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE, 3310 }; 3311 struct ibv_counter_set_data query_out = { 3312 .out = counters, 3313 .outlen = 2 * sizeof(uint64_t), 3314 }; 3315 int err = mlx5_glue->query_counter_set(&query_cs_attr, 3316 &query_out); 3317 3318 if (err) 3319 return rte_flow_error_set 3320 (error, err, 3321 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3322 NULL, 3323 "cannot read counter"); 3324 qc->hits_set = 1; 3325 qc->bytes_set = 1; 3326 qc->hits = counters[0] - flow->counter->hits; 3327 qc->bytes = counters[1] - flow->counter->bytes; 3328 if (qc->reset) { 3329 flow->counter->hits = counters[0]; 3330 flow->counter->bytes = counters[1]; 3331 } 3332 return 0; 3333 } 3334 return rte_flow_error_set(error, ENOTSUP, 3335 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3336 NULL, 3337 "flow does not have counter"); 3338 #endif 3339 return rte_flow_error_set(error, ENOTSUP, 3340 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3341 NULL, 3342 "counters are not available"); 3343 } 3344 3345 /** 3346 * Query a flows. 3347 * 3348 * @see rte_flow_query() 3349 * @see rte_flow_ops 3350 */ 3351 int 3352 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused, 3353 struct rte_flow *flow, 3354 const struct rte_flow_action *actions, 3355 void *data, 3356 struct rte_flow_error *error) 3357 { 3358 int ret = 0; 3359 3360 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3361 switch (actions->type) { 3362 case RTE_FLOW_ACTION_TYPE_VOID: 3363 break; 3364 case RTE_FLOW_ACTION_TYPE_COUNT: 3365 ret = mlx5_flow_query_count(flow, data, error); 3366 break; 3367 default: 3368 return rte_flow_error_set(error, ENOTSUP, 3369 RTE_FLOW_ERROR_TYPE_ACTION, 3370 actions, 3371 "action not supported"); 3372 } 3373 if (ret < 0) 3374 return ret; 3375 } 3376 return 0; 3377 } 3378 3379 /** 3380 * Convert a flow director filter to a generic flow. 3381 * 3382 * @param dev 3383 * Pointer to Ethernet device. 3384 * @param fdir_filter 3385 * Flow director filter to add. 3386 * @param attributes 3387 * Generic flow parameters structure. 
3388 * 3389 * @return 3390 * 0 on success, a negative errno value otherwise and rte_errno is set. 3391 */ 3392 static int 3393 mlx5_fdir_filter_convert(struct rte_eth_dev *dev, 3394 const struct rte_eth_fdir_filter *fdir_filter, 3395 struct mlx5_fdir *attributes) 3396 { 3397 struct priv *priv = dev->data->dev_private; 3398 const struct rte_eth_fdir_input *input = &fdir_filter->input; 3399 const struct rte_eth_fdir_masks *mask = 3400 &dev->data->dev_conf.fdir_conf.mask; 3401 3402 /* Validate queue number. */ 3403 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 3404 DRV_LOG(ERR, "port %u invalid queue number %d", 3405 dev->data->port_id, fdir_filter->action.rx_queue); 3406 rte_errno = EINVAL; 3407 return -rte_errno; 3408 } 3409 attributes->attr.ingress = 1; 3410 attributes->items[0] = (struct rte_flow_item) { 3411 .type = RTE_FLOW_ITEM_TYPE_ETH, 3412 .spec = &attributes->l2, 3413 .mask = &attributes->l2_mask, 3414 }; 3415 switch (fdir_filter->action.behavior) { 3416 case RTE_ETH_FDIR_ACCEPT: 3417 attributes->actions[0] = (struct rte_flow_action){ 3418 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 3419 .conf = &attributes->queue, 3420 }; 3421 break; 3422 case RTE_ETH_FDIR_REJECT: 3423 attributes->actions[0] = (struct rte_flow_action){ 3424 .type = RTE_FLOW_ACTION_TYPE_DROP, 3425 }; 3426 break; 3427 default: 3428 DRV_LOG(ERR, "port %u invalid behavior %d", 3429 dev->data->port_id, 3430 fdir_filter->action.behavior); 3431 rte_errno = ENOTSUP; 3432 return -rte_errno; 3433 } 3434 attributes->queue.index = fdir_filter->action.rx_queue; 3435 /* Handle L3. */ 3436 switch (fdir_filter->input.flow_type) { 3437 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3438 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3439 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3440 attributes->l3.ipv4.hdr = (struct ipv4_hdr){ 3441 .src_addr = input->flow.ip4_flow.src_ip, 3442 .dst_addr = input->flow.ip4_flow.dst_ip, 3443 .time_to_live = input->flow.ip4_flow.ttl, 3444 .type_of_service = input->flow.ip4_flow.tos, 3445 .next_proto_id = input->flow.ip4_flow.proto, 3446 }; 3447 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ 3448 .src_addr = mask->ipv4_mask.src_ip, 3449 .dst_addr = mask->ipv4_mask.dst_ip, 3450 .time_to_live = mask->ipv4_mask.ttl, 3451 .type_of_service = mask->ipv4_mask.tos, 3452 .next_proto_id = mask->ipv4_mask.proto, 3453 }; 3454 attributes->items[1] = (struct rte_flow_item){ 3455 .type = RTE_FLOW_ITEM_TYPE_IPV4, 3456 .spec = &attributes->l3, 3457 .mask = &attributes->l3_mask, 3458 }; 3459 break; 3460 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3461 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3462 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3463 attributes->l3.ipv6.hdr = (struct ipv6_hdr){ 3464 .hop_limits = input->flow.ipv6_flow.hop_limits, 3465 .proto = input->flow.ipv6_flow.proto, 3466 }; 3467 3468 memcpy(attributes->l3.ipv6.hdr.src_addr, 3469 input->flow.ipv6_flow.src_ip, 3470 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3471 memcpy(attributes->l3.ipv6.hdr.dst_addr, 3472 input->flow.ipv6_flow.dst_ip, 3473 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3474 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 3475 mask->ipv6_mask.src_ip, 3476 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3477 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 3478 mask->ipv6_mask.dst_ip, 3479 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3480 attributes->items[1] = (struct rte_flow_item){ 3481 .type = RTE_FLOW_ITEM_TYPE_IPV6, 3482 .spec = &attributes->l3, 3483 .mask = &attributes->l3_mask, 3484 }; 3485 break; 3486 default: 3487 DRV_LOG(ERR, "port %u invalid flow type%d", 3488 
dev->data->port_id, fdir_filter->input.flow_type); 3489 rte_errno = ENOTSUP; 3490 return -rte_errno; 3491 } 3492 /* Handle L4. */ 3493 switch (fdir_filter->input.flow_type) { 3494 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3495 attributes->l4.udp.hdr = (struct udp_hdr){ 3496 .src_port = input->flow.udp4_flow.src_port, 3497 .dst_port = input->flow.udp4_flow.dst_port, 3498 }; 3499 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3500 .src_port = mask->src_port_mask, 3501 .dst_port = mask->dst_port_mask, 3502 }; 3503 attributes->items[2] = (struct rte_flow_item){ 3504 .type = RTE_FLOW_ITEM_TYPE_UDP, 3505 .spec = &attributes->l4, 3506 .mask = &attributes->l4_mask, 3507 }; 3508 break; 3509 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3510 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3511 .src_port = input->flow.tcp4_flow.src_port, 3512 .dst_port = input->flow.tcp4_flow.dst_port, 3513 }; 3514 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3515 .src_port = mask->src_port_mask, 3516 .dst_port = mask->dst_port_mask, 3517 }; 3518 attributes->items[2] = (struct rte_flow_item){ 3519 .type = RTE_FLOW_ITEM_TYPE_TCP, 3520 .spec = &attributes->l4, 3521 .mask = &attributes->l4_mask, 3522 }; 3523 break; 3524 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3525 attributes->l4.udp.hdr = (struct udp_hdr){ 3526 .src_port = input->flow.udp6_flow.src_port, 3527 .dst_port = input->flow.udp6_flow.dst_port, 3528 }; 3529 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3530 .src_port = mask->src_port_mask, 3531 .dst_port = mask->dst_port_mask, 3532 }; 3533 attributes->items[2] = (struct rte_flow_item){ 3534 .type = RTE_FLOW_ITEM_TYPE_UDP, 3535 .spec = &attributes->l4, 3536 .mask = &attributes->l4_mask, 3537 }; 3538 break; 3539 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3540 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3541 .src_port = input->flow.tcp6_flow.src_port, 3542 .dst_port = input->flow.tcp6_flow.dst_port, 3543 }; 3544 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3545 .src_port = mask->src_port_mask, 3546 .dst_port = mask->dst_port_mask, 3547 }; 3548 attributes->items[2] = (struct rte_flow_item){ 3549 .type = RTE_FLOW_ITEM_TYPE_TCP, 3550 .spec = &attributes->l4, 3551 .mask = &attributes->l4_mask, 3552 }; 3553 break; 3554 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3555 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3556 break; 3557 default: 3558 DRV_LOG(ERR, "port %u invalid flow type%d", 3559 dev->data->port_id, fdir_filter->input.flow_type); 3560 rte_errno = ENOTSUP; 3561 return -rte_errno; 3562 } 3563 return 0; 3564 } 3565 3566 /** 3567 * Add new flow director filter and store it in list. 3568 * 3569 * @param dev 3570 * Pointer to Ethernet device. 3571 * @param fdir_filter 3572 * Flow director filter to add. 3573 * 3574 * @return 3575 * 0 on success, a negative errno value otherwise and rte_errno is set. 
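 *
 * As an illustration (ports, queue and behavior values are arbitrary), an
 * IPv4/UDP filter such as the one below is first turned into an
 * ETH/IPV4/UDP pattern with a QUEUE action by mlx5_fdir_filter_convert():
 *
 * @code{.c}
 * struct rte_eth_fdir_filter f = {
 *         .input = {
 *                 .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                 .flow.udp4_flow = {
 *                         .src_port = rte_cpu_to_be_16(5000),
 *                         .dst_port = rte_cpu_to_be_16(5001),
 *                 },
 *         },
 *         .action = {
 *                 .rx_queue = 3,
 *                 .behavior = RTE_ETH_FDIR_ACCEPT,
 *         },
 * };
 * @endcode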
3576 */ 3577 static int 3578 mlx5_fdir_filter_add(struct rte_eth_dev *dev, 3579 const struct rte_eth_fdir_filter *fdir_filter) 3580 { 3581 struct priv *priv = dev->data->dev_private; 3582 struct mlx5_fdir attributes = { 3583 .attr.group = 0, 3584 .l2_mask = { 3585 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00", 3586 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00", 3587 .type = 0, 3588 }, 3589 }; 3590 struct rte_flow_error error; 3591 struct rte_flow *flow; 3592 int ret; 3593 3594 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes); 3595 if (ret) 3596 return ret; 3597 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr, 3598 attributes.items, attributes.actions, 3599 &error); 3600 if (flow) { 3601 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id, 3602 (void *)flow); 3603 return 0; 3604 } 3605 return -rte_errno; 3606 } 3607 3608 /** 3609 * Delete specific filter. 3610 * 3611 * @param dev 3612 * Pointer to Ethernet device. 3613 * @param fdir_filter 3614 * Filter to be deleted. 3615 * 3616 * @return 3617 * 0 on success, a negative errno value otherwise and rte_errno is set. 3618 */ 3619 static int 3620 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused, 3621 const struct rte_eth_fdir_filter *fdir_filter 3622 __rte_unused) 3623 { 3624 rte_errno = ENOTSUP; 3625 return -rte_errno; 3626 } 3627 3628 /** 3629 * Update queue for specific filter. 3630 * 3631 * @param dev 3632 * Pointer to Ethernet device. 3633 * @param fdir_filter 3634 * Filter to be updated. 3635 * 3636 * @return 3637 * 0 on success, a negative errno value otherwise and rte_errno is set. 3638 */ 3639 static int 3640 mlx5_fdir_filter_update(struct rte_eth_dev *dev, 3641 const struct rte_eth_fdir_filter *fdir_filter) 3642 { 3643 int ret; 3644 3645 ret = mlx5_fdir_filter_delete(dev, fdir_filter); 3646 if (ret) 3647 return ret; 3648 return mlx5_fdir_filter_add(dev, fdir_filter); 3649 } 3650 3651 /** 3652 * Flush all filters. 3653 * 3654 * @param dev 3655 * Pointer to Ethernet device. 3656 */ 3657 static void 3658 mlx5_fdir_filter_flush(struct rte_eth_dev *dev) 3659 { 3660 struct priv *priv = dev->data->dev_private; 3661 3662 mlx5_flow_list_flush(dev, &priv->flows); 3663 } 3664 3665 /** 3666 * Get flow director information. 3667 * 3668 * @param dev 3669 * Pointer to Ethernet device. 3670 * @param[out] fdir_info 3671 * Resulting flow director information. 3672 */ 3673 static void 3674 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 3675 { 3676 struct rte_eth_fdir_masks *mask = 3677 &dev->data->dev_conf.fdir_conf.mask; 3678 3679 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 3680 fdir_info->guarant_spc = 0; 3681 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 3682 fdir_info->max_flexpayload = 0; 3683 fdir_info->flow_types_mask[0] = 0; 3684 fdir_info->flex_payload_unit = 0; 3685 fdir_info->max_flex_payload_segment_num = 0; 3686 fdir_info->flex_payload_limit = 0; 3687 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 3688 } 3689 3690 /** 3691 * Deal with flow director operations. 3692 * 3693 * @param dev 3694 * Pointer to Ethernet device. 3695 * @param filter_op 3696 * Operation to perform. 3697 * @param arg 3698 * Pointer to operation-specific structure. 3699 * 3700 * @return 3701 * 0 on success, a negative errno value otherwise and rte_errno is set. 
3702 */ 3703 static int 3704 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 3705 void *arg) 3706 { 3707 enum rte_fdir_mode fdir_mode = 3708 dev->data->dev_conf.fdir_conf.mode; 3709 3710 if (filter_op == RTE_ETH_FILTER_NOP) 3711 return 0; 3712 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 3713 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 3714 DRV_LOG(ERR, "port %u flow director mode %d not supported", 3715 dev->data->port_id, fdir_mode); 3716 rte_errno = EINVAL; 3717 return -rte_errno; 3718 } 3719 switch (filter_op) { 3720 case RTE_ETH_FILTER_ADD: 3721 return mlx5_fdir_filter_add(dev, arg); 3722 case RTE_ETH_FILTER_UPDATE: 3723 return mlx5_fdir_filter_update(dev, arg); 3724 case RTE_ETH_FILTER_DELETE: 3725 return mlx5_fdir_filter_delete(dev, arg); 3726 case RTE_ETH_FILTER_FLUSH: 3727 mlx5_fdir_filter_flush(dev); 3728 break; 3729 case RTE_ETH_FILTER_INFO: 3730 mlx5_fdir_info_get(dev, arg); 3731 break; 3732 default: 3733 DRV_LOG(DEBUG, "port %u unknown operation %u", 3734 dev->data->port_id, filter_op); 3735 rte_errno = EINVAL; 3736 return -rte_errno; 3737 } 3738 return 0; 3739 } 3740 3741 /** 3742 * Manage filter operations. 3743 * 3744 * @param dev 3745 * Pointer to Ethernet device structure. 3746 * @param filter_type 3747 * Filter type. 3748 * @param filter_op 3749 * Operation to perform. 3750 * @param arg 3751 * Pointer to operation-specific structure. 3752 * 3753 * @return 3754 * 0 on success, a negative errno value otherwise and rte_errno is set. 3755 */ 3756 int 3757 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 3758 enum rte_filter_type filter_type, 3759 enum rte_filter_op filter_op, 3760 void *arg) 3761 { 3762 switch (filter_type) { 3763 case RTE_ETH_FILTER_GENERIC: 3764 if (filter_op != RTE_ETH_FILTER_GET) { 3765 rte_errno = EINVAL; 3766 return -rte_errno; 3767 } 3768 *(const void **)arg = &mlx5_flow_ops; 3769 return 0; 3770 case RTE_ETH_FILTER_FDIR: 3771 return mlx5_fdir_ctrl_func(dev, filter_op, arg); 3772 default: 3773 DRV_LOG(ERR, "port %u filter type (%d) not supported", 3774 dev->data->port_id, filter_type); 3775 rte_errno = ENOTSUP; 3776 return -rte_errno; 3777 } 3778 return 0; 3779 } 3780