/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern outer Layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

/* Pattern inner Layer bits. */
#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)

/* Pattern tunnel Layer bits. */
#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
#define MLX5_FLOW_LAYER_GRE (1u << 14)
#define MLX5_FLOW_LAYER_MPLS (1u << 15)

/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
#define MLX5_FLOW_LAYER_OUTER \
	(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
	 MLX5_FLOW_LAYER_OUTER_L4)

/* Tunnel Masks. */
#define MLX5_FLOW_LAYER_TUNNEL \
	(MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
	 MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)

/* Inner Masks. */
#define MLX5_FLOW_LAYER_INNER_L3 \
	(MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
#define MLX5_FLOW_LAYER_INNER_L4 \
	(MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
#define MLX5_FLOW_LAYER_INNER \
	(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
	 MLX5_FLOW_LAYER_INNER_L4)

/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)
#define MLX5_FLOW_FATE_RSS (1u << 2)

/* Modify a packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)
#define MLX5_FLOW_MOD_COUNT (1u << 2)

/* Possible L3 layer protocols for filtering. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17
#define MLX5_IP_PROTOCOL_GRE 47
#define MLX5_IP_PROTOCOL_MPLS 147
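
/*
 * The MLX5_FLOW_LAYER_*, MLX5_FLOW_MOD_* and MLX5_FLOW_FATE_* values above
 * are independent bit-fields. While a flow rule is translated they are OR'ed
 * into rte_flow->layers, rte_flow->modifier and rte_flow->fate respectively,
 * and later tested with a bitwise AND, for instance:
 *
 *	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 *	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
 *		;	// An outer L3 layer (IPv4 or IPv6) is present.
 */
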
/* Priority reserved for default flows. */
#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						 MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6,
						 MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};

/** Verbs specification and resources attached to a flow rule. */
struct mlx5_flow_verbs {
	LIST_ENTRY(mlx5_flow_verbs) next;
	unsigned int size; /**< Size of the attribute. */
	struct {
		struct ibv_flow_attr *attr;
		/**< Pointer to the Specification buffer. */
		uint8_t *specs; /**< Pointer to the specifications. */
	};
	struct ibv_flow *flow; /**< Verbs flow pointer. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
};

/* Counters information. */
struct mlx5_flow_counter {
	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
	uint32_t shared:1; /**< Share counter ID with other flow rules. */
	uint32_t ref_cnt:31; /**< Reference counter. */
	uint32_t id; /**< Counter ID. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};
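
/*
 * Note on counters: counters created with the "shared" bit set are looked up
 * by identifier, so several flow rules requesting the same (shared, id) pair
 * reuse a single Verbs counter set; ref_cnt tracks how many rules still
 * reference it (see mlx5_flow_counter_new() and mlx5_flow_counter_release()
 * below).
 */
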
/* Flow structure. */
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct rte_flow_attr attributes; /**< User flow attribute. */
	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
	uint32_t layers;
	/**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
	uint32_t modifier;
	/**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
	uint32_t fate;
	/**< Bit-fields of present fate actions, see MLX5_FLOW_FATE_*. */
	uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
	struct mlx5_flow_verbs *cur_verbs;
	/**< Current Verbs flow structure being filled. */
	struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
	struct rte_flow_action_rss rss; /**< RSS context. */
	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
	void *nl_flow; /**< Netlink flow buffer if relevant. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/*
 * Number of sub priorities.
 * For each kind of pattern matching, i.e. L2, L3, L4, to have a correct
 * match on the NIC (firmware dependent), L4 must have the highest priority
 * followed by L3 and finally L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
};
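
/*
 * Example of how the maps above are used: with 16 Verbs priorities
 * (priority_map_5), a flow rule created with rte_flow_attr priority 1 whose
 * highest matched layer is L3 ends up with Verbs priority
 * priority_map_5[1][MLX5_PRIORITY_MAP_L3], i.e. 4; see
 * mlx5_flow_adjust_priority() below.
 */
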
/**
 * Discover the maximum number of flow priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, vprio[i]);
		return -rte_errno;
	}
	mlx5_hrxq_drop_release(dev);
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	uint32_t priority = flow->attributes.priority;
	uint32_t subpriority = flow->cur_verbs->attr->priority;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		priority = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		priority = priority_map_5[priority][subpriority];
		break;
	}
	flow->cur_verbs->attr->priority = priority;
}

/**
 * Get a flow counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] shared
 *   Indicate if this counter is shared with other flows.
 * @param[in] id
 *   Counter identifier.
 *
 * @return
 *   A pointer to the counter, NULL otherwise and rte_errno is set.
518 */ 519 static struct mlx5_flow_counter * 520 mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) 521 { 522 struct priv *priv = dev->data->dev_private; 523 struct mlx5_flow_counter *cnt; 524 525 LIST_FOREACH(cnt, &priv->flow_counters, next) { 526 if (!cnt->shared || cnt->shared != shared) 527 continue; 528 if (cnt->id != id) 529 continue; 530 cnt->ref_cnt++; 531 return cnt; 532 } 533 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 534 535 struct mlx5_flow_counter tmpl = { 536 .shared = shared, 537 .id = id, 538 .cs = mlx5_glue->create_counter_set 539 (priv->ctx, 540 &(struct ibv_counter_set_init_attr){ 541 .counter_set_id = id, 542 }), 543 .hits = 0, 544 .bytes = 0, 545 }; 546 547 if (!tmpl.cs) { 548 rte_errno = errno; 549 return NULL; 550 } 551 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0); 552 if (!cnt) { 553 rte_errno = ENOMEM; 554 return NULL; 555 } 556 *cnt = tmpl; 557 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next); 558 return cnt; 559 #endif 560 rte_errno = ENOTSUP; 561 return NULL; 562 } 563 564 /** 565 * Release a flow counter. 566 * 567 * @param[in] counter 568 * Pointer to the counter handler. 569 */ 570 static void 571 mlx5_flow_counter_release(struct mlx5_flow_counter *counter) 572 { 573 if (--counter->ref_cnt == 0) { 574 claim_zero(mlx5_glue->destroy_counter_set(counter->cs)); 575 LIST_REMOVE(counter, next); 576 rte_free(counter); 577 } 578 } 579 580 /** 581 * Verify the @p attributes will be correctly understood by the NIC and store 582 * them in the @p flow if everything is correct. 583 * 584 * @param[in] dev 585 * Pointer to Ethernet device. 586 * @param[in] attributes 587 * Pointer to flow attributes 588 * @param[in, out] flow 589 * Pointer to the rte_flow structure. 590 * @param[out] error 591 * Pointer to error structure. 592 * 593 * @return 594 * 0 on success, a negative errno value otherwise and rte_errno is set. 595 */ 596 static int 597 mlx5_flow_attributes(struct rte_eth_dev *dev, 598 const struct rte_flow_attr *attributes, 599 struct rte_flow *flow, 600 struct rte_flow_error *error) 601 { 602 uint32_t priority_max = 603 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1; 604 605 if (attributes->group) 606 return rte_flow_error_set(error, ENOTSUP, 607 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 608 NULL, 609 "groups is not supported"); 610 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 611 attributes->priority >= priority_max) 612 return rte_flow_error_set(error, ENOTSUP, 613 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 614 NULL, 615 "priority out of range"); 616 if (attributes->egress) 617 return rte_flow_error_set(error, ENOTSUP, 618 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, 619 NULL, 620 "egress is not supported"); 621 if (attributes->transfer) 622 return rte_flow_error_set(error, ENOTSUP, 623 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 624 NULL, 625 "transfer is not supported"); 626 if (!attributes->ingress) 627 return rte_flow_error_set(error, ENOTSUP, 628 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 629 NULL, 630 "ingress attribute is mandatory"); 631 flow->attributes = *attributes; 632 if (attributes->priority == MLX5_FLOW_PRIO_RSVD) 633 flow->attributes.priority = priority_max; 634 return 0; 635 } 636 637 /** 638 * Verify the @p item specifications (spec, last, mask) are compatible with the 639 * NIC capabilities. 640 * 641 * @param[in] item 642 * Item specification. 643 * @param[in] mask 644 * @p item->mask or flow default bit-masks. 645 * @param[in] nic_mask 646 * Bit-masks covering supported fields by the NIC to compare with user mask. 
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	assert(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not supported");
	}
	return 0;
}

/**
 * Add a Verbs item specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Pointer to the specification to copy.
 * @param[in] size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (verbs->specs) {
		void *dst;

		dst = (void *)(verbs->specs + verbs->size);
		memcpy(dst, src, size);
		++verbs->attr->num_of_specs;
	}
	verbs->size += size;
}

/**
 * Adjust Verbs hash fields according to the @p flow information.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 */
static void
mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
				  int tunnel __rte_unused,
				  uint32_t layer_types, uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
	if (flow->rss.level == 2 && !tunnel)
		hash_fields = 0;
	else if (flow->rss.level < 2 && tunnel)
		hash_fields = 0;
#endif
	if (!(flow->rss.types & layer_types))
		hash_fields = 0;
	flow->cur_verbs->hash_fields |= hash_fields;
}
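
/*
 * All mlx5_flow_item_*() converters below follow the same convention: they
 * always return the size in bytes their Verbs specification needs, but only
 * write into @p flow when that size fits within @p flow_size. A caller can
 * therefore perform a first pass with a too-small buffer to learn the
 * required size, allocate it, then convert again, e.g.:
 *
 *	int size = mlx5_flow_item_eth(item, flow, 0, error);
 *
 *	if (size > 0) {
 *		// Allocate at least "size" bytes for the Verbs attribute
 *		// and specifications, then call the converter again.
 *	}
 */
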
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
			    MLX5_FLOW_LAYER_OUTER_L2))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layers already configured");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	if (ret)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
		MLX5_FLOW_LAYER_OUTER_L2;
	if (size > flow_size)
		return size;
	if (spec) {
		unsigned int i;

		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	mlx5_flow_spec_verbs_add(flow, &eth, size);
	return size;
}

/**
 * Update the VLAN tag in the Verbs Ethernet specification.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] eth
 *   Verbs structure containing the VLAN information to copy.
 */
static void
mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
			   struct ibv_flow_spec_eth *eth)
{
	unsigned int i;
	const enum ibv_flow_spec_type search = eth->type;
	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
		((uint8_t *)attr + sizeof(struct ibv_flow_attr));

	for (i = 0; i != attr->num_of_specs; ++i) {
		if (hdr->type == search) {
			struct ibv_flow_spec_eth *e =
				(struct ibv_flow_spec_eth *)hdr;

			e->val.vlan_tag = eth->val.vlan_tag;
			e->mask.vlan_tag = eth->mask.vlan_tag;
			e->val.ether_type = eth->val.ether_type;
			e->mask.ether_type = eth->mask.ether_type;
			break;
		}
		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
	}
}
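
/*
 * In Verbs, VLAN matching is expressed through the Ethernet specification
 * (vlan_tag field) rather than a dedicated VLAN spec. The VLAN item
 * converter below therefore either emits a new Ethernet specification or,
 * when one is already present, patches it in place through
 * mlx5_flow_item_vlan_update() above.
 */
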
/**
 * Convert the @p item into @p flow (or by updating the already present
 * Ethernet Verbs) specification after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(0x0fff),
		.inner_type = RTE_BE16(0xffff),
	};
	unsigned int size = sizeof(struct ibv_flow_spec_eth);
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;
	const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
		(MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
	const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
		MLX5_FLOW_LAYER_OUTER_VLAN;
	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
		MLX5_FLOW_LAYER_OUTER_L2;

	if (flow->layers & vlanm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VLAN layer already configured");
	else if ((flow->layers & l34m) != 0)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layer cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_vlan), error);
	if (ret)
		return ret;
	if (spec) {
		eth.val.vlan_tag = spec->tci;
		eth.mask.vlan_tag = mask->tci;
		eth.val.vlan_tag &= eth.mask.vlan_tag;
		eth.val.ether_type = spec->inner_type;
		eth.mask.ether_type = mask->inner_type;
		eth.val.ether_type &= eth.mask.ether_type;
	}
	/*
	 * From Verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!eth.mask.vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	if (!(flow->layers & l2m)) {
		if (size <= flow_size) {
			flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
			mlx5_flow_spec_verbs_add(flow, &eth, size);
		}
	} else {
		if (flow->cur_verbs)
			mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
						   &eth);
		size = 0; /* Only an update is done in the eth specification. */
	}
	flow->layers |= tunnel ?
		(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
		(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
	return size;
}
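
/*
 * For instance, a pattern equivalent to "eth / vlan tci is 42 / ipv4"
 * submitted through rte_flow_create() reaches this layer as something like:
 *
 *	struct rte_flow_item_vlan vlan_spec = {
 *		.tci = RTE_BE16(42),
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VLAN, .spec = &vlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * and is translated item by item by mlx5_flow_items() using the converters
 * in this file.
 */
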
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = IBV_FLOW_SPEC_IPV4_EXT |
			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv4), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
		MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	if (spec) {
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	flow->l3_protocol_en = !!ipv4.mask.proto;
	flow->l3_protocol = ipv4.val.proto;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			  ETH_RSS_NONFRAG_IPV4_OTHER),
			 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
			.hop_limits = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	/*
	 * IPv6 is not recognised by the NIC inside a GRE tunnel.
	 * Such support has to be disabled as the rule will be
	 * accepted. Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
	 * Mellanox OFED 4.4-1.0.0.0.
	 */
	if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "IPv6 inside a GRE tunnel is"
					  " not recognised.");
	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv6), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
		MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	flow->l3_protocol_en = !!ipv6.mask.next_hdr;
	flow->l3_protocol = ipv6.val.next_hdr;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
			 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv6, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
1220 */ 1221 static int 1222 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow, 1223 const size_t flow_size, struct rte_flow_error *error) 1224 { 1225 const struct rte_flow_item_udp *spec = item->spec; 1226 const struct rte_flow_item_udp *mask = item->mask; 1227 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1228 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); 1229 struct ibv_flow_spec_tcp_udp udp = { 1230 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1231 .size = size, 1232 }; 1233 int ret; 1234 1235 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP) 1236 return rte_flow_error_set(error, ENOTSUP, 1237 RTE_FLOW_ERROR_TYPE_ITEM, 1238 item, 1239 "protocol filtering not compatible" 1240 " with UDP layer"); 1241 if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1242 MLX5_FLOW_LAYER_OUTER_L3))) 1243 return rte_flow_error_set(error, ENOTSUP, 1244 RTE_FLOW_ERROR_TYPE_ITEM, 1245 item, 1246 "L3 is mandatory to filter" 1247 " on L4"); 1248 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1249 MLX5_FLOW_LAYER_OUTER_L4)) 1250 return rte_flow_error_set(error, ENOTSUP, 1251 RTE_FLOW_ERROR_TYPE_ITEM, 1252 item, 1253 "L4 layer is already" 1254 " present"); 1255 if (!mask) 1256 mask = &rte_flow_item_udp_mask; 1257 ret = mlx5_flow_item_acceptable 1258 (item, (const uint8_t *)mask, 1259 (const uint8_t *)&rte_flow_item_udp_mask, 1260 sizeof(struct rte_flow_item_udp), error); 1261 if (ret < 0) 1262 return ret; 1263 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : 1264 MLX5_FLOW_LAYER_OUTER_L4_UDP; 1265 if (spec) { 1266 udp.val.dst_port = spec->hdr.dst_port; 1267 udp.val.src_port = spec->hdr.src_port; 1268 udp.mask.dst_port = mask->hdr.dst_port; 1269 udp.mask.src_port = mask->hdr.src_port; 1270 /* Remove unwanted bits from values. */ 1271 udp.val.src_port &= udp.mask.src_port; 1272 udp.val.dst_port &= udp.mask.dst_port; 1273 } 1274 if (size <= flow_size) { 1275 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP, 1276 (IBV_RX_HASH_SRC_PORT_UDP | 1277 IBV_RX_HASH_DST_PORT_UDP)); 1278 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4; 1279 mlx5_flow_spec_verbs_add(flow, &udp, size); 1280 } 1281 return size; 1282 } 1283 1284 /** 1285 * Convert the @p item into a Verbs specification after ensuring the NIC 1286 * will understand and process it correctly. 1287 * If the necessary size for the conversion is greater than the @p flow_size, 1288 * nothing is written in @p flow, the validation is still performed. 1289 * 1290 * @param[in] item 1291 * Item specification. 1292 * @param[in, out] flow 1293 * Pointer to flow structure. 1294 * @param[in] flow_size 1295 * Size in bytes of the available space in @p flow, if too small, nothing is 1296 * written. 1297 * @param[out] error 1298 * Pointer to error structure. 1299 * 1300 * @return 1301 * On success the number of bytes consumed/necessary, if the returned value 1302 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1303 * otherwise another call with this returned memory size should be done. 1304 * On error, a negative errno value is returned and rte_errno is set. 
1305 */ 1306 static int 1307 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow, 1308 const size_t flow_size, struct rte_flow_error *error) 1309 { 1310 const struct rte_flow_item_tcp *spec = item->spec; 1311 const struct rte_flow_item_tcp *mask = item->mask; 1312 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1313 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); 1314 struct ibv_flow_spec_tcp_udp tcp = { 1315 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1316 .size = size, 1317 }; 1318 int ret; 1319 1320 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP) 1321 return rte_flow_error_set(error, ENOTSUP, 1322 RTE_FLOW_ERROR_TYPE_ITEM, 1323 item, 1324 "protocol filtering not compatible" 1325 " with TCP layer"); 1326 if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1327 MLX5_FLOW_LAYER_OUTER_L3))) 1328 return rte_flow_error_set(error, ENOTSUP, 1329 RTE_FLOW_ERROR_TYPE_ITEM, 1330 item, 1331 "L3 is mandatory to filter on L4"); 1332 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1333 MLX5_FLOW_LAYER_OUTER_L4)) 1334 return rte_flow_error_set(error, ENOTSUP, 1335 RTE_FLOW_ERROR_TYPE_ITEM, 1336 item, 1337 "L4 layer is already present"); 1338 if (!mask) 1339 mask = &rte_flow_item_tcp_mask; 1340 ret = mlx5_flow_item_acceptable 1341 (item, (const uint8_t *)mask, 1342 (const uint8_t *)&rte_flow_item_tcp_mask, 1343 sizeof(struct rte_flow_item_tcp), error); 1344 if (ret < 0) 1345 return ret; 1346 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : 1347 MLX5_FLOW_LAYER_OUTER_L4_TCP; 1348 if (spec) { 1349 tcp.val.dst_port = spec->hdr.dst_port; 1350 tcp.val.src_port = spec->hdr.src_port; 1351 tcp.mask.dst_port = mask->hdr.dst_port; 1352 tcp.mask.src_port = mask->hdr.src_port; 1353 /* Remove unwanted bits from values. */ 1354 tcp.val.src_port &= tcp.mask.src_port; 1355 tcp.val.dst_port &= tcp.mask.dst_port; 1356 } 1357 if (size <= flow_size) { 1358 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP, 1359 (IBV_RX_HASH_SRC_PORT_TCP | 1360 IBV_RX_HASH_DST_PORT_TCP)); 1361 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4; 1362 mlx5_flow_spec_verbs_add(flow, &tcp, size); 1363 } 1364 return size; 1365 } 1366 1367 /** 1368 * Convert the @p item into a Verbs specification after ensuring the NIC 1369 * will understand and process it correctly. 1370 * If the necessary size for the conversion is greater than the @p flow_size, 1371 * nothing is written in @p flow, the validation is still performed. 1372 * 1373 * @param[in] item 1374 * Item specification. 1375 * @param[in, out] flow 1376 * Pointer to flow structure. 1377 * @param[in] flow_size 1378 * Size in bytes of the available space in @p flow, if too small, nothing is 1379 * written. 1380 * @param[out] error 1381 * Pointer to error structure. 1382 * 1383 * @return 1384 * On success the number of bytes consumed/necessary, if the returned value 1385 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1386 * otherwise another call with this returned memory size should be done. 1387 * On error, a negative errno value is returned and rte_errno is set. 
1388 */ 1389 static int 1390 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow, 1391 const size_t flow_size, struct rte_flow_error *error) 1392 { 1393 const struct rte_flow_item_vxlan *spec = item->spec; 1394 const struct rte_flow_item_vxlan *mask = item->mask; 1395 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1396 struct ibv_flow_spec_tunnel vxlan = { 1397 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1398 .size = size, 1399 }; 1400 int ret; 1401 union vni { 1402 uint32_t vlan_id; 1403 uint8_t vni[4]; 1404 } id = { .vlan_id = 0, }; 1405 1406 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1407 return rte_flow_error_set(error, ENOTSUP, 1408 RTE_FLOW_ERROR_TYPE_ITEM, 1409 item, 1410 "a tunnel is already present"); 1411 /* 1412 * Verify only UDPv4 is present as defined in 1413 * https://tools.ietf.org/html/rfc7348 1414 */ 1415 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1416 return rte_flow_error_set(error, ENOTSUP, 1417 RTE_FLOW_ERROR_TYPE_ITEM, 1418 item, 1419 "no outer UDP layer found"); 1420 if (!mask) 1421 mask = &rte_flow_item_vxlan_mask; 1422 ret = mlx5_flow_item_acceptable 1423 (item, (const uint8_t *)mask, 1424 (const uint8_t *)&rte_flow_item_vxlan_mask, 1425 sizeof(struct rte_flow_item_vxlan), error); 1426 if (ret < 0) 1427 return ret; 1428 if (spec) { 1429 memcpy(&id.vni[1], spec->vni, 3); 1430 vxlan.val.tunnel_id = id.vlan_id; 1431 memcpy(&id.vni[1], mask->vni, 3); 1432 vxlan.mask.tunnel_id = id.vlan_id; 1433 /* Remove unwanted bits from values. */ 1434 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id; 1435 } 1436 /* 1437 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if 1438 * only this layer is defined in the Verbs specification it is 1439 * interpreted as wildcard and all packets will match this 1440 * rule, if it follows a full stack layer (ex: eth / ipv4 / 1441 * udp), all packets matching the layers before will also 1442 * match this rule. To avoid such situation, VNI 0 is 1443 * currently refused. 1444 */ 1445 if (!vxlan.val.tunnel_id) 1446 return rte_flow_error_set(error, EINVAL, 1447 RTE_FLOW_ERROR_TYPE_ITEM, 1448 item, 1449 "VXLAN vni cannot be 0"); 1450 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1451 return rte_flow_error_set(error, EINVAL, 1452 RTE_FLOW_ERROR_TYPE_ITEM, 1453 item, 1454 "VXLAN tunnel must be fully defined"); 1455 if (size <= flow_size) { 1456 mlx5_flow_spec_verbs_add(flow, &vxlan, size); 1457 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1458 } 1459 flow->layers |= MLX5_FLOW_LAYER_VXLAN; 1460 return size; 1461 } 1462 1463 /** 1464 * Convert the @p item into a Verbs specification after ensuring the NIC 1465 * will understand and process it correctly. 1466 * If the necessary size for the conversion is greater than the @p flow_size, 1467 * nothing is written in @p flow, the validation is still performed. 1468 * 1469 * @param dev 1470 * Pointer to Ethernet device. 1471 * @param[in] item 1472 * Item specification. 1473 * @param[in, out] flow 1474 * Pointer to flow structure. 1475 * @param[in] flow_size 1476 * Size in bytes of the available space in @p flow, if too small, nothing is 1477 * written. 1478 * @param[out] error 1479 * Pointer to error structure. 1480 * 1481 * @return 1482 * On success the number of bytes consumed/necessary, if the returned value 1483 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1484 * otherwise another call with this returned memory size should be done. 1485 * On error, a negative errno value is returned and rte_errno is set. 
1486 */ 1487 static int 1488 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev, 1489 const struct rte_flow_item *item, 1490 struct rte_flow *flow, const size_t flow_size, 1491 struct rte_flow_error *error) 1492 { 1493 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1494 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1495 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1496 struct ibv_flow_spec_tunnel vxlan_gpe = { 1497 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1498 .size = size, 1499 }; 1500 int ret; 1501 union vni { 1502 uint32_t vlan_id; 1503 uint8_t vni[4]; 1504 } id = { .vlan_id = 0, }; 1505 1506 if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en) 1507 return rte_flow_error_set(error, ENOTSUP, 1508 RTE_FLOW_ERROR_TYPE_ITEM, 1509 item, 1510 "L3 VXLAN is not enabled by device" 1511 " parameter and/or not configured in" 1512 " firmware"); 1513 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1514 return rte_flow_error_set(error, ENOTSUP, 1515 RTE_FLOW_ERROR_TYPE_ITEM, 1516 item, 1517 "a tunnel is already present"); 1518 /* 1519 * Verify only UDPv4 is present as defined in 1520 * https://tools.ietf.org/html/rfc7348 1521 */ 1522 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1523 return rte_flow_error_set(error, ENOTSUP, 1524 RTE_FLOW_ERROR_TYPE_ITEM, 1525 item, 1526 "no outer UDP layer found"); 1527 if (!mask) 1528 mask = &rte_flow_item_vxlan_gpe_mask; 1529 ret = mlx5_flow_item_acceptable 1530 (item, (const uint8_t *)mask, 1531 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 1532 sizeof(struct rte_flow_item_vxlan_gpe), error); 1533 if (ret < 0) 1534 return ret; 1535 if (spec) { 1536 memcpy(&id.vni[1], spec->vni, 3); 1537 vxlan_gpe.val.tunnel_id = id.vlan_id; 1538 memcpy(&id.vni[1], mask->vni, 3); 1539 vxlan_gpe.mask.tunnel_id = id.vlan_id; 1540 if (spec->protocol) 1541 return rte_flow_error_set 1542 (error, EINVAL, 1543 RTE_FLOW_ERROR_TYPE_ITEM, 1544 item, 1545 "VxLAN-GPE protocol not supported"); 1546 /* Remove unwanted bits from values. */ 1547 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id; 1548 } 1549 /* 1550 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this 1551 * layer is defined in the Verbs specification it is interpreted as 1552 * wildcard and all packets will match this rule, if it follows a full 1553 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers 1554 * before will also match this rule. To avoid such situation, VNI 0 1555 * is currently refused. 1556 */ 1557 if (!vxlan_gpe.val.tunnel_id) 1558 return rte_flow_error_set(error, EINVAL, 1559 RTE_FLOW_ERROR_TYPE_ITEM, 1560 item, 1561 "VXLAN-GPE vni cannot be 0"); 1562 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1563 return rte_flow_error_set(error, EINVAL, 1564 RTE_FLOW_ERROR_TYPE_ITEM, 1565 item, 1566 "VXLAN-GPE tunnel must be fully" 1567 " defined"); 1568 if (size <= flow_size) { 1569 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size); 1570 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1571 } 1572 flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE; 1573 return size; 1574 } 1575 1576 /** 1577 * Update the protocol in Verbs IPv4/IPv6 spec. 1578 * 1579 * @param[in, out] attr 1580 * Pointer to Verbs attributes structure. 1581 * @param[in] search 1582 * Specification type to search in order to update the IP protocol. 1583 * @param[in] protocol 1584 * Protocol value to set if none is present in the specification. 
1585 */ 1586 static void 1587 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr, 1588 enum ibv_flow_spec_type search, 1589 uint8_t protocol) 1590 { 1591 unsigned int i; 1592 struct ibv_spec_header *hdr = (struct ibv_spec_header *) 1593 ((uint8_t *)attr + sizeof(struct ibv_flow_attr)); 1594 1595 if (!attr) 1596 return; 1597 for (i = 0; i != attr->num_of_specs; ++i) { 1598 if (hdr->type == search) { 1599 union { 1600 struct ibv_flow_spec_ipv4_ext *ipv4; 1601 struct ibv_flow_spec_ipv6 *ipv6; 1602 } ip; 1603 1604 switch (search) { 1605 case IBV_FLOW_SPEC_IPV4_EXT: 1606 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr; 1607 if (!ip.ipv4->val.proto) { 1608 ip.ipv4->val.proto = protocol; 1609 ip.ipv4->mask.proto = 0xff; 1610 } 1611 break; 1612 case IBV_FLOW_SPEC_IPV6: 1613 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr; 1614 if (!ip.ipv6->val.next_hdr) { 1615 ip.ipv6->val.next_hdr = protocol; 1616 ip.ipv6->mask.next_hdr = 0xff; 1617 } 1618 break; 1619 default: 1620 break; 1621 } 1622 break; 1623 } 1624 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size); 1625 } 1626 } 1627 1628 /** 1629 * Convert the @p item into a Verbs specification after ensuring the NIC 1630 * will understand and process it correctly. 1631 * It will also update the previous L3 layer with the protocol value matching 1632 * the GRE. 1633 * If the necessary size for the conversion is greater than the @p flow_size, 1634 * nothing is written in @p flow, the validation is still performed. 1635 * 1636 * @param dev 1637 * Pointer to Ethernet device. 1638 * @param[in] item 1639 * Item specification. 1640 * @param[in, out] flow 1641 * Pointer to flow structure. 1642 * @param[in] flow_size 1643 * Size in bytes of the available space in @p flow, if too small, nothing is 1644 * written. 1645 * @param[out] error 1646 * Pointer to error structure. 1647 * 1648 * @return 1649 * On success the number of bytes consumed/necessary, if the returned value 1650 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1651 * otherwise another call with this returned memory size should be done. 1652 * On error, a negative errno value is returned and rte_errno is set. 
1653 */ 1654 static int 1655 mlx5_flow_item_gre(const struct rte_flow_item *item, 1656 struct rte_flow *flow, const size_t flow_size, 1657 struct rte_flow_error *error) 1658 { 1659 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 1660 const struct rte_flow_item_gre *spec = item->spec; 1661 const struct rte_flow_item_gre *mask = item->mask; 1662 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1663 unsigned int size = sizeof(struct ibv_flow_spec_gre); 1664 struct ibv_flow_spec_gre tunnel = { 1665 .type = IBV_FLOW_SPEC_GRE, 1666 .size = size, 1667 }; 1668 #else 1669 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1670 struct ibv_flow_spec_tunnel tunnel = { 1671 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1672 .size = size, 1673 }; 1674 #endif 1675 int ret; 1676 1677 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE) 1678 return rte_flow_error_set(error, ENOTSUP, 1679 RTE_FLOW_ERROR_TYPE_ITEM, 1680 item, 1681 "protocol filtering not compatible" 1682 " with this GRE layer"); 1683 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1684 return rte_flow_error_set(error, ENOTSUP, 1685 RTE_FLOW_ERROR_TYPE_ITEM, 1686 item, 1687 "a tunnel is already present"); 1688 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3)) 1689 return rte_flow_error_set(error, ENOTSUP, 1690 RTE_FLOW_ERROR_TYPE_ITEM, 1691 item, 1692 "L3 Layer is missing"); 1693 if (!mask) 1694 mask = &rte_flow_item_gre_mask; 1695 ret = mlx5_flow_item_acceptable 1696 (item, (const uint8_t *)mask, 1697 (const uint8_t *)&rte_flow_item_gre_mask, 1698 sizeof(struct rte_flow_item_gre), error); 1699 if (ret < 0) 1700 return ret; 1701 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1702 if (spec) { 1703 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver; 1704 tunnel.val.protocol = spec->protocol; 1705 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver; 1706 tunnel.mask.protocol = mask->protocol; 1707 /* Remove unwanted bits from values. */ 1708 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver; 1709 tunnel.val.protocol &= tunnel.mask.protocol; 1710 tunnel.val.key &= tunnel.mask.key; 1711 } 1712 #else 1713 if (spec && (spec->protocol & mask->protocol)) 1714 return rte_flow_error_set(error, ENOTSUP, 1715 RTE_FLOW_ERROR_TYPE_ITEM, 1716 item, 1717 "without MPLS support the" 1718 " specification cannot be used for" 1719 " filtering"); 1720 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1721 if (size <= flow_size) { 1722 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4) 1723 mlx5_flow_item_gre_ip_protocol_update 1724 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT, 1725 MLX5_IP_PROTOCOL_GRE); 1726 else 1727 mlx5_flow_item_gre_ip_protocol_update 1728 (verbs->attr, IBV_FLOW_SPEC_IPV6, 1729 MLX5_IP_PROTOCOL_GRE); 1730 mlx5_flow_spec_verbs_add(flow, &tunnel, size); 1731 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1732 } 1733 flow->layers |= MLX5_FLOW_LAYER_GRE; 1734 return size; 1735 } 1736 1737 /** 1738 * Convert the @p item into a Verbs specification after ensuring the NIC 1739 * will understand and process it correctly. 1740 * If the necessary size for the conversion is greater than the @p flow_size, 1741 * nothing is written in @p flow, the validation is still performed. 1742 * 1743 * @param[in] item 1744 * Item specification. 1745 * @param[in, out] flow 1746 * Pointer to flow structure. 1747 * @param[in] flow_size 1748 * Size in bytes of the available space in @p flow, if too small, nothing is 1749 * written. 1750 * @param[out] error 1751 * Pointer to error structure. 
1752 * 1753 * @return 1754 * On success the number of bytes consumed/necessary, if the returned value 1755 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1756 * otherwise another call with this returned memory size should be done. 1757 * On error, a negative errno value is returned and rte_errno is set. 1758 */ 1759 static int 1760 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused, 1761 struct rte_flow *flow __rte_unused, 1762 const size_t flow_size __rte_unused, 1763 struct rte_flow_error *error) 1764 { 1765 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1766 const struct rte_flow_item_mpls *spec = item->spec; 1767 const struct rte_flow_item_mpls *mask = item->mask; 1768 unsigned int size = sizeof(struct ibv_flow_spec_mpls); 1769 struct ibv_flow_spec_mpls mpls = { 1770 .type = IBV_FLOW_SPEC_MPLS, 1771 .size = size, 1772 }; 1773 int ret; 1774 1775 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS) 1776 return rte_flow_error_set(error, ENOTSUP, 1777 RTE_FLOW_ERROR_TYPE_ITEM, 1778 item, 1779 "protocol filtering not compatible" 1780 " with MPLS layer"); 1781 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1782 return rte_flow_error_set(error, ENOTSUP, 1783 RTE_FLOW_ERROR_TYPE_ITEM, 1784 item, 1785 "a tunnel is already" 1786 " present"); 1787 if (!mask) 1788 mask = &rte_flow_item_mpls_mask; 1789 ret = mlx5_flow_item_acceptable 1790 (item, (const uint8_t *)mask, 1791 (const uint8_t *)&rte_flow_item_mpls_mask, 1792 sizeof(struct rte_flow_item_mpls), error); 1793 if (ret < 0) 1794 return ret; 1795 if (spec) { 1796 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label)); 1797 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label)); 1798 /* Remove unwanted bits from values. */ 1799 mpls.val.label &= mpls.mask.label; 1800 } 1801 if (size <= flow_size) { 1802 mlx5_flow_spec_verbs_add(flow, &mpls, size); 1803 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1804 } 1805 flow->layers |= MLX5_FLOW_LAYER_MPLS; 1806 return size; 1807 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1808 return rte_flow_error_set(error, ENOTSUP, 1809 RTE_FLOW_ERROR_TYPE_ITEM, 1810 item, 1811 "MPLS is not supported by Verbs, please" 1812 " update."); 1813 } 1814 1815 /** 1816 * Convert the @p pattern into a Verbs specifications after ensuring the NIC 1817 * will understand and process it correctly. 1818 * The conversion is performed item per item, each of them is written into 1819 * the @p flow if its size is lesser or equal to @p flow_size. 1820 * Validation and memory consumption computation are still performed until the 1821 * end of @p pattern, unless an error is encountered. 1822 * 1823 * @param[in] pattern 1824 * Flow pattern. 1825 * @param[in, out] flow 1826 * Pointer to the rte_flow structure. 1827 * @param[in] flow_size 1828 * Size in bytes of the available space in @p flow, if too small some 1829 * garbage may be present. 1830 * @param[out] error 1831 * Pointer to error structure. 1832 * 1833 * @return 1834 * On success the number of bytes consumed/necessary, if the returned value 1835 * is lesser or equal to @p flow_size, the @pattern has fully been 1836 * converted, otherwise another call with this returned memory size should 1837 * be done. 1838 * On error, a negative errno value is returned and rte_errno is set. 
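 *
 * @note An empty pattern (only the END item) is converted into a single
 *   RTE_FLOW_ITEM_TYPE_ETH item without a spec, i.e. a match-all rule.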
1839 */ 1840 static int 1841 mlx5_flow_items(struct rte_eth_dev *dev, 1842 const struct rte_flow_item pattern[], 1843 struct rte_flow *flow, const size_t flow_size, 1844 struct rte_flow_error *error) 1845 { 1846 int remain = flow_size; 1847 size_t size = 0; 1848 1849 for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) { 1850 int ret = 0; 1851 1852 switch (pattern->type) { 1853 case RTE_FLOW_ITEM_TYPE_VOID: 1854 break; 1855 case RTE_FLOW_ITEM_TYPE_ETH: 1856 ret = mlx5_flow_item_eth(pattern, flow, remain, error); 1857 break; 1858 case RTE_FLOW_ITEM_TYPE_VLAN: 1859 ret = mlx5_flow_item_vlan(pattern, flow, remain, error); 1860 break; 1861 case RTE_FLOW_ITEM_TYPE_IPV4: 1862 ret = mlx5_flow_item_ipv4(pattern, flow, remain, error); 1863 break; 1864 case RTE_FLOW_ITEM_TYPE_IPV6: 1865 ret = mlx5_flow_item_ipv6(pattern, flow, remain, error); 1866 break; 1867 case RTE_FLOW_ITEM_TYPE_UDP: 1868 ret = mlx5_flow_item_udp(pattern, flow, remain, error); 1869 break; 1870 case RTE_FLOW_ITEM_TYPE_TCP: 1871 ret = mlx5_flow_item_tcp(pattern, flow, remain, error); 1872 break; 1873 case RTE_FLOW_ITEM_TYPE_VXLAN: 1874 ret = mlx5_flow_item_vxlan(pattern, flow, remain, 1875 error); 1876 break; 1877 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: 1878 ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow, 1879 remain, error); 1880 break; 1881 case RTE_FLOW_ITEM_TYPE_GRE: 1882 ret = mlx5_flow_item_gre(pattern, flow, remain, error); 1883 break; 1884 case RTE_FLOW_ITEM_TYPE_MPLS: 1885 ret = mlx5_flow_item_mpls(pattern, flow, remain, error); 1886 break; 1887 default: 1888 return rte_flow_error_set(error, ENOTSUP, 1889 RTE_FLOW_ERROR_TYPE_ITEM, 1890 pattern, 1891 "item not supported"); 1892 } 1893 if (ret < 0) 1894 return ret; 1895 if (remain > ret) 1896 remain -= ret; 1897 else 1898 remain = 0; 1899 size += ret; 1900 } 1901 if (!flow->layers) { 1902 const struct rte_flow_item item = { 1903 .type = RTE_FLOW_ITEM_TYPE_ETH, 1904 }; 1905 1906 return mlx5_flow_item_eth(&item, flow, flow_size, error); 1907 } 1908 return size; 1909 } 1910 1911 /** 1912 * Convert the @p action into a Verbs specification after ensuring the NIC 1913 * will understand and process it correctly. 1914 * If the necessary size for the conversion is greater than the @p flow_size, 1915 * nothing is written in @p flow, the validation is still performed. 1916 * 1917 * @param[in] action 1918 * Action configuration. 1919 * @param[in, out] flow 1920 * Pointer to flow structure. 1921 * @param[in] flow_size 1922 * Size in bytes of the available space in @p flow, if too small, nothing is 1923 * written. 1924 * @param[out] error 1925 * Pointer to error structure. 1926 * 1927 * @return 1928 * On success the number of bytes consumed/necessary, if the returned value 1929 * is lesser or equal to @p flow_size, the @p action has fully been 1930 * converted, otherwise another call with this returned memory size should 1931 * be done. 1932 * On error, a negative errno value is returned and rte_errno is set. 
1933 */ 1934 static int 1935 mlx5_flow_action_drop(const struct rte_flow_action *action, 1936 struct rte_flow *flow, const size_t flow_size, 1937 struct rte_flow_error *error) 1938 { 1939 unsigned int size = sizeof(struct ibv_flow_spec_action_drop); 1940 struct ibv_flow_spec_action_drop drop = { 1941 .type = IBV_FLOW_SPEC_ACTION_DROP, 1942 .size = size, 1943 }; 1944 1945 if (flow->fate) 1946 return rte_flow_error_set(error, ENOTSUP, 1947 RTE_FLOW_ERROR_TYPE_ACTION, 1948 action, 1949 "multiple fate actions are not" 1950 " supported"); 1951 if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) 1952 return rte_flow_error_set(error, ENOTSUP, 1953 RTE_FLOW_ERROR_TYPE_ACTION, 1954 action, 1955 "drop is not compatible with" 1956 " flag/mark action"); 1957 if (size < flow_size) 1958 mlx5_flow_spec_verbs_add(flow, &drop, size); 1959 flow->fate |= MLX5_FLOW_FATE_DROP; 1960 return size; 1961 } 1962 1963 /** 1964 * Convert the @p action into @p flow after ensuring the NIC will understand 1965 * and process it correctly. 1966 * 1967 * @param[in] dev 1968 * Pointer to Ethernet device structure. 1969 * @param[in] action 1970 * Action configuration. 1971 * @param[in, out] flow 1972 * Pointer to flow structure. 1973 * @param[out] error 1974 * Pointer to error structure. 1975 * 1976 * @return 1977 * 0 on success, a negative errno value otherwise and rte_errno is set. 1978 */ 1979 static int 1980 mlx5_flow_action_queue(struct rte_eth_dev *dev, 1981 const struct rte_flow_action *action, 1982 struct rte_flow *flow, 1983 struct rte_flow_error *error) 1984 { 1985 struct priv *priv = dev->data->dev_private; 1986 const struct rte_flow_action_queue *queue = action->conf; 1987 1988 if (flow->fate) 1989 return rte_flow_error_set(error, ENOTSUP, 1990 RTE_FLOW_ERROR_TYPE_ACTION, 1991 action, 1992 "multiple fate actions are not" 1993 " supported"); 1994 if (queue->index >= priv->rxqs_n) 1995 return rte_flow_error_set(error, EINVAL, 1996 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1997 &queue->index, 1998 "queue index out of range"); 1999 if (!(*priv->rxqs)[queue->index]) 2000 return rte_flow_error_set(error, EINVAL, 2001 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2002 &queue->index, 2003 "queue is not configured"); 2004 if (flow->queue) 2005 (*flow->queue)[0] = queue->index; 2006 flow->rss.queue_num = 1; 2007 flow->fate |= MLX5_FLOW_FATE_QUEUE; 2008 return 0; 2009 } 2010 2011 /** 2012 * Ensure the @p action will be understood and used correctly by the NIC. 2013 * 2014 * @param dev 2015 * Pointer to Ethernet device structure. 2016 * @param action[in] 2017 * Pointer to flow actions array. 2018 * @param flow[in, out] 2019 * Pointer to the rte_flow structure. 2020 * @param error[in, out] 2021 * Pointer to error structure. 2022 * 2023 * @return 2024 * On success @p flow->queue array and @p flow->rss are filled and valid. 2025 * On error, a negative errno value is returned and rte_errno is set. 
2026 */ 2027 static int 2028 mlx5_flow_action_rss(struct rte_eth_dev *dev, 2029 const struct rte_flow_action *action, 2030 struct rte_flow *flow, 2031 struct rte_flow_error *error) 2032 { 2033 struct priv *priv = dev->data->dev_private; 2034 const struct rte_flow_action_rss *rss = action->conf; 2035 unsigned int i; 2036 2037 if (flow->fate) 2038 return rte_flow_error_set(error, ENOTSUP, 2039 RTE_FLOW_ERROR_TYPE_ACTION, 2040 action, 2041 "multiple fate actions are not" 2042 " supported"); 2043 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 2044 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 2045 return rte_flow_error_set(error, ENOTSUP, 2046 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2047 &rss->func, 2048 "RSS hash function not supported"); 2049 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 2050 if (rss->level > 2) 2051 #else 2052 if (rss->level > 1) 2053 #endif 2054 return rte_flow_error_set(error, ENOTSUP, 2055 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2056 &rss->level, 2057 "tunnel RSS is not supported"); 2058 if (rss->key_len < MLX5_RSS_HASH_KEY_LEN) 2059 return rte_flow_error_set(error, ENOTSUP, 2060 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2061 &rss->key_len, 2062 "RSS hash key too small"); 2063 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 2064 return rte_flow_error_set(error, ENOTSUP, 2065 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2066 &rss->key_len, 2067 "RSS hash key too large"); 2068 if (!rss->queue_num) 2069 return rte_flow_error_set(error, ENOTSUP, 2070 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2071 rss, 2072 "no queues were provided for RSS"); 2073 if (rss->queue_num > priv->config.ind_table_max_size) 2074 return rte_flow_error_set(error, ENOTSUP, 2075 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2076 &rss->queue_num, 2077 "number of queues too large"); 2078 if (rss->types & MLX5_RSS_HF_MASK) 2079 return rte_flow_error_set(error, ENOTSUP, 2080 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2081 &rss->types, 2082 "some RSS protocols are not" 2083 " supported"); 2084 for (i = 0; i != rss->queue_num; ++i) { 2085 if (rss->queue[i] >= priv->rxqs_n) 2086 return rte_flow_error_set 2087 (error, EINVAL, 2088 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2089 rss, 2090 "queue index out of range"); 2091 if (!(*priv->rxqs)[rss->queue[i]]) 2092 return rte_flow_error_set 2093 (error, EINVAL, 2094 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2095 &rss->queue[i], 2096 "queue is not configured"); 2097 } 2098 if (flow->queue) 2099 memcpy((*flow->queue), rss->queue, 2100 rss->queue_num * sizeof(uint16_t)); 2101 flow->rss.queue_num = rss->queue_num; 2102 memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN); 2103 flow->rss.types = rss->types; 2104 flow->rss.level = rss->level; 2105 flow->fate |= MLX5_FLOW_FATE_RSS; 2106 return 0; 2107 } 2108 2109 /** 2110 * Convert the @p action into a Verbs specification after ensuring the NIC 2111 * will understand and process it correctly. 2112 * If the necessary size for the conversion is greater than the @p flow_size, 2113 * nothing is written in @p flow, the validation is still performed. 2114 * 2115 * @param[in] action 2116 * Action configuration. 2117 * @param[in, out] flow 2118 * Pointer to flow structure. 2119 * @param[in] flow_size 2120 * Size in bytes of the available space in @p flow, if too small, nothing is 2121 * written. 2122 * @param[out] error 2123 * Pointer to error structure. 2124 * 2125 * @return 2126 * On success the number of bytes consumed/necessary, if the returned value 2127 * is lesser or equal to @p flow_size, the @p action has fully been 2128 * converted, otherwise another call with this returned memory size should 2129 * be done. 
2130 * On error, a negative errno value is returned and rte_errno is set. 2131 */ 2132 static int 2133 mlx5_flow_action_flag(const struct rte_flow_action *action, 2134 struct rte_flow *flow, const size_t flow_size, 2135 struct rte_flow_error *error) 2136 { 2137 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 2138 struct ibv_flow_spec_action_tag tag = { 2139 .type = IBV_FLOW_SPEC_ACTION_TAG, 2140 .size = size, 2141 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT), 2142 }; 2143 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 2144 2145 if (flow->modifier & MLX5_FLOW_MOD_FLAG) 2146 return rte_flow_error_set(error, ENOTSUP, 2147 RTE_FLOW_ERROR_TYPE_ACTION, 2148 action, 2149 "flag action already present"); 2150 if (flow->fate & MLX5_FLOW_FATE_DROP) 2151 return rte_flow_error_set(error, ENOTSUP, 2152 RTE_FLOW_ERROR_TYPE_ACTION, 2153 action, 2154 "flag is not compatible with drop" 2155 " action"); 2156 if (flow->modifier & MLX5_FLOW_MOD_MARK) 2157 size = 0; 2158 else if (size <= flow_size && verbs) 2159 mlx5_flow_spec_verbs_add(flow, &tag, size); 2160 flow->modifier |= MLX5_FLOW_MOD_FLAG; 2161 return size; 2162 } 2163 2164 /** 2165 * Update verbs specification to modify the flag to mark. 2166 * 2167 * @param[in, out] verbs 2168 * Pointer to the mlx5_flow_verbs structure. 2169 * @param[in] mark_id 2170 * Mark identifier to replace the flag. 2171 */ 2172 static void 2173 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id) 2174 { 2175 struct ibv_spec_header *hdr; 2176 int i; 2177 2178 if (!verbs) 2179 return; 2180 /* Update Verbs specification. */ 2181 hdr = (struct ibv_spec_header *)verbs->specs; 2182 if (!hdr) 2183 return; 2184 for (i = 0; i != verbs->attr->num_of_specs; ++i) { 2185 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) { 2186 struct ibv_flow_spec_action_tag *t = 2187 (struct ibv_flow_spec_action_tag *)hdr; 2188 2189 t->tag_id = mlx5_flow_mark_set(mark_id); 2190 } 2191 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size); 2192 } 2193 } 2194 2195 /** 2196 * Convert the @p action into @p flow (or by updating the already present 2197 * Flag Verbs specification) after ensuring the NIC will understand and 2198 * process it correctly. 2199 * If the necessary size for the conversion is greater than the @p flow_size, 2200 * nothing is written in @p flow, the validation is still performed. 2201 * 2202 * @param[in] action 2203 * Action configuration. 2204 * @param[in, out] flow 2205 * Pointer to flow structure. 2206 * @param[in] flow_size 2207 * Size in bytes of the available space in @p flow, if too small, nothing is 2208 * written. 2209 * @param[out] error 2210 * Pointer to error structure. 2211 * 2212 * @return 2213 * On success the number of bytes consumed/necessary, if the returned value 2214 * is lesser or equal to @p flow_size, the @p action has fully been 2215 * converted, otherwise another call with this returned memory size should 2216 * be done. 2217 * On error, a negative errno value is returned and rte_errno is set. 
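 *
 * @note When a FLAG action has already been converted, its tag
 *   specification is rewritten in place with the requested mark identifier
 *   through mlx5_flow_verbs_mark_update() and no extra specification is
 *   added (the returned size is then 0).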
 */
static int
mlx5_flow_action_mark(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
	};
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
		mlx5_flow_verbs_mark_update(verbs, mark->id);
		size = 0;
	} else if (size <= flow_size) {
		tag.tag_id = mlx5_flow_mark_set(mark->id);
		mlx5_flow_spec_verbs_add(flow, &tag, size);
	}
	flow->modifier |= MLX5_FLOW_MOD_MARK;
	return size;
}

/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
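 *
 * @note The counter object is obtained through mlx5_flow_counter_new(),
 *   honouring the shared flag and identifier from the action configuration,
 *   before the Verbs specification is written.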
2287 */ 2288 static int 2289 mlx5_flow_action_count(struct rte_eth_dev *dev, 2290 const struct rte_flow_action *action, 2291 struct rte_flow *flow, 2292 const size_t flow_size __rte_unused, 2293 struct rte_flow_error *error) 2294 { 2295 const struct rte_flow_action_count *count = action->conf; 2296 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2297 unsigned int size = sizeof(struct ibv_flow_spec_counter_action); 2298 struct ibv_flow_spec_counter_action counter = { 2299 .type = IBV_FLOW_SPEC_ACTION_COUNT, 2300 .size = size, 2301 }; 2302 #endif 2303 2304 if (!flow->counter) { 2305 flow->counter = mlx5_flow_counter_new(dev, count->shared, 2306 count->id); 2307 if (!flow->counter) 2308 return rte_flow_error_set(error, ENOTSUP, 2309 RTE_FLOW_ERROR_TYPE_ACTION, 2310 action, 2311 "cannot get counter" 2312 " context."); 2313 } 2314 if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en) 2315 return rte_flow_error_set(error, ENOTSUP, 2316 RTE_FLOW_ERROR_TYPE_ACTION, 2317 action, 2318 "flow counters are not supported."); 2319 flow->modifier |= MLX5_FLOW_MOD_COUNT; 2320 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2321 counter.counter_set_handle = flow->counter->cs->handle; 2322 if (size <= flow_size) 2323 mlx5_flow_spec_verbs_add(flow, &counter, size); 2324 return size; 2325 #endif 2326 return 0; 2327 } 2328 2329 /** 2330 * Convert the @p action into @p flow after ensuring the NIC will understand 2331 * and process it correctly. 2332 * The conversion is performed action per action, each of them is written into 2333 * the @p flow if its size is lesser or equal to @p flow_size. 2334 * Validation and memory consumption computation are still performed until the 2335 * end of @p action, unless an error is encountered. 2336 * 2337 * @param[in] dev 2338 * Pointer to Ethernet device structure. 2339 * @param[in] actions 2340 * Pointer to flow actions array. 2341 * @param[in, out] flow 2342 * Pointer to the rte_flow structure. 2343 * @param[in] flow_size 2344 * Size in bytes of the available space in @p flow, if too small some 2345 * garbage may be present. 2346 * @param[out] error 2347 * Pointer to error structure. 2348 * 2349 * @return 2350 * On success the number of bytes consumed/necessary, if the returned value 2351 * is lesser or equal to @p flow_size, the @p actions has fully been 2352 * converted, otherwise another call with this returned memory size should 2353 * be done. 2354 * On error, a negative errno value is returned and rte_errno is set. 
2355 */ 2356 static int 2357 mlx5_flow_actions(struct rte_eth_dev *dev, 2358 const struct rte_flow_action actions[], 2359 struct rte_flow *flow, const size_t flow_size, 2360 struct rte_flow_error *error) 2361 { 2362 size_t size = 0; 2363 int remain = flow_size; 2364 int ret = 0; 2365 2366 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2367 switch (actions->type) { 2368 case RTE_FLOW_ACTION_TYPE_VOID: 2369 break; 2370 case RTE_FLOW_ACTION_TYPE_FLAG: 2371 ret = mlx5_flow_action_flag(actions, flow, remain, 2372 error); 2373 break; 2374 case RTE_FLOW_ACTION_TYPE_MARK: 2375 ret = mlx5_flow_action_mark(actions, flow, remain, 2376 error); 2377 break; 2378 case RTE_FLOW_ACTION_TYPE_DROP: 2379 ret = mlx5_flow_action_drop(actions, flow, remain, 2380 error); 2381 break; 2382 case RTE_FLOW_ACTION_TYPE_QUEUE: 2383 ret = mlx5_flow_action_queue(dev, actions, flow, error); 2384 break; 2385 case RTE_FLOW_ACTION_TYPE_RSS: 2386 ret = mlx5_flow_action_rss(dev, actions, flow, error); 2387 break; 2388 case RTE_FLOW_ACTION_TYPE_COUNT: 2389 ret = mlx5_flow_action_count(dev, actions, flow, remain, 2390 error); 2391 break; 2392 default: 2393 return rte_flow_error_set(error, ENOTSUP, 2394 RTE_FLOW_ERROR_TYPE_ACTION, 2395 actions, 2396 "action not supported"); 2397 } 2398 if (ret < 0) 2399 return ret; 2400 if (remain > ret) 2401 remain -= ret; 2402 else 2403 remain = 0; 2404 size += ret; 2405 } 2406 if (!flow->fate) 2407 return rte_flow_error_set(error, ENOTSUP, 2408 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2409 NULL, 2410 "no fate action found"); 2411 return size; 2412 } 2413 2414 /** 2415 * Validate flow rule and fill flow structure accordingly. 2416 * 2417 * @param dev 2418 * Pointer to Ethernet device. 2419 * @param[out] flow 2420 * Pointer to flow structure. 2421 * @param flow_size 2422 * Size of allocated space for @p flow. 2423 * @param[in] attr 2424 * Flow rule attributes. 2425 * @param[in] pattern 2426 * Pattern specification (list terminated by the END pattern item). 2427 * @param[in] actions 2428 * Associated actions (list terminated by the END action). 2429 * @param[out] error 2430 * Perform verbose error reporting if not NULL. 2431 * 2432 * @return 2433 * A positive value representing the size of the flow object in bytes 2434 * regardless of @p flow_size on success, a negative errno value otherwise 2435 * and rte_errno is set. 2436 */ 2437 static int 2438 mlx5_flow_merge_switch(struct rte_eth_dev *dev, 2439 struct rte_flow *flow, 2440 size_t flow_size, 2441 const struct rte_flow_attr *attr, 2442 const struct rte_flow_item pattern[], 2443 const struct rte_flow_action actions[], 2444 struct rte_flow_error *error) 2445 { 2446 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0); 2447 uint16_t port_id[!n + n]; 2448 struct mlx5_nl_flow_ptoi ptoi[!n + n + 1]; 2449 size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t)); 2450 unsigned int i; 2451 unsigned int own = 0; 2452 int ret; 2453 2454 /* At least one port is needed when no switch domain is present. */ 2455 if (!n) { 2456 n = 1; 2457 port_id[0] = dev->data->port_id; 2458 } else { 2459 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n); 2460 } 2461 for (i = 0; i != n; ++i) { 2462 struct rte_eth_dev_info dev_info; 2463 2464 rte_eth_dev_info_get(port_id[i], &dev_info); 2465 if (port_id[i] == dev->data->port_id) 2466 own = i; 2467 ptoi[i].port_id = port_id[i]; 2468 ptoi[i].ifindex = dev_info.if_index; 2469 } 2470 /* Ensure first entry of ptoi[] is the current device. 
 */
	if (own) {
		ptoi[n] = ptoi[0];
		ptoi[0] = ptoi[own];
		ptoi[own] = ptoi[n];
	}
	/* An entry with zero ifindex terminates ptoi[]. */
	ptoi[n].port_id = 0;
	ptoi[n].ifindex = 0;
	if (flow_size < off)
		flow_size = 0;
	ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
				     flow_size ? flow_size - off : 0,
				     ptoi, attr, pattern, actions, error);
	if (ret < 0)
		return ret;
	if (flow_size) {
		*flow = (struct rte_flow){
			.attributes = *attr,
			.nl_flow = (uint8_t *)flow + off,
		};
		/*
		 * Generate a reasonably unique handle based on the address
		 * of the target buffer.
		 *
		 * This is straightforward on 32-bit systems where the flow
		 * pointer can be used directly. Otherwise, the pointer is
		 * first shifted right by the base-2 logarithm of the
		 * previous power of two of the pointed buffer size and the
		 * least significant part of the result is used.
		 */
		if (sizeof(flow) <= 4)
			mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
		else
			mlx5_nl_flow_brand
				(flow->nl_flow,
				 (uintptr_t)flow >>
				 rte_log2_u32(rte_align32prevpow2(flow_size)));
	}
	return off + ret;
}

static unsigned int
mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
{
	const struct rte_flow_item *item;
	unsigned int has_vlan = 0;

	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
		if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
			has_vlan = 1;
			break;
		}
	}
	if (has_vlan)
		return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
				       MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
			       MLX5_EXPANSION_ROOT_OUTER;
}

/**
 * Convert the @p attributes, @p pattern and @p actions into a flow for the
 * NIC after ensuring the NIC will understand and process it correctly.
 * The conversion is performed item per item and action per action; each of
 * them is written into the @p flow if its size is lesser or equal to @p
 * flow_size.
 * Validation and memory consumption computation are still performed until the
 * end, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[in] attributes
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the flow has fully been converted and
 *   can be applied, otherwise another call with this returned memory size
 *   should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
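 *
 * The expected calling sequence, as implemented by mlx5_flow_list_create()
 * below, is a first call without a buffer to compute the required size,
 * followed by a second call on memory of at least that size:
 *
 * @code
 *	int ret = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, error);
 *
 *	if (ret < 0)
 *		return ret;
 *	flow = rte_calloc(__func__, 1, ret, 0);
 *	if (flow)
 *		ret = mlx5_flow_merge(dev, flow, ret, attr, pattern,
 *				      actions, error);
 * @endcode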
2561 */ 2562 static int 2563 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow, 2564 const size_t flow_size, 2565 const struct rte_flow_attr *attributes, 2566 const struct rte_flow_item pattern[], 2567 const struct rte_flow_action actions[], 2568 struct rte_flow_error *error) 2569 { 2570 struct rte_flow local_flow = { .layers = 0, }; 2571 size_t size = sizeof(*flow); 2572 union { 2573 struct rte_flow_expand_rss buf; 2574 uint8_t buffer[2048]; 2575 } expand_buffer; 2576 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 2577 struct mlx5_flow_verbs *original_verbs = NULL; 2578 size_t original_verbs_size = 0; 2579 uint32_t original_layers = 0; 2580 int expanded_pattern_idx = 0; 2581 int ret; 2582 uint32_t i; 2583 2584 if (attributes->transfer) 2585 return mlx5_flow_merge_switch(dev, flow, flow_size, 2586 attributes, pattern, 2587 actions, error); 2588 if (size > flow_size) 2589 flow = &local_flow; 2590 ret = mlx5_flow_attributes(dev, attributes, flow, error); 2591 if (ret < 0) 2592 return ret; 2593 ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error); 2594 if (ret < 0) 2595 return ret; 2596 if (local_flow.rss.types) { 2597 unsigned int graph_root; 2598 2599 graph_root = mlx5_find_graph_root(pattern, 2600 local_flow.rss.level); 2601 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 2602 pattern, local_flow.rss.types, 2603 mlx5_support_expansion, 2604 graph_root); 2605 assert(ret > 0 && 2606 (unsigned int)ret < sizeof(expand_buffer.buffer)); 2607 } else { 2608 buf->entries = 1; 2609 buf->entry[0].pattern = (void *)(uintptr_t)pattern; 2610 } 2611 size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t), 2612 sizeof(void *)); 2613 if (size <= flow_size) 2614 flow->queue = (void *)(flow + 1); 2615 LIST_INIT(&flow->verbs); 2616 flow->layers = 0; 2617 flow->modifier = 0; 2618 flow->fate = 0; 2619 for (i = 0; i != buf->entries; ++i) { 2620 size_t off = size; 2621 size_t off2; 2622 2623 flow->layers = original_layers; 2624 size += sizeof(struct ibv_flow_attr) + 2625 sizeof(struct mlx5_flow_verbs); 2626 off2 = size; 2627 if (size < flow_size) { 2628 flow->cur_verbs = (void *)((uintptr_t)flow + off); 2629 flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1); 2630 flow->cur_verbs->specs = 2631 (void *)(flow->cur_verbs->attr + 1); 2632 } 2633 /* First iteration convert the pattern into Verbs. */ 2634 if (i == 0) { 2635 /* Actions don't need to be converted several time. */ 2636 ret = mlx5_flow_actions(dev, actions, flow, 2637 (size < flow_size) ? 2638 flow_size - size : 0, 2639 error); 2640 if (ret < 0) 2641 return ret; 2642 size += ret; 2643 } else { 2644 /* 2645 * Next iteration means the pattern has already been 2646 * converted and an expansion is necessary to match 2647 * the user RSS request. For that only the expanded 2648 * items will be converted, the common part with the 2649 * user pattern are just copied into the next buffer 2650 * zone. 2651 */ 2652 size += original_verbs_size; 2653 if (size < flow_size) { 2654 rte_memcpy(flow->cur_verbs->attr, 2655 original_verbs->attr, 2656 original_verbs_size + 2657 sizeof(struct ibv_flow_attr)); 2658 flow->cur_verbs->size = original_verbs_size; 2659 } 2660 } 2661 ret = mlx5_flow_items 2662 (dev, 2663 (const struct rte_flow_item *) 2664 &buf->entry[i].pattern[expanded_pattern_idx], 2665 flow, 2666 (size < flow_size) ? 
flow_size - size : 0, error); 2667 if (ret < 0) 2668 return ret; 2669 size += ret; 2670 if (size <= flow_size) { 2671 mlx5_flow_adjust_priority(dev, flow); 2672 LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next); 2673 } 2674 /* 2675 * Keep a pointer of the first verbs conversion and the layers 2676 * it has encountered. 2677 */ 2678 if (i == 0) { 2679 original_verbs = flow->cur_verbs; 2680 original_verbs_size = size - off2; 2681 original_layers = flow->layers; 2682 /* 2683 * move the index of the expanded pattern to the 2684 * first item not addressed yet. 2685 */ 2686 if (pattern->type == RTE_FLOW_ITEM_TYPE_END) { 2687 expanded_pattern_idx++; 2688 } else { 2689 const struct rte_flow_item *item = pattern; 2690 2691 for (item = pattern; 2692 item->type != RTE_FLOW_ITEM_TYPE_END; 2693 ++item) 2694 expanded_pattern_idx++; 2695 } 2696 } 2697 } 2698 /* Restore the origin layers in the flow. */ 2699 flow->layers = original_layers; 2700 return size; 2701 } 2702 2703 /** 2704 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 2705 * if several tunnel rules are used on this queue, the tunnel ptype will be 2706 * cleared. 2707 * 2708 * @param rxq_ctrl 2709 * Rx queue to update. 2710 */ 2711 static void 2712 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 2713 { 2714 unsigned int i; 2715 uint32_t tunnel_ptype = 0; 2716 2717 /* Look up for the ptype to use. */ 2718 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 2719 if (!rxq_ctrl->flow_tunnels_n[i]) 2720 continue; 2721 if (!tunnel_ptype) { 2722 tunnel_ptype = tunnels_info[i].ptype; 2723 } else { 2724 tunnel_ptype = 0; 2725 break; 2726 } 2727 } 2728 rxq_ctrl->rxq.tunnel = tunnel_ptype; 2729 } 2730 2731 /** 2732 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow. 2733 * 2734 * @param[in] dev 2735 * Pointer to Ethernet device. 2736 * @param[in] flow 2737 * Pointer to flow structure. 2738 */ 2739 static void 2740 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 2741 { 2742 struct priv *priv = dev->data->dev_private; 2743 const int mark = !!(flow->modifier & 2744 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2745 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2746 unsigned int i; 2747 2748 for (i = 0; i != flow->rss.queue_num; ++i) { 2749 int idx = (*flow->queue)[i]; 2750 struct mlx5_rxq_ctrl *rxq_ctrl = 2751 container_of((*priv->rxqs)[idx], 2752 struct mlx5_rxq_ctrl, rxq); 2753 2754 if (mark) { 2755 rxq_ctrl->rxq.mark = 1; 2756 rxq_ctrl->flow_mark_n++; 2757 } 2758 if (tunnel) { 2759 unsigned int j; 2760 2761 /* Increase the counter matching the flow. */ 2762 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2763 if ((tunnels_info[j].tunnel & flow->layers) == 2764 tunnels_info[j].tunnel) { 2765 rxq_ctrl->flow_tunnels_n[j]++; 2766 break; 2767 } 2768 } 2769 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2770 } 2771 } 2772 } 2773 2774 /** 2775 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 2776 * @p flow if no other flow uses it with the same kind of request. 2777 * 2778 * @param dev 2779 * Pointer to Ethernet device. 2780 * @param[in] flow 2781 * Pointer to the flow. 
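 *
 * @note The device is expected to be started, otherwise the per-queue
 *   counters (flow_mark_n, flow_tunnels_n[]) are already clean and this
 *   function must not be called.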
2782 */ 2783 static void 2784 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 2785 { 2786 struct priv *priv = dev->data->dev_private; 2787 const int mark = !!(flow->modifier & 2788 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2789 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2790 unsigned int i; 2791 2792 assert(dev->data->dev_started); 2793 for (i = 0; i != flow->rss.queue_num; ++i) { 2794 int idx = (*flow->queue)[i]; 2795 struct mlx5_rxq_ctrl *rxq_ctrl = 2796 container_of((*priv->rxqs)[idx], 2797 struct mlx5_rxq_ctrl, rxq); 2798 2799 if (mark) { 2800 rxq_ctrl->flow_mark_n--; 2801 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 2802 } 2803 if (tunnel) { 2804 unsigned int j; 2805 2806 /* Decrease the counter matching the flow. */ 2807 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2808 if ((tunnels_info[j].tunnel & flow->layers) == 2809 tunnels_info[j].tunnel) { 2810 rxq_ctrl->flow_tunnels_n[j]--; 2811 break; 2812 } 2813 } 2814 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2815 } 2816 } 2817 } 2818 2819 /** 2820 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 2821 * 2822 * @param dev 2823 * Pointer to Ethernet device. 2824 */ 2825 static void 2826 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev) 2827 { 2828 struct priv *priv = dev->data->dev_private; 2829 unsigned int i; 2830 2831 for (i = 0; i != priv->rxqs_n; ++i) { 2832 struct mlx5_rxq_ctrl *rxq_ctrl; 2833 unsigned int j; 2834 2835 if (!(*priv->rxqs)[i]) 2836 continue; 2837 rxq_ctrl = container_of((*priv->rxqs)[i], 2838 struct mlx5_rxq_ctrl, rxq); 2839 rxq_ctrl->flow_mark_n = 0; 2840 rxq_ctrl->rxq.mark = 0; 2841 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 2842 rxq_ctrl->flow_tunnels_n[j] = 0; 2843 rxq_ctrl->rxq.tunnel = 0; 2844 } 2845 } 2846 2847 /** 2848 * Validate a flow supported by the NIC. 2849 * 2850 * @see rte_flow_validate() 2851 * @see rte_flow_ops 2852 */ 2853 int 2854 mlx5_flow_validate(struct rte_eth_dev *dev, 2855 const struct rte_flow_attr *attr, 2856 const struct rte_flow_item items[], 2857 const struct rte_flow_action actions[], 2858 struct rte_flow_error *error) 2859 { 2860 int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error); 2861 2862 if (ret < 0) 2863 return ret; 2864 return 0; 2865 } 2866 2867 /** 2868 * Remove the flow. 2869 * 2870 * @param[in] dev 2871 * Pointer to Ethernet device. 2872 * @param[in, out] flow 2873 * Pointer to flow structure. 2874 */ 2875 static void 2876 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2877 { 2878 struct priv *priv = dev->data->dev_private; 2879 struct mlx5_flow_verbs *verbs; 2880 2881 if (flow->nl_flow && priv->mnl_socket) 2882 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL); 2883 LIST_FOREACH(verbs, &flow->verbs, next) { 2884 if (verbs->flow) { 2885 claim_zero(mlx5_glue->destroy_flow(verbs->flow)); 2886 verbs->flow = NULL; 2887 } 2888 if (verbs->hrxq) { 2889 if (flow->fate & MLX5_FLOW_FATE_DROP) 2890 mlx5_hrxq_drop_release(dev); 2891 else 2892 mlx5_hrxq_release(dev, verbs->hrxq); 2893 verbs->hrxq = NULL; 2894 } 2895 } 2896 if (flow->counter) { 2897 mlx5_flow_counter_release(flow->counter); 2898 flow->counter = NULL; 2899 } 2900 } 2901 2902 /** 2903 * Apply the flow. 2904 * 2905 * @param[in] dev 2906 * Pointer to Ethernet device structure. 2907 * @param[in, out] flow 2908 * Pointer to flow structure. 2909 * @param[out] error 2910 * Pointer to error structure. 2911 * 2912 * @return 2913 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2914 */ 2915 static int 2916 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2917 struct rte_flow_error *error) 2918 { 2919 struct priv *priv = dev->data->dev_private; 2920 struct mlx5_flow_verbs *verbs; 2921 int err; 2922 2923 LIST_FOREACH(verbs, &flow->verbs, next) { 2924 if (flow->fate & MLX5_FLOW_FATE_DROP) { 2925 verbs->hrxq = mlx5_hrxq_drop_new(dev); 2926 if (!verbs->hrxq) { 2927 rte_flow_error_set 2928 (error, errno, 2929 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2930 NULL, 2931 "cannot get drop hash queue"); 2932 goto error; 2933 } 2934 } else { 2935 struct mlx5_hrxq *hrxq; 2936 2937 hrxq = mlx5_hrxq_get(dev, flow->key, 2938 MLX5_RSS_HASH_KEY_LEN, 2939 verbs->hash_fields, 2940 (*flow->queue), 2941 flow->rss.queue_num); 2942 if (!hrxq) 2943 hrxq = mlx5_hrxq_new(dev, flow->key, 2944 MLX5_RSS_HASH_KEY_LEN, 2945 verbs->hash_fields, 2946 (*flow->queue), 2947 flow->rss.queue_num, 2948 !!(flow->layers & 2949 MLX5_FLOW_LAYER_TUNNEL)); 2950 if (!hrxq) { 2951 rte_flow_error_set 2952 (error, rte_errno, 2953 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2954 NULL, 2955 "cannot get hash queue"); 2956 goto error; 2957 } 2958 verbs->hrxq = hrxq; 2959 } 2960 verbs->flow = 2961 mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr); 2962 if (!verbs->flow) { 2963 rte_flow_error_set(error, errno, 2964 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2965 NULL, 2966 "hardware refuses to create flow"); 2967 goto error; 2968 } 2969 } 2970 if (flow->nl_flow && 2971 priv->mnl_socket && 2972 mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error)) 2973 goto error; 2974 return 0; 2975 error: 2976 err = rte_errno; /* Save rte_errno before cleanup. */ 2977 LIST_FOREACH(verbs, &flow->verbs, next) { 2978 if (verbs->hrxq) { 2979 if (flow->fate & MLX5_FLOW_FATE_DROP) 2980 mlx5_hrxq_drop_release(dev); 2981 else 2982 mlx5_hrxq_release(dev, verbs->hrxq); 2983 verbs->hrxq = NULL; 2984 } 2985 } 2986 rte_errno = err; /* Restore rte_errno. */ 2987 return -rte_errno; 2988 } 2989 2990 /** 2991 * Create a flow and add it to @p list. 2992 * 2993 * @param dev 2994 * Pointer to Ethernet device. 2995 * @param list 2996 * Pointer to a TAILQ flow list. 2997 * @param[in] attr 2998 * Flow rule attributes. 2999 * @param[in] items 3000 * Pattern specification (list terminated by the END pattern item). 3001 * @param[in] actions 3002 * Associated actions (list terminated by the END action). 3003 * @param[out] error 3004 * Perform verbose error reporting if not NULL. 3005 * 3006 * @return 3007 * A flow on success, NULL otherwise and rte_errno is set. 
3008 */ 3009 static struct rte_flow * 3010 mlx5_flow_list_create(struct rte_eth_dev *dev, 3011 struct mlx5_flows *list, 3012 const struct rte_flow_attr *attr, 3013 const struct rte_flow_item items[], 3014 const struct rte_flow_action actions[], 3015 struct rte_flow_error *error) 3016 { 3017 struct rte_flow *flow = NULL; 3018 size_t size = 0; 3019 int ret; 3020 3021 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 3022 if (ret < 0) 3023 return NULL; 3024 size = ret; 3025 flow = rte_calloc(__func__, 1, size, 0); 3026 if (!flow) { 3027 rte_flow_error_set(error, ENOMEM, 3028 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3029 NULL, 3030 "not enough memory to create flow"); 3031 return NULL; 3032 } 3033 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 3034 if (ret < 0) { 3035 rte_free(flow); 3036 return NULL; 3037 } 3038 assert((size_t)ret == size); 3039 if (dev->data->dev_started) { 3040 ret = mlx5_flow_apply(dev, flow, error); 3041 if (ret < 0) { 3042 ret = rte_errno; /* Save rte_errno before cleanup. */ 3043 if (flow) { 3044 mlx5_flow_remove(dev, flow); 3045 rte_free(flow); 3046 } 3047 rte_errno = ret; /* Restore rte_errno. */ 3048 return NULL; 3049 } 3050 } 3051 TAILQ_INSERT_TAIL(list, flow, next); 3052 mlx5_flow_rxq_flags_set(dev, flow); 3053 return flow; 3054 } 3055 3056 /** 3057 * Create a flow. 3058 * 3059 * @see rte_flow_create() 3060 * @see rte_flow_ops 3061 */ 3062 struct rte_flow * 3063 mlx5_flow_create(struct rte_eth_dev *dev, 3064 const struct rte_flow_attr *attr, 3065 const struct rte_flow_item items[], 3066 const struct rte_flow_action actions[], 3067 struct rte_flow_error *error) 3068 { 3069 return mlx5_flow_list_create 3070 (dev, &((struct priv *)dev->data->dev_private)->flows, 3071 attr, items, actions, error); 3072 } 3073 3074 /** 3075 * Destroy a flow in a list. 3076 * 3077 * @param dev 3078 * Pointer to Ethernet device. 3079 * @param list 3080 * Pointer to a TAILQ flow list. 3081 * @param[in] flow 3082 * Flow to destroy. 3083 */ 3084 static void 3085 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, 3086 struct rte_flow *flow) 3087 { 3088 mlx5_flow_remove(dev, flow); 3089 TAILQ_REMOVE(list, flow, next); 3090 /* 3091 * Update RX queue flags only if port is started, otherwise it is 3092 * already clean. 3093 */ 3094 if (dev->data->dev_started) 3095 mlx5_flow_rxq_flags_trim(dev, flow); 3096 rte_free(flow); 3097 } 3098 3099 /** 3100 * Destroy all flows. 3101 * 3102 * @param dev 3103 * Pointer to Ethernet device. 3104 * @param list 3105 * Pointer to a TAILQ flow list. 3106 */ 3107 void 3108 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list) 3109 { 3110 while (!TAILQ_EMPTY(list)) { 3111 struct rte_flow *flow; 3112 3113 flow = TAILQ_FIRST(list); 3114 mlx5_flow_list_destroy(dev, list, flow); 3115 } 3116 } 3117 3118 /** 3119 * Remove all flows. 3120 * 3121 * @param dev 3122 * Pointer to Ethernet device. 3123 * @param list 3124 * Pointer to a TAILQ flow list. 3125 */ 3126 void 3127 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list) 3128 { 3129 struct rte_flow *flow; 3130 3131 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) 3132 mlx5_flow_remove(dev, flow); 3133 mlx5_flow_rxq_flags_clear(dev); 3134 } 3135 3136 /** 3137 * Add all flows. 3138 * 3139 * @param dev 3140 * Pointer to Ethernet device. 3141 * @param list 3142 * Pointer to a TAILQ flow list. 3143 * 3144 * @return 3145 * 0 on success, a negative errno value otherwise and rte_errno is set. 
3146 */ 3147 int 3148 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list) 3149 { 3150 struct rte_flow *flow; 3151 struct rte_flow_error error; 3152 int ret = 0; 3153 3154 TAILQ_FOREACH(flow, list, next) { 3155 ret = mlx5_flow_apply(dev, flow, &error); 3156 if (ret < 0) 3157 goto error; 3158 mlx5_flow_rxq_flags_set(dev, flow); 3159 } 3160 return 0; 3161 error: 3162 ret = rte_errno; /* Save rte_errno before cleanup. */ 3163 mlx5_flow_stop(dev, list); 3164 rte_errno = ret; /* Restore rte_errno. */ 3165 return -rte_errno; 3166 } 3167 3168 /** 3169 * Verify the flow list is empty 3170 * 3171 * @param dev 3172 * Pointer to Ethernet device. 3173 * 3174 * @return the number of flows not released. 3175 */ 3176 int 3177 mlx5_flow_verify(struct rte_eth_dev *dev) 3178 { 3179 struct priv *priv = dev->data->dev_private; 3180 struct rte_flow *flow; 3181 int ret = 0; 3182 3183 TAILQ_FOREACH(flow, &priv->flows, next) { 3184 DRV_LOG(DEBUG, "port %u flow %p still referenced", 3185 dev->data->port_id, (void *)flow); 3186 ++ret; 3187 } 3188 return ret; 3189 } 3190 3191 /** 3192 * Enable a control flow configured from the control plane. 3193 * 3194 * @param dev 3195 * Pointer to Ethernet device. 3196 * @param eth_spec 3197 * An Ethernet flow spec to apply. 3198 * @param eth_mask 3199 * An Ethernet flow mask to apply. 3200 * @param vlan_spec 3201 * A VLAN flow spec to apply. 3202 * @param vlan_mask 3203 * A VLAN flow mask to apply. 3204 * 3205 * @return 3206 * 0 on success, a negative errno value otherwise and rte_errno is set. 3207 */ 3208 int 3209 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 3210 struct rte_flow_item_eth *eth_spec, 3211 struct rte_flow_item_eth *eth_mask, 3212 struct rte_flow_item_vlan *vlan_spec, 3213 struct rte_flow_item_vlan *vlan_mask) 3214 { 3215 struct priv *priv = dev->data->dev_private; 3216 const struct rte_flow_attr attr = { 3217 .ingress = 1, 3218 .priority = MLX5_FLOW_PRIO_RSVD, 3219 }; 3220 struct rte_flow_item items[] = { 3221 { 3222 .type = RTE_FLOW_ITEM_TYPE_ETH, 3223 .spec = eth_spec, 3224 .last = NULL, 3225 .mask = eth_mask, 3226 }, 3227 { 3228 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN : 3229 RTE_FLOW_ITEM_TYPE_END, 3230 .spec = vlan_spec, 3231 .last = NULL, 3232 .mask = vlan_mask, 3233 }, 3234 { 3235 .type = RTE_FLOW_ITEM_TYPE_END, 3236 }, 3237 }; 3238 uint16_t queue[priv->reta_idx_n]; 3239 struct rte_flow_action_rss action_rss = { 3240 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 3241 .level = 0, 3242 .types = priv->rss_conf.rss_hf, 3243 .key_len = priv->rss_conf.rss_key_len, 3244 .queue_num = priv->reta_idx_n, 3245 .key = priv->rss_conf.rss_key, 3246 .queue = queue, 3247 }; 3248 struct rte_flow_action actions[] = { 3249 { 3250 .type = RTE_FLOW_ACTION_TYPE_RSS, 3251 .conf = &action_rss, 3252 }, 3253 { 3254 .type = RTE_FLOW_ACTION_TYPE_END, 3255 }, 3256 }; 3257 struct rte_flow *flow; 3258 struct rte_flow_error error; 3259 unsigned int i; 3260 3261 if (!priv->reta_idx_n) { 3262 rte_errno = EINVAL; 3263 return -rte_errno; 3264 } 3265 for (i = 0; i != priv->reta_idx_n; ++i) 3266 queue[i] = (*priv->reta_idx)[i]; 3267 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items, 3268 actions, &error); 3269 if (!flow) 3270 return -rte_errno; 3271 return 0; 3272 } 3273 3274 /** 3275 * Enable a flow control configured from the control plane. 3276 * 3277 * @param dev 3278 * Pointer to Ethernet device. 3279 * @param eth_spec 3280 * An Ethernet flow spec to apply. 3281 * @param eth_mask 3282 * An Ethernet flow mask to apply. 
3283 * 3284 * @return 3285 * 0 on success, a negative errno value otherwise and rte_errno is set. 3286 */ 3287 int 3288 mlx5_ctrl_flow(struct rte_eth_dev *dev, 3289 struct rte_flow_item_eth *eth_spec, 3290 struct rte_flow_item_eth *eth_mask) 3291 { 3292 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 3293 } 3294 3295 /** 3296 * Destroy a flow. 3297 * 3298 * @see rte_flow_destroy() 3299 * @see rte_flow_ops 3300 */ 3301 int 3302 mlx5_flow_destroy(struct rte_eth_dev *dev, 3303 struct rte_flow *flow, 3304 struct rte_flow_error *error __rte_unused) 3305 { 3306 struct priv *priv = dev->data->dev_private; 3307 3308 mlx5_flow_list_destroy(dev, &priv->flows, flow); 3309 return 0; 3310 } 3311 3312 /** 3313 * Destroy all flows. 3314 * 3315 * @see rte_flow_flush() 3316 * @see rte_flow_ops 3317 */ 3318 int 3319 mlx5_flow_flush(struct rte_eth_dev *dev, 3320 struct rte_flow_error *error __rte_unused) 3321 { 3322 struct priv *priv = dev->data->dev_private; 3323 3324 mlx5_flow_list_flush(dev, &priv->flows); 3325 return 0; 3326 } 3327 3328 /** 3329 * Isolated mode. 3330 * 3331 * @see rte_flow_isolate() 3332 * @see rte_flow_ops 3333 */ 3334 int 3335 mlx5_flow_isolate(struct rte_eth_dev *dev, 3336 int enable, 3337 struct rte_flow_error *error) 3338 { 3339 struct priv *priv = dev->data->dev_private; 3340 3341 if (dev->data->dev_started) { 3342 rte_flow_error_set(error, EBUSY, 3343 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3344 NULL, 3345 "port must be stopped first"); 3346 return -rte_errno; 3347 } 3348 priv->isolated = !!enable; 3349 if (enable) 3350 dev->dev_ops = &mlx5_dev_ops_isolate; 3351 else 3352 dev->dev_ops = &mlx5_dev_ops; 3353 return 0; 3354 } 3355 3356 /** 3357 * Query flow counter. 3358 * 3359 * @param flow 3360 * Pointer to the flow. 3361 * 3362 * @return 3363 * 0 on success, a negative errno value otherwise and rte_errno is set. 3364 */ 3365 static int 3366 mlx5_flow_query_count(struct rte_flow *flow __rte_unused, 3367 void *data __rte_unused, 3368 struct rte_flow_error *error) 3369 { 3370 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 3371 if (flow->modifier & MLX5_FLOW_MOD_COUNT) { 3372 struct rte_flow_query_count *qc = data; 3373 uint64_t counters[2] = {0, 0}; 3374 struct ibv_query_counter_set_attr query_cs_attr = { 3375 .cs = flow->counter->cs, 3376 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE, 3377 }; 3378 struct ibv_counter_set_data query_out = { 3379 .out = counters, 3380 .outlen = 2 * sizeof(uint64_t), 3381 }; 3382 int err = mlx5_glue->query_counter_set(&query_cs_attr, 3383 &query_out); 3384 3385 if (err) 3386 return rte_flow_error_set 3387 (error, err, 3388 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3389 NULL, 3390 "cannot read counter"); 3391 qc->hits_set = 1; 3392 qc->bytes_set = 1; 3393 qc->hits = counters[0] - flow->counter->hits; 3394 qc->bytes = counters[1] - flow->counter->bytes; 3395 if (qc->reset) { 3396 flow->counter->hits = counters[0]; 3397 flow->counter->bytes = counters[1]; 3398 } 3399 return 0; 3400 } 3401 return rte_flow_error_set(error, ENOTSUP, 3402 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3403 NULL, 3404 "flow does not have counter"); 3405 #endif 3406 return rte_flow_error_set(error, ENOTSUP, 3407 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3408 NULL, 3409 "counters are not available"); 3410 } 3411 3412 /** 3413 * Query a flows. 
3414 * 3415 * @see rte_flow_query() 3416 * @see rte_flow_ops 3417 */ 3418 int 3419 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused, 3420 struct rte_flow *flow, 3421 const struct rte_flow_action *actions, 3422 void *data, 3423 struct rte_flow_error *error) 3424 { 3425 int ret = 0; 3426 3427 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3428 switch (actions->type) { 3429 case RTE_FLOW_ACTION_TYPE_VOID: 3430 break; 3431 case RTE_FLOW_ACTION_TYPE_COUNT: 3432 ret = mlx5_flow_query_count(flow, data, error); 3433 break; 3434 default: 3435 return rte_flow_error_set(error, ENOTSUP, 3436 RTE_FLOW_ERROR_TYPE_ACTION, 3437 actions, 3438 "action not supported"); 3439 } 3440 if (ret < 0) 3441 return ret; 3442 } 3443 return 0; 3444 } 3445 3446 /** 3447 * Convert a flow director filter to a generic flow. 3448 * 3449 * @param dev 3450 * Pointer to Ethernet device. 3451 * @param fdir_filter 3452 * Flow director filter to add. 3453 * @param attributes 3454 * Generic flow parameters structure. 3455 * 3456 * @return 3457 * 0 on success, a negative errno value otherwise and rte_errno is set. 3458 */ 3459 static int 3460 mlx5_fdir_filter_convert(struct rte_eth_dev *dev, 3461 const struct rte_eth_fdir_filter *fdir_filter, 3462 struct mlx5_fdir *attributes) 3463 { 3464 struct priv *priv = dev->data->dev_private; 3465 const struct rte_eth_fdir_input *input = &fdir_filter->input; 3466 const struct rte_eth_fdir_masks *mask = 3467 &dev->data->dev_conf.fdir_conf.mask; 3468 3469 /* Validate queue number. */ 3470 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 3471 DRV_LOG(ERR, "port %u invalid queue number %d", 3472 dev->data->port_id, fdir_filter->action.rx_queue); 3473 rte_errno = EINVAL; 3474 return -rte_errno; 3475 } 3476 attributes->attr.ingress = 1; 3477 attributes->items[0] = (struct rte_flow_item) { 3478 .type = RTE_FLOW_ITEM_TYPE_ETH, 3479 .spec = &attributes->l2, 3480 .mask = &attributes->l2_mask, 3481 }; 3482 switch (fdir_filter->action.behavior) { 3483 case RTE_ETH_FDIR_ACCEPT: 3484 attributes->actions[0] = (struct rte_flow_action){ 3485 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 3486 .conf = &attributes->queue, 3487 }; 3488 break; 3489 case RTE_ETH_FDIR_REJECT: 3490 attributes->actions[0] = (struct rte_flow_action){ 3491 .type = RTE_FLOW_ACTION_TYPE_DROP, 3492 }; 3493 break; 3494 default: 3495 DRV_LOG(ERR, "port %u invalid behavior %d", 3496 dev->data->port_id, 3497 fdir_filter->action.behavior); 3498 rte_errno = ENOTSUP; 3499 return -rte_errno; 3500 } 3501 attributes->queue.index = fdir_filter->action.rx_queue; 3502 /* Handle L3. 
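 * IPv4 and IPv6 flow types are translated into the matching rte_flow item,
 * reusing the masks configured in dev_conf.fdir_conf.mask.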
*/ 3503 switch (fdir_filter->input.flow_type) { 3504 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3505 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3506 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3507 attributes->l3.ipv4.hdr = (struct ipv4_hdr){ 3508 .src_addr = input->flow.ip4_flow.src_ip, 3509 .dst_addr = input->flow.ip4_flow.dst_ip, 3510 .time_to_live = input->flow.ip4_flow.ttl, 3511 .type_of_service = input->flow.ip4_flow.tos, 3512 .next_proto_id = input->flow.ip4_flow.proto, 3513 }; 3514 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ 3515 .src_addr = mask->ipv4_mask.src_ip, 3516 .dst_addr = mask->ipv4_mask.dst_ip, 3517 .time_to_live = mask->ipv4_mask.ttl, 3518 .type_of_service = mask->ipv4_mask.tos, 3519 .next_proto_id = mask->ipv4_mask.proto, 3520 }; 3521 attributes->items[1] = (struct rte_flow_item){ 3522 .type = RTE_FLOW_ITEM_TYPE_IPV4, 3523 .spec = &attributes->l3, 3524 .mask = &attributes->l3_mask, 3525 }; 3526 break; 3527 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3528 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3529 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3530 attributes->l3.ipv6.hdr = (struct ipv6_hdr){ 3531 .hop_limits = input->flow.ipv6_flow.hop_limits, 3532 .proto = input->flow.ipv6_flow.proto, 3533 }; 3534 3535 memcpy(attributes->l3.ipv6.hdr.src_addr, 3536 input->flow.ipv6_flow.src_ip, 3537 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3538 memcpy(attributes->l3.ipv6.hdr.dst_addr, 3539 input->flow.ipv6_flow.dst_ip, 3540 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3541 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 3542 mask->ipv6_mask.src_ip, 3543 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3544 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 3545 mask->ipv6_mask.dst_ip, 3546 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3547 attributes->items[1] = (struct rte_flow_item){ 3548 .type = RTE_FLOW_ITEM_TYPE_IPV6, 3549 .spec = &attributes->l3, 3550 .mask = &attributes->l3_mask, 3551 }; 3552 break; 3553 default: 3554 DRV_LOG(ERR, "port %u invalid flow type%d", 3555 dev->data->port_id, fdir_filter->input.flow_type); 3556 rte_errno = ENOTSUP; 3557 return -rte_errno; 3558 } 3559 /* Handle L4. 
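 * UDP and TCP flow types add a third item carrying the source and
 * destination ports, while the *_OTHER flow types do not match on any L4
 * header.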
*/ 3560 switch (fdir_filter->input.flow_type) { 3561 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3562 attributes->l4.udp.hdr = (struct udp_hdr){ 3563 .src_port = input->flow.udp4_flow.src_port, 3564 .dst_port = input->flow.udp4_flow.dst_port, 3565 }; 3566 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3567 .src_port = mask->src_port_mask, 3568 .dst_port = mask->dst_port_mask, 3569 }; 3570 attributes->items[2] = (struct rte_flow_item){ 3571 .type = RTE_FLOW_ITEM_TYPE_UDP, 3572 .spec = &attributes->l4, 3573 .mask = &attributes->l4_mask, 3574 }; 3575 break; 3576 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3577 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3578 .src_port = input->flow.tcp4_flow.src_port, 3579 .dst_port = input->flow.tcp4_flow.dst_port, 3580 }; 3581 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3582 .src_port = mask->src_port_mask, 3583 .dst_port = mask->dst_port_mask, 3584 }; 3585 attributes->items[2] = (struct rte_flow_item){ 3586 .type = RTE_FLOW_ITEM_TYPE_TCP, 3587 .spec = &attributes->l4, 3588 .mask = &attributes->l4_mask, 3589 }; 3590 break; 3591 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3592 attributes->l4.udp.hdr = (struct udp_hdr){ 3593 .src_port = input->flow.udp6_flow.src_port, 3594 .dst_port = input->flow.udp6_flow.dst_port, 3595 }; 3596 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3597 .src_port = mask->src_port_mask, 3598 .dst_port = mask->dst_port_mask, 3599 }; 3600 attributes->items[2] = (struct rte_flow_item){ 3601 .type = RTE_FLOW_ITEM_TYPE_UDP, 3602 .spec = &attributes->l4, 3603 .mask = &attributes->l4_mask, 3604 }; 3605 break; 3606 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3607 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3608 .src_port = input->flow.tcp6_flow.src_port, 3609 .dst_port = input->flow.tcp6_flow.dst_port, 3610 }; 3611 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3612 .src_port = mask->src_port_mask, 3613 .dst_port = mask->dst_port_mask, 3614 }; 3615 attributes->items[2] = (struct rte_flow_item){ 3616 .type = RTE_FLOW_ITEM_TYPE_TCP, 3617 .spec = &attributes->l4, 3618 .mask = &attributes->l4_mask, 3619 }; 3620 break; 3621 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3622 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3623 break; 3624 default: 3625 DRV_LOG(ERR, "port %u invalid flow type%d", 3626 dev->data->port_id, fdir_filter->input.flow_type); 3627 rte_errno = ENOTSUP; 3628 return -rte_errno; 3629 } 3630 return 0; 3631 } 3632 3633 /** 3634 * Add new flow director filter and store it in list. 3635 * 3636 * @param dev 3637 * Pointer to Ethernet device. 3638 * @param fdir_filter 3639 * Flow director filter to add. 3640 * 3641 * @return 3642 * 0 on success, a negative errno value otherwise and rte_errno is set. 
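 *
 * As an illustration only (hypothetical values), a filter steering
 * IPv4/UDP traffic with destination port 4789 to Rx queue 3 would be
 * converted by mlx5_fdir_filter_convert() from a definition such as:
 *
 * @code
 *	struct rte_eth_fdir_filter f = {
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
 *		.action.behavior = RTE_ETH_FDIR_ACCEPT,
 *		.action.rx_queue = 3,
 *	};
 * @endcode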

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
			const struct rte_eth_fdir_filter *fdir_filter
			__rte_unused)
{
	/* Deleting a single flow director filter is not supported. */
	rte_errno = ENOTSUP;
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	/* Update is implemented as a delete followed by an add. */
	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
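
/*
 * Illustrative sketch (not part of the driver): an application can retrieve
 * the information filled in above through the generic filter API. The
 * port_id variable is hypothetical:
 *
 *	struct rte_eth_fdir_info info;
 *	int ret;
 *
 *	memset(&info, 0, sizeof(info));
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				      RTE_ETH_FILTER_INFO, &info);
 *	if (ret == 0)
 *		printf("FDIR mode: %d\n", info.mode);
 */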
3769 */ 3770 static int 3771 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 3772 void *arg) 3773 { 3774 enum rte_fdir_mode fdir_mode = 3775 dev->data->dev_conf.fdir_conf.mode; 3776 3777 if (filter_op == RTE_ETH_FILTER_NOP) 3778 return 0; 3779 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 3780 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 3781 DRV_LOG(ERR, "port %u flow director mode %d not supported", 3782 dev->data->port_id, fdir_mode); 3783 rte_errno = EINVAL; 3784 return -rte_errno; 3785 } 3786 switch (filter_op) { 3787 case RTE_ETH_FILTER_ADD: 3788 return mlx5_fdir_filter_add(dev, arg); 3789 case RTE_ETH_FILTER_UPDATE: 3790 return mlx5_fdir_filter_update(dev, arg); 3791 case RTE_ETH_FILTER_DELETE: 3792 return mlx5_fdir_filter_delete(dev, arg); 3793 case RTE_ETH_FILTER_FLUSH: 3794 mlx5_fdir_filter_flush(dev); 3795 break; 3796 case RTE_ETH_FILTER_INFO: 3797 mlx5_fdir_info_get(dev, arg); 3798 break; 3799 default: 3800 DRV_LOG(DEBUG, "port %u unknown operation %u", 3801 dev->data->port_id, filter_op); 3802 rte_errno = EINVAL; 3803 return -rte_errno; 3804 } 3805 return 0; 3806 } 3807 3808 /** 3809 * Manage filter operations. 3810 * 3811 * @param dev 3812 * Pointer to Ethernet device structure. 3813 * @param filter_type 3814 * Filter type. 3815 * @param filter_op 3816 * Operation to perform. 3817 * @param arg 3818 * Pointer to operation-specific structure. 3819 * 3820 * @return 3821 * 0 on success, a negative errno value otherwise and rte_errno is set. 3822 */ 3823 int 3824 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 3825 enum rte_filter_type filter_type, 3826 enum rte_filter_op filter_op, 3827 void *arg) 3828 { 3829 switch (filter_type) { 3830 case RTE_ETH_FILTER_GENERIC: 3831 if (filter_op != RTE_ETH_FILTER_GET) { 3832 rte_errno = EINVAL; 3833 return -rte_errno; 3834 } 3835 *(const void **)arg = &mlx5_flow_ops; 3836 return 0; 3837 case RTE_ETH_FILTER_FDIR: 3838 return mlx5_fdir_ctrl_func(dev, filter_op, arg); 3839 default: 3840 DRV_LOG(ERR, "port %u filter type (%d) not supported", 3841 dev->data->port_id, filter_type); 3842 rte_errno = ENOTSUP; 3843 return -rte_errno; 3844 } 3845 return 0; 3846 } 3847