/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern outer Layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

/* Pattern inner Layer bits. */
#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)

/* Pattern tunnel Layer bits. */
#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
#define MLX5_FLOW_LAYER_GRE (1u << 14)
#define MLX5_FLOW_LAYER_MPLS (1u << 15)

/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
#define MLX5_FLOW_LAYER_OUTER \
	(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
	 MLX5_FLOW_LAYER_OUTER_L4)

/* Tunnel Masks. */
#define MLX5_FLOW_LAYER_TUNNEL \
	(MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
	 MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)

/* Inner Masks. */
#define MLX5_FLOW_LAYER_INNER_L3 \
	(MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
#define MLX5_FLOW_LAYER_INNER_L4 \
	(MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
#define MLX5_FLOW_LAYER_INNER \
	(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
	 MLX5_FLOW_LAYER_INNER_L4)

/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)
#define MLX5_FLOW_FATE_RSS (1u << 2)

/* Modify a packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)
#define MLX5_FLOW_MOD_COUNT (1u << 2)

/* Possible L3 layer protocols to filter on. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17
#define MLX5_IP_PROTOCOL_GRE 47
#define MLX5_IP_PROTOCOL_MPLS 147

/* Priority reserved for default flows. */
#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
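
/*
 * Illustrative note (added comment, not from the original code): the
 * MLX5_FLOW_LAYER_* bits accumulate in rte_flow->layers while a pattern is
 * parsed.  For example, a hypothetical pattern eth / ipv4 / udp / vxlan / eth
 * would be expected to end up with:
 *
 *	layers = MLX5_FLOW_LAYER_OUTER_L2 |
 *		 MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *		 MLX5_FLOW_LAYER_OUTER_L4_UDP |
 *		 MLX5_FLOW_LAYER_VXLAN |
 *		 MLX5_FLOW_LAYER_INNER_L2;
 */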

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						 MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6,
						 MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};

/** Handles information leading to a drop fate. */
struct mlx5_flow_verbs {
	LIST_ENTRY(mlx5_flow_verbs) next;
	unsigned int size; /**< Size of the attribute. */
	struct {
		struct ibv_flow_attr *attr;
		/**< Pointer to the Specification buffer. */
		uint8_t *specs; /**< Pointer to the specifications. */
	};
	struct ibv_flow *flow; /**< Verbs flow pointer. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
};

/* Counters information. */
struct mlx5_flow_counter {
	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
	uint32_t shared:1; /**< Share counter ID with other flow rules. */
	uint32_t ref_cnt:31; /**< Reference counter. */
	uint32_t id; /**< Counter ID. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/* Flow structure. */
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct rte_flow_attr attributes; /**< User flow attribute. */
	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
	uint32_t layers;
	/**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
	uint32_t modifier;
	/**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
	uint32_t fate;
	/**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
	uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
	struct mlx5_flow_verbs *cur_verbs;
	/**< Current Verbs flow structure being filled. */
	struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
	struct rte_flow_action_rss rss;/**< RSS context. */
	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
	void *nl_flow; /**< Netlink flow buffer if relevant. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/*
 * Number of sub-priorities.
 * For each kind of pattern matching (i.e. L2, L3, L4), to get correct
 * matching on the NIC (firmware dependent), L4 must have the highest
 * priority, followed by L3 and finally L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
};
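
/*
 * Illustrative note (added comment, not from the original code): the table
 * above allows a flow's tunnel layer bit to be translated into the packet
 * type reported for its Rx queues.  A hypothetical lookup could look like:
 *
 *	uint32_t ptype = 0;
 *	unsigned int i;
 *
 *	for (i = 0; i != RTE_DIM(tunnels_info); ++i)
 *		if ((layers & tunnels_info[i].tunnel) ==
 *		    tunnels_info[i].tunnel) {
 *			ptype = tunnels_info[i].ptype;
 *			break;
 *		}
 */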

/**
 * Discover the maximum number of flow priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, vprio[i]);
		return -rte_errno;
	}
	mlx5_hrxq_drop_release(dev);
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	uint32_t priority = flow->attributes.priority;
	uint32_t subpriority = flow->cur_verbs->attr->priority;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		priority = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		priority = priority_map_5[priority][subpriority];
		break;
	}
	flow->cur_verbs->attr->priority = priority;
}
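
/*
 * Illustrative example (added comment, not from the original code): when 16
 * Verbs priorities are discovered, mlx5_flow_discover_priorities() returns
 * RTE_DIM(priority_map_5) == 5, which is expected to end up in
 * priv->config.flow_prio.  A flow created with rte_flow priority 1 whose
 * deepest matched layer is L3 (subpriority MLX5_PRIORITY_MAP_L3 == 1) then
 * gets Verbs priority priority_map_5[1][1] == 4.
 */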
518 */ 519 static struct mlx5_flow_counter * 520 mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) 521 { 522 struct priv *priv = dev->data->dev_private; 523 struct mlx5_flow_counter *cnt; 524 525 LIST_FOREACH(cnt, &priv->flow_counters, next) { 526 if (!cnt->shared || cnt->shared != shared) 527 continue; 528 if (cnt->id != id) 529 continue; 530 cnt->ref_cnt++; 531 return cnt; 532 } 533 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 534 535 struct mlx5_flow_counter tmpl = { 536 .shared = shared, 537 .id = id, 538 .cs = mlx5_glue->create_counter_set 539 (priv->ctx, 540 &(struct ibv_counter_set_init_attr){ 541 .counter_set_id = id, 542 }), 543 .hits = 0, 544 .bytes = 0, 545 }; 546 547 if (!tmpl.cs) { 548 rte_errno = errno; 549 return NULL; 550 } 551 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0); 552 if (!cnt) { 553 rte_errno = ENOMEM; 554 return NULL; 555 } 556 *cnt = tmpl; 557 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next); 558 return cnt; 559 #endif 560 rte_errno = ENOTSUP; 561 return NULL; 562 } 563 564 /** 565 * Release a flow counter. 566 * 567 * @param[in] counter 568 * Pointer to the counter handler. 569 */ 570 static void 571 mlx5_flow_counter_release(struct mlx5_flow_counter *counter) 572 { 573 if (--counter->ref_cnt == 0) { 574 claim_zero(mlx5_glue->destroy_counter_set(counter->cs)); 575 LIST_REMOVE(counter, next); 576 rte_free(counter); 577 } 578 } 579 580 /** 581 * Verify the @p attributes will be correctly understood by the NIC and store 582 * them in the @p flow if everything is correct. 583 * 584 * @param[in] dev 585 * Pointer to Ethernet device. 586 * @param[in] attributes 587 * Pointer to flow attributes 588 * @param[in, out] flow 589 * Pointer to the rte_flow structure. 590 * @param[out] error 591 * Pointer to error structure. 592 * 593 * @return 594 * 0 on success, a negative errno value otherwise and rte_errno is set. 595 */ 596 static int 597 mlx5_flow_attributes(struct rte_eth_dev *dev, 598 const struct rte_flow_attr *attributes, 599 struct rte_flow *flow, 600 struct rte_flow_error *error) 601 { 602 uint32_t priority_max = 603 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1; 604 605 if (attributes->group) 606 return rte_flow_error_set(error, ENOTSUP, 607 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 608 NULL, 609 "groups is not supported"); 610 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 611 attributes->priority >= priority_max) 612 return rte_flow_error_set(error, ENOTSUP, 613 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 614 NULL, 615 "priority out of range"); 616 if (attributes->egress) 617 return rte_flow_error_set(error, ENOTSUP, 618 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, 619 NULL, 620 "egress is not supported"); 621 if (attributes->transfer) 622 return rte_flow_error_set(error, ENOTSUP, 623 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 624 NULL, 625 "transfer is not supported"); 626 if (!attributes->ingress) 627 return rte_flow_error_set(error, ENOTSUP, 628 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 629 NULL, 630 "ingress attribute is mandatory"); 631 flow->attributes = *attributes; 632 if (attributes->priority == MLX5_FLOW_PRIO_RSVD) 633 flow->attributes.priority = priority_max; 634 return 0; 635 } 636 637 /** 638 * Verify the @p item specifications (spec, last, mask) are compatible with the 639 * NIC capabilities. 640 * 641 * @param[in] item 642 * Item specification. 643 * @param[in] mask 644 * @p item->mask or flow default bit-masks. 645 * @param[in] nic_mask 646 * Bit-masks covering supported fields by the NIC to compare with user mask. 

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-mask size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	assert(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not supported");
	}
	return 0;
}

/**
 * Add a verbs item specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Create specification.
 * @param[in] size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (verbs->specs) {
		void *dst;

		dst = (void *)(verbs->specs + verbs->size);
		memcpy(dst, src, size);
		++verbs->attr->num_of_specs;
	}
	verbs->size += size;
}

/**
 * Adjust verbs hash fields according to the @p flow information.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 */
static void
mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
				  int tunnel __rte_unused,
				  uint32_t layer_types, uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
	if (flow->rss.level == 2 && !tunnel)
		hash_fields = 0;
	else if (flow->rss.level < 2 && tunnel)
		hash_fields = 0;
#endif
	if (!(flow->rss.types & layer_types))
		hash_fields = 0;
	flow->cur_verbs->hash_fields |= hash_fields;
}
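
/*
 * Illustrative note (added comment, not from the original code): each L3/L4
 * converter below passes its ETH_RSS_* coverage and Verbs hash fields here.
 * For instance an outer IPv4 item contributes IBV_RX_HASH_SRC_IPV4 |
 * IBV_RX_HASH_DST_IPV4 when flow->rss.types includes one of the IPv4
 * ETH_RSS_* types; otherwise its contribution collapses to 0 and nothing is
 * added to flow->cur_verbs->hash_fields.
 */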
767 * 768 * @return 769 * On success the number of bytes consumed/necessary, if the returned value 770 * is lesser or equal to @p flow_size, the @p item has fully been converted, 771 * otherwise another call with this returned memory size should be done. 772 * On error, a negative errno value is returned and rte_errno is set. 773 */ 774 static int 775 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow, 776 const size_t flow_size, struct rte_flow_error *error) 777 { 778 const struct rte_flow_item_eth *spec = item->spec; 779 const struct rte_flow_item_eth *mask = item->mask; 780 const struct rte_flow_item_eth nic_mask = { 781 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 782 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 783 .type = RTE_BE16(0xffff), 784 }; 785 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 786 const unsigned int size = sizeof(struct ibv_flow_spec_eth); 787 struct ibv_flow_spec_eth eth = { 788 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 789 .size = size, 790 }; 791 int ret; 792 793 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 794 MLX5_FLOW_LAYER_OUTER_L2)) 795 return rte_flow_error_set(error, ENOTSUP, 796 RTE_FLOW_ERROR_TYPE_ITEM, 797 item, 798 "L2 layers already configured"); 799 if (!mask) 800 mask = &rte_flow_item_eth_mask; 801 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 802 (const uint8_t *)&nic_mask, 803 sizeof(struct rte_flow_item_eth), 804 error); 805 if (ret) 806 return ret; 807 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 808 MLX5_FLOW_LAYER_OUTER_L2; 809 if (size > flow_size) 810 return size; 811 if (spec) { 812 unsigned int i; 813 814 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN); 815 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN); 816 eth.val.ether_type = spec->type; 817 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN); 818 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN); 819 eth.mask.ether_type = mask->type; 820 /* Remove unwanted bits from values. */ 821 for (i = 0; i < ETHER_ADDR_LEN; ++i) { 822 eth.val.dst_mac[i] &= eth.mask.dst_mac[i]; 823 eth.val.src_mac[i] &= eth.mask.src_mac[i]; 824 } 825 eth.val.ether_type &= eth.mask.ether_type; 826 } 827 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 828 mlx5_flow_spec_verbs_add(flow, ð, size); 829 return size; 830 } 831 832 /** 833 * Update the VLAN tag in the Verbs Ethernet specification. 834 * 835 * @param[in, out] attr 836 * Pointer to Verbs attributes structure. 837 * @param[in] eth 838 * Verbs structure containing the VLAN information to copy. 839 */ 840 static void 841 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr, 842 struct ibv_flow_spec_eth *eth) 843 { 844 unsigned int i; 845 const enum ibv_flow_spec_type search = eth->type; 846 struct ibv_spec_header *hdr = (struct ibv_spec_header *) 847 ((uint8_t *)attr + sizeof(struct ibv_flow_attr)); 848 849 for (i = 0; i != attr->num_of_specs; ++i) { 850 if (hdr->type == search) { 851 struct ibv_flow_spec_eth *e = 852 (struct ibv_flow_spec_eth *)hdr; 853 854 e->val.vlan_tag = eth->val.vlan_tag; 855 e->mask.vlan_tag = eth->mask.vlan_tag; 856 e->val.ether_type = eth->val.ether_type; 857 e->mask.ether_type = eth->mask.ether_type; 858 break; 859 } 860 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size); 861 } 862 } 863 864 /** 865 * Convert the @p item into @p flow (or by updating the already present 866 * Ethernet Verbs) specification after ensuring the NIC will understand and 867 * process it correctly. 

/**
 * Convert the @p item into @p flow (or by updating the already present
 * Ethernet Verbs) specification after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(0x0fff),
		.inner_type = RTE_BE16(0xffff),
	};
	unsigned int size = sizeof(struct ibv_flow_spec_eth);
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;
	const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
		(MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
	const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
		MLX5_FLOW_LAYER_OUTER_VLAN;
	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
		MLX5_FLOW_LAYER_OUTER_L2;

	if (flow->layers & vlanm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VLAN layer already configured");
	else if ((flow->layers & l34m) != 0)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layer cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_vlan), error);
	if (ret)
		return ret;
	if (spec) {
		eth.val.vlan_tag = spec->tci;
		eth.mask.vlan_tag = mask->tci;
		eth.val.vlan_tag &= eth.mask.vlan_tag;
		eth.val.ether_type = spec->inner_type;
		eth.mask.ether_type = mask->inner_type;
		eth.val.ether_type &= eth.mask.ether_type;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!eth.mask.vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	if (!(flow->layers & l2m)) {
		if (size <= flow_size) {
			flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
			mlx5_flow_spec_verbs_add(flow, &eth, size);
		}
	} else {
		if (flow->cur_verbs)
			mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
						   &eth);
		size = 0; /* Only an update is done in eth specification. */
	}
	flow->layers |= tunnel ?
		(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
		(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = IBV_FLOW_SPEC_IPV4_EXT |
			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv4), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
		MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	if (spec) {
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	flow->l3_protocol_en = !!ipv4.mask.proto;
	flow->l3_protocol = ipv4.val.proto;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			  ETH_RSS_NONFRAG_IPV4_TCP |
			  ETH_RSS_NONFRAG_IPV4_UDP |
			  ETH_RSS_NONFRAG_IPV4_OTHER),
			 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
			.hop_limits = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	/*
	 * IPv6 is not recognised by the NIC inside a GRE tunnel.
	 * Such support has to be disabled as the rule will be
	 * accepted. Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
	 * Mellanox OFED 4.4-1.0.0.0.
	 */
1132 */ 1133 if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE) 1134 return rte_flow_error_set(error, ENOTSUP, 1135 RTE_FLOW_ERROR_TYPE_ITEM, 1136 item, 1137 "IPv6 inside a GRE tunnel is" 1138 " not recognised."); 1139 if (!mask) 1140 mask = &rte_flow_item_ipv6_mask; 1141 ret = mlx5_flow_item_acceptable 1142 (item, (const uint8_t *)mask, 1143 (const uint8_t *)&nic_mask, 1144 sizeof(struct rte_flow_item_ipv6), error); 1145 if (ret < 0) 1146 return ret; 1147 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1148 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1149 if (spec) { 1150 unsigned int i; 1151 uint32_t vtc_flow_val; 1152 uint32_t vtc_flow_mask; 1153 1154 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr, 1155 RTE_DIM(ipv6.val.src_ip)); 1156 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr, 1157 RTE_DIM(ipv6.val.dst_ip)); 1158 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr, 1159 RTE_DIM(ipv6.mask.src_ip)); 1160 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr, 1161 RTE_DIM(ipv6.mask.dst_ip)); 1162 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow); 1163 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow); 1164 ipv6.val.flow_label = 1165 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >> 1166 IPV6_HDR_FL_SHIFT); 1167 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >> 1168 IPV6_HDR_TC_SHIFT; 1169 ipv6.val.next_hdr = spec->hdr.proto; 1170 ipv6.val.hop_limit = spec->hdr.hop_limits; 1171 ipv6.mask.flow_label = 1172 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >> 1173 IPV6_HDR_FL_SHIFT); 1174 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >> 1175 IPV6_HDR_TC_SHIFT; 1176 ipv6.mask.next_hdr = mask->hdr.proto; 1177 ipv6.mask.hop_limit = mask->hdr.hop_limits; 1178 /* Remove unwanted bits from values. */ 1179 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) { 1180 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i]; 1181 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i]; 1182 } 1183 ipv6.val.flow_label &= ipv6.mask.flow_label; 1184 ipv6.val.traffic_class &= ipv6.mask.traffic_class; 1185 ipv6.val.next_hdr &= ipv6.mask.next_hdr; 1186 ipv6.val.hop_limit &= ipv6.mask.hop_limit; 1187 } 1188 flow->l3_protocol_en = !!ipv6.mask.next_hdr; 1189 flow->l3_protocol = ipv6.val.next_hdr; 1190 if (size <= flow_size) { 1191 mlx5_flow_verbs_hashfields_adjust 1192 (flow, tunnel, 1193 (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 1194 ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_NONFRAG_IPV6_UDP | 1195 ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX | 1196 ETH_RSS_IPV6_TCP_EX | ETH_RSS_IPV6_UDP_EX), 1197 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6)); 1198 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3; 1199 mlx5_flow_spec_verbs_add(flow, &ipv6, size); 1200 } 1201 return size; 1202 } 1203 1204 /** 1205 * Convert the @p item into a Verbs specification after ensuring the NIC 1206 * will understand and process it correctly. 1207 * If the necessary size for the conversion is greater than the @p flow_size, 1208 * nothing is written in @p flow, the validation is still performed. 1209 * 1210 * @param[in] item 1211 * Item specification. 1212 * @param[in, out] flow 1213 * Pointer to flow structure. 1214 * @param[in] flow_size 1215 * Size in bytes of the available space in @p flow, if too small, nothing is 1216 * written. 1217 * @param[out] error 1218 * Pointer to error structure. 1219 * 1220 * @return 1221 * On success the number of bytes consumed/necessary, if the returned value 1222 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1223 * otherwise another call with this returned memory size should be done. 

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p item has fully been converted,
 *   otherwise another call with this returned memory size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with UDP layer");
	if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			      MLX5_FLOW_LAYER_OUTER_L3)))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter"
					  " on L4");
	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
			    MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already"
					  " present");
	if (!mask)
		mask = &rte_flow_item_udp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_udp_mask,
		 sizeof(struct rte_flow_item_udp), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
		MLX5_FLOW_LAYER_OUTER_L4_UDP;
	if (spec) {
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
						  (IBV_RX_HASH_SRC_PORT_UDP |
						   IBV_RX_HASH_DST_PORT_UDP));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
		mlx5_flow_spec_verbs_add(flow, &udp, size);
	}
	return size;
}
1310 */ 1311 static int 1312 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow, 1313 const size_t flow_size, struct rte_flow_error *error) 1314 { 1315 const struct rte_flow_item_tcp *spec = item->spec; 1316 const struct rte_flow_item_tcp *mask = item->mask; 1317 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1318 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); 1319 struct ibv_flow_spec_tcp_udp tcp = { 1320 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1321 .size = size, 1322 }; 1323 int ret; 1324 1325 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP) 1326 return rte_flow_error_set(error, ENOTSUP, 1327 RTE_FLOW_ERROR_TYPE_ITEM, 1328 item, 1329 "protocol filtering not compatible" 1330 " with TCP layer"); 1331 if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1332 MLX5_FLOW_LAYER_OUTER_L3))) 1333 return rte_flow_error_set(error, ENOTSUP, 1334 RTE_FLOW_ERROR_TYPE_ITEM, 1335 item, 1336 "L3 is mandatory to filter on L4"); 1337 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1338 MLX5_FLOW_LAYER_OUTER_L4)) 1339 return rte_flow_error_set(error, ENOTSUP, 1340 RTE_FLOW_ERROR_TYPE_ITEM, 1341 item, 1342 "L4 layer is already present"); 1343 if (!mask) 1344 mask = &rte_flow_item_tcp_mask; 1345 ret = mlx5_flow_item_acceptable 1346 (item, (const uint8_t *)mask, 1347 (const uint8_t *)&rte_flow_item_tcp_mask, 1348 sizeof(struct rte_flow_item_tcp), error); 1349 if (ret < 0) 1350 return ret; 1351 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : 1352 MLX5_FLOW_LAYER_OUTER_L4_TCP; 1353 if (spec) { 1354 tcp.val.dst_port = spec->hdr.dst_port; 1355 tcp.val.src_port = spec->hdr.src_port; 1356 tcp.mask.dst_port = mask->hdr.dst_port; 1357 tcp.mask.src_port = mask->hdr.src_port; 1358 /* Remove unwanted bits from values. */ 1359 tcp.val.src_port &= tcp.mask.src_port; 1360 tcp.val.dst_port &= tcp.mask.dst_port; 1361 } 1362 if (size <= flow_size) { 1363 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP, 1364 (IBV_RX_HASH_SRC_PORT_TCP | 1365 IBV_RX_HASH_DST_PORT_TCP)); 1366 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4; 1367 mlx5_flow_spec_verbs_add(flow, &tcp, size); 1368 } 1369 return size; 1370 } 1371 1372 /** 1373 * Convert the @p item into a Verbs specification after ensuring the NIC 1374 * will understand and process it correctly. 1375 * If the necessary size for the conversion is greater than the @p flow_size, 1376 * nothing is written in @p flow, the validation is still performed. 1377 * 1378 * @param[in] item 1379 * Item specification. 1380 * @param[in, out] flow 1381 * Pointer to flow structure. 1382 * @param[in] flow_size 1383 * Size in bytes of the available space in @p flow, if too small, nothing is 1384 * written. 1385 * @param[out] error 1386 * Pointer to error structure. 1387 * 1388 * @return 1389 * On success the number of bytes consumed/necessary, if the returned value 1390 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1391 * otherwise another call with this returned memory size should be done. 1392 * On error, a negative errno value is returned and rte_errno is set. 
1393 */ 1394 static int 1395 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow, 1396 const size_t flow_size, struct rte_flow_error *error) 1397 { 1398 const struct rte_flow_item_vxlan *spec = item->spec; 1399 const struct rte_flow_item_vxlan *mask = item->mask; 1400 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1401 struct ibv_flow_spec_tunnel vxlan = { 1402 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1403 .size = size, 1404 }; 1405 int ret; 1406 union vni { 1407 uint32_t vlan_id; 1408 uint8_t vni[4]; 1409 } id = { .vlan_id = 0, }; 1410 1411 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1412 return rte_flow_error_set(error, ENOTSUP, 1413 RTE_FLOW_ERROR_TYPE_ITEM, 1414 item, 1415 "a tunnel is already present"); 1416 /* 1417 * Verify only UDPv4 is present as defined in 1418 * https://tools.ietf.org/html/rfc7348 1419 */ 1420 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1421 return rte_flow_error_set(error, ENOTSUP, 1422 RTE_FLOW_ERROR_TYPE_ITEM, 1423 item, 1424 "no outer UDP layer found"); 1425 if (!mask) 1426 mask = &rte_flow_item_vxlan_mask; 1427 ret = mlx5_flow_item_acceptable 1428 (item, (const uint8_t *)mask, 1429 (const uint8_t *)&rte_flow_item_vxlan_mask, 1430 sizeof(struct rte_flow_item_vxlan), error); 1431 if (ret < 0) 1432 return ret; 1433 if (spec) { 1434 memcpy(&id.vni[1], spec->vni, 3); 1435 vxlan.val.tunnel_id = id.vlan_id; 1436 memcpy(&id.vni[1], mask->vni, 3); 1437 vxlan.mask.tunnel_id = id.vlan_id; 1438 /* Remove unwanted bits from values. */ 1439 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id; 1440 } 1441 /* 1442 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if 1443 * only this layer is defined in the Verbs specification it is 1444 * interpreted as wildcard and all packets will match this 1445 * rule, if it follows a full stack layer (ex: eth / ipv4 / 1446 * udp), all packets matching the layers before will also 1447 * match this rule. To avoid such situation, VNI 0 is 1448 * currently refused. 1449 */ 1450 if (!vxlan.val.tunnel_id) 1451 return rte_flow_error_set(error, EINVAL, 1452 RTE_FLOW_ERROR_TYPE_ITEM, 1453 item, 1454 "VXLAN vni cannot be 0"); 1455 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1456 return rte_flow_error_set(error, EINVAL, 1457 RTE_FLOW_ERROR_TYPE_ITEM, 1458 item, 1459 "VXLAN tunnel must be fully defined"); 1460 if (size <= flow_size) { 1461 mlx5_flow_spec_verbs_add(flow, &vxlan, size); 1462 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1463 } 1464 flow->layers |= MLX5_FLOW_LAYER_VXLAN; 1465 return size; 1466 } 1467 1468 /** 1469 * Convert the @p item into a Verbs specification after ensuring the NIC 1470 * will understand and process it correctly. 1471 * If the necessary size for the conversion is greater than the @p flow_size, 1472 * nothing is written in @p flow, the validation is still performed. 1473 * 1474 * @param dev 1475 * Pointer to Ethernet device. 1476 * @param[in] item 1477 * Item specification. 1478 * @param[in, out] flow 1479 * Pointer to flow structure. 1480 * @param[in] flow_size 1481 * Size in bytes of the available space in @p flow, if too small, nothing is 1482 * written. 1483 * @param[out] error 1484 * Pointer to error structure. 1485 * 1486 * @return 1487 * On success the number of bytes consumed/necessary, if the returned value 1488 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1489 * otherwise another call with this returned memory size should be done. 1490 * On error, a negative errno value is returned and rte_errno is set. 
1491 */ 1492 static int 1493 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev, 1494 const struct rte_flow_item *item, 1495 struct rte_flow *flow, const size_t flow_size, 1496 struct rte_flow_error *error) 1497 { 1498 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1499 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1500 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1501 struct ibv_flow_spec_tunnel vxlan_gpe = { 1502 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1503 .size = size, 1504 }; 1505 int ret; 1506 union vni { 1507 uint32_t vlan_id; 1508 uint8_t vni[4]; 1509 } id = { .vlan_id = 0, }; 1510 1511 if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en) 1512 return rte_flow_error_set(error, ENOTSUP, 1513 RTE_FLOW_ERROR_TYPE_ITEM, 1514 item, 1515 "L3 VXLAN is not enabled by device" 1516 " parameter and/or not configured in" 1517 " firmware"); 1518 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1519 return rte_flow_error_set(error, ENOTSUP, 1520 RTE_FLOW_ERROR_TYPE_ITEM, 1521 item, 1522 "a tunnel is already present"); 1523 /* 1524 * Verify only UDPv4 is present as defined in 1525 * https://tools.ietf.org/html/rfc7348 1526 */ 1527 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1528 return rte_flow_error_set(error, ENOTSUP, 1529 RTE_FLOW_ERROR_TYPE_ITEM, 1530 item, 1531 "no outer UDP layer found"); 1532 if (!mask) 1533 mask = &rte_flow_item_vxlan_gpe_mask; 1534 ret = mlx5_flow_item_acceptable 1535 (item, (const uint8_t *)mask, 1536 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 1537 sizeof(struct rte_flow_item_vxlan_gpe), error); 1538 if (ret < 0) 1539 return ret; 1540 if (spec) { 1541 memcpy(&id.vni[1], spec->vni, 3); 1542 vxlan_gpe.val.tunnel_id = id.vlan_id; 1543 memcpy(&id.vni[1], mask->vni, 3); 1544 vxlan_gpe.mask.tunnel_id = id.vlan_id; 1545 if (spec->protocol) 1546 return rte_flow_error_set 1547 (error, EINVAL, 1548 RTE_FLOW_ERROR_TYPE_ITEM, 1549 item, 1550 "VxLAN-GPE protocol not supported"); 1551 /* Remove unwanted bits from values. */ 1552 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id; 1553 } 1554 /* 1555 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this 1556 * layer is defined in the Verbs specification it is interpreted as 1557 * wildcard and all packets will match this rule, if it follows a full 1558 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers 1559 * before will also match this rule. To avoid such situation, VNI 0 1560 * is currently refused. 1561 */ 1562 if (!vxlan_gpe.val.tunnel_id) 1563 return rte_flow_error_set(error, EINVAL, 1564 RTE_FLOW_ERROR_TYPE_ITEM, 1565 item, 1566 "VXLAN-GPE vni cannot be 0"); 1567 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1568 return rte_flow_error_set(error, EINVAL, 1569 RTE_FLOW_ERROR_TYPE_ITEM, 1570 item, 1571 "VXLAN-GPE tunnel must be fully" 1572 " defined"); 1573 if (size <= flow_size) { 1574 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size); 1575 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1576 } 1577 flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE; 1578 return size; 1579 } 1580 1581 /** 1582 * Update the protocol in Verbs IPv4/IPv6 spec. 1583 * 1584 * @param[in, out] attr 1585 * Pointer to Verbs attributes structure. 1586 * @param[in] search 1587 * Specification type to search in order to update the IP protocol. 1588 * @param[in] protocol 1589 * Protocol value to set if none is present in the specification. 
1590 */ 1591 static void 1592 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr, 1593 enum ibv_flow_spec_type search, 1594 uint8_t protocol) 1595 { 1596 unsigned int i; 1597 struct ibv_spec_header *hdr = (struct ibv_spec_header *) 1598 ((uint8_t *)attr + sizeof(struct ibv_flow_attr)); 1599 1600 if (!attr) 1601 return; 1602 for (i = 0; i != attr->num_of_specs; ++i) { 1603 if (hdr->type == search) { 1604 union { 1605 struct ibv_flow_spec_ipv4_ext *ipv4; 1606 struct ibv_flow_spec_ipv6 *ipv6; 1607 } ip; 1608 1609 switch (search) { 1610 case IBV_FLOW_SPEC_IPV4_EXT: 1611 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr; 1612 if (!ip.ipv4->val.proto) { 1613 ip.ipv4->val.proto = protocol; 1614 ip.ipv4->mask.proto = 0xff; 1615 } 1616 break; 1617 case IBV_FLOW_SPEC_IPV6: 1618 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr; 1619 if (!ip.ipv6->val.next_hdr) { 1620 ip.ipv6->val.next_hdr = protocol; 1621 ip.ipv6->mask.next_hdr = 0xff; 1622 } 1623 break; 1624 default: 1625 break; 1626 } 1627 break; 1628 } 1629 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size); 1630 } 1631 } 1632 1633 /** 1634 * Convert the @p item into a Verbs specification after ensuring the NIC 1635 * will understand and process it correctly. 1636 * It will also update the previous L3 layer with the protocol value matching 1637 * the GRE. 1638 * If the necessary size for the conversion is greater than the @p flow_size, 1639 * nothing is written in @p flow, the validation is still performed. 1640 * 1641 * @param dev 1642 * Pointer to Ethernet device. 1643 * @param[in] item 1644 * Item specification. 1645 * @param[in, out] flow 1646 * Pointer to flow structure. 1647 * @param[in] flow_size 1648 * Size in bytes of the available space in @p flow, if too small, nothing is 1649 * written. 1650 * @param[out] error 1651 * Pointer to error structure. 1652 * 1653 * @return 1654 * On success the number of bytes consumed/necessary, if the returned value 1655 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1656 * otherwise another call with this returned memory size should be done. 1657 * On error, a negative errno value is returned and rte_errno is set. 
1658 */ 1659 static int 1660 mlx5_flow_item_gre(const struct rte_flow_item *item, 1661 struct rte_flow *flow, const size_t flow_size, 1662 struct rte_flow_error *error) 1663 { 1664 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 1665 const struct rte_flow_item_gre *spec = item->spec; 1666 const struct rte_flow_item_gre *mask = item->mask; 1667 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1668 unsigned int size = sizeof(struct ibv_flow_spec_gre); 1669 struct ibv_flow_spec_gre tunnel = { 1670 .type = IBV_FLOW_SPEC_GRE, 1671 .size = size, 1672 }; 1673 #else 1674 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1675 struct ibv_flow_spec_tunnel tunnel = { 1676 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1677 .size = size, 1678 }; 1679 #endif 1680 int ret; 1681 1682 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE) 1683 return rte_flow_error_set(error, ENOTSUP, 1684 RTE_FLOW_ERROR_TYPE_ITEM, 1685 item, 1686 "protocol filtering not compatible" 1687 " with this GRE layer"); 1688 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1689 return rte_flow_error_set(error, ENOTSUP, 1690 RTE_FLOW_ERROR_TYPE_ITEM, 1691 item, 1692 "a tunnel is already present"); 1693 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3)) 1694 return rte_flow_error_set(error, ENOTSUP, 1695 RTE_FLOW_ERROR_TYPE_ITEM, 1696 item, 1697 "L3 Layer is missing"); 1698 if (!mask) 1699 mask = &rte_flow_item_gre_mask; 1700 ret = mlx5_flow_item_acceptable 1701 (item, (const uint8_t *)mask, 1702 (const uint8_t *)&rte_flow_item_gre_mask, 1703 sizeof(struct rte_flow_item_gre), error); 1704 if (ret < 0) 1705 return ret; 1706 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1707 if (spec) { 1708 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver; 1709 tunnel.val.protocol = spec->protocol; 1710 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver; 1711 tunnel.mask.protocol = mask->protocol; 1712 /* Remove unwanted bits from values. */ 1713 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver; 1714 tunnel.val.protocol &= tunnel.mask.protocol; 1715 tunnel.val.key &= tunnel.mask.key; 1716 } 1717 #else 1718 if (spec && (spec->protocol & mask->protocol)) 1719 return rte_flow_error_set(error, ENOTSUP, 1720 RTE_FLOW_ERROR_TYPE_ITEM, 1721 item, 1722 "without MPLS support the" 1723 " specification cannot be used for" 1724 " filtering"); 1725 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1726 if (size <= flow_size) { 1727 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4) 1728 mlx5_flow_item_gre_ip_protocol_update 1729 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT, 1730 MLX5_IP_PROTOCOL_GRE); 1731 else 1732 mlx5_flow_item_gre_ip_protocol_update 1733 (verbs->attr, IBV_FLOW_SPEC_IPV6, 1734 MLX5_IP_PROTOCOL_GRE); 1735 mlx5_flow_spec_verbs_add(flow, &tunnel, size); 1736 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1737 } 1738 flow->layers |= MLX5_FLOW_LAYER_GRE; 1739 return size; 1740 } 1741 1742 /** 1743 * Convert the @p item into a Verbs specification after ensuring the NIC 1744 * will understand and process it correctly. 1745 * If the necessary size for the conversion is greater than the @p flow_size, 1746 * nothing is written in @p flow, the validation is still performed. 1747 * 1748 * @param[in] item 1749 * Item specification. 1750 * @param[in, out] flow 1751 * Pointer to flow structure. 1752 * @param[in] flow_size 1753 * Size in bytes of the available space in @p flow, if too small, nothing is 1754 * written. 1755 * @param[out] error 1756 * Pointer to error structure. 
1757 * 1758 * @return 1759 * On success the number of bytes consumed/necessary, if the returned value 1760 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1761 * otherwise another call with this returned memory size should be done. 1762 * On error, a negative errno value is returned and rte_errno is set. 1763 */ 1764 static int 1765 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused, 1766 struct rte_flow *flow __rte_unused, 1767 const size_t flow_size __rte_unused, 1768 struct rte_flow_error *error) 1769 { 1770 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1771 const struct rte_flow_item_mpls *spec = item->spec; 1772 const struct rte_flow_item_mpls *mask = item->mask; 1773 unsigned int size = sizeof(struct ibv_flow_spec_mpls); 1774 struct ibv_flow_spec_mpls mpls = { 1775 .type = IBV_FLOW_SPEC_MPLS, 1776 .size = size, 1777 }; 1778 int ret; 1779 1780 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS) 1781 return rte_flow_error_set(error, ENOTSUP, 1782 RTE_FLOW_ERROR_TYPE_ITEM, 1783 item, 1784 "protocol filtering not compatible" 1785 " with MPLS layer"); 1786 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */ 1787 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL && 1788 (flow->layers & MLX5_FLOW_LAYER_GRE) != MLX5_FLOW_LAYER_GRE) 1789 return rte_flow_error_set(error, ENOTSUP, 1790 RTE_FLOW_ERROR_TYPE_ITEM, 1791 item, 1792 "a tunnel is already" 1793 " present"); 1794 if (!mask) 1795 mask = &rte_flow_item_mpls_mask; 1796 ret = mlx5_flow_item_acceptable 1797 (item, (const uint8_t *)mask, 1798 (const uint8_t *)&rte_flow_item_mpls_mask, 1799 sizeof(struct rte_flow_item_mpls), error); 1800 if (ret < 0) 1801 return ret; 1802 if (spec) { 1803 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label)); 1804 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label)); 1805 /* Remove unwanted bits from values. */ 1806 mpls.val.label &= mpls.mask.label; 1807 } 1808 if (size <= flow_size) { 1809 mlx5_flow_spec_verbs_add(flow, &mpls, size); 1810 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1811 } 1812 flow->layers |= MLX5_FLOW_LAYER_MPLS; 1813 return size; 1814 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1815 return rte_flow_error_set(error, ENOTSUP, 1816 RTE_FLOW_ERROR_TYPE_ITEM, 1817 item, 1818 "MPLS is not supported by Verbs, please" 1819 " update."); 1820 } 1821 1822 /** 1823 * Convert the @p pattern into a Verbs specifications after ensuring the NIC 1824 * will understand and process it correctly. 1825 * The conversion is performed item per item, each of them is written into 1826 * the @p flow if its size is lesser or equal to @p flow_size. 1827 * Validation and memory consumption computation are still performed until the 1828 * end of @p pattern, unless an error is encountered. 1829 * 1830 * @param[in] pattern 1831 * Flow pattern. 1832 * @param[in, out] flow 1833 * Pointer to the rte_flow structure. 1834 * @param[in] flow_size 1835 * Size in bytes of the available space in @p flow, if too small some 1836 * garbage may be present. 1837 * @param[out] error 1838 * Pointer to error structure. 1839 * 1840 * @return 1841 * On success the number of bytes consumed/necessary, if the returned value 1842 * is lesser or equal to @p flow_size, the @pattern has fully been 1843 * converted, otherwise another call with this returned memory size should 1844 * be done. 1845 * On error, a negative errno value is returned and rte_errno is set. 
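 *
 * For instance, a VXLAN over IPv4/UDP pattern is converted item by item by
 * the switch below, each handler returning the Verbs specification size it
 * needs (sketch only, spec/mask pointers omitted):
 *
 * @code
 * const struct rte_flow_item pattern[] = {
 *     { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *     { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *     { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *     { .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *     { .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * @endcode
 *
 * An empty pattern (END only) is converted into a default Ethernet item so
 * that the resulting Verbs attribute is never empty.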
1846 */ 1847 static int 1848 mlx5_flow_items(struct rte_eth_dev *dev, 1849 const struct rte_flow_item pattern[], 1850 struct rte_flow *flow, const size_t flow_size, 1851 struct rte_flow_error *error) 1852 { 1853 int remain = flow_size; 1854 size_t size = 0; 1855 1856 for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) { 1857 int ret = 0; 1858 1859 switch (pattern->type) { 1860 case RTE_FLOW_ITEM_TYPE_VOID: 1861 break; 1862 case RTE_FLOW_ITEM_TYPE_ETH: 1863 ret = mlx5_flow_item_eth(pattern, flow, remain, error); 1864 break; 1865 case RTE_FLOW_ITEM_TYPE_VLAN: 1866 ret = mlx5_flow_item_vlan(pattern, flow, remain, error); 1867 break; 1868 case RTE_FLOW_ITEM_TYPE_IPV4: 1869 ret = mlx5_flow_item_ipv4(pattern, flow, remain, error); 1870 break; 1871 case RTE_FLOW_ITEM_TYPE_IPV6: 1872 ret = mlx5_flow_item_ipv6(pattern, flow, remain, error); 1873 break; 1874 case RTE_FLOW_ITEM_TYPE_UDP: 1875 ret = mlx5_flow_item_udp(pattern, flow, remain, error); 1876 break; 1877 case RTE_FLOW_ITEM_TYPE_TCP: 1878 ret = mlx5_flow_item_tcp(pattern, flow, remain, error); 1879 break; 1880 case RTE_FLOW_ITEM_TYPE_VXLAN: 1881 ret = mlx5_flow_item_vxlan(pattern, flow, remain, 1882 error); 1883 break; 1884 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: 1885 ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow, 1886 remain, error); 1887 break; 1888 case RTE_FLOW_ITEM_TYPE_GRE: 1889 ret = mlx5_flow_item_gre(pattern, flow, remain, error); 1890 break; 1891 case RTE_FLOW_ITEM_TYPE_MPLS: 1892 ret = mlx5_flow_item_mpls(pattern, flow, remain, error); 1893 break; 1894 default: 1895 return rte_flow_error_set(error, ENOTSUP, 1896 RTE_FLOW_ERROR_TYPE_ITEM, 1897 pattern, 1898 "item not supported"); 1899 } 1900 if (ret < 0) 1901 return ret; 1902 if (remain > ret) 1903 remain -= ret; 1904 else 1905 remain = 0; 1906 size += ret; 1907 } 1908 if (!flow->layers) { 1909 const struct rte_flow_item item = { 1910 .type = RTE_FLOW_ITEM_TYPE_ETH, 1911 }; 1912 1913 return mlx5_flow_item_eth(&item, flow, flow_size, error); 1914 } 1915 return size; 1916 } 1917 1918 /** 1919 * Convert the @p action into a Verbs specification after ensuring the NIC 1920 * will understand and process it correctly. 1921 * If the necessary size for the conversion is greater than the @p flow_size, 1922 * nothing is written in @p flow, the validation is still performed. 1923 * 1924 * @param[in] action 1925 * Action configuration. 1926 * @param[in, out] flow 1927 * Pointer to flow structure. 1928 * @param[in] flow_size 1929 * Size in bytes of the available space in @p flow, if too small, nothing is 1930 * written. 1931 * @param[out] error 1932 * Pointer to error structure. 1933 * 1934 * @return 1935 * On success the number of bytes consumed/necessary, if the returned value 1936 * is lesser or equal to @p flow_size, the @p action has fully been 1937 * converted, otherwise another call with this returned memory size should 1938 * be done. 1939 * On error, a negative errno value is returned and rte_errno is set. 
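 *
 * A drop rule boils down to the following action list (illustrative only);
 * combining it with FLAG, MARK or another fate action is rejected by the
 * checks below:
 *
 * @code
 * const struct rte_flow_action actions[] = {
 *     { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *     { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode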
1940 */ 1941 static int 1942 mlx5_flow_action_drop(const struct rte_flow_action *action, 1943 struct rte_flow *flow, const size_t flow_size, 1944 struct rte_flow_error *error) 1945 { 1946 unsigned int size = sizeof(struct ibv_flow_spec_action_drop); 1947 struct ibv_flow_spec_action_drop drop = { 1948 .type = IBV_FLOW_SPEC_ACTION_DROP, 1949 .size = size, 1950 }; 1951 1952 if (flow->fate) 1953 return rte_flow_error_set(error, ENOTSUP, 1954 RTE_FLOW_ERROR_TYPE_ACTION, 1955 action, 1956 "multiple fate actions are not" 1957 " supported"); 1958 if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) 1959 return rte_flow_error_set(error, ENOTSUP, 1960 RTE_FLOW_ERROR_TYPE_ACTION, 1961 action, 1962 "drop is not compatible with" 1963 " flag/mark action"); 1964 if (size < flow_size) 1965 mlx5_flow_spec_verbs_add(flow, &drop, size); 1966 flow->fate |= MLX5_FLOW_FATE_DROP; 1967 return size; 1968 } 1969 1970 /** 1971 * Convert the @p action into @p flow after ensuring the NIC will understand 1972 * and process it correctly. 1973 * 1974 * @param[in] dev 1975 * Pointer to Ethernet device structure. 1976 * @param[in] action 1977 * Action configuration. 1978 * @param[in, out] flow 1979 * Pointer to flow structure. 1980 * @param[out] error 1981 * Pointer to error structure. 1982 * 1983 * @return 1984 * 0 on success, a negative errno value otherwise and rte_errno is set. 1985 */ 1986 static int 1987 mlx5_flow_action_queue(struct rte_eth_dev *dev, 1988 const struct rte_flow_action *action, 1989 struct rte_flow *flow, 1990 struct rte_flow_error *error) 1991 { 1992 struct priv *priv = dev->data->dev_private; 1993 const struct rte_flow_action_queue *queue = action->conf; 1994 1995 if (flow->fate) 1996 return rte_flow_error_set(error, ENOTSUP, 1997 RTE_FLOW_ERROR_TYPE_ACTION, 1998 action, 1999 "multiple fate actions are not" 2000 " supported"); 2001 if (queue->index >= priv->rxqs_n) 2002 return rte_flow_error_set(error, EINVAL, 2003 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2004 &queue->index, 2005 "queue index out of range"); 2006 if (!(*priv->rxqs)[queue->index]) 2007 return rte_flow_error_set(error, EINVAL, 2008 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2009 &queue->index, 2010 "queue is not configured"); 2011 if (flow->queue) 2012 (*flow->queue)[0] = queue->index; 2013 flow->rss.queue_num = 1; 2014 flow->fate |= MLX5_FLOW_FATE_QUEUE; 2015 return 0; 2016 } 2017 2018 /** 2019 * Ensure the @p action will be understood and used correctly by the NIC. 2020 * 2021 * @param dev 2022 * Pointer to Ethernet device structure. 2023 * @param action[in] 2024 * Pointer to flow actions array. 2025 * @param flow[in, out] 2026 * Pointer to the rte_flow structure. 2027 * @param error[in, out] 2028 * Pointer to error structure. 2029 * 2030 * @return 2031 * On success @p flow->queue array and @p flow->rss are filled and valid. 2032 * On error, a negative errno value is returned and rte_errno is set. 
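 *
 * Example of a configuration accepted by the checks below (a sketch only;
 * the key, queue indices and RSS types are assumptions of the example and
 * must match the device configuration):
 *
 * @code
 * uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN] = { 0 };
 * uint16_t queues[2] = { 0, 1 };
 * struct rte_flow_action_rss rss_conf = {
 *     .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *     .level = 1, // outer-packet RSS
 *     .types = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP |
 *              ETH_RSS_NONFRAG_IPV4_TCP,
 *     .key_len = MLX5_RSS_HASH_KEY_LEN,
 *     .key = rss_key,
 *     .queue_num = 2,
 *     .queue = queues,
 * };
 * @endcode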
2033 */ 2034 static int 2035 mlx5_flow_action_rss(struct rte_eth_dev *dev, 2036 const struct rte_flow_action *action, 2037 struct rte_flow *flow, 2038 struct rte_flow_error *error) 2039 { 2040 struct priv *priv = dev->data->dev_private; 2041 const struct rte_flow_action_rss *rss = action->conf; 2042 unsigned int i; 2043 2044 if (flow->fate) 2045 return rte_flow_error_set(error, ENOTSUP, 2046 RTE_FLOW_ERROR_TYPE_ACTION, 2047 action, 2048 "multiple fate actions are not" 2049 " supported"); 2050 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 2051 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 2052 return rte_flow_error_set(error, ENOTSUP, 2053 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2054 &rss->func, 2055 "RSS hash function not supported"); 2056 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 2057 if (rss->level > 2) 2058 #else 2059 if (rss->level > 1) 2060 #endif 2061 return rte_flow_error_set(error, ENOTSUP, 2062 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2063 &rss->level, 2064 "tunnel RSS is not supported"); 2065 if (rss->key_len < MLX5_RSS_HASH_KEY_LEN) 2066 return rte_flow_error_set(error, ENOTSUP, 2067 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2068 &rss->key_len, 2069 "RSS hash key too small"); 2070 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 2071 return rte_flow_error_set(error, ENOTSUP, 2072 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2073 &rss->key_len, 2074 "RSS hash key too large"); 2075 if (!rss->queue_num) 2076 return rte_flow_error_set(error, ENOTSUP, 2077 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2078 rss, 2079 "no queues were provided for RSS"); 2080 if (rss->queue_num > priv->config.ind_table_max_size) 2081 return rte_flow_error_set(error, ENOTSUP, 2082 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2083 &rss->queue_num, 2084 "number of queues too large"); 2085 if (rss->types & MLX5_RSS_HF_MASK) 2086 return rte_flow_error_set(error, ENOTSUP, 2087 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2088 &rss->types, 2089 "some RSS protocols are not" 2090 " supported"); 2091 for (i = 0; i != rss->queue_num; ++i) { 2092 if (rss->queue[i] >= priv->rxqs_n) 2093 return rte_flow_error_set 2094 (error, EINVAL, 2095 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2096 rss, 2097 "queue index out of range"); 2098 if (!(*priv->rxqs)[rss->queue[i]]) 2099 return rte_flow_error_set 2100 (error, EINVAL, 2101 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2102 &rss->queue[i], 2103 "queue is not configured"); 2104 } 2105 if (flow->queue) 2106 memcpy((*flow->queue), rss->queue, 2107 rss->queue_num * sizeof(uint16_t)); 2108 flow->rss.queue_num = rss->queue_num; 2109 memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN); 2110 flow->rss.types = rss->types; 2111 flow->rss.level = rss->level; 2112 flow->fate |= MLX5_FLOW_FATE_RSS; 2113 return 0; 2114 } 2115 2116 /** 2117 * Convert the @p action into a Verbs specification after ensuring the NIC 2118 * will understand and process it correctly. 2119 * If the necessary size for the conversion is greater than the @p flow_size, 2120 * nothing is written in @p flow, the validation is still performed. 2121 * 2122 * @param[in] action 2123 * Action configuration. 2124 * @param[in, out] flow 2125 * Pointer to flow structure. 2126 * @param[in] flow_size 2127 * Size in bytes of the available space in @p flow, if too small, nothing is 2128 * written. 2129 * @param[out] error 2130 * Pointer to error structure. 2131 * 2132 * @return 2133 * On success the number of bytes consumed/necessary, if the returned value 2134 * is lesser or equal to @p flow_size, the @p action has fully been 2135 * converted, otherwise another call with this returned memory size should 2136 * be done. 
2137 * On error, a negative errno value is returned and rte_errno is set. 2138 */ 2139 static int 2140 mlx5_flow_action_flag(const struct rte_flow_action *action, 2141 struct rte_flow *flow, const size_t flow_size, 2142 struct rte_flow_error *error) 2143 { 2144 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 2145 struct ibv_flow_spec_action_tag tag = { 2146 .type = IBV_FLOW_SPEC_ACTION_TAG, 2147 .size = size, 2148 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT), 2149 }; 2150 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 2151 2152 if (flow->modifier & MLX5_FLOW_MOD_FLAG) 2153 return rte_flow_error_set(error, ENOTSUP, 2154 RTE_FLOW_ERROR_TYPE_ACTION, 2155 action, 2156 "flag action already present"); 2157 if (flow->fate & MLX5_FLOW_FATE_DROP) 2158 return rte_flow_error_set(error, ENOTSUP, 2159 RTE_FLOW_ERROR_TYPE_ACTION, 2160 action, 2161 "flag is not compatible with drop" 2162 " action"); 2163 if (flow->modifier & MLX5_FLOW_MOD_MARK) 2164 size = 0; 2165 else if (size <= flow_size && verbs) 2166 mlx5_flow_spec_verbs_add(flow, &tag, size); 2167 flow->modifier |= MLX5_FLOW_MOD_FLAG; 2168 return size; 2169 } 2170 2171 /** 2172 * Update verbs specification to modify the flag to mark. 2173 * 2174 * @param[in, out] verbs 2175 * Pointer to the mlx5_flow_verbs structure. 2176 * @param[in] mark_id 2177 * Mark identifier to replace the flag. 2178 */ 2179 static void 2180 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id) 2181 { 2182 struct ibv_spec_header *hdr; 2183 int i; 2184 2185 if (!verbs) 2186 return; 2187 /* Update Verbs specification. */ 2188 hdr = (struct ibv_spec_header *)verbs->specs; 2189 if (!hdr) 2190 return; 2191 for (i = 0; i != verbs->attr->num_of_specs; ++i) { 2192 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) { 2193 struct ibv_flow_spec_action_tag *t = 2194 (struct ibv_flow_spec_action_tag *)hdr; 2195 2196 t->tag_id = mlx5_flow_mark_set(mark_id); 2197 } 2198 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size); 2199 } 2200 } 2201 2202 /** 2203 * Convert the @p action into @p flow (or by updating the already present 2204 * Flag Verbs specification) after ensuring the NIC will understand and 2205 * process it correctly. 2206 * If the necessary size for the conversion is greater than the @p flow_size, 2207 * nothing is written in @p flow, the validation is still performed. 2208 * 2209 * @param[in] action 2210 * Action configuration. 2211 * @param[in, out] flow 2212 * Pointer to flow structure. 2213 * @param[in] flow_size 2214 * Size in bytes of the available space in @p flow, if too small, nothing is 2215 * written. 2216 * @param[out] error 2217 * Pointer to error structure. 2218 * 2219 * @return 2220 * On success the number of bytes consumed/necessary, if the returned value 2221 * is lesser or equal to @p flow_size, the @p action has fully been 2222 * converted, otherwise another call with this returned memory size should 2223 * be done. 2224 * On error, a negative errno value is returned and rte_errno is set. 
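 *
 * Illustrative action list (queue 0 is assumed to be configured): if a FLAG
 * action was converted earlier, the existing tag specification is rewritten
 * with the requested id through mlx5_flow_verbs_mark_update() above instead
 * of adding a second tag.
 *
 * @code
 * struct rte_flow_action_mark mark = { .id = 42 };
 * struct rte_flow_action_queue queue_conf = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *     { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *     { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *     { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode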
2225 */ 2226 static int 2227 mlx5_flow_action_mark(const struct rte_flow_action *action, 2228 struct rte_flow *flow, const size_t flow_size, 2229 struct rte_flow_error *error) 2230 { 2231 const struct rte_flow_action_mark *mark = action->conf; 2232 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 2233 struct ibv_flow_spec_action_tag tag = { 2234 .type = IBV_FLOW_SPEC_ACTION_TAG, 2235 .size = size, 2236 }; 2237 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 2238 2239 if (!mark) 2240 return rte_flow_error_set(error, EINVAL, 2241 RTE_FLOW_ERROR_TYPE_ACTION, 2242 action, 2243 "configuration cannot be null"); 2244 if (mark->id >= MLX5_FLOW_MARK_MAX) 2245 return rte_flow_error_set(error, EINVAL, 2246 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2247 &mark->id, 2248 "mark id must in 0 <= id < " 2249 RTE_STR(MLX5_FLOW_MARK_MAX)); 2250 if (flow->modifier & MLX5_FLOW_MOD_MARK) 2251 return rte_flow_error_set(error, ENOTSUP, 2252 RTE_FLOW_ERROR_TYPE_ACTION, 2253 action, 2254 "mark action already present"); 2255 if (flow->fate & MLX5_FLOW_FATE_DROP) 2256 return rte_flow_error_set(error, ENOTSUP, 2257 RTE_FLOW_ERROR_TYPE_ACTION, 2258 action, 2259 "mark is not compatible with drop" 2260 " action"); 2261 if (flow->modifier & MLX5_FLOW_MOD_FLAG) { 2262 mlx5_flow_verbs_mark_update(verbs, mark->id); 2263 size = 0; 2264 } else if (size <= flow_size) { 2265 tag.tag_id = mlx5_flow_mark_set(mark->id); 2266 mlx5_flow_spec_verbs_add(flow, &tag, size); 2267 } 2268 flow->modifier |= MLX5_FLOW_MOD_MARK; 2269 return size; 2270 } 2271 2272 /** 2273 * Convert the @p action into a Verbs specification after ensuring the NIC 2274 * will understand and process it correctly. 2275 * If the necessary size for the conversion is greater than the @p flow_size, 2276 * nothing is written in @p flow, the validation is still performed. 2277 * 2278 * @param action[in] 2279 * Action configuration. 2280 * @param flow[in, out] 2281 * Pointer to flow structure. 2282 * @param flow_size[in] 2283 * Size in bytes of the available space in @p flow, if too small, nothing is 2284 * written. 2285 * @param error[int, out] 2286 * Pointer to error structure. 2287 * 2288 * @return 2289 * On success the number of bytes consumed/necessary, if the returned value 2290 * is lesser or equal to @p flow_size, the @p action has fully been 2291 * converted, otherwise another call with this returned memory size should 2292 * be done. 2293 * On error, a negative errno value is returned and rte_errno is set. 
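 *
 * Sketch of the action configuration (assuming flow counters are enabled in
 * the device configuration); the counter attached here is read back later
 * through the query path, see mlx5_flow_query_count() further below.
 *
 * @code
 * struct rte_flow_action_count count_conf = { .shared = 0, .id = 0 };
 * const struct rte_flow_action count_action = {
 *     .type = RTE_FLOW_ACTION_TYPE_COUNT,
 *     .conf = &count_conf,
 * };
 * @endcode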
2294 */ 2295 static int 2296 mlx5_flow_action_count(struct rte_eth_dev *dev, 2297 const struct rte_flow_action *action, 2298 struct rte_flow *flow, 2299 const size_t flow_size __rte_unused, 2300 struct rte_flow_error *error) 2301 { 2302 const struct rte_flow_action_count *count = action->conf; 2303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2304 unsigned int size = sizeof(struct ibv_flow_spec_counter_action); 2305 struct ibv_flow_spec_counter_action counter = { 2306 .type = IBV_FLOW_SPEC_ACTION_COUNT, 2307 .size = size, 2308 }; 2309 #endif 2310 2311 if (!flow->counter) { 2312 flow->counter = mlx5_flow_counter_new(dev, count->shared, 2313 count->id); 2314 if (!flow->counter) 2315 return rte_flow_error_set(error, ENOTSUP, 2316 RTE_FLOW_ERROR_TYPE_ACTION, 2317 action, 2318 "cannot get counter" 2319 " context."); 2320 } 2321 if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en) 2322 return rte_flow_error_set(error, ENOTSUP, 2323 RTE_FLOW_ERROR_TYPE_ACTION, 2324 action, 2325 "flow counters are not supported."); 2326 flow->modifier |= MLX5_FLOW_MOD_COUNT; 2327 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2328 counter.counter_set_handle = flow->counter->cs->handle; 2329 if (size <= flow_size) 2330 mlx5_flow_spec_verbs_add(flow, &counter, size); 2331 return size; 2332 #endif 2333 return 0; 2334 } 2335 2336 /** 2337 * Convert the @p action into @p flow after ensuring the NIC will understand 2338 * and process it correctly. 2339 * The conversion is performed action per action, each of them is written into 2340 * the @p flow if its size is lesser or equal to @p flow_size. 2341 * Validation and memory consumption computation are still performed until the 2342 * end of @p action, unless an error is encountered. 2343 * 2344 * @param[in] dev 2345 * Pointer to Ethernet device structure. 2346 * @param[in] actions 2347 * Pointer to flow actions array. 2348 * @param[in, out] flow 2349 * Pointer to the rte_flow structure. 2350 * @param[in] flow_size 2351 * Size in bytes of the available space in @p flow, if too small some 2352 * garbage may be present. 2353 * @param[out] error 2354 * Pointer to error structure. 2355 * 2356 * @return 2357 * On success the number of bytes consumed/necessary, if the returned value 2358 * is lesser or equal to @p flow_size, the @p actions has fully been 2359 * converted, otherwise another call with this returned memory size should 2360 * be done. 2361 * On error, a negative errno value is returned and rte_errno is set. 
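 *
 * At least one fate action (DROP, QUEUE or RSS) must be present; for example
 * the following list is rejected below with "no fate action found"
 * (illustrative only):
 *
 * @code
 * const struct rte_flow_action bad_actions[] = {
 *     { .type = RTE_FLOW_ACTION_TYPE_FLAG },
 *     { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode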
2362 */ 2363 static int 2364 mlx5_flow_actions(struct rte_eth_dev *dev, 2365 const struct rte_flow_action actions[], 2366 struct rte_flow *flow, const size_t flow_size, 2367 struct rte_flow_error *error) 2368 { 2369 size_t size = 0; 2370 int remain = flow_size; 2371 int ret = 0; 2372 2373 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2374 switch (actions->type) { 2375 case RTE_FLOW_ACTION_TYPE_VOID: 2376 break; 2377 case RTE_FLOW_ACTION_TYPE_FLAG: 2378 ret = mlx5_flow_action_flag(actions, flow, remain, 2379 error); 2380 break; 2381 case RTE_FLOW_ACTION_TYPE_MARK: 2382 ret = mlx5_flow_action_mark(actions, flow, remain, 2383 error); 2384 break; 2385 case RTE_FLOW_ACTION_TYPE_DROP: 2386 ret = mlx5_flow_action_drop(actions, flow, remain, 2387 error); 2388 break; 2389 case RTE_FLOW_ACTION_TYPE_QUEUE: 2390 ret = mlx5_flow_action_queue(dev, actions, flow, error); 2391 break; 2392 case RTE_FLOW_ACTION_TYPE_RSS: 2393 ret = mlx5_flow_action_rss(dev, actions, flow, error); 2394 break; 2395 case RTE_FLOW_ACTION_TYPE_COUNT: 2396 ret = mlx5_flow_action_count(dev, actions, flow, remain, 2397 error); 2398 break; 2399 default: 2400 return rte_flow_error_set(error, ENOTSUP, 2401 RTE_FLOW_ERROR_TYPE_ACTION, 2402 actions, 2403 "action not supported"); 2404 } 2405 if (ret < 0) 2406 return ret; 2407 if (remain > ret) 2408 remain -= ret; 2409 else 2410 remain = 0; 2411 size += ret; 2412 } 2413 if (!flow->fate) 2414 return rte_flow_error_set(error, ENOTSUP, 2415 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2416 NULL, 2417 "no fate action found"); 2418 return size; 2419 } 2420 2421 /** 2422 * Validate flow rule and fill flow structure accordingly. 2423 * 2424 * @param dev 2425 * Pointer to Ethernet device. 2426 * @param[out] flow 2427 * Pointer to flow structure. 2428 * @param flow_size 2429 * Size of allocated space for @p flow. 2430 * @param[in] attr 2431 * Flow rule attributes. 2432 * @param[in] pattern 2433 * Pattern specification (list terminated by the END pattern item). 2434 * @param[in] actions 2435 * Associated actions (list terminated by the END action). 2436 * @param[out] error 2437 * Perform verbose error reporting if not NULL. 2438 * 2439 * @return 2440 * A positive value representing the size of the flow object in bytes 2441 * regardless of @p flow_size on success, a negative errno value otherwise 2442 * and rte_errno is set. 2443 */ 2444 static int 2445 mlx5_flow_merge_switch(struct rte_eth_dev *dev, 2446 struct rte_flow *flow, 2447 size_t flow_size, 2448 const struct rte_flow_attr *attr, 2449 const struct rte_flow_item pattern[], 2450 const struct rte_flow_action actions[], 2451 struct rte_flow_error *error) 2452 { 2453 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0); 2454 uint16_t port_id[!n + n]; 2455 struct mlx5_nl_flow_ptoi ptoi[!n + n + 1]; 2456 size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t)); 2457 unsigned int i; 2458 unsigned int own = 0; 2459 int ret; 2460 2461 /* At least one port is needed when no switch domain is present. */ 2462 if (!n) { 2463 n = 1; 2464 port_id[0] = dev->data->port_id; 2465 } else { 2466 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n); 2467 } 2468 for (i = 0; i != n; ++i) { 2469 struct rte_eth_dev_info dev_info; 2470 2471 rte_eth_dev_info_get(port_id[i], &dev_info); 2472 if (port_id[i] == dev->data->port_id) 2473 own = i; 2474 ptoi[i].port_id = port_id[i]; 2475 ptoi[i].ifindex = dev_info.if_index; 2476 } 2477 /* Ensure first entry of ptoi[] is the current device. 
*/ 2478 if (own) { 2479 ptoi[n] = ptoi[0]; 2480 ptoi[0] = ptoi[own]; 2481 ptoi[own] = ptoi[n]; 2482 } 2483 /* An entry with zero ifindex terminates ptoi[]. */ 2484 ptoi[n].port_id = 0; 2485 ptoi[n].ifindex = 0; 2486 if (flow_size < off) 2487 flow_size = 0; 2488 ret = mlx5_nl_flow_transpose((uint8_t *)flow + off, 2489 flow_size ? flow_size - off : 0, 2490 ptoi, attr, pattern, actions, error); 2491 if (ret < 0) 2492 return ret; 2493 if (flow_size) { 2494 *flow = (struct rte_flow){ 2495 .attributes = *attr, 2496 .nl_flow = (uint8_t *)flow + off, 2497 }; 2498 /* 2499 * Generate a reasonably unique handle based on the address 2500 * of the target buffer. 2501 * 2502 * This is straightforward on 32-bit systems where the flow 2503 * pointer can be used directly. Otherwise, its least 2504 * significant part is taken after shifting it by the 2505 * previous power of two of the pointed buffer size. 2506 */ 2507 if (sizeof(flow) <= 4) 2508 mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow); 2509 else 2510 mlx5_nl_flow_brand 2511 (flow->nl_flow, 2512 (uintptr_t)flow >> 2513 rte_log2_u32(rte_align32prevpow2(flow_size))); 2514 } 2515 return off + ret; 2516 } 2517 2518 static unsigned int 2519 mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 2520 { 2521 const struct rte_flow_item *item; 2522 unsigned int has_vlan = 0; 2523 2524 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 2525 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 2526 has_vlan = 1; 2527 break; 2528 } 2529 } 2530 if (has_vlan) 2531 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 2532 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 2533 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 2534 MLX5_EXPANSION_ROOT_OUTER; 2535 } 2536 2537 /** 2538 * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC 2539 * after ensuring the NIC will understand and process it correctly. 2540 * The conversion is only performed item/action per item/action, each of 2541 * them is written into the @p flow if its size is lesser or equal to @p 2542 * flow_size. 2543 * Validation and memory consumption computation are still performed until the 2544 * end, unless an error is encountered. 2545 * 2546 * @param[in] dev 2547 * Pointer to Ethernet device. 2548 * @param[in, out] flow 2549 * Pointer to flow structure. 2550 * @param[in] flow_size 2551 * Size in bytes of the available space in @p flow, if too small some 2552 * garbage may be present. 2553 * @param[in] attributes 2554 * Flow rule attributes. 2555 * @param[in] pattern 2556 * Pattern specification (list terminated by the END pattern item). 2557 * @param[in] actions 2558 * Associated actions (list terminated by the END action). 2559 * @param[out] error 2560 * Perform verbose error reporting if not NULL. 2561 * 2562 * @return 2563 * On success the number of bytes consumed/necessary, if the returned value 2564 * is lesser or equal to @p flow_size, the flow has fully been converted and 2565 * can be applied, otherwise another call with this returned memory size 2566 * should be done. 2567 * On error, a negative errno value is returned and rte_errno is set. 
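 *
 * The returned size implements a two-pass convention, as used by
 * mlx5_flow_list_create() below (sketch, error handling trimmed):
 *
 * @code
 * int ret = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, &error);
 * if (ret < 0)
 *     return NULL; // validation failed
 * flow = rte_calloc(__func__, 1, ret, 0); // allocate the reported size
 * ret = mlx5_flow_merge(dev, flow, ret, attr, pattern, actions, &error);
 * @endcode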
2568 */ 2569 static int 2570 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow, 2571 const size_t flow_size, 2572 const struct rte_flow_attr *attributes, 2573 const struct rte_flow_item pattern[], 2574 const struct rte_flow_action actions[], 2575 struct rte_flow_error *error) 2576 { 2577 struct rte_flow local_flow = { .layers = 0, }; 2578 size_t size = sizeof(*flow); 2579 union { 2580 struct rte_flow_expand_rss buf; 2581 uint8_t buffer[2048]; 2582 } expand_buffer; 2583 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 2584 struct mlx5_flow_verbs *original_verbs = NULL; 2585 size_t original_verbs_size = 0; 2586 uint32_t original_layers = 0; 2587 int expanded_pattern_idx = 0; 2588 int ret; 2589 uint32_t i; 2590 2591 if (attributes->transfer) 2592 return mlx5_flow_merge_switch(dev, flow, flow_size, 2593 attributes, pattern, 2594 actions, error); 2595 if (size > flow_size) 2596 flow = &local_flow; 2597 ret = mlx5_flow_attributes(dev, attributes, flow, error); 2598 if (ret < 0) 2599 return ret; 2600 ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error); 2601 if (ret < 0) 2602 return ret; 2603 if (local_flow.rss.types) { 2604 unsigned int graph_root; 2605 2606 graph_root = mlx5_find_graph_root(pattern, 2607 local_flow.rss.level); 2608 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 2609 pattern, local_flow.rss.types, 2610 mlx5_support_expansion, 2611 graph_root); 2612 assert(ret > 0 && 2613 (unsigned int)ret < sizeof(expand_buffer.buffer)); 2614 } else { 2615 buf->entries = 1; 2616 buf->entry[0].pattern = (void *)(uintptr_t)pattern; 2617 } 2618 size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t), 2619 sizeof(void *)); 2620 if (size <= flow_size) 2621 flow->queue = (void *)(flow + 1); 2622 LIST_INIT(&flow->verbs); 2623 flow->layers = 0; 2624 flow->modifier = 0; 2625 flow->fate = 0; 2626 for (i = 0; i != buf->entries; ++i) { 2627 size_t off = size; 2628 size_t off2; 2629 2630 flow->layers = original_layers; 2631 size += sizeof(struct ibv_flow_attr) + 2632 sizeof(struct mlx5_flow_verbs); 2633 off2 = size; 2634 if (size < flow_size) { 2635 flow->cur_verbs = (void *)((uintptr_t)flow + off); 2636 flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1); 2637 flow->cur_verbs->specs = 2638 (void *)(flow->cur_verbs->attr + 1); 2639 } 2640 /* First iteration convert the pattern into Verbs. */ 2641 if (i == 0) { 2642 /* Actions don't need to be converted several time. */ 2643 ret = mlx5_flow_actions(dev, actions, flow, 2644 (size < flow_size) ? 2645 flow_size - size : 0, 2646 error); 2647 if (ret < 0) 2648 return ret; 2649 size += ret; 2650 } else { 2651 /* 2652 * Next iteration means the pattern has already been 2653 * converted and an expansion is necessary to match 2654 * the user RSS request. For that only the expanded 2655 * items will be converted, the common part with the 2656 * user pattern are just copied into the next buffer 2657 * zone. 2658 */ 2659 size += original_verbs_size; 2660 if (size < flow_size) { 2661 rte_memcpy(flow->cur_verbs->attr, 2662 original_verbs->attr, 2663 original_verbs_size + 2664 sizeof(struct ibv_flow_attr)); 2665 flow->cur_verbs->size = original_verbs_size; 2666 } 2667 } 2668 ret = mlx5_flow_items 2669 (dev, 2670 (const struct rte_flow_item *) 2671 &buf->entry[i].pattern[expanded_pattern_idx], 2672 flow, 2673 (size < flow_size) ? 
flow_size - size : 0, error); 2674 if (ret < 0) 2675 return ret; 2676 size += ret; 2677 if (size <= flow_size) { 2678 mlx5_flow_adjust_priority(dev, flow); 2679 LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next); 2680 } 2681 /* 2682 * Keep a pointer of the first verbs conversion and the layers 2683 * it has encountered. 2684 */ 2685 if (i == 0) { 2686 original_verbs = flow->cur_verbs; 2687 original_verbs_size = size - off2; 2688 original_layers = flow->layers; 2689 /* 2690 * move the index of the expanded pattern to the 2691 * first item not addressed yet. 2692 */ 2693 if (pattern->type == RTE_FLOW_ITEM_TYPE_END) { 2694 expanded_pattern_idx++; 2695 } else { 2696 const struct rte_flow_item *item = pattern; 2697 2698 for (item = pattern; 2699 item->type != RTE_FLOW_ITEM_TYPE_END; 2700 ++item) 2701 expanded_pattern_idx++; 2702 } 2703 } 2704 } 2705 /* Restore the origin layers in the flow. */ 2706 flow->layers = original_layers; 2707 return size; 2708 } 2709 2710 /** 2711 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 2712 * if several tunnel rules are used on this queue, the tunnel ptype will be 2713 * cleared. 2714 * 2715 * @param rxq_ctrl 2716 * Rx queue to update. 2717 */ 2718 static void 2719 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 2720 { 2721 unsigned int i; 2722 uint32_t tunnel_ptype = 0; 2723 2724 /* Look up for the ptype to use. */ 2725 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 2726 if (!rxq_ctrl->flow_tunnels_n[i]) 2727 continue; 2728 if (!tunnel_ptype) { 2729 tunnel_ptype = tunnels_info[i].ptype; 2730 } else { 2731 tunnel_ptype = 0; 2732 break; 2733 } 2734 } 2735 rxq_ctrl->rxq.tunnel = tunnel_ptype; 2736 } 2737 2738 /** 2739 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow. 2740 * 2741 * @param[in] dev 2742 * Pointer to Ethernet device. 2743 * @param[in] flow 2744 * Pointer to flow structure. 2745 */ 2746 static void 2747 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 2748 { 2749 struct priv *priv = dev->data->dev_private; 2750 const int mark = !!(flow->modifier & 2751 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2752 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2753 unsigned int i; 2754 2755 for (i = 0; i != flow->rss.queue_num; ++i) { 2756 int idx = (*flow->queue)[i]; 2757 struct mlx5_rxq_ctrl *rxq_ctrl = 2758 container_of((*priv->rxqs)[idx], 2759 struct mlx5_rxq_ctrl, rxq); 2760 2761 if (mark) { 2762 rxq_ctrl->rxq.mark = 1; 2763 rxq_ctrl->flow_mark_n++; 2764 } 2765 if (tunnel) { 2766 unsigned int j; 2767 2768 /* Increase the counter matching the flow. */ 2769 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2770 if ((tunnels_info[j].tunnel & flow->layers) == 2771 tunnels_info[j].tunnel) { 2772 rxq_ctrl->flow_tunnels_n[j]++; 2773 break; 2774 } 2775 } 2776 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2777 } 2778 } 2779 } 2780 2781 /** 2782 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 2783 * @p flow if no other flow uses it with the same kind of request. 2784 * 2785 * @param dev 2786 * Pointer to Ethernet device. 2787 * @param[in] flow 2788 * Pointer to the flow. 
2789 */ 2790 static void 2791 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 2792 { 2793 struct priv *priv = dev->data->dev_private; 2794 const int mark = !!(flow->modifier & 2795 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2796 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2797 unsigned int i; 2798 2799 assert(dev->data->dev_started); 2800 for (i = 0; i != flow->rss.queue_num; ++i) { 2801 int idx = (*flow->queue)[i]; 2802 struct mlx5_rxq_ctrl *rxq_ctrl = 2803 container_of((*priv->rxqs)[idx], 2804 struct mlx5_rxq_ctrl, rxq); 2805 2806 if (mark) { 2807 rxq_ctrl->flow_mark_n--; 2808 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 2809 } 2810 if (tunnel) { 2811 unsigned int j; 2812 2813 /* Decrease the counter matching the flow. */ 2814 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2815 if ((tunnels_info[j].tunnel & flow->layers) == 2816 tunnels_info[j].tunnel) { 2817 rxq_ctrl->flow_tunnels_n[j]--; 2818 break; 2819 } 2820 } 2821 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2822 } 2823 } 2824 } 2825 2826 /** 2827 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 2828 * 2829 * @param dev 2830 * Pointer to Ethernet device. 2831 */ 2832 static void 2833 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev) 2834 { 2835 struct priv *priv = dev->data->dev_private; 2836 unsigned int i; 2837 2838 for (i = 0; i != priv->rxqs_n; ++i) { 2839 struct mlx5_rxq_ctrl *rxq_ctrl; 2840 unsigned int j; 2841 2842 if (!(*priv->rxqs)[i]) 2843 continue; 2844 rxq_ctrl = container_of((*priv->rxqs)[i], 2845 struct mlx5_rxq_ctrl, rxq); 2846 rxq_ctrl->flow_mark_n = 0; 2847 rxq_ctrl->rxq.mark = 0; 2848 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 2849 rxq_ctrl->flow_tunnels_n[j] = 0; 2850 rxq_ctrl->rxq.tunnel = 0; 2851 } 2852 } 2853 2854 /** 2855 * Validate a flow supported by the NIC. 2856 * 2857 * @see rte_flow_validate() 2858 * @see rte_flow_ops 2859 */ 2860 int 2861 mlx5_flow_validate(struct rte_eth_dev *dev, 2862 const struct rte_flow_attr *attr, 2863 const struct rte_flow_item items[], 2864 const struct rte_flow_action actions[], 2865 struct rte_flow_error *error) 2866 { 2867 int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error); 2868 2869 if (ret < 0) 2870 return ret; 2871 return 0; 2872 } 2873 2874 /** 2875 * Remove the flow. 2876 * 2877 * @param[in] dev 2878 * Pointer to Ethernet device. 2879 * @param[in, out] flow 2880 * Pointer to flow structure. 2881 */ 2882 static void 2883 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2884 { 2885 struct priv *priv = dev->data->dev_private; 2886 struct mlx5_flow_verbs *verbs; 2887 2888 if (flow->nl_flow && priv->mnl_socket) 2889 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL); 2890 LIST_FOREACH(verbs, &flow->verbs, next) { 2891 if (verbs->flow) { 2892 claim_zero(mlx5_glue->destroy_flow(verbs->flow)); 2893 verbs->flow = NULL; 2894 } 2895 if (verbs->hrxq) { 2896 if (flow->fate & MLX5_FLOW_FATE_DROP) 2897 mlx5_hrxq_drop_release(dev); 2898 else 2899 mlx5_hrxq_release(dev, verbs->hrxq); 2900 verbs->hrxq = NULL; 2901 } 2902 } 2903 if (flow->counter) { 2904 mlx5_flow_counter_release(flow->counter); 2905 flow->counter = NULL; 2906 } 2907 } 2908 2909 /** 2910 * Apply the flow. 2911 * 2912 * @param[in] dev 2913 * Pointer to Ethernet device structure. 2914 * @param[in, out] flow 2915 * Pointer to flow structure. 2916 * @param[out] error 2917 * Pointer to error structure. 2918 * 2919 * @return 2920 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2921 */ 2922 static int 2923 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2924 struct rte_flow_error *error) 2925 { 2926 struct priv *priv = dev->data->dev_private; 2927 struct mlx5_flow_verbs *verbs; 2928 int err; 2929 2930 LIST_FOREACH(verbs, &flow->verbs, next) { 2931 if (flow->fate & MLX5_FLOW_FATE_DROP) { 2932 verbs->hrxq = mlx5_hrxq_drop_new(dev); 2933 if (!verbs->hrxq) { 2934 rte_flow_error_set 2935 (error, errno, 2936 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2937 NULL, 2938 "cannot get drop hash queue"); 2939 goto error; 2940 } 2941 } else { 2942 struct mlx5_hrxq *hrxq; 2943 2944 hrxq = mlx5_hrxq_get(dev, flow->key, 2945 MLX5_RSS_HASH_KEY_LEN, 2946 verbs->hash_fields, 2947 (*flow->queue), 2948 flow->rss.queue_num); 2949 if (!hrxq) 2950 hrxq = mlx5_hrxq_new(dev, flow->key, 2951 MLX5_RSS_HASH_KEY_LEN, 2952 verbs->hash_fields, 2953 (*flow->queue), 2954 flow->rss.queue_num, 2955 !!(flow->layers & 2956 MLX5_FLOW_LAYER_TUNNEL)); 2957 if (!hrxq) { 2958 rte_flow_error_set 2959 (error, rte_errno, 2960 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2961 NULL, 2962 "cannot get hash queue"); 2963 goto error; 2964 } 2965 verbs->hrxq = hrxq; 2966 } 2967 verbs->flow = 2968 mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr); 2969 if (!verbs->flow) { 2970 rte_flow_error_set(error, errno, 2971 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2972 NULL, 2973 "hardware refuses to create flow"); 2974 goto error; 2975 } 2976 } 2977 if (flow->nl_flow && 2978 priv->mnl_socket && 2979 mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error)) 2980 goto error; 2981 return 0; 2982 error: 2983 err = rte_errno; /* Save rte_errno before cleanup. */ 2984 LIST_FOREACH(verbs, &flow->verbs, next) { 2985 if (verbs->hrxq) { 2986 if (flow->fate & MLX5_FLOW_FATE_DROP) 2987 mlx5_hrxq_drop_release(dev); 2988 else 2989 mlx5_hrxq_release(dev, verbs->hrxq); 2990 verbs->hrxq = NULL; 2991 } 2992 } 2993 rte_errno = err; /* Restore rte_errno. */ 2994 return -rte_errno; 2995 } 2996 2997 /** 2998 * Create a flow and add it to @p list. 2999 * 3000 * @param dev 3001 * Pointer to Ethernet device. 3002 * @param list 3003 * Pointer to a TAILQ flow list. 3004 * @param[in] attr 3005 * Flow rule attributes. 3006 * @param[in] items 3007 * Pattern specification (list terminated by the END pattern item). 3008 * @param[in] actions 3009 * Associated actions (list terminated by the END action). 3010 * @param[out] error 3011 * Perform verbose error reporting if not NULL. 3012 * 3013 * @return 3014 * A flow on success, NULL otherwise and rte_errno is set. 
3015 */ 3016 static struct rte_flow * 3017 mlx5_flow_list_create(struct rte_eth_dev *dev, 3018 struct mlx5_flows *list, 3019 const struct rte_flow_attr *attr, 3020 const struct rte_flow_item items[], 3021 const struct rte_flow_action actions[], 3022 struct rte_flow_error *error) 3023 { 3024 struct rte_flow *flow = NULL; 3025 size_t size = 0; 3026 int ret; 3027 3028 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 3029 if (ret < 0) 3030 return NULL; 3031 size = ret; 3032 flow = rte_calloc(__func__, 1, size, 0); 3033 if (!flow) { 3034 rte_flow_error_set(error, ENOMEM, 3035 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3036 NULL, 3037 "not enough memory to create flow"); 3038 return NULL; 3039 } 3040 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 3041 if (ret < 0) { 3042 rte_free(flow); 3043 return NULL; 3044 } 3045 assert((size_t)ret == size); 3046 if (dev->data->dev_started) { 3047 ret = mlx5_flow_apply(dev, flow, error); 3048 if (ret < 0) { 3049 ret = rte_errno; /* Save rte_errno before cleanup. */ 3050 if (flow) { 3051 mlx5_flow_remove(dev, flow); 3052 rte_free(flow); 3053 } 3054 rte_errno = ret; /* Restore rte_errno. */ 3055 return NULL; 3056 } 3057 } 3058 TAILQ_INSERT_TAIL(list, flow, next); 3059 mlx5_flow_rxq_flags_set(dev, flow); 3060 return flow; 3061 } 3062 3063 /** 3064 * Create a flow. 3065 * 3066 * @see rte_flow_create() 3067 * @see rte_flow_ops 3068 */ 3069 struct rte_flow * 3070 mlx5_flow_create(struct rte_eth_dev *dev, 3071 const struct rte_flow_attr *attr, 3072 const struct rte_flow_item items[], 3073 const struct rte_flow_action actions[], 3074 struct rte_flow_error *error) 3075 { 3076 return mlx5_flow_list_create 3077 (dev, &((struct priv *)dev->data->dev_private)->flows, 3078 attr, items, actions, error); 3079 } 3080 3081 /** 3082 * Destroy a flow in a list. 3083 * 3084 * @param dev 3085 * Pointer to Ethernet device. 3086 * @param list 3087 * Pointer to a TAILQ flow list. 3088 * @param[in] flow 3089 * Flow to destroy. 3090 */ 3091 static void 3092 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, 3093 struct rte_flow *flow) 3094 { 3095 mlx5_flow_remove(dev, flow); 3096 TAILQ_REMOVE(list, flow, next); 3097 /* 3098 * Update RX queue flags only if port is started, otherwise it is 3099 * already clean. 3100 */ 3101 if (dev->data->dev_started) 3102 mlx5_flow_rxq_flags_trim(dev, flow); 3103 rte_free(flow); 3104 } 3105 3106 /** 3107 * Destroy all flows. 3108 * 3109 * @param dev 3110 * Pointer to Ethernet device. 3111 * @param list 3112 * Pointer to a TAILQ flow list. 3113 */ 3114 void 3115 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list) 3116 { 3117 while (!TAILQ_EMPTY(list)) { 3118 struct rte_flow *flow; 3119 3120 flow = TAILQ_FIRST(list); 3121 mlx5_flow_list_destroy(dev, list, flow); 3122 } 3123 } 3124 3125 /** 3126 * Remove all flows. 3127 * 3128 * @param dev 3129 * Pointer to Ethernet device. 3130 * @param list 3131 * Pointer to a TAILQ flow list. 3132 */ 3133 void 3134 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list) 3135 { 3136 struct rte_flow *flow; 3137 3138 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) 3139 mlx5_flow_remove(dev, flow); 3140 mlx5_flow_rxq_flags_clear(dev); 3141 } 3142 3143 /** 3144 * Add all flows. 3145 * 3146 * @param dev 3147 * Pointer to Ethernet device. 3148 * @param list 3149 * Pointer to a TAILQ flow list. 3150 * 3151 * @return 3152 * 0 on success, a negative errno value otherwise and rte_errno is set. 
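 *
 * Typically paired with mlx5_flow_stop() around a port restart (a sketch,
 * assuming the caller owns the priv->flows list):
 *
 * @code
 * mlx5_flow_stop(dev, &priv->flows);
 * // ... port reconfiguration ...
 * if (mlx5_flow_start(dev, &priv->flows))
 *     DRV_LOG(ERR, "port %u cannot re-apply flows", dev->data->port_id);
 * @endcode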
3153 */ 3154 int 3155 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list) 3156 { 3157 struct rte_flow *flow; 3158 struct rte_flow_error error; 3159 int ret = 0; 3160 3161 TAILQ_FOREACH(flow, list, next) { 3162 ret = mlx5_flow_apply(dev, flow, &error); 3163 if (ret < 0) 3164 goto error; 3165 mlx5_flow_rxq_flags_set(dev, flow); 3166 } 3167 return 0; 3168 error: 3169 ret = rte_errno; /* Save rte_errno before cleanup. */ 3170 mlx5_flow_stop(dev, list); 3171 rte_errno = ret; /* Restore rte_errno. */ 3172 return -rte_errno; 3173 } 3174 3175 /** 3176 * Verify the flow list is empty 3177 * 3178 * @param dev 3179 * Pointer to Ethernet device. 3180 * 3181 * @return the number of flows not released. 3182 */ 3183 int 3184 mlx5_flow_verify(struct rte_eth_dev *dev) 3185 { 3186 struct priv *priv = dev->data->dev_private; 3187 struct rte_flow *flow; 3188 int ret = 0; 3189 3190 TAILQ_FOREACH(flow, &priv->flows, next) { 3191 DRV_LOG(DEBUG, "port %u flow %p still referenced", 3192 dev->data->port_id, (void *)flow); 3193 ++ret; 3194 } 3195 return ret; 3196 } 3197 3198 /** 3199 * Enable a control flow configured from the control plane. 3200 * 3201 * @param dev 3202 * Pointer to Ethernet device. 3203 * @param eth_spec 3204 * An Ethernet flow spec to apply. 3205 * @param eth_mask 3206 * An Ethernet flow mask to apply. 3207 * @param vlan_spec 3208 * A VLAN flow spec to apply. 3209 * @param vlan_mask 3210 * A VLAN flow mask to apply. 3211 * 3212 * @return 3213 * 0 on success, a negative errno value otherwise and rte_errno is set. 3214 */ 3215 int 3216 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 3217 struct rte_flow_item_eth *eth_spec, 3218 struct rte_flow_item_eth *eth_mask, 3219 struct rte_flow_item_vlan *vlan_spec, 3220 struct rte_flow_item_vlan *vlan_mask) 3221 { 3222 struct priv *priv = dev->data->dev_private; 3223 const struct rte_flow_attr attr = { 3224 .ingress = 1, 3225 .priority = MLX5_FLOW_PRIO_RSVD, 3226 }; 3227 struct rte_flow_item items[] = { 3228 { 3229 .type = RTE_FLOW_ITEM_TYPE_ETH, 3230 .spec = eth_spec, 3231 .last = NULL, 3232 .mask = eth_mask, 3233 }, 3234 { 3235 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN : 3236 RTE_FLOW_ITEM_TYPE_END, 3237 .spec = vlan_spec, 3238 .last = NULL, 3239 .mask = vlan_mask, 3240 }, 3241 { 3242 .type = RTE_FLOW_ITEM_TYPE_END, 3243 }, 3244 }; 3245 uint16_t queue[priv->reta_idx_n]; 3246 struct rte_flow_action_rss action_rss = { 3247 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 3248 .level = 0, 3249 .types = priv->rss_conf.rss_hf, 3250 .key_len = priv->rss_conf.rss_key_len, 3251 .queue_num = priv->reta_idx_n, 3252 .key = priv->rss_conf.rss_key, 3253 .queue = queue, 3254 }; 3255 struct rte_flow_action actions[] = { 3256 { 3257 .type = RTE_FLOW_ACTION_TYPE_RSS, 3258 .conf = &action_rss, 3259 }, 3260 { 3261 .type = RTE_FLOW_ACTION_TYPE_END, 3262 }, 3263 }; 3264 struct rte_flow *flow; 3265 struct rte_flow_error error; 3266 unsigned int i; 3267 3268 if (!priv->reta_idx_n) { 3269 rte_errno = EINVAL; 3270 return -rte_errno; 3271 } 3272 for (i = 0; i != priv->reta_idx_n; ++i) 3273 queue[i] = (*priv->reta_idx)[i]; 3274 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items, 3275 actions, &error); 3276 if (!flow) 3277 return -rte_errno; 3278 return 0; 3279 } 3280 3281 /** 3282 * Enable a flow control configured from the control plane. 3283 * 3284 * @param dev 3285 * Pointer to Ethernet device. 3286 * @param eth_spec 3287 * An Ethernet flow spec to apply. 3288 * @param eth_mask 3289 * An Ethernet flow mask to apply. 
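 *
 * For instance, a unicast MAC control flow can be requested as follows
 * (illustrative only, the address is an assumption of the example):
 *
 * @code
 * struct rte_flow_item_eth unicast = {
 *     .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 * };
 * struct rte_flow_item_eth unicast_mask = {
 *     .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 * int ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
 * @endcode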
3290 * 3291 * @return 3292 * 0 on success, a negative errno value otherwise and rte_errno is set. 3293 */ 3294 int 3295 mlx5_ctrl_flow(struct rte_eth_dev *dev, 3296 struct rte_flow_item_eth *eth_spec, 3297 struct rte_flow_item_eth *eth_mask) 3298 { 3299 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 3300 } 3301 3302 /** 3303 * Destroy a flow. 3304 * 3305 * @see rte_flow_destroy() 3306 * @see rte_flow_ops 3307 */ 3308 int 3309 mlx5_flow_destroy(struct rte_eth_dev *dev, 3310 struct rte_flow *flow, 3311 struct rte_flow_error *error __rte_unused) 3312 { 3313 struct priv *priv = dev->data->dev_private; 3314 3315 mlx5_flow_list_destroy(dev, &priv->flows, flow); 3316 return 0; 3317 } 3318 3319 /** 3320 * Destroy all flows. 3321 * 3322 * @see rte_flow_flush() 3323 * @see rte_flow_ops 3324 */ 3325 int 3326 mlx5_flow_flush(struct rte_eth_dev *dev, 3327 struct rte_flow_error *error __rte_unused) 3328 { 3329 struct priv *priv = dev->data->dev_private; 3330 3331 mlx5_flow_list_flush(dev, &priv->flows); 3332 return 0; 3333 } 3334 3335 /** 3336 * Isolated mode. 3337 * 3338 * @see rte_flow_isolate() 3339 * @see rte_flow_ops 3340 */ 3341 int 3342 mlx5_flow_isolate(struct rte_eth_dev *dev, 3343 int enable, 3344 struct rte_flow_error *error) 3345 { 3346 struct priv *priv = dev->data->dev_private; 3347 3348 if (dev->data->dev_started) { 3349 rte_flow_error_set(error, EBUSY, 3350 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3351 NULL, 3352 "port must be stopped first"); 3353 return -rte_errno; 3354 } 3355 priv->isolated = !!enable; 3356 if (enable) 3357 dev->dev_ops = &mlx5_dev_ops_isolate; 3358 else 3359 dev->dev_ops = &mlx5_dev_ops; 3360 return 0; 3361 } 3362 3363 /** 3364 * Query flow counter. 3365 * 3366 * @param flow 3367 * Pointer to the flow. 3368 * 3369 * @return 3370 * 0 on success, a negative errno value otherwise and rte_errno is set. 3371 */ 3372 static int 3373 mlx5_flow_query_count(struct rte_flow *flow __rte_unused, 3374 void *data __rte_unused, 3375 struct rte_flow_error *error) 3376 { 3377 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 3378 if (flow->modifier & MLX5_FLOW_MOD_COUNT) { 3379 struct rte_flow_query_count *qc = data; 3380 uint64_t counters[2] = {0, 0}; 3381 struct ibv_query_counter_set_attr query_cs_attr = { 3382 .cs = flow->counter->cs, 3383 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE, 3384 }; 3385 struct ibv_counter_set_data query_out = { 3386 .out = counters, 3387 .outlen = 2 * sizeof(uint64_t), 3388 }; 3389 int err = mlx5_glue->query_counter_set(&query_cs_attr, 3390 &query_out); 3391 3392 if (err) 3393 return rte_flow_error_set 3394 (error, err, 3395 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3396 NULL, 3397 "cannot read counter"); 3398 qc->hits_set = 1; 3399 qc->bytes_set = 1; 3400 qc->hits = counters[0] - flow->counter->hits; 3401 qc->bytes = counters[1] - flow->counter->bytes; 3402 if (qc->reset) { 3403 flow->counter->hits = counters[0]; 3404 flow->counter->bytes = counters[1]; 3405 } 3406 return 0; 3407 } 3408 return rte_flow_error_set(error, ENOTSUP, 3409 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3410 NULL, 3411 "flow does not have counter"); 3412 #endif 3413 return rte_flow_error_set(error, ENOTSUP, 3414 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3415 NULL, 3416 "counters are not available"); 3417 } 3418 3419 /** 3420 * Query a flows. 
3421 * 3422 * @see rte_flow_query() 3423 * @see rte_flow_ops 3424 */ 3425 int 3426 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused, 3427 struct rte_flow *flow, 3428 const struct rte_flow_action *actions, 3429 void *data, 3430 struct rte_flow_error *error) 3431 { 3432 int ret = 0; 3433 3434 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3435 switch (actions->type) { 3436 case RTE_FLOW_ACTION_TYPE_VOID: 3437 break; 3438 case RTE_FLOW_ACTION_TYPE_COUNT: 3439 ret = mlx5_flow_query_count(flow, data, error); 3440 break; 3441 default: 3442 return rte_flow_error_set(error, ENOTSUP, 3443 RTE_FLOW_ERROR_TYPE_ACTION, 3444 actions, 3445 "action not supported"); 3446 } 3447 if (ret < 0) 3448 return ret; 3449 } 3450 return 0; 3451 } 3452 3453 /** 3454 * Convert a flow director filter to a generic flow. 3455 * 3456 * @param dev 3457 * Pointer to Ethernet device. 3458 * @param fdir_filter 3459 * Flow director filter to add. 3460 * @param attributes 3461 * Generic flow parameters structure. 3462 * 3463 * @return 3464 * 0 on success, a negative errno value otherwise and rte_errno is set. 3465 */ 3466 static int 3467 mlx5_fdir_filter_convert(struct rte_eth_dev *dev, 3468 const struct rte_eth_fdir_filter *fdir_filter, 3469 struct mlx5_fdir *attributes) 3470 { 3471 struct priv *priv = dev->data->dev_private; 3472 const struct rte_eth_fdir_input *input = &fdir_filter->input; 3473 const struct rte_eth_fdir_masks *mask = 3474 &dev->data->dev_conf.fdir_conf.mask; 3475 3476 /* Validate queue number. */ 3477 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 3478 DRV_LOG(ERR, "port %u invalid queue number %d", 3479 dev->data->port_id, fdir_filter->action.rx_queue); 3480 rte_errno = EINVAL; 3481 return -rte_errno; 3482 } 3483 attributes->attr.ingress = 1; 3484 attributes->items[0] = (struct rte_flow_item) { 3485 .type = RTE_FLOW_ITEM_TYPE_ETH, 3486 .spec = &attributes->l2, 3487 .mask = &attributes->l2_mask, 3488 }; 3489 switch (fdir_filter->action.behavior) { 3490 case RTE_ETH_FDIR_ACCEPT: 3491 attributes->actions[0] = (struct rte_flow_action){ 3492 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 3493 .conf = &attributes->queue, 3494 }; 3495 break; 3496 case RTE_ETH_FDIR_REJECT: 3497 attributes->actions[0] = (struct rte_flow_action){ 3498 .type = RTE_FLOW_ACTION_TYPE_DROP, 3499 }; 3500 break; 3501 default: 3502 DRV_LOG(ERR, "port %u invalid behavior %d", 3503 dev->data->port_id, 3504 fdir_filter->action.behavior); 3505 rte_errno = ENOTSUP; 3506 return -rte_errno; 3507 } 3508 attributes->queue.index = fdir_filter->action.rx_queue; 3509 /* Handle L3. 
*/ 3510 switch (fdir_filter->input.flow_type) { 3511 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3512 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3513 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3514 attributes->l3.ipv4.hdr = (struct ipv4_hdr){ 3515 .src_addr = input->flow.ip4_flow.src_ip, 3516 .dst_addr = input->flow.ip4_flow.dst_ip, 3517 .time_to_live = input->flow.ip4_flow.ttl, 3518 .type_of_service = input->flow.ip4_flow.tos, 3519 .next_proto_id = input->flow.ip4_flow.proto, 3520 }; 3521 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ 3522 .src_addr = mask->ipv4_mask.src_ip, 3523 .dst_addr = mask->ipv4_mask.dst_ip, 3524 .time_to_live = mask->ipv4_mask.ttl, 3525 .type_of_service = mask->ipv4_mask.tos, 3526 .next_proto_id = mask->ipv4_mask.proto, 3527 }; 3528 attributes->items[1] = (struct rte_flow_item){ 3529 .type = RTE_FLOW_ITEM_TYPE_IPV4, 3530 .spec = &attributes->l3, 3531 .mask = &attributes->l3_mask, 3532 }; 3533 break; 3534 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3535 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3536 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3537 attributes->l3.ipv6.hdr = (struct ipv6_hdr){ 3538 .hop_limits = input->flow.ipv6_flow.hop_limits, 3539 .proto = input->flow.ipv6_flow.proto, 3540 }; 3541 3542 memcpy(attributes->l3.ipv6.hdr.src_addr, 3543 input->flow.ipv6_flow.src_ip, 3544 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3545 memcpy(attributes->l3.ipv6.hdr.dst_addr, 3546 input->flow.ipv6_flow.dst_ip, 3547 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3548 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 3549 mask->ipv6_mask.src_ip, 3550 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3551 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 3552 mask->ipv6_mask.dst_ip, 3553 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3554 attributes->items[1] = (struct rte_flow_item){ 3555 .type = RTE_FLOW_ITEM_TYPE_IPV6, 3556 .spec = &attributes->l3, 3557 .mask = &attributes->l3_mask, 3558 }; 3559 break; 3560 default: 3561 DRV_LOG(ERR, "port %u invalid flow type%d", 3562 dev->data->port_id, fdir_filter->input.flow_type); 3563 rte_errno = ENOTSUP; 3564 return -rte_errno; 3565 } 3566 /* Handle L4. 
*/ 3567 switch (fdir_filter->input.flow_type) { 3568 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3569 attributes->l4.udp.hdr = (struct udp_hdr){ 3570 .src_port = input->flow.udp4_flow.src_port, 3571 .dst_port = input->flow.udp4_flow.dst_port, 3572 }; 3573 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3574 .src_port = mask->src_port_mask, 3575 .dst_port = mask->dst_port_mask, 3576 }; 3577 attributes->items[2] = (struct rte_flow_item){ 3578 .type = RTE_FLOW_ITEM_TYPE_UDP, 3579 .spec = &attributes->l4, 3580 .mask = &attributes->l4_mask, 3581 }; 3582 break; 3583 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3584 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3585 .src_port = input->flow.tcp4_flow.src_port, 3586 .dst_port = input->flow.tcp4_flow.dst_port, 3587 }; 3588 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3589 .src_port = mask->src_port_mask, 3590 .dst_port = mask->dst_port_mask, 3591 }; 3592 attributes->items[2] = (struct rte_flow_item){ 3593 .type = RTE_FLOW_ITEM_TYPE_TCP, 3594 .spec = &attributes->l4, 3595 .mask = &attributes->l4_mask, 3596 }; 3597 break; 3598 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3599 attributes->l4.udp.hdr = (struct udp_hdr){ 3600 .src_port = input->flow.udp6_flow.src_port, 3601 .dst_port = input->flow.udp6_flow.dst_port, 3602 }; 3603 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3604 .src_port = mask->src_port_mask, 3605 .dst_port = mask->dst_port_mask, 3606 }; 3607 attributes->items[2] = (struct rte_flow_item){ 3608 .type = RTE_FLOW_ITEM_TYPE_UDP, 3609 .spec = &attributes->l4, 3610 .mask = &attributes->l4_mask, 3611 }; 3612 break; 3613 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3614 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3615 .src_port = input->flow.tcp6_flow.src_port, 3616 .dst_port = input->flow.tcp6_flow.dst_port, 3617 }; 3618 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3619 .src_port = mask->src_port_mask, 3620 .dst_port = mask->dst_port_mask, 3621 }; 3622 attributes->items[2] = (struct rte_flow_item){ 3623 .type = RTE_FLOW_ITEM_TYPE_TCP, 3624 .spec = &attributes->l4, 3625 .mask = &attributes->l4_mask, 3626 }; 3627 break; 3628 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3629 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3630 break; 3631 default: 3632 DRV_LOG(ERR, "port %u invalid flow type%d", 3633 dev->data->port_id, fdir_filter->input.flow_type); 3634 rte_errno = ENOTSUP; 3635 return -rte_errno; 3636 } 3637 return 0; 3638 } 3639 3640 /** 3641 * Add new flow director filter and store it in list. 3642 * 3643 * @param dev 3644 * Pointer to Ethernet device. 3645 * @param fdir_filter 3646 * Flow director filter to add. 3647 * 3648 * @return 3649 * 0 on success, a negative errno value otherwise and rte_errno is set. 
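 *
 * Sketch of a filter directing IPv4/UDP traffic with a given destination
 * port to Rx queue 3 (illustrative only; the queue number, port value and
 * byte order are assumptions of this example):
 *
 * @code
 * struct rte_eth_fdir_filter f = {
 *     .input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *     .action.behavior = RTE_ETH_FDIR_ACCEPT,
 *     .action.rx_queue = 3,
 * };
 *
 * f.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789);
 * int ret = mlx5_fdir_filter_add(dev, &f);
 * @endcode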

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
			const struct rte_eth_fdir_filter *fdir_filter
			__rte_unused)
{
	rte_errno = ENOTSUP;
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
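
/*
 * Usage sketch (application side, illustrative only): the FDIR handlers
 * above are reached through the legacy filter-control API, e.g.
 *
 *   struct rte_eth_fdir_info info;
 *
 *   memset(&info, 0, sizeof(info));
 *   ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                                 RTE_ETH_FILTER_INFO, &info);
 *
 * which lands in mlx5_fdir_ctrl_func() below and, for the INFO operation,
 * in mlx5_fdir_info_get() above.  "port_id" and "ret" stand for the
 * application's own variables.
 */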

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
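
/*
 * For reference (illustrative only): the RTE_ETH_FILTER_GENERIC branch of
 * mlx5_dev_filter_ctrl() is the hook through which the rte_flow layer of
 * librte_ethdev typically retrieves this PMD's flow operations, roughly:
 *
 *   const struct rte_flow_ops *ops = NULL;
 *   int ret;
 *
 *   ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                                 RTE_ETH_FILTER_GET, &ops);
 *
 * On success (ret == 0) "ops" points at mlx5_flow_ops above.  Applications
 * normally use rte_flow_create()/rte_flow_destroy() and friends rather
 * than calling this entry point directly.
 */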