1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <sys/queue.h> 7 #include <stdalign.h> 8 #include <stdint.h> 9 #include <string.h> 10 11 /* Verbs header. */ 12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 13 #ifdef PEDANTIC 14 #pragma GCC diagnostic ignored "-Wpedantic" 15 #endif 16 #include <infiniband/verbs.h> 17 #ifdef PEDANTIC 18 #pragma GCC diagnostic error "-Wpedantic" 19 #endif 20 21 #include <rte_common.h> 22 #include <rte_ether.h> 23 #include <rte_eth_ctrl.h> 24 #include <rte_ethdev_driver.h> 25 #include <rte_flow.h> 26 #include <rte_flow_driver.h> 27 #include <rte_malloc.h> 28 #include <rte_ip.h> 29 30 #include "mlx5.h" 31 #include "mlx5_defs.h" 32 #include "mlx5_prm.h" 33 #include "mlx5_glue.h" 34 35 /* Dev ops structure defined in mlx5.c */ 36 extern const struct eth_dev_ops mlx5_dev_ops; 37 extern const struct eth_dev_ops mlx5_dev_ops_isolate; 38 39 /* Pattern outer Layer bits. */ 40 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0) 41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) 42 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2) 43 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3) 44 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4) 45 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5) 46 47 /* Pattern inner Layer bits. */ 48 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6) 49 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7) 50 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8) 51 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9) 52 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10) 53 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11) 54 55 /* Pattern tunnel Layer bits. */ 56 #define MLX5_FLOW_LAYER_VXLAN (1u << 12) 57 #define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13) 58 #define MLX5_FLOW_LAYER_GRE (1u << 14) 59 #define MLX5_FLOW_LAYER_MPLS (1u << 15) 60 61 /* Outer Masks. */ 62 #define MLX5_FLOW_LAYER_OUTER_L3 \ 63 (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6) 64 #define MLX5_FLOW_LAYER_OUTER_L4 \ 65 (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP) 66 #define MLX5_FLOW_LAYER_OUTER \ 67 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \ 68 MLX5_FLOW_LAYER_OUTER_L4) 69 70 /* Tunnel Masks. */ 71 #define MLX5_FLOW_LAYER_TUNNEL \ 72 (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \ 73 MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS) 74 75 /* Inner Masks. */ 76 #define MLX5_FLOW_LAYER_INNER_L3 \ 77 (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6) 78 #define MLX5_FLOW_LAYER_INNER_L4 \ 79 (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP) 80 #define MLX5_FLOW_LAYER_INNER \ 81 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \ 82 MLX5_FLOW_LAYER_INNER_L4) 83 84 /* Actions that modify the fate of matching traffic. */ 85 #define MLX5_FLOW_FATE_DROP (1u << 0) 86 #define MLX5_FLOW_FATE_QUEUE (1u << 1) 87 #define MLX5_FLOW_FATE_RSS (1u << 2) 88 89 /* Modify a packet. */ 90 #define MLX5_FLOW_MOD_FLAG (1u << 0) 91 #define MLX5_FLOW_MOD_MARK (1u << 1) 92 #define MLX5_FLOW_MOD_COUNT (1u << 2) 93 94 /* possible L3 layers protocols filtering. */ 95 #define MLX5_IP_PROTOCOL_TCP 6 96 #define MLX5_IP_PROTOCOL_UDP 17 97 #define MLX5_IP_PROTOCOL_GRE 47 98 #define MLX5_IP_PROTOCOL_MPLS 147 99 100 /* Priority reserved for default flows. 
*/ 101 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1) 102 103 enum mlx5_expansion { 104 MLX5_EXPANSION_ROOT, 105 MLX5_EXPANSION_ROOT_OUTER, 106 MLX5_EXPANSION_ROOT_ETH_VLAN, 107 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, 108 MLX5_EXPANSION_OUTER_ETH, 109 MLX5_EXPANSION_OUTER_ETH_VLAN, 110 MLX5_EXPANSION_OUTER_VLAN, 111 MLX5_EXPANSION_OUTER_IPV4, 112 MLX5_EXPANSION_OUTER_IPV4_UDP, 113 MLX5_EXPANSION_OUTER_IPV4_TCP, 114 MLX5_EXPANSION_OUTER_IPV6, 115 MLX5_EXPANSION_OUTER_IPV6_UDP, 116 MLX5_EXPANSION_OUTER_IPV6_TCP, 117 MLX5_EXPANSION_VXLAN, 118 MLX5_EXPANSION_VXLAN_GPE, 119 MLX5_EXPANSION_GRE, 120 MLX5_EXPANSION_MPLS, 121 MLX5_EXPANSION_ETH, 122 MLX5_EXPANSION_ETH_VLAN, 123 MLX5_EXPANSION_VLAN, 124 MLX5_EXPANSION_IPV4, 125 MLX5_EXPANSION_IPV4_UDP, 126 MLX5_EXPANSION_IPV4_TCP, 127 MLX5_EXPANSION_IPV6, 128 MLX5_EXPANSION_IPV6_UDP, 129 MLX5_EXPANSION_IPV6_TCP, 130 }; 131 132 /** Supported expansion of items. */ 133 static const struct rte_flow_expand_node mlx5_support_expansion[] = { 134 [MLX5_EXPANSION_ROOT] = { 135 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 136 MLX5_EXPANSION_IPV4, 137 MLX5_EXPANSION_IPV6), 138 .type = RTE_FLOW_ITEM_TYPE_END, 139 }, 140 [MLX5_EXPANSION_ROOT_OUTER] = { 141 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 142 MLX5_EXPANSION_OUTER_IPV4, 143 MLX5_EXPANSION_OUTER_IPV6), 144 .type = RTE_FLOW_ITEM_TYPE_END, 145 }, 146 [MLX5_EXPANSION_ROOT_ETH_VLAN] = { 147 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), 148 .type = RTE_FLOW_ITEM_TYPE_END, 149 }, 150 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { 151 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN), 152 .type = RTE_FLOW_ITEM_TYPE_END, 153 }, 154 [MLX5_EXPANSION_OUTER_ETH] = { 155 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 156 MLX5_EXPANSION_OUTER_IPV6, 157 MLX5_EXPANSION_MPLS), 158 .type = RTE_FLOW_ITEM_TYPE_ETH, 159 .rss_types = 0, 160 }, 161 [MLX5_EXPANSION_OUTER_ETH_VLAN] = { 162 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), 163 .type = RTE_FLOW_ITEM_TYPE_ETH, 164 .rss_types = 0, 165 }, 166 [MLX5_EXPANSION_OUTER_VLAN] = { 167 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 168 MLX5_EXPANSION_OUTER_IPV6), 169 .type = RTE_FLOW_ITEM_TYPE_VLAN, 170 }, 171 [MLX5_EXPANSION_OUTER_IPV4] = { 172 .next = RTE_FLOW_EXPAND_RSS_NEXT 173 (MLX5_EXPANSION_OUTER_IPV4_UDP, 174 MLX5_EXPANSION_OUTER_IPV4_TCP, 175 MLX5_EXPANSION_GRE), 176 .type = RTE_FLOW_ITEM_TYPE_IPV4, 177 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 178 ETH_RSS_NONFRAG_IPV4_OTHER, 179 }, 180 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 181 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 182 MLX5_EXPANSION_VXLAN_GPE), 183 .type = RTE_FLOW_ITEM_TYPE_UDP, 184 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 185 }, 186 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 187 .type = RTE_FLOW_ITEM_TYPE_TCP, 188 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 189 }, 190 [MLX5_EXPANSION_OUTER_IPV6] = { 191 .next = RTE_FLOW_EXPAND_RSS_NEXT 192 (MLX5_EXPANSION_OUTER_IPV6_UDP, 193 MLX5_EXPANSION_OUTER_IPV6_TCP), 194 .type = RTE_FLOW_ITEM_TYPE_IPV6, 195 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 196 ETH_RSS_NONFRAG_IPV6_OTHER, 197 }, 198 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 199 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 200 MLX5_EXPANSION_VXLAN_GPE), 201 .type = RTE_FLOW_ITEM_TYPE_UDP, 202 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 203 }, 204 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 205 .type = RTE_FLOW_ITEM_TYPE_TCP, 206 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 207 }, 208 [MLX5_EXPANSION_VXLAN] = { 209 .next = 
RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH), 210 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 211 }, 212 [MLX5_EXPANSION_VXLAN_GPE] = { 213 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 214 MLX5_EXPANSION_IPV4, 215 MLX5_EXPANSION_IPV6), 216 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 217 }, 218 [MLX5_EXPANSION_GRE] = { 219 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), 220 .type = RTE_FLOW_ITEM_TYPE_GRE, 221 }, 222 [MLX5_EXPANSION_MPLS] = { 223 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 224 MLX5_EXPANSION_IPV6), 225 .type = RTE_FLOW_ITEM_TYPE_MPLS, 226 }, 227 [MLX5_EXPANSION_ETH] = { 228 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 229 MLX5_EXPANSION_IPV6), 230 .type = RTE_FLOW_ITEM_TYPE_ETH, 231 }, 232 [MLX5_EXPANSION_ETH_VLAN] = { 233 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), 234 .type = RTE_FLOW_ITEM_TYPE_ETH, 235 }, 236 [MLX5_EXPANSION_VLAN] = { 237 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 238 MLX5_EXPANSION_IPV6), 239 .type = RTE_FLOW_ITEM_TYPE_VLAN, 240 }, 241 [MLX5_EXPANSION_IPV4] = { 242 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 243 MLX5_EXPANSION_IPV4_TCP), 244 .type = RTE_FLOW_ITEM_TYPE_IPV4, 245 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 246 ETH_RSS_NONFRAG_IPV4_OTHER, 247 }, 248 [MLX5_EXPANSION_IPV4_UDP] = { 249 .type = RTE_FLOW_ITEM_TYPE_UDP, 250 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 251 }, 252 [MLX5_EXPANSION_IPV4_TCP] = { 253 .type = RTE_FLOW_ITEM_TYPE_TCP, 254 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 255 }, 256 [MLX5_EXPANSION_IPV6] = { 257 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 258 MLX5_EXPANSION_IPV6_TCP), 259 .type = RTE_FLOW_ITEM_TYPE_IPV6, 260 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 261 ETH_RSS_NONFRAG_IPV6_OTHER, 262 }, 263 [MLX5_EXPANSION_IPV6_UDP] = { 264 .type = RTE_FLOW_ITEM_TYPE_UDP, 265 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 266 }, 267 [MLX5_EXPANSION_IPV6_TCP] = { 268 .type = RTE_FLOW_ITEM_TYPE_TCP, 269 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 270 }, 271 }; 272 273 /** Handles information leading to a drop fate. */ 274 struct mlx5_flow_verbs { 275 LIST_ENTRY(mlx5_flow_verbs) next; 276 unsigned int size; /**< Size of the attribute. */ 277 struct { 278 struct ibv_flow_attr *attr; 279 /**< Pointer to the Specification buffer. */ 280 uint8_t *specs; /**< Pointer to the specifications. */ 281 }; 282 struct ibv_flow *flow; /**< Verbs flow pointer. */ 283 struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */ 284 uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */ 285 }; 286 287 /* Counters information. */ 288 struct mlx5_flow_counter { 289 LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */ 290 uint32_t shared:1; /**< Share counter ID with other flow rules. */ 291 uint32_t ref_cnt:31; /**< Reference counter. */ 292 uint32_t id; /**< Counter ID. */ 293 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */ 294 uint64_t hits; /**< Number of packets matched by the rule. */ 295 uint64_t bytes; /**< Number of bytes matched by the rule. */ 296 }; 297 298 /* Flow structure. */ 299 struct rte_flow { 300 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */ 301 struct rte_flow_attr attributes; /**< User flow attribute. */ 302 uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */ 303 uint32_t layers; 304 /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */ 305 uint32_t modifier; 306 /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. 
 */
	uint32_t fate;
	/**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
	uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
	struct mlx5_flow_verbs *cur_verbs;
	/**< Current Verbs flow structure being filled. */
	struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
	struct rte_flow_action_rss rss; /**< RSS context. */
	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
	void *nl_flow; /**< Netlink flow buffer if relevant. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/*
 * Number of sub-priorities.
 * For each kind of pattern matching, i.e. L2, L3 and L4, to get correct
 * matching on the NIC (firmware dependent), L4 must have the highest
 * priority, followed by L3 and finally L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
};

/**
 * Discover the maximum number of flow priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
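 *
 * Illustrative caller sketch (hypothetical; only
 * mlx5_flow_discover_priorities() itself is defined here, the caller is
 * assumed to store the result where mlx5_flow_adjust_priority() expects it):
 * @code
 *	int prio = mlx5_flow_discover_priorities(dev);
 *
 *	if (prio < 0)
 *		return prio;
 *	priv->config.flow_prio = prio;
 * @endcode
 * With 16 Verbs priorities available the function returns
 * RTE_DIM(priority_map_5), i.e. 5 flow priority levels, each of them split
 * into MLX5_PRIORITY_MAP_MAX sub-priorities by mlx5_flow_adjust_priority().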
 *
 * @return
 *   number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, vprio[i]);
		return -rte_errno;
	}
	mlx5_hrxq_drop_release(dev);
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	uint32_t priority = flow->attributes.priority;
	uint32_t subpriority = flow->cur_verbs->attr->priority;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		priority = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		priority = priority_map_5[priority][subpriority];
		break;
	}
	flow->cur_verbs->attr->priority = priority;
}

/**
 * Get a flow counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] shared
 *   Indicate if this counter is shared with other flows.
 * @param[in] id
 *   Counter identifier.
 *
 * @return
 *   A pointer to the counter, NULL otherwise and rte_errno is set.
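 *
 * Minimal usage sketch (illustrative only; dev and flow are assumed to be
 * valid in the caller):
 * @code
 *	struct mlx5_flow_counter *cnt;
 *
 *	cnt = mlx5_flow_counter_new(dev, 1, 42);
 *	if (!cnt)
 *		return -rte_errno;
 *	flow->counter = cnt;
 *	...
 *	mlx5_flow_counter_release(flow->counter);
 * @endcode
 * Here 1 requests a shared counter and 42 is the user-provided counter
 * identifier; without HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT the call fails
 * with rte_errno set to ENOTSUP.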
518 */ 519 static struct mlx5_flow_counter * 520 mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) 521 { 522 struct priv *priv = dev->data->dev_private; 523 struct mlx5_flow_counter *cnt; 524 525 LIST_FOREACH(cnt, &priv->flow_counters, next) { 526 if (!cnt->shared || cnt->shared != shared) 527 continue; 528 if (cnt->id != id) 529 continue; 530 cnt->ref_cnt++; 531 return cnt; 532 } 533 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 534 535 struct mlx5_flow_counter tmpl = { 536 .shared = shared, 537 .id = id, 538 .cs = mlx5_glue->create_counter_set 539 (priv->ctx, 540 &(struct ibv_counter_set_init_attr){ 541 .counter_set_id = id, 542 }), 543 .hits = 0, 544 .bytes = 0, 545 }; 546 547 if (!tmpl.cs) { 548 rte_errno = errno; 549 return NULL; 550 } 551 cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0); 552 if (!cnt) { 553 rte_errno = ENOMEM; 554 return NULL; 555 } 556 *cnt = tmpl; 557 LIST_INSERT_HEAD(&priv->flow_counters, cnt, next); 558 return cnt; 559 #endif 560 rte_errno = ENOTSUP; 561 return NULL; 562 } 563 564 /** 565 * Release a flow counter. 566 * 567 * @param[in] counter 568 * Pointer to the counter handler. 569 */ 570 static void 571 mlx5_flow_counter_release(struct mlx5_flow_counter *counter) 572 { 573 if (--counter->ref_cnt == 0) { 574 claim_zero(mlx5_glue->destroy_counter_set(counter->cs)); 575 LIST_REMOVE(counter, next); 576 rte_free(counter); 577 } 578 } 579 580 /** 581 * Verify the @p attributes will be correctly understood by the NIC and store 582 * them in the @p flow if everything is correct. 583 * 584 * @param[in] dev 585 * Pointer to Ethernet device. 586 * @param[in] attributes 587 * Pointer to flow attributes 588 * @param[in, out] flow 589 * Pointer to the rte_flow structure. 590 * @param[out] error 591 * Pointer to error structure. 592 * 593 * @return 594 * 0 on success, a negative errno value otherwise and rte_errno is set. 595 */ 596 static int 597 mlx5_flow_attributes(struct rte_eth_dev *dev, 598 const struct rte_flow_attr *attributes, 599 struct rte_flow *flow, 600 struct rte_flow_error *error) 601 { 602 uint32_t priority_max = 603 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1; 604 605 if (attributes->group) 606 return rte_flow_error_set(error, ENOTSUP, 607 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 608 NULL, 609 "groups is not supported"); 610 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 611 attributes->priority >= priority_max) 612 return rte_flow_error_set(error, ENOTSUP, 613 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 614 NULL, 615 "priority out of range"); 616 if (attributes->egress) 617 return rte_flow_error_set(error, ENOTSUP, 618 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, 619 NULL, 620 "egress is not supported"); 621 if (attributes->transfer) 622 return rte_flow_error_set(error, ENOTSUP, 623 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 624 NULL, 625 "transfer is not supported"); 626 if (!attributes->ingress) 627 return rte_flow_error_set(error, ENOTSUP, 628 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 629 NULL, 630 "ingress attribute is mandatory"); 631 flow->attributes = *attributes; 632 if (attributes->priority == MLX5_FLOW_PRIO_RSVD) 633 flow->attributes.priority = priority_max; 634 return 0; 635 } 636 637 /** 638 * Verify the @p item specifications (spec, last, mask) are compatible with the 639 * NIC capabilities. 640 * 641 * @param[in] item 642 * Item specification. 643 * @param[in] mask 644 * @p item->mask or flow default bit-masks. 645 * @param[in] nic_mask 646 * Bit-masks covering supported fields by the NIC to compare with user mask. 
647 * @param[in] size 648 * Bit-masks size in bytes. 649 * @param[out] error 650 * Pointer to error structure. 651 * 652 * @return 653 * 0 on success, a negative errno value otherwise and rte_errno is set. 654 */ 655 static int 656 mlx5_flow_item_acceptable(const struct rte_flow_item *item, 657 const uint8_t *mask, 658 const uint8_t *nic_mask, 659 unsigned int size, 660 struct rte_flow_error *error) 661 { 662 unsigned int i; 663 664 assert(nic_mask); 665 for (i = 0; i < size; ++i) 666 if ((nic_mask[i] | mask[i]) != nic_mask[i]) 667 return rte_flow_error_set(error, ENOTSUP, 668 RTE_FLOW_ERROR_TYPE_ITEM, 669 item, 670 "mask enables non supported" 671 " bits"); 672 if (!item->spec && (item->mask || item->last)) 673 return rte_flow_error_set(error, EINVAL, 674 RTE_FLOW_ERROR_TYPE_ITEM, 675 item, 676 "mask/last without a spec is not" 677 " supported"); 678 if (item->spec && item->last) { 679 uint8_t spec[size]; 680 uint8_t last[size]; 681 unsigned int i; 682 int ret; 683 684 for (i = 0; i < size; ++i) { 685 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i]; 686 last[i] = ((const uint8_t *)item->last)[i] & mask[i]; 687 } 688 ret = memcmp(spec, last, size); 689 if (ret != 0) 690 return rte_flow_error_set(error, ENOTSUP, 691 RTE_FLOW_ERROR_TYPE_ITEM, 692 item, 693 "range is not supported"); 694 } 695 return 0; 696 } 697 698 /** 699 * Add a verbs item specification into @p flow. 700 * 701 * @param[in, out] flow 702 * Pointer to flow structure. 703 * @param[in] src 704 * Create specification. 705 * @param[in] size 706 * Size in bytes of the specification to copy. 707 */ 708 static void 709 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size) 710 { 711 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 712 713 if (verbs->specs) { 714 void *dst; 715 716 dst = (void *)(verbs->specs + verbs->size); 717 memcpy(dst, src, size); 718 ++verbs->attr->num_of_specs; 719 } 720 verbs->size += size; 721 } 722 723 /** 724 * Adjust verbs hash fields according to the @p flow information. 725 * 726 * @param[in, out] flow. 727 * Pointer to flow structure. 728 * @param[in] tunnel 729 * 1 when the hash field is for a tunnel item. 730 * @param[in] layer_types 731 * ETH_RSS_* types. 732 * @param[in] hash_fields 733 * Item hash fields. 734 */ 735 static void 736 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow, 737 int tunnel __rte_unused, 738 uint32_t layer_types, uint64_t hash_fields) 739 { 740 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 741 hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0); 742 if (flow->rss.level == 2 && !tunnel) 743 hash_fields = 0; 744 else if (flow->rss.level < 2 && tunnel) 745 hash_fields = 0; 746 #endif 747 if (!(flow->rss.types & layer_types)) 748 hash_fields = 0; 749 flow->cur_verbs->hash_fields |= hash_fields; 750 } 751 752 /** 753 * Convert the @p item into a Verbs specification after ensuring the NIC 754 * will understand and process it correctly. 755 * If the necessary size for the conversion is greater than the @p flow_size, 756 * nothing is written in @p flow, the validation is still performed. 757 * 758 * @param[in] item 759 * Item specification. 760 * @param[in, out] flow 761 * Pointer to flow structure. 762 * @param[in] flow_size 763 * Size in bytes of the available space in @p flow, if too small, nothing is 764 * written. 765 * @param[out] error 766 * Pointer to error structure. 
767 * 768 * @return 769 * On success the number of bytes consumed/necessary, if the returned value 770 * is lesser or equal to @p flow_size, the @p item has fully been converted, 771 * otherwise another call with this returned memory size should be done. 772 * On error, a negative errno value is returned and rte_errno is set. 773 */ 774 static int 775 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow, 776 const size_t flow_size, struct rte_flow_error *error) 777 { 778 const struct rte_flow_item_eth *spec = item->spec; 779 const struct rte_flow_item_eth *mask = item->mask; 780 const struct rte_flow_item_eth nic_mask = { 781 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 782 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 783 .type = RTE_BE16(0xffff), 784 }; 785 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 786 const unsigned int size = sizeof(struct ibv_flow_spec_eth); 787 struct ibv_flow_spec_eth eth = { 788 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 789 .size = size, 790 }; 791 int ret; 792 793 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 794 MLX5_FLOW_LAYER_OUTER_L2)) 795 return rte_flow_error_set(error, ENOTSUP, 796 RTE_FLOW_ERROR_TYPE_ITEM, 797 item, 798 "L2 layers already configured"); 799 if (!mask) 800 mask = &rte_flow_item_eth_mask; 801 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 802 (const uint8_t *)&nic_mask, 803 sizeof(struct rte_flow_item_eth), 804 error); 805 if (ret) 806 return ret; 807 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 808 MLX5_FLOW_LAYER_OUTER_L2; 809 if (size > flow_size) 810 return size; 811 if (spec) { 812 unsigned int i; 813 814 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN); 815 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN); 816 eth.val.ether_type = spec->type; 817 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN); 818 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN); 819 eth.mask.ether_type = mask->type; 820 /* Remove unwanted bits from values. */ 821 for (i = 0; i < ETHER_ADDR_LEN; ++i) { 822 eth.val.dst_mac[i] &= eth.mask.dst_mac[i]; 823 eth.val.src_mac[i] &= eth.mask.src_mac[i]; 824 } 825 eth.val.ether_type &= eth.mask.ether_type; 826 } 827 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 828 mlx5_flow_spec_verbs_add(flow, ð, size); 829 return size; 830 } 831 832 /** 833 * Update the VLAN tag in the Verbs Ethernet specification. 834 * 835 * @param[in, out] attr 836 * Pointer to Verbs attributes structure. 837 * @param[in] eth 838 * Verbs structure containing the VLAN information to copy. 839 */ 840 static void 841 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr, 842 struct ibv_flow_spec_eth *eth) 843 { 844 unsigned int i; 845 const enum ibv_flow_spec_type search = eth->type; 846 struct ibv_spec_header *hdr = (struct ibv_spec_header *) 847 ((uint8_t *)attr + sizeof(struct ibv_flow_attr)); 848 849 for (i = 0; i != attr->num_of_specs; ++i) { 850 if (hdr->type == search) { 851 struct ibv_flow_spec_eth *e = 852 (struct ibv_flow_spec_eth *)hdr; 853 854 e->val.vlan_tag = eth->val.vlan_tag; 855 e->mask.vlan_tag = eth->mask.vlan_tag; 856 e->val.ether_type = eth->val.ether_type; 857 e->mask.ether_type = eth->mask.ether_type; 858 break; 859 } 860 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size); 861 } 862 } 863 864 /** 865 * Convert the @p item into @p flow (or by updating the already present 866 * Ethernet Verbs) specification after ensuring the NIC will understand and 867 * process it correctly. 
868 * If the necessary size for the conversion is greater than the @p flow_size, 869 * nothing is written in @p flow, the validation is still performed. 870 * 871 * @param[in] item 872 * Item specification. 873 * @param[in, out] flow 874 * Pointer to flow structure. 875 * @param[in] flow_size 876 * Size in bytes of the available space in @p flow, if too small, nothing is 877 * written. 878 * @param[out] error 879 * Pointer to error structure. 880 * 881 * @return 882 * On success the number of bytes consumed/necessary, if the returned value 883 * is lesser or equal to @p flow_size, the @p item has fully been converted, 884 * otherwise another call with this returned memory size should be done. 885 * On error, a negative errno value is returned and rte_errno is set. 886 */ 887 static int 888 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow, 889 const size_t flow_size, struct rte_flow_error *error) 890 { 891 const struct rte_flow_item_vlan *spec = item->spec; 892 const struct rte_flow_item_vlan *mask = item->mask; 893 const struct rte_flow_item_vlan nic_mask = { 894 .tci = RTE_BE16(0x0fff), 895 .inner_type = RTE_BE16(0xffff), 896 }; 897 unsigned int size = sizeof(struct ibv_flow_spec_eth); 898 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 899 struct ibv_flow_spec_eth eth = { 900 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 901 .size = size, 902 }; 903 int ret; 904 const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 905 MLX5_FLOW_LAYER_INNER_L4) : 906 (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4); 907 const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : 908 MLX5_FLOW_LAYER_OUTER_VLAN; 909 const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 910 MLX5_FLOW_LAYER_OUTER_L2; 911 912 if (flow->layers & vlanm) 913 return rte_flow_error_set(error, ENOTSUP, 914 RTE_FLOW_ERROR_TYPE_ITEM, 915 item, 916 "VLAN layer already configured"); 917 else if ((flow->layers & l34m) != 0) 918 return rte_flow_error_set(error, ENOTSUP, 919 RTE_FLOW_ERROR_TYPE_ITEM, 920 item, 921 "L2 layer cannot follow L3/L4 layer"); 922 if (!mask) 923 mask = &rte_flow_item_vlan_mask; 924 ret = mlx5_flow_item_acceptable 925 (item, (const uint8_t *)mask, 926 (const uint8_t *)&nic_mask, 927 sizeof(struct rte_flow_item_vlan), error); 928 if (ret) 929 return ret; 930 if (spec) { 931 eth.val.vlan_tag = spec->tci; 932 eth.mask.vlan_tag = mask->tci; 933 eth.val.vlan_tag &= eth.mask.vlan_tag; 934 eth.val.ether_type = spec->inner_type; 935 eth.mask.ether_type = mask->inner_type; 936 eth.val.ether_type &= eth.mask.ether_type; 937 } 938 /* 939 * From verbs perspective an empty VLAN is equivalent 940 * to a packet without VLAN layer. 941 */ 942 if (!eth.mask.vlan_tag) 943 return rte_flow_error_set(error, EINVAL, 944 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 945 item->spec, 946 "VLAN cannot be empty"); 947 if (!(flow->layers & l2m)) { 948 if (size <= flow_size) { 949 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 950 mlx5_flow_spec_verbs_add(flow, ð, size); 951 } 952 } else { 953 if (flow->cur_verbs) 954 mlx5_flow_item_vlan_update(flow->cur_verbs->attr, 955 ð); 956 size = 0; /* Only an update is done in eth specification. */ 957 } 958 flow->layers |= tunnel ? 959 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) : 960 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN); 961 return size; 962 } 963 964 /** 965 * Convert the @p item into a Verbs specification after ensuring the NIC 966 * will understand and process it correctly. 
967 * If the necessary size for the conversion is greater than the @p flow_size, 968 * nothing is written in @p flow, the validation is still performed. 969 * 970 * @param[in] item 971 * Item specification. 972 * @param[in, out] flow 973 * Pointer to flow structure. 974 * @param[in] flow_size 975 * Size in bytes of the available space in @p flow, if too small, nothing is 976 * written. 977 * @param[out] error 978 * Pointer to error structure. 979 * 980 * @return 981 * On success the number of bytes consumed/necessary, if the returned value 982 * is lesser or equal to @p flow_size, the @p item has fully been converted, 983 * otherwise another call with this returned memory size should be done. 984 * On error, a negative errno value is returned and rte_errno is set. 985 */ 986 static int 987 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow, 988 const size_t flow_size, struct rte_flow_error *error) 989 { 990 const struct rte_flow_item_ipv4 *spec = item->spec; 991 const struct rte_flow_item_ipv4 *mask = item->mask; 992 const struct rte_flow_item_ipv4 nic_mask = { 993 .hdr = { 994 .src_addr = RTE_BE32(0xffffffff), 995 .dst_addr = RTE_BE32(0xffffffff), 996 .type_of_service = 0xff, 997 .next_proto_id = 0xff, 998 }, 999 }; 1000 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1001 unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext); 1002 struct ibv_flow_spec_ipv4_ext ipv4 = { 1003 .type = IBV_FLOW_SPEC_IPV4_EXT | 1004 (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1005 .size = size, 1006 }; 1007 int ret; 1008 1009 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1010 MLX5_FLOW_LAYER_OUTER_L3)) 1011 return rte_flow_error_set(error, ENOTSUP, 1012 RTE_FLOW_ERROR_TYPE_ITEM, 1013 item, 1014 "multiple L3 layers not supported"); 1015 else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1016 MLX5_FLOW_LAYER_OUTER_L4)) 1017 return rte_flow_error_set(error, ENOTSUP, 1018 RTE_FLOW_ERROR_TYPE_ITEM, 1019 item, 1020 "L3 cannot follow an L4 layer."); 1021 if (!mask) 1022 mask = &rte_flow_item_ipv4_mask; 1023 ret = mlx5_flow_item_acceptable 1024 (item, (const uint8_t *)mask, 1025 (const uint8_t *)&nic_mask, 1026 sizeof(struct rte_flow_item_ipv4), error); 1027 if (ret < 0) 1028 return ret; 1029 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 1030 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 1031 if (spec) { 1032 ipv4.val = (struct ibv_flow_ipv4_ext_filter){ 1033 .src_ip = spec->hdr.src_addr, 1034 .dst_ip = spec->hdr.dst_addr, 1035 .proto = spec->hdr.next_proto_id, 1036 .tos = spec->hdr.type_of_service, 1037 }; 1038 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){ 1039 .src_ip = mask->hdr.src_addr, 1040 .dst_ip = mask->hdr.dst_addr, 1041 .proto = mask->hdr.next_proto_id, 1042 .tos = mask->hdr.type_of_service, 1043 }; 1044 /* Remove unwanted bits from values. 
 */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	flow->l3_protocol_en = !!ipv4.mask.proto;
	flow->l3_protocol = ipv4.val.proto;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			  ETH_RSS_NONFRAG_IPV4_OTHER),
			 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow; the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
			.hop_limits = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	/*
	 * IPv6 is not recognised by the NIC inside a GRE tunnel.
	 * Such support has to be disabled here, as the rule would
	 * otherwise be accepted even though IPv6 cannot be matched
	 * inside GRE. Issue reproduced with Mellanox OFED 4.3-3.0.2.1
	 * and Mellanox OFED 4.4-1.0.0.0.
1130 */ 1131 if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE) 1132 return rte_flow_error_set(error, ENOTSUP, 1133 RTE_FLOW_ERROR_TYPE_ITEM, 1134 item, 1135 "IPv6 inside a GRE tunnel is" 1136 " not recognised."); 1137 if (!mask) 1138 mask = &rte_flow_item_ipv6_mask; 1139 ret = mlx5_flow_item_acceptable 1140 (item, (const uint8_t *)mask, 1141 (const uint8_t *)&nic_mask, 1142 sizeof(struct rte_flow_item_ipv6), error); 1143 if (ret < 0) 1144 return ret; 1145 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1146 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1147 if (spec) { 1148 unsigned int i; 1149 uint32_t vtc_flow_val; 1150 uint32_t vtc_flow_mask; 1151 1152 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr, 1153 RTE_DIM(ipv6.val.src_ip)); 1154 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr, 1155 RTE_DIM(ipv6.val.dst_ip)); 1156 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr, 1157 RTE_DIM(ipv6.mask.src_ip)); 1158 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr, 1159 RTE_DIM(ipv6.mask.dst_ip)); 1160 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow); 1161 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow); 1162 ipv6.val.flow_label = 1163 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >> 1164 IPV6_HDR_FL_SHIFT); 1165 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >> 1166 IPV6_HDR_TC_SHIFT; 1167 ipv6.val.next_hdr = spec->hdr.proto; 1168 ipv6.val.hop_limit = spec->hdr.hop_limits; 1169 ipv6.mask.flow_label = 1170 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >> 1171 IPV6_HDR_FL_SHIFT); 1172 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >> 1173 IPV6_HDR_TC_SHIFT; 1174 ipv6.mask.next_hdr = mask->hdr.proto; 1175 ipv6.mask.hop_limit = mask->hdr.hop_limits; 1176 /* Remove unwanted bits from values. */ 1177 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) { 1178 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i]; 1179 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i]; 1180 } 1181 ipv6.val.flow_label &= ipv6.mask.flow_label; 1182 ipv6.val.traffic_class &= ipv6.mask.traffic_class; 1183 ipv6.val.next_hdr &= ipv6.mask.next_hdr; 1184 ipv6.val.hop_limit &= ipv6.mask.hop_limit; 1185 } 1186 flow->l3_protocol_en = !!ipv6.mask.next_hdr; 1187 flow->l3_protocol = ipv6.val.next_hdr; 1188 if (size <= flow_size) { 1189 mlx5_flow_verbs_hashfields_adjust 1190 (flow, tunnel, 1191 (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER), 1192 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6)); 1193 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3; 1194 mlx5_flow_spec_verbs_add(flow, &ipv6, size); 1195 } 1196 return size; 1197 } 1198 1199 /** 1200 * Convert the @p item into a Verbs specification after ensuring the NIC 1201 * will understand and process it correctly. 1202 * If the necessary size for the conversion is greater than the @p flow_size, 1203 * nothing is written in @p flow, the validation is still performed. 1204 * 1205 * @param[in] item 1206 * Item specification. 1207 * @param[in, out] flow 1208 * Pointer to flow structure. 1209 * @param[in] flow_size 1210 * Size in bytes of the available space in @p flow, if too small, nothing is 1211 * written. 1212 * @param[out] error 1213 * Pointer to error structure. 1214 * 1215 * @return 1216 * On success the number of bytes consumed/necessary, if the returned value 1217 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1218 * otherwise another call with this returned memory size should be done. 1219 * On error, a negative errno value is returned and rte_errno is set. 
1220 */ 1221 static int 1222 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow, 1223 const size_t flow_size, struct rte_flow_error *error) 1224 { 1225 const struct rte_flow_item_udp *spec = item->spec; 1226 const struct rte_flow_item_udp *mask = item->mask; 1227 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1228 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); 1229 struct ibv_flow_spec_tcp_udp udp = { 1230 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1231 .size = size, 1232 }; 1233 int ret; 1234 1235 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP) 1236 return rte_flow_error_set(error, ENOTSUP, 1237 RTE_FLOW_ERROR_TYPE_ITEM, 1238 item, 1239 "protocol filtering not compatible" 1240 " with UDP layer"); 1241 if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1242 MLX5_FLOW_LAYER_OUTER_L3))) 1243 return rte_flow_error_set(error, ENOTSUP, 1244 RTE_FLOW_ERROR_TYPE_ITEM, 1245 item, 1246 "L3 is mandatory to filter" 1247 " on L4"); 1248 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1249 MLX5_FLOW_LAYER_OUTER_L4)) 1250 return rte_flow_error_set(error, ENOTSUP, 1251 RTE_FLOW_ERROR_TYPE_ITEM, 1252 item, 1253 "L4 layer is already" 1254 " present"); 1255 if (!mask) 1256 mask = &rte_flow_item_udp_mask; 1257 ret = mlx5_flow_item_acceptable 1258 (item, (const uint8_t *)mask, 1259 (const uint8_t *)&rte_flow_item_udp_mask, 1260 sizeof(struct rte_flow_item_udp), error); 1261 if (ret < 0) 1262 return ret; 1263 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : 1264 MLX5_FLOW_LAYER_OUTER_L4_UDP; 1265 if (spec) { 1266 udp.val.dst_port = spec->hdr.dst_port; 1267 udp.val.src_port = spec->hdr.src_port; 1268 udp.mask.dst_port = mask->hdr.dst_port; 1269 udp.mask.src_port = mask->hdr.src_port; 1270 /* Remove unwanted bits from values. */ 1271 udp.val.src_port &= udp.mask.src_port; 1272 udp.val.dst_port &= udp.mask.dst_port; 1273 } 1274 if (size <= flow_size) { 1275 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP, 1276 (IBV_RX_HASH_SRC_PORT_UDP | 1277 IBV_RX_HASH_DST_PORT_UDP)); 1278 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4; 1279 mlx5_flow_spec_verbs_add(flow, &udp, size); 1280 } 1281 return size; 1282 } 1283 1284 /** 1285 * Convert the @p item into a Verbs specification after ensuring the NIC 1286 * will understand and process it correctly. 1287 * If the necessary size for the conversion is greater than the @p flow_size, 1288 * nothing is written in @p flow, the validation is still performed. 1289 * 1290 * @param[in] item 1291 * Item specification. 1292 * @param[in, out] flow 1293 * Pointer to flow structure. 1294 * @param[in] flow_size 1295 * Size in bytes of the available space in @p flow, if too small, nothing is 1296 * written. 1297 * @param[out] error 1298 * Pointer to error structure. 1299 * 1300 * @return 1301 * On success the number of bytes consumed/necessary, if the returned value 1302 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1303 * otherwise another call with this returned memory size should be done. 1304 * On error, a negative errno value is returned and rte_errno is set. 
1305 */ 1306 static int 1307 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow, 1308 const size_t flow_size, struct rte_flow_error *error) 1309 { 1310 const struct rte_flow_item_tcp *spec = item->spec; 1311 const struct rte_flow_item_tcp *mask = item->mask; 1312 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 1313 unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp); 1314 struct ibv_flow_spec_tcp_udp tcp = { 1315 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0), 1316 .size = size, 1317 }; 1318 int ret; 1319 1320 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP) 1321 return rte_flow_error_set(error, ENOTSUP, 1322 RTE_FLOW_ERROR_TYPE_ITEM, 1323 item, 1324 "protocol filtering not compatible" 1325 " with TCP layer"); 1326 if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1327 MLX5_FLOW_LAYER_OUTER_L3))) 1328 return rte_flow_error_set(error, ENOTSUP, 1329 RTE_FLOW_ERROR_TYPE_ITEM, 1330 item, 1331 "L3 is mandatory to filter on L4"); 1332 if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1333 MLX5_FLOW_LAYER_OUTER_L4)) 1334 return rte_flow_error_set(error, ENOTSUP, 1335 RTE_FLOW_ERROR_TYPE_ITEM, 1336 item, 1337 "L4 layer is already present"); 1338 if (!mask) 1339 mask = &rte_flow_item_tcp_mask; 1340 ret = mlx5_flow_item_acceptable 1341 (item, (const uint8_t *)mask, 1342 (const uint8_t *)&rte_flow_item_tcp_mask, 1343 sizeof(struct rte_flow_item_tcp), error); 1344 if (ret < 0) 1345 return ret; 1346 flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : 1347 MLX5_FLOW_LAYER_OUTER_L4_TCP; 1348 if (spec) { 1349 tcp.val.dst_port = spec->hdr.dst_port; 1350 tcp.val.src_port = spec->hdr.src_port; 1351 tcp.mask.dst_port = mask->hdr.dst_port; 1352 tcp.mask.src_port = mask->hdr.src_port; 1353 /* Remove unwanted bits from values. */ 1354 tcp.val.src_port &= tcp.mask.src_port; 1355 tcp.val.dst_port &= tcp.mask.dst_port; 1356 } 1357 if (size <= flow_size) { 1358 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP, 1359 (IBV_RX_HASH_SRC_PORT_TCP | 1360 IBV_RX_HASH_DST_PORT_TCP)); 1361 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4; 1362 mlx5_flow_spec_verbs_add(flow, &tcp, size); 1363 } 1364 return size; 1365 } 1366 1367 /** 1368 * Convert the @p item into a Verbs specification after ensuring the NIC 1369 * will understand and process it correctly. 1370 * If the necessary size for the conversion is greater than the @p flow_size, 1371 * nothing is written in @p flow, the validation is still performed. 1372 * 1373 * @param[in] item 1374 * Item specification. 1375 * @param[in, out] flow 1376 * Pointer to flow structure. 1377 * @param[in] flow_size 1378 * Size in bytes of the available space in @p flow, if too small, nothing is 1379 * written. 1380 * @param[out] error 1381 * Pointer to error structure. 1382 * 1383 * @return 1384 * On success the number of bytes consumed/necessary, if the returned value 1385 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1386 * otherwise another call with this returned memory size should be done. 1387 * On error, a negative errno value is returned and rte_errno is set. 
1388 */ 1389 static int 1390 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow, 1391 const size_t flow_size, struct rte_flow_error *error) 1392 { 1393 const struct rte_flow_item_vxlan *spec = item->spec; 1394 const struct rte_flow_item_vxlan *mask = item->mask; 1395 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1396 struct ibv_flow_spec_tunnel vxlan = { 1397 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1398 .size = size, 1399 }; 1400 int ret; 1401 union vni { 1402 uint32_t vlan_id; 1403 uint8_t vni[4]; 1404 } id = { .vlan_id = 0, }; 1405 1406 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1407 return rte_flow_error_set(error, ENOTSUP, 1408 RTE_FLOW_ERROR_TYPE_ITEM, 1409 item, 1410 "a tunnel is already present"); 1411 /* 1412 * Verify only UDPv4 is present as defined in 1413 * https://tools.ietf.org/html/rfc7348 1414 */ 1415 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1416 return rte_flow_error_set(error, ENOTSUP, 1417 RTE_FLOW_ERROR_TYPE_ITEM, 1418 item, 1419 "no outer UDP layer found"); 1420 if (!mask) 1421 mask = &rte_flow_item_vxlan_mask; 1422 ret = mlx5_flow_item_acceptable 1423 (item, (const uint8_t *)mask, 1424 (const uint8_t *)&rte_flow_item_vxlan_mask, 1425 sizeof(struct rte_flow_item_vxlan), error); 1426 if (ret < 0) 1427 return ret; 1428 if (spec) { 1429 memcpy(&id.vni[1], spec->vni, 3); 1430 vxlan.val.tunnel_id = id.vlan_id; 1431 memcpy(&id.vni[1], mask->vni, 3); 1432 vxlan.mask.tunnel_id = id.vlan_id; 1433 /* Remove unwanted bits from values. */ 1434 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id; 1435 } 1436 /* 1437 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if 1438 * only this layer is defined in the Verbs specification it is 1439 * interpreted as wildcard and all packets will match this 1440 * rule, if it follows a full stack layer (ex: eth / ipv4 / 1441 * udp), all packets matching the layers before will also 1442 * match this rule. To avoid such situation, VNI 0 is 1443 * currently refused. 1444 */ 1445 if (!vxlan.val.tunnel_id) 1446 return rte_flow_error_set(error, EINVAL, 1447 RTE_FLOW_ERROR_TYPE_ITEM, 1448 item, 1449 "VXLAN vni cannot be 0"); 1450 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1451 return rte_flow_error_set(error, EINVAL, 1452 RTE_FLOW_ERROR_TYPE_ITEM, 1453 item, 1454 "VXLAN tunnel must be fully defined"); 1455 if (size <= flow_size) { 1456 mlx5_flow_spec_verbs_add(flow, &vxlan, size); 1457 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1458 } 1459 flow->layers |= MLX5_FLOW_LAYER_VXLAN; 1460 return size; 1461 } 1462 1463 /** 1464 * Convert the @p item into a Verbs specification after ensuring the NIC 1465 * will understand and process it correctly. 1466 * If the necessary size for the conversion is greater than the @p flow_size, 1467 * nothing is written in @p flow, the validation is still performed. 1468 * 1469 * @param dev 1470 * Pointer to Ethernet device. 1471 * @param[in] item 1472 * Item specification. 1473 * @param[in, out] flow 1474 * Pointer to flow structure. 1475 * @param[in] flow_size 1476 * Size in bytes of the available space in @p flow, if too small, nothing is 1477 * written. 1478 * @param[out] error 1479 * Pointer to error structure. 1480 * 1481 * @return 1482 * On success the number of bytes consumed/necessary, if the returned value 1483 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1484 * otherwise another call with this returned memory size should be done. 1485 * On error, a negative errno value is returned and rte_errno is set. 
1486 */ 1487 static int 1488 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev, 1489 const struct rte_flow_item *item, 1490 struct rte_flow *flow, const size_t flow_size, 1491 struct rte_flow_error *error) 1492 { 1493 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1494 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1495 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1496 struct ibv_flow_spec_tunnel vxlan_gpe = { 1497 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1498 .size = size, 1499 }; 1500 int ret; 1501 union vni { 1502 uint32_t vlan_id; 1503 uint8_t vni[4]; 1504 } id = { .vlan_id = 0, }; 1505 1506 if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en) 1507 return rte_flow_error_set(error, ENOTSUP, 1508 RTE_FLOW_ERROR_TYPE_ITEM, 1509 item, 1510 "L3 VXLAN is not enabled by device" 1511 " parameter and/or not configured in" 1512 " firmware"); 1513 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1514 return rte_flow_error_set(error, ENOTSUP, 1515 RTE_FLOW_ERROR_TYPE_ITEM, 1516 item, 1517 "a tunnel is already present"); 1518 /* 1519 * Verify only UDPv4 is present as defined in 1520 * https://tools.ietf.org/html/rfc7348 1521 */ 1522 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1523 return rte_flow_error_set(error, ENOTSUP, 1524 RTE_FLOW_ERROR_TYPE_ITEM, 1525 item, 1526 "no outer UDP layer found"); 1527 if (!mask) 1528 mask = &rte_flow_item_vxlan_gpe_mask; 1529 ret = mlx5_flow_item_acceptable 1530 (item, (const uint8_t *)mask, 1531 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 1532 sizeof(struct rte_flow_item_vxlan_gpe), error); 1533 if (ret < 0) 1534 return ret; 1535 if (spec) { 1536 memcpy(&id.vni[1], spec->vni, 3); 1537 vxlan_gpe.val.tunnel_id = id.vlan_id; 1538 memcpy(&id.vni[1], mask->vni, 3); 1539 vxlan_gpe.mask.tunnel_id = id.vlan_id; 1540 if (spec->protocol) 1541 return rte_flow_error_set 1542 (error, EINVAL, 1543 RTE_FLOW_ERROR_TYPE_ITEM, 1544 item, 1545 "VxLAN-GPE protocol not supported"); 1546 /* Remove unwanted bits from values. */ 1547 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id; 1548 } 1549 /* 1550 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this 1551 * layer is defined in the Verbs specification it is interpreted as 1552 * wildcard and all packets will match this rule, if it follows a full 1553 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers 1554 * before will also match this rule. To avoid such situation, VNI 0 1555 * is currently refused. 1556 */ 1557 if (!vxlan_gpe.val.tunnel_id) 1558 return rte_flow_error_set(error, EINVAL, 1559 RTE_FLOW_ERROR_TYPE_ITEM, 1560 item, 1561 "VXLAN-GPE vni cannot be 0"); 1562 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER)) 1563 return rte_flow_error_set(error, EINVAL, 1564 RTE_FLOW_ERROR_TYPE_ITEM, 1565 item, 1566 "VXLAN-GPE tunnel must be fully" 1567 " defined"); 1568 if (size <= flow_size) { 1569 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size); 1570 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1571 } 1572 flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE; 1573 return size; 1574 } 1575 1576 /** 1577 * Update the protocol in Verbs IPv4/IPv6 spec. 1578 * 1579 * @param[in, out] attr 1580 * Pointer to Verbs attributes structure. 1581 * @param[in] search 1582 * Specification type to search in order to update the IP protocol. 1583 * @param[in] protocol 1584 * Protocol value to set if none is present in the specification. 
1585 */ 1586 static void 1587 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr, 1588 enum ibv_flow_spec_type search, 1589 uint8_t protocol) 1590 { 1591 unsigned int i; 1592 struct ibv_spec_header *hdr = (struct ibv_spec_header *) 1593 ((uint8_t *)attr + sizeof(struct ibv_flow_attr)); 1594 1595 if (!attr) 1596 return; 1597 for (i = 0; i != attr->num_of_specs; ++i) { 1598 if (hdr->type == search) { 1599 union { 1600 struct ibv_flow_spec_ipv4_ext *ipv4; 1601 struct ibv_flow_spec_ipv6 *ipv6; 1602 } ip; 1603 1604 switch (search) { 1605 case IBV_FLOW_SPEC_IPV4_EXT: 1606 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr; 1607 if (!ip.ipv4->val.proto) { 1608 ip.ipv4->val.proto = protocol; 1609 ip.ipv4->mask.proto = 0xff; 1610 } 1611 break; 1612 case IBV_FLOW_SPEC_IPV6: 1613 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr; 1614 if (!ip.ipv6->val.next_hdr) { 1615 ip.ipv6->val.next_hdr = protocol; 1616 ip.ipv6->mask.next_hdr = 0xff; 1617 } 1618 break; 1619 default: 1620 break; 1621 } 1622 break; 1623 } 1624 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size); 1625 } 1626 } 1627 1628 /** 1629 * Convert the @p item into a Verbs specification after ensuring the NIC 1630 * will understand and process it correctly. 1631 * It will also update the previous L3 layer with the protocol value matching 1632 * the GRE. 1633 * If the necessary size for the conversion is greater than the @p flow_size, 1634 * nothing is written in @p flow, the validation is still performed. 1635 * 1636 * @param dev 1637 * Pointer to Ethernet device. 1638 * @param[in] item 1639 * Item specification. 1640 * @param[in, out] flow 1641 * Pointer to flow structure. 1642 * @param[in] flow_size 1643 * Size in bytes of the available space in @p flow, if too small, nothing is 1644 * written. 1645 * @param[out] error 1646 * Pointer to error structure. 1647 * 1648 * @return 1649 * On success the number of bytes consumed/necessary, if the returned value 1650 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1651 * otherwise another call with this returned memory size should be done. 1652 * On error, a negative errno value is returned and rte_errno is set. 
1653 */ 1654 static int 1655 mlx5_flow_item_gre(const struct rte_flow_item *item, 1656 struct rte_flow *flow, const size_t flow_size, 1657 struct rte_flow_error *error) 1658 { 1659 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 1660 const struct rte_flow_item_gre *spec = item->spec; 1661 const struct rte_flow_item_gre *mask = item->mask; 1662 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1663 unsigned int size = sizeof(struct ibv_flow_spec_gre); 1664 struct ibv_flow_spec_gre tunnel = { 1665 .type = IBV_FLOW_SPEC_GRE, 1666 .size = size, 1667 }; 1668 #else 1669 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1670 struct ibv_flow_spec_tunnel tunnel = { 1671 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, 1672 .size = size, 1673 }; 1674 #endif 1675 int ret; 1676 1677 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE) 1678 return rte_flow_error_set(error, ENOTSUP, 1679 RTE_FLOW_ERROR_TYPE_ITEM, 1680 item, 1681 "protocol filtering not compatible" 1682 " with this GRE layer"); 1683 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL) 1684 return rte_flow_error_set(error, ENOTSUP, 1685 RTE_FLOW_ERROR_TYPE_ITEM, 1686 item, 1687 "a tunnel is already present"); 1688 if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3)) 1689 return rte_flow_error_set(error, ENOTSUP, 1690 RTE_FLOW_ERROR_TYPE_ITEM, 1691 item, 1692 "L3 Layer is missing"); 1693 if (!mask) 1694 mask = &rte_flow_item_gre_mask; 1695 ret = mlx5_flow_item_acceptable 1696 (item, (const uint8_t *)mask, 1697 (const uint8_t *)&rte_flow_item_gre_mask, 1698 sizeof(struct rte_flow_item_gre), error); 1699 if (ret < 0) 1700 return ret; 1701 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1702 if (spec) { 1703 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver; 1704 tunnel.val.protocol = spec->protocol; 1705 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver; 1706 tunnel.mask.protocol = mask->protocol; 1707 /* Remove unwanted bits from values. */ 1708 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver; 1709 tunnel.val.protocol &= tunnel.mask.protocol; 1710 tunnel.val.key &= tunnel.mask.key; 1711 } 1712 #else 1713 if (spec && (spec->protocol & mask->protocol)) 1714 return rte_flow_error_set(error, ENOTSUP, 1715 RTE_FLOW_ERROR_TYPE_ITEM, 1716 item, 1717 "without MPLS support the" 1718 " specification cannot be used for" 1719 " filtering"); 1720 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1721 if (size <= flow_size) { 1722 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4) 1723 mlx5_flow_item_gre_ip_protocol_update 1724 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT, 1725 MLX5_IP_PROTOCOL_GRE); 1726 else 1727 mlx5_flow_item_gre_ip_protocol_update 1728 (verbs->attr, IBV_FLOW_SPEC_IPV6, 1729 MLX5_IP_PROTOCOL_GRE); 1730 mlx5_flow_spec_verbs_add(flow, &tunnel, size); 1731 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1732 } 1733 flow->layers |= MLX5_FLOW_LAYER_GRE; 1734 return size; 1735 } 1736 1737 /** 1738 * Convert the @p item into a Verbs specification after ensuring the NIC 1739 * will understand and process it correctly. 1740 * If the necessary size for the conversion is greater than the @p flow_size, 1741 * nothing is written in @p flow, the validation is still performed. 1742 * 1743 * @param[in] item 1744 * Item specification. 1745 * @param[in, out] flow 1746 * Pointer to flow structure. 1747 * @param[in] flow_size 1748 * Size in bytes of the available space in @p flow, if too small, nothing is 1749 * written. 1750 * @param[out] error 1751 * Pointer to error structure. 
1752 * 1753 * @return 1754 * On success the number of bytes consumed/necessary, if the returned value 1755 * is lesser or equal to @p flow_size, the @p item has fully been converted, 1756 * otherwise another call with this returned memory size should be done. 1757 * On error, a negative errno value is returned and rte_errno is set. 1758 */ 1759 static int 1760 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused, 1761 struct rte_flow *flow __rte_unused, 1762 const size_t flow_size __rte_unused, 1763 struct rte_flow_error *error) 1764 { 1765 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 1766 const struct rte_flow_item_mpls *spec = item->spec; 1767 const struct rte_flow_item_mpls *mask = item->mask; 1768 unsigned int size = sizeof(struct ibv_flow_spec_mpls); 1769 struct ibv_flow_spec_mpls mpls = { 1770 .type = IBV_FLOW_SPEC_MPLS, 1771 .size = size, 1772 }; 1773 int ret; 1774 1775 if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS) 1776 return rte_flow_error_set(error, ENOTSUP, 1777 RTE_FLOW_ERROR_TYPE_ITEM, 1778 item, 1779 "protocol filtering not compatible" 1780 " with MPLS layer"); 1781 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */ 1782 if (flow->layers & MLX5_FLOW_LAYER_TUNNEL && 1783 (flow->layers & MLX5_FLOW_LAYER_GRE) != MLX5_FLOW_LAYER_GRE) 1784 return rte_flow_error_set(error, ENOTSUP, 1785 RTE_FLOW_ERROR_TYPE_ITEM, 1786 item, 1787 "a tunnel is already" 1788 " present"); 1789 if (!mask) 1790 mask = &rte_flow_item_mpls_mask; 1791 ret = mlx5_flow_item_acceptable 1792 (item, (const uint8_t *)mask, 1793 (const uint8_t *)&rte_flow_item_mpls_mask, 1794 sizeof(struct rte_flow_item_mpls), error); 1795 if (ret < 0) 1796 return ret; 1797 if (spec) { 1798 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label)); 1799 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label)); 1800 /* Remove unwanted bits from values. */ 1801 mpls.val.label &= mpls.mask.label; 1802 } 1803 if (size <= flow_size) { 1804 mlx5_flow_spec_verbs_add(flow, &mpls, size); 1805 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2; 1806 } 1807 flow->layers |= MLX5_FLOW_LAYER_MPLS; 1808 return size; 1809 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */ 1810 return rte_flow_error_set(error, ENOTSUP, 1811 RTE_FLOW_ERROR_TYPE_ITEM, 1812 item, 1813 "MPLS is not supported by Verbs, please" 1814 " update."); 1815 } 1816 1817 /** 1818 * Convert the @p pattern into a Verbs specifications after ensuring the NIC 1819 * will understand and process it correctly. 1820 * The conversion is performed item per item, each of them is written into 1821 * the @p flow if its size is lesser or equal to @p flow_size. 1822 * Validation and memory consumption computation are still performed until the 1823 * end of @p pattern, unless an error is encountered. 1824 * 1825 * @param[in] pattern 1826 * Flow pattern. 1827 * @param[in, out] flow 1828 * Pointer to the rte_flow structure. 1829 * @param[in] flow_size 1830 * Size in bytes of the available space in @p flow, if too small some 1831 * garbage may be present. 1832 * @param[out] error 1833 * Pointer to error structure. 1834 * 1835 * @return 1836 * On success the number of bytes consumed/necessary, if the returned value 1837 * is lesser or equal to @p flow_size, the @pattern has fully been 1838 * converted, otherwise another call with this returned memory size should 1839 * be done. 1840 * On error, a negative errno value is returned and rte_errno is set. 
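 *
 * Note that a pattern made only of the END item ends up being converted as
 * if the caller had passed a single wildcard Ethernet item, roughly the
 * sketch below (NULL spec/mask, i.e. match all):
 *
 * @code
 * const struct rte_flow_item pattern[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * @endcode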
1841 */ 1842 static int 1843 mlx5_flow_items(struct rte_eth_dev *dev, 1844 const struct rte_flow_item pattern[], 1845 struct rte_flow *flow, const size_t flow_size, 1846 struct rte_flow_error *error) 1847 { 1848 int remain = flow_size; 1849 size_t size = 0; 1850 1851 for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) { 1852 int ret = 0; 1853 1854 switch (pattern->type) { 1855 case RTE_FLOW_ITEM_TYPE_VOID: 1856 break; 1857 case RTE_FLOW_ITEM_TYPE_ETH: 1858 ret = mlx5_flow_item_eth(pattern, flow, remain, error); 1859 break; 1860 case RTE_FLOW_ITEM_TYPE_VLAN: 1861 ret = mlx5_flow_item_vlan(pattern, flow, remain, error); 1862 break; 1863 case RTE_FLOW_ITEM_TYPE_IPV4: 1864 ret = mlx5_flow_item_ipv4(pattern, flow, remain, error); 1865 break; 1866 case RTE_FLOW_ITEM_TYPE_IPV6: 1867 ret = mlx5_flow_item_ipv6(pattern, flow, remain, error); 1868 break; 1869 case RTE_FLOW_ITEM_TYPE_UDP: 1870 ret = mlx5_flow_item_udp(pattern, flow, remain, error); 1871 break; 1872 case RTE_FLOW_ITEM_TYPE_TCP: 1873 ret = mlx5_flow_item_tcp(pattern, flow, remain, error); 1874 break; 1875 case RTE_FLOW_ITEM_TYPE_VXLAN: 1876 ret = mlx5_flow_item_vxlan(pattern, flow, remain, 1877 error); 1878 break; 1879 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: 1880 ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow, 1881 remain, error); 1882 break; 1883 case RTE_FLOW_ITEM_TYPE_GRE: 1884 ret = mlx5_flow_item_gre(pattern, flow, remain, error); 1885 break; 1886 case RTE_FLOW_ITEM_TYPE_MPLS: 1887 ret = mlx5_flow_item_mpls(pattern, flow, remain, error); 1888 break; 1889 default: 1890 return rte_flow_error_set(error, ENOTSUP, 1891 RTE_FLOW_ERROR_TYPE_ITEM, 1892 pattern, 1893 "item not supported"); 1894 } 1895 if (ret < 0) 1896 return ret; 1897 if (remain > ret) 1898 remain -= ret; 1899 else 1900 remain = 0; 1901 size += ret; 1902 } 1903 if (!flow->layers) { 1904 const struct rte_flow_item item = { 1905 .type = RTE_FLOW_ITEM_TYPE_ETH, 1906 }; 1907 1908 return mlx5_flow_item_eth(&item, flow, flow_size, error); 1909 } 1910 return size; 1911 } 1912 1913 /** 1914 * Convert the @p action into a Verbs specification after ensuring the NIC 1915 * will understand and process it correctly. 1916 * If the necessary size for the conversion is greater than the @p flow_size, 1917 * nothing is written in @p flow, the validation is still performed. 1918 * 1919 * @param[in] action 1920 * Action configuration. 1921 * @param[in, out] flow 1922 * Pointer to flow structure. 1923 * @param[in] flow_size 1924 * Size in bytes of the available space in @p flow, if too small, nothing is 1925 * written. 1926 * @param[out] error 1927 * Pointer to error structure. 1928 * 1929 * @return 1930 * On success the number of bytes consumed/necessary, if the returned value 1931 * is lesser or equal to @p flow_size, the @p action has fully been 1932 * converted, otherwise another call with this returned memory size should 1933 * be done. 1934 * On error, a negative errno value is returned and rte_errno is set. 
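 *
 * For reference, the kind of action list this conversion handles is as
 * simple as the sketch below (illustration only):
 *
 * @code
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode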
1935 */ 1936 static int 1937 mlx5_flow_action_drop(const struct rte_flow_action *action, 1938 struct rte_flow *flow, const size_t flow_size, 1939 struct rte_flow_error *error) 1940 { 1941 unsigned int size = sizeof(struct ibv_flow_spec_action_drop); 1942 struct ibv_flow_spec_action_drop drop = { 1943 .type = IBV_FLOW_SPEC_ACTION_DROP, 1944 .size = size, 1945 }; 1946 1947 if (flow->fate) 1948 return rte_flow_error_set(error, ENOTSUP, 1949 RTE_FLOW_ERROR_TYPE_ACTION, 1950 action, 1951 "multiple fate actions are not" 1952 " supported"); 1953 if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) 1954 return rte_flow_error_set(error, ENOTSUP, 1955 RTE_FLOW_ERROR_TYPE_ACTION, 1956 action, 1957 "drop is not compatible with" 1958 " flag/mark action"); 1959 if (size < flow_size) 1960 mlx5_flow_spec_verbs_add(flow, &drop, size); 1961 flow->fate |= MLX5_FLOW_FATE_DROP; 1962 return size; 1963 } 1964 1965 /** 1966 * Convert the @p action into @p flow after ensuring the NIC will understand 1967 * and process it correctly. 1968 * 1969 * @param[in] dev 1970 * Pointer to Ethernet device structure. 1971 * @param[in] action 1972 * Action configuration. 1973 * @param[in, out] flow 1974 * Pointer to flow structure. 1975 * @param[out] error 1976 * Pointer to error structure. 1977 * 1978 * @return 1979 * 0 on success, a negative errno value otherwise and rte_errno is set. 1980 */ 1981 static int 1982 mlx5_flow_action_queue(struct rte_eth_dev *dev, 1983 const struct rte_flow_action *action, 1984 struct rte_flow *flow, 1985 struct rte_flow_error *error) 1986 { 1987 struct priv *priv = dev->data->dev_private; 1988 const struct rte_flow_action_queue *queue = action->conf; 1989 1990 if (flow->fate) 1991 return rte_flow_error_set(error, ENOTSUP, 1992 RTE_FLOW_ERROR_TYPE_ACTION, 1993 action, 1994 "multiple fate actions are not" 1995 " supported"); 1996 if (queue->index >= priv->rxqs_n) 1997 return rte_flow_error_set(error, EINVAL, 1998 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1999 &queue->index, 2000 "queue index out of range"); 2001 if (!(*priv->rxqs)[queue->index]) 2002 return rte_flow_error_set(error, EINVAL, 2003 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2004 &queue->index, 2005 "queue is not configured"); 2006 if (flow->queue) 2007 (*flow->queue)[0] = queue->index; 2008 flow->rss.queue_num = 1; 2009 flow->fate |= MLX5_FLOW_FATE_QUEUE; 2010 return 0; 2011 } 2012 2013 /** 2014 * Ensure the @p action will be understood and used correctly by the NIC. 2015 * 2016 * @param dev 2017 * Pointer to Ethernet device structure. 2018 * @param action[in] 2019 * Pointer to flow actions array. 2020 * @param flow[in, out] 2021 * Pointer to the rte_flow structure. 2022 * @param error[in, out] 2023 * Pointer to error structure. 2024 * 2025 * @return 2026 * On success @p flow->queue array and @p flow->rss are filled and valid. 2027 * On error, a negative errno value is returned and rte_errno is set. 
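 *
 * A configuration passing these checks could look like the sketch below
 * (illustration only, rss_key stands for a caller-provided
 * MLX5_RSS_HASH_KEY_LEN-byte key):
 *
 * @code
 * uint16_t queues[] = { 0, 1, 2, 3 };
 * uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN];
 * struct rte_flow_action_rss rss_conf = {
 *         .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *         .level = 1,
 *         .types = ETH_RSS_IP | ETH_RSS_UDP,
 *         .key_len = MLX5_RSS_HASH_KEY_LEN,
 *         .key = rss_key,
 *         .queue_num = RTE_DIM(queues),
 *         .queue = queues,
 * };
 * @endcode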
2028 */ 2029 static int 2030 mlx5_flow_action_rss(struct rte_eth_dev *dev, 2031 const struct rte_flow_action *action, 2032 struct rte_flow *flow, 2033 struct rte_flow_error *error) 2034 { 2035 struct priv *priv = dev->data->dev_private; 2036 const struct rte_flow_action_rss *rss = action->conf; 2037 unsigned int i; 2038 2039 if (flow->fate) 2040 return rte_flow_error_set(error, ENOTSUP, 2041 RTE_FLOW_ERROR_TYPE_ACTION, 2042 action, 2043 "multiple fate actions are not" 2044 " supported"); 2045 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 2046 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 2047 return rte_flow_error_set(error, ENOTSUP, 2048 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2049 &rss->func, 2050 "RSS hash function not supported"); 2051 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 2052 if (rss->level > 2) 2053 #else 2054 if (rss->level > 1) 2055 #endif 2056 return rte_flow_error_set(error, ENOTSUP, 2057 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2058 &rss->level, 2059 "tunnel RSS is not supported"); 2060 if (rss->key_len < MLX5_RSS_HASH_KEY_LEN) 2061 return rte_flow_error_set(error, ENOTSUP, 2062 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2063 &rss->key_len, 2064 "RSS hash key too small"); 2065 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 2066 return rte_flow_error_set(error, ENOTSUP, 2067 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2068 &rss->key_len, 2069 "RSS hash key too large"); 2070 if (!rss->queue_num) 2071 return rte_flow_error_set(error, ENOTSUP, 2072 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2073 rss, 2074 "no queues were provided for RSS"); 2075 if (rss->queue_num > priv->config.ind_table_max_size) 2076 return rte_flow_error_set(error, ENOTSUP, 2077 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2078 &rss->queue_num, 2079 "number of queues too large"); 2080 if (rss->types & MLX5_RSS_HF_MASK) 2081 return rte_flow_error_set(error, ENOTSUP, 2082 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2083 &rss->types, 2084 "some RSS protocols are not" 2085 " supported"); 2086 for (i = 0; i != rss->queue_num; ++i) { 2087 if (rss->queue[i] >= priv->rxqs_n) 2088 return rte_flow_error_set 2089 (error, EINVAL, 2090 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2091 rss, 2092 "queue index out of range"); 2093 if (!(*priv->rxqs)[rss->queue[i]]) 2094 return rte_flow_error_set 2095 (error, EINVAL, 2096 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 2097 &rss->queue[i], 2098 "queue is not configured"); 2099 } 2100 if (flow->queue) 2101 memcpy((*flow->queue), rss->queue, 2102 rss->queue_num * sizeof(uint16_t)); 2103 flow->rss.queue_num = rss->queue_num; 2104 memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN); 2105 flow->rss.types = rss->types; 2106 flow->rss.level = rss->level; 2107 flow->fate |= MLX5_FLOW_FATE_RSS; 2108 return 0; 2109 } 2110 2111 /** 2112 * Convert the @p action into a Verbs specification after ensuring the NIC 2113 * will understand and process it correctly. 2114 * If the necessary size for the conversion is greater than the @p flow_size, 2115 * nothing is written in @p flow, the validation is still performed. 2116 * 2117 * @param[in] action 2118 * Action configuration. 2119 * @param[in, out] flow 2120 * Pointer to flow structure. 2121 * @param[in] flow_size 2122 * Size in bytes of the available space in @p flow, if too small, nothing is 2123 * written. 2124 * @param[out] error 2125 * Pointer to error structure. 2126 * 2127 * @return 2128 * On success the number of bytes consumed/necessary, if the returned value 2129 * is lesser or equal to @p flow_size, the @p action has fully been 2130 * converted, otherwise another call with this returned memory size should 2131 * be done. 
2132 * On error, a negative errno value is returned and rte_errno is set. 2133 */ 2134 static int 2135 mlx5_flow_action_flag(const struct rte_flow_action *action, 2136 struct rte_flow *flow, const size_t flow_size, 2137 struct rte_flow_error *error) 2138 { 2139 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 2140 struct ibv_flow_spec_action_tag tag = { 2141 .type = IBV_FLOW_SPEC_ACTION_TAG, 2142 .size = size, 2143 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT), 2144 }; 2145 struct mlx5_flow_verbs *verbs = flow->cur_verbs; 2146 2147 if (flow->modifier & MLX5_FLOW_MOD_FLAG) 2148 return rte_flow_error_set(error, ENOTSUP, 2149 RTE_FLOW_ERROR_TYPE_ACTION, 2150 action, 2151 "flag action already present"); 2152 if (flow->fate & MLX5_FLOW_FATE_DROP) 2153 return rte_flow_error_set(error, ENOTSUP, 2154 RTE_FLOW_ERROR_TYPE_ACTION, 2155 action, 2156 "flag is not compatible with drop" 2157 " action"); 2158 if (flow->modifier & MLX5_FLOW_MOD_MARK) 2159 size = 0; 2160 else if (size <= flow_size && verbs) 2161 mlx5_flow_spec_verbs_add(flow, &tag, size); 2162 flow->modifier |= MLX5_FLOW_MOD_FLAG; 2163 return size; 2164 } 2165 2166 /** 2167 * Update verbs specification to modify the flag to mark. 2168 * 2169 * @param[in, out] verbs 2170 * Pointer to the mlx5_flow_verbs structure. 2171 * @param[in] mark_id 2172 * Mark identifier to replace the flag. 2173 */ 2174 static void 2175 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id) 2176 { 2177 struct ibv_spec_header *hdr; 2178 int i; 2179 2180 if (!verbs) 2181 return; 2182 /* Update Verbs specification. */ 2183 hdr = (struct ibv_spec_header *)verbs->specs; 2184 if (!hdr) 2185 return; 2186 for (i = 0; i != verbs->attr->num_of_specs; ++i) { 2187 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) { 2188 struct ibv_flow_spec_action_tag *t = 2189 (struct ibv_flow_spec_action_tag *)hdr; 2190 2191 t->tag_id = mlx5_flow_mark_set(mark_id); 2192 } 2193 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size); 2194 } 2195 } 2196 2197 /** 2198 * Convert the @p action into @p flow (or by updating the already present 2199 * Flag Verbs specification) after ensuring the NIC will understand and 2200 * process it correctly. 2201 * If the necessary size for the conversion is greater than the @p flow_size, 2202 * nothing is written in @p flow, the validation is still performed. 2203 * 2204 * @param[in] action 2205 * Action configuration. 2206 * @param[in, out] flow 2207 * Pointer to flow structure. 2208 * @param[in] flow_size 2209 * Size in bytes of the available space in @p flow, if too small, nothing is 2210 * written. 2211 * @param[out] error 2212 * Pointer to error structure. 2213 * 2214 * @return 2215 * On success the number of bytes consumed/necessary, if the returned value 2216 * is lesser or equal to @p flow_size, the @p action has fully been 2217 * converted, otherwise another call with this returned memory size should 2218 * be done. 2219 * On error, a negative errno value is returned and rte_errno is set. 
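 *
 * A minimal action list exercising this path could be the following sketch
 * (illustration only, the mark identifier is arbitrary):
 *
 * @code
 * struct rte_flow_action_mark mark = { .id = 0xcafe };
 * struct rte_flow_action_queue queue = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode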
 */
static int
mlx5_flow_action_mark(const struct rte_flow_action *action,
                      struct rte_flow *flow, const size_t flow_size,
                      struct rte_flow_error *error)
{
        const struct rte_flow_action_mark *mark = action->conf;
        unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
        struct ibv_flow_spec_action_tag tag = {
                .type = IBV_FLOW_SPEC_ACTION_TAG,
                .size = size,
        };
        struct mlx5_flow_verbs *verbs = flow->cur_verbs;

        if (!mark)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          action,
                                          "configuration cannot be null");
        if (mark->id >= MLX5_FLOW_MARK_MAX)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &mark->id,
                                          "mark id must be in 0 <= id < "
                                          RTE_STR(MLX5_FLOW_MARK_MAX));
        if (flow->modifier & MLX5_FLOW_MOD_MARK)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          action,
                                          "mark action already present");
        if (flow->fate & MLX5_FLOW_FATE_DROP)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          action,
                                          "mark is not compatible with drop"
                                          " action");
        if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
                mlx5_flow_verbs_mark_update(verbs, mark->id);
                size = 0;
        } else if (size <= flow_size) {
                tag.tag_id = mlx5_flow_mark_set(mark->id);
                mlx5_flow_spec_verbs_add(flow, &tag, size);
        }
        flow->modifier |= MLX5_FLOW_MOD_MARK;
        return size;
}

/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
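 *
 * The action itself only carries the counter identifier, e.g. the sketch
 * below (illustration only, "queue" is assumed to be a fate action as in
 * the previous examples); the resulting counter is later read back through
 * mlx5_flow_query_count():
 *
 * @code
 * struct rte_flow_action_count count = { .shared = 0, .id = 0 };
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count },
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode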
2289 */ 2290 static int 2291 mlx5_flow_action_count(struct rte_eth_dev *dev, 2292 const struct rte_flow_action *action, 2293 struct rte_flow *flow, 2294 const size_t flow_size __rte_unused, 2295 struct rte_flow_error *error) 2296 { 2297 const struct rte_flow_action_count *count = action->conf; 2298 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2299 unsigned int size = sizeof(struct ibv_flow_spec_counter_action); 2300 struct ibv_flow_spec_counter_action counter = { 2301 .type = IBV_FLOW_SPEC_ACTION_COUNT, 2302 .size = size, 2303 }; 2304 #endif 2305 2306 if (!flow->counter) { 2307 flow->counter = mlx5_flow_counter_new(dev, count->shared, 2308 count->id); 2309 if (!flow->counter) 2310 return rte_flow_error_set(error, ENOTSUP, 2311 RTE_FLOW_ERROR_TYPE_ACTION, 2312 action, 2313 "cannot get counter" 2314 " context."); 2315 } 2316 if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en) 2317 return rte_flow_error_set(error, ENOTSUP, 2318 RTE_FLOW_ERROR_TYPE_ACTION, 2319 action, 2320 "flow counters are not supported."); 2321 flow->modifier |= MLX5_FLOW_MOD_COUNT; 2322 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 2323 counter.counter_set_handle = flow->counter->cs->handle; 2324 if (size <= flow_size) 2325 mlx5_flow_spec_verbs_add(flow, &counter, size); 2326 return size; 2327 #endif 2328 return 0; 2329 } 2330 2331 /** 2332 * Convert the @p action into @p flow after ensuring the NIC will understand 2333 * and process it correctly. 2334 * The conversion is performed action per action, each of them is written into 2335 * the @p flow if its size is lesser or equal to @p flow_size. 2336 * Validation and memory consumption computation are still performed until the 2337 * end of @p action, unless an error is encountered. 2338 * 2339 * @param[in] dev 2340 * Pointer to Ethernet device structure. 2341 * @param[in] actions 2342 * Pointer to flow actions array. 2343 * @param[in, out] flow 2344 * Pointer to the rte_flow structure. 2345 * @param[in] flow_size 2346 * Size in bytes of the available space in @p flow, if too small some 2347 * garbage may be present. 2348 * @param[out] error 2349 * Pointer to error structure. 2350 * 2351 * @return 2352 * On success the number of bytes consumed/necessary, if the returned value 2353 * is lesser or equal to @p flow_size, the @p actions has fully been 2354 * converted, otherwise another call with this returned memory size should 2355 * be done. 2356 * On error, a negative errno value is returned and rte_errno is set. 
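 *
 * The loop below accepts exactly one fate action (drop, queue or RSS),
 * optionally combined with modifiers (flag, mark, count). A minimal valid
 * list is therefore something like this sketch (illustration only):
 *
 * @code
 * struct rte_flow_action_queue queue = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_FLAG },
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode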
2357 */ 2358 static int 2359 mlx5_flow_actions(struct rte_eth_dev *dev, 2360 const struct rte_flow_action actions[], 2361 struct rte_flow *flow, const size_t flow_size, 2362 struct rte_flow_error *error) 2363 { 2364 size_t size = 0; 2365 int remain = flow_size; 2366 int ret = 0; 2367 2368 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2369 switch (actions->type) { 2370 case RTE_FLOW_ACTION_TYPE_VOID: 2371 break; 2372 case RTE_FLOW_ACTION_TYPE_FLAG: 2373 ret = mlx5_flow_action_flag(actions, flow, remain, 2374 error); 2375 break; 2376 case RTE_FLOW_ACTION_TYPE_MARK: 2377 ret = mlx5_flow_action_mark(actions, flow, remain, 2378 error); 2379 break; 2380 case RTE_FLOW_ACTION_TYPE_DROP: 2381 ret = mlx5_flow_action_drop(actions, flow, remain, 2382 error); 2383 break; 2384 case RTE_FLOW_ACTION_TYPE_QUEUE: 2385 ret = mlx5_flow_action_queue(dev, actions, flow, error); 2386 break; 2387 case RTE_FLOW_ACTION_TYPE_RSS: 2388 ret = mlx5_flow_action_rss(dev, actions, flow, error); 2389 break; 2390 case RTE_FLOW_ACTION_TYPE_COUNT: 2391 ret = mlx5_flow_action_count(dev, actions, flow, remain, 2392 error); 2393 break; 2394 default: 2395 return rte_flow_error_set(error, ENOTSUP, 2396 RTE_FLOW_ERROR_TYPE_ACTION, 2397 actions, 2398 "action not supported"); 2399 } 2400 if (ret < 0) 2401 return ret; 2402 if (remain > ret) 2403 remain -= ret; 2404 else 2405 remain = 0; 2406 size += ret; 2407 } 2408 if (!flow->fate) 2409 return rte_flow_error_set(error, ENOTSUP, 2410 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2411 NULL, 2412 "no fate action found"); 2413 return size; 2414 } 2415 2416 /** 2417 * Validate flow rule and fill flow structure accordingly. 2418 * 2419 * @param dev 2420 * Pointer to Ethernet device. 2421 * @param[out] flow 2422 * Pointer to flow structure. 2423 * @param flow_size 2424 * Size of allocated space for @p flow. 2425 * @param[in] attr 2426 * Flow rule attributes. 2427 * @param[in] pattern 2428 * Pattern specification (list terminated by the END pattern item). 2429 * @param[in] actions 2430 * Associated actions (list terminated by the END action). 2431 * @param[out] error 2432 * Perform verbose error reporting if not NULL. 2433 * 2434 * @return 2435 * A positive value representing the size of the flow object in bytes 2436 * regardless of @p flow_size on success, a negative errno value otherwise 2437 * and rte_errno is set. 2438 */ 2439 static int 2440 mlx5_flow_merge_switch(struct rte_eth_dev *dev, 2441 struct rte_flow *flow, 2442 size_t flow_size, 2443 const struct rte_flow_attr *attr, 2444 const struct rte_flow_item pattern[], 2445 const struct rte_flow_action actions[], 2446 struct rte_flow_error *error) 2447 { 2448 unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0); 2449 uint16_t port_id[!n + n]; 2450 struct mlx5_nl_flow_ptoi ptoi[!n + n + 1]; 2451 size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t)); 2452 unsigned int i; 2453 unsigned int own = 0; 2454 int ret; 2455 2456 /* At least one port is needed when no switch domain is present. */ 2457 if (!n) { 2458 n = 1; 2459 port_id[0] = dev->data->port_id; 2460 } else { 2461 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n); 2462 } 2463 for (i = 0; i != n; ++i) { 2464 struct rte_eth_dev_info dev_info; 2465 2466 rte_eth_dev_info_get(port_id[i], &dev_info); 2467 if (port_id[i] == dev->data->port_id) 2468 own = i; 2469 ptoi[i].port_id = port_id[i]; 2470 ptoi[i].ifindex = dev_info.if_index; 2471 } 2472 /* Ensure first entry of ptoi[] is the current device. 
 */
        if (own) {
                ptoi[n] = ptoi[0];
                ptoi[0] = ptoi[own];
                ptoi[own] = ptoi[n];
        }
        /* An entry with zero ifindex terminates ptoi[]. */
        ptoi[n].port_id = 0;
        ptoi[n].ifindex = 0;
        if (flow_size < off)
                flow_size = 0;
        ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
                                     flow_size ? flow_size - off : 0,
                                     ptoi, attr, pattern, actions, error);
        if (ret < 0)
                return ret;
        if (flow_size) {
                *flow = (struct rte_flow){
                        .attributes = *attr,
                        .nl_flow = (uint8_t *)flow + off,
                };
                /*
                 * Generate a reasonably unique handle based on the address
                 * of the target buffer.
                 *
                 * This is straightforward on 32-bit systems where the flow
                 * pointer can be used directly. Otherwise, its least
                 * significant part is taken after shifting it by the
                 * previous power of two of the pointed buffer size.
                 */
                if (sizeof(flow) <= 4)
                        mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
                else
                        mlx5_nl_flow_brand
                                (flow->nl_flow,
                                 (uintptr_t)flow >>
                                 rte_log2_u32(rte_align32prevpow2(flow_size)));
        }
        return off + ret;
}

/**
 * Find the root node to start the RSS expansion of @p pattern from.
 *
 * @param[in] pattern
 *   Flow pattern to expand.
 * @param[in] rss_level
 *   RSS level requested by the RSS action.
 *
 * @return
 *   The node index in mlx5_support_expansion[] to use as expansion root.
 */
static unsigned int
mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
{
        const struct rte_flow_item *item;
        unsigned int has_vlan = 0;

        for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
                if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
                        has_vlan = 1;
                        break;
                }
        }
        if (has_vlan)
                return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
                                       MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
        return rss_level < 2 ? MLX5_EXPANSION_ROOT :
                               MLX5_EXPANSION_ROOT_OUTER;
}

/**
 * Convert the @p attributes, @p pattern and @p actions into a flow for the
 * NIC after ensuring the NIC will understand and process it correctly.
 * The conversion is performed item per item and action per action; each of
 * them is written into the @p flow if its size is lesser or equal to @p
 * flow_size.
 * Validation and memory consumption computation are still performed until the
 * end, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[in] attributes
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is lesser or equal to @p flow_size, the flow has fully been converted and
 *   can be applied, otherwise another call with this returned memory size
 *   should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
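 *
 * The expected calling convention is a measure pass followed by a fill
 * pass, as done by mlx5_flow_validate() and mlx5_flow_list_create(); a
 * shortened sketch (attr, pattern, actions and err provided by the caller):
 *
 * @code
 * struct rte_flow *flow;
 * int size;
 *
 * size = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, &err);
 * if (size < 0)
 *         return size;
 * flow = rte_calloc(__func__, 1, size, 0);
 * if (flow)
 *         size = mlx5_flow_merge(dev, flow, size, attr, pattern, actions,
 *                                &err);
 * @endcode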
2563 */ 2564 static int 2565 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow, 2566 const size_t flow_size, 2567 const struct rte_flow_attr *attributes, 2568 const struct rte_flow_item pattern[], 2569 const struct rte_flow_action actions[], 2570 struct rte_flow_error *error) 2571 { 2572 struct rte_flow local_flow = { .layers = 0, }; 2573 size_t size = sizeof(*flow); 2574 union { 2575 struct rte_flow_expand_rss buf; 2576 uint8_t buffer[2048]; 2577 } expand_buffer; 2578 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 2579 struct mlx5_flow_verbs *original_verbs = NULL; 2580 size_t original_verbs_size = 0; 2581 uint32_t original_layers = 0; 2582 int expanded_pattern_idx = 0; 2583 int ret; 2584 uint32_t i; 2585 2586 if (attributes->transfer) 2587 return mlx5_flow_merge_switch(dev, flow, flow_size, 2588 attributes, pattern, 2589 actions, error); 2590 if (size > flow_size) 2591 flow = &local_flow; 2592 ret = mlx5_flow_attributes(dev, attributes, flow, error); 2593 if (ret < 0) 2594 return ret; 2595 ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error); 2596 if (ret < 0) 2597 return ret; 2598 if (local_flow.rss.types) { 2599 unsigned int graph_root; 2600 2601 graph_root = mlx5_find_graph_root(pattern, 2602 local_flow.rss.level); 2603 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 2604 pattern, local_flow.rss.types, 2605 mlx5_support_expansion, 2606 graph_root); 2607 assert(ret > 0 && 2608 (unsigned int)ret < sizeof(expand_buffer.buffer)); 2609 } else { 2610 buf->entries = 1; 2611 buf->entry[0].pattern = (void *)(uintptr_t)pattern; 2612 } 2613 size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t), 2614 sizeof(void *)); 2615 if (size <= flow_size) 2616 flow->queue = (void *)(flow + 1); 2617 LIST_INIT(&flow->verbs); 2618 flow->layers = 0; 2619 flow->modifier = 0; 2620 flow->fate = 0; 2621 for (i = 0; i != buf->entries; ++i) { 2622 size_t off = size; 2623 size_t off2; 2624 2625 flow->layers = original_layers; 2626 size += sizeof(struct ibv_flow_attr) + 2627 sizeof(struct mlx5_flow_verbs); 2628 off2 = size; 2629 if (size < flow_size) { 2630 flow->cur_verbs = (void *)((uintptr_t)flow + off); 2631 flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1); 2632 flow->cur_verbs->specs = 2633 (void *)(flow->cur_verbs->attr + 1); 2634 } 2635 /* First iteration convert the pattern into Verbs. */ 2636 if (i == 0) { 2637 /* Actions don't need to be converted several time. */ 2638 ret = mlx5_flow_actions(dev, actions, flow, 2639 (size < flow_size) ? 2640 flow_size - size : 0, 2641 error); 2642 if (ret < 0) 2643 return ret; 2644 size += ret; 2645 } else { 2646 /* 2647 * Next iteration means the pattern has already been 2648 * converted and an expansion is necessary to match 2649 * the user RSS request. For that only the expanded 2650 * items will be converted, the common part with the 2651 * user pattern are just copied into the next buffer 2652 * zone. 2653 */ 2654 size += original_verbs_size; 2655 if (size < flow_size) { 2656 rte_memcpy(flow->cur_verbs->attr, 2657 original_verbs->attr, 2658 original_verbs_size + 2659 sizeof(struct ibv_flow_attr)); 2660 flow->cur_verbs->size = original_verbs_size; 2661 } 2662 } 2663 ret = mlx5_flow_items 2664 (dev, 2665 (const struct rte_flow_item *) 2666 &buf->entry[i].pattern[expanded_pattern_idx], 2667 flow, 2668 (size < flow_size) ? 
flow_size - size : 0, error); 2669 if (ret < 0) 2670 return ret; 2671 size += ret; 2672 if (size <= flow_size) { 2673 mlx5_flow_adjust_priority(dev, flow); 2674 LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next); 2675 } 2676 /* 2677 * Keep a pointer of the first verbs conversion and the layers 2678 * it has encountered. 2679 */ 2680 if (i == 0) { 2681 original_verbs = flow->cur_verbs; 2682 original_verbs_size = size - off2; 2683 original_layers = flow->layers; 2684 /* 2685 * move the index of the expanded pattern to the 2686 * first item not addressed yet. 2687 */ 2688 if (pattern->type == RTE_FLOW_ITEM_TYPE_END) { 2689 expanded_pattern_idx++; 2690 } else { 2691 const struct rte_flow_item *item = pattern; 2692 2693 for (item = pattern; 2694 item->type != RTE_FLOW_ITEM_TYPE_END; 2695 ++item) 2696 expanded_pattern_idx++; 2697 } 2698 } 2699 } 2700 /* Restore the origin layers in the flow. */ 2701 flow->layers = original_layers; 2702 return size; 2703 } 2704 2705 /** 2706 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 2707 * if several tunnel rules are used on this queue, the tunnel ptype will be 2708 * cleared. 2709 * 2710 * @param rxq_ctrl 2711 * Rx queue to update. 2712 */ 2713 static void 2714 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 2715 { 2716 unsigned int i; 2717 uint32_t tunnel_ptype = 0; 2718 2719 /* Look up for the ptype to use. */ 2720 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 2721 if (!rxq_ctrl->flow_tunnels_n[i]) 2722 continue; 2723 if (!tunnel_ptype) { 2724 tunnel_ptype = tunnels_info[i].ptype; 2725 } else { 2726 tunnel_ptype = 0; 2727 break; 2728 } 2729 } 2730 rxq_ctrl->rxq.tunnel = tunnel_ptype; 2731 } 2732 2733 /** 2734 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow. 2735 * 2736 * @param[in] dev 2737 * Pointer to Ethernet device. 2738 * @param[in] flow 2739 * Pointer to flow structure. 2740 */ 2741 static void 2742 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 2743 { 2744 struct priv *priv = dev->data->dev_private; 2745 const int mark = !!(flow->modifier & 2746 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2747 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2748 unsigned int i; 2749 2750 for (i = 0; i != flow->rss.queue_num; ++i) { 2751 int idx = (*flow->queue)[i]; 2752 struct mlx5_rxq_ctrl *rxq_ctrl = 2753 container_of((*priv->rxqs)[idx], 2754 struct mlx5_rxq_ctrl, rxq); 2755 2756 if (mark) { 2757 rxq_ctrl->rxq.mark = 1; 2758 rxq_ctrl->flow_mark_n++; 2759 } 2760 if (tunnel) { 2761 unsigned int j; 2762 2763 /* Increase the counter matching the flow. */ 2764 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2765 if ((tunnels_info[j].tunnel & flow->layers) == 2766 tunnels_info[j].tunnel) { 2767 rxq_ctrl->flow_tunnels_n[j]++; 2768 break; 2769 } 2770 } 2771 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2772 } 2773 } 2774 } 2775 2776 /** 2777 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 2778 * @p flow if no other flow uses it with the same kind of request. 2779 * 2780 * @param dev 2781 * Pointer to Ethernet device. 2782 * @param[in] flow 2783 * Pointer to the flow. 
2784 */ 2785 static void 2786 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 2787 { 2788 struct priv *priv = dev->data->dev_private; 2789 const int mark = !!(flow->modifier & 2790 (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)); 2791 const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL); 2792 unsigned int i; 2793 2794 assert(dev->data->dev_started); 2795 for (i = 0; i != flow->rss.queue_num; ++i) { 2796 int idx = (*flow->queue)[i]; 2797 struct mlx5_rxq_ctrl *rxq_ctrl = 2798 container_of((*priv->rxqs)[idx], 2799 struct mlx5_rxq_ctrl, rxq); 2800 2801 if (mark) { 2802 rxq_ctrl->flow_mark_n--; 2803 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 2804 } 2805 if (tunnel) { 2806 unsigned int j; 2807 2808 /* Decrease the counter matching the flow. */ 2809 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 2810 if ((tunnels_info[j].tunnel & flow->layers) == 2811 tunnels_info[j].tunnel) { 2812 rxq_ctrl->flow_tunnels_n[j]--; 2813 break; 2814 } 2815 } 2816 mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl); 2817 } 2818 } 2819 } 2820 2821 /** 2822 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 2823 * 2824 * @param dev 2825 * Pointer to Ethernet device. 2826 */ 2827 static void 2828 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev) 2829 { 2830 struct priv *priv = dev->data->dev_private; 2831 unsigned int i; 2832 2833 for (i = 0; i != priv->rxqs_n; ++i) { 2834 struct mlx5_rxq_ctrl *rxq_ctrl; 2835 unsigned int j; 2836 2837 if (!(*priv->rxqs)[i]) 2838 continue; 2839 rxq_ctrl = container_of((*priv->rxqs)[i], 2840 struct mlx5_rxq_ctrl, rxq); 2841 rxq_ctrl->flow_mark_n = 0; 2842 rxq_ctrl->rxq.mark = 0; 2843 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 2844 rxq_ctrl->flow_tunnels_n[j] = 0; 2845 rxq_ctrl->rxq.tunnel = 0; 2846 } 2847 } 2848 2849 /** 2850 * Validate a flow supported by the NIC. 2851 * 2852 * @see rte_flow_validate() 2853 * @see rte_flow_ops 2854 */ 2855 int 2856 mlx5_flow_validate(struct rte_eth_dev *dev, 2857 const struct rte_flow_attr *attr, 2858 const struct rte_flow_item items[], 2859 const struct rte_flow_action actions[], 2860 struct rte_flow_error *error) 2861 { 2862 int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error); 2863 2864 if (ret < 0) 2865 return ret; 2866 return 0; 2867 } 2868 2869 /** 2870 * Remove the flow. 2871 * 2872 * @param[in] dev 2873 * Pointer to Ethernet device. 2874 * @param[in, out] flow 2875 * Pointer to flow structure. 2876 */ 2877 static void 2878 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2879 { 2880 struct priv *priv = dev->data->dev_private; 2881 struct mlx5_flow_verbs *verbs; 2882 2883 if (flow->nl_flow && priv->mnl_socket) 2884 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL); 2885 LIST_FOREACH(verbs, &flow->verbs, next) { 2886 if (verbs->flow) { 2887 claim_zero(mlx5_glue->destroy_flow(verbs->flow)); 2888 verbs->flow = NULL; 2889 } 2890 if (verbs->hrxq) { 2891 if (flow->fate & MLX5_FLOW_FATE_DROP) 2892 mlx5_hrxq_drop_release(dev); 2893 else 2894 mlx5_hrxq_release(dev, verbs->hrxq); 2895 verbs->hrxq = NULL; 2896 } 2897 } 2898 if (flow->counter) { 2899 mlx5_flow_counter_release(flow->counter); 2900 flow->counter = NULL; 2901 } 2902 } 2903 2904 /** 2905 * Apply the flow. 2906 * 2907 * @param[in] dev 2908 * Pointer to Ethernet device structure. 2909 * @param[in, out] flow 2910 * Pointer to flow structure. 2911 * @param[out] error 2912 * Pointer to error structure. 2913 * 2914 * @return 2915 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2916 */ 2917 static int 2918 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2919 struct rte_flow_error *error) 2920 { 2921 struct priv *priv = dev->data->dev_private; 2922 struct mlx5_flow_verbs *verbs; 2923 int err; 2924 2925 LIST_FOREACH(verbs, &flow->verbs, next) { 2926 if (flow->fate & MLX5_FLOW_FATE_DROP) { 2927 verbs->hrxq = mlx5_hrxq_drop_new(dev); 2928 if (!verbs->hrxq) { 2929 rte_flow_error_set 2930 (error, errno, 2931 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2932 NULL, 2933 "cannot get drop hash queue"); 2934 goto error; 2935 } 2936 } else { 2937 struct mlx5_hrxq *hrxq; 2938 2939 hrxq = mlx5_hrxq_get(dev, flow->key, 2940 MLX5_RSS_HASH_KEY_LEN, 2941 verbs->hash_fields, 2942 (*flow->queue), 2943 flow->rss.queue_num); 2944 if (!hrxq) 2945 hrxq = mlx5_hrxq_new(dev, flow->key, 2946 MLX5_RSS_HASH_KEY_LEN, 2947 verbs->hash_fields, 2948 (*flow->queue), 2949 flow->rss.queue_num, 2950 !!(flow->layers & 2951 MLX5_FLOW_LAYER_TUNNEL)); 2952 if (!hrxq) { 2953 rte_flow_error_set 2954 (error, rte_errno, 2955 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2956 NULL, 2957 "cannot get hash queue"); 2958 goto error; 2959 } 2960 verbs->hrxq = hrxq; 2961 } 2962 verbs->flow = 2963 mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr); 2964 if (!verbs->flow) { 2965 rte_flow_error_set(error, errno, 2966 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2967 NULL, 2968 "hardware refuses to create flow"); 2969 goto error; 2970 } 2971 } 2972 if (flow->nl_flow && 2973 priv->mnl_socket && 2974 mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error)) 2975 goto error; 2976 return 0; 2977 error: 2978 err = rte_errno; /* Save rte_errno before cleanup. */ 2979 LIST_FOREACH(verbs, &flow->verbs, next) { 2980 if (verbs->hrxq) { 2981 if (flow->fate & MLX5_FLOW_FATE_DROP) 2982 mlx5_hrxq_drop_release(dev); 2983 else 2984 mlx5_hrxq_release(dev, verbs->hrxq); 2985 verbs->hrxq = NULL; 2986 } 2987 } 2988 rte_errno = err; /* Restore rte_errno. */ 2989 return -rte_errno; 2990 } 2991 2992 /** 2993 * Create a flow and add it to @p list. 2994 * 2995 * @param dev 2996 * Pointer to Ethernet device. 2997 * @param list 2998 * Pointer to a TAILQ flow list. 2999 * @param[in] attr 3000 * Flow rule attributes. 3001 * @param[in] items 3002 * Pattern specification (list terminated by the END pattern item). 3003 * @param[in] actions 3004 * Associated actions (list terminated by the END action). 3005 * @param[out] error 3006 * Perform verbose error reporting if not NULL. 3007 * 3008 * @return 3009 * A flow on success, NULL otherwise and rte_errno is set. 
3010 */ 3011 static struct rte_flow * 3012 mlx5_flow_list_create(struct rte_eth_dev *dev, 3013 struct mlx5_flows *list, 3014 const struct rte_flow_attr *attr, 3015 const struct rte_flow_item items[], 3016 const struct rte_flow_action actions[], 3017 struct rte_flow_error *error) 3018 { 3019 struct rte_flow *flow = NULL; 3020 size_t size = 0; 3021 int ret; 3022 3023 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 3024 if (ret < 0) 3025 return NULL; 3026 size = ret; 3027 flow = rte_calloc(__func__, 1, size, 0); 3028 if (!flow) { 3029 rte_flow_error_set(error, ENOMEM, 3030 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3031 NULL, 3032 "not enough memory to create flow"); 3033 return NULL; 3034 } 3035 ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error); 3036 if (ret < 0) { 3037 rte_free(flow); 3038 return NULL; 3039 } 3040 assert((size_t)ret == size); 3041 if (dev->data->dev_started) { 3042 ret = mlx5_flow_apply(dev, flow, error); 3043 if (ret < 0) { 3044 ret = rte_errno; /* Save rte_errno before cleanup. */ 3045 if (flow) { 3046 mlx5_flow_remove(dev, flow); 3047 rte_free(flow); 3048 } 3049 rte_errno = ret; /* Restore rte_errno. */ 3050 return NULL; 3051 } 3052 } 3053 TAILQ_INSERT_TAIL(list, flow, next); 3054 mlx5_flow_rxq_flags_set(dev, flow); 3055 return flow; 3056 } 3057 3058 /** 3059 * Create a flow. 3060 * 3061 * @see rte_flow_create() 3062 * @see rte_flow_ops 3063 */ 3064 struct rte_flow * 3065 mlx5_flow_create(struct rte_eth_dev *dev, 3066 const struct rte_flow_attr *attr, 3067 const struct rte_flow_item items[], 3068 const struct rte_flow_action actions[], 3069 struct rte_flow_error *error) 3070 { 3071 return mlx5_flow_list_create 3072 (dev, &((struct priv *)dev->data->dev_private)->flows, 3073 attr, items, actions, error); 3074 } 3075 3076 /** 3077 * Destroy a flow in a list. 3078 * 3079 * @param dev 3080 * Pointer to Ethernet device. 3081 * @param list 3082 * Pointer to a TAILQ flow list. 3083 * @param[in] flow 3084 * Flow to destroy. 3085 */ 3086 static void 3087 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, 3088 struct rte_flow *flow) 3089 { 3090 mlx5_flow_remove(dev, flow); 3091 TAILQ_REMOVE(list, flow, next); 3092 /* 3093 * Update RX queue flags only if port is started, otherwise it is 3094 * already clean. 3095 */ 3096 if (dev->data->dev_started) 3097 mlx5_flow_rxq_flags_trim(dev, flow); 3098 rte_free(flow); 3099 } 3100 3101 /** 3102 * Destroy all flows. 3103 * 3104 * @param dev 3105 * Pointer to Ethernet device. 3106 * @param list 3107 * Pointer to a TAILQ flow list. 3108 */ 3109 void 3110 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list) 3111 { 3112 while (!TAILQ_EMPTY(list)) { 3113 struct rte_flow *flow; 3114 3115 flow = TAILQ_FIRST(list); 3116 mlx5_flow_list_destroy(dev, list, flow); 3117 } 3118 } 3119 3120 /** 3121 * Remove all flows. 3122 * 3123 * @param dev 3124 * Pointer to Ethernet device. 3125 * @param list 3126 * Pointer to a TAILQ flow list. 3127 */ 3128 void 3129 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list) 3130 { 3131 struct rte_flow *flow; 3132 3133 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) 3134 mlx5_flow_remove(dev, flow); 3135 mlx5_flow_rxq_flags_clear(dev); 3136 } 3137 3138 /** 3139 * Add all flows. 3140 * 3141 * @param dev 3142 * Pointer to Ethernet device. 3143 * @param list 3144 * Pointer to a TAILQ flow list. 3145 * 3146 * @return 3147 * 0 on success, a negative errno value otherwise and rte_errno is set. 
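 *
 * Together with mlx5_flow_stop(), this is meant to be driven from the port
 * start/stop handlers, e.g. the sketch below (error label and priv are
 * assumed from the caller context):
 *
 * @code
 * ret = mlx5_flow_start(dev, &priv->flows);
 * if (ret)
 *         goto error;
 * @endcode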
3148 */ 3149 int 3150 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list) 3151 { 3152 struct rte_flow *flow; 3153 struct rte_flow_error error; 3154 int ret = 0; 3155 3156 TAILQ_FOREACH(flow, list, next) { 3157 ret = mlx5_flow_apply(dev, flow, &error); 3158 if (ret < 0) 3159 goto error; 3160 mlx5_flow_rxq_flags_set(dev, flow); 3161 } 3162 return 0; 3163 error: 3164 ret = rte_errno; /* Save rte_errno before cleanup. */ 3165 mlx5_flow_stop(dev, list); 3166 rte_errno = ret; /* Restore rte_errno. */ 3167 return -rte_errno; 3168 } 3169 3170 /** 3171 * Verify the flow list is empty 3172 * 3173 * @param dev 3174 * Pointer to Ethernet device. 3175 * 3176 * @return the number of flows not released. 3177 */ 3178 int 3179 mlx5_flow_verify(struct rte_eth_dev *dev) 3180 { 3181 struct priv *priv = dev->data->dev_private; 3182 struct rte_flow *flow; 3183 int ret = 0; 3184 3185 TAILQ_FOREACH(flow, &priv->flows, next) { 3186 DRV_LOG(DEBUG, "port %u flow %p still referenced", 3187 dev->data->port_id, (void *)flow); 3188 ++ret; 3189 } 3190 return ret; 3191 } 3192 3193 /** 3194 * Enable a control flow configured from the control plane. 3195 * 3196 * @param dev 3197 * Pointer to Ethernet device. 3198 * @param eth_spec 3199 * An Ethernet flow spec to apply. 3200 * @param eth_mask 3201 * An Ethernet flow mask to apply. 3202 * @param vlan_spec 3203 * A VLAN flow spec to apply. 3204 * @param vlan_mask 3205 * A VLAN flow mask to apply. 3206 * 3207 * @return 3208 * 0 on success, a negative errno value otherwise and rte_errno is set. 3209 */ 3210 int 3211 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 3212 struct rte_flow_item_eth *eth_spec, 3213 struct rte_flow_item_eth *eth_mask, 3214 struct rte_flow_item_vlan *vlan_spec, 3215 struct rte_flow_item_vlan *vlan_mask) 3216 { 3217 struct priv *priv = dev->data->dev_private; 3218 const struct rte_flow_attr attr = { 3219 .ingress = 1, 3220 .priority = MLX5_FLOW_PRIO_RSVD, 3221 }; 3222 struct rte_flow_item items[] = { 3223 { 3224 .type = RTE_FLOW_ITEM_TYPE_ETH, 3225 .spec = eth_spec, 3226 .last = NULL, 3227 .mask = eth_mask, 3228 }, 3229 { 3230 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN : 3231 RTE_FLOW_ITEM_TYPE_END, 3232 .spec = vlan_spec, 3233 .last = NULL, 3234 .mask = vlan_mask, 3235 }, 3236 { 3237 .type = RTE_FLOW_ITEM_TYPE_END, 3238 }, 3239 }; 3240 uint16_t queue[priv->reta_idx_n]; 3241 struct rte_flow_action_rss action_rss = { 3242 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 3243 .level = 0, 3244 .types = priv->rss_conf.rss_hf, 3245 .key_len = priv->rss_conf.rss_key_len, 3246 .queue_num = priv->reta_idx_n, 3247 .key = priv->rss_conf.rss_key, 3248 .queue = queue, 3249 }; 3250 struct rte_flow_action actions[] = { 3251 { 3252 .type = RTE_FLOW_ACTION_TYPE_RSS, 3253 .conf = &action_rss, 3254 }, 3255 { 3256 .type = RTE_FLOW_ACTION_TYPE_END, 3257 }, 3258 }; 3259 struct rte_flow *flow; 3260 struct rte_flow_error error; 3261 unsigned int i; 3262 3263 if (!priv->reta_idx_n) { 3264 rte_errno = EINVAL; 3265 return -rte_errno; 3266 } 3267 for (i = 0; i != priv->reta_idx_n; ++i) 3268 queue[i] = (*priv->reta_idx)[i]; 3269 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items, 3270 actions, &error); 3271 if (!flow) 3272 return -rte_errno; 3273 return 0; 3274 } 3275 3276 /** 3277 * Enable a flow control configured from the control plane. 3278 * 3279 * @param dev 3280 * Pointer to Ethernet device. 3281 * @param eth_spec 3282 * An Ethernet flow spec to apply. 3283 * @param eth_mask 3284 * An Ethernet flow mask to apply. 
3285 * 3286 * @return 3287 * 0 on success, a negative errno value otherwise and rte_errno is set. 3288 */ 3289 int 3290 mlx5_ctrl_flow(struct rte_eth_dev *dev, 3291 struct rte_flow_item_eth *eth_spec, 3292 struct rte_flow_item_eth *eth_mask) 3293 { 3294 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 3295 } 3296 3297 /** 3298 * Destroy a flow. 3299 * 3300 * @see rte_flow_destroy() 3301 * @see rte_flow_ops 3302 */ 3303 int 3304 mlx5_flow_destroy(struct rte_eth_dev *dev, 3305 struct rte_flow *flow, 3306 struct rte_flow_error *error __rte_unused) 3307 { 3308 struct priv *priv = dev->data->dev_private; 3309 3310 mlx5_flow_list_destroy(dev, &priv->flows, flow); 3311 return 0; 3312 } 3313 3314 /** 3315 * Destroy all flows. 3316 * 3317 * @see rte_flow_flush() 3318 * @see rte_flow_ops 3319 */ 3320 int 3321 mlx5_flow_flush(struct rte_eth_dev *dev, 3322 struct rte_flow_error *error __rte_unused) 3323 { 3324 struct priv *priv = dev->data->dev_private; 3325 3326 mlx5_flow_list_flush(dev, &priv->flows); 3327 return 0; 3328 } 3329 3330 /** 3331 * Isolated mode. 3332 * 3333 * @see rte_flow_isolate() 3334 * @see rte_flow_ops 3335 */ 3336 int 3337 mlx5_flow_isolate(struct rte_eth_dev *dev, 3338 int enable, 3339 struct rte_flow_error *error) 3340 { 3341 struct priv *priv = dev->data->dev_private; 3342 3343 if (dev->data->dev_started) { 3344 rte_flow_error_set(error, EBUSY, 3345 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3346 NULL, 3347 "port must be stopped first"); 3348 return -rte_errno; 3349 } 3350 priv->isolated = !!enable; 3351 if (enable) 3352 dev->dev_ops = &mlx5_dev_ops_isolate; 3353 else 3354 dev->dev_ops = &mlx5_dev_ops; 3355 return 0; 3356 } 3357 3358 /** 3359 * Query flow counter. 3360 * 3361 * @param flow 3362 * Pointer to the flow. 3363 * 3364 * @return 3365 * 0 on success, a negative errno value otherwise and rte_errno is set. 3366 */ 3367 static int 3368 mlx5_flow_query_count(struct rte_flow *flow __rte_unused, 3369 void *data __rte_unused, 3370 struct rte_flow_error *error) 3371 { 3372 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 3373 if (flow->modifier & MLX5_FLOW_MOD_COUNT) { 3374 struct rte_flow_query_count *qc = data; 3375 uint64_t counters[2] = {0, 0}; 3376 struct ibv_query_counter_set_attr query_cs_attr = { 3377 .cs = flow->counter->cs, 3378 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE, 3379 }; 3380 struct ibv_counter_set_data query_out = { 3381 .out = counters, 3382 .outlen = 2 * sizeof(uint64_t), 3383 }; 3384 int err = mlx5_glue->query_counter_set(&query_cs_attr, 3385 &query_out); 3386 3387 if (err) 3388 return rte_flow_error_set 3389 (error, err, 3390 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3391 NULL, 3392 "cannot read counter"); 3393 qc->hits_set = 1; 3394 qc->bytes_set = 1; 3395 qc->hits = counters[0] - flow->counter->hits; 3396 qc->bytes = counters[1] - flow->counter->bytes; 3397 if (qc->reset) { 3398 flow->counter->hits = counters[0]; 3399 flow->counter->bytes = counters[1]; 3400 } 3401 return 0; 3402 } 3403 return rte_flow_error_set(error, ENOTSUP, 3404 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3405 NULL, 3406 "flow does not have counter"); 3407 #endif 3408 return rte_flow_error_set(error, ENOTSUP, 3409 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3410 NULL, 3411 "counters are not available"); 3412 } 3413 3414 /** 3415 * Query a flows. 
3416 * 3417 * @see rte_flow_query() 3418 * @see rte_flow_ops 3419 */ 3420 int 3421 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused, 3422 struct rte_flow *flow, 3423 const struct rte_flow_action *actions, 3424 void *data, 3425 struct rte_flow_error *error) 3426 { 3427 int ret = 0; 3428 3429 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3430 switch (actions->type) { 3431 case RTE_FLOW_ACTION_TYPE_VOID: 3432 break; 3433 case RTE_FLOW_ACTION_TYPE_COUNT: 3434 ret = mlx5_flow_query_count(flow, data, error); 3435 break; 3436 default: 3437 return rte_flow_error_set(error, ENOTSUP, 3438 RTE_FLOW_ERROR_TYPE_ACTION, 3439 actions, 3440 "action not supported"); 3441 } 3442 if (ret < 0) 3443 return ret; 3444 } 3445 return 0; 3446 } 3447 3448 /** 3449 * Convert a flow director filter to a generic flow. 3450 * 3451 * @param dev 3452 * Pointer to Ethernet device. 3453 * @param fdir_filter 3454 * Flow director filter to add. 3455 * @param attributes 3456 * Generic flow parameters structure. 3457 * 3458 * @return 3459 * 0 on success, a negative errno value otherwise and rte_errno is set. 3460 */ 3461 static int 3462 mlx5_fdir_filter_convert(struct rte_eth_dev *dev, 3463 const struct rte_eth_fdir_filter *fdir_filter, 3464 struct mlx5_fdir *attributes) 3465 { 3466 struct priv *priv = dev->data->dev_private; 3467 const struct rte_eth_fdir_input *input = &fdir_filter->input; 3468 const struct rte_eth_fdir_masks *mask = 3469 &dev->data->dev_conf.fdir_conf.mask; 3470 3471 /* Validate queue number. */ 3472 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 3473 DRV_LOG(ERR, "port %u invalid queue number %d", 3474 dev->data->port_id, fdir_filter->action.rx_queue); 3475 rte_errno = EINVAL; 3476 return -rte_errno; 3477 } 3478 attributes->attr.ingress = 1; 3479 attributes->items[0] = (struct rte_flow_item) { 3480 .type = RTE_FLOW_ITEM_TYPE_ETH, 3481 .spec = &attributes->l2, 3482 .mask = &attributes->l2_mask, 3483 }; 3484 switch (fdir_filter->action.behavior) { 3485 case RTE_ETH_FDIR_ACCEPT: 3486 attributes->actions[0] = (struct rte_flow_action){ 3487 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 3488 .conf = &attributes->queue, 3489 }; 3490 break; 3491 case RTE_ETH_FDIR_REJECT: 3492 attributes->actions[0] = (struct rte_flow_action){ 3493 .type = RTE_FLOW_ACTION_TYPE_DROP, 3494 }; 3495 break; 3496 default: 3497 DRV_LOG(ERR, "port %u invalid behavior %d", 3498 dev->data->port_id, 3499 fdir_filter->action.behavior); 3500 rte_errno = ENOTSUP; 3501 return -rte_errno; 3502 } 3503 attributes->queue.index = fdir_filter->action.rx_queue; 3504 /* Handle L3. 
*/ 3505 switch (fdir_filter->input.flow_type) { 3506 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3507 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3508 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3509 attributes->l3.ipv4.hdr = (struct ipv4_hdr){ 3510 .src_addr = input->flow.ip4_flow.src_ip, 3511 .dst_addr = input->flow.ip4_flow.dst_ip, 3512 .time_to_live = input->flow.ip4_flow.ttl, 3513 .type_of_service = input->flow.ip4_flow.tos, 3514 .next_proto_id = input->flow.ip4_flow.proto, 3515 }; 3516 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ 3517 .src_addr = mask->ipv4_mask.src_ip, 3518 .dst_addr = mask->ipv4_mask.dst_ip, 3519 .time_to_live = mask->ipv4_mask.ttl, 3520 .type_of_service = mask->ipv4_mask.tos, 3521 .next_proto_id = mask->ipv4_mask.proto, 3522 }; 3523 attributes->items[1] = (struct rte_flow_item){ 3524 .type = RTE_FLOW_ITEM_TYPE_IPV4, 3525 .spec = &attributes->l3, 3526 .mask = &attributes->l3_mask, 3527 }; 3528 break; 3529 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3530 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3531 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3532 attributes->l3.ipv6.hdr = (struct ipv6_hdr){ 3533 .hop_limits = input->flow.ipv6_flow.hop_limits, 3534 .proto = input->flow.ipv6_flow.proto, 3535 }; 3536 3537 memcpy(attributes->l3.ipv6.hdr.src_addr, 3538 input->flow.ipv6_flow.src_ip, 3539 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3540 memcpy(attributes->l3.ipv6.hdr.dst_addr, 3541 input->flow.ipv6_flow.dst_ip, 3542 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3543 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 3544 mask->ipv6_mask.src_ip, 3545 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3546 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 3547 mask->ipv6_mask.dst_ip, 3548 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3549 attributes->items[1] = (struct rte_flow_item){ 3550 .type = RTE_FLOW_ITEM_TYPE_IPV6, 3551 .spec = &attributes->l3, 3552 .mask = &attributes->l3_mask, 3553 }; 3554 break; 3555 default: 3556 DRV_LOG(ERR, "port %u invalid flow type%d", 3557 dev->data->port_id, fdir_filter->input.flow_type); 3558 rte_errno = ENOTSUP; 3559 return -rte_errno; 3560 } 3561 /* Handle L4. 
*/ 3562 switch (fdir_filter->input.flow_type) { 3563 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3564 attributes->l4.udp.hdr = (struct udp_hdr){ 3565 .src_port = input->flow.udp4_flow.src_port, 3566 .dst_port = input->flow.udp4_flow.dst_port, 3567 }; 3568 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3569 .src_port = mask->src_port_mask, 3570 .dst_port = mask->dst_port_mask, 3571 }; 3572 attributes->items[2] = (struct rte_flow_item){ 3573 .type = RTE_FLOW_ITEM_TYPE_UDP, 3574 .spec = &attributes->l4, 3575 .mask = &attributes->l4_mask, 3576 }; 3577 break; 3578 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3579 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3580 .src_port = input->flow.tcp4_flow.src_port, 3581 .dst_port = input->flow.tcp4_flow.dst_port, 3582 }; 3583 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3584 .src_port = mask->src_port_mask, 3585 .dst_port = mask->dst_port_mask, 3586 }; 3587 attributes->items[2] = (struct rte_flow_item){ 3588 .type = RTE_FLOW_ITEM_TYPE_TCP, 3589 .spec = &attributes->l4, 3590 .mask = &attributes->l4_mask, 3591 }; 3592 break; 3593 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3594 attributes->l4.udp.hdr = (struct udp_hdr){ 3595 .src_port = input->flow.udp6_flow.src_port, 3596 .dst_port = input->flow.udp6_flow.dst_port, 3597 }; 3598 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3599 .src_port = mask->src_port_mask, 3600 .dst_port = mask->dst_port_mask, 3601 }; 3602 attributes->items[2] = (struct rte_flow_item){ 3603 .type = RTE_FLOW_ITEM_TYPE_UDP, 3604 .spec = &attributes->l4, 3605 .mask = &attributes->l4_mask, 3606 }; 3607 break; 3608 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3609 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3610 .src_port = input->flow.tcp6_flow.src_port, 3611 .dst_port = input->flow.tcp6_flow.dst_port, 3612 }; 3613 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3614 .src_port = mask->src_port_mask, 3615 .dst_port = mask->dst_port_mask, 3616 }; 3617 attributes->items[2] = (struct rte_flow_item){ 3618 .type = RTE_FLOW_ITEM_TYPE_TCP, 3619 .spec = &attributes->l4, 3620 .mask = &attributes->l4_mask, 3621 }; 3622 break; 3623 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3624 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3625 break; 3626 default: 3627 DRV_LOG(ERR, "port %u invalid flow type%d", 3628 dev->data->port_id, fdir_filter->input.flow_type); 3629 rte_errno = ENOTSUP; 3630 return -rte_errno; 3631 } 3632 return 0; 3633 } 3634 3635 /** 3636 * Add new flow director filter and store it in list. 3637 * 3638 * @param dev 3639 * Pointer to Ethernet device. 3640 * @param fdir_filter 3641 * Flow director filter to add. 3642 * 3643 * @return 3644 * 0 on success, a negative errno value otherwise and rte_errno is set. 
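 *
 * The legacy filter is described with a rte_eth_fdir_filter structure,
 * e.g. the sketch below (hypothetical values, addresses and ports in
 * network byte order):
 *
 * @code
 * struct rte_eth_fdir_filter f = {
 *         .input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *         .input.flow.udp4_flow.ip.dst_ip = RTE_BE32(0x0a000001),
 *         .input.flow.udp4_flow.dst_port = RTE_BE16(4789),
 *         .action.behavior = RTE_ETH_FDIR_ACCEPT,
 *         .action.rx_queue = 1,
 * };
 * @endcode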
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
                     const struct rte_eth_fdir_filter *fdir_filter)
{
        struct priv *priv = dev->data->dev_private;
        struct mlx5_fdir attributes = {
                .attr.group = 0,
                .l2_mask = {
                        .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                },
        };
        struct rte_flow_error error;
        struct rte_flow *flow;
        int ret;

        ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
        if (ret)
                return ret;
        flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
                                     attributes.items, attributes.actions,
                                     &error);
        if (flow) {
                DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
                        (void *)flow);
                return 0;
        }
        return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
                        const struct rte_eth_fdir_filter *fdir_filter
                        __rte_unused)
{
        rte_errno = ENOTSUP;
        return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
                        const struct rte_eth_fdir_filter *fdir_filter)
{
        int ret;

        ret = mlx5_fdir_filter_delete(dev, fdir_filter);
        if (ret)
                return ret;
        return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
        struct priv *priv = dev->data->dev_private;

        mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
        struct rte_eth_fdir_masks *mask =
                &dev->data->dev_conf.fdir_conf.mask;

        fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
        fdir_info->guarant_spc = 0;
        rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
        fdir_info->max_flexpayload = 0;
        fdir_info->flow_types_mask[0] = 0;
        fdir_info->flex_payload_unit = 0;
        fdir_info->max_flex_payload_segment_num = 0;
        fdir_info->flex_payload_limit = 0;
        memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
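/*
 * Illustrative query sketch (application side, hypothetical port_id):
 * RTE_ETH_FILTER_INFO lands in mlx5_fdir_info_get() above, which only
 * reports the configured mode and masks; the flexible payload fields are
 * zeroed because they are not supported by this PMD.
 *
 *   struct rte_eth_fdir_info info;
 *
 *   memset(&info, 0, sizeof(info));
 *   rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                           RTE_ETH_FILTER_INFO, &info);
 *   printf("fdir mode: %d\n", info.mode);
 */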
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
                    void *arg)
{
        enum rte_fdir_mode fdir_mode =
                dev->data->dev_conf.fdir_conf.mode;

        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;
        if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
            fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                DRV_LOG(ERR, "port %u flow director mode %d not supported",
                        dev->data->port_id, fdir_mode);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        switch (filter_op) {
        case RTE_ETH_FILTER_ADD:
                return mlx5_fdir_filter_add(dev, arg);
        case RTE_ETH_FILTER_UPDATE:
                return mlx5_fdir_filter_update(dev, arg);
        case RTE_ETH_FILTER_DELETE:
                return mlx5_fdir_filter_delete(dev, arg);
        case RTE_ETH_FILTER_FLUSH:
                mlx5_fdir_filter_flush(dev);
                break;
        case RTE_ETH_FILTER_INFO:
                mlx5_fdir_info_get(dev, arg);
                break;
        default:
                DRV_LOG(DEBUG, "port %u unknown operation %u",
                        dev->data->port_id, filter_op);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
                if (filter_op != RTE_ETH_FILTER_GET) {
                        rte_errno = EINVAL;
                        return -rte_errno;
                }
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        case RTE_ETH_FILTER_FDIR:
                return mlx5_fdir_ctrl_func(dev, filter_op, arg);
        default:
                DRV_LOG(ERR, "port %u filter type (%d) not supported",
                        dev->data->port_id, filter_type);
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
        return 0;
}
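/*
 * Illustrative sketch (application side, hypothetical port_id): the
 * RTE_ETH_FILTER_GENERIC branch above is how the rte_flow library obtains
 * this PMD's flow ops; applications normally go through rte_flow_create()
 * and friends rather than fetching the ops directly.
 *
 *   const struct rte_flow_ops *ops = NULL;
 *
 *   rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                           RTE_ETH_FILTER_GET, &ops);
 */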