/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"

/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						 MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6,
						 MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_aged_flows = mlx5_flow_get_aged_flows,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action actions[2];
	struct rte_flow_action_queue queue;
};

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NONE;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NONE;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_SFX:
		/*
		 * If meter color and flow match share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
		MLX5_ASSERT(priv->mtr_color_reg != REG_NONE);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is used only in the meter
		 * suffix sub-flow when a meter is present, so it is safe to
		 * share the same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If the meter is enabled, it engages registers for color
		 * match and flow match. If the meter color match does not use
		 * REG_C_2, the REG_C_x used by the meter color match must be
		 * skipped.
		 * If no meter is enabled, all available registers can be used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than 2.
		 * Take care not to conflict with the meter color REG_C_x.
		 * If the available index REG_C_y >= REG_C_x, skip the
		 * color register.
		 */
		if (skip_mtr_reg && config->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						       RTE_FLOW_ERROR_TYPE_ITEM,
							NULL, "invalid tag id");
			if (config->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NONE)
				return config->flow_mreg_c
					       [id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return config->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;

	/*
	 * Having available reg_c can be regarded inclusively as supporting
	 * extensive flow metadata register, which could mean,
	 * - metadata register copy action by modify header.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return config->flow_mreg_c[2] != REG_NONE;
}

/**
 * Discover the maximum number of priorities available.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
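 *
 * A minimal sketch of how the result relates to the priority tables above
 * (illustrative only; the 16-Verbs-priority case is an assumption):
 *
 * @code
 * int flow_prio = mlx5_flow_discover_priorities(dev);
 * // With 16 Verbs priorities flow_prio == RTE_DIM(priority_map_5) == 5,
 * // and rule priorities are later expanded through priority_map_5[][].
 * @endcode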
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
			.port = (uint8_t)priv->dev_port,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	mlx5_hrxq_drop_release(dev);
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, priority);
		return -rte_errno;
	}
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

/**
 * Adjust flow priority based on the highest layer and the requested priority.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] priority
 *   The rule base priority.
 * @param[in] subpriority
 *   The priority based on the items.
 *
 * @return
 *   The new priority.
 */
uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
				   uint32_t subpriority)
{
	uint32_t res = 0;
	struct mlx5_priv *priv = dev->data->dev_private;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		res = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		res = priority_map_5[priority][subpriority];
		break;
	}
	return res;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
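 *
 * A minimal usage sketch (illustrative only; UDP is an arbitrary example and
 * each validator supplies its own NIC mask):
 *
 * @code
 * const uint8_t *use_mask = item->mask ?
 *			     (const uint8_t *)item->mask :
 *			     (const uint8_t *)&rte_flow_item_udp_mask;
 * ret = mlx5_flow_item_acceptable(item, use_mask,
 *				   (const uint8_t *)&rte_flow_item_udp_mask,
 *				   sizeof(struct rte_flow_item_udp), error);
 * @endcode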
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the mlx5 RSS descriptor.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}

/**
 * Look up and set the ptype in the Rx queue data. A single ptype can be used;
 * if several tunnel rules are used on this queue, the tunnel ptype is cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must be always enabled (metadata may arrive
		 * from another port - not from local flows only).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
			struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	MLX5_ASSERT(dev->data->dev_started);
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->flow_mark_n--;
			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
		}
		if (tunnel) {
			unsigned int j;

			/* Decrease the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]--;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_trim(dev, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		unsigned int j;

		if (!(*priv->rxqs)[i])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[i],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
			rxq_ctrl->flow_tunnels_n[j] = 0;
		rxq_ctrl->rxq.tunnel = 0;
	}
}

/**
 * Set the Rx queue dynamic metadata (mask and offset) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *data;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		if (!(*priv->rxqs)[i])
			continue;
		data = (*priv->rxqs)[i];
		if (!rte_flow_dynf_metadata_avail()) {
			data->dynf_meta = 0;
			data->flow_meta_mask = 0;
			data->flow_meta_offset = -1;
		} else {
			data->dynf_meta = 1;
			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
		}
	}
}

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
		      enum rte_flow_action_type action)
{
	if (actions == NULL)
		return NULL;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
		if (actions->type == action)
			return actions;
	return NULL;
}

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't mark and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 flag"
					  " actions in same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "flag action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
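 *
 * An action list this validator accepts, as a sketch (the mark ID and queue
 * index are arbitrary assumptions):
 *
 * @code
 * struct rte_flow_action_mark mark = { .id = 0x42 };
 * struct rte_flow_action_queue queue = { .index = 0 };
 * struct rte_flow_action actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode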
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
			       uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't flag and mark in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 mark actions in same"
					  " flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "mark action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "drop action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
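 *
 * Typical call-site sketch (illustrative; the queue index and the caller's
 * action_flags bookkeeping are assumptions):
 *
 * @code
 * const struct rte_flow_action_queue conf = { .index = 3 };
 * const struct rte_flow_action act = {
 *	.type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *	.conf = &conf,
 * };
 * ret = mlx5_flow_validate_action_queue(&act, action_flags, dev, attr, error);
 * @endcode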
 */
int
mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
				uint64_t action_flags,
				struct rte_eth_dev *dev,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "queue action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the rss action.
 *
 * @param[in] action
 *   Pointer to the rss action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[in] item_flags
 *   Items that were detected.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
			      uint64_t action_flags,
			      struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attr,
			      uint64_t item_flags,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_rss *rss = action->conf;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int i;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions"
					  " in same flow");
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->func,
					  "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	if (rss->level > 2)
#else
	if (rss->level > 1)
#endif
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->level,
					  "tunnel RSS is not supported");
	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
	if (rss->key_len == 0 && rss->key != NULL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key length 0");
	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too small");
	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too large");
	if (rss->queue_num > priv->config.ind_table_max_size)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->queue_num,
					  "number of queues too large");
	if (rss->types & MLX5_RSS_HF_MASK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->types,
					  "some RSS protocols are not"
					  " supported");
	if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
	    !(rss->types & ETH_RSS_IP))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L3 partial RSS requested but L3 RSS"
					  " type not specified");
	if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
	    !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L4 partial RSS requested but L4 RSS"
					  " type not specified");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (!rss->queue_num)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No queues configured");
	for (i = 0; i != rss->queue_num; ++i) {
		if (rss->queue[i] >= priv->rxqs_n)
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue index out of range");
		if (!(*priv->rxqs)[rss->queue[i]])
			return rte_flow_error_set
				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue is not configured");
	}
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "rss action not supported for "
					  "egress");
	if (rss->level > 1 && !tunnel)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "inner RSS is not supported for "
					  "non-tunnel flows");
	return 0;
}

/*
 * Validate the default miss action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_default_miss(uint64_t action_flags,
				       const struct rte_flow_attr *attr,
				       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "default miss action not supported "
					  "for egress");
	if (attr->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
					  "only group 0 is supported");
	if (attr->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	return 0;
}

/*
 * Validate the count action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "count action not supported for "
					  "egress");
	return 0;
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attributes,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t priority_max = priv->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL, "groups are not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL, "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "egress is not supported");
	if (attributes->transfer && !priv->config.dv_esw_en)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	return 0;
}

/**
 * Validate ICMP6 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
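 *
 * A pattern that reaches this validator, as a sketch (IPv6 must precede the
 * ICMP6 item, as checked below):
 *
 * @code
 * struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_IPV6 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_ICMP6 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * @endcode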
 */
int
mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
			      uint64_t item_flags,
			      uint8_t target_protocol,
			      struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp6 *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP6 layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 is mandatory to filter on"
					  " ICMP6");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp6_mask,
		 sizeof(struct rte_flow_item_icmp6), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate ICMP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint8_t target_protocol,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 is mandatory to filter"
					  " on ICMP");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp_mask,
		 sizeof(struct rte_flow_item_icmp), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate Ethernet item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	int ret;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
				       MLX5_FLOW_LAYER_OUTER_L2;

	if (item_flags & ethm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L2 layers not supported");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow "
					  "L3 layers");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow VLAN");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	return ret;
}

/**
 * Validate VLAN item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Ethernet device flow is being created on.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     struct rte_eth_dev *dev,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(UINT16_MAX),
		.inner_type = RTE_BE16(UINT16_MAX),
	};
	uint16_t vlan_tag = 0;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	int ret;
	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
				       (MLX5_FLOW_LAYER_OUTER_L3 |
					MLX5_FLOW_LAYER_OUTER_L4);
	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
					MLX5_FLOW_LAYER_OUTER_VLAN;

	if (item_flags & vlanm)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple VLAN layers not supported");
	else if ((item_flags & l34m) != 0)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "VLAN cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_vlan),
					error);
	if (ret)
		return ret;
	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
		struct mlx5_priv *priv = dev->data->dev_private;

		if (priv->vmwa_context) {
			/*
			 * A non-NULL context means we have a virtual machine
			 * and SR-IOV enabled; a VLAN interface has to be
			 * created so that the hypervisor sets up the E-Switch
			 * vport context correctly. We avoid creating multiple
			 * VLAN interfaces, so we cannot support a VLAN tag
			 * mask.
			 */
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "VLAN tag mask is not"
						  " supported in virtual"
						  " environment");
		}
	}
	if (spec) {
		vlan_tag = spec->tci;
		vlan_tag &= mask->tci;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	return 0;
}

/**
 * Validate IPV4 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
 *   will be used to check whether item fields are supported.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint64_t last_item,
			     uint16_t ether_type,
			     const struct rte_flow_item_ipv4 *acc_mask,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;
	uint8_t next_proto = 0xFF;
	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
				  MLX5_FLOW_LAYER_OUTER_VLAN |
				  MLX5_FLOW_LAYER_INNER_VLAN);

	if ((last_item & l2_vlan) && ether_type &&
	    ether_type != RTE_ETHER_TYPE_IPV4)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 cannot follow L2/VLAN layer "
					  "which ether type is not IPv4");
	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
		if (mask && spec)
			next_proto = mask->hdr.next_proto_id &
				     spec->hdr.next_proto_id;
		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "multiple tunnel "
						  "not supported");
	}
	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "wrong tunnel type - IPv6 specified "
					  "but IPv4 item provided");
	if (item_flags & l3m)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L3 layers not supported");
	else if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an L4 layer.");
	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an NVGRE layer.");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	else if (mask->hdr.next_proto_id != 0 &&
		 mask->hdr.next_proto_id != 0xff)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
					  "partial mask is not supported"
					  " for protocol");
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					acc_mask ? (const uint8_t *)acc_mask
						 : (const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_ipv4),
					error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate IPV6 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
 *   will be used to check whether item fields are supported.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint64_t last_item,
			     uint16_t ether_type,
			     const struct rte_flow_item_ipv6 *acc_mask,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
		},
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;
	uint8_t next_proto = 0xFF;
	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
				  MLX5_FLOW_LAYER_OUTER_VLAN |
				  MLX5_FLOW_LAYER_INNER_VLAN);

	if ((last_item & l2_vlan) && ether_type &&
	    ether_type != RTE_ETHER_TYPE_IPV6)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 cannot follow L2/VLAN layer "
					  "which ether type is not IPv6");
	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
		if (mask && spec)
			next_proto = mask->hdr.proto & spec->hdr.proto;
		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "multiple tunnel "
						  "not supported");
	}
	if (item_flags & MLX5_FLOW_LAYER_IPIP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "wrong tunnel type - IPv4 specified "
					  "but IPv6 item provided");
	if (item_flags & l3m)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L3 layers not supported");
	else if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an L4 layer.");
	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an NVGRE layer.");
	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					acc_mask ? (const uint8_t *)acc_mask
						 : (const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_ipv6),
					error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate UDP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    uint8_t target_protocol,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with UDP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 is mandatory to filter on L4");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_udp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_udp_mask,
		 sizeof(struct rte_flow_item_udp), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate TCP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    uint8_t target_protocol,
			    const struct rte_flow_item_tcp *flow_mask,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_tcp *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	MLX5_ASSERT(flow_mask);
	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with TCP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 is mandatory to filter on L4");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_tcp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)flow_mask,
		 sizeof(struct rte_flow_item_tcp), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate VXLAN item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
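 *
 * A pattern that satisfies the checks below, as a sketch (the VNI value is an
 * arbitrary assumption; the outer UDP item is mandatory):
 *
 * @code
 * struct rte_flow_item_vxlan vxlan_spec = { .vni = { 0x00, 0x00, 0x2a } };
 * struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *	{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * @endcode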
1910 */
1911 int
1912 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1913 uint64_t item_flags,
1914 struct rte_flow_error *error)
1915 {
1916 const struct rte_flow_item_vxlan *spec = item->spec;
1917 const struct rte_flow_item_vxlan *mask = item->mask;
1918 int ret;
1919 union vni {
1920 uint32_t vlan_id;
1921 uint8_t vni[4];
1922 } id = { .vlan_id = 0, };
1923
1924
1925 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1926 return rte_flow_error_set(error, ENOTSUP,
1927 RTE_FLOW_ERROR_TYPE_ITEM, item,
1928 "multiple tunnel layers not"
1929 " supported");
1930 /*
1931 * Verify only UDPv4 is present as defined in
1932 * https://tools.ietf.org/html/rfc7348
1933 */
1934 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1935 return rte_flow_error_set(error, EINVAL,
1936 RTE_FLOW_ERROR_TYPE_ITEM, item,
1937 "no outer UDP layer found");
1938 if (!mask)
1939 mask = &rte_flow_item_vxlan_mask;
1940 ret = mlx5_flow_item_acceptable
1941 (item, (const uint8_t *)mask,
1942 (const uint8_t *)&rte_flow_item_vxlan_mask,
1943 sizeof(struct rte_flow_item_vxlan),
1944 error);
1945 if (ret < 0)
1946 return ret;
1947 if (spec) {
1948 memcpy(&id.vni[1], spec->vni, 3);
1949 memcpy(&id.vni[1], mask->vni, 3);
1950 }
1951 if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1952 return rte_flow_error_set(error, ENOTSUP,
1953 RTE_FLOW_ERROR_TYPE_ITEM, item,
1954 "VXLAN tunnel must be fully defined");
1955 return 0;
1956 }
1957
1958 /**
1959 * Validate VXLAN_GPE item.
1960 *
1961 * @param[in] item
1962 * Item specification.
1963 * @param[in] item_flags
1964 * Bit-fields that holds the items detected until now.
1965 * @param[in] dev
1966 * Pointer to the Ethernet device structure.
1969 * @param[out] error
1970 * Pointer to error structure.
1971 *
1972 * @return
1973 * 0 on success, a negative errno value otherwise and rte_errno is set.
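 *
 * Note added for clarity (not from the original sources): this check also
 * relies on L3 VXLAN matching being enabled for the port through the
 * "l3_vxlan_en" device argument at probe time, e.g. a devargs string along
 * the lines of the following (the PCI address is only a placeholder):
 *
 * @code
 * // -w 0000:82:00.0,l3_vxlan_en=1
 * @endcode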
1974 */ 1975 int 1976 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 1977 uint64_t item_flags, 1978 struct rte_eth_dev *dev, 1979 struct rte_flow_error *error) 1980 { 1981 struct mlx5_priv *priv = dev->data->dev_private; 1982 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1983 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1984 int ret; 1985 union vni { 1986 uint32_t vlan_id; 1987 uint8_t vni[4]; 1988 } id = { .vlan_id = 0, }; 1989 1990 if (!priv->config.l3_vxlan_en) 1991 return rte_flow_error_set(error, ENOTSUP, 1992 RTE_FLOW_ERROR_TYPE_ITEM, item, 1993 "L3 VXLAN is not enabled by device" 1994 " parameter and/or not configured in" 1995 " firmware"); 1996 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 1997 return rte_flow_error_set(error, ENOTSUP, 1998 RTE_FLOW_ERROR_TYPE_ITEM, item, 1999 "multiple tunnel layers not" 2000 " supported"); 2001 /* 2002 * Verify only UDPv4 is present as defined in 2003 * https://tools.ietf.org/html/rfc7348 2004 */ 2005 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2006 return rte_flow_error_set(error, EINVAL, 2007 RTE_FLOW_ERROR_TYPE_ITEM, item, 2008 "no outer UDP layer found"); 2009 if (!mask) 2010 mask = &rte_flow_item_vxlan_gpe_mask; 2011 ret = mlx5_flow_item_acceptable 2012 (item, (const uint8_t *)mask, 2013 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2014 sizeof(struct rte_flow_item_vxlan_gpe), 2015 error); 2016 if (ret < 0) 2017 return ret; 2018 if (spec) { 2019 if (spec->protocol) 2020 return rte_flow_error_set(error, ENOTSUP, 2021 RTE_FLOW_ERROR_TYPE_ITEM, 2022 item, 2023 "VxLAN-GPE protocol" 2024 " not supported"); 2025 memcpy(&id.vni[1], spec->vni, 3); 2026 memcpy(&id.vni[1], mask->vni, 3); 2027 } 2028 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2029 return rte_flow_error_set(error, ENOTSUP, 2030 RTE_FLOW_ERROR_TYPE_ITEM, item, 2031 "VXLAN-GPE tunnel must be fully" 2032 " defined"); 2033 return 0; 2034 } 2035 /** 2036 * Validate GRE Key item. 2037 * 2038 * @param[in] item 2039 * Item specification. 2040 * @param[in] item_flags 2041 * Bit flags to mark detected items. 2042 * @param[in] gre_item 2043 * Pointer to gre_item 2044 * @param[out] error 2045 * Pointer to error structure. 2046 * 2047 * @return 2048 * 0 on success, a negative errno value otherwise and rte_errno is set. 
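 *
 * Illustrative sketch (example values, not from the original sources): the
 * GRE item preceding the GRE_KEY item is expected to have the K (key
 * present) bit set whenever its mask covers that bit, otherwise the check
 * below rejects the pattern.
 *
 * @code
 * struct rte_flow_item_gre gre_spec = {
 *         .c_rsvd0_ver = RTE_BE16(0x2000), // K bit on
 * };
 * rte_be32_t gre_key_spec = RTE_BE32(0x1234); // example key value
 * // pattern: ETH / IPV4 / GRE (gre_spec) / GRE_KEY (gre_key_spec) / END
 * @endcode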
2049 */
2050 int
2051 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2052 uint64_t item_flags,
2053 const struct rte_flow_item *gre_item,
2054 struct rte_flow_error *error)
2055 {
2056 const rte_be32_t *mask = item->mask;
2057 int ret = 0;
2058 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2059 const struct rte_flow_item_gre *gre_spec;
2060 const struct rte_flow_item_gre *gre_mask;
2061
2062 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2063 return rte_flow_error_set(error, ENOTSUP,
2064 RTE_FLOW_ERROR_TYPE_ITEM, item,
2065 "Multiple GRE keys are not supported");
2066 if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2067 return rte_flow_error_set(error, ENOTSUP,
2068 RTE_FLOW_ERROR_TYPE_ITEM, item,
2069 "No preceding GRE header");
2070 if (item_flags & MLX5_FLOW_LAYER_INNER)
2071 return rte_flow_error_set(error, ENOTSUP,
2072 RTE_FLOW_ERROR_TYPE_ITEM, item,
2073 "GRE key following a wrong item");
2074 gre_mask = gre_item->mask;
2075 if (!gre_mask)
2076 gre_mask = &rte_flow_item_gre_mask;
2077 gre_spec = gre_item->spec;
2078 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2079 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2080 return rte_flow_error_set(error, EINVAL,
2081 RTE_FLOW_ERROR_TYPE_ITEM, item,
2082 "Key bit must be on");
2083
2084 if (!mask)
2085 mask = &gre_key_default_mask;
2086 ret = mlx5_flow_item_acceptable
2087 (item, (const uint8_t *)mask,
2088 (const uint8_t *)&gre_key_default_mask,
2089 sizeof(rte_be32_t), error);
2090 return ret;
2091 }
2092
2093 /**
2094 * Validate GRE item.
2095 *
2096 * @param[in] item
2097 * Item specification.
2098 * @param[in] item_flags
2099 * Bit flags to mark detected items.
2100 * @param[in] target_protocol
2101 * The next protocol in the previous item.
2102 * @param[out] error
2103 * Pointer to error structure.
2104 *
2105 * @return
2106 * 0 on success, a negative errno value otherwise and rte_errno is set.
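 *
 * Illustrative sketch (example values, not from the original sources): a GRE
 * item must be preceded by an outer L3 layer, and with the nic_mask used
 * below only the C/K/S bits of the first GRE word and the protocol field can
 * be matched.
 *
 * @code
 * struct rte_flow_item_gre gre_spec = {
 *         .protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4), // IPv4 payload
 * };
 * struct rte_flow_item_gre gre_mask = {
 *         .protocol = RTE_BE16(UINT16_MAX),
 * };
 * // pattern: ETH / IPV4 / GRE (gre_spec, gre_mask) / END
 * @endcode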
2107 */
2108 int
2109 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2110 uint64_t item_flags,
2111 uint8_t target_protocol,
2112 struct rte_flow_error *error)
2113 {
2114 const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2115 const struct rte_flow_item_gre *mask = item->mask;
2116 int ret;
2117 const struct rte_flow_item_gre nic_mask = {
2118 .c_rsvd0_ver = RTE_BE16(0xB000),
2119 .protocol = RTE_BE16(UINT16_MAX),
2120 };
2121
2122 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2123 return rte_flow_error_set(error, EINVAL,
2124 RTE_FLOW_ERROR_TYPE_ITEM, item,
2125 "protocol filtering not compatible"
2126 " with this GRE layer");
2127 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2128 return rte_flow_error_set(error, ENOTSUP,
2129 RTE_FLOW_ERROR_TYPE_ITEM, item,
2130 "multiple tunnel layers not"
2131 " supported");
2132 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2133 return rte_flow_error_set(error, ENOTSUP,
2134 RTE_FLOW_ERROR_TYPE_ITEM, item,
2135 "L3 Layer is missing");
2136 if (!mask)
2137 mask = &rte_flow_item_gre_mask;
2138 ret = mlx5_flow_item_acceptable
2139 (item, (const uint8_t *)mask,
2140 (const uint8_t *)&nic_mask,
2141 sizeof(struct rte_flow_item_gre), error);
2142 if (ret < 0)
2143 return ret;
2144 #ifndef HAVE_MLX5DV_DR
2145 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2146 if (spec && (spec->protocol & mask->protocol))
2147 return rte_flow_error_set(error, ENOTSUP,
2148 RTE_FLOW_ERROR_TYPE_ITEM, item,
2149 "without MPLS support the"
2150 " specification cannot be used for"
2151 " filtering");
2152 #endif
2153 #endif
2154 return 0;
2155 }
2156
2157 /**
2158 * Validate Geneve item.
2159 *
2160 * @param[in] item
2161 * Item specification.
2162 * @param[in] item_flags
2163 * Bit-fields that holds the items detected until now.
2164 * @param[in] dev
2165 * Pointer to the Ethernet device structure.
2166 * @param[out] error
2167 * Pointer to error structure.
2168 *
2169 * @return
2170 * 0 on success, a negative errno value otherwise and rte_errno is set.
2171 */
2172
2173 int
2174 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2175 uint64_t item_flags,
2176 struct rte_eth_dev *dev,
2177 struct rte_flow_error *error)
2178 {
2179 struct mlx5_priv *priv = dev->data->dev_private;
2180 const struct rte_flow_item_geneve *spec = item->spec;
2181 const struct rte_flow_item_geneve *mask = item->mask;
2182 int ret;
2183 uint16_t gbhdr;
2184 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2185 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2186 const struct rte_flow_item_geneve nic_mask = { 2187 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2188 .vni = "\xff\xff\xff", 2189 .protocol = RTE_BE16(UINT16_MAX), 2190 }; 2191 2192 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2193 return rte_flow_error_set(error, ENOTSUP, 2194 RTE_FLOW_ERROR_TYPE_ITEM, item, 2195 "L3 Geneve is not enabled by device" 2196 " parameter and/or not configured in" 2197 " firmware"); 2198 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2199 return rte_flow_error_set(error, ENOTSUP, 2200 RTE_FLOW_ERROR_TYPE_ITEM, item, 2201 "multiple tunnel layers not" 2202 " supported"); 2203 /* 2204 * Verify only UDPv4 is present as defined in 2205 * https://tools.ietf.org/html/rfc7348 2206 */ 2207 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2208 return rte_flow_error_set(error, EINVAL, 2209 RTE_FLOW_ERROR_TYPE_ITEM, item, 2210 "no outer UDP layer found"); 2211 if (!mask) 2212 mask = &rte_flow_item_geneve_mask; 2213 ret = mlx5_flow_item_acceptable 2214 (item, (const uint8_t *)mask, 2215 (const uint8_t *)&nic_mask, 2216 sizeof(struct rte_flow_item_geneve), error); 2217 if (ret) 2218 return ret; 2219 if (spec) { 2220 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2221 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2222 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2223 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2224 return rte_flow_error_set(error, ENOTSUP, 2225 RTE_FLOW_ERROR_TYPE_ITEM, 2226 item, 2227 "Geneve protocol unsupported" 2228 " fields are being used"); 2229 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2230 return rte_flow_error_set 2231 (error, ENOTSUP, 2232 RTE_FLOW_ERROR_TYPE_ITEM, 2233 item, 2234 "Unsupported Geneve options length"); 2235 } 2236 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2237 return rte_flow_error_set 2238 (error, ENOTSUP, 2239 RTE_FLOW_ERROR_TYPE_ITEM, item, 2240 "Geneve tunnel must be fully defined"); 2241 return 0; 2242 } 2243 2244 /** 2245 * Validate MPLS item. 2246 * 2247 * @param[in] dev 2248 * Pointer to the rte_eth_dev structure. 2249 * @param[in] item 2250 * Item specification. 2251 * @param[in] item_flags 2252 * Bit-fields that holds the items detected until now. 2253 * @param[in] prev_layer 2254 * The protocol layer indicated in previous item. 2255 * @param[out] error 2256 * Pointer to error structure. 2257 * 2258 * @return 2259 * 0 on success, a negative errno value otherwise and rte_errno is set. 2260 */ 2261 int 2262 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2263 const struct rte_flow_item *item __rte_unused, 2264 uint64_t item_flags __rte_unused, 2265 uint64_t prev_layer __rte_unused, 2266 struct rte_flow_error *error) 2267 { 2268 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2269 const struct rte_flow_item_mpls *mask = item->mask; 2270 struct mlx5_priv *priv = dev->data->dev_private; 2271 int ret; 2272 2273 if (!priv->config.mpls_en) 2274 return rte_flow_error_set(error, ENOTSUP, 2275 RTE_FLOW_ERROR_TYPE_ITEM, item, 2276 "MPLS not supported or" 2277 " disabled in firmware" 2278 " configuration."); 2279 /* MPLS over IP, UDP, GRE is allowed */ 2280 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2281 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2282 MLX5_FLOW_LAYER_GRE))) 2283 return rte_flow_error_set(error, EINVAL, 2284 RTE_FLOW_ERROR_TYPE_ITEM, item, 2285 "protocol filtering not compatible" 2286 " with MPLS layer"); 2287 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
*/ 2288 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2289 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2290 return rte_flow_error_set(error, ENOTSUP, 2291 RTE_FLOW_ERROR_TYPE_ITEM, item, 2292 "multiple tunnel layers not" 2293 " supported"); 2294 if (!mask) 2295 mask = &rte_flow_item_mpls_mask; 2296 ret = mlx5_flow_item_acceptable 2297 (item, (const uint8_t *)mask, 2298 (const uint8_t *)&rte_flow_item_mpls_mask, 2299 sizeof(struct rte_flow_item_mpls), error); 2300 if (ret < 0) 2301 return ret; 2302 return 0; 2303 #else 2304 return rte_flow_error_set(error, ENOTSUP, 2305 RTE_FLOW_ERROR_TYPE_ITEM, item, 2306 "MPLS is not supported by Verbs, please" 2307 " update."); 2308 #endif 2309 } 2310 2311 /** 2312 * Validate NVGRE item. 2313 * 2314 * @param[in] item 2315 * Item specification. 2316 * @param[in] item_flags 2317 * Bit flags to mark detected items. 2318 * @param[in] target_protocol 2319 * The next protocol in the previous item. 2320 * @param[out] error 2321 * Pointer to error structure. 2322 * 2323 * @return 2324 * 0 on success, a negative errno value otherwise and rte_errno is set. 2325 */ 2326 int 2327 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2328 uint64_t item_flags, 2329 uint8_t target_protocol, 2330 struct rte_flow_error *error) 2331 { 2332 const struct rte_flow_item_nvgre *mask = item->mask; 2333 int ret; 2334 2335 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2336 return rte_flow_error_set(error, EINVAL, 2337 RTE_FLOW_ERROR_TYPE_ITEM, item, 2338 "protocol filtering not compatible" 2339 " with this GRE layer"); 2340 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2341 return rte_flow_error_set(error, ENOTSUP, 2342 RTE_FLOW_ERROR_TYPE_ITEM, item, 2343 "multiple tunnel layers not" 2344 " supported"); 2345 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2346 return rte_flow_error_set(error, ENOTSUP, 2347 RTE_FLOW_ERROR_TYPE_ITEM, item, 2348 "L3 Layer is missing"); 2349 if (!mask) 2350 mask = &rte_flow_item_nvgre_mask; 2351 ret = mlx5_flow_item_acceptable 2352 (item, (const uint8_t *)mask, 2353 (const uint8_t *)&rte_flow_item_nvgre_mask, 2354 sizeof(struct rte_flow_item_nvgre), error); 2355 if (ret < 0) 2356 return ret; 2357 return 0; 2358 } 2359 2360 /* Allocate unique ID for the split Q/RSS subflows. */ 2361 static uint32_t 2362 flow_qrss_get_id(struct rte_eth_dev *dev) 2363 { 2364 struct mlx5_priv *priv = dev->data->dev_private; 2365 uint32_t qrss_id, ret; 2366 2367 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2368 if (ret) 2369 return 0; 2370 MLX5_ASSERT(qrss_id); 2371 return qrss_id; 2372 } 2373 2374 /* Free unique ID for the split Q/RSS subflows. */ 2375 static void 2376 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2377 { 2378 struct mlx5_priv *priv = dev->data->dev_private; 2379 2380 if (qrss_id) 2381 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2382 } 2383 2384 /** 2385 * Release resource related QUEUE/RSS action split. 2386 * 2387 * @param dev 2388 * Pointer to Ethernet device. 2389 * @param flow 2390 * Flow to release id's from. 
2391 */ 2392 static void 2393 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2394 struct rte_flow *flow) 2395 { 2396 struct mlx5_priv *priv = dev->data->dev_private; 2397 uint32_t handle_idx; 2398 struct mlx5_flow_handle *dev_handle; 2399 2400 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2401 handle_idx, dev_handle, next) 2402 if (dev_handle->split_flow_id) 2403 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2404 } 2405 2406 static int 2407 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2408 const struct rte_flow_attr *attr __rte_unused, 2409 const struct rte_flow_item items[] __rte_unused, 2410 const struct rte_flow_action actions[] __rte_unused, 2411 bool external __rte_unused, 2412 int hairpin __rte_unused, 2413 struct rte_flow_error *error) 2414 { 2415 return rte_flow_error_set(error, ENOTSUP, 2416 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2417 } 2418 2419 static struct mlx5_flow * 2420 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2421 const struct rte_flow_attr *attr __rte_unused, 2422 const struct rte_flow_item items[] __rte_unused, 2423 const struct rte_flow_action actions[] __rte_unused, 2424 struct rte_flow_error *error) 2425 { 2426 rte_flow_error_set(error, ENOTSUP, 2427 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2428 return NULL; 2429 } 2430 2431 static int 2432 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2433 struct mlx5_flow *dev_flow __rte_unused, 2434 const struct rte_flow_attr *attr __rte_unused, 2435 const struct rte_flow_item items[] __rte_unused, 2436 const struct rte_flow_action actions[] __rte_unused, 2437 struct rte_flow_error *error) 2438 { 2439 return rte_flow_error_set(error, ENOTSUP, 2440 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2441 } 2442 2443 static int 2444 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2445 struct rte_flow *flow __rte_unused, 2446 struct rte_flow_error *error) 2447 { 2448 return rte_flow_error_set(error, ENOTSUP, 2449 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2450 } 2451 2452 static void 2453 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2454 struct rte_flow *flow __rte_unused) 2455 { 2456 } 2457 2458 static void 2459 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2460 struct rte_flow *flow __rte_unused) 2461 { 2462 } 2463 2464 static int 2465 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2466 struct rte_flow *flow __rte_unused, 2467 const struct rte_flow_action *actions __rte_unused, 2468 void *data __rte_unused, 2469 struct rte_flow_error *error) 2470 { 2471 return rte_flow_error_set(error, ENOTSUP, 2472 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2473 } 2474 2475 /* Void driver to protect from null pointer reference. */ 2476 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2477 .validate = flow_null_validate, 2478 .prepare = flow_null_prepare, 2479 .translate = flow_null_translate, 2480 .apply = flow_null_apply, 2481 .remove = flow_null_remove, 2482 .destroy = flow_null_destroy, 2483 .query = flow_null_query, 2484 }; 2485 2486 /** 2487 * Select flow driver type according to flow attributes and device 2488 * configuration. 2489 * 2490 * @param[in] dev 2491 * Pointer to the dev structure. 2492 * @param[in] attr 2493 * Pointer to the flow attributes. 2494 * 2495 * @return 2496 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
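 *
 * Summary of the selection implemented below (added for clarity, the code
 * itself is authoritative):
 *
 * @code
 * // mlx5_flow_os_get_type() returns a specific type -> use that type
 * // attr->transfer && priv->config.dv_esw_en        -> MLX5_FLOW_TYPE_DV
 * // !attr->transfer && priv->config.dv_flow_en      -> MLX5_FLOW_TYPE_DV
 * // !attr->transfer && !priv->config.dv_flow_en     -> MLX5_FLOW_TYPE_VERBS
 * @endcode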
2497 */ 2498 static enum mlx5_flow_drv_type 2499 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2500 { 2501 struct mlx5_priv *priv = dev->data->dev_private; 2502 /* The OS can determine first a specific flow type (DV, VERBS) */ 2503 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2504 2505 if (type != MLX5_FLOW_TYPE_MAX) 2506 return type; 2507 /* If no OS specific type - continue with DV/VERBS selection */ 2508 if (attr->transfer && priv->config.dv_esw_en) 2509 type = MLX5_FLOW_TYPE_DV; 2510 if (!attr->transfer) 2511 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2512 MLX5_FLOW_TYPE_VERBS; 2513 return type; 2514 } 2515 2516 #define flow_get_drv_ops(type) flow_drv_ops[type] 2517 2518 /** 2519 * Flow driver validation API. This abstracts calling driver specific functions. 2520 * The type of flow driver is determined according to flow attributes. 2521 * 2522 * @param[in] dev 2523 * Pointer to the dev structure. 2524 * @param[in] attr 2525 * Pointer to the flow attributes. 2526 * @param[in] items 2527 * Pointer to the list of items. 2528 * @param[in] actions 2529 * Pointer to the list of actions. 2530 * @param[in] external 2531 * This flow rule is created by request external to PMD. 2532 * @param[in] hairpin 2533 * Number of hairpin TX actions, 0 means classic flow. 2534 * @param[out] error 2535 * Pointer to the error structure. 2536 * 2537 * @return 2538 * 0 on success, a negative errno value otherwise and rte_errno is set. 2539 */ 2540 static inline int 2541 flow_drv_validate(struct rte_eth_dev *dev, 2542 const struct rte_flow_attr *attr, 2543 const struct rte_flow_item items[], 2544 const struct rte_flow_action actions[], 2545 bool external, int hairpin, struct rte_flow_error *error) 2546 { 2547 const struct mlx5_flow_driver_ops *fops; 2548 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2549 2550 fops = flow_get_drv_ops(type); 2551 return fops->validate(dev, attr, items, actions, external, 2552 hairpin, error); 2553 } 2554 2555 /** 2556 * Flow driver preparation API. This abstracts calling driver specific 2557 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2558 * calculates the size of memory required for device flow, allocates the memory, 2559 * initializes the device flow and returns the pointer. 2560 * 2561 * @note 2562 * This function initializes device flow structure such as dv or verbs in 2563 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2564 * rest. For example, adding returning device flow to flow->dev_flow list and 2565 * setting backward reference to the flow should be done out of this function. 2566 * layers field is not filled either. 2567 * 2568 * @param[in] dev 2569 * Pointer to the dev structure. 2570 * @param[in] attr 2571 * Pointer to the flow attributes. 2572 * @param[in] items 2573 * Pointer to the list of items. 2574 * @param[in] actions 2575 * Pointer to the list of actions. 2576 * @param[in] flow_idx 2577 * This memory pool index to the flow. 2578 * @param[out] error 2579 * Pointer to the error structure. 2580 * 2581 * @return 2582 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
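 *
 * Illustrative call sequence (simplified sketch, not the actual creation
 * path below, which additionally handles flow splitting, RSS expansion and
 * list management):
 *
 * @code
 * struct mlx5_flow *dev_flow;
 *
 * if (flow_drv_validate(dev, attr, items, actions, true, 0, error) < 0)
 *         return 0;
 * dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, flow_idx,
 *                             error);
 * if (!dev_flow)
 *         return 0;
 * dev_flow->flow = flow; // caller's responsibility, see the note above
 * if (flow_drv_translate(dev, dev_flow, attr, items, actions, error) < 0)
 *         return 0;
 * // ... once every sub-flow has been prepared and translated:
 * if (flow_drv_apply(dev, flow, error) < 0)
 *         return 0;
 * @endcode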
2583 */ 2584 static inline struct mlx5_flow * 2585 flow_drv_prepare(struct rte_eth_dev *dev, 2586 const struct rte_flow *flow, 2587 const struct rte_flow_attr *attr, 2588 const struct rte_flow_item items[], 2589 const struct rte_flow_action actions[], 2590 uint32_t flow_idx, 2591 struct rte_flow_error *error) 2592 { 2593 const struct mlx5_flow_driver_ops *fops; 2594 enum mlx5_flow_drv_type type = flow->drv_type; 2595 struct mlx5_flow *mlx5_flow = NULL; 2596 2597 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2598 fops = flow_get_drv_ops(type); 2599 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2600 if (mlx5_flow) 2601 mlx5_flow->flow_idx = flow_idx; 2602 return mlx5_flow; 2603 } 2604 2605 /** 2606 * Flow driver translation API. This abstracts calling driver specific 2607 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2608 * translates a generic flow into a driver flow. flow_drv_prepare() must 2609 * precede. 2610 * 2611 * @note 2612 * dev_flow->layers could be filled as a result of parsing during translation 2613 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2614 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2615 * flow->actions could be overwritten even though all the expanded dev_flows 2616 * have the same actions. 2617 * 2618 * @param[in] dev 2619 * Pointer to the rte dev structure. 2620 * @param[in, out] dev_flow 2621 * Pointer to the mlx5 flow. 2622 * @param[in] attr 2623 * Pointer to the flow attributes. 2624 * @param[in] items 2625 * Pointer to the list of items. 2626 * @param[in] actions 2627 * Pointer to the list of actions. 2628 * @param[out] error 2629 * Pointer to the error structure. 2630 * 2631 * @return 2632 * 0 on success, a negative errno value otherwise and rte_errno is set. 2633 */ 2634 static inline int 2635 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2636 const struct rte_flow_attr *attr, 2637 const struct rte_flow_item items[], 2638 const struct rte_flow_action actions[], 2639 struct rte_flow_error *error) 2640 { 2641 const struct mlx5_flow_driver_ops *fops; 2642 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2643 2644 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2645 fops = flow_get_drv_ops(type); 2646 return fops->translate(dev, dev_flow, attr, items, actions, error); 2647 } 2648 2649 /** 2650 * Flow driver apply API. This abstracts calling driver specific functions. 2651 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2652 * translated driver flows on to device. flow_drv_translate() must precede. 2653 * 2654 * @param[in] dev 2655 * Pointer to Ethernet device structure. 2656 * @param[in, out] flow 2657 * Pointer to flow structure. 2658 * @param[out] error 2659 * Pointer to error structure. 2660 * 2661 * @return 2662 * 0 on success, a negative errno value otherwise and rte_errno is set. 2663 */ 2664 static inline int 2665 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2666 struct rte_flow_error *error) 2667 { 2668 const struct mlx5_flow_driver_ops *fops; 2669 enum mlx5_flow_drv_type type = flow->drv_type; 2670 2671 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2672 fops = flow_get_drv_ops(type); 2673 return fops->apply(dev, flow, error); 2674 } 2675 2676 /** 2677 * Flow driver remove API. This abstracts calling driver specific functions. 2678 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 2679 * on device. All the resources of the flow should be freed by calling 2680 * flow_drv_destroy(). 2681 * 2682 * @param[in] dev 2683 * Pointer to Ethernet device. 2684 * @param[in, out] flow 2685 * Pointer to flow structure. 2686 */ 2687 static inline void 2688 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2689 { 2690 const struct mlx5_flow_driver_ops *fops; 2691 enum mlx5_flow_drv_type type = flow->drv_type; 2692 2693 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2694 fops = flow_get_drv_ops(type); 2695 fops->remove(dev, flow); 2696 } 2697 2698 /** 2699 * Flow driver destroy API. This abstracts calling driver specific functions. 2700 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 2701 * on device and releases resources of the flow. 2702 * 2703 * @param[in] dev 2704 * Pointer to Ethernet device. 2705 * @param[in, out] flow 2706 * Pointer to flow structure. 2707 */ 2708 static inline void 2709 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 2710 { 2711 const struct mlx5_flow_driver_ops *fops; 2712 enum mlx5_flow_drv_type type = flow->drv_type; 2713 2714 flow_mreg_split_qrss_release(dev, flow); 2715 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2716 fops = flow_get_drv_ops(type); 2717 fops->destroy(dev, flow); 2718 } 2719 2720 /** 2721 * Get RSS action from the action list. 2722 * 2723 * @param[in] actions 2724 * Pointer to the list of actions. 2725 * 2726 * @return 2727 * Pointer to the RSS action if exist, else return NULL. 2728 */ 2729 static const struct rte_flow_action_rss* 2730 flow_get_rss_action(const struct rte_flow_action actions[]) 2731 { 2732 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2733 switch (actions->type) { 2734 case RTE_FLOW_ACTION_TYPE_RSS: 2735 return (const struct rte_flow_action_rss *) 2736 actions->conf; 2737 default: 2738 break; 2739 } 2740 } 2741 return NULL; 2742 } 2743 2744 static unsigned int 2745 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 2746 { 2747 const struct rte_flow_item *item; 2748 unsigned int has_vlan = 0; 2749 2750 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 2751 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 2752 has_vlan = 1; 2753 break; 2754 } 2755 } 2756 if (has_vlan) 2757 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 2758 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 2759 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 2760 MLX5_EXPANSION_ROOT_OUTER; 2761 } 2762 2763 /** 2764 * Get layer flags from the prefix flow. 2765 * 2766 * Some flows may be split to several subflows, the prefix subflow gets the 2767 * match items and the suffix sub flow gets the actions. 2768 * Some actions need the user defined match item flags to get the detail for 2769 * the action. 2770 * This function helps the suffix flow to get the item layer flags from prefix 2771 * subflow. 2772 * 2773 * @param[in] dev_flow 2774 * Pointer the created preifx subflow. 2775 * 2776 * @return 2777 * The layers get from prefix subflow. 2778 */ 2779 static inline uint64_t 2780 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 2781 { 2782 uint64_t layers = 0; 2783 2784 /* 2785 * Layers bits could be localization, but usually the compiler will 2786 * help to do the optimization work for source code. 2787 * If no decap actions, use the layers directly. 
2788 */ 2789 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 2790 return dev_flow->handle->layers; 2791 /* Convert L3 layers with decap action. */ 2792 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 2793 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 2794 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 2795 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 2796 /* Convert L4 layers with decap action. */ 2797 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 2798 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 2799 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 2800 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 2801 return layers; 2802 } 2803 2804 /** 2805 * Get metadata split action information. 2806 * 2807 * @param[in] actions 2808 * Pointer to the list of actions. 2809 * @param[out] qrss 2810 * Pointer to the return pointer. 2811 * @param[out] qrss_type 2812 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 2813 * if no QUEUE/RSS is found. 2814 * @param[out] encap_idx 2815 * Pointer to the index of the encap action if exists, otherwise the last 2816 * action index. 2817 * 2818 * @return 2819 * Total number of actions. 2820 */ 2821 static int 2822 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 2823 const struct rte_flow_action **qrss, 2824 int *encap_idx) 2825 { 2826 const struct rte_flow_action_raw_encap *raw_encap; 2827 int actions_n = 0; 2828 int raw_decap_idx = -1; 2829 2830 *encap_idx = -1; 2831 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2832 switch (actions->type) { 2833 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 2834 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 2835 *encap_idx = actions_n; 2836 break; 2837 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 2838 raw_decap_idx = actions_n; 2839 break; 2840 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 2841 raw_encap = actions->conf; 2842 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 2843 *encap_idx = raw_decap_idx != -1 ? 2844 raw_decap_idx : actions_n; 2845 break; 2846 case RTE_FLOW_ACTION_TYPE_QUEUE: 2847 case RTE_FLOW_ACTION_TYPE_RSS: 2848 *qrss = actions; 2849 break; 2850 default: 2851 break; 2852 } 2853 actions_n++; 2854 } 2855 if (*encap_idx == -1) 2856 *encap_idx = actions_n; 2857 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2858 return actions_n + 1; 2859 } 2860 2861 /** 2862 * Check meter action from the action list. 2863 * 2864 * @param[in] actions 2865 * Pointer to the list of actions. 2866 * @param[out] mtr 2867 * Pointer to the meter exist flag. 2868 * 2869 * @return 2870 * Total number of actions. 2871 */ 2872 static int 2873 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 2874 { 2875 int actions_n = 0; 2876 2877 MLX5_ASSERT(mtr); 2878 *mtr = 0; 2879 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2880 switch (actions->type) { 2881 case RTE_FLOW_ACTION_TYPE_METER: 2882 *mtr = 1; 2883 break; 2884 default: 2885 break; 2886 } 2887 actions_n++; 2888 } 2889 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2890 return actions_n + 1; 2891 } 2892 2893 /** 2894 * Check if the flow should be splited due to hairpin. 2895 * The reason for the split is that in current HW we can't 2896 * support encap on Rx, so if a flow have encap we move it 2897 * to Tx. 2898 * 2899 * @param dev 2900 * Pointer to Ethernet device. 2901 * @param[in] attr 2902 * Flow rule attributes. 2903 * @param[in] actions 2904 * Associated actions (list terminated by the END action). 
2905 * 2906 * @return 2907 * > 0 the number of actions and the flow should be split, 2908 * 0 when no split required. 2909 */ 2910 static int 2911 flow_check_hairpin_split(struct rte_eth_dev *dev, 2912 const struct rte_flow_attr *attr, 2913 const struct rte_flow_action actions[]) 2914 { 2915 int queue_action = 0; 2916 int action_n = 0; 2917 int encap = 0; 2918 const struct rte_flow_action_queue *queue; 2919 const struct rte_flow_action_rss *rss; 2920 const struct rte_flow_action_raw_encap *raw_encap; 2921 2922 if (!attr->ingress) 2923 return 0; 2924 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2925 switch (actions->type) { 2926 case RTE_FLOW_ACTION_TYPE_QUEUE: 2927 queue = actions->conf; 2928 if (queue == NULL) 2929 return 0; 2930 if (mlx5_rxq_get_type(dev, queue->index) != 2931 MLX5_RXQ_TYPE_HAIRPIN) 2932 return 0; 2933 queue_action = 1; 2934 action_n++; 2935 break; 2936 case RTE_FLOW_ACTION_TYPE_RSS: 2937 rss = actions->conf; 2938 if (rss == NULL || rss->queue_num == 0) 2939 return 0; 2940 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 2941 MLX5_RXQ_TYPE_HAIRPIN) 2942 return 0; 2943 queue_action = 1; 2944 action_n++; 2945 break; 2946 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 2947 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 2948 encap = 1; 2949 action_n++; 2950 break; 2951 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 2952 raw_encap = actions->conf; 2953 if (raw_encap->size > 2954 (sizeof(struct rte_flow_item_eth) + 2955 sizeof(struct rte_flow_item_ipv4))) 2956 encap = 1; 2957 action_n++; 2958 break; 2959 default: 2960 action_n++; 2961 break; 2962 } 2963 } 2964 if (encap == 1 && queue_action) 2965 return action_n; 2966 return 0; 2967 } 2968 2969 /* Declare flow create/destroy prototype in advance. */ 2970 static uint32_t 2971 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 2972 const struct rte_flow_attr *attr, 2973 const struct rte_flow_item items[], 2974 const struct rte_flow_action actions[], 2975 bool external, struct rte_flow_error *error); 2976 2977 static void 2978 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 2979 uint32_t flow_idx); 2980 2981 /** 2982 * Add a flow of copying flow metadata registers in RX_CP_TBL. 2983 * 2984 * As mark_id is unique, if there's already a registered flow for the mark_id, 2985 * return by increasing the reference counter of the resource. Otherwise, create 2986 * the resource (mcp_res) and flow. 2987 * 2988 * Flow looks like, 2989 * - If ingress port is ANY and reg_c[1] is mark_id, 2990 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 2991 * 2992 * For default flow (zero mark_id), flow is like, 2993 * - If ingress port is ANY, 2994 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 2995 * 2996 * @param dev 2997 * Pointer to Ethernet device. 2998 * @param mark_id 2999 * ID of MARK action, zero means default flow for META. 3000 * @param[out] error 3001 * Perform verbose error reporting if not NULL. 3002 * 3003 * @return 3004 * Associated resource on success, NULL otherwise and rte_errno is set. 
3005 */ 3006 static struct mlx5_flow_mreg_copy_resource * 3007 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3008 struct rte_flow_error *error) 3009 { 3010 struct mlx5_priv *priv = dev->data->dev_private; 3011 struct rte_flow_attr attr = { 3012 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3013 .ingress = 1, 3014 }; 3015 struct mlx5_rte_flow_item_tag tag_spec = { 3016 .data = mark_id, 3017 }; 3018 struct rte_flow_item items[] = { 3019 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3020 }; 3021 struct rte_flow_action_mark ftag = { 3022 .id = mark_id, 3023 }; 3024 struct mlx5_flow_action_copy_mreg cp_mreg = { 3025 .dst = REG_B, 3026 .src = 0, 3027 }; 3028 struct rte_flow_action_jump jump = { 3029 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3030 }; 3031 struct rte_flow_action actions[] = { 3032 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3033 }; 3034 struct mlx5_flow_mreg_copy_resource *mcp_res; 3035 uint32_t idx = 0; 3036 int ret; 3037 3038 /* Fill the register fileds in the flow. */ 3039 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3040 if (ret < 0) 3041 return NULL; 3042 tag_spec.id = ret; 3043 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3044 if (ret < 0) 3045 return NULL; 3046 cp_mreg.src = ret; 3047 /* Check if already registered. */ 3048 MLX5_ASSERT(priv->mreg_cp_tbl); 3049 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3050 if (mcp_res) { 3051 /* For non-default rule. */ 3052 if (mark_id != MLX5_DEFAULT_COPY_ID) 3053 mcp_res->refcnt++; 3054 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3055 mcp_res->refcnt == 1); 3056 return mcp_res; 3057 } 3058 /* Provide the full width of FLAG specific value. */ 3059 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3060 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3061 /* Build a new flow. */ 3062 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3063 items[0] = (struct rte_flow_item){ 3064 .type = (enum rte_flow_item_type) 3065 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3066 .spec = &tag_spec, 3067 }; 3068 items[1] = (struct rte_flow_item){ 3069 .type = RTE_FLOW_ITEM_TYPE_END, 3070 }; 3071 actions[0] = (struct rte_flow_action){ 3072 .type = (enum rte_flow_action_type) 3073 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3074 .conf = &ftag, 3075 }; 3076 actions[1] = (struct rte_flow_action){ 3077 .type = (enum rte_flow_action_type) 3078 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3079 .conf = &cp_mreg, 3080 }; 3081 actions[2] = (struct rte_flow_action){ 3082 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3083 .conf = &jump, 3084 }; 3085 actions[3] = (struct rte_flow_action){ 3086 .type = RTE_FLOW_ACTION_TYPE_END, 3087 }; 3088 } else { 3089 /* Default rule, wildcard match. */ 3090 attr.priority = MLX5_FLOW_PRIO_RSVD; 3091 items[0] = (struct rte_flow_item){ 3092 .type = RTE_FLOW_ITEM_TYPE_END, 3093 }; 3094 actions[0] = (struct rte_flow_action){ 3095 .type = (enum rte_flow_action_type) 3096 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3097 .conf = &cp_mreg, 3098 }; 3099 actions[1] = (struct rte_flow_action){ 3100 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3101 .conf = &jump, 3102 }; 3103 actions[2] = (struct rte_flow_action){ 3104 .type = RTE_FLOW_ACTION_TYPE_END, 3105 }; 3106 } 3107 /* Build a new entry. */ 3108 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3109 if (!mcp_res) { 3110 rte_errno = ENOMEM; 3111 return NULL; 3112 } 3113 mcp_res->idx = idx; 3114 /* 3115 * The copy Flows are not included in any list. 
There 3116 * ones are referenced from other Flows and can not 3117 * be applied, removed, deleted in ardbitrary order 3118 * by list traversing. 3119 */ 3120 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3121 actions, false, error); 3122 if (!mcp_res->rix_flow) 3123 goto error; 3124 mcp_res->refcnt++; 3125 mcp_res->hlist_ent.key = mark_id; 3126 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3127 &mcp_res->hlist_ent); 3128 MLX5_ASSERT(!ret); 3129 if (ret) 3130 goto error; 3131 return mcp_res; 3132 error: 3133 if (mcp_res->rix_flow) 3134 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3135 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3136 return NULL; 3137 } 3138 3139 /** 3140 * Release flow in RX_CP_TBL. 3141 * 3142 * @param dev 3143 * Pointer to Ethernet device. 3144 * @flow 3145 * Parent flow for wich copying is provided. 3146 */ 3147 static void 3148 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3149 struct rte_flow *flow) 3150 { 3151 struct mlx5_flow_mreg_copy_resource *mcp_res; 3152 struct mlx5_priv *priv = dev->data->dev_private; 3153 3154 if (!flow->rix_mreg_copy) 3155 return; 3156 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3157 flow->rix_mreg_copy); 3158 if (!mcp_res || !priv->mreg_cp_tbl) 3159 return; 3160 if (flow->copy_applied) { 3161 MLX5_ASSERT(mcp_res->appcnt); 3162 flow->copy_applied = 0; 3163 --mcp_res->appcnt; 3164 if (!mcp_res->appcnt) { 3165 struct rte_flow *mcp_flow = mlx5_ipool_get 3166 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3167 mcp_res->rix_flow); 3168 3169 if (mcp_flow) 3170 flow_drv_remove(dev, mcp_flow); 3171 } 3172 } 3173 /* 3174 * We do not check availability of metadata registers here, 3175 * because copy resources are not allocated in this case. 3176 */ 3177 if (--mcp_res->refcnt) 3178 return; 3179 MLX5_ASSERT(mcp_res->rix_flow); 3180 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3181 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3182 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3183 flow->rix_mreg_copy = 0; 3184 } 3185 3186 /** 3187 * Start flow in RX_CP_TBL. 3188 * 3189 * @param dev 3190 * Pointer to Ethernet device. 3191 * @flow 3192 * Parent flow for wich copying is provided. 3193 * 3194 * @return 3195 * 0 on success, a negative errno value otherwise and rte_errno is set. 3196 */ 3197 static int 3198 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3199 struct rte_flow *flow) 3200 { 3201 struct mlx5_flow_mreg_copy_resource *mcp_res; 3202 struct mlx5_priv *priv = dev->data->dev_private; 3203 int ret; 3204 3205 if (!flow->rix_mreg_copy || flow->copy_applied) 3206 return 0; 3207 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3208 flow->rix_mreg_copy); 3209 if (!mcp_res) 3210 return 0; 3211 if (!mcp_res->appcnt) { 3212 struct rte_flow *mcp_flow = mlx5_ipool_get 3213 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3214 mcp_res->rix_flow); 3215 3216 if (mcp_flow) { 3217 ret = flow_drv_apply(dev, mcp_flow, NULL); 3218 if (ret) 3219 return ret; 3220 } 3221 } 3222 ++mcp_res->appcnt; 3223 flow->copy_applied = 1; 3224 return 0; 3225 } 3226 3227 /** 3228 * Stop flow in RX_CP_TBL. 3229 * 3230 * @param dev 3231 * Pointer to Ethernet device. 3232 * @flow 3233 * Parent flow for wich copying is provided. 
3234 */ 3235 static void 3236 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3237 struct rte_flow *flow) 3238 { 3239 struct mlx5_flow_mreg_copy_resource *mcp_res; 3240 struct mlx5_priv *priv = dev->data->dev_private; 3241 3242 if (!flow->rix_mreg_copy || !flow->copy_applied) 3243 return; 3244 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3245 flow->rix_mreg_copy); 3246 if (!mcp_res) 3247 return; 3248 MLX5_ASSERT(mcp_res->appcnt); 3249 --mcp_res->appcnt; 3250 flow->copy_applied = 0; 3251 if (!mcp_res->appcnt) { 3252 struct rte_flow *mcp_flow = mlx5_ipool_get 3253 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3254 mcp_res->rix_flow); 3255 3256 if (mcp_flow) 3257 flow_drv_remove(dev, mcp_flow); 3258 } 3259 } 3260 3261 /** 3262 * Remove the default copy action from RX_CP_TBL. 3263 * 3264 * @param dev 3265 * Pointer to Ethernet device. 3266 */ 3267 static void 3268 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3269 { 3270 struct mlx5_flow_mreg_copy_resource *mcp_res; 3271 struct mlx5_priv *priv = dev->data->dev_private; 3272 3273 /* Check if default flow is registered. */ 3274 if (!priv->mreg_cp_tbl) 3275 return; 3276 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3277 MLX5_DEFAULT_COPY_ID); 3278 if (!mcp_res) 3279 return; 3280 MLX5_ASSERT(mcp_res->rix_flow); 3281 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3282 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3283 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3284 } 3285 3286 /** 3287 * Add the default copy action in in RX_CP_TBL. 3288 * 3289 * @param dev 3290 * Pointer to Ethernet device. 3291 * @param[out] error 3292 * Perform verbose error reporting if not NULL. 3293 * 3294 * @return 3295 * 0 for success, negative value otherwise and rte_errno is set. 3296 */ 3297 static int 3298 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3299 struct rte_flow_error *error) 3300 { 3301 struct mlx5_priv *priv = dev->data->dev_private; 3302 struct mlx5_flow_mreg_copy_resource *mcp_res; 3303 3304 /* Check whether extensive metadata feature is engaged. */ 3305 if (!priv->config.dv_flow_en || 3306 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3307 !mlx5_flow_ext_mreg_supported(dev) || 3308 !priv->sh->dv_regc0_mask) 3309 return 0; 3310 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3311 if (!mcp_res) 3312 return -rte_errno; 3313 return 0; 3314 } 3315 3316 /** 3317 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3318 * 3319 * All the flow having Q/RSS action should be split by 3320 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3321 * performs the following, 3322 * - CQE->flow_tag := reg_c[1] (MARK) 3323 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3324 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3325 * but there should be a flow per each MARK ID set by MARK action. 3326 * 3327 * For the aforementioned reason, if there's a MARK action in flow's action 3328 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3329 * the MARK ID to CQE's flow_tag like, 3330 * - If reg_c[1] is mark_id, 3331 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3332 * 3333 * For SET_META action which stores value in reg_c[0], as the destination is 3334 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3335 * MARK ID means the default flow. 
The default flow looks like, 3336 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3337 * 3338 * @param dev 3339 * Pointer to Ethernet device. 3340 * @param flow 3341 * Pointer to flow structure. 3342 * @param[in] actions 3343 * Pointer to the list of actions. 3344 * @param[out] error 3345 * Perform verbose error reporting if not NULL. 3346 * 3347 * @return 3348 * 0 on success, negative value otherwise and rte_errno is set. 3349 */ 3350 static int 3351 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3352 struct rte_flow *flow, 3353 const struct rte_flow_action *actions, 3354 struct rte_flow_error *error) 3355 { 3356 struct mlx5_priv *priv = dev->data->dev_private; 3357 struct mlx5_dev_config *config = &priv->config; 3358 struct mlx5_flow_mreg_copy_resource *mcp_res; 3359 const struct rte_flow_action_mark *mark; 3360 3361 /* Check whether extensive metadata feature is engaged. */ 3362 if (!config->dv_flow_en || 3363 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3364 !mlx5_flow_ext_mreg_supported(dev) || 3365 !priv->sh->dv_regc0_mask) 3366 return 0; 3367 /* Find MARK action. */ 3368 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3369 switch (actions->type) { 3370 case RTE_FLOW_ACTION_TYPE_FLAG: 3371 mcp_res = flow_mreg_add_copy_action 3372 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3373 if (!mcp_res) 3374 return -rte_errno; 3375 flow->rix_mreg_copy = mcp_res->idx; 3376 if (dev->data->dev_started) { 3377 mcp_res->appcnt++; 3378 flow->copy_applied = 1; 3379 } 3380 return 0; 3381 case RTE_FLOW_ACTION_TYPE_MARK: 3382 mark = (const struct rte_flow_action_mark *) 3383 actions->conf; 3384 mcp_res = 3385 flow_mreg_add_copy_action(dev, mark->id, error); 3386 if (!mcp_res) 3387 return -rte_errno; 3388 flow->rix_mreg_copy = mcp_res->idx; 3389 if (dev->data->dev_started) { 3390 mcp_res->appcnt++; 3391 flow->copy_applied = 1; 3392 } 3393 return 0; 3394 default: 3395 break; 3396 } 3397 } 3398 return 0; 3399 } 3400 3401 #define MLX5_MAX_SPLIT_ACTIONS 24 3402 #define MLX5_MAX_SPLIT_ITEMS 24 3403 3404 /** 3405 * Split the hairpin flow. 3406 * Since HW can't support encap on Rx we move the encap to Tx. 3407 * If the count action is after the encap then we also 3408 * move the count action. in this case the count will also measure 3409 * the outer bytes. 3410 * 3411 * @param dev 3412 * Pointer to Ethernet device. 3413 * @param[in] actions 3414 * Associated actions (list terminated by the END action). 3415 * @param[out] actions_rx 3416 * Rx flow actions. 3417 * @param[out] actions_tx 3418 * Tx flow actions.. 3419 * @param[out] pattern_tx 3420 * The pattern items for the Tx flow. 3421 * @param[out] flow_id 3422 * The flow ID connected to this flow. 3423 * 3424 * @return 3425 * 0 on success. 
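 *
 * Illustrative example (simplified, added for clarity): for an original
 * ingress action list
 * - VXLAN_ENCAP / COUNT / QUEUE / END
 * the split below produces roughly
 * - actions_rx: QUEUE / TAG (set reg := flow_id) / END
 * - actions_tx: VXLAN_ENCAP / COUNT / END
 * - pattern_tx: TAG (match reg == flow_id) / END
 * using the internal MLX5_RTE_FLOW_ACTION_TYPE_TAG action and
 * MLX5_RTE_FLOW_ITEM_TYPE_TAG item with the register reported by
 * mlx5_flow_get_reg_id().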
3426 */ 3427 static int 3428 flow_hairpin_split(struct rte_eth_dev *dev, 3429 const struct rte_flow_action actions[], 3430 struct rte_flow_action actions_rx[], 3431 struct rte_flow_action actions_tx[], 3432 struct rte_flow_item pattern_tx[], 3433 uint32_t *flow_id) 3434 { 3435 struct mlx5_priv *priv = dev->data->dev_private; 3436 const struct rte_flow_action_raw_encap *raw_encap; 3437 const struct rte_flow_action_raw_decap *raw_decap; 3438 struct mlx5_rte_flow_action_set_tag *set_tag; 3439 struct rte_flow_action *tag_action; 3440 struct mlx5_rte_flow_item_tag *tag_item; 3441 struct rte_flow_item *item; 3442 char *addr; 3443 int encap = 0; 3444 3445 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3446 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3447 switch (actions->type) { 3448 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3449 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3450 rte_memcpy(actions_tx, actions, 3451 sizeof(struct rte_flow_action)); 3452 actions_tx++; 3453 break; 3454 case RTE_FLOW_ACTION_TYPE_COUNT: 3455 if (encap) { 3456 rte_memcpy(actions_tx, actions, 3457 sizeof(struct rte_flow_action)); 3458 actions_tx++; 3459 } else { 3460 rte_memcpy(actions_rx, actions, 3461 sizeof(struct rte_flow_action)); 3462 actions_rx++; 3463 } 3464 break; 3465 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3466 raw_encap = actions->conf; 3467 if (raw_encap->size > 3468 (sizeof(struct rte_flow_item_eth) + 3469 sizeof(struct rte_flow_item_ipv4))) { 3470 memcpy(actions_tx, actions, 3471 sizeof(struct rte_flow_action)); 3472 actions_tx++; 3473 encap = 1; 3474 } else { 3475 rte_memcpy(actions_rx, actions, 3476 sizeof(struct rte_flow_action)); 3477 actions_rx++; 3478 } 3479 break; 3480 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3481 raw_decap = actions->conf; 3482 if (raw_decap->size < 3483 (sizeof(struct rte_flow_item_eth) + 3484 sizeof(struct rte_flow_item_ipv4))) { 3485 memcpy(actions_tx, actions, 3486 sizeof(struct rte_flow_action)); 3487 actions_tx++; 3488 } else { 3489 rte_memcpy(actions_rx, actions, 3490 sizeof(struct rte_flow_action)); 3491 actions_rx++; 3492 } 3493 break; 3494 default: 3495 rte_memcpy(actions_rx, actions, 3496 sizeof(struct rte_flow_action)); 3497 actions_rx++; 3498 break; 3499 } 3500 } 3501 /* Add set meta action and end action for the Rx flow. */ 3502 tag_action = actions_rx; 3503 tag_action->type = (enum rte_flow_action_type) 3504 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3505 actions_rx++; 3506 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3507 actions_rx++; 3508 set_tag = (void *)actions_rx; 3509 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3510 MLX5_ASSERT(set_tag->id > REG_NONE); 3511 set_tag->data = *flow_id; 3512 tag_action->conf = set_tag; 3513 /* Create Tx item list. 
*/ 3514 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3515 addr = (void *)&pattern_tx[2]; 3516 item = pattern_tx; 3517 item->type = (enum rte_flow_item_type) 3518 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3519 tag_item = (void *)addr; 3520 tag_item->data = *flow_id; 3521 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3522 MLX5_ASSERT(set_tag->id > REG_NONE); 3523 item->spec = tag_item; 3524 addr += sizeof(struct mlx5_rte_flow_item_tag); 3525 tag_item = (void *)addr; 3526 tag_item->data = UINT32_MAX; 3527 tag_item->id = UINT16_MAX; 3528 item->mask = tag_item; 3529 addr += sizeof(struct mlx5_rte_flow_item_tag); 3530 item->last = NULL; 3531 item++; 3532 item->type = RTE_FLOW_ITEM_TYPE_END; 3533 return 0; 3534 } 3535 3536 /** 3537 * The last stage of splitting chain, just creates the subflow 3538 * without any modification. 3539 * 3540 * @param[in] dev 3541 * Pointer to Ethernet device. 3542 * @param[in] flow 3543 * Parent flow structure pointer. 3544 * @param[in, out] sub_flow 3545 * Pointer to return the created subflow, may be NULL. 3546 * @param[in] prefix_layers 3547 * Prefix subflow layers, may be 0. 3548 * @param[in] attr 3549 * Flow rule attributes. 3550 * @param[in] items 3551 * Pattern specification (list terminated by the END pattern item). 3552 * @param[in] actions 3553 * Associated actions (list terminated by the END action). 3554 * @param[in] external 3555 * This flow rule is created by request external to PMD. 3556 * @param[in] flow_idx 3557 * This memory pool index to the flow. 3558 * @param[out] error 3559 * Perform verbose error reporting if not NULL. 3560 * @return 3561 * 0 on success, negative value otherwise 3562 */ 3563 static int 3564 flow_create_split_inner(struct rte_eth_dev *dev, 3565 struct rte_flow *flow, 3566 struct mlx5_flow **sub_flow, 3567 uint64_t prefix_layers, 3568 const struct rte_flow_attr *attr, 3569 const struct rte_flow_item items[], 3570 const struct rte_flow_action actions[], 3571 bool external, uint32_t flow_idx, 3572 struct rte_flow_error *error) 3573 { 3574 struct mlx5_flow *dev_flow; 3575 3576 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3577 flow_idx, error); 3578 if (!dev_flow) 3579 return -rte_errno; 3580 dev_flow->flow = flow; 3581 dev_flow->external = external; 3582 /* Subflow object was created, we must include one in the list. */ 3583 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3584 dev_flow->handle, next); 3585 /* 3586 * If dev_flow is as one of the suffix flow, some actions in suffix 3587 * flow may need some user defined item layer flags. 3588 */ 3589 if (prefix_layers) 3590 dev_flow->handle->layers = prefix_layers; 3591 if (sub_flow) 3592 *sub_flow = dev_flow; 3593 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3594 } 3595 3596 /** 3597 * Split the meter flow. 3598 * 3599 * As meter flow will split to three sub flow, other than meter 3600 * action, the other actions make sense to only meter accepts 3601 * the packet. If it need to be dropped, no other additional 3602 * actions should be take. 3603 * 3604 * One kind of special action which decapsulates the L3 tunnel 3605 * header will be in the prefix sub flow, as not to take the 3606 * L3 tunnel header into account. 3607 * 3608 * @param dev 3609 * Pointer to Ethernet device. 3610 * @param[in] items 3611 * Pattern specification (list terminated by the END pattern item). 3612 * @param[out] sfx_items 3613 * Suffix flow match items (list terminated by the END pattern item). 
3614 * @param[in] actions 3615 * Associated actions (list terminated by the END action). 3616 * @param[out] actions_sfx 3617 * Suffix flow actions. 3618 * @param[out] actions_pre 3619 * Prefix flow actions. 3620 * @param[out] pattern_sfx 3621 * The pattern items for the suffix flow. 3622 * @param[out] tag_sfx 3623 * Pointer to suffix flow tag. 3624 * 3625 * @return 3626 * 0 on success. 3627 */ 3628 static int 3629 flow_meter_split_prep(struct rte_eth_dev *dev, 3630 const struct rte_flow_item items[], 3631 struct rte_flow_item sfx_items[], 3632 const struct rte_flow_action actions[], 3633 struct rte_flow_action actions_sfx[], 3634 struct rte_flow_action actions_pre[]) 3635 { 3636 struct rte_flow_action *tag_action = NULL; 3637 struct rte_flow_item *tag_item; 3638 struct mlx5_rte_flow_action_set_tag *set_tag; 3639 struct rte_flow_error error; 3640 const struct rte_flow_action_raw_encap *raw_encap; 3641 const struct rte_flow_action_raw_decap *raw_decap; 3642 struct mlx5_rte_flow_item_tag *tag_spec; 3643 struct mlx5_rte_flow_item_tag *tag_mask; 3644 uint32_t tag_id; 3645 bool copy_vlan = false; 3646 3647 /* Prepare the actions for prefix and suffix flow. */ 3648 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3649 struct rte_flow_action **action_cur = NULL; 3650 3651 switch (actions->type) { 3652 case RTE_FLOW_ACTION_TYPE_METER: 3653 /* Add the extra tag action first. */ 3654 tag_action = actions_pre; 3655 tag_action->type = (enum rte_flow_action_type) 3656 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3657 actions_pre++; 3658 action_cur = &actions_pre; 3659 break; 3660 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 3661 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 3662 action_cur = &actions_pre; 3663 break; 3664 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3665 raw_encap = actions->conf; 3666 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 3667 action_cur = &actions_pre; 3668 break; 3669 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3670 raw_decap = actions->conf; 3671 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3672 action_cur = &actions_pre; 3673 break; 3674 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3675 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3676 copy_vlan = true; 3677 break; 3678 default: 3679 break; 3680 } 3681 if (!action_cur) 3682 action_cur = &actions_sfx; 3683 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 3684 (*action_cur)++; 3685 } 3686 /* Add end action to the actions. */ 3687 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 3688 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 3689 actions_pre++; 3690 /* Set the tag. */ 3691 set_tag = (void *)actions_pre; 3692 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3693 /* 3694 * Get the id from the qrss_pool to make qrss share the id with meter. 3695 */ 3696 tag_id = flow_qrss_get_id(dev); 3697 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 3698 assert(tag_action); 3699 tag_action->conf = set_tag; 3700 /* Prepare the suffix subflow items. */ 3701 tag_item = sfx_items++; 3702 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 3703 int item_type = items->type; 3704 3705 switch (item_type) { 3706 case RTE_FLOW_ITEM_TYPE_PORT_ID: 3707 memcpy(sfx_items, items, sizeof(*sfx_items)); 3708 sfx_items++; 3709 break; 3710 case RTE_FLOW_ITEM_TYPE_VLAN: 3711 if (copy_vlan) { 3712 memcpy(sfx_items, items, sizeof(*sfx_items)); 3713 /* 3714 * Convert to internal match item, it is used 3715 * for vlan push and set vid. 
3716 */ 3717 sfx_items->type = (enum rte_flow_item_type) 3718 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 3719 sfx_items++; 3720 } 3721 break; 3722 default: 3723 break; 3724 } 3725 } 3726 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 3727 sfx_items++; 3728 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 3729 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 3730 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3731 tag_mask = tag_spec + 1; 3732 tag_mask->data = 0xffffff00; 3733 tag_item->type = (enum rte_flow_item_type) 3734 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3735 tag_item->spec = tag_spec; 3736 tag_item->last = NULL; 3737 tag_item->mask = tag_mask; 3738 return tag_id; 3739 } 3740 3741 /** 3742 * Split action list having QUEUE/RSS for metadata register copy. 3743 * 3744 * Once Q/RSS action is detected in user's action list, the flow action 3745 * should be split in order to copy metadata registers, which will happen in 3746 * RX_CP_TBL like, 3747 * - CQE->flow_tag := reg_c[1] (MARK) 3748 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3749 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 3750 * This is because the last action of each flow must be a terminal action 3751 * (QUEUE, RSS or DROP). 3752 * 3753 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 3754 * stored and kept in the mlx5_flow structure per each sub_flow. 3755 * 3756 * The Q/RSS action is replaced with, 3757 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 3758 * And the following JUMP action is added at the end, 3759 * - JUMP, to RX_CP_TBL. 3760 * 3761 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 3762 * flow_create_split_metadata() routine. The flow will look like, 3763 * - If flow ID matches (reg_c[2]), perform Q/RSS. 3764 * 3765 * @param dev 3766 * Pointer to Ethernet device. 3767 * @param[out] split_actions 3768 * Pointer to store split actions to jump to CP_TBL. 3769 * @param[in] actions 3770 * Pointer to the list of original flow actions. 3771 * @param[in] qrss 3772 * Pointer to the Q/RSS action. 3773 * @param[in] actions_n 3774 * Number of original actions. 3775 * @param[out] error 3776 * Perform verbose error reporting if not NULL. 3777 * 3778 * @return 3779 * non-zero unique flow_id on success, otherwise 0 and 3780 * error/rte_error are set. 3781 */ 3782 static uint32_t 3783 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 3784 struct rte_flow_action *split_actions, 3785 const struct rte_flow_action *actions, 3786 const struct rte_flow_action *qrss, 3787 int actions_n, struct rte_flow_error *error) 3788 { 3789 struct mlx5_rte_flow_action_set_tag *set_tag; 3790 struct rte_flow_action_jump *jump; 3791 const int qrss_idx = qrss - actions; 3792 uint32_t flow_id = 0; 3793 int ret = 0; 3794 3795 /* 3796 * Given actions will be split 3797 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 3798 * - Add jump to mreg CP_TBL. 3799 * As a result, there will be one more action. 3800 */ 3801 ++actions_n; 3802 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 3803 set_tag = (void *)(split_actions + actions_n); 3804 /* 3805 * If tag action is not set to void(it means we are not the meter 3806 * suffix flow), add the tag action. Since meter suffix flow already 3807 * has the tag added. 3808 */ 3809 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 3810 /* 3811 * Allocate the new subflow ID. This one is unique within 3812 * device and not shared with representors. 
Otherwise, 3813 * we would have to resolve a multi-thread access synchronization 3814 * issue. Each flow on the shared device is appended 3815 * with the source vport identifier, so the resulting 3816 * flows will be unique in the shared (by master and 3817 * representors) domain even if they have coinciding 3818 * IDs. 3819 */ 3820 flow_id = flow_qrss_get_id(dev); 3821 if (!flow_id) 3822 return rte_flow_error_set(error, ENOMEM, 3823 RTE_FLOW_ERROR_TYPE_ACTION, 3824 NULL, "can't allocate id " 3825 "for split Q/RSS subflow"); 3826 /* Internal SET_TAG action to set flow ID. */ 3827 *set_tag = (struct mlx5_rte_flow_action_set_tag){ 3828 .data = flow_id, 3829 }; 3830 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error); 3831 if (ret < 0) 3832 return ret; 3833 set_tag->id = ret; 3834 /* Construct new actions array. */ 3835 /* Replace QUEUE/RSS action. */ 3836 split_actions[qrss_idx] = (struct rte_flow_action){ 3837 .type = (enum rte_flow_action_type) 3838 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 3839 .conf = set_tag, 3840 }; 3841 } 3842 /* JUMP action to jump to mreg copy table (CP_TBL). */ 3843 jump = (void *)(set_tag + 1); 3844 *jump = (struct rte_flow_action_jump){ 3845 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3846 }; 3847 split_actions[actions_n - 2] = (struct rte_flow_action){ 3848 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3849 .conf = jump, 3850 }; 3851 split_actions[actions_n - 1] = (struct rte_flow_action){ 3852 .type = RTE_FLOW_ACTION_TYPE_END, 3853 }; 3854 return flow_id; 3855 } 3856 3857 /** 3858 * Extend the given action list for Tx metadata copy. 3859 * 3860 * Copy the given action list to ext_actions and add a flow metadata register 3861 * copy action in order to copy reg_a set by the WQE to reg_c[0]. 3862 * 3863 * @param[out] ext_actions 3864 * Pointer to the extended action list. 3865 * @param[in] actions 3866 * Pointer to the list of actions. 3867 * @param[in] actions_n 3868 * Number of actions in the list. 3869 * @param[out] error 3870 * Perform verbose error reporting if not NULL. 3871 * @param[in] encap_idx 3872 * The encap action index.
3873 * 3874 * @return 3875 * 0 on success, negative value otherwise 3876 */ 3877 static int 3878 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 3879 struct rte_flow_action *ext_actions, 3880 const struct rte_flow_action *actions, 3881 int actions_n, struct rte_flow_error *error, 3882 int encap_idx) 3883 { 3884 struct mlx5_flow_action_copy_mreg *cp_mreg = 3885 (struct mlx5_flow_action_copy_mreg *) 3886 (ext_actions + actions_n + 1); 3887 int ret; 3888 3889 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3890 if (ret < 0) 3891 return ret; 3892 cp_mreg->dst = ret; 3893 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 3894 if (ret < 0) 3895 return ret; 3896 cp_mreg->src = ret; 3897 if (encap_idx != 0) 3898 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 3899 if (encap_idx == actions_n - 1) { 3900 ext_actions[actions_n - 1] = (struct rte_flow_action){ 3901 .type = (enum rte_flow_action_type) 3902 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3903 .conf = cp_mreg, 3904 }; 3905 ext_actions[actions_n] = (struct rte_flow_action){ 3906 .type = RTE_FLOW_ACTION_TYPE_END, 3907 }; 3908 } else { 3909 ext_actions[encap_idx] = (struct rte_flow_action){ 3910 .type = (enum rte_flow_action_type) 3911 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3912 .conf = cp_mreg, 3913 }; 3914 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 3915 sizeof(*ext_actions) * (actions_n - encap_idx)); 3916 } 3917 return 0; 3918 } 3919 3920 /** 3921 * The splitting for metadata feature. 3922 * 3923 * - Q/RSS action on NIC Rx should be split in order to pass by 3924 * the mreg copy table (RX_CP_TBL) and then it jumps to the 3925 * action table (RX_ACT_TBL) which has the split Q/RSS action. 3926 * 3927 * - All the actions on NIC Tx should have a mreg copy action to 3928 * copy reg_a from WQE to reg_c[0]. 3929 * 3930 * @param dev 3931 * Pointer to Ethernet device. 3932 * @param[in] flow 3933 * Parent flow structure pointer. 3934 * @param[in] prefix_layers 3935 * Prefix flow layer flags. 3936 * @param[in] attr 3937 * Flow rule attributes. 3938 * @param[in] items 3939 * Pattern specification (list terminated by the END pattern item). 3940 * @param[in] actions 3941 * Associated actions (list terminated by the END action). 3942 * @param[in] external 3943 * This flow rule is created by request external to PMD. 3944 * @param[in] flow_idx 3945 * This memory pool index to the flow. 3946 * @param[out] error 3947 * Perform verbose error reporting if not NULL. 3948 * @return 3949 * 0 on success, negative value otherwise 3950 */ 3951 static int 3952 flow_create_split_metadata(struct rte_eth_dev *dev, 3953 struct rte_flow *flow, 3954 uint64_t prefix_layers, 3955 const struct rte_flow_attr *attr, 3956 const struct rte_flow_item items[], 3957 const struct rte_flow_action actions[], 3958 bool external, uint32_t flow_idx, 3959 struct rte_flow_error *error) 3960 { 3961 struct mlx5_priv *priv = dev->data->dev_private; 3962 struct mlx5_dev_config *config = &priv->config; 3963 const struct rte_flow_action *qrss = NULL; 3964 struct rte_flow_action *ext_actions = NULL; 3965 struct mlx5_flow *dev_flow = NULL; 3966 uint32_t qrss_id = 0; 3967 int mtr_sfx = 0; 3968 size_t act_size; 3969 int actions_n; 3970 int encap_idx; 3971 int ret; 3972 3973 /* Check whether extensive metadata feature is engaged. 
*/ 3974 if (!config->dv_flow_en || 3975 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3976 !mlx5_flow_ext_mreg_supported(dev)) 3977 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 3978 attr, items, actions, external, 3979 flow_idx, error); 3980 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 3981 &encap_idx); 3982 if (qrss) { 3983 /* Exclude hairpin flows from splitting. */ 3984 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 3985 const struct rte_flow_action_queue *queue; 3986 3987 queue = qrss->conf; 3988 if (mlx5_rxq_get_type(dev, queue->index) == 3989 MLX5_RXQ_TYPE_HAIRPIN) 3990 qrss = NULL; 3991 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 3992 const struct rte_flow_action_rss *rss; 3993 3994 rss = qrss->conf; 3995 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 3996 MLX5_RXQ_TYPE_HAIRPIN) 3997 qrss = NULL; 3998 } 3999 } 4000 if (qrss) { 4001 /* Check if it is in meter suffix table. */ 4002 mtr_sfx = attr->group == (attr->transfer ? 4003 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4004 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4005 /* 4006 * Q/RSS action on NIC Rx should be split in order to pass by 4007 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4008 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4009 */ 4010 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4011 sizeof(struct rte_flow_action_set_tag) + 4012 sizeof(struct rte_flow_action_jump); 4013 ext_actions = rte_zmalloc(__func__, act_size, 0); 4014 if (!ext_actions) 4015 return rte_flow_error_set(error, ENOMEM, 4016 RTE_FLOW_ERROR_TYPE_ACTION, 4017 NULL, "no memory to split " 4018 "metadata flow"); 4019 /* 4020 * If we are the suffix flow of meter, tag already exist. 4021 * Set the tag action to void. 4022 */ 4023 if (mtr_sfx) 4024 ext_actions[qrss - actions].type = 4025 RTE_FLOW_ACTION_TYPE_VOID; 4026 else 4027 ext_actions[qrss - actions].type = 4028 (enum rte_flow_action_type) 4029 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4030 /* 4031 * Create the new actions list with removed Q/RSS action 4032 * and appended set tag and jump to register copy table 4033 * (RX_CP_TBL). We should preallocate unique tag ID here 4034 * in advance, because it is needed for set tag action. 4035 */ 4036 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4037 qrss, actions_n, error); 4038 if (!mtr_sfx && !qrss_id) { 4039 ret = -rte_errno; 4040 goto exit; 4041 } 4042 } else if (attr->egress && !attr->transfer) { 4043 /* 4044 * All the actions on NIC Tx should have a metadata register 4045 * copy action to copy reg_a from WQE to reg_c[meta] 4046 */ 4047 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4048 sizeof(struct mlx5_flow_action_copy_mreg); 4049 ext_actions = rte_zmalloc(__func__, act_size, 0); 4050 if (!ext_actions) 4051 return rte_flow_error_set(error, ENOMEM, 4052 RTE_FLOW_ERROR_TYPE_ACTION, 4053 NULL, "no memory to split " 4054 "metadata flow"); 4055 /* Create the action list appended with copy register. */ 4056 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4057 actions_n, error, encap_idx); 4058 if (ret < 0) 4059 goto exit; 4060 } 4061 /* Add the unmodified original or prefix subflow. */ 4062 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr, 4063 items, ext_actions ? 
ext_actions : 4064 actions, external, flow_idx, error); 4065 if (ret < 0) 4066 goto exit; 4067 MLX5_ASSERT(dev_flow); 4068 if (qrss) { 4069 const struct rte_flow_attr q_attr = { 4070 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4071 .ingress = 1, 4072 }; 4073 /* Internal PMD action to set register. */ 4074 struct mlx5_rte_flow_item_tag q_tag_spec = { 4075 .data = qrss_id, 4076 .id = 0, 4077 }; 4078 struct rte_flow_item q_items[] = { 4079 { 4080 .type = (enum rte_flow_item_type) 4081 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4082 .spec = &q_tag_spec, 4083 .last = NULL, 4084 .mask = NULL, 4085 }, 4086 { 4087 .type = RTE_FLOW_ITEM_TYPE_END, 4088 }, 4089 }; 4090 struct rte_flow_action q_actions[] = { 4091 { 4092 .type = qrss->type, 4093 .conf = qrss->conf, 4094 }, 4095 { 4096 .type = RTE_FLOW_ACTION_TYPE_END, 4097 }, 4098 }; 4099 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4100 4101 /* 4102 * Configure the tag item only if there is no meter subflow. 4103 * Since the tag is already set in the meter suffix subflow, 4104 * the meter suffix items can be used as is. 4105 */ 4106 if (qrss_id) { 4107 /* Not meter subflow. */ 4108 MLX5_ASSERT(!mtr_sfx); 4109 /* 4110 * Put the unique ID in the prefix flow because it is destroyed 4111 * after the suffix flow. The ID is freed only when no actual 4112 * flows reference it any more, and only then does identifier 4113 * reallocation become possible (for example, for 4114 * other flows in other threads). 4115 */ 4116 dev_flow->handle->split_flow_id = qrss_id; 4117 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4118 error); 4119 if (ret < 0) 4120 goto exit; 4121 q_tag_spec.id = ret; 4122 } 4123 dev_flow = NULL; 4124 /* Add suffix subflow to execute Q/RSS. */ 4125 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 4126 &q_attr, mtr_sfx ? items : 4127 q_items, q_actions, 4128 external, flow_idx, error); 4129 if (ret < 0) 4130 goto exit; 4131 /* The qrss ID is only freed on failure; clear it now that the subflow owns it. */ 4132 qrss_id = 0; 4133 MLX5_ASSERT(dev_flow); 4134 } 4135 4136 exit: 4137 /* 4138 * We do not destroy the partially created sub_flows in case of error. 4139 * They are included in the parent flow list and will be destroyed 4140 * by flow_drv_destroy(). 4141 */ 4142 flow_qrss_free_id(dev, qrss_id); 4143 rte_free(ext_actions); 4144 return ret; 4145 } 4146 4147 /** 4148 * The splitting for the meter feature. 4149 * 4150 * - The meter flow will be split into two flows: a prefix and a 4151 * suffix flow. Packets are only meaningful if they pass the prefix 4152 * meter action. 4153 * 4154 * - Reg_C_5 is used to match the packet between the prefix and 4155 * suffix flow. 4156 * 4157 * @param dev 4158 * Pointer to Ethernet device. 4159 * @param[in] flow 4160 * Parent flow structure pointer. 4161 * @param[in] attr 4162 * Flow rule attributes. 4163 * @param[in] items 4164 * Pattern specification (list terminated by the END pattern item). 4165 * @param[in] actions 4166 * Associated actions (list terminated by the END action). 4167 * @param[in] external 4168 * This flow rule is created by a request external to the PMD. 4169 * @param[in] flow_idx 4170 * The memory pool index of the flow. 4171 * @param[out] error 4172 * Perform verbose error reporting if not NULL.
4173 * @return 4174 * 0 on success, negative value otherwise 4175 */ 4176 static int 4177 flow_create_split_meter(struct rte_eth_dev *dev, 4178 struct rte_flow *flow, 4179 const struct rte_flow_attr *attr, 4180 const struct rte_flow_item items[], 4181 const struct rte_flow_action actions[], 4182 bool external, uint32_t flow_idx, 4183 struct rte_flow_error *error) 4184 { 4185 struct mlx5_priv *priv = dev->data->dev_private; 4186 struct rte_flow_action *sfx_actions = NULL; 4187 struct rte_flow_action *pre_actions = NULL; 4188 struct rte_flow_item *sfx_items = NULL; 4189 struct mlx5_flow *dev_flow = NULL; 4190 struct rte_flow_attr sfx_attr = *attr; 4191 uint32_t mtr = 0; 4192 uint32_t mtr_tag_id = 0; 4193 size_t act_size; 4194 size_t item_size; 4195 int actions_n = 0; 4196 int ret; 4197 4198 if (priv->mtr_en) 4199 actions_n = flow_check_meter_action(actions, &mtr); 4200 if (mtr) { 4201 /* The five prefix actions: meter, decap, encap, tag, end. */ 4202 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4203 sizeof(struct mlx5_rte_flow_action_set_tag); 4204 /* tag, vlan, port id, end. */ 4205 #define METER_SUFFIX_ITEM 4 4206 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4207 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4208 sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0); 4209 if (!sfx_actions) 4210 return rte_flow_error_set(error, ENOMEM, 4211 RTE_FLOW_ERROR_TYPE_ACTION, 4212 NULL, "no memory to split " 4213 "meter flow"); 4214 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4215 act_size); 4216 pre_actions = sfx_actions + actions_n; 4217 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4218 actions, sfx_actions, 4219 pre_actions); 4220 if (!mtr_tag_id) { 4221 ret = -rte_errno; 4222 goto exit; 4223 } 4224 /* Add the prefix subflow. */ 4225 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr, 4226 items, pre_actions, external, 4227 flow_idx, error); 4228 if (ret) { 4229 ret = -rte_errno; 4230 goto exit; 4231 } 4232 dev_flow->handle->split_flow_id = mtr_tag_id; 4233 /* Setting the sfx group atrr. */ 4234 sfx_attr.group = sfx_attr.transfer ? 4235 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4236 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4237 } 4238 /* Add the prefix subflow. */ 4239 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4240 flow_get_prefix_layer_flags(dev_flow) : 4241 0, &sfx_attr, 4242 sfx_items ? sfx_items : items, 4243 sfx_actions ? sfx_actions : actions, 4244 external, flow_idx, error); 4245 exit: 4246 if (sfx_actions) 4247 rte_free(sfx_actions); 4248 return ret; 4249 } 4250 4251 /** 4252 * Split the flow to subflow set. The splitters might be linked 4253 * in the chain, like this: 4254 * flow_create_split_outer() calls: 4255 * flow_create_split_meter() calls: 4256 * flow_create_split_metadata(meter_subflow_0) calls: 4257 * flow_create_split_inner(metadata_subflow_0) 4258 * flow_create_split_inner(metadata_subflow_1) 4259 * flow_create_split_inner(metadata_subflow_2) 4260 * flow_create_split_metadata(meter_subflow_1) calls: 4261 * flow_create_split_inner(metadata_subflow_0) 4262 * flow_create_split_inner(metadata_subflow_1) 4263 * flow_create_split_inner(metadata_subflow_2) 4264 * 4265 * This provide flexible way to add new levels of flow splitting. 4266 * The all of successfully created subflows are included to the 4267 * parent flow dev_flow list. 4268 * 4269 * @param dev 4270 * Pointer to Ethernet device. 4271 * @param[in] flow 4272 * Parent flow structure pointer. 4273 * @param[in] attr 4274 * Flow rule attributes. 
4275 * @param[in] items 4276 * Pattern specification (list terminated by the END pattern item). 4277 * @param[in] actions 4278 * Associated actions (list terminated by the END action). 4279 * @param[in] external 4280 * This flow rule is created by a request external to the PMD. 4281 * @param[in] flow_idx 4282 * The memory pool index of the flow. 4283 * @param[out] error 4284 * Perform verbose error reporting if not NULL. 4285 * @return 4286 * 0 on success, negative value otherwise 4287 */ 4288 static int 4289 flow_create_split_outer(struct rte_eth_dev *dev, 4290 struct rte_flow *flow, 4291 const struct rte_flow_attr *attr, 4292 const struct rte_flow_item items[], 4293 const struct rte_flow_action actions[], 4294 bool external, uint32_t flow_idx, 4295 struct rte_flow_error *error) 4296 { 4297 int ret; 4298 4299 ret = flow_create_split_meter(dev, flow, attr, items, 4300 actions, external, flow_idx, error); 4301 MLX5_ASSERT(ret <= 0); 4302 return ret; 4303 } 4304 4305 /** 4306 * Create a flow and add it to @p list. 4307 * 4308 * @param dev 4309 * Pointer to Ethernet device. 4310 * @param list 4311 * Pointer to the indexed flow list. If this parameter is NULL, 4312 * no list insertion occurs, the flow is just created and it is 4313 * the caller's responsibility to track the 4314 * created flow. 4315 * @param[in] attr 4316 * Flow rule attributes. 4317 * @param[in] items 4318 * Pattern specification (list terminated by the END pattern item). 4319 * @param[in] actions 4320 * Associated actions (list terminated by the END action). 4321 * @param[in] external 4322 * This flow rule is created by a request external to the PMD. 4323 * @param[out] error 4324 * Perform verbose error reporting if not NULL. 4325 * 4326 * @return 4327 * A flow index on success, 0 otherwise and rte_errno is set.
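 *   The index refers to the MLX5_IPOOL_RTE_FLOW indexed pool and can be
 *   translated back to an rte_flow pointer with mlx5_ipool_get().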
4328 */ 4329 static uint32_t 4330 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 4331 const struct rte_flow_attr *attr, 4332 const struct rte_flow_item items[], 4333 const struct rte_flow_action actions[], 4334 bool external, struct rte_flow_error *error) 4335 { 4336 struct mlx5_priv *priv = dev->data->dev_private; 4337 struct rte_flow *flow = NULL; 4338 struct mlx5_flow *dev_flow; 4339 const struct rte_flow_action_rss *rss; 4340 union { 4341 struct rte_flow_expand_rss buf; 4342 uint8_t buffer[2048]; 4343 } expand_buffer; 4344 union { 4345 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4346 uint8_t buffer[2048]; 4347 } actions_rx; 4348 union { 4349 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4350 uint8_t buffer[2048]; 4351 } actions_hairpin_tx; 4352 union { 4353 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 4354 uint8_t buffer[2048]; 4355 } items_tx; 4356 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 4357 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 4358 priv->rss_desc)[!!priv->flow_idx]; 4359 const struct rte_flow_action *p_actions_rx = actions; 4360 uint32_t i; 4361 uint32_t idx = 0; 4362 int hairpin_flow; 4363 uint32_t hairpin_id = 0; 4364 struct rte_flow_attr attr_tx = { .priority = 0 }; 4365 int ret; 4366 4367 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 4368 ret = flow_drv_validate(dev, attr, items, p_actions_rx, 4369 external, hairpin_flow, error); 4370 if (ret < 0) 4371 return 0; 4372 if (hairpin_flow > 0) { 4373 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 4374 rte_errno = EINVAL; 4375 return 0; 4376 } 4377 flow_hairpin_split(dev, actions, actions_rx.actions, 4378 actions_hairpin_tx.actions, items_tx.items, 4379 &hairpin_id); 4380 p_actions_rx = actions_rx.actions; 4381 } 4382 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 4383 if (!flow) { 4384 rte_errno = ENOMEM; 4385 goto error_before_flow; 4386 } 4387 flow->drv_type = flow_get_drv_type(dev, attr); 4388 if (hairpin_id != 0) 4389 flow->hairpin_flow_id = hairpin_id; 4390 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 4391 flow->drv_type < MLX5_FLOW_TYPE_MAX); 4392 memset(rss_desc, 0, sizeof(*rss_desc)); 4393 rss = flow_get_rss_action(p_actions_rx); 4394 if (rss) { 4395 /* 4396 * The following information is required by 4397 * mlx5_flow_hashfields_adjust() in advance. 4398 */ 4399 rss_desc->level = rss->level; 4400 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 4401 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 4402 } 4403 flow->dev_handles = 0; 4404 if (rss && rss->types) { 4405 unsigned int graph_root; 4406 4407 graph_root = find_graph_root(items, rss->level); 4408 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 4409 items, rss->types, 4410 mlx5_support_expansion, 4411 graph_root); 4412 MLX5_ASSERT(ret > 0 && 4413 (unsigned int)ret < sizeof(expand_buffer.buffer)); 4414 } else { 4415 buf->entries = 1; 4416 buf->entry[0].pattern = (void *)(uintptr_t)items; 4417 } 4418 /* 4419 * Record the start index when there is a nested call. All sub-flows 4420 * need to be translated before another calling. 4421 * No need to use ping-pong buffer to save memory here. 4422 */ 4423 if (priv->flow_idx) { 4424 MLX5_ASSERT(!priv->flow_nested_idx); 4425 priv->flow_nested_idx = priv->flow_idx; 4426 } 4427 for (i = 0; i < buf->entries; ++i) { 4428 /* 4429 * The splitter may create multiple dev_flows, 4430 * depending on configuration. In the simplest 4431 * case it just creates unmodified original flow. 
4432 */ 4433 ret = flow_create_split_outer(dev, flow, attr, 4434 buf->entry[i].pattern, 4435 p_actions_rx, external, idx, 4436 error); 4437 if (ret < 0) 4438 goto error; 4439 } 4440 /* Create the tx flow. */ 4441 if (hairpin_flow) { 4442 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 4443 attr_tx.ingress = 0; 4444 attr_tx.egress = 1; 4445 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 4446 actions_hairpin_tx.actions, 4447 idx, error); 4448 if (!dev_flow) 4449 goto error; 4450 dev_flow->flow = flow; 4451 dev_flow->external = 0; 4452 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 4453 dev_flow->handle, next); 4454 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 4455 items_tx.items, 4456 actions_hairpin_tx.actions, error); 4457 if (ret < 0) 4458 goto error; 4459 } 4460 /* 4461 * Update the metadata register copy table. If extensive 4462 * metadata feature is enabled and registers are supported 4463 * we might create the extra rte_flow for each unique 4464 * MARK/FLAG action ID. 4465 * 4466 * The table is updated for ingress Flows only, because 4467 * the egress Flows belong to the different device and 4468 * copy table should be updated in peer NIC Rx domain. 4469 */ 4470 if (attr->ingress && 4471 (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 4472 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 4473 if (ret) 4474 goto error; 4475 } 4476 /* 4477 * If the flow is external (from application) OR device is started, then 4478 * the flow will be applied immediately. 4479 */ 4480 if (external || dev->data->dev_started) { 4481 ret = flow_drv_apply(dev, flow, error); 4482 if (ret < 0) 4483 goto error; 4484 } 4485 if (list) 4486 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 4487 flow, next); 4488 flow_rxq_flags_set(dev, flow); 4489 /* Nested flow creation index recovery. */ 4490 priv->flow_idx = priv->flow_nested_idx; 4491 if (priv->flow_nested_idx) 4492 priv->flow_nested_idx = 0; 4493 return idx; 4494 error: 4495 MLX5_ASSERT(flow); 4496 ret = rte_errno; /* Save rte_errno before cleanup. */ 4497 flow_mreg_del_copy_action(dev, flow); 4498 flow_drv_destroy(dev, flow); 4499 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 4500 rte_errno = ret; /* Restore rte_errno. */ 4501 error_before_flow: 4502 ret = rte_errno; 4503 if (hairpin_id) 4504 mlx5_flow_id_release(priv->sh->flow_id_pool, 4505 hairpin_id); 4506 rte_errno = ret; 4507 priv->flow_idx = priv->flow_nested_idx; 4508 if (priv->flow_nested_idx) 4509 priv->flow_nested_idx = 0; 4510 return 0; 4511 } 4512 4513 /** 4514 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 4515 * incoming packets to table 1. 4516 * 4517 * Other flow rules, requested for group n, will be created in 4518 * e-switch table n+1. 4519 * Jump action to e-switch group n will be created to group n+1. 4520 * 4521 * Used when working in switchdev mode, to utilise advantages of table 1 4522 * and above. 4523 * 4524 * @param dev 4525 * Pointer to Ethernet device. 4526 * 4527 * @return 4528 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
4529 */ 4530 struct rte_flow * 4531 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 4532 { 4533 const struct rte_flow_attr attr = { 4534 .group = 0, 4535 .priority = 0, 4536 .ingress = 1, 4537 .egress = 0, 4538 .transfer = 1, 4539 }; 4540 const struct rte_flow_item pattern = { 4541 .type = RTE_FLOW_ITEM_TYPE_END, 4542 }; 4543 struct rte_flow_action_jump jump = { 4544 .group = 1, 4545 }; 4546 const struct rte_flow_action actions[] = { 4547 { 4548 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4549 .conf = &jump, 4550 }, 4551 { 4552 .type = RTE_FLOW_ACTION_TYPE_END, 4553 }, 4554 }; 4555 struct mlx5_priv *priv = dev->data->dev_private; 4556 struct rte_flow_error error; 4557 4558 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 4559 &attr, &pattern, 4560 actions, false, &error); 4561 } 4562 4563 /** 4564 * Validate a flow supported by the NIC. 4565 * 4566 * @see rte_flow_validate() 4567 * @see rte_flow_ops 4568 */ 4569 int 4570 mlx5_flow_validate(struct rte_eth_dev *dev, 4571 const struct rte_flow_attr *attr, 4572 const struct rte_flow_item items[], 4573 const struct rte_flow_action actions[], 4574 struct rte_flow_error *error) 4575 { 4576 int hairpin_flow; 4577 4578 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 4579 return flow_drv_validate(dev, attr, items, actions, 4580 true, hairpin_flow, error); 4581 } 4582 4583 /** 4584 * Create a flow. 4585 * 4586 * @see rte_flow_create() 4587 * @see rte_flow_ops 4588 */ 4589 struct rte_flow * 4590 mlx5_flow_create(struct rte_eth_dev *dev, 4591 const struct rte_flow_attr *attr, 4592 const struct rte_flow_item items[], 4593 const struct rte_flow_action actions[], 4594 struct rte_flow_error *error) 4595 { 4596 struct mlx5_priv *priv = dev->data->dev_private; 4597 4598 /* 4599 * If the device is not started yet, it is not allowed to created a 4600 * flow from application. PMD default flows and traffic control flows 4601 * are not affected. 4602 */ 4603 if (unlikely(!dev->data->dev_started)) { 4604 DRV_LOG(DEBUG, "port %u is not started when " 4605 "inserting a flow", dev->data->port_id); 4606 rte_flow_error_set(error, ENODEV, 4607 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 4608 NULL, 4609 "port not started"); 4610 return NULL; 4611 } 4612 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 4613 attr, items, actions, true, error); 4614 } 4615 4616 /** 4617 * Destroy a flow in a list. 4618 * 4619 * @param dev 4620 * Pointer to Ethernet device. 4621 * @param list 4622 * Pointer to the Indexed flow list. If this parameter NULL, 4623 * there is no flow removal from the list. Be noted that as 4624 * flow is add to the indexed list, memory of the indexed 4625 * list points to maybe changed as flow destroyed. 4626 * @param[in] flow_idx 4627 * Index of flow to destroy. 4628 */ 4629 static void 4630 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 4631 uint32_t flow_idx) 4632 { 4633 struct mlx5_priv *priv = dev->data->dev_private; 4634 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 4635 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 4636 [MLX5_IPOOL_RTE_FLOW], flow_idx); 4637 4638 if (!flow) 4639 return; 4640 /* 4641 * Update RX queue flags only if port is started, otherwise it is 4642 * already clean. 
4643 */ 4644 if (dev->data->dev_started) 4645 flow_rxq_flags_trim(dev, flow); 4646 if (flow->hairpin_flow_id) 4647 mlx5_flow_id_release(priv->sh->flow_id_pool, 4648 flow->hairpin_flow_id); 4649 flow_drv_destroy(dev, flow); 4650 if (list) 4651 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 4652 flow_idx, flow, next); 4653 flow_mreg_del_copy_action(dev, flow); 4654 if (flow->fdir) { 4655 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 4656 if (priv_fdir_flow->rix_flow == flow_idx) 4657 break; 4658 } 4659 if (priv_fdir_flow) { 4660 LIST_REMOVE(priv_fdir_flow, next); 4661 rte_free(priv_fdir_flow->fdir); 4662 rte_free(priv_fdir_flow); 4663 } 4664 } 4665 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 4666 } 4667 4668 /** 4669 * Destroy all flows. 4670 * 4671 * @param dev 4672 * Pointer to Ethernet device. 4673 * @param list 4674 * Pointer to the Indexed flow list. 4675 * @param active 4676 * If flushing is called avtively. 4677 */ 4678 void 4679 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 4680 { 4681 uint32_t num_flushed = 0; 4682 4683 while (*list) { 4684 flow_list_destroy(dev, list, *list); 4685 num_flushed++; 4686 } 4687 if (active) { 4688 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 4689 dev->data->port_id, num_flushed); 4690 } 4691 } 4692 4693 /** 4694 * Remove all flows. 4695 * 4696 * @param dev 4697 * Pointer to Ethernet device. 4698 * @param list 4699 * Pointer to the Indexed flow list. 4700 */ 4701 void 4702 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 4703 { 4704 struct mlx5_priv *priv = dev->data->dev_private; 4705 struct rte_flow *flow = NULL; 4706 uint32_t idx; 4707 4708 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 4709 flow, next) { 4710 flow_drv_remove(dev, flow); 4711 flow_mreg_stop_copy_action(dev, flow); 4712 } 4713 flow_mreg_del_default_copy_action(dev); 4714 flow_rxq_flags_clear(dev); 4715 } 4716 4717 /** 4718 * Add all flows. 4719 * 4720 * @param dev 4721 * Pointer to Ethernet device. 4722 * @param list 4723 * Pointer to the Indexed flow list. 4724 * 4725 * @return 4726 * 0 on success, a negative errno value otherwise and rte_errno is set. 4727 */ 4728 int 4729 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 4730 { 4731 struct mlx5_priv *priv = dev->data->dev_private; 4732 struct rte_flow *flow = NULL; 4733 struct rte_flow_error error; 4734 uint32_t idx; 4735 int ret = 0; 4736 4737 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 4738 ret = flow_mreg_add_default_copy_action(dev, &error); 4739 if (ret < 0) 4740 return -rte_errno; 4741 /* Apply Flows created by application. */ 4742 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 4743 flow, next) { 4744 ret = flow_mreg_start_copy_action(dev, flow); 4745 if (ret < 0) 4746 goto error; 4747 ret = flow_drv_apply(dev, flow, &error); 4748 if (ret < 0) 4749 goto error; 4750 flow_rxq_flags_set(dev, flow); 4751 } 4752 return 0; 4753 error: 4754 ret = rte_errno; /* Save rte_errno before cleanup. */ 4755 mlx5_flow_stop(dev, list); 4756 rte_errno = ret; /* Restore rte_errno. */ 4757 return -rte_errno; 4758 } 4759 4760 /** 4761 * Stop all default actions for flows. 4762 * 4763 * @param dev 4764 * Pointer to Ethernet device. 4765 */ 4766 void 4767 mlx5_flow_stop_default(struct rte_eth_dev *dev) 4768 { 4769 flow_mreg_del_default_copy_action(dev); 4770 flow_rxq_flags_clear(dev); 4771 } 4772 4773 /** 4774 * Start all default actions for flows. 
4775 * 4776 * @param dev 4777 * Pointer to Ethernet device. 4778 * @return 4779 * 0 on success, a negative errno value otherwise and rte_errno is set. 4780 */ 4781 int 4782 mlx5_flow_start_default(struct rte_eth_dev *dev) 4783 { 4784 struct rte_flow_error error; 4785 4786 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 4787 return flow_mreg_add_default_copy_action(dev, &error); 4788 } 4789 4790 /** 4791 * Allocate intermediate resources for flow creation. 4792 * 4793 * @param dev 4794 * Pointer to Ethernet device. 4795 */ 4796 void 4797 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 4798 { 4799 struct mlx5_priv *priv = dev->data->dev_private; 4800 4801 if (!priv->inter_flows) { 4802 priv->inter_flows = rte_calloc(__func__, 1, 4803 MLX5_NUM_MAX_DEV_FLOWS * 4804 sizeof(struct mlx5_flow) + 4805 (sizeof(struct mlx5_flow_rss_desc) + 4806 sizeof(uint16_t) * UINT16_MAX) * 2, 0); 4807 if (!priv->inter_flows) { 4808 DRV_LOG(ERR, "can't allocate intermediate memory."); 4809 return; 4810 } 4811 } 4812 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 4813 [MLX5_NUM_MAX_DEV_FLOWS]; 4814 /* Reset the index. */ 4815 priv->flow_idx = 0; 4816 priv->flow_nested_idx = 0; 4817 } 4818 4819 /** 4820 * Free intermediate resources for flows. 4821 * 4822 * @param dev 4823 * Pointer to Ethernet device. 4824 */ 4825 void 4826 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 4827 { 4828 struct mlx5_priv *priv = dev->data->dev_private; 4829 4830 rte_free(priv->inter_flows); 4831 priv->inter_flows = NULL; 4832 } 4833 4834 /** 4835 * Verify the flow list is empty 4836 * 4837 * @param dev 4838 * Pointer to Ethernet device. 4839 * 4840 * @return the number of flows not released. 4841 */ 4842 int 4843 mlx5_flow_verify(struct rte_eth_dev *dev) 4844 { 4845 struct mlx5_priv *priv = dev->data->dev_private; 4846 struct rte_flow *flow; 4847 uint32_t idx; 4848 int ret = 0; 4849 4850 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 4851 flow, next) { 4852 DRV_LOG(DEBUG, "port %u flow %p still referenced", 4853 dev->data->port_id, (void *)flow); 4854 ++ret; 4855 } 4856 return ret; 4857 } 4858 4859 /** 4860 * Enable default hairpin egress flow. 4861 * 4862 * @param dev 4863 * Pointer to Ethernet device. 4864 * @param queue 4865 * The queue index. 4866 * 4867 * @return 4868 * 0 on success, a negative errno value otherwise and rte_errno is set. 
4869 */ 4870 int 4871 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 4872 uint32_t queue) 4873 { 4874 struct mlx5_priv *priv = dev->data->dev_private; 4875 const struct rte_flow_attr attr = { 4876 .egress = 1, 4877 .priority = 0, 4878 }; 4879 struct mlx5_rte_flow_item_tx_queue queue_spec = { 4880 .queue = queue, 4881 }; 4882 struct mlx5_rte_flow_item_tx_queue queue_mask = { 4883 .queue = UINT32_MAX, 4884 }; 4885 struct rte_flow_item items[] = { 4886 { 4887 .type = (enum rte_flow_item_type) 4888 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 4889 .spec = &queue_spec, 4890 .last = NULL, 4891 .mask = &queue_mask, 4892 }, 4893 { 4894 .type = RTE_FLOW_ITEM_TYPE_END, 4895 }, 4896 }; 4897 struct rte_flow_action_jump jump = { 4898 .group = MLX5_HAIRPIN_TX_TABLE, 4899 }; 4900 struct rte_flow_action actions[2]; 4901 uint32_t flow_idx; 4902 struct rte_flow_error error; 4903 4904 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 4905 actions[0].conf = &jump; 4906 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 4907 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 4908 &attr, items, actions, false, &error); 4909 if (!flow_idx) { 4910 DRV_LOG(DEBUG, 4911 "Failed to create ctrl flow: rte_errno(%d)," 4912 " type(%d), message(%s)", 4913 rte_errno, error.type, 4914 error.message ? error.message : " (no stated reason)"); 4915 return -rte_errno; 4916 } 4917 return 0; 4918 } 4919 4920 /** 4921 * Enable a control flow configured from the control plane. 4922 * 4923 * @param dev 4924 * Pointer to Ethernet device. 4925 * @param eth_spec 4926 * An Ethernet flow spec to apply. 4927 * @param eth_mask 4928 * An Ethernet flow mask to apply. 4929 * @param vlan_spec 4930 * A VLAN flow spec to apply. 4931 * @param vlan_mask 4932 * A VLAN flow mask to apply. 4933 * 4934 * @return 4935 * 0 on success, a negative errno value otherwise and rte_errno is set. 4936 */ 4937 int 4938 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 4939 struct rte_flow_item_eth *eth_spec, 4940 struct rte_flow_item_eth *eth_mask, 4941 struct rte_flow_item_vlan *vlan_spec, 4942 struct rte_flow_item_vlan *vlan_mask) 4943 { 4944 struct mlx5_priv *priv = dev->data->dev_private; 4945 const struct rte_flow_attr attr = { 4946 .ingress = 1, 4947 .priority = MLX5_FLOW_PRIO_RSVD, 4948 }; 4949 struct rte_flow_item items[] = { 4950 { 4951 .type = RTE_FLOW_ITEM_TYPE_ETH, 4952 .spec = eth_spec, 4953 .last = NULL, 4954 .mask = eth_mask, 4955 }, 4956 { 4957 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 4958 RTE_FLOW_ITEM_TYPE_END, 4959 .spec = vlan_spec, 4960 .last = NULL, 4961 .mask = vlan_mask, 4962 }, 4963 { 4964 .type = RTE_FLOW_ITEM_TYPE_END, 4965 }, 4966 }; 4967 uint16_t queue[priv->reta_idx_n]; 4968 struct rte_flow_action_rss action_rss = { 4969 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 4970 .level = 0, 4971 .types = priv->rss_conf.rss_hf, 4972 .key_len = priv->rss_conf.rss_key_len, 4973 .queue_num = priv->reta_idx_n, 4974 .key = priv->rss_conf.rss_key, 4975 .queue = queue, 4976 }; 4977 struct rte_flow_action actions[] = { 4978 { 4979 .type = RTE_FLOW_ACTION_TYPE_RSS, 4980 .conf = &action_rss, 4981 }, 4982 { 4983 .type = RTE_FLOW_ACTION_TYPE_END, 4984 }, 4985 }; 4986 uint32_t flow_idx; 4987 struct rte_flow_error error; 4988 unsigned int i; 4989 4990 if (!priv->reta_idx_n || !priv->rxqs_n) { 4991 return 0; 4992 } 4993 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 4994 action_rss.types = 0; 4995 for (i = 0; i != priv->reta_idx_n; ++i) 4996 queue[i] = (*priv->reta_idx)[i]; 4997 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 4998 &attr, items, actions, false, &error); 4999 if (!flow_idx) 5000 return -rte_errno; 5001 return 0; 5002 } 5003 5004 /** 5005 * Enable a flow control configured from the control plane. 5006 * 5007 * @param dev 5008 * Pointer to Ethernet device. 5009 * @param eth_spec 5010 * An Ethernet flow spec to apply. 5011 * @param eth_mask 5012 * An Ethernet flow mask to apply. 5013 * 5014 * @return 5015 * 0 on success, a negative errno value otherwise and rte_errno is set. 5016 */ 5017 int 5018 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5019 struct rte_flow_item_eth *eth_spec, 5020 struct rte_flow_item_eth *eth_mask) 5021 { 5022 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5023 } 5024 5025 /** 5026 * Create default miss flow rule matching lacp traffic 5027 * 5028 * @param dev 5029 * Pointer to Ethernet device. 5030 * @param eth_spec 5031 * An Ethernet flow spec to apply. 5032 * 5033 * @return 5034 * 0 on success, a negative errno value otherwise and rte_errno is set. 5035 */ 5036 int 5037 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5038 { 5039 struct mlx5_priv *priv = dev->data->dev_private; 5040 /* 5041 * The LACP matching is done by only using ether type since using 5042 * a multicast dst mac causes kernel to give low priority to this flow. 5043 */ 5044 static const struct rte_flow_item_eth lacp_spec = { 5045 .type = RTE_BE16(0x8809), 5046 }; 5047 static const struct rte_flow_item_eth lacp_mask = { 5048 .type = 0xffff, 5049 }; 5050 const struct rte_flow_attr attr = { 5051 .ingress = 1, 5052 }; 5053 struct rte_flow_item items[] = { 5054 { 5055 .type = RTE_FLOW_ITEM_TYPE_ETH, 5056 .spec = &lacp_spec, 5057 .mask = &lacp_mask, 5058 }, 5059 { 5060 .type = RTE_FLOW_ITEM_TYPE_END, 5061 }, 5062 }; 5063 struct rte_flow_action actions[] = { 5064 { 5065 .type = (enum rte_flow_action_type) 5066 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5067 }, 5068 { 5069 .type = RTE_FLOW_ACTION_TYPE_END, 5070 }, 5071 }; 5072 struct rte_flow_error error; 5073 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5074 &attr, items, actions, false, &error); 5075 5076 if (!flow_idx) 5077 return -rte_errno; 5078 return 0; 5079 } 5080 5081 /** 5082 * Destroy a flow. 
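 * The rte_flow pointer passed by the application is in fact the flow
 * index in the MLX5_IPOOL_RTE_FLOW pool, cast back and forth at the
 * generic rte_flow API boundary.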
5083 * 5084 * @see rte_flow_destroy() 5085 * @see rte_flow_ops 5086 */ 5087 int 5088 mlx5_flow_destroy(struct rte_eth_dev *dev, 5089 struct rte_flow *flow, 5090 struct rte_flow_error *error __rte_unused) 5091 { 5092 struct mlx5_priv *priv = dev->data->dev_private; 5093 5094 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5095 return 0; 5096 } 5097 5098 /** 5099 * Destroy all flows. 5100 * 5101 * @see rte_flow_flush() 5102 * @see rte_flow_ops 5103 */ 5104 int 5105 mlx5_flow_flush(struct rte_eth_dev *dev, 5106 struct rte_flow_error *error __rte_unused) 5107 { 5108 struct mlx5_priv *priv = dev->data->dev_private; 5109 5110 mlx5_flow_list_flush(dev, &priv->flows, false); 5111 return 0; 5112 } 5113 5114 /** 5115 * Isolated mode. 5116 * 5117 * @see rte_flow_isolate() 5118 * @see rte_flow_ops 5119 */ 5120 int 5121 mlx5_flow_isolate(struct rte_eth_dev *dev, 5122 int enable, 5123 struct rte_flow_error *error) 5124 { 5125 struct mlx5_priv *priv = dev->data->dev_private; 5126 5127 if (dev->data->dev_started) { 5128 rte_flow_error_set(error, EBUSY, 5129 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5130 NULL, 5131 "port must be stopped first"); 5132 return -rte_errno; 5133 } 5134 priv->isolated = !!enable; 5135 if (enable) 5136 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5137 else 5138 dev->dev_ops = &mlx5_os_dev_ops; 5139 return 0; 5140 } 5141 5142 /** 5143 * Query a flow. 5144 * 5145 * @see rte_flow_query() 5146 * @see rte_flow_ops 5147 */ 5148 static int 5149 flow_drv_query(struct rte_eth_dev *dev, 5150 uint32_t flow_idx, 5151 const struct rte_flow_action *actions, 5152 void *data, 5153 struct rte_flow_error *error) 5154 { 5155 struct mlx5_priv *priv = dev->data->dev_private; 5156 const struct mlx5_flow_driver_ops *fops; 5157 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5158 [MLX5_IPOOL_RTE_FLOW], 5159 flow_idx); 5160 enum mlx5_flow_drv_type ftype; 5161 5162 if (!flow) { 5163 return rte_flow_error_set(error, ENOENT, 5164 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5165 NULL, 5166 "invalid flow handle"); 5167 } 5168 ftype = flow->drv_type; 5169 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5170 fops = flow_get_drv_ops(ftype); 5171 5172 return fops->query(dev, flow, actions, data, error); 5173 } 5174 5175 /** 5176 * Query a flow. 5177 * 5178 * @see rte_flow_query() 5179 * @see rte_flow_ops 5180 */ 5181 int 5182 mlx5_flow_query(struct rte_eth_dev *dev, 5183 struct rte_flow *flow, 5184 const struct rte_flow_action *actions, 5185 void *data, 5186 struct rte_flow_error *error) 5187 { 5188 int ret; 5189 5190 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5191 error); 5192 if (ret < 0) 5193 return ret; 5194 return 0; 5195 } 5196 5197 /** 5198 * Convert a flow director filter to a generic flow. 5199 * 5200 * @param dev 5201 * Pointer to Ethernet device. 5202 * @param fdir_filter 5203 * Flow director filter to add. 5204 * @param attributes 5205 * Generic flow parameters structure. 5206 * 5207 * @return 5208 * 0 on success, a negative errno value otherwise and rte_errno is set. 5209 */ 5210 static int 5211 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5212 const struct rte_eth_fdir_filter *fdir_filter, 5213 struct mlx5_fdir *attributes) 5214 { 5215 struct mlx5_priv *priv = dev->data->dev_private; 5216 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5217 const struct rte_eth_fdir_masks *mask = 5218 &dev->data->dev_conf.fdir_conf.mask; 5219 5220 /* Validate queue number. 
*/ 5221 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5222 DRV_LOG(ERR, "port %u invalid queue number %d", 5223 dev->data->port_id, fdir_filter->action.rx_queue); 5224 rte_errno = EINVAL; 5225 return -rte_errno; 5226 } 5227 attributes->attr.ingress = 1; 5228 attributes->items[0] = (struct rte_flow_item) { 5229 .type = RTE_FLOW_ITEM_TYPE_ETH, 5230 .spec = &attributes->l2, 5231 .mask = &attributes->l2_mask, 5232 }; 5233 switch (fdir_filter->action.behavior) { 5234 case RTE_ETH_FDIR_ACCEPT: 5235 attributes->actions[0] = (struct rte_flow_action){ 5236 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5237 .conf = &attributes->queue, 5238 }; 5239 break; 5240 case RTE_ETH_FDIR_REJECT: 5241 attributes->actions[0] = (struct rte_flow_action){ 5242 .type = RTE_FLOW_ACTION_TYPE_DROP, 5243 }; 5244 break; 5245 default: 5246 DRV_LOG(ERR, "port %u invalid behavior %d", 5247 dev->data->port_id, 5248 fdir_filter->action.behavior); 5249 rte_errno = ENOTSUP; 5250 return -rte_errno; 5251 } 5252 attributes->queue.index = fdir_filter->action.rx_queue; 5253 /* Handle L3. */ 5254 switch (fdir_filter->input.flow_type) { 5255 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5256 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5257 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5258 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5259 .src_addr = input->flow.ip4_flow.src_ip, 5260 .dst_addr = input->flow.ip4_flow.dst_ip, 5261 .time_to_live = input->flow.ip4_flow.ttl, 5262 .type_of_service = input->flow.ip4_flow.tos, 5263 }; 5264 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5265 .src_addr = mask->ipv4_mask.src_ip, 5266 .dst_addr = mask->ipv4_mask.dst_ip, 5267 .time_to_live = mask->ipv4_mask.ttl, 5268 .type_of_service = mask->ipv4_mask.tos, 5269 .next_proto_id = mask->ipv4_mask.proto, 5270 }; 5271 attributes->items[1] = (struct rte_flow_item){ 5272 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5273 .spec = &attributes->l3, 5274 .mask = &attributes->l3_mask, 5275 }; 5276 break; 5277 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5278 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5279 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5280 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5281 .hop_limits = input->flow.ipv6_flow.hop_limits, 5282 .proto = input->flow.ipv6_flow.proto, 5283 }; 5284 5285 memcpy(attributes->l3.ipv6.hdr.src_addr, 5286 input->flow.ipv6_flow.src_ip, 5287 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5288 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5289 input->flow.ipv6_flow.dst_ip, 5290 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5291 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 5292 mask->ipv6_mask.src_ip, 5293 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5294 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 5295 mask->ipv6_mask.dst_ip, 5296 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5297 attributes->items[1] = (struct rte_flow_item){ 5298 .type = RTE_FLOW_ITEM_TYPE_IPV6, 5299 .spec = &attributes->l3, 5300 .mask = &attributes->l3_mask, 5301 }; 5302 break; 5303 default: 5304 DRV_LOG(ERR, "port %u invalid flow type%d", 5305 dev->data->port_id, fdir_filter->input.flow_type); 5306 rte_errno = ENOTSUP; 5307 return -rte_errno; 5308 } 5309 /* Handle L4. 
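 * Only the UDP/TCP source and destination ports are converted here;
 * the OTHER flow types carry no L4 item.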
*/ 5310 switch (fdir_filter->input.flow_type) { 5311 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5312 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5313 .src_port = input->flow.udp4_flow.src_port, 5314 .dst_port = input->flow.udp4_flow.dst_port, 5315 }; 5316 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5317 .src_port = mask->src_port_mask, 5318 .dst_port = mask->dst_port_mask, 5319 }; 5320 attributes->items[2] = (struct rte_flow_item){ 5321 .type = RTE_FLOW_ITEM_TYPE_UDP, 5322 .spec = &attributes->l4, 5323 .mask = &attributes->l4_mask, 5324 }; 5325 break; 5326 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5327 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5328 .src_port = input->flow.tcp4_flow.src_port, 5329 .dst_port = input->flow.tcp4_flow.dst_port, 5330 }; 5331 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5332 .src_port = mask->src_port_mask, 5333 .dst_port = mask->dst_port_mask, 5334 }; 5335 attributes->items[2] = (struct rte_flow_item){ 5336 .type = RTE_FLOW_ITEM_TYPE_TCP, 5337 .spec = &attributes->l4, 5338 .mask = &attributes->l4_mask, 5339 }; 5340 break; 5341 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5342 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5343 .src_port = input->flow.udp6_flow.src_port, 5344 .dst_port = input->flow.udp6_flow.dst_port, 5345 }; 5346 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5347 .src_port = mask->src_port_mask, 5348 .dst_port = mask->dst_port_mask, 5349 }; 5350 attributes->items[2] = (struct rte_flow_item){ 5351 .type = RTE_FLOW_ITEM_TYPE_UDP, 5352 .spec = &attributes->l4, 5353 .mask = &attributes->l4_mask, 5354 }; 5355 break; 5356 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5357 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5358 .src_port = input->flow.tcp6_flow.src_port, 5359 .dst_port = input->flow.tcp6_flow.dst_port, 5360 }; 5361 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5362 .src_port = mask->src_port_mask, 5363 .dst_port = mask->dst_port_mask, 5364 }; 5365 attributes->items[2] = (struct rte_flow_item){ 5366 .type = RTE_FLOW_ITEM_TYPE_TCP, 5367 .spec = &attributes->l4, 5368 .mask = &attributes->l4_mask, 5369 }; 5370 break; 5371 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5372 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5373 break; 5374 default: 5375 DRV_LOG(ERR, "port %u invalid flow type%d", 5376 dev->data->port_id, fdir_filter->input.flow_type); 5377 rte_errno = ENOTSUP; 5378 return -rte_errno; 5379 } 5380 return 0; 5381 } 5382 5383 #define FLOW_FDIR_CMP(f1, f2, fld) \ 5384 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 5385 5386 /** 5387 * Compare two FDIR flows. If items and actions are identical, the two flows are 5388 * regarded as same. 5389 * 5390 * @param dev 5391 * Pointer to Ethernet device. 5392 * @param f1 5393 * FDIR flow to compare. 5394 * @param f2 5395 * FDIR flow to compare. 5396 * 5397 * @return 5398 * Zero on match, 1 otherwise. 5399 */ 5400 static int 5401 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 5402 { 5403 if (FLOW_FDIR_CMP(f1, f2, attr) || 5404 FLOW_FDIR_CMP(f1, f2, l2) || 5405 FLOW_FDIR_CMP(f1, f2, l2_mask) || 5406 FLOW_FDIR_CMP(f1, f2, l3) || 5407 FLOW_FDIR_CMP(f1, f2, l3_mask) || 5408 FLOW_FDIR_CMP(f1, f2, l4) || 5409 FLOW_FDIR_CMP(f1, f2, l4_mask) || 5410 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 5411 return 1; 5412 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 5413 FLOW_FDIR_CMP(f1, f2, queue)) 5414 return 1; 5415 return 0; 5416 } 5417 5418 /** 5419 * Search device flow list to find out a matched FDIR flow. 5420 * 5421 * @param dev 5422 * Pointer to Ethernet device. 
5423 * @param fdir_flow 5424 * FDIR flow to lookup. 5425 * 5426 * @return 5427 * Index of flow if found, 0 otherwise. 5428 */ 5429 static uint32_t 5430 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 5431 { 5432 struct mlx5_priv *priv = dev->data->dev_private; 5433 uint32_t flow_idx = 0; 5434 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5435 5436 MLX5_ASSERT(fdir_flow); 5437 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5438 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 5439 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 5440 dev->data->port_id, flow_idx); 5441 flow_idx = priv_fdir_flow->rix_flow; 5442 break; 5443 } 5444 } 5445 return flow_idx; 5446 } 5447 5448 /** 5449 * Add new flow director filter and store it in list. 5450 * 5451 * @param dev 5452 * Pointer to Ethernet device. 5453 * @param fdir_filter 5454 * Flow director filter to add. 5455 * 5456 * @return 5457 * 0 on success, a negative errno value otherwise and rte_errno is set. 5458 */ 5459 static int 5460 flow_fdir_filter_add(struct rte_eth_dev *dev, 5461 const struct rte_eth_fdir_filter *fdir_filter) 5462 { 5463 struct mlx5_priv *priv = dev->data->dev_private; 5464 struct mlx5_fdir *fdir_flow; 5465 struct rte_flow *flow; 5466 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5467 uint32_t flow_idx; 5468 int ret; 5469 5470 fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0); 5471 if (!fdir_flow) { 5472 rte_errno = ENOMEM; 5473 return -rte_errno; 5474 } 5475 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 5476 if (ret) 5477 goto error; 5478 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 5479 if (flow_idx) { 5480 rte_errno = EEXIST; 5481 goto error; 5482 } 5483 priv_fdir_flow = rte_zmalloc(__func__, sizeof(struct mlx5_fdir_flow), 5484 0); 5485 if (!priv_fdir_flow) { 5486 rte_errno = ENOMEM; 5487 goto error; 5488 } 5489 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 5490 fdir_flow->items, fdir_flow->actions, true, 5491 NULL); 5492 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5493 if (!flow) 5494 goto error; 5495 flow->fdir = 1; 5496 priv_fdir_flow->fdir = fdir_flow; 5497 priv_fdir_flow->rix_flow = flow_idx; 5498 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 5499 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 5500 dev->data->port_id, (void *)flow); 5501 return 0; 5502 error: 5503 rte_free(priv_fdir_flow); 5504 rte_free(fdir_flow); 5505 return -rte_errno; 5506 } 5507 5508 /** 5509 * Delete specific filter. 5510 * 5511 * @param dev 5512 * Pointer to Ethernet device. 5513 * @param fdir_filter 5514 * Filter to be deleted. 5515 * 5516 * @return 5517 * 0 on success, a negative errno value otherwise and rte_errno is set. 
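 *   Deleting a filter that has no matching flow is not treated as an
 *   error and returns 0.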
5518 */ 5519 static int 5520 flow_fdir_filter_delete(struct rte_eth_dev *dev, 5521 const struct rte_eth_fdir_filter *fdir_filter) 5522 { 5523 struct mlx5_priv *priv = dev->data->dev_private; 5524 uint32_t flow_idx; 5525 struct mlx5_fdir fdir_flow = { 5526 .attr.group = 0, 5527 }; 5528 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5529 int ret; 5530 5531 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 5532 if (ret) 5533 return -rte_errno; 5534 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5535 /* Find the fdir in priv list */ 5536 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 5537 break; 5538 } 5539 if (!priv_fdir_flow) 5540 return 0; 5541 LIST_REMOVE(priv_fdir_flow, next); 5542 flow_idx = priv_fdir_flow->rix_flow; 5543 flow_list_destroy(dev, &priv->flows, flow_idx); 5544 rte_free(priv_fdir_flow->fdir); 5545 rte_free(priv_fdir_flow); 5546 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 5547 dev->data->port_id, flow_idx); 5548 return 0; 5549 } 5550 5551 /** 5552 * Update queue for specific filter. 5553 * 5554 * @param dev 5555 * Pointer to Ethernet device. 5556 * @param fdir_filter 5557 * Filter to be updated. 5558 * 5559 * @return 5560 * 0 on success, a negative errno value otherwise and rte_errno is set. 5561 */ 5562 static int 5563 flow_fdir_filter_update(struct rte_eth_dev *dev, 5564 const struct rte_eth_fdir_filter *fdir_filter) 5565 { 5566 int ret; 5567 5568 ret = flow_fdir_filter_delete(dev, fdir_filter); 5569 if (ret) 5570 return ret; 5571 return flow_fdir_filter_add(dev, fdir_filter); 5572 } 5573 5574 /** 5575 * Flush all filters. 5576 * 5577 * @param dev 5578 * Pointer to Ethernet device. 5579 */ 5580 static void 5581 flow_fdir_filter_flush(struct rte_eth_dev *dev) 5582 { 5583 struct mlx5_priv *priv = dev->data->dev_private; 5584 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5585 5586 while (!LIST_EMPTY(&priv->fdir_flows)) { 5587 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 5588 LIST_REMOVE(priv_fdir_flow, next); 5589 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 5590 rte_free(priv_fdir_flow->fdir); 5591 rte_free(priv_fdir_flow); 5592 } 5593 } 5594 5595 /** 5596 * Get flow director information. 5597 * 5598 * @param dev 5599 * Pointer to Ethernet device. 5600 * @param[out] fdir_info 5601 * Resulting flow director information. 5602 */ 5603 static void 5604 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 5605 { 5606 struct rte_eth_fdir_masks *mask = 5607 &dev->data->dev_conf.fdir_conf.mask; 5608 5609 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 5610 fdir_info->guarant_spc = 0; 5611 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 5612 fdir_info->max_flexpayload = 0; 5613 fdir_info->flow_types_mask[0] = 0; 5614 fdir_info->flex_payload_unit = 0; 5615 fdir_info->max_flex_payload_segment_num = 0; 5616 fdir_info->flex_payload_limit = 0; 5617 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 5618 } 5619 5620 /** 5621 * Deal with flow director operations. 5622 * 5623 * @param dev 5624 * Pointer to Ethernet device. 5625 * @param filter_op 5626 * Operation to perform. 5627 * @param arg 5628 * Pointer to operation-specific structure. 5629 * 5630 * @return 5631 * 0 on success, a negative errno value otherwise and rte_errno is set. 

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
                    void *arg)
{
        enum rte_fdir_mode fdir_mode =
                dev->data->dev_conf.fdir_conf.mode;

        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;
        if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
            fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                DRV_LOG(ERR, "port %u flow director mode %d not supported",
                        dev->data->port_id, fdir_mode);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        switch (filter_op) {
        case RTE_ETH_FILTER_ADD:
                return flow_fdir_filter_add(dev, arg);
        case RTE_ETH_FILTER_UPDATE:
                return flow_fdir_filter_update(dev, arg);
        case RTE_ETH_FILTER_DELETE:
                return flow_fdir_filter_delete(dev, arg);
        case RTE_ETH_FILTER_FLUSH:
                flow_fdir_filter_flush(dev);
                break;
        case RTE_ETH_FILTER_INFO:
                flow_fdir_info_get(dev, arg);
                break;
        default:
                DRV_LOG(DEBUG, "port %u unknown operation %u",
                        dev->data->port_id, filter_op);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
                if (filter_op != RTE_ETH_FILTER_GET) {
                        rte_errno = EINVAL;
                        return -rte_errno;
                }
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        case RTE_ETH_FILTER_FDIR:
                return flow_fdir_ctrl_func(dev, filter_op, arg);
        default:
                DRV_LOG(ERR, "port %u filter type (%d) not supported",
                        dev->data->port_id, filter_type);
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
        return 0;
}
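
/*
 * Illustrative sketch only (hypothetical guard macro, never defined): the
 * RTE_ETH_FILTER_GENERIC branch above is what hands &mlx5_flow_ops to the
 * ethdev layer, so a plain rte_flow call such as the one below ends up being
 * served by this file. The pattern and action values are arbitrary examples.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static struct rte_flow *
example_rte_flow_create(uint16_t port_id, uint16_t rx_queue)
{
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_queue queue = { .index = rx_queue };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        /* Dispatched through mlx5_flow_ops fetched via FILTER_GENERIC/GET. */
        return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */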

/**
 * Create the needed meter and suffix tables.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to the flow meter.
 *
 * @return
 *   Pointer to table set on success, NULL otherwise.
 */
struct mlx5_meter_domains_infos *
mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
                          const struct mlx5_flow_meter *fm)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->create_mtr_tbls(dev, fm);
}

/**
 * Destroy the meter table set.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] tbls
 *   Pointer to the meter table set.
 *
 * @return
 *   0 on success.
 */
int
mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
                           struct mlx5_meter_domains_infos *tbls)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->destroy_mtr_tbls(dev, tbls);
}

/**
 * Create policer rules.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to flow meter structure.
 * @param[in] attr
 *   Pointer to flow attributes.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
                               struct mlx5_flow_meter *fm,
                               const struct rte_flow_attr *attr)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->create_policer_rules(dev, fm, attr);
}

/**
 * Destroy policer rules.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to flow meter structure.
 * @param[in] attr
 *   Pointer to flow attributes.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
                                struct mlx5_flow_meter *fm,
                                const struct rte_flow_attr *attr)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->destroy_policer_rules(dev, fm, attr);
}

/**
 * Allocate a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Index of the allocated counter on success, 0 otherwise.
 */
uint32_t
mlx5_counter_alloc(struct rte_eth_dev *dev)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                return fops->counter_alloc(dev);
        }
        DRV_LOG(ERR,
                "port %u counter allocation is not supported.",
                dev->data->port_id);
        return 0;
}

/**
 * Free a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to be freed.
 */
void
mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                fops->counter_free(dev, cnt);
                return;
        }
        DRV_LOG(ERR,
                "port %u counter free is not supported.",
                dev->data->port_id);
}

/**
 * Query counter statistics.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to query.
 * @param[in] clear
 *   Set to clear counter statistics.
 * @param[out] pkts
 *   Where to store the number of packets hit by the counter.
 * @param[out] bytes
 *   Where to store the number of bytes hit by the counter.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
                   bool clear, uint64_t *pkts, uint64_t *bytes)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                return fops->counter_query(dev, cnt, clear, pkts, bytes);
        }
        DRV_LOG(ERR,
                "port %u counter query is not supported.",
                dev->data->port_id);
        return -ENOTSUP;
}
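
/*
 * Illustrative sketch only (hypothetical guard macro, never defined): the
 * counter_alloc/free/query driver ops above back the rte_flow COUNT action;
 * an application normally reads such a counter as shown below. "flow" is
 * assumed to have been created with RTE_FLOW_ACTION_TYPE_COUNT.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static int
example_read_count(uint16_t port_id, struct rte_flow *flow,
                   uint64_t *pkts, uint64_t *bytes)
{
        struct rte_flow_query_count count = { .reset = 0 };
        struct rte_flow_action action = {
                .type = RTE_FLOW_ACTION_TYPE_COUNT,
        };
        struct rte_flow_error error;
        int ret;

        ret = rte_flow_query(port_id, flow, &action, &count, &error);
        if (ret)
                return ret;
        *pkts = count.hits;
        *bytes = count.bytes;
        return 0;
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */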

#define MLX5_POOL_QUERY_FREQ_US 1000000

/**
 * Get the total number of valid counter pools.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   The total number of valid pools.
 */
static uint32_t
mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
{
        int i;
        uint32_t pools_n = 0;

        for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
                pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
        return pools_n;
}

/**
 * Set the periodic procedure for triggering asynchronous batch queries for all
 * the counter pools.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
void
mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
{
        uint32_t pools_n, us;

        pools_n = mlx5_get_all_valid_pool_count(sh);
        us = MLX5_POOL_QUERY_FREQ_US / pools_n;
        DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
        if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
                sh->cmng.query_thread_on = 0;
                DRV_LOG(ERR, "Cannot reinitialize query alarm");
        } else {
                sh->cmng.query_thread_on = 1;
        }
}
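
/*
 * Illustrative sketch only (hypothetical guard macro, never defined): the
 * periodic query machinery above is a self-rearming EAL alarm. The minimal
 * pattern looks like this; mlx5_flow_query_alarm() below follows it, with
 * the interval spread across the currently valid pools.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static void
example_periodic_cb(void *arg)
{
        struct mlx5_dev_ctx_shared *sh = arg;

        /* ... do one slice of work for "sh" here ... */
        /* Re-arm; the callback keeps itself alive until cancelled. */
        if (rte_eal_alarm_set(MLX5_POOL_QUERY_FREQ_US, example_periodic_cb, sh))
                DRV_LOG(ERR, "Cannot re-arm example alarm");
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */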

/**
 * The periodic procedure for triggering asynchronous batch queries for all the
 * counter pools. This function is expected to be called from the host thread.
 *
 * @param[in] arg
 *   The parameter for the alarm process.
 */
void
mlx5_flow_query_alarm(void *arg)
{
        struct mlx5_dev_ctx_shared *sh = arg;
        struct mlx5_devx_obj *dcs;
        uint16_t offset;
        int ret;
        uint8_t batch = sh->cmng.batch;
        uint8_t age = sh->cmng.age;
        uint16_t pool_index = sh->cmng.pool_index;
        struct mlx5_pools_container *cont;
        struct mlx5_flow_counter_pool *pool;
        int cont_loop = MLX5_CCONT_TYPE_MAX;

        if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
                goto set_alarm;
next_container:
        cont = MLX5_CNT_CONTAINER(sh, batch, age);
        rte_spinlock_lock(&cont->resize_sl);
        if (!cont->pools) {
                rte_spinlock_unlock(&cont->resize_sl);
                /* Check if all the containers are empty. */
                if (unlikely(--cont_loop == 0))
                        goto set_alarm;
                batch ^= 0x1;
                pool_index = 0;
                if (batch == 0 && pool_index == 0) {
                        age ^= 0x1;
                        sh->cmng.batch = batch;
                        sh->cmng.age = age;
                }
                goto next_container;
        }
        pool = cont->pools[pool_index];
        rte_spinlock_unlock(&cont->resize_sl);
        if (pool->raw_hw)
                /* There is a pool query in progress. */
                goto set_alarm;
        pool->raw_hw = LIST_FIRST(&sh->cmng.free_stat_raws);
        if (!pool->raw_hw)
                /* No free counter statistics raw memory. */
                goto set_alarm;
        dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
                                                             (&pool->a64_dcs);
        offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
        /*
         * Identify the counters released between query trigger and query
         * handle more efficiently. The counters released in this gap period
         * should wait for a new round of query as the newly arrived packets
         * will not be taken into account.
         */
        pool->query_gen++;
        ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
                                               offset, NULL, NULL,
                                               pool->raw_hw->mem_mng->dm->id,
                                               (void *)(uintptr_t)
                                               (pool->raw_hw->data + offset),
                                               sh->devx_comp,
                                               (uint64_t)(uintptr_t)pool);
        if (ret) {
                DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
                        " %d", pool->min_dcs->id);
                pool->raw_hw = NULL;
                goto set_alarm;
        }
        pool->raw_hw->min_dcs_id = dcs->id;
        LIST_REMOVE(pool->raw_hw, next);
        sh->cmng.pending_queries++;
        pool_index++;
        if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
                batch ^= 0x1;
                pool_index = 0;
                if (batch == 0 && pool_index == 0)
                        age ^= 0x1;
        }
set_alarm:
        sh->cmng.batch = batch;
        sh->cmng.pool_index = pool_index;
        sh->cmng.age = age;
        mlx5_set_query_alarm(sh);
}

/**
 * Check for newly aged-out flows in the counter pool and report them through
 * the aging event mechanism.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] pool
 *   Pointer to the current counter pool.
 */
static void
mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
                      struct mlx5_flow_counter_pool *pool)
{
        struct mlx5_priv *priv;
        struct mlx5_flow_counter *cnt;
        struct mlx5_age_info *age_info;
        struct mlx5_age_param *age_param;
        struct mlx5_counter_stats_raw *cur = pool->raw_hw;
        struct mlx5_counter_stats_raw *prev = pool->raw;
        uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
        uint32_t i;

        for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
                cnt = MLX5_POOL_GET_CNT(pool, i);
                age_param = MLX5_CNT_TO_AGE(cnt);
                if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
                        continue;
                if (cur->data[i].hits != prev->data[i].hits) {
                        age_param->expire = curr + age_param->timeout;
                        continue;
                }
                if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
                        continue;
                /*
                 * Hold the lock first; otherwise, if the counter is released
                 * between the AGE_TMOUT state change and the tailq operation,
                 * the release procedure may delete a nonexistent tailq node.
                 */
                priv = rte_eth_devices[age_param->port_id].data->dev_private;
                age_info = GET_PORT_AGE_INFO(priv);
                rte_spinlock_lock(&age_info->aged_sl);
                /* If the cmpset fails, the counter has been released. */
                if (rte_atomic16_cmpset((volatile uint16_t *)
                                        &age_param->state,
                                        AGE_CANDIDATE,
                                        AGE_TMOUT) ==
                                        AGE_CANDIDATE) {
                        TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
                        MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
                }
                rte_spinlock_unlock(&age_info->aged_sl);
        }
        for (i = 0; i < sh->max_port; i++) {
                age_info = &sh->port[i].age_info;
                if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
                        continue;
                if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
                        _rte_eth_dev_callback_process
                                (&rte_eth_devices[sh->port[i].devx_ih_port_id],
                                 RTE_ETH_EVENT_FLOW_AGED, NULL);
                age_info->flags = 0;
        }
}
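
/*
 * Illustrative sketch only (hypothetical guard macro, never defined): the
 * expiry test above relies on wrap-around-safe 16-bit arithmetic over the
 * 0.1s TSC-derived tick. A counter is treated as expired once "now" has
 * passed "expire" by less than half of the 16-bit range, which stays correct
 * across the tick counter wrapping at 65536.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static bool
example_age_expired(uint16_t now, uint16_t expire)
{
        /* e.g. now=10, expire=65530: (uint16_t)(10 - 65530) == 16 -> expired. */
        return (uint16_t)(now - expire) < (UINT16_MAX / 2);
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */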

/**
 * Handler for the HW response with ready values from an asynchronous batch
 * query. This function is expected to be called from the host thread.
 *
 * @param[in] sh
 *   The pointer to the shared device context.
 * @param[in] async_id
 *   The DevX async ID.
 * @param[in] status
 *   The status of the completion.
 */
void
mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
                                  uint64_t async_id, int status)
{
        struct mlx5_flow_counter_pool *pool =
                (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
        struct mlx5_counter_stats_raw *raw_to_free;
        uint8_t age = !!IS_AGE_POOL(pool);
        uint8_t query_gen = pool->query_gen ^ 1;
        struct mlx5_pools_container *cont =
                MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);

        if (unlikely(status)) {
                raw_to_free = pool->raw_hw;
        } else {
                raw_to_free = pool->raw;
                if (IS_AGE_POOL(pool))
                        mlx5_flow_aging_check(sh, pool);
                rte_spinlock_lock(&pool->sl);
                pool->raw = pool->raw_hw;
                rte_spinlock_unlock(&pool->sl);
                /* Be sure the new raw counters data is updated in memory. */
                rte_cio_wmb();
                if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
                        rte_spinlock_lock(&cont->csl);
                        TAILQ_CONCAT(&cont->counters,
                                     &pool->counters[query_gen], next);
                        rte_spinlock_unlock(&cont->csl);
                }
        }
        LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
        pool->raw_hw = NULL;
        sh->cmng.pending_queries--;
}

/**
 * Translate the rte_flow group index to the HW table value.
 *
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in] external
 *   Set if the flow rule is created by a request external to the PMD.
 * @param[in] group
 *   rte_flow group index value.
 * @param[in] fdb_def_rule
 *   Whether the FDB jump to table 1 is configured.
 * @param[out] table
 *   HW table value.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
                         uint32_t group, bool fdb_def_rule, uint32_t *table,
                         struct rte_flow_error *error)
{
        if (attributes->transfer && external && fdb_def_rule) {
                if (group == UINT32_MAX)
                        return rte_flow_error_set
                                (error, EINVAL,
                                 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                 NULL,
                                 "group index not supported");
                *table = group + 1;
        } else {
                *table = group;
        }
        return 0;
}
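
/*
 * Illustrative sketch only (hypothetical guard macro, never defined): with
 * the FDB default rule enabled, externally created transfer rules are shifted
 * by one table so that table 0 stays reserved for the default jump; e.g.
 * group 0 maps to HW table 1 and group 5 to table 6, while all other
 * combinations keep the group index unchanged.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static void
example_group_to_table(void)
{
        struct rte_flow_attr attr = { .transfer = 1 };
        struct rte_flow_error error;
        uint32_t table = 0;

        /* external=true, fdb_def_rule=true: group 5 -> HW table 6. */
        mlx5_flow_group_to_table(&attr, true, 5, true, &table, &error);
        /* external=false: the group index is used as-is (table == 5). */
        mlx5_flow_group_to_table(&attr, false, 5, true, &table, &error);
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */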

/**
 * Discover availability of metadata reg_c's.
 *
 * Iteratively use test flows to check availability.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
        enum modify_reg idx;
        int n = 0;

        /* reg_c[0] and reg_c[1] are reserved. */
        config->flow_mreg_c[n++] = REG_C_0;
        config->flow_mreg_c[n++] = REG_C_1;
        /* Discover availability of other reg_c's. */
        for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
                struct rte_flow_attr attr = {
                        .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
                        .priority = MLX5_FLOW_PRIO_RSVD,
                        .ingress = 1,
                };
                struct rte_flow_item items[] = {
                        [0] = {
                                .type = RTE_FLOW_ITEM_TYPE_END,
                        },
                };
                struct rte_flow_action actions[] = {
                        [0] = {
                                .type = (enum rte_flow_action_type)
                                        MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
                                .conf = &(struct mlx5_flow_action_copy_mreg){
                                        .src = REG_C_1,
                                        .dst = idx,
                                },
                        },
                        [1] = {
                                .type = RTE_FLOW_ACTION_TYPE_JUMP,
                                .conf = &(struct rte_flow_action_jump){
                                        .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
                                },
                        },
                        [2] = {
                                .type = RTE_FLOW_ACTION_TYPE_END,
                        },
                };
                uint32_t flow_idx;
                struct rte_flow *flow;
                struct rte_flow_error error;

                if (!config->dv_flow_en)
                        break;
                /* Create internal flow, validation skips copy action. */
                flow_idx = flow_list_create(dev, NULL, &attr, items,
                                            actions, false, &error);
                flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
                                      flow_idx);
                if (!flow)
                        continue;
                if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
                        config->flow_mreg_c[n++] = idx;
                flow_list_destroy(dev, NULL, flow_idx);
        }
        for (; n < MLX5_MREG_C_NUM; ++n)
                config->flow_mreg_c[n] = REG_NONE;
        return 0;
}

/**
 * Dump flow raw HW data to a file.
 *
 * @param[in] dev
 *   The pointer to Ethernet device.
 * @param[in] file
 *   A pointer to a file for output.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. PMDs initialize this
 *   structure in case of error only.
 *
 * @return
 *   0 on success, a negative value otherwise.
 */
int
mlx5_flow_dev_dump(struct rte_eth_dev *dev,
                   FILE *file,
                   struct rte_flow_error *error __rte_unused)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_ctx_shared *sh = priv->sh;

        return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
                                       sh->tx_domain, file);
}

/**
 * Get aged-out flows.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] contexts
 *   The address of an array of pointers to the aged-out flow contexts.
 * @param[in] nb_contexts
 *   The length of the context array pointers.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. Initialized in case of
 *   error only.
 *
 * @return
 *   The number of aged-out flow contexts reported in the context array on
 *   success, a negative errno value otherwise. If nb_contexts is 0, the
 *   total number of aged-out contexts is returned instead.
 */
int
mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
                         uint32_t nb_contexts, struct rte_flow_error *error)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                return fops->get_aged_flows(dev, contexts, nb_contexts,
                                            error);
        }
        DRV_LOG(ERR,
                "port %u get aged flows is not supported.",
                dev->data->port_id);
        return -ENOTSUP;
}
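
/*
 * Illustrative sketch only (hypothetical guard macro, never defined): the
 * expected application-side use of the aging support above. A flow is created
 * with an AGE action, the application registers for the
 * RTE_ETH_EVENT_FLOW_AGED event and then drains the aged-out contexts with
 * rte_flow_get_aged_flows(). The 32-entry batch size is arbitrary.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static int
example_flow_aged_cb(uint16_t port_id, enum rte_eth_event_type event,
                     void *cb_arg, void *ret_param)
{
        void *contexts[32];
        struct rte_flow_error error;
        int n;

        (void)event;
        (void)cb_arg;
        (void)ret_param;
        n = rte_flow_get_aged_flows(port_id, contexts, RTE_DIM(contexts),
                                    &error);
        /* Each context is the one given in struct rte_flow_action_age. */
        return n < 0 ? n : 0;
}

static void
example_register_aged_cb(uint16_t port_id)
{
        rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED,
                                      example_flow_aged_cb, NULL);
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */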