/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rxtx.h"

/** Device flow drivers. */
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
#endif
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						 MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6,
						 MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};
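
/*
 * Illustrative note (not part of the original driver): each node above
 * describes which items may follow a given item when a flow carrying an
 * RSS action is expanded, and which ETH_RSS_* bits pull that branch in.
 * Assuming the application requests ETH_RSS_NONFRAG_IPV4_UDP, a pattern
 * such as
 *
 *	eth / ipv4 / end
 *
 * would conceptually be expanded into the two patterns
 *
 *	eth / ipv4 / end
 *	eth / ipv4 / udp / end
 *
 * because MLX5_EXPANSION_IPV4 lists MLX5_EXPANSION_IPV4_UDP as a next node
 * and that node's .rss_types matches the requested hash type.
 */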

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_aged_flows = mlx5_flow_get_aged_flows,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action actions[2];
	struct rte_flow_action_queue queue;
};

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NONE;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NONE;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_SFX:
		/*
		 * If meter color and flow match share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
		MLX5_ASSERT(priv->mtr_color_reg != REG_NONE);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is used only in the meter
		 * suffix sub-flow when a meter is present, so it is safe to
		 * share the same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If meter is enabled, it engages a register for color match
		 * and flow match.
		 * If the meter color match does not use REG_C_2, the REG_C_x
		 * engaged by the color match must be skipped here.
		 * If meter is disabled, all available registers can be used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means meter is using a REG_C_x greater than
		 * REG_C_2. Take care not to conflict with the meter color
		 * REG_C_x. If the available index REG_C_y >= REG_C_x, skip
		 * the color register.
		 */
		if (skip_mtr_reg && config->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						       RTE_FLOW_ERROR_TYPE_ITEM,
							NULL, "invalid tag id");
			if (config->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NONE)
				return config->flow_mreg_c
					       [id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return config->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;

	/*
	 * Having available reg_c can be regarded inclusively as supporting
	 * extensive flow metadata register, which could mean,
	 * - metadata register copy action by modify header.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return config->flow_mreg_c[2] != REG_NONE;
}
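
/*
 * Usage sketch (illustrative only, not part of the original code): a caller
 * that needs the REG_C_x backing application TAG index 0 would do roughly
 * the following; "reg" and the surrounding error handling are hypothetical.
 *
 *	struct rte_flow_error error;
 *	int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, &error);
 *
 *	if (reg < 0)
 *		return reg;	// rte_errno is set, error holds the cause.
 *
 * Code translating TAG items or SET_TAG actions is then expected to use the
 * returned register when building its matching/modify-header resources.
 */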

/**
 * Discover the maximum number of priorities available.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
			.port = (uint8_t)priv->dev_port,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	mlx5_hrxq_drop_release(dev);
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, priority);
		return -rte_errno;
	}
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

/**
 * Adjust flow priority based on the highest layer and the request priority.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] priority
 *   The rule base priority.
 * @param[in] subpriority
 *   The priority based on the items.
 *
 * @return
 *   The new priority.
 */
uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
				   uint32_t subpriority)
{
	uint32_t res = 0;
	struct mlx5_priv *priv = dev->data->dev_private;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		res = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		res = priority_map_5[priority][subpriority];
		break;
	}
	return res;
}
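
/*
 * Worked example (illustrative, not part of the original code): on a device
 * reporting 16 Verbs priorities, mlx5_flow_discover_priorities() returns
 * RTE_DIM(priority_map_5) == 5, so priority_map_5 is selected above.
 * A rule with base priority 1 whose highest matched layer yields
 * subpriority 2 then gets
 *
 *	mlx5_flow_adjust_priority(dev, 1, 2) == priority_map_5[1][2] == 5
 *
 * i.e. more specific layers are spread onto higher Verbs priorities within
 * the same user priority band.
 */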

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the mlx5 RSS descriptor.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}

/**
 * Look up and set the tunnel ptype in the Rx queue data. Only a single
 * tunnel ptype can be advertised; if several different tunnel rules are
 * used on this queue, the tunnel ptype is cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}
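
/*
 * Example of the behavior above (illustrative comment, not original code):
 * if a queue only carries VXLAN flows, flow_tunnels_n[] has a single
 * non-zero slot and rxq.tunnel becomes
 * RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP, so the datapath can report
 * that ptype in received mbufs. As soon as a second tunnel type (say GRE)
 * is added to the same queue, the loop finds two non-zero counters and
 * clears rxq.tunnel, because a single static ptype can no longer describe
 * all the traffic on that queue.
 */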

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must be always enabled (metadata may arrive
		 * from another port, not only from local flows).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
			struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	MLX5_ASSERT(dev->data->dev_started);
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->flow_mark_n--;
			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
		}
		if (tunnel) {
			unsigned int j;

			/* Decrease the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]--;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_trim(dev, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		unsigned int j;

		if (!(*priv->rxqs)[i])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[i],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
			rxq_ctrl->flow_tunnels_n[j] = 0;
		rxq_ctrl->rxq.tunnel = 0;
	}
}

/**
 * Set the Rx queue dynamic metadata (mask and offset) for a flow
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *data;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		if (!(*priv->rxqs)[i])
			continue;
		data = (*priv->rxqs)[i];
		if (!rte_flow_dynf_metadata_avail()) {
			data->dynf_meta = 0;
			data->flow_meta_mask = 0;
			data->flow_meta_offset = -1;
		} else {
			data->dynf_meta = 1;
			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
		}
	}
}

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
		      enum rte_flow_action_type action)
{
	if (actions == NULL)
		return NULL;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
		if (actions->type == action)
			return actions;
	return NULL;
}

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't mark and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 flag"
					  " actions in same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "flag action not supported for "
					  "egress");
	return 0;
}
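
/*
 * Usage sketch (illustrative only, not part of the original code):
 * mlx5_flow_find_action() is a simple linear scan over an END-terminated
 * action array, e.g. to check whether a flow requests RSS before handling
 * it; "actions" below is whatever action list the caller already holds.
 *
 *	const struct rte_flow_action *rss_act =
 *		mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *
 *	if (rss_act != NULL) {
 *		const struct rte_flow_action_rss *conf = rss_act->conf;
 *		// conf->queue_num, conf->types, ... are now accessible.
 *	}
 */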

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
			       uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't flag and mark in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 mark actions in same"
					  " flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "mark action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "drop action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
				uint64_t action_flags,
				struct rte_eth_dev *dev,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "queue action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the rss action.
 *
 * @param[in] action
 *   Pointer to the rss action.
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[in] item_flags
 *   Items that were detected.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
			      uint64_t action_flags,
			      struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attr,
			      uint64_t item_flags,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_rss *rss = action->conf;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int i;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions"
					  " in same flow");
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->func,
					  "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	if (rss->level > 2)
#else
	if (rss->level > 1)
#endif
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->level,
					  "tunnel RSS is not supported");
	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
	if (rss->key_len == 0 && rss->key != NULL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key length 0");
	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too small");
	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too large");
	if (rss->queue_num > priv->config.ind_table_max_size)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->queue_num,
					  "number of queues too large");
	if (rss->types & MLX5_RSS_HF_MASK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->types,
					  "some RSS protocols are not"
					  " supported");
	if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
	    !(rss->types & ETH_RSS_IP))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L3 partial RSS requested but L3 RSS"
					  " type not specified");
	if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
	    !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "L4 partial RSS requested but L4 RSS"
					  " type not specified");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (!rss->queue_num)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No queues configured");
	for (i = 0; i != rss->queue_num; ++i) {
		if (rss->queue[i] >= priv->rxqs_n)
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue index out of range");
		if (!(*priv->rxqs)[rss->queue[i]])
			return rte_flow_error_set
				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue is not configured");
	}
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "rss action not supported for "
					  "egress");
	if (rss->level > 1 && !tunnel)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "inner RSS is not supported for "
					  "non-tunnel flows");
	return 0;
}
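
/*
 * Configuration sketch (illustrative only, not part of the original code):
 * an RSS action that satisfies the checks above could look roughly like
 * this; the queue array and its size are the caller's own data.
 *
 *	uint16_t queues[2] = { 0, 1 };
 *	struct rte_flow_action_rss rss_conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 0,			// outer RSS only
 *		.types = ETH_RSS_IP | ETH_RSS_UDP,
 *		.key_len = 0,			// use the default key
 *		.key = NULL,
 *		.queue_num = RTE_DIM(queues),
 *		.queue = queues,
 *	};
 *
 * level > 1 is only accepted together with a tunnel item (and
 * HAVE_IBV_DEVICE_TUNNEL_SUPPORT), and a non-zero key_len must equal
 * MLX5_RSS_HASH_KEY_LEN.
 */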

/*
 * Validate the default miss action.
 *
 * @param[in] action_flags
 *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_default_miss(uint64_t action_flags,
				       const struct rte_flow_attr *attr,
				       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "default miss action not supported "
					  "for egress");
	if (attr->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
					  "only group 0 is supported");
	if (attr->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	return 0;
}

/*
 * Validate the count action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "count action not supported for "
					  "egress");
	return 0;
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attributes
 *   Pointer to flow attributes
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attributes,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t priority_max = priv->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL, "groups is not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL, "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "egress is not supported");
	if (attributes->transfer && !priv->config.dv_esw_en)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	return 0;
}

/**
 * Validate ICMP6 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
			      uint64_t item_flags,
			      uint8_t target_protocol,
			      struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp6 *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP6 layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 is mandatory to filter on"
					  " ICMP6");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp6_mask,
		 sizeof(struct rte_flow_item_icmp6), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate ICMP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint8_t target_protocol,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 is mandatory to filter"
					  " on ICMP");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp_mask,
		 sizeof(struct rte_flow_item_icmp), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate Ethernet item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	int ret;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
				       MLX5_FLOW_LAYER_OUTER_L2;

	if (item_flags & ethm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L2 layers not supported");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow "
					  "L3 layers");
	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L2 layer should not follow VLAN");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	return ret;
}

/**
 * Validate VLAN item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] dev
 *   Ethernet device flow is being created on.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     struct rte_eth_dev *dev,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(UINT16_MAX),
		.inner_type = RTE_BE16(UINT16_MAX),
	};
	uint16_t vlan_tag = 0;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	int ret;
	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
				       (MLX5_FLOW_LAYER_OUTER_L3 |
					MLX5_FLOW_LAYER_OUTER_L4);
	const uint64_t vlanm = tunnel ?
			       MLX5_FLOW_LAYER_INNER_VLAN :
			       MLX5_FLOW_LAYER_OUTER_VLAN;

	if (item_flags & vlanm)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple VLAN layers not supported");
	else if ((item_flags & l34m) != 0)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "VLAN cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_vlan),
					error);
	if (ret)
		return ret;
	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
		struct mlx5_priv *priv = dev->data->dev_private;

		if (priv->vmwa_context) {
			/*
			 * A non-NULL context means we have a virtual machine
			 * and SR-IOV enabled. A VLAN interface has to be
			 * created so that the hypervisor sets up the E-Switch
			 * vport context correctly. We avoid creating multiple
			 * VLAN interfaces, so a VLAN tag mask cannot be
			 * supported.
			 */
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "VLAN tag mask is not"
						  " supported in virtual"
						  " environment");
		}
	}
	if (spec) {
		vlan_tag = spec->tci;
		vlan_tag &= mask->tci;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	return 0;
}

/**
 * Validate IPV4 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the ethernet layer header (including dot1q).
 * @param[in] acc_mask
 *   Acceptable mask, if NULL default internal default mask
 *   will be used to check whether item fields are supported.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint64_t last_item,
			     uint16_t ether_type,
			     const struct rte_flow_item_ipv4 *acc_mask,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ?
			      MLX5_FLOW_LAYER_INNER_L4 :
			      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;
	uint8_t next_proto = 0xFF;
	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
				  MLX5_FLOW_LAYER_OUTER_VLAN |
				  MLX5_FLOW_LAYER_INNER_VLAN);

	if ((last_item & l2_vlan) && ether_type &&
	    ether_type != RTE_ETHER_TYPE_IPV4)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 cannot follow L2/VLAN layer "
					  "which ether type is not IPv4");
	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
		if (mask && spec)
			next_proto = mask->hdr.next_proto_id &
				     spec->hdr.next_proto_id;
		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "multiple tunnel "
						  "not supported");
	}
	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "wrong tunnel type - IPv6 specified "
					  "but IPv4 item provided");
	if (item_flags & l3m)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L3 layers not supported");
	else if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an L4 layer.");
	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an NVGRE layer.");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	else if (mask->hdr.next_proto_id != 0 &&
		 mask->hdr.next_proto_id != 0xff)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
					  "partial mask is not supported"
					  " for protocol");
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					acc_mask ? (const uint8_t *)acc_mask
						 : (const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_ipv4),
					error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate IPV6 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the ethernet layer header (including dot1q).
 * @param[in] acc_mask
 *   Acceptable mask, if NULL default internal default mask
 *   will be used to check whether item fields are supported.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint64_t last_item,
			     uint16_t ether_type,
			     const struct rte_flow_item_ipv6 *acc_mask,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
		},
	};
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ?
			      MLX5_FLOW_LAYER_INNER_L4 :
			      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;
	uint8_t next_proto = 0xFF;
	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
				  MLX5_FLOW_LAYER_OUTER_VLAN |
				  MLX5_FLOW_LAYER_INNER_VLAN);

	if ((last_item & l2_vlan) && ether_type &&
	    ether_type != RTE_ETHER_TYPE_IPV6)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 cannot follow L2/VLAN layer "
					  "which ether type is not IPv6");
	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
		if (mask && spec)
			next_proto = mask->hdr.proto & spec->hdr.proto;
		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "multiple tunnel "
						  "not supported");
	}
	if (item_flags & MLX5_FLOW_LAYER_IPIP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "wrong tunnel type - IPv4 specified "
					  "but IPv6 item provided");
	if (item_flags & l3m)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L3 layers not supported");
	else if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an L4 layer.");
	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 cannot follow an NVGRE layer.");
	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					acc_mask ? (const uint8_t *)acc_mask
						 : (const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_ipv6),
					error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate UDP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    uint8_t target_protocol,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ?
			      MLX5_FLOW_LAYER_INNER_L4 :
			      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with UDP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 is mandatory to filter on L4");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_udp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_udp_mask,
		 sizeof(struct rte_flow_item_udp), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate TCP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that holds the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    uint8_t target_protocol,
			    const struct rte_flow_item_tcp *flow_mask,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_tcp *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
				      MLX5_FLOW_LAYER_OUTER_L3;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	MLX5_ASSERT(flow_mask);
	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with TCP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 is mandatory to filter on L4");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_tcp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)flow_mask,
		 sizeof(struct rte_flow_item_tcp), error);
	if (ret < 0)
		return ret;
	return 0;
}
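
/*
 * Pattern sketch (illustrative only, not part of the original code): the
 * UDP/TCP validators above require an L3 item to be present first and
 * reject a second L4 item, so a minimal valid TCP pattern looks like
 *
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * whereas "eth / tcp" (no L3) or "eth / ipv4 / udp / tcp" (two L4 layers)
 * would be rejected with the errors above.
 */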
1912 */ 1913 int 1914 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, 1915 uint64_t item_flags, 1916 struct rte_flow_error *error) 1917 { 1918 const struct rte_flow_item_vxlan *spec = item->spec; 1919 const struct rte_flow_item_vxlan *mask = item->mask; 1920 int ret; 1921 union vni { 1922 uint32_t vlan_id; 1923 uint8_t vni[4]; 1924 } id = { .vlan_id = 0, }; 1925 1926 1927 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 1928 return rte_flow_error_set(error, ENOTSUP, 1929 RTE_FLOW_ERROR_TYPE_ITEM, item, 1930 "multiple tunnel layers not" 1931 " supported"); 1932 /* 1933 * Verify only UDPv4 is present as defined in 1934 * https://tools.ietf.org/html/rfc7348 1935 */ 1936 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1937 return rte_flow_error_set(error, EINVAL, 1938 RTE_FLOW_ERROR_TYPE_ITEM, item, 1939 "no outer UDP layer found"); 1940 if (!mask) 1941 mask = &rte_flow_item_vxlan_mask; 1942 ret = mlx5_flow_item_acceptable 1943 (item, (const uint8_t *)mask, 1944 (const uint8_t *)&rte_flow_item_vxlan_mask, 1945 sizeof(struct rte_flow_item_vxlan), 1946 error); 1947 if (ret < 0) 1948 return ret; 1949 if (spec) { 1950 memcpy(&id.vni[1], spec->vni, 3); 1951 memcpy(&id.vni[1], mask->vni, 3); 1952 } 1953 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 1954 return rte_flow_error_set(error, ENOTSUP, 1955 RTE_FLOW_ERROR_TYPE_ITEM, item, 1956 "VXLAN tunnel must be fully defined"); 1957 return 0; 1958 } 1959 1960 /** 1961 * Validate VXLAN_GPE item. 1962 * 1963 * @param[in] item 1964 * Item specification. 1965 * @param[in] item_flags 1966 * Bit-fields that holds the items detected until now. 1967 * @param[in] priv 1968 * Pointer to the private data structure. 1969 * @param[in] target_protocol 1970 * The next protocol in the previous item. 1971 * @param[out] error 1972 * Pointer to error structure. 1973 * 1974 * @return 1975 * 0 on success, a negative errno value otherwise and rte_errno is set. 
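 *
 * @note Matching on the VXLAN-GPE next-protocol field is rejected by this
 * function (see the spec->protocol check below); only the VNI, together with
 * the mandatory outer UDP layer, can be used for classification here.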
1976 */ 1977 int 1978 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 1979 uint64_t item_flags, 1980 struct rte_eth_dev *dev, 1981 struct rte_flow_error *error) 1982 { 1983 struct mlx5_priv *priv = dev->data->dev_private; 1984 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1985 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1986 int ret; 1987 union vni { 1988 uint32_t vlan_id; 1989 uint8_t vni[4]; 1990 } id = { .vlan_id = 0, }; 1991 1992 if (!priv->config.l3_vxlan_en) 1993 return rte_flow_error_set(error, ENOTSUP, 1994 RTE_FLOW_ERROR_TYPE_ITEM, item, 1995 "L3 VXLAN is not enabled by device" 1996 " parameter and/or not configured in" 1997 " firmware"); 1998 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 1999 return rte_flow_error_set(error, ENOTSUP, 2000 RTE_FLOW_ERROR_TYPE_ITEM, item, 2001 "multiple tunnel layers not" 2002 " supported"); 2003 /* 2004 * Verify only UDPv4 is present as defined in 2005 * https://tools.ietf.org/html/rfc7348 2006 */ 2007 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2008 return rte_flow_error_set(error, EINVAL, 2009 RTE_FLOW_ERROR_TYPE_ITEM, item, 2010 "no outer UDP layer found"); 2011 if (!mask) 2012 mask = &rte_flow_item_vxlan_gpe_mask; 2013 ret = mlx5_flow_item_acceptable 2014 (item, (const uint8_t *)mask, 2015 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2016 sizeof(struct rte_flow_item_vxlan_gpe), 2017 error); 2018 if (ret < 0) 2019 return ret; 2020 if (spec) { 2021 if (spec->protocol) 2022 return rte_flow_error_set(error, ENOTSUP, 2023 RTE_FLOW_ERROR_TYPE_ITEM, 2024 item, 2025 "VxLAN-GPE protocol" 2026 " not supported"); 2027 memcpy(&id.vni[1], spec->vni, 3); 2028 memcpy(&id.vni[1], mask->vni, 3); 2029 } 2030 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2031 return rte_flow_error_set(error, ENOTSUP, 2032 RTE_FLOW_ERROR_TYPE_ITEM, item, 2033 "VXLAN-GPE tunnel must be fully" 2034 " defined"); 2035 return 0; 2036 } 2037 /** 2038 * Validate GRE Key item. 2039 * 2040 * @param[in] item 2041 * Item specification. 2042 * @param[in] item_flags 2043 * Bit flags to mark detected items. 2044 * @param[in] gre_item 2045 * Pointer to gre_item 2046 * @param[out] error 2047 * Pointer to error structure. 2048 * 2049 * @return 2050 * 0 on success, a negative errno value otherwise and rte_errno is set. 
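 *
 * Illustrative ordering sketch (caller-side variables are assumed, shown only
 * to document the expected item sequence):
 * @code
 * // gre_item is the GRE item validated just before this one; when its mask
 * // covers the K (key present) bit, RTE_BE16(0x2000), the spec must have
 * // that bit set.
 * uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_GRE;
 * struct rte_flow_error error;
 *
 * if (mlx5_flow_validate_item_gre_key(item, item_flags, gre_item, &error))
 *         return -rte_errno;
 * item_flags |= MLX5_FLOW_LAYER_GRE_KEY;
 * @endcode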
2051 */ 2052 int 2053 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2054 uint64_t item_flags, 2055 const struct rte_flow_item *gre_item, 2056 struct rte_flow_error *error) 2057 { 2058 const rte_be32_t *mask = item->mask; 2059 int ret = 0; 2060 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2061 const struct rte_flow_item_gre *gre_spec; 2062 const struct rte_flow_item_gre *gre_mask; 2063 2064 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2065 return rte_flow_error_set(error, ENOTSUP, 2066 RTE_FLOW_ERROR_TYPE_ITEM, item, 2067 "Multiple GRE key not support"); 2068 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2069 return rte_flow_error_set(error, ENOTSUP, 2070 RTE_FLOW_ERROR_TYPE_ITEM, item, 2071 "No preceding GRE header"); 2072 if (item_flags & MLX5_FLOW_LAYER_INNER) 2073 return rte_flow_error_set(error, ENOTSUP, 2074 RTE_FLOW_ERROR_TYPE_ITEM, item, 2075 "GRE key following a wrong item"); 2076 gre_mask = gre_item->mask; 2077 if (!gre_mask) 2078 gre_mask = &rte_flow_item_gre_mask; 2079 gre_spec = gre_item->spec; 2080 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2081 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2082 return rte_flow_error_set(error, EINVAL, 2083 RTE_FLOW_ERROR_TYPE_ITEM, item, 2084 "Key bit must be on"); 2085 2086 if (!mask) 2087 mask = &gre_key_default_mask; 2088 ret = mlx5_flow_item_acceptable 2089 (item, (const uint8_t *)mask, 2090 (const uint8_t *)&gre_key_default_mask, 2091 sizeof(rte_be32_t), error); 2092 return ret; 2093 } 2094 2095 /** 2096 * Validate GRE item. 2097 * 2098 * @param[in] item 2099 * Item specification. 2100 * @param[in] item_flags 2101 * Bit flags to mark detected items. 2102 * @param[in] target_protocol 2103 * The next protocol in the previous item. 2104 * @param[out] error 2105 * Pointer to error structure. 2106 * 2107 * @return 2108 * 0 on success, a negative errno value otherwise and rte_errno is set. 
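 *
 * @note The nic_mask used below restricts matching to the GRE C, K and S flag
 * bits in c_rsvd0_ver (0x8000 | 0x2000 | 0x1000 == 0xB000) plus the protocol
 * field; the reserved bits and the version field cannot be matched.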
2109 */ 2110 int 2111 mlx5_flow_validate_item_gre(const struct rte_flow_item *item, 2112 uint64_t item_flags, 2113 uint8_t target_protocol, 2114 struct rte_flow_error *error) 2115 { 2116 const struct rte_flow_item_gre *spec __rte_unused = item->spec; 2117 const struct rte_flow_item_gre *mask = item->mask; 2118 int ret; 2119 const struct rte_flow_item_gre nic_mask = { 2120 .c_rsvd0_ver = RTE_BE16(0xB000), 2121 .protocol = RTE_BE16(UINT16_MAX), 2122 }; 2123 2124 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2125 return rte_flow_error_set(error, EINVAL, 2126 RTE_FLOW_ERROR_TYPE_ITEM, item, 2127 "protocol filtering not compatible" 2128 " with this GRE layer"); 2129 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2130 return rte_flow_error_set(error, ENOTSUP, 2131 RTE_FLOW_ERROR_TYPE_ITEM, item, 2132 "multiple tunnel layers not" 2133 " supported"); 2134 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2135 return rte_flow_error_set(error, ENOTSUP, 2136 RTE_FLOW_ERROR_TYPE_ITEM, item, 2137 "L3 Layer is missing"); 2138 if (!mask) 2139 mask = &rte_flow_item_gre_mask; 2140 ret = mlx5_flow_item_acceptable 2141 (item, (const uint8_t *)mask, 2142 (const uint8_t *)&nic_mask, 2143 sizeof(struct rte_flow_item_gre), error); 2144 if (ret < 0) 2145 return ret; 2146 #ifndef HAVE_MLX5DV_DR 2147 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT 2148 if (spec && (spec->protocol & mask->protocol)) 2149 return rte_flow_error_set(error, ENOTSUP, 2150 RTE_FLOW_ERROR_TYPE_ITEM, item, 2151 "without MPLS support the" 2152 " specification cannot be used for" 2153 " filtering"); 2154 #endif 2155 #endif 2156 return 0; 2157 } 2158 2159 /** 2160 * Validate Geneve item. 2161 * 2162 * @param[in] item 2163 * Item specification. 2164 * @param[in] item_flags 2165 * Bit-fields that hold the items detected until now. 2166 * @param[in] dev 2167 * Pointer to the rte_eth_dev structure. 2168 * @param[out] error 2169 * Pointer to error structure. 2170 * 2171 * @return 2172 * 0 on success, a negative errno value otherwise and rte_errno is set. 2173 */ 2174 2175 int 2176 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, 2177 uint64_t item_flags, 2178 struct rte_eth_dev *dev, 2179 struct rte_flow_error *error) 2180 { 2181 struct mlx5_priv *priv = dev->data->dev_private; 2182 const struct rte_flow_item_geneve *spec = item->spec; 2183 const struct rte_flow_item_geneve *mask = item->mask; 2184 int ret; 2185 uint16_t gbhdr; 2186 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2187 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2188 const struct rte_flow_item_geneve nic_mask = { 2189 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2190 .vni = "\xff\xff\xff", 2191 .protocol = RTE_BE16(UINT16_MAX), 2192 }; 2193 2194 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2195 return rte_flow_error_set(error, ENOTSUP, 2196 RTE_FLOW_ERROR_TYPE_ITEM, item, 2197 "L3 Geneve is not enabled by device" 2198 " parameter and/or not configured in" 2199 " firmware"); 2200 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2201 return rte_flow_error_set(error, ENOTSUP, 2202 RTE_FLOW_ERROR_TYPE_ITEM, item, 2203 "multiple tunnel layers not" 2204 " supported"); 2205 /* 2206 * Verify only UDPv4 is present as defined in 2207 * https://tools.ietf.org/html/rfc7348 2208 */ 2209 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2210 return rte_flow_error_set(error, EINVAL, 2211 RTE_FLOW_ERROR_TYPE_ITEM, item, 2212 "no outer UDP layer found"); 2213 if (!mask) 2214 mask = &rte_flow_item_geneve_mask; 2215 ret = mlx5_flow_item_acceptable 2216 (item, (const uint8_t *)mask, 2217 (const uint8_t *)&nic_mask, 2218 sizeof(struct rte_flow_item_geneve), error); 2219 if (ret) 2220 return ret; 2221 if (spec) { 2222 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2223 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2224 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2225 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2226 return rte_flow_error_set(error, ENOTSUP, 2227 RTE_FLOW_ERROR_TYPE_ITEM, 2228 item, 2229 "Geneve protocol unsupported" 2230 " fields are being used"); 2231 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2232 return rte_flow_error_set 2233 (error, ENOTSUP, 2234 RTE_FLOW_ERROR_TYPE_ITEM, 2235 item, 2236 "Unsupported Geneve options length"); 2237 } 2238 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2239 return rte_flow_error_set 2240 (error, ENOTSUP, 2241 RTE_FLOW_ERROR_TYPE_ITEM, item, 2242 "Geneve tunnel must be fully defined"); 2243 return 0; 2244 } 2245 2246 /** 2247 * Validate MPLS item. 2248 * 2249 * @param[in] dev 2250 * Pointer to the rte_eth_dev structure. 2251 * @param[in] item 2252 * Item specification. 2253 * @param[in] item_flags 2254 * Bit-fields that holds the items detected until now. 2255 * @param[in] prev_layer 2256 * The protocol layer indicated in previous item. 2257 * @param[out] error 2258 * Pointer to error structure. 2259 * 2260 * @return 2261 * 0 on success, a negative errno value otherwise and rte_errno is set. 2262 */ 2263 int 2264 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2265 const struct rte_flow_item *item __rte_unused, 2266 uint64_t item_flags __rte_unused, 2267 uint64_t prev_layer __rte_unused, 2268 struct rte_flow_error *error) 2269 { 2270 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2271 const struct rte_flow_item_mpls *mask = item->mask; 2272 struct mlx5_priv *priv = dev->data->dev_private; 2273 int ret; 2274 2275 if (!priv->config.mpls_en) 2276 return rte_flow_error_set(error, ENOTSUP, 2277 RTE_FLOW_ERROR_TYPE_ITEM, item, 2278 "MPLS not supported or" 2279 " disabled in firmware" 2280 " configuration."); 2281 /* MPLS over IP, UDP, GRE is allowed */ 2282 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2283 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2284 MLX5_FLOW_LAYER_GRE))) 2285 return rte_flow_error_set(error, EINVAL, 2286 RTE_FLOW_ERROR_TYPE_ITEM, item, 2287 "protocol filtering not compatible" 2288 " with MPLS layer"); 2289 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
*/ 2290 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2291 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2292 return rte_flow_error_set(error, ENOTSUP, 2293 RTE_FLOW_ERROR_TYPE_ITEM, item, 2294 "multiple tunnel layers not" 2295 " supported"); 2296 if (!mask) 2297 mask = &rte_flow_item_mpls_mask; 2298 ret = mlx5_flow_item_acceptable 2299 (item, (const uint8_t *)mask, 2300 (const uint8_t *)&rte_flow_item_mpls_mask, 2301 sizeof(struct rte_flow_item_mpls), error); 2302 if (ret < 0) 2303 return ret; 2304 return 0; 2305 #else 2306 return rte_flow_error_set(error, ENOTSUP, 2307 RTE_FLOW_ERROR_TYPE_ITEM, item, 2308 "MPLS is not supported by Verbs, please" 2309 " update."); 2310 #endif 2311 } 2312 2313 /** 2314 * Validate NVGRE item. 2315 * 2316 * @param[in] item 2317 * Item specification. 2318 * @param[in] item_flags 2319 * Bit flags to mark detected items. 2320 * @param[in] target_protocol 2321 * The next protocol in the previous item. 2322 * @param[out] error 2323 * Pointer to error structure. 2324 * 2325 * @return 2326 * 0 on success, a negative errno value otherwise and rte_errno is set. 2327 */ 2328 int 2329 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2330 uint64_t item_flags, 2331 uint8_t target_protocol, 2332 struct rte_flow_error *error) 2333 { 2334 const struct rte_flow_item_nvgre *mask = item->mask; 2335 int ret; 2336 2337 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2338 return rte_flow_error_set(error, EINVAL, 2339 RTE_FLOW_ERROR_TYPE_ITEM, item, 2340 "protocol filtering not compatible" 2341 " with this GRE layer"); 2342 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2343 return rte_flow_error_set(error, ENOTSUP, 2344 RTE_FLOW_ERROR_TYPE_ITEM, item, 2345 "multiple tunnel layers not" 2346 " supported"); 2347 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2348 return rte_flow_error_set(error, ENOTSUP, 2349 RTE_FLOW_ERROR_TYPE_ITEM, item, 2350 "L3 Layer is missing"); 2351 if (!mask) 2352 mask = &rte_flow_item_nvgre_mask; 2353 ret = mlx5_flow_item_acceptable 2354 (item, (const uint8_t *)mask, 2355 (const uint8_t *)&rte_flow_item_nvgre_mask, 2356 sizeof(struct rte_flow_item_nvgre), error); 2357 if (ret < 0) 2358 return ret; 2359 return 0; 2360 } 2361 2362 /* Allocate unique ID for the split Q/RSS subflows. */ 2363 static uint32_t 2364 flow_qrss_get_id(struct rte_eth_dev *dev) 2365 { 2366 struct mlx5_priv *priv = dev->data->dev_private; 2367 uint32_t qrss_id, ret; 2368 2369 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2370 if (ret) 2371 return 0; 2372 MLX5_ASSERT(qrss_id); 2373 return qrss_id; 2374 } 2375 2376 /* Free unique ID for the split Q/RSS subflows. */ 2377 static void 2378 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2379 { 2380 struct mlx5_priv *priv = dev->data->dev_private; 2381 2382 if (qrss_id) 2383 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2384 } 2385 2386 /** 2387 * Release resource related QUEUE/RSS action split. 2388 * 2389 * @param dev 2390 * Pointer to Ethernet device. 2391 * @param flow 2392 * Flow to release id's from. 
2393 */ 2394 static void 2395 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2396 struct rte_flow *flow) 2397 { 2398 struct mlx5_priv *priv = dev->data->dev_private; 2399 uint32_t handle_idx; 2400 struct mlx5_flow_handle *dev_handle; 2401 2402 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2403 handle_idx, dev_handle, next) 2404 if (dev_handle->split_flow_id) 2405 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2406 } 2407 2408 static int 2409 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2410 const struct rte_flow_attr *attr __rte_unused, 2411 const struct rte_flow_item items[] __rte_unused, 2412 const struct rte_flow_action actions[] __rte_unused, 2413 bool external __rte_unused, 2414 int hairpin __rte_unused, 2415 struct rte_flow_error *error) 2416 { 2417 return rte_flow_error_set(error, ENOTSUP, 2418 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2419 } 2420 2421 static struct mlx5_flow * 2422 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2423 const struct rte_flow_attr *attr __rte_unused, 2424 const struct rte_flow_item items[] __rte_unused, 2425 const struct rte_flow_action actions[] __rte_unused, 2426 struct rte_flow_error *error) 2427 { 2428 rte_flow_error_set(error, ENOTSUP, 2429 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2430 return NULL; 2431 } 2432 2433 static int 2434 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2435 struct mlx5_flow *dev_flow __rte_unused, 2436 const struct rte_flow_attr *attr __rte_unused, 2437 const struct rte_flow_item items[] __rte_unused, 2438 const struct rte_flow_action actions[] __rte_unused, 2439 struct rte_flow_error *error) 2440 { 2441 return rte_flow_error_set(error, ENOTSUP, 2442 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2443 } 2444 2445 static int 2446 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2447 struct rte_flow *flow __rte_unused, 2448 struct rte_flow_error *error) 2449 { 2450 return rte_flow_error_set(error, ENOTSUP, 2451 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2452 } 2453 2454 static void 2455 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2456 struct rte_flow *flow __rte_unused) 2457 { 2458 } 2459 2460 static void 2461 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2462 struct rte_flow *flow __rte_unused) 2463 { 2464 } 2465 2466 static int 2467 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2468 struct rte_flow *flow __rte_unused, 2469 const struct rte_flow_action *actions __rte_unused, 2470 void *data __rte_unused, 2471 struct rte_flow_error *error) 2472 { 2473 return rte_flow_error_set(error, ENOTSUP, 2474 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2475 } 2476 2477 /* Void driver to protect from null pointer reference. */ 2478 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2479 .validate = flow_null_validate, 2480 .prepare = flow_null_prepare, 2481 .translate = flow_null_translate, 2482 .apply = flow_null_apply, 2483 .remove = flow_null_remove, 2484 .destroy = flow_null_destroy, 2485 .query = flow_null_query, 2486 }; 2487 2488 /** 2489 * Select flow driver type according to flow attributes and device 2490 * configuration. 2491 * 2492 * @param[in] dev 2493 * Pointer to the dev structure. 2494 * @param[in] attr 2495 * Pointer to the flow attributes. 2496 * 2497 * @return 2498 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
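 *
 * The flow_drv_*() wrappers below all follow the same dispatch pattern,
 * roughly:
 * @code
 * enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 * const struct mlx5_flow_driver_ops *fops = flow_get_drv_ops(type);
 *
 * ret = fops->validate(dev, attr, items, actions, external, hairpin, error);
 * @endcode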
2499 */ 2500 static enum mlx5_flow_drv_type 2501 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2502 { 2503 struct mlx5_priv *priv = dev->data->dev_private; 2504 enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX; 2505 2506 if (attr->transfer && priv->config.dv_esw_en) 2507 type = MLX5_FLOW_TYPE_DV; 2508 if (!attr->transfer) 2509 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2510 MLX5_FLOW_TYPE_VERBS; 2511 return type; 2512 } 2513 2514 #define flow_get_drv_ops(type) flow_drv_ops[type] 2515 2516 /** 2517 * Flow driver validation API. This abstracts calling driver specific functions. 2518 * The type of flow driver is determined according to flow attributes. 2519 * 2520 * @param[in] dev 2521 * Pointer to the dev structure. 2522 * @param[in] attr 2523 * Pointer to the flow attributes. 2524 * @param[in] items 2525 * Pointer to the list of items. 2526 * @param[in] actions 2527 * Pointer to the list of actions. 2528 * @param[in] external 2529 * This flow rule is created by request external to PMD. 2530 * @param[in] hairpin 2531 * Number of hairpin TX actions, 0 means classic flow. 2532 * @param[out] error 2533 * Pointer to the error structure. 2534 * 2535 * @return 2536 * 0 on success, a negative errno value otherwise and rte_errno is set. 2537 */ 2538 static inline int 2539 flow_drv_validate(struct rte_eth_dev *dev, 2540 const struct rte_flow_attr *attr, 2541 const struct rte_flow_item items[], 2542 const struct rte_flow_action actions[], 2543 bool external, int hairpin, struct rte_flow_error *error) 2544 { 2545 const struct mlx5_flow_driver_ops *fops; 2546 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2547 2548 fops = flow_get_drv_ops(type); 2549 return fops->validate(dev, attr, items, actions, external, 2550 hairpin, error); 2551 } 2552 2553 /** 2554 * Flow driver preparation API. This abstracts calling driver specific 2555 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2556 * calculates the size of memory required for device flow, allocates the memory, 2557 * initializes the device flow and returns the pointer. 2558 * 2559 * @note 2560 * This function initializes device flow structure such as dv or verbs in 2561 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2562 * rest. For example, adding returning device flow to flow->dev_flow list and 2563 * setting backward reference to the flow should be done out of this function. 2564 * layers field is not filled either. 2565 * 2566 * @param[in] dev 2567 * Pointer to the dev structure. 2568 * @param[in] attr 2569 * Pointer to the flow attributes. 2570 * @param[in] items 2571 * Pointer to the list of items. 2572 * @param[in] actions 2573 * Pointer to the list of actions. 2574 * @param[in] flow_idx 2575 * This memory pool index to the flow. 2576 * @param[out] error 2577 * Pointer to the error structure. 2578 * 2579 * @return 2580 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
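 *
 * Sketch of the expected call sequence (it mirrors flow_create_split_inner()
 * further below; error handling is trimmed):
 * @code
 * dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, flow_idx,
 *                             error);
 * if (!dev_flow)
 *         return -rte_errno;
 * dev_flow->flow = flow;
 * SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
 *               dev_flow->handle, next);
 * ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 * @endcode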
2581 */ 2582 static inline struct mlx5_flow * 2583 flow_drv_prepare(struct rte_eth_dev *dev, 2584 const struct rte_flow *flow, 2585 const struct rte_flow_attr *attr, 2586 const struct rte_flow_item items[], 2587 const struct rte_flow_action actions[], 2588 uint32_t flow_idx, 2589 struct rte_flow_error *error) 2590 { 2591 const struct mlx5_flow_driver_ops *fops; 2592 enum mlx5_flow_drv_type type = flow->drv_type; 2593 struct mlx5_flow *mlx5_flow = NULL; 2594 2595 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2596 fops = flow_get_drv_ops(type); 2597 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2598 if (mlx5_flow) 2599 mlx5_flow->flow_idx = flow_idx; 2600 return mlx5_flow; 2601 } 2602 2603 /** 2604 * Flow driver translation API. This abstracts calling driver specific 2605 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2606 * translates a generic flow into a driver flow. flow_drv_prepare() must 2607 * precede. 2608 * 2609 * @note 2610 * dev_flow->layers could be filled as a result of parsing during translation 2611 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2612 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2613 * flow->actions could be overwritten even though all the expanded dev_flows 2614 * have the same actions. 2615 * 2616 * @param[in] dev 2617 * Pointer to the rte dev structure. 2618 * @param[in, out] dev_flow 2619 * Pointer to the mlx5 flow. 2620 * @param[in] attr 2621 * Pointer to the flow attributes. 2622 * @param[in] items 2623 * Pointer to the list of items. 2624 * @param[in] actions 2625 * Pointer to the list of actions. 2626 * @param[out] error 2627 * Pointer to the error structure. 2628 * 2629 * @return 2630 * 0 on success, a negative errno value otherwise and rte_errno is set. 2631 */ 2632 static inline int 2633 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2634 const struct rte_flow_attr *attr, 2635 const struct rte_flow_item items[], 2636 const struct rte_flow_action actions[], 2637 struct rte_flow_error *error) 2638 { 2639 const struct mlx5_flow_driver_ops *fops; 2640 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2641 2642 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2643 fops = flow_get_drv_ops(type); 2644 return fops->translate(dev, dev_flow, attr, items, actions, error); 2645 } 2646 2647 /** 2648 * Flow driver apply API. This abstracts calling driver specific functions. 2649 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2650 * translated driver flows on to device. flow_drv_translate() must precede. 2651 * 2652 * @param[in] dev 2653 * Pointer to Ethernet device structure. 2654 * @param[in, out] flow 2655 * Pointer to flow structure. 2656 * @param[out] error 2657 * Pointer to error structure. 2658 * 2659 * @return 2660 * 0 on success, a negative errno value otherwise and rte_errno is set. 2661 */ 2662 static inline int 2663 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2664 struct rte_flow_error *error) 2665 { 2666 const struct mlx5_flow_driver_ops *fops; 2667 enum mlx5_flow_drv_type type = flow->drv_type; 2668 2669 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2670 fops = flow_get_drv_ops(type); 2671 return fops->apply(dev, flow, error); 2672 } 2673 2674 /** 2675 * Flow driver remove API. This abstracts calling driver specific functions. 2676 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 2677 * from the device. All the resources of the flow should be freed by calling 2678 * flow_drv_destroy(). 2679 * 2680 * @param[in] dev 2681 * Pointer to Ethernet device. 2682 * @param[in, out] flow 2683 * Pointer to flow structure. 2684 */ 2685 static inline void 2686 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2687 { 2688 const struct mlx5_flow_driver_ops *fops; 2689 enum mlx5_flow_drv_type type = flow->drv_type; 2690 2691 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2692 fops = flow_get_drv_ops(type); 2693 fops->remove(dev, flow); 2694 } 2695 2696 /** 2697 * Flow driver destroy API. This abstracts calling driver specific functions. 2698 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 2699 * from the device and releases the resources of the flow. 2700 * 2701 * @param[in] dev 2702 * Pointer to Ethernet device. 2703 * @param[in, out] flow 2704 * Pointer to flow structure. 2705 */ 2706 static inline void 2707 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 2708 { 2709 const struct mlx5_flow_driver_ops *fops; 2710 enum mlx5_flow_drv_type type = flow->drv_type; 2711 2712 flow_mreg_split_qrss_release(dev, flow); 2713 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2714 fops = flow_get_drv_ops(type); 2715 fops->destroy(dev, flow); 2716 } 2717 2718 /** 2719 * Get RSS action from the action list. 2720 * 2721 * @param[in] actions 2722 * Pointer to the list of actions. 2723 * 2724 * @return 2725 * Pointer to the RSS action if it exists, NULL otherwise. 2726 */ 2727 static const struct rte_flow_action_rss* 2728 flow_get_rss_action(const struct rte_flow_action actions[]) 2729 { 2730 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2731 switch (actions->type) { 2732 case RTE_FLOW_ACTION_TYPE_RSS: 2733 return (const struct rte_flow_action_rss *) 2734 actions->conf; 2735 default: 2736 break; 2737 } 2738 } 2739 return NULL; 2740 } 2741 2742 static unsigned int 2743 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 2744 { 2745 const struct rte_flow_item *item; 2746 unsigned int has_vlan = 0; 2747 2748 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 2749 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 2750 has_vlan = 1; 2751 break; 2752 } 2753 } 2754 if (has_vlan) 2755 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 2756 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 2757 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 2758 MLX5_EXPANSION_ROOT_OUTER; 2759 } 2760 2761 /** 2762 * Get layer flags from the prefix flow. 2763 * 2764 * Some flows may be split into several subflows; the prefix subflow gets the 2765 * match items and the suffix subflow gets the actions. 2766 * Some actions need the user-defined match item flags to get the details for 2767 * the action. 2768 * This function helps the suffix subflow to get the item layer flags from the 2769 * prefix subflow. 2770 * 2771 * @param[in] dev_flow 2772 * Pointer to the created prefix subflow. 2773 * 2774 * @return 2775 * The layers obtained from the prefix subflow. 2776 */ 2777 static inline uint64_t 2778 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 2779 { 2780 uint64_t layers = 0; 2781 2782 /* 2783 * The layer bits could be cached in a local variable, but the compiler 2784 * usually does this optimization well enough on its own. 2785 * If no decap actions, use the layers directly.
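 * For example, a prefix subflow that decapsulates VXLAN and matched inner
 * IPv4/UDP is reported to the suffix subflow as
 * MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L4_UDP.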
2786 */ 2787 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 2788 return dev_flow->handle->layers; 2789 /* Convert L3 layers with decap action. */ 2790 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 2791 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 2792 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 2793 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 2794 /* Convert L4 layers with decap action. */ 2795 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 2796 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 2797 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 2798 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 2799 return layers; 2800 } 2801 2802 /** 2803 * Get metadata split action information. 2804 * 2805 * @param[in] actions 2806 * Pointer to the list of actions. 2807 * @param[out] qrss 2808 * Pointer to the return pointer. 2809 * @param[out] qrss_type 2810 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 2811 * if no QUEUE/RSS is found. 2812 * @param[out] encap_idx 2813 * Pointer to the index of the encap action if exists, otherwise the last 2814 * action index. 2815 * 2816 * @return 2817 * Total number of actions. 2818 */ 2819 static int 2820 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 2821 const struct rte_flow_action **qrss, 2822 int *encap_idx) 2823 { 2824 const struct rte_flow_action_raw_encap *raw_encap; 2825 int actions_n = 0; 2826 int raw_decap_idx = -1; 2827 2828 *encap_idx = -1; 2829 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2830 switch (actions->type) { 2831 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 2832 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 2833 *encap_idx = actions_n; 2834 break; 2835 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 2836 raw_decap_idx = actions_n; 2837 break; 2838 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 2839 raw_encap = actions->conf; 2840 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 2841 *encap_idx = raw_decap_idx != -1 ? 2842 raw_decap_idx : actions_n; 2843 break; 2844 case RTE_FLOW_ACTION_TYPE_QUEUE: 2845 case RTE_FLOW_ACTION_TYPE_RSS: 2846 *qrss = actions; 2847 break; 2848 default: 2849 break; 2850 } 2851 actions_n++; 2852 } 2853 if (*encap_idx == -1) 2854 *encap_idx = actions_n; 2855 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2856 return actions_n + 1; 2857 } 2858 2859 /** 2860 * Check meter action from the action list. 2861 * 2862 * @param[in] actions 2863 * Pointer to the list of actions. 2864 * @param[out] mtr 2865 * Pointer to the meter exist flag. 2866 * 2867 * @return 2868 * Total number of actions. 2869 */ 2870 static int 2871 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 2872 { 2873 int actions_n = 0; 2874 2875 MLX5_ASSERT(mtr); 2876 *mtr = 0; 2877 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2878 switch (actions->type) { 2879 case RTE_FLOW_ACTION_TYPE_METER: 2880 *mtr = 1; 2881 break; 2882 default: 2883 break; 2884 } 2885 actions_n++; 2886 } 2887 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2888 return actions_n + 1; 2889 } 2890 2891 /** 2892 * Check if the flow should be splited due to hairpin. 2893 * The reason for the split is that in current HW we can't 2894 * support encap on Rx, so if a flow have encap we move it 2895 * to Tx. 2896 * 2897 * @param dev 2898 * Pointer to Ethernet device. 2899 * @param[in] attr 2900 * Flow rule attributes. 2901 * @param[in] actions 2902 * Associated actions (list terminated by the END action). 
2903 * 2904 * @return 2905 * > 0 the number of actions and the flow should be split, 2906 * 0 when no split required. 2907 */ 2908 static int 2909 flow_check_hairpin_split(struct rte_eth_dev *dev, 2910 const struct rte_flow_attr *attr, 2911 const struct rte_flow_action actions[]) 2912 { 2913 int queue_action = 0; 2914 int action_n = 0; 2915 int encap = 0; 2916 const struct rte_flow_action_queue *queue; 2917 const struct rte_flow_action_rss *rss; 2918 const struct rte_flow_action_raw_encap *raw_encap; 2919 2920 if (!attr->ingress) 2921 return 0; 2922 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2923 switch (actions->type) { 2924 case RTE_FLOW_ACTION_TYPE_QUEUE: 2925 queue = actions->conf; 2926 if (queue == NULL) 2927 return 0; 2928 if (mlx5_rxq_get_type(dev, queue->index) != 2929 MLX5_RXQ_TYPE_HAIRPIN) 2930 return 0; 2931 queue_action = 1; 2932 action_n++; 2933 break; 2934 case RTE_FLOW_ACTION_TYPE_RSS: 2935 rss = actions->conf; 2936 if (rss == NULL || rss->queue_num == 0) 2937 return 0; 2938 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 2939 MLX5_RXQ_TYPE_HAIRPIN) 2940 return 0; 2941 queue_action = 1; 2942 action_n++; 2943 break; 2944 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 2945 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 2946 encap = 1; 2947 action_n++; 2948 break; 2949 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 2950 raw_encap = actions->conf; 2951 if (raw_encap->size > 2952 (sizeof(struct rte_flow_item_eth) + 2953 sizeof(struct rte_flow_item_ipv4))) 2954 encap = 1; 2955 action_n++; 2956 break; 2957 default: 2958 action_n++; 2959 break; 2960 } 2961 } 2962 if (encap == 1 && queue_action) 2963 return action_n; 2964 return 0; 2965 } 2966 2967 /* Declare flow create/destroy prototype in advance. */ 2968 static uint32_t 2969 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 2970 const struct rte_flow_attr *attr, 2971 const struct rte_flow_item items[], 2972 const struct rte_flow_action actions[], 2973 bool external, struct rte_flow_error *error); 2974 2975 static void 2976 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 2977 uint32_t flow_idx); 2978 2979 /** 2980 * Add a flow of copying flow metadata registers in RX_CP_TBL. 2981 * 2982 * As mark_id is unique, if there's already a registered flow for the mark_id, 2983 * return by increasing the reference counter of the resource. Otherwise, create 2984 * the resource (mcp_res) and flow. 2985 * 2986 * Flow looks like, 2987 * - If ingress port is ANY and reg_c[1] is mark_id, 2988 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 2989 * 2990 * For default flow (zero mark_id), flow is like, 2991 * - If ingress port is ANY, 2992 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 2993 * 2994 * @param dev 2995 * Pointer to Ethernet device. 2996 * @param mark_id 2997 * ID of MARK action, zero means default flow for META. 2998 * @param[out] error 2999 * Perform verbose error reporting if not NULL. 3000 * 3001 * @return 3002 * Associated resource on success, NULL otherwise and rte_errno is set. 
3003 */ 3004 static struct mlx5_flow_mreg_copy_resource * 3005 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3006 struct rte_flow_error *error) 3007 { 3008 struct mlx5_priv *priv = dev->data->dev_private; 3009 struct rte_flow_attr attr = { 3010 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3011 .ingress = 1, 3012 }; 3013 struct mlx5_rte_flow_item_tag tag_spec = { 3014 .data = mark_id, 3015 }; 3016 struct rte_flow_item items[] = { 3017 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3018 }; 3019 struct rte_flow_action_mark ftag = { 3020 .id = mark_id, 3021 }; 3022 struct mlx5_flow_action_copy_mreg cp_mreg = { 3023 .dst = REG_B, 3024 .src = 0, 3025 }; 3026 struct rte_flow_action_jump jump = { 3027 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3028 }; 3029 struct rte_flow_action actions[] = { 3030 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3031 }; 3032 struct mlx5_flow_mreg_copy_resource *mcp_res; 3033 uint32_t idx = 0; 3034 int ret; 3035 3036 /* Fill the register fileds in the flow. */ 3037 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3038 if (ret < 0) 3039 return NULL; 3040 tag_spec.id = ret; 3041 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3042 if (ret < 0) 3043 return NULL; 3044 cp_mreg.src = ret; 3045 /* Check if already registered. */ 3046 MLX5_ASSERT(priv->mreg_cp_tbl); 3047 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3048 if (mcp_res) { 3049 /* For non-default rule. */ 3050 if (mark_id != MLX5_DEFAULT_COPY_ID) 3051 mcp_res->refcnt++; 3052 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3053 mcp_res->refcnt == 1); 3054 return mcp_res; 3055 } 3056 /* Provide the full width of FLAG specific value. */ 3057 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3058 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3059 /* Build a new flow. */ 3060 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3061 items[0] = (struct rte_flow_item){ 3062 .type = (enum rte_flow_item_type) 3063 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3064 .spec = &tag_spec, 3065 }; 3066 items[1] = (struct rte_flow_item){ 3067 .type = RTE_FLOW_ITEM_TYPE_END, 3068 }; 3069 actions[0] = (struct rte_flow_action){ 3070 .type = (enum rte_flow_action_type) 3071 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3072 .conf = &ftag, 3073 }; 3074 actions[1] = (struct rte_flow_action){ 3075 .type = (enum rte_flow_action_type) 3076 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3077 .conf = &cp_mreg, 3078 }; 3079 actions[2] = (struct rte_flow_action){ 3080 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3081 .conf = &jump, 3082 }; 3083 actions[3] = (struct rte_flow_action){ 3084 .type = RTE_FLOW_ACTION_TYPE_END, 3085 }; 3086 } else { 3087 /* Default rule, wildcard match. */ 3088 attr.priority = MLX5_FLOW_PRIO_RSVD; 3089 items[0] = (struct rte_flow_item){ 3090 .type = RTE_FLOW_ITEM_TYPE_END, 3091 }; 3092 actions[0] = (struct rte_flow_action){ 3093 .type = (enum rte_flow_action_type) 3094 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3095 .conf = &cp_mreg, 3096 }; 3097 actions[1] = (struct rte_flow_action){ 3098 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3099 .conf = &jump, 3100 }; 3101 actions[2] = (struct rte_flow_action){ 3102 .type = RTE_FLOW_ACTION_TYPE_END, 3103 }; 3104 } 3105 /* Build a new entry. */ 3106 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3107 if (!mcp_res) { 3108 rte_errno = ENOMEM; 3109 return NULL; 3110 } 3111 mcp_res->idx = idx; 3112 /* 3113 * The copy Flows are not included in any list. 
There 3114 * ones are referenced from other Flows and can not 3115 * be applied, removed, deleted in ardbitrary order 3116 * by list traversing. 3117 */ 3118 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3119 actions, false, error); 3120 if (!mcp_res->rix_flow) 3121 goto error; 3122 mcp_res->refcnt++; 3123 mcp_res->hlist_ent.key = mark_id; 3124 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3125 &mcp_res->hlist_ent); 3126 MLX5_ASSERT(!ret); 3127 if (ret) 3128 goto error; 3129 return mcp_res; 3130 error: 3131 if (mcp_res->rix_flow) 3132 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3133 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3134 return NULL; 3135 } 3136 3137 /** 3138 * Release flow in RX_CP_TBL. 3139 * 3140 * @param dev 3141 * Pointer to Ethernet device. 3142 * @flow 3143 * Parent flow for wich copying is provided. 3144 */ 3145 static void 3146 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3147 struct rte_flow *flow) 3148 { 3149 struct mlx5_flow_mreg_copy_resource *mcp_res; 3150 struct mlx5_priv *priv = dev->data->dev_private; 3151 3152 if (!flow->rix_mreg_copy) 3153 return; 3154 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3155 flow->rix_mreg_copy); 3156 if (!mcp_res || !priv->mreg_cp_tbl) 3157 return; 3158 if (flow->copy_applied) { 3159 MLX5_ASSERT(mcp_res->appcnt); 3160 flow->copy_applied = 0; 3161 --mcp_res->appcnt; 3162 if (!mcp_res->appcnt) { 3163 struct rte_flow *mcp_flow = mlx5_ipool_get 3164 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3165 mcp_res->rix_flow); 3166 3167 if (mcp_flow) 3168 flow_drv_remove(dev, mcp_flow); 3169 } 3170 } 3171 /* 3172 * We do not check availability of metadata registers here, 3173 * because copy resources are not allocated in this case. 3174 */ 3175 if (--mcp_res->refcnt) 3176 return; 3177 MLX5_ASSERT(mcp_res->rix_flow); 3178 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3179 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3180 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3181 flow->rix_mreg_copy = 0; 3182 } 3183 3184 /** 3185 * Start flow in RX_CP_TBL. 3186 * 3187 * @param dev 3188 * Pointer to Ethernet device. 3189 * @flow 3190 * Parent flow for wich copying is provided. 3191 * 3192 * @return 3193 * 0 on success, a negative errno value otherwise and rte_errno is set. 3194 */ 3195 static int 3196 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3197 struct rte_flow *flow) 3198 { 3199 struct mlx5_flow_mreg_copy_resource *mcp_res; 3200 struct mlx5_priv *priv = dev->data->dev_private; 3201 int ret; 3202 3203 if (!flow->rix_mreg_copy || flow->copy_applied) 3204 return 0; 3205 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3206 flow->rix_mreg_copy); 3207 if (!mcp_res) 3208 return 0; 3209 if (!mcp_res->appcnt) { 3210 struct rte_flow *mcp_flow = mlx5_ipool_get 3211 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3212 mcp_res->rix_flow); 3213 3214 if (mcp_flow) { 3215 ret = flow_drv_apply(dev, mcp_flow, NULL); 3216 if (ret) 3217 return ret; 3218 } 3219 } 3220 ++mcp_res->appcnt; 3221 flow->copy_applied = 1; 3222 return 0; 3223 } 3224 3225 /** 3226 * Stop flow in RX_CP_TBL. 3227 * 3228 * @param dev 3229 * Pointer to Ethernet device. 3230 * @flow 3231 * Parent flow for wich copying is provided. 
3232 */ 3233 static void 3234 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3235 struct rte_flow *flow) 3236 { 3237 struct mlx5_flow_mreg_copy_resource *mcp_res; 3238 struct mlx5_priv *priv = dev->data->dev_private; 3239 3240 if (!flow->rix_mreg_copy || !flow->copy_applied) 3241 return; 3242 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3243 flow->rix_mreg_copy); 3244 if (!mcp_res) 3245 return; 3246 MLX5_ASSERT(mcp_res->appcnt); 3247 --mcp_res->appcnt; 3248 flow->copy_applied = 0; 3249 if (!mcp_res->appcnt) { 3250 struct rte_flow *mcp_flow = mlx5_ipool_get 3251 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3252 mcp_res->rix_flow); 3253 3254 if (mcp_flow) 3255 flow_drv_remove(dev, mcp_flow); 3256 } 3257 } 3258 3259 /** 3260 * Remove the default copy action from RX_CP_TBL. 3261 * 3262 * @param dev 3263 * Pointer to Ethernet device. 3264 */ 3265 static void 3266 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3267 { 3268 struct mlx5_flow_mreg_copy_resource *mcp_res; 3269 struct mlx5_priv *priv = dev->data->dev_private; 3270 3271 /* Check if default flow is registered. */ 3272 if (!priv->mreg_cp_tbl) 3273 return; 3274 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3275 MLX5_DEFAULT_COPY_ID); 3276 if (!mcp_res) 3277 return; 3278 MLX5_ASSERT(mcp_res->rix_flow); 3279 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3280 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3281 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3282 } 3283 3284 /** 3285 * Add the default copy action in in RX_CP_TBL. 3286 * 3287 * @param dev 3288 * Pointer to Ethernet device. 3289 * @param[out] error 3290 * Perform verbose error reporting if not NULL. 3291 * 3292 * @return 3293 * 0 for success, negative value otherwise and rte_errno is set. 3294 */ 3295 static int 3296 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3297 struct rte_flow_error *error) 3298 { 3299 struct mlx5_priv *priv = dev->data->dev_private; 3300 struct mlx5_flow_mreg_copy_resource *mcp_res; 3301 3302 /* Check whether extensive metadata feature is engaged. */ 3303 if (!priv->config.dv_flow_en || 3304 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3305 !mlx5_flow_ext_mreg_supported(dev) || 3306 !priv->sh->dv_regc0_mask) 3307 return 0; 3308 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3309 if (!mcp_res) 3310 return -rte_errno; 3311 return 0; 3312 } 3313 3314 /** 3315 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3316 * 3317 * All the flow having Q/RSS action should be split by 3318 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3319 * performs the following, 3320 * - CQE->flow_tag := reg_c[1] (MARK) 3321 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3322 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3323 * but there should be a flow per each MARK ID set by MARK action. 3324 * 3325 * For the aforementioned reason, if there's a MARK action in flow's action 3326 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3327 * the MARK ID to CQE's flow_tag like, 3328 * - If reg_c[1] is mark_id, 3329 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3330 * 3331 * For SET_META action which stores value in reg_c[0], as the destination is 3332 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3333 * MARK ID means the default flow. 
The default flow looks like, 3334 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3335 * 3336 * @param dev 3337 * Pointer to Ethernet device. 3338 * @param flow 3339 * Pointer to flow structure. 3340 * @param[in] actions 3341 * Pointer to the list of actions. 3342 * @param[out] error 3343 * Perform verbose error reporting if not NULL. 3344 * 3345 * @return 3346 * 0 on success, negative value otherwise and rte_errno is set. 3347 */ 3348 static int 3349 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3350 struct rte_flow *flow, 3351 const struct rte_flow_action *actions, 3352 struct rte_flow_error *error) 3353 { 3354 struct mlx5_priv *priv = dev->data->dev_private; 3355 struct mlx5_dev_config *config = &priv->config; 3356 struct mlx5_flow_mreg_copy_resource *mcp_res; 3357 const struct rte_flow_action_mark *mark; 3358 3359 /* Check whether extensive metadata feature is engaged. */ 3360 if (!config->dv_flow_en || 3361 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3362 !mlx5_flow_ext_mreg_supported(dev) || 3363 !priv->sh->dv_regc0_mask) 3364 return 0; 3365 /* Find MARK action. */ 3366 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3367 switch (actions->type) { 3368 case RTE_FLOW_ACTION_TYPE_FLAG: 3369 mcp_res = flow_mreg_add_copy_action 3370 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3371 if (!mcp_res) 3372 return -rte_errno; 3373 flow->rix_mreg_copy = mcp_res->idx; 3374 if (dev->data->dev_started) { 3375 mcp_res->appcnt++; 3376 flow->copy_applied = 1; 3377 } 3378 return 0; 3379 case RTE_FLOW_ACTION_TYPE_MARK: 3380 mark = (const struct rte_flow_action_mark *) 3381 actions->conf; 3382 mcp_res = 3383 flow_mreg_add_copy_action(dev, mark->id, error); 3384 if (!mcp_res) 3385 return -rte_errno; 3386 flow->rix_mreg_copy = mcp_res->idx; 3387 if (dev->data->dev_started) { 3388 mcp_res->appcnt++; 3389 flow->copy_applied = 1; 3390 } 3391 return 0; 3392 default: 3393 break; 3394 } 3395 } 3396 return 0; 3397 } 3398 3399 #define MLX5_MAX_SPLIT_ACTIONS 24 3400 #define MLX5_MAX_SPLIT_ITEMS 24 3401 3402 /** 3403 * Split the hairpin flow. 3404 * Since HW can't support encap on Rx we move the encap to Tx. 3405 * If the count action is after the encap then we also 3406 * move the count action. in this case the count will also measure 3407 * the outer bytes. 3408 * 3409 * @param dev 3410 * Pointer to Ethernet device. 3411 * @param[in] actions 3412 * Associated actions (list terminated by the END action). 3413 * @param[out] actions_rx 3414 * Rx flow actions. 3415 * @param[out] actions_tx 3416 * Tx flow actions.. 3417 * @param[out] pattern_tx 3418 * The pattern items for the Tx flow. 3419 * @param[out] flow_id 3420 * The flow ID connected to this flow. 3421 * 3422 * @return 3423 * 0 on success. 
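 *
 * On return the Rx action list has an internal TAG action appended (right
 * before the final END) that stores *flow_id in a metadata register, and
 * pattern_tx starts with the matching internal TAG item, so the Rx and Tx
 * halves of the hairpin flow can be correlated on the device.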
3424 */ 3425 static int 3426 flow_hairpin_split(struct rte_eth_dev *dev, 3427 const struct rte_flow_action actions[], 3428 struct rte_flow_action actions_rx[], 3429 struct rte_flow_action actions_tx[], 3430 struct rte_flow_item pattern_tx[], 3431 uint32_t *flow_id) 3432 { 3433 struct mlx5_priv *priv = dev->data->dev_private; 3434 const struct rte_flow_action_raw_encap *raw_encap; 3435 const struct rte_flow_action_raw_decap *raw_decap; 3436 struct mlx5_rte_flow_action_set_tag *set_tag; 3437 struct rte_flow_action *tag_action; 3438 struct mlx5_rte_flow_item_tag *tag_item; 3439 struct rte_flow_item *item; 3440 char *addr; 3441 int encap = 0; 3442 3443 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3444 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3445 switch (actions->type) { 3446 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3447 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3448 rte_memcpy(actions_tx, actions, 3449 sizeof(struct rte_flow_action)); 3450 actions_tx++; 3451 break; 3452 case RTE_FLOW_ACTION_TYPE_COUNT: 3453 if (encap) { 3454 rte_memcpy(actions_tx, actions, 3455 sizeof(struct rte_flow_action)); 3456 actions_tx++; 3457 } else { 3458 rte_memcpy(actions_rx, actions, 3459 sizeof(struct rte_flow_action)); 3460 actions_rx++; 3461 } 3462 break; 3463 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3464 raw_encap = actions->conf; 3465 if (raw_encap->size > 3466 (sizeof(struct rte_flow_item_eth) + 3467 sizeof(struct rte_flow_item_ipv4))) { 3468 memcpy(actions_tx, actions, 3469 sizeof(struct rte_flow_action)); 3470 actions_tx++; 3471 encap = 1; 3472 } else { 3473 rte_memcpy(actions_rx, actions, 3474 sizeof(struct rte_flow_action)); 3475 actions_rx++; 3476 } 3477 break; 3478 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3479 raw_decap = actions->conf; 3480 if (raw_decap->size < 3481 (sizeof(struct rte_flow_item_eth) + 3482 sizeof(struct rte_flow_item_ipv4))) { 3483 memcpy(actions_tx, actions, 3484 sizeof(struct rte_flow_action)); 3485 actions_tx++; 3486 } else { 3487 rte_memcpy(actions_rx, actions, 3488 sizeof(struct rte_flow_action)); 3489 actions_rx++; 3490 } 3491 break; 3492 default: 3493 rte_memcpy(actions_rx, actions, 3494 sizeof(struct rte_flow_action)); 3495 actions_rx++; 3496 break; 3497 } 3498 } 3499 /* Add set meta action and end action for the Rx flow. */ 3500 tag_action = actions_rx; 3501 tag_action->type = (enum rte_flow_action_type) 3502 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3503 actions_rx++; 3504 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3505 actions_rx++; 3506 set_tag = (void *)actions_rx; 3507 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3508 MLX5_ASSERT(set_tag->id > REG_NONE); 3509 set_tag->data = *flow_id; 3510 tag_action->conf = set_tag; 3511 /* Create Tx item list. 
*/ 3512 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3513 addr = (void *)&pattern_tx[2]; 3514 item = pattern_tx; 3515 item->type = (enum rte_flow_item_type) 3516 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3517 tag_item = (void *)addr; 3518 tag_item->data = *flow_id; 3519 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3520 MLX5_ASSERT(set_tag->id > REG_NONE); 3521 item->spec = tag_item; 3522 addr += sizeof(struct mlx5_rte_flow_item_tag); 3523 tag_item = (void *)addr; 3524 tag_item->data = UINT32_MAX; 3525 tag_item->id = UINT16_MAX; 3526 item->mask = tag_item; 3527 addr += sizeof(struct mlx5_rte_flow_item_tag); 3528 item->last = NULL; 3529 item++; 3530 item->type = RTE_FLOW_ITEM_TYPE_END; 3531 return 0; 3532 } 3533 3534 /** 3535 * The last stage of splitting chain, just creates the subflow 3536 * without any modification. 3537 * 3538 * @param[in] dev 3539 * Pointer to Ethernet device. 3540 * @param[in] flow 3541 * Parent flow structure pointer. 3542 * @param[in, out] sub_flow 3543 * Pointer to return the created subflow, may be NULL. 3544 * @param[in] prefix_layers 3545 * Prefix subflow layers, may be 0. 3546 * @param[in] attr 3547 * Flow rule attributes. 3548 * @param[in] items 3549 * Pattern specification (list terminated by the END pattern item). 3550 * @param[in] actions 3551 * Associated actions (list terminated by the END action). 3552 * @param[in] external 3553 * This flow rule is created by request external to PMD. 3554 * @param[in] flow_idx 3555 * This memory pool index to the flow. 3556 * @param[out] error 3557 * Perform verbose error reporting if not NULL. 3558 * @return 3559 * 0 on success, negative value otherwise 3560 */ 3561 static int 3562 flow_create_split_inner(struct rte_eth_dev *dev, 3563 struct rte_flow *flow, 3564 struct mlx5_flow **sub_flow, 3565 uint64_t prefix_layers, 3566 const struct rte_flow_attr *attr, 3567 const struct rte_flow_item items[], 3568 const struct rte_flow_action actions[], 3569 bool external, uint32_t flow_idx, 3570 struct rte_flow_error *error) 3571 { 3572 struct mlx5_flow *dev_flow; 3573 3574 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3575 flow_idx, error); 3576 if (!dev_flow) 3577 return -rte_errno; 3578 dev_flow->flow = flow; 3579 dev_flow->external = external; 3580 /* Subflow object was created, we must include one in the list. */ 3581 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3582 dev_flow->handle, next); 3583 /* 3584 * If dev_flow is as one of the suffix flow, some actions in suffix 3585 * flow may need some user defined item layer flags. 3586 */ 3587 if (prefix_layers) 3588 dev_flow->handle->layers = prefix_layers; 3589 if (sub_flow) 3590 *sub_flow = dev_flow; 3591 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3592 } 3593 3594 /** 3595 * Split the meter flow. 3596 * 3597 * As meter flow will split to three sub flow, other than meter 3598 * action, the other actions make sense to only meter accepts 3599 * the packet. If it need to be dropped, no other additional 3600 * actions should be take. 3601 * 3602 * One kind of special action which decapsulates the L3 tunnel 3603 * header will be in the prefix sub flow, as not to take the 3604 * L3 tunnel header into account. 3605 * 3606 * @param dev 3607 * Pointer to Ethernet device. 3608 * @param[in] items 3609 * Pattern specification (list terminated by the END pattern item). 3610 * @param[out] sfx_items 3611 * Suffix flow match items (list terminated by the END pattern item). 
3612 * @param[in] actions 3613 * Associated actions (list terminated by the END action). 3614 * @param[out] actions_sfx 3615 * Suffix flow actions. 3616 * @param[out] actions_pre 3617 * Prefix flow actions. 3618 * @param[out] pattern_sfx 3619 * The pattern items for the suffix flow. 3620 * @param[out] tag_sfx 3621 * Pointer to suffix flow tag. 3622 * 3623 * @return 3624 * 0 on success. 3625 */ 3626 static int 3627 flow_meter_split_prep(struct rte_eth_dev *dev, 3628 const struct rte_flow_item items[], 3629 struct rte_flow_item sfx_items[], 3630 const struct rte_flow_action actions[], 3631 struct rte_flow_action actions_sfx[], 3632 struct rte_flow_action actions_pre[]) 3633 { 3634 struct rte_flow_action *tag_action = NULL; 3635 struct rte_flow_item *tag_item; 3636 struct mlx5_rte_flow_action_set_tag *set_tag; 3637 struct rte_flow_error error; 3638 const struct rte_flow_action_raw_encap *raw_encap; 3639 const struct rte_flow_action_raw_decap *raw_decap; 3640 struct mlx5_rte_flow_item_tag *tag_spec; 3641 struct mlx5_rte_flow_item_tag *tag_mask; 3642 uint32_t tag_id; 3643 bool copy_vlan = false; 3644 3645 /* Prepare the actions for prefix and suffix flow. */ 3646 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3647 struct rte_flow_action **action_cur = NULL; 3648 3649 switch (actions->type) { 3650 case RTE_FLOW_ACTION_TYPE_METER: 3651 /* Add the extra tag action first. */ 3652 tag_action = actions_pre; 3653 tag_action->type = (enum rte_flow_action_type) 3654 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3655 actions_pre++; 3656 action_cur = &actions_pre; 3657 break; 3658 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 3659 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 3660 action_cur = &actions_pre; 3661 break; 3662 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3663 raw_encap = actions->conf; 3664 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 3665 action_cur = &actions_pre; 3666 break; 3667 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3668 raw_decap = actions->conf; 3669 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3670 action_cur = &actions_pre; 3671 break; 3672 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3673 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3674 copy_vlan = true; 3675 break; 3676 default: 3677 break; 3678 } 3679 if (!action_cur) 3680 action_cur = &actions_sfx; 3681 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 3682 (*action_cur)++; 3683 } 3684 /* Add end action to the actions. */ 3685 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 3686 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 3687 actions_pre++; 3688 /* Set the tag. */ 3689 set_tag = (void *)actions_pre; 3690 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3691 /* 3692 * Get the id from the qrss_pool to make qrss share the id with meter. 3693 */ 3694 tag_id = flow_qrss_get_id(dev); 3695 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 3696 assert(tag_action); 3697 tag_action->conf = set_tag; 3698 /* Prepare the suffix subflow items. */ 3699 tag_item = sfx_items++; 3700 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 3701 int item_type = items->type; 3702 3703 switch (item_type) { 3704 case RTE_FLOW_ITEM_TYPE_PORT_ID: 3705 memcpy(sfx_items, items, sizeof(*sfx_items)); 3706 sfx_items++; 3707 break; 3708 case RTE_FLOW_ITEM_TYPE_VLAN: 3709 if (copy_vlan) { 3710 memcpy(sfx_items, items, sizeof(*sfx_items)); 3711 /* 3712 * Convert to internal match item, it is used 3713 * for vlan push and set vid. 
3714 */ 3715 sfx_items->type = (enum rte_flow_item_type) 3716 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 3717 sfx_items++; 3718 } 3719 break; 3720 default: 3721 break; 3722 } 3723 } 3724 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 3725 sfx_items++; 3726 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 3727 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 3728 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3729 tag_mask = tag_spec + 1; 3730 tag_mask->data = 0xffffff00; 3731 tag_item->type = (enum rte_flow_item_type) 3732 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3733 tag_item->spec = tag_spec; 3734 tag_item->last = NULL; 3735 tag_item->mask = tag_mask; 3736 return tag_id; 3737 } 3738 3739 /** 3740 * Split action list having QUEUE/RSS for metadata register copy. 3741 * 3742 * Once Q/RSS action is detected in user's action list, the flow action 3743 * should be split in order to copy metadata registers, which will happen in 3744 * RX_CP_TBL like, 3745 * - CQE->flow_tag := reg_c[1] (MARK) 3746 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3747 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 3748 * This is because the last action of each flow must be a terminal action 3749 * (QUEUE, RSS or DROP). 3750 * 3751 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 3752 * stored and kept in the mlx5_flow structure per each sub_flow. 3753 * 3754 * The Q/RSS action is replaced with, 3755 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 3756 * And the following JUMP action is added at the end, 3757 * - JUMP, to RX_CP_TBL. 3758 * 3759 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 3760 * flow_create_split_metadata() routine. The flow will look like, 3761 * - If flow ID matches (reg_c[2]), perform Q/RSS. 3762 * 3763 * @param dev 3764 * Pointer to Ethernet device. 3765 * @param[out] split_actions 3766 * Pointer to store split actions to jump to CP_TBL. 3767 * @param[in] actions 3768 * Pointer to the list of original flow actions. 3769 * @param[in] qrss 3770 * Pointer to the Q/RSS action. 3771 * @param[in] actions_n 3772 * Number of original actions. 3773 * @param[out] error 3774 * Perform verbose error reporting if not NULL. 3775 * 3776 * @return 3777 * non-zero unique flow_id on success, otherwise 0 and 3778 * error/rte_error are set. 3779 */ 3780 static uint32_t 3781 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 3782 struct rte_flow_action *split_actions, 3783 const struct rte_flow_action *actions, 3784 const struct rte_flow_action *qrss, 3785 int actions_n, struct rte_flow_error *error) 3786 { 3787 struct mlx5_rte_flow_action_set_tag *set_tag; 3788 struct rte_flow_action_jump *jump; 3789 const int qrss_idx = qrss - actions; 3790 uint32_t flow_id = 0; 3791 int ret = 0; 3792 3793 /* 3794 * Given actions will be split 3795 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 3796 * - Add jump to mreg CP_TBL. 3797 * As a result, there will be one more action. 3798 */ 3799 ++actions_n; 3800 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 3801 set_tag = (void *)(split_actions + actions_n); 3802 /* 3803 * If tag action is not set to void(it means we are not the meter 3804 * suffix flow), add the tag action. Since meter suffix flow already 3805 * has the tag added. 3806 */ 3807 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 3808 /* 3809 * Allocate the new subflow ID. This one is unique within 3810 * device and not shared with representors. 
Otherwise,
3811 * we would have to resolve a multi-thread access
3812 * synchronization issue. Each flow on the shared device is appended
3813 * with source vport identifier, so the resulting
3814 * flows will be unique in the shared (by master and
3815 * representors) domain even if they have coinciding
3816 * IDs.
3817 */
3818 flow_id = flow_qrss_get_id(dev);
3819 if (!flow_id)
3820 return rte_flow_error_set(error, ENOMEM,
3821 RTE_FLOW_ERROR_TYPE_ACTION,
3822 NULL, "can't allocate id "
3823 "for split Q/RSS subflow");
3824 /* Internal SET_TAG action to set flow ID. */
3825 *set_tag = (struct mlx5_rte_flow_action_set_tag){
3826 .data = flow_id,
3827 };
3828 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
3829 if (ret < 0)
3830 return ret;
3831 set_tag->id = ret;
3832 /* Construct new actions array. */
3833 /* Replace QUEUE/RSS action. */
3834 split_actions[qrss_idx] = (struct rte_flow_action){
3835 .type = (enum rte_flow_action_type)
3836 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
3837 .conf = set_tag,
3838 };
3839 }
3840 /* JUMP action to jump to mreg copy table (CP_TBL). */
3841 jump = (void *)(set_tag + 1);
3842 *jump = (struct rte_flow_action_jump){
3843 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3844 };
3845 split_actions[actions_n - 2] = (struct rte_flow_action){
3846 .type = RTE_FLOW_ACTION_TYPE_JUMP,
3847 .conf = jump,
3848 };
3849 split_actions[actions_n - 1] = (struct rte_flow_action){
3850 .type = RTE_FLOW_ACTION_TYPE_END,
3851 };
3852 return flow_id;
3853 }
3854
3855 /**
3856 * Extend the given action list for Tx metadata copy.
3857 *
3858 * Copy the given action list to the ext_actions and add flow metadata register
3859 * copy action in order to copy reg_a set by WQE to reg_c[0].
3860 *
3861 * @param[out] ext_actions
3862 * Pointer to the extended action list.
3863 * @param[in] actions
3864 * Pointer to the list of actions.
3865 * @param[in] actions_n
3866 * Number of actions in the list.
3867 * @param[out] error
3868 * Perform verbose error reporting if not NULL.
3869 * @param[in] encap_idx
3870 * The encap action index.
3871 * 3872 * @return 3873 * 0 on success, negative value otherwise 3874 */ 3875 static int 3876 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 3877 struct rte_flow_action *ext_actions, 3878 const struct rte_flow_action *actions, 3879 int actions_n, struct rte_flow_error *error, 3880 int encap_idx) 3881 { 3882 struct mlx5_flow_action_copy_mreg *cp_mreg = 3883 (struct mlx5_flow_action_copy_mreg *) 3884 (ext_actions + actions_n + 1); 3885 int ret; 3886 3887 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3888 if (ret < 0) 3889 return ret; 3890 cp_mreg->dst = ret; 3891 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 3892 if (ret < 0) 3893 return ret; 3894 cp_mreg->src = ret; 3895 if (encap_idx != 0) 3896 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 3897 if (encap_idx == actions_n - 1) { 3898 ext_actions[actions_n - 1] = (struct rte_flow_action){ 3899 .type = (enum rte_flow_action_type) 3900 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3901 .conf = cp_mreg, 3902 }; 3903 ext_actions[actions_n] = (struct rte_flow_action){ 3904 .type = RTE_FLOW_ACTION_TYPE_END, 3905 }; 3906 } else { 3907 ext_actions[encap_idx] = (struct rte_flow_action){ 3908 .type = (enum rte_flow_action_type) 3909 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3910 .conf = cp_mreg, 3911 }; 3912 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 3913 sizeof(*ext_actions) * (actions_n - encap_idx)); 3914 } 3915 return 0; 3916 } 3917 3918 /** 3919 * The splitting for metadata feature. 3920 * 3921 * - Q/RSS action on NIC Rx should be split in order to pass by 3922 * the mreg copy table (RX_CP_TBL) and then it jumps to the 3923 * action table (RX_ACT_TBL) which has the split Q/RSS action. 3924 * 3925 * - All the actions on NIC Tx should have a mreg copy action to 3926 * copy reg_a from WQE to reg_c[0]. 3927 * 3928 * @param dev 3929 * Pointer to Ethernet device. 3930 * @param[in] flow 3931 * Parent flow structure pointer. 3932 * @param[in] prefix_layers 3933 * Prefix flow layer flags. 3934 * @param[in] attr 3935 * Flow rule attributes. 3936 * @param[in] items 3937 * Pattern specification (list terminated by the END pattern item). 3938 * @param[in] actions 3939 * Associated actions (list terminated by the END action). 3940 * @param[in] external 3941 * This flow rule is created by request external to PMD. 3942 * @param[in] flow_idx 3943 * This memory pool index to the flow. 3944 * @param[out] error 3945 * Perform verbose error reporting if not NULL. 3946 * @return 3947 * 0 on success, negative value otherwise 3948 */ 3949 static int 3950 flow_create_split_metadata(struct rte_eth_dev *dev, 3951 struct rte_flow *flow, 3952 uint64_t prefix_layers, 3953 const struct rte_flow_attr *attr, 3954 const struct rte_flow_item items[], 3955 const struct rte_flow_action actions[], 3956 bool external, uint32_t flow_idx, 3957 struct rte_flow_error *error) 3958 { 3959 struct mlx5_priv *priv = dev->data->dev_private; 3960 struct mlx5_dev_config *config = &priv->config; 3961 const struct rte_flow_action *qrss = NULL; 3962 struct rte_flow_action *ext_actions = NULL; 3963 struct mlx5_flow *dev_flow = NULL; 3964 uint32_t qrss_id = 0; 3965 int mtr_sfx = 0; 3966 size_t act_size; 3967 int actions_n; 3968 int encap_idx; 3969 int ret; 3970 3971 /* Check whether extensive metadata feature is engaged. 
*/ 3972 if (!config->dv_flow_en || 3973 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3974 !mlx5_flow_ext_mreg_supported(dev)) 3975 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 3976 attr, items, actions, external, 3977 flow_idx, error); 3978 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 3979 &encap_idx); 3980 if (qrss) { 3981 /* Exclude hairpin flows from splitting. */ 3982 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 3983 const struct rte_flow_action_queue *queue; 3984 3985 queue = qrss->conf; 3986 if (mlx5_rxq_get_type(dev, queue->index) == 3987 MLX5_RXQ_TYPE_HAIRPIN) 3988 qrss = NULL; 3989 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 3990 const struct rte_flow_action_rss *rss; 3991 3992 rss = qrss->conf; 3993 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 3994 MLX5_RXQ_TYPE_HAIRPIN) 3995 qrss = NULL; 3996 } 3997 } 3998 if (qrss) { 3999 /* Check if it is in meter suffix table. */ 4000 mtr_sfx = attr->group == (attr->transfer ? 4001 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4002 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4003 /* 4004 * Q/RSS action on NIC Rx should be split in order to pass by 4005 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4006 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4007 */ 4008 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4009 sizeof(struct rte_flow_action_set_tag) + 4010 sizeof(struct rte_flow_action_jump); 4011 ext_actions = rte_zmalloc(__func__, act_size, 0); 4012 if (!ext_actions) 4013 return rte_flow_error_set(error, ENOMEM, 4014 RTE_FLOW_ERROR_TYPE_ACTION, 4015 NULL, "no memory to split " 4016 "metadata flow"); 4017 /* 4018 * If we are the suffix flow of meter, tag already exist. 4019 * Set the tag action to void. 4020 */ 4021 if (mtr_sfx) 4022 ext_actions[qrss - actions].type = 4023 RTE_FLOW_ACTION_TYPE_VOID; 4024 else 4025 ext_actions[qrss - actions].type = 4026 (enum rte_flow_action_type) 4027 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4028 /* 4029 * Create the new actions list with removed Q/RSS action 4030 * and appended set tag and jump to register copy table 4031 * (RX_CP_TBL). We should preallocate unique tag ID here 4032 * in advance, because it is needed for set tag action. 4033 */ 4034 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4035 qrss, actions_n, error); 4036 if (!mtr_sfx && !qrss_id) { 4037 ret = -rte_errno; 4038 goto exit; 4039 } 4040 } else if (attr->egress && !attr->transfer) { 4041 /* 4042 * All the actions on NIC Tx should have a metadata register 4043 * copy action to copy reg_a from WQE to reg_c[meta] 4044 */ 4045 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4046 sizeof(struct mlx5_flow_action_copy_mreg); 4047 ext_actions = rte_zmalloc(__func__, act_size, 0); 4048 if (!ext_actions) 4049 return rte_flow_error_set(error, ENOMEM, 4050 RTE_FLOW_ERROR_TYPE_ACTION, 4051 NULL, "no memory to split " 4052 "metadata flow"); 4053 /* Create the action list appended with copy register. */ 4054 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4055 actions_n, error, encap_idx); 4056 if (ret < 0) 4057 goto exit; 4058 } 4059 /* Add the unmodified original or prefix subflow. */ 4060 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr, 4061 items, ext_actions ? 
ext_actions :
4062 actions, external, flow_idx, error);
4063 if (ret < 0)
4064 goto exit;
4065 MLX5_ASSERT(dev_flow);
4066 if (qrss) {
4067 const struct rte_flow_attr q_attr = {
4068 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4069 .ingress = 1,
4070 };
4071 /* Internal PMD action to set register. */
4072 struct mlx5_rte_flow_item_tag q_tag_spec = {
4073 .data = qrss_id,
4074 .id = 0,
4075 };
4076 struct rte_flow_item q_items[] = {
4077 {
4078 .type = (enum rte_flow_item_type)
4079 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4080 .spec = &q_tag_spec,
4081 .last = NULL,
4082 .mask = NULL,
4083 },
4084 {
4085 .type = RTE_FLOW_ITEM_TYPE_END,
4086 },
4087 };
4088 struct rte_flow_action q_actions[] = {
4089 {
4090 .type = qrss->type,
4091 .conf = qrss->conf,
4092 },
4093 {
4094 .type = RTE_FLOW_ACTION_TYPE_END,
4095 },
4096 };
4097 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
4098
4099 /*
4100 * Configure the tag item only if there is no meter subflow.
4101 * Since the tag is already marked in the meter suffix subflow
4102 * we can just use the meter suffix items as is.
4103 */
4104 if (qrss_id) {
4105 /* Not meter subflow. */
4106 MLX5_ASSERT(!mtr_sfx);
4107 /*
4108 * Keep the unique id in the prefix flow: the prefix is
4109 * destroyed after the suffix flow, so the id is freed
4110 * only when no actual flow uses it any more and its
4111 * reallocation becomes possible (for example, for
4112 * other flows in other threads).
4113 */
4114 dev_flow->handle->split_flow_id = qrss_id;
4115 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
4116 error);
4117 if (ret < 0)
4118 goto exit;
4119 q_tag_spec.id = ret;
4120 }
4121 dev_flow = NULL;
4122 /* Add suffix subflow to execute Q/RSS. */
4123 ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
4124 &q_attr, mtr_sfx ? items :
4125 q_items, q_actions,
4126 external, flow_idx, error);
4127 if (ret < 0)
4128 goto exit;
4129 /* The qrss ID must be freed only on failure, clear it after success. */
4130 qrss_id = 0;
4131 MLX5_ASSERT(dev_flow);
4132 }
4133
4134 exit:
4135 /*
4136 * We do not destroy the partially created sub_flows in case of error.
4137 * They are included in the parent flow list and will be destroyed
4138 * by flow_drv_destroy.
4139 */
4140 flow_qrss_free_id(dev, qrss_id);
4141 rte_free(ext_actions);
4142 return ret;
4143 }
4144
4145 /**
4146 * The splitting for meter feature.
4147 *
4148 * - The meter flow is split into two flows, a prefix and a suffix
4149 * flow. Packets are handed to the suffix flow only if they are
4150 * accepted by the meter action in the prefix flow.
4151 *
4152 * - Reg_C_5 is used for the packet to match between the prefix and
4153 * suffix flows.
4154 *
4155 * @param dev
4156 * Pointer to Ethernet device.
4157 * @param[in] flow
4158 * Parent flow structure pointer.
4159 * @param[in] attr
4160 * Flow rule attributes.
4161 * @param[in] items
4162 * Pattern specification (list terminated by the END pattern item).
4163 * @param[in] actions
4164 * Associated actions (list terminated by the END action).
4165 * @param[in] external
4166 * This flow rule is created by request external to PMD.
4167 * @param[in] flow_idx
4168 * This memory pool index to the flow.
4169 * @param[out] error
4170 * Perform verbose error reporting if not NULL.
4171 * @return
4172 * 0 on success, negative value otherwise
4173 */
4174 static int
4175 flow_create_split_meter(struct rte_eth_dev *dev,
4176 struct rte_flow *flow,
4177 const struct rte_flow_attr *attr,
4178 const struct rte_flow_item items[],
4179 const struct rte_flow_action actions[],
4180 bool external, uint32_t flow_idx,
4181 struct rte_flow_error *error)
4182 {
4183 struct mlx5_priv *priv = dev->data->dev_private;
4184 struct rte_flow_action *sfx_actions = NULL;
4185 struct rte_flow_action *pre_actions = NULL;
4186 struct rte_flow_item *sfx_items = NULL;
4187 struct mlx5_flow *dev_flow = NULL;
4188 struct rte_flow_attr sfx_attr = *attr;
4189 uint32_t mtr = 0;
4190 uint32_t mtr_tag_id = 0;
4191 size_t act_size;
4192 size_t item_size;
4193 int actions_n = 0;
4194 int ret;
4195
4196 if (priv->mtr_en)
4197 actions_n = flow_check_meter_action(actions, &mtr);
4198 if (mtr) {
4199 /* The five prefix actions: meter, decap, encap, tag, end. */
4200 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
4201 sizeof(struct mlx5_rte_flow_action_set_tag);
4202 /* tag, vlan, port id, end. */
4203 #define METER_SUFFIX_ITEM 4
4204 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
4205 sizeof(struct mlx5_rte_flow_item_tag) * 2;
4206 sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0);
4207 if (!sfx_actions)
4208 return rte_flow_error_set(error, ENOMEM,
4209 RTE_FLOW_ERROR_TYPE_ACTION,
4210 NULL, "no memory to split "
4211 "meter flow");
4212 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4213 act_size);
4214 pre_actions = sfx_actions + actions_n;
4215 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
4216 actions, sfx_actions,
4217 pre_actions);
4218 if (!mtr_tag_id) {
4219 ret = -rte_errno;
4220 goto exit;
4221 }
4222 /* Add the prefix subflow. */
4223 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
4224 items, pre_actions, external,
4225 flow_idx, error);
4226 if (ret) {
4227 ret = -rte_errno;
4228 goto exit;
4229 }
4230 dev_flow->handle->split_flow_id = mtr_tag_id;
4231 /* Set the suffix flow group attribute. */
4232 sfx_attr.group = sfx_attr.transfer ?
4233 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4234 MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4235 }
4236 /* Add the suffix subflow, or the original flow if there is no meter. */
4237 ret = flow_create_split_metadata(dev, flow, dev_flow ?
4238 flow_get_prefix_layer_flags(dev_flow) :
4239 0, &sfx_attr,
4240 sfx_items ? sfx_items : items,
4241 sfx_actions ? sfx_actions : actions,
4242 external, flow_idx, error);
4243 exit:
4244 if (sfx_actions)
4245 rte_free(sfx_actions);
4246 return ret;
4247 }
4248
4249 /**
4250 * Split the flow to subflow set. The splitters might be linked
4251 * in the chain, like this:
4252 * flow_create_split_outer() calls:
4253 * flow_create_split_meter() calls:
4254 * flow_create_split_metadata(meter_subflow_0) calls:
4255 * flow_create_split_inner(metadata_subflow_0)
4256 * flow_create_split_inner(metadata_subflow_1)
4257 * flow_create_split_inner(metadata_subflow_2)
4258 * flow_create_split_metadata(meter_subflow_1) calls:
4259 * flow_create_split_inner(metadata_subflow_0)
4260 * flow_create_split_inner(metadata_subflow_1)
4261 * flow_create_split_inner(metadata_subflow_2)
4262 *
4263 * This provides a flexible way to add new levels of flow splitting.
4264 * All successfully created subflows are included in the
4265 * parent flow dev_flow list.
4266 *
4267 * @param dev
4268 * Pointer to Ethernet device.
4269 * @param[in] flow
4270 * Parent flow structure pointer.
4271 * @param[in] attr
4272 * Flow rule attributes.
4273 * @param[in] items
4274 * Pattern specification (list terminated by the END pattern item).
4275 * @param[in] actions
4276 * Associated actions (list terminated by the END action).
4277 * @param[in] external
4278 * This flow rule is created by request external to PMD.
4279 * @param[in] flow_idx
4280 * This memory pool index to the flow.
4281 * @param[out] error
4282 * Perform verbose error reporting if not NULL.
4283 * @return
4284 * 0 on success, negative value otherwise
4285 */
4286 static int
4287 flow_create_split_outer(struct rte_eth_dev *dev,
4288 struct rte_flow *flow,
4289 const struct rte_flow_attr *attr,
4290 const struct rte_flow_item items[],
4291 const struct rte_flow_action actions[],
4292 bool external, uint32_t flow_idx,
4293 struct rte_flow_error *error)
4294 {
4295 int ret;
4296
4297 ret = flow_create_split_meter(dev, flow, attr, items,
4298 actions, external, flow_idx, error);
4299 MLX5_ASSERT(ret <= 0);
4300 return ret;
4301 }
4302
4303 /**
4304 * Create a flow and add it to @p list.
4305 *
4306 * @param dev
4307 * Pointer to Ethernet device.
4308 * @param list
4309 * Pointer to the Indexed flow list. If this parameter is NULL,
4310 * no list insertion is done, the flow is just created and it is
4311 * the caller's responsibility to track the
4312 * created flow.
4313 * @param[in] attr
4314 * Flow rule attributes.
4315 * @param[in] items
4316 * Pattern specification (list terminated by the END pattern item).
4317 * @param[in] actions
4318 * Associated actions (list terminated by the END action).
4319 * @param[in] external
4320 * This flow rule is created by request external to PMD.
4321 * @param[out] error
4322 * Perform verbose error reporting if not NULL.
4323 *
4324 * @return
4325 * A flow index on success, 0 otherwise and rte_errno is set.
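 *
 * A minimal usage sketch, illustrative only (dev, priv, attr, items and
 * actions are assumed to be in scope), mirroring what mlx5_flow_create()
 * below does for application flows:
 *
 *   struct rte_flow_error error;
 *   uint32_t idx;
 *
 *   idx = flow_list_create(dev, &priv->flows, attr, items, actions,
 *                          true, &error);
 *   if (!idx)
 *       return NULL;
 *   return (void *)(uintptr_t)idx;
 *
 * On failure rte_errno and the error structure are set by this routine.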
4326 */ 4327 static uint32_t 4328 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 4329 const struct rte_flow_attr *attr, 4330 const struct rte_flow_item items[], 4331 const struct rte_flow_action actions[], 4332 bool external, struct rte_flow_error *error) 4333 { 4334 struct mlx5_priv *priv = dev->data->dev_private; 4335 struct rte_flow *flow = NULL; 4336 struct mlx5_flow *dev_flow; 4337 const struct rte_flow_action_rss *rss; 4338 union { 4339 struct rte_flow_expand_rss buf; 4340 uint8_t buffer[2048]; 4341 } expand_buffer; 4342 union { 4343 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4344 uint8_t buffer[2048]; 4345 } actions_rx; 4346 union { 4347 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4348 uint8_t buffer[2048]; 4349 } actions_hairpin_tx; 4350 union { 4351 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 4352 uint8_t buffer[2048]; 4353 } items_tx; 4354 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 4355 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 4356 priv->rss_desc)[!!priv->flow_idx]; 4357 const struct rte_flow_action *p_actions_rx = actions; 4358 uint32_t i; 4359 uint32_t idx = 0; 4360 int hairpin_flow; 4361 uint32_t hairpin_id = 0; 4362 struct rte_flow_attr attr_tx = { .priority = 0 }; 4363 int ret; 4364 4365 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 4366 ret = flow_drv_validate(dev, attr, items, p_actions_rx, 4367 external, hairpin_flow, error); 4368 if (ret < 0) 4369 return 0; 4370 if (hairpin_flow > 0) { 4371 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 4372 rte_errno = EINVAL; 4373 return 0; 4374 } 4375 flow_hairpin_split(dev, actions, actions_rx.actions, 4376 actions_hairpin_tx.actions, items_tx.items, 4377 &hairpin_id); 4378 p_actions_rx = actions_rx.actions; 4379 } 4380 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 4381 if (!flow) { 4382 rte_errno = ENOMEM; 4383 goto error_before_flow; 4384 } 4385 flow->drv_type = flow_get_drv_type(dev, attr); 4386 if (hairpin_id != 0) 4387 flow->hairpin_flow_id = hairpin_id; 4388 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 4389 flow->drv_type < MLX5_FLOW_TYPE_MAX); 4390 memset(rss_desc, 0, sizeof(*rss_desc)); 4391 rss = flow_get_rss_action(p_actions_rx); 4392 if (rss) { 4393 /* 4394 * The following information is required by 4395 * mlx5_flow_hashfields_adjust() in advance. 4396 */ 4397 rss_desc->level = rss->level; 4398 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 4399 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 4400 } 4401 flow->dev_handles = 0; 4402 if (rss && rss->types) { 4403 unsigned int graph_root; 4404 4405 graph_root = find_graph_root(items, rss->level); 4406 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 4407 items, rss->types, 4408 mlx5_support_expansion, 4409 graph_root); 4410 MLX5_ASSERT(ret > 0 && 4411 (unsigned int)ret < sizeof(expand_buffer.buffer)); 4412 } else { 4413 buf->entries = 1; 4414 buf->entry[0].pattern = (void *)(uintptr_t)items; 4415 } 4416 /* 4417 * Record the start index when there is a nested call. All sub-flows 4418 * need to be translated before another calling. 4419 * No need to use ping-pong buffer to save memory here. 4420 */ 4421 if (priv->flow_idx) { 4422 MLX5_ASSERT(!priv->flow_nested_idx); 4423 priv->flow_nested_idx = priv->flow_idx; 4424 } 4425 for (i = 0; i < buf->entries; ++i) { 4426 /* 4427 * The splitter may create multiple dev_flows, 4428 * depending on configuration. In the simplest 4429 * case it just creates unmodified original flow. 
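 * Each pattern produced by the RSS expansion above goes through the
 * whole splitter chain, so a single rte_flow may end up with several
 * device flow handles linked into flow->dev_handles.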
4430 */ 4431 ret = flow_create_split_outer(dev, flow, attr, 4432 buf->entry[i].pattern, 4433 p_actions_rx, external, idx, 4434 error); 4435 if (ret < 0) 4436 goto error; 4437 } 4438 /* Create the tx flow. */ 4439 if (hairpin_flow) { 4440 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 4441 attr_tx.ingress = 0; 4442 attr_tx.egress = 1; 4443 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 4444 actions_hairpin_tx.actions, 4445 idx, error); 4446 if (!dev_flow) 4447 goto error; 4448 dev_flow->flow = flow; 4449 dev_flow->external = 0; 4450 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 4451 dev_flow->handle, next); 4452 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 4453 items_tx.items, 4454 actions_hairpin_tx.actions, error); 4455 if (ret < 0) 4456 goto error; 4457 } 4458 /* 4459 * Update the metadata register copy table. If extensive 4460 * metadata feature is enabled and registers are supported 4461 * we might create the extra rte_flow for each unique 4462 * MARK/FLAG action ID. 4463 * 4464 * The table is updated for ingress Flows only, because 4465 * the egress Flows belong to the different device and 4466 * copy table should be updated in peer NIC Rx domain. 4467 */ 4468 if (attr->ingress && 4469 (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 4470 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 4471 if (ret) 4472 goto error; 4473 } 4474 /* 4475 * If the flow is external (from application) OR device is started, then 4476 * the flow will be applied immediately. 4477 */ 4478 if (external || dev->data->dev_started) { 4479 ret = flow_drv_apply(dev, flow, error); 4480 if (ret < 0) 4481 goto error; 4482 } 4483 if (list) 4484 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 4485 flow, next); 4486 flow_rxq_flags_set(dev, flow); 4487 /* Nested flow creation index recovery. */ 4488 priv->flow_idx = priv->flow_nested_idx; 4489 if (priv->flow_nested_idx) 4490 priv->flow_nested_idx = 0; 4491 return idx; 4492 error: 4493 MLX5_ASSERT(flow); 4494 ret = rte_errno; /* Save rte_errno before cleanup. */ 4495 flow_mreg_del_copy_action(dev, flow); 4496 flow_drv_destroy(dev, flow); 4497 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 4498 rte_errno = ret; /* Restore rte_errno. */ 4499 error_before_flow: 4500 ret = rte_errno; 4501 if (hairpin_id) 4502 mlx5_flow_id_release(priv->sh->flow_id_pool, 4503 hairpin_id); 4504 rte_errno = ret; 4505 priv->flow_idx = priv->flow_nested_idx; 4506 if (priv->flow_nested_idx) 4507 priv->flow_nested_idx = 0; 4508 return 0; 4509 } 4510 4511 /** 4512 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 4513 * incoming packets to table 1. 4514 * 4515 * Other flow rules, requested for group n, will be created in 4516 * e-switch table n+1. 4517 * Jump action to e-switch group n will be created to group n+1. 4518 * 4519 * Used when working in switchdev mode, to utilise advantages of table 1 4520 * and above. 4521 * 4522 * @param dev 4523 * Pointer to Ethernet device. 4524 * 4525 * @return 4526 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
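 *
 * Illustrative only: the rule created below is roughly equivalent to
 * the testpmd command
 *   flow create <port> ingress transfer group 0
 *        pattern end actions jump group 1 / end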
*/
4528 struct rte_flow *
4529 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
4530 {
4531 const struct rte_flow_attr attr = {
4532 .group = 0,
4533 .priority = 0,
4534 .ingress = 1,
4535 .egress = 0,
4536 .transfer = 1,
4537 };
4538 const struct rte_flow_item pattern = {
4539 .type = RTE_FLOW_ITEM_TYPE_END,
4540 };
4541 struct rte_flow_action_jump jump = {
4542 .group = 1,
4543 };
4544 const struct rte_flow_action actions[] = {
4545 {
4546 .type = RTE_FLOW_ACTION_TYPE_JUMP,
4547 .conf = &jump,
4548 },
4549 {
4550 .type = RTE_FLOW_ACTION_TYPE_END,
4551 },
4552 };
4553 struct mlx5_priv *priv = dev->data->dev_private;
4554 struct rte_flow_error error;
4555
4556 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
4557 &attr, &pattern,
4558 actions, false, &error);
4559 }
4560
4561 /**
4562 * Validate a flow supported by the NIC.
4563 *
4564 * @see rte_flow_validate()
4565 * @see rte_flow_ops
4566 */
4567 int
4568 mlx5_flow_validate(struct rte_eth_dev *dev,
4569 const struct rte_flow_attr *attr,
4570 const struct rte_flow_item items[],
4571 const struct rte_flow_action actions[],
4572 struct rte_flow_error *error)
4573 {
4574 int hairpin_flow;
4575
4576 hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4577 return flow_drv_validate(dev, attr, items, actions,
4578 true, hairpin_flow, error);
4579 }
4580
4581 /**
4582 * Create a flow.
4583 *
4584 * @see rte_flow_create()
4585 * @see rte_flow_ops
4586 */
4587 struct rte_flow *
4588 mlx5_flow_create(struct rte_eth_dev *dev,
4589 const struct rte_flow_attr *attr,
4590 const struct rte_flow_item items[],
4591 const struct rte_flow_action actions[],
4592 struct rte_flow_error *error)
4593 {
4594 struct mlx5_priv *priv = dev->data->dev_private;
4595
4596 /*
4597 * If the device is not started yet, the application is not allowed
4598 * to create a flow. PMD default flows and traffic control flows
4599 * are not affected.
4600 */
4601 if (unlikely(!dev->data->dev_started)) {
4602 DRV_LOG(DEBUG, "port %u is not started when "
4603 "inserting a flow", dev->data->port_id);
4604 rte_flow_error_set(error, ENODEV,
4605 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4606 NULL,
4607 "port not started");
4608 return NULL;
4609 }
4610 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
4611 attr, items, actions, true, error);
4612 }
4613
4614 /**
4615 * Destroy a flow in a list.
4616 *
4617 * @param dev
4618 * Pointer to Ethernet device.
4619 * @param list
4620 * Pointer to the Indexed flow list. If this parameter is NULL,
4621 * the flow is not removed from any list. Note that since flows
4622 * are stored in an indexed pool, the memory the list
4623 * points to may change as flows are destroyed.
4624 * @param[in] flow_idx
4625 * Index of flow to destroy.
4626 */
4627 static void
4628 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
4629 uint32_t flow_idx)
4630 {
4631 struct mlx5_priv *priv = dev->data->dev_private;
4632 struct mlx5_fdir_flow *priv_fdir_flow = NULL;
4633 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
4634 [MLX5_IPOOL_RTE_FLOW], flow_idx);
4635
4636 if (!flow)
4637 return;
4638 /*
4639 * Update RX queue flags only if port is started, otherwise it is
4640 * already clean.
*/
4642 if (dev->data->dev_started)
4643 flow_rxq_flags_trim(dev, flow);
4644 if (flow->hairpin_flow_id)
4645 mlx5_flow_id_release(priv->sh->flow_id_pool,
4646 flow->hairpin_flow_id);
4647 flow_drv_destroy(dev, flow);
4648 if (list)
4649 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
4650 flow_idx, flow, next);
4651 flow_mreg_del_copy_action(dev, flow);
4652 if (flow->fdir) {
4653 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
4654 if (priv_fdir_flow->rix_flow == flow_idx)
4655 break;
4656 }
4657 if (priv_fdir_flow) {
4658 LIST_REMOVE(priv_fdir_flow, next);
4659 rte_free(priv_fdir_flow->fdir);
4660 rte_free(priv_fdir_flow);
4661 }
4662 }
4663 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
4664 }
4665
4666 /**
4667 * Destroy all flows.
4668 *
4669 * @param dev
4670 * Pointer to Ethernet device.
4671 * @param list
4672 * Pointer to the Indexed flow list.
4673 * @param active
4674 * If flushing is called actively (an informational log is emitted).
4675 */
4676 void
4677 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
4678 {
4679 uint32_t num_flushed = 0;
4680
4681 while (*list) {
4682 flow_list_destroy(dev, list, *list);
4683 num_flushed++;
4684 }
4685 if (active) {
4686 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
4687 dev->data->port_id, num_flushed);
4688 }
4689 }
4690
4691 /**
4692 * Remove all flows.
4693 *
4694 * @param dev
4695 * Pointer to Ethernet device.
4696 * @param list
4697 * Pointer to the Indexed flow list.
4698 */
4699 void
4700 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list)
4701 {
4702 struct mlx5_priv *priv = dev->data->dev_private;
4703 struct rte_flow *flow = NULL;
4704 uint32_t idx;
4705
4706 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4707 flow, next) {
4708 flow_drv_remove(dev, flow);
4709 flow_mreg_stop_copy_action(dev, flow);
4710 }
4711 flow_mreg_del_default_copy_action(dev);
4712 flow_rxq_flags_clear(dev);
4713 }
4714
4715 /**
4716 * Add all flows.
4717 *
4718 * @param dev
4719 * Pointer to Ethernet device.
4720 * @param list
4721 * Pointer to the Indexed flow list.
4722 *
4723 * @return
4724 * 0 on success, a negative errno value otherwise and rte_errno is set.
4725 */
4726 int
4727 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list)
4728 {
4729 struct mlx5_priv *priv = dev->data->dev_private;
4730 struct rte_flow *flow = NULL;
4731 struct rte_flow_error error;
4732 uint32_t idx;
4733 int ret = 0;
4734
4735 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4736 ret = flow_mreg_add_default_copy_action(dev, &error);
4737 if (ret < 0)
4738 return -rte_errno;
4739 /* Apply flows created by the application. */
4740 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4741 flow, next) {
4742 ret = flow_mreg_start_copy_action(dev, flow);
4743 if (ret < 0)
4744 goto error;
4745 ret = flow_drv_apply(dev, flow, &error);
4746 if (ret < 0)
4747 goto error;
4748 flow_rxq_flags_set(dev, flow);
4749 }
4750 return 0;
4751 error:
4752 ret = rte_errno; /* Save rte_errno before cleanup. */
4753 mlx5_flow_stop(dev, list);
4754 rte_errno = ret; /* Restore rte_errno. */
4755 return -rte_errno;
4756 }
4757
4758 /**
4759 * Stop all default actions for flows.
4760 *
4761 * @param dev
4762 * Pointer to Ethernet device.
4763 */
4764 void
4765 mlx5_flow_stop_default(struct rte_eth_dev *dev)
4766 {
4767 flow_mreg_del_default_copy_action(dev);
4768 flow_rxq_flags_clear(dev);
4769 }
4770
4771 /**
4772 * Start all default actions for flows.
4773 * 4774 * @param dev 4775 * Pointer to Ethernet device. 4776 * @return 4777 * 0 on success, a negative errno value otherwise and rte_errno is set. 4778 */ 4779 int 4780 mlx5_flow_start_default(struct rte_eth_dev *dev) 4781 { 4782 struct rte_flow_error error; 4783 4784 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 4785 return flow_mreg_add_default_copy_action(dev, &error); 4786 } 4787 4788 /** 4789 * Allocate intermediate resources for flow creation. 4790 * 4791 * @param dev 4792 * Pointer to Ethernet device. 4793 */ 4794 void 4795 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 4796 { 4797 struct mlx5_priv *priv = dev->data->dev_private; 4798 4799 if (!priv->inter_flows) { 4800 priv->inter_flows = rte_calloc(__func__, 1, 4801 MLX5_NUM_MAX_DEV_FLOWS * 4802 sizeof(struct mlx5_flow) + 4803 (sizeof(struct mlx5_flow_rss_desc) + 4804 sizeof(uint16_t) * UINT16_MAX) * 2, 0); 4805 if (!priv->inter_flows) { 4806 DRV_LOG(ERR, "can't allocate intermediate memory."); 4807 return; 4808 } 4809 } 4810 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 4811 [MLX5_NUM_MAX_DEV_FLOWS]; 4812 /* Reset the index. */ 4813 priv->flow_idx = 0; 4814 priv->flow_nested_idx = 0; 4815 } 4816 4817 /** 4818 * Free intermediate resources for flows. 4819 * 4820 * @param dev 4821 * Pointer to Ethernet device. 4822 */ 4823 void 4824 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 4825 { 4826 struct mlx5_priv *priv = dev->data->dev_private; 4827 4828 rte_free(priv->inter_flows); 4829 priv->inter_flows = NULL; 4830 } 4831 4832 /** 4833 * Verify the flow list is empty 4834 * 4835 * @param dev 4836 * Pointer to Ethernet device. 4837 * 4838 * @return the number of flows not released. 4839 */ 4840 int 4841 mlx5_flow_verify(struct rte_eth_dev *dev) 4842 { 4843 struct mlx5_priv *priv = dev->data->dev_private; 4844 struct rte_flow *flow; 4845 uint32_t idx; 4846 int ret = 0; 4847 4848 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 4849 flow, next) { 4850 DRV_LOG(DEBUG, "port %u flow %p still referenced", 4851 dev->data->port_id, (void *)flow); 4852 ++ret; 4853 } 4854 return ret; 4855 } 4856 4857 /** 4858 * Enable default hairpin egress flow. 4859 * 4860 * @param dev 4861 * Pointer to Ethernet device. 4862 * @param queue 4863 * The queue index. 4864 * 4865 * @return 4866 * 0 on success, a negative errno value otherwise and rte_errno is set. 
4867 */ 4868 int 4869 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 4870 uint32_t queue) 4871 { 4872 struct mlx5_priv *priv = dev->data->dev_private; 4873 const struct rte_flow_attr attr = { 4874 .egress = 1, 4875 .priority = 0, 4876 }; 4877 struct mlx5_rte_flow_item_tx_queue queue_spec = { 4878 .queue = queue, 4879 }; 4880 struct mlx5_rte_flow_item_tx_queue queue_mask = { 4881 .queue = UINT32_MAX, 4882 }; 4883 struct rte_flow_item items[] = { 4884 { 4885 .type = (enum rte_flow_item_type) 4886 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 4887 .spec = &queue_spec, 4888 .last = NULL, 4889 .mask = &queue_mask, 4890 }, 4891 { 4892 .type = RTE_FLOW_ITEM_TYPE_END, 4893 }, 4894 }; 4895 struct rte_flow_action_jump jump = { 4896 .group = MLX5_HAIRPIN_TX_TABLE, 4897 }; 4898 struct rte_flow_action actions[2]; 4899 uint32_t flow_idx; 4900 struct rte_flow_error error; 4901 4902 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 4903 actions[0].conf = &jump; 4904 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 4905 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 4906 &attr, items, actions, false, &error); 4907 if (!flow_idx) { 4908 DRV_LOG(DEBUG, 4909 "Failed to create ctrl flow: rte_errno(%d)," 4910 " type(%d), message(%s)", 4911 rte_errno, error.type, 4912 error.message ? error.message : " (no stated reason)"); 4913 return -rte_errno; 4914 } 4915 return 0; 4916 } 4917 4918 /** 4919 * Enable a control flow configured from the control plane. 4920 * 4921 * @param dev 4922 * Pointer to Ethernet device. 4923 * @param eth_spec 4924 * An Ethernet flow spec to apply. 4925 * @param eth_mask 4926 * An Ethernet flow mask to apply. 4927 * @param vlan_spec 4928 * A VLAN flow spec to apply. 4929 * @param vlan_mask 4930 * A VLAN flow mask to apply. 4931 * 4932 * @return 4933 * 0 on success, a negative errno value otherwise and rte_errno is set. 4934 */ 4935 int 4936 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 4937 struct rte_flow_item_eth *eth_spec, 4938 struct rte_flow_item_eth *eth_mask, 4939 struct rte_flow_item_vlan *vlan_spec, 4940 struct rte_flow_item_vlan *vlan_mask) 4941 { 4942 struct mlx5_priv *priv = dev->data->dev_private; 4943 const struct rte_flow_attr attr = { 4944 .ingress = 1, 4945 .priority = MLX5_FLOW_PRIO_RSVD, 4946 }; 4947 struct rte_flow_item items[] = { 4948 { 4949 .type = RTE_FLOW_ITEM_TYPE_ETH, 4950 .spec = eth_spec, 4951 .last = NULL, 4952 .mask = eth_mask, 4953 }, 4954 { 4955 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 4956 RTE_FLOW_ITEM_TYPE_END, 4957 .spec = vlan_spec, 4958 .last = NULL, 4959 .mask = vlan_mask, 4960 }, 4961 { 4962 .type = RTE_FLOW_ITEM_TYPE_END, 4963 }, 4964 }; 4965 uint16_t queue[priv->reta_idx_n]; 4966 struct rte_flow_action_rss action_rss = { 4967 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 4968 .level = 0, 4969 .types = priv->rss_conf.rss_hf, 4970 .key_len = priv->rss_conf.rss_key_len, 4971 .queue_num = priv->reta_idx_n, 4972 .key = priv->rss_conf.rss_key, 4973 .queue = queue, 4974 }; 4975 struct rte_flow_action actions[] = { 4976 { 4977 .type = RTE_FLOW_ACTION_TYPE_RSS, 4978 .conf = &action_rss, 4979 }, 4980 { 4981 .type = RTE_FLOW_ACTION_TYPE_END, 4982 }, 4983 }; 4984 uint32_t flow_idx; 4985 struct rte_flow_error error; 4986 unsigned int i; 4987 4988 if (!priv->reta_idx_n || !priv->rxqs_n) { 4989 return 0; 4990 } 4991 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 4992 action_rss.types = 0; 4993 for (i = 0; i != priv->reta_idx_n; ++i) 4994 queue[i] = (*priv->reta_idx)[i]; 4995 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 4996 &attr, items, actions, false, &error); 4997 if (!flow_idx) 4998 return -rte_errno; 4999 return 0; 5000 } 5001 5002 /** 5003 * Enable a flow control configured from the control plane. 5004 * 5005 * @param dev 5006 * Pointer to Ethernet device. 5007 * @param eth_spec 5008 * An Ethernet flow spec to apply. 5009 * @param eth_mask 5010 * An Ethernet flow mask to apply. 5011 * 5012 * @return 5013 * 0 on success, a negative errno value otherwise and rte_errno is set. 5014 */ 5015 int 5016 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5017 struct rte_flow_item_eth *eth_spec, 5018 struct rte_flow_item_eth *eth_mask) 5019 { 5020 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5021 } 5022 5023 /** 5024 * Create default miss flow rule matching lacp traffic 5025 * 5026 * @param dev 5027 * Pointer to Ethernet device. 5028 * @param eth_spec 5029 * An Ethernet flow spec to apply. 5030 * 5031 * @return 5032 * 0 on success, a negative errno value otherwise and rte_errno is set. 5033 */ 5034 int 5035 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5036 { 5037 struct mlx5_priv *priv = dev->data->dev_private; 5038 /* 5039 * The LACP matching is done by only using ether type since using 5040 * a multicast dst mac causes kernel to give low priority to this flow. 5041 */ 5042 static const struct rte_flow_item_eth lacp_spec = { 5043 .type = RTE_BE16(0x8809), 5044 }; 5045 static const struct rte_flow_item_eth lacp_mask = { 5046 .type = 0xffff, 5047 }; 5048 const struct rte_flow_attr attr = { 5049 .ingress = 1, 5050 }; 5051 struct rte_flow_item items[] = { 5052 { 5053 .type = RTE_FLOW_ITEM_TYPE_ETH, 5054 .spec = &lacp_spec, 5055 .mask = &lacp_mask, 5056 }, 5057 { 5058 .type = RTE_FLOW_ITEM_TYPE_END, 5059 }, 5060 }; 5061 struct rte_flow_action actions[] = { 5062 { 5063 .type = (enum rte_flow_action_type) 5064 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5065 }, 5066 { 5067 .type = RTE_FLOW_ACTION_TYPE_END, 5068 }, 5069 }; 5070 struct rte_flow_error error; 5071 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5072 &attr, items, actions, false, &error); 5073 5074 if (!flow_idx) 5075 return -rte_errno; 5076 return 0; 5077 } 5078 5079 /** 5080 * Destroy a flow. 
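 *
 * The opaque rte_flow pointer handed out to the application is in fact
 * the flow index in the MLX5_IPOOL_RTE_FLOW pool cast to a pointer, so
 * it is converted back to an index before the lookup.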
5081 * 5082 * @see rte_flow_destroy() 5083 * @see rte_flow_ops 5084 */ 5085 int 5086 mlx5_flow_destroy(struct rte_eth_dev *dev, 5087 struct rte_flow *flow, 5088 struct rte_flow_error *error __rte_unused) 5089 { 5090 struct mlx5_priv *priv = dev->data->dev_private; 5091 5092 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5093 return 0; 5094 } 5095 5096 /** 5097 * Destroy all flows. 5098 * 5099 * @see rte_flow_flush() 5100 * @see rte_flow_ops 5101 */ 5102 int 5103 mlx5_flow_flush(struct rte_eth_dev *dev, 5104 struct rte_flow_error *error __rte_unused) 5105 { 5106 struct mlx5_priv *priv = dev->data->dev_private; 5107 5108 mlx5_flow_list_flush(dev, &priv->flows, false); 5109 return 0; 5110 } 5111 5112 /** 5113 * Isolated mode. 5114 * 5115 * @see rte_flow_isolate() 5116 * @see rte_flow_ops 5117 */ 5118 int 5119 mlx5_flow_isolate(struct rte_eth_dev *dev, 5120 int enable, 5121 struct rte_flow_error *error) 5122 { 5123 struct mlx5_priv *priv = dev->data->dev_private; 5124 5125 if (dev->data->dev_started) { 5126 rte_flow_error_set(error, EBUSY, 5127 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5128 NULL, 5129 "port must be stopped first"); 5130 return -rte_errno; 5131 } 5132 priv->isolated = !!enable; 5133 if (enable) 5134 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5135 else 5136 dev->dev_ops = &mlx5_os_dev_ops; 5137 return 0; 5138 } 5139 5140 /** 5141 * Query a flow. 5142 * 5143 * @see rte_flow_query() 5144 * @see rte_flow_ops 5145 */ 5146 static int 5147 flow_drv_query(struct rte_eth_dev *dev, 5148 uint32_t flow_idx, 5149 const struct rte_flow_action *actions, 5150 void *data, 5151 struct rte_flow_error *error) 5152 { 5153 struct mlx5_priv *priv = dev->data->dev_private; 5154 const struct mlx5_flow_driver_ops *fops; 5155 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5156 [MLX5_IPOOL_RTE_FLOW], 5157 flow_idx); 5158 enum mlx5_flow_drv_type ftype; 5159 5160 if (!flow) { 5161 return rte_flow_error_set(error, ENOENT, 5162 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5163 NULL, 5164 "invalid flow handle"); 5165 } 5166 ftype = flow->drv_type; 5167 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5168 fops = flow_get_drv_ops(ftype); 5169 5170 return fops->query(dev, flow, actions, data, error); 5171 } 5172 5173 /** 5174 * Query a flow. 5175 * 5176 * @see rte_flow_query() 5177 * @see rte_flow_ops 5178 */ 5179 int 5180 mlx5_flow_query(struct rte_eth_dev *dev, 5181 struct rte_flow *flow, 5182 const struct rte_flow_action *actions, 5183 void *data, 5184 struct rte_flow_error *error) 5185 { 5186 int ret; 5187 5188 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5189 error); 5190 if (ret < 0) 5191 return ret; 5192 return 0; 5193 } 5194 5195 /** 5196 * Convert a flow director filter to a generic flow. 5197 * 5198 * @param dev 5199 * Pointer to Ethernet device. 5200 * @param fdir_filter 5201 * Flow director filter to add. 5202 * @param attributes 5203 * Generic flow parameters structure. 5204 * 5205 * @return 5206 * 0 on success, a negative errno value otherwise and rte_errno is set. 5207 */ 5208 static int 5209 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5210 const struct rte_eth_fdir_filter *fdir_filter, 5211 struct mlx5_fdir *attributes) 5212 { 5213 struct mlx5_priv *priv = dev->data->dev_private; 5214 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5215 const struct rte_eth_fdir_masks *mask = 5216 &dev->data->dev_conf.fdir_conf.mask; 5217 5218 /* Validate queue number. 
*/ 5219 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5220 DRV_LOG(ERR, "port %u invalid queue number %d", 5221 dev->data->port_id, fdir_filter->action.rx_queue); 5222 rte_errno = EINVAL; 5223 return -rte_errno; 5224 } 5225 attributes->attr.ingress = 1; 5226 attributes->items[0] = (struct rte_flow_item) { 5227 .type = RTE_FLOW_ITEM_TYPE_ETH, 5228 .spec = &attributes->l2, 5229 .mask = &attributes->l2_mask, 5230 }; 5231 switch (fdir_filter->action.behavior) { 5232 case RTE_ETH_FDIR_ACCEPT: 5233 attributes->actions[0] = (struct rte_flow_action){ 5234 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5235 .conf = &attributes->queue, 5236 }; 5237 break; 5238 case RTE_ETH_FDIR_REJECT: 5239 attributes->actions[0] = (struct rte_flow_action){ 5240 .type = RTE_FLOW_ACTION_TYPE_DROP, 5241 }; 5242 break; 5243 default: 5244 DRV_LOG(ERR, "port %u invalid behavior %d", 5245 dev->data->port_id, 5246 fdir_filter->action.behavior); 5247 rte_errno = ENOTSUP; 5248 return -rte_errno; 5249 } 5250 attributes->queue.index = fdir_filter->action.rx_queue; 5251 /* Handle L3. */ 5252 switch (fdir_filter->input.flow_type) { 5253 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5254 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5255 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5256 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5257 .src_addr = input->flow.ip4_flow.src_ip, 5258 .dst_addr = input->flow.ip4_flow.dst_ip, 5259 .time_to_live = input->flow.ip4_flow.ttl, 5260 .type_of_service = input->flow.ip4_flow.tos, 5261 }; 5262 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5263 .src_addr = mask->ipv4_mask.src_ip, 5264 .dst_addr = mask->ipv4_mask.dst_ip, 5265 .time_to_live = mask->ipv4_mask.ttl, 5266 .type_of_service = mask->ipv4_mask.tos, 5267 .next_proto_id = mask->ipv4_mask.proto, 5268 }; 5269 attributes->items[1] = (struct rte_flow_item){ 5270 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5271 .spec = &attributes->l3, 5272 .mask = &attributes->l3_mask, 5273 }; 5274 break; 5275 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5276 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5277 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5278 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5279 .hop_limits = input->flow.ipv6_flow.hop_limits, 5280 .proto = input->flow.ipv6_flow.proto, 5281 }; 5282 5283 memcpy(attributes->l3.ipv6.hdr.src_addr, 5284 input->flow.ipv6_flow.src_ip, 5285 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5286 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5287 input->flow.ipv6_flow.dst_ip, 5288 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5289 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 5290 mask->ipv6_mask.src_ip, 5291 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5292 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 5293 mask->ipv6_mask.dst_ip, 5294 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5295 attributes->items[1] = (struct rte_flow_item){ 5296 .type = RTE_FLOW_ITEM_TYPE_IPV6, 5297 .spec = &attributes->l3, 5298 .mask = &attributes->l3_mask, 5299 }; 5300 break; 5301 default: 5302 DRV_LOG(ERR, "port %u invalid flow type%d", 5303 dev->data->port_id, fdir_filter->input.flow_type); 5304 rte_errno = ENOTSUP; 5305 return -rte_errno; 5306 } 5307 /* Handle L4. 
*/ 5308 switch (fdir_filter->input.flow_type) { 5309 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5310 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5311 .src_port = input->flow.udp4_flow.src_port, 5312 .dst_port = input->flow.udp4_flow.dst_port, 5313 }; 5314 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5315 .src_port = mask->src_port_mask, 5316 .dst_port = mask->dst_port_mask, 5317 }; 5318 attributes->items[2] = (struct rte_flow_item){ 5319 .type = RTE_FLOW_ITEM_TYPE_UDP, 5320 .spec = &attributes->l4, 5321 .mask = &attributes->l4_mask, 5322 }; 5323 break; 5324 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5325 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5326 .src_port = input->flow.tcp4_flow.src_port, 5327 .dst_port = input->flow.tcp4_flow.dst_port, 5328 }; 5329 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5330 .src_port = mask->src_port_mask, 5331 .dst_port = mask->dst_port_mask, 5332 }; 5333 attributes->items[2] = (struct rte_flow_item){ 5334 .type = RTE_FLOW_ITEM_TYPE_TCP, 5335 .spec = &attributes->l4, 5336 .mask = &attributes->l4_mask, 5337 }; 5338 break; 5339 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5340 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5341 .src_port = input->flow.udp6_flow.src_port, 5342 .dst_port = input->flow.udp6_flow.dst_port, 5343 }; 5344 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5345 .src_port = mask->src_port_mask, 5346 .dst_port = mask->dst_port_mask, 5347 }; 5348 attributes->items[2] = (struct rte_flow_item){ 5349 .type = RTE_FLOW_ITEM_TYPE_UDP, 5350 .spec = &attributes->l4, 5351 .mask = &attributes->l4_mask, 5352 }; 5353 break; 5354 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5355 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5356 .src_port = input->flow.tcp6_flow.src_port, 5357 .dst_port = input->flow.tcp6_flow.dst_port, 5358 }; 5359 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5360 .src_port = mask->src_port_mask, 5361 .dst_port = mask->dst_port_mask, 5362 }; 5363 attributes->items[2] = (struct rte_flow_item){ 5364 .type = RTE_FLOW_ITEM_TYPE_TCP, 5365 .spec = &attributes->l4, 5366 .mask = &attributes->l4_mask, 5367 }; 5368 break; 5369 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5370 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5371 break; 5372 default: 5373 DRV_LOG(ERR, "port %u invalid flow type%d", 5374 dev->data->port_id, fdir_filter->input.flow_type); 5375 rte_errno = ENOTSUP; 5376 return -rte_errno; 5377 } 5378 return 0; 5379 } 5380 5381 #define FLOW_FDIR_CMP(f1, f2, fld) \ 5382 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 5383 5384 /** 5385 * Compare two FDIR flows. If items and actions are identical, the two flows are 5386 * regarded as same. 5387 * 5388 * @param dev 5389 * Pointer to Ethernet device. 5390 * @param f1 5391 * FDIR flow to compare. 5392 * @param f2 5393 * FDIR flow to compare. 5394 * 5395 * @return 5396 * Zero on match, 1 otherwise. 5397 */ 5398 static int 5399 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 5400 { 5401 if (FLOW_FDIR_CMP(f1, f2, attr) || 5402 FLOW_FDIR_CMP(f1, f2, l2) || 5403 FLOW_FDIR_CMP(f1, f2, l2_mask) || 5404 FLOW_FDIR_CMP(f1, f2, l3) || 5405 FLOW_FDIR_CMP(f1, f2, l3_mask) || 5406 FLOW_FDIR_CMP(f1, f2, l4) || 5407 FLOW_FDIR_CMP(f1, f2, l4_mask) || 5408 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 5409 return 1; 5410 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 5411 FLOW_FDIR_CMP(f1, f2, queue)) 5412 return 1; 5413 return 0; 5414 } 5415 5416 /** 5417 * Search device flow list to find out a matched FDIR flow. 5418 * 5419 * @param dev 5420 * Pointer to Ethernet device. 
5421 * @param fdir_flow 5422 * FDIR flow to lookup. 5423 * 5424 * @return 5425 * Index of flow if found, 0 otherwise. 5426 */ 5427 static uint32_t 5428 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 5429 { 5430 struct mlx5_priv *priv = dev->data->dev_private; 5431 uint32_t flow_idx = 0; 5432 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5433 5434 MLX5_ASSERT(fdir_flow); 5435 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5436 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 5437 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 5438 dev->data->port_id, flow_idx); 5439 flow_idx = priv_fdir_flow->rix_flow; 5440 break; 5441 } 5442 } 5443 return flow_idx; 5444 } 5445 5446 /** 5447 * Add new flow director filter and store it in list. 5448 * 5449 * @param dev 5450 * Pointer to Ethernet device. 5451 * @param fdir_filter 5452 * Flow director filter to add. 5453 * 5454 * @return 5455 * 0 on success, a negative errno value otherwise and rte_errno is set. 5456 */ 5457 static int 5458 flow_fdir_filter_add(struct rte_eth_dev *dev, 5459 const struct rte_eth_fdir_filter *fdir_filter) 5460 { 5461 struct mlx5_priv *priv = dev->data->dev_private; 5462 struct mlx5_fdir *fdir_flow; 5463 struct rte_flow *flow; 5464 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5465 uint32_t flow_idx; 5466 int ret; 5467 5468 fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0); 5469 if (!fdir_flow) { 5470 rte_errno = ENOMEM; 5471 return -rte_errno; 5472 } 5473 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 5474 if (ret) 5475 goto error; 5476 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 5477 if (flow_idx) { 5478 rte_errno = EEXIST; 5479 goto error; 5480 } 5481 priv_fdir_flow = rte_zmalloc(__func__, sizeof(struct mlx5_fdir_flow), 5482 0); 5483 if (!priv_fdir_flow) { 5484 rte_errno = ENOMEM; 5485 goto error; 5486 } 5487 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 5488 fdir_flow->items, fdir_flow->actions, true, 5489 NULL); 5490 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5491 if (!flow) 5492 goto error; 5493 flow->fdir = 1; 5494 priv_fdir_flow->fdir = fdir_flow; 5495 priv_fdir_flow->rix_flow = flow_idx; 5496 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 5497 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 5498 dev->data->port_id, (void *)flow); 5499 return 0; 5500 error: 5501 rte_free(priv_fdir_flow); 5502 rte_free(fdir_flow); 5503 return -rte_errno; 5504 } 5505 5506 /** 5507 * Delete specific filter. 5508 * 5509 * @param dev 5510 * Pointer to Ethernet device. 5511 * @param fdir_filter 5512 * Filter to be deleted. 5513 * 5514 * @return 5515 * 0 on success, a negative errno value otherwise and rte_errno is set. 
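 *
 * The filter is first converted to a generic flow description and then
 * matched by value against the entries in priv->fdir_flows; only an
 * exact match (same attributes, items and actions) is destroyed.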
5516 */ 5517 static int 5518 flow_fdir_filter_delete(struct rte_eth_dev *dev, 5519 const struct rte_eth_fdir_filter *fdir_filter) 5520 { 5521 struct mlx5_priv *priv = dev->data->dev_private; 5522 uint32_t flow_idx; 5523 struct mlx5_fdir fdir_flow = { 5524 .attr.group = 0, 5525 }; 5526 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5527 int ret; 5528 5529 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 5530 if (ret) 5531 return -rte_errno; 5532 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5533 /* Find the fdir in priv list */ 5534 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 5535 break; 5536 } 5537 if (!priv_fdir_flow) 5538 return 0; 5539 LIST_REMOVE(priv_fdir_flow, next); 5540 flow_idx = priv_fdir_flow->rix_flow; 5541 flow_list_destroy(dev, &priv->flows, flow_idx); 5542 rte_free(priv_fdir_flow->fdir); 5543 rte_free(priv_fdir_flow); 5544 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 5545 dev->data->port_id, flow_idx); 5546 return 0; 5547 } 5548 5549 /** 5550 * Update queue for specific filter. 5551 * 5552 * @param dev 5553 * Pointer to Ethernet device. 5554 * @param fdir_filter 5555 * Filter to be updated. 5556 * 5557 * @return 5558 * 0 on success, a negative errno value otherwise and rte_errno is set. 5559 */ 5560 static int 5561 flow_fdir_filter_update(struct rte_eth_dev *dev, 5562 const struct rte_eth_fdir_filter *fdir_filter) 5563 { 5564 int ret; 5565 5566 ret = flow_fdir_filter_delete(dev, fdir_filter); 5567 if (ret) 5568 return ret; 5569 return flow_fdir_filter_add(dev, fdir_filter); 5570 } 5571 5572 /** 5573 * Flush all filters. 5574 * 5575 * @param dev 5576 * Pointer to Ethernet device. 5577 */ 5578 static void 5579 flow_fdir_filter_flush(struct rte_eth_dev *dev) 5580 { 5581 struct mlx5_priv *priv = dev->data->dev_private; 5582 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5583 5584 while (!LIST_EMPTY(&priv->fdir_flows)) { 5585 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 5586 LIST_REMOVE(priv_fdir_flow, next); 5587 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 5588 rte_free(priv_fdir_flow->fdir); 5589 rte_free(priv_fdir_flow); 5590 } 5591 } 5592 5593 /** 5594 * Get flow director information. 5595 * 5596 * @param dev 5597 * Pointer to Ethernet device. 5598 * @param[out] fdir_info 5599 * Resulting flow director information. 5600 */ 5601 static void 5602 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 5603 { 5604 struct rte_eth_fdir_masks *mask = 5605 &dev->data->dev_conf.fdir_conf.mask; 5606 5607 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 5608 fdir_info->guarant_spc = 0; 5609 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 5610 fdir_info->max_flexpayload = 0; 5611 fdir_info->flow_types_mask[0] = 0; 5612 fdir_info->flex_payload_unit = 0; 5613 fdir_info->max_flex_payload_segment_num = 0; 5614 fdir_info->flex_payload_limit = 0; 5615 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 5616 } 5617 5618 /** 5619 * Deal with flow director operations. 5620 * 5621 * @param dev 5622 * Pointer to Ethernet device. 5623 * @param filter_op 5624 * Operation to perform. 5625 * @param arg 5626 * Pointer to operation-specific structure. 5627 * 5628 * @return 5629 * 0 on success, a negative errno value otherwise and rte_errno is set. 
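 *
 * Only the perfect match modes (RTE_FDIR_MODE_PERFECT and
 * RTE_FDIR_MODE_PERFECT_MAC_VLAN) are supported; each accepted
 * operation is translated into the corresponding rte_flow based
 * manipulation implemented above.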

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
                    void *arg)
{
        enum rte_fdir_mode fdir_mode =
                dev->data->dev_conf.fdir_conf.mode;

        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;
        if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
            fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                DRV_LOG(ERR, "port %u flow director mode %d not supported",
                        dev->data->port_id, fdir_mode);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        switch (filter_op) {
        case RTE_ETH_FILTER_ADD:
                return flow_fdir_filter_add(dev, arg);
        case RTE_ETH_FILTER_UPDATE:
                return flow_fdir_filter_update(dev, arg);
        case RTE_ETH_FILTER_DELETE:
                return flow_fdir_filter_delete(dev, arg);
        case RTE_ETH_FILTER_FLUSH:
                flow_fdir_filter_flush(dev);
                break;
        case RTE_ETH_FILTER_INFO:
                flow_fdir_info_get(dev, arg);
                break;
        default:
                DRV_LOG(DEBUG, "port %u unknown operation %u",
                        dev->data->port_id, filter_op);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
                if (filter_op != RTE_ETH_FILTER_GET) {
                        rte_errno = EINVAL;
                        return -rte_errno;
                }
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        case RTE_ETH_FILTER_FDIR:
                return flow_fdir_ctrl_func(dev, filter_op, arg);
        default:
                DRV_LOG(ERR, "port %u filter type (%d) not supported",
                        dev->data->port_id, filter_type);
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
        return 0;
}

/**
 * Create the needed meter and suffix tables.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to the flow meter.
 *
 * @return
 *   Pointer to table set on success, NULL otherwise.
 */
struct mlx5_meter_domains_infos *
mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
                          const struct mlx5_flow_meter *fm)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->create_mtr_tbls(dev, fm);
}

/**
 * Destroy the meter table set.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] tbls
 *   Pointer to the meter table set.
 *
 * @return
 *   0 on success.
 */
int
mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
                           struct mlx5_meter_domains_infos *tbls)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->destroy_mtr_tbls(dev, tbls);
}
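
/*
 * Illustrative sketch (not part of the driver): this is essentially how the
 * rte_flow layer obtains the mlx5 flow ops through mlx5_dev_filter_ctrl().
 * The guard macro and helper name are hypothetical; the snippet is compiled
 * out by default.
 */
#ifdef MLX5_FILTER_CTRL_USAGE_EXAMPLE
static const struct rte_flow_ops *
example_get_flow_ops(struct rte_eth_dev *dev)
{
        const struct rte_flow_ops *ops = NULL;

        /* RTE_ETH_FILTER_GENERIC + RTE_ETH_FILTER_GET returns &mlx5_flow_ops. */
        if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
                                 RTE_ETH_FILTER_GET, &ops) != 0)
                return NULL;
        return ops;
}
#endif /* MLX5_FILTER_CTRL_USAGE_EXAMPLE */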

/**
 * Create policer rules.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to flow meter structure.
 * @param[in] attr
 *   Pointer to flow attributes.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
                               struct mlx5_flow_meter *fm,
                               const struct rte_flow_attr *attr)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->create_policer_rules(dev, fm, attr);
}

/**
 * Destroy policer rules.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to flow meter structure.
 * @param[in] attr
 *   Pointer to flow attributes.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
                                struct mlx5_flow_meter *fm,
                                const struct rte_flow_attr *attr)
{
        const struct mlx5_flow_driver_ops *fops;

        fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
        return fops->destroy_policer_rules(dev, fm, attr);
}

/**
 * Allocate a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Index to allocated counter on success, 0 otherwise.
 */
uint32_t
mlx5_counter_alloc(struct rte_eth_dev *dev)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                return fops->counter_alloc(dev);
        }
        DRV_LOG(ERR,
                "port %u counter allocation is not supported.",
                dev->data->port_id);
        return 0;
}

/**
 * Free a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to be freed.
 */
void
mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                fops->counter_free(dev, cnt);
                return;
        }
        DRV_LOG(ERR,
                "port %u counter free is not supported.",
                dev->data->port_id);
}

/**
 * Query counter statistics.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to query.
 * @param[in] clear
 *   Set to clear counter statistics.
 * @param[out] pkts
 *   Where to save the number of packets that hit the counter.
 * @param[out] bytes
 *   Where to save the number of bytes that hit the counter.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
                   bool clear, uint64_t *pkts, uint64_t *bytes)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                return fops->counter_query(dev, cnt, clear, pkts, bytes);
        }
        DRV_LOG(ERR,
                "port %u counter query is not supported.",
                dev->data->port_id);
        return -ENOTSUP;
}
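
/*
 * Illustrative sketch (not part of the driver): the counter life cycle
 * exposed above, allocate -> query -> free. Only the DV (DevX) flow engine
 * supports it; the guard macro and helper name are hypothetical and the
 * snippet is compiled out by default.
 */
#ifdef MLX5_COUNTER_USAGE_EXAMPLE
static void
example_counter_lifecycle(struct rte_eth_dev *dev)
{
        uint64_t pkts = 0;
        uint64_t bytes = 0;
        uint32_t cnt = mlx5_counter_alloc(dev);

        if (!cnt)
                return; /* Allocation unsupported (non-DV) or exhausted. */
        /* Read and clear the statistics accumulated so far. */
        if (mlx5_counter_query(dev, cnt, true, &pkts, &bytes) == 0)
                DRV_LOG(DEBUG, "counter %u: %lu packets, %lu bytes", cnt,
                        (unsigned long)pkts, (unsigned long)bytes);
        mlx5_counter_free(dev, cnt);
}
#endif /* MLX5_COUNTER_USAGE_EXAMPLE */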

#define MLX5_POOL_QUERY_FREQ_US 1000000

/**
 * Get the number of all valid counter pools.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   The number of all valid counter pools.
 */
static uint32_t
mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
{
        int i;
        uint32_t pools_n = 0;

        for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
                pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
        return pools_n;
}

/**
 * Set the periodic procedure for triggering asynchronous batch queries for all
 * the counter pools.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
void
mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
{
        uint32_t pools_n, us;

        pools_n = mlx5_get_all_valid_pool_count(sh);
        us = MLX5_POOL_QUERY_FREQ_US / pools_n;
        DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
        if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
                sh->cmng.query_thread_on = 0;
                DRV_LOG(ERR, "Cannot reinitialize query alarm");
        } else {
                sh->cmng.query_thread_on = 1;
        }
}
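
/*
 * Scheduling note (added for clarity): MLX5_POOL_QUERY_FREQ_US spreads one
 * full pass over all valid pools across one second. For example, with 4
 * valid pools the alarm fires every 1000000 / 4 = 250000 us, and since each
 * alarm invocation queries one pool, every pool is queried roughly once per
 * second regardless of the pool count.
 */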

/**
 * The periodic procedure for triggering asynchronous batch queries for all the
 * counter pools. This function is expected to be called from the host thread.
 *
 * @param[in] arg
 *   The parameter for the alarm process.
 */
void
mlx5_flow_query_alarm(void *arg)
{
        struct mlx5_dev_ctx_shared *sh = arg;
        struct mlx5_devx_obj *dcs;
        uint16_t offset;
        int ret;
        uint8_t batch = sh->cmng.batch;
        uint8_t age = sh->cmng.age;
        uint16_t pool_index = sh->cmng.pool_index;
        struct mlx5_pools_container *cont;
        struct mlx5_flow_counter_pool *pool;
        int cont_loop = MLX5_CCONT_TYPE_MAX;

        if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
                goto set_alarm;
next_container:
        cont = MLX5_CNT_CONTAINER(sh, batch, age);
        rte_spinlock_lock(&cont->resize_sl);
        if (!cont->pools) {
                rte_spinlock_unlock(&cont->resize_sl);
                /* Check if all the containers are empty. */
                if (unlikely(--cont_loop == 0))
                        goto set_alarm;
                batch ^= 0x1;
                pool_index = 0;
                if (batch == 0 && pool_index == 0) {
                        age ^= 0x1;
                        sh->cmng.batch = batch;
                        sh->cmng.age = age;
                }
                goto next_container;
        }
        pool = cont->pools[pool_index];
        rte_spinlock_unlock(&cont->resize_sl);
        if (pool->raw_hw)
                /* There is a pool query in progress. */
                goto set_alarm;
        pool->raw_hw = LIST_FIRST(&sh->cmng.free_stat_raws);
        if (!pool->raw_hw)
                /* No free counter statistics raw memory. */
                goto set_alarm;
        dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
                                                        (&pool->a64_dcs);
        offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
        /*
         * Identify more efficiently the counters released between the query
         * trigger and the query handling. A counter released in this gap
         * period should wait for a new round of query, as the newly arrived
         * packets will not be taken into account.
         */
        pool->query_gen++;
        ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
                                               offset, NULL, NULL,
                                               pool->raw_hw->mem_mng->dm->id,
                                               (void *)(uintptr_t)
                                               (pool->raw_hw->data + offset),
                                               sh->devx_comp,
                                               (uint64_t)(uintptr_t)pool);
        if (ret) {
                DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
                        " %d", pool->min_dcs->id);
                pool->raw_hw = NULL;
                goto set_alarm;
        }
        pool->raw_hw->min_dcs_id = dcs->id;
        LIST_REMOVE(pool->raw_hw, next);
        sh->cmng.pending_queries++;
        pool_index++;
        if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
                batch ^= 0x1;
                pool_index = 0;
                if (batch == 0 && pool_index == 0)
                        age ^= 0x1;
        }
set_alarm:
        sh->cmng.batch = batch;
        sh->cmng.pool_index = pool_index;
        sh->cmng.age = age;
        mlx5_set_query_alarm(sh);
}

/**
 * Check for newly aged flows in the counter pool and, if any are found,
 * raise the aging event for the port.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] pool
 *   Pointer to the current counter pool.
 */
static void
mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
                      struct mlx5_flow_counter_pool *pool)
{
        struct mlx5_priv *priv;
        struct mlx5_flow_counter *cnt;
        struct mlx5_age_info *age_info;
        struct mlx5_age_param *age_param;
        struct mlx5_counter_stats_raw *cur = pool->raw_hw;
        struct mlx5_counter_stats_raw *prev = pool->raw;
        uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
        uint32_t i;

        for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
                cnt = MLX5_POOL_GET_CNT(pool, i);
                age_param = MLX5_CNT_TO_AGE(cnt);
                if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
                        continue;
                if (cur->data[i].hits != prev->data[i].hits) {
                        age_param->expire = curr + age_param->timeout;
                        continue;
                }
                if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
                        continue;
                /*
                 * Hold the lock first; otherwise, if the release happens
                 * between the AGE_TMOUT state change and the tailq operation,
                 * the release procedure may delete a non-existent tailq node.
                 */
                priv = rte_eth_devices[age_param->port_id].data->dev_private;
                age_info = GET_PORT_AGE_INFO(priv);
                rte_spinlock_lock(&age_info->aged_sl);
                /* If the cmpset fails, the release has already happened. */
                if (rte_atomic16_cmpset((volatile uint16_t *)
                                        &age_param->state,
                                        AGE_CANDIDATE,
                                        AGE_TMOUT) ==
                                        AGE_CANDIDATE) {
                        TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
                        MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
                }
                rte_spinlock_unlock(&age_info->aged_sl);
        }
        for (i = 0; i < sh->max_port; i++) {
                age_info = &sh->port[i].age_info;
                if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
                        continue;
                if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
                        _rte_eth_dev_callback_process
                                (&rte_eth_devices[sh->port[i].devx_ih_port_id],
                                 RTE_ETH_EVENT_FLOW_AGED, NULL);
                age_info->flags = 0;
        }
}

/**
 * Handler for the HW response with the ready values from an asynchronous
 * batch query. This function is expected to be called from the host thread.
 *
 * @param[in] sh
 *   The pointer to the shared device context.
 * @param[in] async_id
 *   The Devx async ID.
 * @param[in] status
 *   The status of the completion.
 */
void
mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
                                  uint64_t async_id, int status)
{
        struct mlx5_flow_counter_pool *pool =
                (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
        struct mlx5_counter_stats_raw *raw_to_free;
        uint8_t age = !!IS_AGE_POOL(pool);
        uint8_t query_gen = pool->query_gen ^ 1;
        struct mlx5_pools_container *cont =
                MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);

        if (unlikely(status)) {
                raw_to_free = pool->raw_hw;
        } else {
                raw_to_free = pool->raw;
                if (IS_AGE_POOL(pool))
                        mlx5_flow_aging_check(sh, pool);
                rte_spinlock_lock(&pool->sl);
                pool->raw = pool->raw_hw;
                rte_spinlock_unlock(&pool->sl);
                /* Be sure the new raw counters data is updated in memory. */
                rte_cio_wmb();
                if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
                        rte_spinlock_lock(&cont->csl);
                        TAILQ_CONCAT(&cont->counters,
                                     &pool->counters[query_gen], next);
                        rte_spinlock_unlock(&cont->csl);
                }
        }
        LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
        pool->raw_hw = NULL;
        sh->cmng.pending_queries--;
}

/**
 * Translate the rte_flow group index to the HW table value.
 *
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in] external
 *   Whether the flow rule is created by a request external to the PMD.
 * @param[in] group
 *   rte_flow group index value.
 * @param[in] fdb_def_rule
 *   Whether the FDB default jump to table 1 is configured.
 * @param[out] table
 *   HW table value.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
                         uint32_t group, bool fdb_def_rule, uint32_t *table,
                         struct rte_flow_error *error)
{
        if (attributes->transfer && external && fdb_def_rule) {
                if (group == UINT32_MAX)
                        return rte_flow_error_set
                                        (error, EINVAL,
                                         RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                         NULL,
                                         "group index not supported");
                *table = group + 1;
        } else {
                *table = group;
        }
        return 0;
}
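
/*
 * Illustrative sketch (not part of the driver): the group-to-table
 * translation above in numbers. When an external transfer rule is created
 * while the FDB default jump rule is in place, table 0 is reserved for that
 * jump, so user groups are shifted by one; otherwise the group maps 1:1.
 * The guard macro and helper name are hypothetical; the snippet is compiled
 * out by default.
 */
#ifdef MLX5_GROUP_TO_TABLE_EXAMPLE
static void
example_group_to_table(void)
{
        struct rte_flow_error error;
        struct rte_flow_attr attr = { .transfer = 1 };
        uint32_t table = 0;

        /* External transfer rule with the FDB default rule: group 3 -> table 4. */
        mlx5_flow_group_to_table(&attr, true, 3, true, &table, &error);
        MLX5_ASSERT(table == 4);
        /* Internal rule (not external): group 3 -> table 3. */
        mlx5_flow_group_to_table(&attr, false, 3, true, &table, &error);
        MLX5_ASSERT(table == 3);
}
#endif /* MLX5_GROUP_TO_TABLE_EXAMPLE */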

/**
 * Discover availability of metadata reg_c's.
 *
 * Iteratively use test flows to check availability.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
        enum modify_reg idx;
        int n = 0;

        /* reg_c[0] and reg_c[1] are reserved. */
        config->flow_mreg_c[n++] = REG_C_0;
        config->flow_mreg_c[n++] = REG_C_1;
        /* Discover availability of other reg_c's. */
        for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
                struct rte_flow_attr attr = {
                        .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
                        .priority = MLX5_FLOW_PRIO_RSVD,
                        .ingress = 1,
                };
                struct rte_flow_item items[] = {
                        [0] = {
                                .type = RTE_FLOW_ITEM_TYPE_END,
                        },
                };
                struct rte_flow_action actions[] = {
                        [0] = {
                                .type = (enum rte_flow_action_type)
                                        MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
                                .conf = &(struct mlx5_flow_action_copy_mreg){
                                        .src = REG_C_1,
                                        .dst = idx,
                                },
                        },
                        [1] = {
                                .type = RTE_FLOW_ACTION_TYPE_JUMP,
                                .conf = &(struct rte_flow_action_jump){
                                        .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
                                },
                        },
                        [2] = {
                                .type = RTE_FLOW_ACTION_TYPE_END,
                        },
                };
                uint32_t flow_idx;
                struct rte_flow *flow;
                struct rte_flow_error error;

                if (!config->dv_flow_en)
                        break;
                /* Create internal flow, validation skips copy action. */
                flow_idx = flow_list_create(dev, NULL, &attr, items,
                                            actions, false, &error);
                flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
                                      flow_idx);
                if (!flow)
                        continue;
                if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
                        config->flow_mreg_c[n++] = idx;
                flow_list_destroy(dev, NULL, flow_idx);
        }
        for (; n < MLX5_MREG_C_NUM; ++n)
                config->flow_mreg_c[n] = REG_NONE;
        return 0;
}

/**
 * Dump flow raw HW data to a file.
 *
 * @param[in] dev
 *   The pointer to Ethernet device.
 * @param[in] file
 *   A pointer to a file for output.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. PMDs initialize this
 *   structure in case of error only.
 *
 * @return
 *   0 on success, a negative value otherwise.
 */
int
mlx5_flow_dev_dump(struct rte_eth_dev *dev,
                   FILE *file,
                   struct rte_flow_error *error __rte_unused)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_ctx_shared *sh = priv->sh;

        return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
                                       sh->tx_domain, file);
}

/**
 * Get aged-out flows.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] contexts
 *   The address of an array of pointers to the aged-out flow contexts.
 * @param[in] nb_contexts
 *   The length of the context array.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. Initialized in case of
 *   error only.
 *
 * @return
 *   The number of aged-out flows reported in the contexts array on success,
 *   a negative errno value otherwise. If nb_contexts is 0, the number of all
 *   aged-out flows is returned instead.
 */
int
mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
                         uint32_t nb_contexts, struct rte_flow_error *error)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                return fops->get_aged_flows(dev, contexts, nb_contexts,
                                            error);
        }
        DRV_LOG(ERR,
                "port %u get aged flows is not supported.",
                dev->data->port_id);
        return -ENOTSUP;
}
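
/*
 * Illustrative sketch (not part of the driver): how an application consumes
 * the flow aging support above through the public rte_flow API. The guard
 * macro and helper names are hypothetical; the snippet is compiled out by
 * default.
 */
#ifdef MLX5_FLOW_AGED_USAGE_EXAMPLE
static int
example_flow_aged_cb(uint16_t port_id, enum rte_eth_event_type event,
                     void *cb_arg, void *ret_param)
{
        void *contexts[32];
        struct rte_flow_error error;
        int n;

        (void)event;
        (void)cb_arg;
        (void)ret_param;
        /* Served by mlx5_flow_get_aged_flows() through the rte_flow layer. */
        n = rte_flow_get_aged_flows(port_id, contexts, RTE_DIM(contexts),
                                    &error);
        if (n > 0)
                DRV_LOG(DEBUG, "port %u has %d aged-out flows", port_id, n);
        return 0;
}

static void
example_register_aged_event(uint16_t port_id)
{
        /* RTE_ETH_EVENT_FLOW_AGED is raised from mlx5_flow_aging_check(). */
        rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED,
                                      example_flow_aged_cb, NULL);
}
#endif /* MLX5_FLOW_AGED_USAGE_EXAMPLE */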