1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <netinet/in.h> 7 #include <sys/queue.h> 8 #include <stdalign.h> 9 #include <stdint.h> 10 #include <string.h> 11 #include <stdbool.h> 12 13 /* Verbs header. */ 14 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 15 #ifdef PEDANTIC 16 #pragma GCC diagnostic ignored "-Wpedantic" 17 #endif 18 #include <infiniband/verbs.h> 19 #ifdef PEDANTIC 20 #pragma GCC diagnostic error "-Wpedantic" 21 #endif 22 23 #include <rte_common.h> 24 #include <rte_ether.h> 25 #include <rte_ethdev_driver.h> 26 #include <rte_flow.h> 27 #include <rte_cycles.h> 28 #include <rte_flow_driver.h> 29 #include <rte_malloc.h> 30 #include <rte_ip.h> 31 32 #include <mlx5_glue.h> 33 #include <mlx5_devx_cmds.h> 34 #include <mlx5_prm.h> 35 #include <mlx5_malloc.h> 36 37 #include "mlx5_defs.h" 38 #include "mlx5.h" 39 #include "mlx5_flow.h" 40 #include "mlx5_flow_os.h" 41 #include "mlx5_rxtx.h" 42 43 /** Device flow drivers. */ 44 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; 45 46 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops; 47 48 const struct mlx5_flow_driver_ops *flow_drv_ops[] = { 49 [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops, 50 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 51 [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops, 52 #endif 53 [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops, 54 [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops 55 }; 56 57 enum mlx5_expansion { 58 MLX5_EXPANSION_ROOT, 59 MLX5_EXPANSION_ROOT_OUTER, 60 MLX5_EXPANSION_ROOT_ETH_VLAN, 61 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, 62 MLX5_EXPANSION_OUTER_ETH, 63 MLX5_EXPANSION_OUTER_ETH_VLAN, 64 MLX5_EXPANSION_OUTER_VLAN, 65 MLX5_EXPANSION_OUTER_IPV4, 66 MLX5_EXPANSION_OUTER_IPV4_UDP, 67 MLX5_EXPANSION_OUTER_IPV4_TCP, 68 MLX5_EXPANSION_OUTER_IPV6, 69 MLX5_EXPANSION_OUTER_IPV6_UDP, 70 MLX5_EXPANSION_OUTER_IPV6_TCP, 71 MLX5_EXPANSION_VXLAN, 72 MLX5_EXPANSION_VXLAN_GPE, 73 MLX5_EXPANSION_GRE, 74 MLX5_EXPANSION_MPLS, 75 MLX5_EXPANSION_ETH, 76 MLX5_EXPANSION_ETH_VLAN, 77 MLX5_EXPANSION_VLAN, 78 MLX5_EXPANSION_IPV4, 79 MLX5_EXPANSION_IPV4_UDP, 80 MLX5_EXPANSION_IPV4_TCP, 81 MLX5_EXPANSION_IPV6, 82 MLX5_EXPANSION_IPV6_UDP, 83 MLX5_EXPANSION_IPV6_TCP, 84 }; 85 86 /** Supported expansion of items. 
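 *
 * This graph is walked by the RSS expansion helper (rte_flow_expand_rss()):
 * starting from one of the MLX5_EXPANSION_ROOT* entries, every .next branch
 * whose .rss_types intersects the RSS types requested by the application
 * yields an extra sub-pattern, so that each hash type can be programmed on
 * its own rule.
 *
 * Illustrative sketch only (not taken from a real run): a pattern
 *
 *   eth / ipv4 / end
 *
 * combined with rss.types = ETH_RSS_NONFRAG_IPV4_UDP |
 * ETH_RSS_NONFRAG_IPV4_TCP is expanded into the additional sub-patterns
 *
 *   eth / ipv4 / udp / end
 *   eth / ipv4 / tcp / end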
*/ 87 static const struct rte_flow_expand_node mlx5_support_expansion[] = { 88 [MLX5_EXPANSION_ROOT] = { 89 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 90 MLX5_EXPANSION_IPV4, 91 MLX5_EXPANSION_IPV6), 92 .type = RTE_FLOW_ITEM_TYPE_END, 93 }, 94 [MLX5_EXPANSION_ROOT_OUTER] = { 95 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 96 MLX5_EXPANSION_OUTER_IPV4, 97 MLX5_EXPANSION_OUTER_IPV6), 98 .type = RTE_FLOW_ITEM_TYPE_END, 99 }, 100 [MLX5_EXPANSION_ROOT_ETH_VLAN] = { 101 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), 102 .type = RTE_FLOW_ITEM_TYPE_END, 103 }, 104 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { 105 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN), 106 .type = RTE_FLOW_ITEM_TYPE_END, 107 }, 108 [MLX5_EXPANSION_OUTER_ETH] = { 109 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 110 MLX5_EXPANSION_OUTER_IPV6, 111 MLX5_EXPANSION_MPLS), 112 .type = RTE_FLOW_ITEM_TYPE_ETH, 113 .rss_types = 0, 114 }, 115 [MLX5_EXPANSION_OUTER_ETH_VLAN] = { 116 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), 117 .type = RTE_FLOW_ITEM_TYPE_ETH, 118 .rss_types = 0, 119 }, 120 [MLX5_EXPANSION_OUTER_VLAN] = { 121 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 122 MLX5_EXPANSION_OUTER_IPV6), 123 .type = RTE_FLOW_ITEM_TYPE_VLAN, 124 }, 125 [MLX5_EXPANSION_OUTER_IPV4] = { 126 .next = RTE_FLOW_EXPAND_RSS_NEXT 127 (MLX5_EXPANSION_OUTER_IPV4_UDP, 128 MLX5_EXPANSION_OUTER_IPV4_TCP, 129 MLX5_EXPANSION_GRE, 130 MLX5_EXPANSION_IPV4, 131 MLX5_EXPANSION_IPV6), 132 .type = RTE_FLOW_ITEM_TYPE_IPV4, 133 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 134 ETH_RSS_NONFRAG_IPV4_OTHER, 135 }, 136 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 137 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 138 MLX5_EXPANSION_VXLAN_GPE), 139 .type = RTE_FLOW_ITEM_TYPE_UDP, 140 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 141 }, 142 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 143 .type = RTE_FLOW_ITEM_TYPE_TCP, 144 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 145 }, 146 [MLX5_EXPANSION_OUTER_IPV6] = { 147 .next = RTE_FLOW_EXPAND_RSS_NEXT 148 (MLX5_EXPANSION_OUTER_IPV6_UDP, 149 MLX5_EXPANSION_OUTER_IPV6_TCP, 150 MLX5_EXPANSION_IPV4, 151 MLX5_EXPANSION_IPV6), 152 .type = RTE_FLOW_ITEM_TYPE_IPV6, 153 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 154 ETH_RSS_NONFRAG_IPV6_OTHER, 155 }, 156 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 157 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 158 MLX5_EXPANSION_VXLAN_GPE), 159 .type = RTE_FLOW_ITEM_TYPE_UDP, 160 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 161 }, 162 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 163 .type = RTE_FLOW_ITEM_TYPE_TCP, 164 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 165 }, 166 [MLX5_EXPANSION_VXLAN] = { 167 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 168 MLX5_EXPANSION_IPV4, 169 MLX5_EXPANSION_IPV6), 170 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 171 }, 172 [MLX5_EXPANSION_VXLAN_GPE] = { 173 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 174 MLX5_EXPANSION_IPV4, 175 MLX5_EXPANSION_IPV6), 176 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 177 }, 178 [MLX5_EXPANSION_GRE] = { 179 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), 180 .type = RTE_FLOW_ITEM_TYPE_GRE, 181 }, 182 [MLX5_EXPANSION_MPLS] = { 183 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 184 MLX5_EXPANSION_IPV6), 185 .type = RTE_FLOW_ITEM_TYPE_MPLS, 186 }, 187 [MLX5_EXPANSION_ETH] = { 188 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 189 MLX5_EXPANSION_IPV6), 190 .type = RTE_FLOW_ITEM_TYPE_ETH, 191 }, 192 [MLX5_EXPANSION_ETH_VLAN] = { 193 .next = 
RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), 194 .type = RTE_FLOW_ITEM_TYPE_ETH, 195 }, 196 [MLX5_EXPANSION_VLAN] = { 197 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 198 MLX5_EXPANSION_IPV6), 199 .type = RTE_FLOW_ITEM_TYPE_VLAN, 200 }, 201 [MLX5_EXPANSION_IPV4] = { 202 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 203 MLX5_EXPANSION_IPV4_TCP), 204 .type = RTE_FLOW_ITEM_TYPE_IPV4, 205 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 206 ETH_RSS_NONFRAG_IPV4_OTHER, 207 }, 208 [MLX5_EXPANSION_IPV4_UDP] = { 209 .type = RTE_FLOW_ITEM_TYPE_UDP, 210 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 211 }, 212 [MLX5_EXPANSION_IPV4_TCP] = { 213 .type = RTE_FLOW_ITEM_TYPE_TCP, 214 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 215 }, 216 [MLX5_EXPANSION_IPV6] = { 217 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 218 MLX5_EXPANSION_IPV6_TCP), 219 .type = RTE_FLOW_ITEM_TYPE_IPV6, 220 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 221 ETH_RSS_NONFRAG_IPV6_OTHER, 222 }, 223 [MLX5_EXPANSION_IPV6_UDP] = { 224 .type = RTE_FLOW_ITEM_TYPE_UDP, 225 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 226 }, 227 [MLX5_EXPANSION_IPV6_TCP] = { 228 .type = RTE_FLOW_ITEM_TYPE_TCP, 229 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 230 }, 231 }; 232 233 static const struct rte_flow_ops mlx5_flow_ops = { 234 .validate = mlx5_flow_validate, 235 .create = mlx5_flow_create, 236 .destroy = mlx5_flow_destroy, 237 .flush = mlx5_flow_flush, 238 .isolate = mlx5_flow_isolate, 239 .query = mlx5_flow_query, 240 .dev_dump = mlx5_flow_dev_dump, 241 .get_aged_flows = mlx5_flow_get_aged_flows, 242 }; 243 244 /* Convert FDIR request to Generic flow. */ 245 struct mlx5_fdir { 246 struct rte_flow_attr attr; 247 struct rte_flow_item items[4]; 248 struct rte_flow_item_eth l2; 249 struct rte_flow_item_eth l2_mask; 250 union { 251 struct rte_flow_item_ipv4 ipv4; 252 struct rte_flow_item_ipv6 ipv6; 253 } l3; 254 union { 255 struct rte_flow_item_ipv4 ipv4; 256 struct rte_flow_item_ipv6 ipv6; 257 } l3_mask; 258 union { 259 struct rte_flow_item_udp udp; 260 struct rte_flow_item_tcp tcp; 261 } l4; 262 union { 263 struct rte_flow_item_udp udp; 264 struct rte_flow_item_tcp tcp; 265 } l4_mask; 266 struct rte_flow_action actions[2]; 267 struct rte_flow_action_queue queue; 268 }; 269 270 /* Map of Verbs to Flow priority with 8 Verbs priorities. */ 271 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = { 272 { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 }, 273 }; 274 275 /* Map of Verbs to Flow priority with 16 Verbs priorities. */ 276 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = { 277 { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 }, 278 { 9, 10, 11 }, { 12, 13, 14 }, 279 }; 280 281 /* Tunnel information. */ 282 struct mlx5_flow_tunnel_info { 283 uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ 284 uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). 
*/ 285 }; 286 287 static struct mlx5_flow_tunnel_info tunnels_info[] = { 288 { 289 .tunnel = MLX5_FLOW_LAYER_VXLAN, 290 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP, 291 }, 292 { 293 .tunnel = MLX5_FLOW_LAYER_GENEVE, 294 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP, 295 }, 296 { 297 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE, 298 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP, 299 }, 300 { 301 .tunnel = MLX5_FLOW_LAYER_GRE, 302 .ptype = RTE_PTYPE_TUNNEL_GRE, 303 }, 304 { 305 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP, 306 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP, 307 }, 308 { 309 .tunnel = MLX5_FLOW_LAYER_MPLS, 310 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE, 311 }, 312 { 313 .tunnel = MLX5_FLOW_LAYER_NVGRE, 314 .ptype = RTE_PTYPE_TUNNEL_NVGRE, 315 }, 316 { 317 .tunnel = MLX5_FLOW_LAYER_IPIP, 318 .ptype = RTE_PTYPE_TUNNEL_IP, 319 }, 320 { 321 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP, 322 .ptype = RTE_PTYPE_TUNNEL_IP, 323 }, 324 { 325 .tunnel = MLX5_FLOW_LAYER_GTP, 326 .ptype = RTE_PTYPE_TUNNEL_GTPU, 327 }, 328 }; 329 330 /** 331 * Translate tag ID to register. 332 * 333 * @param[in] dev 334 * Pointer to the Ethernet device structure. 335 * @param[in] feature 336 * The feature that request the register. 337 * @param[in] id 338 * The request register ID. 339 * @param[out] error 340 * Error description in case of any. 341 * 342 * @return 343 * The request register on success, a negative errno 344 * value otherwise and rte_errno is set. 345 */ 346 int 347 mlx5_flow_get_reg_id(struct rte_eth_dev *dev, 348 enum mlx5_feature_name feature, 349 uint32_t id, 350 struct rte_flow_error *error) 351 { 352 struct mlx5_priv *priv = dev->data->dev_private; 353 struct mlx5_dev_config *config = &priv->config; 354 enum modify_reg start_reg; 355 bool skip_mtr_reg = false; 356 357 switch (feature) { 358 case MLX5_HAIRPIN_RX: 359 return REG_B; 360 case MLX5_HAIRPIN_TX: 361 return REG_A; 362 case MLX5_METADATA_RX: 363 switch (config->dv_xmeta_en) { 364 case MLX5_XMETA_MODE_LEGACY: 365 return REG_B; 366 case MLX5_XMETA_MODE_META16: 367 return REG_C_0; 368 case MLX5_XMETA_MODE_META32: 369 return REG_C_1; 370 } 371 break; 372 case MLX5_METADATA_TX: 373 return REG_A; 374 case MLX5_METADATA_FDB: 375 switch (config->dv_xmeta_en) { 376 case MLX5_XMETA_MODE_LEGACY: 377 return REG_NONE; 378 case MLX5_XMETA_MODE_META16: 379 return REG_C_0; 380 case MLX5_XMETA_MODE_META32: 381 return REG_C_1; 382 } 383 break; 384 case MLX5_FLOW_MARK: 385 switch (config->dv_xmeta_en) { 386 case MLX5_XMETA_MODE_LEGACY: 387 return REG_NONE; 388 case MLX5_XMETA_MODE_META16: 389 return REG_C_1; 390 case MLX5_XMETA_MODE_META32: 391 return REG_C_0; 392 } 393 break; 394 case MLX5_MTR_SFX: 395 /* 396 * If meter color and flow match share one register, flow match 397 * should use the meter color register for match. 398 */ 399 if (priv->mtr_reg_share) 400 return priv->mtr_color_reg; 401 else 402 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : 403 REG_C_3; 404 case MLX5_MTR_COLOR: 405 MLX5_ASSERT(priv->mtr_color_reg != REG_NONE); 406 return priv->mtr_color_reg; 407 case MLX5_COPY_MARK: 408 /* 409 * Metadata COPY_MARK register using is in meter suffix sub 410 * flow while with meter. It's safe to share the same register. 411 */ 412 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3; 413 case MLX5_APP_TAG: 414 /* 415 * If meter is enable, it will engage the register for color 416 * match and flow match. 
If the meter color match does not use
		 * REG_C_2, the REG_C_x used by the meter color match must be
		 * skipped.
		 * If the meter is disabled, all available registers can be
		 * used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than 2.
		 * Take care not to conflict with the meter color REG_C_x.
		 * If the available index REG_C_y >= REG_C_x, skip the
		 * color register.
		 */
		if (skip_mtr_reg && config->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						RTE_FLOW_ERROR_TYPE_ITEM,
						NULL, "invalid tag id");
			if (config->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NONE)
				return config->flow_mreg_c
						[id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return config->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;

	/*
	 * Having an available reg_c can be regarded as support for extensive
	 * flow metadata registers, which implies:
	 * - metadata register copy action by modify header.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return config->flow_mreg_c[2] != REG_NONE;
}

/**
 * Discover the number of available flow priorities.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
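 *
 * Descriptive note: the discovery below is empirical. A drop flow is created
 * on a temporary drop hash Rx queue with Verbs priority 7 and then 15; the
 * highest priority accepted tells whether 8 or 16 Verbs priorities are
 * available, which is then mapped to RTE_DIM(priority_map_3) or
 * RTE_DIM(priority_map_5) flow priority levels respectively.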
495 */ 496 int 497 mlx5_flow_discover_priorities(struct rte_eth_dev *dev) 498 { 499 struct mlx5_priv *priv = dev->data->dev_private; 500 struct { 501 struct ibv_flow_attr attr; 502 struct ibv_flow_spec_eth eth; 503 struct ibv_flow_spec_action_drop drop; 504 } flow_attr = { 505 .attr = { 506 .num_of_specs = 2, 507 .port = (uint8_t)priv->dev_port, 508 }, 509 .eth = { 510 .type = IBV_FLOW_SPEC_ETH, 511 .size = sizeof(struct ibv_flow_spec_eth), 512 }, 513 .drop = { 514 .size = sizeof(struct ibv_flow_spec_action_drop), 515 .type = IBV_FLOW_SPEC_ACTION_DROP, 516 }, 517 }; 518 struct ibv_flow *flow; 519 struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev); 520 uint16_t vprio[] = { 8, 16 }; 521 int i; 522 int priority = 0; 523 524 if (!drop) { 525 rte_errno = ENOTSUP; 526 return -rte_errno; 527 } 528 for (i = 0; i != RTE_DIM(vprio); i++) { 529 flow_attr.attr.priority = vprio[i] - 1; 530 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr); 531 if (!flow) 532 break; 533 claim_zero(mlx5_glue->destroy_flow(flow)); 534 priority = vprio[i]; 535 } 536 mlx5_hrxq_drop_release(dev); 537 switch (priority) { 538 case 8: 539 priority = RTE_DIM(priority_map_3); 540 break; 541 case 16: 542 priority = RTE_DIM(priority_map_5); 543 break; 544 default: 545 rte_errno = ENOTSUP; 546 DRV_LOG(ERR, 547 "port %u verbs maximum priority: %d expected 8/16", 548 dev->data->port_id, priority); 549 return -rte_errno; 550 } 551 DRV_LOG(INFO, "port %u flow maximum priority: %d", 552 dev->data->port_id, priority); 553 return priority; 554 } 555 556 /** 557 * Adjust flow priority based on the highest layer and the request priority. 558 * 559 * @param[in] dev 560 * Pointer to the Ethernet device structure. 561 * @param[in] priority 562 * The rule base priority. 563 * @param[in] subpriority 564 * The priority based on the items. 565 * 566 * @return 567 * The new priority. 568 */ 569 uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority, 570 uint32_t subpriority) 571 { 572 uint32_t res = 0; 573 struct mlx5_priv *priv = dev->data->dev_private; 574 575 switch (priv->config.flow_prio) { 576 case RTE_DIM(priority_map_3): 577 res = priority_map_3[priority][subpriority]; 578 break; 579 case RTE_DIM(priority_map_5): 580 res = priority_map_5[priority][subpriority]; 581 break; 582 } 583 return res; 584 } 585 586 /** 587 * Verify the @p item specifications (spec, last, mask) are compatible with the 588 * NIC capabilities. 589 * 590 * @param[in] item 591 * Item specification. 592 * @param[in] mask 593 * @p item->mask or flow default bit-masks. 594 * @param[in] nic_mask 595 * Bit-masks covering supported fields by the NIC to compare with user mask. 596 * @param[in] size 597 * Bit-masks size in bytes. 598 * @param[out] error 599 * Pointer to error structure. 600 * 601 * @return 602 * 0 on success, a negative errno value otherwise and rte_errno is set. 
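 *
 * Descriptive note: this is purely a capability filter. For every byte the
 * user mask may only enable bits that are also set in @p nic_mask, i.e.
 * (nic_mask[i] | mask[i]) == nic_mask[i]; masks restricted to supported
 * fields pass this check and are further validated by the item-specific
 * helpers.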
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}

/**
 * Adjust the hash fields according to the @p rss_desc information.
 *
 * @param[in] rss_desc
 *   Pointer to the RSS descriptor (struct mlx5_flow_rss_desc).
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}

/**
 * Look up and set the tunnel ptype in the Rx queue data. Only a single ptype
 * can be advertised; if several tunnel types are used on this queue, the
 * tunnel ptype is cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
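 *
 * Descriptive note: the Mark/Flag and tunnel indications are
 * reference-counted per Rx queue (flow_mark_n and flow_tunnels_n[]), so the
 * Rx burst code only pays for mark or tunnel ptype handling while at least
 * one flow on the queue requires it; flow_drv_rxq_flags_trim() performs the
 * inverse bookkeeping when the flow is destroyed.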
717 */ 718 static void 719 flow_drv_rxq_flags_set(struct rte_eth_dev *dev, 720 struct mlx5_flow_handle *dev_handle) 721 { 722 struct mlx5_priv *priv = dev->data->dev_private; 723 const int mark = dev_handle->mark; 724 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 725 struct mlx5_hrxq *hrxq; 726 unsigned int i; 727 728 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 729 return; 730 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 731 dev_handle->rix_hrxq); 732 if (!hrxq) 733 return; 734 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 735 int idx = hrxq->ind_table->queues[i]; 736 struct mlx5_rxq_ctrl *rxq_ctrl = 737 container_of((*priv->rxqs)[idx], 738 struct mlx5_rxq_ctrl, rxq); 739 740 /* 741 * To support metadata register copy on Tx loopback, 742 * this must be always enabled (metadata may arive 743 * from other port - not from local flows only. 744 */ 745 if (priv->config.dv_flow_en && 746 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 747 mlx5_flow_ext_mreg_supported(dev)) { 748 rxq_ctrl->rxq.mark = 1; 749 rxq_ctrl->flow_mark_n = 1; 750 } else if (mark) { 751 rxq_ctrl->rxq.mark = 1; 752 rxq_ctrl->flow_mark_n++; 753 } 754 if (tunnel) { 755 unsigned int j; 756 757 /* Increase the counter matching the flow. */ 758 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 759 if ((tunnels_info[j].tunnel & 760 dev_handle->layers) == 761 tunnels_info[j].tunnel) { 762 rxq_ctrl->flow_tunnels_n[j]++; 763 break; 764 } 765 } 766 flow_rxq_tunnel_ptype_update(rxq_ctrl); 767 } 768 } 769 } 770 771 /** 772 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow 773 * 774 * @param[in] dev 775 * Pointer to the Ethernet device structure. 776 * @param[in] flow 777 * Pointer to flow structure. 778 */ 779 static void 780 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) 781 { 782 struct mlx5_priv *priv = dev->data->dev_private; 783 uint32_t handle_idx; 784 struct mlx5_flow_handle *dev_handle; 785 786 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 787 handle_idx, dev_handle, next) 788 flow_drv_rxq_flags_set(dev, dev_handle); 789 } 790 791 /** 792 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 793 * device flow if no other flow uses it with the same kind of request. 794 * 795 * @param dev 796 * Pointer to Ethernet device. 797 * @param[in] dev_handle 798 * Pointer to the device flow handle structure. 
799 */ 800 static void 801 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, 802 struct mlx5_flow_handle *dev_handle) 803 { 804 struct mlx5_priv *priv = dev->data->dev_private; 805 const int mark = dev_handle->mark; 806 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 807 struct mlx5_hrxq *hrxq; 808 unsigned int i; 809 810 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 811 return; 812 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 813 dev_handle->rix_hrxq); 814 if (!hrxq) 815 return; 816 MLX5_ASSERT(dev->data->dev_started); 817 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 818 int idx = hrxq->ind_table->queues[i]; 819 struct mlx5_rxq_ctrl *rxq_ctrl = 820 container_of((*priv->rxqs)[idx], 821 struct mlx5_rxq_ctrl, rxq); 822 823 if (priv->config.dv_flow_en && 824 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 825 mlx5_flow_ext_mreg_supported(dev)) { 826 rxq_ctrl->rxq.mark = 1; 827 rxq_ctrl->flow_mark_n = 1; 828 } else if (mark) { 829 rxq_ctrl->flow_mark_n--; 830 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 831 } 832 if (tunnel) { 833 unsigned int j; 834 835 /* Decrease the counter matching the flow. */ 836 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 837 if ((tunnels_info[j].tunnel & 838 dev_handle->layers) == 839 tunnels_info[j].tunnel) { 840 rxq_ctrl->flow_tunnels_n[j]--; 841 break; 842 } 843 } 844 flow_rxq_tunnel_ptype_update(rxq_ctrl); 845 } 846 } 847 } 848 849 /** 850 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 851 * @p flow if no other flow uses it with the same kind of request. 852 * 853 * @param dev 854 * Pointer to Ethernet device. 855 * @param[in] flow 856 * Pointer to the flow. 857 */ 858 static void 859 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 860 { 861 struct mlx5_priv *priv = dev->data->dev_private; 862 uint32_t handle_idx; 863 struct mlx5_flow_handle *dev_handle; 864 865 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 866 handle_idx, dev_handle, next) 867 flow_drv_rxq_flags_trim(dev, dev_handle); 868 } 869 870 /** 871 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 872 * 873 * @param dev 874 * Pointer to Ethernet device. 875 */ 876 static void 877 flow_rxq_flags_clear(struct rte_eth_dev *dev) 878 { 879 struct mlx5_priv *priv = dev->data->dev_private; 880 unsigned int i; 881 882 for (i = 0; i != priv->rxqs_n; ++i) { 883 struct mlx5_rxq_ctrl *rxq_ctrl; 884 unsigned int j; 885 886 if (!(*priv->rxqs)[i]) 887 continue; 888 rxq_ctrl = container_of((*priv->rxqs)[i], 889 struct mlx5_rxq_ctrl, rxq); 890 rxq_ctrl->flow_mark_n = 0; 891 rxq_ctrl->rxq.mark = 0; 892 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 893 rxq_ctrl->flow_tunnels_n[j] = 0; 894 rxq_ctrl->rxq.tunnel = 0; 895 } 896 } 897 898 /** 899 * Set the Rx queue dynamic metadata (mask and offset) for a flow 900 * 901 * @param[in] dev 902 * Pointer to the Ethernet device structure. 
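 *
 * A minimal sketch of how the cached offset/mask is consumed on the Rx
 * datapath (illustrative only, the local names below are hypothetical):
 *
 *   if (rxq->dynf_meta && (meta = cqe_meta & rxq->flow_meta_mask))
 *           *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset,
 *                              uint32_t *) = meta;
 *
 * i.e. the metadata value is stored into the mbuf dynamic field registered
 * via rte_flow_dynf_metadata_register().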
903 */ 904 void 905 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) 906 { 907 struct mlx5_priv *priv = dev->data->dev_private; 908 struct mlx5_rxq_data *data; 909 unsigned int i; 910 911 for (i = 0; i != priv->rxqs_n; ++i) { 912 if (!(*priv->rxqs)[i]) 913 continue; 914 data = (*priv->rxqs)[i]; 915 if (!rte_flow_dynf_metadata_avail()) { 916 data->dynf_meta = 0; 917 data->flow_meta_mask = 0; 918 data->flow_meta_offset = -1; 919 } else { 920 data->dynf_meta = 1; 921 data->flow_meta_mask = rte_flow_dynf_metadata_mask; 922 data->flow_meta_offset = rte_flow_dynf_metadata_offs; 923 } 924 } 925 } 926 927 /* 928 * return a pointer to the desired action in the list of actions. 929 * 930 * @param[in] actions 931 * The list of actions to search the action in. 932 * @param[in] action 933 * The action to find. 934 * 935 * @return 936 * Pointer to the action in the list, if found. NULL otherwise. 937 */ 938 const struct rte_flow_action * 939 mlx5_flow_find_action(const struct rte_flow_action *actions, 940 enum rte_flow_action_type action) 941 { 942 if (actions == NULL) 943 return NULL; 944 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) 945 if (actions->type == action) 946 return actions; 947 return NULL; 948 } 949 950 /* 951 * Validate the flag action. 952 * 953 * @param[in] action_flags 954 * Bit-fields that holds the actions detected until now. 955 * @param[in] attr 956 * Attributes of flow that includes this action. 957 * @param[out] error 958 * Pointer to error structure. 959 * 960 * @return 961 * 0 on success, a negative errno value otherwise and rte_errno is set. 962 */ 963 int 964 mlx5_flow_validate_action_flag(uint64_t action_flags, 965 const struct rte_flow_attr *attr, 966 struct rte_flow_error *error) 967 { 968 if (action_flags & MLX5_FLOW_ACTION_MARK) 969 return rte_flow_error_set(error, EINVAL, 970 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 971 "can't mark and flag in same flow"); 972 if (action_flags & MLX5_FLOW_ACTION_FLAG) 973 return rte_flow_error_set(error, EINVAL, 974 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 975 "can't have 2 flag" 976 " actions in same flow"); 977 if (attr->egress) 978 return rte_flow_error_set(error, ENOTSUP, 979 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 980 "flag action not supported for " 981 "egress"); 982 return 0; 983 } 984 985 /* 986 * Validate the mark action. 987 * 988 * @param[in] action 989 * Pointer to the queue action. 990 * @param[in] action_flags 991 * Bit-fields that holds the actions detected until now. 992 * @param[in] attr 993 * Attributes of flow that includes this action. 994 * @param[out] error 995 * Pointer to error structure. 996 * 997 * @return 998 * 0 on success, a negative errno value otherwise and rte_errno is set. 
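 *
 * Illustrative sketch only (not part of the driver): a configuration that
 * passes this validation looks like
 *
 *   struct rte_flow_action_mark mark = { .id = 42 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * with queue defined elsewhere, mark.id < MLX5_FLOW_MARK_MAX and no FLAG or
 * second MARK action in the same flow.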
999 */ 1000 int 1001 mlx5_flow_validate_action_mark(const struct rte_flow_action *action, 1002 uint64_t action_flags, 1003 const struct rte_flow_attr *attr, 1004 struct rte_flow_error *error) 1005 { 1006 const struct rte_flow_action_mark *mark = action->conf; 1007 1008 if (!mark) 1009 return rte_flow_error_set(error, EINVAL, 1010 RTE_FLOW_ERROR_TYPE_ACTION, 1011 action, 1012 "configuration cannot be null"); 1013 if (mark->id >= MLX5_FLOW_MARK_MAX) 1014 return rte_flow_error_set(error, EINVAL, 1015 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1016 &mark->id, 1017 "mark id must in 0 <= id < " 1018 RTE_STR(MLX5_FLOW_MARK_MAX)); 1019 if (action_flags & MLX5_FLOW_ACTION_FLAG) 1020 return rte_flow_error_set(error, EINVAL, 1021 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1022 "can't flag and mark in same flow"); 1023 if (action_flags & MLX5_FLOW_ACTION_MARK) 1024 return rte_flow_error_set(error, EINVAL, 1025 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1026 "can't have 2 mark actions in same" 1027 " flow"); 1028 if (attr->egress) 1029 return rte_flow_error_set(error, ENOTSUP, 1030 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1031 "mark action not supported for " 1032 "egress"); 1033 return 0; 1034 } 1035 1036 /* 1037 * Validate the drop action. 1038 * 1039 * @param[in] action_flags 1040 * Bit-fields that holds the actions detected until now. 1041 * @param[in] attr 1042 * Attributes of flow that includes this action. 1043 * @param[out] error 1044 * Pointer to error structure. 1045 * 1046 * @return 1047 * 0 on success, a negative errno value otherwise and rte_errno is set. 1048 */ 1049 int 1050 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused, 1051 const struct rte_flow_attr *attr, 1052 struct rte_flow_error *error) 1053 { 1054 if (attr->egress) 1055 return rte_flow_error_set(error, ENOTSUP, 1056 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1057 "drop action not supported for " 1058 "egress"); 1059 return 0; 1060 } 1061 1062 /* 1063 * Validate the queue action. 1064 * 1065 * @param[in] action 1066 * Pointer to the queue action. 1067 * @param[in] action_flags 1068 * Bit-fields that holds the actions detected until now. 1069 * @param[in] dev 1070 * Pointer to the Ethernet device structure. 1071 * @param[in] attr 1072 * Attributes of flow that includes this action. 1073 * @param[out] error 1074 * Pointer to error structure. 1075 * 1076 * @return 1077 * 0 on success, a negative errno value otherwise and rte_errno is set. 
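 *
 * Descriptive note: QUEUE is a fate action, so it is mutually exclusive with
 * the other fate actions (RSS, DROP, ...) in the same flow, and the index
 * must refer to an Rx queue that is actually configured on the port.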
1078 */ 1079 int 1080 mlx5_flow_validate_action_queue(const struct rte_flow_action *action, 1081 uint64_t action_flags, 1082 struct rte_eth_dev *dev, 1083 const struct rte_flow_attr *attr, 1084 struct rte_flow_error *error) 1085 { 1086 struct mlx5_priv *priv = dev->data->dev_private; 1087 const struct rte_flow_action_queue *queue = action->conf; 1088 1089 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1090 return rte_flow_error_set(error, EINVAL, 1091 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1092 "can't have 2 fate actions in" 1093 " same flow"); 1094 if (!priv->rxqs_n) 1095 return rte_flow_error_set(error, EINVAL, 1096 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1097 NULL, "No Rx queues configured"); 1098 if (queue->index >= priv->rxqs_n) 1099 return rte_flow_error_set(error, EINVAL, 1100 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1101 &queue->index, 1102 "queue index out of range"); 1103 if (!(*priv->rxqs)[queue->index]) 1104 return rte_flow_error_set(error, EINVAL, 1105 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1106 &queue->index, 1107 "queue is not configured"); 1108 if (attr->egress) 1109 return rte_flow_error_set(error, ENOTSUP, 1110 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1111 "queue action not supported for " 1112 "egress"); 1113 return 0; 1114 } 1115 1116 /* 1117 * Validate the rss action. 1118 * 1119 * @param[in] action 1120 * Pointer to the queue action. 1121 * @param[in] action_flags 1122 * Bit-fields that holds the actions detected until now. 1123 * @param[in] dev 1124 * Pointer to the Ethernet device structure. 1125 * @param[in] attr 1126 * Attributes of flow that includes this action. 1127 * @param[in] item_flags 1128 * Items that were detected. 1129 * @param[out] error 1130 * Pointer to error structure. 1131 * 1132 * @return 1133 * 0 on success, a negative errno value otherwise and rte_errno is set. 1134 */ 1135 int 1136 mlx5_flow_validate_action_rss(const struct rte_flow_action *action, 1137 uint64_t action_flags, 1138 struct rte_eth_dev *dev, 1139 const struct rte_flow_attr *attr, 1140 uint64_t item_flags, 1141 struct rte_flow_error *error) 1142 { 1143 struct mlx5_priv *priv = dev->data->dev_private; 1144 const struct rte_flow_action_rss *rss = action->conf; 1145 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1146 unsigned int i; 1147 1148 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1149 return rte_flow_error_set(error, EINVAL, 1150 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1151 "can't have 2 fate actions" 1152 " in same flow"); 1153 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 1154 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 1155 return rte_flow_error_set(error, ENOTSUP, 1156 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1157 &rss->func, 1158 "RSS hash function not supported"); 1159 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1160 if (rss->level > 2) 1161 #else 1162 if (rss->level > 1) 1163 #endif 1164 return rte_flow_error_set(error, ENOTSUP, 1165 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1166 &rss->level, 1167 "tunnel RSS is not supported"); 1168 /* allow RSS key_len 0 in case of NULL (default) RSS key. 
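	 * (i.e. rss->key == NULL together with key_len == 0 selects the
	 * driver default key; when a key is supplied its length must be
	 * exactly MLX5_RSS_HASH_KEY_LEN bytes, as enforced below).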
*/ 1169 if (rss->key_len == 0 && rss->key != NULL) 1170 return rte_flow_error_set(error, ENOTSUP, 1171 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1172 &rss->key_len, 1173 "RSS hash key length 0"); 1174 if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN) 1175 return rte_flow_error_set(error, ENOTSUP, 1176 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1177 &rss->key_len, 1178 "RSS hash key too small"); 1179 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 1180 return rte_flow_error_set(error, ENOTSUP, 1181 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1182 &rss->key_len, 1183 "RSS hash key too large"); 1184 if (rss->queue_num > priv->config.ind_table_max_size) 1185 return rte_flow_error_set(error, ENOTSUP, 1186 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1187 &rss->queue_num, 1188 "number of queues too large"); 1189 if (rss->types & MLX5_RSS_HF_MASK) 1190 return rte_flow_error_set(error, ENOTSUP, 1191 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1192 &rss->types, 1193 "some RSS protocols are not" 1194 " supported"); 1195 if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) && 1196 !(rss->types & ETH_RSS_IP)) 1197 return rte_flow_error_set(error, EINVAL, 1198 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1199 "L3 partial RSS requested but L3 RSS" 1200 " type not specified"); 1201 if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) && 1202 !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP))) 1203 return rte_flow_error_set(error, EINVAL, 1204 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1205 "L4 partial RSS requested but L4 RSS" 1206 " type not specified"); 1207 if (!priv->rxqs_n) 1208 return rte_flow_error_set(error, EINVAL, 1209 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1210 NULL, "No Rx queues configured"); 1211 if (!rss->queue_num) 1212 return rte_flow_error_set(error, EINVAL, 1213 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1214 NULL, "No queues configured"); 1215 for (i = 0; i != rss->queue_num; ++i) { 1216 if (rss->queue[i] >= priv->rxqs_n) 1217 return rte_flow_error_set 1218 (error, EINVAL, 1219 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1220 &rss->queue[i], "queue index out of range"); 1221 if (!(*priv->rxqs)[rss->queue[i]]) 1222 return rte_flow_error_set 1223 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1224 &rss->queue[i], "queue is not configured"); 1225 } 1226 if (attr->egress) 1227 return rte_flow_error_set(error, ENOTSUP, 1228 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1229 "rss action not supported for " 1230 "egress"); 1231 if (rss->level > 1 && !tunnel) 1232 return rte_flow_error_set(error, EINVAL, 1233 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1234 "inner RSS is not supported for " 1235 "non-tunnel flows"); 1236 if ((item_flags & MLX5_FLOW_LAYER_ECPRI) && 1237 !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) { 1238 return rte_flow_error_set(error, EINVAL, 1239 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1240 "RSS on eCPRI is not supported now"); 1241 } 1242 return 0; 1243 } 1244 1245 /* 1246 * Validate the default miss action. 1247 * 1248 * @param[in] action_flags 1249 * Bit-fields that holds the actions detected until now. 1250 * @param[out] error 1251 * Pointer to error structure. 1252 * 1253 * @return 1254 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1255 */ 1256 int 1257 mlx5_flow_validate_action_default_miss(uint64_t action_flags, 1258 const struct rte_flow_attr *attr, 1259 struct rte_flow_error *error) 1260 { 1261 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1262 return rte_flow_error_set(error, EINVAL, 1263 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1264 "can't have 2 fate actions in" 1265 " same flow"); 1266 if (attr->egress) 1267 return rte_flow_error_set(error, ENOTSUP, 1268 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1269 "default miss action not supported " 1270 "for egress"); 1271 if (attr->group) 1272 return rte_flow_error_set(error, ENOTSUP, 1273 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL, 1274 "only group 0 is supported"); 1275 if (attr->transfer) 1276 return rte_flow_error_set(error, ENOTSUP, 1277 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1278 NULL, "transfer is not supported"); 1279 return 0; 1280 } 1281 1282 /* 1283 * Validate the count action. 1284 * 1285 * @param[in] dev 1286 * Pointer to the Ethernet device structure. 1287 * @param[in] attr 1288 * Attributes of flow that includes this action. 1289 * @param[out] error 1290 * Pointer to error structure. 1291 * 1292 * @return 1293 * 0 on success, a negative errno value otherwise and rte_errno is set. 1294 */ 1295 int 1296 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, 1297 const struct rte_flow_attr *attr, 1298 struct rte_flow_error *error) 1299 { 1300 if (attr->egress) 1301 return rte_flow_error_set(error, ENOTSUP, 1302 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1303 "count action not supported for " 1304 "egress"); 1305 return 0; 1306 } 1307 1308 /** 1309 * Verify the @p attributes will be correctly understood by the NIC and store 1310 * them in the @p flow if everything is correct. 1311 * 1312 * @param[in] dev 1313 * Pointer to the Ethernet device structure. 1314 * @param[in] attributes 1315 * Pointer to flow attributes 1316 * @param[out] error 1317 * Pointer to error structure. 1318 * 1319 * @return 1320 * 0 on success, a negative errno value otherwise and rte_errno is set. 1321 */ 1322 int 1323 mlx5_flow_validate_attributes(struct rte_eth_dev *dev, 1324 const struct rte_flow_attr *attributes, 1325 struct rte_flow_error *error) 1326 { 1327 struct mlx5_priv *priv = dev->data->dev_private; 1328 uint32_t priority_max = priv->config.flow_prio - 1; 1329 1330 if (attributes->group) 1331 return rte_flow_error_set(error, ENOTSUP, 1332 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 1333 NULL, "groups is not supported"); 1334 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 1335 attributes->priority >= priority_max) 1336 return rte_flow_error_set(error, ENOTSUP, 1337 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 1338 NULL, "priority out of range"); 1339 if (attributes->egress) 1340 return rte_flow_error_set(error, ENOTSUP, 1341 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1342 "egress is not supported"); 1343 if (attributes->transfer && !priv->config.dv_esw_en) 1344 return rte_flow_error_set(error, ENOTSUP, 1345 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1346 NULL, "transfer is not supported"); 1347 if (!attributes->ingress) 1348 return rte_flow_error_set(error, EINVAL, 1349 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 1350 NULL, 1351 "ingress attribute is mandatory"); 1352 return 0; 1353 } 1354 1355 /** 1356 * Validate ICMP6 item. 1357 * 1358 * @param[in] item 1359 * Item specification. 1360 * @param[in] item_flags 1361 * Bit-fields that holds the items detected until now. 1362 * @param[out] error 1363 * Pointer to error structure. 1364 * 1365 * @return 1366 * 0 on success, a negative errno value otherwise and rte_errno is set. 
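 *
 * Illustrative note: the checks below require an IPv6 layer (outer or inner,
 * depending on the tunnel context) before the ICMP6 item and reject any
 * other L4 layer, so a typical accepted pattern is
 *
 *   eth / ipv6 / icmp6 / end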
1367 */ 1368 int 1369 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, 1370 uint64_t item_flags, 1371 uint8_t target_protocol, 1372 struct rte_flow_error *error) 1373 { 1374 const struct rte_flow_item_icmp6 *mask = item->mask; 1375 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1376 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1377 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1378 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1379 MLX5_FLOW_LAYER_OUTER_L4; 1380 int ret; 1381 1382 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) 1383 return rte_flow_error_set(error, EINVAL, 1384 RTE_FLOW_ERROR_TYPE_ITEM, item, 1385 "protocol filtering not compatible" 1386 " with ICMP6 layer"); 1387 if (!(item_flags & l3m)) 1388 return rte_flow_error_set(error, EINVAL, 1389 RTE_FLOW_ERROR_TYPE_ITEM, item, 1390 "IPv6 is mandatory to filter on" 1391 " ICMP6"); 1392 if (item_flags & l4m) 1393 return rte_flow_error_set(error, EINVAL, 1394 RTE_FLOW_ERROR_TYPE_ITEM, item, 1395 "multiple L4 layers not supported"); 1396 if (!mask) 1397 mask = &rte_flow_item_icmp6_mask; 1398 ret = mlx5_flow_item_acceptable 1399 (item, (const uint8_t *)mask, 1400 (const uint8_t *)&rte_flow_item_icmp6_mask, 1401 sizeof(struct rte_flow_item_icmp6), error); 1402 if (ret < 0) 1403 return ret; 1404 return 0; 1405 } 1406 1407 /** 1408 * Validate ICMP item. 1409 * 1410 * @param[in] item 1411 * Item specification. 1412 * @param[in] item_flags 1413 * Bit-fields that holds the items detected until now. 1414 * @param[out] error 1415 * Pointer to error structure. 1416 * 1417 * @return 1418 * 0 on success, a negative errno value otherwise and rte_errno is set. 1419 */ 1420 int 1421 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, 1422 uint64_t item_flags, 1423 uint8_t target_protocol, 1424 struct rte_flow_error *error) 1425 { 1426 const struct rte_flow_item_icmp *mask = item->mask; 1427 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1428 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 1429 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 1430 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1431 MLX5_FLOW_LAYER_OUTER_L4; 1432 int ret; 1433 1434 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP) 1435 return rte_flow_error_set(error, EINVAL, 1436 RTE_FLOW_ERROR_TYPE_ITEM, item, 1437 "protocol filtering not compatible" 1438 " with ICMP layer"); 1439 if (!(item_flags & l3m)) 1440 return rte_flow_error_set(error, EINVAL, 1441 RTE_FLOW_ERROR_TYPE_ITEM, item, 1442 "IPv4 is mandatory to filter" 1443 " on ICMP"); 1444 if (item_flags & l4m) 1445 return rte_flow_error_set(error, EINVAL, 1446 RTE_FLOW_ERROR_TYPE_ITEM, item, 1447 "multiple L4 layers not supported"); 1448 if (!mask) 1449 mask = &rte_flow_item_icmp_mask; 1450 ret = mlx5_flow_item_acceptable 1451 (item, (const uint8_t *)mask, 1452 (const uint8_t *)&rte_flow_item_icmp_mask, 1453 sizeof(struct rte_flow_item_icmp), error); 1454 if (ret < 0) 1455 return ret; 1456 return 0; 1457 } 1458 1459 /** 1460 * Validate Ethernet item. 1461 * 1462 * @param[in] item 1463 * Item specification. 1464 * @param[in] item_flags 1465 * Bit-fields that holds the items detected until now. 1466 * @param[out] error 1467 * Pointer to error structure. 1468 * 1469 * @return 1470 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1471 */ 1472 int 1473 mlx5_flow_validate_item_eth(const struct rte_flow_item *item, 1474 uint64_t item_flags, 1475 struct rte_flow_error *error) 1476 { 1477 const struct rte_flow_item_eth *mask = item->mask; 1478 const struct rte_flow_item_eth nic_mask = { 1479 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1480 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1481 .type = RTE_BE16(0xffff), 1482 }; 1483 int ret; 1484 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1485 const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 1486 MLX5_FLOW_LAYER_OUTER_L2; 1487 1488 if (item_flags & ethm) 1489 return rte_flow_error_set(error, ENOTSUP, 1490 RTE_FLOW_ERROR_TYPE_ITEM, item, 1491 "multiple L2 layers not supported"); 1492 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) || 1493 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3))) 1494 return rte_flow_error_set(error, EINVAL, 1495 RTE_FLOW_ERROR_TYPE_ITEM, item, 1496 "L2 layer should not follow " 1497 "L3 layers"); 1498 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) || 1499 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN))) 1500 return rte_flow_error_set(error, EINVAL, 1501 RTE_FLOW_ERROR_TYPE_ITEM, item, 1502 "L2 layer should not follow VLAN"); 1503 if (!mask) 1504 mask = &rte_flow_item_eth_mask; 1505 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1506 (const uint8_t *)&nic_mask, 1507 sizeof(struct rte_flow_item_eth), 1508 error); 1509 return ret; 1510 } 1511 1512 /** 1513 * Validate VLAN item. 1514 * 1515 * @param[in] item 1516 * Item specification. 1517 * @param[in] item_flags 1518 * Bit-fields that holds the items detected until now. 1519 * @param[in] dev 1520 * Ethernet device flow is being created on. 1521 * @param[out] error 1522 * Pointer to error structure. 1523 * 1524 * @return 1525 * 0 on success, a negative errno value otherwise and rte_errno is set. 1526 */ 1527 int 1528 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, 1529 uint64_t item_flags, 1530 struct rte_eth_dev *dev, 1531 struct rte_flow_error *error) 1532 { 1533 const struct rte_flow_item_vlan *spec = item->spec; 1534 const struct rte_flow_item_vlan *mask = item->mask; 1535 const struct rte_flow_item_vlan nic_mask = { 1536 .tci = RTE_BE16(UINT16_MAX), 1537 .inner_type = RTE_BE16(UINT16_MAX), 1538 }; 1539 uint16_t vlan_tag = 0; 1540 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1541 int ret; 1542 const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 1543 MLX5_FLOW_LAYER_INNER_L4) : 1544 (MLX5_FLOW_LAYER_OUTER_L3 | 1545 MLX5_FLOW_LAYER_OUTER_L4); 1546 const uint64_t vlanm = tunnel ? 
MLX5_FLOW_LAYER_INNER_VLAN : 1547 MLX5_FLOW_LAYER_OUTER_VLAN; 1548 1549 if (item_flags & vlanm) 1550 return rte_flow_error_set(error, EINVAL, 1551 RTE_FLOW_ERROR_TYPE_ITEM, item, 1552 "multiple VLAN layers not supported"); 1553 else if ((item_flags & l34m) != 0) 1554 return rte_flow_error_set(error, EINVAL, 1555 RTE_FLOW_ERROR_TYPE_ITEM, item, 1556 "VLAN cannot follow L3/L4 layer"); 1557 if (!mask) 1558 mask = &rte_flow_item_vlan_mask; 1559 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1560 (const uint8_t *)&nic_mask, 1561 sizeof(struct rte_flow_item_vlan), 1562 error); 1563 if (ret) 1564 return ret; 1565 if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { 1566 struct mlx5_priv *priv = dev->data->dev_private; 1567 1568 if (priv->vmwa_context) { 1569 /* 1570 * Non-NULL context means we have a virtual machine 1571 * and SR-IOV enabled, we have to create VLAN interface 1572 * to make hypervisor to setup E-Switch vport 1573 * context correctly. We avoid creating the multiple 1574 * VLAN interfaces, so we cannot support VLAN tag mask. 1575 */ 1576 return rte_flow_error_set(error, EINVAL, 1577 RTE_FLOW_ERROR_TYPE_ITEM, 1578 item, 1579 "VLAN tag mask is not" 1580 " supported in virtual" 1581 " environment"); 1582 } 1583 } 1584 if (spec) { 1585 vlan_tag = spec->tci; 1586 vlan_tag &= mask->tci; 1587 } 1588 /* 1589 * From verbs perspective an empty VLAN is equivalent 1590 * to a packet without VLAN layer. 1591 */ 1592 if (!vlan_tag) 1593 return rte_flow_error_set(error, EINVAL, 1594 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 1595 item->spec, 1596 "VLAN cannot be empty"); 1597 return 0; 1598 } 1599 1600 /** 1601 * Validate IPV4 item. 1602 * 1603 * @param[in] item 1604 * Item specification. 1605 * @param[in] item_flags 1606 * Bit-fields that holds the items detected until now. 1607 * @param[in] last_item 1608 * Previous validated item in the pattern items. 1609 * @param[in] ether_type 1610 * Type in the ethernet layer header (including dot1q). 1611 * @param[in] acc_mask 1612 * Acceptable mask, if NULL default internal default mask 1613 * will be used to check whether item fields are supported. 1614 * @param[out] error 1615 * Pointer to error structure. 1616 * 1617 * @return 1618 * 0 on success, a negative errno value otherwise and rte_errno is set. 1619 */ 1620 int 1621 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, 1622 uint64_t item_flags, 1623 uint64_t last_item, 1624 uint16_t ether_type, 1625 const struct rte_flow_item_ipv4 *acc_mask, 1626 struct rte_flow_error *error) 1627 { 1628 const struct rte_flow_item_ipv4 *mask = item->mask; 1629 const struct rte_flow_item_ipv4 *spec = item->spec; 1630 const struct rte_flow_item_ipv4 nic_mask = { 1631 .hdr = { 1632 .src_addr = RTE_BE32(0xffffffff), 1633 .dst_addr = RTE_BE32(0xffffffff), 1634 .type_of_service = 0xff, 1635 .next_proto_id = 0xff, 1636 }, 1637 }; 1638 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1639 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1640 MLX5_FLOW_LAYER_OUTER_L3; 1641 const uint64_t l4m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4 : 1642 MLX5_FLOW_LAYER_OUTER_L4; 1643 int ret; 1644 uint8_t next_proto = 0xFF; 1645 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1646 MLX5_FLOW_LAYER_OUTER_VLAN | 1647 MLX5_FLOW_LAYER_INNER_VLAN); 1648 1649 if ((last_item & l2_vlan) && ether_type && 1650 ether_type != RTE_ETHER_TYPE_IPV4) 1651 return rte_flow_error_set(error, EINVAL, 1652 RTE_FLOW_ERROR_TYPE_ITEM, item, 1653 "IPv4 cannot follow L2/VLAN layer " 1654 "which ether type is not IPv4"); 1655 if (item_flags & MLX5_FLOW_LAYER_IPIP) { 1656 if (mask && spec) 1657 next_proto = mask->hdr.next_proto_id & 1658 spec->hdr.next_proto_id; 1659 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1660 return rte_flow_error_set(error, EINVAL, 1661 RTE_FLOW_ERROR_TYPE_ITEM, 1662 item, 1663 "multiple tunnel " 1664 "not supported"); 1665 } 1666 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 1667 return rte_flow_error_set(error, EINVAL, 1668 RTE_FLOW_ERROR_TYPE_ITEM, item, 1669 "wrong tunnel type - IPv6 specified " 1670 "but IPv4 item provided"); 1671 if (item_flags & l3m) 1672 return rte_flow_error_set(error, ENOTSUP, 1673 RTE_FLOW_ERROR_TYPE_ITEM, item, 1674 "multiple L3 layers not supported"); 1675 else if (item_flags & l4m) 1676 return rte_flow_error_set(error, EINVAL, 1677 RTE_FLOW_ERROR_TYPE_ITEM, item, 1678 "L3 cannot follow an L4 layer."); 1679 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1680 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1681 return rte_flow_error_set(error, EINVAL, 1682 RTE_FLOW_ERROR_TYPE_ITEM, item, 1683 "L3 cannot follow an NVGRE layer."); 1684 if (!mask) 1685 mask = &rte_flow_item_ipv4_mask; 1686 else if (mask->hdr.next_proto_id != 0 && 1687 mask->hdr.next_proto_id != 0xff) 1688 return rte_flow_error_set(error, EINVAL, 1689 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 1690 "partial mask is not supported" 1691 " for protocol"); 1692 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1693 acc_mask ? (const uint8_t *)acc_mask 1694 : (const uint8_t *)&nic_mask, 1695 sizeof(struct rte_flow_item_ipv4), 1696 error); 1697 if (ret < 0) 1698 return ret; 1699 return 0; 1700 } 1701 1702 /** 1703 * Validate IPV6 item. 1704 * 1705 * @param[in] item 1706 * Item specification. 1707 * @param[in] item_flags 1708 * Bit-fields that holds the items detected until now. 1709 * @param[in] last_item 1710 * Previous validated item in the pattern items. 1711 * @param[in] ether_type 1712 * Type in the ethernet layer header (including dot1q). 1713 * @param[in] acc_mask 1714 * Acceptable mask, if NULL default internal default mask 1715 * will be used to check whether item fields are supported. 1716 * @param[out] error 1717 * Pointer to error structure. 1718 * 1719 * @return 1720 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1721 */ 1722 int 1723 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 1724 uint64_t item_flags, 1725 uint64_t last_item, 1726 uint16_t ether_type, 1727 const struct rte_flow_item_ipv6 *acc_mask, 1728 struct rte_flow_error *error) 1729 { 1730 const struct rte_flow_item_ipv6 *mask = item->mask; 1731 const struct rte_flow_item_ipv6 *spec = item->spec; 1732 const struct rte_flow_item_ipv6 nic_mask = { 1733 .hdr = { 1734 .src_addr = 1735 "\xff\xff\xff\xff\xff\xff\xff\xff" 1736 "\xff\xff\xff\xff\xff\xff\xff\xff", 1737 .dst_addr = 1738 "\xff\xff\xff\xff\xff\xff\xff\xff" 1739 "\xff\xff\xff\xff\xff\xff\xff\xff", 1740 .vtc_flow = RTE_BE32(0xffffffff), 1741 .proto = 0xff, 1742 }, 1743 }; 1744 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1745 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1746 MLX5_FLOW_LAYER_OUTER_L3; 1747 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1748 MLX5_FLOW_LAYER_OUTER_L4; 1749 int ret; 1750 uint8_t next_proto = 0xFF; 1751 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1752 MLX5_FLOW_LAYER_OUTER_VLAN | 1753 MLX5_FLOW_LAYER_INNER_VLAN); 1754 1755 if ((last_item & l2_vlan) && ether_type && 1756 ether_type != RTE_ETHER_TYPE_IPV6) 1757 return rte_flow_error_set(error, EINVAL, 1758 RTE_FLOW_ERROR_TYPE_ITEM, item, 1759 "IPv6 cannot follow L2/VLAN layer " 1760 "which ether type is not IPv6"); 1761 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { 1762 if (mask && spec) 1763 next_proto = mask->hdr.proto & spec->hdr.proto; 1764 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1765 return rte_flow_error_set(error, EINVAL, 1766 RTE_FLOW_ERROR_TYPE_ITEM, 1767 item, 1768 "multiple tunnel " 1769 "not supported"); 1770 } 1771 if (item_flags & MLX5_FLOW_LAYER_IPIP) 1772 return rte_flow_error_set(error, EINVAL, 1773 RTE_FLOW_ERROR_TYPE_ITEM, item, 1774 "wrong tunnel type - IPv4 specified " 1775 "but IPv6 item provided"); 1776 if (item_flags & l3m) 1777 return rte_flow_error_set(error, ENOTSUP, 1778 RTE_FLOW_ERROR_TYPE_ITEM, item, 1779 "multiple L3 layers not supported"); 1780 else if (item_flags & l4m) 1781 return rte_flow_error_set(error, EINVAL, 1782 RTE_FLOW_ERROR_TYPE_ITEM, item, 1783 "L3 cannot follow an L4 layer."); 1784 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1785 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1786 return rte_flow_error_set(error, EINVAL, 1787 RTE_FLOW_ERROR_TYPE_ITEM, item, 1788 "L3 cannot follow an NVGRE layer."); 1789 if (!mask) 1790 mask = &rte_flow_item_ipv6_mask; 1791 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1792 acc_mask ? (const uint8_t *)acc_mask 1793 : (const uint8_t *)&nic_mask, 1794 sizeof(struct rte_flow_item_ipv6), 1795 error); 1796 if (ret < 0) 1797 return ret; 1798 return 0; 1799 } 1800 1801 /** 1802 * Validate UDP item. 1803 * 1804 * @param[in] item 1805 * Item specification. 1806 * @param[in] item_flags 1807 * Bit-fields that holds the items detected until now. 1808 * @param[in] target_protocol 1809 * The next protocol in the previous item. 1810 * @param[in] flow_mask 1811 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 1812 * @param[out] error 1813 * Pointer to error structure. 1814 * 1815 * @return 1816 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1817 */ 1818 int 1819 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 1820 uint64_t item_flags, 1821 uint8_t target_protocol, 1822 struct rte_flow_error *error) 1823 { 1824 const struct rte_flow_item_udp *mask = item->mask; 1825 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1826 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1827 MLX5_FLOW_LAYER_OUTER_L3; 1828 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1829 MLX5_FLOW_LAYER_OUTER_L4; 1830 int ret; 1831 1832 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 1833 return rte_flow_error_set(error, EINVAL, 1834 RTE_FLOW_ERROR_TYPE_ITEM, item, 1835 "protocol filtering not compatible" 1836 " with UDP layer"); 1837 if (!(item_flags & l3m)) 1838 return rte_flow_error_set(error, EINVAL, 1839 RTE_FLOW_ERROR_TYPE_ITEM, item, 1840 "L3 is mandatory to filter on L4"); 1841 if (item_flags & l4m) 1842 return rte_flow_error_set(error, EINVAL, 1843 RTE_FLOW_ERROR_TYPE_ITEM, item, 1844 "multiple L4 layers not supported"); 1845 if (!mask) 1846 mask = &rte_flow_item_udp_mask; 1847 ret = mlx5_flow_item_acceptable 1848 (item, (const uint8_t *)mask, 1849 (const uint8_t *)&rte_flow_item_udp_mask, 1850 sizeof(struct rte_flow_item_udp), error); 1851 if (ret < 0) 1852 return ret; 1853 return 0; 1854 } 1855 1856 /** 1857 * Validate TCP item. 1858 * 1859 * @param[in] item 1860 * Item specification. 1861 * @param[in] item_flags 1862 * Bit-fields that holds the items detected until now. 1863 * @param[in] target_protocol 1864 * The next protocol in the previous item. 1865 * @param[out] error 1866 * Pointer to error structure. 1867 * 1868 * @return 1869 * 0 on success, a negative errno value otherwise and rte_errno is set. 1870 */ 1871 int 1872 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 1873 uint64_t item_flags, 1874 uint8_t target_protocol, 1875 const struct rte_flow_item_tcp *flow_mask, 1876 struct rte_flow_error *error) 1877 { 1878 const struct rte_flow_item_tcp *mask = item->mask; 1879 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1880 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1881 MLX5_FLOW_LAYER_OUTER_L3; 1882 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1883 MLX5_FLOW_LAYER_OUTER_L4; 1884 int ret; 1885 1886 MLX5_ASSERT(flow_mask); 1887 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 1888 return rte_flow_error_set(error, EINVAL, 1889 RTE_FLOW_ERROR_TYPE_ITEM, item, 1890 "protocol filtering not compatible" 1891 " with TCP layer"); 1892 if (!(item_flags & l3m)) 1893 return rte_flow_error_set(error, EINVAL, 1894 RTE_FLOW_ERROR_TYPE_ITEM, item, 1895 "L3 is mandatory to filter on L4"); 1896 if (item_flags & l4m) 1897 return rte_flow_error_set(error, EINVAL, 1898 RTE_FLOW_ERROR_TYPE_ITEM, item, 1899 "multiple L4 layers not supported"); 1900 if (!mask) 1901 mask = &rte_flow_item_tcp_mask; 1902 ret = mlx5_flow_item_acceptable 1903 (item, (const uint8_t *)mask, 1904 (const uint8_t *)flow_mask, 1905 sizeof(struct rte_flow_item_tcp), error); 1906 if (ret < 0) 1907 return ret; 1908 return 0; 1909 } 1910 1911 /** 1912 * Validate VXLAN item. 1913 * 1914 * @param[in] item 1915 * Item specification. 1916 * @param[in] item_flags 1917 * Bit-fields that holds the items detected until now. 1918 * @param[in] target_protocol 1919 * The next protocol in the previous item. 1920 * @param[out] error 1921 * Pointer to error structure. 1922 * 1923 * @return 1924 * 0 on success, a negative errno value otherwise and rte_errno is set. 
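 *
 * Illustrative note: as enforced below, VXLAN is only accepted as the first
 * tunnel layer and only on top of an outer UDP layer (RFC 7348), e.g.
 *
 *   eth / ipv4 / udp / vxlan / end
 *
 * and at least the outer headers must be present in the pattern
 * ("VXLAN tunnel must be fully defined").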
 */
int
mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
			      uint64_t item_flags,
			      struct rte_flow_error *error)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	int ret;
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vlan_id = 0, };

	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple tunnel layers not"
					  " supported");
	/*
	 * Verify only UDPv4 is present as defined in
	 * https://tools.ietf.org/html/rfc7348
	 */
	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "no outer UDP layer found");
	if (!mask)
		mask = &rte_flow_item_vxlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_vxlan_mask,
		 sizeof(struct rte_flow_item_vxlan),
		 error);
	if (ret < 0)
		return ret;
	if (spec) {
		memcpy(&id.vni[1], spec->vni, 3);
		memcpy(&id.vni[1], mask->vni, 3);
	}
	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "VXLAN tunnel must be fully defined");
	return 0;
}

/**
 * Validate VXLAN_GPE item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Pointer to the rte_eth_dev structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
1989 */ 1990 int 1991 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 1992 uint64_t item_flags, 1993 struct rte_eth_dev *dev, 1994 struct rte_flow_error *error) 1995 { 1996 struct mlx5_priv *priv = dev->data->dev_private; 1997 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1998 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1999 int ret; 2000 union vni { 2001 uint32_t vlan_id; 2002 uint8_t vni[4]; 2003 } id = { .vlan_id = 0, }; 2004 2005 if (!priv->config.l3_vxlan_en) 2006 return rte_flow_error_set(error, ENOTSUP, 2007 RTE_FLOW_ERROR_TYPE_ITEM, item, 2008 "L3 VXLAN is not enabled by device" 2009 " parameter and/or not configured in" 2010 " firmware"); 2011 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2012 return rte_flow_error_set(error, ENOTSUP, 2013 RTE_FLOW_ERROR_TYPE_ITEM, item, 2014 "multiple tunnel layers not" 2015 " supported"); 2016 /* 2017 * Verify only UDPv4 is present as defined in 2018 * https://tools.ietf.org/html/rfc7348 2019 */ 2020 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2021 return rte_flow_error_set(error, EINVAL, 2022 RTE_FLOW_ERROR_TYPE_ITEM, item, 2023 "no outer UDP layer found"); 2024 if (!mask) 2025 mask = &rte_flow_item_vxlan_gpe_mask; 2026 ret = mlx5_flow_item_acceptable 2027 (item, (const uint8_t *)mask, 2028 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 2029 sizeof(struct rte_flow_item_vxlan_gpe), 2030 error); 2031 if (ret < 0) 2032 return ret; 2033 if (spec) { 2034 if (spec->protocol) 2035 return rte_flow_error_set(error, ENOTSUP, 2036 RTE_FLOW_ERROR_TYPE_ITEM, 2037 item, 2038 "VxLAN-GPE protocol" 2039 " not supported"); 2040 memcpy(&id.vni[1], spec->vni, 3); 2041 memcpy(&id.vni[1], mask->vni, 3); 2042 } 2043 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2044 return rte_flow_error_set(error, ENOTSUP, 2045 RTE_FLOW_ERROR_TYPE_ITEM, item, 2046 "VXLAN-GPE tunnel must be fully" 2047 " defined"); 2048 return 0; 2049 } 2050 /** 2051 * Validate GRE Key item. 2052 * 2053 * @param[in] item 2054 * Item specification. 2055 * @param[in] item_flags 2056 * Bit flags to mark detected items. 2057 * @param[in] gre_item 2058 * Pointer to gre_item 2059 * @param[out] error 2060 * Pointer to error structure. 2061 * 2062 * @return 2063 * 0 on success, a negative errno value otherwise and rte_errno is set. 
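 *
 * When the preceding GRE item carries a spec, its K (key present) bit must
 * be set for a GRE key item to be accepted. An illustrative sketch of
 * matching an exact key (the 0xdeadbeef value is arbitrary):
 *
 * @code
 *	struct rte_flow_item_gre gre_spec = {
 *		.c_rsvd0_ver = RTE_BE16(0x2000),
 *	};
 *	rte_be32_t gre_key_spec = RTE_BE32(0xdeadbeef);
 * @endcode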
2064 */ 2065 int 2066 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 2067 uint64_t item_flags, 2068 const struct rte_flow_item *gre_item, 2069 struct rte_flow_error *error) 2070 { 2071 const rte_be32_t *mask = item->mask; 2072 int ret = 0; 2073 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 2074 const struct rte_flow_item_gre *gre_spec; 2075 const struct rte_flow_item_gre *gre_mask; 2076 2077 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 2078 return rte_flow_error_set(error, ENOTSUP, 2079 RTE_FLOW_ERROR_TYPE_ITEM, item, 2080 "Multiple GRE key not support"); 2081 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 2082 return rte_flow_error_set(error, ENOTSUP, 2083 RTE_FLOW_ERROR_TYPE_ITEM, item, 2084 "No preceding GRE header"); 2085 if (item_flags & MLX5_FLOW_LAYER_INNER) 2086 return rte_flow_error_set(error, ENOTSUP, 2087 RTE_FLOW_ERROR_TYPE_ITEM, item, 2088 "GRE key following a wrong item"); 2089 gre_mask = gre_item->mask; 2090 if (!gre_mask) 2091 gre_mask = &rte_flow_item_gre_mask; 2092 gre_spec = gre_item->spec; 2093 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 2094 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 2095 return rte_flow_error_set(error, EINVAL, 2096 RTE_FLOW_ERROR_TYPE_ITEM, item, 2097 "Key bit must be on"); 2098 2099 if (!mask) 2100 mask = &gre_key_default_mask; 2101 ret = mlx5_flow_item_acceptable 2102 (item, (const uint8_t *)mask, 2103 (const uint8_t *)&gre_key_default_mask, 2104 sizeof(rte_be32_t), error); 2105 return ret; 2106 } 2107 2108 /** 2109 * Validate GRE item. 2110 * 2111 * @param[in] item 2112 * Item specification. 2113 * @param[in] item_flags 2114 * Bit flags to mark detected items. 2115 * @param[in] target_protocol 2116 * The next protocol in the previous item. 2117 * @param[out] error 2118 * Pointer to error structure. 2119 * 2120 * @return 2121 * 0 on success, a negative errno value otherwise and rte_errno is set. 
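 *
 * The widest mask accepted here (see nic_mask below) covers the protocol
 * field plus the C, K and S "present" bits of the first 16-bit word
 * (RTE_BE16(0xB000)); the reserved and version bits cannot be matched.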
 */
int
mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
			    uint64_t item_flags,
			    uint8_t target_protocol,
			    struct rte_flow_error *error)
{
	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	int ret;
	const struct rte_flow_item_gre nic_mask = {
		.c_rsvd0_ver = RTE_BE16(0xB000),
		.protocol = RTE_BE16(UINT16_MAX),
	};

	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with this GRE layer");
	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple tunnel layers not"
					  " supported");
	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 Layer is missing");
	if (!mask)
		mask = &rte_flow_item_gre_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_gre), error);
	if (ret < 0)
		return ret;
#ifndef HAVE_MLX5DV_DR
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	if (spec && (spec->protocol & mask->protocol))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "without MPLS support the"
					  " specification cannot be used for"
					  " filtering");
#endif
#endif
	return 0;
}

/**
 * Validate Geneve item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Pointer to the rte_eth_dev structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
			       uint64_t item_flags,
			       struct rte_eth_dev *dev,
			       struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_item_geneve *spec = item->spec;
	const struct rte_flow_item_geneve *mask = item->mask;
	int ret;
	uint16_t gbhdr;
	uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2200 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2201 const struct rte_flow_item_geneve nic_mask = { 2202 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2203 .vni = "\xff\xff\xff", 2204 .protocol = RTE_BE16(UINT16_MAX), 2205 }; 2206 2207 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2208 return rte_flow_error_set(error, ENOTSUP, 2209 RTE_FLOW_ERROR_TYPE_ITEM, item, 2210 "L3 Geneve is not enabled by device" 2211 " parameter and/or not configured in" 2212 " firmware"); 2213 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2214 return rte_flow_error_set(error, ENOTSUP, 2215 RTE_FLOW_ERROR_TYPE_ITEM, item, 2216 "multiple tunnel layers not" 2217 " supported"); 2218 /* 2219 * Verify only UDPv4 is present as defined in 2220 * https://tools.ietf.org/html/rfc7348 2221 */ 2222 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2223 return rte_flow_error_set(error, EINVAL, 2224 RTE_FLOW_ERROR_TYPE_ITEM, item, 2225 "no outer UDP layer found"); 2226 if (!mask) 2227 mask = &rte_flow_item_geneve_mask; 2228 ret = mlx5_flow_item_acceptable 2229 (item, (const uint8_t *)mask, 2230 (const uint8_t *)&nic_mask, 2231 sizeof(struct rte_flow_item_geneve), error); 2232 if (ret) 2233 return ret; 2234 if (spec) { 2235 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2236 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2237 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2238 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2239 return rte_flow_error_set(error, ENOTSUP, 2240 RTE_FLOW_ERROR_TYPE_ITEM, 2241 item, 2242 "Geneve protocol unsupported" 2243 " fields are being used"); 2244 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2245 return rte_flow_error_set 2246 (error, ENOTSUP, 2247 RTE_FLOW_ERROR_TYPE_ITEM, 2248 item, 2249 "Unsupported Geneve options length"); 2250 } 2251 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2252 return rte_flow_error_set 2253 (error, ENOTSUP, 2254 RTE_FLOW_ERROR_TYPE_ITEM, item, 2255 "Geneve tunnel must be fully defined"); 2256 return 0; 2257 } 2258 2259 /** 2260 * Validate MPLS item. 2261 * 2262 * @param[in] dev 2263 * Pointer to the rte_eth_dev structure. 2264 * @param[in] item 2265 * Item specification. 2266 * @param[in] item_flags 2267 * Bit-fields that holds the items detected until now. 2268 * @param[in] prev_layer 2269 * The protocol layer indicated in previous item. 2270 * @param[out] error 2271 * Pointer to error structure. 2272 * 2273 * @return 2274 * 0 on success, a negative errno value otherwise and rte_errno is set. 2275 */ 2276 int 2277 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2278 const struct rte_flow_item *item __rte_unused, 2279 uint64_t item_flags __rte_unused, 2280 uint64_t prev_layer __rte_unused, 2281 struct rte_flow_error *error) 2282 { 2283 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2284 const struct rte_flow_item_mpls *mask = item->mask; 2285 struct mlx5_priv *priv = dev->data->dev_private; 2286 int ret; 2287 2288 if (!priv->config.mpls_en) 2289 return rte_flow_error_set(error, ENOTSUP, 2290 RTE_FLOW_ERROR_TYPE_ITEM, item, 2291 "MPLS not supported or" 2292 " disabled in firmware" 2293 " configuration."); 2294 /* MPLS over IP, UDP, GRE is allowed */ 2295 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2296 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2297 MLX5_FLOW_LAYER_GRE))) 2298 return rte_flow_error_set(error, EINVAL, 2299 RTE_FLOW_ERROR_TYPE_ITEM, item, 2300 "protocol filtering not compatible" 2301 " with MPLS layer"); 2302 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
*/ 2303 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2304 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2305 return rte_flow_error_set(error, ENOTSUP, 2306 RTE_FLOW_ERROR_TYPE_ITEM, item, 2307 "multiple tunnel layers not" 2308 " supported"); 2309 if (!mask) 2310 mask = &rte_flow_item_mpls_mask; 2311 ret = mlx5_flow_item_acceptable 2312 (item, (const uint8_t *)mask, 2313 (const uint8_t *)&rte_flow_item_mpls_mask, 2314 sizeof(struct rte_flow_item_mpls), error); 2315 if (ret < 0) 2316 return ret; 2317 return 0; 2318 #else 2319 return rte_flow_error_set(error, ENOTSUP, 2320 RTE_FLOW_ERROR_TYPE_ITEM, item, 2321 "MPLS is not supported by Verbs, please" 2322 " update."); 2323 #endif 2324 } 2325 2326 /** 2327 * Validate NVGRE item. 2328 * 2329 * @param[in] item 2330 * Item specification. 2331 * @param[in] item_flags 2332 * Bit flags to mark detected items. 2333 * @param[in] target_protocol 2334 * The next protocol in the previous item. 2335 * @param[out] error 2336 * Pointer to error structure. 2337 * 2338 * @return 2339 * 0 on success, a negative errno value otherwise and rte_errno is set. 2340 */ 2341 int 2342 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2343 uint64_t item_flags, 2344 uint8_t target_protocol, 2345 struct rte_flow_error *error) 2346 { 2347 const struct rte_flow_item_nvgre *mask = item->mask; 2348 int ret; 2349 2350 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2351 return rte_flow_error_set(error, EINVAL, 2352 RTE_FLOW_ERROR_TYPE_ITEM, item, 2353 "protocol filtering not compatible" 2354 " with this GRE layer"); 2355 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2356 return rte_flow_error_set(error, ENOTSUP, 2357 RTE_FLOW_ERROR_TYPE_ITEM, item, 2358 "multiple tunnel layers not" 2359 " supported"); 2360 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2361 return rte_flow_error_set(error, ENOTSUP, 2362 RTE_FLOW_ERROR_TYPE_ITEM, item, 2363 "L3 Layer is missing"); 2364 if (!mask) 2365 mask = &rte_flow_item_nvgre_mask; 2366 ret = mlx5_flow_item_acceptable 2367 (item, (const uint8_t *)mask, 2368 (const uint8_t *)&rte_flow_item_nvgre_mask, 2369 sizeof(struct rte_flow_item_nvgre), error); 2370 if (ret < 0) 2371 return ret; 2372 return 0; 2373 } 2374 2375 /** 2376 * Validate eCPRI item. 2377 * 2378 * @param[in] item 2379 * Item specification. 2380 * @param[in] item_flags 2381 * Bit-fields that holds the items detected until now. 2382 * @param[in] last_item 2383 * Previous validated item in the pattern items. 2384 * @param[in] ether_type 2385 * Type in the ethernet layer header (including dot1q). 2386 * @param[in] acc_mask 2387 * Acceptable mask, if NULL default internal default mask 2388 * will be used to check whether item fields are supported. 2389 * @param[out] error 2390 * Pointer to error structure. 2391 * 2392 * @return 2393 * 0 on success, a negative errno value otherwise and rte_errno is set. 
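 *
 * Only an all-or-nothing mask on the eCPRI message type is accepted, and
 * the first payload word (hdr.dummy[0]) may only be matched together with
 * a fully masked type. eCPRI following a tunnel, an L3 layer or a TCP/UDP
 * layer is rejected by this validator.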
2394 */ 2395 int 2396 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 2397 uint64_t item_flags, 2398 uint64_t last_item, 2399 uint16_t ether_type, 2400 const struct rte_flow_item_ecpri *acc_mask, 2401 struct rte_flow_error *error) 2402 { 2403 const struct rte_flow_item_ecpri *mask = item->mask; 2404 const struct rte_flow_item_ecpri nic_mask = { 2405 .hdr = { 2406 .common = { 2407 .u32 = 2408 RTE_BE32(((const struct rte_ecpri_common_hdr) { 2409 .type = 0xFF, 2410 }).u32), 2411 }, 2412 .dummy[0] = 0xFFFFFFFF, 2413 }, 2414 }; 2415 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 2416 MLX5_FLOW_LAYER_OUTER_VLAN); 2417 struct rte_flow_item_ecpri mask_lo; 2418 2419 if ((last_item & outer_l2_vlan) && ether_type && 2420 ether_type != RTE_ETHER_TYPE_ECPRI) 2421 return rte_flow_error_set(error, EINVAL, 2422 RTE_FLOW_ERROR_TYPE_ITEM, item, 2423 "eCPRI cannot follow L2/VLAN layer " 2424 "which ether type is not 0xAEFE."); 2425 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2426 return rte_flow_error_set(error, EINVAL, 2427 RTE_FLOW_ERROR_TYPE_ITEM, item, 2428 "eCPRI with tunnel is not supported " 2429 "right now."); 2430 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 2431 return rte_flow_error_set(error, ENOTSUP, 2432 RTE_FLOW_ERROR_TYPE_ITEM, item, 2433 "multiple L3 layers not supported"); 2434 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 2435 return rte_flow_error_set(error, EINVAL, 2436 RTE_FLOW_ERROR_TYPE_ITEM, item, 2437 "eCPRI cannot follow a TCP layer."); 2438 /* In specification, eCPRI could be over UDP layer. */ 2439 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 2440 return rte_flow_error_set(error, EINVAL, 2441 RTE_FLOW_ERROR_TYPE_ITEM, item, 2442 "eCPRI over UDP layer is not yet " 2443 "supported right now."); 2444 /* Mask for type field in common header could be zero. */ 2445 if (!mask) 2446 mask = &rte_flow_item_ecpri_mask; 2447 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 2448 /* Input mask is in big-endian format. */ 2449 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 2450 return rte_flow_error_set(error, EINVAL, 2451 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2452 "partial mask is not supported " 2453 "for protocol"); 2454 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 2455 return rte_flow_error_set(error, EINVAL, 2456 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2457 "message header mask must be after " 2458 "a type mask"); 2459 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2460 acc_mask ? (const uint8_t *)acc_mask 2461 : (const uint8_t *)&nic_mask, 2462 sizeof(struct rte_flow_item_ecpri), 2463 error); 2464 } 2465 2466 /* Allocate unique ID for the split Q/RSS subflows. */ 2467 static uint32_t 2468 flow_qrss_get_id(struct rte_eth_dev *dev) 2469 { 2470 struct mlx5_priv *priv = dev->data->dev_private; 2471 uint32_t qrss_id, ret; 2472 2473 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2474 if (ret) 2475 return 0; 2476 MLX5_ASSERT(qrss_id); 2477 return qrss_id; 2478 } 2479 2480 /* Free unique ID for the split Q/RSS subflows. */ 2481 static void 2482 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2483 { 2484 struct mlx5_priv *priv = dev->data->dev_private; 2485 2486 if (qrss_id) 2487 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2488 } 2489 2490 /** 2491 * Release resource related QUEUE/RSS action split. 2492 * 2493 * @param dev 2494 * Pointer to Ethernet device. 2495 * @param flow 2496 * Flow to release id's from. 
2497 */ 2498 static void 2499 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2500 struct rte_flow *flow) 2501 { 2502 struct mlx5_priv *priv = dev->data->dev_private; 2503 uint32_t handle_idx; 2504 struct mlx5_flow_handle *dev_handle; 2505 2506 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2507 handle_idx, dev_handle, next) 2508 if (dev_handle->split_flow_id) 2509 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2510 } 2511 2512 static int 2513 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2514 const struct rte_flow_attr *attr __rte_unused, 2515 const struct rte_flow_item items[] __rte_unused, 2516 const struct rte_flow_action actions[] __rte_unused, 2517 bool external __rte_unused, 2518 int hairpin __rte_unused, 2519 struct rte_flow_error *error) 2520 { 2521 return rte_flow_error_set(error, ENOTSUP, 2522 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2523 } 2524 2525 static struct mlx5_flow * 2526 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2527 const struct rte_flow_attr *attr __rte_unused, 2528 const struct rte_flow_item items[] __rte_unused, 2529 const struct rte_flow_action actions[] __rte_unused, 2530 struct rte_flow_error *error) 2531 { 2532 rte_flow_error_set(error, ENOTSUP, 2533 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2534 return NULL; 2535 } 2536 2537 static int 2538 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2539 struct mlx5_flow *dev_flow __rte_unused, 2540 const struct rte_flow_attr *attr __rte_unused, 2541 const struct rte_flow_item items[] __rte_unused, 2542 const struct rte_flow_action actions[] __rte_unused, 2543 struct rte_flow_error *error) 2544 { 2545 return rte_flow_error_set(error, ENOTSUP, 2546 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2547 } 2548 2549 static int 2550 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2551 struct rte_flow *flow __rte_unused, 2552 struct rte_flow_error *error) 2553 { 2554 return rte_flow_error_set(error, ENOTSUP, 2555 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2556 } 2557 2558 static void 2559 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2560 struct rte_flow *flow __rte_unused) 2561 { 2562 } 2563 2564 static void 2565 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2566 struct rte_flow *flow __rte_unused) 2567 { 2568 } 2569 2570 static int 2571 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2572 struct rte_flow *flow __rte_unused, 2573 const struct rte_flow_action *actions __rte_unused, 2574 void *data __rte_unused, 2575 struct rte_flow_error *error) 2576 { 2577 return rte_flow_error_set(error, ENOTSUP, 2578 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2579 } 2580 2581 /* Void driver to protect from null pointer reference. */ 2582 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2583 .validate = flow_null_validate, 2584 .prepare = flow_null_prepare, 2585 .translate = flow_null_translate, 2586 .apply = flow_null_apply, 2587 .remove = flow_null_remove, 2588 .destroy = flow_null_destroy, 2589 .query = flow_null_query, 2590 }; 2591 2592 /** 2593 * Select flow driver type according to flow attributes and device 2594 * configuration. 2595 * 2596 * @param[in] dev 2597 * Pointer to the dev structure. 2598 * @param[in] attr 2599 * Pointer to the flow attributes. 2600 * 2601 * @return 2602 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
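 *
 * For instance, a transfer (E-Switch) rule maps to MLX5_FLOW_TYPE_DV only
 * when dv_esw_en is set, while non-transfer rules follow dv_flow_en:
 * DV when enabled, Verbs otherwise.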
2603 */ 2604 static enum mlx5_flow_drv_type 2605 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2606 { 2607 struct mlx5_priv *priv = dev->data->dev_private; 2608 /* The OS can determine first a specific flow type (DV, VERBS) */ 2609 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2610 2611 if (type != MLX5_FLOW_TYPE_MAX) 2612 return type; 2613 /* If no OS specific type - continue with DV/VERBS selection */ 2614 if (attr->transfer && priv->config.dv_esw_en) 2615 type = MLX5_FLOW_TYPE_DV; 2616 if (!attr->transfer) 2617 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2618 MLX5_FLOW_TYPE_VERBS; 2619 return type; 2620 } 2621 2622 #define flow_get_drv_ops(type) flow_drv_ops[type] 2623 2624 /** 2625 * Flow driver validation API. This abstracts calling driver specific functions. 2626 * The type of flow driver is determined according to flow attributes. 2627 * 2628 * @param[in] dev 2629 * Pointer to the dev structure. 2630 * @param[in] attr 2631 * Pointer to the flow attributes. 2632 * @param[in] items 2633 * Pointer to the list of items. 2634 * @param[in] actions 2635 * Pointer to the list of actions. 2636 * @param[in] external 2637 * This flow rule is created by request external to PMD. 2638 * @param[in] hairpin 2639 * Number of hairpin TX actions, 0 means classic flow. 2640 * @param[out] error 2641 * Pointer to the error structure. 2642 * 2643 * @return 2644 * 0 on success, a negative errno value otherwise and rte_errno is set. 2645 */ 2646 static inline int 2647 flow_drv_validate(struct rte_eth_dev *dev, 2648 const struct rte_flow_attr *attr, 2649 const struct rte_flow_item items[], 2650 const struct rte_flow_action actions[], 2651 bool external, int hairpin, struct rte_flow_error *error) 2652 { 2653 const struct mlx5_flow_driver_ops *fops; 2654 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2655 2656 fops = flow_get_drv_ops(type); 2657 return fops->validate(dev, attr, items, actions, external, 2658 hairpin, error); 2659 } 2660 2661 /** 2662 * Flow driver preparation API. This abstracts calling driver specific 2663 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2664 * calculates the size of memory required for device flow, allocates the memory, 2665 * initializes the device flow and returns the pointer. 2666 * 2667 * @note 2668 * This function initializes device flow structure such as dv or verbs in 2669 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2670 * rest. For example, adding returning device flow to flow->dev_flow list and 2671 * setting backward reference to the flow should be done out of this function. 2672 * layers field is not filled either. 2673 * 2674 * @param[in] dev 2675 * Pointer to the dev structure. 2676 * @param[in] attr 2677 * Pointer to the flow attributes. 2678 * @param[in] items 2679 * Pointer to the list of items. 2680 * @param[in] actions 2681 * Pointer to the list of actions. 2682 * @param[in] flow_idx 2683 * This memory pool index to the flow. 2684 * @param[out] error 2685 * Pointer to the error structure. 2686 * 2687 * @return 2688 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
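 *
 * A simplified sketch of how the creation path later in this file chains
 * the driver callbacks (handle bookkeeping and error unwinding omitted):
 *
 * @code
 *	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
 *				    flow_idx, error);
 *	if (!dev_flow)
 *		return -rte_errno;
 *	ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 *	if (!ret)
 *		ret = flow_drv_apply(dev, flow, error);
 * @endcode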
2689 */ 2690 static inline struct mlx5_flow * 2691 flow_drv_prepare(struct rte_eth_dev *dev, 2692 const struct rte_flow *flow, 2693 const struct rte_flow_attr *attr, 2694 const struct rte_flow_item items[], 2695 const struct rte_flow_action actions[], 2696 uint32_t flow_idx, 2697 struct rte_flow_error *error) 2698 { 2699 const struct mlx5_flow_driver_ops *fops; 2700 enum mlx5_flow_drv_type type = flow->drv_type; 2701 struct mlx5_flow *mlx5_flow = NULL; 2702 2703 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2704 fops = flow_get_drv_ops(type); 2705 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2706 if (mlx5_flow) 2707 mlx5_flow->flow_idx = flow_idx; 2708 return mlx5_flow; 2709 } 2710 2711 /** 2712 * Flow driver translation API. This abstracts calling driver specific 2713 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2714 * translates a generic flow into a driver flow. flow_drv_prepare() must 2715 * precede. 2716 * 2717 * @note 2718 * dev_flow->layers could be filled as a result of parsing during translation 2719 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2720 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2721 * flow->actions could be overwritten even though all the expanded dev_flows 2722 * have the same actions. 2723 * 2724 * @param[in] dev 2725 * Pointer to the rte dev structure. 2726 * @param[in, out] dev_flow 2727 * Pointer to the mlx5 flow. 2728 * @param[in] attr 2729 * Pointer to the flow attributes. 2730 * @param[in] items 2731 * Pointer to the list of items. 2732 * @param[in] actions 2733 * Pointer to the list of actions. 2734 * @param[out] error 2735 * Pointer to the error structure. 2736 * 2737 * @return 2738 * 0 on success, a negative errno value otherwise and rte_errno is set. 2739 */ 2740 static inline int 2741 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2742 const struct rte_flow_attr *attr, 2743 const struct rte_flow_item items[], 2744 const struct rte_flow_action actions[], 2745 struct rte_flow_error *error) 2746 { 2747 const struct mlx5_flow_driver_ops *fops; 2748 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2749 2750 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2751 fops = flow_get_drv_ops(type); 2752 return fops->translate(dev, dev_flow, attr, items, actions, error); 2753 } 2754 2755 /** 2756 * Flow driver apply API. This abstracts calling driver specific functions. 2757 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2758 * translated driver flows on to device. flow_drv_translate() must precede. 2759 * 2760 * @param[in] dev 2761 * Pointer to Ethernet device structure. 2762 * @param[in, out] flow 2763 * Pointer to flow structure. 2764 * @param[out] error 2765 * Pointer to error structure. 2766 * 2767 * @return 2768 * 0 on success, a negative errno value otherwise and rte_errno is set. 2769 */ 2770 static inline int 2771 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2772 struct rte_flow_error *error) 2773 { 2774 const struct mlx5_flow_driver_ops *fops; 2775 enum mlx5_flow_drv_type type = flow->drv_type; 2776 2777 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2778 fops = flow_get_drv_ops(type); 2779 return fops->apply(dev, flow, error); 2780 } 2781 2782 /** 2783 * Flow driver remove API. This abstracts calling driver specific functions. 2784 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 2785 * on device. All the resources of the flow should be freed by calling 2786 * flow_drv_destroy(). 2787 * 2788 * @param[in] dev 2789 * Pointer to Ethernet device. 2790 * @param[in, out] flow 2791 * Pointer to flow structure. 2792 */ 2793 static inline void 2794 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2795 { 2796 const struct mlx5_flow_driver_ops *fops; 2797 enum mlx5_flow_drv_type type = flow->drv_type; 2798 2799 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2800 fops = flow_get_drv_ops(type); 2801 fops->remove(dev, flow); 2802 } 2803 2804 /** 2805 * Flow driver destroy API. This abstracts calling driver specific functions. 2806 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 2807 * on device and releases resources of the flow. 2808 * 2809 * @param[in] dev 2810 * Pointer to Ethernet device. 2811 * @param[in, out] flow 2812 * Pointer to flow structure. 2813 */ 2814 static inline void 2815 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 2816 { 2817 const struct mlx5_flow_driver_ops *fops; 2818 enum mlx5_flow_drv_type type = flow->drv_type; 2819 2820 flow_mreg_split_qrss_release(dev, flow); 2821 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2822 fops = flow_get_drv_ops(type); 2823 fops->destroy(dev, flow); 2824 } 2825 2826 /** 2827 * Get RSS action from the action list. 2828 * 2829 * @param[in] actions 2830 * Pointer to the list of actions. 2831 * 2832 * @return 2833 * Pointer to the RSS action if exist, else return NULL. 2834 */ 2835 static const struct rte_flow_action_rss* 2836 flow_get_rss_action(const struct rte_flow_action actions[]) 2837 { 2838 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2839 switch (actions->type) { 2840 case RTE_FLOW_ACTION_TYPE_RSS: 2841 return (const struct rte_flow_action_rss *) 2842 actions->conf; 2843 default: 2844 break; 2845 } 2846 } 2847 return NULL; 2848 } 2849 2850 static unsigned int 2851 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 2852 { 2853 const struct rte_flow_item *item; 2854 unsigned int has_vlan = 0; 2855 2856 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 2857 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 2858 has_vlan = 1; 2859 break; 2860 } 2861 } 2862 if (has_vlan) 2863 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 2864 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 2865 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 2866 MLX5_EXPANSION_ROOT_OUTER; 2867 } 2868 2869 /** 2870 * Get layer flags from the prefix flow. 2871 * 2872 * Some flows may be split to several subflows, the prefix subflow gets the 2873 * match items and the suffix sub flow gets the actions. 2874 * Some actions need the user defined match item flags to get the detail for 2875 * the action. 2876 * This function helps the suffix flow to get the item layer flags from prefix 2877 * subflow. 2878 * 2879 * @param[in] dev_flow 2880 * Pointer the created preifx subflow. 2881 * 2882 * @return 2883 * The layers get from prefix subflow. 2884 */ 2885 static inline uint64_t 2886 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 2887 { 2888 uint64_t layers = 0; 2889 2890 /* 2891 * Layers bits could be localization, but usually the compiler will 2892 * help to do the optimization work for source code. 2893 * If no decap actions, use the layers directly. 
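	 * For example, a prefix subflow that matched a VXLAN tunnel with
	 * inner IPv4/UDP carries MLX5_FLOW_LAYER_INNER_L3_IPV4 and
	 * MLX5_FLOW_LAYER_INNER_L4_UDP; once a decap action is present the
	 * suffix subflow sees those same headers as outer ones, hence the
	 * conversion below.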
2894 */ 2895 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 2896 return dev_flow->handle->layers; 2897 /* Convert L3 layers with decap action. */ 2898 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 2899 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 2900 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 2901 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 2902 /* Convert L4 layers with decap action. */ 2903 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 2904 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 2905 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 2906 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 2907 return layers; 2908 } 2909 2910 /** 2911 * Get metadata split action information. 2912 * 2913 * @param[in] actions 2914 * Pointer to the list of actions. 2915 * @param[out] qrss 2916 * Pointer to the return pointer. 2917 * @param[out] qrss_type 2918 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 2919 * if no QUEUE/RSS is found. 2920 * @param[out] encap_idx 2921 * Pointer to the index of the encap action if exists, otherwise the last 2922 * action index. 2923 * 2924 * @return 2925 * Total number of actions. 2926 */ 2927 static int 2928 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 2929 const struct rte_flow_action **qrss, 2930 int *encap_idx) 2931 { 2932 const struct rte_flow_action_raw_encap *raw_encap; 2933 int actions_n = 0; 2934 int raw_decap_idx = -1; 2935 2936 *encap_idx = -1; 2937 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2938 switch (actions->type) { 2939 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 2940 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 2941 *encap_idx = actions_n; 2942 break; 2943 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 2944 raw_decap_idx = actions_n; 2945 break; 2946 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 2947 raw_encap = actions->conf; 2948 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 2949 *encap_idx = raw_decap_idx != -1 ? 2950 raw_decap_idx : actions_n; 2951 break; 2952 case RTE_FLOW_ACTION_TYPE_QUEUE: 2953 case RTE_FLOW_ACTION_TYPE_RSS: 2954 *qrss = actions; 2955 break; 2956 default: 2957 break; 2958 } 2959 actions_n++; 2960 } 2961 if (*encap_idx == -1) 2962 *encap_idx = actions_n; 2963 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2964 return actions_n + 1; 2965 } 2966 2967 /** 2968 * Check meter action from the action list. 2969 * 2970 * @param[in] actions 2971 * Pointer to the list of actions. 2972 * @param[out] mtr 2973 * Pointer to the meter exist flag. 2974 * 2975 * @return 2976 * Total number of actions. 2977 */ 2978 static int 2979 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 2980 { 2981 int actions_n = 0; 2982 2983 MLX5_ASSERT(mtr); 2984 *mtr = 0; 2985 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2986 switch (actions->type) { 2987 case RTE_FLOW_ACTION_TYPE_METER: 2988 *mtr = 1; 2989 break; 2990 default: 2991 break; 2992 } 2993 actions_n++; 2994 } 2995 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2996 return actions_n + 1; 2997 } 2998 2999 /** 3000 * Check if the flow should be splited due to hairpin. 3001 * The reason for the split is that in current HW we can't 3002 * support encap on Rx, so if a flow have encap we move it 3003 * to Tx. 3004 * 3005 * @param dev 3006 * Pointer to Ethernet device. 3007 * @param[in] attr 3008 * Flow rule attributes. 3009 * @param[in] actions 3010 * Associated actions (list terminated by the END action). 
3011 * 3012 * @return 3013 * > 0 the number of actions and the flow should be split, 3014 * 0 when no split required. 3015 */ 3016 static int 3017 flow_check_hairpin_split(struct rte_eth_dev *dev, 3018 const struct rte_flow_attr *attr, 3019 const struct rte_flow_action actions[]) 3020 { 3021 int queue_action = 0; 3022 int action_n = 0; 3023 int encap = 0; 3024 const struct rte_flow_action_queue *queue; 3025 const struct rte_flow_action_rss *rss; 3026 const struct rte_flow_action_raw_encap *raw_encap; 3027 3028 if (!attr->ingress) 3029 return 0; 3030 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3031 switch (actions->type) { 3032 case RTE_FLOW_ACTION_TYPE_QUEUE: 3033 queue = actions->conf; 3034 if (queue == NULL) 3035 return 0; 3036 if (mlx5_rxq_get_type(dev, queue->index) != 3037 MLX5_RXQ_TYPE_HAIRPIN) 3038 return 0; 3039 queue_action = 1; 3040 action_n++; 3041 break; 3042 case RTE_FLOW_ACTION_TYPE_RSS: 3043 rss = actions->conf; 3044 if (rss == NULL || rss->queue_num == 0) 3045 return 0; 3046 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 3047 MLX5_RXQ_TYPE_HAIRPIN) 3048 return 0; 3049 queue_action = 1; 3050 action_n++; 3051 break; 3052 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3053 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3054 encap = 1; 3055 action_n++; 3056 break; 3057 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3058 raw_encap = actions->conf; 3059 if (raw_encap->size > 3060 (sizeof(struct rte_flow_item_eth) + 3061 sizeof(struct rte_flow_item_ipv4))) 3062 encap = 1; 3063 action_n++; 3064 break; 3065 default: 3066 action_n++; 3067 break; 3068 } 3069 } 3070 if (encap == 1 && queue_action) 3071 return action_n; 3072 return 0; 3073 } 3074 3075 /* Declare flow create/destroy prototype in advance. */ 3076 static uint32_t 3077 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 3078 const struct rte_flow_attr *attr, 3079 const struct rte_flow_item items[], 3080 const struct rte_flow_action actions[], 3081 bool external, struct rte_flow_error *error); 3082 3083 static void 3084 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 3085 uint32_t flow_idx); 3086 3087 /** 3088 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3089 * 3090 * As mark_id is unique, if there's already a registered flow for the mark_id, 3091 * return by increasing the reference counter of the resource. Otherwise, create 3092 * the resource (mcp_res) and flow. 3093 * 3094 * Flow looks like, 3095 * - If ingress port is ANY and reg_c[1] is mark_id, 3096 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3097 * 3098 * For default flow (zero mark_id), flow is like, 3099 * - If ingress port is ANY, 3100 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 3101 * 3102 * @param dev 3103 * Pointer to Ethernet device. 3104 * @param mark_id 3105 * ID of MARK action, zero means default flow for META. 3106 * @param[out] error 3107 * Perform verbose error reporting if not NULL. 3108 * 3109 * @return 3110 * Associated resource on success, NULL otherwise and rte_errno is set. 
3111 */ 3112 static struct mlx5_flow_mreg_copy_resource * 3113 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 3114 struct rte_flow_error *error) 3115 { 3116 struct mlx5_priv *priv = dev->data->dev_private; 3117 struct rte_flow_attr attr = { 3118 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3119 .ingress = 1, 3120 }; 3121 struct mlx5_rte_flow_item_tag tag_spec = { 3122 .data = mark_id, 3123 }; 3124 struct rte_flow_item items[] = { 3125 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3126 }; 3127 struct rte_flow_action_mark ftag = { 3128 .id = mark_id, 3129 }; 3130 struct mlx5_flow_action_copy_mreg cp_mreg = { 3131 .dst = REG_B, 3132 .src = 0, 3133 }; 3134 struct rte_flow_action_jump jump = { 3135 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3136 }; 3137 struct rte_flow_action actions[] = { 3138 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3139 }; 3140 struct mlx5_flow_mreg_copy_resource *mcp_res; 3141 uint32_t idx = 0; 3142 int ret; 3143 3144 /* Fill the register fileds in the flow. */ 3145 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3146 if (ret < 0) 3147 return NULL; 3148 tag_spec.id = ret; 3149 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3150 if (ret < 0) 3151 return NULL; 3152 cp_mreg.src = ret; 3153 /* Check if already registered. */ 3154 MLX5_ASSERT(priv->mreg_cp_tbl); 3155 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3156 if (mcp_res) { 3157 /* For non-default rule. */ 3158 if (mark_id != MLX5_DEFAULT_COPY_ID) 3159 mcp_res->refcnt++; 3160 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3161 mcp_res->refcnt == 1); 3162 return mcp_res; 3163 } 3164 /* Provide the full width of FLAG specific value. */ 3165 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3166 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3167 /* Build a new flow. */ 3168 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3169 items[0] = (struct rte_flow_item){ 3170 .type = (enum rte_flow_item_type) 3171 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3172 .spec = &tag_spec, 3173 }; 3174 items[1] = (struct rte_flow_item){ 3175 .type = RTE_FLOW_ITEM_TYPE_END, 3176 }; 3177 actions[0] = (struct rte_flow_action){ 3178 .type = (enum rte_flow_action_type) 3179 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3180 .conf = &ftag, 3181 }; 3182 actions[1] = (struct rte_flow_action){ 3183 .type = (enum rte_flow_action_type) 3184 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3185 .conf = &cp_mreg, 3186 }; 3187 actions[2] = (struct rte_flow_action){ 3188 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3189 .conf = &jump, 3190 }; 3191 actions[3] = (struct rte_flow_action){ 3192 .type = RTE_FLOW_ACTION_TYPE_END, 3193 }; 3194 } else { 3195 /* Default rule, wildcard match. */ 3196 attr.priority = MLX5_FLOW_PRIO_RSVD; 3197 items[0] = (struct rte_flow_item){ 3198 .type = RTE_FLOW_ITEM_TYPE_END, 3199 }; 3200 actions[0] = (struct rte_flow_action){ 3201 .type = (enum rte_flow_action_type) 3202 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3203 .conf = &cp_mreg, 3204 }; 3205 actions[1] = (struct rte_flow_action){ 3206 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3207 .conf = &jump, 3208 }; 3209 actions[2] = (struct rte_flow_action){ 3210 .type = RTE_FLOW_ACTION_TYPE_END, 3211 }; 3212 } 3213 /* Build a new entry. */ 3214 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3215 if (!mcp_res) { 3216 rte_errno = ENOMEM; 3217 return NULL; 3218 } 3219 mcp_res->idx = idx; 3220 /* 3221 * The copy Flows are not included in any list. 
They
	 * are referenced from other Flows and cannot
	 * be applied, removed or deleted in arbitrary order
	 * by list traversal.
	 */
	mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
					     actions, false, error);
	if (!mcp_res->rix_flow)
		goto error;
	mcp_res->refcnt++;
	mcp_res->hlist_ent.key = mark_id;
	ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
				&mcp_res->hlist_ent);
	MLX5_ASSERT(!ret);
	if (ret)
		goto error;
	return mcp_res;
error:
	if (mcp_res->rix_flow)
		flow_list_destroy(dev, NULL, mcp_res->rix_flow);
	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
	return NULL;
}

/**
 * Release flow in RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Parent flow for which copying is provided.
 */
static void
flow_mreg_del_copy_action(struct rte_eth_dev *dev,
			  struct rte_flow *flow)
{
	struct mlx5_flow_mreg_copy_resource *mcp_res;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!flow->rix_mreg_copy)
		return;
	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
				 flow->rix_mreg_copy);
	if (!mcp_res || !priv->mreg_cp_tbl)
		return;
	if (flow->copy_applied) {
		MLX5_ASSERT(mcp_res->appcnt);
		flow->copy_applied = 0;
		--mcp_res->appcnt;
		if (!mcp_res->appcnt) {
			struct rte_flow *mcp_flow = mlx5_ipool_get
					(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
					 mcp_res->rix_flow);

			if (mcp_flow)
				flow_drv_remove(dev, mcp_flow);
		}
	}
	/*
	 * We do not check availability of metadata registers here,
	 * because copy resources are not allocated in this case.
	 */
	if (--mcp_res->refcnt)
		return;
	MLX5_ASSERT(mcp_res->rix_flow);
	flow_list_destroy(dev, NULL, mcp_res->rix_flow);
	mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
	flow->rix_mreg_copy = 0;
}

/**
 * Start flow in RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Parent flow for which copying is provided.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_mreg_start_copy_action(struct rte_eth_dev *dev,
			    struct rte_flow *flow)
{
	struct mlx5_flow_mreg_copy_resource *mcp_res;
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;

	if (!flow->rix_mreg_copy || flow->copy_applied)
		return 0;
	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
				 flow->rix_mreg_copy);
	if (!mcp_res)
		return 0;
	if (!mcp_res->appcnt) {
		struct rte_flow *mcp_flow = mlx5_ipool_get
				(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
				 mcp_res->rix_flow);

		if (mcp_flow) {
			ret = flow_drv_apply(dev, mcp_flow, NULL);
			if (ret)
				return ret;
		}
	}
	++mcp_res->appcnt;
	flow->copy_applied = 1;
	return 0;
}

/**
 * Stop flow in RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Parent flow for which copying is provided.
3340 */ 3341 static void 3342 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3343 struct rte_flow *flow) 3344 { 3345 struct mlx5_flow_mreg_copy_resource *mcp_res; 3346 struct mlx5_priv *priv = dev->data->dev_private; 3347 3348 if (!flow->rix_mreg_copy || !flow->copy_applied) 3349 return; 3350 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3351 flow->rix_mreg_copy); 3352 if (!mcp_res) 3353 return; 3354 MLX5_ASSERT(mcp_res->appcnt); 3355 --mcp_res->appcnt; 3356 flow->copy_applied = 0; 3357 if (!mcp_res->appcnt) { 3358 struct rte_flow *mcp_flow = mlx5_ipool_get 3359 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3360 mcp_res->rix_flow); 3361 3362 if (mcp_flow) 3363 flow_drv_remove(dev, mcp_flow); 3364 } 3365 } 3366 3367 /** 3368 * Remove the default copy action from RX_CP_TBL. 3369 * 3370 * @param dev 3371 * Pointer to Ethernet device. 3372 */ 3373 static void 3374 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3375 { 3376 struct mlx5_flow_mreg_copy_resource *mcp_res; 3377 struct mlx5_priv *priv = dev->data->dev_private; 3378 3379 /* Check if default flow is registered. */ 3380 if (!priv->mreg_cp_tbl) 3381 return; 3382 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3383 MLX5_DEFAULT_COPY_ID); 3384 if (!mcp_res) 3385 return; 3386 MLX5_ASSERT(mcp_res->rix_flow); 3387 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3388 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3389 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3390 } 3391 3392 /** 3393 * Add the default copy action in in RX_CP_TBL. 3394 * 3395 * @param dev 3396 * Pointer to Ethernet device. 3397 * @param[out] error 3398 * Perform verbose error reporting if not NULL. 3399 * 3400 * @return 3401 * 0 for success, negative value otherwise and rte_errno is set. 3402 */ 3403 static int 3404 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3405 struct rte_flow_error *error) 3406 { 3407 struct mlx5_priv *priv = dev->data->dev_private; 3408 struct mlx5_flow_mreg_copy_resource *mcp_res; 3409 3410 /* Check whether extensive metadata feature is engaged. */ 3411 if (!priv->config.dv_flow_en || 3412 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3413 !mlx5_flow_ext_mreg_supported(dev) || 3414 !priv->sh->dv_regc0_mask) 3415 return 0; 3416 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3417 if (!mcp_res) 3418 return -rte_errno; 3419 return 0; 3420 } 3421 3422 /** 3423 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3424 * 3425 * All the flow having Q/RSS action should be split by 3426 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3427 * performs the following, 3428 * - CQE->flow_tag := reg_c[1] (MARK) 3429 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3430 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3431 * but there should be a flow per each MARK ID set by MARK action. 3432 * 3433 * For the aforementioned reason, if there's a MARK action in flow's action 3434 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3435 * the MARK ID to CQE's flow_tag like, 3436 * - If reg_c[1] is mark_id, 3437 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3438 * 3439 * For SET_META action which stores value in reg_c[0], as the destination is 3440 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3441 * MARK ID means the default flow. 
The default flow looks like, 3442 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3443 * 3444 * @param dev 3445 * Pointer to Ethernet device. 3446 * @param flow 3447 * Pointer to flow structure. 3448 * @param[in] actions 3449 * Pointer to the list of actions. 3450 * @param[out] error 3451 * Perform verbose error reporting if not NULL. 3452 * 3453 * @return 3454 * 0 on success, negative value otherwise and rte_errno is set. 3455 */ 3456 static int 3457 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3458 struct rte_flow *flow, 3459 const struct rte_flow_action *actions, 3460 struct rte_flow_error *error) 3461 { 3462 struct mlx5_priv *priv = dev->data->dev_private; 3463 struct mlx5_dev_config *config = &priv->config; 3464 struct mlx5_flow_mreg_copy_resource *mcp_res; 3465 const struct rte_flow_action_mark *mark; 3466 3467 /* Check whether extensive metadata feature is engaged. */ 3468 if (!config->dv_flow_en || 3469 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3470 !mlx5_flow_ext_mreg_supported(dev) || 3471 !priv->sh->dv_regc0_mask) 3472 return 0; 3473 /* Find MARK action. */ 3474 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3475 switch (actions->type) { 3476 case RTE_FLOW_ACTION_TYPE_FLAG: 3477 mcp_res = flow_mreg_add_copy_action 3478 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3479 if (!mcp_res) 3480 return -rte_errno; 3481 flow->rix_mreg_copy = mcp_res->idx; 3482 if (dev->data->dev_started) { 3483 mcp_res->appcnt++; 3484 flow->copy_applied = 1; 3485 } 3486 return 0; 3487 case RTE_FLOW_ACTION_TYPE_MARK: 3488 mark = (const struct rte_flow_action_mark *) 3489 actions->conf; 3490 mcp_res = 3491 flow_mreg_add_copy_action(dev, mark->id, error); 3492 if (!mcp_res) 3493 return -rte_errno; 3494 flow->rix_mreg_copy = mcp_res->idx; 3495 if (dev->data->dev_started) { 3496 mcp_res->appcnt++; 3497 flow->copy_applied = 1; 3498 } 3499 return 0; 3500 default: 3501 break; 3502 } 3503 } 3504 return 0; 3505 } 3506 3507 #define MLX5_MAX_SPLIT_ACTIONS 24 3508 #define MLX5_MAX_SPLIT_ITEMS 24 3509 3510 /** 3511 * Split the hairpin flow. 3512 * Since HW can't support encap on Rx we move the encap to Tx. 3513 * If the count action is after the encap then we also 3514 * move the count action. in this case the count will also measure 3515 * the outer bytes. 3516 * 3517 * @param dev 3518 * Pointer to Ethernet device. 3519 * @param[in] actions 3520 * Associated actions (list terminated by the END action). 3521 * @param[out] actions_rx 3522 * Rx flow actions. 3523 * @param[out] actions_tx 3524 * Tx flow actions.. 3525 * @param[out] pattern_tx 3526 * The pattern items for the Tx flow. 3527 * @param[out] flow_id 3528 * The flow ID connected to this flow. 3529 * 3530 * @return 3531 * 0 on success. 
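 *
 * A rough illustration of the split (schematic notation, assuming the
 * raw_encap data is larger than a plain L2/L3 header):
 *
 *   original: actions raw_encap / count / queue / end
 *   Rx flow:  original pattern, actions queue / set_tag(flow_id) / end
 *   Tx flow:  pattern tag(flow_id) / end, actions raw_encap / count / end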
3532 */ 3533 static int 3534 flow_hairpin_split(struct rte_eth_dev *dev, 3535 const struct rte_flow_action actions[], 3536 struct rte_flow_action actions_rx[], 3537 struct rte_flow_action actions_tx[], 3538 struct rte_flow_item pattern_tx[], 3539 uint32_t *flow_id) 3540 { 3541 struct mlx5_priv *priv = dev->data->dev_private; 3542 const struct rte_flow_action_raw_encap *raw_encap; 3543 const struct rte_flow_action_raw_decap *raw_decap; 3544 struct mlx5_rte_flow_action_set_tag *set_tag; 3545 struct rte_flow_action *tag_action; 3546 struct mlx5_rte_flow_item_tag *tag_item; 3547 struct rte_flow_item *item; 3548 char *addr; 3549 int encap = 0; 3550 3551 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3552 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3553 switch (actions->type) { 3554 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3555 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3556 rte_memcpy(actions_tx, actions, 3557 sizeof(struct rte_flow_action)); 3558 actions_tx++; 3559 break; 3560 case RTE_FLOW_ACTION_TYPE_COUNT: 3561 if (encap) { 3562 rte_memcpy(actions_tx, actions, 3563 sizeof(struct rte_flow_action)); 3564 actions_tx++; 3565 } else { 3566 rte_memcpy(actions_rx, actions, 3567 sizeof(struct rte_flow_action)); 3568 actions_rx++; 3569 } 3570 break; 3571 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3572 raw_encap = actions->conf; 3573 if (raw_encap->size > 3574 (sizeof(struct rte_flow_item_eth) + 3575 sizeof(struct rte_flow_item_ipv4))) { 3576 memcpy(actions_tx, actions, 3577 sizeof(struct rte_flow_action)); 3578 actions_tx++; 3579 encap = 1; 3580 } else { 3581 rte_memcpy(actions_rx, actions, 3582 sizeof(struct rte_flow_action)); 3583 actions_rx++; 3584 } 3585 break; 3586 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3587 raw_decap = actions->conf; 3588 if (raw_decap->size < 3589 (sizeof(struct rte_flow_item_eth) + 3590 sizeof(struct rte_flow_item_ipv4))) { 3591 memcpy(actions_tx, actions, 3592 sizeof(struct rte_flow_action)); 3593 actions_tx++; 3594 } else { 3595 rte_memcpy(actions_rx, actions, 3596 sizeof(struct rte_flow_action)); 3597 actions_rx++; 3598 } 3599 break; 3600 default: 3601 rte_memcpy(actions_rx, actions, 3602 sizeof(struct rte_flow_action)); 3603 actions_rx++; 3604 break; 3605 } 3606 } 3607 /* Add set meta action and end action for the Rx flow. */ 3608 tag_action = actions_rx; 3609 tag_action->type = (enum rte_flow_action_type) 3610 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3611 actions_rx++; 3612 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3613 actions_rx++; 3614 set_tag = (void *)actions_rx; 3615 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3616 MLX5_ASSERT(set_tag->id > REG_NONE); 3617 set_tag->data = *flow_id; 3618 tag_action->conf = set_tag; 3619 /* Create Tx item list. 
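	 * The Tx subflow matches only the internal TAG item carrying the
	 * same flow_id that the Rx subflow stores through its SET_TAG
	 * action above, which is what keeps the two halves of the hairpin
	 * flow correlated.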
*/ 3620 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3621 addr = (void *)&pattern_tx[2]; 3622 item = pattern_tx; 3623 item->type = (enum rte_flow_item_type) 3624 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3625 tag_item = (void *)addr; 3626 tag_item->data = *flow_id; 3627 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3628 MLX5_ASSERT(set_tag->id > REG_NONE); 3629 item->spec = tag_item; 3630 addr += sizeof(struct mlx5_rte_flow_item_tag); 3631 tag_item = (void *)addr; 3632 tag_item->data = UINT32_MAX; 3633 tag_item->id = UINT16_MAX; 3634 item->mask = tag_item; 3635 addr += sizeof(struct mlx5_rte_flow_item_tag); 3636 item->last = NULL; 3637 item++; 3638 item->type = RTE_FLOW_ITEM_TYPE_END; 3639 return 0; 3640 } 3641 3642 /** 3643 * The last stage of splitting chain, just creates the subflow 3644 * without any modification. 3645 * 3646 * @param[in] dev 3647 * Pointer to Ethernet device. 3648 * @param[in] flow 3649 * Parent flow structure pointer. 3650 * @param[in, out] sub_flow 3651 * Pointer to return the created subflow, may be NULL. 3652 * @param[in] prefix_layers 3653 * Prefix subflow layers, may be 0. 3654 * @param[in] attr 3655 * Flow rule attributes. 3656 * @param[in] items 3657 * Pattern specification (list terminated by the END pattern item). 3658 * @param[in] actions 3659 * Associated actions (list terminated by the END action). 3660 * @param[in] external 3661 * This flow rule is created by request external to PMD. 3662 * @param[in] flow_idx 3663 * This memory pool index to the flow. 3664 * @param[out] error 3665 * Perform verbose error reporting if not NULL. 3666 * @return 3667 * 0 on success, negative value otherwise 3668 */ 3669 static int 3670 flow_create_split_inner(struct rte_eth_dev *dev, 3671 struct rte_flow *flow, 3672 struct mlx5_flow **sub_flow, 3673 uint64_t prefix_layers, 3674 const struct rte_flow_attr *attr, 3675 const struct rte_flow_item items[], 3676 const struct rte_flow_action actions[], 3677 bool external, uint32_t flow_idx, 3678 struct rte_flow_error *error) 3679 { 3680 struct mlx5_flow *dev_flow; 3681 3682 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3683 flow_idx, error); 3684 if (!dev_flow) 3685 return -rte_errno; 3686 dev_flow->flow = flow; 3687 dev_flow->external = external; 3688 /* Subflow object was created, we must include one in the list. */ 3689 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3690 dev_flow->handle, next); 3691 /* 3692 * If dev_flow is as one of the suffix flow, some actions in suffix 3693 * flow may need some user defined item layer flags. 3694 */ 3695 if (prefix_layers) 3696 dev_flow->handle->layers = prefix_layers; 3697 if (sub_flow) 3698 *sub_flow = dev_flow; 3699 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3700 } 3701 3702 /** 3703 * Split the meter flow. 3704 * 3705 * As meter flow will split to three sub flow, other than meter 3706 * action, the other actions make sense to only meter accepts 3707 * the packet. If it need to be dropped, no other additional 3708 * actions should be take. 3709 * 3710 * One kind of special action which decapsulates the L3 tunnel 3711 * header will be in the prefix sub flow, as not to take the 3712 * L3 tunnel header into account. 3713 * 3714 * @param dev 3715 * Pointer to Ethernet device. 3716 * @param[in] items 3717 * Pattern specification (list terminated by the END pattern item). 3718 * @param[out] sfx_items 3719 * Suffix flow match items (list terminated by the END pattern item). 
3720 * @param[in] actions 3721 * Associated actions (list terminated by the END action). 3722 * @param[out] actions_sfx 3723 * Suffix flow actions. 3724 * @param[out] actions_pre 3725 * Prefix flow actions. 3726 * @param[out] pattern_sfx 3727 * The pattern items for the suffix flow. 3728 * @param[out] tag_sfx 3729 * Pointer to suffix flow tag. 3730 * 3731 * @return 3732 * 0 on success. 3733 */ 3734 static int 3735 flow_meter_split_prep(struct rte_eth_dev *dev, 3736 const struct rte_flow_item items[], 3737 struct rte_flow_item sfx_items[], 3738 const struct rte_flow_action actions[], 3739 struct rte_flow_action actions_sfx[], 3740 struct rte_flow_action actions_pre[]) 3741 { 3742 struct rte_flow_action *tag_action = NULL; 3743 struct rte_flow_item *tag_item; 3744 struct mlx5_rte_flow_action_set_tag *set_tag; 3745 struct rte_flow_error error; 3746 const struct rte_flow_action_raw_encap *raw_encap; 3747 const struct rte_flow_action_raw_decap *raw_decap; 3748 struct mlx5_rte_flow_item_tag *tag_spec; 3749 struct mlx5_rte_flow_item_tag *tag_mask; 3750 uint32_t tag_id; 3751 bool copy_vlan = false; 3752 3753 /* Prepare the actions for prefix and suffix flow. */ 3754 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3755 struct rte_flow_action **action_cur = NULL; 3756 3757 switch (actions->type) { 3758 case RTE_FLOW_ACTION_TYPE_METER: 3759 /* Add the extra tag action first. */ 3760 tag_action = actions_pre; 3761 tag_action->type = (enum rte_flow_action_type) 3762 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3763 actions_pre++; 3764 action_cur = &actions_pre; 3765 break; 3766 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 3767 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 3768 action_cur = &actions_pre; 3769 break; 3770 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3771 raw_encap = actions->conf; 3772 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 3773 action_cur = &actions_pre; 3774 break; 3775 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3776 raw_decap = actions->conf; 3777 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3778 action_cur = &actions_pre; 3779 break; 3780 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3781 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3782 copy_vlan = true; 3783 break; 3784 default: 3785 break; 3786 } 3787 if (!action_cur) 3788 action_cur = &actions_sfx; 3789 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 3790 (*action_cur)++; 3791 } 3792 /* Add end action to the actions. */ 3793 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 3794 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 3795 actions_pre++; 3796 /* Set the tag. */ 3797 set_tag = (void *)actions_pre; 3798 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3799 /* 3800 * Get the id from the qrss_pool to make qrss share the id with meter. 3801 */ 3802 tag_id = flow_qrss_get_id(dev); 3803 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 3804 assert(tag_action); 3805 tag_action->conf = set_tag; 3806 /* Prepare the suffix subflow items. */ 3807 tag_item = sfx_items++; 3808 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 3809 int item_type = items->type; 3810 3811 switch (item_type) { 3812 case RTE_FLOW_ITEM_TYPE_PORT_ID: 3813 memcpy(sfx_items, items, sizeof(*sfx_items)); 3814 sfx_items++; 3815 break; 3816 case RTE_FLOW_ITEM_TYPE_VLAN: 3817 if (copy_vlan) { 3818 memcpy(sfx_items, items, sizeof(*sfx_items)); 3819 /* 3820 * Convert to internal match item, it is used 3821 * for vlan push and set vid. 
3822 */ 3823 sfx_items->type = (enum rte_flow_item_type) 3824 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 3825 sfx_items++; 3826 } 3827 break; 3828 default: 3829 break; 3830 } 3831 } 3832 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 3833 sfx_items++; 3834 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 3835 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 3836 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3837 tag_mask = tag_spec + 1; 3838 tag_mask->data = 0xffffff00; 3839 tag_item->type = (enum rte_flow_item_type) 3840 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3841 tag_item->spec = tag_spec; 3842 tag_item->last = NULL; 3843 tag_item->mask = tag_mask; 3844 return tag_id; 3845 } 3846 3847 /** 3848 * Split action list having QUEUE/RSS for metadata register copy. 3849 * 3850 * Once Q/RSS action is detected in user's action list, the flow action 3851 * should be split in order to copy metadata registers, which will happen in 3852 * RX_CP_TBL like, 3853 * - CQE->flow_tag := reg_c[1] (MARK) 3854 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3855 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 3856 * This is because the last action of each flow must be a terminal action 3857 * (QUEUE, RSS or DROP). 3858 * 3859 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 3860 * stored and kept in the mlx5_flow structure per each sub_flow. 3861 * 3862 * The Q/RSS action is replaced with, 3863 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 3864 * And the following JUMP action is added at the end, 3865 * - JUMP, to RX_CP_TBL. 3866 * 3867 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 3868 * flow_create_split_metadata() routine. The flow will look like, 3869 * - If flow ID matches (reg_c[2]), perform Q/RSS. 3870 * 3871 * @param dev 3872 * Pointer to Ethernet device. 3873 * @param[out] split_actions 3874 * Pointer to store split actions to jump to CP_TBL. 3875 * @param[in] actions 3876 * Pointer to the list of original flow actions. 3877 * @param[in] qrss 3878 * Pointer to the Q/RSS action. 3879 * @param[in] actions_n 3880 * Number of original actions. 3881 * @param[out] error 3882 * Perform verbose error reporting if not NULL. 3883 * 3884 * @return 3885 * non-zero unique flow_id on success, otherwise 0 and 3886 * error/rte_error are set. 3887 */ 3888 static uint32_t 3889 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 3890 struct rte_flow_action *split_actions, 3891 const struct rte_flow_action *actions, 3892 const struct rte_flow_action *qrss, 3893 int actions_n, struct rte_flow_error *error) 3894 { 3895 struct mlx5_rte_flow_action_set_tag *set_tag; 3896 struct rte_flow_action_jump *jump; 3897 const int qrss_idx = qrss - actions; 3898 uint32_t flow_id = 0; 3899 int ret = 0; 3900 3901 /* 3902 * Given actions will be split 3903 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 3904 * - Add jump to mreg CP_TBL. 3905 * As a result, there will be one more action. 3906 */ 3907 ++actions_n; 3908 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 3909 set_tag = (void *)(split_actions + actions_n); 3910 /* 3911 * If tag action is not set to void(it means we are not the meter 3912 * suffix flow), add the tag action. Since meter suffix flow already 3913 * has the tag added. 3914 */ 3915 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 3916 /* 3917 * Allocate the new subflow ID. This one is unique within 3918 * device and not shared with representors. 
Otherwise, 3919 * we would have to resolve multi-thread access synch 3920 * issue. Each flow on the shared device is appended 3921 * with source vport identifier, so the resulting 3922 * flows will be unique in the shared (by master and 3923 * representors) domain even if they have coinciding 3924 * IDs. 3925 */ 3926 flow_id = flow_qrss_get_id(dev); 3927 if (!flow_id) 3928 return rte_flow_error_set(error, ENOMEM, 3929 RTE_FLOW_ERROR_TYPE_ACTION, 3930 NULL, "can't allocate id " 3931 "for split Q/RSS subflow"); 3932 /* Internal SET_TAG action to set flow ID. */ 3933 *set_tag = (struct mlx5_rte_flow_action_set_tag){ 3934 .data = flow_id, 3935 }; 3936 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error); 3937 if (ret < 0) 3938 return ret; 3939 set_tag->id = ret; 3940 /* Construct new actions array. */ 3941 /* Replace QUEUE/RSS action. */ 3942 split_actions[qrss_idx] = (struct rte_flow_action){ 3943 .type = (enum rte_flow_action_type) 3944 MLX5_RTE_FLOW_ACTION_TYPE_TAG, 3945 .conf = set_tag, 3946 }; 3947 } 3948 /* JUMP action to jump to mreg copy table (CP_TBL). */ 3949 jump = (void *)(set_tag + 1); 3950 *jump = (struct rte_flow_action_jump){ 3951 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3952 }; 3953 split_actions[actions_n - 2] = (struct rte_flow_action){ 3954 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3955 .conf = jump, 3956 }; 3957 split_actions[actions_n - 1] = (struct rte_flow_action){ 3958 .type = RTE_FLOW_ACTION_TYPE_END, 3959 }; 3960 return flow_id; 3961 } 3962 3963 /** 3964 * Extend the given action list for Tx metadata copy. 3965 * 3966 * Copy the given action list to the ext_actions and add flow metadata register 3967 * copy action in order to copy reg_a set by WQE to reg_c[0]. 3968 * 3969 * @param[out] ext_actions 3970 * Pointer to the extended action list. 3971 * @param[in] actions 3972 * Pointer to the list of actions. 3973 * @param[in] actions_n 3974 * Number of actions in the list. 3975 * @param[out] error 3976 * Perform verbose error reporting if not NULL. 3977 * @param[in] encap_idx 3978 * The encap action inndex. 
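 * When the copy action can simply go at the tail (encap_idx == actions_n - 1),
 * it is appended just before the END action; otherwise it is inserted at
 * position encap_idx and the remaining actions, END included, are shifted
 * by one. The struct mlx5_flow_action_copy_mreg configuration itself is
 * stored in the same buffer, right after the actions_n + 1 action entries.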
3979 * 3980 * @return 3981 * 0 on success, negative value otherwise 3982 */ 3983 static int 3984 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 3985 struct rte_flow_action *ext_actions, 3986 const struct rte_flow_action *actions, 3987 int actions_n, struct rte_flow_error *error, 3988 int encap_idx) 3989 { 3990 struct mlx5_flow_action_copy_mreg *cp_mreg = 3991 (struct mlx5_flow_action_copy_mreg *) 3992 (ext_actions + actions_n + 1); 3993 int ret; 3994 3995 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3996 if (ret < 0) 3997 return ret; 3998 cp_mreg->dst = ret; 3999 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 4000 if (ret < 0) 4001 return ret; 4002 cp_mreg->src = ret; 4003 if (encap_idx != 0) 4004 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 4005 if (encap_idx == actions_n - 1) { 4006 ext_actions[actions_n - 1] = (struct rte_flow_action){ 4007 .type = (enum rte_flow_action_type) 4008 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4009 .conf = cp_mreg, 4010 }; 4011 ext_actions[actions_n] = (struct rte_flow_action){ 4012 .type = RTE_FLOW_ACTION_TYPE_END, 4013 }; 4014 } else { 4015 ext_actions[encap_idx] = (struct rte_flow_action){ 4016 .type = (enum rte_flow_action_type) 4017 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 4018 .conf = cp_mreg, 4019 }; 4020 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 4021 sizeof(*ext_actions) * (actions_n - encap_idx)); 4022 } 4023 return 0; 4024 } 4025 4026 /** 4027 * The splitting for metadata feature. 4028 * 4029 * - Q/RSS action on NIC Rx should be split in order to pass by 4030 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4031 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4032 * 4033 * - All the actions on NIC Tx should have a mreg copy action to 4034 * copy reg_a from WQE to reg_c[0]. 4035 * 4036 * @param dev 4037 * Pointer to Ethernet device. 4038 * @param[in] flow 4039 * Parent flow structure pointer. 4040 * @param[in] prefix_layers 4041 * Prefix flow layer flags. 4042 * @param[in] attr 4043 * Flow rule attributes. 4044 * @param[in] items 4045 * Pattern specification (list terminated by the END pattern item). 4046 * @param[in] actions 4047 * Associated actions (list terminated by the END action). 4048 * @param[in] external 4049 * This flow rule is created by request external to PMD. 4050 * @param[in] flow_idx 4051 * This memory pool index to the flow. 4052 * @param[out] error 4053 * Perform verbose error reporting if not NULL. 4054 * @return 4055 * 0 on success, negative value otherwise 4056 */ 4057 static int 4058 flow_create_split_metadata(struct rte_eth_dev *dev, 4059 struct rte_flow *flow, 4060 uint64_t prefix_layers, 4061 const struct rte_flow_attr *attr, 4062 const struct rte_flow_item items[], 4063 const struct rte_flow_action actions[], 4064 bool external, uint32_t flow_idx, 4065 struct rte_flow_error *error) 4066 { 4067 struct mlx5_priv *priv = dev->data->dev_private; 4068 struct mlx5_dev_config *config = &priv->config; 4069 const struct rte_flow_action *qrss = NULL; 4070 struct rte_flow_action *ext_actions = NULL; 4071 struct mlx5_flow *dev_flow = NULL; 4072 uint32_t qrss_id = 0; 4073 int mtr_sfx = 0; 4074 size_t act_size; 4075 int actions_n; 4076 int encap_idx; 4077 int ret; 4078 4079 /* Check whether extensive metadata feature is engaged. 
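 * No splitting is needed when the DV flow engine is disabled, when
 * dv_xmeta_en is in legacy mode, or when extended metadata registers are
 * not supported; in that case the flow is created as a single subflow
 * right away.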
*/ 4080 if (!config->dv_flow_en || 4081 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 4082 !mlx5_flow_ext_mreg_supported(dev)) 4083 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 4084 attr, items, actions, external, 4085 flow_idx, error); 4086 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 4087 &encap_idx); 4088 if (qrss) { 4089 /* Exclude hairpin flows from splitting. */ 4090 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 4091 const struct rte_flow_action_queue *queue; 4092 4093 queue = qrss->conf; 4094 if (mlx5_rxq_get_type(dev, queue->index) == 4095 MLX5_RXQ_TYPE_HAIRPIN) 4096 qrss = NULL; 4097 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 4098 const struct rte_flow_action_rss *rss; 4099 4100 rss = qrss->conf; 4101 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 4102 MLX5_RXQ_TYPE_HAIRPIN) 4103 qrss = NULL; 4104 } 4105 } 4106 if (qrss) { 4107 /* Check if it is in meter suffix table. */ 4108 mtr_sfx = attr->group == (attr->transfer ? 4109 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4110 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 4111 /* 4112 * Q/RSS action on NIC Rx should be split in order to pass by 4113 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4114 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4115 */ 4116 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4117 sizeof(struct rte_flow_action_set_tag) + 4118 sizeof(struct rte_flow_action_jump); 4119 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4120 SOCKET_ID_ANY); 4121 if (!ext_actions) 4122 return rte_flow_error_set(error, ENOMEM, 4123 RTE_FLOW_ERROR_TYPE_ACTION, 4124 NULL, "no memory to split " 4125 "metadata flow"); 4126 /* 4127 * If we are the suffix flow of meter, tag already exist. 4128 * Set the tag action to void. 4129 */ 4130 if (mtr_sfx) 4131 ext_actions[qrss - actions].type = 4132 RTE_FLOW_ACTION_TYPE_VOID; 4133 else 4134 ext_actions[qrss - actions].type = 4135 (enum rte_flow_action_type) 4136 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4137 /* 4138 * Create the new actions list with removed Q/RSS action 4139 * and appended set tag and jump to register copy table 4140 * (RX_CP_TBL). We should preallocate unique tag ID here 4141 * in advance, because it is needed for set tag action. 4142 */ 4143 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4144 qrss, actions_n, error); 4145 if (!mtr_sfx && !qrss_id) { 4146 ret = -rte_errno; 4147 goto exit; 4148 } 4149 } else if (attr->egress && !attr->transfer) { 4150 /* 4151 * All the actions on NIC Tx should have a metadata register 4152 * copy action to copy reg_a from WQE to reg_c[meta] 4153 */ 4154 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4155 sizeof(struct mlx5_flow_action_copy_mreg); 4156 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4157 SOCKET_ID_ANY); 4158 if (!ext_actions) 4159 return rte_flow_error_set(error, ENOMEM, 4160 RTE_FLOW_ERROR_TYPE_ACTION, 4161 NULL, "no memory to split " 4162 "metadata flow"); 4163 /* Create the action list appended with copy register. */ 4164 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4165 actions_n, error, encap_idx); 4166 if (ret < 0) 4167 goto exit; 4168 } 4169 /* Add the unmodified original or prefix subflow. */ 4170 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr, 4171 items, ext_actions ? 
ext_actions : 4172 actions, external, flow_idx, error); 4173 if (ret < 0) 4174 goto exit; 4175 MLX5_ASSERT(dev_flow); 4176 if (qrss) { 4177 const struct rte_flow_attr q_attr = { 4178 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4179 .ingress = 1, 4180 }; 4181 /* Internal PMD action to set register. */ 4182 struct mlx5_rte_flow_item_tag q_tag_spec = { 4183 .data = qrss_id, 4184 .id = 0, 4185 }; 4186 struct rte_flow_item q_items[] = { 4187 { 4188 .type = (enum rte_flow_item_type) 4189 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4190 .spec = &q_tag_spec, 4191 .last = NULL, 4192 .mask = NULL, 4193 }, 4194 { 4195 .type = RTE_FLOW_ITEM_TYPE_END, 4196 }, 4197 }; 4198 struct rte_flow_action q_actions[] = { 4199 { 4200 .type = qrss->type, 4201 .conf = qrss->conf, 4202 }, 4203 { 4204 .type = RTE_FLOW_ACTION_TYPE_END, 4205 }, 4206 }; 4207 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4208 4209 /* 4210 * Configure the tag item only if there is no meter subflow. 4211 * Since tag is already marked in the meter suffix subflow 4212 * we can just use the meter suffix items as is. 4213 */ 4214 if (qrss_id) { 4215 /* Not meter subflow. */ 4216 MLX5_ASSERT(!mtr_sfx); 4217 /* 4218 * Put unique id in prefix flow due to it is destroyed 4219 * after suffix flow and id will be freed after there 4220 * is no actual flows with this id and identifier 4221 * reallocation becomes possible (for example, for 4222 * other flows in other threads). 4223 */ 4224 dev_flow->handle->split_flow_id = qrss_id; 4225 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4226 error); 4227 if (ret < 0) 4228 goto exit; 4229 q_tag_spec.id = ret; 4230 } 4231 dev_flow = NULL; 4232 /* Add suffix subflow to execute Q/RSS. */ 4233 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 4234 &q_attr, mtr_sfx ? items : 4235 q_items, q_actions, 4236 external, flow_idx, error); 4237 if (ret < 0) 4238 goto exit; 4239 /* qrss ID should be freed if failed. */ 4240 qrss_id = 0; 4241 MLX5_ASSERT(dev_flow); 4242 } 4243 4244 exit: 4245 /* 4246 * We do not destroy the partially created sub_flows in case of error. 4247 * These ones are included into parent flow list and will be destroyed 4248 * by flow_drv_destroy. 4249 */ 4250 flow_qrss_free_id(dev, qrss_id); 4251 mlx5_free(ext_actions); 4252 return ret; 4253 } 4254 4255 /** 4256 * The splitting for meter feature. 4257 * 4258 * - The meter flow will be split to two flows as prefix and 4259 * suffix flow. The packets make sense only it pass the prefix 4260 * meter action. 4261 * 4262 * - Reg_C_5 is used for the packet to match betweend prefix and 4263 * suffix flow. 4264 * 4265 * @param dev 4266 * Pointer to Ethernet device. 4267 * @param[in] flow 4268 * Parent flow structure pointer. 4269 * @param[in] attr 4270 * Flow rule attributes. 4271 * @param[in] items 4272 * Pattern specification (list terminated by the END pattern item). 4273 * @param[in] actions 4274 * Associated actions (list terminated by the END action). 4275 * @param[in] external 4276 * This flow rule is created by request external to PMD. 4277 * @param[in] flow_idx 4278 * This memory pool index to the flow. 4279 * @param[out] error 4280 * Perform verbose error reporting if not NULL. 
4281 * @return 4282 * 0 on success, negative value otherwise 4283 */ 4284 static int 4285 flow_create_split_meter(struct rte_eth_dev *dev, 4286 struct rte_flow *flow, 4287 const struct rte_flow_attr *attr, 4288 const struct rte_flow_item items[], 4289 const struct rte_flow_action actions[], 4290 bool external, uint32_t flow_idx, 4291 struct rte_flow_error *error) 4292 { 4293 struct mlx5_priv *priv = dev->data->dev_private; 4294 struct rte_flow_action *sfx_actions = NULL; 4295 struct rte_flow_action *pre_actions = NULL; 4296 struct rte_flow_item *sfx_items = NULL; 4297 struct mlx5_flow *dev_flow = NULL; 4298 struct rte_flow_attr sfx_attr = *attr; 4299 uint32_t mtr = 0; 4300 uint32_t mtr_tag_id = 0; 4301 size_t act_size; 4302 size_t item_size; 4303 int actions_n = 0; 4304 int ret; 4305 4306 if (priv->mtr_en) 4307 actions_n = flow_check_meter_action(actions, &mtr); 4308 if (mtr) { 4309 /* The five prefix actions: meter, decap, encap, tag, end. */ 4310 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4311 sizeof(struct mlx5_rte_flow_action_set_tag); 4312 /* tag, vlan, port id, end. */ 4313 #define METER_SUFFIX_ITEM 4 4314 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4315 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4316 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 4317 0, SOCKET_ID_ANY); 4318 if (!sfx_actions) 4319 return rte_flow_error_set(error, ENOMEM, 4320 RTE_FLOW_ERROR_TYPE_ACTION, 4321 NULL, "no memory to split " 4322 "meter flow"); 4323 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4324 act_size); 4325 pre_actions = sfx_actions + actions_n; 4326 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4327 actions, sfx_actions, 4328 pre_actions); 4329 if (!mtr_tag_id) { 4330 ret = -rte_errno; 4331 goto exit; 4332 } 4333 /* Add the prefix subflow. */ 4334 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr, 4335 items, pre_actions, external, 4336 flow_idx, error); 4337 if (ret) { 4338 ret = -rte_errno; 4339 goto exit; 4340 } 4341 dev_flow->handle->split_flow_id = mtr_tag_id; 4342 /* Setting the sfx group atrr. */ 4343 sfx_attr.group = sfx_attr.transfer ? 4344 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4345 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4346 } 4347 /* Add the prefix subflow. */ 4348 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4349 flow_get_prefix_layer_flags(dev_flow) : 4350 0, &sfx_attr, 4351 sfx_items ? sfx_items : items, 4352 sfx_actions ? sfx_actions : actions, 4353 external, flow_idx, error); 4354 exit: 4355 if (sfx_actions) 4356 mlx5_free(sfx_actions); 4357 return ret; 4358 } 4359 4360 /** 4361 * Split the flow to subflow set. The splitters might be linked 4362 * in the chain, like this: 4363 * flow_create_split_outer() calls: 4364 * flow_create_split_meter() calls: 4365 * flow_create_split_metadata(meter_subflow_0) calls: 4366 * flow_create_split_inner(metadata_subflow_0) 4367 * flow_create_split_inner(metadata_subflow_1) 4368 * flow_create_split_inner(metadata_subflow_2) 4369 * flow_create_split_metadata(meter_subflow_1) calls: 4370 * flow_create_split_inner(metadata_subflow_0) 4371 * flow_create_split_inner(metadata_subflow_1) 4372 * flow_create_split_inner(metadata_subflow_2) 4373 * 4374 * This provide flexible way to add new levels of flow splitting. 4375 * The all of successfully created subflows are included to the 4376 * parent flow dev_flow list. 4377 * 4378 * @param dev 4379 * Pointer to Ethernet device. 4380 * @param[in] flow 4381 * Parent flow structure pointer. 
4382 * @param[in] attr 4383 * Flow rule attributes. 4384 * @param[in] items 4385 * Pattern specification (list terminated by the END pattern item). 4386 * @param[in] actions 4387 * Associated actions (list terminated by the END action). 4388 * @param[in] external 4389 * This flow rule is created by request external to PMD. 4390 * @param[in] flow_idx 4391 * This memory pool index to the flow. 4392 * @param[out] error 4393 * Perform verbose error reporting if not NULL. 4394 * @return 4395 * 0 on success, negative value otherwise 4396 */ 4397 static int 4398 flow_create_split_outer(struct rte_eth_dev *dev, 4399 struct rte_flow *flow, 4400 const struct rte_flow_attr *attr, 4401 const struct rte_flow_item items[], 4402 const struct rte_flow_action actions[], 4403 bool external, uint32_t flow_idx, 4404 struct rte_flow_error *error) 4405 { 4406 int ret; 4407 4408 ret = flow_create_split_meter(dev, flow, attr, items, 4409 actions, external, flow_idx, error); 4410 MLX5_ASSERT(ret <= 0); 4411 return ret; 4412 } 4413 4414 /** 4415 * Create a flow and add it to @p list. 4416 * 4417 * @param dev 4418 * Pointer to Ethernet device. 4419 * @param list 4420 * Pointer to the indexed flow list. If this parameter is NULL, 4421 * no list insertion occurs; the flow is just created and 4422 * it is the caller's responsibility to track the 4423 * created flow. 4424 * @param[in] attr 4425 * Flow rule attributes. 4426 * @param[in] items 4427 * Pattern specification (list terminated by the END pattern item). 4428 * @param[in] actions 4429 * Associated actions (list terminated by the END action). 4430 * @param[in] external 4431 * This flow rule is created by request external to PMD. 4432 * @param[out] error 4433 * Perform verbose error reporting if not NULL. 4434 * 4435 * @return 4436 * A flow index on success, 0 otherwise and rte_errno is set. 
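 *
 * Note: the returned value is an index into the MLX5_IPOOL_RTE_FLOW
 * indexed pool rather than a pointer; callers implementing the rte_flow
 * API cast it to an opaque struct rte_flow *. A minimal internal usage
 * sketch (illustrative only):
 *
 *   uint32_t idx = flow_list_create(dev, &priv->ctrl_flows, &attr,
 *                                   items, actions, false, &error);
 *   if (!idx)
 *       return -rte_errno;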
4437 */ 4438 static uint32_t 4439 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 4440 const struct rte_flow_attr *attr, 4441 const struct rte_flow_item items[], 4442 const struct rte_flow_action actions[], 4443 bool external, struct rte_flow_error *error) 4444 { 4445 struct mlx5_priv *priv = dev->data->dev_private; 4446 struct rte_flow *flow = NULL; 4447 struct mlx5_flow *dev_flow; 4448 const struct rte_flow_action_rss *rss; 4449 union { 4450 struct rte_flow_expand_rss buf; 4451 uint8_t buffer[2048]; 4452 } expand_buffer; 4453 union { 4454 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4455 uint8_t buffer[2048]; 4456 } actions_rx; 4457 union { 4458 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4459 uint8_t buffer[2048]; 4460 } actions_hairpin_tx; 4461 union { 4462 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 4463 uint8_t buffer[2048]; 4464 } items_tx; 4465 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 4466 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 4467 priv->rss_desc)[!!priv->flow_idx]; 4468 const struct rte_flow_action *p_actions_rx = actions; 4469 uint32_t i; 4470 uint32_t idx = 0; 4471 int hairpin_flow; 4472 uint32_t hairpin_id = 0; 4473 struct rte_flow_attr attr_tx = { .priority = 0 }; 4474 int ret; 4475 4476 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 4477 ret = flow_drv_validate(dev, attr, items, p_actions_rx, 4478 external, hairpin_flow, error); 4479 if (ret < 0) 4480 return 0; 4481 if (hairpin_flow > 0) { 4482 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 4483 rte_errno = EINVAL; 4484 return 0; 4485 } 4486 flow_hairpin_split(dev, actions, actions_rx.actions, 4487 actions_hairpin_tx.actions, items_tx.items, 4488 &hairpin_id); 4489 p_actions_rx = actions_rx.actions; 4490 } 4491 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 4492 if (!flow) { 4493 rte_errno = ENOMEM; 4494 goto error_before_flow; 4495 } 4496 flow->drv_type = flow_get_drv_type(dev, attr); 4497 if (hairpin_id != 0) 4498 flow->hairpin_flow_id = hairpin_id; 4499 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 4500 flow->drv_type < MLX5_FLOW_TYPE_MAX); 4501 memset(rss_desc, 0, sizeof(*rss_desc)); 4502 rss = flow_get_rss_action(p_actions_rx); 4503 if (rss) { 4504 /* 4505 * The following information is required by 4506 * mlx5_flow_hashfields_adjust() in advance. 4507 */ 4508 rss_desc->level = rss->level; 4509 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 4510 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 4511 } 4512 flow->dev_handles = 0; 4513 if (rss && rss->types) { 4514 unsigned int graph_root; 4515 4516 graph_root = find_graph_root(items, rss->level); 4517 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 4518 items, rss->types, 4519 mlx5_support_expansion, 4520 graph_root); 4521 MLX5_ASSERT(ret > 0 && 4522 (unsigned int)ret < sizeof(expand_buffer.buffer)); 4523 } else { 4524 buf->entries = 1; 4525 buf->entry[0].pattern = (void *)(uintptr_t)items; 4526 } 4527 /* 4528 * Record the start index when there is a nested call. All sub-flows 4529 * need to be translated before another calling. 4530 * No need to use ping-pong buffer to save memory here. 4531 */ 4532 if (priv->flow_idx) { 4533 MLX5_ASSERT(!priv->flow_nested_idx); 4534 priv->flow_nested_idx = priv->flow_idx; 4535 } 4536 for (i = 0; i < buf->entries; ++i) { 4537 /* 4538 * The splitter may create multiple dev_flows, 4539 * depending on configuration. In the simplest 4540 * case it just creates unmodified original flow. 
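 * Every expansion entry shares the same action list; only the pattern
 * differs, and each resulting device flow handle is linked into
 * flow->dev_handles by flow_create_split_inner().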
4541 */ 4542 ret = flow_create_split_outer(dev, flow, attr, 4543 buf->entry[i].pattern, 4544 p_actions_rx, external, idx, 4545 error); 4546 if (ret < 0) 4547 goto error; 4548 } 4549 /* Create the tx flow. */ 4550 if (hairpin_flow) { 4551 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 4552 attr_tx.ingress = 0; 4553 attr_tx.egress = 1; 4554 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 4555 actions_hairpin_tx.actions, 4556 idx, error); 4557 if (!dev_flow) 4558 goto error; 4559 dev_flow->flow = flow; 4560 dev_flow->external = 0; 4561 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 4562 dev_flow->handle, next); 4563 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 4564 items_tx.items, 4565 actions_hairpin_tx.actions, error); 4566 if (ret < 0) 4567 goto error; 4568 } 4569 /* 4570 * Update the metadata register copy table. If extensive 4571 * metadata feature is enabled and registers are supported 4572 * we might create the extra rte_flow for each unique 4573 * MARK/FLAG action ID. 4574 * 4575 * The table is updated for ingress Flows only, because 4576 * the egress Flows belong to the different device and 4577 * copy table should be updated in peer NIC Rx domain. 4578 */ 4579 if (attr->ingress && 4580 (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 4581 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 4582 if (ret) 4583 goto error; 4584 } 4585 /* 4586 * If the flow is external (from application) OR device is started, then 4587 * the flow will be applied immediately. 4588 */ 4589 if (external || dev->data->dev_started) { 4590 ret = flow_drv_apply(dev, flow, error); 4591 if (ret < 0) 4592 goto error; 4593 } 4594 if (list) 4595 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 4596 flow, next); 4597 flow_rxq_flags_set(dev, flow); 4598 /* Nested flow creation index recovery. */ 4599 priv->flow_idx = priv->flow_nested_idx; 4600 if (priv->flow_nested_idx) 4601 priv->flow_nested_idx = 0; 4602 return idx; 4603 error: 4604 MLX5_ASSERT(flow); 4605 ret = rte_errno; /* Save rte_errno before cleanup. */ 4606 flow_mreg_del_copy_action(dev, flow); 4607 flow_drv_destroy(dev, flow); 4608 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 4609 rte_errno = ret; /* Restore rte_errno. */ 4610 error_before_flow: 4611 ret = rte_errno; 4612 if (hairpin_id) 4613 mlx5_flow_id_release(priv->sh->flow_id_pool, 4614 hairpin_id); 4615 rte_errno = ret; 4616 priv->flow_idx = priv->flow_nested_idx; 4617 if (priv->flow_nested_idx) 4618 priv->flow_nested_idx = 0; 4619 return 0; 4620 } 4621 4622 /** 4623 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 4624 * incoming packets to table 1. 4625 * 4626 * Other flow rules, requested for group n, will be created in 4627 * e-switch table n+1. 4628 * Jump action to e-switch group n will be created to group n+1. 4629 * 4630 * Used when working in switchdev mode, to utilise advantages of table 1 4631 * and above. 4632 * 4633 * @param dev 4634 * Pointer to Ethernet device. 4635 * 4636 * @return 4637 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
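 *
 * The rule is kept on the priv->ctrl_flows list and, like any other flow
 * created through flow_list_create(), the returned pointer is really the
 * pool index cast to an opaque struct rte_flow *.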
4638 */ 4639 struct rte_flow * 4640 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 4641 { 4642 const struct rte_flow_attr attr = { 4643 .group = 0, 4644 .priority = 0, 4645 .ingress = 1, 4646 .egress = 0, 4647 .transfer = 1, 4648 }; 4649 const struct rte_flow_item pattern = { 4650 .type = RTE_FLOW_ITEM_TYPE_END, 4651 }; 4652 struct rte_flow_action_jump jump = { 4653 .group = 1, 4654 }; 4655 const struct rte_flow_action actions[] = { 4656 { 4657 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4658 .conf = &jump, 4659 }, 4660 { 4661 .type = RTE_FLOW_ACTION_TYPE_END, 4662 }, 4663 }; 4664 struct mlx5_priv *priv = dev->data->dev_private; 4665 struct rte_flow_error error; 4666 4667 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 4668 &attr, &pattern, 4669 actions, false, &error); 4670 } 4671 4672 /** 4673 * Validate a flow supported by the NIC. 4674 * 4675 * @see rte_flow_validate() 4676 * @see rte_flow_ops 4677 */ 4678 int 4679 mlx5_flow_validate(struct rte_eth_dev *dev, 4680 const struct rte_flow_attr *attr, 4681 const struct rte_flow_item items[], 4682 const struct rte_flow_action actions[], 4683 struct rte_flow_error *error) 4684 { 4685 int hairpin_flow; 4686 4687 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 4688 return flow_drv_validate(dev, attr, items, actions, 4689 true, hairpin_flow, error); 4690 } 4691 4692 /** 4693 * Create a flow. 4694 * 4695 * @see rte_flow_create() 4696 * @see rte_flow_ops 4697 */ 4698 struct rte_flow * 4699 mlx5_flow_create(struct rte_eth_dev *dev, 4700 const struct rte_flow_attr *attr, 4701 const struct rte_flow_item items[], 4702 const struct rte_flow_action actions[], 4703 struct rte_flow_error *error) 4704 { 4705 struct mlx5_priv *priv = dev->data->dev_private; 4706 4707 /* 4708 * If the device is not started yet, it is not allowed to created a 4709 * flow from application. PMD default flows and traffic control flows 4710 * are not affected. 4711 */ 4712 if (unlikely(!dev->data->dev_started)) { 4713 DRV_LOG(DEBUG, "port %u is not started when " 4714 "inserting a flow", dev->data->port_id); 4715 rte_flow_error_set(error, ENODEV, 4716 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 4717 NULL, 4718 "port not started"); 4719 return NULL; 4720 } 4721 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 4722 attr, items, actions, true, error); 4723 } 4724 4725 /** 4726 * Destroy a flow in a list. 4727 * 4728 * @param dev 4729 * Pointer to Ethernet device. 4730 * @param list 4731 * Pointer to the Indexed flow list. If this parameter NULL, 4732 * there is no flow removal from the list. Be noted that as 4733 * flow is add to the indexed list, memory of the indexed 4734 * list points to maybe changed as flow destroyed. 4735 * @param[in] flow_idx 4736 * Index of flow to destroy. 4737 */ 4738 static void 4739 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 4740 uint32_t flow_idx) 4741 { 4742 struct mlx5_priv *priv = dev->data->dev_private; 4743 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 4744 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 4745 [MLX5_IPOOL_RTE_FLOW], flow_idx); 4746 4747 if (!flow) 4748 return; 4749 /* 4750 * Update RX queue flags only if port is started, otherwise it is 4751 * already clean. 
4752 */ 4753 if (dev->data->dev_started) 4754 flow_rxq_flags_trim(dev, flow); 4755 if (flow->hairpin_flow_id) 4756 mlx5_flow_id_release(priv->sh->flow_id_pool, 4757 flow->hairpin_flow_id); 4758 flow_drv_destroy(dev, flow); 4759 if (list) 4760 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 4761 flow_idx, flow, next); 4762 flow_mreg_del_copy_action(dev, flow); 4763 if (flow->fdir) { 4764 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 4765 if (priv_fdir_flow->rix_flow == flow_idx) 4766 break; 4767 } 4768 if (priv_fdir_flow) { 4769 LIST_REMOVE(priv_fdir_flow, next); 4770 mlx5_free(priv_fdir_flow->fdir); 4771 mlx5_free(priv_fdir_flow); 4772 } 4773 } 4774 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 4775 } 4776 4777 /** 4778 * Destroy all flows. 4779 * 4780 * @param dev 4781 * Pointer to Ethernet device. 4782 * @param list 4783 * Pointer to the Indexed flow list. 4784 * @param active 4785 * If flushing is called avtively. 4786 */ 4787 void 4788 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 4789 { 4790 uint32_t num_flushed = 0; 4791 4792 while (*list) { 4793 flow_list_destroy(dev, list, *list); 4794 num_flushed++; 4795 } 4796 if (active) { 4797 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 4798 dev->data->port_id, num_flushed); 4799 } 4800 } 4801 4802 /** 4803 * Remove all flows. 4804 * 4805 * @param dev 4806 * Pointer to Ethernet device. 4807 * @param list 4808 * Pointer to the Indexed flow list. 4809 */ 4810 void 4811 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 4812 { 4813 struct mlx5_priv *priv = dev->data->dev_private; 4814 struct rte_flow *flow = NULL; 4815 uint32_t idx; 4816 4817 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 4818 flow, next) { 4819 flow_drv_remove(dev, flow); 4820 flow_mreg_stop_copy_action(dev, flow); 4821 } 4822 flow_mreg_del_default_copy_action(dev); 4823 flow_rxq_flags_clear(dev); 4824 } 4825 4826 /** 4827 * Add all flows. 4828 * 4829 * @param dev 4830 * Pointer to Ethernet device. 4831 * @param list 4832 * Pointer to the Indexed flow list. 4833 * 4834 * @return 4835 * 0 on success, a negative errno value otherwise and rte_errno is set. 4836 */ 4837 int 4838 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 4839 { 4840 struct mlx5_priv *priv = dev->data->dev_private; 4841 struct rte_flow *flow = NULL; 4842 struct rte_flow_error error; 4843 uint32_t idx; 4844 int ret = 0; 4845 4846 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 4847 ret = flow_mreg_add_default_copy_action(dev, &error); 4848 if (ret < 0) 4849 return -rte_errno; 4850 /* Apply Flows created by application. */ 4851 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 4852 flow, next) { 4853 ret = flow_mreg_start_copy_action(dev, flow); 4854 if (ret < 0) 4855 goto error; 4856 ret = flow_drv_apply(dev, flow, &error); 4857 if (ret < 0) 4858 goto error; 4859 flow_rxq_flags_set(dev, flow); 4860 } 4861 return 0; 4862 error: 4863 ret = rte_errno; /* Save rte_errno before cleanup. */ 4864 mlx5_flow_stop(dev, list); 4865 rte_errno = ret; /* Restore rte_errno. */ 4866 return -rte_errno; 4867 } 4868 4869 /** 4870 * Stop all default actions for flows. 4871 * 4872 * @param dev 4873 * Pointer to Ethernet device. 4874 */ 4875 void 4876 mlx5_flow_stop_default(struct rte_eth_dev *dev) 4877 { 4878 flow_mreg_del_default_copy_action(dev); 4879 flow_rxq_flags_clear(dev); 4880 } 4881 4882 /** 4883 * Start all default actions for flows. 
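 * Currently this only re-creates the default metadata register copy
 * action (reg_c[0] -> reg_b); the per-flow rules are re-applied
 * separately by mlx5_flow_start().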
4884 * 4885 * @param dev 4886 * Pointer to Ethernet device. 4887 * @return 4888 * 0 on success, a negative errno value otherwise and rte_errno is set. 4889 */ 4890 int 4891 mlx5_flow_start_default(struct rte_eth_dev *dev) 4892 { 4893 struct rte_flow_error error; 4894 4895 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 4896 return flow_mreg_add_default_copy_action(dev, &error); 4897 } 4898 4899 /** 4900 * Allocate intermediate resources for flow creation. 4901 * 4902 * @param dev 4903 * Pointer to Ethernet device. 4904 */ 4905 void 4906 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 4907 { 4908 struct mlx5_priv *priv = dev->data->dev_private; 4909 4910 if (!priv->inter_flows) { 4911 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, 4912 MLX5_NUM_MAX_DEV_FLOWS * 4913 sizeof(struct mlx5_flow) + 4914 (sizeof(struct mlx5_flow_rss_desc) + 4915 sizeof(uint16_t) * UINT16_MAX) * 2, 0, 4916 SOCKET_ID_ANY); 4917 if (!priv->inter_flows) { 4918 DRV_LOG(ERR, "can't allocate intermediate memory."); 4919 return; 4920 } 4921 } 4922 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 4923 [MLX5_NUM_MAX_DEV_FLOWS]; 4924 /* Reset the index. */ 4925 priv->flow_idx = 0; 4926 priv->flow_nested_idx = 0; 4927 } 4928 4929 /** 4930 * Free intermediate resources for flows. 4931 * 4932 * @param dev 4933 * Pointer to Ethernet device. 4934 */ 4935 void 4936 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 4937 { 4938 struct mlx5_priv *priv = dev->data->dev_private; 4939 4940 mlx5_free(priv->inter_flows); 4941 priv->inter_flows = NULL; 4942 } 4943 4944 /** 4945 * Verify the flow list is empty 4946 * 4947 * @param dev 4948 * Pointer to Ethernet device. 4949 * 4950 * @return the number of flows not released. 4951 */ 4952 int 4953 mlx5_flow_verify(struct rte_eth_dev *dev) 4954 { 4955 struct mlx5_priv *priv = dev->data->dev_private; 4956 struct rte_flow *flow; 4957 uint32_t idx; 4958 int ret = 0; 4959 4960 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 4961 flow, next) { 4962 DRV_LOG(DEBUG, "port %u flow %p still referenced", 4963 dev->data->port_id, (void *)flow); 4964 ++ret; 4965 } 4966 return ret; 4967 } 4968 4969 /** 4970 * Enable default hairpin egress flow. 4971 * 4972 * @param dev 4973 * Pointer to Ethernet device. 4974 * @param queue 4975 * The queue index. 4976 * 4977 * @return 4978 * 0 on success, a negative errno value otherwise and rte_errno is set. 
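 *
 * The rule matches the internal TX_QUEUE item for @p queue on egress and
 * jumps to the hairpin Tx table (MLX5_HAIRPIN_TX_TABLE), so traffic from
 * that queue is redirected into the hairpin flow group.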
4979 */ 4980 int 4981 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 4982 uint32_t queue) 4983 { 4984 struct mlx5_priv *priv = dev->data->dev_private; 4985 const struct rte_flow_attr attr = { 4986 .egress = 1, 4987 .priority = 0, 4988 }; 4989 struct mlx5_rte_flow_item_tx_queue queue_spec = { 4990 .queue = queue, 4991 }; 4992 struct mlx5_rte_flow_item_tx_queue queue_mask = { 4993 .queue = UINT32_MAX, 4994 }; 4995 struct rte_flow_item items[] = { 4996 { 4997 .type = (enum rte_flow_item_type) 4998 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 4999 .spec = &queue_spec, 5000 .last = NULL, 5001 .mask = &queue_mask, 5002 }, 5003 { 5004 .type = RTE_FLOW_ITEM_TYPE_END, 5005 }, 5006 }; 5007 struct rte_flow_action_jump jump = { 5008 .group = MLX5_HAIRPIN_TX_TABLE, 5009 }; 5010 struct rte_flow_action actions[2]; 5011 uint32_t flow_idx; 5012 struct rte_flow_error error; 5013 5014 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 5015 actions[0].conf = &jump; 5016 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 5017 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5018 &attr, items, actions, false, &error); 5019 if (!flow_idx) { 5020 DRV_LOG(DEBUG, 5021 "Failed to create ctrl flow: rte_errno(%d)," 5022 " type(%d), message(%s)", 5023 rte_errno, error.type, 5024 error.message ? error.message : " (no stated reason)"); 5025 return -rte_errno; 5026 } 5027 return 0; 5028 } 5029 5030 /** 5031 * Enable a control flow configured from the control plane. 5032 * 5033 * @param dev 5034 * Pointer to Ethernet device. 5035 * @param eth_spec 5036 * An Ethernet flow spec to apply. 5037 * @param eth_mask 5038 * An Ethernet flow mask to apply. 5039 * @param vlan_spec 5040 * A VLAN flow spec to apply. 5041 * @param vlan_mask 5042 * A VLAN flow mask to apply. 5043 * 5044 * @return 5045 * 0 on success, a negative errno value otherwise and rte_errno is set. 5046 */ 5047 int 5048 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 5049 struct rte_flow_item_eth *eth_spec, 5050 struct rte_flow_item_eth *eth_mask, 5051 struct rte_flow_item_vlan *vlan_spec, 5052 struct rte_flow_item_vlan *vlan_mask) 5053 { 5054 struct mlx5_priv *priv = dev->data->dev_private; 5055 const struct rte_flow_attr attr = { 5056 .ingress = 1, 5057 .priority = MLX5_FLOW_PRIO_RSVD, 5058 }; 5059 struct rte_flow_item items[] = { 5060 { 5061 .type = RTE_FLOW_ITEM_TYPE_ETH, 5062 .spec = eth_spec, 5063 .last = NULL, 5064 .mask = eth_mask, 5065 }, 5066 { 5067 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 5068 RTE_FLOW_ITEM_TYPE_END, 5069 .spec = vlan_spec, 5070 .last = NULL, 5071 .mask = vlan_mask, 5072 }, 5073 { 5074 .type = RTE_FLOW_ITEM_TYPE_END, 5075 }, 5076 }; 5077 uint16_t queue[priv->reta_idx_n]; 5078 struct rte_flow_action_rss action_rss = { 5079 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 5080 .level = 0, 5081 .types = priv->rss_conf.rss_hf, 5082 .key_len = priv->rss_conf.rss_key_len, 5083 .queue_num = priv->reta_idx_n, 5084 .key = priv->rss_conf.rss_key, 5085 .queue = queue, 5086 }; 5087 struct rte_flow_action actions[] = { 5088 { 5089 .type = RTE_FLOW_ACTION_TYPE_RSS, 5090 .conf = &action_rss, 5091 }, 5092 { 5093 .type = RTE_FLOW_ACTION_TYPE_END, 5094 }, 5095 }; 5096 uint32_t flow_idx; 5097 struct rte_flow_error error; 5098 unsigned int i; 5099 5100 if (!priv->reta_idx_n || !priv->rxqs_n) { 5101 return 0; 5102 } 5103 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 5104 action_rss.types = 0; 5105 for (i = 0; i != priv->reta_idx_n; ++i) 5106 queue[i] = (*priv->reta_idx)[i]; 5107 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5108 &attr, items, actions, false, &error); 5109 if (!flow_idx) 5110 return -rte_errno; 5111 return 0; 5112 } 5113 5114 /** 5115 * Enable a flow control configured from the control plane. 5116 * 5117 * @param dev 5118 * Pointer to Ethernet device. 5119 * @param eth_spec 5120 * An Ethernet flow spec to apply. 5121 * @param eth_mask 5122 * An Ethernet flow mask to apply. 5123 * 5124 * @return 5125 * 0 on success, a negative errno value otherwise and rte_errno is set. 5126 */ 5127 int 5128 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5129 struct rte_flow_item_eth *eth_spec, 5130 struct rte_flow_item_eth *eth_mask) 5131 { 5132 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5133 } 5134 5135 /** 5136 * Create default miss flow rule matching lacp traffic 5137 * 5138 * @param dev 5139 * Pointer to Ethernet device. 5140 * @param eth_spec 5141 * An Ethernet flow spec to apply. 5142 * 5143 * @return 5144 * 0 on success, a negative errno value otherwise and rte_errno is set. 5145 */ 5146 int 5147 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5148 { 5149 struct mlx5_priv *priv = dev->data->dev_private; 5150 /* 5151 * The LACP matching is done by only using ether type since using 5152 * a multicast dst mac causes kernel to give low priority to this flow. 5153 */ 5154 static const struct rte_flow_item_eth lacp_spec = { 5155 .type = RTE_BE16(0x8809), 5156 }; 5157 static const struct rte_flow_item_eth lacp_mask = { 5158 .type = 0xffff, 5159 }; 5160 const struct rte_flow_attr attr = { 5161 .ingress = 1, 5162 }; 5163 struct rte_flow_item items[] = { 5164 { 5165 .type = RTE_FLOW_ITEM_TYPE_ETH, 5166 .spec = &lacp_spec, 5167 .mask = &lacp_mask, 5168 }, 5169 { 5170 .type = RTE_FLOW_ITEM_TYPE_END, 5171 }, 5172 }; 5173 struct rte_flow_action actions[] = { 5174 { 5175 .type = (enum rte_flow_action_type) 5176 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5177 }, 5178 { 5179 .type = RTE_FLOW_ACTION_TYPE_END, 5180 }, 5181 }; 5182 struct rte_flow_error error; 5183 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5184 &attr, items, actions, false, &error); 5185 5186 if (!flow_idx) 5187 return -rte_errno; 5188 return 0; 5189 } 5190 5191 /** 5192 * Destroy a flow. 
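 * The rte_flow pointer received from the application is really the pool
 * index (see flow_list_create()), so it is converted back with a
 * uintptr_t cast and removed from the priv->flows list.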
5193 * 5194 * @see rte_flow_destroy() 5195 * @see rte_flow_ops 5196 */ 5197 int 5198 mlx5_flow_destroy(struct rte_eth_dev *dev, 5199 struct rte_flow *flow, 5200 struct rte_flow_error *error __rte_unused) 5201 { 5202 struct mlx5_priv *priv = dev->data->dev_private; 5203 5204 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5205 return 0; 5206 } 5207 5208 /** 5209 * Destroy all flows. 5210 * 5211 * @see rte_flow_flush() 5212 * @see rte_flow_ops 5213 */ 5214 int 5215 mlx5_flow_flush(struct rte_eth_dev *dev, 5216 struct rte_flow_error *error __rte_unused) 5217 { 5218 struct mlx5_priv *priv = dev->data->dev_private; 5219 5220 mlx5_flow_list_flush(dev, &priv->flows, false); 5221 return 0; 5222 } 5223 5224 /** 5225 * Isolated mode. 5226 * 5227 * @see rte_flow_isolate() 5228 * @see rte_flow_ops 5229 */ 5230 int 5231 mlx5_flow_isolate(struct rte_eth_dev *dev, 5232 int enable, 5233 struct rte_flow_error *error) 5234 { 5235 struct mlx5_priv *priv = dev->data->dev_private; 5236 5237 if (dev->data->dev_started) { 5238 rte_flow_error_set(error, EBUSY, 5239 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5240 NULL, 5241 "port must be stopped first"); 5242 return -rte_errno; 5243 } 5244 priv->isolated = !!enable; 5245 if (enable) 5246 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5247 else 5248 dev->dev_ops = &mlx5_os_dev_ops; 5249 return 0; 5250 } 5251 5252 /** 5253 * Query a flow. 5254 * 5255 * @see rte_flow_query() 5256 * @see rte_flow_ops 5257 */ 5258 static int 5259 flow_drv_query(struct rte_eth_dev *dev, 5260 uint32_t flow_idx, 5261 const struct rte_flow_action *actions, 5262 void *data, 5263 struct rte_flow_error *error) 5264 { 5265 struct mlx5_priv *priv = dev->data->dev_private; 5266 const struct mlx5_flow_driver_ops *fops; 5267 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5268 [MLX5_IPOOL_RTE_FLOW], 5269 flow_idx); 5270 enum mlx5_flow_drv_type ftype; 5271 5272 if (!flow) { 5273 return rte_flow_error_set(error, ENOENT, 5274 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5275 NULL, 5276 "invalid flow handle"); 5277 } 5278 ftype = flow->drv_type; 5279 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5280 fops = flow_get_drv_ops(ftype); 5281 5282 return fops->query(dev, flow, actions, data, error); 5283 } 5284 5285 /** 5286 * Query a flow. 5287 * 5288 * @see rte_flow_query() 5289 * @see rte_flow_ops 5290 */ 5291 int 5292 mlx5_flow_query(struct rte_eth_dev *dev, 5293 struct rte_flow *flow, 5294 const struct rte_flow_action *actions, 5295 void *data, 5296 struct rte_flow_error *error) 5297 { 5298 int ret; 5299 5300 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5301 error); 5302 if (ret < 0) 5303 return ret; 5304 return 0; 5305 } 5306 5307 /** 5308 * Convert a flow director filter to a generic flow. 5309 * 5310 * @param dev 5311 * Pointer to Ethernet device. 5312 * @param fdir_filter 5313 * Flow director filter to add. 5314 * @param attributes 5315 * Generic flow parameters structure. 5316 * 5317 * @return 5318 * 0 on success, a negative errno value otherwise and rte_errno is set. 5319 */ 5320 static int 5321 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5322 const struct rte_eth_fdir_filter *fdir_filter, 5323 struct mlx5_fdir *attributes) 5324 { 5325 struct mlx5_priv *priv = dev->data->dev_private; 5326 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5327 const struct rte_eth_fdir_masks *mask = 5328 &dev->data->dev_conf.fdir_conf.mask; 5329 5330 /* Validate queue number. 
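 * The requested Rx queue must be one of the priv->rxqs_n configured
 * queues; otherwise the conversion fails with EINVAL.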
*/ 5331 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5332 DRV_LOG(ERR, "port %u invalid queue number %d", 5333 dev->data->port_id, fdir_filter->action.rx_queue); 5334 rte_errno = EINVAL; 5335 return -rte_errno; 5336 } 5337 attributes->attr.ingress = 1; 5338 attributes->items[0] = (struct rte_flow_item) { 5339 .type = RTE_FLOW_ITEM_TYPE_ETH, 5340 .spec = &attributes->l2, 5341 .mask = &attributes->l2_mask, 5342 }; 5343 switch (fdir_filter->action.behavior) { 5344 case RTE_ETH_FDIR_ACCEPT: 5345 attributes->actions[0] = (struct rte_flow_action){ 5346 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5347 .conf = &attributes->queue, 5348 }; 5349 break; 5350 case RTE_ETH_FDIR_REJECT: 5351 attributes->actions[0] = (struct rte_flow_action){ 5352 .type = RTE_FLOW_ACTION_TYPE_DROP, 5353 }; 5354 break; 5355 default: 5356 DRV_LOG(ERR, "port %u invalid behavior %d", 5357 dev->data->port_id, 5358 fdir_filter->action.behavior); 5359 rte_errno = ENOTSUP; 5360 return -rte_errno; 5361 } 5362 attributes->queue.index = fdir_filter->action.rx_queue; 5363 /* Handle L3. */ 5364 switch (fdir_filter->input.flow_type) { 5365 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5366 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5367 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5368 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5369 .src_addr = input->flow.ip4_flow.src_ip, 5370 .dst_addr = input->flow.ip4_flow.dst_ip, 5371 .time_to_live = input->flow.ip4_flow.ttl, 5372 .type_of_service = input->flow.ip4_flow.tos, 5373 }; 5374 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5375 .src_addr = mask->ipv4_mask.src_ip, 5376 .dst_addr = mask->ipv4_mask.dst_ip, 5377 .time_to_live = mask->ipv4_mask.ttl, 5378 .type_of_service = mask->ipv4_mask.tos, 5379 .next_proto_id = mask->ipv4_mask.proto, 5380 }; 5381 attributes->items[1] = (struct rte_flow_item){ 5382 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5383 .spec = &attributes->l3, 5384 .mask = &attributes->l3_mask, 5385 }; 5386 break; 5387 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5388 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5389 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5390 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5391 .hop_limits = input->flow.ipv6_flow.hop_limits, 5392 .proto = input->flow.ipv6_flow.proto, 5393 }; 5394 5395 memcpy(attributes->l3.ipv6.hdr.src_addr, 5396 input->flow.ipv6_flow.src_ip, 5397 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5398 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5399 input->flow.ipv6_flow.dst_ip, 5400 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5401 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 5402 mask->ipv6_mask.src_ip, 5403 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5404 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 5405 mask->ipv6_mask.dst_ip, 5406 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5407 attributes->items[1] = (struct rte_flow_item){ 5408 .type = RTE_FLOW_ITEM_TYPE_IPV6, 5409 .spec = &attributes->l3, 5410 .mask = &attributes->l3_mask, 5411 }; 5412 break; 5413 default: 5414 DRV_LOG(ERR, "port %u invalid flow type%d", 5415 dev->data->port_id, fdir_filter->input.flow_type); 5416 rte_errno = ENOTSUP; 5417 return -rte_errno; 5418 } 5419 /* Handle L4. 
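 * Only the UDP and TCP flow types add an L4 item; for the *_OTHER flow
 * types the pattern stays terminated right after the L3 item.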
*/ 5420 switch (fdir_filter->input.flow_type) { 5421 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5422 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5423 .src_port = input->flow.udp4_flow.src_port, 5424 .dst_port = input->flow.udp4_flow.dst_port, 5425 }; 5426 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5427 .src_port = mask->src_port_mask, 5428 .dst_port = mask->dst_port_mask, 5429 }; 5430 attributes->items[2] = (struct rte_flow_item){ 5431 .type = RTE_FLOW_ITEM_TYPE_UDP, 5432 .spec = &attributes->l4, 5433 .mask = &attributes->l4_mask, 5434 }; 5435 break; 5436 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5437 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5438 .src_port = input->flow.tcp4_flow.src_port, 5439 .dst_port = input->flow.tcp4_flow.dst_port, 5440 }; 5441 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5442 .src_port = mask->src_port_mask, 5443 .dst_port = mask->dst_port_mask, 5444 }; 5445 attributes->items[2] = (struct rte_flow_item){ 5446 .type = RTE_FLOW_ITEM_TYPE_TCP, 5447 .spec = &attributes->l4, 5448 .mask = &attributes->l4_mask, 5449 }; 5450 break; 5451 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5452 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5453 .src_port = input->flow.udp6_flow.src_port, 5454 .dst_port = input->flow.udp6_flow.dst_port, 5455 }; 5456 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5457 .src_port = mask->src_port_mask, 5458 .dst_port = mask->dst_port_mask, 5459 }; 5460 attributes->items[2] = (struct rte_flow_item){ 5461 .type = RTE_FLOW_ITEM_TYPE_UDP, 5462 .spec = &attributes->l4, 5463 .mask = &attributes->l4_mask, 5464 }; 5465 break; 5466 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5467 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5468 .src_port = input->flow.tcp6_flow.src_port, 5469 .dst_port = input->flow.tcp6_flow.dst_port, 5470 }; 5471 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5472 .src_port = mask->src_port_mask, 5473 .dst_port = mask->dst_port_mask, 5474 }; 5475 attributes->items[2] = (struct rte_flow_item){ 5476 .type = RTE_FLOW_ITEM_TYPE_TCP, 5477 .spec = &attributes->l4, 5478 .mask = &attributes->l4_mask, 5479 }; 5480 break; 5481 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5482 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5483 break; 5484 default: 5485 DRV_LOG(ERR, "port %u invalid flow type%d", 5486 dev->data->port_id, fdir_filter->input.flow_type); 5487 rte_errno = ENOTSUP; 5488 return -rte_errno; 5489 } 5490 return 0; 5491 } 5492 5493 #define FLOW_FDIR_CMP(f1, f2, fld) \ 5494 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 5495 5496 /** 5497 * Compare two FDIR flows. If items and actions are identical, the two flows are 5498 * regarded as same. 5499 * 5500 * @param dev 5501 * Pointer to Ethernet device. 5502 * @param f1 5503 * FDIR flow to compare. 5504 * @param f2 5505 * FDIR flow to compare. 5506 * 5507 * @return 5508 * Zero on match, 1 otherwise. 5509 */ 5510 static int 5511 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 5512 { 5513 if (FLOW_FDIR_CMP(f1, f2, attr) || 5514 FLOW_FDIR_CMP(f1, f2, l2) || 5515 FLOW_FDIR_CMP(f1, f2, l2_mask) || 5516 FLOW_FDIR_CMP(f1, f2, l3) || 5517 FLOW_FDIR_CMP(f1, f2, l3_mask) || 5518 FLOW_FDIR_CMP(f1, f2, l4) || 5519 FLOW_FDIR_CMP(f1, f2, l4_mask) || 5520 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 5521 return 1; 5522 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 5523 FLOW_FDIR_CMP(f1, f2, queue)) 5524 return 1; 5525 return 0; 5526 } 5527 5528 /** 5529 * Search device flow list to find out a matched FDIR flow. 5530 * 5531 * @param dev 5532 * Pointer to Ethernet device. 
5533 * @param fdir_flow 5534 * FDIR flow to lookup. 5535 * 5536 * @return 5537 * Index of flow if found, 0 otherwise. 5538 */ 5539 static uint32_t 5540 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 5541 { 5542 struct mlx5_priv *priv = dev->data->dev_private; 5543 uint32_t flow_idx = 0; 5544 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5545 5546 MLX5_ASSERT(fdir_flow); 5547 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5548 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 5549 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 5550 dev->data->port_id, flow_idx); 5551 flow_idx = priv_fdir_flow->rix_flow; 5552 break; 5553 } 5554 } 5555 return flow_idx; 5556 } 5557 5558 /** 5559 * Add new flow director filter and store it in list. 5560 * 5561 * @param dev 5562 * Pointer to Ethernet device. 5563 * @param fdir_filter 5564 * Flow director filter to add. 5565 * 5566 * @return 5567 * 0 on success, a negative errno value otherwise and rte_errno is set. 5568 */ 5569 static int 5570 flow_fdir_filter_add(struct rte_eth_dev *dev, 5571 const struct rte_eth_fdir_filter *fdir_filter) 5572 { 5573 struct mlx5_priv *priv = dev->data->dev_private; 5574 struct mlx5_fdir *fdir_flow; 5575 struct rte_flow *flow; 5576 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5577 uint32_t flow_idx; 5578 int ret; 5579 5580 fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, 5581 SOCKET_ID_ANY); 5582 if (!fdir_flow) { 5583 rte_errno = ENOMEM; 5584 return -rte_errno; 5585 } 5586 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 5587 if (ret) 5588 goto error; 5589 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 5590 if (flow_idx) { 5591 rte_errno = EEXIST; 5592 goto error; 5593 } 5594 priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, 5595 sizeof(struct mlx5_fdir_flow), 5596 0, SOCKET_ID_ANY); 5597 if (!priv_fdir_flow) { 5598 rte_errno = ENOMEM; 5599 goto error; 5600 } 5601 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 5602 fdir_flow->items, fdir_flow->actions, true, 5603 NULL); 5604 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5605 if (!flow) 5606 goto error; 5607 flow->fdir = 1; 5608 priv_fdir_flow->fdir = fdir_flow; 5609 priv_fdir_flow->rix_flow = flow_idx; 5610 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 5611 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 5612 dev->data->port_id, (void *)flow); 5613 return 0; 5614 error: 5615 mlx5_free(priv_fdir_flow); 5616 mlx5_free(fdir_flow); 5617 return -rte_errno; 5618 } 5619 5620 /** 5621 * Delete specific filter. 5622 * 5623 * @param dev 5624 * Pointer to Ethernet device. 5625 * @param fdir_filter 5626 * Filter to be deleted. 5627 * 5628 * @return 5629 * 0 on success, a negative errno value otherwise and rte_errno is set. 
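 *
 * The filter is converted to an mlx5_fdir descriptor and matched against
 * the stored filters with flow_fdir_cmp(); if no identical filter is
 * found the call is a no-op and still returns 0.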
/**
 * Delete a specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t flow_idx;
	struct mlx5_fdir fdir_flow = {
		.attr.group = 0,
	};
	struct mlx5_fdir_flow *priv_fdir_flow = NULL;
	int ret;

	ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
	if (ret)
		return -rte_errno;
	LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
		/* Find the matching entry in the private FDIR list. */
		if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow))
			break;
	}
	if (!priv_fdir_flow)
		return 0;
	LIST_REMOVE(priv_fdir_flow, next);
	flow_idx = priv_fdir_flow->rix_flow;
	flow_list_destroy(dev, &priv->flows, flow_idx);
	mlx5_free(priv_fdir_flow->fdir);
	mlx5_free(priv_fdir_flow);
	DRV_LOG(DEBUG, "port %u deleted FDIR flow %u",
		dev->data->port_id, flow_idx);
	return 0;
}

/**
 * Update the queue for a specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = flow_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return flow_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_fdir_flow *priv_fdir_flow = NULL;

	while (!LIST_EMPTY(&priv->fdir_flows)) {
		priv_fdir_flow = LIST_FIRST(&priv->fdir_flows);
		LIST_REMOVE(priv_fdir_flow, next);
		flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow);
		mlx5_free(priv_fdir_flow->fdir);
		mlx5_free(priv_fdir_flow);
	}
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
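/*
 * Illustrative usage sketch (hypothetical application code, assuming the
 * legacy filter API of this DPDK release): RTE_ETH_FILTER_INFO returns the
 * configured FDIR mode and masks; the capability fields are always reported
 * as zero by this PMD, as flow_fdir_info_get() above shows.
 */
static __rte_unused enum rte_fdir_mode
flow_fdir_info_example(uint16_t port_id)
{
	struct rte_eth_fdir_info info;

	memset(&info, 0, sizeof(info));
	/* Dispatched to flow_fdir_info_get() via flow_fdir_ctrl_func(). */
	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				    RTE_ETH_FILTER_INFO, &info) != 0)
		return RTE_FDIR_MODE_NONE;
	return info.mode;
}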
/**
 * Handle flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return flow_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return flow_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return flow_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		flow_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		flow_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return flow_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}

/**
 * Create the needed meter and suffix tables.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to the flow meter.
 *
 * @return
 *   Pointer to table set on success, NULL otherwise.
 */
struct mlx5_meter_domains_infos *
mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
			  const struct mlx5_flow_meter *fm)
{
	const struct mlx5_flow_driver_ops *fops;

	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
	return fops->create_mtr_tbls(dev, fm);
}

/**
 * Destroy the meter table set.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] tbls
 *   Pointer to the meter table set.
 *
 * @return
 *   0 on success.
 */
int
mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
			   struct mlx5_meter_domains_infos *tbls)
{
	const struct mlx5_flow_driver_ops *fops;

	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
	return fops->destroy_mtr_tbls(dev, tbls);
}
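/*
 * Illustrative sketch (hypothetical helper, not part of the driver): the
 * RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET branch of
 * mlx5_dev_filter_ctrl() above is how the rte_flow library obtains the
 * PMD's rte_flow_ops table. An equivalent direct query would look roughly
 * like this.
 */
static __rte_unused const struct rte_flow_ops *
mlx5_flow_ops_get_example(uint16_t port_id)
{
	const struct rte_flow_ops *ops = NULL;

	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
				    RTE_ETH_FILTER_GET, &ops) != 0)
		return NULL;
	return ops;
}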
/**
 * Create policer rules.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to flow meter structure.
 * @param[in] attr
 *   Pointer to flow attributes.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
			       struct mlx5_flow_meter *fm,
			       const struct rte_flow_attr *attr)
{
	const struct mlx5_flow_driver_ops *fops;

	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
	return fops->create_policer_rules(dev, fm, attr);
}

/**
 * Destroy policer rules.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] fm
 *   Pointer to flow meter structure.
 * @param[in] attr
 *   Pointer to flow attributes.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
				struct mlx5_flow_meter *fm,
				const struct rte_flow_attr *attr)
{
	const struct mlx5_flow_driver_ops *fops;

	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
	return fops->destroy_policer_rules(dev, fm, attr);
}

/**
 * Allocate a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Index of the allocated counter on success, 0 otherwise.
 */
uint32_t
mlx5_counter_alloc(struct rte_eth_dev *dev)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		return fops->counter_alloc(dev);
	}
	DRV_LOG(ERR,
		"port %u counter allocation is not supported.",
		dev->data->port_id);
	return 0;
}

/**
 * Free a counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to be freed.
 */
void
mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		fops->counter_free(dev, cnt);
		return;
	}
	DRV_LOG(ERR,
		"port %u counter free is not supported.",
		dev->data->port_id);
}

/**
 * Query counter statistics.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Index of the counter to query.
 * @param[in] clear
 *   Set to clear the counter statistics.
 * @param[out] pkts
 *   Location to return the number of packets that hit the counter.
 * @param[out] bytes
 *   Location to return the number of bytes that hit the counter.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
		   bool clear, uint64_t *pkts, uint64_t *bytes)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		return fops->counter_query(dev, cnt, clear, pkts, bytes);
	}
	DRV_LOG(ERR,
		"port %u counter query is not supported.",
		dev->data->port_id);
	return -ENOTSUP;
}
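/*
 * Illustrative sketch (hypothetical helper, not part of the driver): the
 * three wrappers above form the counter life cycle. A caller allocates a
 * counter index, reads its packet/byte hits (optionally clearing them), and
 * releases the index when done.
 */
static __rte_unused void
mlx5_counter_lifecycle_example(struct rte_eth_dev *dev)
{
	uint64_t pkts = 0;
	uint64_t bytes = 0;
	uint32_t cnt = mlx5_counter_alloc(dev);

	if (!cnt)
		return; /* Not supported or pool exhausted. */
	/* Read the statistics without clearing them. */
	if (mlx5_counter_query(dev, cnt, false, &pkts, &bytes) == 0 &&
	    pkts == 0 && bytes == 0)
		DRV_LOG(DEBUG, "counter %u has not been hit yet", cnt);
	mlx5_counter_free(dev, cnt);
}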
#define MLX5_POOL_QUERY_FREQ_US 1000000

/**
 * Get the number of all valid counter pools.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   The number of all valid counter pools.
 */
static uint32_t
mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
{
	int i;
	uint32_t pools_n = 0;

	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
		pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
	return pools_n;
}

/**
 * Set the periodic procedure for triggering asynchronous batch queries for
 * all the counter pools.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
void
mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
{
	uint32_t pools_n, us;

	pools_n = mlx5_get_all_valid_pool_count(sh);
	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
		sh->cmng.query_thread_on = 0;
		DRV_LOG(ERR, "Cannot reinitialize query alarm");
	} else {
		sh->cmng.query_thread_on = 1;
	}
}
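/*
 * Worked example of the interval arithmetic above: the query budget of
 * MLX5_POOL_QUERY_FREQ_US (1 second) is spread evenly over the valid pools,
 * so with 4 valid pools one pool is queried every 1000000 / 4 = 250000 us.
 * The helper below is an illustrative sketch, not driver code, and guards
 * the zero-pool case only for clarity.
 */
static __rte_unused uint32_t
mlx5_pool_query_interval_example(uint32_t pools_n)
{
	if (!pools_n)
		return MLX5_POOL_QUERY_FREQ_US;
	return MLX5_POOL_QUERY_FREQ_US / pools_n;
}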
/**
 * The periodic procedure for triggering asynchronous batch queries for all
 * the counter pools. This function is expected to be called from the host
 * thread.
 *
 * @param[in] arg
 *   The parameter for the alarm process.
 */
void
mlx5_flow_query_alarm(void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;
	struct mlx5_devx_obj *dcs;
	uint16_t offset;
	int ret;
	uint8_t batch = sh->cmng.batch;
	uint8_t age = sh->cmng.age;
	uint16_t pool_index = sh->cmng.pool_index;
	struct mlx5_pools_container *cont;
	struct mlx5_flow_counter_pool *pool;
	int cont_loop = MLX5_CCONT_TYPE_MAX;

	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
		goto set_alarm;
next_container:
	cont = MLX5_CNT_CONTAINER(sh, batch, age);
	rte_spinlock_lock(&cont->resize_sl);
	if (!cont->pools) {
		rte_spinlock_unlock(&cont->resize_sl);
		/* Check if all the containers are empty. */
		if (unlikely(--cont_loop == 0))
			goto set_alarm;
		batch ^= 0x1;
		pool_index = 0;
		if (batch == 0 && pool_index == 0) {
			age ^= 0x1;
			sh->cmng.batch = batch;
			sh->cmng.age = age;
		}
		goto next_container;
	}
	pool = cont->pools[pool_index];
	rte_spinlock_unlock(&cont->resize_sl);
	if (pool->raw_hw)
		/* There is a pool query in progress. */
		goto set_alarm;
	pool->raw_hw =
		LIST_FIRST(&sh->cmng.free_stat_raws);
	if (!pool->raw_hw)
		/* No free counter statistics raw memory. */
		goto set_alarm;
	dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
							(&pool->a64_dcs);
	offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
	/*
	 * Identify the counters released between query trigger and query
	 * handling more efficiently. A counter released in this gap period
	 * should wait for a new round of queries, as the newly arrived
	 * packets will not be taken into account.
	 */
	pool->query_gen++;
	ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
					       offset, NULL, NULL,
					       pool->raw_hw->mem_mng->dm->id,
					       (void *)(uintptr_t)
					       (pool->raw_hw->data + offset),
					       sh->devx_comp,
					       (uint64_t)(uintptr_t)pool);
	if (ret) {
		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
			" %d", pool->min_dcs->id);
		pool->raw_hw = NULL;
		goto set_alarm;
	}
	pool->raw_hw->min_dcs_id = dcs->id;
	LIST_REMOVE(pool->raw_hw, next);
	sh->cmng.pending_queries++;
	pool_index++;
	if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
		batch ^= 0x1;
		pool_index = 0;
		if (batch == 0 && pool_index == 0)
			age ^= 0x1;
	}
set_alarm:
	sh->cmng.batch = batch;
	sh->cmng.pool_index = pool_index;
	sh->cmng.age = age;
	mlx5_set_query_alarm(sh);
}

/**
 * Check for newly aged flows in the counter pool and raise the aging event
 * when needed.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] pool
 *   Pointer to the current counter pool.
 */
static void
mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
		      struct mlx5_flow_counter_pool *pool)
{
	struct mlx5_priv *priv;
	struct mlx5_flow_counter *cnt;
	struct mlx5_age_info *age_info;
	struct mlx5_age_param *age_param;
	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
	struct mlx5_counter_stats_raw *prev = pool->raw;
	uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
	uint32_t i;

	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
		cnt = MLX5_POOL_GET_CNT(pool, i);
		age_param = MLX5_CNT_TO_AGE(cnt);
		if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
			continue;
		if (cur->data[i].hits != prev->data[i].hits) {
			age_param->expire = curr + age_param->timeout;
			continue;
		}
		if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
			continue;
		/*
		 * Hold the lock first; otherwise, if the release happens
		 * between setting the AGE_TMOUT state and the tailq
		 * operation, the release procedure may delete a non-existent
		 * tailq node.
		 */
		priv = rte_eth_devices[age_param->port_id].data->dev_private;
		age_info = GET_PORT_AGE_INFO(priv);
		rte_spinlock_lock(&age_info->aged_sl);
		/* If the cmpset fails, a release has happened. */
		if (rte_atomic16_cmpset((volatile uint16_t *)
					&age_param->state,
					AGE_CANDIDATE,
					AGE_TMOUT) ==
					AGE_CANDIDATE) {
			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
		}
		rte_spinlock_unlock(&age_info->aged_sl);
	}
	for (i = 0; i < sh->max_port; i++) {
		age_info = &sh->port[i].age_info;
		if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
			continue;
		if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
			_rte_eth_dev_callback_process
				(&rte_eth_devices[sh->port[i].devx_ih_port_id],
				 RTE_ETH_EVENT_FLOW_AGED, NULL);
		age_info->flags = 0;
	}
}
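/*
 * Worked example of the expiry arithmetic above (illustrative helper, not
 * driver code): ages are tracked in 0.1 second ticks of a free-running
 * 16-bit counter, so the comparison must be wraparound safe. With a
 * 10 second timeout (100 ticks) and expire == 500, curr == 499 gives
 * (uint16_t)(499 - 500) == 65535, which is >= UINT16_MAX / 2 and therefore
 * "not expired yet", while curr == 505 gives 5 and the counter is moved to
 * the aged list.
 */
static __rte_unused bool
mlx5_age_expired_example(uint16_t curr, uint16_t expire)
{
	return (uint16_t)(curr - expire) < (UINT16_MAX / 2);
}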
/**
 * Handler for the HW response with the ready values of an asynchronous batch
 * query. This function is expected to be called from the host thread.
 *
 * @param[in] sh
 *   The pointer to the shared device context.
 * @param[in] async_id
 *   The DevX async ID.
 * @param[in] status
 *   The status of the completion.
 */
void
mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
				  uint64_t async_id, int status)
{
	struct mlx5_flow_counter_pool *pool =
		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
	struct mlx5_counter_stats_raw *raw_to_free;
	uint8_t age = !!IS_AGE_POOL(pool);
	uint8_t query_gen = pool->query_gen ^ 1;
	struct mlx5_pools_container *cont =
		MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);

	if (unlikely(status)) {
		raw_to_free = pool->raw_hw;
	} else {
		raw_to_free = pool->raw;
		if (IS_AGE_POOL(pool))
			mlx5_flow_aging_check(sh, pool);
		rte_spinlock_lock(&pool->sl);
		pool->raw = pool->raw_hw;
		rte_spinlock_unlock(&pool->sl);
		/* Be sure the new raw counters data is updated in memory. */
		rte_cio_wmb();
		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
			rte_spinlock_lock(&cont->csl);
			TAILQ_CONCAT(&cont->counters,
				     &pool->counters[query_gen], next);
			rte_spinlock_unlock(&cont->csl);
		}
	}
	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
	pool->raw_hw = NULL;
	sh->cmng.pending_queries--;
}

/**
 * Translate the rte_flow group index to the HW table value.
 *
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in] external
 *   True if the flow rule is created by a request external to the PMD.
 * @param[in] group
 *   rte_flow group index value.
 * @param[in] fdb_def_rule
 *   Whether the FDB default jump to table 1 is configured.
 * @param[out] table
 *   HW table value.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
			 uint32_t group, bool fdb_def_rule, uint32_t *table,
			 struct rte_flow_error *error)
{
	if (attributes->transfer && external && fdb_def_rule) {
		if (group == UINT32_MAX)
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				 NULL,
				 "group index not supported");
		*table = group + 1;
	} else {
		*table = group;
	}
	return 0;
}
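/*
 * Worked example (illustrative helper, not driver code): when an external
 * transfer rule is inserted and the FDB default jump rule is enabled, HW
 * table 0 is taken by that default rule, so user groups are shifted by one:
 * group 0 maps to table 1 and group 3 maps to table 4. In every other case
 * the group index is used as the table value directly.
 */
static __rte_unused uint32_t
mlx5_group_to_table_example(void)
{
	struct rte_flow_attr attr = { .transfer = 1 };
	struct rte_flow_error error;
	uint32_t table = 0;

	if (mlx5_flow_group_to_table(&attr, true, 3, true, &table, &error))
		return 0;
	MLX5_ASSERT(table == 4);
	return table;
}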
/**
 * Discover the availability of metadata reg_c's.
 *
 * Iteratively use test flows to check availability.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg idx;
	int n = 0;

	/* reg_c[0] and reg_c[1] are reserved. */
	config->flow_mreg_c[n++] = REG_C_0;
	config->flow_mreg_c[n++] = REG_C_1;
	/* Discover availability of other reg_c's. */
	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
		struct rte_flow_attr attr = {
			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
			.priority = MLX5_FLOW_PRIO_RSVD,
			.ingress = 1,
		};
		struct rte_flow_item items[] = {
			[0] = {
				.type = RTE_FLOW_ITEM_TYPE_END,
			},
		};
		struct rte_flow_action actions[] = {
			[0] = {
				.type = (enum rte_flow_action_type)
					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
				.conf = &(struct mlx5_flow_action_copy_mreg){
					.src = REG_C_1,
					.dst = idx,
				},
			},
			[1] = {
				.type = RTE_FLOW_ACTION_TYPE_JUMP,
				.conf = &(struct rte_flow_action_jump){
					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
				},
			},
			[2] = {
				.type = RTE_FLOW_ACTION_TYPE_END,
			},
		};
		uint32_t flow_idx;
		struct rte_flow *flow;
		struct rte_flow_error error;

		if (!config->dv_flow_en)
			break;
		/* Create internal flow, validation skips copy action. */
		flow_idx = flow_list_create(dev, NULL, &attr, items,
					    actions, false, &error);
		flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
				      flow_idx);
		if (!flow)
			continue;
		if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
			config->flow_mreg_c[n++] = idx;
		flow_list_destroy(dev, NULL, flow_idx);
	}
	for (; n < MLX5_MREG_C_NUM; ++n)
		config->flow_mreg_c[n] = REG_NONE;
	return 0;
}

/**
 * Dump flow raw HW data to a file.
 *
 * @param[in] dev
 *   The pointer to Ethernet device.
 * @param[in] file
 *   A pointer to a file for output.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. PMDs initialize this
 *   structure in case of error only.
 *
 * @return
 *   0 on success, a negative value otherwise.
 */
int
mlx5_flow_dev_dump(struct rte_eth_dev *dev,
		   FILE *file,
		   struct rte_flow_error *error __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
				       sh->tx_domain, file);
}

/**
 * Get aged-out flows.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] contexts
 *   The address of an array of pointers to the aged-out flow contexts.
 * @param[in] nb_contexts
 *   The length of the context array.
 * @param[out] error
 *   Perform verbose error reporting if not NULL. Initialized in case of
 *   error only.
 *
 * @return
 *   The number of contexts returned on success, a negative errno value
 *   otherwise. If nb_contexts is 0, return the total number of aged
 *   contexts. If nb_contexts is not 0, return the number of aged flows
 *   reported in the context array.
 */
int
mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
			 uint32_t nb_contexts, struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops;
	struct rte_flow_attr attr = { .transfer = 0 };

	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
		return fops->get_aged_flows(dev, contexts, nb_contexts,
					    error);
	}
	DRV_LOG(ERR,
		"port %u get aged flows is not supported.",
		dev->data->port_id);
	return -ENOTSUP;
}
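/*
 * Illustrative usage sketch (hypothetical application code): the driver
 * callback above backs rte_flow_get_aged_flows(). The usual pattern is to
 * call it once with nb_contexts == 0 to learn how many aged contexts are
 * pending, then again with a sufficiently large array to drain them.
 */
static __rte_unused int
mlx5_aged_flows_example(uint16_t port_id)
{
	struct rte_flow_error error;
	void **contexts;
	int total;
	int ret;

	total = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
	if (total <= 0)
		return total;
	contexts = mlx5_malloc(MLX5_MEM_ZERO, sizeof(void *) * total, 0,
			       SOCKET_ID_ANY);
	if (!contexts)
		return -ENOMEM;
	ret = rte_flow_get_aged_flows(port_id, contexts, total, &error);
	/* Each returned context is the one given in the AGE action conf. */
	mlx5_free(contexts);
	return ret;
}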