/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"

/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items.
*/ 77 static const struct rte_flow_expand_node mlx5_support_expansion[] = { 78 [MLX5_EXPANSION_ROOT] = { 79 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 80 MLX5_EXPANSION_IPV4, 81 MLX5_EXPANSION_IPV6), 82 .type = RTE_FLOW_ITEM_TYPE_END, 83 }, 84 [MLX5_EXPANSION_ROOT_OUTER] = { 85 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, 86 MLX5_EXPANSION_OUTER_IPV4, 87 MLX5_EXPANSION_OUTER_IPV6), 88 .type = RTE_FLOW_ITEM_TYPE_END, 89 }, 90 [MLX5_EXPANSION_ROOT_ETH_VLAN] = { 91 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), 92 .type = RTE_FLOW_ITEM_TYPE_END, 93 }, 94 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { 95 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN), 96 .type = RTE_FLOW_ITEM_TYPE_END, 97 }, 98 [MLX5_EXPANSION_OUTER_ETH] = { 99 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 100 MLX5_EXPANSION_OUTER_IPV6, 101 MLX5_EXPANSION_MPLS), 102 .type = RTE_FLOW_ITEM_TYPE_ETH, 103 .rss_types = 0, 104 }, 105 [MLX5_EXPANSION_OUTER_ETH_VLAN] = { 106 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), 107 .type = RTE_FLOW_ITEM_TYPE_ETH, 108 .rss_types = 0, 109 }, 110 [MLX5_EXPANSION_OUTER_VLAN] = { 111 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, 112 MLX5_EXPANSION_OUTER_IPV6), 113 .type = RTE_FLOW_ITEM_TYPE_VLAN, 114 }, 115 [MLX5_EXPANSION_OUTER_IPV4] = { 116 .next = RTE_FLOW_EXPAND_RSS_NEXT 117 (MLX5_EXPANSION_OUTER_IPV4_UDP, 118 MLX5_EXPANSION_OUTER_IPV4_TCP, 119 MLX5_EXPANSION_GRE, 120 MLX5_EXPANSION_IPV4, 121 MLX5_EXPANSION_IPV6), 122 .type = RTE_FLOW_ITEM_TYPE_IPV4, 123 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 124 ETH_RSS_NONFRAG_IPV4_OTHER, 125 }, 126 [MLX5_EXPANSION_OUTER_IPV4_UDP] = { 127 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 128 MLX5_EXPANSION_VXLAN_GPE), 129 .type = RTE_FLOW_ITEM_TYPE_UDP, 130 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 131 }, 132 [MLX5_EXPANSION_OUTER_IPV4_TCP] = { 133 .type = RTE_FLOW_ITEM_TYPE_TCP, 134 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 135 }, 136 [MLX5_EXPANSION_OUTER_IPV6] = { 137 .next = RTE_FLOW_EXPAND_RSS_NEXT 138 (MLX5_EXPANSION_OUTER_IPV6_UDP, 139 MLX5_EXPANSION_OUTER_IPV6_TCP, 140 MLX5_EXPANSION_IPV4, 141 MLX5_EXPANSION_IPV6), 142 .type = RTE_FLOW_ITEM_TYPE_IPV6, 143 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 144 ETH_RSS_NONFRAG_IPV6_OTHER, 145 }, 146 [MLX5_EXPANSION_OUTER_IPV6_UDP] = { 147 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, 148 MLX5_EXPANSION_VXLAN_GPE), 149 .type = RTE_FLOW_ITEM_TYPE_UDP, 150 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 151 }, 152 [MLX5_EXPANSION_OUTER_IPV6_TCP] = { 153 .type = RTE_FLOW_ITEM_TYPE_TCP, 154 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 155 }, 156 [MLX5_EXPANSION_VXLAN] = { 157 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 158 MLX5_EXPANSION_IPV4, 159 MLX5_EXPANSION_IPV6), 160 .type = RTE_FLOW_ITEM_TYPE_VXLAN, 161 }, 162 [MLX5_EXPANSION_VXLAN_GPE] = { 163 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, 164 MLX5_EXPANSION_IPV4, 165 MLX5_EXPANSION_IPV6), 166 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 167 }, 168 [MLX5_EXPANSION_GRE] = { 169 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), 170 .type = RTE_FLOW_ITEM_TYPE_GRE, 171 }, 172 [MLX5_EXPANSION_MPLS] = { 173 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 174 MLX5_EXPANSION_IPV6), 175 .type = RTE_FLOW_ITEM_TYPE_MPLS, 176 }, 177 [MLX5_EXPANSION_ETH] = { 178 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 179 MLX5_EXPANSION_IPV6), 180 .type = RTE_FLOW_ITEM_TYPE_ETH, 181 }, 182 [MLX5_EXPANSION_ETH_VLAN] = { 183 .next = 
RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), 184 .type = RTE_FLOW_ITEM_TYPE_ETH, 185 }, 186 [MLX5_EXPANSION_VLAN] = { 187 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, 188 MLX5_EXPANSION_IPV6), 189 .type = RTE_FLOW_ITEM_TYPE_VLAN, 190 }, 191 [MLX5_EXPANSION_IPV4] = { 192 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, 193 MLX5_EXPANSION_IPV4_TCP), 194 .type = RTE_FLOW_ITEM_TYPE_IPV4, 195 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | 196 ETH_RSS_NONFRAG_IPV4_OTHER, 197 }, 198 [MLX5_EXPANSION_IPV4_UDP] = { 199 .type = RTE_FLOW_ITEM_TYPE_UDP, 200 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, 201 }, 202 [MLX5_EXPANSION_IPV4_TCP] = { 203 .type = RTE_FLOW_ITEM_TYPE_TCP, 204 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, 205 }, 206 [MLX5_EXPANSION_IPV6] = { 207 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, 208 MLX5_EXPANSION_IPV6_TCP), 209 .type = RTE_FLOW_ITEM_TYPE_IPV6, 210 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | 211 ETH_RSS_NONFRAG_IPV6_OTHER, 212 }, 213 [MLX5_EXPANSION_IPV6_UDP] = { 214 .type = RTE_FLOW_ITEM_TYPE_UDP, 215 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, 216 }, 217 [MLX5_EXPANSION_IPV6_TCP] = { 218 .type = RTE_FLOW_ITEM_TYPE_TCP, 219 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, 220 }, 221 }; 222 223 static const struct rte_flow_ops mlx5_flow_ops = { 224 .validate = mlx5_flow_validate, 225 .create = mlx5_flow_create, 226 .destroy = mlx5_flow_destroy, 227 .flush = mlx5_flow_flush, 228 .isolate = mlx5_flow_isolate, 229 .query = mlx5_flow_query, 230 .dev_dump = mlx5_flow_dev_dump, 231 .get_aged_flows = mlx5_flow_get_aged_flows, 232 }; 233 234 /* Convert FDIR request to Generic flow. */ 235 struct mlx5_fdir { 236 struct rte_flow_attr attr; 237 struct rte_flow_item items[4]; 238 struct rte_flow_item_eth l2; 239 struct rte_flow_item_eth l2_mask; 240 union { 241 struct rte_flow_item_ipv4 ipv4; 242 struct rte_flow_item_ipv6 ipv6; 243 } l3; 244 union { 245 struct rte_flow_item_ipv4 ipv4; 246 struct rte_flow_item_ipv6 ipv6; 247 } l3_mask; 248 union { 249 struct rte_flow_item_udp udp; 250 struct rte_flow_item_tcp tcp; 251 } l4; 252 union { 253 struct rte_flow_item_udp udp; 254 struct rte_flow_item_tcp tcp; 255 } l4_mask; 256 struct rte_flow_action actions[2]; 257 struct rte_flow_action_queue queue; 258 }; 259 260 /* Tunnel information. */ 261 struct mlx5_flow_tunnel_info { 262 uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ 263 uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). 
*/ 264 }; 265 266 static struct mlx5_flow_tunnel_info tunnels_info[] = { 267 { 268 .tunnel = MLX5_FLOW_LAYER_VXLAN, 269 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP, 270 }, 271 { 272 .tunnel = MLX5_FLOW_LAYER_GENEVE, 273 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP, 274 }, 275 { 276 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE, 277 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP, 278 }, 279 { 280 .tunnel = MLX5_FLOW_LAYER_GRE, 281 .ptype = RTE_PTYPE_TUNNEL_GRE, 282 }, 283 { 284 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP, 285 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP, 286 }, 287 { 288 .tunnel = MLX5_FLOW_LAYER_MPLS, 289 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE, 290 }, 291 { 292 .tunnel = MLX5_FLOW_LAYER_NVGRE, 293 .ptype = RTE_PTYPE_TUNNEL_NVGRE, 294 }, 295 { 296 .tunnel = MLX5_FLOW_LAYER_IPIP, 297 .ptype = RTE_PTYPE_TUNNEL_IP, 298 }, 299 { 300 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP, 301 .ptype = RTE_PTYPE_TUNNEL_IP, 302 }, 303 { 304 .tunnel = MLX5_FLOW_LAYER_GTP, 305 .ptype = RTE_PTYPE_TUNNEL_GTPU, 306 }, 307 }; 308 309 /** 310 * Translate tag ID to register. 311 * 312 * @param[in] dev 313 * Pointer to the Ethernet device structure. 314 * @param[in] feature 315 * The feature that request the register. 316 * @param[in] id 317 * The request register ID. 318 * @param[out] error 319 * Error description in case of any. 320 * 321 * @return 322 * The request register on success, a negative errno 323 * value otherwise and rte_errno is set. 324 */ 325 int 326 mlx5_flow_get_reg_id(struct rte_eth_dev *dev, 327 enum mlx5_feature_name feature, 328 uint32_t id, 329 struct rte_flow_error *error) 330 { 331 struct mlx5_priv *priv = dev->data->dev_private; 332 struct mlx5_dev_config *config = &priv->config; 333 enum modify_reg start_reg; 334 bool skip_mtr_reg = false; 335 336 switch (feature) { 337 case MLX5_HAIRPIN_RX: 338 return REG_B; 339 case MLX5_HAIRPIN_TX: 340 return REG_A; 341 case MLX5_METADATA_RX: 342 switch (config->dv_xmeta_en) { 343 case MLX5_XMETA_MODE_LEGACY: 344 return REG_B; 345 case MLX5_XMETA_MODE_META16: 346 return REG_C_0; 347 case MLX5_XMETA_MODE_META32: 348 return REG_C_1; 349 } 350 break; 351 case MLX5_METADATA_TX: 352 return REG_A; 353 case MLX5_METADATA_FDB: 354 switch (config->dv_xmeta_en) { 355 case MLX5_XMETA_MODE_LEGACY: 356 return REG_NONE; 357 case MLX5_XMETA_MODE_META16: 358 return REG_C_0; 359 case MLX5_XMETA_MODE_META32: 360 return REG_C_1; 361 } 362 break; 363 case MLX5_FLOW_MARK: 364 switch (config->dv_xmeta_en) { 365 case MLX5_XMETA_MODE_LEGACY: 366 return REG_NONE; 367 case MLX5_XMETA_MODE_META16: 368 return REG_C_1; 369 case MLX5_XMETA_MODE_META32: 370 return REG_C_0; 371 } 372 break; 373 case MLX5_MTR_SFX: 374 /* 375 * If meter color and flow match share one register, flow match 376 * should use the meter color register for match. 377 */ 378 if (priv->mtr_reg_share) 379 return priv->mtr_color_reg; 380 else 381 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : 382 REG_C_3; 383 case MLX5_MTR_COLOR: 384 MLX5_ASSERT(priv->mtr_color_reg != REG_NONE); 385 return priv->mtr_color_reg; 386 case MLX5_COPY_MARK: 387 /* 388 * Metadata COPY_MARK register using is in meter suffix sub 389 * flow while with meter. It's safe to share the same register. 390 */ 391 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3; 392 case MLX5_APP_TAG: 393 /* 394 * If meter is enable, it will engage the register for color 395 * match and flow match. 
If meter color match is not using the 396 * REG_C_2, need to skip the REG_C_x be used by meter color 397 * match. 398 * If meter is disable, free to use all available registers. 399 */ 400 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 : 401 (priv->mtr_reg_share ? REG_C_3 : REG_C_4); 402 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2); 403 if (id > (REG_C_7 - start_reg)) 404 return rte_flow_error_set(error, EINVAL, 405 RTE_FLOW_ERROR_TYPE_ITEM, 406 NULL, "invalid tag id"); 407 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE) 408 return rte_flow_error_set(error, ENOTSUP, 409 RTE_FLOW_ERROR_TYPE_ITEM, 410 NULL, "unsupported tag id"); 411 /* 412 * This case means meter is using the REG_C_x great than 2. 413 * Take care not to conflict with meter color REG_C_x. 414 * If the available index REG_C_y >= REG_C_x, skip the 415 * color register. 416 */ 417 if (skip_mtr_reg && config->flow_mreg_c 418 [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { 419 if (id >= (REG_C_7 - start_reg)) 420 return rte_flow_error_set(error, EINVAL, 421 RTE_FLOW_ERROR_TYPE_ITEM, 422 NULL, "invalid tag id"); 423 if (config->flow_mreg_c 424 [id + 1 + start_reg - REG_C_0] != REG_NONE) 425 return config->flow_mreg_c 426 [id + 1 + start_reg - REG_C_0]; 427 return rte_flow_error_set(error, ENOTSUP, 428 RTE_FLOW_ERROR_TYPE_ITEM, 429 NULL, "unsupported tag id"); 430 } 431 return config->flow_mreg_c[id + start_reg - REG_C_0]; 432 } 433 MLX5_ASSERT(false); 434 return rte_flow_error_set(error, EINVAL, 435 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 436 NULL, "invalid feature name"); 437 } 438 439 /** 440 * Check extensive flow metadata register support. 441 * 442 * @param dev 443 * Pointer to rte_eth_dev structure. 444 * 445 * @return 446 * True if device supports extensive flow metadata register, otherwise false. 447 */ 448 bool 449 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev) 450 { 451 struct mlx5_priv *priv = dev->data->dev_private; 452 struct mlx5_dev_config *config = &priv->config; 453 454 /* 455 * Having available reg_c can be regarded inclusively as supporting 456 * extensive flow metadata register, which could mean, 457 * - metadata register copy action by modify header. 458 * - 16 modify header actions is supported. 459 * - reg_c's are preserved across different domain (FDB and NIC) on 460 * packet loopback by flow lookup miss. 461 */ 462 return config->flow_mreg_c[2] != REG_NONE; 463 } 464 465 /** 466 * Verify the @p item specifications (spec, last, mask) are compatible with the 467 * NIC capabilities. 468 * 469 * @param[in] item 470 * Item specification. 471 * @param[in] mask 472 * @p item->mask or flow default bit-masks. 473 * @param[in] nic_mask 474 * Bit-masks covering supported fields by the NIC to compare with user mask. 475 * @param[in] size 476 * Bit-masks size in bytes. 477 * @param[out] error 478 * Pointer to error structure. 479 * 480 * @return 481 * 0 on success, a negative errno value otherwise and rte_errno is set. 
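 *
 * Illustrative sketch only (not driver code): an item passes this check when
 * its mask enables no bit outside @p nic_mask and when the masked spec equals
 * the masked last, e.g. an IPv4 TOS "range" that collapses under the mask:
 *
 * @code
 *	struct rte_flow_item_ipv4 spec = { .hdr.type_of_service = 0x10 };
 *	struct rte_flow_item_ipv4 last = { .hdr.type_of_service = 0x1f };
 *	struct rte_flow_item_ipv4 mask = { .hdr.type_of_service = 0xf0 };
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_IPV4,
 *		.spec = &spec, .last = &last, .mask = &mask,
 *	};
 * @endcode
 *
 * Here 0x10 & 0xf0 equals 0x1f & 0xf0, so the range is considered valid;
 * a mask bit not covered by the NIC mask would be rejected with ENOTSUP.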
482 */ 483 int 484 mlx5_flow_item_acceptable(const struct rte_flow_item *item, 485 const uint8_t *mask, 486 const uint8_t *nic_mask, 487 unsigned int size, 488 struct rte_flow_error *error) 489 { 490 unsigned int i; 491 492 MLX5_ASSERT(nic_mask); 493 for (i = 0; i < size; ++i) 494 if ((nic_mask[i] | mask[i]) != nic_mask[i]) 495 return rte_flow_error_set(error, ENOTSUP, 496 RTE_FLOW_ERROR_TYPE_ITEM, 497 item, 498 "mask enables non supported" 499 " bits"); 500 if (!item->spec && (item->mask || item->last)) 501 return rte_flow_error_set(error, EINVAL, 502 RTE_FLOW_ERROR_TYPE_ITEM, item, 503 "mask/last without a spec is not" 504 " supported"); 505 if (item->spec && item->last) { 506 uint8_t spec[size]; 507 uint8_t last[size]; 508 unsigned int i; 509 int ret; 510 511 for (i = 0; i < size; ++i) { 512 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i]; 513 last[i] = ((const uint8_t *)item->last)[i] & mask[i]; 514 } 515 ret = memcmp(spec, last, size); 516 if (ret != 0) 517 return rte_flow_error_set(error, EINVAL, 518 RTE_FLOW_ERROR_TYPE_ITEM, 519 item, 520 "range is not valid"); 521 } 522 return 0; 523 } 524 525 /** 526 * Adjust the hash fields according to the @p flow information. 527 * 528 * @param[in] dev_flow. 529 * Pointer to the mlx5_flow. 530 * @param[in] tunnel 531 * 1 when the hash field is for a tunnel item. 532 * @param[in] layer_types 533 * ETH_RSS_* types. 534 * @param[in] hash_fields 535 * Item hash fields. 536 * 537 * @return 538 * The hash fields that should be used. 539 */ 540 uint64_t 541 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc, 542 int tunnel __rte_unused, uint64_t layer_types, 543 uint64_t hash_fields) 544 { 545 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 546 int rss_request_inner = rss_desc->level >= 2; 547 548 /* Check RSS hash level for tunnel. */ 549 if (tunnel && rss_request_inner) 550 hash_fields |= IBV_RX_HASH_INNER; 551 else if (tunnel || rss_request_inner) 552 return 0; 553 #endif 554 /* Check if requested layer matches RSS hash fields. */ 555 if (!(rss_desc->types & layer_types)) 556 return 0; 557 return hash_fields; 558 } 559 560 /** 561 * Lookup and set the ptype in the data Rx part. A single Ptype can be used, 562 * if several tunnel rules are used on this queue, the tunnel ptype will be 563 * cleared. 564 * 565 * @param rxq_ctrl 566 * Rx queue to update. 567 */ 568 static void 569 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) 570 { 571 unsigned int i; 572 uint32_t tunnel_ptype = 0; 573 574 /* Look up for the ptype to use. */ 575 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) { 576 if (!rxq_ctrl->flow_tunnels_n[i]) 577 continue; 578 if (!tunnel_ptype) { 579 tunnel_ptype = tunnels_info[i].ptype; 580 } else { 581 tunnel_ptype = 0; 582 break; 583 } 584 } 585 rxq_ctrl->rxq.tunnel = tunnel_ptype; 586 } 587 588 /** 589 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the devive 590 * flow. 591 * 592 * @param[in] dev 593 * Pointer to the Ethernet device structure. 594 * @param[in] dev_handle 595 * Pointer to device flow handle structure. 
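 *
 * Application-visible effect (a hedged sketch, not part of the driver): once
 * a queue is flagged here because a flow carries a MARK action, the mark ID
 * is reported per received packet through the mbuf:
 *
 * @code
 *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
 *		handle_mark(mbuf->hash.fdir.hi);
 * @endcode
 *
 * handle_mark() is a hypothetical application callback used for illustration.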
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int mark = dev_handle->mark;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_hrxq *hrxq;
	unsigned int i;

	if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
		return;
	hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
	if (!hrxq)
		return;
	for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
		int idx = hrxq->ind_table->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must always be enabled (metadata may arrive
		 * from another port, not only from local flows).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
678 */ 679 static void 680 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, 681 struct mlx5_flow_handle *dev_handle) 682 { 683 struct mlx5_priv *priv = dev->data->dev_private; 684 const int mark = dev_handle->mark; 685 const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); 686 struct mlx5_hrxq *hrxq; 687 unsigned int i; 688 689 if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) 690 return; 691 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], 692 dev_handle->rix_hrxq); 693 if (!hrxq) 694 return; 695 MLX5_ASSERT(dev->data->dev_started); 696 for (i = 0; i != hrxq->ind_table->queues_n; ++i) { 697 int idx = hrxq->ind_table->queues[i]; 698 struct mlx5_rxq_ctrl *rxq_ctrl = 699 container_of((*priv->rxqs)[idx], 700 struct mlx5_rxq_ctrl, rxq); 701 702 if (priv->config.dv_flow_en && 703 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && 704 mlx5_flow_ext_mreg_supported(dev)) { 705 rxq_ctrl->rxq.mark = 1; 706 rxq_ctrl->flow_mark_n = 1; 707 } else if (mark) { 708 rxq_ctrl->flow_mark_n--; 709 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; 710 } 711 if (tunnel) { 712 unsigned int j; 713 714 /* Decrease the counter matching the flow. */ 715 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) { 716 if ((tunnels_info[j].tunnel & 717 dev_handle->layers) == 718 tunnels_info[j].tunnel) { 719 rxq_ctrl->flow_tunnels_n[j]--; 720 break; 721 } 722 } 723 flow_rxq_tunnel_ptype_update(rxq_ctrl); 724 } 725 } 726 } 727 728 /** 729 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the 730 * @p flow if no other flow uses it with the same kind of request. 731 * 732 * @param dev 733 * Pointer to Ethernet device. 734 * @param[in] flow 735 * Pointer to the flow. 736 */ 737 static void 738 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow) 739 { 740 struct mlx5_priv *priv = dev->data->dev_private; 741 uint32_t handle_idx; 742 struct mlx5_flow_handle *dev_handle; 743 744 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 745 handle_idx, dev_handle, next) 746 flow_drv_rxq_flags_trim(dev, dev_handle); 747 } 748 749 /** 750 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues. 751 * 752 * @param dev 753 * Pointer to Ethernet device. 754 */ 755 static void 756 flow_rxq_flags_clear(struct rte_eth_dev *dev) 757 { 758 struct mlx5_priv *priv = dev->data->dev_private; 759 unsigned int i; 760 761 for (i = 0; i != priv->rxqs_n; ++i) { 762 struct mlx5_rxq_ctrl *rxq_ctrl; 763 unsigned int j; 764 765 if (!(*priv->rxqs)[i]) 766 continue; 767 rxq_ctrl = container_of((*priv->rxqs)[i], 768 struct mlx5_rxq_ctrl, rxq); 769 rxq_ctrl->flow_mark_n = 0; 770 rxq_ctrl->rxq.mark = 0; 771 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) 772 rxq_ctrl->flow_tunnels_n[j] = 0; 773 rxq_ctrl->rxq.tunnel = 0; 774 } 775 } 776 777 /** 778 * Set the Rx queue dynamic metadata (mask and offset) for a flow 779 * 780 * @param[in] dev 781 * Pointer to the Ethernet device structure. 
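 *
 * Application-side sketch (assuming the experimental dynamic metadata helpers
 * from rte_flow.h; error handling omitted): the dynamic field must be
 * registered before the port is started so that this function sees it as
 * available:
 *
 * @code
 *	rte_flow_dynf_metadata_register();
 * @endcode
 *
 * and later, in the receive path, the metadata can be read from the mbuf:
 *
 * @code
 *	uint32_t meta = 0;
 *
 *	if (mbuf->ol_flags & PKT_RX_DYNF_METADATA)
 *		meta = *RTE_FLOW_DYNF_METADATA(mbuf);
 * @endcode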
782 */ 783 void 784 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) 785 { 786 struct mlx5_priv *priv = dev->data->dev_private; 787 struct mlx5_rxq_data *data; 788 unsigned int i; 789 790 for (i = 0; i != priv->rxqs_n; ++i) { 791 if (!(*priv->rxqs)[i]) 792 continue; 793 data = (*priv->rxqs)[i]; 794 if (!rte_flow_dynf_metadata_avail()) { 795 data->dynf_meta = 0; 796 data->flow_meta_mask = 0; 797 data->flow_meta_offset = -1; 798 } else { 799 data->dynf_meta = 1; 800 data->flow_meta_mask = rte_flow_dynf_metadata_mask; 801 data->flow_meta_offset = rte_flow_dynf_metadata_offs; 802 } 803 } 804 } 805 806 /* 807 * return a pointer to the desired action in the list of actions. 808 * 809 * @param[in] actions 810 * The list of actions to search the action in. 811 * @param[in] action 812 * The action to find. 813 * 814 * @return 815 * Pointer to the action in the list, if found. NULL otherwise. 816 */ 817 const struct rte_flow_action * 818 mlx5_flow_find_action(const struct rte_flow_action *actions, 819 enum rte_flow_action_type action) 820 { 821 if (actions == NULL) 822 return NULL; 823 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) 824 if (actions->type == action) 825 return actions; 826 return NULL; 827 } 828 829 /* 830 * Validate the flag action. 831 * 832 * @param[in] action_flags 833 * Bit-fields that holds the actions detected until now. 834 * @param[in] attr 835 * Attributes of flow that includes this action. 836 * @param[out] error 837 * Pointer to error structure. 838 * 839 * @return 840 * 0 on success, a negative errno value otherwise and rte_errno is set. 841 */ 842 int 843 mlx5_flow_validate_action_flag(uint64_t action_flags, 844 const struct rte_flow_attr *attr, 845 struct rte_flow_error *error) 846 { 847 if (action_flags & MLX5_FLOW_ACTION_MARK) 848 return rte_flow_error_set(error, EINVAL, 849 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 850 "can't mark and flag in same flow"); 851 if (action_flags & MLX5_FLOW_ACTION_FLAG) 852 return rte_flow_error_set(error, EINVAL, 853 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 854 "can't have 2 flag" 855 " actions in same flow"); 856 if (attr->egress) 857 return rte_flow_error_set(error, ENOTSUP, 858 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 859 "flag action not supported for " 860 "egress"); 861 return 0; 862 } 863 864 /* 865 * Validate the mark action. 866 * 867 * @param[in] action 868 * Pointer to the queue action. 869 * @param[in] action_flags 870 * Bit-fields that holds the actions detected until now. 871 * @param[in] attr 872 * Attributes of flow that includes this action. 873 * @param[out] error 874 * Pointer to error structure. 875 * 876 * @return 877 * 0 on success, a negative errno value otherwise and rte_errno is set. 
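 *
 * Sketch of an action list accepted by this check (illustrative only; any ID
 * below MLX5_FLOW_MARK_MAX works):
 *
 * @code
 *	struct rte_flow_action_mark mark = { .id = 0xbeef };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 * @endcode
 *
 * A complete flow would also carry a fate action (QUEUE/RSS); a second MARK
 * or an additional FLAG action in the same list is rejected below.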
878 */ 879 int 880 mlx5_flow_validate_action_mark(const struct rte_flow_action *action, 881 uint64_t action_flags, 882 const struct rte_flow_attr *attr, 883 struct rte_flow_error *error) 884 { 885 const struct rte_flow_action_mark *mark = action->conf; 886 887 if (!mark) 888 return rte_flow_error_set(error, EINVAL, 889 RTE_FLOW_ERROR_TYPE_ACTION, 890 action, 891 "configuration cannot be null"); 892 if (mark->id >= MLX5_FLOW_MARK_MAX) 893 return rte_flow_error_set(error, EINVAL, 894 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 895 &mark->id, 896 "mark id must in 0 <= id < " 897 RTE_STR(MLX5_FLOW_MARK_MAX)); 898 if (action_flags & MLX5_FLOW_ACTION_FLAG) 899 return rte_flow_error_set(error, EINVAL, 900 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 901 "can't flag and mark in same flow"); 902 if (action_flags & MLX5_FLOW_ACTION_MARK) 903 return rte_flow_error_set(error, EINVAL, 904 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 905 "can't have 2 mark actions in same" 906 " flow"); 907 if (attr->egress) 908 return rte_flow_error_set(error, ENOTSUP, 909 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 910 "mark action not supported for " 911 "egress"); 912 return 0; 913 } 914 915 /* 916 * Validate the drop action. 917 * 918 * @param[in] action_flags 919 * Bit-fields that holds the actions detected until now. 920 * @param[in] attr 921 * Attributes of flow that includes this action. 922 * @param[out] error 923 * Pointer to error structure. 924 * 925 * @return 926 * 0 on success, a negative errno value otherwise and rte_errno is set. 927 */ 928 int 929 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused, 930 const struct rte_flow_attr *attr, 931 struct rte_flow_error *error) 932 { 933 if (attr->egress) 934 return rte_flow_error_set(error, ENOTSUP, 935 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 936 "drop action not supported for " 937 "egress"); 938 return 0; 939 } 940 941 /* 942 * Validate the queue action. 943 * 944 * @param[in] action 945 * Pointer to the queue action. 946 * @param[in] action_flags 947 * Bit-fields that holds the actions detected until now. 948 * @param[in] dev 949 * Pointer to the Ethernet device structure. 950 * @param[in] attr 951 * Attributes of flow that includes this action. 952 * @param[out] error 953 * Pointer to error structure. 954 * 955 * @return 956 * 0 on success, a negative errno value otherwise and rte_errno is set. 
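 *
 * Illustrative sketch of a configuration passing these checks (queue 3 is an
 * assumption and must be a configured Rx queue of the port; egress is not
 * allowed):
 *
 * @code
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_action_queue queue = { .index = 3 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 * @endcode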
957 */ 958 int 959 mlx5_flow_validate_action_queue(const struct rte_flow_action *action, 960 uint64_t action_flags, 961 struct rte_eth_dev *dev, 962 const struct rte_flow_attr *attr, 963 struct rte_flow_error *error) 964 { 965 struct mlx5_priv *priv = dev->data->dev_private; 966 const struct rte_flow_action_queue *queue = action->conf; 967 968 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 969 return rte_flow_error_set(error, EINVAL, 970 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 971 "can't have 2 fate actions in" 972 " same flow"); 973 if (!priv->rxqs_n) 974 return rte_flow_error_set(error, EINVAL, 975 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 976 NULL, "No Rx queues configured"); 977 if (queue->index >= priv->rxqs_n) 978 return rte_flow_error_set(error, EINVAL, 979 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 980 &queue->index, 981 "queue index out of range"); 982 if (!(*priv->rxqs)[queue->index]) 983 return rte_flow_error_set(error, EINVAL, 984 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 985 &queue->index, 986 "queue is not configured"); 987 if (attr->egress) 988 return rte_flow_error_set(error, ENOTSUP, 989 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 990 "queue action not supported for " 991 "egress"); 992 return 0; 993 } 994 995 /* 996 * Validate the rss action. 997 * 998 * @param[in] action 999 * Pointer to the queue action. 1000 * @param[in] action_flags 1001 * Bit-fields that holds the actions detected until now. 1002 * @param[in] dev 1003 * Pointer to the Ethernet device structure. 1004 * @param[in] attr 1005 * Attributes of flow that includes this action. 1006 * @param[in] item_flags 1007 * Items that were detected. 1008 * @param[out] error 1009 * Pointer to error structure. 1010 * 1011 * @return 1012 * 0 on success, a negative errno value otherwise and rte_errno is set. 1013 */ 1014 int 1015 mlx5_flow_validate_action_rss(const struct rte_flow_action *action, 1016 uint64_t action_flags, 1017 struct rte_eth_dev *dev, 1018 const struct rte_flow_attr *attr, 1019 uint64_t item_flags, 1020 struct rte_flow_error *error) 1021 { 1022 struct mlx5_priv *priv = dev->data->dev_private; 1023 const struct rte_flow_action_rss *rss = action->conf; 1024 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1025 unsigned int i; 1026 1027 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1028 return rte_flow_error_set(error, EINVAL, 1029 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1030 "can't have 2 fate actions" 1031 " in same flow"); 1032 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && 1033 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) 1034 return rte_flow_error_set(error, ENOTSUP, 1035 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1036 &rss->func, 1037 "RSS hash function not supported"); 1038 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 1039 if (rss->level > 2) 1040 #else 1041 if (rss->level > 1) 1042 #endif 1043 return rte_flow_error_set(error, ENOTSUP, 1044 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1045 &rss->level, 1046 "tunnel RSS is not supported"); 1047 /* allow RSS key_len 0 in case of NULL (default) RSS key. 
*/ 1048 if (rss->key_len == 0 && rss->key != NULL) 1049 return rte_flow_error_set(error, ENOTSUP, 1050 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1051 &rss->key_len, 1052 "RSS hash key length 0"); 1053 if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN) 1054 return rte_flow_error_set(error, ENOTSUP, 1055 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1056 &rss->key_len, 1057 "RSS hash key too small"); 1058 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN) 1059 return rte_flow_error_set(error, ENOTSUP, 1060 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1061 &rss->key_len, 1062 "RSS hash key too large"); 1063 if (rss->queue_num > priv->config.ind_table_max_size) 1064 return rte_flow_error_set(error, ENOTSUP, 1065 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1066 &rss->queue_num, 1067 "number of queues too large"); 1068 if (rss->types & MLX5_RSS_HF_MASK) 1069 return rte_flow_error_set(error, ENOTSUP, 1070 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1071 &rss->types, 1072 "some RSS protocols are not" 1073 " supported"); 1074 if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) && 1075 !(rss->types & ETH_RSS_IP)) 1076 return rte_flow_error_set(error, EINVAL, 1077 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1078 "L3 partial RSS requested but L3 RSS" 1079 " type not specified"); 1080 if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) && 1081 !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP))) 1082 return rte_flow_error_set(error, EINVAL, 1083 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1084 "L4 partial RSS requested but L4 RSS" 1085 " type not specified"); 1086 if (!priv->rxqs_n) 1087 return rte_flow_error_set(error, EINVAL, 1088 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1089 NULL, "No Rx queues configured"); 1090 if (!rss->queue_num) 1091 return rte_flow_error_set(error, EINVAL, 1092 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1093 NULL, "No queues configured"); 1094 for (i = 0; i != rss->queue_num; ++i) { 1095 if (rss->queue[i] >= priv->rxqs_n) 1096 return rte_flow_error_set 1097 (error, EINVAL, 1098 RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1099 &rss->queue[i], "queue index out of range"); 1100 if (!(*priv->rxqs)[rss->queue[i]]) 1101 return rte_flow_error_set 1102 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, 1103 &rss->queue[i], "queue is not configured"); 1104 } 1105 if (attr->egress) 1106 return rte_flow_error_set(error, ENOTSUP, 1107 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1108 "rss action not supported for " 1109 "egress"); 1110 if (rss->level > 1 && !tunnel) 1111 return rte_flow_error_set(error, EINVAL, 1112 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1113 "inner RSS is not supported for " 1114 "non-tunnel flows"); 1115 if ((item_flags & MLX5_FLOW_LAYER_ECPRI) && 1116 !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) { 1117 return rte_flow_error_set(error, EINVAL, 1118 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, 1119 "RSS on eCPRI is not supported now"); 1120 } 1121 return 0; 1122 } 1123 1124 /* 1125 * Validate the default miss action. 1126 * 1127 * @param[in] action_flags 1128 * Bit-fields that holds the actions detected until now. 1129 * @param[out] error 1130 * Pointer to error structure. 1131 * 1132 * @return 1133 * 0 on success, a negative errno value otherwise and rte_errno is set. 
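 *
 * Note on the RSS validation above: a configuration satisfying it looks like
 * the following sketch (queue indexes are assumptions; a non-NULL key must be
 * exactly MLX5_RSS_HASH_KEY_LEN bytes, while key_len 0 with a NULL key selects
 * the default key):
 *
 * @code
 *	static const uint16_t queues[] = { 0, 1, 2, 3 };
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = ETH_RSS_IP | ETH_RSS_UDP,
 *		.key_len = 0,
 *		.key = NULL,
 *		.queue_num = RTE_DIM(queues),
 *		.queue = queues,
 *	};
 * @endcode
 *
 * A level above 1 additionally requires a tunnel item in the pattern.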
1134 */ 1135 int 1136 mlx5_flow_validate_action_default_miss(uint64_t action_flags, 1137 const struct rte_flow_attr *attr, 1138 struct rte_flow_error *error) 1139 { 1140 if (action_flags & MLX5_FLOW_FATE_ACTIONS) 1141 return rte_flow_error_set(error, EINVAL, 1142 RTE_FLOW_ERROR_TYPE_ACTION, NULL, 1143 "can't have 2 fate actions in" 1144 " same flow"); 1145 if (attr->egress) 1146 return rte_flow_error_set(error, ENOTSUP, 1147 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1148 "default miss action not supported " 1149 "for egress"); 1150 if (attr->group) 1151 return rte_flow_error_set(error, ENOTSUP, 1152 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL, 1153 "only group 0 is supported"); 1154 if (attr->transfer) 1155 return rte_flow_error_set(error, ENOTSUP, 1156 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1157 NULL, "transfer is not supported"); 1158 return 0; 1159 } 1160 1161 /* 1162 * Validate the count action. 1163 * 1164 * @param[in] dev 1165 * Pointer to the Ethernet device structure. 1166 * @param[in] attr 1167 * Attributes of flow that includes this action. 1168 * @param[out] error 1169 * Pointer to error structure. 1170 * 1171 * @return 1172 * 0 on success, a negative errno value otherwise and rte_errno is set. 1173 */ 1174 int 1175 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, 1176 const struct rte_flow_attr *attr, 1177 struct rte_flow_error *error) 1178 { 1179 if (attr->egress) 1180 return rte_flow_error_set(error, ENOTSUP, 1181 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1182 "count action not supported for " 1183 "egress"); 1184 return 0; 1185 } 1186 1187 /** 1188 * Verify the @p attributes will be correctly understood by the NIC and store 1189 * them in the @p flow if everything is correct. 1190 * 1191 * @param[in] dev 1192 * Pointer to the Ethernet device structure. 1193 * @param[in] attributes 1194 * Pointer to flow attributes 1195 * @param[out] error 1196 * Pointer to error structure. 1197 * 1198 * @return 1199 * 0 on success, a negative errno value otherwise and rte_errno is set. 1200 */ 1201 int 1202 mlx5_flow_validate_attributes(struct rte_eth_dev *dev, 1203 const struct rte_flow_attr *attributes, 1204 struct rte_flow_error *error) 1205 { 1206 struct mlx5_priv *priv = dev->data->dev_private; 1207 uint32_t priority_max = priv->config.flow_prio - 1; 1208 1209 if (attributes->group) 1210 return rte_flow_error_set(error, ENOTSUP, 1211 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 1212 NULL, "groups is not supported"); 1213 if (attributes->priority != MLX5_FLOW_PRIO_RSVD && 1214 attributes->priority >= priority_max) 1215 return rte_flow_error_set(error, ENOTSUP, 1216 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 1217 NULL, "priority out of range"); 1218 if (attributes->egress) 1219 return rte_flow_error_set(error, ENOTSUP, 1220 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, 1221 "egress is not supported"); 1222 if (attributes->transfer && !priv->config.dv_esw_en) 1223 return rte_flow_error_set(error, ENOTSUP, 1224 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 1225 NULL, "transfer is not supported"); 1226 if (!attributes->ingress) 1227 return rte_flow_error_set(error, EINVAL, 1228 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 1229 NULL, 1230 "ingress attribute is mandatory"); 1231 return 0; 1232 } 1233 1234 /** 1235 * Validate ICMP6 item. 1236 * 1237 * @param[in] item 1238 * Item specification. 1239 * @param[in] item_flags 1240 * Bit-fields that holds the items detected until now. 1241 * @param[out] error 1242 * Pointer to error structure. 1243 * 1244 * @return 1245 * 0 on success, a negative errno value otherwise and rte_errno is set. 
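 *
 * Sketch of a pattern accepted here (an IPv6 layer must precede the ICMP6
 * item and no other L4 item may be present):
 *
 * @code
 *	struct rte_flow_item_icmp6 icmp6 = { .type = 128 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV6 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_ICMP6, .spec = &icmp6 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 * @endcode
 *
 * With no mask given, rte_flow_item_icmp6_mask is applied by default.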
1246 */ 1247 int 1248 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, 1249 uint64_t item_flags, 1250 uint8_t target_protocol, 1251 struct rte_flow_error *error) 1252 { 1253 const struct rte_flow_item_icmp6 *mask = item->mask; 1254 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1255 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : 1256 MLX5_FLOW_LAYER_OUTER_L3_IPV6; 1257 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1258 MLX5_FLOW_LAYER_OUTER_L4; 1259 int ret; 1260 1261 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6) 1262 return rte_flow_error_set(error, EINVAL, 1263 RTE_FLOW_ERROR_TYPE_ITEM, item, 1264 "protocol filtering not compatible" 1265 " with ICMP6 layer"); 1266 if (!(item_flags & l3m)) 1267 return rte_flow_error_set(error, EINVAL, 1268 RTE_FLOW_ERROR_TYPE_ITEM, item, 1269 "IPv6 is mandatory to filter on" 1270 " ICMP6"); 1271 if (item_flags & l4m) 1272 return rte_flow_error_set(error, EINVAL, 1273 RTE_FLOW_ERROR_TYPE_ITEM, item, 1274 "multiple L4 layers not supported"); 1275 if (!mask) 1276 mask = &rte_flow_item_icmp6_mask; 1277 ret = mlx5_flow_item_acceptable 1278 (item, (const uint8_t *)mask, 1279 (const uint8_t *)&rte_flow_item_icmp6_mask, 1280 sizeof(struct rte_flow_item_icmp6), error); 1281 if (ret < 0) 1282 return ret; 1283 return 0; 1284 } 1285 1286 /** 1287 * Validate ICMP item. 1288 * 1289 * @param[in] item 1290 * Item specification. 1291 * @param[in] item_flags 1292 * Bit-fields that holds the items detected until now. 1293 * @param[out] error 1294 * Pointer to error structure. 1295 * 1296 * @return 1297 * 0 on success, a negative errno value otherwise and rte_errno is set. 1298 */ 1299 int 1300 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, 1301 uint64_t item_flags, 1302 uint8_t target_protocol, 1303 struct rte_flow_error *error) 1304 { 1305 const struct rte_flow_item_icmp *mask = item->mask; 1306 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1307 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : 1308 MLX5_FLOW_LAYER_OUTER_L3_IPV4; 1309 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1310 MLX5_FLOW_LAYER_OUTER_L4; 1311 int ret; 1312 1313 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP) 1314 return rte_flow_error_set(error, EINVAL, 1315 RTE_FLOW_ERROR_TYPE_ITEM, item, 1316 "protocol filtering not compatible" 1317 " with ICMP layer"); 1318 if (!(item_flags & l3m)) 1319 return rte_flow_error_set(error, EINVAL, 1320 RTE_FLOW_ERROR_TYPE_ITEM, item, 1321 "IPv4 is mandatory to filter" 1322 " on ICMP"); 1323 if (item_flags & l4m) 1324 return rte_flow_error_set(error, EINVAL, 1325 RTE_FLOW_ERROR_TYPE_ITEM, item, 1326 "multiple L4 layers not supported"); 1327 if (!mask) 1328 mask = &rte_flow_item_icmp_mask; 1329 ret = mlx5_flow_item_acceptable 1330 (item, (const uint8_t *)mask, 1331 (const uint8_t *)&rte_flow_item_icmp_mask, 1332 sizeof(struct rte_flow_item_icmp), error); 1333 if (ret < 0) 1334 return ret; 1335 return 0; 1336 } 1337 1338 /** 1339 * Validate Ethernet item. 1340 * 1341 * @param[in] item 1342 * Item specification. 1343 * @param[in] item_flags 1344 * Bit-fields that holds the items detected until now. 1345 * @param[out] error 1346 * Pointer to error structure. 1347 * 1348 * @return 1349 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1350 */ 1351 int 1352 mlx5_flow_validate_item_eth(const struct rte_flow_item *item, 1353 uint64_t item_flags, 1354 struct rte_flow_error *error) 1355 { 1356 const struct rte_flow_item_eth *mask = item->mask; 1357 const struct rte_flow_item_eth nic_mask = { 1358 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1359 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 1360 .type = RTE_BE16(0xffff), 1361 }; 1362 int ret; 1363 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1364 const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : 1365 MLX5_FLOW_LAYER_OUTER_L2; 1366 1367 if (item_flags & ethm) 1368 return rte_flow_error_set(error, ENOTSUP, 1369 RTE_FLOW_ERROR_TYPE_ITEM, item, 1370 "multiple L2 layers not supported"); 1371 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) || 1372 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3))) 1373 return rte_flow_error_set(error, EINVAL, 1374 RTE_FLOW_ERROR_TYPE_ITEM, item, 1375 "L2 layer should not follow " 1376 "L3 layers"); 1377 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) || 1378 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN))) 1379 return rte_flow_error_set(error, EINVAL, 1380 RTE_FLOW_ERROR_TYPE_ITEM, item, 1381 "L2 layer should not follow VLAN"); 1382 if (!mask) 1383 mask = &rte_flow_item_eth_mask; 1384 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1385 (const uint8_t *)&nic_mask, 1386 sizeof(struct rte_flow_item_eth), 1387 error); 1388 return ret; 1389 } 1390 1391 /** 1392 * Validate VLAN item. 1393 * 1394 * @param[in] item 1395 * Item specification. 1396 * @param[in] item_flags 1397 * Bit-fields that holds the items detected until now. 1398 * @param[in] dev 1399 * Ethernet device flow is being created on. 1400 * @param[out] error 1401 * Pointer to error structure. 1402 * 1403 * @return 1404 * 0 on success, a negative errno value otherwise and rte_errno is set. 1405 */ 1406 int 1407 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, 1408 uint64_t item_flags, 1409 struct rte_eth_dev *dev, 1410 struct rte_flow_error *error) 1411 { 1412 const struct rte_flow_item_vlan *spec = item->spec; 1413 const struct rte_flow_item_vlan *mask = item->mask; 1414 const struct rte_flow_item_vlan nic_mask = { 1415 .tci = RTE_BE16(UINT16_MAX), 1416 .inner_type = RTE_BE16(UINT16_MAX), 1417 }; 1418 uint16_t vlan_tag = 0; 1419 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1420 int ret; 1421 const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | 1422 MLX5_FLOW_LAYER_INNER_L4) : 1423 (MLX5_FLOW_LAYER_OUTER_L3 | 1424 MLX5_FLOW_LAYER_OUTER_L4); 1425 const uint64_t vlanm = tunnel ? 
MLX5_FLOW_LAYER_INNER_VLAN : 1426 MLX5_FLOW_LAYER_OUTER_VLAN; 1427 1428 if (item_flags & vlanm) 1429 return rte_flow_error_set(error, EINVAL, 1430 RTE_FLOW_ERROR_TYPE_ITEM, item, 1431 "multiple VLAN layers not supported"); 1432 else if ((item_flags & l34m) != 0) 1433 return rte_flow_error_set(error, EINVAL, 1434 RTE_FLOW_ERROR_TYPE_ITEM, item, 1435 "VLAN cannot follow L3/L4 layer"); 1436 if (!mask) 1437 mask = &rte_flow_item_vlan_mask; 1438 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1439 (const uint8_t *)&nic_mask, 1440 sizeof(struct rte_flow_item_vlan), 1441 error); 1442 if (ret) 1443 return ret; 1444 if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { 1445 struct mlx5_priv *priv = dev->data->dev_private; 1446 1447 if (priv->vmwa_context) { 1448 /* 1449 * Non-NULL context means we have a virtual machine 1450 * and SR-IOV enabled, we have to create VLAN interface 1451 * to make hypervisor to setup E-Switch vport 1452 * context correctly. We avoid creating the multiple 1453 * VLAN interfaces, so we cannot support VLAN tag mask. 1454 */ 1455 return rte_flow_error_set(error, EINVAL, 1456 RTE_FLOW_ERROR_TYPE_ITEM, 1457 item, 1458 "VLAN tag mask is not" 1459 " supported in virtual" 1460 " environment"); 1461 } 1462 } 1463 if (spec) { 1464 vlan_tag = spec->tci; 1465 vlan_tag &= mask->tci; 1466 } 1467 /* 1468 * From verbs perspective an empty VLAN is equivalent 1469 * to a packet without VLAN layer. 1470 */ 1471 if (!vlan_tag) 1472 return rte_flow_error_set(error, EINVAL, 1473 RTE_FLOW_ERROR_TYPE_ITEM_SPEC, 1474 item->spec, 1475 "VLAN cannot be empty"); 1476 return 0; 1477 } 1478 1479 /** 1480 * Validate IPV4 item. 1481 * 1482 * @param[in] item 1483 * Item specification. 1484 * @param[in] item_flags 1485 * Bit-fields that holds the items detected until now. 1486 * @param[in] last_item 1487 * Previous validated item in the pattern items. 1488 * @param[in] ether_type 1489 * Type in the ethernet layer header (including dot1q). 1490 * @param[in] acc_mask 1491 * Acceptable mask, if NULL default internal default mask 1492 * will be used to check whether item fields are supported. 1493 * @param[out] error 1494 * Pointer to error structure. 1495 * 1496 * @return 1497 * 0 on success, a negative errno value otherwise and rte_errno is set. 1498 */ 1499 int 1500 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, 1501 uint64_t item_flags, 1502 uint64_t last_item, 1503 uint16_t ether_type, 1504 const struct rte_flow_item_ipv4 *acc_mask, 1505 struct rte_flow_error *error) 1506 { 1507 const struct rte_flow_item_ipv4 *mask = item->mask; 1508 const struct rte_flow_item_ipv4 *spec = item->spec; 1509 const struct rte_flow_item_ipv4 nic_mask = { 1510 .hdr = { 1511 .src_addr = RTE_BE32(0xffffffff), 1512 .dst_addr = RTE_BE32(0xffffffff), 1513 .type_of_service = 0xff, 1514 .next_proto_id = 0xff, 1515 }, 1516 }; 1517 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1518 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1519 MLX5_FLOW_LAYER_OUTER_L3; 1520 const uint64_t l4m = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4 : 1521 MLX5_FLOW_LAYER_OUTER_L4; 1522 int ret; 1523 uint8_t next_proto = 0xFF; 1524 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1525 MLX5_FLOW_LAYER_OUTER_VLAN | 1526 MLX5_FLOW_LAYER_INNER_VLAN); 1527 1528 if ((last_item & l2_vlan) && ether_type && 1529 ether_type != RTE_ETHER_TYPE_IPV4) 1530 return rte_flow_error_set(error, EINVAL, 1531 RTE_FLOW_ERROR_TYPE_ITEM, item, 1532 "IPv4 cannot follow L2/VLAN layer " 1533 "which ether type is not IPv4"); 1534 if (item_flags & MLX5_FLOW_LAYER_IPIP) { 1535 if (mask && spec) 1536 next_proto = mask->hdr.next_proto_id & 1537 spec->hdr.next_proto_id; 1538 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1539 return rte_flow_error_set(error, EINVAL, 1540 RTE_FLOW_ERROR_TYPE_ITEM, 1541 item, 1542 "multiple tunnel " 1543 "not supported"); 1544 } 1545 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) 1546 return rte_flow_error_set(error, EINVAL, 1547 RTE_FLOW_ERROR_TYPE_ITEM, item, 1548 "wrong tunnel type - IPv6 specified " 1549 "but IPv4 item provided"); 1550 if (item_flags & l3m) 1551 return rte_flow_error_set(error, ENOTSUP, 1552 RTE_FLOW_ERROR_TYPE_ITEM, item, 1553 "multiple L3 layers not supported"); 1554 else if (item_flags & l4m) 1555 return rte_flow_error_set(error, EINVAL, 1556 RTE_FLOW_ERROR_TYPE_ITEM, item, 1557 "L3 cannot follow an L4 layer."); 1558 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1559 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1560 return rte_flow_error_set(error, EINVAL, 1561 RTE_FLOW_ERROR_TYPE_ITEM, item, 1562 "L3 cannot follow an NVGRE layer."); 1563 if (!mask) 1564 mask = &rte_flow_item_ipv4_mask; 1565 else if (mask->hdr.next_proto_id != 0 && 1566 mask->hdr.next_proto_id != 0xff) 1567 return rte_flow_error_set(error, EINVAL, 1568 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 1569 "partial mask is not supported" 1570 " for protocol"); 1571 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1572 acc_mask ? (const uint8_t *)acc_mask 1573 : (const uint8_t *)&nic_mask, 1574 sizeof(struct rte_flow_item_ipv4), 1575 error); 1576 if (ret < 0) 1577 return ret; 1578 return 0; 1579 } 1580 1581 /** 1582 * Validate IPV6 item. 1583 * 1584 * @param[in] item 1585 * Item specification. 1586 * @param[in] item_flags 1587 * Bit-fields that holds the items detected until now. 1588 * @param[in] last_item 1589 * Previous validated item in the pattern items. 1590 * @param[in] ether_type 1591 * Type in the ethernet layer header (including dot1q). 1592 * @param[in] acc_mask 1593 * Acceptable mask, if NULL default internal default mask 1594 * will be used to check whether item fields are supported. 1595 * @param[out] error 1596 * Pointer to error structure. 1597 * 1598 * @return 1599 * 0 on success, a negative errno value otherwise and rte_errno is set. 
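 *
 * Note on the IPv4 validation above: a partial next_proto_id mask is
 * rejected, so matching the L4 protocol requires the full 0xff mask, e.g.
 * (illustrative sketch):
 *
 * @code
 *	struct rte_flow_item_ipv4 spec = { .hdr.next_proto_id = IPPROTO_UDP };
 *	struct rte_flow_item_ipv4 mask = { .hdr.next_proto_id = 0xff };
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_IPV4,
 *		.spec = &spec,
 *		.mask = &mask,
 *	};
 * @endcode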
1600 */ 1601 int 1602 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, 1603 uint64_t item_flags, 1604 uint64_t last_item, 1605 uint16_t ether_type, 1606 const struct rte_flow_item_ipv6 *acc_mask, 1607 struct rte_flow_error *error) 1608 { 1609 const struct rte_flow_item_ipv6 *mask = item->mask; 1610 const struct rte_flow_item_ipv6 *spec = item->spec; 1611 const struct rte_flow_item_ipv6 nic_mask = { 1612 .hdr = { 1613 .src_addr = 1614 "\xff\xff\xff\xff\xff\xff\xff\xff" 1615 "\xff\xff\xff\xff\xff\xff\xff\xff", 1616 .dst_addr = 1617 "\xff\xff\xff\xff\xff\xff\xff\xff" 1618 "\xff\xff\xff\xff\xff\xff\xff\xff", 1619 .vtc_flow = RTE_BE32(0xffffffff), 1620 .proto = 0xff, 1621 }, 1622 }; 1623 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1624 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1625 MLX5_FLOW_LAYER_OUTER_L3; 1626 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1627 MLX5_FLOW_LAYER_OUTER_L4; 1628 int ret; 1629 uint8_t next_proto = 0xFF; 1630 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 | 1631 MLX5_FLOW_LAYER_OUTER_VLAN | 1632 MLX5_FLOW_LAYER_INNER_VLAN); 1633 1634 if ((last_item & l2_vlan) && ether_type && 1635 ether_type != RTE_ETHER_TYPE_IPV6) 1636 return rte_flow_error_set(error, EINVAL, 1637 RTE_FLOW_ERROR_TYPE_ITEM, item, 1638 "IPv6 cannot follow L2/VLAN layer " 1639 "which ether type is not IPv6"); 1640 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { 1641 if (mask && spec) 1642 next_proto = mask->hdr.proto & spec->hdr.proto; 1643 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) 1644 return rte_flow_error_set(error, EINVAL, 1645 RTE_FLOW_ERROR_TYPE_ITEM, 1646 item, 1647 "multiple tunnel " 1648 "not supported"); 1649 } 1650 if (item_flags & MLX5_FLOW_LAYER_IPIP) 1651 return rte_flow_error_set(error, EINVAL, 1652 RTE_FLOW_ERROR_TYPE_ITEM, item, 1653 "wrong tunnel type - IPv4 specified " 1654 "but IPv6 item provided"); 1655 if (item_flags & l3m) 1656 return rte_flow_error_set(error, ENOTSUP, 1657 RTE_FLOW_ERROR_TYPE_ITEM, item, 1658 "multiple L3 layers not supported"); 1659 else if (item_flags & l4m) 1660 return rte_flow_error_set(error, EINVAL, 1661 RTE_FLOW_ERROR_TYPE_ITEM, item, 1662 "L3 cannot follow an L4 layer."); 1663 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) && 1664 !(item_flags & MLX5_FLOW_LAYER_INNER_L2)) 1665 return rte_flow_error_set(error, EINVAL, 1666 RTE_FLOW_ERROR_TYPE_ITEM, item, 1667 "L3 cannot follow an NVGRE layer."); 1668 if (!mask) 1669 mask = &rte_flow_item_ipv6_mask; 1670 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 1671 acc_mask ? (const uint8_t *)acc_mask 1672 : (const uint8_t *)&nic_mask, 1673 sizeof(struct rte_flow_item_ipv6), 1674 error); 1675 if (ret < 0) 1676 return ret; 1677 return 0; 1678 } 1679 1680 /** 1681 * Validate UDP item. 1682 * 1683 * @param[in] item 1684 * Item specification. 1685 * @param[in] item_flags 1686 * Bit-fields that holds the items detected until now. 1687 * @param[in] target_protocol 1688 * The next protocol in the previous item. 1689 * @param[in] flow_mask 1690 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask. 1691 * @param[out] error 1692 * Pointer to error structure. 1693 * 1694 * @return 1695 * 0 on success, a negative errno value otherwise and rte_errno is set. 
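 *
 * Sketch of a pattern accepted here (an L3 item must precede UDP, no other
 * L4 item may be present, and any L3 protocol spec must select UDP):
 *
 * @code
 *	struct rte_flow_item_udp udp = { .hdr.dst_port = RTE_BE16(53) };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 * @endcode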
1696 */ 1697 int 1698 mlx5_flow_validate_item_udp(const struct rte_flow_item *item, 1699 uint64_t item_flags, 1700 uint8_t target_protocol, 1701 struct rte_flow_error *error) 1702 { 1703 const struct rte_flow_item_udp *mask = item->mask; 1704 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1705 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1706 MLX5_FLOW_LAYER_OUTER_L3; 1707 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1708 MLX5_FLOW_LAYER_OUTER_L4; 1709 int ret; 1710 1711 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP) 1712 return rte_flow_error_set(error, EINVAL, 1713 RTE_FLOW_ERROR_TYPE_ITEM, item, 1714 "protocol filtering not compatible" 1715 " with UDP layer"); 1716 if (!(item_flags & l3m)) 1717 return rte_flow_error_set(error, EINVAL, 1718 RTE_FLOW_ERROR_TYPE_ITEM, item, 1719 "L3 is mandatory to filter on L4"); 1720 if (item_flags & l4m) 1721 return rte_flow_error_set(error, EINVAL, 1722 RTE_FLOW_ERROR_TYPE_ITEM, item, 1723 "multiple L4 layers not supported"); 1724 if (!mask) 1725 mask = &rte_flow_item_udp_mask; 1726 ret = mlx5_flow_item_acceptable 1727 (item, (const uint8_t *)mask, 1728 (const uint8_t *)&rte_flow_item_udp_mask, 1729 sizeof(struct rte_flow_item_udp), error); 1730 if (ret < 0) 1731 return ret; 1732 return 0; 1733 } 1734 1735 /** 1736 * Validate TCP item. 1737 * 1738 * @param[in] item 1739 * Item specification. 1740 * @param[in] item_flags 1741 * Bit-fields that holds the items detected until now. 1742 * @param[in] target_protocol 1743 * The next protocol in the previous item. 1744 * @param[out] error 1745 * Pointer to error structure. 1746 * 1747 * @return 1748 * 0 on success, a negative errno value otherwise and rte_errno is set. 1749 */ 1750 int 1751 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, 1752 uint64_t item_flags, 1753 uint8_t target_protocol, 1754 const struct rte_flow_item_tcp *flow_mask, 1755 struct rte_flow_error *error) 1756 { 1757 const struct rte_flow_item_tcp *mask = item->mask; 1758 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); 1759 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 : 1760 MLX5_FLOW_LAYER_OUTER_L3; 1761 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 : 1762 MLX5_FLOW_LAYER_OUTER_L4; 1763 int ret; 1764 1765 MLX5_ASSERT(flow_mask); 1766 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP) 1767 return rte_flow_error_set(error, EINVAL, 1768 RTE_FLOW_ERROR_TYPE_ITEM, item, 1769 "protocol filtering not compatible" 1770 " with TCP layer"); 1771 if (!(item_flags & l3m)) 1772 return rte_flow_error_set(error, EINVAL, 1773 RTE_FLOW_ERROR_TYPE_ITEM, item, 1774 "L3 is mandatory to filter on L4"); 1775 if (item_flags & l4m) 1776 return rte_flow_error_set(error, EINVAL, 1777 RTE_FLOW_ERROR_TYPE_ITEM, item, 1778 "multiple L4 layers not supported"); 1779 if (!mask) 1780 mask = &rte_flow_item_tcp_mask; 1781 ret = mlx5_flow_item_acceptable 1782 (item, (const uint8_t *)mask, 1783 (const uint8_t *)flow_mask, 1784 sizeof(struct rte_flow_item_tcp), error); 1785 if (ret < 0) 1786 return ret; 1787 return 0; 1788 } 1789 1790 /** 1791 * Validate VXLAN item. 1792 * 1793 * @param[in] item 1794 * Item specification. 1795 * @param[in] item_flags 1796 * Bit-fields that holds the items detected until now. 1797 * @param[in] target_protocol 1798 * The next protocol in the previous item. 1799 * @param[out] error 1800 * Pointer to error structure. 1801 * 1802 * @return 1803 * 0 on success, a negative errno value otherwise and rte_errno is set. 
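 *
 * Sketch of a pattern accepted here (the outer layers up to UDP must be
 * fully defined; 4789 is the IANA-assigned VXLAN port):
 *
 * @code
 *	struct rte_flow_item_udp udp = { .hdr.dst_port = RTE_BE16(4789) };
 *	struct rte_flow_item_vxlan vxlan = { .vni = { 0, 0, 42 } };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 * @endcode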
1804 */ 1805 int 1806 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, 1807 uint64_t item_flags, 1808 struct rte_flow_error *error) 1809 { 1810 const struct rte_flow_item_vxlan *spec = item->spec; 1811 const struct rte_flow_item_vxlan *mask = item->mask; 1812 int ret; 1813 union vni { 1814 uint32_t vlan_id; 1815 uint8_t vni[4]; 1816 } id = { .vlan_id = 0, }; 1817 1818 1819 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 1820 return rte_flow_error_set(error, ENOTSUP, 1821 RTE_FLOW_ERROR_TYPE_ITEM, item, 1822 "multiple tunnel layers not" 1823 " supported"); 1824 /* 1825 * Verify only UDPv4 is present as defined in 1826 * https://tools.ietf.org/html/rfc7348 1827 */ 1828 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1829 return rte_flow_error_set(error, EINVAL, 1830 RTE_FLOW_ERROR_TYPE_ITEM, item, 1831 "no outer UDP layer found"); 1832 if (!mask) 1833 mask = &rte_flow_item_vxlan_mask; 1834 ret = mlx5_flow_item_acceptable 1835 (item, (const uint8_t *)mask, 1836 (const uint8_t *)&rte_flow_item_vxlan_mask, 1837 sizeof(struct rte_flow_item_vxlan), 1838 error); 1839 if (ret < 0) 1840 return ret; 1841 if (spec) { 1842 memcpy(&id.vni[1], spec->vni, 3); 1843 memcpy(&id.vni[1], mask->vni, 3); 1844 } 1845 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 1846 return rte_flow_error_set(error, ENOTSUP, 1847 RTE_FLOW_ERROR_TYPE_ITEM, item, 1848 "VXLAN tunnel must be fully defined"); 1849 return 0; 1850 } 1851 1852 /** 1853 * Validate VXLAN_GPE item. 1854 * 1855 * @param[in] item 1856 * Item specification. 1857 * @param[in] item_flags 1858 * Bit-fields that holds the items detected until now. 1859 * @param[in] priv 1860 * Pointer to the private data structure. 1861 * @param[in] target_protocol 1862 * The next protocol in the previous item. 1863 * @param[out] error 1864 * Pointer to error structure. 1865 * 1866 * @return 1867 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1868 */ 1869 int 1870 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, 1871 uint64_t item_flags, 1872 struct rte_eth_dev *dev, 1873 struct rte_flow_error *error) 1874 { 1875 struct mlx5_priv *priv = dev->data->dev_private; 1876 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1877 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1878 int ret; 1879 union vni { 1880 uint32_t vlan_id; 1881 uint8_t vni[4]; 1882 } id = { .vlan_id = 0, }; 1883 1884 if (!priv->config.l3_vxlan_en) 1885 return rte_flow_error_set(error, ENOTSUP, 1886 RTE_FLOW_ERROR_TYPE_ITEM, item, 1887 "L3 VXLAN is not enabled by device" 1888 " parameter and/or not configured in" 1889 " firmware"); 1890 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 1891 return rte_flow_error_set(error, ENOTSUP, 1892 RTE_FLOW_ERROR_TYPE_ITEM, item, 1893 "multiple tunnel layers not" 1894 " supported"); 1895 /* 1896 * Verify only UDPv4 is present as defined in 1897 * https://tools.ietf.org/html/rfc7348 1898 */ 1899 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 1900 return rte_flow_error_set(error, EINVAL, 1901 RTE_FLOW_ERROR_TYPE_ITEM, item, 1902 "no outer UDP layer found"); 1903 if (!mask) 1904 mask = &rte_flow_item_vxlan_gpe_mask; 1905 ret = mlx5_flow_item_acceptable 1906 (item, (const uint8_t *)mask, 1907 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, 1908 sizeof(struct rte_flow_item_vxlan_gpe), 1909 error); 1910 if (ret < 0) 1911 return ret; 1912 if (spec) { 1913 if (spec->protocol) 1914 return rte_flow_error_set(error, ENOTSUP, 1915 RTE_FLOW_ERROR_TYPE_ITEM, 1916 item, 1917 "VxLAN-GPE protocol" 1918 " not supported"); 1919 memcpy(&id.vni[1], spec->vni, 3); 1920 memcpy(&id.vni[1], mask->vni, 3); 1921 } 1922 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 1923 return rte_flow_error_set(error, ENOTSUP, 1924 RTE_FLOW_ERROR_TYPE_ITEM, item, 1925 "VXLAN-GPE tunnel must be fully" 1926 " defined"); 1927 return 0; 1928 } 1929 /** 1930 * Validate GRE Key item. 1931 * 1932 * @param[in] item 1933 * Item specification. 1934 * @param[in] item_flags 1935 * Bit flags to mark detected items. 1936 * @param[in] gre_item 1937 * Pointer to gre_item 1938 * @param[out] error 1939 * Pointer to error structure. 1940 * 1941 * @return 1942 * 0 on success, a negative errno value otherwise and rte_errno is set. 
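 *
 * Illustrative note (hypothetical values): when the mask of the preceding
 * GRE item covers the K bit, the GRE spec must have that bit set, e.g.
 *
 *   struct rte_flow_item_gre gre_spec = {
 *           .c_rsvd0_ver = RTE_BE16(0x2000),
 *   };
 *
 * otherwise validation fails with "Key bit must be on".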
1943 */ 1944 int 1945 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, 1946 uint64_t item_flags, 1947 const struct rte_flow_item *gre_item, 1948 struct rte_flow_error *error) 1949 { 1950 const rte_be32_t *mask = item->mask; 1951 int ret = 0; 1952 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); 1953 const struct rte_flow_item_gre *gre_spec; 1954 const struct rte_flow_item_gre *gre_mask; 1955 1956 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY) 1957 return rte_flow_error_set(error, ENOTSUP, 1958 RTE_FLOW_ERROR_TYPE_ITEM, item, 1959 "Multiple GRE key not supported"); 1960 if (!(item_flags & MLX5_FLOW_LAYER_GRE)) 1961 return rte_flow_error_set(error, ENOTSUP, 1962 RTE_FLOW_ERROR_TYPE_ITEM, item, 1963 "No preceding GRE header"); 1964 if (item_flags & MLX5_FLOW_LAYER_INNER) 1965 return rte_flow_error_set(error, ENOTSUP, 1966 RTE_FLOW_ERROR_TYPE_ITEM, item, 1967 "GRE key following a wrong item"); 1968 gre_mask = gre_item->mask; 1969 if (!gre_mask) 1970 gre_mask = &rte_flow_item_gre_mask; 1971 gre_spec = gre_item->spec; 1972 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) && 1973 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000))) 1974 return rte_flow_error_set(error, EINVAL, 1975 RTE_FLOW_ERROR_TYPE_ITEM, item, 1976 "Key bit must be on"); 1977 1978 if (!mask) 1979 mask = &gre_key_default_mask; 1980 ret = mlx5_flow_item_acceptable 1981 (item, (const uint8_t *)mask, 1982 (const uint8_t *)&gre_key_default_mask, 1983 sizeof(rte_be32_t), error); 1984 return ret; 1985 } 1986
1987 /** 1988 * Validate GRE item. 1989 * 1990 * @param[in] item 1991 * Item specification. 1992 * @param[in] item_flags 1993 * Bit flags to mark detected items. 1994 * @param[in] target_protocol 1995 * The next protocol in the previous item. 1996 * @param[out] error 1997 * Pointer to error structure. 1998 * 1999 * @return 2000 * 0 on success, a negative errno value otherwise and rte_errno is set.
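 *
 * Illustrative sketch (hypothetical flags): an outer L3 layer must precede
 * the GRE item and no other tunnel may have been matched yet, e.g.
 *
 *   uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *                         MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 *
 *   ret = mlx5_flow_validate_item_gre(item, item_flags, IPPROTO_GRE, &err);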
2001 */ 2002 int 2003 mlx5_flow_validate_item_gre(const struct rte_flow_item *item, 2004 uint64_t item_flags, 2005 uint8_t target_protocol, 2006 struct rte_flow_error *error) 2007 { 2008 const struct rte_flow_item_gre *spec __rte_unused = item->spec; 2009 const struct rte_flow_item_gre *mask = item->mask; 2010 int ret; 2011 const struct rte_flow_item_gre nic_mask = { 2012 .c_rsvd0_ver = RTE_BE16(0xB000), 2013 .protocol = RTE_BE16(UINT16_MAX), 2014 }; 2015 2016 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2017 return rte_flow_error_set(error, EINVAL, 2018 RTE_FLOW_ERROR_TYPE_ITEM, item, 2019 "protocol filtering not compatible" 2020 " with this GRE layer"); 2021 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2022 return rte_flow_error_set(error, ENOTSUP, 2023 RTE_FLOW_ERROR_TYPE_ITEM, item, 2024 "multiple tunnel layers not" 2025 " supported"); 2026 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2027 return rte_flow_error_set(error, ENOTSUP, 2028 RTE_FLOW_ERROR_TYPE_ITEM, item, 2029 "L3 Layer is missing"); 2030 if (!mask) 2031 mask = &rte_flow_item_gre_mask; 2032 ret = mlx5_flow_item_acceptable 2033 (item, (const uint8_t *)mask, 2034 (const uint8_t *)&nic_mask, 2035 sizeof(struct rte_flow_item_gre), error); 2036 if (ret < 0) 2037 return ret; 2038 #ifndef HAVE_MLX5DV_DR 2039 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT 2040 if (spec && (spec->protocol & mask->protocol)) 2041 return rte_flow_error_set(error, ENOTSUP, 2042 RTE_FLOW_ERROR_TYPE_ITEM, item, 2043 "without MPLS support the" 2044 " specification cannot be used for" 2045 " filtering"); 2046 #endif 2047 #endif 2048 return 0; 2049 } 2050
2051 /** 2052 * Validate Geneve item. 2053 * 2054 * @param[in] item 2055 * Item specification. 2056 * @param[in] item_flags 2057 * Bit-fields that holds the items detected until now. 2058 * @param[in] dev 2059 * Pointer to the rte_eth_dev structure. 2060 * @param[out] error 2061 * Pointer to error structure. 2062 * 2063 * @return 2064 * 0 on success, a negative errno value otherwise and rte_errno is set. 2065 */ 2066 2067 int 2068 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, 2069 uint64_t item_flags, 2070 struct rte_eth_dev *dev, 2071 struct rte_flow_error *error) 2072 { 2073 struct mlx5_priv *priv = dev->data->dev_private; 2074 const struct rte_flow_item_geneve *spec = item->spec; 2075 const struct rte_flow_item_geneve *mask = item->mask; 2076 int ret; 2077 uint16_t gbhdr; 2078 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2079 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0; 2080 const struct rte_flow_item_geneve nic_mask = { 2081 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80), 2082 .vni = "\xff\xff\xff", 2083 .protocol = RTE_BE16(UINT16_MAX), 2084 }; 2085 2086 if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) 2087 return rte_flow_error_set(error, ENOTSUP, 2088 RTE_FLOW_ERROR_TYPE_ITEM, item, 2089 "L3 Geneve is not enabled by device" 2090 " parameter and/or not configured in" 2091 " firmware"); 2092 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2093 return rte_flow_error_set(error, ENOTSUP, 2094 RTE_FLOW_ERROR_TYPE_ITEM, item, 2095 "multiple tunnel layers not" 2096 " supported"); 2097 /* 2098 * Verify only UDPv4 is present as defined in 2099 * https://tools.ietf.org/html/rfc7348 2100 */ 2101 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) 2102 return rte_flow_error_set(error, EINVAL, 2103 RTE_FLOW_ERROR_TYPE_ITEM, item, 2104 "no outer UDP layer found"); 2105 if (!mask) 2106 mask = &rte_flow_item_geneve_mask; 2107 ret = mlx5_flow_item_acceptable 2108 (item, (const uint8_t *)mask, 2109 (const uint8_t *)&nic_mask, 2110 sizeof(struct rte_flow_item_geneve), error); 2111 if (ret) 2112 return ret; 2113 if (spec) { 2114 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0); 2115 if (MLX5_GENEVE_VER_VAL(gbhdr) || 2116 MLX5_GENEVE_CRITO_VAL(gbhdr) || 2117 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1) 2118 return rte_flow_error_set(error, ENOTSUP, 2119 RTE_FLOW_ERROR_TYPE_ITEM, 2120 item, 2121 "Geneve protocol unsupported" 2122 " fields are being used"); 2123 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len) 2124 return rte_flow_error_set 2125 (error, ENOTSUP, 2126 RTE_FLOW_ERROR_TYPE_ITEM, 2127 item, 2128 "Unsupported Geneve options length"); 2129 } 2130 if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) 2131 return rte_flow_error_set 2132 (error, ENOTSUP, 2133 RTE_FLOW_ERROR_TYPE_ITEM, item, 2134 "Geneve tunnel must be fully defined"); 2135 return 0; 2136 } 2137 2138 /** 2139 * Validate MPLS item. 2140 * 2141 * @param[in] dev 2142 * Pointer to the rte_eth_dev structure. 2143 * @param[in] item 2144 * Item specification. 2145 * @param[in] item_flags 2146 * Bit-fields that holds the items detected until now. 2147 * @param[in] prev_layer 2148 * The protocol layer indicated in previous item. 2149 * @param[out] error 2150 * Pointer to error structure. 2151 * 2152 * @return 2153 * 0 on success, a negative errno value otherwise and rte_errno is set. 2154 */ 2155 int 2156 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, 2157 const struct rte_flow_item *item __rte_unused, 2158 uint64_t item_flags __rte_unused, 2159 uint64_t prev_layer __rte_unused, 2160 struct rte_flow_error *error) 2161 { 2162 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT 2163 const struct rte_flow_item_mpls *mask = item->mask; 2164 struct mlx5_priv *priv = dev->data->dev_private; 2165 int ret; 2166 2167 if (!priv->config.mpls_en) 2168 return rte_flow_error_set(error, ENOTSUP, 2169 RTE_FLOW_ERROR_TYPE_ITEM, item, 2170 "MPLS not supported or" 2171 " disabled in firmware" 2172 " configuration."); 2173 /* MPLS over IP, UDP, GRE is allowed */ 2174 if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | 2175 MLX5_FLOW_LAYER_OUTER_L4_UDP | 2176 MLX5_FLOW_LAYER_GRE))) 2177 return rte_flow_error_set(error, EINVAL, 2178 RTE_FLOW_ERROR_TYPE_ITEM, item, 2179 "protocol filtering not compatible" 2180 " with MPLS layer"); 2181 /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. 
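 * A GRE layer already recorded in item_flags therefore does not trigger
 * the multiple-tunnel error below.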
*/ 2182 if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && 2183 !(item_flags & MLX5_FLOW_LAYER_GRE)) 2184 return rte_flow_error_set(error, ENOTSUP, 2185 RTE_FLOW_ERROR_TYPE_ITEM, item, 2186 "multiple tunnel layers not" 2187 " supported"); 2188 if (!mask) 2189 mask = &rte_flow_item_mpls_mask; 2190 ret = mlx5_flow_item_acceptable 2191 (item, (const uint8_t *)mask, 2192 (const uint8_t *)&rte_flow_item_mpls_mask, 2193 sizeof(struct rte_flow_item_mpls), error); 2194 if (ret < 0) 2195 return ret; 2196 return 0; 2197 #else 2198 return rte_flow_error_set(error, ENOTSUP, 2199 RTE_FLOW_ERROR_TYPE_ITEM, item, 2200 "MPLS is not supported by Verbs, please" 2201 " update."); 2202 #endif 2203 } 2204 2205 /** 2206 * Validate NVGRE item. 2207 * 2208 * @param[in] item 2209 * Item specification. 2210 * @param[in] item_flags 2211 * Bit flags to mark detected items. 2212 * @param[in] target_protocol 2213 * The next protocol in the previous item. 2214 * @param[out] error 2215 * Pointer to error structure. 2216 * 2217 * @return 2218 * 0 on success, a negative errno value otherwise and rte_errno is set. 2219 */ 2220 int 2221 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, 2222 uint64_t item_flags, 2223 uint8_t target_protocol, 2224 struct rte_flow_error *error) 2225 { 2226 const struct rte_flow_item_nvgre *mask = item->mask; 2227 int ret; 2228 2229 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE) 2230 return rte_flow_error_set(error, EINVAL, 2231 RTE_FLOW_ERROR_TYPE_ITEM, item, 2232 "protocol filtering not compatible" 2233 " with this GRE layer"); 2234 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2235 return rte_flow_error_set(error, ENOTSUP, 2236 RTE_FLOW_ERROR_TYPE_ITEM, item, 2237 "multiple tunnel layers not" 2238 " supported"); 2239 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3)) 2240 return rte_flow_error_set(error, ENOTSUP, 2241 RTE_FLOW_ERROR_TYPE_ITEM, item, 2242 "L3 Layer is missing"); 2243 if (!mask) 2244 mask = &rte_flow_item_nvgre_mask; 2245 ret = mlx5_flow_item_acceptable 2246 (item, (const uint8_t *)mask, 2247 (const uint8_t *)&rte_flow_item_nvgre_mask, 2248 sizeof(struct rte_flow_item_nvgre), error); 2249 if (ret < 0) 2250 return ret; 2251 return 0; 2252 } 2253 2254 /** 2255 * Validate eCPRI item. 2256 * 2257 * @param[in] item 2258 * Item specification. 2259 * @param[in] item_flags 2260 * Bit-fields that holds the items detected until now. 2261 * @param[in] last_item 2262 * Previous validated item in the pattern items. 2263 * @param[in] ether_type 2264 * Type in the ethernet layer header (including dot1q). 2265 * @param[in] acc_mask 2266 * Acceptable mask, if NULL default internal default mask 2267 * will be used to check whether item fields are supported. 2268 * @param[out] error 2269 * Pointer to error structure. 2270 * 2271 * @return 2272 * 0 on success, a negative errno value otherwise and rte_errno is set. 
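 *
 * Illustrative note on masks (hypothetical values): the type field of the
 * common header may only be fully masked (0xff) or left unmasked (0x00);
 * a partial mask such as
 *
 *   mask->hdr.common.type = 0x0f;
 *
 * is rejected with RTE_FLOW_ERROR_TYPE_ITEM_MASK, and a message body
 * (dummy) mask is only accepted together with a full type mask.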
2273 */ 2274 int 2275 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, 2276 uint64_t item_flags, 2277 uint64_t last_item, 2278 uint16_t ether_type, 2279 const struct rte_flow_item_ecpri *acc_mask, 2280 struct rte_flow_error *error) 2281 { 2282 const struct rte_flow_item_ecpri *mask = item->mask; 2283 const struct rte_flow_item_ecpri nic_mask = { 2284 .hdr = { 2285 .common = { 2286 .u32 = 2287 RTE_BE32(((const struct rte_ecpri_common_hdr) { 2288 .type = 0xFF, 2289 }).u32), 2290 }, 2291 .dummy[0] = 0xFFFFFFFF, 2292 }, 2293 }; 2294 const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | 2295 MLX5_FLOW_LAYER_OUTER_VLAN); 2296 struct rte_flow_item_ecpri mask_lo; 2297 2298 if ((last_item & outer_l2_vlan) && ether_type && 2299 ether_type != RTE_ETHER_TYPE_ECPRI) 2300 return rte_flow_error_set(error, EINVAL, 2301 RTE_FLOW_ERROR_TYPE_ITEM, item, 2302 "eCPRI cannot follow L2/VLAN layer " 2303 "which ether type is not 0xAEFE."); 2304 if (item_flags & MLX5_FLOW_LAYER_TUNNEL) 2305 return rte_flow_error_set(error, EINVAL, 2306 RTE_FLOW_ERROR_TYPE_ITEM, item, 2307 "eCPRI with tunnel is not supported " 2308 "right now."); 2309 if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) 2310 return rte_flow_error_set(error, ENOTSUP, 2311 RTE_FLOW_ERROR_TYPE_ITEM, item, 2312 "multiple L3 layers not supported"); 2313 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) 2314 return rte_flow_error_set(error, EINVAL, 2315 RTE_FLOW_ERROR_TYPE_ITEM, item, 2316 "eCPRI cannot follow a TCP layer."); 2317 /* In specification, eCPRI could be over UDP layer. */ 2318 else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) 2319 return rte_flow_error_set(error, EINVAL, 2320 RTE_FLOW_ERROR_TYPE_ITEM, item, 2321 "eCPRI over UDP layer is not yet " 2322 "supported right now."); 2323 /* Mask for type field in common header could be zero. */ 2324 if (!mask) 2325 mask = &rte_flow_item_ecpri_mask; 2326 mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); 2327 /* Input mask is in big-endian format. */ 2328 if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) 2329 return rte_flow_error_set(error, EINVAL, 2330 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2331 "partial mask is not supported " 2332 "for protocol"); 2333 else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) 2334 return rte_flow_error_set(error, EINVAL, 2335 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, 2336 "message header mask must be after " 2337 "a type mask"); 2338 return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, 2339 acc_mask ? (const uint8_t *)acc_mask 2340 : (const uint8_t *)&nic_mask, 2341 sizeof(struct rte_flow_item_ecpri), 2342 error); 2343 } 2344 2345 /* Allocate unique ID for the split Q/RSS subflows. */ 2346 static uint32_t 2347 flow_qrss_get_id(struct rte_eth_dev *dev) 2348 { 2349 struct mlx5_priv *priv = dev->data->dev_private; 2350 uint32_t qrss_id, ret; 2351 2352 ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id); 2353 if (ret) 2354 return 0; 2355 MLX5_ASSERT(qrss_id); 2356 return qrss_id; 2357 } 2358 2359 /* Free unique ID for the split Q/RSS subflows. */ 2360 static void 2361 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id) 2362 { 2363 struct mlx5_priv *priv = dev->data->dev_private; 2364 2365 if (qrss_id) 2366 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id); 2367 } 2368 2369 /** 2370 * Release resource related QUEUE/RSS action split. 2371 * 2372 * @param dev 2373 * Pointer to Ethernet device. 2374 * @param flow 2375 * Flow to release id's from. 
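 *
 * Every device handle of the flow may carry a split_flow_id allocated from
 * the Q/RSS id pool; walking the handle list returns those ids to the pool.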
2376 */ 2377 static void 2378 flow_mreg_split_qrss_release(struct rte_eth_dev *dev, 2379 struct rte_flow *flow) 2380 { 2381 struct mlx5_priv *priv = dev->data->dev_private; 2382 uint32_t handle_idx; 2383 struct mlx5_flow_handle *dev_handle; 2384 2385 SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, 2386 handle_idx, dev_handle, next) 2387 if (dev_handle->split_flow_id) 2388 flow_qrss_free_id(dev, dev_handle->split_flow_id); 2389 } 2390 2391 static int 2392 flow_null_validate(struct rte_eth_dev *dev __rte_unused, 2393 const struct rte_flow_attr *attr __rte_unused, 2394 const struct rte_flow_item items[] __rte_unused, 2395 const struct rte_flow_action actions[] __rte_unused, 2396 bool external __rte_unused, 2397 int hairpin __rte_unused, 2398 struct rte_flow_error *error) 2399 { 2400 return rte_flow_error_set(error, ENOTSUP, 2401 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2402 } 2403 2404 static struct mlx5_flow * 2405 flow_null_prepare(struct rte_eth_dev *dev __rte_unused, 2406 const struct rte_flow_attr *attr __rte_unused, 2407 const struct rte_flow_item items[] __rte_unused, 2408 const struct rte_flow_action actions[] __rte_unused, 2409 struct rte_flow_error *error) 2410 { 2411 rte_flow_error_set(error, ENOTSUP, 2412 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2413 return NULL; 2414 } 2415 2416 static int 2417 flow_null_translate(struct rte_eth_dev *dev __rte_unused, 2418 struct mlx5_flow *dev_flow __rte_unused, 2419 const struct rte_flow_attr *attr __rte_unused, 2420 const struct rte_flow_item items[] __rte_unused, 2421 const struct rte_flow_action actions[] __rte_unused, 2422 struct rte_flow_error *error) 2423 { 2424 return rte_flow_error_set(error, ENOTSUP, 2425 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2426 } 2427 2428 static int 2429 flow_null_apply(struct rte_eth_dev *dev __rte_unused, 2430 struct rte_flow *flow __rte_unused, 2431 struct rte_flow_error *error) 2432 { 2433 return rte_flow_error_set(error, ENOTSUP, 2434 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2435 } 2436 2437 static void 2438 flow_null_remove(struct rte_eth_dev *dev __rte_unused, 2439 struct rte_flow *flow __rte_unused) 2440 { 2441 } 2442 2443 static void 2444 flow_null_destroy(struct rte_eth_dev *dev __rte_unused, 2445 struct rte_flow *flow __rte_unused) 2446 { 2447 } 2448 2449 static int 2450 flow_null_query(struct rte_eth_dev *dev __rte_unused, 2451 struct rte_flow *flow __rte_unused, 2452 const struct rte_flow_action *actions __rte_unused, 2453 void *data __rte_unused, 2454 struct rte_flow_error *error) 2455 { 2456 return rte_flow_error_set(error, ENOTSUP, 2457 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); 2458 } 2459 2460 /* Void driver to protect from null pointer reference. */ 2461 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { 2462 .validate = flow_null_validate, 2463 .prepare = flow_null_prepare, 2464 .translate = flow_null_translate, 2465 .apply = flow_null_apply, 2466 .remove = flow_null_remove, 2467 .destroy = flow_null_destroy, 2468 .query = flow_null_query, 2469 }; 2470 2471 /** 2472 * Select flow driver type according to flow attributes and device 2473 * configuration. 2474 * 2475 * @param[in] dev 2476 * Pointer to the dev structure. 2477 * @param[in] attr 2478 * Pointer to the flow attributes. 2479 * 2480 * @return 2481 * flow driver type, MLX5_FLOW_TYPE_MAX otherwise. 
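 *
 * Selection order (restating the code below for clarity):
 *   - the OS specific type, when mlx5_flow_os_get_type() reports one;
 *   - MLX5_FLOW_TYPE_DV for transfer rules when dv_esw_en is set;
 *   - otherwise MLX5_FLOW_TYPE_DV or MLX5_FLOW_TYPE_VERBS depending on
 *     the dv_flow_en configuration.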
2482 */ 2483 static enum mlx5_flow_drv_type 2484 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) 2485 { 2486 struct mlx5_priv *priv = dev->data->dev_private; 2487 /* The OS can determine first a specific flow type (DV, VERBS) */ 2488 enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); 2489 2490 if (type != MLX5_FLOW_TYPE_MAX) 2491 return type; 2492 /* If no OS specific type - continue with DV/VERBS selection */ 2493 if (attr->transfer && priv->config.dv_esw_en) 2494 type = MLX5_FLOW_TYPE_DV; 2495 if (!attr->transfer) 2496 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV : 2497 MLX5_FLOW_TYPE_VERBS; 2498 return type; 2499 } 2500 2501 #define flow_get_drv_ops(type) flow_drv_ops[type] 2502 2503 /** 2504 * Flow driver validation API. This abstracts calling driver specific functions. 2505 * The type of flow driver is determined according to flow attributes. 2506 * 2507 * @param[in] dev 2508 * Pointer to the dev structure. 2509 * @param[in] attr 2510 * Pointer to the flow attributes. 2511 * @param[in] items 2512 * Pointer to the list of items. 2513 * @param[in] actions 2514 * Pointer to the list of actions. 2515 * @param[in] external 2516 * This flow rule is created by request external to PMD. 2517 * @param[in] hairpin 2518 * Number of hairpin TX actions, 0 means classic flow. 2519 * @param[out] error 2520 * Pointer to the error structure. 2521 * 2522 * @return 2523 * 0 on success, a negative errno value otherwise and rte_errno is set. 2524 */ 2525 static inline int 2526 flow_drv_validate(struct rte_eth_dev *dev, 2527 const struct rte_flow_attr *attr, 2528 const struct rte_flow_item items[], 2529 const struct rte_flow_action actions[], 2530 bool external, int hairpin, struct rte_flow_error *error) 2531 { 2532 const struct mlx5_flow_driver_ops *fops; 2533 enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); 2534 2535 fops = flow_get_drv_ops(type); 2536 return fops->validate(dev, attr, items, actions, external, 2537 hairpin, error); 2538 } 2539 2540 /** 2541 * Flow driver preparation API. This abstracts calling driver specific 2542 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2543 * calculates the size of memory required for device flow, allocates the memory, 2544 * initializes the device flow and returns the pointer. 2545 * 2546 * @note 2547 * This function initializes device flow structure such as dv or verbs in 2548 * struct mlx5_flow. However, it is caller's responsibility to initialize the 2549 * rest. For example, adding returning device flow to flow->dev_flow list and 2550 * setting backward reference to the flow should be done out of this function. 2551 * layers field is not filled either. 2552 * 2553 * @param[in] dev 2554 * Pointer to the dev structure. 2555 * @param[in] attr 2556 * Pointer to the flow attributes. 2557 * @param[in] items 2558 * Pointer to the list of items. 2559 * @param[in] actions 2560 * Pointer to the list of actions. 2561 * @param[in] flow_idx 2562 * This memory pool index to the flow. 2563 * @param[out] error 2564 * Pointer to the error structure. 2565 * 2566 * @return 2567 * Pointer to device flow on success, otherwise NULL and rte_errno is set. 
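 *
 * Simplified caller sketch (see flow_create_split_inner() below for the
 * real sequence):
 *
 *   dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
 *                               flow_idx, error);
 *   if (!dev_flow)
 *           return -rte_errno;
 *   dev_flow->flow = flow;
 *   SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
 *                 dev_flow->handle, next);
 *   ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);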
2568 */ 2569 static inline struct mlx5_flow * 2570 flow_drv_prepare(struct rte_eth_dev *dev, 2571 const struct rte_flow *flow, 2572 const struct rte_flow_attr *attr, 2573 const struct rte_flow_item items[], 2574 const struct rte_flow_action actions[], 2575 uint32_t flow_idx, 2576 struct rte_flow_error *error) 2577 { 2578 const struct mlx5_flow_driver_ops *fops; 2579 enum mlx5_flow_drv_type type = flow->drv_type; 2580 struct mlx5_flow *mlx5_flow = NULL; 2581 2582 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2583 fops = flow_get_drv_ops(type); 2584 mlx5_flow = fops->prepare(dev, attr, items, actions, error); 2585 if (mlx5_flow) 2586 mlx5_flow->flow_idx = flow_idx; 2587 return mlx5_flow; 2588 } 2589 2590 /** 2591 * Flow driver translation API. This abstracts calling driver specific 2592 * functions. Parent flow (rte_flow) should have driver type (drv_type). It 2593 * translates a generic flow into a driver flow. flow_drv_prepare() must 2594 * precede. 2595 * 2596 * @note 2597 * dev_flow->layers could be filled as a result of parsing during translation 2598 * if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled 2599 * if necessary. As a flow can have multiple dev_flows by RSS flow expansion, 2600 * flow->actions could be overwritten even though all the expanded dev_flows 2601 * have the same actions. 2602 * 2603 * @param[in] dev 2604 * Pointer to the rte dev structure. 2605 * @param[in, out] dev_flow 2606 * Pointer to the mlx5 flow. 2607 * @param[in] attr 2608 * Pointer to the flow attributes. 2609 * @param[in] items 2610 * Pointer to the list of items. 2611 * @param[in] actions 2612 * Pointer to the list of actions. 2613 * @param[out] error 2614 * Pointer to the error structure. 2615 * 2616 * @return 2617 * 0 on success, a negative errno value otherwise and rte_errno is set. 2618 */ 2619 static inline int 2620 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, 2621 const struct rte_flow_attr *attr, 2622 const struct rte_flow_item items[], 2623 const struct rte_flow_action actions[], 2624 struct rte_flow_error *error) 2625 { 2626 const struct mlx5_flow_driver_ops *fops; 2627 enum mlx5_flow_drv_type type = dev_flow->flow->drv_type; 2628 2629 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2630 fops = flow_get_drv_ops(type); 2631 return fops->translate(dev, dev_flow, attr, items, actions, error); 2632 } 2633 2634 /** 2635 * Flow driver apply API. This abstracts calling driver specific functions. 2636 * Parent flow (rte_flow) should have driver type (drv_type). It applies 2637 * translated driver flows on to device. flow_drv_translate() must precede. 2638 * 2639 * @param[in] dev 2640 * Pointer to Ethernet device structure. 2641 * @param[in, out] flow 2642 * Pointer to flow structure. 2643 * @param[out] error 2644 * Pointer to error structure. 2645 * 2646 * @return 2647 * 0 on success, a negative errno value otherwise and rte_errno is set. 2648 */ 2649 static inline int 2650 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, 2651 struct rte_flow_error *error) 2652 { 2653 const struct mlx5_flow_driver_ops *fops; 2654 enum mlx5_flow_drv_type type = flow->drv_type; 2655 2656 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2657 fops = flow_get_drv_ops(type); 2658 return fops->apply(dev, flow, error); 2659 } 2660 2661 /** 2662 * Flow driver remove API. This abstracts calling driver specific functions. 2663 * Parent flow (rte_flow) should have driver type (drv_type). 
It removes a flow 2664 * on device. All the resources of the flow should be freed by calling 2665 * flow_drv_destroy(). 2666 * 2667 * @param[in] dev 2668 * Pointer to Ethernet device. 2669 * @param[in, out] flow 2670 * Pointer to flow structure. 2671 */ 2672 static inline void 2673 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) 2674 { 2675 const struct mlx5_flow_driver_ops *fops; 2676 enum mlx5_flow_drv_type type = flow->drv_type; 2677 2678 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2679 fops = flow_get_drv_ops(type); 2680 fops->remove(dev, flow); 2681 } 2682 2683 /** 2684 * Flow driver destroy API. This abstracts calling driver specific functions. 2685 * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow 2686 * on device and releases resources of the flow. 2687 * 2688 * @param[in] dev 2689 * Pointer to Ethernet device. 2690 * @param[in, out] flow 2691 * Pointer to flow structure. 2692 */ 2693 static inline void 2694 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) 2695 { 2696 const struct mlx5_flow_driver_ops *fops; 2697 enum mlx5_flow_drv_type type = flow->drv_type; 2698 2699 flow_mreg_split_qrss_release(dev, flow); 2700 MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); 2701 fops = flow_get_drv_ops(type); 2702 fops->destroy(dev, flow); 2703 } 2704 2705 /** 2706 * Get RSS action from the action list. 2707 * 2708 * @param[in] actions 2709 * Pointer to the list of actions. 2710 * 2711 * @return 2712 * Pointer to the RSS action if exist, else return NULL. 2713 */ 2714 static const struct rte_flow_action_rss* 2715 flow_get_rss_action(const struct rte_flow_action actions[]) 2716 { 2717 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2718 switch (actions->type) { 2719 case RTE_FLOW_ACTION_TYPE_RSS: 2720 return (const struct rte_flow_action_rss *) 2721 actions->conf; 2722 default: 2723 break; 2724 } 2725 } 2726 return NULL; 2727 } 2728 2729 static unsigned int 2730 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) 2731 { 2732 const struct rte_flow_item *item; 2733 unsigned int has_vlan = 0; 2734 2735 for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { 2736 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { 2737 has_vlan = 1; 2738 break; 2739 } 2740 } 2741 if (has_vlan) 2742 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : 2743 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; 2744 return rss_level < 2 ? MLX5_EXPANSION_ROOT : 2745 MLX5_EXPANSION_ROOT_OUTER; 2746 } 2747 2748 /** 2749 * Get layer flags from the prefix flow. 2750 * 2751 * Some flows may be split to several subflows, the prefix subflow gets the 2752 * match items and the suffix sub flow gets the actions. 2753 * Some actions need the user defined match item flags to get the detail for 2754 * the action. 2755 * This function helps the suffix flow to get the item layer flags from prefix 2756 * subflow. 2757 * 2758 * @param[in] dev_flow 2759 * Pointer the created preifx subflow. 2760 * 2761 * @return 2762 * The layers get from prefix subflow. 2763 */ 2764 static inline uint64_t 2765 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) 2766 { 2767 uint64_t layers = 0; 2768 2769 /* 2770 * Layers bits could be localization, but usually the compiler will 2771 * help to do the optimization work for source code. 2772 * If no decap actions, use the layers directly. 
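 * With a decap action the former inner headers become the outer headers
 * of the packet seen by the suffix flow, so the INNER_* layer bits are
 * translated to their OUTER_* counterparts below.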
2773 */ 2774 if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP)) 2775 return dev_flow->handle->layers; 2776 /* Convert L3 layers with decap action. */ 2777 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) 2778 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; 2779 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) 2780 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; 2781 /* Convert L4 layers with decap action. */ 2782 if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) 2783 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; 2784 else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) 2785 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; 2786 return layers; 2787 } 2788 2789 /** 2790 * Get metadata split action information. 2791 * 2792 * @param[in] actions 2793 * Pointer to the list of actions. 2794 * @param[out] qrss 2795 * Pointer to the return pointer. 2796 * @param[out] qrss_type 2797 * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned 2798 * if no QUEUE/RSS is found. 2799 * @param[out] encap_idx 2800 * Pointer to the index of the encap action if exists, otherwise the last 2801 * action index. 2802 * 2803 * @return 2804 * Total number of actions. 2805 */ 2806 static int 2807 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], 2808 const struct rte_flow_action **qrss, 2809 int *encap_idx) 2810 { 2811 const struct rte_flow_action_raw_encap *raw_encap; 2812 int actions_n = 0; 2813 int raw_decap_idx = -1; 2814 2815 *encap_idx = -1; 2816 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2817 switch (actions->type) { 2818 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 2819 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 2820 *encap_idx = actions_n; 2821 break; 2822 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 2823 raw_decap_idx = actions_n; 2824 break; 2825 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 2826 raw_encap = actions->conf; 2827 if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 2828 *encap_idx = raw_decap_idx != -1 ? 2829 raw_decap_idx : actions_n; 2830 break; 2831 case RTE_FLOW_ACTION_TYPE_QUEUE: 2832 case RTE_FLOW_ACTION_TYPE_RSS: 2833 *qrss = actions; 2834 break; 2835 default: 2836 break; 2837 } 2838 actions_n++; 2839 } 2840 if (*encap_idx == -1) 2841 *encap_idx = actions_n; 2842 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2843 return actions_n + 1; 2844 } 2845 2846 /** 2847 * Check meter action from the action list. 2848 * 2849 * @param[in] actions 2850 * Pointer to the list of actions. 2851 * @param[out] mtr 2852 * Pointer to the meter exist flag. 2853 * 2854 * @return 2855 * Total number of actions. 2856 */ 2857 static int 2858 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) 2859 { 2860 int actions_n = 0; 2861 2862 MLX5_ASSERT(mtr); 2863 *mtr = 0; 2864 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2865 switch (actions->type) { 2866 case RTE_FLOW_ACTION_TYPE_METER: 2867 *mtr = 1; 2868 break; 2869 default: 2870 break; 2871 } 2872 actions_n++; 2873 } 2874 /* Count RTE_FLOW_ACTION_TYPE_END. */ 2875 return actions_n + 1; 2876 } 2877 2878 /** 2879 * Check if the flow should be split due to hairpin. 2880 * The reason for the split is that in current HW we can't 2881 * support encap and push-vlan on Rx, so if a flow contains 2882 * these actions we move it to Tx. 2883 * 2884 * @param dev 2885 * Pointer to Ethernet device. 2886 * @param[in] attr 2887 * Flow rule attributes. 2888 * @param[in] actions 2889 * Associated actions (list terminated by the END action). 
2890 * 2891 * @return 2892 * > 0 the number of actions and the flow should be split, 2893 * 0 when no split required. 2894 */ 2895 static int 2896 flow_check_hairpin_split(struct rte_eth_dev *dev, 2897 const struct rte_flow_attr *attr, 2898 const struct rte_flow_action actions[]) 2899 { 2900 int queue_action = 0; 2901 int action_n = 0; 2902 int split = 0; 2903 const struct rte_flow_action_queue *queue; 2904 const struct rte_flow_action_rss *rss; 2905 const struct rte_flow_action_raw_encap *raw_encap; 2906 2907 if (!attr->ingress) 2908 return 0; 2909 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 2910 switch (actions->type) { 2911 case RTE_FLOW_ACTION_TYPE_QUEUE: 2912 queue = actions->conf; 2913 if (queue == NULL) 2914 return 0; 2915 if (mlx5_rxq_get_type(dev, queue->index) != 2916 MLX5_RXQ_TYPE_HAIRPIN) 2917 return 0; 2918 queue_action = 1; 2919 action_n++; 2920 break; 2921 case RTE_FLOW_ACTION_TYPE_RSS: 2922 rss = actions->conf; 2923 if (rss == NULL || rss->queue_num == 0) 2924 return 0; 2925 if (mlx5_rxq_get_type(dev, rss->queue[0]) != 2926 MLX5_RXQ_TYPE_HAIRPIN) 2927 return 0; 2928 queue_action = 1; 2929 action_n++; 2930 break; 2931 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 2932 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 2933 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 2934 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 2935 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 2936 split++; 2937 action_n++; 2938 break; 2939 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 2940 raw_encap = actions->conf; 2941 if (raw_encap->size > 2942 (sizeof(struct rte_flow_item_eth) + 2943 sizeof(struct rte_flow_item_ipv4))) 2944 split++; 2945 action_n++; 2946 break; 2947 default: 2948 action_n++; 2949 break; 2950 } 2951 } 2952 if (split && queue_action) 2953 return action_n; 2954 return 0; 2955 } 2956 2957 /* Declare flow create/destroy prototype in advance. */ 2958 static uint32_t 2959 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 2960 const struct rte_flow_attr *attr, 2961 const struct rte_flow_item items[], 2962 const struct rte_flow_action actions[], 2963 bool external, struct rte_flow_error *error); 2964 2965 static void 2966 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 2967 uint32_t flow_idx); 2968 2969 /** 2970 * Add a flow of copying flow metadata registers in RX_CP_TBL. 2971 * 2972 * As mark_id is unique, if there's already a registered flow for the mark_id, 2973 * return by increasing the reference counter of the resource. Otherwise, create 2974 * the resource (mcp_res) and flow. 2975 * 2976 * Flow looks like, 2977 * - If ingress port is ANY and reg_c[1] is mark_id, 2978 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 2979 * 2980 * For default flow (zero mark_id), flow is like, 2981 * - If ingress port is ANY, 2982 * reg_b := reg_c[0] and jump to RX_ACT_TBL. 2983 * 2984 * @param dev 2985 * Pointer to Ethernet device. 2986 * @param mark_id 2987 * ID of MARK action, zero means default flow for META. 2988 * @param[out] error 2989 * Perform verbose error reporting if not NULL. 2990 * 2991 * @return 2992 * Associated resource on success, NULL otherwise and rte_errno is set. 
2993 */ 2994 static struct mlx5_flow_mreg_copy_resource * 2995 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, 2996 struct rte_flow_error *error) 2997 { 2998 struct mlx5_priv *priv = dev->data->dev_private; 2999 struct rte_flow_attr attr = { 3000 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 3001 .ingress = 1, 3002 }; 3003 struct mlx5_rte_flow_item_tag tag_spec = { 3004 .data = mark_id, 3005 }; 3006 struct rte_flow_item items[] = { 3007 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, }, 3008 }; 3009 struct rte_flow_action_mark ftag = { 3010 .id = mark_id, 3011 }; 3012 struct mlx5_flow_action_copy_mreg cp_mreg = { 3013 .dst = REG_B, 3014 .src = 0, 3015 }; 3016 struct rte_flow_action_jump jump = { 3017 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 3018 }; 3019 struct rte_flow_action actions[] = { 3020 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, 3021 }; 3022 struct mlx5_flow_mreg_copy_resource *mcp_res; 3023 uint32_t idx = 0; 3024 int ret; 3025 3026 /* Fill the register fileds in the flow. */ 3027 ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); 3028 if (ret < 0) 3029 return NULL; 3030 tag_spec.id = ret; 3031 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3032 if (ret < 0) 3033 return NULL; 3034 cp_mreg.src = ret; 3035 /* Check if already registered. */ 3036 MLX5_ASSERT(priv->mreg_cp_tbl); 3037 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id); 3038 if (mcp_res) { 3039 /* For non-default rule. */ 3040 if (mark_id != MLX5_DEFAULT_COPY_ID) 3041 mcp_res->refcnt++; 3042 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || 3043 mcp_res->refcnt == 1); 3044 return mcp_res; 3045 } 3046 /* Provide the full width of FLAG specific value. */ 3047 if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) 3048 tag_spec.data = MLX5_FLOW_MARK_DEFAULT; 3049 /* Build a new flow. */ 3050 if (mark_id != MLX5_DEFAULT_COPY_ID) { 3051 items[0] = (struct rte_flow_item){ 3052 .type = (enum rte_flow_item_type) 3053 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 3054 .spec = &tag_spec, 3055 }; 3056 items[1] = (struct rte_flow_item){ 3057 .type = RTE_FLOW_ITEM_TYPE_END, 3058 }; 3059 actions[0] = (struct rte_flow_action){ 3060 .type = (enum rte_flow_action_type) 3061 MLX5_RTE_FLOW_ACTION_TYPE_MARK, 3062 .conf = &ftag, 3063 }; 3064 actions[1] = (struct rte_flow_action){ 3065 .type = (enum rte_flow_action_type) 3066 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3067 .conf = &cp_mreg, 3068 }; 3069 actions[2] = (struct rte_flow_action){ 3070 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3071 .conf = &jump, 3072 }; 3073 actions[3] = (struct rte_flow_action){ 3074 .type = RTE_FLOW_ACTION_TYPE_END, 3075 }; 3076 } else { 3077 /* Default rule, wildcard match. */ 3078 attr.priority = MLX5_FLOW_PRIO_RSVD; 3079 items[0] = (struct rte_flow_item){ 3080 .type = RTE_FLOW_ITEM_TYPE_END, 3081 }; 3082 actions[0] = (struct rte_flow_action){ 3083 .type = (enum rte_flow_action_type) 3084 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3085 .conf = &cp_mreg, 3086 }; 3087 actions[1] = (struct rte_flow_action){ 3088 .type = RTE_FLOW_ACTION_TYPE_JUMP, 3089 .conf = &jump, 3090 }; 3091 actions[2] = (struct rte_flow_action){ 3092 .type = RTE_FLOW_ACTION_TYPE_END, 3093 }; 3094 } 3095 /* Build a new entry. */ 3096 mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx); 3097 if (!mcp_res) { 3098 rte_errno = ENOMEM; 3099 return NULL; 3100 } 3101 mcp_res->idx = idx; 3102 /* 3103 * The copy Flows are not included in any list. 
There 3104 * ones are referenced from other Flows and can not 3105 * be applied, removed, deleted in ardbitrary order 3106 * by list traversing. 3107 */ 3108 mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, 3109 actions, false, error); 3110 if (!mcp_res->rix_flow) 3111 goto error; 3112 mcp_res->refcnt++; 3113 mcp_res->hlist_ent.key = mark_id; 3114 ret = mlx5_hlist_insert(priv->mreg_cp_tbl, 3115 &mcp_res->hlist_ent); 3116 MLX5_ASSERT(!ret); 3117 if (ret) 3118 goto error; 3119 return mcp_res; 3120 error: 3121 if (mcp_res->rix_flow) 3122 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3123 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3124 return NULL; 3125 } 3126 3127 /** 3128 * Release flow in RX_CP_TBL. 3129 * 3130 * @param dev 3131 * Pointer to Ethernet device. 3132 * @flow 3133 * Parent flow for wich copying is provided. 3134 */ 3135 static void 3136 flow_mreg_del_copy_action(struct rte_eth_dev *dev, 3137 struct rte_flow *flow) 3138 { 3139 struct mlx5_flow_mreg_copy_resource *mcp_res; 3140 struct mlx5_priv *priv = dev->data->dev_private; 3141 3142 if (!flow->rix_mreg_copy) 3143 return; 3144 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3145 flow->rix_mreg_copy); 3146 if (!mcp_res || !priv->mreg_cp_tbl) 3147 return; 3148 if (flow->copy_applied) { 3149 MLX5_ASSERT(mcp_res->appcnt); 3150 flow->copy_applied = 0; 3151 --mcp_res->appcnt; 3152 if (!mcp_res->appcnt) { 3153 struct rte_flow *mcp_flow = mlx5_ipool_get 3154 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3155 mcp_res->rix_flow); 3156 3157 if (mcp_flow) 3158 flow_drv_remove(dev, mcp_flow); 3159 } 3160 } 3161 /* 3162 * We do not check availability of metadata registers here, 3163 * because copy resources are not allocated in this case. 3164 */ 3165 if (--mcp_res->refcnt) 3166 return; 3167 MLX5_ASSERT(mcp_res->rix_flow); 3168 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3169 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3170 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3171 flow->rix_mreg_copy = 0; 3172 } 3173 3174 /** 3175 * Start flow in RX_CP_TBL. 3176 * 3177 * @param dev 3178 * Pointer to Ethernet device. 3179 * @flow 3180 * Parent flow for wich copying is provided. 3181 * 3182 * @return 3183 * 0 on success, a negative errno value otherwise and rte_errno is set. 3184 */ 3185 static int 3186 flow_mreg_start_copy_action(struct rte_eth_dev *dev, 3187 struct rte_flow *flow) 3188 { 3189 struct mlx5_flow_mreg_copy_resource *mcp_res; 3190 struct mlx5_priv *priv = dev->data->dev_private; 3191 int ret; 3192 3193 if (!flow->rix_mreg_copy || flow->copy_applied) 3194 return 0; 3195 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3196 flow->rix_mreg_copy); 3197 if (!mcp_res) 3198 return 0; 3199 if (!mcp_res->appcnt) { 3200 struct rte_flow *mcp_flow = mlx5_ipool_get 3201 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3202 mcp_res->rix_flow); 3203 3204 if (mcp_flow) { 3205 ret = flow_drv_apply(dev, mcp_flow, NULL); 3206 if (ret) 3207 return ret; 3208 } 3209 } 3210 ++mcp_res->appcnt; 3211 flow->copy_applied = 1; 3212 return 0; 3213 } 3214 3215 /** 3216 * Stop flow in RX_CP_TBL. 3217 * 3218 * @param dev 3219 * Pointer to Ethernet device. 3220 * @flow 3221 * Parent flow for wich copying is provided. 
3222 */ 3223 static void 3224 flow_mreg_stop_copy_action(struct rte_eth_dev *dev, 3225 struct rte_flow *flow) 3226 { 3227 struct mlx5_flow_mreg_copy_resource *mcp_res; 3228 struct mlx5_priv *priv = dev->data->dev_private; 3229 3230 if (!flow->rix_mreg_copy || !flow->copy_applied) 3231 return; 3232 mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], 3233 flow->rix_mreg_copy); 3234 if (!mcp_res) 3235 return; 3236 MLX5_ASSERT(mcp_res->appcnt); 3237 --mcp_res->appcnt; 3238 flow->copy_applied = 0; 3239 if (!mcp_res->appcnt) { 3240 struct rte_flow *mcp_flow = mlx5_ipool_get 3241 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 3242 mcp_res->rix_flow); 3243 3244 if (mcp_flow) 3245 flow_drv_remove(dev, mcp_flow); 3246 } 3247 } 3248 3249 /** 3250 * Remove the default copy action from RX_CP_TBL. 3251 * 3252 * @param dev 3253 * Pointer to Ethernet device. 3254 */ 3255 static void 3256 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev) 3257 { 3258 struct mlx5_flow_mreg_copy_resource *mcp_res; 3259 struct mlx5_priv *priv = dev->data->dev_private; 3260 3261 /* Check if default flow is registered. */ 3262 if (!priv->mreg_cp_tbl) 3263 return; 3264 mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 3265 MLX5_DEFAULT_COPY_ID); 3266 if (!mcp_res) 3267 return; 3268 MLX5_ASSERT(mcp_res->rix_flow); 3269 flow_list_destroy(dev, NULL, mcp_res->rix_flow); 3270 mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent); 3271 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx); 3272 } 3273 3274 /** 3275 * Add the default copy action in in RX_CP_TBL. 3276 * 3277 * @param dev 3278 * Pointer to Ethernet device. 3279 * @param[out] error 3280 * Perform verbose error reporting if not NULL. 3281 * 3282 * @return 3283 * 0 for success, negative value otherwise and rte_errno is set. 3284 */ 3285 static int 3286 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev, 3287 struct rte_flow_error *error) 3288 { 3289 struct mlx5_priv *priv = dev->data->dev_private; 3290 struct mlx5_flow_mreg_copy_resource *mcp_res; 3291 3292 /* Check whether extensive metadata feature is engaged. */ 3293 if (!priv->config.dv_flow_en || 3294 priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3295 !mlx5_flow_ext_mreg_supported(dev) || 3296 !priv->sh->dv_regc0_mask) 3297 return 0; 3298 mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error); 3299 if (!mcp_res) 3300 return -rte_errno; 3301 return 0; 3302 } 3303 3304 /** 3305 * Add a flow of copying flow metadata registers in RX_CP_TBL. 3306 * 3307 * All the flow having Q/RSS action should be split by 3308 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL 3309 * performs the following, 3310 * - CQE->flow_tag := reg_c[1] (MARK) 3311 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3312 * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1] 3313 * but there should be a flow per each MARK ID set by MARK action. 3314 * 3315 * For the aforementioned reason, if there's a MARK action in flow's action 3316 * list, a corresponding flow should be added to the RX_CP_TBL in order to copy 3317 * the MARK ID to CQE's flow_tag like, 3318 * - If reg_c[1] is mark_id, 3319 * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3320 * 3321 * For SET_META action which stores value in reg_c[0], as the destination is 3322 * also a flow metadata register (reg_b), adding a default flow is enough. Zero 3323 * MARK ID means the default flow. 
The default flow looks like, 3324 * - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL. 3325 * 3326 * @param dev 3327 * Pointer to Ethernet device. 3328 * @param flow 3329 * Pointer to flow structure. 3330 * @param[in] actions 3331 * Pointer to the list of actions. 3332 * @param[out] error 3333 * Perform verbose error reporting if not NULL. 3334 * 3335 * @return 3336 * 0 on success, negative value otherwise and rte_errno is set. 3337 */ 3338 static int 3339 flow_mreg_update_copy_table(struct rte_eth_dev *dev, 3340 struct rte_flow *flow, 3341 const struct rte_flow_action *actions, 3342 struct rte_flow_error *error) 3343 { 3344 struct mlx5_priv *priv = dev->data->dev_private; 3345 struct mlx5_dev_config *config = &priv->config; 3346 struct mlx5_flow_mreg_copy_resource *mcp_res; 3347 const struct rte_flow_action_mark *mark; 3348 3349 /* Check whether extensive metadata feature is engaged. */ 3350 if (!config->dv_flow_en || 3351 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3352 !mlx5_flow_ext_mreg_supported(dev) || 3353 !priv->sh->dv_regc0_mask) 3354 return 0; 3355 /* Find MARK action. */ 3356 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3357 switch (actions->type) { 3358 case RTE_FLOW_ACTION_TYPE_FLAG: 3359 mcp_res = flow_mreg_add_copy_action 3360 (dev, MLX5_FLOW_MARK_DEFAULT, error); 3361 if (!mcp_res) 3362 return -rte_errno; 3363 flow->rix_mreg_copy = mcp_res->idx; 3364 if (dev->data->dev_started) { 3365 mcp_res->appcnt++; 3366 flow->copy_applied = 1; 3367 } 3368 return 0; 3369 case RTE_FLOW_ACTION_TYPE_MARK: 3370 mark = (const struct rte_flow_action_mark *) 3371 actions->conf; 3372 mcp_res = 3373 flow_mreg_add_copy_action(dev, mark->id, error); 3374 if (!mcp_res) 3375 return -rte_errno; 3376 flow->rix_mreg_copy = mcp_res->idx; 3377 if (dev->data->dev_started) { 3378 mcp_res->appcnt++; 3379 flow->copy_applied = 1; 3380 } 3381 return 0; 3382 default: 3383 break; 3384 } 3385 } 3386 return 0; 3387 } 3388 3389 #define MLX5_MAX_SPLIT_ACTIONS 24 3390 #define MLX5_MAX_SPLIT_ITEMS 24 3391 3392 /** 3393 * Split the hairpin flow. 3394 * Since HW can't support encap and push-vlan on Rx, we move these 3395 * actions to Tx. 3396 * If the count action is after the encap then we also 3397 * move the count action. in this case the count will also measure 3398 * the outer bytes. 3399 * 3400 * @param dev 3401 * Pointer to Ethernet device. 3402 * @param[in] actions 3403 * Associated actions (list terminated by the END action). 3404 * @param[out] actions_rx 3405 * Rx flow actions. 3406 * @param[out] actions_tx 3407 * Tx flow actions.. 3408 * @param[out] pattern_tx 3409 * The pattern items for the Tx flow. 3410 * @param[out] flow_id 3411 * The flow ID connected to this flow. 3412 * 3413 * @return 3414 * 0 on success. 
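 *
 * Illustrative result for a hypothetical hairpin rule carrying
 * VXLAN_ENCAP and QUEUE actions:
 *   actions_rx: QUEUE / TAG(reg := flow_id) / END
 *   actions_tx: VXLAN_ENCAP / END
 *   pattern_tx: TAG(reg == flow_id) / END
 * The Rx part tags the packet with the generated flow_id and the Tx
 * hairpin flow matches on that tag before performing the encapsulation.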
3415 */ 3416 static int 3417 flow_hairpin_split(struct rte_eth_dev *dev, 3418 const struct rte_flow_action actions[], 3419 struct rte_flow_action actions_rx[], 3420 struct rte_flow_action actions_tx[], 3421 struct rte_flow_item pattern_tx[], 3422 uint32_t *flow_id) 3423 { 3424 struct mlx5_priv *priv = dev->data->dev_private; 3425 const struct rte_flow_action_raw_encap *raw_encap; 3426 const struct rte_flow_action_raw_decap *raw_decap; 3427 struct mlx5_rte_flow_action_set_tag *set_tag; 3428 struct rte_flow_action *tag_action; 3429 struct mlx5_rte_flow_item_tag *tag_item; 3430 struct rte_flow_item *item; 3431 char *addr; 3432 int encap = 0; 3433 3434 mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); 3435 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3436 switch (actions->type) { 3437 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: 3438 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: 3439 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3440 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3441 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: 3442 rte_memcpy(actions_tx, actions, 3443 sizeof(struct rte_flow_action)); 3444 actions_tx++; 3445 break; 3446 case RTE_FLOW_ACTION_TYPE_COUNT: 3447 if (encap) { 3448 rte_memcpy(actions_tx, actions, 3449 sizeof(struct rte_flow_action)); 3450 actions_tx++; 3451 } else { 3452 rte_memcpy(actions_rx, actions, 3453 sizeof(struct rte_flow_action)); 3454 actions_rx++; 3455 } 3456 break; 3457 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3458 raw_encap = actions->conf; 3459 if (raw_encap->size > 3460 (sizeof(struct rte_flow_item_eth) + 3461 sizeof(struct rte_flow_item_ipv4))) { 3462 memcpy(actions_tx, actions, 3463 sizeof(struct rte_flow_action)); 3464 actions_tx++; 3465 encap = 1; 3466 } else { 3467 rte_memcpy(actions_rx, actions, 3468 sizeof(struct rte_flow_action)); 3469 actions_rx++; 3470 } 3471 break; 3472 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3473 raw_decap = actions->conf; 3474 if (raw_decap->size < 3475 (sizeof(struct rte_flow_item_eth) + 3476 sizeof(struct rte_flow_item_ipv4))) { 3477 memcpy(actions_tx, actions, 3478 sizeof(struct rte_flow_action)); 3479 actions_tx++; 3480 } else { 3481 rte_memcpy(actions_rx, actions, 3482 sizeof(struct rte_flow_action)); 3483 actions_rx++; 3484 } 3485 break; 3486 default: 3487 rte_memcpy(actions_rx, actions, 3488 sizeof(struct rte_flow_action)); 3489 actions_rx++; 3490 break; 3491 } 3492 } 3493 /* Add set meta action and end action for the Rx flow. */ 3494 tag_action = actions_rx; 3495 tag_action->type = (enum rte_flow_action_type) 3496 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3497 actions_rx++; 3498 rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); 3499 actions_rx++; 3500 set_tag = (void *)actions_rx; 3501 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); 3502 MLX5_ASSERT(set_tag->id > REG_NONE); 3503 set_tag->data = *flow_id; 3504 tag_action->conf = set_tag; 3505 /* Create Tx item list. 
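 * It holds a single internal TAG item whose spec matches the flow_id set
 * on the Rx side (with a full data/id mask), followed by an END item.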
*/ 3506 rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); 3507 addr = (void *)&pattern_tx[2]; 3508 item = pattern_tx; 3509 item->type = (enum rte_flow_item_type) 3510 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3511 tag_item = (void *)addr; 3512 tag_item->data = *flow_id; 3513 tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); 3514 MLX5_ASSERT(set_tag->id > REG_NONE); 3515 item->spec = tag_item; 3516 addr += sizeof(struct mlx5_rte_flow_item_tag); 3517 tag_item = (void *)addr; 3518 tag_item->data = UINT32_MAX; 3519 tag_item->id = UINT16_MAX; 3520 item->mask = tag_item; 3521 addr += sizeof(struct mlx5_rte_flow_item_tag); 3522 item->last = NULL; 3523 item++; 3524 item->type = RTE_FLOW_ITEM_TYPE_END; 3525 return 0; 3526 } 3527 3528 /** 3529 * The last stage of splitting chain, just creates the subflow 3530 * without any modification. 3531 * 3532 * @param[in] dev 3533 * Pointer to Ethernet device. 3534 * @param[in] flow 3535 * Parent flow structure pointer. 3536 * @param[in, out] sub_flow 3537 * Pointer to return the created subflow, may be NULL. 3538 * @param[in] prefix_layers 3539 * Prefix subflow layers, may be 0. 3540 * @param[in] attr 3541 * Flow rule attributes. 3542 * @param[in] items 3543 * Pattern specification (list terminated by the END pattern item). 3544 * @param[in] actions 3545 * Associated actions (list terminated by the END action). 3546 * @param[in] external 3547 * This flow rule is created by request external to PMD. 3548 * @param[in] flow_idx 3549 * This memory pool index to the flow. 3550 * @param[out] error 3551 * Perform verbose error reporting if not NULL. 3552 * @return 3553 * 0 on success, negative value otherwise 3554 */ 3555 static int 3556 flow_create_split_inner(struct rte_eth_dev *dev, 3557 struct rte_flow *flow, 3558 struct mlx5_flow **sub_flow, 3559 uint64_t prefix_layers, 3560 const struct rte_flow_attr *attr, 3561 const struct rte_flow_item items[], 3562 const struct rte_flow_action actions[], 3563 bool external, uint32_t flow_idx, 3564 struct rte_flow_error *error) 3565 { 3566 struct mlx5_flow *dev_flow; 3567 3568 dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, 3569 flow_idx, error); 3570 if (!dev_flow) 3571 return -rte_errno; 3572 dev_flow->flow = flow; 3573 dev_flow->external = external; 3574 /* Subflow object was created, we must include one in the list. */ 3575 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 3576 dev_flow->handle, next); 3577 /* 3578 * If dev_flow is as one of the suffix flow, some actions in suffix 3579 * flow may need some user defined item layer flags. 3580 */ 3581 if (prefix_layers) 3582 dev_flow->handle->layers = prefix_layers; 3583 if (sub_flow) 3584 *sub_flow = dev_flow; 3585 return flow_drv_translate(dev, dev_flow, attr, items, actions, error); 3586 } 3587 3588 /** 3589 * Split the meter flow. 3590 * 3591 * As meter flow will split to three sub flow, other than meter 3592 * action, the other actions make sense to only meter accepts 3593 * the packet. If it need to be dropped, no other additional 3594 * actions should be take. 3595 * 3596 * One kind of special action which decapsulates the L3 tunnel 3597 * header will be in the prefix sub flow, as not to take the 3598 * L3 tunnel header into account. 3599 * 3600 * @param dev 3601 * Pointer to Ethernet device. 3602 * @param[in] items 3603 * Pattern specification (list terminated by the END pattern item). 3604 * @param[out] sfx_items 3605 * Suffix flow match items (list terminated by the END pattern item). 
3606 * @param[in] actions 3607 * Associated actions (list terminated by the END action). 3608 * @param[out] actions_sfx 3609 * Suffix flow actions. 3610 * @param[out] actions_pre 3611 * Prefix flow actions. 3612 * @param[out] pattern_sfx 3613 * The pattern items for the suffix flow. 3614 * @param[out] tag_sfx 3615 * Pointer to suffix flow tag. 3616 * 3617 * @return 3618 * 0 on success. 3619 */ 3620 static int 3621 flow_meter_split_prep(struct rte_eth_dev *dev, 3622 const struct rte_flow_item items[], 3623 struct rte_flow_item sfx_items[], 3624 const struct rte_flow_action actions[], 3625 struct rte_flow_action actions_sfx[], 3626 struct rte_flow_action actions_pre[]) 3627 { 3628 struct rte_flow_action *tag_action = NULL; 3629 struct rte_flow_item *tag_item; 3630 struct mlx5_rte_flow_action_set_tag *set_tag; 3631 struct rte_flow_error error; 3632 const struct rte_flow_action_raw_encap *raw_encap; 3633 const struct rte_flow_action_raw_decap *raw_decap; 3634 struct mlx5_rte_flow_item_tag *tag_spec; 3635 struct mlx5_rte_flow_item_tag *tag_mask; 3636 uint32_t tag_id; 3637 bool copy_vlan = false; 3638 3639 /* Prepare the actions for prefix and suffix flow. */ 3640 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { 3641 struct rte_flow_action **action_cur = NULL; 3642 3643 switch (actions->type) { 3644 case RTE_FLOW_ACTION_TYPE_METER: 3645 /* Add the extra tag action first. */ 3646 tag_action = actions_pre; 3647 tag_action->type = (enum rte_flow_action_type) 3648 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 3649 actions_pre++; 3650 action_cur = &actions_pre; 3651 break; 3652 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: 3653 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: 3654 action_cur = &actions_pre; 3655 break; 3656 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: 3657 raw_encap = actions->conf; 3658 if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) 3659 action_cur = &actions_pre; 3660 break; 3661 case RTE_FLOW_ACTION_TYPE_RAW_DECAP: 3662 raw_decap = actions->conf; 3663 if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) 3664 action_cur = &actions_pre; 3665 break; 3666 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: 3667 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: 3668 copy_vlan = true; 3669 break; 3670 default: 3671 break; 3672 } 3673 if (!action_cur) 3674 action_cur = &actions_sfx; 3675 memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); 3676 (*action_cur)++; 3677 } 3678 /* Add end action to the actions. */ 3679 actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; 3680 actions_pre->type = RTE_FLOW_ACTION_TYPE_END; 3681 actions_pre++; 3682 /* Set the tag. */ 3683 set_tag = (void *)actions_pre; 3684 set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3685 /* 3686 * Get the id from the qrss_pool to make qrss share the id with meter. 3687 */ 3688 tag_id = flow_qrss_get_id(dev); 3689 set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; 3690 assert(tag_action); 3691 tag_action->conf = set_tag; 3692 /* Prepare the suffix subflow items. */ 3693 tag_item = sfx_items++; 3694 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { 3695 int item_type = items->type; 3696 3697 switch (item_type) { 3698 case RTE_FLOW_ITEM_TYPE_PORT_ID: 3699 memcpy(sfx_items, items, sizeof(*sfx_items)); 3700 sfx_items++; 3701 break; 3702 case RTE_FLOW_ITEM_TYPE_VLAN: 3703 if (copy_vlan) { 3704 memcpy(sfx_items, items, sizeof(*sfx_items)); 3705 /* 3706 * Convert to internal match item, it is used 3707 * for vlan push and set vid. 
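 * The suffix flow keeps the OF_PUSH_VLAN/OF_SET_VLAN_VID actions, so it
 * still needs the original VLAN information from the prefix pattern.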
3708 */ 3709 sfx_items->type = (enum rte_flow_item_type) 3710 MLX5_RTE_FLOW_ITEM_TYPE_VLAN; 3711 sfx_items++; 3712 } 3713 break; 3714 default: 3715 break; 3716 } 3717 } 3718 sfx_items->type = RTE_FLOW_ITEM_TYPE_END; 3719 sfx_items++; 3720 tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; 3721 tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; 3722 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); 3723 tag_mask = tag_spec + 1; 3724 tag_mask->data = 0xffffff00; 3725 tag_item->type = (enum rte_flow_item_type) 3726 MLX5_RTE_FLOW_ITEM_TYPE_TAG; 3727 tag_item->spec = tag_spec; 3728 tag_item->last = NULL; 3729 tag_item->mask = tag_mask; 3730 return tag_id; 3731 } 3732 3733 /** 3734 * Split action list having QUEUE/RSS for metadata register copy. 3735 * 3736 * Once Q/RSS action is detected in user's action list, the flow action 3737 * should be split in order to copy metadata registers, which will happen in 3738 * RX_CP_TBL like, 3739 * - CQE->flow_tag := reg_c[1] (MARK) 3740 * - CQE->flow_table_metadata (reg_b) := reg_c[0] (META) 3741 * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL. 3742 * This is because the last action of each flow must be a terminal action 3743 * (QUEUE, RSS or DROP). 3744 * 3745 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is 3746 * stored and kept in the mlx5_flow structure per each sub_flow. 3747 * 3748 * The Q/RSS action is replaced with, 3749 * - SET_TAG, setting the allocated flow ID to reg_c[2]. 3750 * And the following JUMP action is added at the end, 3751 * - JUMP, to RX_CP_TBL. 3752 * 3753 * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by 3754 * flow_create_split_metadata() routine. The flow will look like, 3755 * - If flow ID matches (reg_c[2]), perform Q/RSS. 3756 * 3757 * @param dev 3758 * Pointer to Ethernet device. 3759 * @param[out] split_actions 3760 * Pointer to store split actions to jump to CP_TBL. 3761 * @param[in] actions 3762 * Pointer to the list of original flow actions. 3763 * @param[in] qrss 3764 * Pointer to the Q/RSS action. 3765 * @param[in] actions_n 3766 * Number of original actions. 3767 * @param[out] error 3768 * Perform verbose error reporting if not NULL. 3769 * 3770 * @return 3771 * non-zero unique flow_id on success, otherwise 0 and 3772 * error/rte_error are set. 3773 */ 3774 static uint32_t 3775 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, 3776 struct rte_flow_action *split_actions, 3777 const struct rte_flow_action *actions, 3778 const struct rte_flow_action *qrss, 3779 int actions_n, struct rte_flow_error *error) 3780 { 3781 struct mlx5_rte_flow_action_set_tag *set_tag; 3782 struct rte_flow_action_jump *jump; 3783 const int qrss_idx = qrss - actions; 3784 uint32_t flow_id = 0; 3785 int ret = 0; 3786 3787 /* 3788 * Given actions will be split 3789 * - Replace QUEUE/RSS action with SET_TAG to set flow ID. 3790 * - Add jump to mreg CP_TBL. 3791 * As a result, there will be one more action. 3792 */ 3793 ++actions_n; 3794 memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); 3795 set_tag = (void *)(split_actions + actions_n); 3796 /* 3797 * If tag action is not set to void(it means we are not the meter 3798 * suffix flow), add the tag action. Since meter suffix flow already 3799 * has the tag added. 3800 */ 3801 if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { 3802 /* 3803 * Allocate the new subflow ID. This one is unique within 3804 * device and not shared with representors. 
Otherwise,
3805 * we would have to resolve multi-thread access synchronization
3806 * issue. Each flow on the shared device is appended
3807 * with source vport identifier, so the resulting
3808 * flows will be unique in the shared (by master and
3809 * representors) domain even if they have coinciding
3810 * IDs.
3811 */
3812 flow_id = flow_qrss_get_id(dev);
3813 if (!flow_id)
3814 return rte_flow_error_set(error, ENOMEM,
3815 RTE_FLOW_ERROR_TYPE_ACTION,
3816 NULL, "can't allocate id "
3817 "for split Q/RSS subflow");
3818 /* Internal SET_TAG action to set flow ID. */
3819 *set_tag = (struct mlx5_rte_flow_action_set_tag){
3820 .data = flow_id,
3821 };
3822 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
3823 if (ret < 0)
3824 return ret;
3825 set_tag->id = ret;
3826 /* Construct new actions array. */
3827 /* Replace QUEUE/RSS action. */
3828 split_actions[qrss_idx] = (struct rte_flow_action){
3829 .type = (enum rte_flow_action_type)
3830 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
3831 .conf = set_tag,
3832 };
3833 }
3834 /* JUMP action to jump to mreg copy table (CP_TBL). */
3835 jump = (void *)(set_tag + 1);
3836 *jump = (struct rte_flow_action_jump){
3837 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3838 };
3839 split_actions[actions_n - 2] = (struct rte_flow_action){
3840 .type = RTE_FLOW_ACTION_TYPE_JUMP,
3841 .conf = jump,
3842 };
3843 split_actions[actions_n - 1] = (struct rte_flow_action){
3844 .type = RTE_FLOW_ACTION_TYPE_END,
3845 };
3846 return flow_id;
3847 }
3848
3849 /**
3850 * Extend the given action list for Tx metadata copy.
3851 *
3852 * Copy the given action list to ext_actions and add a flow metadata register
3853 * copy action in order to copy reg_a (set by the WQE) to reg_c[0].
3854 *
3855 * @param[out] ext_actions
3856 * Pointer to the extended action list.
3857 * @param[in] actions
3858 * Pointer to the list of actions.
3859 * @param[in] actions_n
3860 * Number of actions in the list.
3861 * @param[out] error
3862 * Perform verbose error reporting if not NULL.
3863 * @param[in] encap_idx
3864 * The encap action index.
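 *
 * A minimal usage sketch, mirroring the egress branch of
 * flow_create_split_metadata() below (the allocation sizing is shown only
 * for context and is not part of this function):
 * @code
 *   size_t act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
 *                     sizeof(struct mlx5_flow_action_copy_mreg);
 *   struct rte_flow_action *ext_actions =
 *           mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, SOCKET_ID_ANY);
 *   int ret = -ENOMEM;
 *
 *   if (ext_actions)
 *           ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
 *                                        actions_n, error, encap_idx);
 * @endcode
 * On success ext_actions holds the original actions plus the inserted
 * COPY_MREG action and a terminating END action.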
3865 * 3866 * @return 3867 * 0 on success, negative value otherwise 3868 */ 3869 static int 3870 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, 3871 struct rte_flow_action *ext_actions, 3872 const struct rte_flow_action *actions, 3873 int actions_n, struct rte_flow_error *error, 3874 int encap_idx) 3875 { 3876 struct mlx5_flow_action_copy_mreg *cp_mreg = 3877 (struct mlx5_flow_action_copy_mreg *) 3878 (ext_actions + actions_n + 1); 3879 int ret; 3880 3881 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error); 3882 if (ret < 0) 3883 return ret; 3884 cp_mreg->dst = ret; 3885 ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error); 3886 if (ret < 0) 3887 return ret; 3888 cp_mreg->src = ret; 3889 if (encap_idx != 0) 3890 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); 3891 if (encap_idx == actions_n - 1) { 3892 ext_actions[actions_n - 1] = (struct rte_flow_action){ 3893 .type = (enum rte_flow_action_type) 3894 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3895 .conf = cp_mreg, 3896 }; 3897 ext_actions[actions_n] = (struct rte_flow_action){ 3898 .type = RTE_FLOW_ACTION_TYPE_END, 3899 }; 3900 } else { 3901 ext_actions[encap_idx] = (struct rte_flow_action){ 3902 .type = (enum rte_flow_action_type) 3903 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 3904 .conf = cp_mreg, 3905 }; 3906 memcpy(ext_actions + encap_idx + 1, actions + encap_idx, 3907 sizeof(*ext_actions) * (actions_n - encap_idx)); 3908 } 3909 return 0; 3910 } 3911 3912 /** 3913 * The splitting for metadata feature. 3914 * 3915 * - Q/RSS action on NIC Rx should be split in order to pass by 3916 * the mreg copy table (RX_CP_TBL) and then it jumps to the 3917 * action table (RX_ACT_TBL) which has the split Q/RSS action. 3918 * 3919 * - All the actions on NIC Tx should have a mreg copy action to 3920 * copy reg_a from WQE to reg_c[0]. 3921 * 3922 * @param dev 3923 * Pointer to Ethernet device. 3924 * @param[in] flow 3925 * Parent flow structure pointer. 3926 * @param[in] prefix_layers 3927 * Prefix flow layer flags. 3928 * @param[in] attr 3929 * Flow rule attributes. 3930 * @param[in] items 3931 * Pattern specification (list terminated by the END pattern item). 3932 * @param[in] actions 3933 * Associated actions (list terminated by the END action). 3934 * @param[in] external 3935 * This flow rule is created by request external to PMD. 3936 * @param[in] flow_idx 3937 * This memory pool index to the flow. 3938 * @param[out] error 3939 * Perform verbose error reporting if not NULL. 3940 * @return 3941 * 0 on success, negative value otherwise 3942 */ 3943 static int 3944 flow_create_split_metadata(struct rte_eth_dev *dev, 3945 struct rte_flow *flow, 3946 uint64_t prefix_layers, 3947 const struct rte_flow_attr *attr, 3948 const struct rte_flow_item items[], 3949 const struct rte_flow_action actions[], 3950 bool external, uint32_t flow_idx, 3951 struct rte_flow_error *error) 3952 { 3953 struct mlx5_priv *priv = dev->data->dev_private; 3954 struct mlx5_dev_config *config = &priv->config; 3955 const struct rte_flow_action *qrss = NULL; 3956 struct rte_flow_action *ext_actions = NULL; 3957 struct mlx5_flow *dev_flow = NULL; 3958 uint32_t qrss_id = 0; 3959 int mtr_sfx = 0; 3960 size_t act_size; 3961 int actions_n; 3962 int encap_idx; 3963 int ret; 3964 3965 /* Check whether extensive metadata feature is engaged. 
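 * It is considered engaged only when dv_flow_en is set, dv_xmeta_en is not
 * MLX5_XMETA_MODE_LEGACY and extended metadata registers are supported;
 * otherwise a single unmodified subflow is created below.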
*/ 3966 if (!config->dv_flow_en || 3967 config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || 3968 !mlx5_flow_ext_mreg_supported(dev)) 3969 return flow_create_split_inner(dev, flow, NULL, prefix_layers, 3970 attr, items, actions, external, 3971 flow_idx, error); 3972 actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, 3973 &encap_idx); 3974 if (qrss) { 3975 /* Exclude hairpin flows from splitting. */ 3976 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 3977 const struct rte_flow_action_queue *queue; 3978 3979 queue = qrss->conf; 3980 if (mlx5_rxq_get_type(dev, queue->index) == 3981 MLX5_RXQ_TYPE_HAIRPIN) 3982 qrss = NULL; 3983 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) { 3984 const struct rte_flow_action_rss *rss; 3985 3986 rss = qrss->conf; 3987 if (mlx5_rxq_get_type(dev, rss->queue[0]) == 3988 MLX5_RXQ_TYPE_HAIRPIN) 3989 qrss = NULL; 3990 } 3991 } 3992 if (qrss) { 3993 /* Check if it is in meter suffix table. */ 3994 mtr_sfx = attr->group == (attr->transfer ? 3995 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 3996 MLX5_FLOW_TABLE_LEVEL_SUFFIX); 3997 /* 3998 * Q/RSS action on NIC Rx should be split in order to pass by 3999 * the mreg copy table (RX_CP_TBL) and then it jumps to the 4000 * action table (RX_ACT_TBL) which has the split Q/RSS action. 4001 */ 4002 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4003 sizeof(struct rte_flow_action_set_tag) + 4004 sizeof(struct rte_flow_action_jump); 4005 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4006 SOCKET_ID_ANY); 4007 if (!ext_actions) 4008 return rte_flow_error_set(error, ENOMEM, 4009 RTE_FLOW_ERROR_TYPE_ACTION, 4010 NULL, "no memory to split " 4011 "metadata flow"); 4012 /* 4013 * If we are the suffix flow of meter, tag already exist. 4014 * Set the tag action to void. 4015 */ 4016 if (mtr_sfx) 4017 ext_actions[qrss - actions].type = 4018 RTE_FLOW_ACTION_TYPE_VOID; 4019 else 4020 ext_actions[qrss - actions].type = 4021 (enum rte_flow_action_type) 4022 MLX5_RTE_FLOW_ACTION_TYPE_TAG; 4023 /* 4024 * Create the new actions list with removed Q/RSS action 4025 * and appended set tag and jump to register copy table 4026 * (RX_CP_TBL). We should preallocate unique tag ID here 4027 * in advance, because it is needed for set tag action. 4028 */ 4029 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, 4030 qrss, actions_n, error); 4031 if (!mtr_sfx && !qrss_id) { 4032 ret = -rte_errno; 4033 goto exit; 4034 } 4035 } else if (attr->egress && !attr->transfer) { 4036 /* 4037 * All the actions on NIC Tx should have a metadata register 4038 * copy action to copy reg_a from WQE to reg_c[meta] 4039 */ 4040 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + 4041 sizeof(struct mlx5_flow_action_copy_mreg); 4042 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, 4043 SOCKET_ID_ANY); 4044 if (!ext_actions) 4045 return rte_flow_error_set(error, ENOMEM, 4046 RTE_FLOW_ERROR_TYPE_ACTION, 4047 NULL, "no memory to split " 4048 "metadata flow"); 4049 /* Create the action list appended with copy register. */ 4050 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, 4051 actions_n, error, encap_idx); 4052 if (ret < 0) 4053 goto exit; 4054 } 4055 /* Add the unmodified original or prefix subflow. */ 4056 ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr, 4057 items, ext_actions ? 
ext_actions : 4058 actions, external, flow_idx, error); 4059 if (ret < 0) 4060 goto exit; 4061 MLX5_ASSERT(dev_flow); 4062 if (qrss) { 4063 const struct rte_flow_attr q_attr = { 4064 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 4065 .ingress = 1, 4066 }; 4067 /* Internal PMD action to set register. */ 4068 struct mlx5_rte_flow_item_tag q_tag_spec = { 4069 .data = qrss_id, 4070 .id = 0, 4071 }; 4072 struct rte_flow_item q_items[] = { 4073 { 4074 .type = (enum rte_flow_item_type) 4075 MLX5_RTE_FLOW_ITEM_TYPE_TAG, 4076 .spec = &q_tag_spec, 4077 .last = NULL, 4078 .mask = NULL, 4079 }, 4080 { 4081 .type = RTE_FLOW_ITEM_TYPE_END, 4082 }, 4083 }; 4084 struct rte_flow_action q_actions[] = { 4085 { 4086 .type = qrss->type, 4087 .conf = qrss->conf, 4088 }, 4089 { 4090 .type = RTE_FLOW_ACTION_TYPE_END, 4091 }, 4092 }; 4093 uint64_t layers = flow_get_prefix_layer_flags(dev_flow); 4094 4095 /* 4096 * Configure the tag item only if there is no meter subflow. 4097 * Since tag is already marked in the meter suffix subflow 4098 * we can just use the meter suffix items as is. 4099 */ 4100 if (qrss_id) { 4101 /* Not meter subflow. */ 4102 MLX5_ASSERT(!mtr_sfx); 4103 /* 4104 * Put unique id in prefix flow due to it is destroyed 4105 * after suffix flow and id will be freed after there 4106 * is no actual flows with this id and identifier 4107 * reallocation becomes possible (for example, for 4108 * other flows in other threads). 4109 */ 4110 dev_flow->handle->split_flow_id = qrss_id; 4111 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, 4112 error); 4113 if (ret < 0) 4114 goto exit; 4115 q_tag_spec.id = ret; 4116 } 4117 dev_flow = NULL; 4118 /* Add suffix subflow to execute Q/RSS. */ 4119 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 4120 &q_attr, mtr_sfx ? items : 4121 q_items, q_actions, 4122 external, flow_idx, error); 4123 if (ret < 0) 4124 goto exit; 4125 /* qrss ID should be freed if failed. */ 4126 qrss_id = 0; 4127 MLX5_ASSERT(dev_flow); 4128 } 4129 4130 exit: 4131 /* 4132 * We do not destroy the partially created sub_flows in case of error. 4133 * These ones are included into parent flow list and will be destroyed 4134 * by flow_drv_destroy. 4135 */ 4136 flow_qrss_free_id(dev, qrss_id); 4137 mlx5_free(ext_actions); 4138 return ret; 4139 } 4140 4141 /** 4142 * The splitting for meter feature. 4143 * 4144 * - The meter flow will be split to two flows as prefix and 4145 * suffix flow. The packets make sense only it pass the prefix 4146 * meter action. 4147 * 4148 * - Reg_C_5 is used for the packet to match betweend prefix and 4149 * suffix flow. 4150 * 4151 * @param dev 4152 * Pointer to Ethernet device. 4153 * @param[in] flow 4154 * Parent flow structure pointer. 4155 * @param[in] attr 4156 * Flow rule attributes. 4157 * @param[in] items 4158 * Pattern specification (list terminated by the END pattern item). 4159 * @param[in] actions 4160 * Associated actions (list terminated by the END action). 4161 * @param[in] external 4162 * This flow rule is created by request external to PMD. 4163 * @param[in] flow_idx 4164 * This memory pool index to the flow. 4165 * @param[out] error 4166 * Perform verbose error reporting if not NULL. 
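 *
 * As an illustration only (hypothetical input, assuming a non-drop meter),
 * the helper flow_meter_split_prep() above turns
 *     original actions: METER / QUEUE / END
 * into
 *     prefix actions: TAG(id << MLX5_MTR_COLOR_BITS) / METER / END
 *     suffix actions: QUEUE / END
 * with the suffix pattern extended by a TAG item matching the same value.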
4167 * @return 4168 * 0 on success, negative value otherwise 4169 */ 4170 static int 4171 flow_create_split_meter(struct rte_eth_dev *dev, 4172 struct rte_flow *flow, 4173 const struct rte_flow_attr *attr, 4174 const struct rte_flow_item items[], 4175 const struct rte_flow_action actions[], 4176 bool external, uint32_t flow_idx, 4177 struct rte_flow_error *error) 4178 { 4179 struct mlx5_priv *priv = dev->data->dev_private; 4180 struct rte_flow_action *sfx_actions = NULL; 4181 struct rte_flow_action *pre_actions = NULL; 4182 struct rte_flow_item *sfx_items = NULL; 4183 struct mlx5_flow *dev_flow = NULL; 4184 struct rte_flow_attr sfx_attr = *attr; 4185 uint32_t mtr = 0; 4186 uint32_t mtr_tag_id = 0; 4187 size_t act_size; 4188 size_t item_size; 4189 int actions_n = 0; 4190 int ret; 4191 4192 if (priv->mtr_en) 4193 actions_n = flow_check_meter_action(actions, &mtr); 4194 if (mtr) { 4195 /* The five prefix actions: meter, decap, encap, tag, end. */ 4196 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + 4197 sizeof(struct mlx5_rte_flow_action_set_tag); 4198 /* tag, vlan, port id, end. */ 4199 #define METER_SUFFIX_ITEM 4 4200 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + 4201 sizeof(struct mlx5_rte_flow_item_tag) * 2; 4202 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), 4203 0, SOCKET_ID_ANY); 4204 if (!sfx_actions) 4205 return rte_flow_error_set(error, ENOMEM, 4206 RTE_FLOW_ERROR_TYPE_ACTION, 4207 NULL, "no memory to split " 4208 "meter flow"); 4209 sfx_items = (struct rte_flow_item *)((char *)sfx_actions + 4210 act_size); 4211 pre_actions = sfx_actions + actions_n; 4212 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, 4213 actions, sfx_actions, 4214 pre_actions); 4215 if (!mtr_tag_id) { 4216 ret = -rte_errno; 4217 goto exit; 4218 } 4219 /* Add the prefix subflow. */ 4220 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr, 4221 items, pre_actions, external, 4222 flow_idx, error); 4223 if (ret) { 4224 ret = -rte_errno; 4225 goto exit; 4226 } 4227 dev_flow->handle->split_flow_id = mtr_tag_id; 4228 /* Setting the sfx group atrr. */ 4229 sfx_attr.group = sfx_attr.transfer ? 4230 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : 4231 MLX5_FLOW_TABLE_LEVEL_SUFFIX; 4232 } 4233 /* Add the prefix subflow. */ 4234 ret = flow_create_split_metadata(dev, flow, dev_flow ? 4235 flow_get_prefix_layer_flags(dev_flow) : 4236 0, &sfx_attr, 4237 sfx_items ? sfx_items : items, 4238 sfx_actions ? sfx_actions : actions, 4239 external, flow_idx, error); 4240 exit: 4241 if (sfx_actions) 4242 mlx5_free(sfx_actions); 4243 return ret; 4244 } 4245 4246 /** 4247 * Split the flow to subflow set. The splitters might be linked 4248 * in the chain, like this: 4249 * flow_create_split_outer() calls: 4250 * flow_create_split_meter() calls: 4251 * flow_create_split_metadata(meter_subflow_0) calls: 4252 * flow_create_split_inner(metadata_subflow_0) 4253 * flow_create_split_inner(metadata_subflow_1) 4254 * flow_create_split_inner(metadata_subflow_2) 4255 * flow_create_split_metadata(meter_subflow_1) calls: 4256 * flow_create_split_inner(metadata_subflow_0) 4257 * flow_create_split_inner(metadata_subflow_1) 4258 * flow_create_split_inner(metadata_subflow_2) 4259 * 4260 * This provide flexible way to add new levels of flow splitting. 4261 * The all of successfully created subflows are included to the 4262 * parent flow dev_flow list. 4263 * 4264 * @param dev 4265 * Pointer to Ethernet device. 4266 * @param[in] flow 4267 * Parent flow structure pointer. 
4268 * @param[in] attr
4269 * Flow rule attributes.
4270 * @param[in] items
4271 * Pattern specification (list terminated by the END pattern item).
4272 * @param[in] actions
4273 * Associated actions (list terminated by the END action).
4274 * @param[in] external
4275 * Set when this flow rule is created by a request external to the PMD.
4276 * @param[in] flow_idx
4277 * Memory pool index of this flow.
4278 * @param[out] error
4279 * Perform verbose error reporting if not NULL.
4280 * @return
4281 * 0 on success, negative value otherwise
4282 */
4283 static int
4284 flow_create_split_outer(struct rte_eth_dev *dev,
4285 struct rte_flow *flow,
4286 const struct rte_flow_attr *attr,
4287 const struct rte_flow_item items[],
4288 const struct rte_flow_action actions[],
4289 bool external, uint32_t flow_idx,
4290 struct rte_flow_error *error)
4291 {
4292 int ret;
4293
4294 ret = flow_create_split_meter(dev, flow, attr, items,
4295 actions, external, flow_idx, error);
4296 MLX5_ASSERT(ret <= 0);
4297 return ret;
4298 }
4299
4300 /**
4301 * Create a flow and add it to @p list.
4302 *
4303 * @param dev
4304 * Pointer to Ethernet device.
4305 * @param list
4306 * Pointer to a TAILQ flow list. If this parameter is NULL,
4307 * no list insertion is done, the flow is just created and
4308 * it is the caller's responsibility to track the
4309 * created flow.
4310 * @param[in] attr
4311 * Flow rule attributes.
4312 * @param[in] items
4313 * Pattern specification (list terminated by the END pattern item).
4314 * @param[in] actions
4315 * Associated actions (list terminated by the END action).
4316 * @param[in] external
4317 * Set when this flow rule is created by a request external to the PMD.
4318 * @param[out] error
4319 * Perform verbose error reporting if not NULL.
4320 *
4321 * @return
4322 * A flow index on success, 0 otherwise and rte_errno is set.
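 *
 * A minimal usage sketch (it mirrors the internal callers below; priv is
 * dev->data->dev_private as elsewhere in this file and the attributes,
 * items and actions are assumed to be prepared by the caller):
 * @code
 *   struct rte_flow_error error;
 *   uint32_t flow_idx = flow_list_create(dev, &priv->flows, &attr, items,
 *                                        actions, true, &error);
 *
 *   if (!flow_idx)
 *           DRV_LOG(DEBUG, "flow creation failed: %s",
 *                   error.message ? error.message : "(no stated reason)");
 * @endcode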
4323 */ 4324 static uint32_t 4325 flow_list_create(struct rte_eth_dev *dev, uint32_t *list, 4326 const struct rte_flow_attr *attr, 4327 const struct rte_flow_item items[], 4328 const struct rte_flow_action actions[], 4329 bool external, struct rte_flow_error *error) 4330 { 4331 struct mlx5_priv *priv = dev->data->dev_private; 4332 struct rte_flow *flow = NULL; 4333 struct mlx5_flow *dev_flow; 4334 const struct rte_flow_action_rss *rss; 4335 union { 4336 struct rte_flow_expand_rss buf; 4337 uint8_t buffer[2048]; 4338 } expand_buffer; 4339 union { 4340 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4341 uint8_t buffer[2048]; 4342 } actions_rx; 4343 union { 4344 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; 4345 uint8_t buffer[2048]; 4346 } actions_hairpin_tx; 4347 union { 4348 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; 4349 uint8_t buffer[2048]; 4350 } items_tx; 4351 struct rte_flow_expand_rss *buf = &expand_buffer.buf; 4352 struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) 4353 priv->rss_desc)[!!priv->flow_idx]; 4354 const struct rte_flow_action *p_actions_rx = actions; 4355 uint32_t i; 4356 uint32_t idx = 0; 4357 int hairpin_flow; 4358 uint32_t hairpin_id = 0; 4359 struct rte_flow_attr attr_tx = { .priority = 0 }; 4360 int ret; 4361 4362 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 4363 ret = flow_drv_validate(dev, attr, items, p_actions_rx, 4364 external, hairpin_flow, error); 4365 if (ret < 0) 4366 return 0; 4367 if (hairpin_flow > 0) { 4368 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { 4369 rte_errno = EINVAL; 4370 return 0; 4371 } 4372 flow_hairpin_split(dev, actions, actions_rx.actions, 4373 actions_hairpin_tx.actions, items_tx.items, 4374 &hairpin_id); 4375 p_actions_rx = actions_rx.actions; 4376 } 4377 flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx); 4378 if (!flow) { 4379 rte_errno = ENOMEM; 4380 goto error_before_flow; 4381 } 4382 flow->drv_type = flow_get_drv_type(dev, attr); 4383 if (hairpin_id != 0) 4384 flow->hairpin_flow_id = hairpin_id; 4385 MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && 4386 flow->drv_type < MLX5_FLOW_TYPE_MAX); 4387 memset(rss_desc, 0, sizeof(*rss_desc)); 4388 rss = flow_get_rss_action(p_actions_rx); 4389 if (rss) { 4390 /* 4391 * The following information is required by 4392 * mlx5_flow_hashfields_adjust() in advance. 4393 */ 4394 rss_desc->level = rss->level; 4395 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ 4396 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types; 4397 } 4398 flow->dev_handles = 0; 4399 if (rss && rss->types) { 4400 unsigned int graph_root; 4401 4402 graph_root = find_graph_root(items, rss->level); 4403 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), 4404 items, rss->types, 4405 mlx5_support_expansion, 4406 graph_root); 4407 MLX5_ASSERT(ret > 0 && 4408 (unsigned int)ret < sizeof(expand_buffer.buffer)); 4409 } else { 4410 buf->entries = 1; 4411 buf->entry[0].pattern = (void *)(uintptr_t)items; 4412 } 4413 /* 4414 * Record the start index when there is a nested call. All sub-flows 4415 * need to be translated before another calling. 4416 * No need to use ping-pong buffer to save memory here. 4417 */ 4418 if (priv->flow_idx) { 4419 MLX5_ASSERT(!priv->flow_nested_idx); 4420 priv->flow_nested_idx = priv->flow_idx; 4421 } 4422 for (i = 0; i < buf->entries; ++i) { 4423 /* 4424 * The splitter may create multiple dev_flows, 4425 * depending on configuration. In the simplest 4426 * case it just creates unmodified original flow. 
4427 */ 4428 ret = flow_create_split_outer(dev, flow, attr, 4429 buf->entry[i].pattern, 4430 p_actions_rx, external, idx, 4431 error); 4432 if (ret < 0) 4433 goto error; 4434 } 4435 /* Create the tx flow. */ 4436 if (hairpin_flow) { 4437 attr_tx.group = MLX5_HAIRPIN_TX_TABLE; 4438 attr_tx.ingress = 0; 4439 attr_tx.egress = 1; 4440 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items, 4441 actions_hairpin_tx.actions, 4442 idx, error); 4443 if (!dev_flow) 4444 goto error; 4445 dev_flow->flow = flow; 4446 dev_flow->external = 0; 4447 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, 4448 dev_flow->handle, next); 4449 ret = flow_drv_translate(dev, dev_flow, &attr_tx, 4450 items_tx.items, 4451 actions_hairpin_tx.actions, error); 4452 if (ret < 0) 4453 goto error; 4454 } 4455 /* 4456 * Update the metadata register copy table. If extensive 4457 * metadata feature is enabled and registers are supported 4458 * we might create the extra rte_flow for each unique 4459 * MARK/FLAG action ID. 4460 * 4461 * The table is updated for ingress Flows only, because 4462 * the egress Flows belong to the different device and 4463 * copy table should be updated in peer NIC Rx domain. 4464 */ 4465 if (attr->ingress && 4466 (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { 4467 ret = flow_mreg_update_copy_table(dev, flow, actions, error); 4468 if (ret) 4469 goto error; 4470 } 4471 /* 4472 * If the flow is external (from application) OR device is started, then 4473 * the flow will be applied immediately. 4474 */ 4475 if (external || dev->data->dev_started) { 4476 ret = flow_drv_apply(dev, flow, error); 4477 if (ret < 0) 4478 goto error; 4479 } 4480 if (list) 4481 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, 4482 flow, next); 4483 flow_rxq_flags_set(dev, flow); 4484 /* Nested flow creation index recovery. */ 4485 priv->flow_idx = priv->flow_nested_idx; 4486 if (priv->flow_nested_idx) 4487 priv->flow_nested_idx = 0; 4488 return idx; 4489 error: 4490 MLX5_ASSERT(flow); 4491 ret = rte_errno; /* Save rte_errno before cleanup. */ 4492 flow_mreg_del_copy_action(dev, flow); 4493 flow_drv_destroy(dev, flow); 4494 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx); 4495 rte_errno = ret; /* Restore rte_errno. */ 4496 error_before_flow: 4497 ret = rte_errno; 4498 if (hairpin_id) 4499 mlx5_flow_id_release(priv->sh->flow_id_pool, 4500 hairpin_id); 4501 rte_errno = ret; 4502 priv->flow_idx = priv->flow_nested_idx; 4503 if (priv->flow_nested_idx) 4504 priv->flow_nested_idx = 0; 4505 return 0; 4506 } 4507 4508 /** 4509 * Create a dedicated flow rule on e-switch table 0 (root table), to direct all 4510 * incoming packets to table 1. 4511 * 4512 * Other flow rules, requested for group n, will be created in 4513 * e-switch table n+1. 4514 * Jump action to e-switch group n will be created to group n+1. 4515 * 4516 * Used when working in switchdev mode, to utilise advantages of table 1 4517 * and above. 4518 * 4519 * @param dev 4520 * Pointer to Ethernet device. 4521 * 4522 * @return 4523 * Pointer to flow on success, NULL otherwise and rte_errno is set. 
4524 */ 4525 struct rte_flow * 4526 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) 4527 { 4528 const struct rte_flow_attr attr = { 4529 .group = 0, 4530 .priority = 0, 4531 .ingress = 1, 4532 .egress = 0, 4533 .transfer = 1, 4534 }; 4535 const struct rte_flow_item pattern = { 4536 .type = RTE_FLOW_ITEM_TYPE_END, 4537 }; 4538 struct rte_flow_action_jump jump = { 4539 .group = 1, 4540 }; 4541 const struct rte_flow_action actions[] = { 4542 { 4543 .type = RTE_FLOW_ACTION_TYPE_JUMP, 4544 .conf = &jump, 4545 }, 4546 { 4547 .type = RTE_FLOW_ACTION_TYPE_END, 4548 }, 4549 }; 4550 struct mlx5_priv *priv = dev->data->dev_private; 4551 struct rte_flow_error error; 4552 4553 return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows, 4554 &attr, &pattern, 4555 actions, false, &error); 4556 } 4557 4558 /** 4559 * Validate a flow supported by the NIC. 4560 * 4561 * @see rte_flow_validate() 4562 * @see rte_flow_ops 4563 */ 4564 int 4565 mlx5_flow_validate(struct rte_eth_dev *dev, 4566 const struct rte_flow_attr *attr, 4567 const struct rte_flow_item items[], 4568 const struct rte_flow_action actions[], 4569 struct rte_flow_error *error) 4570 { 4571 int hairpin_flow; 4572 4573 hairpin_flow = flow_check_hairpin_split(dev, attr, actions); 4574 return flow_drv_validate(dev, attr, items, actions, 4575 true, hairpin_flow, error); 4576 } 4577 4578 /** 4579 * Create a flow. 4580 * 4581 * @see rte_flow_create() 4582 * @see rte_flow_ops 4583 */ 4584 struct rte_flow * 4585 mlx5_flow_create(struct rte_eth_dev *dev, 4586 const struct rte_flow_attr *attr, 4587 const struct rte_flow_item items[], 4588 const struct rte_flow_action actions[], 4589 struct rte_flow_error *error) 4590 { 4591 struct mlx5_priv *priv = dev->data->dev_private; 4592 4593 /* 4594 * If the device is not started yet, it is not allowed to created a 4595 * flow from application. PMD default flows and traffic control flows 4596 * are not affected. 4597 */ 4598 if (unlikely(!dev->data->dev_started)) { 4599 DRV_LOG(DEBUG, "port %u is not started when " 4600 "inserting a flow", dev->data->port_id); 4601 rte_flow_error_set(error, ENODEV, 4602 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 4603 NULL, 4604 "port not started"); 4605 return NULL; 4606 } 4607 return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, 4608 attr, items, actions, true, error); 4609 } 4610 4611 /** 4612 * Destroy a flow in a list. 4613 * 4614 * @param dev 4615 * Pointer to Ethernet device. 4616 * @param list 4617 * Pointer to the Indexed flow list. If this parameter NULL, 4618 * there is no flow removal from the list. Be noted that as 4619 * flow is add to the indexed list, memory of the indexed 4620 * list points to maybe changed as flow destroyed. 4621 * @param[in] flow_idx 4622 * Index of flow to destroy. 4623 */ 4624 static void 4625 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, 4626 uint32_t flow_idx) 4627 { 4628 struct mlx5_priv *priv = dev->data->dev_private; 4629 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 4630 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 4631 [MLX5_IPOOL_RTE_FLOW], flow_idx); 4632 4633 if (!flow) 4634 return; 4635 /* 4636 * Update RX queue flags only if port is started, otherwise it is 4637 * already clean. 
4638 */ 4639 if (dev->data->dev_started) 4640 flow_rxq_flags_trim(dev, flow); 4641 if (flow->hairpin_flow_id) 4642 mlx5_flow_id_release(priv->sh->flow_id_pool, 4643 flow->hairpin_flow_id); 4644 flow_drv_destroy(dev, flow); 4645 if (list) 4646 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, 4647 flow_idx, flow, next); 4648 flow_mreg_del_copy_action(dev, flow); 4649 if (flow->fdir) { 4650 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 4651 if (priv_fdir_flow->rix_flow == flow_idx) 4652 break; 4653 } 4654 if (priv_fdir_flow) { 4655 LIST_REMOVE(priv_fdir_flow, next); 4656 mlx5_free(priv_fdir_flow->fdir); 4657 mlx5_free(priv_fdir_flow); 4658 } 4659 } 4660 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 4661 } 4662 4663 /** 4664 * Destroy all flows. 4665 * 4666 * @param dev 4667 * Pointer to Ethernet device. 4668 * @param list 4669 * Pointer to the Indexed flow list. 4670 * @param active 4671 * If flushing is called avtively. 4672 */ 4673 void 4674 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) 4675 { 4676 uint32_t num_flushed = 0; 4677 4678 while (*list) { 4679 flow_list_destroy(dev, list, *list); 4680 num_flushed++; 4681 } 4682 if (active) { 4683 DRV_LOG(INFO, "port %u: %u flows flushed before stopping", 4684 dev->data->port_id, num_flushed); 4685 } 4686 } 4687 4688 /** 4689 * Remove all flows. 4690 * 4691 * @param dev 4692 * Pointer to Ethernet device. 4693 * @param list 4694 * Pointer to the Indexed flow list. 4695 */ 4696 void 4697 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list) 4698 { 4699 struct mlx5_priv *priv = dev->data->dev_private; 4700 struct rte_flow *flow = NULL; 4701 uint32_t idx; 4702 4703 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 4704 flow, next) { 4705 flow_drv_remove(dev, flow); 4706 flow_mreg_stop_copy_action(dev, flow); 4707 } 4708 flow_mreg_del_default_copy_action(dev); 4709 flow_rxq_flags_clear(dev); 4710 } 4711 4712 /** 4713 * Add all flows. 4714 * 4715 * @param dev 4716 * Pointer to Ethernet device. 4717 * @param list 4718 * Pointer to the Indexed flow list. 4719 * 4720 * @return 4721 * 0 on success, a negative errno value otherwise and rte_errno is set. 4722 */ 4723 int 4724 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list) 4725 { 4726 struct mlx5_priv *priv = dev->data->dev_private; 4727 struct rte_flow *flow = NULL; 4728 struct rte_flow_error error; 4729 uint32_t idx; 4730 int ret = 0; 4731 4732 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 4733 ret = flow_mreg_add_default_copy_action(dev, &error); 4734 if (ret < 0) 4735 return -rte_errno; 4736 /* Apply Flows created by application. */ 4737 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx, 4738 flow, next) { 4739 ret = flow_mreg_start_copy_action(dev, flow); 4740 if (ret < 0) 4741 goto error; 4742 ret = flow_drv_apply(dev, flow, &error); 4743 if (ret < 0) 4744 goto error; 4745 flow_rxq_flags_set(dev, flow); 4746 } 4747 return 0; 4748 error: 4749 ret = rte_errno; /* Save rte_errno before cleanup. */ 4750 mlx5_flow_stop(dev, list); 4751 rte_errno = ret; /* Restore rte_errno. */ 4752 return -rte_errno; 4753 } 4754 4755 /** 4756 * Stop all default actions for flows. 4757 * 4758 * @param dev 4759 * Pointer to Ethernet device. 4760 */ 4761 void 4762 mlx5_flow_stop_default(struct rte_eth_dev *dev) 4763 { 4764 flow_mreg_del_default_copy_action(dev); 4765 flow_rxq_flags_clear(dev); 4766 } 4767 4768 /** 4769 * Start all default actions for flows. 
4770 * 4771 * @param dev 4772 * Pointer to Ethernet device. 4773 * @return 4774 * 0 on success, a negative errno value otherwise and rte_errno is set. 4775 */ 4776 int 4777 mlx5_flow_start_default(struct rte_eth_dev *dev) 4778 { 4779 struct rte_flow_error error; 4780 4781 /* Make sure default copy action (reg_c[0] -> reg_b) is created. */ 4782 return flow_mreg_add_default_copy_action(dev, &error); 4783 } 4784 4785 /** 4786 * Allocate intermediate resources for flow creation. 4787 * 4788 * @param dev 4789 * Pointer to Ethernet device. 4790 */ 4791 void 4792 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) 4793 { 4794 struct mlx5_priv *priv = dev->data->dev_private; 4795 4796 if (!priv->inter_flows) { 4797 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, 4798 MLX5_NUM_MAX_DEV_FLOWS * 4799 sizeof(struct mlx5_flow) + 4800 (sizeof(struct mlx5_flow_rss_desc) + 4801 sizeof(uint16_t) * UINT16_MAX) * 2, 0, 4802 SOCKET_ID_ANY); 4803 if (!priv->inter_flows) { 4804 DRV_LOG(ERR, "can't allocate intermediate memory."); 4805 return; 4806 } 4807 } 4808 priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows) 4809 [MLX5_NUM_MAX_DEV_FLOWS]; 4810 /* Reset the index. */ 4811 priv->flow_idx = 0; 4812 priv->flow_nested_idx = 0; 4813 } 4814 4815 /** 4816 * Free intermediate resources for flows. 4817 * 4818 * @param dev 4819 * Pointer to Ethernet device. 4820 */ 4821 void 4822 mlx5_flow_free_intermediate(struct rte_eth_dev *dev) 4823 { 4824 struct mlx5_priv *priv = dev->data->dev_private; 4825 4826 mlx5_free(priv->inter_flows); 4827 priv->inter_flows = NULL; 4828 } 4829 4830 /** 4831 * Verify the flow list is empty 4832 * 4833 * @param dev 4834 * Pointer to Ethernet device. 4835 * 4836 * @return the number of flows not released. 4837 */ 4838 int 4839 mlx5_flow_verify(struct rte_eth_dev *dev) 4840 { 4841 struct mlx5_priv *priv = dev->data->dev_private; 4842 struct rte_flow *flow; 4843 uint32_t idx; 4844 int ret = 0; 4845 4846 ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx, 4847 flow, next) { 4848 DRV_LOG(DEBUG, "port %u flow %p still referenced", 4849 dev->data->port_id, (void *)flow); 4850 ++ret; 4851 } 4852 return ret; 4853 } 4854 4855 /** 4856 * Enable default hairpin egress flow. 4857 * 4858 * @param dev 4859 * Pointer to Ethernet device. 4860 * @param queue 4861 * The queue index. 4862 * 4863 * @return 4864 * 0 on success, a negative errno value otherwise and rte_errno is set. 
4865 */ 4866 int 4867 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, 4868 uint32_t queue) 4869 { 4870 struct mlx5_priv *priv = dev->data->dev_private; 4871 const struct rte_flow_attr attr = { 4872 .egress = 1, 4873 .priority = 0, 4874 }; 4875 struct mlx5_rte_flow_item_tx_queue queue_spec = { 4876 .queue = queue, 4877 }; 4878 struct mlx5_rte_flow_item_tx_queue queue_mask = { 4879 .queue = UINT32_MAX, 4880 }; 4881 struct rte_flow_item items[] = { 4882 { 4883 .type = (enum rte_flow_item_type) 4884 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, 4885 .spec = &queue_spec, 4886 .last = NULL, 4887 .mask = &queue_mask, 4888 }, 4889 { 4890 .type = RTE_FLOW_ITEM_TYPE_END, 4891 }, 4892 }; 4893 struct rte_flow_action_jump jump = { 4894 .group = MLX5_HAIRPIN_TX_TABLE, 4895 }; 4896 struct rte_flow_action actions[2]; 4897 uint32_t flow_idx; 4898 struct rte_flow_error error; 4899 4900 actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP; 4901 actions[0].conf = &jump; 4902 actions[1].type = RTE_FLOW_ACTION_TYPE_END; 4903 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 4904 &attr, items, actions, false, &error); 4905 if (!flow_idx) { 4906 DRV_LOG(DEBUG, 4907 "Failed to create ctrl flow: rte_errno(%d)," 4908 " type(%d), message(%s)", 4909 rte_errno, error.type, 4910 error.message ? error.message : " (no stated reason)"); 4911 return -rte_errno; 4912 } 4913 return 0; 4914 } 4915 4916 /** 4917 * Enable a control flow configured from the control plane. 4918 * 4919 * @param dev 4920 * Pointer to Ethernet device. 4921 * @param eth_spec 4922 * An Ethernet flow spec to apply. 4923 * @param eth_mask 4924 * An Ethernet flow mask to apply. 4925 * @param vlan_spec 4926 * A VLAN flow spec to apply. 4927 * @param vlan_mask 4928 * A VLAN flow mask to apply. 4929 * 4930 * @return 4931 * 0 on success, a negative errno value otherwise and rte_errno is set. 4932 */ 4933 int 4934 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 4935 struct rte_flow_item_eth *eth_spec, 4936 struct rte_flow_item_eth *eth_mask, 4937 struct rte_flow_item_vlan *vlan_spec, 4938 struct rte_flow_item_vlan *vlan_mask) 4939 { 4940 struct mlx5_priv *priv = dev->data->dev_private; 4941 const struct rte_flow_attr attr = { 4942 .ingress = 1, 4943 .priority = MLX5_FLOW_PRIO_RSVD, 4944 }; 4945 struct rte_flow_item items[] = { 4946 { 4947 .type = RTE_FLOW_ITEM_TYPE_ETH, 4948 .spec = eth_spec, 4949 .last = NULL, 4950 .mask = eth_mask, 4951 }, 4952 { 4953 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 4954 RTE_FLOW_ITEM_TYPE_END, 4955 .spec = vlan_spec, 4956 .last = NULL, 4957 .mask = vlan_mask, 4958 }, 4959 { 4960 .type = RTE_FLOW_ITEM_TYPE_END, 4961 }, 4962 }; 4963 uint16_t queue[priv->reta_idx_n]; 4964 struct rte_flow_action_rss action_rss = { 4965 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 4966 .level = 0, 4967 .types = priv->rss_conf.rss_hf, 4968 .key_len = priv->rss_conf.rss_key_len, 4969 .queue_num = priv->reta_idx_n, 4970 .key = priv->rss_conf.rss_key, 4971 .queue = queue, 4972 }; 4973 struct rte_flow_action actions[] = { 4974 { 4975 .type = RTE_FLOW_ACTION_TYPE_RSS, 4976 .conf = &action_rss, 4977 }, 4978 { 4979 .type = RTE_FLOW_ACTION_TYPE_END, 4980 }, 4981 }; 4982 uint32_t flow_idx; 4983 struct rte_flow_error error; 4984 unsigned int i; 4985 4986 if (!priv->reta_idx_n || !priv->rxqs_n) { 4987 return 0; 4988 } 4989 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) 4990 action_rss.types = 0; 4991 for (i = 0; i != priv->reta_idx_n; ++i) 4992 queue[i] = (*priv->reta_idx)[i]; 4993 flow_idx = flow_list_create(dev, &priv->ctrl_flows, 4994 &attr, items, actions, false, &error); 4995 if (!flow_idx) 4996 return -rte_errno; 4997 return 0; 4998 } 4999 5000 /** 5001 * Enable a flow control configured from the control plane. 5002 * 5003 * @param dev 5004 * Pointer to Ethernet device. 5005 * @param eth_spec 5006 * An Ethernet flow spec to apply. 5007 * @param eth_mask 5008 * An Ethernet flow mask to apply. 5009 * 5010 * @return 5011 * 0 on success, a negative errno value otherwise and rte_errno is set. 5012 */ 5013 int 5014 mlx5_ctrl_flow(struct rte_eth_dev *dev, 5015 struct rte_flow_item_eth *eth_spec, 5016 struct rte_flow_item_eth *eth_mask) 5017 { 5018 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 5019 } 5020 5021 /** 5022 * Create default miss flow rule matching lacp traffic 5023 * 5024 * @param dev 5025 * Pointer to Ethernet device. 5026 * @param eth_spec 5027 * An Ethernet flow spec to apply. 5028 * 5029 * @return 5030 * 0 on success, a negative errno value otherwise and rte_errno is set. 5031 */ 5032 int 5033 mlx5_flow_lacp_miss(struct rte_eth_dev *dev) 5034 { 5035 struct mlx5_priv *priv = dev->data->dev_private; 5036 /* 5037 * The LACP matching is done by only using ether type since using 5038 * a multicast dst mac causes kernel to give low priority to this flow. 5039 */ 5040 static const struct rte_flow_item_eth lacp_spec = { 5041 .type = RTE_BE16(0x8809), 5042 }; 5043 static const struct rte_flow_item_eth lacp_mask = { 5044 .type = 0xffff, 5045 }; 5046 const struct rte_flow_attr attr = { 5047 .ingress = 1, 5048 }; 5049 struct rte_flow_item items[] = { 5050 { 5051 .type = RTE_FLOW_ITEM_TYPE_ETH, 5052 .spec = &lacp_spec, 5053 .mask = &lacp_mask, 5054 }, 5055 { 5056 .type = RTE_FLOW_ITEM_TYPE_END, 5057 }, 5058 }; 5059 struct rte_flow_action actions[] = { 5060 { 5061 .type = (enum rte_flow_action_type) 5062 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, 5063 }, 5064 { 5065 .type = RTE_FLOW_ACTION_TYPE_END, 5066 }, 5067 }; 5068 struct rte_flow_error error; 5069 uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, 5070 &attr, items, actions, false, &error); 5071 5072 if (!flow_idx) 5073 return -rte_errno; 5074 return 0; 5075 } 5076 5077 /** 5078 * Destroy a flow. 
5079 * 5080 * @see rte_flow_destroy() 5081 * @see rte_flow_ops 5082 */ 5083 int 5084 mlx5_flow_destroy(struct rte_eth_dev *dev, 5085 struct rte_flow *flow, 5086 struct rte_flow_error *error __rte_unused) 5087 { 5088 struct mlx5_priv *priv = dev->data->dev_private; 5089 5090 flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow); 5091 return 0; 5092 } 5093 5094 /** 5095 * Destroy all flows. 5096 * 5097 * @see rte_flow_flush() 5098 * @see rte_flow_ops 5099 */ 5100 int 5101 mlx5_flow_flush(struct rte_eth_dev *dev, 5102 struct rte_flow_error *error __rte_unused) 5103 { 5104 struct mlx5_priv *priv = dev->data->dev_private; 5105 5106 mlx5_flow_list_flush(dev, &priv->flows, false); 5107 return 0; 5108 } 5109 5110 /** 5111 * Isolated mode. 5112 * 5113 * @see rte_flow_isolate() 5114 * @see rte_flow_ops 5115 */ 5116 int 5117 mlx5_flow_isolate(struct rte_eth_dev *dev, 5118 int enable, 5119 struct rte_flow_error *error) 5120 { 5121 struct mlx5_priv *priv = dev->data->dev_private; 5122 5123 if (dev->data->dev_started) { 5124 rte_flow_error_set(error, EBUSY, 5125 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5126 NULL, 5127 "port must be stopped first"); 5128 return -rte_errno; 5129 } 5130 priv->isolated = !!enable; 5131 if (enable) 5132 dev->dev_ops = &mlx5_os_dev_ops_isolate; 5133 else 5134 dev->dev_ops = &mlx5_os_dev_ops; 5135 return 0; 5136 } 5137 5138 /** 5139 * Query a flow. 5140 * 5141 * @see rte_flow_query() 5142 * @see rte_flow_ops 5143 */ 5144 static int 5145 flow_drv_query(struct rte_eth_dev *dev, 5146 uint32_t flow_idx, 5147 const struct rte_flow_action *actions, 5148 void *data, 5149 struct rte_flow_error *error) 5150 { 5151 struct mlx5_priv *priv = dev->data->dev_private; 5152 const struct mlx5_flow_driver_ops *fops; 5153 struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool 5154 [MLX5_IPOOL_RTE_FLOW], 5155 flow_idx); 5156 enum mlx5_flow_drv_type ftype; 5157 5158 if (!flow) { 5159 return rte_flow_error_set(error, ENOENT, 5160 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 5161 NULL, 5162 "invalid flow handle"); 5163 } 5164 ftype = flow->drv_type; 5165 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX); 5166 fops = flow_get_drv_ops(ftype); 5167 5168 return fops->query(dev, flow, actions, data, error); 5169 } 5170 5171 /** 5172 * Query a flow. 5173 * 5174 * @see rte_flow_query() 5175 * @see rte_flow_ops 5176 */ 5177 int 5178 mlx5_flow_query(struct rte_eth_dev *dev, 5179 struct rte_flow *flow, 5180 const struct rte_flow_action *actions, 5181 void *data, 5182 struct rte_flow_error *error) 5183 { 5184 int ret; 5185 5186 ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data, 5187 error); 5188 if (ret < 0) 5189 return ret; 5190 return 0; 5191 } 5192 5193 /** 5194 * Convert a flow director filter to a generic flow. 5195 * 5196 * @param dev 5197 * Pointer to Ethernet device. 5198 * @param fdir_filter 5199 * Flow director filter to add. 5200 * @param attributes 5201 * Generic flow parameters structure. 5202 * 5203 * @return 5204 * 0 on success, a negative errno value otherwise and rte_errno is set. 5205 */ 5206 static int 5207 flow_fdir_filter_convert(struct rte_eth_dev *dev, 5208 const struct rte_eth_fdir_filter *fdir_filter, 5209 struct mlx5_fdir *attributes) 5210 { 5211 struct mlx5_priv *priv = dev->data->dev_private; 5212 const struct rte_eth_fdir_input *input = &fdir_filter->input; 5213 const struct rte_eth_fdir_masks *mask = 5214 &dev->data->dev_conf.fdir_conf.mask; 5215 5216 /* Validate queue number. 
*/ 5217 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 5218 DRV_LOG(ERR, "port %u invalid queue number %d", 5219 dev->data->port_id, fdir_filter->action.rx_queue); 5220 rte_errno = EINVAL; 5221 return -rte_errno; 5222 } 5223 attributes->attr.ingress = 1; 5224 attributes->items[0] = (struct rte_flow_item) { 5225 .type = RTE_FLOW_ITEM_TYPE_ETH, 5226 .spec = &attributes->l2, 5227 .mask = &attributes->l2_mask, 5228 }; 5229 switch (fdir_filter->action.behavior) { 5230 case RTE_ETH_FDIR_ACCEPT: 5231 attributes->actions[0] = (struct rte_flow_action){ 5232 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 5233 .conf = &attributes->queue, 5234 }; 5235 break; 5236 case RTE_ETH_FDIR_REJECT: 5237 attributes->actions[0] = (struct rte_flow_action){ 5238 .type = RTE_FLOW_ACTION_TYPE_DROP, 5239 }; 5240 break; 5241 default: 5242 DRV_LOG(ERR, "port %u invalid behavior %d", 5243 dev->data->port_id, 5244 fdir_filter->action.behavior); 5245 rte_errno = ENOTSUP; 5246 return -rte_errno; 5247 } 5248 attributes->queue.index = fdir_filter->action.rx_queue; 5249 /* Handle L3. */ 5250 switch (fdir_filter->input.flow_type) { 5251 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5252 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5253 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5254 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){ 5255 .src_addr = input->flow.ip4_flow.src_ip, 5256 .dst_addr = input->flow.ip4_flow.dst_ip, 5257 .time_to_live = input->flow.ip4_flow.ttl, 5258 .type_of_service = input->flow.ip4_flow.tos, 5259 }; 5260 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){ 5261 .src_addr = mask->ipv4_mask.src_ip, 5262 .dst_addr = mask->ipv4_mask.dst_ip, 5263 .time_to_live = mask->ipv4_mask.ttl, 5264 .type_of_service = mask->ipv4_mask.tos, 5265 .next_proto_id = mask->ipv4_mask.proto, 5266 }; 5267 attributes->items[1] = (struct rte_flow_item){ 5268 .type = RTE_FLOW_ITEM_TYPE_IPV4, 5269 .spec = &attributes->l3, 5270 .mask = &attributes->l3_mask, 5271 }; 5272 break; 5273 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5274 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5275 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5276 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){ 5277 .hop_limits = input->flow.ipv6_flow.hop_limits, 5278 .proto = input->flow.ipv6_flow.proto, 5279 }; 5280 5281 memcpy(attributes->l3.ipv6.hdr.src_addr, 5282 input->flow.ipv6_flow.src_ip, 5283 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5284 memcpy(attributes->l3.ipv6.hdr.dst_addr, 5285 input->flow.ipv6_flow.dst_ip, 5286 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 5287 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 5288 mask->ipv6_mask.src_ip, 5289 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5290 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 5291 mask->ipv6_mask.dst_ip, 5292 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 5293 attributes->items[1] = (struct rte_flow_item){ 5294 .type = RTE_FLOW_ITEM_TYPE_IPV6, 5295 .spec = &attributes->l3, 5296 .mask = &attributes->l3_mask, 5297 }; 5298 break; 5299 default: 5300 DRV_LOG(ERR, "port %u invalid flow type%d", 5301 dev->data->port_id, fdir_filter->input.flow_type); 5302 rte_errno = ENOTSUP; 5303 return -rte_errno; 5304 } 5305 /* Handle L4. 
*/ 5306 switch (fdir_filter->input.flow_type) { 5307 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 5308 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5309 .src_port = input->flow.udp4_flow.src_port, 5310 .dst_port = input->flow.udp4_flow.dst_port, 5311 }; 5312 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5313 .src_port = mask->src_port_mask, 5314 .dst_port = mask->dst_port_mask, 5315 }; 5316 attributes->items[2] = (struct rte_flow_item){ 5317 .type = RTE_FLOW_ITEM_TYPE_UDP, 5318 .spec = &attributes->l4, 5319 .mask = &attributes->l4_mask, 5320 }; 5321 break; 5322 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 5323 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5324 .src_port = input->flow.tcp4_flow.src_port, 5325 .dst_port = input->flow.tcp4_flow.dst_port, 5326 }; 5327 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5328 .src_port = mask->src_port_mask, 5329 .dst_port = mask->dst_port_mask, 5330 }; 5331 attributes->items[2] = (struct rte_flow_item){ 5332 .type = RTE_FLOW_ITEM_TYPE_TCP, 5333 .spec = &attributes->l4, 5334 .mask = &attributes->l4_mask, 5335 }; 5336 break; 5337 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 5338 attributes->l4.udp.hdr = (struct rte_udp_hdr){ 5339 .src_port = input->flow.udp6_flow.src_port, 5340 .dst_port = input->flow.udp6_flow.dst_port, 5341 }; 5342 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){ 5343 .src_port = mask->src_port_mask, 5344 .dst_port = mask->dst_port_mask, 5345 }; 5346 attributes->items[2] = (struct rte_flow_item){ 5347 .type = RTE_FLOW_ITEM_TYPE_UDP, 5348 .spec = &attributes->l4, 5349 .mask = &attributes->l4_mask, 5350 }; 5351 break; 5352 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 5353 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){ 5354 .src_port = input->flow.tcp6_flow.src_port, 5355 .dst_port = input->flow.tcp6_flow.dst_port, 5356 }; 5357 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){ 5358 .src_port = mask->src_port_mask, 5359 .dst_port = mask->dst_port_mask, 5360 }; 5361 attributes->items[2] = (struct rte_flow_item){ 5362 .type = RTE_FLOW_ITEM_TYPE_TCP, 5363 .spec = &attributes->l4, 5364 .mask = &attributes->l4_mask, 5365 }; 5366 break; 5367 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 5368 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 5369 break; 5370 default: 5371 DRV_LOG(ERR, "port %u invalid flow type%d", 5372 dev->data->port_id, fdir_filter->input.flow_type); 5373 rte_errno = ENOTSUP; 5374 return -rte_errno; 5375 } 5376 return 0; 5377 } 5378 5379 #define FLOW_FDIR_CMP(f1, f2, fld) \ 5380 memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld)) 5381 5382 /** 5383 * Compare two FDIR flows. If items and actions are identical, the two flows are 5384 * regarded as same. 5385 * 5386 * @param dev 5387 * Pointer to Ethernet device. 5388 * @param f1 5389 * FDIR flow to compare. 5390 * @param f2 5391 * FDIR flow to compare. 5392 * 5393 * @return 5394 * Zero on match, 1 otherwise. 5395 */ 5396 static int 5397 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) 5398 { 5399 if (FLOW_FDIR_CMP(f1, f2, attr) || 5400 FLOW_FDIR_CMP(f1, f2, l2) || 5401 FLOW_FDIR_CMP(f1, f2, l2_mask) || 5402 FLOW_FDIR_CMP(f1, f2, l3) || 5403 FLOW_FDIR_CMP(f1, f2, l3_mask) || 5404 FLOW_FDIR_CMP(f1, f2, l4) || 5405 FLOW_FDIR_CMP(f1, f2, l4_mask) || 5406 FLOW_FDIR_CMP(f1, f2, actions[0].type)) 5407 return 1; 5408 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && 5409 FLOW_FDIR_CMP(f1, f2, queue)) 5410 return 1; 5411 return 0; 5412 } 5413 5414 /** 5415 * Search device flow list to find out a matched FDIR flow. 5416 * 5417 * @param dev 5418 * Pointer to Ethernet device. 
5419 * @param fdir_flow 5420 * FDIR flow to lookup. 5421 * 5422 * @return 5423 * Index of flow if found, 0 otherwise. 5424 */ 5425 static uint32_t 5426 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow) 5427 { 5428 struct mlx5_priv *priv = dev->data->dev_private; 5429 uint32_t flow_idx = 0; 5430 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5431 5432 MLX5_ASSERT(fdir_flow); 5433 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5434 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) { 5435 DRV_LOG(DEBUG, "port %u found FDIR flow %u", 5436 dev->data->port_id, flow_idx); 5437 flow_idx = priv_fdir_flow->rix_flow; 5438 break; 5439 } 5440 } 5441 return flow_idx; 5442 } 5443 5444 /** 5445 * Add new flow director filter and store it in list. 5446 * 5447 * @param dev 5448 * Pointer to Ethernet device. 5449 * @param fdir_filter 5450 * Flow director filter to add. 5451 * 5452 * @return 5453 * 0 on success, a negative errno value otherwise and rte_errno is set. 5454 */ 5455 static int 5456 flow_fdir_filter_add(struct rte_eth_dev *dev, 5457 const struct rte_eth_fdir_filter *fdir_filter) 5458 { 5459 struct mlx5_priv *priv = dev->data->dev_private; 5460 struct mlx5_fdir *fdir_flow; 5461 struct rte_flow *flow; 5462 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5463 uint32_t flow_idx; 5464 int ret; 5465 5466 fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, 5467 SOCKET_ID_ANY); 5468 if (!fdir_flow) { 5469 rte_errno = ENOMEM; 5470 return -rte_errno; 5471 } 5472 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow); 5473 if (ret) 5474 goto error; 5475 flow_idx = flow_fdir_filter_lookup(dev, fdir_flow); 5476 if (flow_idx) { 5477 rte_errno = EEXIST; 5478 goto error; 5479 } 5480 priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, 5481 sizeof(struct mlx5_fdir_flow), 5482 0, SOCKET_ID_ANY); 5483 if (!priv_fdir_flow) { 5484 rte_errno = ENOMEM; 5485 goto error; 5486 } 5487 flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr, 5488 fdir_flow->items, fdir_flow->actions, true, 5489 NULL); 5490 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); 5491 if (!flow) 5492 goto error; 5493 flow->fdir = 1; 5494 priv_fdir_flow->fdir = fdir_flow; 5495 priv_fdir_flow->rix_flow = flow_idx; 5496 LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next); 5497 DRV_LOG(DEBUG, "port %u created FDIR flow %p", 5498 dev->data->port_id, (void *)flow); 5499 return 0; 5500 error: 5501 mlx5_free(priv_fdir_flow); 5502 mlx5_free(fdir_flow); 5503 return -rte_errno; 5504 } 5505 5506 /** 5507 * Delete specific filter. 5508 * 5509 * @param dev 5510 * Pointer to Ethernet device. 5511 * @param fdir_filter 5512 * Filter to be deleted. 5513 * 5514 * @return 5515 * 0 on success, a negative errno value otherwise and rte_errno is set. 
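 *
 * The filter is converted with flow_fdir_filter_convert() and matched
 * against the stored entries with flow_fdir_cmp(); only an exact
 * attribute/item/action match is removed, otherwise the call is a no-op
 * returning 0.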
5516 */ 5517 static int 5518 flow_fdir_filter_delete(struct rte_eth_dev *dev, 5519 const struct rte_eth_fdir_filter *fdir_filter) 5520 { 5521 struct mlx5_priv *priv = dev->data->dev_private; 5522 uint32_t flow_idx; 5523 struct mlx5_fdir fdir_flow = { 5524 .attr.group = 0, 5525 }; 5526 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5527 int ret; 5528 5529 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow); 5530 if (ret) 5531 return -rte_errno; 5532 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { 5533 /* Find the fdir in priv list */ 5534 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow)) 5535 break; 5536 } 5537 if (!priv_fdir_flow) 5538 return 0; 5539 LIST_REMOVE(priv_fdir_flow, next); 5540 flow_idx = priv_fdir_flow->rix_flow; 5541 flow_list_destroy(dev, &priv->flows, flow_idx); 5542 mlx5_free(priv_fdir_flow->fdir); 5543 mlx5_free(priv_fdir_flow); 5544 DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", 5545 dev->data->port_id, flow_idx); 5546 return 0; 5547 } 5548 5549 /** 5550 * Update queue for specific filter. 5551 * 5552 * @param dev 5553 * Pointer to Ethernet device. 5554 * @param fdir_filter 5555 * Filter to be updated. 5556 * 5557 * @return 5558 * 0 on success, a negative errno value otherwise and rte_errno is set. 5559 */ 5560 static int 5561 flow_fdir_filter_update(struct rte_eth_dev *dev, 5562 const struct rte_eth_fdir_filter *fdir_filter) 5563 { 5564 int ret; 5565 5566 ret = flow_fdir_filter_delete(dev, fdir_filter); 5567 if (ret) 5568 return ret; 5569 return flow_fdir_filter_add(dev, fdir_filter); 5570 } 5571 5572 /** 5573 * Flush all filters. 5574 * 5575 * @param dev 5576 * Pointer to Ethernet device. 5577 */ 5578 static void 5579 flow_fdir_filter_flush(struct rte_eth_dev *dev) 5580 { 5581 struct mlx5_priv *priv = dev->data->dev_private; 5582 struct mlx5_fdir_flow *priv_fdir_flow = NULL; 5583 5584 while (!LIST_EMPTY(&priv->fdir_flows)) { 5585 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); 5586 LIST_REMOVE(priv_fdir_flow, next); 5587 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); 5588 mlx5_free(priv_fdir_flow->fdir); 5589 mlx5_free(priv_fdir_flow); 5590 } 5591 } 5592 5593 /** 5594 * Get flow director information. 5595 * 5596 * @param dev 5597 * Pointer to Ethernet device. 5598 * @param[out] fdir_info 5599 * Resulting flow director information. 5600 */ 5601 static void 5602 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 5603 { 5604 struct rte_eth_fdir_masks *mask = 5605 &dev->data->dev_conf.fdir_conf.mask; 5606 5607 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; 5608 fdir_info->guarant_spc = 0; 5609 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 5610 fdir_info->max_flexpayload = 0; 5611 fdir_info->flow_types_mask[0] = 0; 5612 fdir_info->flex_payload_unit = 0; 5613 fdir_info->max_flex_payload_segment_num = 0; 5614 fdir_info->flex_payload_limit = 0; 5615 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 5616 } 5617 5618 /** 5619 * Deal with flow director operations. 5620 * 5621 * @param dev 5622 * Pointer to Ethernet device. 5623 * @param filter_op 5624 * Operation to perform. 5625 * @param arg 5626 * Pointer to operation-specific structure. 5627 * 5628 * @return 5629 * 0 on success, a negative errno value otherwise and rte_errno is set. 
5630 */ 5631 static int 5632 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 5633 void *arg) 5634 { 5635 enum rte_fdir_mode fdir_mode = 5636 dev->data->dev_conf.fdir_conf.mode; 5637 5638 if (filter_op == RTE_ETH_FILTER_NOP) 5639 return 0; 5640 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 5641 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 5642 DRV_LOG(ERR, "port %u flow director mode %d not supported", 5643 dev->data->port_id, fdir_mode); 5644 rte_errno = EINVAL; 5645 return -rte_errno; 5646 } 5647 switch (filter_op) { 5648 case RTE_ETH_FILTER_ADD: 5649 return flow_fdir_filter_add(dev, arg); 5650 case RTE_ETH_FILTER_UPDATE: 5651 return flow_fdir_filter_update(dev, arg); 5652 case RTE_ETH_FILTER_DELETE: 5653 return flow_fdir_filter_delete(dev, arg); 5654 case RTE_ETH_FILTER_FLUSH: 5655 flow_fdir_filter_flush(dev); 5656 break; 5657 case RTE_ETH_FILTER_INFO: 5658 flow_fdir_info_get(dev, arg); 5659 break; 5660 default: 5661 DRV_LOG(DEBUG, "port %u unknown operation %u", 5662 dev->data->port_id, filter_op); 5663 rte_errno = EINVAL; 5664 return -rte_errno; 5665 } 5666 return 0; 5667 } 5668 5669 /** 5670 * Manage filter operations. 5671 * 5672 * @param dev 5673 * Pointer to Ethernet device structure. 5674 * @param filter_type 5675 * Filter type. 5676 * @param filter_op 5677 * Operation to perform. 5678 * @param arg 5679 * Pointer to operation-specific structure. 5680 * 5681 * @return 5682 * 0 on success, a negative errno value otherwise and rte_errno is set. 5683 */ 5684 int 5685 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 5686 enum rte_filter_type filter_type, 5687 enum rte_filter_op filter_op, 5688 void *arg) 5689 { 5690 switch (filter_type) { 5691 case RTE_ETH_FILTER_GENERIC: 5692 if (filter_op != RTE_ETH_FILTER_GET) { 5693 rte_errno = EINVAL; 5694 return -rte_errno; 5695 } 5696 *(const void **)arg = &mlx5_flow_ops; 5697 return 0; 5698 case RTE_ETH_FILTER_FDIR: 5699 return flow_fdir_ctrl_func(dev, filter_op, arg); 5700 default: 5701 DRV_LOG(ERR, "port %u filter type (%d) not supported", 5702 dev->data->port_id, filter_type); 5703 rte_errno = ENOTSUP; 5704 return -rte_errno; 5705 } 5706 return 0; 5707 } 5708 5709 /** 5710 * Create the needed meter and suffix tables. 5711 * 5712 * @param[in] dev 5713 * Pointer to Ethernet device. 5714 * @param[in] fm 5715 * Pointer to the flow meter. 5716 * 5717 * @return 5718 * Pointer to table set on success, NULL otherwise. 5719 */ 5720 struct mlx5_meter_domains_infos * 5721 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev, 5722 const struct mlx5_flow_meter *fm) 5723 { 5724 const struct mlx5_flow_driver_ops *fops; 5725 5726 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 5727 return fops->create_mtr_tbls(dev, fm); 5728 } 5729 5730 /** 5731 * Destroy the meter table set. 5732 * 5733 * @param[in] dev 5734 * Pointer to Ethernet device. 5735 * @param[in] tbl 5736 * Pointer to the meter table set. 5737 * 5738 * @return 5739 * 0 on success. 5740 */ 5741 int 5742 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev, 5743 struct mlx5_meter_domains_infos *tbls) 5744 { 5745 const struct mlx5_flow_driver_ops *fops; 5746 5747 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 5748 return fops->destroy_mtr_tbls(dev, tbls); 5749 } 5750 5751 /** 5752 * Create policer rules. 5753 * 5754 * @param[in] dev 5755 * Pointer to Ethernet device. 5756 * @param[in] fm 5757 * Pointer to flow meter structure. 5758 * @param[in] attr 5759 * Pointer to flow attributes. 5760 * 5761 * @return 5762 * 0 on success, -1 otherwise. 
5763 */ 5764 int 5765 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev, 5766 struct mlx5_flow_meter *fm, 5767 const struct rte_flow_attr *attr) 5768 { 5769 const struct mlx5_flow_driver_ops *fops; 5770 5771 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 5772 return fops->create_policer_rules(dev, fm, attr); 5773 } 5774 5775 /** 5776 * Destroy policer rules. 5777 * 5778 * @param[in] fm 5779 * Pointer to flow meter structure. 5780 * @param[in] attr 5781 * Pointer to flow attributes. 5782 * 5783 * @return 5784 * 0 on success, -1 otherwise. 5785 */ 5786 int 5787 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev, 5788 struct mlx5_flow_meter *fm, 5789 const struct rte_flow_attr *attr) 5790 { 5791 const struct mlx5_flow_driver_ops *fops; 5792 5793 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 5794 return fops->destroy_policer_rules(dev, fm, attr); 5795 } 5796 5797 /** 5798 * Allocate a counter. 5799 * 5800 * @param[in] dev 5801 * Pointer to Ethernet device structure. 5802 * 5803 * @return 5804 * Index to allocated counter on success, 0 otherwise. 5805 */ 5806 uint32_t 5807 mlx5_counter_alloc(struct rte_eth_dev *dev) 5808 { 5809 const struct mlx5_flow_driver_ops *fops; 5810 struct rte_flow_attr attr = { .transfer = 0 }; 5811 5812 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 5813 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 5814 return fops->counter_alloc(dev); 5815 } 5816 DRV_LOG(ERR, 5817 "port %u counter allocate is not supported.", 5818 dev->data->port_id); 5819 return 0; 5820 } 5821 5822 /** 5823 * Free a counter. 5824 * 5825 * @param[in] dev 5826 * Pointer to Ethernet device structure. 5827 * @param[in] cnt 5828 * Index of the counter to be freed. 5829 */ 5830 void 5831 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt) 5832 { 5833 const struct mlx5_flow_driver_ops *fops; 5834 struct rte_flow_attr attr = { .transfer = 0 }; 5835 5836 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 5837 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 5838 fops->counter_free(dev, cnt); 5839 return; 5840 } 5841 DRV_LOG(ERR, 5842 "port %u counter free is not supported.", 5843 dev->data->port_id); 5844 } 5845 5846 /** 5847 * Query counter statistics. 5848 * 5849 * @param[in] dev 5850 * Pointer to Ethernet device structure. 5851 * @param[in] cnt 5852 * Index to counter to query. 5853 * @param[in] clear 5854 * Set to clear counter statistics. 5855 * @param[out] pkts 5856 * Where to save the number of packet hits of the counter. 5857 * @param[out] bytes 5858 * Where to save the number of byte hits of the counter. 5859 * 5860 * @return 5861 * 0 on success, a negative errno value otherwise. 5862 */ 5863 int 5864 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, 5865 bool clear, uint64_t *pkts, uint64_t *bytes) 5866 { 5867 const struct mlx5_flow_driver_ops *fops; 5868 struct rte_flow_attr attr = { .transfer = 0 }; 5869 5870 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 5871 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 5872 return fops->counter_query(dev, cnt, clear, pkts, bytes); 5873 } 5874 DRV_LOG(ERR, 5875 "port %u counter query is not supported.", 5876 dev->data->port_id); 5877 return -ENOTSUP; 5878 } 5879 5880 #define MLX5_POOL_QUERY_FREQ_US 1000000 5881 5882 /** 5883 * Get the number of all valid pools. 5884 * 5885 * @param[in] sh 5886 * Pointer to mlx5_dev_ctx_shared object. 5887 * 5888 * @return 5889 * The number of all valid pools.
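 *
 * The result is used by mlx5_set_query_alarm() to spread the query budget
 * evenly over the pools: with MLX5_POOL_QUERY_FREQ_US equal to 1000000 and,
 * say, 4 valid pools, the alarm is re-armed every 1000000 / 4 = 250000 us,
 * so each pool is queried roughly once per second.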
5890 */ 5891 static uint32_t 5892 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh) 5893 { 5894 int i; 5895 uint32_t pools_n = 0; 5896 5897 for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) 5898 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid); 5899 return pools_n; 5900 } 5901 5902 /** 5903 * Set the periodic procedure for triggering asynchronous batch queries for all 5904 * the counter pools. 5905 * 5906 * @param[in] sh 5907 * Pointer to mlx5_dev_ctx_shared object. 5908 */ 5909 void 5910 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) 5911 { 5912 uint32_t pools_n, us; 5913 5914 pools_n = mlx5_get_all_valid_pool_count(sh); 5915 us = MLX5_POOL_QUERY_FREQ_US / pools_n; 5916 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); 5917 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { 5918 sh->cmng.query_thread_on = 0; 5919 DRV_LOG(ERR, "Cannot reinitialize query alarm"); 5920 } else { 5921 sh->cmng.query_thread_on = 1; 5922 } 5923 } 5924 5925 /** 5926 * The periodic procedure for triggering asynchronous batch queries for all the 5927 * counter pools. This function is probably called by the host thread. 5928 * 5929 * @param[in] arg 5930 * The parameter for the alarm process. 5931 */ 5932 void 5933 mlx5_flow_query_alarm(void *arg) 5934 { 5935 struct mlx5_dev_ctx_shared *sh = arg; 5936 struct mlx5_devx_obj *dcs; 5937 uint16_t offset; 5938 int ret; 5939 uint8_t batch = sh->cmng.batch; 5940 uint8_t age = sh->cmng.age; 5941 uint16_t pool_index = sh->cmng.pool_index; 5942 struct mlx5_pools_container *cont; 5943 struct mlx5_flow_counter_pool *pool; 5944 int cont_loop = MLX5_CCONT_TYPE_MAX; 5945 5946 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) 5947 goto set_alarm; 5948 next_container: 5949 cont = MLX5_CNT_CONTAINER(sh, batch, age); 5950 rte_spinlock_lock(&cont->resize_sl); 5951 if (!cont->pools) { 5952 rte_spinlock_unlock(&cont->resize_sl); 5953 /* Check if all the containers are empty. */ 5954 if (unlikely(--cont_loop == 0)) 5955 goto set_alarm; 5956 batch ^= 0x1; 5957 pool_index = 0; 5958 if (batch == 0 && pool_index == 0) { 5959 age ^= 0x1; 5960 sh->cmng.batch = batch; 5961 sh->cmng.age = age; 5962 } 5963 goto next_container; 5964 } 5965 pool = cont->pools[pool_index]; 5966 rte_spinlock_unlock(&cont->resize_sl); 5967 if (pool->raw_hw) 5968 /* There is a pool query in progress. */ 5969 goto set_alarm; 5970 pool->raw_hw = 5971 LIST_FIRST(&sh->cmng.free_stat_raws); 5972 if (!pool->raw_hw) 5973 /* No free counter statistics raw memory. */ 5974 goto set_alarm; 5975 dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read 5976 (&pool->a64_dcs); 5977 offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; 5978 /* 5979 * Identify the counters released between query trigger and query 5980 * handle more efficiently. The counters released in this gap period 5981 * should wait for a new round of query as the newly arrived packets 5982 * will not be taken into account.
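 * Incrementing pool->query_gen below flips the pool->counters[] list that
 * newly released counters are queued on, while the completion handler
 * (mlx5_flow_async_pool_query_handle) only returns the previous generation
 * (query_gen ^ 1) to the container free list.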
5983 */ 5984 pool->query_gen++; 5985 ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - 5986 offset, NULL, NULL, 5987 pool->raw_hw->mem_mng->dm->id, 5988 (void *)(uintptr_t) 5989 (pool->raw_hw->data + offset), 5990 sh->devx_comp, 5991 (uint64_t)(uintptr_t)pool); 5992 if (ret) { 5993 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" 5994 " %d", pool->min_dcs->id); 5995 pool->raw_hw = NULL; 5996 goto set_alarm; 5997 } 5998 pool->raw_hw->min_dcs_id = dcs->id; 5999 LIST_REMOVE(pool->raw_hw, next); 6000 sh->cmng.pending_queries++; 6001 pool_index++; 6002 if (pool_index >= rte_atomic16_read(&cont->n_valid)) { 6003 batch ^= 0x1; 6004 pool_index = 0; 6005 if (batch == 0 && pool_index == 0) 6006 age ^= 0x1; 6007 } 6008 set_alarm: 6009 sh->cmng.batch = batch; 6010 sh->cmng.pool_index = pool_index; 6011 sh->cmng.age = age; 6012 mlx5_set_query_alarm(sh); 6013 } 6014 6015 /** 6016 * Check for new aged flows in the counter pool and trigger the aging event callback. 6017 * 6018 * @param[in] sh 6019 * Pointer to mlx5_dev_ctx_shared object. 6020 * @param[in] pool 6021 * Pointer to the current counter pool. 6022 */ 6023 static void 6024 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, 6025 struct mlx5_flow_counter_pool *pool) 6026 { 6027 struct mlx5_priv *priv; 6028 struct mlx5_flow_counter *cnt; 6029 struct mlx5_age_info *age_info; 6030 struct mlx5_age_param *age_param; 6031 struct mlx5_counter_stats_raw *cur = pool->raw_hw; 6032 struct mlx5_counter_stats_raw *prev = pool->raw; 6033 uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10); 6034 uint32_t i; 6035 6036 for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { 6037 cnt = MLX5_POOL_GET_CNT(pool, i); 6038 age_param = MLX5_CNT_TO_AGE(cnt); 6039 if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE) 6040 continue; 6041 if (cur->data[i].hits != prev->data[i].hits) { 6042 age_param->expire = curr + age_param->timeout; 6043 continue; 6044 } 6045 if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2)) 6046 continue; 6047 /* 6048 * Hold the lock first; otherwise, if the release 6049 * happens between setting the state to AGE_TMOUT 6050 * and the tailq operation, the release procedure 6051 * may delete a non-existent tailq node. 6052 */ 6053 priv = rte_eth_devices[age_param->port_id].data->dev_private; 6054 age_info = GET_PORT_AGE_INFO(priv); 6055 rte_spinlock_lock(&age_info->aged_sl); 6056 /* If the cmpset fails, the counter was released meanwhile. */ 6057 if (rte_atomic16_cmpset((volatile uint16_t *) 6058 &age_param->state, 6059 AGE_CANDIDATE, 6060 AGE_TMOUT) == 6061 AGE_CANDIDATE) { 6062 TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); 6063 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 6064 } 6065 rte_spinlock_unlock(&age_info->aged_sl); 6066 } 6067 for (i = 0; i < sh->max_port; i++) { 6068 age_info = &sh->port[i].age_info; 6069 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) 6070 continue; 6071 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) 6072 _rte_eth_dev_callback_process 6073 (&rte_eth_devices[sh->port[i].devx_ih_port_id], 6074 RTE_ETH_EVENT_FLOW_AGED, NULL); 6075 age_info->flags = 0; 6076 } 6077 } 6078 6079 /** 6080 * Handler for the HW response with ready values from an asynchronous batch 6081 * query. This function is probably called by the host thread. 6082 * 6083 * @param[in] sh 6084 * The pointer to the shared device context. 6085 * @param[in] async_id 6086 * The Devx async ID. 6087 * @param[in] status 6088 * The status of the completion.
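 *
 * On success the freshly filled raw buffer becomes pool->raw, the previous
 * one is returned to the free_stat_raws list, aging pools are scanned by
 * mlx5_flow_aging_check(), and counters released during the previous query
 * generation are moved back to the container free list. On error the new
 * raw buffer is discarded and the old data is kept.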
6089 */ 6090 void 6091 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, 6092 uint64_t async_id, int status) 6093 { 6094 struct mlx5_flow_counter_pool *pool = 6095 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; 6096 struct mlx5_counter_stats_raw *raw_to_free; 6097 uint8_t age = !!IS_AGE_POOL(pool); 6098 uint8_t query_gen = pool->query_gen ^ 1; 6099 struct mlx5_pools_container *cont = 6100 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age); 6101 6102 if (unlikely(status)) { 6103 raw_to_free = pool->raw_hw; 6104 } else { 6105 raw_to_free = pool->raw; 6106 if (IS_AGE_POOL(pool)) 6107 mlx5_flow_aging_check(sh, pool); 6108 rte_spinlock_lock(&pool->sl); 6109 pool->raw = pool->raw_hw; 6110 rte_spinlock_unlock(&pool->sl); 6111 /* Be sure the new raw counters data is updated in memory. */ 6112 rte_cio_wmb(); 6113 if (!TAILQ_EMPTY(&pool->counters[query_gen])) { 6114 rte_spinlock_lock(&cont->csl); 6115 TAILQ_CONCAT(&cont->counters, 6116 &pool->counters[query_gen], next); 6117 rte_spinlock_unlock(&cont->csl); 6118 } 6119 } 6120 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); 6121 pool->raw_hw = NULL; 6122 sh->cmng.pending_queries--; 6123 } 6124 6125 /** 6126 * Translate the rte_flow group index to HW table value. 6127 * 6128 * @param[in] attributes 6129 * Pointer to flow attributes. 6130 * @param[in] external 6131 * Whether the flow rule is created by a request external to the PMD. 6132 * @param[in] group 6133 * rte_flow group index value. 6134 * @param[in] fdb_def_rule 6135 * Whether the FDB default jump to table 1 is configured. 6136 * @param[out] table 6137 * HW table value. 6138 * @param[out] error 6139 * Pointer to error structure. 6140 * 6141 * @return 6142 * 0 on success, a negative errno value otherwise and rte_errno is set. 6143 */ 6144 int 6145 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, 6146 uint32_t group, bool fdb_def_rule, uint32_t *table, 6147 struct rte_flow_error *error) 6148 { 6149 if (attributes->transfer && external && fdb_def_rule) { 6150 if (group == UINT32_MAX) 6151 return rte_flow_error_set 6152 (error, EINVAL, 6153 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 6154 NULL, 6155 "group index not supported"); 6156 *table = group + 1; 6157 } else { 6158 *table = group; 6159 } 6160 return 0; 6161 } 6162 6163 /** 6164 * Discover availability of metadata reg_c's. 6165 * 6166 * Iteratively use test flows to check availability. 6167 * 6168 * @param[in] dev 6169 * Pointer to the Ethernet device structure. 6170 * 6171 * @return 6172 * 0 on success, a negative errno value otherwise and rte_errno is set. 6173 */ 6174 int 6175 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) 6176 { 6177 struct mlx5_priv *priv = dev->data->dev_private; 6178 struct mlx5_dev_config *config = &priv->config; 6179 enum modify_reg idx; 6180 int n = 0; 6181 6182 /* reg_c[0] and reg_c[1] are reserved. */ 6183 config->flow_mreg_c[n++] = REG_C_0; 6184 config->flow_mreg_c[n++] = REG_C_1; 6185 /* Discover availability of other reg_c's.
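 * For each candidate register REG_C_2..REG_C_7 a temporary flow copying
 * REG_C_1 into the candidate is created (and, when the port is not started
 * yet, explicitly applied); candidates that succeed are recorded in
 * config->flow_mreg_c[], the remaining slots are set to REG_NONE.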
*/ 6186 for (idx = REG_C_2; idx <= REG_C_7; ++idx) { 6187 struct rte_flow_attr attr = { 6188 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, 6189 .priority = MLX5_FLOW_PRIO_RSVD, 6190 .ingress = 1, 6191 }; 6192 struct rte_flow_item items[] = { 6193 [0] = { 6194 .type = RTE_FLOW_ITEM_TYPE_END, 6195 }, 6196 }; 6197 struct rte_flow_action actions[] = { 6198 [0] = { 6199 .type = (enum rte_flow_action_type) 6200 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, 6201 .conf = &(struct mlx5_flow_action_copy_mreg){ 6202 .src = REG_C_1, 6203 .dst = idx, 6204 }, 6205 }, 6206 [1] = { 6207 .type = RTE_FLOW_ACTION_TYPE_JUMP, 6208 .conf = &(struct rte_flow_action_jump){ 6209 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, 6210 }, 6211 }, 6212 [2] = { 6213 .type = RTE_FLOW_ACTION_TYPE_END, 6214 }, 6215 }; 6216 uint32_t flow_idx; 6217 struct rte_flow *flow; 6218 struct rte_flow_error error; 6219 6220 if (!config->dv_flow_en) 6221 break; 6222 /* Create internal flow, validation skips copy action. */ 6223 flow_idx = flow_list_create(dev, NULL, &attr, items, 6224 actions, false, &error); 6225 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], 6226 flow_idx); 6227 if (!flow) 6228 continue; 6229 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL)) 6230 config->flow_mreg_c[n++] = idx; 6231 flow_list_destroy(dev, NULL, flow_idx); 6232 } 6233 for (; n < MLX5_MREG_C_NUM; ++n) 6234 config->flow_mreg_c[n] = REG_NONE; 6235 return 0; 6236 } 6237 6238 /** 6239 * Dump flow raw HW data to a file. 6240 * 6241 * @param[in] dev 6242 * The pointer to Ethernet device. 6243 * @param[in] file 6244 * A pointer to a file for output. 6245 * @param[out] error 6246 * Perform verbose error reporting if not NULL. PMDs initialize this 6247 * structure in case of error only. 6248 * @return 6249 * 0 on success, a negative value otherwise. 6250 */ 6251 int 6252 mlx5_flow_dev_dump(struct rte_eth_dev *dev, 6253 FILE *file, 6254 struct rte_flow_error *error __rte_unused) 6255 { 6256 struct mlx5_priv *priv = dev->data->dev_private; 6257 struct mlx5_dev_ctx_shared *sh = priv->sh; 6258 6259 return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain, 6260 sh->tx_domain, file); 6261 } 6262 6263 /** 6264 * Get aged-out flows. 6265 * 6266 * @param[in] dev 6267 * Pointer to the Ethernet device structure. 6268 * @param[in] contexts 6269 * The address of an array of pointers to the aged-out flow contexts. 6270 * @param[in] nb_contexts 6271 * The length of the context array. 6272 * @param[out] error 6273 * Perform verbose error reporting if not NULL. Initialized in case of 6274 * error only. 6275 * 6276 * @return 6277 * The number of aged-out flow contexts returned on success, a negative errno value otherwise. 6278 * If nb_contexts is 0, return the number of all aged-out contexts. 6279 * If nb_contexts is not 0, return the number of aged-out flows reported 6280 * in the context array. 6281 */ 6282 int 6283 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, 6284 uint32_t nb_contexts, struct rte_flow_error *error) 6285 { 6286 const struct mlx5_flow_driver_ops *fops; 6287 struct rte_flow_attr attr = { .transfer = 0 }; 6288 6289 if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) { 6290 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV); 6291 return fops->get_aged_flows(dev, contexts, nb_contexts, 6292 error); 6293 } 6294 DRV_LOG(ERR, 6295 "port %u get aged flows is not supported.", 6296 dev->data->port_id); 6297 return -ENOTSUP; 6298 } 6299
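/*
 * Note: applications normally reach mlx5_flow_get_aged_flows() through the
 * generic rte_flow_get_aged_flows() API, typically after receiving the
 * RTE_ETH_EVENT_FLOW_AGED event raised by mlx5_flow_aging_check() above.
 * A minimal, illustrative sketch (the buffer size is arbitrary):
 *
 * @code
 * void *contexts[64];
 * struct rte_flow_error error;
 * int n = rte_flow_get_aged_flows(port_id, contexts, RTE_DIM(contexts),
 *				   &error);
 * @endcode
 *
 * Each returned context is the one attached via the rte_flow_action_age
 * configuration of the corresponding flow.
 */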