1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2016 6WIND S.A. 3 * Copyright 2016 Mellanox Technologies, Ltd 4 */ 5 6 #include <sys/queue.h> 7 #include <stdint.h> 8 #include <string.h> 9 10 /* Verbs header. */ 11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 12 #ifdef PEDANTIC 13 #pragma GCC diagnostic ignored "-Wpedantic" 14 #endif 15 #include <infiniband/verbs.h> 16 #ifdef PEDANTIC 17 #pragma GCC diagnostic error "-Wpedantic" 18 #endif 19 20 #include <rte_common.h> 21 #include <rte_ether.h> 22 #include <rte_eth_ctrl.h> 23 #include <rte_ethdev_driver.h> 24 #include <rte_flow.h> 25 #include <rte_flow_driver.h> 26 #include <rte_malloc.h> 27 #include <rte_ip.h> 28 29 #include "mlx5.h" 30 #include "mlx5_defs.h" 31 #include "mlx5_prm.h" 32 #include "mlx5_glue.h" 33 34 /* Flow priority for control plane flows. */ 35 #define MLX5_CTRL_FLOW_PRIORITY 1 36 37 /* Internet Protocol versions. */ 38 #define MLX5_IPV4 4 39 #define MLX5_IPV6 6 40 #define MLX5_GRE 47 41 42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 43 struct ibv_flow_spec_counter_action { 44 int dummy; 45 }; 46 #endif 47 48 /* Dev ops structure defined in mlx5.c */ 49 extern const struct eth_dev_ops mlx5_dev_ops; 50 extern const struct eth_dev_ops mlx5_dev_ops_isolate; 51 52 /** Structure give to the conversion functions. */ 53 struct mlx5_flow_data { 54 struct rte_eth_dev *dev; /** Ethernet device. */ 55 struct mlx5_flow_parse *parser; /** Parser context. */ 56 struct rte_flow_error *error; /** Error context. */ 57 }; 58 59 static int 60 mlx5_flow_create_eth(const struct rte_flow_item *item, 61 const void *default_mask, 62 struct mlx5_flow_data *data); 63 64 static int 65 mlx5_flow_create_vlan(const struct rte_flow_item *item, 66 const void *default_mask, 67 struct mlx5_flow_data *data); 68 69 static int 70 mlx5_flow_create_ipv4(const struct rte_flow_item *item, 71 const void *default_mask, 72 struct mlx5_flow_data *data); 73 74 static int 75 mlx5_flow_create_ipv6(const struct rte_flow_item *item, 76 const void *default_mask, 77 struct mlx5_flow_data *data); 78 79 static int 80 mlx5_flow_create_udp(const struct rte_flow_item *item, 81 const void *default_mask, 82 struct mlx5_flow_data *data); 83 84 static int 85 mlx5_flow_create_tcp(const struct rte_flow_item *item, 86 const void *default_mask, 87 struct mlx5_flow_data *data); 88 89 static int 90 mlx5_flow_create_vxlan(const struct rte_flow_item *item, 91 const void *default_mask, 92 struct mlx5_flow_data *data); 93 94 static int 95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item, 96 const void *default_mask, 97 struct mlx5_flow_data *data); 98 99 static int 100 mlx5_flow_create_gre(const struct rte_flow_item *item, 101 const void *default_mask, 102 struct mlx5_flow_data *data); 103 104 struct mlx5_flow_parse; 105 106 static void 107 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src, 108 unsigned int size); 109 110 static int 111 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id); 112 113 static int 114 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser); 115 116 /* Hash RX queue types. */ 117 enum hash_rxq_type { 118 HASH_RXQ_TCPV4, 119 HASH_RXQ_UDPV4, 120 HASH_RXQ_IPV4, 121 HASH_RXQ_TCPV6, 122 HASH_RXQ_UDPV6, 123 HASH_RXQ_IPV6, 124 HASH_RXQ_ETH, 125 HASH_RXQ_TUNNEL, 126 }; 127 128 /* Initialization data for hash RX queue. */ 129 struct hash_rxq_init { 130 uint64_t hash_fields; /* Fields that participate in the hash. 
*/ 131 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */ 132 unsigned int flow_priority; /* Flow priority to use. */ 133 unsigned int ip_version; /* Internet protocol. */ 134 }; 135 136 /* Initialization data for hash RX queues. */ 137 const struct hash_rxq_init hash_rxq_init[] = { 138 [HASH_RXQ_TCPV4] = { 139 .hash_fields = (IBV_RX_HASH_SRC_IPV4 | 140 IBV_RX_HASH_DST_IPV4 | 141 IBV_RX_HASH_SRC_PORT_TCP | 142 IBV_RX_HASH_DST_PORT_TCP), 143 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP, 144 .flow_priority = 0, 145 .ip_version = MLX5_IPV4, 146 }, 147 [HASH_RXQ_UDPV4] = { 148 .hash_fields = (IBV_RX_HASH_SRC_IPV4 | 149 IBV_RX_HASH_DST_IPV4 | 150 IBV_RX_HASH_SRC_PORT_UDP | 151 IBV_RX_HASH_DST_PORT_UDP), 152 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP, 153 .flow_priority = 0, 154 .ip_version = MLX5_IPV4, 155 }, 156 [HASH_RXQ_IPV4] = { 157 .hash_fields = (IBV_RX_HASH_SRC_IPV4 | 158 IBV_RX_HASH_DST_IPV4), 159 .dpdk_rss_hf = (ETH_RSS_IPV4 | 160 ETH_RSS_FRAG_IPV4), 161 .flow_priority = 1, 162 .ip_version = MLX5_IPV4, 163 }, 164 [HASH_RXQ_TCPV6] = { 165 .hash_fields = (IBV_RX_HASH_SRC_IPV6 | 166 IBV_RX_HASH_DST_IPV6 | 167 IBV_RX_HASH_SRC_PORT_TCP | 168 IBV_RX_HASH_DST_PORT_TCP), 169 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP, 170 .flow_priority = 0, 171 .ip_version = MLX5_IPV6, 172 }, 173 [HASH_RXQ_UDPV6] = { 174 .hash_fields = (IBV_RX_HASH_SRC_IPV6 | 175 IBV_RX_HASH_DST_IPV6 | 176 IBV_RX_HASH_SRC_PORT_UDP | 177 IBV_RX_HASH_DST_PORT_UDP), 178 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP, 179 .flow_priority = 0, 180 .ip_version = MLX5_IPV6, 181 }, 182 [HASH_RXQ_IPV6] = { 183 .hash_fields = (IBV_RX_HASH_SRC_IPV6 | 184 IBV_RX_HASH_DST_IPV6), 185 .dpdk_rss_hf = (ETH_RSS_IPV6 | 186 ETH_RSS_FRAG_IPV6), 187 .flow_priority = 1, 188 .ip_version = MLX5_IPV6, 189 }, 190 [HASH_RXQ_ETH] = { 191 .hash_fields = 0, 192 .dpdk_rss_hf = 0, 193 .flow_priority = 2, 194 }, 195 }; 196 197 /* Number of entries in hash_rxq_init[]. */ 198 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init); 199 200 /** Structure for holding counter stats. */ 201 struct mlx5_flow_counter_stats { 202 uint64_t hits; /**< Number of packets matched by the rule. */ 203 uint64_t bytes; /**< Number of bytes matched by the rule. */ 204 }; 205 206 /** Structure for Drop queue. */ 207 struct mlx5_hrxq_drop { 208 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */ 209 struct ibv_qp *qp; /**< Verbs queue pair. */ 210 struct ibv_wq *wq; /**< Verbs work queue. */ 211 struct ibv_cq *cq; /**< Verbs completion queue. */ 212 }; 213 214 /* Flows structures. */ 215 struct mlx5_flow { 216 uint64_t hash_fields; /**< Fields that participate in the hash. */ 217 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */ 218 struct ibv_flow *ibv_flow; /**< Verbs flow. */ 219 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */ 220 }; 221 222 /* Drop flows structures. */ 223 struct mlx5_flow_drop { 224 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */ 225 struct ibv_flow *ibv_flow; /**< Verbs flow. */ 226 }; 227 228 struct rte_flow { 229 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */ 230 uint32_t mark:1; /**< Set if the flow is marked. */ 231 uint32_t drop:1; /**< Drop queue. */ 232 struct rte_flow_action_rss rss_conf; /**< RSS configuration */ 233 uint16_t (*queues)[]; /**< Queues indexes to use. */ 234 uint8_t rss_key[40]; /**< copy of the RSS key. */ 235 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */ 236 struct ibv_counter_set *cs; /**< Holds the counters for the rule. 
*/ 237 struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */ 238 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)]; 239 /**< Flow with Rx queue. */ 240 }; 241 242 /** Static initializer for items. */ 243 #define ITEMS(...) \ 244 (const enum rte_flow_item_type []){ \ 245 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \ 246 } 247 248 #define IS_TUNNEL(type) ( \ 249 (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \ 250 (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \ 251 (type) == RTE_FLOW_ITEM_TYPE_GRE) 252 253 const uint32_t flow_ptype[] = { 254 [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN, 255 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE, 256 [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE, 257 }; 258 259 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12) 260 261 const uint32_t ptype_ext[] = { 262 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN | 263 RTE_PTYPE_L4_UDP, 264 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE | 265 RTE_PTYPE_L4_UDP, 266 [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE, 267 }; 268 269 /** Structure to generate a simple graph of layers supported by the NIC. */ 270 struct mlx5_flow_items { 271 /** List of possible actions for these items. */ 272 const enum rte_flow_action_type *const actions; 273 /** Bit-masks corresponding to the possibilities for the item. */ 274 const void *mask; 275 /** 276 * Default bit-masks to use when item->mask is not provided. When 277 * \default_mask is also NULL, the full supported bit-mask (\mask) is 278 * used instead. 279 */ 280 const void *default_mask; 281 /** Bit-masks size in bytes. */ 282 const unsigned int mask_sz; 283 /** 284 * Conversion function from rte_flow to NIC specific flow. 285 * 286 * @param item 287 * rte_flow item to convert. 288 * @param default_mask 289 * Default bit-masks to use when item->mask is not provided. 290 * @param data 291 * Internal structure to store the conversion. 292 * 293 * @return 294 * 0 on success, a negative errno value otherwise and rte_errno is 295 * set. 296 */ 297 int (*convert)(const struct rte_flow_item *item, 298 const void *default_mask, 299 struct mlx5_flow_data *data); 300 /** Size in bytes of the destination structure. */ 301 const unsigned int dst_sz; 302 /** List of possible following items. */ 303 const enum rte_flow_item_type *const items; 304 }; 305 306 /** Valid action for this PMD. */ 307 static const enum rte_flow_action_type valid_actions[] = { 308 RTE_FLOW_ACTION_TYPE_DROP, 309 RTE_FLOW_ACTION_TYPE_QUEUE, 310 RTE_FLOW_ACTION_TYPE_MARK, 311 RTE_FLOW_ACTION_TYPE_FLAG, 312 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 313 RTE_FLOW_ACTION_TYPE_COUNT, 314 #endif 315 RTE_FLOW_ACTION_TYPE_END, 316 }; 317 318 /** Graph of supported items and associated actions. 
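 *
 * Each entry lists the item types that may follow it, so a pattern is
 * accepted only if it walks a path through this graph starting from the
 * RTE_FLOW_ITEM_TYPE_END entry. As an illustrative sketch (application
 * side), the pattern below follows END -> ETH -> IPV4 -> UDP -> VXLAN and
 * is therefore within the graph:
 *
 * @code
 * const struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *	{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * @endcode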
*/ 319 static const struct mlx5_flow_items mlx5_flow_items[] = { 320 [RTE_FLOW_ITEM_TYPE_END] = { 321 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH, 322 RTE_FLOW_ITEM_TYPE_VXLAN, 323 RTE_FLOW_ITEM_TYPE_VXLAN_GPE, 324 RTE_FLOW_ITEM_TYPE_GRE), 325 }, 326 [RTE_FLOW_ITEM_TYPE_ETH] = { 327 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN, 328 RTE_FLOW_ITEM_TYPE_IPV4, 329 RTE_FLOW_ITEM_TYPE_IPV6), 330 .actions = valid_actions, 331 .mask = &(const struct rte_flow_item_eth){ 332 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", 333 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", 334 .type = -1, 335 }, 336 .default_mask = &rte_flow_item_eth_mask, 337 .mask_sz = sizeof(struct rte_flow_item_eth), 338 .convert = mlx5_flow_create_eth, 339 .dst_sz = sizeof(struct ibv_flow_spec_eth), 340 }, 341 [RTE_FLOW_ITEM_TYPE_VLAN] = { 342 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, 343 RTE_FLOW_ITEM_TYPE_IPV6), 344 .actions = valid_actions, 345 .mask = &(const struct rte_flow_item_vlan){ 346 .tci = -1, 347 .inner_type = -1, 348 }, 349 .default_mask = &rte_flow_item_vlan_mask, 350 .mask_sz = sizeof(struct rte_flow_item_vlan), 351 .convert = mlx5_flow_create_vlan, 352 .dst_sz = 0, 353 }, 354 [RTE_FLOW_ITEM_TYPE_IPV4] = { 355 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP, 356 RTE_FLOW_ITEM_TYPE_TCP, 357 RTE_FLOW_ITEM_TYPE_GRE), 358 .actions = valid_actions, 359 .mask = &(const struct rte_flow_item_ipv4){ 360 .hdr = { 361 .src_addr = -1, 362 .dst_addr = -1, 363 .type_of_service = -1, 364 .next_proto_id = -1, 365 }, 366 }, 367 .default_mask = &rte_flow_item_ipv4_mask, 368 .mask_sz = sizeof(struct rte_flow_item_ipv4), 369 .convert = mlx5_flow_create_ipv4, 370 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext), 371 }, 372 [RTE_FLOW_ITEM_TYPE_IPV6] = { 373 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP, 374 RTE_FLOW_ITEM_TYPE_TCP, 375 RTE_FLOW_ITEM_TYPE_GRE), 376 .actions = valid_actions, 377 .mask = &(const struct rte_flow_item_ipv6){ 378 .hdr = { 379 .src_addr = { 380 0xff, 0xff, 0xff, 0xff, 381 0xff, 0xff, 0xff, 0xff, 382 0xff, 0xff, 0xff, 0xff, 383 0xff, 0xff, 0xff, 0xff, 384 }, 385 .dst_addr = { 386 0xff, 0xff, 0xff, 0xff, 387 0xff, 0xff, 0xff, 0xff, 388 0xff, 0xff, 0xff, 0xff, 389 0xff, 0xff, 0xff, 0xff, 390 }, 391 .vtc_flow = -1, 392 .proto = -1, 393 .hop_limits = -1, 394 }, 395 }, 396 .default_mask = &rte_flow_item_ipv6_mask, 397 .mask_sz = sizeof(struct rte_flow_item_ipv6), 398 .convert = mlx5_flow_create_ipv6, 399 .dst_sz = sizeof(struct ibv_flow_spec_ipv6), 400 }, 401 [RTE_FLOW_ITEM_TYPE_UDP] = { 402 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN, 403 RTE_FLOW_ITEM_TYPE_VXLAN_GPE), 404 .actions = valid_actions, 405 .mask = &(const struct rte_flow_item_udp){ 406 .hdr = { 407 .src_port = -1, 408 .dst_port = -1, 409 }, 410 }, 411 .default_mask = &rte_flow_item_udp_mask, 412 .mask_sz = sizeof(struct rte_flow_item_udp), 413 .convert = mlx5_flow_create_udp, 414 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp), 415 }, 416 [RTE_FLOW_ITEM_TYPE_TCP] = { 417 .actions = valid_actions, 418 .mask = &(const struct rte_flow_item_tcp){ 419 .hdr = { 420 .src_port = -1, 421 .dst_port = -1, 422 }, 423 }, 424 .default_mask = &rte_flow_item_tcp_mask, 425 .mask_sz = sizeof(struct rte_flow_item_tcp), 426 .convert = mlx5_flow_create_tcp, 427 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp), 428 }, 429 [RTE_FLOW_ITEM_TYPE_GRE] = { 430 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH, 431 RTE_FLOW_ITEM_TYPE_IPV4, 432 RTE_FLOW_ITEM_TYPE_IPV6), 433 .actions = valid_actions, 434 .mask = &(const struct rte_flow_item_gre){ 435 .protocol = -1, 436 }, 437 .default_mask = &rte_flow_item_gre_mask, 438 .mask_sz = 
sizeof(struct rte_flow_item_gre), 439 .convert = mlx5_flow_create_gre, 440 .dst_sz = sizeof(struct ibv_flow_spec_tunnel), 441 }, 442 [RTE_FLOW_ITEM_TYPE_VXLAN] = { 443 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH, 444 RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */ 445 RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */ 446 .actions = valid_actions, 447 .mask = &(const struct rte_flow_item_vxlan){ 448 .vni = "\xff\xff\xff", 449 }, 450 .default_mask = &rte_flow_item_vxlan_mask, 451 .mask_sz = sizeof(struct rte_flow_item_vxlan), 452 .convert = mlx5_flow_create_vxlan, 453 .dst_sz = sizeof(struct ibv_flow_spec_tunnel), 454 }, 455 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = { 456 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH, 457 RTE_FLOW_ITEM_TYPE_IPV4, 458 RTE_FLOW_ITEM_TYPE_IPV6), 459 .actions = valid_actions, 460 .mask = &(const struct rte_flow_item_vxlan_gpe){ 461 .vni = "\xff\xff\xff", 462 }, 463 .default_mask = &rte_flow_item_vxlan_gpe_mask, 464 .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe), 465 .convert = mlx5_flow_create_vxlan_gpe, 466 .dst_sz = sizeof(struct ibv_flow_spec_tunnel), 467 }, 468 }; 469 470 /** Structure to pass to the conversion function. */ 471 struct mlx5_flow_parse { 472 uint32_t inner; /**< Verbs value, set once tunnel is encountered. */ 473 uint32_t create:1; 474 /**< Whether resources should remain after a validate. */ 475 uint32_t drop:1; /**< Target is a drop queue. */ 476 uint32_t mark:1; /**< Mark is present in the flow. */ 477 uint32_t count:1; /**< Count is present in the flow. */ 478 uint32_t mark_id; /**< Mark identifier. */ 479 struct rte_flow_action_rss rss_conf; /**< RSS configuration */ 480 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */ 481 uint8_t rss_key[40]; /**< copy of the RSS key. */ 482 enum hash_rxq_type layer; /**< Last pattern layer detected. */ 483 enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */ 484 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */ 485 struct ibv_counter_set *cs; /**< Holds the counter set for the rule */ 486 struct { 487 struct ibv_flow_attr *ibv_attr; 488 /**< Pointer to Verbs attributes. */ 489 unsigned int offset; 490 /**< Current position or total size of the attribute. */ 491 uint64_t hash_fields; /**< Verbs hash fields. */ 492 } queue[RTE_DIM(hash_rxq_init)]; 493 }; 494 495 static const struct rte_flow_ops mlx5_flow_ops = { 496 .validate = mlx5_flow_validate, 497 .create = mlx5_flow_create, 498 .destroy = mlx5_flow_destroy, 499 .flush = mlx5_flow_flush, 500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 501 .query = mlx5_flow_query, 502 #else 503 .query = NULL, 504 #endif 505 .isolate = mlx5_flow_isolate, 506 }; 507 508 /* Convert FDIR request to Generic flow. */ 509 struct mlx5_fdir { 510 struct rte_flow_attr attr; 511 struct rte_flow_action actions[2]; 512 struct rte_flow_item items[4]; 513 struct rte_flow_item_eth l2; 514 struct rte_flow_item_eth l2_mask; 515 union { 516 struct rte_flow_item_ipv4 ipv4; 517 struct rte_flow_item_ipv6 ipv6; 518 } l3; 519 union { 520 struct rte_flow_item_ipv4 ipv4; 521 struct rte_flow_item_ipv6 ipv6; 522 } l3_mask; 523 union { 524 struct rte_flow_item_udp udp; 525 struct rte_flow_item_tcp tcp; 526 } l4; 527 union { 528 struct rte_flow_item_udp udp; 529 struct rte_flow_item_tcp tcp; 530 } l4_mask; 531 struct rte_flow_action_queue queue; 532 }; 533 534 /* Verbs specification header. */ 535 struct ibv_spec_header { 536 enum ibv_flow_spec_type type; 537 uint16_t size; 538 }; 539 540 /** 541 * Check item is fully supported by the NIC matching capability. 
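 *
 * Two properties are checked in a single pass: the user mask must not
 * enable bits outside the supported \mask, and the masked spec and last
 * values must be equal since ranges are not supported. For instance
 * (illustrative values), a UDP item whose mask covers hdr.dgram_len, or
 * whose spec/last describe a destination port range 1000-2000, is
 * rejected with ENOTSUP.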
542 * 543 * @param item[in] 544 * Item specification. 545 * @param mask[in] 546 * Bit-masks covering supported fields to compare with spec, last and mask in 547 * \item. 548 * @param size 549 * Bit-Mask size in bytes. 550 * 551 * @return 552 * 0 on success, a negative errno value otherwise and rte_errno is set. 553 */ 554 static int 555 mlx5_flow_item_validate(const struct rte_flow_item *item, 556 const uint8_t *mask, unsigned int size) 557 { 558 unsigned int i; 559 const uint8_t *spec = item->spec; 560 const uint8_t *last = item->last; 561 const uint8_t *m = item->mask ? item->mask : mask; 562 563 if (!spec && (item->mask || last)) 564 goto error; 565 if (!spec) 566 return 0; 567 /* 568 * Single-pass check to make sure that: 569 * - item->mask is supported, no bits are set outside mask. 570 * - Both masked item->spec and item->last are equal (no range 571 * supported). 572 */ 573 for (i = 0; i < size; i++) { 574 if (!m[i]) 575 continue; 576 if ((m[i] | mask[i]) != mask[i]) 577 goto error; 578 if (last && ((spec[i] & m[i]) != (last[i] & m[i]))) 579 goto error; 580 } 581 return 0; 582 error: 583 rte_errno = ENOTSUP; 584 return -rte_errno; 585 } 586 587 /** 588 * Extract attribute to the parser. 589 * 590 * @param[in] attr 591 * Flow rule attributes. 592 * @param[out] error 593 * Perform verbose error reporting if not NULL. 594 * 595 * @return 596 * 0 on success, a negative errno value otherwise and rte_errno is set. 597 */ 598 static int 599 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr, 600 struct rte_flow_error *error) 601 { 602 if (attr->group) { 603 rte_flow_error_set(error, ENOTSUP, 604 RTE_FLOW_ERROR_TYPE_ATTR_GROUP, 605 NULL, 606 "groups are not supported"); 607 return -rte_errno; 608 } 609 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) { 610 rte_flow_error_set(error, ENOTSUP, 611 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, 612 NULL, 613 "priorities are not supported"); 614 return -rte_errno; 615 } 616 if (attr->egress) { 617 rte_flow_error_set(error, ENOTSUP, 618 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, 619 NULL, 620 "egress is not supported"); 621 return -rte_errno; 622 } 623 if (attr->transfer) { 624 rte_flow_error_set(error, ENOTSUP, 625 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, 626 NULL, 627 "transfer is not supported"); 628 return -rte_errno; 629 } 630 if (!attr->ingress) { 631 rte_flow_error_set(error, ENOTSUP, 632 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, 633 NULL, 634 "only ingress is supported"); 635 return -rte_errno; 636 } 637 return 0; 638 } 639 640 /** 641 * Extract actions request to the parser. 642 * 643 * @param dev 644 * Pointer to Ethernet device. 645 * @param[in] actions 646 * Associated actions (list terminated by the END action). 647 * @param[out] error 648 * Perform verbose error reporting if not NULL. 649 * @param[in, out] parser 650 * Internal parser structure. 651 * 652 * @return 653 * 0 on success, a negative errno value otherwise and rte_errno is set. 
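 *
 * As an illustrative sketch (application side, hypothetical values,
 * assuming Rx queue 3 exists), the following action list selects Rx
 * queue 3 and tags matched packets with mark 42; this function would then
 * set parser->queues[0] = 3, parser->mark = 1 and parser->mark_id = 42:
 *
 * @code
 * struct rte_flow_action_queue queue = { .index = 3 };
 * struct rte_flow_action_mark mark = { .id = 42 };
 * struct rte_flow_action actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode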
654 */ 655 static int 656 mlx5_flow_convert_actions(struct rte_eth_dev *dev, 657 const struct rte_flow_action actions[], 658 struct rte_flow_error *error, 659 struct mlx5_flow_parse *parser) 660 { 661 enum { FATE = 1, MARK = 2, COUNT = 4, }; 662 uint32_t overlap = 0; 663 struct priv *priv = dev->data->dev_private; 664 665 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { 666 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { 667 continue; 668 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { 669 if (overlap & FATE) 670 goto exit_action_overlap; 671 overlap |= FATE; 672 parser->drop = 1; 673 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) { 674 const struct rte_flow_action_queue *queue = 675 (const struct rte_flow_action_queue *) 676 actions->conf; 677 678 if (overlap & FATE) 679 goto exit_action_overlap; 680 overlap |= FATE; 681 if (!queue || (queue->index > (priv->rxqs_n - 1))) 682 goto exit_action_not_supported; 683 parser->queues[0] = queue->index; 684 parser->rss_conf = (struct rte_flow_action_rss){ 685 .queue_num = 1, 686 .queue = parser->queues, 687 }; 688 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) { 689 const struct rte_flow_action_rss *rss = 690 (const struct rte_flow_action_rss *) 691 actions->conf; 692 const uint8_t *rss_key; 693 uint32_t rss_key_len; 694 uint16_t n; 695 696 if (overlap & FATE) 697 goto exit_action_overlap; 698 overlap |= FATE; 699 if (rss->func && 700 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) { 701 rte_flow_error_set(error, EINVAL, 702 RTE_FLOW_ERROR_TYPE_ACTION, 703 actions, 704 "the only supported RSS hash" 705 " function is Toeplitz"); 706 return -rte_errno; 707 } 708 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT 709 if (parser->rss_conf.level > 1) { 710 rte_flow_error_set(error, EINVAL, 711 RTE_FLOW_ERROR_TYPE_ACTION, 712 actions, 713 "a nonzero RSS encapsulation" 714 " level is not supported"); 715 return -rte_errno; 716 } 717 #endif 718 if (parser->rss_conf.level > 2) { 719 rte_flow_error_set(error, EINVAL, 720 RTE_FLOW_ERROR_TYPE_ACTION, 721 actions, 722 "RSS encapsulation level" 723 " > 1 is not supported"); 724 return -rte_errno; 725 } 726 if (rss->types & MLX5_RSS_HF_MASK) { 727 rte_flow_error_set(error, EINVAL, 728 RTE_FLOW_ERROR_TYPE_ACTION, 729 actions, 730 "unsupported RSS type" 731 " requested"); 732 return -rte_errno; 733 } 734 if (rss->key_len) { 735 rss_key_len = rss->key_len; 736 rss_key = rss->key; 737 } else { 738 rss_key_len = rss_hash_default_key_len; 739 rss_key = rss_hash_default_key; 740 } 741 if (rss_key_len != RTE_DIM(parser->rss_key)) { 742 rte_flow_error_set(error, EINVAL, 743 RTE_FLOW_ERROR_TYPE_ACTION, 744 actions, 745 "RSS hash key must be" 746 " exactly 40 bytes long"); 747 return -rte_errno; 748 } 749 if (!rss->queue_num) { 750 rte_flow_error_set(error, EINVAL, 751 RTE_FLOW_ERROR_TYPE_ACTION, 752 actions, 753 "no valid queues"); 754 return -rte_errno; 755 } 756 if (rss->queue_num > RTE_DIM(parser->queues)) { 757 rte_flow_error_set(error, EINVAL, 758 RTE_FLOW_ERROR_TYPE_ACTION, 759 actions, 760 "too many queues for RSS" 761 " context"); 762 return -rte_errno; 763 } 764 for (n = 0; n < rss->queue_num; ++n) { 765 if (rss->queue[n] >= priv->rxqs_n) { 766 rte_flow_error_set(error, EINVAL, 767 RTE_FLOW_ERROR_TYPE_ACTION, 768 actions, 769 "queue id > number of" 770 " queues"); 771 return -rte_errno; 772 } 773 } 774 parser->rss_conf = (struct rte_flow_action_rss){ 775 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 776 .level = rss->level, 777 .types = rss->types, 778 .key_len = rss_key_len, 779 .queue_num = 
rss->queue_num, 780 .key = memcpy(parser->rss_key, rss_key, 781 sizeof(*rss_key) * rss_key_len), 782 .queue = memcpy(parser->queues, rss->queue, 783 sizeof(*rss->queue) * 784 rss->queue_num), 785 }; 786 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { 787 const struct rte_flow_action_mark *mark = 788 (const struct rte_flow_action_mark *) 789 actions->conf; 790 791 if (overlap & MARK) 792 goto exit_action_overlap; 793 overlap |= MARK; 794 if (!mark) { 795 rte_flow_error_set(error, EINVAL, 796 RTE_FLOW_ERROR_TYPE_ACTION, 797 actions, 798 "mark must be defined"); 799 return -rte_errno; 800 } else if (mark->id >= MLX5_FLOW_MARK_MAX) { 801 rte_flow_error_set(error, ENOTSUP, 802 RTE_FLOW_ERROR_TYPE_ACTION, 803 actions, 804 "mark must be between 0" 805 " and 16777199"); 806 return -rte_errno; 807 } 808 parser->mark = 1; 809 parser->mark_id = mark->id; 810 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) { 811 if (overlap & MARK) 812 goto exit_action_overlap; 813 overlap |= MARK; 814 parser->mark = 1; 815 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT && 816 priv->config.flow_counter_en) { 817 if (overlap & COUNT) 818 goto exit_action_overlap; 819 overlap |= COUNT; 820 parser->count = 1; 821 } else { 822 goto exit_action_not_supported; 823 } 824 } 825 /* When fate is unknown, drop traffic. */ 826 if (!(overlap & FATE)) 827 parser->drop = 1; 828 if (parser->drop && parser->mark) 829 parser->mark = 0; 830 if (!parser->rss_conf.queue_num && !parser->drop) { 831 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, 832 NULL, "no valid action"); 833 return -rte_errno; 834 } 835 return 0; 836 exit_action_not_supported: 837 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, 838 actions, "action not supported"); 839 return -rte_errno; 840 exit_action_overlap: 841 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, 842 actions, "overlapping actions are not supported"); 843 return -rte_errno; 844 } 845 846 /** 847 * Validate items. 848 * 849 * @param[in] items 850 * Pattern specification (list terminated by the END pattern item). 851 * @param[out] error 852 * Perform verbose error reporting if not NULL. 853 * @param[in, out] parser 854 * Internal parser structure. 855 * 856 * @return 857 * 0 on success, a negative errno value otherwise and rte_errno is set. 858 */ 859 static int 860 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev, 861 const struct rte_flow_item items[], 862 struct rte_flow_error *error, 863 struct mlx5_flow_parse *parser) 864 { 865 struct priv *priv = dev->data->dev_private; 866 const struct mlx5_flow_items *cur_item = mlx5_flow_items; 867 unsigned int i; 868 int ret = 0; 869 870 /* Initialise the offsets to start after verbs attribute. 
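	 * As an illustration, for an eth / ipv4 / udp pattern the
	 * HASH_RXQ_UDPV4 entry ends up at sizeof(struct ibv_flow_attr) plus
	 * the Verbs spec sizes of the three items; these totals are what
	 * mlx5_flow_convert_allocate() later receives.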
	 */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token) {
			ret = -ENOTSUP;
			goto exit_item_not_supported;
		}
		cur_item = token;
		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (ret)
			goto exit_item_not_supported;
		if (IS_TUNNEL(items->type)) {
			if (parser->tunnel) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "Cannot recognize multiple"
						   " tunnel encapsulations.");
				return -rte_errno;
			}
			if (!priv->config.tunnel_en &&
			    parser->rss_conf.level > 1) {
				rte_flow_error_set(error, ENOTSUP,
					RTE_FLOW_ERROR_TYPE_ITEM,
					items,
					"RSS on tunnel is not supported");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
			parser->tunnel = flow_ptype[items->type];
		}
		if (parser->drop) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
	}
	if (parser->mark) {
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	}
	if (parser->count) {
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
	}
	return 0;
exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param[in] size
 *   Number of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes");
		return NULL;
	}
	return ibv_attr;
}

/**
 * Give inner packet matching a higher priority than non-inner (outer)
 * matching.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in, out] parser
 *   Internal parser structure.
 * @param attr
 *   User flow attribute.
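 *
 * As a worked example (assuming MLX5_VERBS_FLOW_PRIO_8 is 8): on a device
 * exposing 16 Verbs flow priorities, a non-tunnel user rule created with
 * attr->priority == 0 gets 0 * 8 + 4 = 4, to which the per-layer
 * flow_priority from hash_rxq_init[] (0 for L4, 1 for L3, 2 for L2) is
 * added, i.e. 4 to 6, within the "User normal flow: 4-7" row of the table
 * in the function body.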
983 */ 984 static void 985 mlx5_flow_update_priority(struct rte_eth_dev *dev, 986 struct mlx5_flow_parse *parser, 987 const struct rte_flow_attr *attr) 988 { 989 struct priv *priv = dev->data->dev_private; 990 unsigned int i; 991 uint16_t priority; 992 993 /* 8 priorities >= 16 priorities 994 * Control flow: 4-7 8-15 995 * User normal flow: 1-3 4-7 996 * User tunnel flow: 0-2 0-3 997 */ 998 priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8; 999 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8) 1000 priority /= 2; 1001 /* 1002 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs 1003 * priorities, lower 4 otherwise. 1004 */ 1005 if (!parser->inner) { 1006 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8) 1007 priority += 1; 1008 else 1009 priority += MLX5_VERBS_FLOW_PRIO_8 / 2; 1010 } 1011 if (parser->drop) { 1012 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority + 1013 hash_rxq_init[HASH_RXQ_ETH].flow_priority; 1014 return; 1015 } 1016 for (i = 0; i != hash_rxq_init_n; ++i) { 1017 if (!parser->queue[i].ibv_attr) 1018 continue; 1019 parser->queue[i].ibv_attr->priority = priority + 1020 hash_rxq_init[i].flow_priority; 1021 } 1022 } 1023 1024 /** 1025 * Finalise verbs flow attributes. 1026 * 1027 * @param[in, out] parser 1028 * Internal parser structure. 1029 */ 1030 static void 1031 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser) 1032 { 1033 unsigned int i; 1034 uint32_t inner = parser->inner; 1035 1036 /* Don't create extra flows for outer RSS. */ 1037 if (parser->tunnel && parser->rss_conf.level < 2) 1038 return; 1039 /* 1040 * Fill missing layers in verbs specifications, or compute the correct 1041 * offset to allocate the memory space for the attributes and 1042 * specifications. 1043 */ 1044 for (i = 0; i != hash_rxq_init_n - 1; ++i) { 1045 union { 1046 struct ibv_flow_spec_ipv4_ext ipv4; 1047 struct ibv_flow_spec_ipv6 ipv6; 1048 struct ibv_flow_spec_tcp_udp udp_tcp; 1049 struct ibv_flow_spec_eth eth; 1050 } specs; 1051 void *dst; 1052 uint16_t size; 1053 1054 if (i == parser->layer) 1055 continue; 1056 if (parser->layer == HASH_RXQ_ETH || 1057 parser->layer == HASH_RXQ_TUNNEL) { 1058 if (hash_rxq_init[i].ip_version == MLX5_IPV4) { 1059 size = sizeof(struct ibv_flow_spec_ipv4_ext); 1060 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){ 1061 .type = inner | IBV_FLOW_SPEC_IPV4_EXT, 1062 .size = size, 1063 }; 1064 } else { 1065 size = sizeof(struct ibv_flow_spec_ipv6); 1066 specs.ipv6 = (struct ibv_flow_spec_ipv6){ 1067 .type = inner | IBV_FLOW_SPEC_IPV6, 1068 .size = size, 1069 }; 1070 } 1071 if (parser->queue[i].ibv_attr) { 1072 dst = (void *)((uintptr_t) 1073 parser->queue[i].ibv_attr + 1074 parser->queue[i].offset); 1075 memcpy(dst, &specs, size); 1076 ++parser->queue[i].ibv_attr->num_of_specs; 1077 } 1078 parser->queue[i].offset += size; 1079 } 1080 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) || 1081 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) { 1082 size = sizeof(struct ibv_flow_spec_tcp_udp); 1083 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) { 1084 .type = inner | ((i == HASH_RXQ_UDPV4 || 1085 i == HASH_RXQ_UDPV6) ? 
1086 IBV_FLOW_SPEC_UDP : 1087 IBV_FLOW_SPEC_TCP), 1088 .size = size, 1089 }; 1090 if (parser->queue[i].ibv_attr) { 1091 dst = (void *)((uintptr_t) 1092 parser->queue[i].ibv_attr + 1093 parser->queue[i].offset); 1094 memcpy(dst, &specs, size); 1095 ++parser->queue[i].ibv_attr->num_of_specs; 1096 } 1097 parser->queue[i].offset += size; 1098 } 1099 } 1100 } 1101 1102 /** 1103 * Update flows according to pattern and RSS hash fields. 1104 * 1105 * @param[in, out] parser 1106 * Internal parser structure. 1107 * 1108 * @return 1109 * 0 on success, a negative errno value otherwise and rte_errno is set. 1110 */ 1111 static int 1112 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser) 1113 { 1114 unsigned int i; 1115 enum hash_rxq_type start; 1116 enum hash_rxq_type layer; 1117 int outer = parser->tunnel && parser->rss_conf.level < 2; 1118 uint64_t rss = parser->rss_conf.types; 1119 1120 /* Default to outer RSS. */ 1121 if (!parser->rss_conf.level) 1122 parser->rss_conf.level = 1; 1123 layer = outer ? parser->out_layer : parser->layer; 1124 if (layer == HASH_RXQ_TUNNEL) 1125 layer = HASH_RXQ_ETH; 1126 if (outer) { 1127 /* Only one hash type for outer RSS. */ 1128 if (rss && layer == HASH_RXQ_ETH) { 1129 start = HASH_RXQ_TCPV4; 1130 } else if (rss && layer != HASH_RXQ_ETH && 1131 !(rss & hash_rxq_init[layer].dpdk_rss_hf)) { 1132 /* If RSS not match L4 pattern, try L3 RSS. */ 1133 if (layer < HASH_RXQ_IPV4) 1134 layer = HASH_RXQ_IPV4; 1135 else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6) 1136 layer = HASH_RXQ_IPV6; 1137 start = layer; 1138 } else { 1139 start = layer; 1140 } 1141 /* Scan first valid hash type. */ 1142 for (i = start; rss && i <= layer; ++i) { 1143 if (!parser->queue[i].ibv_attr) 1144 continue; 1145 if (hash_rxq_init[i].dpdk_rss_hf & rss) 1146 break; 1147 } 1148 if (rss && i <= layer) 1149 parser->queue[layer].hash_fields = 1150 hash_rxq_init[i].hash_fields; 1151 /* Trim unused hash types. */ 1152 for (i = 0; i != hash_rxq_init_n; ++i) { 1153 if (parser->queue[i].ibv_attr && i != layer) { 1154 rte_free(parser->queue[i].ibv_attr); 1155 parser->queue[i].ibv_attr = NULL; 1156 } 1157 } 1158 } else { 1159 /* Expand for inner or normal RSS. */ 1160 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4)) 1161 start = HASH_RXQ_TCPV4; 1162 else if (rss && layer == HASH_RXQ_IPV6) 1163 start = HASH_RXQ_TCPV6; 1164 else 1165 start = layer; 1166 /* For L4 pattern, try L3 RSS if no L4 RSS. */ 1167 /* Trim unused hash types. */ 1168 for (i = 0; i != hash_rxq_init_n; ++i) { 1169 if (!parser->queue[i].ibv_attr) 1170 continue; 1171 if (i < start || i > layer) { 1172 rte_free(parser->queue[i].ibv_attr); 1173 parser->queue[i].ibv_attr = NULL; 1174 continue; 1175 } 1176 if (!rss) 1177 continue; 1178 if (hash_rxq_init[i].dpdk_rss_hf & rss) { 1179 parser->queue[i].hash_fields = 1180 hash_rxq_init[i].hash_fields; 1181 } else if (i != layer) { 1182 /* Remove unused RSS expansion. */ 1183 rte_free(parser->queue[i].ibv_attr); 1184 parser->queue[i].ibv_attr = NULL; 1185 } else if (layer < HASH_RXQ_IPV4 && 1186 (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf & 1187 rss)) { 1188 /* Allow IPv4 RSS on L4 pattern. */ 1189 parser->queue[i].hash_fields = 1190 hash_rxq_init[HASH_RXQ_IPV4] 1191 .hash_fields; 1192 } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 && 1193 (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf & 1194 rss)) { 1195 /* Allow IPv4 RSS on L4 pattern. 
*/ 1196 parser->queue[i].hash_fields = 1197 hash_rxq_init[HASH_RXQ_IPV6] 1198 .hash_fields; 1199 } 1200 } 1201 } 1202 return 0; 1203 } 1204 1205 /** 1206 * Validate and convert a flow supported by the NIC. 1207 * 1208 * @param dev 1209 * Pointer to Ethernet device. 1210 * @param[in] attr 1211 * Flow rule attributes. 1212 * @param[in] pattern 1213 * Pattern specification (list terminated by the END pattern item). 1214 * @param[in] actions 1215 * Associated actions (list terminated by the END action). 1216 * @param[out] error 1217 * Perform verbose error reporting if not NULL. 1218 * @param[in, out] parser 1219 * Internal parser structure. 1220 * 1221 * @return 1222 * 0 on success, a negative errno value otherwise and rte_errno is set. 1223 */ 1224 static int 1225 mlx5_flow_convert(struct rte_eth_dev *dev, 1226 const struct rte_flow_attr *attr, 1227 const struct rte_flow_item items[], 1228 const struct rte_flow_action actions[], 1229 struct rte_flow_error *error, 1230 struct mlx5_flow_parse *parser) 1231 { 1232 const struct mlx5_flow_items *cur_item = mlx5_flow_items; 1233 unsigned int i; 1234 int ret; 1235 1236 /* First step. Validate the attributes, items and actions. */ 1237 *parser = (struct mlx5_flow_parse){ 1238 .create = parser->create, 1239 .layer = HASH_RXQ_ETH, 1240 .mark_id = MLX5_FLOW_MARK_DEFAULT, 1241 }; 1242 ret = mlx5_flow_convert_attributes(attr, error); 1243 if (ret) 1244 return ret; 1245 ret = mlx5_flow_convert_actions(dev, actions, error, parser); 1246 if (ret) 1247 return ret; 1248 ret = mlx5_flow_convert_items_validate(dev, items, error, parser); 1249 if (ret) 1250 return ret; 1251 mlx5_flow_convert_finalise(parser); 1252 /* 1253 * Second step. 1254 * Allocate the memory space to store verbs specifications. 1255 */ 1256 if (parser->drop) { 1257 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset; 1258 1259 parser->queue[HASH_RXQ_ETH].ibv_attr = 1260 mlx5_flow_convert_allocate(offset, error); 1261 if (!parser->queue[HASH_RXQ_ETH].ibv_attr) 1262 goto exit_enomem; 1263 parser->queue[HASH_RXQ_ETH].offset = 1264 sizeof(struct ibv_flow_attr); 1265 } else { 1266 for (i = 0; i != hash_rxq_init_n; ++i) { 1267 unsigned int offset; 1268 1269 offset = parser->queue[i].offset; 1270 parser->queue[i].ibv_attr = 1271 mlx5_flow_convert_allocate(offset, error); 1272 if (!parser->queue[i].ibv_attr) 1273 goto exit_enomem; 1274 parser->queue[i].offset = sizeof(struct ibv_flow_attr); 1275 } 1276 } 1277 /* Third step. Conversion parse, fill the specifications. */ 1278 parser->inner = 0; 1279 parser->tunnel = 0; 1280 parser->layer = HASH_RXQ_ETH; 1281 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) { 1282 struct mlx5_flow_data data = { 1283 .dev = dev, 1284 .parser = parser, 1285 .error = error, 1286 }; 1287 1288 if (items->type == RTE_FLOW_ITEM_TYPE_VOID) 1289 continue; 1290 cur_item = &mlx5_flow_items[items->type]; 1291 ret = cur_item->convert(items, 1292 (cur_item->default_mask ? 1293 cur_item->default_mask : 1294 cur_item->mask), 1295 &data); 1296 if (ret) 1297 goto exit_free; 1298 } 1299 if (!parser->drop) { 1300 /* RSS check, remove unused hash types. */ 1301 ret = mlx5_flow_convert_rss(parser); 1302 if (ret) 1303 goto exit_free; 1304 /* Complete missing specification. 
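		 * mlx5_flow_convert_finalise() runs a second time here so
		 * that the hash Rx queue entries kept by
		 * mlx5_flow_convert_rss() get the L3/L4 specs the pattern
		 * did not provide explicitly.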
*/ 1305 mlx5_flow_convert_finalise(parser); 1306 } 1307 mlx5_flow_update_priority(dev, parser, attr); 1308 if (parser->mark) 1309 mlx5_flow_create_flag_mark(parser, parser->mark_id); 1310 if (parser->count && parser->create) { 1311 mlx5_flow_create_count(dev, parser); 1312 if (!parser->cs) 1313 goto exit_count_error; 1314 } 1315 exit_free: 1316 /* Only verification is expected, all resources should be released. */ 1317 if (!parser->create) { 1318 for (i = 0; i != hash_rxq_init_n; ++i) { 1319 if (parser->queue[i].ibv_attr) { 1320 rte_free(parser->queue[i].ibv_attr); 1321 parser->queue[i].ibv_attr = NULL; 1322 } 1323 } 1324 } 1325 return ret; 1326 exit_enomem: 1327 for (i = 0; i != hash_rxq_init_n; ++i) { 1328 if (parser->queue[i].ibv_attr) { 1329 rte_free(parser->queue[i].ibv_attr); 1330 parser->queue[i].ibv_attr = NULL; 1331 } 1332 } 1333 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 1334 NULL, "cannot allocate verbs spec attributes"); 1335 return -rte_errno; 1336 exit_count_error: 1337 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 1338 NULL, "cannot create counter"); 1339 return -rte_errno; 1340 } 1341 1342 /** 1343 * Copy the specification created into the flow. 1344 * 1345 * @param parser 1346 * Internal parser structure. 1347 * @param src 1348 * Create specification. 1349 * @param size 1350 * Size in bytes of the specification to copy. 1351 */ 1352 static void 1353 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src, 1354 unsigned int size) 1355 { 1356 unsigned int i; 1357 void *dst; 1358 1359 for (i = 0; i != hash_rxq_init_n; ++i) { 1360 if (!parser->queue[i].ibv_attr) 1361 continue; 1362 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr + 1363 parser->queue[i].offset); 1364 memcpy(dst, src, size); 1365 ++parser->queue[i].ibv_attr->num_of_specs; 1366 parser->queue[i].offset += size; 1367 } 1368 } 1369 1370 /** 1371 * Convert Ethernet item to Verbs specification. 1372 * 1373 * @param item[in] 1374 * Item specification. 1375 * @param default_mask[in] 1376 * Default bit-masks to use when item->mask is not provided. 1377 * @param data[in, out] 1378 * User structure. 1379 * 1380 * @return 1381 * 0 on success, a negative errno value otherwise and rte_errno is set. 1382 */ 1383 static int 1384 mlx5_flow_create_eth(const struct rte_flow_item *item, 1385 const void *default_mask, 1386 struct mlx5_flow_data *data) 1387 { 1388 const struct rte_flow_item_eth *spec = item->spec; 1389 const struct rte_flow_item_eth *mask = item->mask; 1390 struct mlx5_flow_parse *parser = data->parser; 1391 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth); 1392 struct ibv_flow_spec_eth eth = { 1393 .type = parser->inner | IBV_FLOW_SPEC_ETH, 1394 .size = eth_size, 1395 }; 1396 1397 parser->layer = HASH_RXQ_ETH; 1398 if (spec) { 1399 unsigned int i; 1400 1401 if (!mask) 1402 mask = default_mask; 1403 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN); 1404 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN); 1405 eth.val.ether_type = spec->type; 1406 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN); 1407 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN); 1408 eth.mask.ether_type = mask->type; 1409 /* Remove unwanted bits from values. 
*/ 1410 for (i = 0; i < ETHER_ADDR_LEN; ++i) { 1411 eth.val.dst_mac[i] &= eth.mask.dst_mac[i]; 1412 eth.val.src_mac[i] &= eth.mask.src_mac[i]; 1413 } 1414 eth.val.ether_type &= eth.mask.ether_type; 1415 } 1416 mlx5_flow_create_copy(parser, ð, eth_size); 1417 return 0; 1418 } 1419 1420 /** 1421 * Convert VLAN item to Verbs specification. 1422 * 1423 * @param item[in] 1424 * Item specification. 1425 * @param default_mask[in] 1426 * Default bit-masks to use when item->mask is not provided. 1427 * @param data[in, out] 1428 * User structure. 1429 * 1430 * @return 1431 * 0 on success, a negative errno value otherwise and rte_errno is set. 1432 */ 1433 static int 1434 mlx5_flow_create_vlan(const struct rte_flow_item *item, 1435 const void *default_mask, 1436 struct mlx5_flow_data *data) 1437 { 1438 const struct rte_flow_item_vlan *spec = item->spec; 1439 const struct rte_flow_item_vlan *mask = item->mask; 1440 struct mlx5_flow_parse *parser = data->parser; 1441 struct ibv_flow_spec_eth *eth; 1442 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth); 1443 const char *msg = "VLAN cannot be empty"; 1444 1445 if (spec) { 1446 unsigned int i; 1447 if (!mask) 1448 mask = default_mask; 1449 1450 for (i = 0; i != hash_rxq_init_n; ++i) { 1451 if (!parser->queue[i].ibv_attr) 1452 continue; 1453 1454 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr + 1455 parser->queue[i].offset - eth_size); 1456 eth->val.vlan_tag = spec->tci; 1457 eth->mask.vlan_tag = mask->tci; 1458 eth->val.vlan_tag &= eth->mask.vlan_tag; 1459 /* 1460 * From verbs perspective an empty VLAN is equivalent 1461 * to a packet without VLAN layer. 1462 */ 1463 if (!eth->mask.vlan_tag) 1464 goto error; 1465 /* Outer TPID cannot be matched. */ 1466 if (eth->mask.ether_type) { 1467 msg = "VLAN TPID matching is not supported"; 1468 goto error; 1469 } 1470 eth->val.ether_type = spec->inner_type; 1471 eth->mask.ether_type = mask->inner_type; 1472 eth->val.ether_type &= eth->mask.ether_type; 1473 } 1474 return 0; 1475 } 1476 error: 1477 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, 1478 item, msg); 1479 } 1480 1481 /** 1482 * Convert IPv4 item to Verbs specification. 1483 * 1484 * @param item[in] 1485 * Item specification. 1486 * @param default_mask[in] 1487 * Default bit-masks to use when item->mask is not provided. 1488 * @param data[in, out] 1489 * User structure. 1490 * 1491 * @return 1492 * 0 on success, a negative errno value otherwise and rte_errno is set. 
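 *
 * Illustrative item accepted by this converter (application side;
 * 192.0.2.0/24 is a documentation prefix used as a hypothetical value):
 *
 * @code
 * struct rte_flow_item_ipv4 spec = {
 *	.hdr = { .dst_addr = rte_cpu_to_be_32(0xc0000200) },
 * };
 * struct rte_flow_item_ipv4 mask = {
 *	.hdr = { .dst_addr = rte_cpu_to_be_32(0xffffff00) },
 * };
 * struct rte_flow_item item = {
 *	.type = RTE_FLOW_ITEM_TYPE_IPV4,
 *	.spec = &spec,
 *	.mask = &mask,
 * };
 * @endcode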
1493 */ 1494 static int 1495 mlx5_flow_create_ipv4(const struct rte_flow_item *item, 1496 const void *default_mask, 1497 struct mlx5_flow_data *data) 1498 { 1499 struct priv *priv = data->dev->data->dev_private; 1500 const struct rte_flow_item_ipv4 *spec = item->spec; 1501 const struct rte_flow_item_ipv4 *mask = item->mask; 1502 struct mlx5_flow_parse *parser = data->parser; 1503 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext); 1504 struct ibv_flow_spec_ipv4_ext ipv4 = { 1505 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT, 1506 .size = ipv4_size, 1507 }; 1508 1509 if (parser->layer == HASH_RXQ_TUNNEL && 1510 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] && 1511 !priv->config.l3_vxlan_en) 1512 return rte_flow_error_set(data->error, EINVAL, 1513 RTE_FLOW_ERROR_TYPE_ITEM, 1514 item, 1515 "L3 VXLAN not enabled by device" 1516 " parameter and/or not configured" 1517 " in firmware"); 1518 parser->layer = HASH_RXQ_IPV4; 1519 if (spec) { 1520 if (!mask) 1521 mask = default_mask; 1522 ipv4.val = (struct ibv_flow_ipv4_ext_filter){ 1523 .src_ip = spec->hdr.src_addr, 1524 .dst_ip = spec->hdr.dst_addr, 1525 .proto = spec->hdr.next_proto_id, 1526 .tos = spec->hdr.type_of_service, 1527 }; 1528 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){ 1529 .src_ip = mask->hdr.src_addr, 1530 .dst_ip = mask->hdr.dst_addr, 1531 .proto = mask->hdr.next_proto_id, 1532 .tos = mask->hdr.type_of_service, 1533 }; 1534 /* Remove unwanted bits from values. */ 1535 ipv4.val.src_ip &= ipv4.mask.src_ip; 1536 ipv4.val.dst_ip &= ipv4.mask.dst_ip; 1537 ipv4.val.proto &= ipv4.mask.proto; 1538 ipv4.val.tos &= ipv4.mask.tos; 1539 } 1540 mlx5_flow_create_copy(parser, &ipv4, ipv4_size); 1541 return 0; 1542 } 1543 1544 /** 1545 * Convert IPv6 item to Verbs specification. 1546 * 1547 * @param item[in] 1548 * Item specification. 1549 * @param default_mask[in] 1550 * Default bit-masks to use when item->mask is not provided. 1551 * @param data[in, out] 1552 * User structure. 1553 * 1554 * @return 1555 * 0 on success, a negative errno value otherwise and rte_errno is set. 
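 *
 * Note that hdr.vtc_flow packs version (4 bits), traffic class (8 bits)
 * and flow label (20 bits); e.g. a host-order value of 0x60345678 carries
 * traffic class 0x03 and flow label 0x45678, which the masks and shifts
 * below are meant to extract into the Verbs traffic_class and flow_label
 * fields.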
1556 */ 1557 static int 1558 mlx5_flow_create_ipv6(const struct rte_flow_item *item, 1559 const void *default_mask, 1560 struct mlx5_flow_data *data) 1561 { 1562 struct priv *priv = data->dev->data->dev_private; 1563 const struct rte_flow_item_ipv6 *spec = item->spec; 1564 const struct rte_flow_item_ipv6 *mask = item->mask; 1565 struct mlx5_flow_parse *parser = data->parser; 1566 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6); 1567 struct ibv_flow_spec_ipv6 ipv6 = { 1568 .type = parser->inner | IBV_FLOW_SPEC_IPV6, 1569 .size = ipv6_size, 1570 }; 1571 1572 if (parser->layer == HASH_RXQ_TUNNEL && 1573 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] && 1574 !priv->config.l3_vxlan_en) 1575 return rte_flow_error_set(data->error, EINVAL, 1576 RTE_FLOW_ERROR_TYPE_ITEM, 1577 item, 1578 "L3 VXLAN not enabled by device" 1579 " parameter and/or not configured" 1580 " in firmware"); 1581 parser->layer = HASH_RXQ_IPV6; 1582 if (spec) { 1583 unsigned int i; 1584 uint32_t vtc_flow_val; 1585 uint32_t vtc_flow_mask; 1586 1587 if (!mask) 1588 mask = default_mask; 1589 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr, 1590 RTE_DIM(ipv6.val.src_ip)); 1591 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr, 1592 RTE_DIM(ipv6.val.dst_ip)); 1593 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr, 1594 RTE_DIM(ipv6.mask.src_ip)); 1595 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr, 1596 RTE_DIM(ipv6.mask.dst_ip)); 1597 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow); 1598 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow); 1599 ipv6.val.flow_label = 1600 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >> 1601 IPV6_HDR_FL_SHIFT); 1602 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >> 1603 IPV6_HDR_TC_SHIFT; 1604 ipv6.val.next_hdr = spec->hdr.proto; 1605 ipv6.val.hop_limit = spec->hdr.hop_limits; 1606 ipv6.mask.flow_label = 1607 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >> 1608 IPV6_HDR_FL_SHIFT); 1609 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >> 1610 IPV6_HDR_TC_SHIFT; 1611 ipv6.mask.next_hdr = mask->hdr.proto; 1612 ipv6.mask.hop_limit = mask->hdr.hop_limits; 1613 /* Remove unwanted bits from values. */ 1614 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) { 1615 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i]; 1616 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i]; 1617 } 1618 ipv6.val.flow_label &= ipv6.mask.flow_label; 1619 ipv6.val.traffic_class &= ipv6.mask.traffic_class; 1620 ipv6.val.next_hdr &= ipv6.mask.next_hdr; 1621 ipv6.val.hop_limit &= ipv6.mask.hop_limit; 1622 } 1623 mlx5_flow_create_copy(parser, &ipv6, ipv6_size); 1624 return 0; 1625 } 1626 1627 /** 1628 * Convert UDP item to Verbs specification. 1629 * 1630 * @param item[in] 1631 * Item specification. 1632 * @param default_mask[in] 1633 * Default bit-masks to use when item->mask is not provided. 1634 * @param data[in, out] 1635 * User structure. 1636 * 1637 * @return 1638 * 0 on success, a negative errno value otherwise and rte_errno is set. 
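 *
 * Illustrative item (application side, hypothetical port) matching UDP
 * destination port 4789 with the source port wildcarded:
 *
 * @code
 * struct rte_flow_item_udp spec = {
 *	.hdr = { .dst_port = RTE_BE16(4789) },
 * };
 * struct rte_flow_item_udp mask = {
 *	.hdr = { .dst_port = RTE_BE16(0xffff) },
 * };
 * struct rte_flow_item item = {
 *	.type = RTE_FLOW_ITEM_TYPE_UDP,
 *	.spec = &spec,
 *	.mask = &mask,
 * };
 * @endcode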
1639 */ 1640 static int 1641 mlx5_flow_create_udp(const struct rte_flow_item *item, 1642 const void *default_mask, 1643 struct mlx5_flow_data *data) 1644 { 1645 const struct rte_flow_item_udp *spec = item->spec; 1646 const struct rte_flow_item_udp *mask = item->mask; 1647 struct mlx5_flow_parse *parser = data->parser; 1648 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp); 1649 struct ibv_flow_spec_tcp_udp udp = { 1650 .type = parser->inner | IBV_FLOW_SPEC_UDP, 1651 .size = udp_size, 1652 }; 1653 1654 if (parser->layer == HASH_RXQ_IPV4) 1655 parser->layer = HASH_RXQ_UDPV4; 1656 else 1657 parser->layer = HASH_RXQ_UDPV6; 1658 if (spec) { 1659 if (!mask) 1660 mask = default_mask; 1661 udp.val.dst_port = spec->hdr.dst_port; 1662 udp.val.src_port = spec->hdr.src_port; 1663 udp.mask.dst_port = mask->hdr.dst_port; 1664 udp.mask.src_port = mask->hdr.src_port; 1665 /* Remove unwanted bits from values. */ 1666 udp.val.src_port &= udp.mask.src_port; 1667 udp.val.dst_port &= udp.mask.dst_port; 1668 } 1669 mlx5_flow_create_copy(parser, &udp, udp_size); 1670 return 0; 1671 } 1672 1673 /** 1674 * Convert TCP item to Verbs specification. 1675 * 1676 * @param item[in] 1677 * Item specification. 1678 * @param default_mask[in] 1679 * Default bit-masks to use when item->mask is not provided. 1680 * @param data[in, out] 1681 * User structure. 1682 * 1683 * @return 1684 * 0 on success, a negative errno value otherwise and rte_errno is set. 1685 */ 1686 static int 1687 mlx5_flow_create_tcp(const struct rte_flow_item *item, 1688 const void *default_mask, 1689 struct mlx5_flow_data *data) 1690 { 1691 const struct rte_flow_item_tcp *spec = item->spec; 1692 const struct rte_flow_item_tcp *mask = item->mask; 1693 struct mlx5_flow_parse *parser = data->parser; 1694 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp); 1695 struct ibv_flow_spec_tcp_udp tcp = { 1696 .type = parser->inner | IBV_FLOW_SPEC_TCP, 1697 .size = tcp_size, 1698 }; 1699 1700 if (parser->layer == HASH_RXQ_IPV4) 1701 parser->layer = HASH_RXQ_TCPV4; 1702 else 1703 parser->layer = HASH_RXQ_TCPV6; 1704 if (spec) { 1705 if (!mask) 1706 mask = default_mask; 1707 tcp.val.dst_port = spec->hdr.dst_port; 1708 tcp.val.src_port = spec->hdr.src_port; 1709 tcp.mask.dst_port = mask->hdr.dst_port; 1710 tcp.mask.src_port = mask->hdr.src_port; 1711 /* Remove unwanted bits from values. */ 1712 tcp.val.src_port &= tcp.mask.src_port; 1713 tcp.val.dst_port &= tcp.mask.dst_port; 1714 } 1715 mlx5_flow_create_copy(parser, &tcp, tcp_size); 1716 return 0; 1717 } 1718 1719 /** 1720 * Convert VXLAN item to Verbs specification. 1721 * 1722 * @param item[in] 1723 * Item specification. 1724 * @param default_mask[in] 1725 * Default bit-masks to use when item->mask is not provided. 1726 * @param data[in, out] 1727 * User structure. 1728 * 1729 * @return 1730 * 0 on success, a negative errno value otherwise and rte_errno is set. 
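 *
 * Illustrative item (application side, hypothetical VNI 100); note that,
 * as the code below explains, a VNI of 0 is refused when no outer
 * specification precedes the tunnel item:
 *
 * @code
 * struct rte_flow_item_vxlan spec = { .vni = { 0x00, 0x00, 0x64 } };
 * struct rte_flow_item_vxlan mask = { .vni = "\xff\xff\xff" };
 * struct rte_flow_item item = {
 *	.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *	.spec = &spec,
 *	.mask = &mask,
 * };
 * @endcode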
1731 */ 1732 static int 1733 mlx5_flow_create_vxlan(const struct rte_flow_item *item, 1734 const void *default_mask, 1735 struct mlx5_flow_data *data) 1736 { 1737 const struct rte_flow_item_vxlan *spec = item->spec; 1738 const struct rte_flow_item_vxlan *mask = item->mask; 1739 struct mlx5_flow_parse *parser = data->parser; 1740 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1741 struct ibv_flow_spec_tunnel vxlan = { 1742 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL, 1743 .size = size, 1744 }; 1745 union vni { 1746 uint32_t vlan_id; 1747 uint8_t vni[4]; 1748 } id; 1749 1750 id.vni[0] = 0; 1751 parser->inner = IBV_FLOW_SPEC_INNER; 1752 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)]; 1753 parser->out_layer = parser->layer; 1754 parser->layer = HASH_RXQ_TUNNEL; 1755 /* Default VXLAN to outer RSS. */ 1756 if (!parser->rss_conf.level) 1757 parser->rss_conf.level = 1; 1758 if (spec) { 1759 if (!mask) 1760 mask = default_mask; 1761 memcpy(&id.vni[1], spec->vni, 3); 1762 vxlan.val.tunnel_id = id.vlan_id; 1763 memcpy(&id.vni[1], mask->vni, 3); 1764 vxlan.mask.tunnel_id = id.vlan_id; 1765 /* Remove unwanted bits from values. */ 1766 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id; 1767 } 1768 /* 1769 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this 1770 * layer is defined in the Verbs specification it is interpreted as 1771 * wildcard and all packets will match this rule, if it follows a full 1772 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers 1773 * before will also match this rule. 1774 * To avoid such situation, VNI 0 is currently refused. 1775 */ 1776 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */ 1777 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id) 1778 return rte_flow_error_set(data->error, EINVAL, 1779 RTE_FLOW_ERROR_TYPE_ITEM, 1780 item, 1781 "VxLAN vni cannot be 0"); 1782 mlx5_flow_create_copy(parser, &vxlan, size); 1783 return 0; 1784 } 1785 1786 /** 1787 * Convert VXLAN-GPE item to Verbs specification. 1788 * 1789 * @param item[in] 1790 * Item specification. 1791 * @param default_mask[in] 1792 * Default bit-masks to use when item->mask is not provided. 1793 * @param data[in, out] 1794 * User structure. 1795 * 1796 * @return 1797 * 0 on success, a negative errno value otherwise and rte_errno is set. 1798 */ 1799 static int 1800 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item, 1801 const void *default_mask, 1802 struct mlx5_flow_data *data) 1803 { 1804 struct priv *priv = data->dev->data->dev_private; 1805 const struct rte_flow_item_vxlan_gpe *spec = item->spec; 1806 const struct rte_flow_item_vxlan_gpe *mask = item->mask; 1807 struct mlx5_flow_parse *parser = data->parser; 1808 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1809 struct ibv_flow_spec_tunnel vxlan = { 1810 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL, 1811 .size = size, 1812 }; 1813 union vni { 1814 uint32_t vlan_id; 1815 uint8_t vni[4]; 1816 } id; 1817 1818 if (!priv->config.l3_vxlan_en) 1819 return rte_flow_error_set(data->error, EINVAL, 1820 RTE_FLOW_ERROR_TYPE_ITEM, 1821 item, 1822 "L3 VXLAN not enabled by device" 1823 " parameter and/or not configured" 1824 " in firmware"); 1825 id.vni[0] = 0; 1826 parser->inner = IBV_FLOW_SPEC_INNER; 1827 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)]; 1828 parser->out_layer = parser->layer; 1829 parser->layer = HASH_RXQ_TUNNEL; 1830 /* Default VXLAN-GPE to outer RSS. 
*/ 1831 if (!parser->rss_conf.level) 1832 parser->rss_conf.level = 1; 1833 if (spec) { 1834 if (!mask) 1835 mask = default_mask; 1836 memcpy(&id.vni[1], spec->vni, 3); 1837 vxlan.val.tunnel_id = id.vlan_id; 1838 memcpy(&id.vni[1], mask->vni, 3); 1839 vxlan.mask.tunnel_id = id.vlan_id; 1840 if (spec->protocol) 1841 return rte_flow_error_set(data->error, EINVAL, 1842 RTE_FLOW_ERROR_TYPE_ITEM, 1843 item, 1844 "VxLAN-GPE protocol not" 1845 " supported"); 1846 /* Remove unwanted bits from values. */ 1847 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id; 1848 } 1849 /* 1850 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this 1851 * layer is defined in the Verbs specification it is interpreted as 1852 * wildcard and all packets will match this rule, if it follows a full 1853 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers 1854 * before will also match this rule. 1855 * To avoid such situation, VNI 0 is currently refused. 1856 */ 1857 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */ 1858 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id) 1859 return rte_flow_error_set(data->error, EINVAL, 1860 RTE_FLOW_ERROR_TYPE_ITEM, 1861 item, 1862 "VxLAN-GPE vni cannot be 0"); 1863 mlx5_flow_create_copy(parser, &vxlan, size); 1864 return 0; 1865 } 1866 1867 /** 1868 * Convert GRE item to Verbs specification. 1869 * 1870 * @param item[in] 1871 * Item specification. 1872 * @param default_mask[in] 1873 * Default bit-masks to use when item->mask is not provided. 1874 * @param data[in, out] 1875 * User structure. 1876 * 1877 * @return 1878 * 0 on success, a negative errno value otherwise and rte_errno is set. 1879 */ 1880 static int 1881 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused, 1882 const void *default_mask __rte_unused, 1883 struct mlx5_flow_data *data) 1884 { 1885 struct mlx5_flow_parse *parser = data->parser; 1886 unsigned int size = sizeof(struct ibv_flow_spec_tunnel); 1887 struct ibv_flow_spec_tunnel tunnel = { 1888 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL, 1889 .size = size, 1890 }; 1891 struct ibv_flow_spec_ipv4_ext *ipv4; 1892 struct ibv_flow_spec_ipv6 *ipv6; 1893 unsigned int i; 1894 1895 parser->inner = IBV_FLOW_SPEC_INNER; 1896 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)]; 1897 parser->out_layer = parser->layer; 1898 parser->layer = HASH_RXQ_TUNNEL; 1899 /* Default GRE to inner RSS. */ 1900 if (!parser->rss_conf.level) 1901 parser->rss_conf.level = 2; 1902 /* Update encapsulation IP layer protocol. 
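	 * Besides the generic tunnel spec copied at the end of this
	 * function, the outer IPv4/IPv6 spec already written for each hash
	 * Rx queue is patched so that its protocol/next-header field
	 * requires GRE (47); a conflicting protocol given explicitly in the
	 * pattern is rejected after the loop.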
*/ 1903 for (i = 0; i != hash_rxq_init_n; ++i) { 1904 if (!parser->queue[i].ibv_attr) 1905 continue; 1906 if (parser->out_layer == HASH_RXQ_IPV4) { 1907 ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr + 1908 parser->queue[i].offset - 1909 sizeof(struct ibv_flow_spec_ipv4_ext)); 1910 if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE) 1911 break; 1912 ipv4->val.proto = MLX5_GRE; 1913 ipv4->mask.proto = 0xff; 1914 } else if (parser->out_layer == HASH_RXQ_IPV6) { 1915 ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr + 1916 parser->queue[i].offset - 1917 sizeof(struct ibv_flow_spec_ipv6)); 1918 if (ipv6->mask.next_hdr && 1919 ipv6->val.next_hdr != MLX5_GRE) 1920 break; 1921 ipv6->val.next_hdr = MLX5_GRE; 1922 ipv6->mask.next_hdr = 0xff; 1923 } 1924 } 1925 if (i != hash_rxq_init_n) 1926 return rte_flow_error_set(data->error, EINVAL, 1927 RTE_FLOW_ERROR_TYPE_ITEM, 1928 item, 1929 "IP protocol of GRE must be 47"); 1930 mlx5_flow_create_copy(parser, &tunnel, size); 1931 return 0; 1932 } 1933 1934 /** 1935 * Convert mark/flag action to Verbs specification. 1936 * 1937 * @param parser 1938 * Internal parser structure. 1939 * @param mark_id 1940 * Mark identifier. 1941 * 1942 * @return 1943 * 0 on success, a negative errno value otherwise and rte_errno is set. 1944 */ 1945 static int 1946 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id) 1947 { 1948 unsigned int size = sizeof(struct ibv_flow_spec_action_tag); 1949 struct ibv_flow_spec_action_tag tag = { 1950 .type = IBV_FLOW_SPEC_ACTION_TAG, 1951 .size = size, 1952 .tag_id = mlx5_flow_mark_set(mark_id), 1953 }; 1954 1955 assert(parser->mark); 1956 mlx5_flow_create_copy(parser, &tag, size); 1957 return 0; 1958 } 1959 1960 /** 1961 * Convert count action to Verbs specification. 1962 * 1963 * @param dev 1964 * Pointer to Ethernet device. 1965 * @param parser 1966 * Pointer to MLX5 flow parser structure. 1967 * 1968 * @return 1969 * 0 on success, a negative errno value otherwise and rte_errno is set. 1970 */ 1971 static int 1972 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused, 1973 struct mlx5_flow_parse *parser __rte_unused) 1974 { 1975 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 1976 struct priv *priv = dev->data->dev_private; 1977 unsigned int size = sizeof(struct ibv_flow_spec_counter_action); 1978 struct ibv_counter_set_init_attr init_attr = {0}; 1979 struct ibv_flow_spec_counter_action counter = { 1980 .type = IBV_FLOW_SPEC_ACTION_COUNT, 1981 .size = size, 1982 .counter_set_handle = 0, 1983 }; 1984 1985 init_attr.counter_set_id = 0; 1986 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr); 1987 if (!parser->cs) { 1988 rte_errno = EINVAL; 1989 return -rte_errno; 1990 } 1991 counter.counter_set_handle = parser->cs->handle; 1992 mlx5_flow_create_copy(parser, &counter, size); 1993 #endif 1994 return 0; 1995 } 1996 1997 /** 1998 * Complete flow rule creation with a drop queue. 1999 * 2000 * @param dev 2001 * Pointer to Ethernet device. 2002 * @param parser 2003 * Internal parser structure. 2004 * @param flow 2005 * Pointer to the rte_flow. 2006 * @param[out] error 2007 * Perform verbose error reporting if not NULL. 2008 * 2009 * @return 2010 * 0 on success, a negative errno value otherwise and rte_errno is set. 
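 *
 * Illustrative only (assumed application-side usage, "port_id" is not from
 * this file): a rule whose fate action is DROP ends up in this path:
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *
 *	rte_flow_create(port_id, &attr, pattern, actions, &err);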
2011 */ 2012 static int 2013 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev, 2014 struct mlx5_flow_parse *parser, 2015 struct rte_flow *flow, 2016 struct rte_flow_error *error) 2017 { 2018 struct priv *priv = dev->data->dev_private; 2019 struct ibv_flow_spec_action_drop *drop; 2020 unsigned int size = sizeof(struct ibv_flow_spec_action_drop); 2021 2022 assert(priv->pd); 2023 assert(priv->ctx); 2024 flow->drop = 1; 2025 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr + 2026 parser->queue[HASH_RXQ_ETH].offset); 2027 *drop = (struct ibv_flow_spec_action_drop){ 2028 .type = IBV_FLOW_SPEC_ACTION_DROP, 2029 .size = size, 2030 }; 2031 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs; 2032 parser->queue[HASH_RXQ_ETH].offset += size; 2033 flow->frxq[HASH_RXQ_ETH].ibv_attr = 2034 parser->queue[HASH_RXQ_ETH].ibv_attr; 2035 if (parser->count) 2036 flow->cs = parser->cs; 2037 if (!priv->dev->data->dev_started) 2038 return 0; 2039 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL; 2040 flow->frxq[HASH_RXQ_ETH].ibv_flow = 2041 mlx5_glue->create_flow(priv->flow_drop_queue->qp, 2042 flow->frxq[HASH_RXQ_ETH].ibv_attr); 2043 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) { 2044 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, 2045 NULL, "flow rule creation failure"); 2046 goto error; 2047 } 2048 return 0; 2049 error: 2050 assert(flow); 2051 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) { 2052 claim_zero(mlx5_glue->destroy_flow 2053 (flow->frxq[HASH_RXQ_ETH].ibv_flow)); 2054 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL; 2055 } 2056 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) { 2057 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr); 2058 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL; 2059 } 2060 if (flow->cs) { 2061 claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); 2062 flow->cs = NULL; 2063 parser->cs = NULL; 2064 } 2065 return -rte_errno; 2066 } 2067 2068 /** 2069 * Create hash Rx queues when RSS is enabled. 2070 * 2071 * @param dev 2072 * Pointer to Ethernet device. 2073 * @param parser 2074 * Internal parser structure. 2075 * @param flow 2076 * Pointer to the rte_flow. 2077 * @param[out] error 2078 * Perform verbose error reporting if not NULL. 2079 * 2080 * @return 2081 * 0 on success, a negative errno value otherwise and rte_errno is set. 
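 *
 * Illustrative only (assumed application-side usage): an RSS action
 * spreading matching traffic over two queues takes this path; the queue
 * indexes are assumptions of the sketch:
 *
 *	uint16_t queues[] = { 0, 1 };
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *		.types = ETH_RSS_IP,
 *		.queue_num = RTE_DIM(queues),
 *		.queue = queues,
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};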
2082 */ 2083 static int 2084 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev, 2085 struct mlx5_flow_parse *parser, 2086 struct rte_flow *flow, 2087 struct rte_flow_error *error) 2088 { 2089 struct priv *priv = dev->data->dev_private; 2090 unsigned int i; 2091 2092 for (i = 0; i != hash_rxq_init_n; ++i) { 2093 if (!parser->queue[i].ibv_attr) 2094 continue; 2095 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr; 2096 parser->queue[i].ibv_attr = NULL; 2097 flow->frxq[i].hash_fields = parser->queue[i].hash_fields; 2098 if (!priv->dev->data->dev_started) 2099 continue; 2100 flow->frxq[i].hrxq = 2101 mlx5_hrxq_get(dev, 2102 parser->rss_conf.key, 2103 parser->rss_conf.key_len, 2104 flow->frxq[i].hash_fields, 2105 parser->rss_conf.queue, 2106 parser->rss_conf.queue_num, 2107 parser->tunnel, 2108 parser->rss_conf.level); 2109 if (flow->frxq[i].hrxq) 2110 continue; 2111 flow->frxq[i].hrxq = 2112 mlx5_hrxq_new(dev, 2113 parser->rss_conf.key, 2114 parser->rss_conf.key_len, 2115 flow->frxq[i].hash_fields, 2116 parser->rss_conf.queue, 2117 parser->rss_conf.queue_num, 2118 parser->tunnel, 2119 parser->rss_conf.level); 2120 if (!flow->frxq[i].hrxq) { 2121 return rte_flow_error_set(error, ENOMEM, 2122 RTE_FLOW_ERROR_TYPE_HANDLE, 2123 NULL, 2124 "cannot create hash rxq"); 2125 } 2126 } 2127 return 0; 2128 } 2129 2130 /** 2131 * RXQ update after flow rule creation. 2132 * 2133 * @param dev 2134 * Pointer to Ethernet device. 2135 * @param flow 2136 * Pointer to the flow rule. 2137 */ 2138 static void 2139 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow) 2140 { 2141 struct priv *priv = dev->data->dev_private; 2142 unsigned int i; 2143 unsigned int j; 2144 2145 if (!dev->data->dev_started) 2146 return; 2147 for (i = 0; i != flow->rss_conf.queue_num; ++i) { 2148 struct mlx5_rxq_data *rxq_data = (*priv->rxqs) 2149 [(*flow->queues)[i]]; 2150 struct mlx5_rxq_ctrl *rxq_ctrl = 2151 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 2152 uint8_t tunnel = PTYPE_IDX(flow->tunnel); 2153 2154 rxq_data->mark |= flow->mark; 2155 if (!tunnel) 2156 continue; 2157 rxq_ctrl->tunnel_types[tunnel] += 1; 2158 /* Clear tunnel type if more than one tunnel types set. */ 2159 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) { 2160 if (j == tunnel) 2161 continue; 2162 if (rxq_ctrl->tunnel_types[j] > 0) { 2163 rxq_data->tunnel = 0; 2164 break; 2165 } 2166 } 2167 if (j == RTE_DIM(rxq_ctrl->tunnel_types)) 2168 rxq_data->tunnel = flow->tunnel; 2169 } 2170 } 2171 2172 /** 2173 * Dump flow hash RX queue detail. 2174 * 2175 * @param dev 2176 * Pointer to Ethernet device. 2177 * @param flow 2178 * Pointer to the rte_flow. 2179 * @param hrxq_idx 2180 * Hash RX queue index. 
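 *
 * Compiled in only when NDEBUG is not defined. It logs, at DEBUG level,
 * the type and size of every Verbs specification attached to the selected
 * hash Rx queue, followed by a summary of the flow attribute.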
2181 */ 2182 static void 2183 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused, 2184 struct rte_flow *flow __rte_unused, 2185 unsigned int hrxq_idx __rte_unused) 2186 { 2187 #ifndef NDEBUG 2188 uintptr_t spec_ptr; 2189 uint16_t j; 2190 char buf[256]; 2191 uint8_t off; 2192 2193 spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1); 2194 for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs; 2195 j++) { 2196 struct ibv_flow_spec *spec = (void *)spec_ptr; 2197 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type, 2198 spec->hdr.size); 2199 spec_ptr += spec->hdr.size; 2200 } 2201 DRV_LOG(DEBUG, 2202 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p," 2203 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d," 2204 " flags:%x, comp_mask:%x specs:%s", 2205 dev->data->port_id, (void *)flow, hrxq_idx, 2206 (void *)flow->frxq[hrxq_idx].hrxq, 2207 (void *)flow->frxq[hrxq_idx].hrxq->qp, 2208 (void *)flow->frxq[hrxq_idx].hrxq->ind_table, 2209 flow->frxq[hrxq_idx].hash_fields | 2210 (flow->tunnel && 2211 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0), 2212 flow->rss_conf.queue_num, 2213 flow->frxq[hrxq_idx].ibv_attr->num_of_specs, 2214 flow->frxq[hrxq_idx].ibv_attr->size, 2215 flow->frxq[hrxq_idx].ibv_attr->priority, 2216 flow->frxq[hrxq_idx].ibv_attr->type, 2217 flow->frxq[hrxq_idx].ibv_attr->flags, 2218 flow->frxq[hrxq_idx].ibv_attr->comp_mask, 2219 buf); 2220 #endif 2221 } 2222 2223 /** 2224 * Complete flow rule creation. 2225 * 2226 * @param dev 2227 * Pointer to Ethernet device. 2228 * @param parser 2229 * Internal parser structure. 2230 * @param flow 2231 * Pointer to the rte_flow. 2232 * @param[out] error 2233 * Perform verbose error reporting if not NULL. 2234 * 2235 * @return 2236 * 0 on success, a negative errno value otherwise and rte_errno is set. 2237 */ 2238 static int 2239 mlx5_flow_create_action_queue(struct rte_eth_dev *dev, 2240 struct mlx5_flow_parse *parser, 2241 struct rte_flow *flow, 2242 struct rte_flow_error *error) 2243 { 2244 struct priv *priv = dev->data->dev_private; 2245 int ret; 2246 unsigned int i; 2247 unsigned int flows_n = 0; 2248 2249 assert(priv->pd); 2250 assert(priv->ctx); 2251 assert(!parser->drop); 2252 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error); 2253 if (ret) 2254 goto error; 2255 if (parser->count) 2256 flow->cs = parser->cs; 2257 if (!priv->dev->data->dev_started) 2258 return 0; 2259 for (i = 0; i != hash_rxq_init_n; ++i) { 2260 if (!flow->frxq[i].hrxq) 2261 continue; 2262 flow->frxq[i].ibv_flow = 2263 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp, 2264 flow->frxq[i].ibv_attr); 2265 mlx5_flow_dump(dev, flow, i); 2266 if (!flow->frxq[i].ibv_flow) { 2267 rte_flow_error_set(error, ENOMEM, 2268 RTE_FLOW_ERROR_TYPE_HANDLE, 2269 NULL, "flow rule creation failure"); 2270 goto error; 2271 } 2272 ++flows_n; 2273 } 2274 if (!flows_n) { 2275 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, 2276 NULL, "internal error in flow creation"); 2277 goto error; 2278 } 2279 mlx5_flow_create_update_rxqs(dev, flow); 2280 return 0; 2281 error: 2282 ret = rte_errno; /* Save rte_errno before cleanup. 
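	 * The cleanup below (e.g. mlx5_hrxq_release()) may itself modify
	 * rte_errno; the saved value is restored just before returning so
	 * that the caller sees the original failure cause.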
*/ 2283 assert(flow); 2284 for (i = 0; i != hash_rxq_init_n; ++i) { 2285 if (flow->frxq[i].ibv_flow) { 2286 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow; 2287 2288 claim_zero(mlx5_glue->destroy_flow(ibv_flow)); 2289 } 2290 if (flow->frxq[i].hrxq) 2291 mlx5_hrxq_release(dev, flow->frxq[i].hrxq); 2292 if (flow->frxq[i].ibv_attr) 2293 rte_free(flow->frxq[i].ibv_attr); 2294 } 2295 if (flow->cs) { 2296 claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); 2297 flow->cs = NULL; 2298 parser->cs = NULL; 2299 } 2300 rte_errno = ret; /* Restore rte_errno. */ 2301 return -rte_errno; 2302 } 2303 2304 /** 2305 * Convert a flow. 2306 * 2307 * @param dev 2308 * Pointer to Ethernet device. 2309 * @param list 2310 * Pointer to a TAILQ flow list. 2311 * @param[in] attr 2312 * Flow rule attributes. 2313 * @param[in] pattern 2314 * Pattern specification (list terminated by the END pattern item). 2315 * @param[in] actions 2316 * Associated actions (list terminated by the END action). 2317 * @param[out] error 2318 * Perform verbose error reporting if not NULL. 2319 * 2320 * @return 2321 * A flow on success, NULL otherwise and rte_errno is set. 2322 */ 2323 static struct rte_flow * 2324 mlx5_flow_list_create(struct rte_eth_dev *dev, 2325 struct mlx5_flows *list, 2326 const struct rte_flow_attr *attr, 2327 const struct rte_flow_item items[], 2328 const struct rte_flow_action actions[], 2329 struct rte_flow_error *error) 2330 { 2331 struct mlx5_flow_parse parser = { .create = 1, }; 2332 struct rte_flow *flow = NULL; 2333 unsigned int i; 2334 int ret; 2335 2336 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser); 2337 if (ret) 2338 goto exit; 2339 flow = rte_calloc(__func__, 1, 2340 sizeof(*flow) + 2341 parser.rss_conf.queue_num * sizeof(uint16_t), 2342 0); 2343 if (!flow) { 2344 rte_flow_error_set(error, ENOMEM, 2345 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 2346 NULL, 2347 "cannot allocate flow memory"); 2348 return NULL; 2349 } 2350 /* Copy configuration. */ 2351 flow->queues = (uint16_t (*)[])(flow + 1); 2352 flow->tunnel = parser.tunnel; 2353 flow->rss_conf = (struct rte_flow_action_rss){ 2354 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 2355 .level = 0, 2356 .types = parser.rss_conf.types, 2357 .key_len = parser.rss_conf.key_len, 2358 .queue_num = parser.rss_conf.queue_num, 2359 .key = memcpy(flow->rss_key, parser.rss_conf.key, 2360 sizeof(*parser.rss_conf.key) * 2361 parser.rss_conf.key_len), 2362 .queue = memcpy(flow->queues, parser.rss_conf.queue, 2363 sizeof(*parser.rss_conf.queue) * 2364 parser.rss_conf.queue_num), 2365 }; 2366 flow->mark = parser.mark; 2367 /* finalise the flow. */ 2368 if (parser.drop) 2369 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow, 2370 error); 2371 else 2372 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error); 2373 if (ret) 2374 goto exit; 2375 TAILQ_INSERT_TAIL(list, flow, next); 2376 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id, 2377 (void *)flow); 2378 return flow; 2379 exit: 2380 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id, 2381 error->message); 2382 for (i = 0; i != hash_rxq_init_n; ++i) { 2383 if (parser.queue[i].ibv_attr) 2384 rte_free(parser.queue[i].ibv_attr); 2385 } 2386 rte_free(flow); 2387 return NULL; 2388 } 2389 2390 /** 2391 * Validate a flow supported by the NIC. 
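 *
 * Only the conversion step runs here (parser.create == 0), so nothing is
 * programmed into the device. Illustrative usage (assumed, not from this
 * file):
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *		flow = rte_flow_create(port_id, &attr, pattern, actions, &err);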
2392 * 2393 * @see rte_flow_validate() 2394 * @see rte_flow_ops 2395 */ 2396 int 2397 mlx5_flow_validate(struct rte_eth_dev *dev, 2398 const struct rte_flow_attr *attr, 2399 const struct rte_flow_item items[], 2400 const struct rte_flow_action actions[], 2401 struct rte_flow_error *error) 2402 { 2403 struct mlx5_flow_parse parser = { .create = 0, }; 2404 2405 return mlx5_flow_convert(dev, attr, items, actions, error, &parser); 2406 } 2407 2408 /** 2409 * Create a flow. 2410 * 2411 * @see rte_flow_create() 2412 * @see rte_flow_ops 2413 */ 2414 struct rte_flow * 2415 mlx5_flow_create(struct rte_eth_dev *dev, 2416 const struct rte_flow_attr *attr, 2417 const struct rte_flow_item items[], 2418 const struct rte_flow_action actions[], 2419 struct rte_flow_error *error) 2420 { 2421 struct priv *priv = dev->data->dev_private; 2422 2423 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions, 2424 error); 2425 } 2426 2427 /** 2428 * Destroy a flow in a list. 2429 * 2430 * @param dev 2431 * Pointer to Ethernet device. 2432 * @param list 2433 * Pointer to a TAILQ flow list. 2434 * @param[in] flow 2435 * Flow to destroy. 2436 */ 2437 static void 2438 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, 2439 struct rte_flow *flow) 2440 { 2441 struct priv *priv = dev->data->dev_private; 2442 unsigned int i; 2443 2444 if (flow->drop || !dev->data->dev_started) 2445 goto free; 2446 for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) { 2447 /* Update queue tunnel type. */ 2448 struct mlx5_rxq_data *rxq_data = (*priv->rxqs) 2449 [(*flow->queues)[i]]; 2450 struct mlx5_rxq_ctrl *rxq_ctrl = 2451 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 2452 uint8_t tunnel = PTYPE_IDX(flow->tunnel); 2453 2454 assert(rxq_ctrl->tunnel_types[tunnel] > 0); 2455 rxq_ctrl->tunnel_types[tunnel] -= 1; 2456 if (!rxq_ctrl->tunnel_types[tunnel]) { 2457 /* Update tunnel type. */ 2458 uint8_t j; 2459 uint8_t types = 0; 2460 uint8_t last; 2461 2462 for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++) 2463 if (rxq_ctrl->tunnel_types[j]) { 2464 types += 1; 2465 last = j; 2466 } 2467 /* Keep same if more than one tunnel types left. */ 2468 if (types == 1) 2469 rxq_data->tunnel = ptype_ext[last]; 2470 else if (types == 0) 2471 /* No tunnel type left. */ 2472 rxq_data->tunnel = 0; 2473 } 2474 } 2475 for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) { 2476 struct rte_flow *tmp; 2477 int mark = 0; 2478 2479 /* 2480 * To remove the mark from the queue, the queue must not be 2481 * present in any other marked flow (RSS or not). 
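		 * The loop below therefore scans every remaining flow in the
		 * list and keeps the MARK flag set on the Rx queue only while
		 * at least one marked flow still references that queue.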
2482 */ 2483 TAILQ_FOREACH(tmp, list, next) { 2484 unsigned int j; 2485 uint16_t *tqs = NULL; 2486 uint16_t tq_n = 0; 2487 2488 if (!tmp->mark) 2489 continue; 2490 for (j = 0; j != hash_rxq_init_n; ++j) { 2491 if (!tmp->frxq[j].hrxq) 2492 continue; 2493 tqs = tmp->frxq[j].hrxq->ind_table->queues; 2494 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n; 2495 } 2496 if (!tq_n) 2497 continue; 2498 for (j = 0; (j != tq_n) && !mark; j++) 2499 if (tqs[j] == (*flow->queues)[i]) 2500 mark = 1; 2501 } 2502 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark; 2503 } 2504 free: 2505 if (flow->drop) { 2506 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) 2507 claim_zero(mlx5_glue->destroy_flow 2508 (flow->frxq[HASH_RXQ_ETH].ibv_flow)); 2509 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr); 2510 } else { 2511 for (i = 0; i != hash_rxq_init_n; ++i) { 2512 struct mlx5_flow *frxq = &flow->frxq[i]; 2513 2514 if (frxq->ibv_flow) 2515 claim_zero(mlx5_glue->destroy_flow 2516 (frxq->ibv_flow)); 2517 if (frxq->hrxq) 2518 mlx5_hrxq_release(dev, frxq->hrxq); 2519 if (frxq->ibv_attr) 2520 rte_free(frxq->ibv_attr); 2521 } 2522 } 2523 if (flow->cs) { 2524 claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); 2525 flow->cs = NULL; 2526 } 2527 TAILQ_REMOVE(list, flow, next); 2528 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id, 2529 (void *)flow); 2530 rte_free(flow); 2531 } 2532 2533 /** 2534 * Destroy all flows. 2535 * 2536 * @param dev 2537 * Pointer to Ethernet device. 2538 * @param list 2539 * Pointer to a TAILQ flow list. 2540 */ 2541 void 2542 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list) 2543 { 2544 while (!TAILQ_EMPTY(list)) { 2545 struct rte_flow *flow; 2546 2547 flow = TAILQ_FIRST(list); 2548 mlx5_flow_list_destroy(dev, list, flow); 2549 } 2550 } 2551 2552 /** 2553 * Create drop queue. 2554 * 2555 * @param dev 2556 * Pointer to Ethernet device. 2557 * 2558 * @return 2559 * 0 on success, a negative errno value otherwise and rte_errno is set. 
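 *
 * Illustrative pairing (assumed call site, not from this file):
 *
 *	if (mlx5_flow_create_drop_queue(dev) < 0)
 *		return -rte_errno;
 *	...
 *	mlx5_flow_delete_drop_queue(dev);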
2560 */ 2561 int 2562 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev) 2563 { 2564 struct priv *priv = dev->data->dev_private; 2565 struct mlx5_hrxq_drop *fdq = NULL; 2566 2567 assert(priv->pd); 2568 assert(priv->ctx); 2569 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0); 2570 if (!fdq) { 2571 DRV_LOG(WARNING, 2572 "port %u cannot allocate memory for drop queue", 2573 dev->data->port_id); 2574 rte_errno = ENOMEM; 2575 return -rte_errno; 2576 } 2577 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); 2578 if (!fdq->cq) { 2579 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue", 2580 dev->data->port_id); 2581 rte_errno = errno; 2582 goto error; 2583 } 2584 fdq->wq = mlx5_glue->create_wq 2585 (priv->ctx, 2586 &(struct ibv_wq_init_attr){ 2587 .wq_type = IBV_WQT_RQ, 2588 .max_wr = 1, 2589 .max_sge = 1, 2590 .pd = priv->pd, 2591 .cq = fdq->cq, 2592 }); 2593 if (!fdq->wq) { 2594 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue", 2595 dev->data->port_id); 2596 rte_errno = errno; 2597 goto error; 2598 } 2599 fdq->ind_table = mlx5_glue->create_rwq_ind_table 2600 (priv->ctx, 2601 &(struct ibv_rwq_ind_table_init_attr){ 2602 .log_ind_tbl_size = 0, 2603 .ind_tbl = &fdq->wq, 2604 .comp_mask = 0, 2605 }); 2606 if (!fdq->ind_table) { 2607 DRV_LOG(WARNING, 2608 "port %u cannot allocate indirection table for drop" 2609 " queue", 2610 dev->data->port_id); 2611 rte_errno = errno; 2612 goto error; 2613 } 2614 fdq->qp = mlx5_glue->create_qp_ex 2615 (priv->ctx, 2616 &(struct ibv_qp_init_attr_ex){ 2617 .qp_type = IBV_QPT_RAW_PACKET, 2618 .comp_mask = 2619 IBV_QP_INIT_ATTR_PD | 2620 IBV_QP_INIT_ATTR_IND_TABLE | 2621 IBV_QP_INIT_ATTR_RX_HASH, 2622 .rx_hash_conf = (struct ibv_rx_hash_conf){ 2623 .rx_hash_function = 2624 IBV_RX_HASH_FUNC_TOEPLITZ, 2625 .rx_hash_key_len = rss_hash_default_key_len, 2626 .rx_hash_key = rss_hash_default_key, 2627 .rx_hash_fields_mask = 0, 2628 }, 2629 .rwq_ind_tbl = fdq->ind_table, 2630 .pd = priv->pd 2631 }); 2632 if (!fdq->qp) { 2633 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue", 2634 dev->data->port_id); 2635 rte_errno = errno; 2636 goto error; 2637 } 2638 priv->flow_drop_queue = fdq; 2639 return 0; 2640 error: 2641 if (fdq->qp) 2642 claim_zero(mlx5_glue->destroy_qp(fdq->qp)); 2643 if (fdq->ind_table) 2644 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table)); 2645 if (fdq->wq) 2646 claim_zero(mlx5_glue->destroy_wq(fdq->wq)); 2647 if (fdq->cq) 2648 claim_zero(mlx5_glue->destroy_cq(fdq->cq)); 2649 if (fdq) 2650 rte_free(fdq); 2651 priv->flow_drop_queue = NULL; 2652 return -rte_errno; 2653 } 2654 2655 /** 2656 * Delete drop queue. 2657 * 2658 * @param dev 2659 * Pointer to Ethernet device. 2660 */ 2661 void 2662 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev) 2663 { 2664 struct priv *priv = dev->data->dev_private; 2665 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue; 2666 2667 if (!fdq) 2668 return; 2669 if (fdq->qp) 2670 claim_zero(mlx5_glue->destroy_qp(fdq->qp)); 2671 if (fdq->ind_table) 2672 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table)); 2673 if (fdq->wq) 2674 claim_zero(mlx5_glue->destroy_wq(fdq->wq)); 2675 if (fdq->cq) 2676 claim_zero(mlx5_glue->destroy_cq(fdq->cq)); 2677 rte_free(fdq); 2678 priv->flow_drop_queue = NULL; 2679 } 2680 2681 /** 2682 * Remove all flows. 2683 * 2684 * @param dev 2685 * Pointer to Ethernet device. 2686 * @param list 2687 * Pointer to a TAILQ flow list. 
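 *
 * Illustrative pairing (assumed call site, not from this file): flows are
 * removed on device stop and re-applied on start, e.g.:
 *
 *	mlx5_flow_stop(dev, &priv->flows);
 *	...
 *	mlx5_flow_start(dev, &priv->flows);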
2688 */ 2689 void 2690 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list) 2691 { 2692 struct priv *priv = dev->data->dev_private; 2693 struct rte_flow *flow; 2694 unsigned int i; 2695 2696 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) { 2697 struct mlx5_ind_table_ibv *ind_tbl = NULL; 2698 2699 if (flow->drop) { 2700 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) 2701 continue; 2702 claim_zero(mlx5_glue->destroy_flow 2703 (flow->frxq[HASH_RXQ_ETH].ibv_flow)); 2704 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL; 2705 DRV_LOG(DEBUG, "port %u flow %p removed", 2706 dev->data->port_id, (void *)flow); 2707 /* Next flow. */ 2708 continue; 2709 } 2710 /* Verify the flow has not already been cleaned. */ 2711 for (i = 0; i != hash_rxq_init_n; ++i) { 2712 if (!flow->frxq[i].ibv_flow) 2713 continue; 2714 /* 2715 * Indirection table may be necessary to remove the 2716 * flags in the Rx queues. 2717 * This helps to speed-up the process by avoiding 2718 * another loop. 2719 */ 2720 ind_tbl = flow->frxq[i].hrxq->ind_table; 2721 break; 2722 } 2723 if (i == hash_rxq_init_n) 2724 return; 2725 if (flow->mark) { 2726 assert(ind_tbl); 2727 for (i = 0; i != ind_tbl->queues_n; ++i) 2728 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0; 2729 } 2730 for (i = 0; i != hash_rxq_init_n; ++i) { 2731 if (!flow->frxq[i].ibv_flow) 2732 continue; 2733 claim_zero(mlx5_glue->destroy_flow 2734 (flow->frxq[i].ibv_flow)); 2735 flow->frxq[i].ibv_flow = NULL; 2736 mlx5_hrxq_release(dev, flow->frxq[i].hrxq); 2737 flow->frxq[i].hrxq = NULL; 2738 } 2739 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id, 2740 (void *)flow); 2741 } 2742 /* Cleanup Rx queue tunnel info. */ 2743 for (i = 0; i != priv->rxqs_n; ++i) { 2744 struct mlx5_rxq_data *q = (*priv->rxqs)[i]; 2745 struct mlx5_rxq_ctrl *rxq_ctrl = 2746 container_of(q, struct mlx5_rxq_ctrl, rxq); 2747 2748 if (!q) 2749 continue; 2750 memset((void *)rxq_ctrl->tunnel_types, 0, 2751 sizeof(rxq_ctrl->tunnel_types)); 2752 q->tunnel = 0; 2753 } 2754 } 2755 2756 /** 2757 * Add all flows. 2758 * 2759 * @param dev 2760 * Pointer to Ethernet device. 2761 * @param list 2762 * Pointer to a TAILQ flow list. 2763 * 2764 * @return 2765 * 0 on success, a negative errno value otherwise and rte_errno is set. 2766 */ 2767 int 2768 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list) 2769 { 2770 struct priv *priv = dev->data->dev_private; 2771 struct rte_flow *flow; 2772 2773 TAILQ_FOREACH(flow, list, next) { 2774 unsigned int i; 2775 2776 if (flow->drop) { 2777 flow->frxq[HASH_RXQ_ETH].ibv_flow = 2778 mlx5_glue->create_flow 2779 (priv->flow_drop_queue->qp, 2780 flow->frxq[HASH_RXQ_ETH].ibv_attr); 2781 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) { 2782 DRV_LOG(DEBUG, 2783 "port %u flow %p cannot be applied", 2784 dev->data->port_id, (void *)flow); 2785 rte_errno = EINVAL; 2786 return -rte_errno; 2787 } 2788 DRV_LOG(DEBUG, "port %u flow %p applied", 2789 dev->data->port_id, (void *)flow); 2790 /* Next flow. 
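			 * A drop rule is re-created directly on the drop
			 * queue QP, so the per-hash-queue handling below is
			 * not needed.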
*/ 2791 continue; 2792 } 2793 for (i = 0; i != hash_rxq_init_n; ++i) { 2794 if (!flow->frxq[i].ibv_attr) 2795 continue; 2796 flow->frxq[i].hrxq = 2797 mlx5_hrxq_get(dev, flow->rss_conf.key, 2798 flow->rss_conf.key_len, 2799 flow->frxq[i].hash_fields, 2800 flow->rss_conf.queue, 2801 flow->rss_conf.queue_num, 2802 flow->tunnel, 2803 flow->rss_conf.level); 2804 if (flow->frxq[i].hrxq) 2805 goto flow_create; 2806 flow->frxq[i].hrxq = 2807 mlx5_hrxq_new(dev, flow->rss_conf.key, 2808 flow->rss_conf.key_len, 2809 flow->frxq[i].hash_fields, 2810 flow->rss_conf.queue, 2811 flow->rss_conf.queue_num, 2812 flow->tunnel, 2813 flow->rss_conf.level); 2814 if (!flow->frxq[i].hrxq) { 2815 DRV_LOG(DEBUG, 2816 "port %u flow %p cannot create hash" 2817 " rxq", 2818 dev->data->port_id, (void *)flow); 2819 rte_errno = EINVAL; 2820 return -rte_errno; 2821 } 2822 flow_create: 2823 mlx5_flow_dump(dev, flow, i); 2824 flow->frxq[i].ibv_flow = 2825 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp, 2826 flow->frxq[i].ibv_attr); 2827 if (!flow->frxq[i].ibv_flow) { 2828 DRV_LOG(DEBUG, 2829 "port %u flow %p type %u cannot be" 2830 " applied", 2831 dev->data->port_id, (void *)flow, i); 2832 rte_errno = EINVAL; 2833 return -rte_errno; 2834 } 2835 } 2836 mlx5_flow_create_update_rxqs(dev, flow); 2837 } 2838 return 0; 2839 } 2840 2841 /** 2842 * Verify the flow list is empty 2843 * 2844 * @param dev 2845 * Pointer to Ethernet device. 2846 * 2847 * @return the number of flows not released. 2848 */ 2849 int 2850 mlx5_flow_verify(struct rte_eth_dev *dev) 2851 { 2852 struct priv *priv = dev->data->dev_private; 2853 struct rte_flow *flow; 2854 int ret = 0; 2855 2856 TAILQ_FOREACH(flow, &priv->flows, next) { 2857 DRV_LOG(DEBUG, "port %u flow %p still referenced", 2858 dev->data->port_id, (void *)flow); 2859 ++ret; 2860 } 2861 return ret; 2862 } 2863 2864 /** 2865 * Enable a control flow configured from the control plane. 2866 * 2867 * @param dev 2868 * Pointer to Ethernet device. 2869 * @param eth_spec 2870 * An Ethernet flow spec to apply. 2871 * @param eth_mask 2872 * An Ethernet flow mask to apply. 2873 * @param vlan_spec 2874 * A VLAN flow spec to apply. 2875 * @param vlan_mask 2876 * A VLAN flow mask to apply. 2877 * 2878 * @return 2879 * 0 on success, a negative errno value otherwise and rte_errno is set. 2880 */ 2881 int 2882 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, 2883 struct rte_flow_item_eth *eth_spec, 2884 struct rte_flow_item_eth *eth_mask, 2885 struct rte_flow_item_vlan *vlan_spec, 2886 struct rte_flow_item_vlan *vlan_mask) 2887 { 2888 struct priv *priv = dev->data->dev_private; 2889 const struct rte_flow_attr attr = { 2890 .ingress = 1, 2891 .priority = MLX5_CTRL_FLOW_PRIORITY, 2892 }; 2893 struct rte_flow_item items[] = { 2894 { 2895 .type = RTE_FLOW_ITEM_TYPE_ETH, 2896 .spec = eth_spec, 2897 .last = NULL, 2898 .mask = eth_mask, 2899 }, 2900 { 2901 .type = (vlan_spec) ? 
RTE_FLOW_ITEM_TYPE_VLAN : 2902 RTE_FLOW_ITEM_TYPE_END, 2903 .spec = vlan_spec, 2904 .last = NULL, 2905 .mask = vlan_mask, 2906 }, 2907 { 2908 .type = RTE_FLOW_ITEM_TYPE_END, 2909 }, 2910 }; 2911 uint16_t queue[priv->reta_idx_n]; 2912 struct rte_flow_action_rss action_rss = { 2913 .func = RTE_ETH_HASH_FUNCTION_DEFAULT, 2914 .level = 0, 2915 .types = priv->rss_conf.rss_hf, 2916 .key_len = priv->rss_conf.rss_key_len, 2917 .queue_num = priv->reta_idx_n, 2918 .key = priv->rss_conf.rss_key, 2919 .queue = queue, 2920 }; 2921 struct rte_flow_action actions[] = { 2922 { 2923 .type = RTE_FLOW_ACTION_TYPE_RSS, 2924 .conf = &action_rss, 2925 }, 2926 { 2927 .type = RTE_FLOW_ACTION_TYPE_END, 2928 }, 2929 }; 2930 struct rte_flow *flow; 2931 struct rte_flow_error error; 2932 unsigned int i; 2933 2934 if (!priv->reta_idx_n) { 2935 rte_errno = EINVAL; 2936 return -rte_errno; 2937 } 2938 for (i = 0; i != priv->reta_idx_n; ++i) 2939 queue[i] = (*priv->reta_idx)[i]; 2940 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items, 2941 actions, &error); 2942 if (!flow) 2943 return -rte_errno; 2944 return 0; 2945 } 2946 2947 /** 2948 * Enable a flow control configured from the control plane. 2949 * 2950 * @param dev 2951 * Pointer to Ethernet device. 2952 * @param eth_spec 2953 * An Ethernet flow spec to apply. 2954 * @param eth_mask 2955 * An Ethernet flow mask to apply. 2956 * 2957 * @return 2958 * 0 on success, a negative errno value otherwise and rte_errno is set. 2959 */ 2960 int 2961 mlx5_ctrl_flow(struct rte_eth_dev *dev, 2962 struct rte_flow_item_eth *eth_spec, 2963 struct rte_flow_item_eth *eth_mask) 2964 { 2965 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); 2966 } 2967 2968 /** 2969 * Destroy a flow. 2970 * 2971 * @see rte_flow_destroy() 2972 * @see rte_flow_ops 2973 */ 2974 int 2975 mlx5_flow_destroy(struct rte_eth_dev *dev, 2976 struct rte_flow *flow, 2977 struct rte_flow_error *error __rte_unused) 2978 { 2979 struct priv *priv = dev->data->dev_private; 2980 2981 mlx5_flow_list_destroy(dev, &priv->flows, flow); 2982 return 0; 2983 } 2984 2985 /** 2986 * Destroy all flows. 2987 * 2988 * @see rte_flow_flush() 2989 * @see rte_flow_ops 2990 */ 2991 int 2992 mlx5_flow_flush(struct rte_eth_dev *dev, 2993 struct rte_flow_error *error __rte_unused) 2994 { 2995 struct priv *priv = dev->data->dev_private; 2996 2997 mlx5_flow_list_flush(dev, &priv->flows); 2998 return 0; 2999 } 3000 3001 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT 3002 /** 3003 * Query flow counter. 3004 * 3005 * @param cs 3006 * the counter set. 3007 * @param counter_value 3008 * returned data from the counter. 3009 * 3010 * @return 3011 * 0 on success, a negative errno value otherwise and rte_errno is set. 
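 *
 * Illustrative only (assumed application-side usage, with "flow" being an
 * existing rule that carries a COUNT action):
 *
 *	struct rte_flow_query_count qc = { .reset = 1 };
 *	struct rte_flow_action count = { .type = RTE_FLOW_ACTION_TYPE_COUNT };
 *	struct rte_flow_error err;
 *
 *	if (!rte_flow_query(port_id, flow, &count, &qc, &err))
 *		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *		       qc.hits, qc.bytes);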
3012 */ 3013 static int 3014 mlx5_flow_query_count(struct ibv_counter_set *cs, 3015 struct mlx5_flow_counter_stats *counter_stats, 3016 struct rte_flow_query_count *query_count, 3017 struct rte_flow_error *error) 3018 { 3019 uint64_t counters[2]; 3020 struct ibv_query_counter_set_attr query_cs_attr = { 3021 .cs = cs, 3022 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE, 3023 }; 3024 struct ibv_counter_set_data query_out = { 3025 .out = counters, 3026 .outlen = 2 * sizeof(uint64_t), 3027 }; 3028 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out); 3029 3030 if (err) 3031 return rte_flow_error_set(error, err, 3032 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3033 NULL, 3034 "cannot read counter"); 3035 query_count->hits_set = 1; 3036 query_count->bytes_set = 1; 3037 query_count->hits = counters[0] - counter_stats->hits; 3038 query_count->bytes = counters[1] - counter_stats->bytes; 3039 if (query_count->reset) { 3040 counter_stats->hits = counters[0]; 3041 counter_stats->bytes = counters[1]; 3042 } 3043 return 0; 3044 } 3045 3046 /** 3047 * Query a flows. 3048 * 3049 * @see rte_flow_query() 3050 * @see rte_flow_ops 3051 */ 3052 int 3053 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused, 3054 struct rte_flow *flow, 3055 const struct rte_flow_action *action __rte_unused, 3056 void *data, 3057 struct rte_flow_error *error) 3058 { 3059 if (flow->cs) { 3060 int ret; 3061 3062 ret = mlx5_flow_query_count(flow->cs, 3063 &flow->counter_stats, 3064 (struct rte_flow_query_count *)data, 3065 error); 3066 if (ret) 3067 return ret; 3068 } else { 3069 return rte_flow_error_set(error, EINVAL, 3070 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3071 NULL, 3072 "no counter found for flow"); 3073 } 3074 return 0; 3075 } 3076 #endif 3077 3078 /** 3079 * Isolated mode. 3080 * 3081 * @see rte_flow_isolate() 3082 * @see rte_flow_ops 3083 */ 3084 int 3085 mlx5_flow_isolate(struct rte_eth_dev *dev, 3086 int enable, 3087 struct rte_flow_error *error) 3088 { 3089 struct priv *priv = dev->data->dev_private; 3090 3091 if (dev->data->dev_started) { 3092 rte_flow_error_set(error, EBUSY, 3093 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 3094 NULL, 3095 "port must be stopped first"); 3096 return -rte_errno; 3097 } 3098 priv->isolated = !!enable; 3099 if (enable) 3100 priv->dev->dev_ops = &mlx5_dev_ops_isolate; 3101 else 3102 priv->dev->dev_ops = &mlx5_dev_ops; 3103 return 0; 3104 } 3105 3106 /** 3107 * Convert a flow director filter to a generic flow. 3108 * 3109 * @param dev 3110 * Pointer to Ethernet device. 3111 * @param fdir_filter 3112 * Flow director filter to add. 3113 * @param attributes 3114 * Generic flow parameters structure. 3115 * 3116 * @return 3117 * 0 on success, a negative errno value otherwise and rte_errno is set. 3118 */ 3119 static int 3120 mlx5_fdir_filter_convert(struct rte_eth_dev *dev, 3121 const struct rte_eth_fdir_filter *fdir_filter, 3122 struct mlx5_fdir *attributes) 3123 { 3124 struct priv *priv = dev->data->dev_private; 3125 const struct rte_eth_fdir_input *input = &fdir_filter->input; 3126 const struct rte_eth_fdir_masks *mask = 3127 &dev->data->dev_conf.fdir_conf.mask; 3128 3129 /* Validate queue number. 
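	 * The flow director action may only redirect packets to an Rx queue
	 * index that is already configured on the port.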
*/ 3130 if (fdir_filter->action.rx_queue >= priv->rxqs_n) { 3131 DRV_LOG(ERR, "port %u invalid queue number %d", 3132 dev->data->port_id, fdir_filter->action.rx_queue); 3133 rte_errno = EINVAL; 3134 return -rte_errno; 3135 } 3136 attributes->attr.ingress = 1; 3137 attributes->items[0] = (struct rte_flow_item) { 3138 .type = RTE_FLOW_ITEM_TYPE_ETH, 3139 .spec = &attributes->l2, 3140 .mask = &attributes->l2_mask, 3141 }; 3142 switch (fdir_filter->action.behavior) { 3143 case RTE_ETH_FDIR_ACCEPT: 3144 attributes->actions[0] = (struct rte_flow_action){ 3145 .type = RTE_FLOW_ACTION_TYPE_QUEUE, 3146 .conf = &attributes->queue, 3147 }; 3148 break; 3149 case RTE_ETH_FDIR_REJECT: 3150 attributes->actions[0] = (struct rte_flow_action){ 3151 .type = RTE_FLOW_ACTION_TYPE_DROP, 3152 }; 3153 break; 3154 default: 3155 DRV_LOG(ERR, "port %u invalid behavior %d", 3156 dev->data->port_id, 3157 fdir_filter->action.behavior); 3158 rte_errno = ENOTSUP; 3159 return -rte_errno; 3160 } 3161 attributes->queue.index = fdir_filter->action.rx_queue; 3162 /* Handle L3. */ 3163 switch (fdir_filter->input.flow_type) { 3164 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3165 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3166 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3167 attributes->l3.ipv4.hdr = (struct ipv4_hdr){ 3168 .src_addr = input->flow.ip4_flow.src_ip, 3169 .dst_addr = input->flow.ip4_flow.dst_ip, 3170 .time_to_live = input->flow.ip4_flow.ttl, 3171 .type_of_service = input->flow.ip4_flow.tos, 3172 .next_proto_id = input->flow.ip4_flow.proto, 3173 }; 3174 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ 3175 .src_addr = mask->ipv4_mask.src_ip, 3176 .dst_addr = mask->ipv4_mask.dst_ip, 3177 .time_to_live = mask->ipv4_mask.ttl, 3178 .type_of_service = mask->ipv4_mask.tos, 3179 .next_proto_id = mask->ipv4_mask.proto, 3180 }; 3181 attributes->items[1] = (struct rte_flow_item){ 3182 .type = RTE_FLOW_ITEM_TYPE_IPV4, 3183 .spec = &attributes->l3, 3184 .mask = &attributes->l3_mask, 3185 }; 3186 break; 3187 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3188 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3189 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3190 attributes->l3.ipv6.hdr = (struct ipv6_hdr){ 3191 .hop_limits = input->flow.ipv6_flow.hop_limits, 3192 .proto = input->flow.ipv6_flow.proto, 3193 }; 3194 3195 memcpy(attributes->l3.ipv6.hdr.src_addr, 3196 input->flow.ipv6_flow.src_ip, 3197 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3198 memcpy(attributes->l3.ipv6.hdr.dst_addr, 3199 input->flow.ipv6_flow.dst_ip, 3200 RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); 3201 memcpy(attributes->l3_mask.ipv6.hdr.src_addr, 3202 mask->ipv6_mask.src_ip, 3203 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3204 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, 3205 mask->ipv6_mask.dst_ip, 3206 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); 3207 attributes->items[1] = (struct rte_flow_item){ 3208 .type = RTE_FLOW_ITEM_TYPE_IPV6, 3209 .spec = &attributes->l3, 3210 .mask = &attributes->l3_mask, 3211 }; 3212 break; 3213 default: 3214 DRV_LOG(ERR, "port %u invalid flow type%d", 3215 dev->data->port_id, fdir_filter->input.flow_type); 3216 rte_errno = ENOTSUP; 3217 return -rte_errno; 3218 } 3219 /* Handle L4. 
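	 * Source and destination ports are taken from the flow director
	 * input; the *_OTHER flow types add no L4 item.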
*/ 3220 switch (fdir_filter->input.flow_type) { 3221 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: 3222 attributes->l4.udp.hdr = (struct udp_hdr){ 3223 .src_port = input->flow.udp4_flow.src_port, 3224 .dst_port = input->flow.udp4_flow.dst_port, 3225 }; 3226 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3227 .src_port = mask->src_port_mask, 3228 .dst_port = mask->dst_port_mask, 3229 }; 3230 attributes->items[2] = (struct rte_flow_item){ 3231 .type = RTE_FLOW_ITEM_TYPE_UDP, 3232 .spec = &attributes->l4, 3233 .mask = &attributes->l4_mask, 3234 }; 3235 break; 3236 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: 3237 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3238 .src_port = input->flow.tcp4_flow.src_port, 3239 .dst_port = input->flow.tcp4_flow.dst_port, 3240 }; 3241 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3242 .src_port = mask->src_port_mask, 3243 .dst_port = mask->dst_port_mask, 3244 }; 3245 attributes->items[2] = (struct rte_flow_item){ 3246 .type = RTE_FLOW_ITEM_TYPE_TCP, 3247 .spec = &attributes->l4, 3248 .mask = &attributes->l4_mask, 3249 }; 3250 break; 3251 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: 3252 attributes->l4.udp.hdr = (struct udp_hdr){ 3253 .src_port = input->flow.udp6_flow.src_port, 3254 .dst_port = input->flow.udp6_flow.dst_port, 3255 }; 3256 attributes->l4_mask.udp.hdr = (struct udp_hdr){ 3257 .src_port = mask->src_port_mask, 3258 .dst_port = mask->dst_port_mask, 3259 }; 3260 attributes->items[2] = (struct rte_flow_item){ 3261 .type = RTE_FLOW_ITEM_TYPE_UDP, 3262 .spec = &attributes->l4, 3263 .mask = &attributes->l4_mask, 3264 }; 3265 break; 3266 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: 3267 attributes->l4.tcp.hdr = (struct tcp_hdr){ 3268 .src_port = input->flow.tcp6_flow.src_port, 3269 .dst_port = input->flow.tcp6_flow.dst_port, 3270 }; 3271 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ 3272 .src_port = mask->src_port_mask, 3273 .dst_port = mask->dst_port_mask, 3274 }; 3275 attributes->items[2] = (struct rte_flow_item){ 3276 .type = RTE_FLOW_ITEM_TYPE_TCP, 3277 .spec = &attributes->l4, 3278 .mask = &attributes->l4_mask, 3279 }; 3280 break; 3281 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: 3282 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: 3283 break; 3284 default: 3285 DRV_LOG(ERR, "port %u invalid flow type%d", 3286 dev->data->port_id, fdir_filter->input.flow_type); 3287 rte_errno = ENOTSUP; 3288 return -rte_errno; 3289 } 3290 return 0; 3291 } 3292 3293 /** 3294 * Add new flow director filter and store it in list. 3295 * 3296 * @param dev 3297 * Pointer to Ethernet device. 3298 * @param fdir_filter 3299 * Flow director filter to add. 3300 * 3301 * @return 3302 * 0 on success, a negative errno value otherwise and rte_errno is set. 
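 *
 * Illustrative only (assumed application-side usage, "port_id" is not from
 * this file): a filter on the VXLAN UDP port submitted through the legacy
 * filter API:
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
 *		.action.behavior = RTE_ETH_FDIR_ACCEPT,
 *		.action.rx_queue = 0,
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &f);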
3303 */ 3304 static int 3305 mlx5_fdir_filter_add(struct rte_eth_dev *dev, 3306 const struct rte_eth_fdir_filter *fdir_filter) 3307 { 3308 struct priv *priv = dev->data->dev_private; 3309 struct mlx5_fdir attributes = { 3310 .attr.group = 0, 3311 .l2_mask = { 3312 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00", 3313 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00", 3314 .type = 0, 3315 }, 3316 }; 3317 struct mlx5_flow_parse parser = { 3318 .layer = HASH_RXQ_ETH, 3319 }; 3320 struct rte_flow_error error; 3321 struct rte_flow *flow; 3322 int ret; 3323 3324 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes); 3325 if (ret) 3326 return ret; 3327 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items, 3328 attributes.actions, &error, &parser); 3329 if (ret) 3330 return ret; 3331 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr, 3332 attributes.items, attributes.actions, 3333 &error); 3334 if (flow) { 3335 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id, 3336 (void *)flow); 3337 return 0; 3338 } 3339 return -rte_errno; 3340 } 3341 3342 /** 3343 * Delete specific filter. 3344 * 3345 * @param dev 3346 * Pointer to Ethernet device. 3347 * @param fdir_filter 3348 * Filter to be deleted. 3349 * 3350 * @return 3351 * 0 on success, a negative errno value otherwise and rte_errno is set. 3352 */ 3353 static int 3354 mlx5_fdir_filter_delete(struct rte_eth_dev *dev, 3355 const struct rte_eth_fdir_filter *fdir_filter) 3356 { 3357 struct priv *priv = dev->data->dev_private; 3358 struct mlx5_fdir attributes = { 3359 .attr.group = 0, 3360 }; 3361 struct mlx5_flow_parse parser = { 3362 .create = 1, 3363 .layer = HASH_RXQ_ETH, 3364 }; 3365 struct rte_flow_error error; 3366 struct rte_flow *flow; 3367 unsigned int i; 3368 int ret; 3369 3370 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes); 3371 if (ret) 3372 return ret; 3373 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items, 3374 attributes.actions, &error, &parser); 3375 if (ret) 3376 goto exit; 3377 /* 3378 * Special case for drop action which is only set in the 3379 * specifications when the flow is created. In this situation the 3380 * drop specification is missing. 3381 */ 3382 if (parser.drop) { 3383 struct ibv_flow_spec_action_drop *drop; 3384 3385 drop = (void *)((uintptr_t)parser.queue[parser.layer].ibv_attr + 3386 parser.queue[parser.layer].offset); 3387 *drop = (struct ibv_flow_spec_action_drop){ 3388 .type = IBV_FLOW_SPEC_ACTION_DROP, 3389 .size = sizeof(struct ibv_flow_spec_action_drop), 3390 }; 3391 parser.queue[parser.layer].ibv_attr->num_of_specs++; 3392 } 3393 TAILQ_FOREACH(flow, &priv->flows, next) { 3394 struct ibv_flow_attr *attr; 3395 struct ibv_spec_header *attr_h; 3396 void *spec; 3397 struct ibv_flow_attr *flow_attr; 3398 struct ibv_spec_header *flow_h; 3399 void *flow_spec; 3400 unsigned int specs_n; 3401 3402 attr = parser.queue[parser.layer].ibv_attr; 3403 flow_attr = flow->frxq[parser.layer].ibv_attr; 3404 /* Compare first the attributes. 
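		 * If they are identical, every Verbs specification is then
		 * compared in order; the first mismatch moves on to the next
		 * flow in the list.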
*/ 3405 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr))) 3406 continue; 3407 if (attr->num_of_specs == 0) 3408 continue; 3409 spec = (void *)((uintptr_t)attr + 3410 sizeof(struct ibv_flow_attr)); 3411 flow_spec = (void *)((uintptr_t)flow_attr + 3412 sizeof(struct ibv_flow_attr)); 3413 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs); 3414 for (i = 0; i != specs_n; ++i) { 3415 attr_h = spec; 3416 flow_h = flow_spec; 3417 if (memcmp(spec, flow_spec, 3418 RTE_MIN(attr_h->size, flow_h->size))) 3419 goto wrong_flow; 3420 spec = (void *)((uintptr_t)spec + attr_h->size); 3421 flow_spec = (void *)((uintptr_t)flow_spec + 3422 flow_h->size); 3423 } 3424 /* At this point, the flow match. */ 3425 break; 3426 wrong_flow: 3427 /* The flow does not match. */ 3428 continue; 3429 } 3430 ret = rte_errno; /* Save rte_errno before cleanup. */ 3431 if (flow) 3432 mlx5_flow_list_destroy(dev, &priv->flows, flow); 3433 exit: 3434 for (i = 0; i != hash_rxq_init_n; ++i) { 3435 if (parser.queue[i].ibv_attr) 3436 rte_free(parser.queue[i].ibv_attr); 3437 } 3438 rte_errno = ret; /* Restore rte_errno. */ 3439 return -rte_errno; 3440 } 3441 3442 /** 3443 * Update queue for specific filter. 3444 * 3445 * @param dev 3446 * Pointer to Ethernet device. 3447 * @param fdir_filter 3448 * Filter to be updated. 3449 * 3450 * @return 3451 * 0 on success, a negative errno value otherwise and rte_errno is set. 3452 */ 3453 static int 3454 mlx5_fdir_filter_update(struct rte_eth_dev *dev, 3455 const struct rte_eth_fdir_filter *fdir_filter) 3456 { 3457 int ret; 3458 3459 ret = mlx5_fdir_filter_delete(dev, fdir_filter); 3460 if (ret) 3461 return ret; 3462 return mlx5_fdir_filter_add(dev, fdir_filter); 3463 } 3464 3465 /** 3466 * Flush all filters. 3467 * 3468 * @param dev 3469 * Pointer to Ethernet device. 3470 */ 3471 static void 3472 mlx5_fdir_filter_flush(struct rte_eth_dev *dev) 3473 { 3474 struct priv *priv = dev->data->dev_private; 3475 3476 mlx5_flow_list_flush(dev, &priv->flows); 3477 } 3478 3479 /** 3480 * Get flow director information. 3481 * 3482 * @param dev 3483 * Pointer to Ethernet device. 3484 * @param[out] fdir_info 3485 * Resulting flow director information. 3486 */ 3487 static void 3488 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) 3489 { 3490 struct priv *priv = dev->data->dev_private; 3491 struct rte_eth_fdir_masks *mask = 3492 &priv->dev->data->dev_conf.fdir_conf.mask; 3493 3494 fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode; 3495 fdir_info->guarant_spc = 0; 3496 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); 3497 fdir_info->max_flexpayload = 0; 3498 fdir_info->flow_types_mask[0] = 0; 3499 fdir_info->flex_payload_unit = 0; 3500 fdir_info->max_flex_payload_segment_num = 0; 3501 fdir_info->flex_payload_limit = 0; 3502 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf)); 3503 } 3504 3505 /** 3506 * Deal with flow director operations. 3507 * 3508 * @param dev 3509 * Pointer to Ethernet device. 3510 * @param filter_op 3511 * Operation to perform. 3512 * @param arg 3513 * Pointer to operation-specific structure. 3514 * 3515 * @return 3516 * 0 on success, a negative errno value otherwise and rte_errno is set. 
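 *
 * Illustrative only (assumed application-side usage): the same entry point
 * also serves e.g. a global flush of flow director filters:
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_FLUSH, NULL);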
3517 */ 3518 static int 3519 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, 3520 void *arg) 3521 { 3522 struct priv *priv = dev->data->dev_private; 3523 enum rte_fdir_mode fdir_mode = 3524 priv->dev->data->dev_conf.fdir_conf.mode; 3525 3526 if (filter_op == RTE_ETH_FILTER_NOP) 3527 return 0; 3528 if (fdir_mode != RTE_FDIR_MODE_PERFECT && 3529 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { 3530 DRV_LOG(ERR, "port %u flow director mode %d not supported", 3531 dev->data->port_id, fdir_mode); 3532 rte_errno = EINVAL; 3533 return -rte_errno; 3534 } 3535 switch (filter_op) { 3536 case RTE_ETH_FILTER_ADD: 3537 return mlx5_fdir_filter_add(dev, arg); 3538 case RTE_ETH_FILTER_UPDATE: 3539 return mlx5_fdir_filter_update(dev, arg); 3540 case RTE_ETH_FILTER_DELETE: 3541 return mlx5_fdir_filter_delete(dev, arg); 3542 case RTE_ETH_FILTER_FLUSH: 3543 mlx5_fdir_filter_flush(dev); 3544 break; 3545 case RTE_ETH_FILTER_INFO: 3546 mlx5_fdir_info_get(dev, arg); 3547 break; 3548 default: 3549 DRV_LOG(DEBUG, "port %u unknown operation %u", 3550 dev->data->port_id, filter_op); 3551 rte_errno = EINVAL; 3552 return -rte_errno; 3553 } 3554 return 0; 3555 } 3556 3557 /** 3558 * Manage filter operations. 3559 * 3560 * @param dev 3561 * Pointer to Ethernet device structure. 3562 * @param filter_type 3563 * Filter type. 3564 * @param filter_op 3565 * Operation to perform. 3566 * @param arg 3567 * Pointer to operation-specific structure. 3568 * 3569 * @return 3570 * 0 on success, a negative errno value otherwise and rte_errno is set. 3571 */ 3572 int 3573 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, 3574 enum rte_filter_type filter_type, 3575 enum rte_filter_op filter_op, 3576 void *arg) 3577 { 3578 switch (filter_type) { 3579 case RTE_ETH_FILTER_GENERIC: 3580 if (filter_op != RTE_ETH_FILTER_GET) { 3581 rte_errno = EINVAL; 3582 return -rte_errno; 3583 } 3584 *(const void **)arg = &mlx5_flow_ops; 3585 return 0; 3586 case RTE_ETH_FILTER_FDIR: 3587 return mlx5_fdir_ctrl_func(dev, filter_op, arg); 3588 default: 3589 DRV_LOG(ERR, "port %u filter type (%d) not supported", 3590 dev->data->port_id, filter_type); 3591 rte_errno = ENOTSUP; 3592 return -rte_errno; 3593 } 3594 return 0; 3595 } 3596 3597 /** 3598 * Detect number of Verbs flow priorities supported. 3599 * 3600 * @param dev 3601 * Pointer to Ethernet device. 3602 * 3603 * @return 3604 * number of supported Verbs flow priority. 3605 */ 3606 unsigned int 3607 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev) 3608 { 3609 struct priv *priv = dev->data->dev_private; 3610 unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8; 3611 struct { 3612 struct ibv_flow_attr attr; 3613 struct ibv_flow_spec_eth eth; 3614 struct ibv_flow_spec_action_drop drop; 3615 } flow_attr = { 3616 .attr = { 3617 .num_of_specs = 2, 3618 }, 3619 .eth = { 3620 .type = IBV_FLOW_SPEC_ETH, 3621 .size = sizeof(struct ibv_flow_spec_eth), 3622 }, 3623 .drop = { 3624 .size = sizeof(struct ibv_flow_spec_action_drop), 3625 .type = IBV_FLOW_SPEC_ACTION_DROP, 3626 }, 3627 }; 3628 struct ibv_flow *flow; 3629 3630 do { 3631 flow_attr.attr.priority = verb_priorities - 1; 3632 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp, 3633 &flow_attr.attr); 3634 if (flow) { 3635 claim_zero(mlx5_glue->destroy_flow(flow)); 3636 /* Try more priorities. */ 3637 verb_priorities *= 2; 3638 } else { 3639 /* Failed, restore last right number. 
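			 * That is, fall back to the largest power-of-two
			 * count that could actually be programmed and stop
			 * probing.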
			 */
			verb_priorities /= 2;
			break;
		}
	} while (1);
	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
		" user flow priorities: %d",
		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
	return verb_priorities;
}
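
/*
 * Illustrative note (not part of the original sources): walk-through of the
 * probing loop in mlx5_get_max_verbs_prio() above. Starting from
 * MLX5_VERBS_FLOW_PRIO_8, a drop rule is created at priority
 * verb_priorities - 1 and the count is doubled while creation succeeds. On
 * the first failure the last value that worked is restored and returned;
 * e.g. a device accepting priority 15 but rejecting priority 31 reports 16
 * supported Verbs flow priorities.
 */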