/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>
#include <sys/resource.h>

#include <rte_byteorder.h>
#include <rte_jhash.h>
#include <rte_thash.h>
#include <rte_random.h>
#include <rte_malloc.h>
#include <rte_eth_tap.h>
#include <rte_uuid.h>

#include <tap_flow.h>
#include <tap_tcmsgs.h>
#include <tap_rss.h>

#ifdef HAVE_BPF_RSS
/* Workaround for warning in bpftool generated skeleton code */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
#include "tap_rss.skel.h"
#pragma GCC diagnostic pop
#endif

#define ISOLATE_HANDLE 1
#define REMOTE_PROMISCUOUS_HANDLE 2

struct rte_flow {
	LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */
	struct rte_flow *remote_flow; /* associated remote flow */
	struct nlmsg msg;
};

struct convert_data {
	uint16_t eth_type;
	uint16_t ip_proto;
	uint8_t vlan;
	struct rte_flow *flow;
};

struct remote_rule {
	struct rte_flow_attr attr;
	struct rte_flow_item items[2];
	struct rte_flow_action actions[2];
	int mirred;
};

struct action_data {
	char id[16];

	union {
		struct tc_gact gact;
		struct tc_mirred mirred;
		struct skbedit {
			struct tc_skbedit skbedit;
			uint16_t queue;
			uint32_t mark;
		} skbedit;
#ifdef HAVE_BPF_RSS
		struct bpf {
			struct tc_act_bpf bpf;
			uint32_t map_key;
			int bpf_fd;
			const char *annotation;
		} bpf;
#endif
	};
};

static int tap_flow_create_eth(const struct rte_flow_item *item, void *data);
static int tap_flow_create_vlan(const struct rte_flow_item *item, void *data);
static int tap_flow_create_ipv4(const struct rte_flow_item *item, void *data);
static int tap_flow_create_ipv6(const struct rte_flow_item *item, void *data);
static int tap_flow_create_udp(const struct rte_flow_item *item, void *data);
static int tap_flow_create_tcp(const struct rte_flow_item *item, void *data);
static int
tap_flow_validate(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error);

static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error);

static void
tap_flow_free(struct pmd_internals *pmd,
		struct rte_flow *flow);

static int
tap_flow_destroy(struct rte_eth_dev *dev,
		struct rte_flow *flow,
		struct rte_flow_error *error);

static int
tap_flow_isolate(struct rte_eth_dev *dev,
		int set,
		struct rte_flow_error *error);

#ifdef HAVE_BPF_RSS
static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error);
static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
		const struct rte_flow_action_rss *rss,
		struct rte_flow_error *error);
#endif

static const struct rte_flow_ops tap_flow_ops = {
	.validate = tap_flow_validate,
	.create = tap_flow_create,
	.destroy = tap_flow_destroy,
	.flush = tap_flow_flush,
	.isolate = tap_flow_isolate,
};
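
/*
 * Big picture: the rte_flow callbacks in tap_flow_ops above translate generic
 * rte_flow rules into Linux TC "flower" classifier rules and push them to the
 * kernel over netlink. Applications never call these functions directly; they
 * go through the rte_flow API. For illustration only (hypothetical port_id and
 * queue index), a rule steering IPv4 traffic to queue 1 would be created
 * roughly like this:
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 1 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *					     actions, &err);
 *
 * Such a rule ends up as a flower filter on the tap netdevice, with a
 * "skbedit" action selecting the queue (see priv_flow_process() below).
 */
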
/* Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}

/* Structure to generate a simple graph of layers supported by the NIC. */
struct tap_flow_items {
	/* Bit-mask corresponding to what is supported for this item. */
	const void *mask;
	const unsigned int mask_sz; /* Bit-mask size in bytes. */
	/*
	 * Bit-mask corresponding to the default mask, if none is provided
	 * along with the item.
	 */
	const void *default_mask;
	/**
	 * Conversion function from rte_flow to netlink attributes.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item, void *data);
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/* Graph of supported items and associated actions. */
static const struct tap_flow_items tap_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(
			RTE_FLOW_ITEM_TYPE_VLAN,
			RTE_FLOW_ITEM_TYPE_IPV4,
			RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_eth){
			.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			.hdr.src_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			.hdr.ether_type = -1,
		},
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.default_mask = &rte_flow_item_eth_mask,
		.convert = tap_flow_create_eth,
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_vlan){
			/* DEI matching is not supported */
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
			.hdr.vlan_tci = 0xffef,
#else
			.hdr.vlan_tci = 0xefff,
#endif
			.hdr.eth_proto = -1,
		},
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.default_mask = &rte_flow_item_vlan_mask,
		.convert = tap_flow_create_vlan,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.next_proto_id = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.default_mask = &rte_flow_item_ipv4_mask,
		.convert = tap_flow_create_ipv4,
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
					      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
				.dst_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
					      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
				.proto = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.default_mask = &rte_flow_item_ipv6_mask,
		.convert = tap_flow_create_ipv6,
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.default_mask = &rte_flow_item_udp_mask,
		.convert = tap_flow_create_udp,
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.default_mask = &rte_flow_item_tcp_mask,
		.convert = tap_flow_create_tcp,
	},
};
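
/*
 * The graph above is what priv_flow_process() walks while parsing a pattern:
 * each item must appear in the .items list of the previous one (END is the
 * implicit starting point). For instance ETH -> IPV4 -> UDP is accepted,
 * while a pattern starting directly with UDP, or ETH -> UDP without an IP
 * layer in between, is rejected as "item not supported".
 */
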
/*
 * TC rules, by growing priority
 *
 *        Remote netdevice                  Tap netdevice
 * +-------------+-------------+  +-------------+-------------+
 * |   Ingress   |   Egress    |  |   Ingress   |   Egress    |
 * |-------------|-------------|  |-------------|-------------|
 * |             |  \       /  |  |             |  REMOTE TX  | prio 1
 * |             |   \     /   |  |             |   \     /   | prio 2
 * |  EXPLICIT   |    \   /    |  |  EXPLICIT   |    \   /    |   .
 * |             |     \ /     |  |             |     \ /     |   .
 * |   RULES     |      X      |  |   RULES     |      X      |   .
 * |      .      |     / \     |  |      .      |     / \     |   .
 * |      .      |    /   \    |  |      .      |    /   \    |   .
 * |      .      |   /     \   |  |      .      |   /     \   |   .
 * |      .      |  /       \  |  |      .      |  /       \  |   .
 *
 *      ....           ....           ....           ....
 *
 * |      .      |  \       /  |  |      .      |  \       /  |   .
 * |      .      |   \     /   |  |      .      |   \     /   |   .
 * |             |    \   /    |  |             |    \   /    |
 * |  LOCAL_MAC  |     \ /     |  |    \   /    |     \ /     | last prio - 5
 * |   PROMISC   |      X      |  |     \ /     |      X      | last prio - 4
 * |   ALLMULTI  |     / \     |  |      X      |     / \     | last prio - 3
 * |  BROADCAST  |    /   \    |  |     / \     |    /   \    | last prio - 2
 * | BROADCASTV6 |   /     \   |  |    /   \    |   /     \   | last prio - 1
 * |     xx      |  /       \  |  |   ISOLATE   |  /       \  | last prio
 * +-------------+-------------+  +-------------+-------------+
 *
 * The implicit flow rules are stored in a list, with the last two entries
 * mandatorily being the ISOLATE and REMOTE_TX rules, e.g.:
 *
 * LOCAL_MAC -> BROADCAST -> BROADCASTV6 -> REMOTE_TX -> ISOLATE -> NULL
 *
 * This lets tap_flow_isolate() remove implicit rules by popping the list
 * head, as long as the popped rule applies to the remote netdevice. The
 * implicit rule for TX redirection is not removed, as isolate concerns only
 * incoming traffic.
 */

static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
	[TAP_REMOTE_LOCAL_MAC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_LOCAL_MAC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_REDIR,
	},
	[TAP_REMOTE_BROADCAST] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCAST,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			},
			.spec = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_BROADCASTV6] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCASTV6,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
			},
			.spec = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_PROMISC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_PROMISC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_ALLMULTI] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_ALLMULTI,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
			},
			.spec = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_TX] = {
		.attr = {
			.group = 0,
			.priority = TAP_REMOTE_TX,
			.egress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_ISOLATE] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_ISOLATE,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	},
};
/**
 * Perform as many checks as possible on an Ethernet item, and if a flow is
 * provided, fill it appropriately with Ethernet info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_eth(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_ETH].default_mask;
	/* TC does not support eth_type masking. Only accept if exact match. */
	if (mask->hdr.ether_type && mask->hdr.ether_type != 0xffff)
		return -1;
	if (!spec)
		return 0;
	/* store eth_type for consistency if ipv4/6 pattern item comes next */
	if (spec->hdr.ether_type & mask->hdr.ether_type)
		info->eth_type = spec->hdr.ether_type;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!rte_is_zero_ether_addr(&mask->hdr.dst_addr)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST,
			RTE_ETHER_ADDR_LEN,
			&spec->hdr.dst_addr.addr_bytes);
		tap_nlattr_add(&msg->nh,
			TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN,
			&mask->hdr.dst_addr.addr_bytes);
	}
	if (!rte_is_zero_ether_addr(&mask->hdr.src_addr)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC,
			RTE_ETHER_ADDR_LEN,
			&spec->hdr.src_addr.addr_bytes);
		tap_nlattr_add(&msg->nh,
			TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN,
			&mask->hdr.src_addr.addr_bytes);
	}
	return 0;
}
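
/*
 * For illustration: an ETH item whose spec/mask select only the destination
 * MAC ends up as a pair of flower attributes, TCA_FLOWER_KEY_ETH_DST (the
 * address) plus TCA_FLOWER_KEY_ETH_DST_MASK (all-ones for an exact match),
 * roughly what "tc filter ... flower dst_mac <addr>" would install.
 */
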
/**
 * Perform as many checks as possible on a VLAN item, and if a flow is
 * provided, fill it appropriately with VLAN info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_vlan(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_VLAN].default_mask;
	/* Outer TPID cannot be matched. */
	if (info->eth_type)
		return -1;
	/* Double-tagging not supported. */
	if (info->vlan)
		return -1;
	info->vlan = 1;
	if (mask->hdr.eth_proto) {
		/* TC does not support partial eth_type masking */
		if (mask->hdr.eth_proto != RTE_BE16(0xffff))
			return -1;
		info->eth_type = spec->hdr.eth_proto;
	}
	if (!flow)
		return 0;
	msg = &flow->msg;
	msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_8021Q));
#define VLAN_PRIO(tci) ((tci) >> 13)
#define VLAN_ID(tci) ((tci) & 0xfff)
	if (!spec)
		return 0;
	if (spec->hdr.vlan_tci) {
		uint16_t tci = ntohs(spec->hdr.vlan_tci) & mask->hdr.vlan_tci;
		uint16_t prio = VLAN_PRIO(tci);
		uint8_t vid = VLAN_ID(tci);

		if (prio)
			tap_nlattr_add8(&msg->nh,
					TCA_FLOWER_KEY_VLAN_PRIO, prio);
		if (vid)
			tap_nlattr_add16(&msg->nh,
					 TCA_FLOWER_KEY_VLAN_ID, vid);
	}
	return 0;
}

/**
 * Perform as many checks as possible on an IPv4 item, and if a flow is
 * provided, fill it appropriately with IPv4 info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_ipv4(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].default_mask;
	/* check that previous eth type is compatible with ipv4 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IP))
		return -1;
	/* store ip_proto for consistency if udp/tcp pattern item comes next */
	if (spec)
		info->ip_proto = spec->hdr.next_proto_id;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_addr) {
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
				 spec->hdr.dst_addr);
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
				 mask->hdr.dst_addr);
	}
	if (mask->hdr.src_addr) {
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
				 spec->hdr.src_addr);
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
				 mask->hdr.src_addr);
	}
	if (spec->hdr.next_proto_id)
		tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO,
				spec->hdr.next_proto_id);
	return 0;
}
/**
 * Perform as many checks as possible on an IPv6 item, and if a flow is
 * provided, fill it appropriately with IPv6 info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_ipv6(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	uint8_t empty_addr[16] = { 0 };
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV6].default_mask;
	/* check that previous eth type is compatible with ipv6 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IPV6))
		return -1;
	/* store ip_proto for consistency if udp/tcp pattern item comes next */
	if (spec)
		info->ip_proto = spec->hdr.proto;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IPV6);
	if (!spec)
		return 0;
	if (memcmp(mask->hdr.dst_addr, empty_addr, 16)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
			       sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
	}
	if (memcmp(mask->hdr.src_addr, empty_addr, 16)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
			       sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(mask->hdr.src_addr), &mask->hdr.src_addr);
	}
	if (spec->hdr.proto)
		tap_nlattr_add8(&msg->nh,
				TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto);
	return 0;
}

/**
 * Perform as many checks as possible on a UDP item, and if a flow is
 * provided, fill it appropriately with UDP info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_udp(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_UDP].default_mask;
	/* check that previous ip_proto is compatible with udp */
	if (info->ip_proto && info->ip_proto != IPPROTO_UDP)
		return -1;
	/* TC does not support UDP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
				 spec->hdr.dst_port);
	if (mask->hdr.src_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
				 spec->hdr.src_port);
	return 0;
}
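
/*
 * Note: rte_flow carries L4 ports in network byte order and the flower
 * TCA_FLOWER_KEY_*_SRC/DST attributes expect big-endian values as well, so
 * the port numbers above (and in the TCP variant below) are forwarded to the
 * kernel unchanged, without any byte swapping.
 */
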
/**
 * Perform as many checks as possible on a TCP item, and if a flow is
 * provided, fill it appropriately with TCP info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_tcp(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_TCP].default_mask;
	/* check that previous ip_proto is compatible with tcp */
	if (info->ip_proto && info->ip_proto != IPPROTO_TCP)
		return -1;
	/* TC does not support TCP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
				 spec->hdr.dst_port);
	if (mask->hdr.src_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
				 spec->hdr.src_port);
	return 0;
}

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param size
 *   Bit-mask size in bytes.
 * @param[in] supported_mask
 *   Bit-mask covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param[in] default_mask
 *   Bit-mask default mask if none is provided in \item.
 *
 * @return
 *   0 on success.
 */
static int
tap_flow_item_validate(const struct rte_flow_item *item,
		       unsigned int size,
		       const uint8_t *supported_mask,
		       const uint8_t *default_mask)
{
	int ret = 0;

	/* An empty layer is allowed, as long as all fields are NULL */
	if (!item->spec && (item->mask || item->last))
		return -1;
	/* Is the item spec compatible with what the NIC supports? */
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
		/* Is the default mask compatible with what the NIC supports? */
		for (i = 0; i < size; i++)
			if ((default_mask[i] | supported_mask[i]) !=
			    supported_mask[i])
				return -1;
	}
	/* Is the item last compatible with what the NIC supports? */
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/* Is the item mask compatible with what the NIC supports? */
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/*
	 * Once masked, are item spec and item last equal?
	 * TC does not support ranges, so anything else is invalid.
	 */
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = default_mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
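
/*
 * A small worked example of the byte-wise check above (hypothetical values):
 * with supported_mask = { 0xff, 0x00 }, a spec of { 0x12, 0x00 } passes
 * because 0x12 | 0xff == 0xff and 0x00 | 0x00 == 0x00, while a spec of
 * { 0x12, 0x01 } is rejected since byte 1 requests a match on a field the
 * translation cannot express (0x01 | 0x00 != 0x00).
 */
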
/**
 * Configure the kernel with a TC action and its configured parameters
 * Handled actions: "gact", "mirred", "skbedit", "bpf"
 *
 * @param[in] flow
 *   Pointer to rte flow containing the netlink message
 *
 * @param[in, out] act_index
 *   Pointer to action sequence number in the TC command
 *
 * @param[in] adata
 *   Pointer to struct holding the action parameters
 *
 * @return
 *   -1 on failure, 0 on success
 */
static int
add_action(struct rte_flow *flow, size_t *act_index, struct action_data *adata)
{
	struct nlmsg *msg = &flow->msg;

	if (tap_nlattr_nested_start(msg, (*act_index)++) < 0)
		return -1;

	tap_nlattr_add(&msg->nh, TCA_ACT_KIND,
		       strlen(adata->id) + 1, adata->id);
	if (tap_nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
		return -1;
	if (strcmp("gact", adata->id) == 0) {
		tap_nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(adata->gact),
			       &adata->gact);
	} else if (strcmp("mirred", adata->id) == 0) {
		if (adata->mirred.eaction == TCA_EGRESS_MIRROR)
			adata->mirred.action = TC_ACT_PIPE;
		else /* REDIRECT */
			adata->mirred.action = TC_ACT_STOLEN;
		tap_nlattr_add(&msg->nh, TCA_MIRRED_PARMS,
			       sizeof(adata->mirred),
			       &adata->mirred);
	} else if (strcmp("skbedit", adata->id) == 0) {
		tap_nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS,
			       sizeof(adata->skbedit.skbedit), &adata->skbedit.skbedit);
		if (adata->skbedit.mark)
			tap_nlattr_add32(&msg->nh, TCA_SKBEDIT_MARK, adata->skbedit.mark);
		else
			tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, adata->skbedit.queue);
	} else if (strcmp("bpf", adata->id) == 0) {
#ifdef HAVE_BPF_RSS
		tap_nlattr_add32(&msg->nh, TCA_ACT_BPF_FD, adata->bpf.bpf_fd);
		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_NAME,
			       strlen(adata->bpf.annotation) + 1,
			       adata->bpf.annotation);
		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_PARMS,
			       sizeof(adata->bpf.bpf),
			       &adata->bpf.bpf);
#else
		TAP_LOG(ERR, "Internal error: bpf requested but not supported");
		return -1;
#endif
	} else {
		TAP_LOG(ERR, "Internal error: unknown action: %s", adata->id);
		return -1;
	}
	tap_nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
	tap_nlattr_nested_finish(msg); /* nested act_index */
	return 0;
}

/**
 * Helper function to send a series of TC actions to the kernel
 *
 * @param[in] flow
 *   Pointer to rte flow containing the netlink message
 *
 * @param[in] nb_actions
 *   Number of actions in an array of action structs
 *
 * @param[in] data
 *   Pointer to an array of action structs
 *
 * @param[in] classifier_action
 *   The classifier on behalf of which the actions are configured
 *
 * @return
 *   -1 on failure, 0 on success
 */
static int
add_actions(struct rte_flow *flow, int nb_actions, struct action_data *data,
	    int classifier_action)
{
	struct nlmsg *msg = &flow->msg;
	size_t act_index = 1;
	int i;

	if (tap_nlattr_nested_start(msg, classifier_action) < 0)
		return -1;
	for (i = 0; i < nb_actions; i++)
		if (add_action(flow, &act_index, data + i) < 0)
			return -1;
	tap_nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
	return 0;
}
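
/*
 * The netlink layout produced by add_actions()/add_action() is a set of
 * nested attributes, one numbered slot per action, e.g.:
 *
 *   TCA_FLOWER_ACT (or whatever classifier_action is passed in)
 *     1: TCA_ACT_KIND "skbedit", TCA_ACT_OPTIONS { TCA_SKBEDIT_PARMS, ... }
 *     2: TCA_ACT_KIND "bpf",     TCA_ACT_OPTIONS { TCA_ACT_BPF_FD, ... }
 *
 * The slot numbers come from act_index, which starts at 1 (action slots are
 * 1-based).
 */
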
/**
 * Validate a flow supported by TC.
 * If flow param is not NULL, then also fill the netlink message inside.
 *
 * @param pmd
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 * @param[in] mirred
 *   If set to TCA_EGRESS_REDIR, provided actions will be replaced with a
 *   redirection to the tap netdevice, and the TC rule will be configured
 *   on the remote netdevice in pmd.
 *   If set to TCA_EGRESS_MIRROR, provided actions will be replaced with a
 *   mirroring to the tap netdevice, and the TC rule will be configured
 *   on the remote netdevice in pmd. Matching packets will thus be duplicated.
 *   If set to 0, the standard behavior is to be used: set correct actions for
 *   the TC rule, and apply it on the tap netdevice.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_process(struct pmd_internals *pmd,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct rte_flow *flow,
		  int mirred)
{
	const struct tap_flow_items *cur_item = tap_flow_items;
	struct convert_data data = {
		.eth_type = 0,
		.ip_proto = 0,
		.flow = flow,
	};
	int action = 0; /* Only one action authorized for now */

	if (attr->transfer) {
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
			NULL, "transfer is not supported");
		return -rte_errno;
	}
	if (attr->group > MAX_GROUP) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
			NULL, "group value too big: cannot exceed 15");
		return -rte_errno;
	}
	if (attr->priority > MAX_PRIORITY) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		return -rte_errno;
	} else if (flow) {
		uint16_t group = attr->group << GROUP_SHIFT;
		uint16_t prio = group | (attr->priority +
					 RSS_PRIORITY_OFFSET + PRIORITY_OFFSET);
		flow->msg.t.tcm_info = TC_H_MAKE(prio << 16,
						 flow->msg.t.tcm_info);
	}
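	/*
	 * At this point the rule's group and adjusted priority have been
	 * packed into the upper 16 bits of tcm_info, while the lower 16 bits
	 * still hold the protocol (ETH_P_ALL, or ETH_P_8021Q once a VLAN item
	 * is converted). tap_flow_implicit_destroy() relies on this layout to
	 * recover the priority with (tcm_info >> 16) & PRIORITY_MASK.
	 */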
	if (flow) {
		if (mirred) {
			/*
			 * If attr->ingress, the rule applies on remote ingress
			 * to match incoming packets
			 * If attr->egress, the rule applies on tap ingress (as
			 * seen from the kernel) to deal with packets going out
			 * from the DPDK app.
			 */
			flow->msg.t.tcm_parent = TC_H_MAKE(TC_H_INGRESS, 0);
		} else {
			/* Standard rule on tap egress (kernel standpoint). */
			flow->msg.t.tcm_parent =
				TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
		}
		/* use flower filter type */
		tap_nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower");
		if (tap_nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0) {
			rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION,
					   actions, "could not allocate netlink msg");
			goto exit_return_error;
		}
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct tap_flow_items *token = NULL;
		unsigned int i;
		int err = 0;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &tap_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = tap_flow_item_validate(
			items, cur_item->mask_sz,
			(const uint8_t *)cur_item->mask,
			(const uint8_t *)cur_item->default_mask);
		if (err)
			goto exit_item_not_supported;
		if (flow && cur_item->convert) {
			err = cur_item->convert(items, &data);
			if (err)
				goto exit_item_not_supported;
		}
	}
	if (flow) {
		if (data.vlan) {
			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
					 htons(ETH_P_8021Q));
			tap_nlattr_add16(&flow->msg.nh,
					 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
					 data.eth_type ?
					 data.eth_type : htons(ETH_P_ALL));
		} else if (data.eth_type) {
			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
					 data.eth_type);
		}
	}
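	/*
	 * For a VLAN rule the outer ethertype key must be 802.1Q and the
	 * encapsulated type goes into TCA_FLOWER_KEY_VLAN_ETH_TYPE; for a
	 * plain rule the ethertype recorded by the item converters (if any)
	 * is emitted directly. This matches how the flower classifier
	 * describes tagged versus untagged matches.
	 */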
	if (mirred && flow) {
		struct action_data adata = {
			.id = "mirred",
			.mirred = {
				.eaction = mirred,
			},
		};

		/*
		 * If attr->egress && mirred, then this is a special
		 * case where the rule must be applied on the tap, to
		 * redirect packets coming from the DPDK App, out
		 * through the remote netdevice.
		 */
		adata.mirred.ifindex = attr->ingress ? pmd->if_index :
			pmd->remote_if_index;
		if (mirred == TCA_EGRESS_MIRROR)
			adata.mirred.action = TC_ACT_PIPE;
		else
			adata.mirred.action = TC_ACT_STOLEN;
		if (add_actions(flow, 1, &adata, TCA_FLOWER_ACT) < 0)
			goto exit_action_not_supported;
		else
			goto end;
	}
actions:
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		int err = 0;

		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow) {
				struct action_data adata = {
					.id = "gact",
					.gact = {
						.action = TC_ACT_SHOT,
					},
				};

				err = add_actions(flow, 1, &adata,
						  TCA_FLOWER_ACT);
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_PASSTHRU) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow) {
				struct action_data adata = {
					.id = "gact",
					.gact = {
						/* continue */
						.action = TC_ACT_UNSPEC,
					},
				};

				err = add_actions(flow, 1, &adata, TCA_FLOWER_ACT);
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;

			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (queue->index >= pmd->dev->data->nb_rx_queues) {
				rte_flow_error_set(error, ERANGE,
						   RTE_FLOW_ERROR_TYPE_ACTION, actions,
						   "queue index out of range");
				goto exit_return_error;
			}
			if (flow) {
				struct action_data adata = {
					.id = "skbedit",
					.skbedit = {
						.skbedit = {
							.action = TC_ACT_PIPE,
						},
						.queue = queue->index,
					},
				};

				err = add_actions(flow, 1, &adata,
						  TCA_FLOWER_ACT);
			}
#ifdef HAVE_BPF_RSS
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;

			if (action++)
				goto exit_action_not_supported;

			if (pmd->rss == NULL) {
				err = rss_enable(pmd, error);
				if (err)
					goto exit_return_error;
			}
			if (flow)
				err = rss_add_actions(flow, pmd, rss, error);
#endif
		} else {
			goto exit_action_not_supported;
		}
		if (err)
			goto exit_return_error;
	}
	/* When fate is unknown, drop traffic. */
	if (!action) {
		static const struct rte_flow_action drop[] = {
			{ .type = RTE_FLOW_ACTION_TYPE_DROP, },
			{ .type = RTE_FLOW_ACTION_TYPE_END, },
		};

		actions = drop;
		goto actions;
	}
end:
	if (flow)
		tap_nlattr_nested_finish(&flow->msg); /* nested TCA_OPTIONS */
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
exit_return_error:
	return -rte_errno;
}
/**
 * Validate a flow.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
static int
tap_flow_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return priv_flow_process(pmd, attr, items, actions, error, NULL, 0);
}

/**
 * Set a unique handle in a flow.
 *
 * The kernel supports TC rules with equal priority, as long as they use the
 * same matching fields (e.g.: dst mac and ipv4) with different values (and
 * full mask to ensure no collision is possible).
 * In those rules, the handle (uint32_t) is the part that uniquely identifies
 * each rule.
 *
 * Use the jhash of the flow pointer to make a unique handle.
 *
 * @param[in, out] flow
 *   The flow that needs its handle set.
 */
static void
tap_flow_set_handle(struct rte_flow *flow)
{
	union {
		struct rte_flow *flow;
		uint32_t words[sizeof(flow) / sizeof(uint32_t)];
	} tmp = {
		.flow = flow,
	};
	uint32_t handle;
	static uint64_t hash_seed;

	if (hash_seed == 0)
		hash_seed = rte_rand();

	handle = rte_jhash_32b(tmp.words, sizeof(flow) / sizeof(uint32_t), hash_seed);

	/* must be at least 1 to avoid letting the kernel choose one for us */
	if (!handle)
		handle = 1;
	flow->msg.t.tcm_handle = handle;
}

/**
 * Free the flow's opened file descriptors and allocated memory.
 *
 * @param[in] flow
 *   Pointer to the flow to free
 *
 */
static void
tap_flow_free(struct pmd_internals *pmd __rte_unused, struct rte_flow *flow)
{
	if (!flow)
		return;

#ifdef HAVE_BPF_RSS
	struct tap_rss *rss = pmd->rss;
	if (rss)
		bpf_map__delete_elem(rss->maps.rss_map,
				     &flow->msg.t.tcm_handle, sizeof(uint32_t), 0);
#endif
	/* Free flow allocated memory */
	rte_free(flow);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *remote_flow = NULL;
	struct rte_flow *flow = NULL;
	struct nlmsg *msg = NULL;
	int err;

	if (!pmd->if_index) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL,
				   "can't create rule, ifindex not found");
		goto fail;
	}
	/*
	 * No rules configured through standard rte_flow should be set on the
	 * priorities used by implicit rules.
	 */
	if ((attr->group == MAX_GROUP) &&
	    attr->priority > (MAX_PRIORITY - TAP_REMOTE_MAX_IDX)) {
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		goto fail;
	}
	flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &flow->msg;
	tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER,
		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	tap_flow_set_handle(flow);
	if (priv_flow_process(pmd, attr, items, actions, error, flow, 0))
		goto fail;
	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto fail;
	}
	err = tap_nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		TAP_LOG(ERR,
			"Kernel refused TC filter rule creation (%d): %s",
			errno, strerror(errno));
		rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL,
				   "overlapping rules or Kernel too old for flower support");
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->flows, flow, next);
	/**
	 * If a remote device is configured, a TC rule with identical items for
	 * matching must be set on that device, with a single action: redirect
	 * to the local pmd->if_index.
	 */
	if (pmd->remote_if_index) {
		remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
		if (!remote_flow) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				"cannot allocate memory for rte_flow");
			goto fail;
		}
		msg = &remote_flow->msg;
		/* set the rule if_index for the remote netdevice */
		tc_init_msg(
			msg, pmd->remote_if_index, RTM_NEWTFILTER,
			NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
		msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
		tap_flow_set_handle(remote_flow);
		if (priv_flow_process(pmd, attr, items, NULL,
				      error, remote_flow, TCA_EGRESS_REDIR)) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "rte flow rule validation failed");
			goto fail;
		}
		err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
		if (err < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto fail;
		}
		err = tap_nl_recv_ack(pmd->nlsk_fd);
		if (err < 0) {
			TAP_LOG(ERR,
				"Kernel refused TC filter rule creation (%d): %s",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL,
				"overlapping rules or Kernel too old for flower support");
			goto fail;
		}
		flow->remote_flow = remote_flow;
	}
	return flow;
fail:
	rte_free(remote_flow);
	if (flow)
		tap_flow_free(pmd, flow);
	return NULL;
}

/**
 * Destroy a flow using a pointer to pmd_internals.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] flow
 *   Pointer to the flow to destroy.
 * @param[in, out] error
 *   Pointer to the flow error handler
 *
 * @return 0 if the flow could be destroyed, -1 otherwise.
 */
static int
tap_flow_destroy_pmd(struct pmd_internals *pmd,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	struct rte_flow *remote_flow = flow->remote_flow;
	int ret = 0;

	LIST_REMOVE(flow, next);
	flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

	ret = tap_nl_send(pmd->nlsk_fd, &flow->msg.nh);
	if (ret < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto end;
	}
	ret = tap_nl_recv_ack(pmd->nlsk_fd);
	/* If errno is ENOENT, the rule is already no longer in the kernel. */
	if (ret < 0 && errno == ENOENT)
		ret = 0;
	if (ret < 0) {
		TAP_LOG(ERR,
			"Kernel refused TC filter rule deletion (%d): %s",
			errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"couldn't receive kernel ack to our request");
		goto end;
	}

	if (remote_flow) {
		remote_flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		remote_flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

		ret = tap_nl_send(pmd->nlsk_fd, &remote_flow->msg.nh);
		if (ret < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto end;
		}
		ret = tap_nl_recv_ack(pmd->nlsk_fd);
		if (ret < 0 && errno == ENOENT)
			ret = 0;
		if (ret < 0) {
			TAP_LOG(ERR,
				"Kernel refused TC filter rule deletion (%d): %s",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure trying to receive nl ack");
			goto end;
		}
	}
end:
	rte_free(remote_flow);
	tap_flow_free(pmd, flow);
	return ret;
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
static int
tap_flow_destroy(struct rte_eth_dev *dev,
		 struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return tap_flow_destroy_pmd(pmd, flow, error);
}

/**
 * Enable/disable flow isolation.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
static int
tap_flow_isolate(struct rte_eth_dev *dev,
		 int set,
		 struct rte_flow_error *error __rte_unused)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *process_private = dev->process_private;

	/* normalize 'set' variable to contain 0 or 1 values */
	if (set)
		set = 1;
	/* if already in the right isolation mode - nothing to do */
	if ((set ^ pmd->flow_isolate) == 0)
		return 0;
	/* mark the isolation mode for tap_flow_implicit_create() */
	pmd->flow_isolate = set;
	/*
	 * If netdevice is there, setup appropriate flow rules immediately.
	 * Otherwise it will be set when bringing up the netdevice (tun_alloc).
	 */
	if (process_private->fds[0] == -1)
		return 0;

	if (set) {
		struct rte_flow *remote_flow;

		while (1) {
			remote_flow = LIST_FIRST(&pmd->implicit_flows);
			if (!remote_flow)
				break;
			/*
			 * Remove all implicit rules on the remote.
			 * Keep the local rule to redirect packets on TX.
			 * Keep also the last implicit local rule: ISOLATE.
			 */
			if (remote_flow->msg.t.tcm_ifindex == pmd->if_index)
				break;
			if (tap_flow_destroy_pmd(pmd, remote_flow, NULL) < 0)
				goto error;
		}
		/* Switch the TC rule according to pmd->flow_isolate */
		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
			goto error;
	} else {
		/* Switch the TC rule according to pmd->flow_isolate */
		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
			goto error;
		if (!pmd->remote_if_index)
			return 0;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0)
			goto error;
		if (dev->data->promiscuous &&
		    tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC) < 0)
			goto error;
		if (dev->data->all_multicast &&
		    tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI) < 0)
			goto error;
	}
	return 0;
error:
	pmd->flow_isolate = 0;
	return rte_flow_error_set(
		error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
		"TC rule creation failed");
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *flow;

	while (!LIST_EMPTY(&pmd->flows)) {
		flow = LIST_FIRST(&pmd->flows);
		if (tap_flow_destroy(dev, flow, error) < 0)
			return -1;
	}
	return 0;
}

/**
 * Add an implicit flow rule on the remote device to make sure traffic gets to
 * the tap netdevice from there.
 *
 * @param pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to apply.
 *
 * @return -1 if the rule couldn't be applied, 0 otherwise.
 */
int tap_flow_implicit_create(struct pmd_internals *pmd,
			     enum implicit_rule_index idx)
{
	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
	struct rte_flow_action *actions = implicit_rte_flows[idx].actions;
	struct rte_flow_action isolate_actions[2] = {
		[1] = {
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow_item *items = implicit_rte_flows[idx].items;
	struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
	struct rte_flow_item_eth eth_local = { .hdr.ether_type = 0 };
	unsigned int if_index = pmd->remote_if_index;
	struct rte_flow *remote_flow = NULL;
	struct nlmsg *msg = NULL;
	int err = 0;
	struct rte_flow_item items_local[2] = {
		[0] = {
			.type = items[0].type,
			.spec = &eth_local,
			.mask = items[0].mask,
		},
		[1] = {
			.type = items[1].type,
		}
	};

	remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
	if (!remote_flow) {
		TAP_LOG(ERR, "Cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &remote_flow->msg;
	if (idx == TAP_REMOTE_TX) {
		if_index = pmd->if_index;
	} else if (idx == TAP_ISOLATE) {
		if_index = pmd->if_index;
		/* Don't be exclusive for this rule, it can be changed later. */
		flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
		isolate_actions[0].type = pmd->flow_isolate ?
			RTE_FLOW_ACTION_TYPE_DROP :
			RTE_FLOW_ACTION_TYPE_PASSTHRU;
		actions = isolate_actions;
	} else if (idx == TAP_REMOTE_LOCAL_MAC) {
		/*
		 * eth addr couldn't be set in implicit_rte_flows[] as it is not
		 * known at compile time.
		 */
		memcpy(&eth_local.hdr.dst_addr, &pmd->eth_addr, sizeof(pmd->eth_addr));
		items = items_local;
	}
	tc_init_msg(msg, if_index, RTM_NEWTFILTER, flags);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	/*
	 * The ISOLATE rule is always present and must have a static handle, as
	 * the action is changed depending on whether the feature is enabled
	 * (DROP) or disabled (PASSTHRU).
	 * There is just one REMOTE_PROMISCUOUS rule in all cases. It should
	 * have a static handle such that adding it twice will fail with EEXIST
	 * with any kernel version. Remark: old kernels may falsely accept the
	 * same REMOTE_PROMISCUOUS rules if they had different handles.
	 */
	if (idx == TAP_ISOLATE)
		remote_flow->msg.t.tcm_handle = ISOLATE_HANDLE;
	else if (idx == TAP_REMOTE_PROMISC)
		remote_flow->msg.t.tcm_handle = REMOTE_PROMISCUOUS_HANDLE;
	else
		tap_flow_set_handle(remote_flow);
	if (priv_flow_process(pmd, attr, items, actions, NULL,
			      remote_flow, implicit_rte_flows[idx].mirred)) {
		TAP_LOG(ERR, "rte flow rule validation failed");
		goto fail;
	}
	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		TAP_LOG(ERR, "Failure sending nl request");
		goto fail;
	}
	err = tap_nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		/* Silently ignore re-entering existing rule */
		if (errno == EEXIST)
			goto success;
		TAP_LOG(ERR,
			"Kernel refused TC filter rule creation (%d): %s",
			errno, strerror(errno));
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next);
success:
	return 0;
fail:
	rte_free(remote_flow);
	return -1;
}

/**
 * Remove a specific implicit flow rule on the remote device.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to remove.
 *
 * @return -1 if the implicit rule couldn't be removed, 0 otherwise.
 */
int tap_flow_implicit_destroy(struct pmd_internals *pmd,
			      enum implicit_rule_index idx)
{
	struct rte_flow *remote_flow;
	int cur_prio = -1;
	int idx_prio = implicit_rte_flows[idx].attr.priority + PRIORITY_OFFSET;

	for (remote_flow = LIST_FIRST(&pmd->implicit_flows);
	     remote_flow;
	     remote_flow = LIST_NEXT(remote_flow, next)) {
		cur_prio = (remote_flow->msg.t.tcm_info >> 16) & PRIORITY_MASK;
		if (cur_prio != idx_prio)
			continue;
		return tap_flow_destroy_pmd(pmd, remote_flow, NULL);
	}
	return 0;
}
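
/*
 * tap_flow_implicit_destroy() above identifies the rule to delete by priority
 * alone: it recovers the priority from the upper 16 bits of tcm_info (where
 * priv_flow_process() packed it) and compares it against the priority derived
 * from the implicit rule's index. Each implicit rule uses a distinct
 * priority, so this is enough to find the right list entry.
 */
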
/**
 * Destroy all implicit flows.
 *
 * @see rte_flow_flush()
 */
int
tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)
{
	struct rte_flow *remote_flow;

	while (!LIST_EMPTY(&pmd->implicit_flows)) {
		remote_flow = LIST_FIRST(&pmd->implicit_flows);
		if (tap_flow_destroy_pmd(pmd, remote_flow, error) < 0)
			return -1;
	}
	return 0;
}

/**
 * Cleanup when device is closed
 */
void tap_flow_bpf_destroy(struct pmd_internals *pmd __rte_unused)
{
#ifdef HAVE_BPF_RSS
	tap_rss__destroy(pmd->rss);
	pmd->rss = NULL;
#endif
}

#ifdef HAVE_BPF_RSS
/**
 * Enable RSS on tap: load and attach the BPF program used for queuing.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 *
 * @param[out] error
 *   Pointer to error reporting if not NULL.
 *
 * @return 0 on success, negative value on failure.
 */
static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error)
{
	int err;

	/* Load the BPF program (generated from the tap_rss skeleton) */
	pmd->rss = tap_rss__open_and_load();
	if (pmd->rss == NULL) {
		TAP_LOG(ERR, "Failed to load BPF object: %s", strerror(errno));
		rte_flow_error_set(error, errno, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				   "BPF object could not be loaded");
		return -errno;
	}

	/* Attach the maps defined in BPF program */
	err = tap_rss__attach(pmd->rss);
	if (err < 0) {
		TAP_LOG(ERR, "Failed to attach BPF object: %d", err);
		rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				   "BPF object could not be attached");
		tap_flow_bpf_destroy(pmd);
		return err;
	}

	return 0;
}

/* Default RSS hash key also used by mlx devices */
static const uint8_t rss_hash_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};
/**
 * Add RSS hash calculations and queue selection
 *
 * @param[in, out] pmd
 *   Pointer to internal structure. Used to set/get RSS map fd
 *
 * @param[in] rss
 *   Pointer to RSS flow actions
 *
 * @param[out] error
 *   Pointer to error reporting if not NULL.
 *
 * @return 0 on success, negative value on failure
 */
static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
			   const struct rte_flow_action_rss *rss,
			   struct rte_flow_error *error)
{
	const struct bpf_program *rss_prog = pmd->rss->progs.rss_flow_action;
	struct rss_key rss_entry = { };
	const uint8_t *key_in;
	uint32_t hash_type = 0;
	uint32_t handle = flow->msg.t.tcm_handle;
	unsigned int i;
	int err;

	/* Check supported RSS features */
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			 "non-default RSS hash functions are not supported");
	if (rss->level)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			 "a nonzero RSS encapsulation level is not supported");

	if (rss->queue_num == 0 || rss->queue_num >= TAP_MAX_QUEUES)
		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "invalid number of queues");

	/*
	 * Follow the semantics of RSS key (see rte_ethdev.h)
	 * There are two valid cases:
	 *   1. key_length of zero, and key must be NULL;
	 *      this uses the default driver key.
	 *
	 *   2. key_length is the TAP_RSS_HASH_KEY_SIZE (40 bytes)
	 *      and the key must not be NULL.
	 *
	 * Anything else is an error.
	 */
	if (rss->key_len == 0) {
		if (rss->key != NULL)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
						  &rss->key_len, "RSS hash key length 0");
		key_in = rss_hash_default_key;
	} else {
		if (rss->key_len != TAP_RSS_HASH_KEY_SIZE)
			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
						  NULL, "RSS hash invalid key length");
		if (rss->key == NULL)
			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
						  NULL, "RSS hash key is NULL");
		key_in = rss->key;
	}

	if (rss->types & TAP_RSS_HF_MASK)
		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL, "RSS hash type not supported");

	if (rss->types & (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_TCP))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV4_L3_L4);
	else if (rss->types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV4_L3);

	if (rss->types & (RTE_ETH_RSS_NONFRAG_IPV6_UDP | RTE_ETH_RSS_NONFRAG_IPV6_TCP))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV6_L3_L4);
	else if (rss->types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_IPV6_EX))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV6_L3);

	rss_entry.hash_fields = hash_type;
	rte_convert_rss_key((const uint32_t *)key_in, (uint32_t *)rss_entry.key,
			    TAP_RSS_HASH_KEY_SIZE);

	/* Update RSS map entry with queues */
	rss_entry.nb_queues = rss->queue_num;
	for (i = 0; i < rss->queue_num; i++)
		rss_entry.queues[i] = rss->queue[i];

	/* Add this way for BPF to find entry in map */
	err = bpf_map__update_elem(pmd->rss->maps.rss_map,
				   &handle, sizeof(handle),
				   &rss_entry, sizeof(rss_entry), 0);
	if (err) {
		TAP_LOG(ERR,
			"Failed to update BPF map entry %#x (%d): %s",
			handle, errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"Kernel too old or not configured to support BPF maps updates");

		return -ENOTSUP;
	}
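
	/*
	 * How the pieces connect: the flower rule's handle was stored above as
	 * the key of the BPF map entry. The first action below marks matching
	 * packets with that same handle (skbedit mark) and the second runs the
	 * rss_flow_action BPF program, so the program can look the mark up in
	 * the map to find the key, hash fields and queue list to apply.
	 */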
	/* Add actions to mark packet then run the RSS BPF program */
	struct action_data adata[] = {
		{
			.id = "skbedit",
			.skbedit = {
				.skbedit.action = TC_ACT_PIPE,
				.mark = handle,
			},
		},
		{
			.id = "bpf",
			.bpf = {
				.bpf.action = TC_ACT_PIPE,
				.annotation = "tap_rss",
				.bpf_fd = bpf_program__fd(rss_prog),
			},
		},
	};

	return add_actions(flow, RTE_DIM(adata), adata, TCA_FLOWER_ACT);
}
#endif

/**
 * Get rte_flow operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param ops
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
tap_dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
		     const struct rte_flow_ops **ops)
{
	*ops = &tap_flow_ops;
	return 0;
}