/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>
#include <sys/resource.h>

#include <rte_byteorder.h>
#include <rte_jhash.h>
#include <rte_thash.h>
#include <rte_random.h>
#include <rte_malloc.h>
#include <rte_eth_tap.h>
#include <rte_uuid.h>

#include <tap_flow.h>
#include <tap_tcmsgs.h>
#include <tap_rss.h>

#ifdef HAVE_BPF_RSS
/* Workaround for warning in bpftool generated skeleton code */
__rte_diagnostic_push
__rte_diagnostic_ignored_wcast_qual
#include "tap_rss.skel.h"
__rte_diagnostic_pop
#endif

#define ISOLATE_HANDLE 1
#define REMOTE_PROMISCUOUS_HANDLE 2

struct rte_flow {
	LIST_ENTRY(rte_flow) next;	/* Pointer to the next rte_flow structure */
	struct rte_flow *remote_flow;	/* associated remote flow */
	struct tap_nlmsg msg;
};

struct convert_data {
	uint16_t eth_type;
	uint16_t ip_proto;
	uint8_t vlan;
	struct rte_flow *flow;
};

struct remote_rule {
	struct rte_flow_attr attr;
	struct rte_flow_item items[2];
	struct rte_flow_action actions[2];
	int mirred;
};

struct action_data {
	char id[16];

	union {
		struct tc_gact gact;
		struct tc_mirred mirred;
		struct skbedit {
			struct tc_skbedit skbedit;
			uint16_t queue;
			uint32_t mark;
		} skbedit;
#ifdef HAVE_BPF_RSS
		struct bpf {
			struct tc_act_bpf bpf;
			uint32_t map_key;
			int bpf_fd;
			const char *annotation;
		} bpf;
#endif
	};
};

static int tap_flow_create_eth(const struct rte_flow_item *item, struct convert_data *info);
static int tap_flow_create_vlan(const struct rte_flow_item *item, struct convert_data *info);
static int tap_flow_create_ipv4(const struct rte_flow_item *item, struct convert_data *info);
static int tap_flow_create_ipv6(const struct rte_flow_item *item, struct convert_data *info);
static int tap_flow_create_udp(const struct rte_flow_item *item, struct convert_data *info);
static int tap_flow_create_tcp(const struct rte_flow_item *item, struct convert_data *info);
static int
tap_flow_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error);

static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error);

static void
tap_flow_free(struct pmd_internals *pmd,
	      struct rte_flow *flow);

static int
tap_flow_destroy(struct rte_eth_dev *dev,
		 struct rte_flow *flow,
		 struct rte_flow_error *error);

static int
tap_flow_isolate(struct rte_eth_dev *dev,
		 int set,
		 struct rte_flow_error *error);

#ifdef HAVE_BPF_RSS
static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error);
static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
			   const struct rte_flow_action_rss *rss,
			   struct rte_flow_error *error);
#endif

static const struct rte_flow_ops tap_flow_ops = {
	.validate = tap_flow_validate,
	.create = tap_flow_create,
	.destroy = tap_flow_destroy,
	.flush = tap_flow_flush,
	.isolate = tap_flow_isolate,
};

/* Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}

/* Structure to generate a simple graph of layers supported by the NIC. */
struct tap_flow_items {
	/* Bit-mask corresponding to what is supported for this item. */
	const void *mask;
	const unsigned int mask_sz; /* Bit-mask size in bytes. */
	/*
	 * Bit-mask corresponding to the default mask, if none is provided
	 * along with the item.
	 */
	const void *default_mask;
	/* Conversion function from rte_flow to netlink attributes. */
	int (*convert)(const struct rte_flow_item *item, struct convert_data *info);

	/* List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/* Graph of supported items and associated actions. */
static const struct tap_flow_items tap_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(
			RTE_FLOW_ITEM_TYPE_VLAN,
			RTE_FLOW_ITEM_TYPE_IPV4,
			RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_eth){
			.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			.hdr.src_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			.hdr.ether_type = -1,
		},
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.default_mask = &rte_flow_item_eth_mask,
		.convert = tap_flow_create_eth,
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_vlan){
			/* DEI matching is not supported */
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
			.hdr.vlan_tci = 0xffef,
#else
			.hdr.vlan_tci = 0xefff,
#endif
			.hdr.eth_proto = -1,
		},
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.default_mask = &rte_flow_item_vlan_mask,
		.convert = tap_flow_create_vlan,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.next_proto_id = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.default_mask = &rte_flow_item_ipv4_mask,
		.convert = tap_flow_create_ipv4,
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = RTE_IPV6_MASK_FULL,
				.dst_addr = RTE_IPV6_MASK_FULL,
				.proto = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.default_mask = &rte_flow_item_ipv6_mask,
		.convert = tap_flow_create_ipv6,
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.default_mask = &rte_flow_item_udp_mask,
		.convert = tap_flow_create_udp,
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.default_mask = &rte_flow_item_tcp_mask,
		.convert = tap_flow_create_tcp,
	},
};

/*
 * TC rules, by growing priority
 *
 *        Remote netdevice                   Tap netdevice
 * +-------------+-------------+  +-------------+-------------+
 * |   Ingress   |   Egress    |  |   Ingress   |   Egress    |
 * |-------------|-------------|  |-------------|-------------|
 * |             |  \       /  |  |             |  REMOTE TX  | prio 1
 * |             |   \     /   |  |             |   \     /   | prio 2
 * |  EXPLICIT   |    \   /    |  |  EXPLICIT   |    \   /    |   .
 * |             |     \ /     |  |             |     \ /     |   .
 * |   RULES     |      X      |  |   RULES     |      X      |   .
 * |      .      |     / \     |  |      .      |     / \     |   .
 * |      .      |    /   \    |  |      .      |    /   \    |   .
 * |      .      |   /     \   |  |      .      |   /     \   |   .
 * |      .      |  /       \  |  |      .      |  /       \  |   .
 *
 *      ....           ....           ....           ....
 *
 * |      .      |  \       /  |  |      .      |  \       /  |   .
 * |      .      |   \     /   |  |      .      |   \     /   |   .
 * |             |    \   /    |  |             |    \   /    |
 * |  LOCAL_MAC  |     \ /     |  |    \   /    |     \ /     | last prio - 5
 * |   PROMISC   |      X      |  |     \ /     |      X      | last prio - 4
 * |   ALLMULTI  |     / \     |  |      X      |     / \     | last prio - 3
 * |  BROADCAST  |    /   \    |  |     / \     |    /   \    | last prio - 2
 * | BROADCASTV6 |   /     \   |  |    /   \    |   /     \   | last prio - 1
 * |     xx      |  /       \  |  |   ISOLATE   |  /       \  | last prio
 * +-------------+-------------+  +-------------+-------------+
 *
 * The implicit flow rules are stored in a list, with the last two mandatorily
 * being the ISOLATE and REMOTE_TX rules, e.g.:
 *
 * LOCAL_MAC -> BROADCAST -> BROADCASTV6 -> REMOTE_TX -> ISOLATE -> NULL
 *
 * That enables tap_flow_isolate() to remove implicit rules by popping the list
 * head and removing entries as long as they apply to the remote netdevice. The
 * implicit rule for TX redirection is not removed, as isolate concerns only
 * incoming traffic.
 */

static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
	[TAP_REMOTE_LOCAL_MAC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_LOCAL_MAC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_REDIR,
	},
	[TAP_REMOTE_BROADCAST] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCAST,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			},
			.spec = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_BROADCASTV6] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCASTV6,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
			},
			.spec = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_PROMISC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_PROMISC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_ALLMULTI] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_ALLMULTI,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
			},
			.spec = &(const struct rte_flow_item_eth){
				.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_TX] = {
		.attr = {
			.group = 0,
			.priority = TAP_REMOTE_TX,
			.egress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_ISOLATE] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_ISOLATE,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	},
};

/**
 * Make as many checks as possible on an Ethernet item, and if a flow is
 * provided, fill it appropriately with Ethernet info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] info
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_eth(const struct rte_flow_item *item, struct convert_data *info)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct tap_nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_ETH].default_mask;
	/* TC does not support eth_type masking. Only accept if exact match. */
	if (mask->hdr.ether_type && mask->hdr.ether_type != 0xffff)
		return -1;
	if (!spec)
		return 0;
	/* store eth_type for consistency if ipv4/6 pattern item comes next */
	if (spec->hdr.ether_type & mask->hdr.ether_type)
		info->eth_type = spec->hdr.ether_type;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!rte_is_zero_ether_addr(&mask->hdr.dst_addr)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST,
			       RTE_ETHER_ADDR_LEN,
			       &spec->hdr.dst_addr.addr_bytes);
		tap_nlattr_add(&msg->nh,
			       TCA_FLOWER_KEY_ETH_DST_MASK, RTE_ETHER_ADDR_LEN,
			       &mask->hdr.dst_addr.addr_bytes);
	}
	if (!rte_is_zero_ether_addr(&mask->hdr.src_addr)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC,
			       RTE_ETHER_ADDR_LEN,
			       &spec->hdr.src_addr.addr_bytes);
		tap_nlattr_add(&msg->nh,
			       TCA_FLOWER_KEY_ETH_SRC_MASK, RTE_ETHER_ADDR_LEN,
			       &mask->hdr.src_addr.addr_bytes);
	}
	return 0;
}

/**
 * Make as many checks as possible on a VLAN item, and if a flow is provided,
 * fill it appropriately with VLAN info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] info
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_vlan(const struct rte_flow_item *item, struct convert_data *info)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct tap_nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_VLAN].default_mask;
	/* Outer TPID cannot be matched. */
	if (info->eth_type)
		return -1;
	/* Double-tagging not supported. */
	if (info->vlan)
		return -1;
	info->vlan = 1;
	if (mask->hdr.eth_proto) {
		/* TC does not support partial eth_type masking */
		if (mask->hdr.eth_proto != RTE_BE16(0xffff))
			return -1;
		info->eth_type = spec->hdr.eth_proto;
	}
	if (!flow)
		return 0;
	msg = &flow->msg;
	msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_8021Q));
#define VLAN_PRIO(tci) ((tci) >> 13)
#define VLAN_ID(tci) ((tci) & 0xfff)
	if (!spec)
		return 0;
	if (spec->hdr.vlan_tci) {
		uint16_t tci = ntohs(spec->hdr.vlan_tci) & mask->hdr.vlan_tci;
		uint16_t prio = VLAN_PRIO(tci);
		uint8_t vid = VLAN_ID(tci);

		if (prio)
			tap_nlattr_add8(&msg->nh,
					TCA_FLOWER_KEY_VLAN_PRIO, prio);
		if (vid)
			tap_nlattr_add16(&msg->nh,
					 TCA_FLOWER_KEY_VLAN_ID, vid);
	}
	return 0;
}

/**
 * Make as many checks as possible on an IPv4 item, and if a flow is provided,
 * fill it appropriately with IPv4 info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] info
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_ipv4(const struct rte_flow_item *item, struct convert_data *info)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct tap_nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].default_mask;
	/* check that previous eth type is compatible with ipv4 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IP))
		return -1;
	/* store ip_proto for consistency if udp/tcp pattern item comes next */
	if (spec)
		info->ip_proto = spec->hdr.next_proto_id;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_addr) {
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
				 spec->hdr.dst_addr);
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
				 mask->hdr.dst_addr);
	}
	if (mask->hdr.src_addr) {
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
				 spec->hdr.src_addr);
		tap_nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
				 mask->hdr.src_addr);
	}
	if (spec->hdr.next_proto_id)
		tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO,
				spec->hdr.next_proto_id);
	return 0;
}

/**
 * Make as many checks as possible on an IPv6 item, and if a flow is provided,
 * fill it appropriately with IPv6 info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] info
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_ipv6(const struct rte_flow_item *item, struct convert_data *info)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	uint8_t empty_addr[16] = { 0 };
	struct tap_nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV6].default_mask;
	/* check that previous eth type is compatible with ipv6 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IPV6))
		return -1;
	/* store ip_proto for consistency if udp/tcp pattern item comes next */
	if (spec)
		info->ip_proto = spec->hdr.proto;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IPV6);
	if (!spec)
		return 0;
	if (memcmp(&mask->hdr.dst_addr, empty_addr, 16)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
			       sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
	}
	if (memcmp(&mask->hdr.src_addr, empty_addr, 16)) {
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
			       sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
		tap_nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(mask->hdr.src_addr), &mask->hdr.src_addr);
	}
	if (spec->hdr.proto)
		tap_nlattr_add8(&msg->nh,
				TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto);
	return 0;
}

/**
 * Make as many checks as possible on a UDP item, and if a flow is provided,
 * fill it appropriately with UDP info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] info
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_udp(const struct rte_flow_item *item, struct convert_data *info)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct tap_nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_UDP].default_mask;
	/* check that previous ip_proto is compatible with udp */
	if (info->ip_proto && info->ip_proto != IPPROTO_UDP)
		return -1;
	/* TC does not support UDP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
				 spec->hdr.dst_port);
	if (mask->hdr.src_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
				 spec->hdr.src_port);
	return 0;
}

/**
 * Make as many checks as possible on a TCP item, and if a flow is provided,
 * fill it appropriately with TCP info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] info
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_tcp(const struct rte_flow_item *item, struct convert_data *info)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct tap_nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_TCP].default_mask;
	/* check that previous ip_proto is compatible with tcp */
	if (info->ip_proto && info->ip_proto != IPPROTO_TCP)
		return -1;
	/* TC does not support TCP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	tap_nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
	if (!spec)
		return 0;
	if (mask->hdr.dst_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
				 spec->hdr.dst_port);
	if (mask->hdr.src_port)
		tap_nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
				 spec->hdr.src_port);
	return 0;
}

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param size
 *   Bit-mask size in bytes.
 * @param[in] supported_mask
 *   Bit-mask covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param[in] default_mask
 *   Bit-mask default mask if none is provided in \item.
 *
 * @return
 *   0 on success.
 */
static int
tap_flow_item_validate(const struct rte_flow_item *item,
		       unsigned int size,
		       const uint8_t *supported_mask,
		       const uint8_t *default_mask)
{
	int ret = 0;

	/* An empty layer is allowed, as long as all fields are NULL */
	if (!item->spec && (item->mask || item->last))
		return -1;
	/* Is the item spec compatible with what the NIC supports? */
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
		/* Is the default mask compatible with what the NIC supports? */
		for (i = 0; i < size; i++)
			if ((default_mask[i] | supported_mask[i]) !=
			    supported_mask[i])
				return -1;
	}
	/* Is the item last compatible with what the NIC supports? */
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/* Is the item mask compatible with what the NIC supports? */
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/**
	 * Once masked, are item spec and item last equal?
	 * TC does not support range so anything else is invalid.
	 */
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = default_mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}

/**
 * Configure the kernel with a TC action and its configured parameters
 * Handled actions: "gact", "mirred", "skbedit", "bpf"
 *
 * @param[in] flow
 *   Pointer to rte flow containing the netlink message
 *
 * @param[in, out] act_index
 *   Pointer to action sequence number in the TC command
 *
 * @param[in] adata
 *   Pointer to struct holding the action parameters
 *
 * @return
 *   -1 on failure, 0 on success
 */
static int
add_action(struct rte_flow *flow, size_t *act_index, struct action_data *adata)
{
	struct tap_nlmsg *msg = &flow->msg;

	if (tap_nlattr_nested_start(msg, (*act_index)++) < 0)
		return -1;

	tap_nlattr_add(&msg->nh, TCA_ACT_KIND,
		       strlen(adata->id) + 1, adata->id);
	if (tap_nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
		return -1;
	if (strcmp("gact", adata->id) == 0) {
		tap_nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(adata->gact),
			       &adata->gact);
	} else if (strcmp("mirred", adata->id) == 0) {
		if (adata->mirred.eaction == TCA_EGRESS_MIRROR)
			adata->mirred.action = TC_ACT_PIPE;
		else /* REDIRECT */
			adata->mirred.action = TC_ACT_STOLEN;
		tap_nlattr_add(&msg->nh, TCA_MIRRED_PARMS,
			       sizeof(adata->mirred),
			       &adata->mirred);
	} else if (strcmp("skbedit", adata->id) == 0) {
		tap_nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS,
			       sizeof(adata->skbedit.skbedit), &adata->skbedit.skbedit);
		if (adata->skbedit.mark)
			tap_nlattr_add32(&msg->nh, TCA_SKBEDIT_MARK, adata->skbedit.mark);
		else
			tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, adata->skbedit.queue);
	} else if (strcmp("bpf", adata->id) == 0) {
#ifdef HAVE_BPF_RSS
		tap_nlattr_add32(&msg->nh, TCA_ACT_BPF_FD, adata->bpf.bpf_fd);
		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_NAME,
			       strlen(adata->bpf.annotation) + 1,
			       adata->bpf.annotation);
		tap_nlattr_add(&msg->nh, TCA_ACT_BPF_PARMS,
			       sizeof(adata->bpf.bpf),
			       &adata->bpf.bpf);
#else
		TAP_LOG(ERR, "Internal error: bpf requested but not supported");
		return -1;
#endif
	} else {
		TAP_LOG(ERR, "Internal error: unknown action: %s", adata->id);
		return -1;
	}
	tap_nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
	tap_nlattr_nested_finish(msg); /* nested act_index */
	return 0;
}

/**
 * Helper function to send a series of TC actions to the kernel
 *
 * @param[in] flow
 *   Pointer to rte flow containing the netlink message
 *
 * @param[in] nb_actions
 *   Number of actions in an array of action structs
 *
 * @param[in] data
 *   Pointer to an array of action structs
 *
 * @param[in] classifier_action
 *   The classifier on behalf of which the actions are configured
 *
 * @return
 *   -1 on failure, 0 on success
 */
static int
add_actions(struct rte_flow *flow, int nb_actions, struct action_data *data,
	    int classifier_action)
{
	struct tap_nlmsg *msg = &flow->msg;
	size_t act_index = 1;
	int i;

	if (tap_nlattr_nested_start(msg, classifier_action) < 0)
		return -1;
	for (i = 0; i < nb_actions; i++)
		if (add_action(flow, &act_index, data + i) < 0)
			return -1;
	tap_nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
	return 0;
}

/**
 * Validate a flow supported by TC.
 * If flow param is not NULL, then also fill the netlink message inside.
 *
 * @param pmd
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 * @param[in] mirred
 *   If set to TCA_EGRESS_REDIR, provided actions will be replaced with a
 *   redirection to the tap netdevice, and the TC rule will be configured
 *   on the remote netdevice in pmd.
 *   If set to TCA_EGRESS_MIRROR, provided actions will be replaced with a
 *   mirroring to the tap netdevice, and the TC rule will be configured
 *   on the remote netdevice in pmd. Matching packets will thus be duplicated.
 *   If set to 0, the standard behavior is to be used: set correct actions for
 *   the TC rule, and apply it on the tap netdevice.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_process(struct pmd_internals *pmd,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct rte_flow *flow,
		  int mirred)
{
	const struct tap_flow_items *cur_item = tap_flow_items;
	struct convert_data data = {
		.eth_type = 0,
		.ip_proto = 0,
		.flow = flow,
	};
	int action = 0; /* Only one action authorized for now */

	if (attr->transfer) {
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
			NULL, "transfer is not supported");
		return -rte_errno;
	}
	if (attr->group > MAX_GROUP) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
			NULL, "group value too big: cannot exceed 15");
		return -rte_errno;
	}
	if (attr->priority > MAX_PRIORITY) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		return -rte_errno;
	} else if (flow) {
		uint16_t group = attr->group << GROUP_SHIFT;
		uint16_t prio = group | (attr->priority +
					 RSS_PRIORITY_OFFSET + PRIORITY_OFFSET);
		flow->msg.t.tcm_info = TC_H_MAKE(prio << 16,
						 flow->msg.t.tcm_info);
	}
	if (flow) {
		if (mirred) {
			/*
			 * If attr->ingress, the rule applies on remote ingress
			 * to match incoming packets
			 * If attr->egress, the rule applies on tap ingress (as
			 * seen from the kernel) to deal with packets going out
			 * from the DPDK app.
			 */
			flow->msg.t.tcm_parent = TC_H_MAKE(TC_H_INGRESS, 0);
		} else {
			/* Standard rule on tap egress (kernel standpoint). */
			flow->msg.t.tcm_parent =
				TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
		}
		/* use flower filter type */
		tap_nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower");
		if (tap_nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0) {
			rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION,
					   actions, "could not allocate netlink msg");
			goto exit_return_error;
		}
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct tap_flow_items *token = NULL;
		unsigned int i;
		int err = 0;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &tap_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = tap_flow_item_validate(
			items, cur_item->mask_sz,
			(const uint8_t *)cur_item->mask,
			(const uint8_t *)cur_item->default_mask);
		if (err)
			goto exit_item_not_supported;
		if (flow && cur_item->convert) {
			err = cur_item->convert(items, &data);
			if (err)
				goto exit_item_not_supported;
		}
	}
	if (flow) {
		if (data.vlan) {
			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
					 htons(ETH_P_8021Q));
			tap_nlattr_add16(&flow->msg.nh,
					 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
					 data.eth_type ?
					 data.eth_type : htons(ETH_P_ALL));
		} else if (data.eth_type) {
			tap_nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
					 data.eth_type);
		}
	}
	if (mirred && flow) {
		struct action_data adata = {
			.id = "mirred",
			.mirred = {
				.eaction = mirred,
			},
		};

		/*
		 * If attr->egress && mirred, then this is a special
		 * case where the rule must be applied on the tap, to
		 * redirect packets coming from the DPDK App, out
		 * through the remote netdevice.
		 */
		adata.mirred.ifindex = attr->ingress ? pmd->if_index :
						       pmd->remote_if_index;
		if (mirred == TCA_EGRESS_MIRROR)
			adata.mirred.action = TC_ACT_PIPE;
		else
			adata.mirred.action = TC_ACT_STOLEN;
		if (add_actions(flow, 1, &adata, TCA_FLOWER_ACT) < 0)
			goto exit_action_not_supported;
		else
			goto end;
	}
actions:
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		int err = 0;

		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow) {
				struct action_data adata = {
					.id = "gact",
					.gact = {
						.action = TC_ACT_SHOT,
					},
				};

				err = add_actions(flow, 1, &adata,
						  TCA_FLOWER_ACT);
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_PASSTHRU) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow) {
				struct action_data adata = {
					.id = "gact",
					.gact = {
						/* continue */
						.action = TC_ACT_UNSPEC,
					},
				};

				err = add_actions(flow, 1, &adata, TCA_FLOWER_ACT);
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;

			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (queue->index >= pmd->dev->data->nb_rx_queues) {
				rte_flow_error_set(error, ERANGE,
						   RTE_FLOW_ERROR_TYPE_ACTION, actions,
						   "queue index out of range");
				goto exit_return_error;
			}
			if (flow) {
				struct action_data adata = {
					.id = "skbedit",
					.skbedit = {
						.skbedit = {
							.action = TC_ACT_PIPE,
						},
						.queue = queue->index,
					},
				};

				err = add_actions(flow, 1, &adata,
						  TCA_FLOWER_ACT);
			}
#ifdef HAVE_BPF_RSS
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;

			if (action++)
				goto exit_action_not_supported;

			if (pmd->rss == NULL) {
				err = rss_enable(pmd, error);
				if (err)
					goto exit_return_error;
			}
			if (flow)
				err = rss_add_actions(flow, pmd, rss, error);
#endif
		} else {
			goto exit_action_not_supported;
		}
		if (err)
			goto exit_return_error;
	}
	/* When fate is unknown, drop traffic. */
	if (!action) {
		static const struct rte_flow_action drop[] = {
			{ .type = RTE_FLOW_ACTION_TYPE_DROP, },
			{ .type = RTE_FLOW_ACTION_TYPE_END, },
		};

		actions = drop;
		goto actions;
	}
end:
	if (flow)
		tap_nlattr_nested_finish(&flow->msg); /* nested TCA_OPTIONS */
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
exit_return_error:
	return -rte_errno;
}

/**
 * Validate a flow.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
static int
tap_flow_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return priv_flow_process(pmd, attr, items, actions, error, NULL, 0);
}

/**
 * Set a unique handle in a flow.
 *
 * The kernel supports TC rules with equal priority, as long as they use the
 * same matching fields (e.g.: dst mac and ipv4) with different values (and
 * full mask to ensure no collision is possible).
 * In those rules, the handle (uint32_t) is the part that would identify
 * specifically each rule.
 *
 * Use jhash of the flow pointer to make a unique handle.
 *
 * @param[in, out] flow
 *   The flow that needs its handle set.
 */
static void
tap_flow_set_handle(struct rte_flow *flow)
{
	union {
		struct rte_flow *flow;
		uint32_t words[sizeof(flow) / sizeof(uint32_t)];
	} tmp = {
		.flow = flow,
	};
	uint32_t handle;
	static uint64_t hash_seed;

	if (hash_seed == 0)
		hash_seed = rte_rand();

	handle = rte_jhash_32b(tmp.words, sizeof(flow) / sizeof(uint32_t), hash_seed);

	/* must be at least 1 to avoid letting the kernel choose one for us */
	if (!handle)
		handle = 1;
	flow->msg.t.tcm_handle = handle;
}

/**
 * Free the flow opened file descriptors and allocated memory
 *
 * @param[in] flow
 *   Pointer to the flow to free
 *
 */
static void
tap_flow_free(struct pmd_internals *pmd __rte_unused, struct rte_flow *flow)
{
	if (!flow)
		return;

#ifdef HAVE_BPF_RSS
	struct tap_rss *rss = pmd->rss;
	if (rss)
		bpf_map__delete_elem(rss->maps.rss_map,
				     &flow->msg.t.tcm_handle, sizeof(uint32_t), 0);
#endif
	/* Free flow allocated memory */
	rte_free(flow);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *remote_flow = NULL;
	struct rte_flow *flow = NULL;
	struct tap_nlmsg *msg = NULL;
	int err;

	if (!pmd->if_index) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL,
				   "can't create rule, ifindex not found");
		goto fail;
	}
	/*
	 * No rules configured through standard rte_flow should be set on the
	 * priorities used by implicit rules.
	 */
	if ((attr->group == MAX_GROUP) &&
	    attr->priority > (MAX_PRIORITY - TAP_REMOTE_MAX_IDX)) {
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		goto fail;
	}
	flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &flow->msg;
	tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER,
		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	tap_flow_set_handle(flow);
	if (priv_flow_process(pmd, attr, items, actions, error, flow, 0))
		goto fail;
	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto fail;
	}
	err = tap_nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		TAP_LOG(ERR,
			"Kernel refused TC filter rule creation (%d): %s",
			errno, strerror(errno));
		rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL,
				   "overlapping rules or Kernel too old for flower support");
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->flows, flow, next);
	/**
	 * If a remote device is configured, a TC rule with identical items for
	 * matching must be set on that device, with a single action: redirect
	 * to the local pmd->if_index.
	 */
	if (pmd->remote_if_index) {
		remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
		if (!remote_flow) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				"cannot allocate memory for rte_flow");
			goto fail;
		}
		msg = &remote_flow->msg;
		/* set the rule if_index for the remote netdevice */
		tc_init_msg(
			msg, pmd->remote_if_index, RTM_NEWTFILTER,
			NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
		msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
		tap_flow_set_handle(remote_flow);
		if (priv_flow_process(pmd, attr, items, NULL,
				      error, remote_flow, TCA_EGRESS_REDIR)) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "rte flow rule validation failed");
			goto fail;
		}
		err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
		if (err < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto fail;
		}
		err = tap_nl_recv_ack(pmd->nlsk_fd);
		if (err < 0) {
			TAP_LOG(ERR,
				"Kernel refused TC filter rule creation (%d): %s",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL,
				"overlapping rules or Kernel too old for flower support");
			goto fail;
		}
		flow->remote_flow = remote_flow;
	}
	return flow;
fail:
	rte_free(remote_flow);
	if (flow)
		tap_flow_free(pmd, flow);
	return NULL;
}

/**
 * Destroy a flow using pointer to pmd_internals.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] flow
 *   Pointer to the flow to destroy.
 * @param[in, out] error
 *   Pointer to the flow error handler
 *
 * @return 0 if the flow could be destroyed, -1 otherwise.
 */
static int
tap_flow_destroy_pmd(struct pmd_internals *pmd,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	struct rte_flow *remote_flow = flow->remote_flow;
	int ret = 0;

	LIST_REMOVE(flow, next);
	flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

	ret = tap_nl_send(pmd->nlsk_fd, &flow->msg.nh);
	if (ret < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto end;
	}
	ret = tap_nl_recv_ack(pmd->nlsk_fd);
	/* If errno is ENOENT, the rule is already no longer in the kernel. */
	if (ret < 0 && errno == ENOENT)
		ret = 0;
	if (ret < 0) {
		TAP_LOG(ERR,
			"Kernel refused TC filter rule deletion (%d): %s",
			errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"couldn't receive kernel ack to our request");
		goto end;
	}

	if (remote_flow) {
		remote_flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		remote_flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

		ret = tap_nl_send(pmd->nlsk_fd, &remote_flow->msg.nh);
		if (ret < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto end;
		}
		ret = tap_nl_recv_ack(pmd->nlsk_fd);
		if (ret < 0 && errno == ENOENT)
			ret = 0;
		if (ret < 0) {
			TAP_LOG(ERR,
				"Kernel refused TC filter rule deletion (%d): %s",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure trying to receive nl ack");
			goto end;
		}
	}
end:
	rte_free(remote_flow);
	tap_flow_free(pmd, flow);
	return ret;
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
static int
tap_flow_destroy(struct rte_eth_dev *dev,
		 struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return tap_flow_destroy_pmd(pmd, flow, error);
}

/**
 * Enable/disable flow isolation.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
static int
tap_flow_isolate(struct rte_eth_dev *dev,
		 int set,
		 struct rte_flow_error *error __rte_unused)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct pmd_process_private *process_private = dev->process_private;

	/* normalize 'set' variable to contain 0 or 1 values */
	if (set)
		set = 1;
	/* if already in the right isolation mode - nothing to do */
	if ((set ^ pmd->flow_isolate) == 0)
		return 0;
	/* mark the isolation mode for tap_flow_implicit_create() */
	pmd->flow_isolate = set;
	/*
	 * If netdevice is there, setup appropriate flow rules immediately.
	 * Otherwise it will be set when bringing up the netdevice (tun_alloc).
	 */
	if (process_private->fds[0] == -1)
		return 0;

	if (set) {
		struct rte_flow *remote_flow;

		while (1) {
			remote_flow = LIST_FIRST(&pmd->implicit_flows);
			if (!remote_flow)
				break;
			/*
			 * Remove all implicit rules on the remote.
			 * Keep the local rule to redirect packets on TX.
			 * Keep also the last implicit local rule: ISOLATE.
			 */
			if (remote_flow->msg.t.tcm_ifindex == pmd->if_index)
				break;
			if (tap_flow_destroy_pmd(pmd, remote_flow, NULL) < 0)
				goto error;
		}
		/* Switch the TC rule according to pmd->flow_isolate */
		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
			goto error;
	} else {
		/* Switch the TC rule according to pmd->flow_isolate */
		if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
			goto error;
		if (!pmd->remote_if_index)
			return 0;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0)
			goto error;
		if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0)
			goto error;
		if (dev->data->promiscuous &&
		    tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC) < 0)
			goto error;
		if (dev->data->all_multicast &&
		    tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI) < 0)
			goto error;
	}
	return 0;
error:
	pmd->flow_isolate = 0;
	return rte_flow_error_set(
		error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
		"TC rule creation failed");
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *flow;

	while (!LIST_EMPTY(&pmd->flows)) {
		flow = LIST_FIRST(&pmd->flows);
		if (tap_flow_destroy(dev, flow, error) < 0)
			return -1;
	}
	return 0;
}

/**
 * Add an implicit flow rule on the remote device to make sure traffic gets to
 * the tap netdevice from there.
 *
 * @param pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to apply.
 *
 * @return -1 if the rule couldn't be applied, 0 otherwise.
 */
int tap_flow_implicit_create(struct pmd_internals *pmd,
			     enum implicit_rule_index idx)
{
	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
	struct rte_flow_action *actions = implicit_rte_flows[idx].actions;
	struct rte_flow_action isolate_actions[2] = {
		[1] = {
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow_item *items = implicit_rte_flows[idx].items;
	struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
	struct rte_flow_item_eth eth_local = { .hdr.ether_type = 0 };
	unsigned int if_index = pmd->remote_if_index;
	struct rte_flow *remote_flow = NULL;
	struct tap_nlmsg *msg = NULL;
	int err = 0;
	struct rte_flow_item items_local[2] = {
		[0] = {
			.type = items[0].type,
			.spec = &eth_local,
			.mask = items[0].mask,
		},
		[1] = {
			.type = items[1].type,
		}
	};

	remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);
	if (!remote_flow) {
		TAP_LOG(ERR, "Cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &remote_flow->msg;
	if (idx == TAP_REMOTE_TX) {
		if_index = pmd->if_index;
	} else if (idx == TAP_ISOLATE) {
		if_index = pmd->if_index;
		/* Don't be exclusive for this rule, it can be changed later. */
		flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
		isolate_actions[0].type = pmd->flow_isolate ?
					  RTE_FLOW_ACTION_TYPE_DROP :
					  RTE_FLOW_ACTION_TYPE_PASSTHRU;
		actions = isolate_actions;
	} else if (idx == TAP_REMOTE_LOCAL_MAC) {
		/*
		 * eth addr couldn't be set in implicit_rte_flows[] as it is not
		 * known at compile time.
		 */
		memcpy(&eth_local.hdr.dst_addr, &pmd->eth_addr, sizeof(pmd->eth_addr));
		items = items_local;
	}
	tc_init_msg(msg, if_index, RTM_NEWTFILTER, flags);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	/*
	 * The ISOLATE rule is always present and must have a static handle, as
	 * the action is changed whether the feature is enabled (DROP) or
	 * disabled (PASSTHRU).
	 * There is just one REMOTE_PROMISCUOUS rule in all cases. It should
	 * have a static handle such that adding it twice will fail with EEXIST
	 * with any kernel version. Remark: old kernels may falsely accept the
	 * same REMOTE_PROMISCUOUS rules if they had different handles.
	 */
	if (idx == TAP_ISOLATE)
		remote_flow->msg.t.tcm_handle = ISOLATE_HANDLE;
	else if (idx == TAP_REMOTE_PROMISC)
		remote_flow->msg.t.tcm_handle = REMOTE_PROMISCUOUS_HANDLE;
	else
		tap_flow_set_handle(remote_flow);
	if (priv_flow_process(pmd, attr, items, actions, NULL,
			      remote_flow, implicit_rte_flows[idx].mirred)) {
		TAP_LOG(ERR, "rte flow rule validation failed");
		goto fail;
	}
	err = tap_nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		TAP_LOG(ERR, "Failure sending nl request");
		goto fail;
	}
	err = tap_nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		/* Silently ignore re-entering existing rule */
		if (errno == EEXIST)
			goto success;
		TAP_LOG(ERR,
			"Kernel refused TC filter rule creation (%d): %s",
			errno, strerror(errno));
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next);
success:
	return 0;
fail:
	rte_free(remote_flow);
	return -1;
}

/**
 * Remove specific implicit flow rule on the remote device.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to remove.
 *
 * @return -1 if the implicit rule couldn't be destroyed, 0 otherwise.
 */
int tap_flow_implicit_destroy(struct pmd_internals *pmd,
			      enum implicit_rule_index idx)
{
	struct rte_flow *remote_flow;
	int cur_prio = -1;
	int idx_prio = implicit_rte_flows[idx].attr.priority + PRIORITY_OFFSET;

	for (remote_flow = LIST_FIRST(&pmd->implicit_flows);
	     remote_flow;
	     remote_flow = LIST_NEXT(remote_flow, next)) {
		cur_prio = (remote_flow->msg.t.tcm_info >> 16) & PRIORITY_MASK;
		if (cur_prio != idx_prio)
			continue;
		return tap_flow_destroy_pmd(pmd, remote_flow, NULL);
	}
	return 0;
}

/**
 * Destroy all implicit flows.
 *
 * @see rte_flow_flush()
 */
int
tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)
{
	struct rte_flow *remote_flow;

	while (!LIST_EMPTY(&pmd->implicit_flows)) {
		remote_flow = LIST_FIRST(&pmd->implicit_flows);
		if (tap_flow_destroy_pmd(pmd, remote_flow, error) < 0)
			return -1;
	}
	return 0;
}

/**
 * Cleanup when device is closed
 */
void tap_flow_bpf_destroy(struct pmd_internals *pmd __rte_unused)
{
#ifdef HAVE_BPF_RSS
	tap_rss__destroy(pmd->rss);
	pmd->rss = NULL;
#endif
}

#ifdef HAVE_BPF_RSS
/**
 * Enable RSS on tap: create TC rules for queuing.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 *
 * @param[out] error
 *   Pointer to error reporting if not NULL.
 *
 * @return 0 on success, negative value on failure.
 */
static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error)
{
	int err;

	/* Load the BPF program (defined in tap_bpf.h from skeleton) */
	pmd->rss = tap_rss__open_and_load();
	if (pmd->rss == NULL) {
		TAP_LOG(ERR, "Failed to load BPF object: %s", strerror(errno));
		rte_flow_error_set(error, errno, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				   "BPF object could not be loaded");
		return -errno;
	}

	/* Attach the maps defined in BPF program */
	err = tap_rss__attach(pmd->rss);
	if (err < 0) {
		TAP_LOG(ERR, "Failed to attach BPF object: %d", err);
		rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				   "BPF object could not be attached");
		tap_flow_bpf_destroy(pmd);
		return err;
	}

	return 0;
}

/* Default RSS hash key also used by mlx devices */
static const uint8_t rss_hash_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

/**
 * Add RSS hash calculations and queue selection
 *
 * @param[in, out] pmd
 *   Pointer to internal structure. Used to set/get RSS map fd
 *
 * @param[in] rss
 *   Pointer to RSS flow actions
 *
 * @param[out] error
 *   Pointer to error reporting if not NULL.
 *
 * @return 0 on success, negative value on failure
 */
static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,
			   const struct rte_flow_action_rss *rss,
			   struct rte_flow_error *error)
{
	const struct bpf_program *rss_prog = pmd->rss->progs.rss_flow_action;
	struct rss_key rss_entry = { };
	const uint8_t *key_in;
	uint32_t hash_type = 0;
	uint32_t handle = flow->msg.t.tcm_handle;
	unsigned int i;
	int err;

	/* Check supported RSS features */
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			 "non-default RSS hash functions are not supported");
	if (rss->level)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			 "a nonzero RSS encapsulation level is not supported");

	if (rss->queue_num == 0 || rss->queue_num >= TAP_MAX_QUEUES)
		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
					  "invalid number of queues");

	/*
	 * Follow the semantics of RSS key (see rte_ethdev.h)
	 * There are two valid cases:
	 *   1. key_length of zero, and key must be NULL;
	 *      this uses the default driver key.
	 *
	 *   2. key_length is the TAP_RSS_HASH_KEY_SIZE (40 bytes)
	 *      and the key must not be NULL.
	 *
	 * Anything else is an error.
	 */
	if (rss->key_len == 0) {
		if (rss->key != NULL)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
						  &rss->key_len, "RSS hash key length 0");
		key_in = rss_hash_default_key;
	} else {
		if (rss->key_len != TAP_RSS_HASH_KEY_SIZE)
			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
						  NULL, "RSS hash invalid key length");
		if (rss->key == NULL)
			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
						  NULL, "RSS hash key is NULL");
		key_in = rss->key;
	}

	if (rss->types & TAP_RSS_HF_MASK)
		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL, "RSS hash type not supported");

	if (rss->types & (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_TCP))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV4_L3_L4);
	else if (rss->types & (RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV4_L3);

	if (rss->types & (RTE_ETH_RSS_NONFRAG_IPV6_UDP | RTE_ETH_RSS_NONFRAG_IPV6_TCP))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV6_L3_L4);
	else if (rss->types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_IPV6_EX))
		hash_type |= RTE_BIT32(HASH_FIELD_IPV6_L3);

	rss_entry.hash_fields = hash_type;
	rte_convert_rss_key((const uint32_t *)key_in, (uint32_t *)rss_entry.key,
			    TAP_RSS_HASH_KEY_SIZE);

	/* Update RSS map entry with queues */
	rss_entry.nb_queues = rss->queue_num;
	for (i = 0; i < rss->queue_num; i++)
		rss_entry.queues[i] = rss->queue[i];

	/* Add this way for BPF to find entry in map */
	err = bpf_map__update_elem(pmd->rss->maps.rss_map,
				   &handle, sizeof(handle),
				   &rss_entry, sizeof(rss_entry), 0);
	if (err) {
		TAP_LOG(ERR,
			"Failed to update BPF map entry %#x (%d): %s",
			handle, errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"Kernel too old or not configured to support BPF maps updates");

		return -ENOTSUP;
	}

	/* Add actions to mark packet then run the RSS BPF program */
	struct action_data adata[] = {
		{
			.id = "skbedit",
			.skbedit = {
				.skbedit.action = TC_ACT_PIPE,
				.mark = handle,
			},
		},
		{
			.id = "bpf",
			.bpf = {
				.bpf.action = TC_ACT_PIPE,
				.annotation = "tap_rss",
				.bpf_fd = bpf_program__fd(rss_prog),
			},
		},
	};

	return add_actions(flow, RTE_DIM(adata), adata, TCA_FLOWER_ACT);
}
#endif

/**
 * Get rte_flow operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param ops
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
tap_dev_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
		     const struct rte_flow_ops **ops)
{
	*ops = &tap_flow_ops;
	return 0;
}