/*-
 *   BSD LICENSE
 *
 *   Copyright 2017 6WIND S.A.
 *   Copyright 2017 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <errno.h>
#include <string.h>
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_jhash.h>
#include <rte_malloc.h>
#include <rte_eth_tap.h>
#include <tap_flow.h>
#include <tap_autoconf.h>
#include <tap_tcmsgs.h>

#ifndef HAVE_TC_FLOWER
/*
 * For kernels < 4.2, this enum is not defined. Runtime checks will be made to
 * avoid sending TC messages the kernel cannot understand.
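 * These values mirror the TCA_FLOWER_* enum shipped with the flower
 * classifier in the kernel's linux/pkt_cls.h since kernel 4.2.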
 */
enum {
	TCA_FLOWER_UNSPEC,
	TCA_FLOWER_CLASSID,
	TCA_FLOWER_INDEV,
	TCA_FLOWER_ACT,
	TCA_FLOWER_KEY_ETH_DST,		/* ETH_ALEN */
	TCA_FLOWER_KEY_ETH_DST_MASK,	/* ETH_ALEN */
	TCA_FLOWER_KEY_ETH_SRC,		/* ETH_ALEN */
	TCA_FLOWER_KEY_ETH_SRC_MASK,	/* ETH_ALEN */
	TCA_FLOWER_KEY_ETH_TYPE,	/* be16 */
	TCA_FLOWER_KEY_IP_PROTO,	/* u8 */
	TCA_FLOWER_KEY_IPV4_SRC,	/* be32 */
	TCA_FLOWER_KEY_IPV4_SRC_MASK,	/* be32 */
	TCA_FLOWER_KEY_IPV4_DST,	/* be32 */
	TCA_FLOWER_KEY_IPV4_DST_MASK,	/* be32 */
	TCA_FLOWER_KEY_IPV6_SRC,	/* struct in6_addr */
	TCA_FLOWER_KEY_IPV6_SRC_MASK,	/* struct in6_addr */
	TCA_FLOWER_KEY_IPV6_DST,	/* struct in6_addr */
	TCA_FLOWER_KEY_IPV6_DST_MASK,	/* struct in6_addr */
	TCA_FLOWER_KEY_TCP_SRC,		/* be16 */
	TCA_FLOWER_KEY_TCP_DST,		/* be16 */
	TCA_FLOWER_KEY_UDP_SRC,		/* be16 */
	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
};
#endif
#ifndef HAVE_TC_VLAN_ID
enum {
	/* TCA_FLOWER_FLAGS, */
	TCA_FLOWER_KEY_VLAN_ID = TCA_FLOWER_KEY_UDP_DST + 2,	/* be16 */
	TCA_FLOWER_KEY_VLAN_PRIO,	/* u8 */
	TCA_FLOWER_KEY_VLAN_ETH_TYPE,	/* be16 */
};
#endif

struct rte_flow {
	LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */
	struct rte_flow *remote_flow; /* associated remote flow */
	struct nlmsg msg;
};

struct convert_data {
	uint16_t eth_type;
	uint16_t ip_proto;
	uint8_t vlan;
	struct rte_flow *flow;
};

struct remote_rule {
	struct rte_flow_attr attr;
	struct rte_flow_item items[2];
	int mirred;
};

static int tap_flow_create_eth(const struct rte_flow_item *item, void *data);
static int tap_flow_create_vlan(const struct rte_flow_item *item, void *data);
static int tap_flow_create_ipv4(const struct rte_flow_item *item, void *data);
static int tap_flow_create_ipv6(const struct rte_flow_item *item, void *data);
static int tap_flow_create_udp(const struct rte_flow_item *item, void *data);
static int tap_flow_create_tcp(const struct rte_flow_item *item, void *data);
static int
tap_flow_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error);

static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error);

static int
tap_flow_destroy(struct rte_eth_dev *dev,
		 struct rte_flow *flow,
		 struct rte_flow_error *error);

static const struct rte_flow_ops tap_flow_ops = {
	.validate = tap_flow_validate,
	.create = tap_flow_create,
	.destroy = tap_flow_destroy,
	.flush = tap_flow_flush,
};

/* Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}

/* Structure to generate a simple graph of layers supported by the NIC. */
struct tap_flow_items {
	/* Bit-mask corresponding to what is supported for this item. */
	const void *mask;
	const unsigned int mask_sz; /* Bit-mask size in bytes. */
	/*
	 * Bit-mask corresponding to the default mask, if none is provided
	 * along with the item.
	 */
	const void *default_mask;
	/**
	 * Conversion function from rte_flow to netlink attributes.
	 *
	 * @param item
	 *   rte_flow item to convert.
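	 *   The item has already been checked against the supported mask by
	 *   tap_flow_item_validate() when this callback runs.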
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item, void *data);
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/* Graph of supported items and associated actions. */
static const struct tap_flow_items tap_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(
			RTE_FLOW_ITEM_TYPE_VLAN,
			RTE_FLOW_ITEM_TYPE_IPV4,
			RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.default_mask = &rte_flow_item_eth_mask,
		.convert = tap_flow_create_eth,
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.mask = &(const struct rte_flow_item_vlan){
			.tpid = -1,
			/* DEI matching is not supported */
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
			.tci = 0xffef,
#else
			.tci = 0xefff,
#endif
		},
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.default_mask = &rte_flow_item_vlan_mask,
		.convert = tap_flow_create_vlan,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.next_proto_id = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.default_mask = &rte_flow_item_ipv4_mask,
		.convert = tap_flow_create_ipv4,
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					"\xff\xff\xff\xff\xff\xff\xff\xff"
					"\xff\xff\xff\xff\xff\xff\xff\xff",
				},
				.dst_addr = {
					"\xff\xff\xff\xff\xff\xff\xff\xff"
					"\xff\xff\xff\xff\xff\xff\xff\xff",
				},
				.proto = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.default_mask = &rte_flow_item_ipv6_mask,
		.convert = tap_flow_create_ipv6,
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.default_mask = &rte_flow_item_udp_mask,
		.convert = tap_flow_create_udp,
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.default_mask = &rte_flow_item_tcp_mask,
		.convert = tap_flow_create_tcp,
	},
};

static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
	[TAP_REMOTE_LOCAL_MAC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_LOCAL_MAC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_REDIR,
	},
	[TAP_REMOTE_BROADCAST] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCAST,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			},
			.spec = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_BROADCASTV6] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_BROADCASTV6,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
			},
			.spec = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_PROMISC] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_PROMISC,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_ALLMULTI] = {
		.attr = {
			.group = MAX_GROUP,
			.priority = PRIORITY_MASK - TAP_REMOTE_ALLMULTI,
			.ingress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.mask = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			},
			.spec = &(const struct rte_flow_item_eth){
				.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			},
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
	[TAP_REMOTE_TX] = {
		.attr = {
			.group = 0,
			.priority = TAP_REMOTE_TX,
			.egress = 1,
		},
		.items[0] = {
			.type = RTE_FLOW_ITEM_TYPE_VOID,
		},
		.items[1] = {
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		.mirred = TCA_EGRESS_MIRROR,
	},
};

/**
 * Make as many checks as possible on an Ethernet item, and if a flow is
 * provided, fill it appropriately with Ethernet info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_eth(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_ETH].default_mask;
	/* TC does not support eth_type masking. Only accept if exact match. */
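	/*
	 * The flower classifier exposes no TCA_FLOWER_KEY_ETH_TYPE_MASK
	 * attribute, so a partial ethertype mask cannot be expressed.
	 */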
	if (mask->type && mask->type != 0xffff)
		return -1;
	if (!spec)
		return 0;
	/* store eth_type for consistency if ipv4/6 pattern item comes next */
	if (spec->type & mask->type)
		info->eth_type = spec->type;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!is_zero_ether_addr(&spec->dst)) {
		nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST, ETHER_ADDR_LEN,
			   &spec->dst.addr_bytes);
		nlattr_add(&msg->nh,
			   TCA_FLOWER_KEY_ETH_DST_MASK, ETHER_ADDR_LEN,
			   &mask->dst.addr_bytes);
	}
	if (!is_zero_ether_addr(&mask->src)) {
		nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_SRC, ETHER_ADDR_LEN,
			   &spec->src.addr_bytes);
		nlattr_add(&msg->nh,
			   TCA_FLOWER_KEY_ETH_SRC_MASK, ETHER_ADDR_LEN,
			   &mask->src.addr_bytes);
	}
	return 0;
}

/**
 * Make as many checks as possible on a VLAN item, and if a flow is provided,
 * fill it appropriately with VLAN info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_vlan(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_VLAN].default_mask;
	/* TC does not support tpid masking. Only accept if exact match. */
	if (mask->tpid && mask->tpid != 0xffff)
		return -1;
	/* Double-tagging not supported. */
	if (spec && mask->tpid && spec->tpid != htons(ETH_P_8021Q))
		return -1;
	info->vlan = 1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_8021Q));
#define VLAN_PRIO(tci) ((tci) >> 13)
#define VLAN_ID(tci) ((tci) & 0xfff)
	if (!spec)
		return 0;
	if (spec->tci) {
		uint16_t tci = ntohs(spec->tci) & mask->tci;
		uint16_t prio = VLAN_PRIO(tci);
		uint16_t vid = VLAN_ID(tci); /* VLAN ID is 12 bits wide */

		if (prio)
			nlattr_add8(&msg->nh, TCA_FLOWER_KEY_VLAN_PRIO, prio);
		if (vid)
			nlattr_add16(&msg->nh, TCA_FLOWER_KEY_VLAN_ID, vid);
	}
	return 0;
}

/**
 * Make as many checks as possible on an IPv4 item, and if a flow is provided,
 * fill it appropriately with IPv4 info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
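 *
 * @note
 *   Only the source/destination addresses and the L4 protocol can be
 *   matched; the supported IPv4 mask above rejects any other header field.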
 */
static int
tap_flow_create_ipv4(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV4].default_mask;
	/* check that previous eth type is compatible with ipv4 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IP))
		return -1;
	/* store ip_proto for consistency if udp/tcp pattern item comes next */
	if (spec)
		info->ip_proto = spec->hdr.next_proto_id;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IP);
	if (!spec)
		return 0;
	if (spec->hdr.dst_addr) {
		nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
			     spec->hdr.dst_addr);
		nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
			     mask->hdr.dst_addr);
	}
	if (spec->hdr.src_addr) {
		nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
			     spec->hdr.src_addr);
		nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			     mask->hdr.src_addr);
	}
	if (spec->hdr.next_proto_id)
		nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO,
			    spec->hdr.next_proto_id);
	return 0;
}

/**
 * Make as many checks as possible on an IPv6 item, and if a flow is provided,
 * fill it appropriately with IPv6 info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_ipv6(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct rte_flow *flow = info->flow;
	uint8_t empty_addr[16] = { 0 };
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_IPV6].default_mask;
	/* check that previous eth type is compatible with ipv6 */
	if (info->eth_type && info->eth_type != htons(ETH_P_IPV6))
		return -1;
	/* store ip_proto for consistency if udp/tcp pattern item comes next */
	if (spec)
		info->ip_proto = spec->hdr.proto;
	if (!flow)
		return 0;
	msg = &flow->msg;
	if (!info->eth_type)
		info->eth_type = htons(ETH_P_IPV6);
	if (!spec)
		return 0;
	if (memcmp(spec->hdr.dst_addr, empty_addr, 16)) {
		nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
			   sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
		nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
			   sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
	}
	if (memcmp(spec->hdr.src_addr, empty_addr, 16)) {
		nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
			   sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
		nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			   sizeof(mask->hdr.src_addr), &mask->hdr.src_addr);
	}
	if (spec->hdr.proto)
		nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, spec->hdr.proto);
	return 0;
}

/**
 * Make as many checks as possible on a UDP item, and if a flow is provided,
 * fill it appropriately with UDP info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_udp(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_UDP].default_mask;
	/* check that previous ip_proto is compatible with udp */
	if (info->ip_proto && info->ip_proto != IPPROTO_UDP)
		return -1;
	/* TC does not support UDP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
	if (!spec)
		return 0;
	if (spec->hdr.dst_port & mask->hdr.dst_port)
		nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
			     spec->hdr.dst_port);
	if (spec->hdr.src_port & mask->hdr.src_port)
		nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
			     spec->hdr.src_port);
	return 0;
}

/**
 * Make as many checks as possible on a TCP item, and if a flow is provided,
 * fill it appropriately with TCP info.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] data
 *   Additional data structure to tell next layers we've been here.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
tap_flow_create_tcp(const struct rte_flow_item *item, void *data)
{
	struct convert_data *info = (struct convert_data *)data;
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct rte_flow *flow = info->flow;
	struct nlmsg *msg;

	/* use default mask if none provided */
	if (!mask)
		mask = tap_flow_items[RTE_FLOW_ITEM_TYPE_TCP].default_mask;
	/* check that previous ip_proto is compatible with tcp */
	if (info->ip_proto && info->ip_proto != IPPROTO_TCP)
		return -1;
	/* TC does not support TCP port masking. Only accept if exact match. */
	if ((mask->hdr.src_port && mask->hdr.src_port != 0xffff) ||
	    (mask->hdr.dst_port && mask->hdr.dst_port != 0xffff))
		return -1;
	if (!flow)
		return 0;
	msg = &flow->msg;
	nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
	if (!spec)
		return 0;
	if (spec->hdr.dst_port & mask->hdr.dst_port)
		nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
			     spec->hdr.dst_port);
	if (spec->hdr.src_port & mask->hdr.src_port)
		nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
			     spec->hdr.src_port);
	return 0;
}

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param size
 *   Bit-mask size in bytes.
 * @param[in] supported_mask
 *   Bit-mask covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param[in] default_mask
 *   Default bit-mask to use if none is provided in \item.
 *
 * @return
 *   0 on success.
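 *   A non-zero value otherwise (unsupported field, or a spec/last pair
 *   describing a range, which TC cannot express).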
 */
static int
tap_flow_item_validate(const struct rte_flow_item *item,
		       unsigned int size,
		       const uint8_t *supported_mask,
		       const uint8_t *default_mask)
{
	int ret = 0;

	/* An empty layer is allowed, as long as all fields are NULL */
	if (!item->spec && (item->mask || item->last))
		return -1;
	/* Is the item spec compatible with what the NIC supports? */
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
		/* Is the default mask compatible with what the NIC supports? */
		for (i = 0; i < size; i++)
			if ((default_mask[i] | supported_mask[i]) !=
			    supported_mask[i])
				return -1;
	}
	/* Is the item last compatible with what the NIC supports? */
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/* Is the item mask compatible with what the NIC supports? */
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | supported_mask[i]) != supported_mask[i])
				return -1;
	}
	/**
	 * Once masked, are item spec and item last equal?
	 * TC does not support range so anything else is invalid.
	 */
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = default_mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}

/**
 * Transform a DROP/PASSTHRU action item in the provided flow for TC.
 *
 * @param[in, out] flow
 *   Flow to be filled.
 * @param[in] action
 *   Appropriate action to be set in the TCA_GACT_PARMS structure.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
add_action_gact(struct rte_flow *flow, int action)
{
	struct nlmsg *msg = &flow->msg;
	size_t act_index = 1;
	struct tc_gact p = {
		.action = action
	};

	if (nlattr_nested_start(msg, TCA_FLOWER_ACT) < 0)
		return -1;
	if (nlattr_nested_start(msg, act_index++) < 0)
		return -1;
	nlattr_add(&msg->nh, TCA_ACT_KIND, sizeof("gact"), "gact");
	if (nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
		return -1;
	nlattr_add(&msg->nh, TCA_GACT_PARMS, sizeof(p), &p);
	nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
	nlattr_nested_finish(msg); /* nested act_index */
	nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
	return 0;
}

/**
 * Transform a MIRRED action item in the provided flow for TC.
 *
 * @param[in, out] flow
 *   Flow to be filled.
 * @param[in] ifindex
 *   Netdevice ifindex to which the packet is mirrored or redirected.
 * @param[in] action_type
 *   Either TCA_EGRESS_REDIR for redirection or TCA_EGRESS_MIRROR for mirroring.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
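 *
 * @note
 *   With TCA_EGRESS_MIRROR the original packet stays in the filter chain
 *   (TC_ACT_PIPE below), whereas TCA_EGRESS_REDIR steals it (TC_ACT_STOLEN).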
 */
static int
add_action_mirred(struct rte_flow *flow, uint16_t ifindex, uint16_t action_type)
{
	struct nlmsg *msg = &flow->msg;
	size_t act_index = 1;
	struct tc_mirred p = {
		.eaction = action_type,
		.ifindex = ifindex,
	};

	if (nlattr_nested_start(msg, TCA_FLOWER_ACT) < 0)
		return -1;
	if (nlattr_nested_start(msg, act_index++) < 0)
		return -1;
	nlattr_add(&msg->nh, TCA_ACT_KIND, sizeof("mirred"), "mirred");
	if (nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
		return -1;
	if (action_type == TCA_EGRESS_MIRROR)
		p.action = TC_ACT_PIPE;
	else /* REDIRECT */
		p.action = TC_ACT_STOLEN;
	nlattr_add(&msg->nh, TCA_MIRRED_PARMS, sizeof(p), &p);
	nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
	nlattr_nested_finish(msg); /* nested act_index */
	nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
	return 0;
}

/**
 * Transform a QUEUE action item in the provided flow for TC.
 *
 * @param[in, out] flow
 *   Flow to be filled.
 * @param[in] queue
 *   Queue id to use.
 *
 * @return
 *   0 if checks are alright, -1 otherwise.
 */
static int
add_action_skbedit(struct rte_flow *flow, uint16_t queue)
{
	struct nlmsg *msg = &flow->msg;
	size_t act_index = 1;
	struct tc_skbedit p = {
		.action = TC_ACT_PIPE
	};

	if (nlattr_nested_start(msg, TCA_FLOWER_ACT) < 0)
		return -1;
	if (nlattr_nested_start(msg, act_index++) < 0)
		return -1;
	nlattr_add(&msg->nh, TCA_ACT_KIND, sizeof("skbedit"), "skbedit");
	if (nlattr_nested_start(msg, TCA_ACT_OPTIONS) < 0)
		return -1;
	nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS, sizeof(p), &p);
	nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, queue);
	nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */
	nlattr_nested_finish(msg); /* nested act_index */
	nlattr_nested_finish(msg); /* nested TCA_FLOWER_ACT */
	return 0;
}

/**
 * Validate a flow supported by TC.
 * If flow param is not NULL, then also fill the netlink message inside.
 *
 * @param pmd
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 * @param[in] mirred
 *   If set to TCA_EGRESS_REDIR, provided actions will be replaced with a
 *   redirection to the tap netdevice, and the TC rule will be configured
 *   on the remote netdevice in pmd.
 *   If set to TCA_EGRESS_MIRROR, provided actions will be replaced with a
 *   mirroring to the tap netdevice, and the TC rule will be configured
 *   on the remote netdevice in pmd. Matching packets will thus be duplicated.
 *   If set to 0, the standard behavior is to be used: set correct actions for
 *   the TC rule, and apply it on the tap netdevice.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_process(struct pmd_internals *pmd,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct rte_flow *flow,
		  int mirred)
{
	const struct tap_flow_items *cur_item = tap_flow_items;
	struct convert_data data = {
		.eth_type = 0,
		.ip_proto = 0,
		.flow = flow,
	};
	int action = 0; /* Only one action authorized for now */

	if (attr->group > MAX_GROUP) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
			NULL, "group value too big: cannot exceed 15");
		return -rte_errno;
	}
	if (attr->priority > MAX_PRIORITY) {
		rte_flow_error_set(
			error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		return -rte_errno;
	} else if (flow) {
		uint16_t group = attr->group << GROUP_SHIFT;
		uint16_t prio = group | (attr->priority + PRIORITY_OFFSET);
		flow->msg.t.tcm_info = TC_H_MAKE(prio << 16,
						 flow->msg.t.tcm_info);
	}
	if (flow) {
		if (mirred) {
			/*
			 * If attr->ingress, the rule applies on remote ingress
			 * to match incoming packets.
			 * If attr->egress, the rule applies on tap ingress (as
			 * seen from the kernel) to deal with packets going out
			 * from the DPDK app.
			 */
			flow->msg.t.tcm_parent = TC_H_MAKE(TC_H_INGRESS, 0);
		} else {
			/* Standard rule on tap egress (kernel standpoint). */
			flow->msg.t.tcm_parent =
				TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
		}
		/* use flower filter type */
		nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower");
		if (nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0)
			goto exit_item_not_supported;
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct tap_flow_items *token = NULL;
		unsigned int i;
		int err = 0;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &tap_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = tap_flow_item_validate(
			items, cur_item->mask_sz,
			(const uint8_t *)cur_item->mask,
			(const uint8_t *)cur_item->default_mask);
		if (err)
			goto exit_item_not_supported;
		if (flow && cur_item->convert) {
			if (!pmd->flower_vlan_support &&
			    cur_item->convert == tap_flow_create_vlan)
				goto exit_item_not_supported;
			err = cur_item->convert(items, &data);
			if (err)
				goto exit_item_not_supported;
		}
	}
	if (flow) {
		if (pmd->flower_vlan_support && data.vlan) {
			nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
				     htons(ETH_P_8021Q));
			nlattr_add16(&flow->msg.nh,
				     TCA_FLOWER_KEY_VLAN_ETH_TYPE,
				     data.eth_type ?
				     data.eth_type : htons(ETH_P_ALL));
		} else if (data.eth_type) {
			nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
				     data.eth_type);
		}
	}
	if (mirred && flow) {
		uint16_t if_index = pmd->if_index;

		/*
		 * If attr->egress && mirred, then this is a special
		 * case where the rule must be applied on the tap, to
		 * redirect packets coming from the DPDK app, out
		 * through the remote netdevice.
		 */
		if (attr->egress)
			if_index = pmd->remote_if_index;
		if (add_action_mirred(flow, if_index, mirred) < 0)
			goto exit_action_not_supported;
		else
			goto end;
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		int err = 0;

		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow)
				err = add_action_gact(flow, TC_ACT_SHOT);
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_PASSTHRU) {
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (flow)
				err = add_action_gact(flow, TC_ACT_UNSPEC);
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			if (action)
				goto exit_action_not_supported;
			action = 1;
			if (!queue || (queue->index >= pmd->nb_queues))
				goto exit_action_not_supported;
			if (flow)
				err = add_action_skbedit(flow, queue->index);
		} else {
			goto exit_action_not_supported;
		}
		if (err)
			goto exit_action_not_supported;
	}
end:
	if (flow)
		nlattr_nested_finish(&flow->msg); /* nested TCA_OPTIONS */
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}

/**
 * Validate a flow.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
static int
tap_flow_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return priv_flow_process(pmd, attr, items, actions, error, NULL, 0);
}

/**
 * Set a unique handle in a flow.
 *
 * The kernel supports TC rules with equal priority, as long as they use the
 * same matching fields (e.g.: dst mac and ipv4) with different values (and
 * full mask to ensure no collision is possible).
 * In those rules, the handle (uint32_t) is the part that would identify
 * specifically each rule.
 *
 * On 32-bit architectures, the handle can simply be the flow's pointer
 * address. On 64-bit architectures, we rely on jhash(flow) to find a
 * (sufficiently) unique handle.
 *
 * @param[in, out] flow
 *   The flow that needs its handle set.
 */
static void
tap_flow_set_handle(struct rte_flow *flow)
{
	uint32_t handle = 0;

	if (sizeof(flow) > 4)
		handle = rte_jhash(&flow, sizeof(flow), 1);
	else
		handle = (uintptr_t)flow;
	/* must be at least 1 to avoid letting the kernel choose one for us */
	if (!handle)
		handle = 1;
	flow->msg.t.tcm_handle = handle;
}

/**
 * Create a flow.
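 *
 * If a remote netdevice is configured, an equivalent matching rule is also
 * created there to redirect matching packets to the tap netdevice.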
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
static struct rte_flow *
tap_flow_create(struct rte_eth_dev *dev,
		const struct rte_flow_attr *attr,
		const struct rte_flow_item items[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *remote_flow = NULL;
	struct rte_flow *flow = NULL;
	struct nlmsg *msg = NULL;
	int err;

	if (!pmd->if_index) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL,
				   "can't create rule, ifindex not found");
		goto fail;
	}
	/*
	 * No rules configured through standard rte_flow should be set on the
	 * priorities used by implicit rules.
	 */
	if ((attr->group == MAX_GROUP) &&
	    attr->priority > (MAX_PRIORITY - TAP_REMOTE_MAX_IDX)) {
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			NULL, "priority value too big");
		goto fail;
	}
	flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &flow->msg;
	tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER,
		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	tap_flow_set_handle(flow);
	if (priv_flow_process(pmd, attr, items, actions, error, flow, 0))
		goto fail;
	err = nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto fail;
	}
	err = nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		RTE_LOG(ERR, PMD,
			"Kernel refused TC filter rule creation (%d): %s\n",
			errno, strerror(errno));
		rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "overlapping rules");
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->flows, flow, next);
	/**
	 * If a remote device is configured, a TC rule with identical items for
	 * matching must be set on that device, with a single action: redirect
	 * to the local pmd->if_index.
	 */
	if (pmd->remote_if_index) {
		remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
		if (!remote_flow) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
				"cannot allocate memory for rte_flow");
			goto fail;
		}
		msg = &remote_flow->msg;
		/* set the rule if_index for the remote netdevice */
		tc_init_msg(
			msg, pmd->remote_if_index, RTM_NEWTFILTER,
			NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
		msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
		tap_flow_set_handle(remote_flow);
		if (priv_flow_process(pmd, attr, items, NULL,
				      error, remote_flow, TCA_EGRESS_REDIR)) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "rte flow rule validation failed");
			goto fail;
		}
		err = nl_send(pmd->nlsk_fd, &msg->nh);
		if (err < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto fail;
		}
		err = nl_recv_ack(pmd->nlsk_fd);
		if (err < 0) {
			RTE_LOG(ERR, PMD,
				"Kernel refused TC filter rule creation (%d): %s\n",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "overlapping rules");
			goto fail;
		}
		flow->remote_flow = remote_flow;
	}
	return flow;
fail:
	if (remote_flow)
		rte_free(remote_flow);
	if (flow)
		rte_free(flow);
	return NULL;
}

/**
 * Destroy a flow using a pointer to pmd_internals.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] flow
 *   Pointer to the flow to destroy.
 * @param[in, out] error
 *   Pointer to the flow error handler.
 *
 * @return 0 if the flow could be destroyed, -1 otherwise.
 */
static int
tap_flow_destroy_pmd(struct pmd_internals *pmd,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	struct rte_flow *remote_flow = flow->remote_flow;
	int ret = 0;

	LIST_REMOVE(flow, next);
	flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

	ret = nl_send(pmd->nlsk_fd, &flow->msg.nh);
	if (ret < 0) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "couldn't send request to kernel");
		goto end;
	}
	ret = nl_recv_ack(pmd->nlsk_fd);
	/* If errno is ENOENT, the rule is already no longer in the kernel. */
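	/* In that case the deletion is considered successful. */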
	if (ret < 0 && errno == ENOENT)
		ret = 0;
	if (ret < 0) {
		RTE_LOG(ERR, PMD,
			"Kernel refused TC filter rule deletion (%d): %s\n",
			errno, strerror(errno));
		rte_flow_error_set(
			error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
			"couldn't receive kernel ack to our request");
		goto end;
	}
	if (remote_flow) {
		remote_flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		remote_flow->msg.nh.nlmsg_type = RTM_DELTFILTER;

		ret = nl_send(pmd->nlsk_fd, &remote_flow->msg.nh);
		if (ret < 0) {
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure sending nl request");
			goto end;
		}
		ret = nl_recv_ack(pmd->nlsk_fd);
		if (ret < 0 && errno == ENOENT)
			ret = 0;
		if (ret < 0) {
			RTE_LOG(ERR, PMD,
				"Kernel refused TC filter rule deletion (%d): %s\n",
				errno, strerror(errno));
			rte_flow_error_set(
				error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				NULL, "Failure trying to receive nl ack");
			goto end;
		}
	}
end:
	if (remote_flow)
		rte_free(remote_flow);
	rte_free(flow);
	return ret;
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
static int
tap_flow_destroy(struct rte_eth_dev *dev,
		 struct rte_flow *flow,
		 struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	return tap_flow_destroy_pmd(pmd, flow, error);
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	struct pmd_internals *pmd = dev->data->dev_private;
	struct rte_flow *flow;

	while (!LIST_EMPTY(&pmd->flows)) {
		flow = LIST_FIRST(&pmd->flows);
		if (tap_flow_destroy(dev, flow, error) < 0)
			return -1;
	}
	return 0;
}

/**
 * Add an implicit flow rule on the remote device to make sure traffic gets to
 * the tap netdevice from there.
 *
 * @param pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to apply.
 *
 * @return -1 if the rule couldn't be applied, 0 otherwise.
 */
int tap_flow_implicit_create(struct pmd_internals *pmd,
			     enum implicit_rule_index idx)
{
	struct rte_flow_item *items = implicit_rte_flows[idx].items;
	struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
	struct rte_flow_item_eth eth_local = { .type = 0 };
	uint16_t if_index = pmd->remote_if_index;
	struct rte_flow *remote_flow = NULL;
	struct nlmsg *msg = NULL;
	int err = 0;
	struct rte_flow_item items_local[2] = {
		[0] = {
			.type = items[0].type,
			.spec = &eth_local,
			.mask = items[0].mask,
		},
		[1] = {
			.type = items[1].type,
		}
	};

	remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
	if (!remote_flow) {
		RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow");
		goto fail;
	}
	msg = &remote_flow->msg;
	if (idx == TAP_REMOTE_TX) {
		if_index = pmd->if_index;
	} else if (idx == TAP_REMOTE_LOCAL_MAC) {
		/*
		 * eth addr couldn't be set in implicit_rte_flows[] as it is
		 * not known at compile time.
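		 * It is copied at run time from pmd->eth_addr into the local
		 * items_local/eth_local copy instead.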
		 */
		memcpy(&eth_local.dst, &pmd->eth_addr, sizeof(pmd->eth_addr));
		items = items_local;
	}
	tc_init_msg(msg, if_index, RTM_NEWTFILTER,
		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
	tap_flow_set_handle(remote_flow);
	if (priv_flow_process(pmd, attr, items, NULL, NULL,
			      remote_flow, implicit_rte_flows[idx].mirred)) {
		RTE_LOG(ERR, PMD, "rte flow rule validation failed\n");
		goto fail;
	}
	err = nl_send(pmd->nlsk_fd, &msg->nh);
	if (err < 0) {
		RTE_LOG(ERR, PMD, "Failure sending nl request");
		goto fail;
	}
	err = nl_recv_ack(pmd->nlsk_fd);
	if (err < 0) {
		RTE_LOG(ERR, PMD,
			"Kernel refused TC filter rule creation (%d): %s\n",
			errno, strerror(errno));
		goto fail;
	}
	LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next);
	return 0;
fail:
	if (remote_flow)
		rte_free(remote_flow);
	return -1;
}

/**
 * Remove a specific implicit flow rule on the remote device.
 *
 * @param[in, out] pmd
 *   Pointer to private structure.
 * @param[in] idx
 *   The idx in the implicit_rte_flows array specifying which rule to remove.
 *
 * @return -1 if the rule couldn't be removed, 0 otherwise.
 */
int tap_flow_implicit_destroy(struct pmd_internals *pmd,
			      enum implicit_rule_index idx)
{
	struct rte_flow *remote_flow;
	int cur_prio = -1;
	int idx_prio = implicit_rte_flows[idx].attr.priority + PRIORITY_OFFSET;

	for (remote_flow = LIST_FIRST(&pmd->implicit_flows);
	     remote_flow;
	     remote_flow = LIST_NEXT(remote_flow, next)) {
		cur_prio = (remote_flow->msg.t.tcm_info >> 16) & PRIORITY_MASK;
		if (cur_prio != idx_prio)
			continue;
		return tap_flow_destroy_pmd(pmd, remote_flow, NULL);
	}
	return 0;
}

/**
 * Destroy all implicit flows.
 *
 * @see rte_flow_flush()
 */
int
tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)
{
	struct rte_flow *remote_flow;

	while (!LIST_EMPTY(&pmd->implicit_flows)) {
		remote_flow = LIST_FIRST(&pmd->implicit_flows);
		if (tap_flow_destroy_pmd(pmd, remote_flow, error) < 0)
			return -1;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
tap_dev_filter_ctrl(struct rte_eth_dev *dev,
		    enum rte_filter_type filter_type,
		    enum rte_filter_op filter_op,
		    void *arg)
{
	struct pmd_internals *pmd = dev->data->dev_private;

	if (!pmd->flower_support)
		return -ENOTSUP;
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &tap_flow_ops;
		return 0;
	default:
		RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported",
			(void *)dev, filter_type);
	}
	return -EINVAL;
}